about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--scripts/rqtl2_wrapper.R521
1 files changed, 521 insertions, 0 deletions
diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R
new file mode 100644
index 0000000..1ff3e18
--- /dev/null
+++ b/scripts/rqtl2_wrapper.R
@@ -0,0 +1,521 @@
+# This file contains  an  implementation of qtl mapping using r-qtl2
+# For r-qtl1 implementation see: ./rqtl_wrapper.R
+
+library(qtl2)
+library(rjson)
+library(stringi)
+library(optparse)
+
+# TODO add mapping function as a parameter:
+
+option_list <- list(
+   make_option(c("-d", "--directory"), action = "store", default = NULL, type = "character", help="Temporary working directory: should also host the input file ."),   
+make_option(c("-i", "--input_file"), action="store", default=NULL, type='character', help="a yaml or json file with required data to create the cross file"),
+ make_option(c("-o", "--output_file"), action="store", default=NULL, type='character', help="a file path  of where to write the output json results"),
+   make_option(c("-c", "--cores"), type="integer", default=1, help="No of cores to use while making
+  computation"),
+  make_option(c("-p", "--nperm"), type="integer", default= 1,  action="store_true", help="No  of permutations "), 
+ make_option(c("-m", "--method"), action = "store", default = "HK", type = "character", help="Scan Mapping Method - HK (Haley Knott), LMM( Linear Mixed Model ), LOCO (Leave one Chromosome Out)"),
+  make_option(c("--pstrata"), action="store_true", default=NULL, help="Use permutation strata"),
+    make_option(c("-t", "--threshold"), type="integer", default= 1,  action="store_true", help="Minimum LOD score for a Peak")
+)
+
+opt_parser = OptionParser(option_list=option_list);
+opt <- parse_args(opt_parser)
+NO_OF_CORES = opt$cores
+SCAN_METHOD = opt$method
+NO_OF_PERMUTATION = opt$nperm
+
+# Step: check for mandatory file paths 
+# NOTE this is the working dir where the cross file will be generated 
+# NOTE this is where the cross file is generated
+
+if(is.null(opt$directory) || !(dir.exists(opt$directory))){
+# check if directory exists
+  print_help(opt_parser)
+ stop("The working directory does not exists or is NULL\n")
+}
+
+INPUT_FILE_PATH = opt$input_file
+OUTPUT_FILE_PATH = opt$output_file 
+if (!(file.exists(INPUT_FILE_PATH))) {
+  print_help(opt_parser)
+  stop("The input file", INPUT_FILE_PATH, " you provided does not exists\n")
+} else {
+  cat("Input file exists Reading the input file .... \n")
+}
+if (!(file.exists(OUTPUT_FILE_PATH))) {
+  print_help(opt_parser)
+  stop("The output file  ",OUTPUT_FILE_PATH, " you provided does not exists\n")
+} else {
+  cat("Output file exists ...", OUTPUT_FILE_PATH, "\n")
+}
+
+# Utility function to generate random file names of size n:
+genRandomFileName <- function(prefix, string_size = 9 , file_ext = ".txt") {
+  randStr = paste(prefix, stri_rand_strings(1, string_size, pattern = "[A-Za-z0-9]"), sep =
+                    "_")
+  return(paste(randStr, file_ext, sep = ""))
+}
+
+
+# Step: Generate the control file name
+
+control_file_path  <- file.path(opt$directory,
+                                genRandomFileName(prefix = "control", file_ext = ".json"))
+cat("Generated the control file path at ", control_file_path, "\n")
+
+
+# Step: Reading and Parsing the input file
+cat("Reading and parsing the input file \n")
+json_data  = fromJSON(file = INPUT_FILE_PATH)
+if (is.null(json_data$sep)){
+cat("Using ',' as a default sep for cross file\n")
+json_data$sep = ","
+}
+if (is.null(json_data$na.strings)){
+cat("Using '-' and 'NA' as the default na.strings\n")
+ json_data$na.strings = c("-" , "NA")
+}
+default_keys = c(
+                "geno_transposed", "founder_geno_transposed",
+                "pheno_transposed" , "covar_transposed",
+		"phenocovar_transposed")
+
+for (item in default_keys) {
+if (!(item %in% names(json_data))){
+  cat("Using FALSE as default parameter for ", item, "\n")
+  json_data[item] =  FALSE
+}
+}
+
+
+
+# Note the files below should be in the same directory as the location for the crosss file
+generate_cross_object  <- function(control_file_path, json_data) {
+ # function to write the cross object from a json data object
+  return (
+    write_control_file(
+      control_file_path,
+      crosstype = json_data$crosstype,
+      geno_file = json_data$geno_file,
+      pheno_file = json_data$pheno_file,
+      gmap_file = json_data$geno_map_file,
+      pmap_file = json_data$pheno_map_file,
+      phenocovar_file = json_data$phenocovar_file,
+      geno_codes = json_data$geno_codes,
+      alleles = json_data$alleles,
+      na.strings = json_data$na.strings,
+      geno_transposed = json_data$geno_transposed,
+      sex_file = json_data$sex_file,
+      founder_geno_file = json_data$founder_geno_file,
+      covar_file = json_data$covar_file,
+      sex_covar = json_data$sex_covar,
+      sex_codes = json_data$sex_codes,
+      crossinfo_file = json_data$crossinfo_file,
+      crossinfo_covar = json_data$crossinfo_covar,
+      crossinfo_codes = json_data$crossinfo_codes,
+      xchr = json_data$xchr,
+      overwrite = TRUE
+    )
+  )
+}
+
+# Step: generate the cross file
+cat("Generating the cross object at ", control_file_path, "\n")
+generate_cross_object(control_file_path, json_data)
+
+cat("reading the cross object from", control_file_path, "\n")
+cross  <- read_cross2(control_file_path, quiet = FALSE) # replace this with a dynamic path
+# check integrity of the cross
+cat("Check the integrity of the cross object")
+check_cross2(cross)
+if (check_cross2(cross)) {
+  print("Cross meets required specifications for a cross")
+} else {
+  print("Cross does not meet required specifications")
+}
+
+
+# Cross Summarys
+cat("A Summary about the Cross You Provided\n")
+summary(cross)
+n_ind(cross)
+n_chr(cross)
+cat("names of markers in the  object\n")
+marker_names(cross)
+cat("names of phenotypes in a the object")
+pheno_names(cross)
+cat("IDs for all individuals in the cross cross object that have genotype data\n")
+ind_ids_geno(cross)
+cat(" IDs for all individuals in the cross object that have phenotype data")
+ind_ids_pheno(cross)
+cat("Name of the founder Strains/n")
+founders(cross)
+
+
+# Function for  computing the genetic probabilities
+perform_genetic_pr <- function(cross,
+                               cores =  NO_OF_CORES,
+			       step=1,
+			       map=NULL,
+			       map_function=c("haldane", "kosambi", "c-f", "morgan"),
+                               error_prob = 0.002
+                               ) {
+    #' Function to calculate the  genetic probabilities
+    #' @description function to perform genetic probabilities
+    #' @param  cores number no of cores to use Defaults to "1"
+    #' @param map  Genetic map of markers.  defaults to "NONE"
+    #' @param  use_pseudomarkers option to insert pseudo markers in the gmap default "FALSE"
+    #' @param error_prob
+    #' @param map_function Character string indicating the map function to use to convert genetic
+    #' @param step for  default "1"
+    #' @return a list of three-dimensional arrays of probabilities, individuals x genotypes x pst
+
+  return(calc_genoprob(cross, map=map,
+                       error_prob=error_prob, map_function=map_function,
+		       quiet=FALSE, cores=cores)) 
+}
+
+
+#Step:  insert pseudomarkers to genetic map
+# TODO need to review this to match rqtl1
+cat("Inserting pseudomarkers to the genetic map with steps", 1 , "and stepwidth" , "fixed\n")
+MAP <- insert_pseudomarkers(cross$gmap, step= 1, stepwidth = "fixed", cores = NO_OF_CORES)
+
+
+# Step: calculate the genetic probabilities
+cat("Calculating the genetic probabilities\n")
+Pr = perform_genetic_pr(cross, map = MAP)
+
+
+
+
+# Step: perform allele probabilites if cross ways
+if (cross$crosstype == "4way"){
+ cat("Calculating Allele Genetic probability for 4way cross\n")
+  aPr <- genoprob_to_alleleprob(pr)
+}
+
+#Function to  Calculate genotyping error LOD scores
+cat("Calculating the  genotype error LOD scores\n")
+error_lod <- calc_errorlod(cross, Pr, quiet = FALSE, cores = NO_OF_CORES)
+# combine into one matrix
+error_lod <- do.call("cbind", error_lod)
+
+
+
+
+## grab phenotypes and covariates; ensure that covariates have names attribute
+# TODO rework on this 
+cat("Getting the phenotypes and covariates\n")
+pheno <- cross$pheno
+covar <- match(cross$covar$sex, c("f", "m")) # make numeric
+covar
+
+if (!is.null(covar)){
+ names(covar) <- rownames(cross$covar)
+ 
+}
+print("The covariates are")
+covar
+
+Xcovar <- get_x_covar(cross)
+print("The Xcovar are ")
+print(Xcovar)
+
+#  Function to calculate the kinship 
+get_kinship <- function(probability, method="LMM"){
+if (opt$method == "LMM"){
+    kinship = calc_kinship(genome_prob)
+} else if (opt$method == "LOCO"){
+    kinship = calc_kinship(genome_prob, "loco")
+}else {
+ kinship = NULL
+}
+}
+
+
+cat("Calculating the kinship for the genetic probability\n")
+if (cross$crosstype == "4way"){
+  kinship <- get_kinship(aPr, opt$method)
+} else {
+   kinship <- get_kinship(Pr, "loco")
+}
+
+
+
+# Function to perform genome scan
+perform_genome_scan <- function(cross,
+                                genome_prob,
+                                method,
+                                addcovar = NULL,
+				intcovar = NULL,
+				kinship = NULL,
+				model = c("normal","binary"),
+                                Xcovar = NULL) {
+    #' perform genome scan
+    #' @description  perform scan1 using haley-knott regression, perform scan1 using haley-knott      #'  or linear model, or LOCO linear model
+    #' the cross object required to pull the pheno
+    #' @param  method to method to perform scan1 either by haley-knott regression(HL),
+    #' linear mixed model(LMM) or , for the LOCO method(LOCO)
+    #' @param intcovar A numeric optional matrix of interactive covariates.
+    #' @param addcovar An optional numeric matrix of additive covariates.
+    #' @param Xcovar An optional numeric matrix with additional additive covariates used for null     #'  used for null hypothesis when scanning the X chromosome.
+    #' @param model Indicates whether to use a normal model (least squares) or binary model
+    #' @return An object of class "scan1"
+
+
+  if (method == "LMM") {
+    # provide parameters for this
+    cat("Performing scan1 using Linear mixed model\n")
+    out  <- scan1(
+      genome_prob,
+      cross$pheno,
+      kinship = kinship,
+      addcovar = covar,
+      Xcovar = Xcovar,
+      intcovar = intcovar,
+      model = model,
+      cores = NO_OF_CORES
+    )
+  }  else if (method == "LOCO") {
+    cat("Performing scan1 using Leave one chromosome out\n")
+    out <- scan1(
+      genome_prob,
+      cross$pheno,
+      kinship = kinship,
+      addcovar = covar,
+      intcovar = intcovar,
+      model = model,
+      Xcovar = Xcovar,
+      cores = NO_OF_CORES
+    )
+  }
+  
+  else if (method == "HK"){
+    cat("Performing scan1 using Haley Knott\n")
+    out <- scan1(genome_prob,
+                 cross$pheno,
+                 addcovar = NULL,
+		 intcovar = intcovar,
+		 model = model,
+                 Xcovar = Xcovar,
+		 cores = NO_OF_CORES		 
+		 )
+  }
+
+
+  return (out)
+}
+
+# Perform the genome scan for the cross object
+if (cross$crosstype == "4way"){
+  sex <- (cross$covar$Sex == "male")*1
+  names(sex) <- rownames(cross$covar)
+  sex <- setNames( (cross$covar$Sex == "male")*1, rownames(cross$covar))
+  scan_results <- perform_genome_scan(aPr, cross, kinship=kinship, method = "LOCO", addcovar = sex)  
+} else {
+  scan_results <- perform_genome_scan(cross = cross,
+                               genome_prob = Pr,
+			       kinship = kinship,
+                               method = SCAN_METHOD)
+}
+# function plot for the LOD scores from  performing the genome scan
+generate_lod_plot <- function(cross, scan_result, method, base_dir = ".") {
+  #' @description Plot LOD curves for a genome scan
+  #' @param the cross object
+  #' @param scan1 results
+  #' @param the method used to compute the scan1 results HK,LMM or LOCO
+  #' @param base_dir the path to write the generated plot
+  #' @return a string with the file path for the plot
+  cat("Generting the lod plot for the LOD scores\n")
+  color <- c("slateblue", "violetred", "green3")
+  par(mar = c(4.1, 4.1, 1.6, 1.1))
+  ymx <- maxlod(scan_result)
+  file_name = genRandomFileName(prefix = "RQTL_LOD_SCORE_", file_ext = ".png")
+  image_loc = file.path(base_dir , file_name)
+  png(image_loc,
+      width = 1000,
+      height = 600,
+      type = 'cairo-png')
+  plot(
+    scan_result,
+    cross$gmap,
+    lodcolumn = 1,
+    col = color[1],
+    main = colnames(cross$pheno)[1],
+    ylim = c(0, ymx * 1.02)
+  )
+  legend(
+    "topleft",
+    lwd = 2,
+    col = color[1],
+    method,
+    bg = "gray90",
+    lty = c(1, 1, 2)
+  )
+  dev.off()
+  return (image_loc)
+}
+
+
+lod_plot_path <- generate_lod_plot(cross, scan_results, "HK", base_dir=opt$directory)
+cat("Generated the lod plot at ", lod_plot_path, "\n")
+
+
+# function: perform  permutation tests for single-QTL method
+perform_permutation_test <- function(cross,
+                                     genome_prob,
+                                     n_perm,
+                                     method =  opt$method,
+                                     covar = NULL,
+                                     Xcovar = NULL,
+				     addcovar = NULL,
+				     intcovar = NULL,
+				     perm_Xsp = FALSE,
+				     kinship = NULL,
+				     model = c("normal", "binary"),
+				     chr_lengths = NULL,
+                                     perm_strata = NULL) {
+
+  #' Function to peform permutation tests for single QTL method
+  #' @description The scan1perm() function takes the
+  #' same arguments as scan1(), plus additional a #rguments to control the permutations:  
+  #' @param cross the cross object required to fetch the pheno
+  #' @param genome_prob the genomic probability matrix
+  #' @param method to computation method used to perform the genomic scan
+  #' @param intcovar
+  #' @param addcovar
+  #' @param Xcovar
+  #' @param perm_Xsp  If TRUE, do separate permutations for the autosomes and the X chromosome.
+  #' @param perm_strata Vector of strata, for a stratified permutation test.  
+  #' @param n_perm Number of permutation replicates.
+  #' @param chr_lengths engths of the chromosomes;
+  #' @return  object of class "scan1perm". 
+  cat("performing permutation test for the cross object with permutations", n_perm, "\n")
+  return (scan1perm(
+      genome_prob,
+      cross$pheno,
+      kinship = kinship,
+      Xcovar = Xcovar,
+      intcovar = intcovar,
+      addcovar = addcovar,
+      n_perm = n_perm,
+      perm_Xsp = perm_Xsp,
+      model = model,
+      chr_lengths = chr_lengths,
+      cores = NO_OF_CORES
+    ))
+}
+
+
+
+
+# check if pstrata
+if (!(is.null(opt$pstrata)) && (!is.null(Xcovar))){
+perm_strata <- mat2strata(Xcovar)
+} else {
+perm_strata <- NULL
+}
+
+# Step: Performing the permutation test
+perm <- perform_permutation_test(cross, Pr, n_perm = NO_OF_PERMUTATION,perm_strata = perm_strata, method = opt$method)
+
+
+# get the permutation summary with a significance threshold
+get_lod_significance <- function(perm, threshold = c(0.2, 0.05)){
+     cat("Getting the permutation summary with significance thresholds as ", threshold, "\n")
+     summary(perm, alpha = threshold)
+}
+
+lod_significance <- get_lod_significance(perm)
+
+# step: get the lod peaks
+# TODO fix the threshold here
+
+
+cat("Fetching the lod peaks with threshold", opt$threshold, "\n")
+lod_peaks = find_peaks(
+  scan_results,
+  threshold =opt$threshold,
+  map = cross$gmap,
+  cores = NO_OF_CORES
+)
+
+
+# step: get the estimated qtl effects
+get_qtl_effect <- function(chromosome,geno_prob,pheno,covar=NULL,LOCO= NULL){
+     cat("Finding the qtl effect\n")
+     chr_Pr <- geno_prob[,chromosome]
+     if (!is.null(chr_Pr)){
+      cat("Finding qtl effect for chromosome ", chromosome, "\n")
+     if (!is.null(LOCO)) {
+        cat("Finding qtl effect for chromosome ", chromosome, "with LOCO \n")
+     kinship <- calc_kinship(chr_Pr, "loco")[[chromosome]]
+     return(scan1coef(chr_Pr, pheno, kinship, addcovar=covar))
+     }
+     else {
+      return(scan1coef(chr_Pr, pheno, addcovar=covar))
+     }
+     }
+    return(NULL)
+}
+
+
+
+# take the first phenotype in the cross
+# grab phenotypes and covariates; ensure that covariates have names attribute
+pheno <- cross$pheno[,1]
+if (!is.null(cross$covar) && !is.null(cross$covar$sex)){
+ covar <- match(cross$covar$sex, c("f", "m")) # make numeric
+ names(covar) <- rownames(cross$covar)
+} else {
+covar  <- NULL
+}
+
+meffects <- c()
+meffects_plots <- c()
+# TODO add plots for meffects
+for (chr in chr_names(cross)){
+  cat("Getting the qtl effect for chromosome", chr, "\n")
+   if (cross$crosstype == "4way"){
+     coeff_results <- get_qtl_effect(chr, aPr, pheno, LOCO="LOCO", covar = sex)
+     cat("Generating the qtl effects plots\n")
+     file_name = genRandomFileName(prefix = "RQTL_EFFECT_", file_ext = ".png")
+     image_loc = file.path(base_dir , file_name)
+     par(mar=c(4.1, 4.1, 0.6, 0.6))
+       png(image_loc,
+      width = 1000,
+      height = 600,
+      type = 'cairo-png') 
+      plot(
+      coeff_results,
+      cross$gmap[chr],
+     bgcolor="gray95",
+     legend="bottomleft"
+     )
+     meffects <- append(meffects_plots, image_loc)
+   } else {
+    coeff_results  <- get_qtl_effect(chr, Pr, pheno)
+   } 
+    meffects <- append(meffects, coeff_results)
+}
+output = list(lod_peaks = lod_peaks,
+             scan_results =scan_results,
+	     genetic_probabilities = Pr,
+	     lod_significance = lod_significance,
+	     permutation_results = perm,
+	     lod_peaks = lod_peaks,
+	     chromosomes  = chr_names(cross),
+	     meffects = meffects,
+	     error_lod = error_lod,
+	     meffects_plots = meffects_plots,
+	     lod_plot_path =lod_plot_path,
+	     scan_method = SCAN_METHOD  
+	     )
+output_json_data <-toJSON(output)
+cat("The output file path generated is",  OUTPUT_FILE_PATH, "\n")
+cat("Writing to the output file\n")
+write(output_json_data, file=OUTPUT_FILE_PATH)
+