about summary refs log tree commit diff
path: root/scripts/rqtl2_wrapper.R
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/rqtl2_wrapper.R')
-rw-r--r--scripts/rqtl2_wrapper.R358
1 files changed, 358 insertions, 0 deletions
diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R
new file mode 100644
index 0000000..af13efa
--- /dev/null
+++ b/scripts/rqtl2_wrapper.R
@@ -0,0 +1,358 @@
+library(qtl2)
+library(rjson)
+library(stringi)
+library(optparse)
+
+# Define command-line options
+option_list <- list(
+  make_option(c("-d", "--directory"), action = "store", default = NULL, type = "character", 
+              help = "Temporary working directory: should also host the input file."),
+  make_option(c("-i", "--input_file"), action = "store", default = NULL, type = 'character', 
+              help = "A YAML or JSON file with required data to create the cross file."),
+  make_option(c("-o", "--output_file"), action = "store", default = NULL, type = 'character', 
+              help = "A file path of where to write the output JSON results."),
+  make_option(c("-c", "--cores"), type = "integer", default = 1, 
+              help = "Number of cores to use while making computation."),
+  make_option(c("-p", "--nperm"), type = "integer", default = 0, 
+              help = "Number of permutations."),
+  make_option(c("-m", "--method"), action = "store", default = "HK", type = "character", 
+              help = "Scan Mapping Method - HK (Haley Knott), LMM (Linear Mixed Model), LOCO (Leave One Chromosome Out)."),
+  make_option(c("--pstrata"), action = "store_true", default = NULL, 
+              help = "Use permutation strata."),
+  make_option(c("-t", "--threshold"), type = "integer", default = 1, 
+              help = "Minimum LOD score for a Peak.")
+)
+
+# Parse command-line arguments
+opt_parser <- OptionParser(option_list = option_list)
+opt <- parse_args(opt_parser)
+
+# Assign parsed arguments to variables
+NO_OF_CORES <- opt$cores
+SCAN_METHOD <- opt$method
+NO_OF_PERMUTATION <- opt$nperm
+
+NO_OF_CORES <- 20
+
+# Validate input and output file paths
+validate_file_paths <- function(opt) {
+  if (is.null(opt$directory) || !dir.exists(opt$directory)) {
+    print_help(opt_parser)
+    stop("The working directory does not exist or is NULL.\n")
+  }
+  
+  INPUT_FILE_PATH <- opt$input_file
+  OUTPUT_FILE_PATH <- opt$output_file
+  
+  if (!file.exists(INPUT_FILE_PATH)) {
+    print_help(opt_parser)
+    stop("The input file ", INPUT_FILE_PATH, " you provided does not exist.\n")
+  } else {
+    cat("Input file exists. Reading the input file...\n")
+  }
+  
+  if (!file.exists(OUTPUT_FILE_PATH)) {
+    print_help(opt_parser)
+    stop("The output file ", OUTPUT_FILE_PATH, " you provided does not exist.\n")
+  } else {
+    cat("Output file exists...", OUTPUT_FILE_PATH, "\n")
+  }
+  
+  return(list(input = INPUT_FILE_PATH, output = OUTPUT_FILE_PATH))
+}
+
+file_paths <- validate_file_paths(opt)
+INPUT_FILE_PATH <- file_paths$input
+OUTPUT_FILE_PATH <- file_paths$output
+
+# Utility function to generate random file names
+genRandomFileName <- function(prefix, string_size = 9, file_ext = ".txt") {
+  randStr <- paste(prefix, stri_rand_strings(1, string_size, pattern = "[A-Za-z0-9]"), sep = "_")
+  return(paste(randStr, file_ext, sep = ""))
+}
+
+# Generate control file path
+control_file_path <- file.path(opt$directory, genRandomFileName(prefix = "control", file_ext = ".json"))
+
+cat("Generated the control file path at", control_file_path, "\n")
+# Read and parse the input file
+cat("Reading and parsing the input file.\n")
+json_data <- fromJSON(file = INPUT_FILE_PATH)
+
+# Set default values for JSON data
+set_default_values <- function(json_data) {
+  if (is.null(json_data$sep)) {
+    cat("Using ',' as a default separator for cross file.\n")
+    json_data$sep <- ","
+  }
+  if (is.null(json_data$na.strings)) {
+    cat("Using '-' and 'NA' as the default na.strings.\n")
+    json_data$na.strings <- c("-", "NA")
+  }
+  
+  default_keys <- c("geno_transposed", "founder_geno_transposed", "pheno_transposed", 
+                    "covar_transposed", "phenocovar_transposed")
+  
+  for (item in default_keys) {
+    if (!(item %in% names(json_data))) {
+      cat("Using FALSE as default parameter for", item, "\n")
+      json_data[item] <- FALSE
+    }
+  }
+  
+  return(json_data)
+}
+
+json_data <- set_default_values(json_data)
+
+# Function to generate the cross object
+generate_cross_object <- function(control_file_path, json_data) {
+  write_control_file(
+    control_file_path,
+    crosstype = json_data$crosstype,
+    geno_file = json_data$geno_file,
+    pheno_file = json_data$pheno_file,
+    gmap_file = json_data$geno_map_file,
+    pmap_file = json_data$physical_map_file,
+    phenocovar_file = json_data$phenocovar_file,
+    geno_codes = json_data$geno_codes,
+    alleles = json_data$alleles,
+    na.strings = json_data$na.strings,
+    sex_file = json_data$sex_file,
+    founder_geno_file = json_data$founder_geno_file,
+    covar_file = json_data$covar_file,
+    sex_covar = json_data$sex_covar,
+    sex_codes = json_data$sex_codes,
+    crossinfo_file = json_data$crossinfo_file,
+    crossinfo_covar = json_data$crossinfo_covar,
+    crossinfo_codes = json_data$crossinfo_codes,
+    xchr = json_data$xchr,
+    overwrite = TRUE,
+    founder_geno_transposed = json_data$founder_geno_transposed,
+    geno_transposed = json_data$geno_transposed
+  )
+}
+
+# Generate the cross object
+cat("Generating the cross object at", control_file_path, "\n")
+generate_cross_object(control_file_path, json_data)
+
+# Read the cross object
+cat("Reading the cross object from", control_file_path, "\n")
+
+cross <- read_cross2(control_file_path, quiet = FALSE)
+
+# Check the integrity of the cross object
+cat("Checking the integrity of the cross object.\n")
+if (check_cross2(cross)) {
+  cat("Cross meets required specifications for a cross.\n")
+} else {
+  cat("Cross does not meet required specifications.\n")
+}
+
+# Print cross summary
+cat("A summary about the cross you provided:\n")
+summary(cross)
+
+# Function to compute genetic probabilities
+perform_genetic_pr <- function(cross, cores = NO_OF_CORES, step = 1, map = NULL, 
+                               map_function = c("haldane", "kosambi", "c-f", "morgan"), 
+                               error_prob = 0.002) {
+  calc_genoprob(cross, map = map, error_prob = error_prob, map_function = map_function, 
+                quiet = FALSE, cores = cores)
+}
+
+# Insert pseudomarkers to the genetic map
+cat("Inserting pseudomarkers to the genetic map with step 1 and stepwidth fixed.\n")
+
+MAP <- insert_pseudomarkers(cross$gmap, step = 1, stepwidth = "fixed", cores = NO_OF_CORES)
+
+# Calculate genetic probabilities
+cat("Calculating the genetic probabilities.\n")
+Pr <- perform_genetic_pr(cross)
+
+# Calculate allele probabilities for 4-way cross
+if (cross$crosstype == "4way") {
+  cat("Calculating allele genetic probability for 4-way cross.\n")
+  aPr <- genoprob_to_alleleprob(Pr)
+}
+
+# Calculate genotyping error LOD scores
+cat("Calculating the genotype error LOD scores.\n")
+error_lod <- calc_errorlod(cross, Pr, quiet = FALSE, cores = NO_OF_CORES)
+error_lod <- do.call("cbind", error_lod)
+
+# Get phenotypes and covariates
+cat("Getting the phenotypes and covariates.\n")
+pheno <- cross$pheno
+# covar <- match(cross$covar$sex, c("f", "m")) # make numeric
+# TODO rework on this
+covar <- NULL
+if (!is.null(covar)) {
+  names(covar) <- rownames(cross$covar)
+}
+
+Xcovar <- get_x_covar(cross)
+cat("The covariates are:\n")
+print(covar)
+cat("The Xcovar are:\n")
+print(Xcovar)
+
+# Function to calculate kinship
+get_kinship <- function(probability, method = "LMM") {
+  if (method == "LMM") {
+    kinship <- calc_kinship(probability)
+  } else if (method == "LOCO") {
+    kinship <- calc_kinship(probability, "loco")
+  } else {
+    kinship <- NULL
+  }
+  return(kinship)
+}
+
+# Calculate kinship for the genetic probability
+cat("Calculating the kinship for the genetic probability.\n")
+if (cross$crosstype == "4way") {
+  kinship <- get_kinship(aPr, opt$method)
+} else {
+  kinship <- get_kinship(Pr, "loco")
+}
+
+# Function to perform genome scan
+perform_genome_scan <- function(cross, genome_prob, method, addcovar = NULL, intcovar = NULL, 
+                                kinship = NULL, model = c("normal", "binary"), Xcovar = NULL) {
+  if (method == "LMM") {
+    cat("Performing scan1 using Linear Mixed Model.\n")
+    out <- scan1(genome_prob, cross$pheno, kinship = kinship, model = model, cores = NO_OF_CORES)
+  } else if (method == "LOCO") {
+    cat("Performing scan1 using Leave One Chromosome Out.\n")
+    out <- scan1(genome_prob, cross$pheno, kinship = kinship, model = model, cores = NO_OF_CORES)
+  } else if (method == "HK") {
+    cat("Performing scan1 using Haley Knott.\n")
+    out <- scan1(genome_prob, cross$pheno, addcovar = addcovar, intcovar = intcovar, 
+                 model = model, Xcovar = Xcovar, cores = NO_OF_CORES)
+  }
+  return(out)
+}
+
+# Perform the genome scan for the cross object
+if (cross$crosstype == "4way") {
+  sex <- setNames((cross$covar$Sex == "male") * 1, rownames(cross$covar))
+  scan_results <- perform_genome_scan(aPr, cross, kinship = kinship, method = "LOCO", addcovar = sex)
+} else {
+  scan_results <- perform_genome_scan(cross = cross, genome_prob = Pr, kinship = kinship, 
+                                     method = SCAN_METHOD)
+}
+
+# Save scan results
+scan_file <- file.path(opt$directory, "scan_results.csv")
+write.csv(scan_results, scan_file)
+
+# Function to perform permutation tests
+perform_permutation_test <- function(cross, genome_prob, n_perm, method = opt$method, 
+                                     covar = NULL, Xcovar = NULL, addcovar = NULL, 
+                                     intcovar = NULL, perm_Xsp = FALSE, kinship = NULL, 
+                                     model = c("normal", "binary"), chr_lengths = NULL, 
+                                     perm_strata = NULL) {
+  scan1perm(genome_prob, cross$pheno, kinship = kinship, Xcovar = Xcovar, intcovar = intcovar, 
+            addcovar = addcovar, n_perm = n_perm, perm_Xsp = perm_Xsp, model = model, 
+            chr_lengths = chr_lengths, cores = NO_OF_CORES)
+}
+
+# Check if permutation strata is needed
+if (!is.null(opt$pstrata) && !is.null(Xcovar)) {
+  perm_strata <- mat2strata(Xcovar)
+} else {
+  perm_strata <- NULL
+}
+
+# Perform permutation test if requested
+permutation_results_file <- file.path(opt$directory, "permutation.csv")
+significance_results_file <- file.path(opt$directory, "significance.csv")
+
+if (NO_OF_PERMUTATION > 0) {
+  cat("Performing permutation test for the cross object with", NO_OF_PERMUTATION, "permutations.\n")
+  perm <- perform_permutation_test(cross, Pr, n_perm = NO_OF_PERMUTATION, perm_strata = perm_strata, 
+                                   method = opt$method)
+  
+  # Function to get LOD significance thresholds
+  get_lod_significance <- function(perm, thresholds = c(0.01, 0.05, 0.63)) {
+    cat("Getting the permutation summary with significance thresholds:", thresholds, "\n")
+    summary(perm, alpha = thresholds)
+  }
+  
+  # Compute LOD significance
+  lod_significance <- get_lod_significance(perm)
+  
+  # Save results
+  write.csv(lod_significance, significance_results_file)
+  write.csv(perm, permutation_results_file)
+}
+
+
+
+# Function to get QTL effects
+get_qtl_effect <- function(chromosome, geno_prob, pheno, covar = NULL, LOCO = NULL) {
+  cat("Finding the QTL effect for chromosome", chromosome, "\n")
+  chr_Pr <- geno_prob[, chromosome]
+  if (!is.null(chr_Pr)) {
+    if (!is.null(LOCO)) {
+      cat("Finding QTL effect for chromosome", chromosome, "with LOCO.\n")
+      kinship <- calc_kinship(chr_Pr, "loco")[[chromosome]]
+      return(scan1coef(chr_Pr, pheno, kinship, addcovar = covar))
+    } else {
+      return(scan1coef(chr_Pr, pheno, addcovar = covar))
+    }
+  }
+  return(NULL)
+}
+
+# Get QTL effects for each chromosome
+# TODO
+
+# Prepare output data
+gmap_file <- file.path(opt$directory, json_data$geno_map_file)
+pmap_file <- file.path(opt$directory, json_data$physical_map_file)
+
+
+
+
+
+# Construct the Map object from cross with columns (Marker, chr, cM, Mb)
+gmap <- cross$gmap  # Genetic map in cM
+pmap <- cross$pmap  # Physical map in Mb
+# Convert lists to data frames
+gmap_df <- data.frame(
+  marker = unlist(lapply(gmap, names)), 
+  chr = rep(names(gmap), sapply(gmap, length)),  # Add chromosome info
+  CM = unlist(gmap), 
+  stringsAsFactors = FALSE
+)
+
+pmap_df <- data.frame(
+  marker = unlist(lapply(pmap, names)), 
+  chr = rep(names(pmap), sapply(pmap, length)),  # Add chromosome info
+  MB = unlist(pmap), 
+  stringsAsFactors = FALSE
+)
+# Merge using full outer join (by marker and chromosome)
+merged_map <- merge(gmap_df, pmap_df, by = c("marker", "chr"), all = TRUE)
+map_file <- file.path(opt$directory, "map.csv")
+write.csv(merged_map, map_file, row.names = FALSE)
+
+output <- list(
+  permutation_file = permutation_results_file,
+  significance_file = significance_results_file,
+  scan_file = scan_file,
+  gmap_file = gmap_file,
+  pmap_file = pmap_file,
+  map_file  = map_file,
+  permutations = NO_OF_PERMUTATION,
+  scan_method = SCAN_METHOD
+)
+
+# Write output to JSON file
+output_json_data <- toJSON(output)
+cat("The output file path generated is", OUTPUT_FILE_PATH, "\n")
+cat("Writing to the output file.\n")
+write(output_json_data, file = OUTPUT_FILE_PATH)
\ No newline at end of file