diff options
Diffstat (limited to 'scripts/rqtl2_wrapper.R')
| -rw-r--r-- | scripts/rqtl2_wrapper.R | 358 |
1 files changed, 358 insertions, 0 deletions
diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R new file mode 100644 index 0000000..af13efa --- /dev/null +++ b/scripts/rqtl2_wrapper.R @@ -0,0 +1,358 @@ +library(qtl2) +library(rjson) +library(stringi) +library(optparse) + +# Define command-line options +option_list <- list( + make_option(c("-d", "--directory"), action = "store", default = NULL, type = "character", + help = "Temporary working directory: should also host the input file."), + make_option(c("-i", "--input_file"), action = "store", default = NULL, type = 'character', + help = "A YAML or JSON file with required data to create the cross file."), + make_option(c("-o", "--output_file"), action = "store", default = NULL, type = 'character', + help = "A file path of where to write the output JSON results."), + make_option(c("-c", "--cores"), type = "integer", default = 1, + help = "Number of cores to use while making computation."), + make_option(c("-p", "--nperm"), type = "integer", default = 0, + help = "Number of permutations."), + make_option(c("-m", "--method"), action = "store", default = "HK", type = "character", + help = "Scan Mapping Method - HK (Haley Knott), LMM (Linear Mixed Model), LOCO (Leave One Chromosome Out)."), + make_option(c("--pstrata"), action = "store_true", default = NULL, + help = "Use permutation strata."), + make_option(c("-t", "--threshold"), type = "integer", default = 1, + help = "Minimum LOD score for a Peak.") +) + +# Parse command-line arguments +opt_parser <- OptionParser(option_list = option_list) +opt <- parse_args(opt_parser) + +# Assign parsed arguments to variables +NO_OF_CORES <- opt$cores +SCAN_METHOD <- opt$method +NO_OF_PERMUTATION <- opt$nperm + +NO_OF_CORES <- 20 + +# Validate input and output file paths +validate_file_paths <- function(opt) { + if (is.null(opt$directory) || !dir.exists(opt$directory)) { + print_help(opt_parser) + stop("The working directory does not exist or is NULL.\n") + } + + INPUT_FILE_PATH <- opt$input_file + OUTPUT_FILE_PATH <- opt$output_file + + if (!file.exists(INPUT_FILE_PATH)) { + print_help(opt_parser) + stop("The input file ", INPUT_FILE_PATH, " you provided does not exist.\n") + } else { + cat("Input file exists. Reading the input file...\n") + } + + if (!file.exists(OUTPUT_FILE_PATH)) { + print_help(opt_parser) + stop("The output file ", OUTPUT_FILE_PATH, " you provided does not exist.\n") + } else { + cat("Output file exists...", OUTPUT_FILE_PATH, "\n") + } + + return(list(input = INPUT_FILE_PATH, output = OUTPUT_FILE_PATH)) +} + +file_paths <- validate_file_paths(opt) +INPUT_FILE_PATH <- file_paths$input +OUTPUT_FILE_PATH <- file_paths$output + +# Utility function to generate random file names +genRandomFileName <- function(prefix, string_size = 9, file_ext = ".txt") { + randStr <- paste(prefix, stri_rand_strings(1, string_size, pattern = "[A-Za-z0-9]"), sep = "_") + return(paste(randStr, file_ext, sep = "")) +} + +# Generate control file path +control_file_path <- file.path(opt$directory, genRandomFileName(prefix = "control", file_ext = ".json")) + +cat("Generated the control file path at", control_file_path, "\n") +# Read and parse the input file +cat("Reading and parsing the input file.\n") +json_data <- fromJSON(file = INPUT_FILE_PATH) + +# Set default values for JSON data +set_default_values <- function(json_data) { + if (is.null(json_data$sep)) { + cat("Using ',' as a default separator for cross file.\n") + json_data$sep <- "," + } + if (is.null(json_data$na.strings)) { + cat("Using '-' and 'NA' as the default na.strings.\n") + json_data$na.strings <- c("-", "NA") + } + + default_keys <- c("geno_transposed", "founder_geno_transposed", "pheno_transposed", + "covar_transposed", "phenocovar_transposed") + + for (item in default_keys) { + if (!(item %in% names(json_data))) { + cat("Using FALSE as default parameter for", item, "\n") + json_data[item] <- FALSE + } + } + + return(json_data) +} + +json_data <- set_default_values(json_data) + +# Function to generate the cross object +generate_cross_object <- function(control_file_path, json_data) { + write_control_file( + control_file_path, + crosstype = json_data$crosstype, + geno_file = json_data$geno_file, + pheno_file = json_data$pheno_file, + gmap_file = json_data$geno_map_file, + pmap_file = json_data$physical_map_file, + phenocovar_file = json_data$phenocovar_file, + geno_codes = json_data$geno_codes, + alleles = json_data$alleles, + na.strings = json_data$na.strings, + sex_file = json_data$sex_file, + founder_geno_file = json_data$founder_geno_file, + covar_file = json_data$covar_file, + sex_covar = json_data$sex_covar, + sex_codes = json_data$sex_codes, + crossinfo_file = json_data$crossinfo_file, + crossinfo_covar = json_data$crossinfo_covar, + crossinfo_codes = json_data$crossinfo_codes, + xchr = json_data$xchr, + overwrite = TRUE, + founder_geno_transposed = json_data$founder_geno_transposed, + geno_transposed = json_data$geno_transposed + ) +} + +# Generate the cross object +cat("Generating the cross object at", control_file_path, "\n") +generate_cross_object(control_file_path, json_data) + +# Read the cross object +cat("Reading the cross object from", control_file_path, "\n") + +cross <- read_cross2(control_file_path, quiet = FALSE) + +# Check the integrity of the cross object +cat("Checking the integrity of the cross object.\n") +if (check_cross2(cross)) { + cat("Cross meets required specifications for a cross.\n") +} else { + cat("Cross does not meet required specifications.\n") +} + +# Print cross summary +cat("A summary about the cross you provided:\n") +summary(cross) + +# Function to compute genetic probabilities +perform_genetic_pr <- function(cross, cores = NO_OF_CORES, step = 1, map = NULL, + map_function = c("haldane", "kosambi", "c-f", "morgan"), + error_prob = 0.002) { + calc_genoprob(cross, map = map, error_prob = error_prob, map_function = map_function, + quiet = FALSE, cores = cores) +} + +# Insert pseudomarkers to the genetic map +cat("Inserting pseudomarkers to the genetic map with step 1 and stepwidth fixed.\n") + +MAP <- insert_pseudomarkers(cross$gmap, step = 1, stepwidth = "fixed", cores = NO_OF_CORES) + +# Calculate genetic probabilities +cat("Calculating the genetic probabilities.\n") +Pr <- perform_genetic_pr(cross) + +# Calculate allele probabilities for 4-way cross +if (cross$crosstype == "4way") { + cat("Calculating allele genetic probability for 4-way cross.\n") + aPr <- genoprob_to_alleleprob(Pr) +} + +# Calculate genotyping error LOD scores +cat("Calculating the genotype error LOD scores.\n") +error_lod <- calc_errorlod(cross, Pr, quiet = FALSE, cores = NO_OF_CORES) +error_lod <- do.call("cbind", error_lod) + +# Get phenotypes and covariates +cat("Getting the phenotypes and covariates.\n") +pheno <- cross$pheno +# covar <- match(cross$covar$sex, c("f", "m")) # make numeric +# TODO rework on this +covar <- NULL +if (!is.null(covar)) { + names(covar) <- rownames(cross$covar) +} + +Xcovar <- get_x_covar(cross) +cat("The covariates are:\n") +print(covar) +cat("The Xcovar are:\n") +print(Xcovar) + +# Function to calculate kinship +get_kinship <- function(probability, method = "LMM") { + if (method == "LMM") { + kinship <- calc_kinship(probability) + } else if (method == "LOCO") { + kinship <- calc_kinship(probability, "loco") + } else { + kinship <- NULL + } + return(kinship) +} + +# Calculate kinship for the genetic probability +cat("Calculating the kinship for the genetic probability.\n") +if (cross$crosstype == "4way") { + kinship <- get_kinship(aPr, opt$method) +} else { + kinship <- get_kinship(Pr, "loco") +} + +# Function to perform genome scan +perform_genome_scan <- function(cross, genome_prob, method, addcovar = NULL, intcovar = NULL, + kinship = NULL, model = c("normal", "binary"), Xcovar = NULL) { + if (method == "LMM") { + cat("Performing scan1 using Linear Mixed Model.\n") + out <- scan1(genome_prob, cross$pheno, kinship = kinship, model = model, cores = NO_OF_CORES) + } else if (method == "LOCO") { + cat("Performing scan1 using Leave One Chromosome Out.\n") + out <- scan1(genome_prob, cross$pheno, kinship = kinship, model = model, cores = NO_OF_CORES) + } else if (method == "HK") { + cat("Performing scan1 using Haley Knott.\n") + out <- scan1(genome_prob, cross$pheno, addcovar = addcovar, intcovar = intcovar, + model = model, Xcovar = Xcovar, cores = NO_OF_CORES) + } + return(out) +} + +# Perform the genome scan for the cross object +if (cross$crosstype == "4way") { + sex <- setNames((cross$covar$Sex == "male") * 1, rownames(cross$covar)) + scan_results <- perform_genome_scan(aPr, cross, kinship = kinship, method = "LOCO", addcovar = sex) +} else { + scan_results <- perform_genome_scan(cross = cross, genome_prob = Pr, kinship = kinship, + method = SCAN_METHOD) +} + +# Save scan results +scan_file <- file.path(opt$directory, "scan_results.csv") +write.csv(scan_results, scan_file) + +# Function to perform permutation tests +perform_permutation_test <- function(cross, genome_prob, n_perm, method = opt$method, + covar = NULL, Xcovar = NULL, addcovar = NULL, + intcovar = NULL, perm_Xsp = FALSE, kinship = NULL, + model = c("normal", "binary"), chr_lengths = NULL, + perm_strata = NULL) { + scan1perm(genome_prob, cross$pheno, kinship = kinship, Xcovar = Xcovar, intcovar = intcovar, + addcovar = addcovar, n_perm = n_perm, perm_Xsp = perm_Xsp, model = model, + chr_lengths = chr_lengths, cores = NO_OF_CORES) +} + +# Check if permutation strata is needed +if (!is.null(opt$pstrata) && !is.null(Xcovar)) { + perm_strata <- mat2strata(Xcovar) +} else { + perm_strata <- NULL +} + +# Perform permutation test if requested +permutation_results_file <- file.path(opt$directory, "permutation.csv") +significance_results_file <- file.path(opt$directory, "significance.csv") + +if (NO_OF_PERMUTATION > 0) { + cat("Performing permutation test for the cross object with", NO_OF_PERMUTATION, "permutations.\n") + perm <- perform_permutation_test(cross, Pr, n_perm = NO_OF_PERMUTATION, perm_strata = perm_strata, + method = opt$method) + + # Function to get LOD significance thresholds + get_lod_significance <- function(perm, thresholds = c(0.01, 0.05, 0.63)) { + cat("Getting the permutation summary with significance thresholds:", thresholds, "\n") + summary(perm, alpha = thresholds) + } + + # Compute LOD significance + lod_significance <- get_lod_significance(perm) + + # Save results + write.csv(lod_significance, significance_results_file) + write.csv(perm, permutation_results_file) +} + + + +# Function to get QTL effects +get_qtl_effect <- function(chromosome, geno_prob, pheno, covar = NULL, LOCO = NULL) { + cat("Finding the QTL effect for chromosome", chromosome, "\n") + chr_Pr <- geno_prob[, chromosome] + if (!is.null(chr_Pr)) { + if (!is.null(LOCO)) { + cat("Finding QTL effect for chromosome", chromosome, "with LOCO.\n") + kinship <- calc_kinship(chr_Pr, "loco")[[chromosome]] + return(scan1coef(chr_Pr, pheno, kinship, addcovar = covar)) + } else { + return(scan1coef(chr_Pr, pheno, addcovar = covar)) + } + } + return(NULL) +} + +# Get QTL effects for each chromosome +# TODO + +# Prepare output data +gmap_file <- file.path(opt$directory, json_data$geno_map_file) +pmap_file <- file.path(opt$directory, json_data$physical_map_file) + + + + + +# Construct the Map object from cross with columns (Marker, chr, cM, Mb) +gmap <- cross$gmap # Genetic map in cM +pmap <- cross$pmap # Physical map in Mb +# Convert lists to data frames +gmap_df <- data.frame( + marker = unlist(lapply(gmap, names)), + chr = rep(names(gmap), sapply(gmap, length)), # Add chromosome info + CM = unlist(gmap), + stringsAsFactors = FALSE +) + +pmap_df <- data.frame( + marker = unlist(lapply(pmap, names)), + chr = rep(names(pmap), sapply(pmap, length)), # Add chromosome info + MB = unlist(pmap), + stringsAsFactors = FALSE +) +# Merge using full outer join (by marker and chromosome) +merged_map <- merge(gmap_df, pmap_df, by = c("marker", "chr"), all = TRUE) +map_file <- file.path(opt$directory, "map.csv") +write.csv(merged_map, map_file, row.names = FALSE) + +output <- list( + permutation_file = permutation_results_file, + significance_file = significance_results_file, + scan_file = scan_file, + gmap_file = gmap_file, + pmap_file = pmap_file, + map_file = map_file, + permutations = NO_OF_PERMUTATION, + scan_method = SCAN_METHOD +) + +# Write output to JSON file +output_json_data <- toJSON(output) +cat("The output file path generated is", OUTPUT_FILE_PATH, "\n") +cat("Writing to the output file.\n") +write(output_json_data, file = OUTPUT_FILE_PATH) \ No newline at end of file |
