From 0d0e3ada632b1870b0d6f3400bf11122d1b8b0cd Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Thu, 24 Oct 2024 17:09:33 +0300 Subject: feat: create implementation for creating cross object using r-qtl2. --- scripts/rqtl2_wrapper.R | 110 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 scripts/rqtl2_wrapper.R diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R new file mode 100644 index 0000000..30806b8 --- /dev/null +++ b/scripts/rqtl2_wrapper.R @@ -0,0 +1,110 @@ +# This script file contains the an implementation of qtl mapping using r-qtl2 +# For r-qtl1 implementation see: Scripts/rqtl_wrapper.R +# load the library + +library(qtl2) +library(rjson) +library(stringi) +library(stringr) + +options(stringsAsFactors = FALSE); +args = commandArgs(trailingOnly=TRUE) + +# get the json file path with pre metadata required to create the cross + + +if (length(args)==0) { + stop("Argument for the metadata file is Missing ", call.=FALSE) +} else { + + json_file_path = args[1] + # convert this to an absolute file path + +} + +# validation for the json file +if (!(file.exists(json_file_path))) { + stop("The input file path does not exists") +} else { +str_glue("The input path for the metadata >>>>>>> {json_file_path}") +json_data = fromJSON(file = json_file_path) +} + + +# generate random string file path here +genRandomFileName <- function(prefix,file_ext=".txt"){ + + randStr = paste(prefix,stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"),sep="_") + + return(paste(randStr,file_ext,sep="")) +} + +# this should be read from the json file assigned to variables +# TODO improve on this or use option + + +crosstype <- json_data$crosstype +geno_file <- json_data$geno_file +pheno_file <- json_data$pheno_file +geno_map_file <- json_data$geno_map_file +pheno_covar_file <- json_data$phenocovar_file +alleles <- json_data$alleles + +# geno_codes handle the geno codes here + +# make assertion for the geno_file and the geno_file exists +# make assertion for the physical map file or the geno map file exists + +# create temp directory for this workspace +control_file_path <- file.path("/home/kabui", genRandomFileName(prefix="control_", file_ext=".json")) + +str_glue( + "Generated control file path is {control_file_path}" +) + + + +# create the cross file here from the arguments provided +# todo add more validation checks here + +# issue I can no define the different paths for files for example pheno_file + +dataset <- write_control_file(control_file_path, + crosstype= crosstype, + geno_file= geno_file, + pheno_file= pheno_file, + gmap_file= geno_map_file, + phenocovar_file= pheno_covar_file, + geno_codes=c(L=1L, C=2L), + alleles= alleles, + na.strings=c("-", "NA"), + overwrite = TRUE) + + +# make validation for the data +dataset <- read_cross2(control_file_path, quiet = FALSE) # replace this with a dynamic path + +# check integrity of the cross +cat("Check the integrity of the cross object") +check_cross2(dataset) +if (check_cross2(dataset)){ + print("Dataset meets required specifications for a cross") +} else { + print("Dataset does not meet required specifications") +} + +# Dataset Summarys +cat("A Summary about the Dataset You Provided\n") +summary(dataset) +n_ind(dataset) +n_chr(dataset) +cat("names of markers in the object\n") +marker_names(dataset) +cat("names of phenotypes in a the object") +pheno_names(dataset) +cat("IDs for all individuals in the dataset cross object that have genotype data\n") +ind_ids_geno(dataset) +cat(" IDs for all individuals in the dataset object that have phenotype data") +ind_ids_pheno(dataset) +cat("Name of the founder Strains/n") +founders(dataset) -- cgit 1.4.1 From 0bfe8bae9f699ec8ce7586d5c780915daf7e0e9e Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 25 Oct 2024 12:28:24 +0300 Subject: feat: add code to perform genetic probabilities. --- scripts/rqtl2_wrapper.R | 74 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 68 insertions(+), 6 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index 30806b8..9d41c6a 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -18,11 +18,11 @@ if (length(args)==0) { } else { json_file_path = args[1] - # convert this to an absolute file path - } -# validation for the json file +# validation for the json file + + if (!(file.exists(json_file_path))) { stop("The input file path does not exists") } else { @@ -31,11 +31,10 @@ json_data = fromJSON(file = json_file_path) } + # generate random string file path here genRandomFileName <- function(prefix,file_ext=".txt"){ - randStr = paste(prefix,stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"),sep="_") - return(paste(randStr,file_ext,sep="")) } @@ -43,12 +42,25 @@ genRandomFileName <- function(prefix,file_ext=".txt"){ # TODO improve on this or use option + +# should put constraints for items data required for this crosstype <- json_data$crosstype geno_file <- json_data$geno_file pheno_file <- json_data$pheno_file geno_map_file <- json_data$geno_map_file pheno_covar_file <- json_data$phenocovar_file alleles <- json_data$alleles +founder_geno_file = json_data$founder_geno_file +gmap_file = json_data$gmap_file + + +# work on the optional parameters + +# better fit for reading the data +# make validations + +# parsing the required data for example the geno_codes + # geno_codes handle the geno codes here @@ -69,6 +81,24 @@ str_glue( # issue I can no define the different paths for files for example pheno_file +# think about the issue about geno codes ~~~~ +# function to generate a cross file from a json list + +generate_cross_object <- function(json_data){ +return (write_control_file(control_file_path, + crosstype= json_data$crosstype, + geno_file= json_data$geno_file, + pheno_file= json_data$pheno_file, + gmap_file= json_data$geno_map_file, + phenocovar_file= json_data$phenocovar_file, + geno_codes= json_data$geno_codes, + alleles= json_data$alleles, + na.strings=json_data$na.strings, + overwrite = TRUE)) +} + + +# alternatively pass a yaml file with dataset <- write_control_file(control_file_path, crosstype= crosstype, geno_file= geno_file, @@ -80,7 +110,6 @@ dataset <- write_control_file(control_file_path, na.strings=c("-", "NA"), overwrite = TRUE) - # make validation for the data dataset <- read_cross2(control_file_path, quiet = FALSE) # replace this with a dynamic path @@ -108,3 +137,36 @@ cat(" IDs for all individuals in the dataset object that have phenotype data") ind_ids_pheno(dataset) cat("Name of the founder Strains/n") founders(dataset) + +# Work on computing the genetic probabilities + + +analysis_type <- "single" + + +perform_genetic_pr <- function(cross, cores= 1, error_prob=0.002, analysis_type="single"){ +# improve on this +if (analysis_type == "single"){ + pr <- calc_genoprob(cross, error_prob= error_prob, quiet=FALSE, cores=cores) + return (pr) +} +} + +# get the genetic probability + +Pr = perform_genetic_pr(dataset) +cat("Summaries on the genetic probabilites \n") +print(Pr) +summary(Pr) + + +#calculate genotyping error LOD scores, to help identify potential genotyping errors (and problem markers and/or individuals +error_lod <- calc_errorlod(dataset, Pr, quiet = FALSE, cores = 4) +print(error_lod) + + +# + + + + -- cgit 1.4.1 From be3dcbae4c48083fac5e51851579dc171932c66c Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 25 Oct 2024 13:03:08 +0300 Subject: feat: Add method to perform 1 pair scan. --- scripts/rqtl2_wrapper.R | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index 9d41c6a..a2fbf1e 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -165,8 +165,47 @@ error_lod <- calc_errorlod(dataset, Pr, quiet = FALSE, cores = 4) print(error_lod) -# +# Perform genome scane +# rework on this issue +## grab phenotypes and covariates; ensure that covariates have names attribute +pheno <- dataset$pheno +covar <- match(dataset$covar$sex, c("f", "m")) # make numeric +names(covar) <- rownames(dataset$covar) +Xcovar <- get_x_covar(dataset) +print(pheno) +print(covar) +print(Xcovar) +# rework on fetching th Xcovar and getting the covar data + +# perform kinship + + +perform_genome_scan <- function(cross, genome_prob, method, covar =NULL, xCovar=NULL) +# perform scan1 using haley-knott regression or linear model, or LOCO linear model +{ +if (method == "LMM") { + # provide parameters for this + kinship = kinship(genome_prob) + out <- scan1(genome_prob, cross$pheno, kinship=kinship, addcovar=covar, Xcovar=Xcovar) +} + +if (method == "LOCO") { +# perform kinship inside better option + kinship = kinship(genome_prob, "loco") + out <- scan1(genome_prob, cross$pheno, kinship=kinship,addcovar=covar, Xcovar=Xcovar) +} +else { +# perform using Haley Knott +out <- scan1(genome_prob, cross$pheno, addcovar=NULL, Xcovar=Xcovar) +} +return (out) +} + +results <- perform_genome_scan(cross=dataset, genome_prob=Pr, method = "HMM") + + +results -- cgit 1.4.1 From 2c4807422a54b64cfc075c1976b0b9087ab1d7ed Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 25 Oct 2024 14:37:52 +0300 Subject: feat: add method to generate LOD curves for the genome scan. --- scripts/rqtl2_wrapper.R | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index a2fbf1e..52877ae 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -207,5 +207,24 @@ return (out) results <- perform_genome_scan(cross=dataset, genome_prob=Pr, method = "HMM") -results +results # this should probably return the method use here + +# plot for the LOD scores from performing the genome scan + +generate_lod_plot <- function(cross, scan_result, method, base_dir="."){ +# Plot LOD curves for a genome scan +color <- c("slateblue", "violetred", "green3") +par(mar=c(4.1, 4.1, 1.6, 1.1)) +ymx <- maxlod(scan_result) +file_name = genRandomFileName(prefix="RQTL_LOD_SCORE_",file_ext=".png") +image_loc = file.path(base_dir ,file_name) +png(image_loc, width=1000, height=600, type='cairo-png') +plot(scan_result, cross$gmap, lodcolumn=1, col=color[1], main=colnames(cross$pheno)[1], + ylim=c(0, ymx*1.02)) +legend("topleft", lwd=2, col=color[1], method, bg="gray90", lty=c(1,1,2)) +dev.off() +return (image_loc) +} +lod_file_path <- generate_lod_plot(dataset, results, "HK") +lod_file_path \ No newline at end of file -- cgit 1.4.1 From 7c18d1e49db9ac565f9a46be7233e4b8e82c498d Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 25 Oct 2024 16:33:14 +0300 Subject: feat: add code to perform permutation for single Qtl. --- scripts/rqtl2_wrapper.R | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index 52877ae..1fe754e 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -188,13 +188,13 @@ perform_genome_scan <- function(cross, genome_prob, method, covar =NULL, xCovar= { if (method == "LMM") { # provide parameters for this - kinship = kinship(genome_prob) + kinship = calc_kinship(genome_prob) out <- scan1(genome_prob, cross$pheno, kinship=kinship, addcovar=covar, Xcovar=Xcovar) } if (method == "LOCO") { # perform kinship inside better option - kinship = kinship(genome_prob, "loco") + kinship = calc_kinship(genome_prob, "loco") out <- scan1(genome_prob, cross$pheno, kinship=kinship,addcovar=covar, Xcovar=Xcovar) } else { @@ -227,4 +227,38 @@ return (image_loc) } lod_file_path <- generate_lod_plot(dataset, results, "HK") -lod_file_path \ No newline at end of file +lod_file_path + +# work on 2 pair scan multiple pair scan # multiple pair scan + +# Q how do we perform geno scan with Genome scan with a single-QTL model ???? + +# perform permutation tests for single-QTL method + + +perform_permutation_test <- function(cross, genome_prob, n_perm, method="HKK", covar=NULL, Xcovar=NULL, perm_strata=NULL){ +# todo add chr_lengths and perm_Xsp + +if (method == "HKK") { + perm <- scan1perm(genome_prob, cross$pheno, Xcovar=Xcovar, n_perm= n_perm, perm_strata=perm_strata) +} +else if (method == "LMM") { + kinship = calc_kinship(genome_prob) + perm <- scan1perm(genome_prob, cross$pheno, + kinship=kinship, Xcovar=Xcovar, + n_perm=n_perm) +} +else if (method == "LOCO") { + kinship = calc_kinship(genome_prob, "loco") +operm3 <- scan1perm(genome_prob, cross$pheno, + kinship=kinship , perm_strata=perm_strata, + Xcovar= Xcovar, n_perm=n_perm) +} +return (perm) +} + +# TODO ! get these parameters from argument from the user +perm <- perform_permutation_test(dataset,Pr, n_perm=2, method="LMM") +# get the permutation summary with a significance threshold +summary(perm, alpha=c(0.2, 0.05)) + -- cgit 1.4.1 From 436163a37a2c2b0843b122508d1caee47049e7b9 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 25 Oct 2024 16:38:09 +0300 Subject: Refactor: apply code formatting. --- scripts/rqtl2_wrapper.R | 279 ++++++++++++++++++++++++++++++------------------ 1 file changed, 174 insertions(+), 105 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index 1fe754e..c6d1911 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -7,16 +7,16 @@ library(rjson) library(stringi) library(stringr) -options(stringsAsFactors = FALSE); -args = commandArgs(trailingOnly=TRUE) +options(stringsAsFactors = FALSE) + +args = commandArgs(trailingOnly = TRUE) # get the json file path with pre metadata required to create the cross -if (length(args)==0) { - stop("Argument for the metadata file is Missing ", call.=FALSE) +if (length(args) == 0) { + stop("Argument for the metadata file is Missing ", call. = FALSE) } else { - json_file_path = args[1] } @@ -24,18 +24,19 @@ if (length(args)==0) { if (!(file.exists(json_file_path))) { - stop("The input file path does not exists") + stop("The input file path does not exists") } else { -str_glue("The input path for the metadata >>>>>>> {json_file_path}") -json_data = fromJSON(file = json_file_path) + str_glue("The input path for the metadata >>>>>>> {json_file_path}") + json_data = fromJSON(file = json_file_path) } # generate random string file path here -genRandomFileName <- function(prefix,file_ext=".txt"){ - randStr = paste(prefix,stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"),sep="_") - return(paste(randStr,file_ext,sep="")) +genRandomFileName <- function(prefix, file_ext = ".txt") { + randStr = paste(prefix, stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"), sep = + "_") + return(paste(randStr, file_ext, sep = "")) } # this should be read from the json file assigned to variables @@ -43,11 +44,11 @@ genRandomFileName <- function(prefix,file_ext=".txt"){ -# should put constraints for items data required for this +# should put constraints for items data required for this crosstype <- json_data$crosstype geno_file <- json_data$geno_file pheno_file <- json_data$pheno_file -geno_map_file <- json_data$geno_map_file +geno_map_file <- json_data$geno_map_file pheno_covar_file <- json_data$phenocovar_file alleles <- json_data$alleles founder_geno_file = json_data$founder_geno_file @@ -62,53 +63,58 @@ gmap_file = json_data$gmap_file # parsing the required data for example the geno_codes -# geno_codes handle the geno codes here +# geno_codes handle the geno codes here # make assertion for the geno_file and the geno_file exists # make assertion for the physical map file or the geno map file exists -# create temp directory for this workspace -control_file_path <- file.path("/home/kabui", genRandomFileName(prefix="control_", file_ext=".json")) +# create temp directory for this workspace +control_file_path <- file.path("/home/kabui", + genRandomFileName(prefix = "control_", file_ext = ".json")) -str_glue( - "Generated control file path is {control_file_path}" -) +str_glue("Generated control file path is {control_file_path}") # create the cross file here from the arguments provided -# todo add more validation checks here +# todo add more validation checks here -# issue I can no define the different paths for files for example pheno_file +# issue I can no define the different paths for files for example pheno_file # think about the issue about geno codes ~~~~ # function to generate a cross file from a json list -generate_cross_object <- function(json_data){ -return (write_control_file(control_file_path, - crosstype= json_data$crosstype, - geno_file= json_data$geno_file, - pheno_file= json_data$pheno_file, - gmap_file= json_data$geno_map_file, - phenocovar_file= json_data$phenocovar_file, - geno_codes= json_data$geno_codes, - alleles= json_data$alleles, - na.strings=json_data$na.strings, - overwrite = TRUE)) +generate_cross_object <- function(json_data) { + return ( + write_control_file( + control_file_path, + crosstype = json_data$crosstype, + geno_file = json_data$geno_file, + pheno_file = json_data$pheno_file, + gmap_file = json_data$geno_map_file, + phenocovar_file = json_data$phenocovar_file, + geno_codes = json_data$geno_codes, + alleles = json_data$alleles, + na.strings = json_data$na.strings, + overwrite = TRUE + ) + ) } # alternatively pass a yaml file with -dataset <- write_control_file(control_file_path, - crosstype= crosstype, - geno_file= geno_file, - pheno_file= pheno_file, - gmap_file= geno_map_file, - phenocovar_file= pheno_covar_file, - geno_codes=c(L=1L, C=2L), - alleles= alleles, - na.strings=c("-", "NA"), - overwrite = TRUE) +dataset <- write_control_file( + control_file_path, + crosstype = crosstype, + geno_file = geno_file, + pheno_file = pheno_file, + gmap_file = geno_map_file, + phenocovar_file = pheno_covar_file, + geno_codes = c(L = 1L, C = 2L), + alleles = alleles, + na.strings = c("-", "NA"), + overwrite = TRUE +) # make validation for the data dataset <- read_cross2(control_file_path, quiet = FALSE) # replace this with a dynamic path @@ -116,13 +122,13 @@ dataset <- read_cross2(control_file_path, quiet = FALSE) # replace this with a # check integrity of the cross cat("Check the integrity of the cross object") check_cross2(dataset) -if (check_cross2(dataset)){ +if (check_cross2(dataset)) { print("Dataset meets required specifications for a cross") } else { print("Dataset does not meet required specifications") } -# Dataset Summarys +# Dataset Summarys cat("A Summary about the Dataset You Provided\n") summary(dataset) n_ind(dataset) @@ -144,12 +150,18 @@ founders(dataset) analysis_type <- "single" -perform_genetic_pr <- function(cross, cores= 1, error_prob=0.002, analysis_type="single"){ -# improve on this -if (analysis_type == "single"){ - pr <- calc_genoprob(cross, error_prob= error_prob, quiet=FALSE, cores=cores) +perform_genetic_pr <- function(cross, + cores = 1, + error_prob = 0.002, + analysis_type = "single") { + # improve on this + if (analysis_type == "single") { + pr <- calc_genoprob(cross, + error_prob = error_prob, + quiet = FALSE, + cores = cores) return (pr) -} + } } # get the genetic probability @@ -178,52 +190,89 @@ print(pheno) print(covar) print(Xcovar) -# rework on fetching th Xcovar and getting the covar data +# rework on fetching th Xcovar and getting the covar data # perform kinship -perform_genome_scan <- function(cross, genome_prob, method, covar =NULL, xCovar=NULL) -# perform scan1 using haley-knott regression or linear model, or LOCO linear model +perform_genome_scan <- function(cross, + genome_prob, + method, + covar = NULL, + xCovar = NULL) + # perform scan1 using haley-knott regression or linear model, or LOCO linear model { -if (method == "LMM") { - # provide parameters for this - kinship = calc_kinship(genome_prob) - out <- scan1(genome_prob, cross$pheno, kinship=kinship, addcovar=covar, Xcovar=Xcovar) + if (method == "LMM") { + # provide parameters for this + kinship = calc_kinship(genome_prob) + out <- scan1( + genome_prob, + cross$pheno, + kinship = kinship, + addcovar = covar, + Xcovar = Xcovar + ) + } + + if (method == "LOCO") { + # perform kinship inside better option + kinship = calc_kinship(genome_prob, "loco") + out <- scan1( + genome_prob, + cross$pheno, + kinship = kinship, + addcovar = covar, + Xcovar = Xcovar + ) + } + else { + # perform using Haley Knott + out <- scan1(genome_prob, + cross$pheno, + addcovar = NULL, + Xcovar = Xcovar) + } + return (out) } -if (method == "LOCO") { -# perform kinship inside better option - kinship = calc_kinship(genome_prob, "loco") - out <- scan1(genome_prob, cross$pheno, kinship=kinship,addcovar=covar, Xcovar=Xcovar) -} -else { -# perform using Haley Knott -out <- scan1(genome_prob, cross$pheno, addcovar=NULL, Xcovar=Xcovar) -} -return (out) -} - -results <- perform_genome_scan(cross=dataset, genome_prob=Pr, method = "HMM") +results <- perform_genome_scan(cross = dataset, + genome_prob = Pr, + method = "HMM") -results # this should probably return the method use here +results # this should probably return the method use here # plot for the LOD scores from performing the genome scan -generate_lod_plot <- function(cross, scan_result, method, base_dir="."){ -# Plot LOD curves for a genome scan -color <- c("slateblue", "violetred", "green3") -par(mar=c(4.1, 4.1, 1.6, 1.1)) -ymx <- maxlod(scan_result) -file_name = genRandomFileName(prefix="RQTL_LOD_SCORE_",file_ext=".png") -image_loc = file.path(base_dir ,file_name) -png(image_loc, width=1000, height=600, type='cairo-png') -plot(scan_result, cross$gmap, lodcolumn=1, col=color[1], main=colnames(cross$pheno)[1], - ylim=c(0, ymx*1.02)) -legend("topleft", lwd=2, col=color[1], method, bg="gray90", lty=c(1,1,2)) -dev.off() -return (image_loc) +generate_lod_plot <- function(cross, scan_result, method, base_dir = ".") { + # Plot LOD curves for a genome scan + color <- c("slateblue", "violetred", "green3") + par(mar = c(4.1, 4.1, 1.6, 1.1)) + ymx <- maxlod(scan_result) + file_name = genRandomFileName(prefix = "RQTL_LOD_SCORE_", file_ext = ".png") + image_loc = file.path(base_dir , file_name) + png(image_loc, + width = 1000, + height = 600, + type = 'cairo-png') + plot( + scan_result, + cross$gmap, + lodcolumn = 1, + col = color[1], + main = colnames(cross$pheno)[1], + ylim = c(0, ymx * 1.02) + ) + legend( + "topleft", + lwd = 2, + col = color[1], + method, + bg = "gray90", + lty = c(1, 1, 2) + ) + dev.off() + return (image_loc) } lod_file_path <- generate_lod_plot(dataset, results, "HK") @@ -234,31 +283,51 @@ lod_file_path # Q how do we perform geno scan with Genome scan with a single-QTL model ???? # perform permutation tests for single-QTL method - -perform_permutation_test <- function(cross, genome_prob, n_perm, method="HKK", covar=NULL, Xcovar=NULL, perm_strata=NULL){ -# todo add chr_lengths and perm_Xsp -if (method == "HKK") { - perm <- scan1perm(genome_prob, cross$pheno, Xcovar=Xcovar, n_perm= n_perm, perm_strata=perm_strata) -} -else if (method == "LMM") { - kinship = calc_kinship(genome_prob) - perm <- scan1perm(genome_prob, cross$pheno, - kinship=kinship, Xcovar=Xcovar, - n_perm=n_perm) -} -else if (method == "LOCO") { - kinship = calc_kinship(genome_prob, "loco") -operm3 <- scan1perm(genome_prob, cross$pheno, - kinship=kinship , perm_strata=perm_strata, - Xcovar= Xcovar, n_perm=n_perm) -} -return (perm) +perform_permutation_test <- function(cross, + genome_prob, + n_perm, + method = "HKK", + covar = NULL, + Xcovar = NULL, + perm_strata = NULL) { + # todo add chr_lengths and perm_Xsp + + if (method == "HKK") { + perm <- scan1perm( + genome_prob, + cross$pheno, + Xcovar = Xcovar, + n_perm = n_perm, + perm_strata = perm_strata + ) + } + else if (method == "LMM") { + kinship = calc_kinship(genome_prob) + perm <- scan1perm( + genome_prob, + cross$pheno, + kinship = kinship, + Xcovar = Xcovar, + n_perm = n_perm + ) + } + else if (method == "LOCO") { + kinship = calc_kinship(genome_prob, "loco") + operm3 <- scan1perm( + genome_prob, + cross$pheno, + kinship = kinship , + perm_strata = perm_strata, + Xcovar = Xcovar, + n_perm = n_perm + ) + } + return (perm) } # TODO ! get these parameters from argument from the user -perm <- perform_permutation_test(dataset,Pr, n_perm=2, method="LMM") +perm <- perform_permutation_test(dataset, Pr, n_perm = 2, method = "LMM") # get the permutation summary with a significance threshold -summary(perm, alpha=c(0.2, 0.05)) - +summary(perm, alpha = c(0.2, 0.05)) \ No newline at end of file -- cgit 1.4.1 From faeee466b37a1df2792b9596c669496fce38d3df Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 25 Oct 2024 16:40:20 +0300 Subject: Minor fix. --- scripts/rqtl2_wrapper.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index c6d1911..babf19e 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -330,4 +330,4 @@ perform_permutation_test <- function(cross, # TODO ! get these parameters from argument from the user perm <- perform_permutation_test(dataset, Pr, n_perm = 2, method = "LMM") # get the permutation summary with a significance threshold -summary(perm, alpha = c(0.2, 0.05)) \ No newline at end of file +summary(perm, alpha = c(0.2, 0.05)) -- cgit 1.4.1 From fe1e3b600511ceedbd6778618d2c0d7e9ea2ffb6 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 25 Oct 2024 16:51:55 +0300 Subject: feat: add function to fetch load peaks for 1 QTL. --- scripts/rqtl2_wrapper.R | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index babf19e..cbf2b93 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -315,7 +315,7 @@ perform_permutation_test <- function(cross, } else if (method == "LOCO") { kinship = calc_kinship(genome_prob, "loco") - operm3 <- scan1perm( + perm <- scan1perm( genome_prob, cross$pheno, kinship = kinship , @@ -331,3 +331,22 @@ perform_permutation_test <- function(cross, perm <- perform_permutation_test(dataset, Pr, n_perm = 2, method = "LMM") # get the permutation summary with a significance threshold summary(perm, alpha = c(0.2, 0.05)) + +# find function to perform the LOD peaks + +find_lod_peaks <-function(scan_results, cross, threshold=4, drop=1.5){ +# can this take pmap??? which map should we use??? +# TODO add more ags +print("Finding the lod peaks with thresholds n and drop n\n") +return (find_peaks(scan_results, cross$gmap, threshold= threshold, drop=drop)) +} + +# add the number of cores +lod_peaks <- find_lod_peaks(results, dataset) +print(load_peaks) + +# how can we perform qtl effect computations ??? with input from user + +# what data should we return to the user + +# improve on this script \ No newline at end of file -- cgit 1.4.1 From fa5f846dc000ed9327e791e5a6e9a2f4d731dea5 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 28 Oct 2024 11:47:24 +0300 Subject: refactor: Minor fixes for file, remove comments --- scripts/rqtl2_wrapper.R | 54 ++++++++++++------------------------------------- 1 file changed, 13 insertions(+), 41 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index cbf2b93..78db11b 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -20,7 +20,7 @@ if (length(args) == 0) { json_file_path = args[1] } -# validation for the json file +# TODO validation for the json file if (!(file.exists(json_file_path))) { @@ -39,23 +39,9 @@ genRandomFileName <- function(prefix, file_ext = ".txt") { return(paste(randStr, file_ext, sep = "")) } -# this should be read from the json file assigned to variables -# TODO improve on this or use option - -# should put constraints for items data required for this -crosstype <- json_data$crosstype -geno_file <- json_data$geno_file -pheno_file <- json_data$pheno_file -geno_map_file <- json_data$geno_map_file -pheno_covar_file <- json_data$phenocovar_file -alleles <- json_data$alleles -founder_geno_file = json_data$founder_geno_file -gmap_file = json_data$gmap_file - - -# work on the optional parameters +# TODO work on the optional parameters # better fit for reading the data # make validations @@ -103,18 +89,9 @@ generate_cross_object <- function(json_data) { # alternatively pass a yaml file with -dataset <- write_control_file( - control_file_path, - crosstype = crosstype, - geno_file = geno_file, - pheno_file = pheno_file, - gmap_file = geno_map_file, - phenocovar_file = pheno_covar_file, - geno_codes = c(L = 1L, C = 2L), - alleles = alleles, - na.strings = c("-", "NA"), - overwrite = TRUE -) + + +generate_cross_object(json_data) # make validation for the data dataset <- read_cross2(control_file_path, quiet = FALSE) # replace this with a dynamic path @@ -177,7 +154,7 @@ error_lod <- calc_errorlod(dataset, Pr, quiet = FALSE, cores = 4) print(error_lod) -# Perform genome scane +# Perform genome scan # rework on this issue ## grab phenotypes and covariates; ensure that covariates have names attribute @@ -190,9 +167,8 @@ print(pheno) print(covar) print(Xcovar) -# rework on fetching th Xcovar and getting the covar data +# TODO: rework on fetching th Xcovar and getting the covar data -# perform kinship perform_genome_scan <- function(cross, @@ -292,7 +268,8 @@ perform_permutation_test <- function(cross, covar = NULL, Xcovar = NULL, perm_strata = NULL) { - # todo add chr_lengths and perm_Xsp + + # TODO! add chr_lengths and perm_Xsp if (method == "HKK") { perm <- scan1perm( @@ -332,7 +309,7 @@ perm <- perform_permutation_test(dataset, Pr, n_perm = 2, method = "LMM") # get the permutation summary with a significance threshold summary(perm, alpha = c(0.2, 0.05)) -# find function to perform the LOD peaks +# function to perform the LOD peaks find_lod_peaks <-function(scan_results, cross, threshold=4, drop=1.5){ # can this take pmap??? which map should we use??? @@ -340,13 +317,8 @@ find_lod_peaks <-function(scan_results, cross, threshold=4, drop=1.5){ print("Finding the lod peaks with thresholds n and drop n\n") return (find_peaks(scan_results, cross$gmap, threshold= threshold, drop=drop)) } - -# add the number of cores +# TODO! add the number of cores lod_peaks <- find_lod_peaks(results, dataset) -print(load_peaks) - -# how can we perform qtl effect computations ??? with input from user - -# what data should we return to the user +print(lod_peaks) -# improve on this script \ No newline at end of file +# TODO! format to return the data ??? \ No newline at end of file -- cgit 1.4.1 From 472c98460ef57f9f67f653b25df4ce8b2e29b3ea Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Tue, 29 Oct 2024 13:32:49 +0300 Subject: feat: add defaults for creating cross objects. --- scripts/rqtl2_wrapper.R | 87 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 84 insertions(+), 3 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index 78db11b..db4797a 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -70,7 +70,78 @@ str_glue("Generated control file path is {control_file_path}") # think about the issue about geno codes ~~~~ # function to generate a cross file from a json list + + +# work on the default from the json_data validation here +# better defaults for the json file + + +json_data + +json_data$na_strings + + +if (is.null(json_data$sep)){ +cat("Using ',' as a default sep for cross file\n") +json_data$sep = "," +} + +if (is.null(json_data$na.strings)){ +cat("Using '-' and 'NA' as the default na.strings\n") + json_data$na.strings = c("-" , "NA") +} + +if (is.null(json_data$geno_transposed)) { +cat("Using FALSE as default parameter for geno_transposed\n") +json_data$geno_transposed = FALSE +} + +if (is.null(json_data$founder_geno_transposed)){ +cat("Using FALSE as default parameter for founder_geno_transposed\n") +json_data$founder_geno_transposed = FALSE +} + + +if (is.null(json_data$pheno_transposed)) { +cat("Using FALSE as default parameter for pheno_transposed\n") +json_data$geno_transposed = FALSE +} + + + +if( is.null(json_data$covar_transposed) ){ +cat("Using FALSE as default parameter for covar_transposed\n") +json_data$covar_transposed = FALSE +} + + +if (is.null(json_data$phenocovar_transposed)){ +cat("Using FALSE as default parameter for phenocovar_transposed\n") +json_data$phenocovar_transposed = FALSE +} + + + +# probably check on using a dict for check this parameter +# more like default object parameter + + +cross_default1 <- c( + "sep" = ",", + "na_strings" = c("-", "NA"), + "geno_transposed" = FALSE, + "covar_transposed" = FALSE, + "pheno_transposed" = FALSE, + "founder_geno_transposed" = FALSE, + "phenocovar_transposed" = FALSE + +) + + + +# mssing something here generate_cross_object <- function(json_data) { + # function to write the cross object from a json data object return ( write_control_file( control_file_path, @@ -78,10 +149,21 @@ generate_cross_object <- function(json_data) { geno_file = json_data$geno_file, pheno_file = json_data$pheno_file, gmap_file = json_data$geno_map_file, + pmap_file = json_data$pheno_map_file, phenocovar_file = json_data$phenocovar_file, geno_codes = json_data$geno_codes, alleles = json_data$alleles, na.strings = json_data$na.strings, + geno_transposed = json_data$geno_transposed, + sex_file = json_data$sex_file, + founder_geno_file = json_data$founder_geno_file, + covar_file = json_data$covar_file, + sex_covar = json_data$sex_covar, + sex_codes = json_data$sex_codes, + crossinfo_file = json_data$crossinfo_file, + crossinfo_covar = json_data$crossinfo_covar, + crossinfo_codes = json_data$crossinfo_codes, + xchr = json_data$xchr, overwrite = TRUE ) ) @@ -90,13 +172,12 @@ generate_cross_object <- function(json_data) { # alternatively pass a yaml file with - generate_cross_object(json_data) -# make validation for the data dataset <- read_cross2(control_file_path, quiet = FALSE) # replace this with a dynamic path - # check integrity of the cross + + cat("Check the integrity of the cross object") check_cross2(dataset) if (check_cross2(dataset)) { -- cgit 1.4.1 From a9d6fe11b969f1e715656aeb71fd00602e71b940 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Tue, 29 Oct 2024 14:47:09 +0300 Subject: Refactor: use FALSE for better defaults. --- scripts/rqtl2_wrapper.R | 87 ++++++------------------------------------------- 1 file changed, 10 insertions(+), 77 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index db4797a..bd5b302 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -30,8 +30,6 @@ if (!(file.exists(json_file_path))) { json_data = fromJSON(file = json_file_path) } - - # generate random string file path here genRandomFileName <- function(prefix, file_ext = ".txt") { randStr = paste(prefix, stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"), sep = @@ -39,18 +37,8 @@ genRandomFileName <- function(prefix, file_ext = ".txt") { return(paste(randStr, file_ext, sep = "")) } - - # TODO work on the optional parameters -# better fit for reading the data -# make validations - -# parsing the required data for example the geno_codes - - -# geno_codes handle the geno codes here - # make assertion for the geno_file and the geno_file exists # make assertion for the physical map file or the geno map file exists @@ -59,87 +47,32 @@ control_file_path <- file.path("/home/kabui", genRandomFileName(prefix = "control_", file_ext = ".json")) str_glue("Generated control file path is {control_file_path}") - - - -# create the cross file here from the arguments provided -# todo add more validation checks here - -# issue I can no define the different paths for files for example pheno_file - -# think about the issue about geno codes ~~~~ -# function to generate a cross file from a json list - - - -# work on the default from the json_data validation here # better defaults for the json file - - -json_data - -json_data$na_strings - - if (is.null(json_data$sep)){ cat("Using ',' as a default sep for cross file\n") json_data$sep = "," } - if (is.null(json_data$na.strings)){ cat("Using '-' and 'NA' as the default na.strings\n") json_data$na.strings = c("-" , "NA") } -if (is.null(json_data$geno_transposed)) { -cat("Using FALSE as default parameter for geno_transposed\n") -json_data$geno_transposed = FALSE -} - -if (is.null(json_data$founder_geno_transposed)){ -cat("Using FALSE as default parameter for founder_geno_transposed\n") -json_data$founder_geno_transposed = FALSE -} - - -if (is.null(json_data$pheno_transposed)) { -cat("Using FALSE as default parameter for pheno_transposed\n") -json_data$geno_transposed = FALSE -} - +# use this better defaults +default_keys = c( + "geno_transposed", "founder_geno_transposed", + "pheno_transposed" , "covar_transposed", + "phenocovar_transposed") -if( is.null(json_data$covar_transposed) ){ -cat("Using FALSE as default parameter for covar_transposed\n") -json_data$covar_transposed = FALSE +for (item in default_keys) { +if (!(item %in% names(json_data))){ + cat("Using FALSE as default parameter for ", item) + cat("\n") + json_data[item] = FALSE } - - -if (is.null(json_data$phenocovar_transposed)){ -cat("Using FALSE as default parameter for phenocovar_transposed\n") -json_data$phenocovar_transposed = FALSE } - -# probably check on using a dict for check this parameter -# more like default object parameter - - -cross_default1 <- c( - "sep" = ",", - "na_strings" = c("-", "NA"), - "geno_transposed" = FALSE, - "covar_transposed" = FALSE, - "pheno_transposed" = FALSE, - "founder_geno_transposed" = FALSE, - "phenocovar_transposed" = FALSE - -) - - - -# mssing something here generate_cross_object <- function(json_data) { # function to write the cross object from a json data object return ( -- cgit 1.4.1 From df068544c17bcb976c24baed0461b3848168e9fd Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Tue, 29 Oct 2024 14:54:03 +0300 Subject: Refactor: cleanup comments. --- scripts/rqtl2_wrapper.R | 41 +++++++++++++---------------------------- 1 file changed, 13 insertions(+), 28 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index bd5b302..1a56207 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -13,7 +13,6 @@ args = commandArgs(trailingOnly = TRUE) # get the json file path with pre metadata required to create the cross - if (length(args) == 0) { stop("Argument for the metadata file is Missing ", call. = FALSE) } else { @@ -30,24 +29,23 @@ if (!(file.exists(json_file_path))) { json_data = fromJSON(file = json_file_path) } -# generate random string file path here +# generate random string file paths + genRandomFileName <- function(prefix, file_ext = ".txt") { randStr = paste(prefix, stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"), sep = "_") return(paste(randStr, file_ext, sep = "")) } -# TODO work on the optional parameters +# TODO work on the optional parameters e.g cores, type of computation -# make assertion for the geno_file and the geno_file exists -# make assertion for the physical map file or the geno map file exists +# TODO create temp directory for this workspace pass this as argument -# create temp directory for this workspace control_file_path <- file.path("/home/kabui", genRandomFileName(prefix = "control_", file_ext = ".json")) str_glue("Generated control file path is {control_file_path}") -# better defaults for the json file + if (is.null(json_data$sep)){ cat("Using ',' as a default sep for cross file\n") json_data$sep = "," @@ -57,7 +55,6 @@ cat("Using '-' and 'NA' as the default na.strings\n") json_data$na.strings = c("-" , "NA") } - # use this better defaults default_keys = c( "geno_transposed", "founder_geno_transposed", @@ -72,7 +69,6 @@ if (!(item %in% names(json_data))){ } } - generate_cross_object <- function(json_data) { # function to write the cross object from a json data object return ( @@ -102,14 +98,14 @@ generate_cross_object <- function(json_data) { ) } - -# alternatively pass a yaml file with +# generate the cross file generate_cross_object(json_data) +# read from the cross file path dataset <- read_cross2(control_file_path, quiet = FALSE) # replace this with a dynamic path -# check integrity of the cross +# check integrity of the cross cat("Check the integrity of the cross object") check_cross2(dataset) @@ -137,10 +133,7 @@ founders(dataset) # Work on computing the genetic probabilities - analysis_type <- "single" - - perform_genetic_pr <- function(cross, cores = 1, error_prob = 0.002, @@ -162,29 +155,24 @@ cat("Summaries on the genetic probabilites \n") print(Pr) summary(Pr) +#calculate genotyping error LOD scores -#calculate genotyping error LOD scores, to help identify potential genotyping errors (and problem markers and/or individuals error_lod <- calc_errorlod(dataset, Pr, quiet = FALSE, cores = 4) print(error_lod) # Perform genome scan - -# rework on this issue +# TODO! rework on this issue ## grab phenotypes and covariates; ensure that covariates have names attribute pheno <- dataset$pheno covar <- match(dataset$covar$sex, c("f", "m")) # make numeric names(covar) <- rownames(dataset$covar) Xcovar <- get_x_covar(dataset) - print(pheno) print(covar) print(Xcovar) # TODO: rework on fetching th Xcovar and getting the covar data - - - perform_genome_scan <- function(cross, genome_prob, method, @@ -233,7 +221,6 @@ results <- perform_genome_scan(cross = dataset, results # this should probably return the method use here # plot for the LOD scores from performing the genome scan - generate_lod_plot <- function(cross, scan_result, method, base_dir = ".") { # Plot LOD curves for a genome scan color <- c("slateblue", "violetred", "green3") @@ -268,13 +255,10 @@ generate_lod_plot <- function(cross, scan_result, method, base_dir = ".") { lod_file_path <- generate_lod_plot(dataset, results, "HK") lod_file_path -# work on 2 pair scan multiple pair scan # multiple pair scan -# Q how do we perform geno scan with Genome scan with a single-QTL model ???? +# Note pair scan does not exists in rqtl2 # perform permutation tests for single-QTL method - - perform_permutation_test <- function(cross, genome_prob, n_perm, @@ -284,7 +268,8 @@ perform_permutation_test <- function(cross, perm_strata = NULL) { # TODO! add chr_lengths and perm_Xsp - + + cat("performing permutation tes for the cross object\n") if (method == "HKK") { perm <- scan1perm( genome_prob, -- cgit 1.4.1 From da4ae79ddce7c36b48d4bb4ba314503ef9743cd7 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Tue, 29 Oct 2024 15:30:21 +0300 Subject: Refactor: refactor functions to compute genetic probabilities: --- scripts/rqtl2_wrapper.R | 45 +++++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index 1a56207..5decd85 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -131,32 +131,45 @@ ind_ids_pheno(dataset) cat("Name of the founder Strains/n") founders(dataset) -# Work on computing the genetic probabilities - -analysis_type <- "single" +# Function for computing the genetic probabilities perform_genetic_pr <- function(cross, cores = 1, - error_prob = 0.002, - analysis_type = "single") { - # improve on this - if (analysis_type == "single") { - pr <- calc_genoprob(cross, - error_prob = error_prob, - quiet = FALSE, - cores = cores) - return (pr) + step=1, + map=NULL, + use_pseudomarkers=FALSE, + map_function=c("haldane", "kosambi", "c-f", "morgan"), + error_prob = 0.002 + ) { + #' Function to calculate the genetic probabilities + #' @description function to perform genetic probabilities + #' @param cores number no of cores to use Defaults to "1" + #' @param map Genetic map of markers. defaults to "NONE" + #' @param use_pseudomarkers option to insert pseudo markers in the gmap default "FALSE" + #' @param error_prob + #' @param map_function Character string indicating the map function to use to convert genetic + #' @param step for default "1" + #' @return a list of three-dimensional arrays of probabilities, individuals x genotypes x pst + + cat("Finding the genetic Probabilities\n") + if (use_pseudomarkers){ + map <- insert_pseudomarkers(cross$gmap, step=step) + return(calc_genoprob(cross, map=map, + error_prob=error_prob, map_function=map_function, + quiet=FALSE, cores=cores)) } -} - -# get the genetic probability + else { + return (calc_genoprob(cross, map=map, error_prob=error_prob, + quiet = FALSE, map_function =map_function, + cores = cores)) + }} Pr = perform_genetic_pr(dataset) cat("Summaries on the genetic probabilites \n") print(Pr) summary(Pr) -#calculate genotyping error LOD scores +#calculate genotyping error LOD scores error_lod <- calc_errorlod(dataset, Pr, quiet = FALSE, cores = 4) print(error_lod) -- cgit 1.4.1 From 7194eaf537c889c979b34a1f092ba5f2c0a8e030 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Tue, 29 Oct 2024 15:39:55 +0300 Subject: Refactor: refactor function to calculate error LOD scores. --- scripts/rqtl2_wrapper.R | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index 5decd85..2c38dd4 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -11,6 +11,8 @@ options(stringsAsFactors = FALSE) args = commandArgs(trailingOnly = TRUE) +NO_OF_CORES = 4 + # get the json file path with pre metadata required to create the cross if (length(args) == 0) { @@ -169,9 +171,14 @@ print(Pr) summary(Pr) -#calculate genotyping error LOD scores -error_lod <- calc_errorlod(dataset, Pr, quiet = FALSE, cores = 4) -print(error_lod) +#Function to Calculate genotyping error LOD scores +cat("Calculate genotype error LOD scores\n") +error_lod <- calc_errorlod(dataset, Pr, quiet = FALSE, cores = NO_OF_CORES) +# combine into one matrix +error_lod <- do.call("cbind", error_lod) +print(error_lod + + # Perform genome scan -- cgit 1.4.1 From 9043e19f7b5b79a1492af9fd1e20a9008b31e982 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Tue, 29 Oct 2024 16:28:04 +0300 Subject: Refactor: refactor method to compute scan 1. --- scripts/rqtl2_wrapper.R | 70 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 20 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index 2c38dd4..27c0195 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -12,6 +12,7 @@ options(stringsAsFactors = FALSE) args = commandArgs(trailingOnly = TRUE) NO_OF_CORES = 4 +SCAN_METHOD = "HK" # get the json file path with pre metadata required to create the cross @@ -176,7 +177,7 @@ cat("Calculate genotype error LOD scores\n") error_lod <- calc_errorlod(dataset, Pr, quiet = FALSE, cores = NO_OF_CORES) # combine into one matrix error_lod <- do.call("cbind", error_lod) -print(error_lod +print(error_lod) @@ -192,53 +193,82 @@ print(pheno) print(covar) print(Xcovar) + + # TODO: rework on fetching th Xcovar and getting the covar data + +# Function to perform scan1 + perform_genome_scan <- function(cross, genome_prob, - method, - covar = NULL, - xCovar = NULL) - # perform scan1 using haley-knott regression or linear model, or LOCO linear model -{ + method="HK", + addcovar = NULL, + intcovar = NULL, + model = c("normal","binary"), + Xcovar = NULL) { + #' perform genome scan + #' @description perform scan1 using haley-knott regression, perform scan1 using haley-knott #' or linear model, or LOCO linear model + #' the cross object required to pull the pheno + #' @param method to method to perform scan1 either by haley-knott regression(HL), + #' linear mixed model(LMM) or , for the LOCO method(LOCO) + #' @param intcovar A numeric optional matrix of interactive covariates. + #' @param addcovar An optional numeric matrix of additive covariates. + #' @param Xcovar An optional numeric matrix with additional additive covariates used for null #' used for null hypothesis when scanning the X chromosome. + #' @param model Indicates whether to use a normal model (least squares) or binary model + #' @return An object of class "scan1" + + if (method == "LMM") { # provide parameters for this + cat("Performing scan1 using Linear mixed model\n") kinship = calc_kinship(genome_prob) out <- scan1( genome_prob, cross$pheno, kinship = kinship, addcovar = covar, - Xcovar = Xcovar + Xcovar = Xcovar, + intcovar = intcovar, + model = model, + cores = NO_OF_CORES ) - } - - if (method == "LOCO") { - # perform kinship inside better option + } else if (method == "LOCO") { + cat("Performing scan1 using Leave one chromosome out\n") kinship = calc_kinship(genome_prob, "loco") out <- scan1( genome_prob, cross$pheno, kinship = kinship, addcovar = covar, - Xcovar = Xcovar + intcovar = intcovar, + model = model, + Xcovar = Xcovar, + cores = NO_OF_CORES ) } - else { - # perform using Haley Knott + + else if (method == "HK"){ + cat("Performing scan1 using Haley Knott\n") out <- scan1(genome_prob, cross$pheno, addcovar = NULL, - Xcovar = Xcovar) + intcovar = intcovar, + model = model, + Xcovar = Xcovar, + cores = NO_OF_CORES + ) } + return (out) } -results <- perform_genome_scan(cross = dataset, +# TODO rename this to genome scan results +scan_results <- perform_genome_scan(cross = dataset, genome_prob = Pr, - method = "HMM") + method = SCAN_METHOD) -results # this should probably return the method use here +scan_results # plot for the LOD scores from performing the genome scan generate_lod_plot <- function(cross, scan_result, method, base_dir = ".") { @@ -272,7 +302,7 @@ generate_lod_plot <- function(cross, scan_result, method, base_dir = ".") { return (image_loc) } -lod_file_path <- generate_lod_plot(dataset, results, "HK") +lod_file_path <- generate_lod_plot(dataset, scan_results, "HK") lod_file_path @@ -337,7 +367,7 @@ print("Finding the lod peaks with thresholds n and drop n\n") return (find_peaks(scan_results, cross$gmap, threshold= threshold, drop=drop)) } # TODO! add the number of cores -lod_peaks <- find_lod_peaks(results, dataset) +lod_peaks <- find_lod_peaks(scan_results, dataset) print(lod_peaks) # TODO! format to return the data ??? \ No newline at end of file -- cgit 1.4.1 From 6772765677c5558532850d8ab03f336b5aeb9f27 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Tue, 29 Oct 2024 16:33:16 +0300 Subject: Refactor: refactor function to generate plots from scan1 function. --- scripts/rqtl2_wrapper.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index 27c0195..3a0fcd5 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -272,7 +272,12 @@ scan_results # plot for the LOD scores from performing the genome scan generate_lod_plot <- function(cross, scan_result, method, base_dir = ".") { - # Plot LOD curves for a genome scan + #' @description Plot LOD curves for a genome scan + #' @param the cross object + #' @param scan1 results + #' @param the method used to compute the scan1 results HK,LMM or LOCO + #' @param base_dir the path to write the generated plot + #' @return a string with the file path for the plot color <- c("slateblue", "violetred", "green3") par(mar = c(4.1, 4.1, 1.6, 1.1)) ymx <- maxlod(scan_result) @@ -302,6 +307,7 @@ generate_lod_plot <- function(cross, scan_result, method, base_dir = ".") { return (image_loc) } + lod_file_path <- generate_lod_plot(dataset, scan_results, "HK") lod_file_path -- cgit 1.4.1 From 965ba2a3878808fd05f36aca76f722b36ca64353 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Tue, 29 Oct 2024 17:00:27 +0300 Subject: Refactor: refactoring for code to perform permutation tests. --- scripts/rqtl2_wrapper.R | 67 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 57 insertions(+), 10 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index 3a0fcd5..946b76e 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -13,6 +13,7 @@ args = commandArgs(trailingOnly = TRUE) NO_OF_CORES = 4 SCAN_METHOD = "HK" +NO_OF_PERMUTATION = 2 # get the json file path with pre metadata required to create the cross @@ -312,27 +313,53 @@ lod_file_path <- generate_lod_plot(dataset, scan_results, "HK") lod_file_path -# Note pair scan does not exists in rqtl2 - # perform permutation tests for single-QTL method + perform_permutation_test <- function(cross, genome_prob, n_perm, method = "HKK", covar = NULL, Xcovar = NULL, + addcovar = NULL, + intcovar = NULL, + perm_Xsp = FALSE, + model = c("normal", "binary"), + chr_lengths = NULL, perm_strata = NULL) { - + + #' Function to peform permutation tests for single QTL method + #' @description The scan1perm() function takes the + #' same arguments as scan1(), plus additional a #rguments to control the permutations: + #" @param cross the cross object required to fetch the pheno + #' @param genome_prob the genomic probability matrix + #' @param method to computation method used to perform the genomic scan + #' @param intcovar + #' @param addcovar + #' @param Xcovar + #' @param perm_Xsp If TRUE, do separate permutations for the autosomes and the X chromosome. + #' @param perm_strata Vector of strata, for a stratified permutation test. + #' @param n_perm Number of permutation replicates. + #' @param chr_lengths engths of the chromosomes; + #' @return object of class "scan1perm". # TODO! add chr_lengths and perm_Xsp - cat("performing permutation tes for the cross object\n") - if (method == "HKK") { + cat("performing permutation test for the cross object\n") + if (method == "HK") { + + perm <- scan1perm( genome_prob, cross$pheno, Xcovar = Xcovar, + intcovar = intcovar, + addcovar = addcovar, n_perm = n_perm, - perm_strata = perm_strata + model = model, + perm_Xsp = perm_Xsp, + perm_strata = perm_strata, + chr_lengths = chr_lengths, + cores = NO_OF_CORES ) } else if (method == "LMM") { @@ -342,7 +369,13 @@ perform_permutation_test <- function(cross, cross$pheno, kinship = kinship, Xcovar = Xcovar, - n_perm = n_perm + intcovar = intcovar, + addcovar = addcovar, + n_perm = n_perm, + perm_Xsp = perm_Xsp, + model = model, + chr_lengths = chr_lengths, + cores = NO_OF_CORES ) } else if (method == "LOCO") { @@ -353,17 +386,31 @@ perform_permutation_test <- function(cross, kinship = kinship , perm_strata = perm_strata, Xcovar = Xcovar, - n_perm = n_perm + intcovar = intcovar, + addcovar = addcovar, + n_perm = n_perm, + perm_Xsp = perm_Xsp, + model = model, + chr_lengths = chr_lengths, + cores = NO_OF_CORES ) } return (perm) } # TODO ! get these parameters from argument from the user -perm <- perform_permutation_test(dataset, Pr, n_perm = 2, method = "LMM") +perm <- perform_permutation_test(dataset, Pr, n_perm = NO_OF_PERMUTATION, method = "LMM") # get the permutation summary with a significance threshold -summary(perm, alpha = c(0.2, 0.05)) +get_lod_significance <- function(perm, threshold = c(0.2, 0.05)){ + cat("Fetch the lod with significance thresholds ") + cat(threshold) + cat("\n") + summary(perm, alpha = threshold) +} + +lod_significance <- get_lod_significance(perm) +stop("Test kkk") # function to perform the LOD peaks find_lod_peaks <-function(scan_results, cross, threshold=4, drop=1.5){ -- cgit 1.4.1 From 741e88920bc71945b76702bc3ddd0bee2e93acd6 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Tue, 29 Oct 2024 17:11:50 +0300 Subject: Refactor: refactor code to find lod peaks. --- scripts/rqtl2_wrapper.R | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index 946b76e..d8cf755 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -23,8 +23,6 @@ if (length(args) == 0) { json_file_path = args[1] } -# TODO validation for the json file - if (!(file.exists(json_file_path))) { stop("The input file path does not exists") @@ -407,20 +405,19 @@ get_lod_significance <- function(perm, threshold = c(0.2, 0.05)){ cat("\n") summary(perm, alpha = threshold) } - lod_significance <- get_lod_significance(perm) -stop("Test kkk") -# function to perform the LOD peaks -find_lod_peaks <-function(scan_results, cross, threshold=4, drop=1.5){ -# can this take pmap??? which map should we use??? -# TODO add more ags -print("Finding the lod peaks with thresholds n and drop n\n") -return (find_peaks(scan_results, cross$gmap, threshold= threshold, drop=drop)) -} -# TODO! add the number of cores -lod_peaks <- find_lod_peaks(scan_results, dataset) -print(lod_peaks) -# TODO! format to return the data ??? \ No newline at end of file +# get the lod peaks +# fix issue with map + +cat("Fetching the lod peaks\n") +lod_peaks = find_peaks( + scan_results, + threshold =0, + map = dataset$gmap, + cores = NO_OF_CORES +) + +lod_peaks -- cgit 1.4.1 From 3cd3b252e8fe89d9b835f8362cc017289d97b257 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Tue, 29 Oct 2024 17:27:08 +0300 Subject: feat: write output results as json. --- scripts/rqtl2_wrapper.R | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index d8cf755..4c1e3dc 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -307,8 +307,8 @@ generate_lod_plot <- function(cross, scan_result, method, base_dir = ".") { } -lod_file_path <- generate_lod_plot(dataset, scan_results, "HK") -lod_file_path +lod_plot_path <- generate_lod_plot(dataset, scan_results, "HK") +lod_plot_path # perform permutation tests for single-QTL method @@ -421,3 +421,20 @@ lod_peaks = find_peaks( ) lod_peaks +cat("Generating the ouput data as vector\n") +output = list(lod_peaks = lod_peaks, + scan_results =scan_results, + lod_significance = lod_significance, + permutation_results = perm, + lod_peaks = lod_peaks, + lod_plot_path =lod_plot_path, + scan_method = SCAN_METHOD + ) + +output_json_data <-toJSON(output) +file_name = genRandomFileName(prefix = "RQTL_output_", file_ext = ".json") +output_file_path = file.path("." , file_name) +str_glue("The output file path is {output_file_path}") +cat("Writing to the output file\n") +write(output_json_data, file=output_file_path) + -- cgit 1.4.1 From 1d88b3b392f9148f26aa9401eb8a083b2419ad9b Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Tue, 29 Oct 2024 18:27:11 +0300 Subject: feat: add optparse for the arguments. --- scripts/rqtl2_wrapper.R | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index 4c1e3dc..eda6247 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -6,24 +6,38 @@ library(qtl2) library(rjson) library(stringi) library(stringr) +library(optparse) -options(stringsAsFactors = FALSE) -args = commandArgs(trailingOnly = TRUE) -NO_OF_CORES = 4 -SCAN_METHOD = "HK" -NO_OF_PERMUTATION = 2 +option_list <- list( + make_option(c("-c", "--cores"), type="integer", default=1, help="No of cores to use while making + computation"), +make_option(c("-i", "--input_file"), action="store", default=NULL, type='character', help="a yaml or json file with required data to create the cross file"), +make_option(c("-p", "--nperm"), type="integer", default= 1, action="store_true", help="No of permutations "), + make_option(c("-d", "--directory"), action = "store", default = tempdir(), type = "character", help="Temporary working directory: should also host the input file ."), + make_option(c("-m", "--method"), action = "store", default = "HK", type = "character", help="Scan Mapping Method - HK (Haley Knott), LMM( Linear Mixed Model ), LOCO (Leave one Chromosome Out)") + +) + + +opt <- parse_args(OptionParser(option_list=option_list)) + + +NO_OF_CORES = opt$cores +SCAN_METHOD = opt$method +NO_OF_PERMUTATION = opt$nperm # get the json file path with pre metadata required to create the cross -if (length(args) == 0) { - stop("Argument for the metadata file is Missing ", call. = FALSE) +if (is.null(opt$input_file)) { + stop("Argument for the Input metadata file is Missing ", call. = FALSE) } else { - json_file_path = args[1] + json_file_path = opt$input_file } + if (!(file.exists(json_file_path))) { stop("The input file path does not exists") } else { @@ -258,15 +272,15 @@ perform_genome_scan <- function(cross, ) } + return (out) } -# TODO rename this to genome scan results +# Perform the genome scan for the cross object scan_results <- perform_genome_scan(cross = dataset, genome_prob = Pr, method = SCAN_METHOD) - scan_results # plot for the LOD scores from performing the genome scan -- cgit 1.4.1 From 99ba5e9fc76cf0334a5c7537ea55c8367e40414d Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Tue, 29 Oct 2024 18:39:16 +0300 Subject: Refactor: ensure the temp directory is not null. ensure the input_file exists in the working directory. --- scripts/rqtl2_wrapper.R | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index eda6247..eca1c2a 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -15,34 +15,40 @@ option_list <- list( computation"), make_option(c("-i", "--input_file"), action="store", default=NULL, type='character', help="a yaml or json file with required data to create the cross file"), make_option(c("-p", "--nperm"), type="integer", default= 1, action="store_true", help="No of permutations "), - make_option(c("-d", "--directory"), action = "store", default = tempdir(), type = "character", help="Temporary working directory: should also host the input file ."), + make_option(c("-d", "--directory"), action = "store", default = NULL, type = "character", help="Temporary working directory: should also host the input file ."), make_option(c("-m", "--method"), action = "store", default = "HK", type = "character", help="Scan Mapping Method - HK (Haley Knott), LMM( Linear Mixed Model ), LOCO (Leave one Chromosome Out)") ) - -opt <- parse_args(OptionParser(option_list=option_list)) +opt <- parse_args(OptionParser(option_list=option_list)) +# check for mandatory args +if(is.null(opt$directory)){ +stop("You need to specify a working temporary directory which should have be base_dir for the input file") +} NO_OF_CORES = opt$cores SCAN_METHOD = opt$method NO_OF_PERMUTATION = opt$nperm # get the json file path with pre metadata required to create the cross +# assumption is that the input file should be in the temp working directory if (is.null(opt$input_file)) { stop("Argument for the Input metadata file is Missing ", call. = FALSE) } else { - json_file_path = opt$input_file + input_file = opt$input_file } +# file_path +input_file_path = file.path(opt$directory, input_file) -if (!(file.exists(json_file_path))) { - stop("The input file path does not exists") +if (!(file.exists(input_file_path))) { + str_glue("The input file {opt$input_file} path does not exists in the directory {opt$directory}") } else { - str_glue("The input path for the metadata >>>>>>> {json_file_path}") - json_data = fromJSON(file = json_file_path) + str_glue("The input path for the metadata >>>>>>> {input_file_path}") + json_data = fromJSON(file = input_file_path) } # generate random string file paths -- cgit 1.4.1 From 82ae3c5c7efa710d34b11e28c89c608a475e9071 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Tue, 29 Oct 2024 20:28:28 +0300 Subject: feat: add output_file as a command line argument. --- scripts/rqtl2_wrapper.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index eca1c2a..cdbbd17 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -16,8 +16,8 @@ option_list <- list( make_option(c("-i", "--input_file"), action="store", default=NULL, type='character', help="a yaml or json file with required data to create the cross file"), make_option(c("-p", "--nperm"), type="integer", default= 1, action="store_true", help="No of permutations "), make_option(c("-d", "--directory"), action = "store", default = NULL, type = "character", help="Temporary working directory: should also host the input file ."), - make_option(c("-m", "--method"), action = "store", default = "HK", type = "character", help="Scan Mapping Method - HK (Haley Knott), LMM( Linear Mixed Model ), LOCO (Leave one Chromosome Out)") - + make_option(c("-m", "--method"), action = "store", default = "HK", type = "character", help="Scan Mapping Method - HK (Haley Knott), LMM( Linear Mixed Model ), LOCO (Leave one Chromosome Out)"), +make_option(c("-o", "--output_file"), action="store", default=NULL, type='character', help="a file name of where to write the output json results") ) @@ -39,6 +39,9 @@ if (is.null(opt$input_file)) { } else { input_file = opt$input_file } +(is.null(opt$output_file)){ +stop("You need to provide an output file to write the ouput data") +} # file_path @@ -452,8 +455,7 @@ output = list(lod_peaks = lod_peaks, ) output_json_data <-toJSON(output) -file_name = genRandomFileName(prefix = "RQTL_output_", file_ext = ".json") -output_file_path = file.path("." , file_name) +output_file_path = file.path(opt$directory , opt$output_file) str_glue("The output file path is {output_file_path}") cat("Writing to the output file\n") write(output_json_data, file=output_file_path) -- cgit 1.4.1 From 4c303f6c43173c1b87f6f675b1db865f4c25a7b0 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Tue, 29 Oct 2024 20:31:42 +0300 Subject: Refactor: make plot in the temp directory passed in the script arguments. --- scripts/rqtl2_wrapper.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index cdbbd17..ab3fa3e 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -330,7 +330,7 @@ generate_lod_plot <- function(cross, scan_result, method, base_dir = ".") { } -lod_plot_path <- generate_lod_plot(dataset, scan_results, "HK") +lod_plot_path <- generate_lod_plot(dataset, scan_results, "HK", base_dir=opt$directory) lod_plot_path -- cgit 1.4.1 From 4667ea4675f2ec3f545c5376a4f942ae2d9615d7 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Tue, 29 Oct 2024 20:52:46 +0300 Subject: Refactor: remove hardcoded dir_path for control files. --- scripts/rqtl2_wrapper.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index ab3fa3e..f71fb08 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -66,7 +66,7 @@ genRandomFileName <- function(prefix, file_ext = ".txt") { # TODO create temp directory for this workspace pass this as argument -control_file_path <- file.path("/home/kabui", +control_file_path <- file.path(opt$directory, genRandomFileName(prefix = "control_", file_ext = ".json")) str_glue("Generated control file path is {control_file_path}") -- cgit 1.4.1 From 4d2995d3ca8890bf49dd146510115a044dfb48c0 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Tue, 29 Oct 2024 22:12:42 +0300 Subject: Refactor: pre compute kinship for both permutation and genome scan. --- scripts/rqtl2_wrapper.R | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index f71fb08..ccbef84 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -221,11 +221,23 @@ print(Xcovar) # Function to perform scan1 + +cat("Computing the kinship") +if (method == "LMM"){ + kinship = calc_kinship(genome_prob) +} else if (method == "LOCO"){ + kinship = calc_kinship(genome_prob, "loco") +}else { + kinship = NULL +} + + perform_genome_scan <- function(cross, genome_prob, method="HK", addcovar = NULL, intcovar = NULL, + kinship = NULL, model = c("normal","binary"), Xcovar = NULL) { #' perform genome scan @@ -243,7 +255,6 @@ perform_genome_scan <- function(cross, if (method == "LMM") { # provide parameters for this cat("Performing scan1 using Linear mixed model\n") - kinship = calc_kinship(genome_prob) out <- scan1( genome_prob, cross$pheno, @@ -256,7 +267,6 @@ perform_genome_scan <- function(cross, ) } else if (method == "LOCO") { cat("Performing scan1 using Leave one chromosome out\n") - kinship = calc_kinship(genome_prob, "loco") out <- scan1( genome_prob, cross$pheno, @@ -288,6 +298,7 @@ perform_genome_scan <- function(cross, # Perform the genome scan for the cross object scan_results <- perform_genome_scan(cross = dataset, genome_prob = Pr, + kinship = kinship, method = SCAN_METHOD) scan_results @@ -345,6 +356,7 @@ perform_permutation_test <- function(cross, addcovar = NULL, intcovar = NULL, perm_Xsp = FALSE, + kinship = NULL, model = c("normal", "binary"), chr_lengths = NULL, perm_strata = NULL) { @@ -367,8 +379,6 @@ perform_permutation_test <- function(cross, cat("performing permutation test for the cross object\n") if (method == "HK") { - - perm <- scan1perm( genome_prob, cross$pheno, @@ -384,7 +394,6 @@ perform_permutation_test <- function(cross, ) } else if (method == "LMM") { - kinship = calc_kinship(genome_prob) perm <- scan1perm( genome_prob, cross$pheno, @@ -400,7 +409,6 @@ perform_permutation_test <- function(cross, ) } else if (method == "LOCO") { - kinship = calc_kinship(genome_prob, "loco") perm <- scan1perm( genome_prob, cross$pheno, -- cgit 1.4.1 From eb0bf73bad1bcf4453e105204acbeeeb8f75c7d1 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Wed, 30 Oct 2024 13:13:17 +0300 Subject: Refactor: Remove reduntant code. Fix issue when computing the kinship --- scripts/rqtl2_wrapper.R | 55 ++++++++++--------------------------------------- 1 file changed, 11 insertions(+), 44 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index ccbef84..07b9874 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -39,7 +39,7 @@ if (is.null(opt$input_file)) { } else { input_file = opt$input_file } -(is.null(opt$output_file)){ +if (is.null(opt$output_file)){ stop("You need to provide an output file to write the ouput data") } @@ -49,6 +49,7 @@ input_file_path = file.path(opt$directory, input_file) if (!(file.exists(input_file_path))) { str_glue("The input file {opt$input_file} path does not exists in the directory {opt$directory}") + stop("The input file does not exists the temp directory") } else { str_glue("The input path for the metadata >>>>>>> {input_file_path}") json_data = fromJSON(file = input_file_path) @@ -223,9 +224,9 @@ print(Xcovar) cat("Computing the kinship") -if (method == "LMM"){ +if (opt$method == "LMM"){ kinship = calc_kinship(genome_prob) -} else if (method == "LOCO"){ +} else if (opt$method == "LOCO"){ kinship = calc_kinship(genome_prob, "loco") }else { kinship = NULL @@ -364,7 +365,7 @@ perform_permutation_test <- function(cross, #' Function to peform permutation tests for single QTL method #' @description The scan1perm() function takes the #' same arguments as scan1(), plus additional a #rguments to control the permutations: - #" @param cross the cross object required to fetch the pheno + #' @param cross the cross object required to fetch the pheno #' @param genome_prob the genomic probability matrix #' @param method to computation method used to perform the genomic scan #' @param intcovar @@ -375,26 +376,8 @@ perform_permutation_test <- function(cross, #' @param n_perm Number of permutation replicates. #' @param chr_lengths engths of the chromosomes; #' @return object of class "scan1perm". - # TODO! add chr_lengths and perm_Xsp - - cat("performing permutation test for the cross object\n") - if (method == "HK") { - perm <- scan1perm( - genome_prob, - cross$pheno, - Xcovar = Xcovar, - intcovar = intcovar, - addcovar = addcovar, - n_perm = n_perm, - model = model, - perm_Xsp = perm_Xsp, - perm_strata = perm_strata, - chr_lengths = chr_lengths, - cores = NO_OF_CORES - ) - } - else if (method == "LMM") { - perm <- scan1perm( + cat("performing permutation test for the cross object\n") + return (scan1perm( genome_prob, cross$pheno, kinship = kinship, @@ -406,27 +389,11 @@ perform_permutation_test <- function(cross, model = model, chr_lengths = chr_lengths, cores = NO_OF_CORES - ) - } - else if (method == "LOCO") { - perm <- scan1perm( - genome_prob, - cross$pheno, - kinship = kinship , - perm_strata = perm_strata, - Xcovar = Xcovar, - intcovar = intcovar, - addcovar = addcovar, - n_perm = n_perm, - perm_Xsp = perm_Xsp, - model = model, - chr_lengths = chr_lengths, - cores = NO_OF_CORES - ) - } - return (perm) + )) } + + # TODO ! get these parameters from argument from the user perm <- perform_permutation_test(dataset, Pr, n_perm = NO_OF_PERMUTATION, method = "LMM") # get the permutation summary with a significance threshold @@ -441,7 +408,7 @@ lod_significance <- get_lod_significance(perm) # get the lod peaks -# fix issue with map +# TODO fix issue when fetching the gmap or allow to use pseudomarkers cat("Fetching the lod peaks\n") lod_peaks = find_peaks( -- cgit 1.4.1 From 8c87de4d70795c07098608342d9fee1acc571346 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Wed, 30 Oct 2024 13:36:53 +0300 Subject: feat: add option for using pstrata for permutation. --- scripts/rqtl2_wrapper.R | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index 07b9874..966d3c6 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -17,7 +17,8 @@ make_option(c("-i", "--input_file"), action="store", default=NULL, type='charact make_option(c("-p", "--nperm"), type="integer", default= 1, action="store_true", help="No of permutations "), make_option(c("-d", "--directory"), action = "store", default = NULL, type = "character", help="Temporary working directory: should also host the input file ."), make_option(c("-m", "--method"), action = "store", default = "HK", type = "character", help="Scan Mapping Method - HK (Haley Knott), LMM( Linear Mixed Model ), LOCO (Leave one Chromosome Out)"), -make_option(c("-o", "--output_file"), action="store", default=NULL, type='character', help="a file name of where to write the output json results") +make_option(c("-o", "--output_file"), action="store", default=NULL, type='character', help="a file name of where to write the output json results"), + make_option(c("--pstrata"), action="store_true", default=NULL, help="Use permutation strata") ) @@ -394,8 +395,17 @@ perform_permutation_test <- function(cross, -# TODO ! get these parameters from argument from the user -perm <- perform_permutation_test(dataset, Pr, n_perm = NO_OF_PERMUTATION, method = "LMM") + +# check if pstrata + +if (!(is.null(opt$pstrata)) && (!is.null(Xcovar))){ +perm_strata <- mat2strata(Xcovar) +} else { +perm_strata <- NULL +} + +perm <- perform_permutation_test(dataset, Pr, n_perm = NO_OF_PERMUTATION,perm_strata = perm_strata, method = "LMM") + # get the permutation summary with a significance threshold get_lod_significance <- function(perm, threshold = c(0.2, 0.05)){ cat("Fetch the lod with significance thresholds ") -- cgit 1.4.1 From 5a996dfe02abf9fa25563844662372f003dbd67e Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Fri, 8 Nov 2024 12:19:07 +0300 Subject: feat: init add functionality to estimate qtl effect. --- scripts/rqtl2_wrapper.R | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index 966d3c6..b68c82f 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -430,8 +430,50 @@ lod_peaks = find_peaks( lod_peaks cat("Generating the ouput data as vector\n") + +# get the estimated qtl effects +cat("Getting the estimated qtl effects\n") + +get_qtl_effect <- function(chromosome,geno_prob,pheno,covar=NULL,LOCO= NULL){ + cat("Finding the qtl effect\n") + chr_Pr <- geno_prob[,chromosome] + if (!is.null(chr_Pr)){ + str_glue("Find qtl effect for chromosome {chromosome} and pheno {pheno}") + if (!is.null(LOCO)) { + str_glue("Find qtl effect for chromosome {chromosome} and pheno {pheno} and LOCO {chromosome}") + kinship <- calc_kinship(chr_Pr, "loco")[[chromosome]] + return(scan1coef(chr_Pr, pheno, kinship, addcovar=covar)) + } + else { + return(scan1coef(chr_Pr, pheno, addcovar=covar)) + } + } + return(NULL) +} + + + +# take the first phenotype in the dataset + +# grab phenotypes and covariates; ensure that covariates have names attribute +# in this case we are using the first chromosome +pheno <- dataset$pheno[,1] +if (!is.null(dataset$covar) && !is.null(dataset$covar$sex)){ + covar <- match(dataset$covar$sex, c("f", "m")) # make numeric + names(covar) <- rownames(dataset$covar) +} else { +covar <- NULL +} +covar + + +# TODO fix this hardcoded chromosome +# TODO get all chromosomes and iterate coeff_results for a given chromosomes +coeff_results <- get_qtl_effect("5", Pr, pheno) + output = list(lod_peaks = lod_peaks, scan_results =scan_results, + genetic_probabilities = Pr, lod_significance = lod_significance, permutation_results = perm, lod_peaks = lod_peaks, @@ -445,3 +487,6 @@ str_glue("The output file path is {output_file_path}") cat("Writing to the output file\n") write(output_json_data, file=output_file_path) + + + -- cgit 1.4.1 From e177ba8d9ad80261f980efca59a87aae2c0939c6 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 11 Nov 2024 10:27:05 +0300 Subject: feat: export chromosome names and qtl effects. --- scripts/rqtl2_wrapper.R | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index b68c82f..7bdc29a 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -467,16 +467,29 @@ covar <- NULL covar + # TODO fix this hardcoded chromosome # TODO get all chromosomes and iterate coeff_results for a given chromosomes coeff_results <- get_qtl_effect("5", Pr, pheno) +meffects <- c() +# TODO add plots for meffects + +for (chr in chr_names(dataset)){ + cat("Getting the qtl effect for chromosome", chr) + cat("\n") + coeff_results <- get_qtl_effect(chr, Pr, pheno) + meffects <- append(meffects, coeff_results) +} + output = list(lod_peaks = lod_peaks, scan_results =scan_results, genetic_probabilities = Pr, lod_significance = lod_significance, permutation_results = perm, lod_peaks = lod_peaks, + chromosomes = chr_names(dataset), + meffects = meffects, lod_plot_path =lod_plot_path, scan_method = SCAN_METHOD ) @@ -487,6 +500,3 @@ str_glue("The output file path is {output_file_path}") cat("Writing to the output file\n") write(output_json_data, file=output_file_path) - - - -- cgit 1.4.1 From 1c0c1823ade014d9f35fdd3a13d77cdd5c4fc45d Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 11 Nov 2024 11:18:36 +0300 Subject: feat: implementation for multiparent genome scan. --- scripts/rqtl2_wrapper.R | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index 7bdc29a..c3f6932 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -196,6 +196,14 @@ print(Pr) summary(Pr) +# perform allele probabilites if cross ways + +# convert this to lower +if (dataset$crosstype == "4way"){ + # + aPr <- genoprob_to_alleleprob(pr) +} + #Function to Calculate genotyping error LOD scores cat("Calculate genotype error LOD scores\n") error_lod <- calc_errorlod(dataset, Pr, quiet = FALSE, cores = NO_OF_CORES) @@ -224,7 +232,10 @@ print(Xcovar) # Function to perform scan1 -cat("Computing the kinship") +# refactor this to a function + + +get_kinship <- function(probability, method="LMM"){ if (opt$method == "LMM"){ kinship = calc_kinship(genome_prob) } else if (opt$method == "LOCO"){ @@ -232,6 +243,16 @@ if (opt$method == "LMM"){ }else { kinship = NULL } +} + + +if (dataset$crosstype == "4way"){ + kinship <- get_kinship(aPr, opt$method) +} else { + kinship <- get_kinship(Pr, "loco") +} + + perform_genome_scan <- function(cross, @@ -285,7 +306,7 @@ perform_genome_scan <- function(cross, cat("Performing scan1 using Haley Knott\n") out <- scan1(genome_prob, cross$pheno, - addcovar = NULL, + addcovar = covar, intcovar = intcovar, model = model, Xcovar = Xcovar, @@ -298,10 +319,18 @@ perform_genome_scan <- function(cross, } # Perform the genome scan for the cross object -scan_results <- perform_genome_scan(cross = dataset, +if (dataset$crosstype == "4way"){ + sex <- (DOex$covar$Sex == "male")*1 + names(sex) <- rownames(dataset$covar) + sex <- setNames( (dataset$covar$Sex == "male")*1, rownames(DOex$covar)) + scan_results <- perform_genoeme_scan(aPr, dataset, kinship=kinship, method = "LOCO", addcovar = sex) +} else { + scan_results <- perform_genome_scan(cross = dataset, genome_prob = Pr, kinship = kinship, method = SCAN_METHOD) +} + scan_results -- cgit 1.4.1 From 038b3cd1416d406ae8a55948795c93a3106b4acc Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 11 Nov 2024 11:38:34 +0300 Subject: feat: implementation for getting meffects for multiparent population. --- scripts/rqtl2_wrapper.R | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index c3f6932..a4fcbfb 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -496,18 +496,17 @@ covar <- NULL covar - -# TODO fix this hardcoded chromosome -# TODO get all chromosomes and iterate coeff_results for a given chromosomes -coeff_results <- get_qtl_effect("5", Pr, pheno) - meffects <- c() # TODO add plots for meffects - for (chr in chr_names(dataset)){ cat("Getting the qtl effect for chromosome", chr) cat("\n") - coeff_results <- get_qtl_effect(chr, Pr, pheno) + if (dataset$crosstype == "4way"){ + coeff_results <- get_qtl_effect(chr, aPr, pheno, LOCO="LOCO", covar = sex) + } else { + coeff_results <- get_qtl_effect(chr, Pr, pheno) + } + meffects <- append(meffects, coeff_results) } -- cgit 1.4.1 From 3efd12e3058e300e7d99e9233699a31088c2fcd1 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 11 Nov 2024 11:49:42 +0300 Subject: feat: add plots for qtl effect multi parent population. --- scripts/rqtl2_wrapper.R | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index a4fcbfb..f2d6a87 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -497,16 +497,32 @@ covar meffects <- c() +meffects_plots <- c() + # TODO add plots for meffects for (chr in chr_names(dataset)){ cat("Getting the qtl effect for chromosome", chr) cat("\n") if (dataset$crosstype == "4way"){ coeff_results <- get_qtl_effect(chr, aPr, pheno, LOCO="LOCO", covar = sex) + file_name = genRandomFileName(prefix = "RQTL_EFFECT_", file_ext = ".png") + image_loc = file.path(base_dir , file_name) + par(mar=c(4.1, 4.1, 0.6, 0.6)) + png(image_loc, + width = 1000, + height = 600, + type = 'cairo-png') + plot( + coeff_results, + cross$gmap[chr], + bgcolor="gray95", + legend="bottomleft" + ) + meffects <- append(meffects_plots, image_loc) } else { coeff_results <- get_qtl_effect(chr, Pr, pheno) } - + meffects <- append(meffects, coeff_results) } -- cgit 1.4.1 From 5494e2ba34bafb7820cf1d578d34302a39eb9811 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 11 Nov 2024 11:52:49 +0300 Subject: export qtl effect plots for multi parent populations. --- scripts/rqtl2_wrapper.R | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index f2d6a87..f514e73 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -534,6 +534,7 @@ output = list(lod_peaks = lod_peaks, lod_peaks = lod_peaks, chromosomes = chr_names(dataset), meffects = meffects, + meffects_plots = meffects_plots, lod_plot_path =lod_plot_path, scan_method = SCAN_METHOD ) -- cgit 1.4.1 From 0a5c1f7d4ccc88d3555e44b50beec8ee91bc43fc Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 11 Nov 2024 12:04:42 +0300 Subject: refactor: refactor printing out lod thresholds. --- scripts/rqtl2_wrapper.R | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index f514e73..6581ed4 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -9,7 +9,6 @@ library(stringr) library(optparse) - option_list <- list( make_option(c("-c", "--cores"), type="integer", default=1, help="No of cores to use while making computation"), @@ -306,7 +305,7 @@ perform_genome_scan <- function(cross, cat("Performing scan1 using Haley Knott\n") out <- scan1(genome_prob, cross$pheno, - addcovar = covar, + addcovar = NULL, intcovar = intcovar, model = model, Xcovar = Xcovar, @@ -437,9 +436,7 @@ perm <- perform_permutation_test(dataset, Pr, n_perm = NO_OF_PERMUTATION,perm_st # get the permutation summary with a significance threshold get_lod_significance <- function(perm, threshold = c(0.2, 0.05)){ - cat("Fetch the lod with significance thresholds ") - cat(threshold) - cat("\n") + cat("Fetch the lod with significance thresholds ", threshold, "\n") summary(perm, alpha = threshold) } lod_significance <- get_lod_significance(perm) -- cgit 1.4.1 From 006f3f73c20431754b8b6e5c2fc50202b2b7d926 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 11 Nov 2024 12:07:45 +0300 Subject: refactor: adding pseudomarkers o genetic probabilites map. --- scripts/rqtl2_wrapper.R | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index 6581ed4..d025935 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -178,16 +178,13 @@ perform_genetic_pr <- function(cross, cat("Finding the genetic Probabilities\n") if (use_pseudomarkers){ + cat("Using pseudo markers for genetic probabilites\n") map <- insert_pseudomarkers(cross$gmap, step=step) + } return(calc_genoprob(cross, map=map, error_prob=error_prob, map_function=map_function, - quiet=FALSE, cores=cores)) - } - else { - return (calc_genoprob(cross, map=map, error_prob=error_prob, - quiet = FALSE, map_function =map_function, - cores = cores)) - }} + quiet=FALSE, cores=cores)) +} Pr = perform_genetic_pr(dataset) cat("Summaries on the genetic probabilites \n") -- cgit 1.4.1 From a164214fc5789db5056dccdc12f4acd370abef8a Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 11 Nov 2024 12:09:49 +0300 Subject: refactor: use NO_OF_CORES as default and pass control file to arguments. --- scripts/rqtl2_wrapper.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index d025935..50008cf 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -95,7 +95,7 @@ if (!(item %in% names(json_data))){ } } -generate_cross_object <- function(json_data) { +generate_cross_object <- function(control_file_path, json_data) { # function to write the cross object from a json data object return ( write_control_file( @@ -126,7 +126,7 @@ generate_cross_object <- function(json_data) { # generate the cross file -generate_cross_object(json_data) +generate_cross_object(control_file_path, json_data) # read from the cross file path dataset <- read_cross2(control_file_path, quiet = FALSE) # replace this with a dynamic path @@ -159,7 +159,7 @@ founders(dataset) # Function for computing the genetic probabilities perform_genetic_pr <- function(cross, - cores = 1, + cores = NO_OF_CORES, step=1, map=NULL, use_pseudomarkers=FALSE, -- cgit 1.4.1 From ffd1f4dcbea516a4f5fd0e86bb684781addbfd74 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 11 Nov 2024 12:13:01 +0300 Subject: refactor: refactor for using cat and new line. --- scripts/rqtl2_wrapper.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index 50008cf..c2c8caa 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -49,6 +49,7 @@ input_file_path = file.path(opt$directory, input_file) if (!(file.exists(input_file_path))) { str_glue("The input file {opt$input_file} path does not exists in the directory {opt$directory}") + stop("The input file does not exists the temp directory") } else { str_glue("The input path for the metadata >>>>>>> {input_file_path}") @@ -89,8 +90,7 @@ default_keys = c( for (item in default_keys) { if (!(item %in% names(json_data))){ - cat("Using FALSE as default parameter for ", item) - cat("\n") + cat("Using FALSE as default parameter for ", item, "\n") json_data[item] = FALSE } } @@ -495,8 +495,7 @@ meffects_plots <- c() # TODO add plots for meffects for (chr in chr_names(dataset)){ - cat("Getting the qtl effect for chromosome", chr) - cat("\n") + cat("Getting the qtl effect for chromosome", chr, "\n") if (dataset$crosstype == "4way"){ coeff_results <- get_qtl_effect(chr, aPr, pheno, LOCO="LOCO", covar = sex) file_name = genRandomFileName(prefix = "RQTL_EFFECT_", file_ext = ".png") -- cgit 1.4.1 From f300d55b203f52a92257dabbc51db08d95eec91d Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 11 Nov 2024 12:22:11 +0300 Subject: refactor: remove stringr as a script dependency: refactor: refactor use of str_glue to cat. --- scripts/rqtl2_wrapper.R | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index c2c8caa..283ceef 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -4,8 +4,8 @@ library(qtl2) library(rjson) + library(stringi) -library(stringr) library(optparse) @@ -48,11 +48,9 @@ stop("You need to provide an output file to write the ouput data") input_file_path = file.path(opt$directory, input_file) if (!(file.exists(input_file_path))) { - str_glue("The input file {opt$input_file} path does not exists in the directory {opt$directory}") - - stop("The input file does not exists the temp directory") + stop("The input file you provided does not exists the temp directory") } else { - str_glue("The input path for the metadata >>>>>>> {input_file_path}") + cat("The input file for the metadata is >>>>>", input_file_path, "\n") json_data = fromJSON(file = input_file_path) } @@ -71,7 +69,7 @@ genRandomFileName <- function(prefix, file_ext = ".txt") { control_file_path <- file.path(opt$directory, genRandomFileName(prefix = "control_", file_ext = ".json")) -str_glue("Generated control file path is {control_file_path}") +cat("Generated the control file path at", control_file_path, "\n") if (is.null(json_data$sep)){ cat("Using ',' as a default sep for cross file\n") @@ -461,9 +459,9 @@ get_qtl_effect <- function(chromosome,geno_prob,pheno,covar=NULL,LOCO= NULL){ cat("Finding the qtl effect\n") chr_Pr <- geno_prob[,chromosome] if (!is.null(chr_Pr)){ - str_glue("Find qtl effect for chromosome {chromosome} and pheno {pheno}") + cat("Finding qtl effect for chromosome ", chromosome, "\n") if (!is.null(LOCO)) { - str_glue("Find qtl effect for chromosome {chromosome} and pheno {pheno} and LOCO {chromosome}") + cat("Finding qtl effect for chromosome ", chromosome, "with LOCO \n") kinship <- calc_kinship(chr_Pr, "loco")[[chromosome]] return(scan1coef(chr_Pr, pheno, kinship, addcovar=covar)) } @@ -534,7 +532,7 @@ output = list(lod_peaks = lod_peaks, output_json_data <-toJSON(output) output_file_path = file.path(opt$directory , opt$output_file) -str_glue("The output file path is {output_file_path}") +cat("The output file path generated is", output_file_path, "\n") cat("Writing to the output file\n") write(output_json_data, file=output_file_path) -- cgit 1.4.1 From 762e684dc2952fe7155f8d2920ed981d7404ca2f Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 11 Nov 2024 13:00:01 +0300 Subject: Refactor: refactor checks for working directory, input and output file. --- scripts/rqtl2_wrapper.R | 45 ++++++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index 283ceef..a6432d7 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -22,40 +22,35 @@ make_option(c("-o", "--output_file"), action="store", default=NULL, type='charac opt <- parse_args(OptionParser(option_list=option_list)) -# check for mandatory args -if(is.null(opt$directory)){ -stop("You need to specify a working temporary directory which should have be base_dir for the input file") -} - NO_OF_CORES = opt$cores SCAN_METHOD = opt$method NO_OF_PERMUTATION = opt$nperm -# get the json file path with pre metadata required to create the cross -# assumption is that the input file should be in the temp working directory +# Step: check for mandatory file paths +# NOTE this is the working dir should be the path for both INPUT and OUTPUT_FILE +# NOTE this is where the cross file is generated -if (is.null(opt$input_file)) { - stop("Argument for the Input metadata file is Missing ", call. = FALSE) -} else { - input_file = opt$input_file +if(is.null(opt$directory) || !(dir.exists(opt$directory))){ +# check if directory exists +stop("The working directory does not exists or is NULL\n") } -if (is.null(opt$output_file)){ -stop("You need to provide an output file to write the ouput data") -} - -# file_path -input_file_path = file.path(opt$directory, input_file) +INPUT_FILE_PATH = file.path(opt$directory, opt$input_file) +OUTPUT_FILE_PATH = file.path(opt$directory , opt$output_file) -if (!(file.exists(input_file_path))) { - stop("The input file you provided does not exists the temp directory") +if (!(file.exists(INPUT_FILE_PATH))) { + stop("The input file", INPUT_FILE_PATH, " you provided does not exists the directory", opt$directory, "\n") } else { - cat("The input file for the metadata is >>>>>", input_file_path, "\n") - json_data = fromJSON(file = input_file_path) + cat("Input file exists Reading the input file .... ", INPUT_FILE_PATH, "\n") + json_data = fromJSON(file = INPUT_FILE_PATH) +} +if (!(file.exists(OUTPUT_FILE_PATH))) { + stop("The output file ",OUTPUT_FILE_PATH, " you provided does not exists in the directory", opt$directory, "\n") +} else { + cat("Output file exists ...", OUTPUT_FILE_PATH, "\n") } # generate random string file paths - genRandomFileName <- function(prefix, file_ext = ".txt") { randStr = paste(prefix, stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"), sep = "_") @@ -531,8 +526,8 @@ output = list(lod_peaks = lod_peaks, ) output_json_data <-toJSON(output) -output_file_path = file.path(opt$directory , opt$output_file) -cat("The output file path generated is", output_file_path, "\n") + +cat("The output file path generated is", OUTPUT_FILE_PATH, "\n") cat("Writing to the output file\n") -write(output_json_data, file=output_file_path) +write(output_json_data, file=OUTPUT_FILE_PATH) -- cgit 1.4.1 From 492836416920b9479ba28fb7a66fb180daafbc65 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 11 Nov 2024 13:13:43 +0300 Subject: refactor: refactor output for preprocessing steps. --- scripts/rqtl2_wrapper.R | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index a6432d7..dc5955f 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -41,8 +41,8 @@ OUTPUT_FILE_PATH = file.path(opt$directory , opt$output_file) if (!(file.exists(INPUT_FILE_PATH))) { stop("The input file", INPUT_FILE_PATH, " you provided does not exists the directory", opt$directory, "\n") } else { - cat("Input file exists Reading the input file .... ", INPUT_FILE_PATH, "\n") - json_data = fromJSON(file = INPUT_FILE_PATH) + cat("Input file exists Reading the input file .... \n") + } if (!(file.exists(OUTPUT_FILE_PATH))) { stop("The output file ",OUTPUT_FILE_PATH, " you provided does not exists in the directory", opt$directory, "\n") @@ -50,22 +50,26 @@ if (!(file.exists(OUTPUT_FILE_PATH))) { cat("Output file exists ...", OUTPUT_FILE_PATH, "\n") } -# generate random string file paths -genRandomFileName <- function(prefix, file_ext = ".txt") { - randStr = paste(prefix, stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"), sep = +# Utility function to generate random file names of size n: +genRandomFileName <- function(prefix, string_size = 9 , file_ext = ".txt") { + randStr = paste(prefix, stri_rand_strings(1, string_size, pattern = "[A-Za-z0-9]"), sep = "_") return(paste(randStr, file_ext, sep = "")) } -# TODO work on the optional parameters e.g cores, type of computation -# TODO create temp directory for this workspace pass this as argument + + +# Step: Generate the control file name control_file_path <- file.path(opt$directory, - genRandomFileName(prefix = "control_", file_ext = ".json")) + genRandomFileName(prefix = "control", file_ext = ".json")) +cat("Generated the control file path at ", control_file_path, "\n") -cat("Generated the control file path at", control_file_path, "\n") +# Step: Reading and Parsing the input file +cat("Reading and parsing the input file \n") +json_data = fromJSON(file = INPUT_FILE_PATH) if (is.null(json_data$sep)){ cat("Using ',' as a default sep for cross file\n") json_data$sep = "," @@ -74,8 +78,6 @@ if (is.null(json_data$na.strings)){ cat("Using '-' and 'NA' as the default na.strings\n") json_data$na.strings = c("-" , "NA") } - -# use this better defaults default_keys = c( "geno_transposed", "founder_geno_transposed", "pheno_transposed" , "covar_transposed", @@ -88,6 +90,9 @@ if (!(item %in% names(json_data))){ } } + + + generate_cross_object <- function(control_file_path, json_data) { # function to write the cross object from a json data object return ( @@ -117,15 +122,13 @@ generate_cross_object <- function(control_file_path, json_data) { ) } -# generate the cross file - +# Step: generate the cross file +cat("Generating the cross object at ", control_file_path, "\n") generate_cross_object(control_file_path, json_data) -# read from the cross file path +cat("reading the cross object from", control_file_path, "\n") dataset <- read_cross2(control_file_path, quiet = FALSE) # replace this with a dynamic path - # check integrity of the cross - cat("Check the integrity of the cross object") check_cross2(dataset) if (check_cross2(dataset)) { @@ -134,6 +137,7 @@ if (check_cross2(dataset)) { print("Dataset does not meet required specifications") } + # Dataset Summarys cat("A Summary about the Dataset You Provided\n") summary(dataset) @@ -150,6 +154,7 @@ ind_ids_pheno(dataset) cat("Name of the founder Strains/n") founders(dataset) + # Function for computing the genetic probabilities perform_genetic_pr <- function(cross, cores = NO_OF_CORES, -- cgit 1.4.1 From effd9e79f36877979bdefa1ece1f1961345cdf5d Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 11 Nov 2024 13:48:55 +0300 Subject: Refactor: add better message for computation steps. --- scripts/rqtl2_wrapper.R | 94 +++++++++++++++++++++---------------------------- 1 file changed, 41 insertions(+), 53 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index dc5955f..31bd2b4 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -174,7 +174,7 @@ perform_genetic_pr <- function(cross, #' @param step for default "1" #' @return a list of three-dimensional arrays of probabilities, individuals x genotypes x pst - cat("Finding the genetic Probabilities\n") + if (use_pseudomarkers){ cat("Using pseudo markers for genetic probabilites\n") map <- insert_pseudomarkers(cross$gmap, step=step) @@ -184,51 +184,45 @@ perform_genetic_pr <- function(cross, quiet=FALSE, cores=cores)) } +# Step: calculate the genetic probabilities +cat("Calculating the genetic probabilities\n") Pr = perform_genetic_pr(dataset) -cat("Summaries on the genetic probabilites \n") -print(Pr) -summary(Pr) - -# perform allele probabilites if cross ways -# convert this to lower +# Step: perform allele probabilites if cross ways if (dataset$crosstype == "4way"){ - # + cat("Calculating Allele Genetic probability for 4way cross\n") aPr <- genoprob_to_alleleprob(pr) } + + #Function to Calculate genotyping error LOD scores -cat("Calculate genotype error LOD scores\n") +cat("Calculating the genotype error LOD scores\n") error_lod <- calc_errorlod(dataset, Pr, quiet = FALSE, cores = NO_OF_CORES) # combine into one matrix error_lod <- do.call("cbind", error_lod) -print(error_lod) -# Perform genome scan -# TODO! rework on this issue ## grab phenotypes and covariates; ensure that covariates have names attribute +# TODO rework on this +cat("Getting the phenotypes and covariates\n") pheno <- dataset$pheno + covar <- match(dataset$covar$sex, c("f", "m")) # make numeric -names(covar) <- rownames(dataset$covar) -Xcovar <- get_x_covar(dataset) -print(pheno) +if (!is.null(covar)){ + names(covar) <- rownames(dataset$covar) +} +print("The covariates are") print(covar) -print(Xcovar) - - - -# TODO: rework on fetching th Xcovar and getting the covar data - -# Function to perform scan1 - - -# refactor this to a function +Xcovar <- get_x_covar(dataset) +print("The Xcovar are ") +print(Xcovar) +# Function to calculate the kinship get_kinship <- function(probability, method="LMM"){ if (opt$method == "LMM"){ kinship = calc_kinship(genome_prob) @@ -240,6 +234,7 @@ if (opt$method == "LMM"){ } +cat("Calculating the kinship for the genetic probability\n") if (dataset$crosstype == "4way"){ kinship <- get_kinship(aPr, opt$method) } else { @@ -248,10 +243,10 @@ if (dataset$crosstype == "4way"){ - +# Function to perform genome scan perform_genome_scan <- function(cross, genome_prob, - method="HK", + method, addcovar = NULL, intcovar = NULL, kinship = NULL, @@ -313,6 +308,8 @@ perform_genome_scan <- function(cross, } # Perform the genome scan for the cross object + + if (dataset$crosstype == "4way"){ sex <- (DOex$covar$Sex == "male")*1 names(sex) <- rownames(dataset$covar) @@ -324,11 +321,7 @@ if (dataset$crosstype == "4way"){ kinship = kinship, method = SCAN_METHOD) } - - -scan_results - -# plot for the LOD scores from performing the genome scan +# function plot for the LOD scores from performing the genome scan generate_lod_plot <- function(cross, scan_result, method, base_dir = ".") { #' @description Plot LOD curves for a genome scan #' @param the cross object @@ -336,6 +329,7 @@ generate_lod_plot <- function(cross, scan_result, method, base_dir = ".") { #' @param the method used to compute the scan1 results HK,LMM or LOCO #' @param base_dir the path to write the generated plot #' @return a string with the file path for the plot + cat("Generting the lod plot for the LOD scores\n") color <- c("slateblue", "violetred", "green3") par(mar = c(4.1, 4.1, 1.6, 1.1)) ymx <- maxlod(scan_result) @@ -367,15 +361,14 @@ generate_lod_plot <- function(cross, scan_result, method, base_dir = ".") { lod_plot_path <- generate_lod_plot(dataset, scan_results, "HK", base_dir=opt$directory) -lod_plot_path - +cat("Generated the lod plot at ", lod_plot_path, "\n") -# perform permutation tests for single-QTL method +# function: perform permutation tests for single-QTL method perform_permutation_test <- function(cross, genome_prob, n_perm, - method = "HKK", + method = opt$method, covar = NULL, Xcovar = NULL, addcovar = NULL, @@ -400,7 +393,7 @@ perform_permutation_test <- function(cross, #' @param n_perm Number of permutation replicates. #' @param chr_lengths engths of the chromosomes; #' @return object of class "scan1perm". - cat("performing permutation test for the cross object\n") + cat("performing permutation test for the cross object with permutations", n_perm, "\n") return (scan1perm( genome_prob, cross$pheno, @@ -420,26 +413,28 @@ perform_permutation_test <- function(cross, # check if pstrata - if (!(is.null(opt$pstrata)) && (!is.null(Xcovar))){ perm_strata <- mat2strata(Xcovar) } else { perm_strata <- NULL } -perm <- perform_permutation_test(dataset, Pr, n_perm = NO_OF_PERMUTATION,perm_strata = perm_strata, method = "LMM") +# Step: Performing the permutation test +perm <- perform_permutation_test(dataset, Pr, n_perm = NO_OF_PERMUTATION,perm_strata = perm_strata, method = opt$method) + # get the permutation summary with a significance threshold get_lod_significance <- function(perm, threshold = c(0.2, 0.05)){ - cat("Fetch the lod with significance thresholds ", threshold, "\n") + cat("Fetching the lod with significance thresholds as ", threshold, "\n") summary(perm, alpha = threshold) } lod_significance <- get_lod_significance(perm) -# get the lod peaks -# TODO fix issue when fetching the gmap or allow to use pseudomarkers +# step: get the lod peaks +# TODO fix the threshold here + cat("Fetching the lod peaks\n") lod_peaks = find_peaks( @@ -449,12 +444,8 @@ lod_peaks = find_peaks( cores = NO_OF_CORES ) -lod_peaks -cat("Generating the ouput data as vector\n") - -# get the estimated qtl effects -cat("Getting the estimated qtl effects\n") +# step: get the estimated qtl effects get_qtl_effect <- function(chromosome,geno_prob,pheno,covar=NULL,LOCO= NULL){ cat("Finding the qtl effect\n") chr_Pr <- geno_prob[,chromosome] @@ -475,9 +466,8 @@ get_qtl_effect <- function(chromosome,geno_prob,pheno,covar=NULL,LOCO= NULL){ # take the first phenotype in the dataset - # grab phenotypes and covariates; ensure that covariates have names attribute -# in this case we are using the first chromosome + pheno <- dataset$pheno[,1] if (!is.null(dataset$covar) && !is.null(dataset$covar$sex)){ covar <- match(dataset$covar$sex, c("f", "m")) # make numeric @@ -490,12 +480,12 @@ covar meffects <- c() meffects_plots <- c() - # TODO add plots for meffects for (chr in chr_names(dataset)){ cat("Getting the qtl effect for chromosome", chr, "\n") if (dataset$crosstype == "4way"){ coeff_results <- get_qtl_effect(chr, aPr, pheno, LOCO="LOCO", covar = sex) + cat("Generating the qtl effects plots\n") file_name = genRandomFileName(prefix = "RQTL_EFFECT_", file_ext = ".png") image_loc = file.path(base_dir , file_name) par(mar=c(4.1, 4.1, 0.6, 0.6)) @@ -516,7 +506,6 @@ for (chr in chr_names(dataset)){ meffects <- append(meffects, coeff_results) } - output = list(lod_peaks = lod_peaks, scan_results =scan_results, genetic_probabilities = Pr, @@ -525,13 +514,12 @@ output = list(lod_peaks = lod_peaks, lod_peaks = lod_peaks, chromosomes = chr_names(dataset), meffects = meffects, + error_lod = error_lod, meffects_plots = meffects_plots, lod_plot_path =lod_plot_path, scan_method = SCAN_METHOD ) - output_json_data <-toJSON(output) - cat("The output file path generated is", OUTPUT_FILE_PATH, "\n") cat("Writing to the output file\n") write(output_json_data, file=OUTPUT_FILE_PATH) -- cgit 1.4.1 From c568db6548e306ccf7e43ae188e749427c580642 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 11 Nov 2024 14:22:47 +0300 Subject: Refactor: remove requirement for input and output file to be in the same directory. --- scripts/rqtl2_wrapper.R | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index 31bd2b4..7b83284 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -10,24 +10,23 @@ library(optparse) option_list <- list( - make_option(c("-c", "--cores"), type="integer", default=1, help="No of cores to use while making - computation"), + make_option(c("-d", "--directory"), action = "store", default = NULL, type = "character", help="Temporary working directory: should also host the input file ."), make_option(c("-i", "--input_file"), action="store", default=NULL, type='character', help="a yaml or json file with required data to create the cross file"), -make_option(c("-p", "--nperm"), type="integer", default= 1, action="store_true", help="No of permutations "), - make_option(c("-d", "--directory"), action = "store", default = NULL, type = "character", help="Temporary working directory: should also host the input file ."), + make_option(c("-o", "--output_file"), action="store", default=NULL, type='character', help="a file path of where to write the output json results"), + make_option(c("-c", "--cores"), type="integer", default=1, help="No of cores to use while making + computation"), + make_option(c("-p", "--nperm"), type="integer", default= 1, action="store_true", help="No of permutations "), make_option(c("-m", "--method"), action = "store", default = "HK", type = "character", help="Scan Mapping Method - HK (Haley Knott), LMM( Linear Mixed Model ), LOCO (Leave one Chromosome Out)"), -make_option(c("-o", "--output_file"), action="store", default=NULL, type='character', help="a file name of where to write the output json results"), make_option(c("--pstrata"), action="store_true", default=NULL, help="Use permutation strata") ) - opt <- parse_args(OptionParser(option_list=option_list)) NO_OF_CORES = opt$cores SCAN_METHOD = opt$method NO_OF_PERMUTATION = opt$nperm # Step: check for mandatory file paths -# NOTE this is the working dir should be the path for both INPUT and OUTPUT_FILE +# NOTE this is the working dir where the cross file will be generated # NOTE this is where the cross file is generated if(is.null(opt$directory) || !(dir.exists(opt$directory))){ @@ -35,17 +34,17 @@ if(is.null(opt$directory) || !(dir.exists(opt$directory))){ stop("The working directory does not exists or is NULL\n") } -INPUT_FILE_PATH = file.path(opt$directory, opt$input_file) -OUTPUT_FILE_PATH = file.path(opt$directory , opt$output_file) +INPUT_FILE_PATH = opt$input_file +OUTPUT_FILE_PATH = opt$output_file if (!(file.exists(INPUT_FILE_PATH))) { - stop("The input file", INPUT_FILE_PATH, " you provided does not exists the directory", opt$directory, "\n") + stop("The input file", INPUT_FILE_PATH, " you provided does not exists\n") } else { cat("Input file exists Reading the input file .... \n") } if (!(file.exists(OUTPUT_FILE_PATH))) { - stop("The output file ",OUTPUT_FILE_PATH, " you provided does not exists in the directory", opt$directory, "\n") + stop("The output file ",OUTPUT_FILE_PATH, " you provided does not exists\n") } else { cat("Output file exists ...", OUTPUT_FILE_PATH, "\n") } @@ -92,7 +91,7 @@ if (!(item %in% names(json_data))){ - +# Note the files below should be in the same directory as the location for the crosss file generate_cross_object <- function(control_file_path, json_data) { # function to write the cross object from a json data object return ( @@ -308,13 +307,11 @@ perform_genome_scan <- function(cross, } # Perform the genome scan for the cross object - - if (dataset$crosstype == "4way"){ sex <- (DOex$covar$Sex == "male")*1 names(sex) <- rownames(dataset$covar) sex <- setNames( (dataset$covar$Sex == "male")*1, rownames(DOex$covar)) - scan_results <- perform_genoeme_scan(aPr, dataset, kinship=kinship, method = "LOCO", addcovar = sex) + scan_results <- perform_genome_scan(aPr, dataset, kinship=kinship, method = "LOCO", addcovar = sex) } else { scan_results <- perform_genome_scan(cross = dataset, genome_prob = Pr, -- cgit 1.4.1 From dbc9bf9478d29a9fa1c8d2a807f15d0fca569846 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 11 Nov 2024 14:36:55 +0300 Subject: feat: print_help when certain parameters are not provided. --- scripts/rqtl2_wrapper.R | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index 7b83284..ee56d90 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -20,7 +20,8 @@ make_option(c("-i", "--input_file"), action="store", default=NULL, type='charact make_option(c("--pstrata"), action="store_true", default=NULL, help="Use permutation strata") ) -opt <- parse_args(OptionParser(option_list=option_list)) +opt_parser = OptionParser(option_list=option_list); +opt <- parse_args(opt_parser) NO_OF_CORES = opt$cores SCAN_METHOD = opt$method NO_OF_PERMUTATION = opt$nperm @@ -36,14 +37,14 @@ stop("The working directory does not exists or is NULL\n") INPUT_FILE_PATH = opt$input_file OUTPUT_FILE_PATH = opt$output_file - if (!(file.exists(INPUT_FILE_PATH))) { + print_help(opt_parser) stop("The input file", INPUT_FILE_PATH, " you provided does not exists\n") } else { cat("Input file exists Reading the input file .... \n") - } if (!(file.exists(OUTPUT_FILE_PATH))) { + print_help(opt_parser) stop("The output file ",OUTPUT_FILE_PATH, " you provided does not exists\n") } else { cat("Output file exists ...", OUTPUT_FILE_PATH, "\n") @@ -57,9 +58,6 @@ genRandomFileName <- function(prefix, string_size = 9 , file_ext = ".txt") { } - - - # Step: Generate the control file name control_file_path <- file.path(opt$directory, genRandomFileName(prefix = "control", file_ext = ".json")) @@ -422,12 +420,11 @@ perm <- perform_permutation_test(dataset, Pr, n_perm = NO_OF_PERMUTATION,perm_st # get the permutation summary with a significance threshold get_lod_significance <- function(perm, threshold = c(0.2, 0.05)){ - cat("Fetching the lod with significance thresholds as ", threshold, "\n") - summary(perm, alpha = threshold) + cat("Getting the permutation summary with significance thresholds as ", threshold, "\n") + summary(perm, alpha = threshold) } -lod_significance <- get_lod_significance(perm) - +lod_significance <- get_lod_significance(perm) # step: get the lod peaks # TODO fix the threshold here @@ -472,7 +469,7 @@ if (!is.null(dataset$covar) && !is.null(dataset$covar$sex)){ } else { covar <- NULL } -covar + meffects <- c() @@ -499,8 +496,7 @@ for (chr in chr_names(dataset)){ meffects <- append(meffects_plots, image_loc) } else { coeff_results <- get_qtl_effect(chr, Pr, pheno) - } - + } meffects <- append(meffects, coeff_results) } output = list(lod_peaks = lod_peaks, -- cgit 1.4.1 From 172f8c30dadc5cb8517112939202c6ced94361c2 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 11 Nov 2024 14:42:20 +0300 Subject: feat: add a threshold argument for lod score peak with default 1. --- scripts/rqtl2_wrapper.R | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index ee56d90..753a88d 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -17,7 +17,8 @@ make_option(c("-i", "--input_file"), action="store", default=NULL, type='charact computation"), make_option(c("-p", "--nperm"), type="integer", default= 1, action="store_true", help="No of permutations "), make_option(c("-m", "--method"), action = "store", default = "HK", type = "character", help="Scan Mapping Method - HK (Haley Knott), LMM( Linear Mixed Model ), LOCO (Leave one Chromosome Out)"), - make_option(c("--pstrata"), action="store_true", default=NULL, help="Use permutation strata") + make_option(c("--pstrata"), action="store_true", default=NULL, help="Use permutation strata"), + make_option(c("-t", "--threshold"), type="integer", default= 1, action="store_true", help="Minimum LOD score for a Peak") ) opt_parser = OptionParser(option_list=option_list); @@ -32,7 +33,8 @@ NO_OF_PERMUTATION = opt$nperm if(is.null(opt$directory) || !(dir.exists(opt$directory))){ # check if directory exists -stop("The working directory does not exists or is NULL\n") + print_help(opt_parser) + stop("The working directory does not exists or is NULL\n") } INPUT_FILE_PATH = opt$input_file @@ -430,10 +432,10 @@ lod_significance <- get_lod_significance(perm) # TODO fix the threshold here -cat("Fetching the lod peaks\n") +cat("Fetching the lod peaks with threshold", opt$threshold, "\n") lod_peaks = find_peaks( scan_results, - threshold =0, + threshold =opt$threshold, map = dataset$gmap, cores = NO_OF_CORES ) @@ -461,7 +463,6 @@ get_qtl_effect <- function(chromosome,geno_prob,pheno,covar=NULL,LOCO= NULL){ # take the first phenotype in the dataset # grab phenotypes and covariates; ensure that covariates have names attribute - pheno <- dataset$pheno[,1] if (!is.null(dataset$covar) && !is.null(dataset$covar$sex)){ covar <- match(dataset$covar$sex, c("f", "m")) # make numeric @@ -470,8 +471,6 @@ if (!is.null(dataset$covar) && !is.null(dataset$covar$sex)){ covar <- NULL } - - meffects <- c() meffects_plots <- c() # TODO add plots for meffects -- cgit 1.4.1 From 8dc26d3907374198bdea28cd7b0a0499cfb96557 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 11 Nov 2024 14:58:40 +0300 Subject: refactor: rename dataset to cross. --- scripts/rqtl2_wrapper.R | 82 ++++++++++++++++++++++++------------------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index 753a88d..5908934 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -126,32 +126,32 @@ cat("Generating the cross object at ", control_file_path, "\n") generate_cross_object(control_file_path, json_data) cat("reading the cross object from", control_file_path, "\n") -dataset <- read_cross2(control_file_path, quiet = FALSE) # replace this with a dynamic path +cross <- read_cross2(control_file_path, quiet = FALSE) # replace this with a dynamic path # check integrity of the cross cat("Check the integrity of the cross object") -check_cross2(dataset) -if (check_cross2(dataset)) { - print("Dataset meets required specifications for a cross") +check_cross2(cross) +if (check_cross2(cross)) { + print("Cross meets required specifications for a cross") } else { - print("Dataset does not meet required specifications") + print("Cross does not meet required specifications") } -# Dataset Summarys -cat("A Summary about the Dataset You Provided\n") -summary(dataset) -n_ind(dataset) -n_chr(dataset) +# Cross Summarys +cat("A Summary about the Cross You Provided\n") +summary(cross) +n_ind(cross) +n_chr(cross) cat("names of markers in the object\n") -marker_names(dataset) +marker_names(cross) cat("names of phenotypes in a the object") -pheno_names(dataset) -cat("IDs for all individuals in the dataset cross object that have genotype data\n") -ind_ids_geno(dataset) -cat(" IDs for all individuals in the dataset object that have phenotype data") -ind_ids_pheno(dataset) +pheno_names(cross) +cat("IDs for all individuals in the cross cross object that have genotype data\n") +ind_ids_geno(cross) +cat(" IDs for all individuals in the cross object that have phenotype data") +ind_ids_pheno(cross) cat("Name of the founder Strains/n") -founders(dataset) +founders(cross) # Function for computing the genetic probabilities @@ -185,11 +185,11 @@ perform_genetic_pr <- function(cross, # Step: calculate the genetic probabilities cat("Calculating the genetic probabilities\n") -Pr = perform_genetic_pr(dataset) +Pr = perform_genetic_pr(cross) # Step: perform allele probabilites if cross ways -if (dataset$crosstype == "4way"){ +if (cross$crosstype == "4way"){ cat("Calculating Allele Genetic probability for 4way cross\n") aPr <- genoprob_to_alleleprob(pr) } @@ -198,7 +198,7 @@ if (dataset$crosstype == "4way"){ #Function to Calculate genotyping error LOD scores cat("Calculating the genotype error LOD scores\n") -error_lod <- calc_errorlod(dataset, Pr, quiet = FALSE, cores = NO_OF_CORES) +error_lod <- calc_errorlod(cross, Pr, quiet = FALSE, cores = NO_OF_CORES) # combine into one matrix error_lod <- do.call("cbind", error_lod) @@ -208,16 +208,16 @@ error_lod <- do.call("cbind", error_lod) ## grab phenotypes and covariates; ensure that covariates have names attribute # TODO rework on this cat("Getting the phenotypes and covariates\n") -pheno <- dataset$pheno +pheno <- cross$pheno -covar <- match(dataset$covar$sex, c("f", "m")) # make numeric +covar <- match(cross$covar$sex, c("f", "m")) # make numeric if (!is.null(covar)){ - names(covar) <- rownames(dataset$covar) + names(covar) <- rownames(cross$covar) } print("The covariates are") print(covar) -Xcovar <- get_x_covar(dataset) +Xcovar <- get_x_covar(cross) print("The Xcovar are ") print(Xcovar) @@ -234,7 +234,7 @@ if (opt$method == "LMM"){ cat("Calculating the kinship for the genetic probability\n") -if (dataset$crosstype == "4way"){ +if (cross$crosstype == "4way"){ kinship <- get_kinship(aPr, opt$method) } else { kinship <- get_kinship(Pr, "loco") @@ -307,13 +307,13 @@ perform_genome_scan <- function(cross, } # Perform the genome scan for the cross object -if (dataset$crosstype == "4way"){ +if (cross$crosstype == "4way"){ sex <- (DOex$covar$Sex == "male")*1 - names(sex) <- rownames(dataset$covar) - sex <- setNames( (dataset$covar$Sex == "male")*1, rownames(DOex$covar)) - scan_results <- perform_genome_scan(aPr, dataset, kinship=kinship, method = "LOCO", addcovar = sex) + names(sex) <- rownames(cross$covar) + sex <- setNames( (cross$covar$Sex == "male")*1, rownames(DOex$covar)) + scan_results <- perform_genome_scan(aPr, cross, kinship=kinship, method = "LOCO", addcovar = sex) } else { - scan_results <- perform_genome_scan(cross = dataset, + scan_results <- perform_genome_scan(cross = cross, genome_prob = Pr, kinship = kinship, method = SCAN_METHOD) @@ -357,7 +357,7 @@ generate_lod_plot <- function(cross, scan_result, method, base_dir = ".") { } -lod_plot_path <- generate_lod_plot(dataset, scan_results, "HK", base_dir=opt$directory) +lod_plot_path <- generate_lod_plot(cross, scan_results, "HK", base_dir=opt$directory) cat("Generated the lod plot at ", lod_plot_path, "\n") @@ -417,7 +417,7 @@ perm_strata <- NULL } # Step: Performing the permutation test -perm <- perform_permutation_test(dataset, Pr, n_perm = NO_OF_PERMUTATION,perm_strata = perm_strata, method = opt$method) +perm <- perform_permutation_test(cross, Pr, n_perm = NO_OF_PERMUTATION,perm_strata = perm_strata, method = opt$method) # get the permutation summary with a significance threshold @@ -436,7 +436,7 @@ cat("Fetching the lod peaks with threshold", opt$threshold, "\n") lod_peaks = find_peaks( scan_results, threshold =opt$threshold, - map = dataset$gmap, + map = cross$gmap, cores = NO_OF_CORES ) @@ -461,12 +461,12 @@ get_qtl_effect <- function(chromosome,geno_prob,pheno,covar=NULL,LOCO= NULL){ -# take the first phenotype in the dataset +# take the first phenotype in the cross # grab phenotypes and covariates; ensure that covariates have names attribute -pheno <- dataset$pheno[,1] -if (!is.null(dataset$covar) && !is.null(dataset$covar$sex)){ - covar <- match(dataset$covar$sex, c("f", "m")) # make numeric - names(covar) <- rownames(dataset$covar) +pheno <- cross$pheno[,1] +if (!is.null(cross$covar) && !is.null(cross$covar$sex)){ + covar <- match(cross$covar$sex, c("f", "m")) # make numeric + names(covar) <- rownames(cross$covar) } else { covar <- NULL } @@ -474,9 +474,9 @@ covar <- NULL meffects <- c() meffects_plots <- c() # TODO add plots for meffects -for (chr in chr_names(dataset)){ +for (chr in chr_names(cross)){ cat("Getting the qtl effect for chromosome", chr, "\n") - if (dataset$crosstype == "4way"){ + if (cross$crosstype == "4way"){ coeff_results <- get_qtl_effect(chr, aPr, pheno, LOCO="LOCO", covar = sex) cat("Generating the qtl effects plots\n") file_name = genRandomFileName(prefix = "RQTL_EFFECT_", file_ext = ".png") @@ -504,7 +504,7 @@ output = list(lod_peaks = lod_peaks, lod_significance = lod_significance, permutation_results = perm, lod_peaks = lod_peaks, - chromosomes = chr_names(dataset), + chromosomes = chr_names(cross), meffects = meffects, error_lod = error_lod, meffects_plots = meffects_plots, -- cgit 1.4.1 From 9d0cda2c1dc386cdc79505b34dacbeb164afa765 Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 11 Nov 2024 15:14:34 +0300 Subject: fix: fix issue fetching covar: --- scripts/rqtl2_wrapper.R | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index 5908934..4355769 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -194,8 +194,6 @@ if (cross$crosstype == "4way"){ aPr <- genoprob_to_alleleprob(pr) } - - #Function to Calculate genotyping error LOD scores cat("Calculating the genotype error LOD scores\n") error_lod <- calc_errorlod(cross, Pr, quiet = FALSE, cores = NO_OF_CORES) @@ -211,12 +209,12 @@ cat("Getting the phenotypes and covariates\n") pheno <- cross$pheno covar <- match(cross$covar$sex, c("f", "m")) # make numeric + if (!is.null(covar)){ names(covar) <- rownames(cross$covar) } print("The covariates are") print(covar) - Xcovar <- get_x_covar(cross) print("The Xcovar are ") print(Xcovar) @@ -308,9 +306,9 @@ perform_genome_scan <- function(cross, # Perform the genome scan for the cross object if (cross$crosstype == "4way"){ - sex <- (DOex$covar$Sex == "male")*1 + sex <- (cross$covar$Sex == "male")*1 names(sex) <- rownames(cross$covar) - sex <- setNames( (cross$covar$Sex == "male")*1, rownames(DOex$covar)) + sex <- setNames( (cross$covar$Sex == "male")*1, rownames(cross$covar)) scan_results <- perform_genome_scan(aPr, cross, kinship=kinship, method = "LOCO", addcovar = sex) } else { scan_results <- perform_genome_scan(cross = cross, -- cgit 1.4.1 From c4f5f3dba5a22a625cba88246efa45c8bc4ddf2c Mon Sep 17 00:00:00 2001 From: Alexander_Kabui Date: Mon, 11 Nov 2024 16:41:18 +0300 Subject: feat: add step to insert pseudomarkers to the gmap. --- scripts/rqtl2_wrapper.R | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/scripts/rqtl2_wrapper.R b/scripts/rqtl2_wrapper.R index 4355769..1ff3e18 100644 --- a/scripts/rqtl2_wrapper.R +++ b/scripts/rqtl2_wrapper.R @@ -1,13 +1,12 @@ -# This script file contains the an implementation of qtl mapping using r-qtl2 -# For r-qtl1 implementation see: Scripts/rqtl_wrapper.R -# load the library +# This file contains an implementation of qtl mapping using r-qtl2 +# For r-qtl1 implementation see: ./rqtl_wrapper.R library(qtl2) library(rjson) - library(stringi) library(optparse) +# TODO add mapping function as a parameter: option_list <- list( make_option(c("-d", "--directory"), action = "store", default = NULL, type = "character", help="Temporary working directory: should also host the input file ."), @@ -61,6 +60,7 @@ genRandomFileName <- function(prefix, string_size = 9 , file_ext = ".txt") { # Step: Generate the control file name + control_file_path <- file.path(opt$directory, genRandomFileName(prefix = "control", file_ext = ".json")) cat("Generated the control file path at ", control_file_path, "\n") @@ -159,7 +159,6 @@ perform_genetic_pr <- function(cross, cores = NO_OF_CORES, step=1, map=NULL, - use_pseudomarkers=FALSE, map_function=c("haldane", "kosambi", "c-f", "morgan"), error_prob = 0.002 ) { @@ -173,19 +172,23 @@ perform_genetic_pr <- function(cross, #' @param step for default "1" #' @return a list of three-dimensional arrays of probabilities, individuals x genotypes x pst - - if (use_pseudomarkers){ - cat("Using pseudo markers for genetic probabilites\n") - map <- insert_pseudomarkers(cross$gmap, step=step) - } return(calc_genoprob(cross, map=map, error_prob=error_prob, map_function=map_function, quiet=FALSE, cores=cores)) } + +#Step: insert pseudomarkers to genetic map +# TODO need to review this to match rqtl1 +cat("Inserting pseudomarkers to the genetic map with steps", 1 , "and stepwidth" , "fixed\n") +MAP <- insert_pseudomarkers(cross$gmap, step= 1, stepwidth = "fixed", cores = NO_OF_CORES) + + # Step: calculate the genetic probabilities cat("Calculating the genetic probabilities\n") -Pr = perform_genetic_pr(cross) +Pr = perform_genetic_pr(cross, map = MAP) + + # Step: perform allele probabilites if cross ways @@ -207,14 +210,16 @@ error_lod <- do.call("cbind", error_lod) # TODO rework on this cat("Getting the phenotypes and covariates\n") pheno <- cross$pheno - covar <- match(cross$covar$sex, c("f", "m")) # make numeric +covar if (!is.null(covar)){ names(covar) <- rownames(cross$covar) + } print("The covariates are") -print(covar) +covar + Xcovar <- get_x_covar(cross) print("The Xcovar are ") print(Xcovar) -- cgit 1.4.1