From 238e3a1cfd4bc8677a530410af2767d28e93af62 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 22 Jul 2021 23:47:58 +0000 Subject: Added option for running pairscan to rqtl_wrapper.R --- scripts/rqtl_wrapper.R | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index f7e0406..98b1231 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -11,6 +11,7 @@ option_list = list( make_option(c("-c", "--addcovar"), action="store_true", default=NULL, help="Use covariates (included as extra columns in the phenotype input file)"), make_option(c("--model"), type="character", default="normal", help="Mapping Model - Normal or Non-Parametric"), make_option(c("--method"), type="character", default="hk", help="Mapping Method - hk (Haley Knott), ehk (Extended Haley Knott), mr (Marker Regression), em (Expectation-Maximization), imp (Imputation)"), + make_option(c("--pairscan"), action="store_true", default=NULL, help="Run Pair Scan - the R/qtl function scantwo"), make_option(c("-i", "--interval"), action="store_true", default=NULL, help="Use interval mapping"), make_option(c("--nperm"), type="integer", default=0, help="Number of permutations"), make_option(c("--pstrata"), action="store_true", default=NULL, help="Use permutation strata (stored as final column/vector in phenotype input file)"), -- cgit v1.2.3 From c3d59414b4bd23e52823a8b7e846f732e36d8c56 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 23 Jul 2021 19:40:04 +0000 Subject: - Added scan_func function that determines whether scanone or scantwo (pairscan) is used - For pairscan default to using step 20 (subject to change, but some step is required during calc.genoprob to make it run fast enough) - Added some new verbose prints --- scripts/rqtl_wrapper.R | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index 98b1231..77ed227 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -156,6 +156,9 @@ cross_object = geno_to_csvr(geno_file, trait_names, trait_vals, cross_file) if (!is.null(opt$interval)) { verbose_print('Calculating genotype probabilities with interval mapping\n') cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max") +} else if (!is.null(opt$pairscan)) { + verbose_print('Calculating genotype probabilities with interval mapping\n') + cross_object <- calc.genoprob(cross_object, step=20) } else { verbose_print('Calculating genotype probabilities\n') cross_object <- calc.genoprob(cross_object) @@ -164,6 +167,7 @@ if (!is.null(opt$interval)) { # Pull covariates out of cross object, if they exist covars = vector(mode = "list", length = length(trait_names) - 1) if (!is.null(opt$addcovar)) { + verbose_print('Pulling covariates out of cross object\n') #If perm strata are being used, it'll be included as the final column in the phenotype file if (!is.null(opt$pstrata)) { covar_names = trait_names[3:length(trait_names) - 1] @@ -176,16 +180,28 @@ if (!is.null(opt$addcovar)) { # Pull permutation strata out of cross object, if it is being used perm_strata = vector() if (!is.null(opt$pstrata)) { + verbose_print('Pulling permutation strata out of cross object\n') strata_col = trait_names[length(trait_names)] perm_strata <- pull.pheno(cross_object, strata_col) } # If a marker name is supplied as covariate, get its vector of values and add them as a covariate if (!is.null(opt$control)) { + verbose_print('Creating marker covariates and binding them to covariates vector\n') marker_covars = create_marker_covars(cross_object, opt$control) covars <- cbind(covars, marker_covars) } +if (!is.null(opt$pairscan)) { + scan_func <- function(...){ + scantwo(...) + } +} else { + scan_func <- function(...){ + scanone(...) + } +} + # Calculate permutations if (opt$nperm > 0) { if (!is.null(opt$filename)){ @@ -197,18 +213,18 @@ if (opt$nperm > 0) { if (!is.null(opt$addcovar) || !is.null(opt$control)){ if (!is.null(opt$pstrata)) { verbose_print('Running permutations with cofactors and strata\n') - perm_results = scanone(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method) + perm_results = scan_func(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method) } else { verbose_print('Running permutations with cofactors\n') - perm_results = scanone(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, model=opt$model, method=opt$method) + perm_results = scan_func(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, model=opt$model, method=opt$method) } } else { if (!is.null(opt$pstrata)) { verbose_print('Running permutations with strata\n') - perm_results = scanone(cross_object, pheno.col=1, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method) + perm_results = scan_func(cross_object, pheno.col=1, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method) } else { verbose_print('Running permutations\n') - perm_results = scanone(cross_object, pheno.col=1, n.perm=opt$nperm, model=opt$model, method=opt$method) + perm_results = scan_func(cross_object, pheno.col=1, n.perm=opt$nperm, model=opt$model, method=opt$method) } } write.csv(perm_results, perm_out_file) @@ -221,10 +237,11 @@ if (!is.null(opt$filename)){ } if (!is.null(opt$addcovar) || !is.null(opt$control)){ - verbose_print('Running scanone with cofactors\n') - qtl_results = scanone(cross_object, pheno.col=1, addcovar=covars, model=opt$model, method=opt$method) + verbose_print('Running scan with cofactors\n') + qtl_results = scan_func(cross_object, pheno.col=1, addcovar=covars, model=opt$model, method=opt$method) } else { - verbose_print('Running scanone\n') - qtl_results = scanone(cross_object, pheno.col=1, model=opt$model, method=opt$method) + verbose_print('Running scan\n') + qtl_results = scan_func(cross_object, pheno.col=1, model=opt$model, method=opt$method) } + write.csv(qtl_results, out_file) -- cgit v1.2.3 From b3e6105b5d8d2c2f30f2609290ef62fa17f32e62 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 23 Jul 2021 20:08:36 +0000 Subject: Added line priting pair-scan results to CSV and changed the default step-size to 10cM for pair-scan --- scripts/rqtl_wrapper.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index 77ed227..d8ecfaa 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -158,7 +158,7 @@ if (!is.null(opt$interval)) { cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max") } else if (!is.null(opt$pairscan)) { verbose_print('Calculating genotype probabilities with interval mapping\n') - cross_object <- calc.genoprob(cross_object, step=20) + cross_object <- calc.genoprob(cross_object, step=10) } else { verbose_print('Calculating genotype probabilities\n') cross_object <- calc.genoprob(cross_object) @@ -244,4 +244,9 @@ if (!is.null(opt$addcovar) || !is.null(opt$control)){ qtl_results = scan_func(cross_object, pheno.col=1, model=opt$model, method=opt$method) } -write.csv(qtl_results, out_file) +verbose_print('Writing results to CSV file\n') +if (!is.null(opt$pairscan)) { + write.csv(qtl_results[1], out_file) +} else { + write.csv(qtl_results, out_file) +} -- cgit v1.2.3 From a675e25d885f385c6ecd708f186b2ee448b97cd1 Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 26 Jul 2021 20:47:37 +0000 Subject: Added pairscan boolean kwarg and process_rqtl_pairscan function for reading in pairscan results + renamed process_rqtl_output to process_rqtl_mapping to distinguish between that and pairscan --- gn3/api/rqtl.py | 8 +++++--- gn3/computations/rqtl.py | 17 ++++++++++++++++- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index ebb746c..ff76356 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -25,7 +25,7 @@ run the rqtl_wrapper script and return the results as JSON # Split kwargs by those with values and boolean ones that just convert to True/False kwargs = ["model", "method", "nperm", "scale", "control_marker"] - boolean_kwargs = ["addcovar", "interval", "pstrata"] + boolean_kwargs = ["addcovar", "interval", "pstrata", "pairscan"] all_kwargs = kwargs + boolean_kwargs rqtl_kwargs = {"geno": genofile, "pheno": phenofile} @@ -48,9 +48,11 @@ run the rqtl_wrapper script and return the results as JSON "output", rqtl_cmd.get('output_file'))): os.system(rqtl_cmd.get('rqtl_cmd')) - rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) + if "pairscan" in boolean_kwargs: + rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file')) + else: + rqtl_output['results'] = process_rqtl_mapping(rqtl_cmd.get('output_file')) - rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) if int(rqtl_kwargs['nperm']) > 0: rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = \ process_perm_output(rqtl_cmd.get('output_file')) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 0433b3f..405b743 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -47,7 +47,7 @@ output filename generated from a hash of the genotype and phenotype files } -def process_rqtl_output(file_name: str) -> List: +def process_rqtl_mapping(file_name: str) -> List: """Given an output file name, read in R/qtl results and return a List of marker objects @@ -80,6 +80,21 @@ def process_rqtl_output(file_name: str) -> List: return marker_obs +def process_rqtl_pairscan(file_name: str) -> List: + """Given an output file name, read in R/qtl pair-scan results and return + a List of Lists representing the matrix of results + + """ + results = [] + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", file_name), "r") as the_file: + for i, line in enumerate(the_file): + if i == 0: # Skip first line + continue + line_items = line.split(",") + results.append(line_items[1:]) # Append all but first item in line + + return results def process_perm_output(file_name: str): """Given base filename, read in R/qtl permutation output and calculate -- cgit v1.2.3 From 67f064606b9c8d4314701911dc4eedbaaa025cb0 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 29 Jul 2021 19:46:34 +0000 Subject: Fix imports to import both process_rqtl_mapping and process_rqtl_pairscan in api/rqtl.py --- gn3/api/rqtl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index ff76356..b0405f4 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -6,7 +6,7 @@ from flask import current_app from flask import jsonify from flask import request -from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_output, process_perm_output +from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_mapping, process_rqtl_pairscan, process_perm_output from gn3.computations.gemma import do_paths_exist rqtl = Blueprint("rqtl", __name__) -- cgit v1.2.3 From 85762647a0e54888bcc7cbd8a11d64cf22e07960 Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 2 Aug 2021 21:07:52 +0000 Subject: Updated rqtl_wrapper to also return a map file when doing a pair-scan (since we need the list of markers/pseudomarkers and their positions) --- scripts/rqtl_wrapper.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index d8ecfaa..f9c2c3b 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -157,7 +157,7 @@ if (!is.null(opt$interval)) { verbose_print('Calculating genotype probabilities with interval mapping\n') cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max") } else if (!is.null(opt$pairscan)) { - verbose_print('Calculating genotype probabilities with interval mapping\n') + verbose_print('Calculating genotype probabilities for pair-scan\n') cross_object <- calc.genoprob(cross_object, step=10) } else { verbose_print('Calculating genotype probabilities\n') @@ -246,7 +246,9 @@ if (!is.null(opt$addcovar) || !is.null(opt$control)){ verbose_print('Writing results to CSV file\n') if (!is.null(opt$pairscan)) { + map_out_file = file.path(opt$outdir, paste("MAP_", opt$filename, sep = "" )) write.csv(qtl_results[1], out_file) + write.csv(qtl_results[2], map_out_file) } else { write.csv(qtl_results, out_file) } -- cgit v1.2.3 From 58343f1005cb5a6aec47db22553313cf9fa17310 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 3 Aug 2021 20:34:06 +0000 Subject: Create pairscan_for_figure and pairscan_for_table functions that return the Dict and List respectively used for the pair scan figure and the table showing the results --- gn3/computations/rqtl.py | 87 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 82 insertions(+), 5 deletions(-) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 405b743..00e1db9 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -82,19 +82,96 @@ def process_rqtl_mapping(file_name: str) -> List: def process_rqtl_pairscan(file_name: str) -> List: """Given an output file name, read in R/qtl pair-scan results and return - a List of Lists representing the matrix of results + a list of both the JSON needed for the d3panels figure and a list of results + to be used when generating the results table (which will include marker names) """ - results = [] + + figure_data = pairscan_results_for_figure(file_name) + table_data = pairscan_results_for_table(file_name) + + return [figure_data, table_data] + +def pairscan_for_figure(file_name: str) -> Dict: + """Given an output file name, read in R/qtl pair-scan results and return + the JSON needed for the d3panels figure + + """ + figure_data = {} + + # Open the file with the actual results, written as a list of lists with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", file_name), "r") as the_file: + lod_results = [] for i, line in enumerate(the_file): if i == 0: # Skip first line continue - line_items = line.split(",") - results.append(line_items[1:]) # Append all but first item in line + line_items = [item.rstrip('\n') for item in line.split(",")] + lod_results.append(line_items[1:]) # Append all but first item in line + figure_data['lod'] = lod_results + + # Open the map file with the list of markers/pseudomarkers and their positions + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", "MAP_" + file_name), "r") as the_file: + chr_list = [] + pos_list = [] + for i, line in enumerate(the_file): + if i == 0: # Skip first line + continue + line_items = [item.rstrip('\n') for item in line.split(",")] + chr_list.append(line_items[1]) + pos_list.append(line_items[2]) + figure_data['chr'] = chr_list + figure_data['pos'] = pos_list + + return figure_data + + +def pairscan_for_table(file_name: str) -> List: + """Given an output file name, read in R/qtl pair-scan results and return + a list of results to be used when generating the results table (which will include marker names) + + """ + table_data = [] + + # Open the map file with the list of markers/pseudomarkers and create list of marker obs + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", "MAP_" + file_name), "r") as the_file: + marker_list = [] + for i, line in enumerate(the_file.readlines()[1:]): + line_items = [item.rstrip('\n') for item in line.split(",")] + this_marker = { + 'name': line_items[0], + 'chr': line_items[1][1:-1], # Strip quotes from beginning and end of chr string + 'pos': line_items[2] + } + + marker_list.append(this_marker) + + # Open the file with the actual results and write the results as + # they will be displayed in the results table + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", file_name), "r") as the_file: + for i, line in enumerate(the_file.readlines()[1:]): + marker_1 = marker_list[i] + line_items = [item.rstrip('\n') for item in line.split(",")] + for j, item in enumerate(line_items[1:]): + marker_2 = marker_list[j] + try: + lod_score = f"{float(item):.3f}" + except: + lod_score = f"{item}" + this_line = { + 'marker1': f"{marker_1['name']}", + 'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM", + 'lod': lod_score, + 'marker2': f"{marker_2['name']}", + 'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM" + } + table_data.append(this_line) + + return table_data - return results def process_perm_output(file_name: str): """Given base filename, read in R/qtl permutation output and calculate -- cgit v1.2.3