From 238e3a1cfd4bc8677a530410af2767d28e93af62 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 22 Jul 2021 23:47:58 +0000 Subject: Added option for running pairscan to rqtl_wrapper.R --- scripts/rqtl_wrapper.R | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index f7e0406..98b1231 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -11,6 +11,7 @@ option_list = list( make_option(c("-c", "--addcovar"), action="store_true", default=NULL, help="Use covariates (included as extra columns in the phenotype input file)"), make_option(c("--model"), type="character", default="normal", help="Mapping Model - Normal or Non-Parametric"), make_option(c("--method"), type="character", default="hk", help="Mapping Method - hk (Haley Knott), ehk (Extended Haley Knott), mr (Marker Regression), em (Expectation-Maximization), imp (Imputation)"), + make_option(c("--pairscan"), action="store_true", default=NULL, help="Run Pair Scan - the R/qtl function scantwo"), make_option(c("-i", "--interval"), action="store_true", default=NULL, help="Use interval mapping"), make_option(c("--nperm"), type="integer", default=0, help="Number of permutations"), make_option(c("--pstrata"), action="store_true", default=NULL, help="Use permutation strata (stored as final column/vector in phenotype input file)"), -- cgit v1.2.3 From c3d59414b4bd23e52823a8b7e846f732e36d8c56 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 23 Jul 2021 19:40:04 +0000 Subject: - Added scan_func function that determines whether scanone or scantwo (pairscan) is used - For pairscan default to using step 20 (subject to change, but some step is required during calc.genoprob to make it run fast enough) - Added some new verbose prints --- scripts/rqtl_wrapper.R | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index 98b1231..77ed227 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -156,6 +156,9 @@ cross_object = geno_to_csvr(geno_file, trait_names, trait_vals, cross_file) if (!is.null(opt$interval)) { verbose_print('Calculating genotype probabilities with interval mapping\n') cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max") +} else if (!is.null(opt$pairscan)) { + verbose_print('Calculating genotype probabilities with interval mapping\n') + cross_object <- calc.genoprob(cross_object, step=20) } else { verbose_print('Calculating genotype probabilities\n') cross_object <- calc.genoprob(cross_object) @@ -164,6 +167,7 @@ if (!is.null(opt$interval)) { # Pull covariates out of cross object, if they exist covars = vector(mode = "list", length = length(trait_names) - 1) if (!is.null(opt$addcovar)) { + verbose_print('Pulling covariates out of cross object\n') #If perm strata are being used, it'll be included as the final column in the phenotype file if (!is.null(opt$pstrata)) { covar_names = trait_names[3:length(trait_names) - 1] @@ -176,16 +180,28 @@ if (!is.null(opt$addcovar)) { # Pull permutation strata out of cross object, if it is being used perm_strata = vector() if (!is.null(opt$pstrata)) { + verbose_print('Pulling permutation strata out of cross object\n') strata_col = trait_names[length(trait_names)] perm_strata <- pull.pheno(cross_object, strata_col) } # If a marker name is supplied as covariate, get its vector of values and add them as a covariate if (!is.null(opt$control)) { + verbose_print('Creating marker covariates and binding them to covariates vector\n') marker_covars = create_marker_covars(cross_object, opt$control) covars <- cbind(covars, marker_covars) } +if (!is.null(opt$pairscan)) { + scan_func <- function(...){ + scantwo(...) + } +} else { + scan_func <- function(...){ + scanone(...) + } +} + # Calculate permutations if (opt$nperm > 0) { if (!is.null(opt$filename)){ @@ -197,18 +213,18 @@ if (opt$nperm > 0) { if (!is.null(opt$addcovar) || !is.null(opt$control)){ if (!is.null(opt$pstrata)) { verbose_print('Running permutations with cofactors and strata\n') - perm_results = scanone(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method) + perm_results = scan_func(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method) } else { verbose_print('Running permutations with cofactors\n') - perm_results = scanone(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, model=opt$model, method=opt$method) + perm_results = scan_func(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, model=opt$model, method=opt$method) } } else { if (!is.null(opt$pstrata)) { verbose_print('Running permutations with strata\n') - perm_results = scanone(cross_object, pheno.col=1, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method) + perm_results = scan_func(cross_object, pheno.col=1, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method) } else { verbose_print('Running permutations\n') - perm_results = scanone(cross_object, pheno.col=1, n.perm=opt$nperm, model=opt$model, method=opt$method) + perm_results = scan_func(cross_object, pheno.col=1, n.perm=opt$nperm, model=opt$model, method=opt$method) } } write.csv(perm_results, perm_out_file) @@ -221,10 +237,11 @@ if (!is.null(opt$filename)){ } if (!is.null(opt$addcovar) || !is.null(opt$control)){ - verbose_print('Running scanone with cofactors\n') - qtl_results = scanone(cross_object, pheno.col=1, addcovar=covars, model=opt$model, method=opt$method) + verbose_print('Running scan with cofactors\n') + qtl_results = scan_func(cross_object, pheno.col=1, addcovar=covars, model=opt$model, method=opt$method) } else { - verbose_print('Running scanone\n') - qtl_results = scanone(cross_object, pheno.col=1, model=opt$model, method=opt$method) + verbose_print('Running scan\n') + qtl_results = scan_func(cross_object, pheno.col=1, model=opt$model, method=opt$method) } + write.csv(qtl_results, out_file) -- cgit v1.2.3 From b3e6105b5d8d2c2f30f2609290ef62fa17f32e62 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 23 Jul 2021 20:08:36 +0000 Subject: Added line priting pair-scan results to CSV and changed the default step-size to 10cM for pair-scan --- scripts/rqtl_wrapper.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index 77ed227..d8ecfaa 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -158,7 +158,7 @@ if (!is.null(opt$interval)) { cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max") } else if (!is.null(opt$pairscan)) { verbose_print('Calculating genotype probabilities with interval mapping\n') - cross_object <- calc.genoprob(cross_object, step=20) + cross_object <- calc.genoprob(cross_object, step=10) } else { verbose_print('Calculating genotype probabilities\n') cross_object <- calc.genoprob(cross_object) @@ -244,4 +244,9 @@ if (!is.null(opt$addcovar) || !is.null(opt$control)){ qtl_results = scan_func(cross_object, pheno.col=1, model=opt$model, method=opt$method) } -write.csv(qtl_results, out_file) +verbose_print('Writing results to CSV file\n') +if (!is.null(opt$pairscan)) { + write.csv(qtl_results[1], out_file) +} else { + write.csv(qtl_results, out_file) +} -- cgit v1.2.3 From a675e25d885f385c6ecd708f186b2ee448b97cd1 Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 26 Jul 2021 20:47:37 +0000 Subject: Added pairscan boolean kwarg and process_rqtl_pairscan function for reading in pairscan results + renamed process_rqtl_output to process_rqtl_mapping to distinguish between that and pairscan --- gn3/api/rqtl.py | 8 +++++--- gn3/computations/rqtl.py | 17 ++++++++++++++++- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index ebb746c..ff76356 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -25,7 +25,7 @@ run the rqtl_wrapper script and return the results as JSON # Split kwargs by those with values and boolean ones that just convert to True/False kwargs = ["model", "method", "nperm", "scale", "control_marker"] - boolean_kwargs = ["addcovar", "interval", "pstrata"] + boolean_kwargs = ["addcovar", "interval", "pstrata", "pairscan"] all_kwargs = kwargs + boolean_kwargs rqtl_kwargs = {"geno": genofile, "pheno": phenofile} @@ -48,9 +48,11 @@ run the rqtl_wrapper script and return the results as JSON "output", rqtl_cmd.get('output_file'))): os.system(rqtl_cmd.get('rqtl_cmd')) - rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) + if "pairscan" in boolean_kwargs: + rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file')) + else: + rqtl_output['results'] = process_rqtl_mapping(rqtl_cmd.get('output_file')) - rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) if int(rqtl_kwargs['nperm']) > 0: rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = \ process_perm_output(rqtl_cmd.get('output_file')) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 0433b3f..405b743 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -47,7 +47,7 @@ output filename generated from a hash of the genotype and phenotype files } -def process_rqtl_output(file_name: str) -> List: +def process_rqtl_mapping(file_name: str) -> List: """Given an output file name, read in R/qtl results and return a List of marker objects @@ -80,6 +80,21 @@ def process_rqtl_output(file_name: str) -> List: return marker_obs +def process_rqtl_pairscan(file_name: str) -> List: + """Given an output file name, read in R/qtl pair-scan results and return + a List of Lists representing the matrix of results + + """ + results = [] + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", file_name), "r") as the_file: + for i, line in enumerate(the_file): + if i == 0: # Skip first line + continue + line_items = line.split(",") + results.append(line_items[1:]) # Append all but first item in line + + return results def process_perm_output(file_name: str): """Given base filename, read in R/qtl permutation output and calculate -- cgit v1.2.3 From 67f064606b9c8d4314701911dc4eedbaaa025cb0 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 29 Jul 2021 19:46:34 +0000 Subject: Fix imports to import both process_rqtl_mapping and process_rqtl_pairscan in api/rqtl.py --- gn3/api/rqtl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index ff76356..b0405f4 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -6,7 +6,7 @@ from flask import current_app from flask import jsonify from flask import request -from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_output, process_perm_output +from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_mapping, process_rqtl_pairscan, process_perm_output from gn3.computations.gemma import do_paths_exist rqtl = Blueprint("rqtl", __name__) -- cgit v1.2.3 From 85762647a0e54888bcc7cbd8a11d64cf22e07960 Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 2 Aug 2021 21:07:52 +0000 Subject: Updated rqtl_wrapper to also return a map file when doing a pair-scan (since we need the list of markers/pseudomarkers and their positions) --- scripts/rqtl_wrapper.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index d8ecfaa..f9c2c3b 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -157,7 +157,7 @@ if (!is.null(opt$interval)) { verbose_print('Calculating genotype probabilities with interval mapping\n') cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max") } else if (!is.null(opt$pairscan)) { - verbose_print('Calculating genotype probabilities with interval mapping\n') + verbose_print('Calculating genotype probabilities for pair-scan\n') cross_object <- calc.genoprob(cross_object, step=10) } else { verbose_print('Calculating genotype probabilities\n') @@ -246,7 +246,9 @@ if (!is.null(opt$addcovar) || !is.null(opt$control)){ verbose_print('Writing results to CSV file\n') if (!is.null(opt$pairscan)) { + map_out_file = file.path(opt$outdir, paste("MAP_", opt$filename, sep = "" )) write.csv(qtl_results[1], out_file) + write.csv(qtl_results[2], map_out_file) } else { write.csv(qtl_results, out_file) } -- cgit v1.2.3 From 58343f1005cb5a6aec47db22553313cf9fa17310 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 3 Aug 2021 20:34:06 +0000 Subject: Create pairscan_for_figure and pairscan_for_table functions that return the Dict and List respectively used for the pair scan figure and the table showing the results --- gn3/computations/rqtl.py | 87 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 82 insertions(+), 5 deletions(-) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 405b743..00e1db9 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -82,19 +82,96 @@ def process_rqtl_mapping(file_name: str) -> List: def process_rqtl_pairscan(file_name: str) -> List: """Given an output file name, read in R/qtl pair-scan results and return - a List of Lists representing the matrix of results + a list of both the JSON needed for the d3panels figure and a list of results + to be used when generating the results table (which will include marker names) """ - results = [] + + figure_data = pairscan_results_for_figure(file_name) + table_data = pairscan_results_for_table(file_name) + + return [figure_data, table_data] + +def pairscan_for_figure(file_name: str) -> Dict: + """Given an output file name, read in R/qtl pair-scan results and return + the JSON needed for the d3panels figure + + """ + figure_data = {} + + # Open the file with the actual results, written as a list of lists with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", file_name), "r") as the_file: + lod_results = [] for i, line in enumerate(the_file): if i == 0: # Skip first line continue - line_items = line.split(",") - results.append(line_items[1:]) # Append all but first item in line + line_items = [item.rstrip('\n') for item in line.split(",")] + lod_results.append(line_items[1:]) # Append all but first item in line + figure_data['lod'] = lod_results + + # Open the map file with the list of markers/pseudomarkers and their positions + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", "MAP_" + file_name), "r") as the_file: + chr_list = [] + pos_list = [] + for i, line in enumerate(the_file): + if i == 0: # Skip first line + continue + line_items = [item.rstrip('\n') for item in line.split(",")] + chr_list.append(line_items[1]) + pos_list.append(line_items[2]) + figure_data['chr'] = chr_list + figure_data['pos'] = pos_list + + return figure_data + + +def pairscan_for_table(file_name: str) -> List: + """Given an output file name, read in R/qtl pair-scan results and return + a list of results to be used when generating the results table (which will include marker names) + + """ + table_data = [] + + # Open the map file with the list of markers/pseudomarkers and create list of marker obs + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", "MAP_" + file_name), "r") as the_file: + marker_list = [] + for i, line in enumerate(the_file.readlines()[1:]): + line_items = [item.rstrip('\n') for item in line.split(",")] + this_marker = { + 'name': line_items[0], + 'chr': line_items[1][1:-1], # Strip quotes from beginning and end of chr string + 'pos': line_items[2] + } + + marker_list.append(this_marker) + + # Open the file with the actual results and write the results as + # they will be displayed in the results table + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", file_name), "r") as the_file: + for i, line in enumerate(the_file.readlines()[1:]): + marker_1 = marker_list[i] + line_items = [item.rstrip('\n') for item in line.split(",")] + for j, item in enumerate(line_items[1:]): + marker_2 = marker_list[j] + try: + lod_score = f"{float(item):.3f}" + except: + lod_score = f"{item}" + this_line = { + 'marker1': f"{marker_1['name']}", + 'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM", + 'lod': lod_score, + 'marker2': f"{marker_2['name']}", + 'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM" + } + table_data.append(this_line) + + return table_data - return results def process_perm_output(file_name: str): """Given base filename, read in R/qtl permutation output and calculate -- cgit v1.2.3 From e9110f840c8817092d0264990a4c7f6d16e966ba Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 22 Jul 2021 23:47:58 +0000 Subject: Added option for running pairscan to rqtl_wrapper.R --- scripts/rqtl_wrapper.R | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index 7518175..db0b83c 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -11,6 +11,7 @@ option_list = list( make_option(c("-c", "--addcovar"), action="store_true", default=NULL, help="Use covariates (included as extra columns in the phenotype input file)"), make_option(c("--model"), type="character", default="normal", help="Mapping Model - Normal or Non-Parametric"), make_option(c("--method"), type="character", default="hk", help="Mapping Method - hk (Haley Knott), ehk (Extended Haley Knott), mr (Marker Regression), em (Expectation-Maximization), imp (Imputation)"), + make_option(c("--pairscan"), action="store_true", default=NULL, help="Run Pair Scan - the R/qtl function scantwo"), make_option(c("-i", "--interval"), action="store_true", default=NULL, help="Use interval mapping"), make_option(c("--nperm"), type="integer", default=0, help="Number of permutations"), make_option(c("--pstrata"), action="store_true", default=NULL, help="Use permutation strata (stored as final column/vector in phenotype input file)"), -- cgit v1.2.3 From 14d76308a779f6e4da4c9f4c17f971f5a0c938b0 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 23 Jul 2021 19:40:04 +0000 Subject: - Added scan_func function that determines whether scanone or scantwo (pairscan) is used - For pairscan default to using step 20 (subject to change, but some step is required during calc.genoprob to make it run fast enough) - Added some new verbose prints --- scripts/rqtl_wrapper.R | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index db0b83c..2641271 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -156,6 +156,9 @@ cross_object = geno_to_csvr(geno_file, trait_names, trait_vals, cross_file) if (!is.null(opt$interval)) { verbose_print('Calculating genotype probabilities with interval mapping\n') cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max") +} else if (!is.null(opt$pairscan)) { + verbose_print('Calculating genotype probabilities with interval mapping\n') + cross_object <- calc.genoprob(cross_object, step=20) } else { verbose_print('Calculating genotype probabilities\n') cross_object <- calc.genoprob(cross_object) @@ -164,6 +167,7 @@ if (!is.null(opt$interval)) { # Pull covariates out of cross object, if they exist covars = vector(mode = "list", length = length(trait_names) - 1) if (!is.null(opt$addcovar)) { + verbose_print('Pulling covariates out of cross object\n') #If perm strata are being used, it'll be included as the final column in the phenotype file if (!is.null(opt$pstrata)) { covar_names = trait_names[3:length(trait_names) - 1] @@ -176,16 +180,28 @@ if (!is.null(opt$addcovar)) { # Pull permutation strata out of cross object, if it is being used perm_strata = vector() if (!is.null(opt$pstrata)) { + verbose_print('Pulling permutation strata out of cross object\n') strata_col = trait_names[length(trait_names)] perm_strata <- pull.pheno(cross_object, strata_col) } # If a marker name is supplied as covariate, get its vector of values and add them as a covariate if (!is.null(opt$control)) { + verbose_print('Creating marker covariates and binding them to covariates vector\n') marker_covars = create_marker_covars(cross_object, opt$control) covars <- cbind(covars, marker_covars) } +if (!is.null(opt$pairscan)) { + scan_func <- function(...){ + scantwo(...) + } +} else { + scan_func <- function(...){ + scanone(...) + } +} + # Calculate permutations if (opt$nperm > 0) { if (!is.null(opt$filename)){ @@ -196,19 +212,19 @@ if (opt$nperm > 0) { if (!is.null(opt$addcovar) || !is.null(opt$control)){ if (!is.null(opt$pstrata)) { - verbose_print('Running ', opt$nperm, ' permutations with cofactors and strata\n') - perm_results = scanone(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method) + verbose_print('Running permutations with cofactors and strata\n') + perm_results = scan_func(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method) } else { - verbose_print('Running ', opt$nperm, ' permutations with cofactors\n') - perm_results = scanone(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, model=opt$model, method=opt$method) + verbose_print('Running permutations with cofactors\n') + perm_results = scan_func(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, model=opt$model, method=opt$method) } } else { if (!is.null(opt$pstrata)) { - verbose_print('Running ', opt$nperm, ' permutations with strata\n') - perm_results = scanone(cross_object, pheno.col=1, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method) + verbose_print('Running permutations with strata\n') + perm_results = scan_func(cross_object, pheno.col=1, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method) } else { - verbose_print('Running ', opt$nperm, ' permutations\n') - perm_results = scanone(cross_object, pheno.col=1, n.perm=opt$nperm, model=opt$model, method=opt$method) + verbose_print('Running permutations\n') + perm_results = scan_func(cross_object, pheno.col=1, n.perm=opt$nperm, model=opt$model, method=opt$method) } } write.csv(perm_results, perm_out_file) @@ -221,10 +237,11 @@ if (!is.null(opt$filename)){ } if (!is.null(opt$addcovar) || !is.null(opt$control)){ - verbose_print('Running scanone with cofactors\n') - qtl_results = scanone(cross_object, pheno.col=1, addcovar=covars, model=opt$model, method=opt$method) + verbose_print('Running scan with cofactors\n') + qtl_results = scan_func(cross_object, pheno.col=1, addcovar=covars, model=opt$model, method=opt$method) } else { - verbose_print('Running scanone\n') - qtl_results = scanone(cross_object, pheno.col=1, model=opt$model, method=opt$method) + verbose_print('Running scan\n') + qtl_results = scan_func(cross_object, pheno.col=1, model=opt$model, method=opt$method) } + write.csv(qtl_results, out_file) -- cgit v1.2.3 From 91804b4d4f5f82ddad30b68691456885b39309c9 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 23 Jul 2021 20:08:36 +0000 Subject: Added line priting pair-scan results to CSV and changed the default step-size to 10cM for pair-scan --- scripts/rqtl_wrapper.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index 2641271..ebd5a91 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -158,7 +158,7 @@ if (!is.null(opt$interval)) { cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max") } else if (!is.null(opt$pairscan)) { verbose_print('Calculating genotype probabilities with interval mapping\n') - cross_object <- calc.genoprob(cross_object, step=20) + cross_object <- calc.genoprob(cross_object, step=10) } else { verbose_print('Calculating genotype probabilities\n') cross_object <- calc.genoprob(cross_object) @@ -244,4 +244,9 @@ if (!is.null(opt$addcovar) || !is.null(opt$control)){ qtl_results = scan_func(cross_object, pheno.col=1, model=opt$model, method=opt$method) } -write.csv(qtl_results, out_file) +verbose_print('Writing results to CSV file\n') +if (!is.null(opt$pairscan)) { + write.csv(qtl_results[1], out_file) +} else { + write.csv(qtl_results, out_file) +} -- cgit v1.2.3 From 971c5f1342770b999c62e345a9ebf942a0410758 Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 26 Jul 2021 20:47:37 +0000 Subject: Added pairscan boolean kwarg and process_rqtl_pairscan function for reading in pairscan results + renamed process_rqtl_output to process_rqtl_mapping to distinguish between that and pairscan --- gn3/api/rqtl.py | 8 +++++--- gn3/computations/rqtl.py | 17 ++++++++++++++++- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index ebb746c..ff76356 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -25,7 +25,7 @@ run the rqtl_wrapper script and return the results as JSON # Split kwargs by those with values and boolean ones that just convert to True/False kwargs = ["model", "method", "nperm", "scale", "control_marker"] - boolean_kwargs = ["addcovar", "interval", "pstrata"] + boolean_kwargs = ["addcovar", "interval", "pstrata", "pairscan"] all_kwargs = kwargs + boolean_kwargs rqtl_kwargs = {"geno": genofile, "pheno": phenofile} @@ -48,9 +48,11 @@ run the rqtl_wrapper script and return the results as JSON "output", rqtl_cmd.get('output_file'))): os.system(rqtl_cmd.get('rqtl_cmd')) - rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) + if "pairscan" in boolean_kwargs: + rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file')) + else: + rqtl_output['results'] = process_rqtl_mapping(rqtl_cmd.get('output_file')) - rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) if int(rqtl_kwargs['nperm']) > 0: rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = \ process_perm_output(rqtl_cmd.get('output_file')) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 0433b3f..405b743 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -47,7 +47,7 @@ output filename generated from a hash of the genotype and phenotype files } -def process_rqtl_output(file_name: str) -> List: +def process_rqtl_mapping(file_name: str) -> List: """Given an output file name, read in R/qtl results and return a List of marker objects @@ -80,6 +80,21 @@ def process_rqtl_output(file_name: str) -> List: return marker_obs +def process_rqtl_pairscan(file_name: str) -> List: + """Given an output file name, read in R/qtl pair-scan results and return + a List of Lists representing the matrix of results + + """ + results = [] + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", file_name), "r") as the_file: + for i, line in enumerate(the_file): + if i == 0: # Skip first line + continue + line_items = line.split(",") + results.append(line_items[1:]) # Append all but first item in line + + return results def process_perm_output(file_name: str): """Given base filename, read in R/qtl permutation output and calculate -- cgit v1.2.3 From 72ed7dfc45bb2a1303b900c8bc5b8ddd9e189a27 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 29 Jul 2021 19:46:34 +0000 Subject: Fix imports to import both process_rqtl_mapping and process_rqtl_pairscan in api/rqtl.py --- gn3/api/rqtl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index ff76356..b0405f4 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -6,7 +6,7 @@ from flask import current_app from flask import jsonify from flask import request -from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_output, process_perm_output +from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_mapping, process_rqtl_pairscan, process_perm_output from gn3.computations.gemma import do_paths_exist rqtl = Blueprint("rqtl", __name__) -- cgit v1.2.3 From 2cb18bb7916e029d3aa7779df710c1d0cbc910f8 Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 2 Aug 2021 21:07:52 +0000 Subject: Updated rqtl_wrapper to also return a map file when doing a pair-scan (since we need the list of markers/pseudomarkers and their positions) --- scripts/rqtl_wrapper.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index ebd5a91..ffff5b9 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -157,7 +157,7 @@ if (!is.null(opt$interval)) { verbose_print('Calculating genotype probabilities with interval mapping\n') cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max") } else if (!is.null(opt$pairscan)) { - verbose_print('Calculating genotype probabilities with interval mapping\n') + verbose_print('Calculating genotype probabilities for pair-scan\n') cross_object <- calc.genoprob(cross_object, step=10) } else { verbose_print('Calculating genotype probabilities\n') @@ -246,7 +246,9 @@ if (!is.null(opt$addcovar) || !is.null(opt$control)){ verbose_print('Writing results to CSV file\n') if (!is.null(opt$pairscan)) { + map_out_file = file.path(opt$outdir, paste("MAP_", opt$filename, sep = "" )) write.csv(qtl_results[1], out_file) + write.csv(qtl_results[2], map_out_file) } else { write.csv(qtl_results, out_file) } -- cgit v1.2.3 From df3ec570f22ebb4809c9accc530795757d5afaad Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 3 Aug 2021 20:34:06 +0000 Subject: Create pairscan_for_figure and pairscan_for_table functions that return the Dict and List respectively used for the pair scan figure and the table showing the results --- gn3/computations/rqtl.py | 87 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 82 insertions(+), 5 deletions(-) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 405b743..00e1db9 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -82,19 +82,96 @@ def process_rqtl_mapping(file_name: str) -> List: def process_rqtl_pairscan(file_name: str) -> List: """Given an output file name, read in R/qtl pair-scan results and return - a List of Lists representing the matrix of results + a list of both the JSON needed for the d3panels figure and a list of results + to be used when generating the results table (which will include marker names) """ - results = [] + + figure_data = pairscan_results_for_figure(file_name) + table_data = pairscan_results_for_table(file_name) + + return [figure_data, table_data] + +def pairscan_for_figure(file_name: str) -> Dict: + """Given an output file name, read in R/qtl pair-scan results and return + the JSON needed for the d3panels figure + + """ + figure_data = {} + + # Open the file with the actual results, written as a list of lists with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", file_name), "r") as the_file: + lod_results = [] for i, line in enumerate(the_file): if i == 0: # Skip first line continue - line_items = line.split(",") - results.append(line_items[1:]) # Append all but first item in line + line_items = [item.rstrip('\n') for item in line.split(",")] + lod_results.append(line_items[1:]) # Append all but first item in line + figure_data['lod'] = lod_results + + # Open the map file with the list of markers/pseudomarkers and their positions + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", "MAP_" + file_name), "r") as the_file: + chr_list = [] + pos_list = [] + for i, line in enumerate(the_file): + if i == 0: # Skip first line + continue + line_items = [item.rstrip('\n') for item in line.split(",")] + chr_list.append(line_items[1]) + pos_list.append(line_items[2]) + figure_data['chr'] = chr_list + figure_data['pos'] = pos_list + + return figure_data + + +def pairscan_for_table(file_name: str) -> List: + """Given an output file name, read in R/qtl pair-scan results and return + a list of results to be used when generating the results table (which will include marker names) + + """ + table_data = [] + + # Open the map file with the list of markers/pseudomarkers and create list of marker obs + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", "MAP_" + file_name), "r") as the_file: + marker_list = [] + for i, line in enumerate(the_file.readlines()[1:]): + line_items = [item.rstrip('\n') for item in line.split(",")] + this_marker = { + 'name': line_items[0], + 'chr': line_items[1][1:-1], # Strip quotes from beginning and end of chr string + 'pos': line_items[2] + } + + marker_list.append(this_marker) + + # Open the file with the actual results and write the results as + # they will be displayed in the results table + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", file_name), "r") as the_file: + for i, line in enumerate(the_file.readlines()[1:]): + marker_1 = marker_list[i] + line_items = [item.rstrip('\n') for item in line.split(",")] + for j, item in enumerate(line_items[1:]): + marker_2 = marker_list[j] + try: + lod_score = f"{float(item):.3f}" + except: + lod_score = f"{item}" + this_line = { + 'marker1': f"{marker_1['name']}", + 'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM", + 'lod': lod_score, + 'marker2': f"{marker_2['name']}", + 'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM" + } + table_data.append(this_line) + + return table_data - return results def process_perm_output(file_name: str): """Given base filename, read in R/qtl permutation output and calculate -- cgit v1.2.3 From 9f913b460cc7f72f1732195e6360c2d45b597d99 Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 4 Aug 2021 19:41:47 +0000 Subject: Fixed a cople function calls to use the updated function names --- gn3/computations/rqtl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 00e1db9..45cf430 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -87,8 +87,8 @@ def process_rqtl_pairscan(file_name: str) -> List: """ - figure_data = pairscan_results_for_figure(file_name) - table_data = pairscan_results_for_table(file_name) + figure_data = pairscan_for_figure(file_name) + table_data = pairscan_for_table(file_name) return [figure_data, table_data] -- cgit v1.2.3 From 1408b107beb4c799a1d7afaf571f53d0d56305bf Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 11 Aug 2021 17:36:18 +0000 Subject: Removed quotes from beginning and end of chromosome string --- gn3/computations/rqtl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 45cf430..3b50c85 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -119,7 +119,7 @@ def pairscan_for_figure(file_name: str) -> Dict: if i == 0: # Skip first line continue line_items = [item.rstrip('\n') for item in line.split(",")] - chr_list.append(line_items[1]) + chr_list.append(line_items[1][1:-1]) pos_list.append(line_items[2]) figure_data['chr'] = chr_list figure_data['pos'] = pos_list -- cgit v1.2.3 From 9aeafe51e60989fb018d147163c4fb861ceeb256 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 26 Aug 2021 20:17:14 +0000 Subject: Added genofile name to inputs for processing R/qtl pair-scan results, since it's needed to store the proximal/distal markers for each position --- gn3/api/rqtl.py | 2 +- gn3/computations/rqtl.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index b0405f4..2ff0f4e 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -49,7 +49,7 @@ run the rqtl_wrapper script and return the results as JSON os.system(rqtl_cmd.get('rqtl_cmd')) if "pairscan" in boolean_kwargs: - rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file')) + rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file'), genofile) else: rqtl_output['results'] = process_rqtl_mapping(rqtl_cmd.get('output_file')) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 3b50c85..ea88c29 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -80,7 +80,7 @@ def process_rqtl_mapping(file_name: str) -> List: return marker_obs -def process_rqtl_pairscan(file_name: str) -> List: +def process_rqtl_pairscan(file_name: str, geno_file: str) -> List: """Given an output file name, read in R/qtl pair-scan results and return a list of both the JSON needed for the d3panels figure and a list of results to be used when generating the results table (which will include marker names) @@ -88,7 +88,7 @@ def process_rqtl_pairscan(file_name: str) -> List: """ figure_data = pairscan_for_figure(file_name) - table_data = pairscan_for_table(file_name) + table_data = pairscan_for_table(file_name, geno_file) return [figure_data, table_data] @@ -127,7 +127,7 @@ def pairscan_for_figure(file_name: str) -> Dict: return figure_data -def pairscan_for_table(file_name: str) -> List: +def pairscan_for_table(file_name: str, geno_file: str) -> List: """Given an output file name, read in R/qtl pair-scan results and return a list of results to be used when generating the results table (which will include marker names) -- cgit v1.2.3 From 23be50a195e648fa567777c4ab679d4c30cf6933 Mon Sep 17 00:00:00 2001 From: zsloan Date: Sat, 28 Aug 2021 00:46:10 +0000 Subject: Fix issue that causes R/qtl to always run pair-scan even if pair-scan isn't selected --- gn3/api/rqtl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index 2ff0f4e..6548e1a 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -48,7 +48,7 @@ run the rqtl_wrapper script and return the results as JSON "output", rqtl_cmd.get('output_file'))): os.system(rqtl_cmd.get('rqtl_cmd')) - if "pairscan" in boolean_kwargs: + if "pairscan" in rqtl_bool_kwargs: rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file'), genofile) else: rqtl_output['results'] = process_rqtl_mapping(rqtl_cmd.get('output_file')) -- cgit v1.2.3 From 49a9150f2dafee19852d0e51ce089fdd54be7b5f Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 23 Sep 2021 22:33:10 +0000 Subject: Add functions for getting proximal/distal markers for each pseudomarker position in pair-scan results + return only the sorted top 500 results --- gn3/computations/rqtl.py | 62 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 4 deletions(-) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index ea88c29..d6fcd3f 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -1,5 +1,6 @@ """Procedures related rqtl computations""" import os +from bisect import bisect from typing import Dict, List, Union import numpy as np @@ -148,30 +149,83 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List: marker_list.append(this_marker) + # Get the list of original markers from the .geno file + original_markers = build_marker_pos_list(geno_file) + # Open the file with the actual results and write the results as # they will be displayed in the results table with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", file_name), "r") as the_file: for i, line in enumerate(the_file.readlines()[1:]): marker_1 = marker_list[i] + proximal1, distal1 = find_nearest_marker(marker_1['chr'], marker_1['pos'], original_markers) line_items = [item.rstrip('\n') for item in line.split(",")] for j, item in enumerate(line_items[1:]): marker_2 = marker_list[j] + proximal2, distal2 = find_nearest_marker(marker_2['chr'], marker_2['pos'], original_markers) try: lod_score = f"{float(item):.3f}" except: lod_score = f"{item}" this_line = { - 'marker1': f"{marker_1['name']}", + 'proximal1': proximal1, + 'distal1': distal1, 'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM", 'lod': lod_score, - 'marker2': f"{marker_2['name']}", + 'proximal2': proximal2, + 'distal2': distal2, 'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM" } - table_data.append(this_line) - return table_data + table_data.append(this_line) + + return sorted(table_data, key = lambda i: float(i['lod']), reverse=True)[:500] + +def build_marker_pos_list(genotype_file): + """ + Gets list of markers and their positions from .geno file + + Basically a pared-down version of parse_genotype_file for R/qtl pair-scan + """ + + with open(genotype_file, "r") as infile: + contents = infile.readlines() + + # Get all lines after the metadata + lines = tuple(line for line in contents if + ((not line.strip().startswith("#")) and + (not line.strip().startswith("@")) and + (not line.strip() == ""))) + + header_items = lines[0].split("\t") + mb_exists = "Mb" in header_items + pos_column = header_items.index("Mb") if mb_exists else header_items.index("cM") + + the_markers = {} + for line in lines[1:]: # The lines with markers + line_items = line.split("\t") + this_chr = line_items[0] + if this_chr not in the_markers: + the_markers[this_chr] = {} + the_markers[this_chr][str(float(line_items[pos_column]))] = line_items[1] + + return the_markers + +def find_nearest_marker(the_chr, the_pos, marker_list): + """ + Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results), + return the nearest real marker + """ + + pos_list = [float(pos) for pos in marker_list[the_chr]] + + # Get the position of the pseudomarker in the list of markers for the chr + the_pos_index = bisect(pos_list, float(the_pos)) + + proximal_marker = marker_list[the_chr][str(pos_list[the_pos_index-1])] + distal_marker = marker_list[the_chr][str(pos_list[the_pos_index])] + return proximal_marker, distal_marker def process_perm_output(file_name: str): """Given base filename, read in R/qtl permutation output and calculate -- cgit v1.2.3 From 20f26af7a1af24b823ba1cf29d0e4393dabf9168 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 23 Sep 2021 22:33:31 +0000 Subject: Add typing to some functions --- gn3/computations/rqtl.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index d6fcd3f..4f2709c 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -1,7 +1,7 @@ """Procedures related rqtl computations""" import os from bisect import bisect -from typing import Dict, List, Union +from typing import Dict, List, Tuple, Union import numpy as np @@ -150,7 +150,7 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List: marker_list.append(this_marker) # Get the list of original markers from the .geno file - original_markers = build_marker_pos_list(geno_file) + original_markers = build_marker_pos_dict(geno_file) # Open the file with the actual results and write the results as # they will be displayed in the results table @@ -181,7 +181,7 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List: return sorted(table_data, key = lambda i: float(i['lod']), reverse=True)[:500] -def build_marker_pos_list(genotype_file): +def build_marker_pos_dict(genotype_file: str) -> Dict: """ Gets list of markers and their positions from .geno file @@ -211,7 +211,7 @@ def build_marker_pos_list(genotype_file): return the_markers -def find_nearest_marker(the_chr, the_pos, marker_list): +def find_nearest_marker(the_chr: str, the_pos: str, marker_list: Dict) -> Tuple[str, str]: """ Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results), return the nearest real marker @@ -227,7 +227,7 @@ def find_nearest_marker(the_chr, the_pos, marker_list): return proximal_marker, distal_marker -def process_perm_output(file_name: str): +def process_perm_output(file_name: str) -> Tuple[List, float, float]: """Given base filename, read in R/qtl permutation output and calculate suggestive and significant thresholds -- cgit v1.2.3 From 976660ce9ef915426c7ce5ff9077b439e4102a2c Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 23 Sep 2021 22:48:39 +0000 Subject: Tried to make the docstrings more consistent --- gn3/computations/rqtl.py | 38 +++++++++++--------------------------- 1 file changed, 11 insertions(+), 27 deletions(-) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 38c5000..45232e3 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -16,9 +16,7 @@ def generate_rqtl_cmd(rqtl_wrapper_cmd: str, rqtl_wrapper_bool_kwargs: list) -> Dict: """Given the base rqtl_wrapper command and dict of keyword arguments, return the full rqtl_wrapper command and an -output filename generated from a hash of the genotype and phenotype files - - """ +output filename generated from a hash of the genotype and phenotype files""" # Generate a hash from contents of the genotype and phenotype files _hash = get_hash_of_files( @@ -50,9 +48,7 @@ output filename generated from a hash of the genotype and phenotype files def process_rqtl_mapping(file_name: str) -> List: """Given an output file name, read in R/qtl results and return - a List of marker objects - - """ + a List of marker objects""" marker_obs = [] # Later I should probably redo this using csv.read to avoid the # awkwardness with removing quotes with [1:-1] @@ -83,11 +79,8 @@ def process_rqtl_mapping(file_name: str) -> List: def process_rqtl_pairscan(file_name: str, geno_file: str) -> List: """Given an output file name, read in R/qtl pair-scan results and return - a list of both the JSON needed for the d3panels figure and a list of results - to be used when generating the results table (which will include marker names) - - """ - +a list of both the JSON needed for the d3panels figure and a list of results +to be used when generating the results table (which will include marker names)""" figure_data = pairscan_for_figure(file_name) table_data = pairscan_for_table(file_name, geno_file) @@ -95,9 +88,7 @@ def process_rqtl_pairscan(file_name: str, geno_file: str) -> List: def pairscan_for_figure(file_name: str) -> Dict: """Given an output file name, read in R/qtl pair-scan results and return - the JSON needed for the d3panels figure - - """ + the JSON needed for the d3panels figure""" figure_data = {} # Open the file with the actual results, written as a list of lists @@ -129,9 +120,7 @@ def pairscan_for_figure(file_name: str) -> Dict: def pairscan_for_table(file_name: str, geno_file: str) -> List: """Given an output file name, read in R/qtl pair-scan results and return - a list of results to be used when generating the results table (which will include marker names) - - """ + a list of results to be used when generating the results table (which will include marker names)""" table_data = [] # Open the map file with the list of markers/pseudomarkers and create list of marker obs @@ -181,11 +170,9 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List: return sorted(table_data, key = lambda i: float(i['lod']), reverse=True)[:500] def build_marker_pos_dict(genotype_file: str) -> Dict: - """ - Gets list of markers and their positions from .geno file + """Gets list of markers and their positions from .geno file - Basically a pared-down version of parse_genotype_file for R/qtl pair-scan - """ + Basically a pared-down version of parse_genotype_file for R/qtl pair-scan""" with open(genotype_file, "r") as infile: contents = infile.readlines() @@ -211,10 +198,8 @@ def build_marker_pos_dict(genotype_file: str) -> Dict: return the_markers def find_nearest_marker(the_chr: str, the_pos: str, marker_list: Dict) -> Tuple[str, str]: - """ - Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results), - return the nearest real marker - """ + """Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results), + return the nearest real marker""" pos_list = [float(pos) for pos in marker_list[the_chr]] @@ -228,9 +213,8 @@ def find_nearest_marker(the_chr: str, the_pos: str, marker_list: Dict) -> Tuple[ def process_perm_output(file_name: str) -> Tuple[List, float, float]: """Given base filename, read in R/qtl permutation output and calculate - suggestive and significant thresholds + suggestive and significant thresholds""" - """ perm_results = [] with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", "PERM_" + file_name), "r") as the_file: -- cgit v1.2.3 From 30d00f4db20ea9dce827fc4f4ac3d50221ad3823 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 12 Oct 2021 19:15:55 +0000 Subject: Fixed mypy typing errors --- gn3/computations/rqtl.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 45232e3..bb28dc3 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -105,8 +105,8 @@ def pairscan_for_figure(file_name: str) -> Dict: # Open the map file with the list of markers/pseudomarkers and their positions with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", "MAP_" + file_name), "r") as the_file: - chr_list = [] - pos_list = [] + chr_list = [] # type: List + pos_list = [] # type: List for i, line in enumerate(the_file): if i == 0: # Skip first line continue @@ -187,7 +187,7 @@ def build_marker_pos_dict(genotype_file: str) -> Dict: mb_exists = "Mb" in header_items pos_column = header_items.index("Mb") if mb_exists else header_items.index("cM") - the_markers = {} + the_markers = {"1": {}} # type: Dict[str, Dict] for line in lines[1:]: # The lines with markers line_items = line.split("\t") this_chr = line_items[0] -- cgit v1.2.3 From 607c6e627c23c1bce3b199b145855182ab51b211 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 12 Oct 2021 20:54:47 +0000 Subject: Fixes pylint errors --- gn3/api/rqtl.py | 3 +- gn3/computations/rqtl.py | 86 ++++++++++++++++++++++++++++++++++-------------- 2 files changed, 64 insertions(+), 25 deletions(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index 6548e1a..85b2460 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -6,7 +6,8 @@ from flask import current_app from flask import jsonify from flask import request -from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_mapping, process_rqtl_pairscan, process_perm_output +from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_mapping, \ + process_rqtl_pairscan, process_perm_output from gn3.computations.gemma import do_paths_exist rqtl = Blueprint("rqtl", __name__) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index bb28dc3..e81aba3 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -118,16 +118,19 @@ def pairscan_for_figure(file_name: str) -> Dict: return figure_data -def pairscan_for_table(file_name: str, geno_file: str) -> List: - """Given an output file name, read in R/qtl pair-scan results and return - a list of results to be used when generating the results table (which will include marker names)""" - table_data = [] - # Open the map file with the list of markers/pseudomarkers and create list of marker obs +def get_marker_list(map_file: str) -> List: + """ + Open the map file with the list of markers/pseudomarkers and create list of marker obs + + PARAMETERS: + map_file: The map file output by R/qtl containing marker/pseudomarker positions + """ + + marker_list = [] with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), - "output", "MAP_" + file_name), "r") as the_file: - marker_list = [] - for i, line in enumerate(the_file.readlines()[1:]): + "output", map_file), "r") as map_fh: + for line in map_fh.readlines()[1:]: line_items = [item.rstrip('\n') for item in line.split(",")] this_marker = { 'name': line_items[0], @@ -137,37 +140,72 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List: marker_list.append(this_marker) - # Get the list of original markers from the .geno file - original_markers = build_marker_pos_dict(geno_file) + return marker_list - # Open the file with the actual results and write the results as - # they will be displayed in the results table +def generate_table_rows(results_file: str, marker_list: List, original_markers: Dict) -> List: + """ + Open the file with the actual R/qtl pair-scan results and write them as + they will be displayed in the results table + + PARAMETERS: + results_file: The filename containing R/qtl pair-scan results + marker_list: List of marker/pseudomarker names/positions from results + original_markers: Dict of markers from the .geno file, for finding proximal/distal + markers to each pseudomarker + """ + + table_data = [] with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), - "output", file_name), "r") as the_file: + "output", results_file), "r") as the_file: for i, line in enumerate(the_file.readlines()[1:]): marker_1 = marker_list[i] - proximal1, distal1 = find_nearest_marker(marker_1['chr'], marker_1['pos'], original_markers) + marker_1['proximal'], marker_1['distal'] = find_nearest_marker(marker_1['chr'], + marker_1['pos'], + original_markers) line_items = [item.rstrip('\n') for item in line.split(",")] for j, item in enumerate(line_items[1:]): marker_2 = marker_list[j] - proximal2, distal2 = find_nearest_marker(marker_2['chr'], marker_2['pos'], original_markers) + marker_2['proximal'], marker_2['distal'] = find_nearest_marker(marker_2['chr'], + marker_2['pos'], + original_markers) try: lod_score = f"{float(item):.3f}" - except: + except ValueError: lod_score = f"{item}" - this_line = { - 'proximal1': proximal1, - 'distal1': distal1, + + table_data.append({ + 'proximal1': marker_1['proximal'], + 'distal1': marker_1['distal'], 'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM", 'lod': lod_score, - 'proximal2': proximal2, - 'distal2': distal2, + 'proximal2': marker_2['proximal'], + 'distal2': marker_2['distal'], 'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM" - } + }) + + return table_data + +def pairscan_for_table(file_name: str, geno_file: str) -> List: + """ + Read in R/qtl pair-scan results and return as List representing + table row contents + + PARAMETERS: + file_name: The filename containing R/qtl pair-scan results + geno_file: Filename of the genotype file (used to get marker positions) + """ - table_data.append(this_line) + # Open the map file with the list of markers/pseudomarkers and create list of marker obs + marker_list = get_marker_list("MAP_" + file_name) + + # Get the list of original markers from the .geno file + original_markers = build_marker_pos_dict(geno_file) + + # Open the file with the actual results and write the results as + # they will be displayed in the results table + table_data = generate_table_rows(file_name, marker_list, original_markers) - return sorted(table_data, key = lambda i: float(i['lod']), reverse=True)[:500] + return sorted(table_data, key=lambda i: float(i['lod']), reverse=True)[:500] def build_marker_pos_dict(genotype_file: str) -> Dict: """Gets list of markers and their positions from .geno file -- cgit v1.2.3 From e13e10d0f0dc96518e1d5efa9865ac2821b1c3f9 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 22 Jul 2021 23:47:58 +0000 Subject: Added option for running pairscan to rqtl_wrapper.R --- scripts/rqtl_wrapper.R | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index 13c2684..4c96ff2 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -12,6 +12,7 @@ option_list = list( make_option(c("--covarstruct"), type="character", help="File detailing which covariates are categorical or numerical"), make_option(c("--model"), type="character", default="normal", help="Mapping Model - Normal or Non-Parametric"), make_option(c("--method"), type="character", default="hk", help="Mapping Method - hk (Haley Knott), ehk (Extended Haley Knott), mr (Marker Regression), em (Expectation-Maximization), imp (Imputation)"), + make_option(c("--pairscan"), action="store_true", default=NULL, help="Run Pair Scan - the R/qtl function scantwo"), make_option(c("-i", "--interval"), action="store_true", default=NULL, help="Use interval mapping"), make_option(c("--nperm"), type="integer", default=0, help="Number of permutations"), make_option(c("--pstrata"), action="store_true", default=NULL, help="Use permutation strata (stored as final column/vector in phenotype input file)"), -- cgit v1.2.3 From 2849792ddc2e76750421c75d926d5a7f18c57e97 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 23 Jul 2021 19:40:04 +0000 Subject: - Added scan_func function that determines whether scanone or scantwo (pairscan) is used - For pairscan default to using step 20 (subject to change, but some step is required during calc.genoprob to make it run fast enough) - Added some new verbose prints --- scripts/rqtl_wrapper.R | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index 4c96ff2..0d13ccb 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -175,6 +175,9 @@ cross_object = geno_to_csvr(geno_file, trait_names, trait_vals, cross_file, type if (!is.null(opt$interval)) { verbose_print('Calculating genotype probabilities with interval mapping\n') cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max") +} else if (!is.null(opt$pairscan)) { + verbose_print('Calculating genotype probabilities with interval mapping\n') + cross_object <- calc.genoprob(cross_object, step=20) } else { verbose_print('Calculating genotype probabilities\n') cross_object <- calc.genoprob(cross_object) @@ -189,6 +192,7 @@ if (type == "4-way") { # Pull covariates out of cross object, if they exist covars <- c() # Holds the covariates which should be passed to R/qtl if (!is.null(opt$addcovar)) { + verbose_print('Pulling covariates out of cross object\n') # If perm strata are being used, it'll be included as the final column in the phenotype file if (!is.null(opt$pstrata)) { covar_names = trait_names[2:(length(trait_names)-1)] @@ -224,16 +228,28 @@ if (!is.null(opt$addcovar)) { # Pull permutation strata out of cross object, if it is being used perm_strata = vector() if (!is.null(opt$pstrata)) { + verbose_print('Pulling permutation strata out of cross object\n') strata_col = trait_names[length(trait_names)] perm_strata <- pull.pheno(cross_object, strata_col) } # If a marker name is supplied as covariate, get its vector of values and add them as a covariate if (!is.null(opt$control)) { + verbose_print('Creating marker covariates and binding them to covariates vector\n') marker_covars = create_marker_covars(cross_object, opt$control) covars <- cbind(covars, marker_covars) } +if (!is.null(opt$pairscan)) { + scan_func <- function(...){ + scantwo(...) + } +} else { + scan_func <- function(...){ + scanone(...) + } +} + # Calculate permutations if (opt$nperm > 0) { if (!is.null(opt$filename)){ @@ -244,19 +260,19 @@ if (opt$nperm > 0) { if (!is.null(opt$addcovar) || !is.null(opt$control)){ if (!is.null(opt$pstrata)) { - verbose_print('Running ', opt$nperm, ' permutations with cofactors and strata\n') - perm_results = scanone(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method) + verbose_print('Running permutations with cofactors and strata\n') + perm_results = scan_func(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method) } else { - verbose_print('Running ', opt$nperm, ' permutations with cofactors\n') - perm_results = scanone(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, model=opt$model, method=opt$method) + verbose_print('Running permutations with cofactors\n') + perm_results = scan_func(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, model=opt$model, method=opt$method) } } else { if (!is.null(opt$pstrata)) { - verbose_print('Running ', opt$nperm, ' permutations with strata\n') - perm_results = scanone(cross_object, pheno.col=1, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method) + verbose_print('Running permutations with strata\n') + perm_results = scan_func(cross_object, pheno.col=1, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method) } else { - verbose_print('Running ', opt$nperm, ' permutations\n') - perm_results = scanone(cross_object, pheno.col=1, n.perm=opt$nperm, model=opt$model, method=opt$method) + verbose_print('Running permutations\n') + perm_results = scan_func(cross_object, pheno.col=1, n.perm=opt$nperm, model=opt$model, method=opt$method) } } write.csv(perm_results, perm_out_file) @@ -269,11 +285,11 @@ if (!is.null(opt$filename)){ } if (!is.null(opt$addcovar) || !is.null(opt$control)){ - verbose_print('Running scanone with cofactors\n') - qtl_results = scanone(cross_object, pheno.col=1, addcovar=covars, model=opt$model, method=opt$method) + verbose_print('Running scan with cofactors\n') + qtl_results = scan_func(cross_object, pheno.col=1, addcovar=covars, model=opt$model, method=opt$method) } else { - verbose_print('Running scanone\n') - qtl_results = scanone(cross_object, pheno.col=1, model=opt$model, method=opt$method) + verbose_print('Running scan\n') + qtl_results = scan_func(cross_object, pheno.col=1, model=opt$model, method=opt$method) } #QTL main effects on adjusted longevity -- cgit v1.2.3 From acfea0fc8793372723be0e64316002ed9bc2403b Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 23 Jul 2021 20:08:36 +0000 Subject: Added line priting pair-scan results to CSV and changed the default step-size to 10cM for pair-scan --- scripts/rqtl_wrapper.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index 0d13ccb..26d0a9c 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -177,7 +177,7 @@ if (!is.null(opt$interval)) { cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max") } else if (!is.null(opt$pairscan)) { verbose_print('Calculating genotype probabilities with interval mapping\n') - cross_object <- calc.genoprob(cross_object, step=20) + cross_object <- calc.genoprob(cross_object, step=10) } else { verbose_print('Calculating genotype probabilities\n') cross_object <- calc.genoprob(cross_object) @@ -338,4 +338,9 @@ if (type == "4-way") { colnames(qtl_results)[4:7] <- c("AC", "AD", "BC", "BD") } -write.csv(qtl_results, out_file) +verbose_print('Writing results to CSV file\n') +if (!is.null(opt$pairscan)) { + write.csv(qtl_results[1], out_file) +} else { + write.csv(qtl_results, out_file) +} -- cgit v1.2.3 From 9a8ab0ea1bbfc82c79cb8183c37200a09ab27d9c Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 26 Jul 2021 20:47:37 +0000 Subject: Added pairscan boolean kwarg and process_rqtl_pairscan function for reading in pairscan results + renamed process_rqtl_output to process_rqtl_mapping to distinguish between that and pairscan --- gn3/api/rqtl.py | 8 +++++--- gn3/computations/rqtl.py | 17 ++++++++++++++++- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index b5627c5..ed01a97 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -25,7 +25,7 @@ run the rqtl_wrapper script and return the results as JSON # Split kwargs by those with values and boolean ones that just convert to True/False kwargs = ["covarstruct", "model", "method", "nperm", "scale", "control_marker"] - boolean_kwargs = ["addcovar", "interval", "pstrata"] + boolean_kwargs = ["addcovar", "interval", "pstrata", "pairscan"] all_kwargs = kwargs + boolean_kwargs rqtl_kwargs = {"geno": genofile, "pheno": phenofile} @@ -48,9 +48,11 @@ run the rqtl_wrapper script and return the results as JSON "output", rqtl_cmd.get('output_file'))): os.system(rqtl_cmd.get('rqtl_cmd')) - rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) + if "pairscan" in boolean_kwargs: + rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file')) + else: + rqtl_output['results'] = process_rqtl_mapping(rqtl_cmd.get('output_file')) - rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) if int(rqtl_kwargs['nperm']) > 0: rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = \ process_perm_output(rqtl_cmd.get('output_file')) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index b3539a9..b9e715a 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -47,7 +47,7 @@ output filename generated from a hash of the genotype and phenotype files } -def process_rqtl_output(file_name: str) -> List: +def process_rqtl_mapping(file_name: str) -> List: """Given an output file name, read in R/qtl results and return a List of marker objects @@ -80,6 +80,21 @@ def process_rqtl_output(file_name: str) -> List: return marker_obs +def process_rqtl_pairscan(file_name: str) -> List: + """Given an output file name, read in R/qtl pair-scan results and return + a List of Lists representing the matrix of results + + """ + results = [] + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", file_name), "r") as the_file: + for i, line in enumerate(the_file): + if i == 0: # Skip first line + continue + line_items = line.split(",") + results.append(line_items[1:]) # Append all but first item in line + + return results def process_perm_output(file_name: str): """Given base filename, read in R/qtl permutation output and calculate -- cgit v1.2.3 From 444095309a8841a0998e74f2f4eba2c3fea6890f Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 29 Jul 2021 19:46:34 +0000 Subject: Fix imports to import both process_rqtl_mapping and process_rqtl_pairscan in api/rqtl.py --- gn3/api/rqtl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index ed01a97..abce705 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -6,7 +6,7 @@ from flask import current_app from flask import jsonify from flask import request -from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_output, process_perm_output +from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_mapping, process_rqtl_pairscan, process_perm_output from gn3.computations.gemma import do_paths_exist rqtl = Blueprint("rqtl", __name__) -- cgit v1.2.3 From 4c3359082fbb1eef93a9e54c633f938606cba5f6 Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 2 Aug 2021 21:07:52 +0000 Subject: Updated rqtl_wrapper to also return a map file when doing a pair-scan (since we need the list of markers/pseudomarkers and their positions) --- scripts/rqtl_wrapper.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index 26d0a9c..fb12012 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -176,7 +176,7 @@ if (!is.null(opt$interval)) { verbose_print('Calculating genotype probabilities with interval mapping\n') cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max") } else if (!is.null(opt$pairscan)) { - verbose_print('Calculating genotype probabilities with interval mapping\n') + verbose_print('Calculating genotype probabilities for pair-scan\n') cross_object <- calc.genoprob(cross_object, step=10) } else { verbose_print('Calculating genotype probabilities\n') @@ -340,7 +340,9 @@ if (type == "4-way") { verbose_print('Writing results to CSV file\n') if (!is.null(opt$pairscan)) { + map_out_file = file.path(opt$outdir, paste("MAP_", opt$filename, sep = "" )) write.csv(qtl_results[1], out_file) + write.csv(qtl_results[2], map_out_file) } else { write.csv(qtl_results, out_file) } -- cgit v1.2.3 From 019f196f21cbfd7bb3d91838313aad1d29ea98cb Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 3 Aug 2021 20:34:06 +0000 Subject: Create pairscan_for_figure and pairscan_for_table functions that return the Dict and List respectively used for the pair scan figure and the table showing the results --- gn3/computations/rqtl.py | 87 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 82 insertions(+), 5 deletions(-) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index b9e715a..87c9ae8 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -82,19 +82,96 @@ def process_rqtl_mapping(file_name: str) -> List: def process_rqtl_pairscan(file_name: str) -> List: """Given an output file name, read in R/qtl pair-scan results and return - a List of Lists representing the matrix of results + a list of both the JSON needed for the d3panels figure and a list of results + to be used when generating the results table (which will include marker names) """ - results = [] + + figure_data = pairscan_results_for_figure(file_name) + table_data = pairscan_results_for_table(file_name) + + return [figure_data, table_data] + +def pairscan_for_figure(file_name: str) -> Dict: + """Given an output file name, read in R/qtl pair-scan results and return + the JSON needed for the d3panels figure + + """ + figure_data = {} + + # Open the file with the actual results, written as a list of lists with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", file_name), "r") as the_file: + lod_results = [] for i, line in enumerate(the_file): if i == 0: # Skip first line continue - line_items = line.split(",") - results.append(line_items[1:]) # Append all but first item in line + line_items = [item.rstrip('\n') for item in line.split(",")] + lod_results.append(line_items[1:]) # Append all but first item in line + figure_data['lod'] = lod_results + + # Open the map file with the list of markers/pseudomarkers and their positions + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", "MAP_" + file_name), "r") as the_file: + chr_list = [] + pos_list = [] + for i, line in enumerate(the_file): + if i == 0: # Skip first line + continue + line_items = [item.rstrip('\n') for item in line.split(",")] + chr_list.append(line_items[1]) + pos_list.append(line_items[2]) + figure_data['chr'] = chr_list + figure_data['pos'] = pos_list + + return figure_data + + +def pairscan_for_table(file_name: str) -> List: + """Given an output file name, read in R/qtl pair-scan results and return + a list of results to be used when generating the results table (which will include marker names) + + """ + table_data = [] + + # Open the map file with the list of markers/pseudomarkers and create list of marker obs + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", "MAP_" + file_name), "r") as the_file: + marker_list = [] + for i, line in enumerate(the_file.readlines()[1:]): + line_items = [item.rstrip('\n') for item in line.split(",")] + this_marker = { + 'name': line_items[0], + 'chr': line_items[1][1:-1], # Strip quotes from beginning and end of chr string + 'pos': line_items[2] + } + + marker_list.append(this_marker) + + # Open the file with the actual results and write the results as + # they will be displayed in the results table + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", file_name), "r") as the_file: + for i, line in enumerate(the_file.readlines()[1:]): + marker_1 = marker_list[i] + line_items = [item.rstrip('\n') for item in line.split(",")] + for j, item in enumerate(line_items[1:]): + marker_2 = marker_list[j] + try: + lod_score = f"{float(item):.3f}" + except: + lod_score = f"{item}" + this_line = { + 'marker1': f"{marker_1['name']}", + 'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM", + 'lod': lod_score, + 'marker2': f"{marker_2['name']}", + 'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM" + } + table_data.append(this_line) + + return table_data - return results def process_perm_output(file_name: str): """Given base filename, read in R/qtl permutation output and calculate -- cgit v1.2.3 From f494a9462ddb22c117b16be9b4191564b3191c01 Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 4 Aug 2021 19:41:47 +0000 Subject: Fixed a cople function calls to use the updated function names --- gn3/computations/rqtl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 87c9ae8..092fac6 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -87,8 +87,8 @@ def process_rqtl_pairscan(file_name: str) -> List: """ - figure_data = pairscan_results_for_figure(file_name) - table_data = pairscan_results_for_table(file_name) + figure_data = pairscan_for_figure(file_name) + table_data = pairscan_for_table(file_name) return [figure_data, table_data] -- cgit v1.2.3 From 35f0784ee32ccc76416d184ac65e8cad6f7aa490 Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 11 Aug 2021 17:36:18 +0000 Subject: Removed quotes from beginning and end of chromosome string --- gn3/computations/rqtl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 092fac6..36c9fa2 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -119,7 +119,7 @@ def pairscan_for_figure(file_name: str) -> Dict: if i == 0: # Skip first line continue line_items = [item.rstrip('\n') for item in line.split(",")] - chr_list.append(line_items[1]) + chr_list.append(line_items[1][1:-1]) pos_list.append(line_items[2]) figure_data['chr'] = chr_list figure_data['pos'] = pos_list -- cgit v1.2.3 From 40e85090b60ef002723b614fd712b6affbd68176 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 26 Aug 2021 20:17:14 +0000 Subject: Added genofile name to inputs for processing R/qtl pair-scan results, since it's needed to store the proximal/distal markers for each position --- gn3/api/rqtl.py | 2 +- gn3/computations/rqtl.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index abce705..9aab9fa 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -49,7 +49,7 @@ run the rqtl_wrapper script and return the results as JSON os.system(rqtl_cmd.get('rqtl_cmd')) if "pairscan" in boolean_kwargs: - rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file')) + rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file'), genofile) else: rqtl_output['results'] = process_rqtl_mapping(rqtl_cmd.get('output_file')) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 36c9fa2..b05a577 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -80,7 +80,7 @@ def process_rqtl_mapping(file_name: str) -> List: return marker_obs -def process_rqtl_pairscan(file_name: str) -> List: +def process_rqtl_pairscan(file_name: str, geno_file: str) -> List: """Given an output file name, read in R/qtl pair-scan results and return a list of both the JSON needed for the d3panels figure and a list of results to be used when generating the results table (which will include marker names) @@ -88,7 +88,7 @@ def process_rqtl_pairscan(file_name: str) -> List: """ figure_data = pairscan_for_figure(file_name) - table_data = pairscan_for_table(file_name) + table_data = pairscan_for_table(file_name, geno_file) return [figure_data, table_data] @@ -127,7 +127,7 @@ def pairscan_for_figure(file_name: str) -> Dict: return figure_data -def pairscan_for_table(file_name: str) -> List: +def pairscan_for_table(file_name: str, geno_file: str) -> List: """Given an output file name, read in R/qtl pair-scan results and return a list of results to be used when generating the results table (which will include marker names) -- cgit v1.2.3 From 64aedf4202efc4e50648d496d2a8eed3fc828b2f Mon Sep 17 00:00:00 2001 From: zsloan Date: Sat, 28 Aug 2021 00:46:10 +0000 Subject: Fix issue that causes R/qtl to always run pair-scan even if pair-scan isn't selected --- gn3/api/rqtl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index 9aab9fa..b6c04eb 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -48,7 +48,7 @@ run the rqtl_wrapper script and return the results as JSON "output", rqtl_cmd.get('output_file'))): os.system(rqtl_cmd.get('rqtl_cmd')) - if "pairscan" in boolean_kwargs: + if "pairscan" in rqtl_bool_kwargs: rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file'), genofile) else: rqtl_output['results'] = process_rqtl_mapping(rqtl_cmd.get('output_file')) -- cgit v1.2.3 From a4a2886c16b578dd4f91520bf3fa9a1e9d801edb Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 23 Sep 2021 22:33:10 +0000 Subject: Add functions for getting proximal/distal markers for each pseudomarker position in pair-scan results + return only the sorted top 500 results --- gn3/computations/rqtl.py | 62 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 4 deletions(-) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index b05a577..51ac3c7 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -1,5 +1,6 @@ """Procedures related rqtl computations""" import os +from bisect import bisect from typing import Dict, List, Union import numpy as np @@ -148,30 +149,83 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List: marker_list.append(this_marker) + # Get the list of original markers from the .geno file + original_markers = build_marker_pos_list(geno_file) + # Open the file with the actual results and write the results as # they will be displayed in the results table with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", file_name), "r") as the_file: for i, line in enumerate(the_file.readlines()[1:]): marker_1 = marker_list[i] + proximal1, distal1 = find_nearest_marker(marker_1['chr'], marker_1['pos'], original_markers) line_items = [item.rstrip('\n') for item in line.split(",")] for j, item in enumerate(line_items[1:]): marker_2 = marker_list[j] + proximal2, distal2 = find_nearest_marker(marker_2['chr'], marker_2['pos'], original_markers) try: lod_score = f"{float(item):.3f}" except: lod_score = f"{item}" this_line = { - 'marker1': f"{marker_1['name']}", + 'proximal1': proximal1, + 'distal1': distal1, 'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM", 'lod': lod_score, - 'marker2': f"{marker_2['name']}", + 'proximal2': proximal2, + 'distal2': distal2, 'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM" } - table_data.append(this_line) - return table_data + table_data.append(this_line) + + return sorted(table_data, key = lambda i: float(i['lod']), reverse=True)[:500] + +def build_marker_pos_list(genotype_file): + """ + Gets list of markers and their positions from .geno file + + Basically a pared-down version of parse_genotype_file for R/qtl pair-scan + """ + + with open(genotype_file, "r") as infile: + contents = infile.readlines() + + # Get all lines after the metadata + lines = tuple(line for line in contents if + ((not line.strip().startswith("#")) and + (not line.strip().startswith("@")) and + (not line.strip() == ""))) + + header_items = lines[0].split("\t") + mb_exists = "Mb" in header_items + pos_column = header_items.index("Mb") if mb_exists else header_items.index("cM") + + the_markers = {} + for line in lines[1:]: # The lines with markers + line_items = line.split("\t") + this_chr = line_items[0] + if this_chr not in the_markers: + the_markers[this_chr] = {} + the_markers[this_chr][str(float(line_items[pos_column]))] = line_items[1] + + return the_markers + +def find_nearest_marker(the_chr, the_pos, marker_list): + """ + Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results), + return the nearest real marker + """ + + pos_list = [float(pos) for pos in marker_list[the_chr]] + + # Get the position of the pseudomarker in the list of markers for the chr + the_pos_index = bisect(pos_list, float(the_pos)) + + proximal_marker = marker_list[the_chr][str(pos_list[the_pos_index-1])] + distal_marker = marker_list[the_chr][str(pos_list[the_pos_index])] + return proximal_marker, distal_marker def process_perm_output(file_name: str): """Given base filename, read in R/qtl permutation output and calculate -- cgit v1.2.3 From ea15c3f0ed8fe6682af1ceb45e1fc962d09d2e7e Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 23 Sep 2021 22:33:31 +0000 Subject: Add typing to some functions --- gn3/computations/rqtl.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 51ac3c7..cfa58a8 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -1,7 +1,7 @@ """Procedures related rqtl computations""" import os from bisect import bisect -from typing import Dict, List, Union +from typing import Dict, List, Tuple, Union import numpy as np @@ -150,7 +150,7 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List: marker_list.append(this_marker) # Get the list of original markers from the .geno file - original_markers = build_marker_pos_list(geno_file) + original_markers = build_marker_pos_dict(geno_file) # Open the file with the actual results and write the results as # they will be displayed in the results table @@ -181,7 +181,7 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List: return sorted(table_data, key = lambda i: float(i['lod']), reverse=True)[:500] -def build_marker_pos_list(genotype_file): +def build_marker_pos_dict(genotype_file: str) -> Dict: """ Gets list of markers and their positions from .geno file @@ -211,7 +211,7 @@ def build_marker_pos_list(genotype_file): return the_markers -def find_nearest_marker(the_chr, the_pos, marker_list): +def find_nearest_marker(the_chr: str, the_pos: str, marker_list: Dict) -> Tuple[str, str]: """ Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results), return the nearest real marker @@ -227,7 +227,7 @@ def find_nearest_marker(the_chr, the_pos, marker_list): return proximal_marker, distal_marker -def process_perm_output(file_name: str): +def process_perm_output(file_name: str) -> Tuple[List, float, float]: """Given base filename, read in R/qtl permutation output and calculate suggestive and significant thresholds -- cgit v1.2.3 From 14cadeb87e2a98300c7d47cf51d19c2e3508affe Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 23 Sep 2021 22:48:39 +0000 Subject: Tried to make the docstrings more consistent --- gn3/computations/rqtl.py | 38 +++++++++++--------------------------- 1 file changed, 11 insertions(+), 27 deletions(-) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index cfa58a8..8d066ef 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -16,9 +16,7 @@ def generate_rqtl_cmd(rqtl_wrapper_cmd: str, rqtl_wrapper_bool_kwargs: list) -> Dict: """Given the base rqtl_wrapper command and dict of keyword arguments, return the full rqtl_wrapper command and an -output filename generated from a hash of the genotype and phenotype files - - """ +output filename generated from a hash of the genotype and phenotype files""" # Generate a hash from contents of the genotype and phenotype files _hash = get_hash_of_files( @@ -50,9 +48,7 @@ output filename generated from a hash of the genotype and phenotype files def process_rqtl_mapping(file_name: str) -> List: """Given an output file name, read in R/qtl results and return - a List of marker objects - - """ + a List of marker objects""" marker_obs = [] # Later I should probably redo this using csv.read to avoid the # awkwardness with removing quotes with [1:-1] @@ -83,11 +79,8 @@ def process_rqtl_mapping(file_name: str) -> List: def process_rqtl_pairscan(file_name: str, geno_file: str) -> List: """Given an output file name, read in R/qtl pair-scan results and return - a list of both the JSON needed for the d3panels figure and a list of results - to be used when generating the results table (which will include marker names) - - """ - +a list of both the JSON needed for the d3panels figure and a list of results +to be used when generating the results table (which will include marker names)""" figure_data = pairscan_for_figure(file_name) table_data = pairscan_for_table(file_name, geno_file) @@ -95,9 +88,7 @@ def process_rqtl_pairscan(file_name: str, geno_file: str) -> List: def pairscan_for_figure(file_name: str) -> Dict: """Given an output file name, read in R/qtl pair-scan results and return - the JSON needed for the d3panels figure - - """ + the JSON needed for the d3panels figure""" figure_data = {} # Open the file with the actual results, written as a list of lists @@ -130,9 +121,7 @@ def pairscan_for_figure(file_name: str) -> Dict: def pairscan_for_table(file_name: str, geno_file: str) -> List: """Given an output file name, read in R/qtl pair-scan results and return - a list of results to be used when generating the results table (which will include marker names) - - """ + a list of results to be used when generating the results table (which will include marker names)""" table_data = [] # Open the map file with the list of markers/pseudomarkers and create list of marker obs @@ -182,11 +171,9 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List: return sorted(table_data, key = lambda i: float(i['lod']), reverse=True)[:500] def build_marker_pos_dict(genotype_file: str) -> Dict: - """ - Gets list of markers and their positions from .geno file + """Gets list of markers and their positions from .geno file - Basically a pared-down version of parse_genotype_file for R/qtl pair-scan - """ + Basically a pared-down version of parse_genotype_file for R/qtl pair-scan""" with open(genotype_file, "r") as infile: contents = infile.readlines() @@ -212,10 +199,8 @@ def build_marker_pos_dict(genotype_file: str) -> Dict: return the_markers def find_nearest_marker(the_chr: str, the_pos: str, marker_list: Dict) -> Tuple[str, str]: - """ - Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results), - return the nearest real marker - """ + """Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results), + return the nearest real marker""" pos_list = [float(pos) for pos in marker_list[the_chr]] @@ -229,9 +214,8 @@ def find_nearest_marker(the_chr: str, the_pos: str, marker_list: Dict) -> Tuple[ def process_perm_output(file_name: str) -> Tuple[List, float, float]: """Given base filename, read in R/qtl permutation output and calculate - suggestive and significant thresholds + suggestive and significant thresholds""" - """ perm_results = [] with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", "PERM_" + file_name), "r", encoding="utf-8") as the_file: -- cgit v1.2.3 From d7f9cd14089a23a3d3bab8b54320a9ef4844e560 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 12 Oct 2021 19:15:55 +0000 Subject: Fixed mypy typing errors --- gn3/computations/rqtl.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 8d066ef..4327cac 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -105,8 +105,8 @@ def pairscan_for_figure(file_name: str) -> Dict: # Open the map file with the list of markers/pseudomarkers and their positions with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", "MAP_" + file_name), "r") as the_file: - chr_list = [] - pos_list = [] + chr_list = [] # type: List + pos_list = [] # type: List for i, line in enumerate(the_file): if i == 0: # Skip first line continue @@ -188,7 +188,7 @@ def build_marker_pos_dict(genotype_file: str) -> Dict: mb_exists = "Mb" in header_items pos_column = header_items.index("Mb") if mb_exists else header_items.index("cM") - the_markers = {} + the_markers = {"1": {}} # type: Dict[str, Dict] for line in lines[1:]: # The lines with markers line_items = line.split("\t") this_chr = line_items[0] -- cgit v1.2.3 From 47c38fe08f27a7de58430c2f7f5635a9ba5836c8 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 12 Oct 2021 20:54:47 +0000 Subject: Fixes pylint errors --- gn3/api/rqtl.py | 3 +- gn3/computations/rqtl.py | 84 ++++++++++++++++++++++++++++++++++-------------- 2 files changed, 62 insertions(+), 25 deletions(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index b6c04eb..70ebe12 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -6,7 +6,8 @@ from flask import current_app from flask import jsonify from flask import request -from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_mapping, process_rqtl_pairscan, process_perm_output +from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_mapping, \ + process_rqtl_pairscan, process_perm_output from gn3.computations.gemma import do_paths_exist rqtl = Blueprint("rqtl", __name__) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 4327cac..65ee6de 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -118,17 +118,18 @@ def pairscan_for_figure(file_name: str) -> Dict: return figure_data +def get_marker_list(map_file: str) -> List: + """ + Open the map file with the list of markers/pseudomarkers and create list of marker obs -def pairscan_for_table(file_name: str, geno_file: str) -> List: - """Given an output file name, read in R/qtl pair-scan results and return - a list of results to be used when generating the results table (which will include marker names)""" - table_data = [] + PARAMETERS: + map_file: The map file output by R/qtl containing marker/pseudomarker positions + """ - # Open the map file with the list of markers/pseudomarkers and create list of marker obs + marker_list = [] with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), - "output", "MAP_" + file_name), "r") as the_file: - marker_list = [] - for i, line in enumerate(the_file.readlines()[1:]): + "output", map_file), "r") as map_fh: + for line in map_fh.readlines()[1:]: line_items = [item.rstrip('\n') for item in line.split(",")] this_marker = { 'name': line_items[0], @@ -138,37 +139,72 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List: marker_list.append(this_marker) - # Get the list of original markers from the .geno file - original_markers = build_marker_pos_dict(geno_file) + return marker_list - # Open the file with the actual results and write the results as - # they will be displayed in the results table +def generate_table_rows(results_file: str, marker_list: List, original_markers: Dict) -> List: + """ + Open the file with the actual R/qtl pair-scan results and write them as + they will be displayed in the results table + + PARAMETERS: + results_file: The filename containing R/qtl pair-scan results + marker_list: List of marker/pseudomarker names/positions from results + original_markers: Dict of markers from the .geno file, for finding proximal/distal + markers to each pseudomarker + """ + + table_data = [] with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), - "output", file_name), "r") as the_file: + "output", results_file), "r") as the_file: for i, line in enumerate(the_file.readlines()[1:]): marker_1 = marker_list[i] - proximal1, distal1 = find_nearest_marker(marker_1['chr'], marker_1['pos'], original_markers) + marker_1['proximal'], marker_1['distal'] = find_nearest_marker(marker_1['chr'], + marker_1['pos'], + original_markers) line_items = [item.rstrip('\n') for item in line.split(",")] for j, item in enumerate(line_items[1:]): marker_2 = marker_list[j] - proximal2, distal2 = find_nearest_marker(marker_2['chr'], marker_2['pos'], original_markers) + marker_2['proximal'], marker_2['distal'] = find_nearest_marker(marker_2['chr'], + marker_2['pos'], + original_markers) try: lod_score = f"{float(item):.3f}" - except: + except ValueError: lod_score = f"{item}" - this_line = { - 'proximal1': proximal1, - 'distal1': distal1, + + table_data.append({ + 'proximal1': marker_1['proximal'], + 'distal1': marker_1['distal'], 'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM", 'lod': lod_score, - 'proximal2': proximal2, - 'distal2': distal2, + 'proximal2': marker_2['proximal'], + 'distal2': marker_2['distal'], 'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM" - } + }) - table_data.append(this_line) + return table_data + +def pairscan_for_table(file_name: str, geno_file: str) -> List: + """ + Read in R/qtl pair-scan results and return as List representing + table row contents + + PARAMETERS: + file_name: The filename containing R/qtl pair-scan results + geno_file: Filename of the genotype file (used to get marker positions) + """ + + # Open the map file with the list of markers/pseudomarkers and create list of marker obs + marker_list = get_marker_list("MAP_" + file_name) + + # Get the list of original markers from the .geno file + original_markers = build_marker_pos_dict(geno_file) + + # Open the file with the actual results and write the results as + # they will be displayed in the results table + table_data = generate_table_rows(file_name, marker_list, original_markers) - return sorted(table_data, key = lambda i: float(i['lod']), reverse=True)[:500] + return sorted(table_data, key=lambda i: float(i['lod']), reverse=True)[:500] def build_marker_pos_dict(genotype_file: str) -> Dict: """Gets list of markers and their positions from .geno file -- cgit v1.2.3 From 0e6990940e22eb0431d6a27e45d29bc04d8ad582 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 22 Mar 2022 20:28:58 +0000 Subject: Change order of if statements for running genoprob command Now it checks for pairscan first, just in case interval ends up being passed (which is an irrelevant parameter for pairscan) Also added a couple more verbose prints --- scripts/rqtl_wrapper.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index fb12012..5ecd774 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -172,12 +172,12 @@ verbose_print('Generating cross object\n') cross_object = geno_to_csvr(geno_file, trait_names, trait_vals, cross_file, type) # Calculate genotype probabilities -if (!is.null(opt$interval)) { - verbose_print('Calculating genotype probabilities with interval mapping\n') - cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max") -} else if (!is.null(opt$pairscan)) { +if (!is.null(opt$pairscan)) { verbose_print('Calculating genotype probabilities for pair-scan\n') cross_object <- calc.genoprob(cross_object, step=10) +} else if (!is.null(opt$interval)) { + verbose_print('Calculating genotype probabilities with interval mapping\n') + cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max") } else { verbose_print('Calculating genotype probabilities\n') cross_object <- calc.genoprob(cross_object) @@ -241,10 +241,12 @@ if (!is.null(opt$control)) { } if (!is.null(opt$pairscan)) { + verbose_print("Running scantwo") scan_func <- function(...){ scantwo(...) } } else { + verbose_print("Running scanone") scan_func <- function(...){ scanone(...) } -- cgit v1.2.3