From 238e3a1cfd4bc8677a530410af2767d28e93af62 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 22 Jul 2021 23:47:58 +0000 Subject: Added option for running pairscan to rqtl_wrapper.R --- scripts/rqtl_wrapper.R | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index f7e0406..98b1231 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -11,6 +11,7 @@ option_list = list( make_option(c("-c", "--addcovar"), action="store_true", default=NULL, help="Use covariates (included as extra columns in the phenotype input file)"), make_option(c("--model"), type="character", default="normal", help="Mapping Model - Normal or Non-Parametric"), make_option(c("--method"), type="character", default="hk", help="Mapping Method - hk (Haley Knott), ehk (Extended Haley Knott), mr (Marker Regression), em (Expectation-Maximization), imp (Imputation)"), + make_option(c("--pairscan"), action="store_true", default=NULL, help="Run Pair Scan - the R/qtl function scantwo"), make_option(c("-i", "--interval"), action="store_true", default=NULL, help="Use interval mapping"), make_option(c("--nperm"), type="integer", default=0, help="Number of permutations"), make_option(c("--pstrata"), action="store_true", default=NULL, help="Use permutation strata (stored as final column/vector in phenotype input file)"), -- cgit v1.2.3 From c3d59414b4bd23e52823a8b7e846f732e36d8c56 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 23 Jul 2021 19:40:04 +0000 Subject: - Added scan_func function that determines whether scanone or scantwo (pairscan) is used - For pairscan default to using step 20 (subject to change, but some step is required during calc.genoprob to make it run fast enough) - Added some new verbose prints --- scripts/rqtl_wrapper.R | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index 98b1231..77ed227 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -156,6 +156,9 @@ cross_object = geno_to_csvr(geno_file, trait_names, trait_vals, cross_file) if (!is.null(opt$interval)) { verbose_print('Calculating genotype probabilities with interval mapping\n') cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max") +} else if (!is.null(opt$pairscan)) { + verbose_print('Calculating genotype probabilities with interval mapping\n') + cross_object <- calc.genoprob(cross_object, step=20) } else { verbose_print('Calculating genotype probabilities\n') cross_object <- calc.genoprob(cross_object) @@ -164,6 +167,7 @@ if (!is.null(opt$interval)) { # Pull covariates out of cross object, if they exist covars = vector(mode = "list", length = length(trait_names) - 1) if (!is.null(opt$addcovar)) { + verbose_print('Pulling covariates out of cross object\n') #If perm strata are being used, it'll be included as the final column in the phenotype file if (!is.null(opt$pstrata)) { covar_names = trait_names[3:length(trait_names) - 1] @@ -176,16 +180,28 @@ if (!is.null(opt$addcovar)) { # Pull permutation strata out of cross object, if it is being used perm_strata = vector() if (!is.null(opt$pstrata)) { + verbose_print('Pulling permutation strata out of cross object\n') strata_col = trait_names[length(trait_names)] perm_strata <- pull.pheno(cross_object, strata_col) } # If a marker name is supplied as covariate, get its vector of values and add them as a covariate if (!is.null(opt$control)) { + verbose_print('Creating marker covariates and binding them to covariates vector\n') marker_covars = create_marker_covars(cross_object, opt$control) covars <- cbind(covars, marker_covars) } +if (!is.null(opt$pairscan)) { + scan_func <- function(...){ + scantwo(...) + } +} else { + scan_func <- function(...){ + scanone(...) + } +} + # Calculate permutations if (opt$nperm > 0) { if (!is.null(opt$filename)){ @@ -197,18 +213,18 @@ if (opt$nperm > 0) { if (!is.null(opt$addcovar) || !is.null(opt$control)){ if (!is.null(opt$pstrata)) { verbose_print('Running permutations with cofactors and strata\n') - perm_results = scanone(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method) + perm_results = scan_func(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method) } else { verbose_print('Running permutations with cofactors\n') - perm_results = scanone(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, model=opt$model, method=opt$method) + perm_results = scan_func(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, model=opt$model, method=opt$method) } } else { if (!is.null(opt$pstrata)) { verbose_print('Running permutations with strata\n') - perm_results = scanone(cross_object, pheno.col=1, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method) + perm_results = scan_func(cross_object, pheno.col=1, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method) } else { verbose_print('Running permutations\n') - perm_results = scanone(cross_object, pheno.col=1, n.perm=opt$nperm, model=opt$model, method=opt$method) + perm_results = scan_func(cross_object, pheno.col=1, n.perm=opt$nperm, model=opt$model, method=opt$method) } } write.csv(perm_results, perm_out_file) @@ -221,10 +237,11 @@ if (!is.null(opt$filename)){ } if (!is.null(opt$addcovar) || !is.null(opt$control)){ - verbose_print('Running scanone with cofactors\n') - qtl_results = scanone(cross_object, pheno.col=1, addcovar=covars, model=opt$model, method=opt$method) + verbose_print('Running scan with cofactors\n') + qtl_results = scan_func(cross_object, pheno.col=1, addcovar=covars, model=opt$model, method=opt$method) } else { - verbose_print('Running scanone\n') - qtl_results = scanone(cross_object, pheno.col=1, model=opt$model, method=opt$method) + verbose_print('Running scan\n') + qtl_results = scan_func(cross_object, pheno.col=1, model=opt$model, method=opt$method) } + write.csv(qtl_results, out_file) -- cgit v1.2.3 From b3e6105b5d8d2c2f30f2609290ef62fa17f32e62 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 23 Jul 2021 20:08:36 +0000 Subject: Added line priting pair-scan results to CSV and changed the default step-size to 10cM for pair-scan --- scripts/rqtl_wrapper.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index 77ed227..d8ecfaa 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -158,7 +158,7 @@ if (!is.null(opt$interval)) { cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max") } else if (!is.null(opt$pairscan)) { verbose_print('Calculating genotype probabilities with interval mapping\n') - cross_object <- calc.genoprob(cross_object, step=20) + cross_object <- calc.genoprob(cross_object, step=10) } else { verbose_print('Calculating genotype probabilities\n') cross_object <- calc.genoprob(cross_object) @@ -244,4 +244,9 @@ if (!is.null(opt$addcovar) || !is.null(opt$control)){ qtl_results = scan_func(cross_object, pheno.col=1, model=opt$model, method=opt$method) } -write.csv(qtl_results, out_file) +verbose_print('Writing results to CSV file\n') +if (!is.null(opt$pairscan)) { + write.csv(qtl_results[1], out_file) +} else { + write.csv(qtl_results, out_file) +} -- cgit v1.2.3 From a675e25d885f385c6ecd708f186b2ee448b97cd1 Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 26 Jul 2021 20:47:37 +0000 Subject: Added pairscan boolean kwarg and process_rqtl_pairscan function for reading in pairscan results + renamed process_rqtl_output to process_rqtl_mapping to distinguish between that and pairscan --- gn3/api/rqtl.py | 8 +++++--- gn3/computations/rqtl.py | 17 ++++++++++++++++- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index ebb746c..ff76356 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -25,7 +25,7 @@ run the rqtl_wrapper script and return the results as JSON # Split kwargs by those with values and boolean ones that just convert to True/False kwargs = ["model", "method", "nperm", "scale", "control_marker"] - boolean_kwargs = ["addcovar", "interval", "pstrata"] + boolean_kwargs = ["addcovar", "interval", "pstrata", "pairscan"] all_kwargs = kwargs + boolean_kwargs rqtl_kwargs = {"geno": genofile, "pheno": phenofile} @@ -48,9 +48,11 @@ run the rqtl_wrapper script and return the results as JSON "output", rqtl_cmd.get('output_file'))): os.system(rqtl_cmd.get('rqtl_cmd')) - rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) + if "pairscan" in boolean_kwargs: + rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file')) + else: + rqtl_output['results'] = process_rqtl_mapping(rqtl_cmd.get('output_file')) - rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) if int(rqtl_kwargs['nperm']) > 0: rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = \ process_perm_output(rqtl_cmd.get('output_file')) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 0433b3f..405b743 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -47,7 +47,7 @@ output filename generated from a hash of the genotype and phenotype files } -def process_rqtl_output(file_name: str) -> List: +def process_rqtl_mapping(file_name: str) -> List: """Given an output file name, read in R/qtl results and return a List of marker objects @@ -80,6 +80,21 @@ def process_rqtl_output(file_name: str) -> List: return marker_obs +def process_rqtl_pairscan(file_name: str) -> List: + """Given an output file name, read in R/qtl pair-scan results and return + a List of Lists representing the matrix of results + + """ + results = [] + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", file_name), "r") as the_file: + for i, line in enumerate(the_file): + if i == 0: # Skip first line + continue + line_items = line.split(",") + results.append(line_items[1:]) # Append all but first item in line + + return results def process_perm_output(file_name: str): """Given base filename, read in R/qtl permutation output and calculate -- cgit v1.2.3 From 67f064606b9c8d4314701911dc4eedbaaa025cb0 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 29 Jul 2021 19:46:34 +0000 Subject: Fix imports to import both process_rqtl_mapping and process_rqtl_pairscan in api/rqtl.py --- gn3/api/rqtl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index ff76356..b0405f4 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -6,7 +6,7 @@ from flask import current_app from flask import jsonify from flask import request -from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_output, process_perm_output +from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_mapping, process_rqtl_pairscan, process_perm_output from gn3.computations.gemma import do_paths_exist rqtl = Blueprint("rqtl", __name__) -- cgit v1.2.3 From 85762647a0e54888bcc7cbd8a11d64cf22e07960 Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 2 Aug 2021 21:07:52 +0000 Subject: Updated rqtl_wrapper to also return a map file when doing a pair-scan (since we need the list of markers/pseudomarkers and their positions) --- scripts/rqtl_wrapper.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index d8ecfaa..f9c2c3b 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -157,7 +157,7 @@ if (!is.null(opt$interval)) { verbose_print('Calculating genotype probabilities with interval mapping\n') cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max") } else if (!is.null(opt$pairscan)) { - verbose_print('Calculating genotype probabilities with interval mapping\n') + verbose_print('Calculating genotype probabilities for pair-scan\n') cross_object <- calc.genoprob(cross_object, step=10) } else { verbose_print('Calculating genotype probabilities\n') @@ -246,7 +246,9 @@ if (!is.null(opt$addcovar) || !is.null(opt$control)){ verbose_print('Writing results to CSV file\n') if (!is.null(opt$pairscan)) { + map_out_file = file.path(opt$outdir, paste("MAP_", opt$filename, sep = "" )) write.csv(qtl_results[1], out_file) + write.csv(qtl_results[2], map_out_file) } else { write.csv(qtl_results, out_file) } -- cgit v1.2.3 From 58343f1005cb5a6aec47db22553313cf9fa17310 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 3 Aug 2021 20:34:06 +0000 Subject: Create pairscan_for_figure and pairscan_for_table functions that return the Dict and List respectively used for the pair scan figure and the table showing the results --- gn3/computations/rqtl.py | 87 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 82 insertions(+), 5 deletions(-) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 405b743..00e1db9 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -82,19 +82,96 @@ def process_rqtl_mapping(file_name: str) -> List: def process_rqtl_pairscan(file_name: str) -> List: """Given an output file name, read in R/qtl pair-scan results and return - a List of Lists representing the matrix of results + a list of both the JSON needed for the d3panels figure and a list of results + to be used when generating the results table (which will include marker names) """ - results = [] + + figure_data = pairscan_results_for_figure(file_name) + table_data = pairscan_results_for_table(file_name) + + return [figure_data, table_data] + +def pairscan_for_figure(file_name: str) -> Dict: + """Given an output file name, read in R/qtl pair-scan results and return + the JSON needed for the d3panels figure + + """ + figure_data = {} + + # Open the file with the actual results, written as a list of lists with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", file_name), "r") as the_file: + lod_results = [] for i, line in enumerate(the_file): if i == 0: # Skip first line continue - line_items = line.split(",") - results.append(line_items[1:]) # Append all but first item in line + line_items = [item.rstrip('\n') for item in line.split(",")] + lod_results.append(line_items[1:]) # Append all but first item in line + figure_data['lod'] = lod_results + + # Open the map file with the list of markers/pseudomarkers and their positions + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", "MAP_" + file_name), "r") as the_file: + chr_list = [] + pos_list = [] + for i, line in enumerate(the_file): + if i == 0: # Skip first line + continue + line_items = [item.rstrip('\n') for item in line.split(",")] + chr_list.append(line_items[1]) + pos_list.append(line_items[2]) + figure_data['chr'] = chr_list + figure_data['pos'] = pos_list + + return figure_data + + +def pairscan_for_table(file_name: str) -> List: + """Given an output file name, read in R/qtl pair-scan results and return + a list of results to be used when generating the results table (which will include marker names) + + """ + table_data = [] + + # Open the map file with the list of markers/pseudomarkers and create list of marker obs + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", "MAP_" + file_name), "r") as the_file: + marker_list = [] + for i, line in enumerate(the_file.readlines()[1:]): + line_items = [item.rstrip('\n') for item in line.split(",")] + this_marker = { + 'name': line_items[0], + 'chr': line_items[1][1:-1], # Strip quotes from beginning and end of chr string + 'pos': line_items[2] + } + + marker_list.append(this_marker) + + # Open the file with the actual results and write the results as + # they will be displayed in the results table + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", file_name), "r") as the_file: + for i, line in enumerate(the_file.readlines()[1:]): + marker_1 = marker_list[i] + line_items = [item.rstrip('\n') for item in line.split(",")] + for j, item in enumerate(line_items[1:]): + marker_2 = marker_list[j] + try: + lod_score = f"{float(item):.3f}" + except: + lod_score = f"{item}" + this_line = { + 'marker1': f"{marker_1['name']}", + 'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM", + 'lod': lod_score, + 'marker2': f"{marker_2['name']}", + 'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM" + } + table_data.append(this_line) + + return table_data - return results def process_perm_output(file_name: str): """Given base filename, read in R/qtl permutation output and calculate -- cgit v1.2.3 From e9110f840c8817092d0264990a4c7f6d16e966ba Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 22 Jul 2021 23:47:58 +0000 Subject: Added option for running pairscan to rqtl_wrapper.R --- scripts/rqtl_wrapper.R | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index 7518175..db0b83c 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -11,6 +11,7 @@ option_list = list( make_option(c("-c", "--addcovar"), action="store_true", default=NULL, help="Use covariates (included as extra columns in the phenotype input file)"), make_option(c("--model"), type="character", default="normal", help="Mapping Model - Normal or Non-Parametric"), make_option(c("--method"), type="character", default="hk", help="Mapping Method - hk (Haley Knott), ehk (Extended Haley Knott), mr (Marker Regression), em (Expectation-Maximization), imp (Imputation)"), + make_option(c("--pairscan"), action="store_true", default=NULL, help="Run Pair Scan - the R/qtl function scantwo"), make_option(c("-i", "--interval"), action="store_true", default=NULL, help="Use interval mapping"), make_option(c("--nperm"), type="integer", default=0, help="Number of permutations"), make_option(c("--pstrata"), action="store_true", default=NULL, help="Use permutation strata (stored as final column/vector in phenotype input file)"), -- cgit v1.2.3 From 14d76308a779f6e4da4c9f4c17f971f5a0c938b0 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 23 Jul 2021 19:40:04 +0000 Subject: - Added scan_func function that determines whether scanone or scantwo (pairscan) is used - For pairscan default to using step 20 (subject to change, but some step is required during calc.genoprob to make it run fast enough) - Added some new verbose prints --- scripts/rqtl_wrapper.R | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index db0b83c..2641271 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -156,6 +156,9 @@ cross_object = geno_to_csvr(geno_file, trait_names, trait_vals, cross_file) if (!is.null(opt$interval)) { verbose_print('Calculating genotype probabilities with interval mapping\n') cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max") +} else if (!is.null(opt$pairscan)) { + verbose_print('Calculating genotype probabilities with interval mapping\n') + cross_object <- calc.genoprob(cross_object, step=20) } else { verbose_print('Calculating genotype probabilities\n') cross_object <- calc.genoprob(cross_object) @@ -164,6 +167,7 @@ if (!is.null(opt$interval)) { # Pull covariates out of cross object, if they exist covars = vector(mode = "list", length = length(trait_names) - 1) if (!is.null(opt$addcovar)) { + verbose_print('Pulling covariates out of cross object\n') #If perm strata are being used, it'll be included as the final column in the phenotype file if (!is.null(opt$pstrata)) { covar_names = trait_names[3:length(trait_names) - 1] @@ -176,16 +180,28 @@ if (!is.null(opt$addcovar)) { # Pull permutation strata out of cross object, if it is being used perm_strata = vector() if (!is.null(opt$pstrata)) { + verbose_print('Pulling permutation strata out of cross object\n') strata_col = trait_names[length(trait_names)] perm_strata <- pull.pheno(cross_object, strata_col) } # If a marker name is supplied as covariate, get its vector of values and add them as a covariate if (!is.null(opt$control)) { + verbose_print('Creating marker covariates and binding them to covariates vector\n') marker_covars = create_marker_covars(cross_object, opt$control) covars <- cbind(covars, marker_covars) } +if (!is.null(opt$pairscan)) { + scan_func <- function(...){ + scantwo(...) + } +} else { + scan_func <- function(...){ + scanone(...) + } +} + # Calculate permutations if (opt$nperm > 0) { if (!is.null(opt$filename)){ @@ -196,19 +212,19 @@ if (opt$nperm > 0) { if (!is.null(opt$addcovar) || !is.null(opt$control)){ if (!is.null(opt$pstrata)) { - verbose_print('Running ', opt$nperm, ' permutations with cofactors and strata\n') - perm_results = scanone(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method) + verbose_print('Running permutations with cofactors and strata\n') + perm_results = scan_func(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method) } else { - verbose_print('Running ', opt$nperm, ' permutations with cofactors\n') - perm_results = scanone(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, model=opt$model, method=opt$method) + verbose_print('Running permutations with cofactors\n') + perm_results = scan_func(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, model=opt$model, method=opt$method) } } else { if (!is.null(opt$pstrata)) { - verbose_print('Running ', opt$nperm, ' permutations with strata\n') - perm_results = scanone(cross_object, pheno.col=1, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method) + verbose_print('Running permutations with strata\n') + perm_results = scan_func(cross_object, pheno.col=1, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method) } else { - verbose_print('Running ', opt$nperm, ' permutations\n') - perm_results = scanone(cross_object, pheno.col=1, n.perm=opt$nperm, model=opt$model, method=opt$method) + verbose_print('Running permutations\n') + perm_results = scan_func(cross_object, pheno.col=1, n.perm=opt$nperm, model=opt$model, method=opt$method) } } write.csv(perm_results, perm_out_file) @@ -221,10 +237,11 @@ if (!is.null(opt$filename)){ } if (!is.null(opt$addcovar) || !is.null(opt$control)){ - verbose_print('Running scanone with cofactors\n') - qtl_results = scanone(cross_object, pheno.col=1, addcovar=covars, model=opt$model, method=opt$method) + verbose_print('Running scan with cofactors\n') + qtl_results = scan_func(cross_object, pheno.col=1, addcovar=covars, model=opt$model, method=opt$method) } else { - verbose_print('Running scanone\n') - qtl_results = scanone(cross_object, pheno.col=1, model=opt$model, method=opt$method) + verbose_print('Running scan\n') + qtl_results = scan_func(cross_object, pheno.col=1, model=opt$model, method=opt$method) } + write.csv(qtl_results, out_file) -- cgit v1.2.3 From 91804b4d4f5f82ddad30b68691456885b39309c9 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 23 Jul 2021 20:08:36 +0000 Subject: Added line priting pair-scan results to CSV and changed the default step-size to 10cM for pair-scan --- scripts/rqtl_wrapper.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index 2641271..ebd5a91 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -158,7 +158,7 @@ if (!is.null(opt$interval)) { cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max") } else if (!is.null(opt$pairscan)) { verbose_print('Calculating genotype probabilities with interval mapping\n') - cross_object <- calc.genoprob(cross_object, step=20) + cross_object <- calc.genoprob(cross_object, step=10) } else { verbose_print('Calculating genotype probabilities\n') cross_object <- calc.genoprob(cross_object) @@ -244,4 +244,9 @@ if (!is.null(opt$addcovar) || !is.null(opt$control)){ qtl_results = scan_func(cross_object, pheno.col=1, model=opt$model, method=opt$method) } -write.csv(qtl_results, out_file) +verbose_print('Writing results to CSV file\n') +if (!is.null(opt$pairscan)) { + write.csv(qtl_results[1], out_file) +} else { + write.csv(qtl_results, out_file) +} -- cgit v1.2.3 From 971c5f1342770b999c62e345a9ebf942a0410758 Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 26 Jul 2021 20:47:37 +0000 Subject: Added pairscan boolean kwarg and process_rqtl_pairscan function for reading in pairscan results + renamed process_rqtl_output to process_rqtl_mapping to distinguish between that and pairscan --- gn3/api/rqtl.py | 8 +++++--- gn3/computations/rqtl.py | 17 ++++++++++++++++- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index ebb746c..ff76356 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -25,7 +25,7 @@ run the rqtl_wrapper script and return the results as JSON # Split kwargs by those with values and boolean ones that just convert to True/False kwargs = ["model", "method", "nperm", "scale", "control_marker"] - boolean_kwargs = ["addcovar", "interval", "pstrata"] + boolean_kwargs = ["addcovar", "interval", "pstrata", "pairscan"] all_kwargs = kwargs + boolean_kwargs rqtl_kwargs = {"geno": genofile, "pheno": phenofile} @@ -48,9 +48,11 @@ run the rqtl_wrapper script and return the results as JSON "output", rqtl_cmd.get('output_file'))): os.system(rqtl_cmd.get('rqtl_cmd')) - rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) + if "pairscan" in boolean_kwargs: + rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file')) + else: + rqtl_output['results'] = process_rqtl_mapping(rqtl_cmd.get('output_file')) - rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) if int(rqtl_kwargs['nperm']) > 0: rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = \ process_perm_output(rqtl_cmd.get('output_file')) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 0433b3f..405b743 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -47,7 +47,7 @@ output filename generated from a hash of the genotype and phenotype files } -def process_rqtl_output(file_name: str) -> List: +def process_rqtl_mapping(file_name: str) -> List: """Given an output file name, read in R/qtl results and return a List of marker objects @@ -80,6 +80,21 @@ def process_rqtl_output(file_name: str) -> List: return marker_obs +def process_rqtl_pairscan(file_name: str) -> List: + """Given an output file name, read in R/qtl pair-scan results and return + a List of Lists representing the matrix of results + + """ + results = [] + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", file_name), "r") as the_file: + for i, line in enumerate(the_file): + if i == 0: # Skip first line + continue + line_items = line.split(",") + results.append(line_items[1:]) # Append all but first item in line + + return results def process_perm_output(file_name: str): """Given base filename, read in R/qtl permutation output and calculate -- cgit v1.2.3 From 72ed7dfc45bb2a1303b900c8bc5b8ddd9e189a27 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 29 Jul 2021 19:46:34 +0000 Subject: Fix imports to import both process_rqtl_mapping and process_rqtl_pairscan in api/rqtl.py --- gn3/api/rqtl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index ff76356..b0405f4 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -6,7 +6,7 @@ from flask import current_app from flask import jsonify from flask import request -from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_output, process_perm_output +from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_mapping, process_rqtl_pairscan, process_perm_output from gn3.computations.gemma import do_paths_exist rqtl = Blueprint("rqtl", __name__) -- cgit v1.2.3 From 2cb18bb7916e029d3aa7779df710c1d0cbc910f8 Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 2 Aug 2021 21:07:52 +0000 Subject: Updated rqtl_wrapper to also return a map file when doing a pair-scan (since we need the list of markers/pseudomarkers and their positions) --- scripts/rqtl_wrapper.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index ebd5a91..ffff5b9 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -157,7 +157,7 @@ if (!is.null(opt$interval)) { verbose_print('Calculating genotype probabilities with interval mapping\n') cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max") } else if (!is.null(opt$pairscan)) { - verbose_print('Calculating genotype probabilities with interval mapping\n') + verbose_print('Calculating genotype probabilities for pair-scan\n') cross_object <- calc.genoprob(cross_object, step=10) } else { verbose_print('Calculating genotype probabilities\n') @@ -246,7 +246,9 @@ if (!is.null(opt$addcovar) || !is.null(opt$control)){ verbose_print('Writing results to CSV file\n') if (!is.null(opt$pairscan)) { + map_out_file = file.path(opt$outdir, paste("MAP_", opt$filename, sep = "" )) write.csv(qtl_results[1], out_file) + write.csv(qtl_results[2], map_out_file) } else { write.csv(qtl_results, out_file) } -- cgit v1.2.3 From df3ec570f22ebb4809c9accc530795757d5afaad Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 3 Aug 2021 20:34:06 +0000 Subject: Create pairscan_for_figure and pairscan_for_table functions that return the Dict and List respectively used for the pair scan figure and the table showing the results --- gn3/computations/rqtl.py | 87 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 82 insertions(+), 5 deletions(-) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 405b743..00e1db9 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -82,19 +82,96 @@ def process_rqtl_mapping(file_name: str) -> List: def process_rqtl_pairscan(file_name: str) -> List: """Given an output file name, read in R/qtl pair-scan results and return - a List of Lists representing the matrix of results + a list of both the JSON needed for the d3panels figure and a list of results + to be used when generating the results table (which will include marker names) """ - results = [] + + figure_data = pairscan_results_for_figure(file_name) + table_data = pairscan_results_for_table(file_name) + + return [figure_data, table_data] + +def pairscan_for_figure(file_name: str) -> Dict: + """Given an output file name, read in R/qtl pair-scan results and return + the JSON needed for the d3panels figure + + """ + figure_data = {} + + # Open the file with the actual results, written as a list of lists with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", file_name), "r") as the_file: + lod_results = [] for i, line in enumerate(the_file): if i == 0: # Skip first line continue - line_items = line.split(",") - results.append(line_items[1:]) # Append all but first item in line + line_items = [item.rstrip('\n') for item in line.split(",")] + lod_results.append(line_items[1:]) # Append all but first item in line + figure_data['lod'] = lod_results + + # Open the map file with the list of markers/pseudomarkers and their positions + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", "MAP_" + file_name), "r") as the_file: + chr_list = [] + pos_list = [] + for i, line in enumerate(the_file): + if i == 0: # Skip first line + continue + line_items = [item.rstrip('\n') for item in line.split(",")] + chr_list.append(line_items[1]) + pos_list.append(line_items[2]) + figure_data['chr'] = chr_list + figure_data['pos'] = pos_list + + return figure_data + + +def pairscan_for_table(file_name: str) -> List: + """Given an output file name, read in R/qtl pair-scan results and return + a list of results to be used when generating the results table (which will include marker names) + + """ + table_data = [] + + # Open the map file with the list of markers/pseudomarkers and create list of marker obs + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", "MAP_" + file_name), "r") as the_file: + marker_list = [] + for i, line in enumerate(the_file.readlines()[1:]): + line_items = [item.rstrip('\n') for item in line.split(",")] + this_marker = { + 'name': line_items[0], + 'chr': line_items[1][1:-1], # Strip quotes from beginning and end of chr string + 'pos': line_items[2] + } + + marker_list.append(this_marker) + + # Open the file with the actual results and write the results as + # they will be displayed in the results table + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", file_name), "r") as the_file: + for i, line in enumerate(the_file.readlines()[1:]): + marker_1 = marker_list[i] + line_items = [item.rstrip('\n') for item in line.split(",")] + for j, item in enumerate(line_items[1:]): + marker_2 = marker_list[j] + try: + lod_score = f"{float(item):.3f}" + except: + lod_score = f"{item}" + this_line = { + 'marker1': f"{marker_1['name']}", + 'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM", + 'lod': lod_score, + 'marker2': f"{marker_2['name']}", + 'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM" + } + table_data.append(this_line) + + return table_data - return results def process_perm_output(file_name: str): """Given base filename, read in R/qtl permutation output and calculate -- cgit v1.2.3 From 9f913b460cc7f72f1732195e6360c2d45b597d99 Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 4 Aug 2021 19:41:47 +0000 Subject: Fixed a cople function calls to use the updated function names --- gn3/computations/rqtl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 00e1db9..45cf430 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -87,8 +87,8 @@ def process_rqtl_pairscan(file_name: str) -> List: """ - figure_data = pairscan_results_for_figure(file_name) - table_data = pairscan_results_for_table(file_name) + figure_data = pairscan_for_figure(file_name) + table_data = pairscan_for_table(file_name) return [figure_data, table_data] -- cgit v1.2.3 From 1408b107beb4c799a1d7afaf571f53d0d56305bf Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 11 Aug 2021 17:36:18 +0000 Subject: Removed quotes from beginning and end of chromosome string --- gn3/computations/rqtl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 45cf430..3b50c85 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -119,7 +119,7 @@ def pairscan_for_figure(file_name: str) -> Dict: if i == 0: # Skip first line continue line_items = [item.rstrip('\n') for item in line.split(",")] - chr_list.append(line_items[1]) + chr_list.append(line_items[1][1:-1]) pos_list.append(line_items[2]) figure_data['chr'] = chr_list figure_data['pos'] = pos_list -- cgit v1.2.3 From 9aeafe51e60989fb018d147163c4fb861ceeb256 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 26 Aug 2021 20:17:14 +0000 Subject: Added genofile name to inputs for processing R/qtl pair-scan results, since it's needed to store the proximal/distal markers for each position --- gn3/api/rqtl.py | 2 +- gn3/computations/rqtl.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index b0405f4..2ff0f4e 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -49,7 +49,7 @@ run the rqtl_wrapper script and return the results as JSON os.system(rqtl_cmd.get('rqtl_cmd')) if "pairscan" in boolean_kwargs: - rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file')) + rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file'), genofile) else: rqtl_output['results'] = process_rqtl_mapping(rqtl_cmd.get('output_file')) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 3b50c85..ea88c29 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -80,7 +80,7 @@ def process_rqtl_mapping(file_name: str) -> List: return marker_obs -def process_rqtl_pairscan(file_name: str) -> List: +def process_rqtl_pairscan(file_name: str, geno_file: str) -> List: """Given an output file name, read in R/qtl pair-scan results and return a list of both the JSON needed for the d3panels figure and a list of results to be used when generating the results table (which will include marker names) @@ -88,7 +88,7 @@ def process_rqtl_pairscan(file_name: str) -> List: """ figure_data = pairscan_for_figure(file_name) - table_data = pairscan_for_table(file_name) + table_data = pairscan_for_table(file_name, geno_file) return [figure_data, table_data] @@ -127,7 +127,7 @@ def pairscan_for_figure(file_name: str) -> Dict: return figure_data -def pairscan_for_table(file_name: str) -> List: +def pairscan_for_table(file_name: str, geno_file: str) -> List: """Given an output file name, read in R/qtl pair-scan results and return a list of results to be used when generating the results table (which will include marker names) -- cgit v1.2.3 From 23be50a195e648fa567777c4ab679d4c30cf6933 Mon Sep 17 00:00:00 2001 From: zsloan Date: Sat, 28 Aug 2021 00:46:10 +0000 Subject: Fix issue that causes R/qtl to always run pair-scan even if pair-scan isn't selected --- gn3/api/rqtl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index 2ff0f4e..6548e1a 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -48,7 +48,7 @@ run the rqtl_wrapper script and return the results as JSON "output", rqtl_cmd.get('output_file'))): os.system(rqtl_cmd.get('rqtl_cmd')) - if "pairscan" in boolean_kwargs: + if "pairscan" in rqtl_bool_kwargs: rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file'), genofile) else: rqtl_output['results'] = process_rqtl_mapping(rqtl_cmd.get('output_file')) -- cgit v1.2.3 From 49a9150f2dafee19852d0e51ce089fdd54be7b5f Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 23 Sep 2021 22:33:10 +0000 Subject: Add functions for getting proximal/distal markers for each pseudomarker position in pair-scan results + return only the sorted top 500 results --- gn3/computations/rqtl.py | 62 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 4 deletions(-) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index ea88c29..d6fcd3f 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -1,5 +1,6 @@ """Procedures related rqtl computations""" import os +from bisect import bisect from typing import Dict, List, Union import numpy as np @@ -148,30 +149,83 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List: marker_list.append(this_marker) + # Get the list of original markers from the .geno file + original_markers = build_marker_pos_list(geno_file) + # Open the file with the actual results and write the results as # they will be displayed in the results table with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", file_name), "r") as the_file: for i, line in enumerate(the_file.readlines()[1:]): marker_1 = marker_list[i] + proximal1, distal1 = find_nearest_marker(marker_1['chr'], marker_1['pos'], original_markers) line_items = [item.rstrip('\n') for item in line.split(",")] for j, item in enumerate(line_items[1:]): marker_2 = marker_list[j] + proximal2, distal2 = find_nearest_marker(marker_2['chr'], marker_2['pos'], original_markers) try: lod_score = f"{float(item):.3f}" except: lod_score = f"{item}" this_line = { - 'marker1': f"{marker_1['name']}", + 'proximal1': proximal1, + 'distal1': distal1, 'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM", 'lod': lod_score, - 'marker2': f"{marker_2['name']}", + 'proximal2': proximal2, + 'distal2': distal2, 'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM" } - table_data.append(this_line) - return table_data + table_data.append(this_line) + + return sorted(table_data, key = lambda i: float(i['lod']), reverse=True)[:500] + +def build_marker_pos_list(genotype_file): + """ + Gets list of markers and their positions from .geno file + + Basically a pared-down version of parse_genotype_file for R/qtl pair-scan + """ + + with open(genotype_file, "r") as infile: + contents = infile.readlines() + + # Get all lines after the metadata + lines = tuple(line for line in contents if + ((not line.strip().startswith("#")) and + (not line.strip().startswith("@")) and + (not line.strip() == ""))) + + header_items = lines[0].split("\t") + mb_exists = "Mb" in header_items + pos_column = header_items.index("Mb") if mb_exists else header_items.index("cM") + + the_markers = {} + for line in lines[1:]: # The lines with markers + line_items = line.split("\t") + this_chr = line_items[0] + if this_chr not in the_markers: + the_markers[this_chr] = {} + the_markers[this_chr][str(float(line_items[pos_column]))] = line_items[1] + + return the_markers + +def find_nearest_marker(the_chr, the_pos, marker_list): + """ + Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results), + return the nearest real marker + """ + + pos_list = [float(pos) for pos in marker_list[the_chr]] + + # Get the position of the pseudomarker in the list of markers for the chr + the_pos_index = bisect(pos_list, float(the_pos)) + + proximal_marker = marker_list[the_chr][str(pos_list[the_pos_index-1])] + distal_marker = marker_list[the_chr][str(pos_list[the_pos_index])] + return proximal_marker, distal_marker def process_perm_output(file_name: str): """Given base filename, read in R/qtl permutation output and calculate -- cgit v1.2.3 From 20f26af7a1af24b823ba1cf29d0e4393dabf9168 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 23 Sep 2021 22:33:31 +0000 Subject: Add typing to some functions --- gn3/computations/rqtl.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index d6fcd3f..4f2709c 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -1,7 +1,7 @@ """Procedures related rqtl computations""" import os from bisect import bisect -from typing import Dict, List, Union +from typing import Dict, List, Tuple, Union import numpy as np @@ -150,7 +150,7 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List: marker_list.append(this_marker) # Get the list of original markers from the .geno file - original_markers = build_marker_pos_list(geno_file) + original_markers = build_marker_pos_dict(geno_file) # Open the file with the actual results and write the results as # they will be displayed in the results table @@ -181,7 +181,7 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List: return sorted(table_data, key = lambda i: float(i['lod']), reverse=True)[:500] -def build_marker_pos_list(genotype_file): +def build_marker_pos_dict(genotype_file: str) -> Dict: """ Gets list of markers and their positions from .geno file @@ -211,7 +211,7 @@ def build_marker_pos_list(genotype_file): return the_markers -def find_nearest_marker(the_chr, the_pos, marker_list): +def find_nearest_marker(the_chr: str, the_pos: str, marker_list: Dict) -> Tuple[str, str]: """ Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results), return the nearest real marker @@ -227,7 +227,7 @@ def find_nearest_marker(the_chr, the_pos, marker_list): return proximal_marker, distal_marker -def process_perm_output(file_name: str): +def process_perm_output(file_name: str) -> Tuple[List, float, float]: """Given base filename, read in R/qtl permutation output and calculate suggestive and significant thresholds -- cgit v1.2.3 From 976660ce9ef915426c7ce5ff9077b439e4102a2c Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 23 Sep 2021 22:48:39 +0000 Subject: Tried to make the docstrings more consistent --- gn3/computations/rqtl.py | 38 +++++++++++--------------------------- 1 file changed, 11 insertions(+), 27 deletions(-) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 38c5000..45232e3 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -16,9 +16,7 @@ def generate_rqtl_cmd(rqtl_wrapper_cmd: str, rqtl_wrapper_bool_kwargs: list) -> Dict: """Given the base rqtl_wrapper command and dict of keyword arguments, return the full rqtl_wrapper command and an -output filename generated from a hash of the genotype and phenotype files - - """ +output filename generated from a hash of the genotype and phenotype files""" # Generate a hash from contents of the genotype and phenotype files _hash = get_hash_of_files( @@ -50,9 +48,7 @@ output filename generated from a hash of the genotype and phenotype files def process_rqtl_mapping(file_name: str) -> List: """Given an output file name, read in R/qtl results and return - a List of marker objects - - """ + a List of marker objects""" marker_obs = [] # Later I should probably redo this using csv.read to avoid the # awkwardness with removing quotes with [1:-1] @@ -83,11 +79,8 @@ def process_rqtl_mapping(file_name: str) -> List: def process_rqtl_pairscan(file_name: str, geno_file: str) -> List: """Given an output file name, read in R/qtl pair-scan results and return - a list of both the JSON needed for the d3panels figure and a list of results - to be used when generating the results table (which will include marker names) - - """ - +a list of both the JSON needed for the d3panels figure and a list of results +to be used when generating the results table (which will include marker names)""" figure_data = pairscan_for_figure(file_name) table_data = pairscan_for_table(file_name, geno_file) @@ -95,9 +88,7 @@ def process_rqtl_pairscan(file_name: str, geno_file: str) -> List: def pairscan_for_figure(file_name: str) -> Dict: """Given an output file name, read in R/qtl pair-scan results and return - the JSON needed for the d3panels figure - - """ + the JSON needed for the d3panels figure""" figure_data = {} # Open the file with the actual results, written as a list of lists @@ -129,9 +120,7 @@ def pairscan_for_figure(file_name: str) -> Dict: def pairscan_for_table(file_name: str, geno_file: str) -> List: """Given an output file name, read in R/qtl pair-scan results and return - a list of results to be used when generating the results table (which will include marker names) - - """ + a list of results to be used when generating the results table (which will include marker names)""" table_data = [] # Open the map file with the list of markers/pseudomarkers and create list of marker obs @@ -181,11 +170,9 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List: return sorted(table_data, key = lambda i: float(i['lod']), reverse=True)[:500] def build_marker_pos_dict(genotype_file: str) -> Dict: - """ - Gets list of markers and their positions from .geno file + """Gets list of markers and their positions from .geno file - Basically a pared-down version of parse_genotype_file for R/qtl pair-scan - """ + Basically a pared-down version of parse_genotype_file for R/qtl pair-scan""" with open(genotype_file, "r") as infile: contents = infile.readlines() @@ -211,10 +198,8 @@ def build_marker_pos_dict(genotype_file: str) -> Dict: return the_markers def find_nearest_marker(the_chr: str, the_pos: str, marker_list: Dict) -> Tuple[str, str]: - """ - Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results), - return the nearest real marker - """ + """Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results), + return the nearest real marker""" pos_list = [float(pos) for pos in marker_list[the_chr]] @@ -228,9 +213,8 @@ def find_nearest_marker(the_chr: str, the_pos: str, marker_list: Dict) -> Tuple[ def process_perm_output(file_name: str) -> Tuple[List, float, float]: """Given base filename, read in R/qtl permutation output and calculate - suggestive and significant thresholds + suggestive and significant thresholds""" - """ perm_results = [] with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", "PERM_" + file_name), "r") as the_file: -- cgit v1.2.3 From 30d00f4db20ea9dce827fc4f4ac3d50221ad3823 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 12 Oct 2021 19:15:55 +0000 Subject: Fixed mypy typing errors --- gn3/computations/rqtl.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 45232e3..bb28dc3 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -105,8 +105,8 @@ def pairscan_for_figure(file_name: str) -> Dict: # Open the map file with the list of markers/pseudomarkers and their positions with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", "MAP_" + file_name), "r") as the_file: - chr_list = [] - pos_list = [] + chr_list = [] # type: List + pos_list = [] # type: List for i, line in enumerate(the_file): if i == 0: # Skip first line continue @@ -187,7 +187,7 @@ def build_marker_pos_dict(genotype_file: str) -> Dict: mb_exists = "Mb" in header_items pos_column = header_items.index("Mb") if mb_exists else header_items.index("cM") - the_markers = {} + the_markers = {"1": {}} # type: Dict[str, Dict] for line in lines[1:]: # The lines with markers line_items = line.split("\t") this_chr = line_items[0] -- cgit v1.2.3 From 607c6e627c23c1bce3b199b145855182ab51b211 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 12 Oct 2021 20:54:47 +0000 Subject: Fixes pylint errors --- gn3/api/rqtl.py | 3 +- gn3/computations/rqtl.py | 86 ++++++++++++++++++++++++++++++++++-------------- 2 files changed, 64 insertions(+), 25 deletions(-) diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index 6548e1a..85b2460 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -6,7 +6,8 @@ from flask import current_app from flask import jsonify from flask import request -from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_mapping, process_rqtl_pairscan, process_perm_output +from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_mapping, \ + process_rqtl_pairscan, process_perm_output from gn3.computations.gemma import do_paths_exist rqtl = Blueprint("rqtl", __name__) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index bb28dc3..e81aba3 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -118,16 +118,19 @@ def pairscan_for_figure(file_name: str) -> Dict: return figure_data -def pairscan_for_table(file_name: str, geno_file: str) -> List: - """Given an output file name, read in R/qtl pair-scan results and return - a list of results to be used when generating the results table (which will include marker names)""" - table_data = [] - # Open the map file with the list of markers/pseudomarkers and create list of marker obs +def get_marker_list(map_file: str) -> List: + """ + Open the map file with the list of markers/pseudomarkers and create list of marker obs + + PARAMETERS: + map_file: The map file output by R/qtl containing marker/pseudomarker positions + """ + + marker_list = [] with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), - "output", "MAP_" + file_name), "r") as the_file: - marker_list = [] - for i, line in enumerate(the_file.readlines()[1:]): + "output", map_file), "r") as map_fh: + for line in map_fh.readlines()[1:]: line_items = [item.rstrip('\n') for item in line.split(",")] this_marker = { 'name': line_items[0], @@ -137,37 +140,72 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List: marker_list.append(this_marker) - # Get the list of original markers from the .geno file - original_markers = build_marker_pos_dict(geno_file) + return marker_list - # Open the file with the actual results and write the results as - # they will be displayed in the results table +def generate_table_rows(results_file: str, marker_list: List, original_markers: Dict) -> List: + """ + Open the file with the actual R/qtl pair-scan results and write them as + they will be displayed in the results table + + PARAMETERS: + results_file: The filename containing R/qtl pair-scan results + marker_list: List of marker/pseudomarker names/positions from results + original_markers: Dict of markers from the .geno file, for finding proximal/distal + markers to each pseudomarker + """ + + table_data = [] with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), - "output", file_name), "r") as the_file: + "output", results_file), "r") as the_file: for i, line in enumerate(the_file.readlines()[1:]): marker_1 = marker_list[i] - proximal1, distal1 = find_nearest_marker(marker_1['chr'], marker_1['pos'], original_markers) + marker_1['proximal'], marker_1['distal'] = find_nearest_marker(marker_1['chr'], + marker_1['pos'], + original_markers) line_items = [item.rstrip('\n') for item in line.split(",")] for j, item in enumerate(line_items[1:]): marker_2 = marker_list[j] - proximal2, distal2 = find_nearest_marker(marker_2['chr'], marker_2['pos'], original_markers) + marker_2['proximal'], marker_2['distal'] = find_nearest_marker(marker_2['chr'], + marker_2['pos'], + original_markers) try: lod_score = f"{float(item):.3f}" - except: + except ValueError: lod_score = f"{item}" - this_line = { - 'proximal1': proximal1, - 'distal1': distal1, + + table_data.append({ + 'proximal1': marker_1['proximal'], + 'distal1': marker_1['distal'], 'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM", 'lod': lod_score, - 'proximal2': proximal2, - 'distal2': distal2, + 'proximal2': marker_2['proximal'], + 'distal2': marker_2['distal'], 'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM" - } + }) + + return table_data + +def pairscan_for_table(file_name: str, geno_file: str) -> List: + """ + Read in R/qtl pair-scan results and return as List representing + table row contents + + PARAMETERS: + file_name: The filename containing R/qtl pair-scan results + geno_file: Filename of the genotype file (used to get marker positions) + """ - table_data.append(this_line) + # Open the map file with the list of markers/pseudomarkers and create list of marker obs + marker_list = get_marker_list("MAP_" + file_name) + + # Get the list of original markers from the .geno file + original_markers = build_marker_pos_dict(geno_file) + + # Open the file with the actual results and write the results as + # they will be displayed in the results table + table_data = generate_table_rows(file_name, marker_list, original_markers) - return sorted(table_data, key = lambda i: float(i['lod']), reverse=True)[:500] + return sorted(table_data, key=lambda i: float(i['lod']), reverse=True)[:500] def build_marker_pos_dict(genotype_file: str) -> Dict: """Gets list of markers and their positions from .geno file -- cgit v1.2.3