From 9a8ab0ea1bbfc82c79cb8183c37200a09ab27d9c Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 26 Jul 2021 20:47:37 +0000 Subject: Added pairscan boolean kwarg and process_rqtl_pairscan function for reading in pairscan results + renamed process_rqtl_output to process_rqtl_mapping to distinguish between that and pairscan --- gn3/api/rqtl.py | 8 +++++--- gn3/computations/rqtl.py | 17 ++++++++++++++++- 2 files changed, 21 insertions(+), 4 deletions(-) (limited to 'gn3') diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index b5627c5..ed01a97 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -25,7 +25,7 @@ run the rqtl_wrapper script and return the results as JSON # Split kwargs by those with values and boolean ones that just convert to True/False kwargs = ["covarstruct", "model", "method", "nperm", "scale", "control_marker"] - boolean_kwargs = ["addcovar", "interval", "pstrata"] + boolean_kwargs = ["addcovar", "interval", "pstrata", "pairscan"] all_kwargs = kwargs + boolean_kwargs rqtl_kwargs = {"geno": genofile, "pheno": phenofile} @@ -48,9 +48,11 @@ run the rqtl_wrapper script and return the results as JSON "output", rqtl_cmd.get('output_file'))): os.system(rqtl_cmd.get('rqtl_cmd')) - rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) + if "pairscan" in boolean_kwargs: + rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file')) + else: + rqtl_output['results'] = process_rqtl_mapping(rqtl_cmd.get('output_file')) - rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) if int(rqtl_kwargs['nperm']) > 0: rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = \ process_perm_output(rqtl_cmd.get('output_file')) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index b3539a9..b9e715a 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -47,7 +47,7 @@ output filename generated from a hash of the genotype and phenotype files } -def process_rqtl_output(file_name: str) -> List: +def process_rqtl_mapping(file_name: str) -> List: """Given an output file name, read in R/qtl results and return a List of marker objects @@ -80,6 +80,21 @@ def process_rqtl_output(file_name: str) -> List: return marker_obs +def process_rqtl_pairscan(file_name: str) -> List: + """Given an output file name, read in R/qtl pair-scan results and return + a List of Lists representing the matrix of results + + """ + results = [] + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", file_name), "r") as the_file: + for i, line in enumerate(the_file): + if i == 0: # Skip first line + continue + line_items = line.split(",") + results.append(line_items[1:]) # Append all but first item in line + + return results def process_perm_output(file_name: str): """Given base filename, read in R/qtl permutation output and calculate -- cgit v1.2.3 From 444095309a8841a0998e74f2f4eba2c3fea6890f Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 29 Jul 2021 19:46:34 +0000 Subject: Fix imports to import both process_rqtl_mapping and process_rqtl_pairscan in api/rqtl.py --- gn3/api/rqtl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gn3') diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index ed01a97..abce705 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -6,7 +6,7 @@ from flask import current_app from flask import jsonify from flask import request -from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_output, process_perm_output +from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_mapping, process_rqtl_pairscan, process_perm_output from gn3.computations.gemma import do_paths_exist rqtl = Blueprint("rqtl", __name__) -- cgit v1.2.3 From 019f196f21cbfd7bb3d91838313aad1d29ea98cb Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 3 Aug 2021 20:34:06 +0000 Subject: Create pairscan_for_figure and pairscan_for_table functions that return the Dict and List respectively used for the pair scan figure and the table showing the results --- gn3/computations/rqtl.py | 87 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 82 insertions(+), 5 deletions(-) (limited to 'gn3') diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index b9e715a..87c9ae8 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -82,19 +82,96 @@ def process_rqtl_mapping(file_name: str) -> List: def process_rqtl_pairscan(file_name: str) -> List: """Given an output file name, read in R/qtl pair-scan results and return - a List of Lists representing the matrix of results + a list of both the JSON needed for the d3panels figure and a list of results + to be used when generating the results table (which will include marker names) """ - results = [] + + figure_data = pairscan_results_for_figure(file_name) + table_data = pairscan_results_for_table(file_name) + + return [figure_data, table_data] + +def pairscan_for_figure(file_name: str) -> Dict: + """Given an output file name, read in R/qtl pair-scan results and return + the JSON needed for the d3panels figure + + """ + figure_data = {} + + # Open the file with the actual results, written as a list of lists with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", file_name), "r") as the_file: + lod_results = [] for i, line in enumerate(the_file): if i == 0: # Skip first line continue - line_items = line.split(",") - results.append(line_items[1:]) # Append all but first item in line + line_items = [item.rstrip('\n') for item in line.split(",")] + lod_results.append(line_items[1:]) # Append all but first item in line + figure_data['lod'] = lod_results + + # Open the map file with the list of markers/pseudomarkers and their positions + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", "MAP_" + file_name), "r") as the_file: + chr_list = [] + pos_list = [] + for i, line in enumerate(the_file): + if i == 0: # Skip first line + continue + line_items = [item.rstrip('\n') for item in line.split(",")] + chr_list.append(line_items[1]) + pos_list.append(line_items[2]) + figure_data['chr'] = chr_list + figure_data['pos'] = pos_list + + return figure_data + + +def pairscan_for_table(file_name: str) -> List: + """Given an output file name, read in R/qtl pair-scan results and return + a list of results to be used when generating the results table (which will include marker names) + + """ + table_data = [] + + # Open the map file with the list of markers/pseudomarkers and create list of marker obs + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", "MAP_" + file_name), "r") as the_file: + marker_list = [] + for i, line in enumerate(the_file.readlines()[1:]): + line_items = [item.rstrip('\n') for item in line.split(",")] + this_marker = { + 'name': line_items[0], + 'chr': line_items[1][1:-1], # Strip quotes from beginning and end of chr string + 'pos': line_items[2] + } + + marker_list.append(this_marker) + + # Open the file with the actual results and write the results as + # they will be displayed in the results table + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", file_name), "r") as the_file: + for i, line in enumerate(the_file.readlines()[1:]): + marker_1 = marker_list[i] + line_items = [item.rstrip('\n') for item in line.split(",")] + for j, item in enumerate(line_items[1:]): + marker_2 = marker_list[j] + try: + lod_score = f"{float(item):.3f}" + except: + lod_score = f"{item}" + this_line = { + 'marker1': f"{marker_1['name']}", + 'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM", + 'lod': lod_score, + 'marker2': f"{marker_2['name']}", + 'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM" + } + table_data.append(this_line) + + return table_data - return results def process_perm_output(file_name: str): """Given base filename, read in R/qtl permutation output and calculate -- cgit v1.2.3 From f494a9462ddb22c117b16be9b4191564b3191c01 Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 4 Aug 2021 19:41:47 +0000 Subject: Fixed a cople function calls to use the updated function names --- gn3/computations/rqtl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'gn3') diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 87c9ae8..092fac6 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -87,8 +87,8 @@ def process_rqtl_pairscan(file_name: str) -> List: """ - figure_data = pairscan_results_for_figure(file_name) - table_data = pairscan_results_for_table(file_name) + figure_data = pairscan_for_figure(file_name) + table_data = pairscan_for_table(file_name) return [figure_data, table_data] -- cgit v1.2.3 From 35f0784ee32ccc76416d184ac65e8cad6f7aa490 Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 11 Aug 2021 17:36:18 +0000 Subject: Removed quotes from beginning and end of chromosome string --- gn3/computations/rqtl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gn3') diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 092fac6..36c9fa2 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -119,7 +119,7 @@ def pairscan_for_figure(file_name: str) -> Dict: if i == 0: # Skip first line continue line_items = [item.rstrip('\n') for item in line.split(",")] - chr_list.append(line_items[1]) + chr_list.append(line_items[1][1:-1]) pos_list.append(line_items[2]) figure_data['chr'] = chr_list figure_data['pos'] = pos_list -- cgit v1.2.3 From 40e85090b60ef002723b614fd712b6affbd68176 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 26 Aug 2021 20:17:14 +0000 Subject: Added genofile name to inputs for processing R/qtl pair-scan results, since it's needed to store the proximal/distal markers for each position --- gn3/api/rqtl.py | 2 +- gn3/computations/rqtl.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'gn3') diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index abce705..9aab9fa 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -49,7 +49,7 @@ run the rqtl_wrapper script and return the results as JSON os.system(rqtl_cmd.get('rqtl_cmd')) if "pairscan" in boolean_kwargs: - rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file')) + rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file'), genofile) else: rqtl_output['results'] = process_rqtl_mapping(rqtl_cmd.get('output_file')) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 36c9fa2..b05a577 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -80,7 +80,7 @@ def process_rqtl_mapping(file_name: str) -> List: return marker_obs -def process_rqtl_pairscan(file_name: str) -> List: +def process_rqtl_pairscan(file_name: str, geno_file: str) -> List: """Given an output file name, read in R/qtl pair-scan results and return a list of both the JSON needed for the d3panels figure and a list of results to be used when generating the results table (which will include marker names) @@ -88,7 +88,7 @@ def process_rqtl_pairscan(file_name: str) -> List: """ figure_data = pairscan_for_figure(file_name) - table_data = pairscan_for_table(file_name) + table_data = pairscan_for_table(file_name, geno_file) return [figure_data, table_data] @@ -127,7 +127,7 @@ def pairscan_for_figure(file_name: str) -> Dict: return figure_data -def pairscan_for_table(file_name: str) -> List: +def pairscan_for_table(file_name: str, geno_file: str) -> List: """Given an output file name, read in R/qtl pair-scan results and return a list of results to be used when generating the results table (which will include marker names) -- cgit v1.2.3 From 64aedf4202efc4e50648d496d2a8eed3fc828b2f Mon Sep 17 00:00:00 2001 From: zsloan Date: Sat, 28 Aug 2021 00:46:10 +0000 Subject: Fix issue that causes R/qtl to always run pair-scan even if pair-scan isn't selected --- gn3/api/rqtl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gn3') diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index 9aab9fa..b6c04eb 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -48,7 +48,7 @@ run the rqtl_wrapper script and return the results as JSON "output", rqtl_cmd.get('output_file'))): os.system(rqtl_cmd.get('rqtl_cmd')) - if "pairscan" in boolean_kwargs: + if "pairscan" in rqtl_bool_kwargs: rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file'), genofile) else: rqtl_output['results'] = process_rqtl_mapping(rqtl_cmd.get('output_file')) -- cgit v1.2.3 From a4a2886c16b578dd4f91520bf3fa9a1e9d801edb Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 23 Sep 2021 22:33:10 +0000 Subject: Add functions for getting proximal/distal markers for each pseudomarker position in pair-scan results + return only the sorted top 500 results --- gn3/computations/rqtl.py | 62 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 4 deletions(-) (limited to 'gn3') diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index b05a577..51ac3c7 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -1,5 +1,6 @@ """Procedures related rqtl computations""" import os +from bisect import bisect from typing import Dict, List, Union import numpy as np @@ -148,30 +149,83 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List: marker_list.append(this_marker) + # Get the list of original markers from the .geno file + original_markers = build_marker_pos_list(geno_file) + # Open the file with the actual results and write the results as # they will be displayed in the results table with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", file_name), "r") as the_file: for i, line in enumerate(the_file.readlines()[1:]): marker_1 = marker_list[i] + proximal1, distal1 = find_nearest_marker(marker_1['chr'], marker_1['pos'], original_markers) line_items = [item.rstrip('\n') for item in line.split(",")] for j, item in enumerate(line_items[1:]): marker_2 = marker_list[j] + proximal2, distal2 = find_nearest_marker(marker_2['chr'], marker_2['pos'], original_markers) try: lod_score = f"{float(item):.3f}" except: lod_score = f"{item}" this_line = { - 'marker1': f"{marker_1['name']}", + 'proximal1': proximal1, + 'distal1': distal1, 'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM", 'lod': lod_score, - 'marker2': f"{marker_2['name']}", + 'proximal2': proximal2, + 'distal2': distal2, 'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM" } - table_data.append(this_line) - return table_data + table_data.append(this_line) + + return sorted(table_data, key = lambda i: float(i['lod']), reverse=True)[:500] + +def build_marker_pos_list(genotype_file): + """ + Gets list of markers and their positions from .geno file + + Basically a pared-down version of parse_genotype_file for R/qtl pair-scan + """ + + with open(genotype_file, "r") as infile: + contents = infile.readlines() + + # Get all lines after the metadata + lines = tuple(line for line in contents if + ((not line.strip().startswith("#")) and + (not line.strip().startswith("@")) and + (not line.strip() == ""))) + + header_items = lines[0].split("\t") + mb_exists = "Mb" in header_items + pos_column = header_items.index("Mb") if mb_exists else header_items.index("cM") + + the_markers = {} + for line in lines[1:]: # The lines with markers + line_items = line.split("\t") + this_chr = line_items[0] + if this_chr not in the_markers: + the_markers[this_chr] = {} + the_markers[this_chr][str(float(line_items[pos_column]))] = line_items[1] + + return the_markers + +def find_nearest_marker(the_chr, the_pos, marker_list): + """ + Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results), + return the nearest real marker + """ + + pos_list = [float(pos) for pos in marker_list[the_chr]] + + # Get the position of the pseudomarker in the list of markers for the chr + the_pos_index = bisect(pos_list, float(the_pos)) + + proximal_marker = marker_list[the_chr][str(pos_list[the_pos_index-1])] + distal_marker = marker_list[the_chr][str(pos_list[the_pos_index])] + return proximal_marker, distal_marker def process_perm_output(file_name: str): """Given base filename, read in R/qtl permutation output and calculate -- cgit v1.2.3 From ea15c3f0ed8fe6682af1ceb45e1fc962d09d2e7e Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 23 Sep 2021 22:33:31 +0000 Subject: Add typing to some functions --- gn3/computations/rqtl.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'gn3') diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 51ac3c7..cfa58a8 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -1,7 +1,7 @@ """Procedures related rqtl computations""" import os from bisect import bisect -from typing import Dict, List, Union +from typing import Dict, List, Tuple, Union import numpy as np @@ -150,7 +150,7 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List: marker_list.append(this_marker) # Get the list of original markers from the .geno file - original_markers = build_marker_pos_list(geno_file) + original_markers = build_marker_pos_dict(geno_file) # Open the file with the actual results and write the results as # they will be displayed in the results table @@ -181,7 +181,7 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List: return sorted(table_data, key = lambda i: float(i['lod']), reverse=True)[:500] -def build_marker_pos_list(genotype_file): +def build_marker_pos_dict(genotype_file: str) -> Dict: """ Gets list of markers and their positions from .geno file @@ -211,7 +211,7 @@ def build_marker_pos_list(genotype_file): return the_markers -def find_nearest_marker(the_chr, the_pos, marker_list): +def find_nearest_marker(the_chr: str, the_pos: str, marker_list: Dict) -> Tuple[str, str]: """ Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results), return the nearest real marker @@ -227,7 +227,7 @@ def find_nearest_marker(the_chr, the_pos, marker_list): return proximal_marker, distal_marker -def process_perm_output(file_name: str): +def process_perm_output(file_name: str) -> Tuple[List, float, float]: """Given base filename, read in R/qtl permutation output and calculate suggestive and significant thresholds -- cgit v1.2.3 From 14cadeb87e2a98300c7d47cf51d19c2e3508affe Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 23 Sep 2021 22:48:39 +0000 Subject: Tried to make the docstrings more consistent --- gn3/computations/rqtl.py | 38 +++++++++++--------------------------- 1 file changed, 11 insertions(+), 27 deletions(-) (limited to 'gn3') diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index cfa58a8..8d066ef 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -16,9 +16,7 @@ def generate_rqtl_cmd(rqtl_wrapper_cmd: str, rqtl_wrapper_bool_kwargs: list) -> Dict: """Given the base rqtl_wrapper command and dict of keyword arguments, return the full rqtl_wrapper command and an -output filename generated from a hash of the genotype and phenotype files - - """ +output filename generated from a hash of the genotype and phenotype files""" # Generate a hash from contents of the genotype and phenotype files _hash = get_hash_of_files( @@ -50,9 +48,7 @@ output filename generated from a hash of the genotype and phenotype files def process_rqtl_mapping(file_name: str) -> List: """Given an output file name, read in R/qtl results and return - a List of marker objects - - """ + a List of marker objects""" marker_obs = [] # Later I should probably redo this using csv.read to avoid the # awkwardness with removing quotes with [1:-1] @@ -83,11 +79,8 @@ def process_rqtl_mapping(file_name: str) -> List: def process_rqtl_pairscan(file_name: str, geno_file: str) -> List: """Given an output file name, read in R/qtl pair-scan results and return - a list of both the JSON needed for the d3panels figure and a list of results - to be used when generating the results table (which will include marker names) - - """ - +a list of both the JSON needed for the d3panels figure and a list of results +to be used when generating the results table (which will include marker names)""" figure_data = pairscan_for_figure(file_name) table_data = pairscan_for_table(file_name, geno_file) @@ -95,9 +88,7 @@ def process_rqtl_pairscan(file_name: str, geno_file: str) -> List: def pairscan_for_figure(file_name: str) -> Dict: """Given an output file name, read in R/qtl pair-scan results and return - the JSON needed for the d3panels figure - - """ + the JSON needed for the d3panels figure""" figure_data = {} # Open the file with the actual results, written as a list of lists @@ -130,9 +121,7 @@ def pairscan_for_figure(file_name: str) -> Dict: def pairscan_for_table(file_name: str, geno_file: str) -> List: """Given an output file name, read in R/qtl pair-scan results and return - a list of results to be used when generating the results table (which will include marker names) - - """ + a list of results to be used when generating the results table (which will include marker names)""" table_data = [] # Open the map file with the list of markers/pseudomarkers and create list of marker obs @@ -182,11 +171,9 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List: return sorted(table_data, key = lambda i: float(i['lod']), reverse=True)[:500] def build_marker_pos_dict(genotype_file: str) -> Dict: - """ - Gets list of markers and their positions from .geno file + """Gets list of markers and their positions from .geno file - Basically a pared-down version of parse_genotype_file for R/qtl pair-scan - """ + Basically a pared-down version of parse_genotype_file for R/qtl pair-scan""" with open(genotype_file, "r") as infile: contents = infile.readlines() @@ -212,10 +199,8 @@ def build_marker_pos_dict(genotype_file: str) -> Dict: return the_markers def find_nearest_marker(the_chr: str, the_pos: str, marker_list: Dict) -> Tuple[str, str]: - """ - Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results), - return the nearest real marker - """ + """Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results), + return the nearest real marker""" pos_list = [float(pos) for pos in marker_list[the_chr]] @@ -229,9 +214,8 @@ def find_nearest_marker(the_chr: str, the_pos: str, marker_list: Dict) -> Tuple[ def process_perm_output(file_name: str) -> Tuple[List, float, float]: """Given base filename, read in R/qtl permutation output and calculate - suggestive and significant thresholds + suggestive and significant thresholds""" - """ perm_results = [] with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", "PERM_" + file_name), "r", encoding="utf-8") as the_file: -- cgit v1.2.3 From d7f9cd14089a23a3d3bab8b54320a9ef4844e560 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 12 Oct 2021 19:15:55 +0000 Subject: Fixed mypy typing errors --- gn3/computations/rqtl.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'gn3') diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 8d066ef..4327cac 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -105,8 +105,8 @@ def pairscan_for_figure(file_name: str) -> Dict: # Open the map file with the list of markers/pseudomarkers and their positions with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", "MAP_" + file_name), "r") as the_file: - chr_list = [] - pos_list = [] + chr_list = [] # type: List + pos_list = [] # type: List for i, line in enumerate(the_file): if i == 0: # Skip first line continue @@ -188,7 +188,7 @@ def build_marker_pos_dict(genotype_file: str) -> Dict: mb_exists = "Mb" in header_items pos_column = header_items.index("Mb") if mb_exists else header_items.index("cM") - the_markers = {} + the_markers = {"1": {}} # type: Dict[str, Dict] for line in lines[1:]: # The lines with markers line_items = line.split("\t") this_chr = line_items[0] -- cgit v1.2.3 From 47c38fe08f27a7de58430c2f7f5635a9ba5836c8 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 12 Oct 2021 20:54:47 +0000 Subject: Fixes pylint errors --- gn3/api/rqtl.py | 3 +- gn3/computations/rqtl.py | 84 ++++++++++++++++++++++++++++++++++-------------- 2 files changed, 62 insertions(+), 25 deletions(-) (limited to 'gn3') diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index b6c04eb..70ebe12 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -6,7 +6,8 @@ from flask import current_app from flask import jsonify from flask import request -from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_mapping, process_rqtl_pairscan, process_perm_output +from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_mapping, \ + process_rqtl_pairscan, process_perm_output from gn3.computations.gemma import do_paths_exist rqtl = Blueprint("rqtl", __name__) diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 4327cac..65ee6de 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -118,17 +118,18 @@ def pairscan_for_figure(file_name: str) -> Dict: return figure_data +def get_marker_list(map_file: str) -> List: + """ + Open the map file with the list of markers/pseudomarkers and create list of marker obs -def pairscan_for_table(file_name: str, geno_file: str) -> List: - """Given an output file name, read in R/qtl pair-scan results and return - a list of results to be used when generating the results table (which will include marker names)""" - table_data = [] + PARAMETERS: + map_file: The map file output by R/qtl containing marker/pseudomarker positions + """ - # Open the map file with the list of markers/pseudomarkers and create list of marker obs + marker_list = [] with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), - "output", "MAP_" + file_name), "r") as the_file: - marker_list = [] - for i, line in enumerate(the_file.readlines()[1:]): + "output", map_file), "r") as map_fh: + for line in map_fh.readlines()[1:]: line_items = [item.rstrip('\n') for item in line.split(",")] this_marker = { 'name': line_items[0], @@ -138,37 +139,72 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List: marker_list.append(this_marker) - # Get the list of original markers from the .geno file - original_markers = build_marker_pos_dict(geno_file) + return marker_list - # Open the file with the actual results and write the results as - # they will be displayed in the results table +def generate_table_rows(results_file: str, marker_list: List, original_markers: Dict) -> List: + """ + Open the file with the actual R/qtl pair-scan results and write them as + they will be displayed in the results table + + PARAMETERS: + results_file: The filename containing R/qtl pair-scan results + marker_list: List of marker/pseudomarker names/positions from results + original_markers: Dict of markers from the .geno file, for finding proximal/distal + markers to each pseudomarker + """ + + table_data = [] with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), - "output", file_name), "r") as the_file: + "output", results_file), "r") as the_file: for i, line in enumerate(the_file.readlines()[1:]): marker_1 = marker_list[i] - proximal1, distal1 = find_nearest_marker(marker_1['chr'], marker_1['pos'], original_markers) + marker_1['proximal'], marker_1['distal'] = find_nearest_marker(marker_1['chr'], + marker_1['pos'], + original_markers) line_items = [item.rstrip('\n') for item in line.split(",")] for j, item in enumerate(line_items[1:]): marker_2 = marker_list[j] - proximal2, distal2 = find_nearest_marker(marker_2['chr'], marker_2['pos'], original_markers) + marker_2['proximal'], marker_2['distal'] = find_nearest_marker(marker_2['chr'], + marker_2['pos'], + original_markers) try: lod_score = f"{float(item):.3f}" - except: + except ValueError: lod_score = f"{item}" - this_line = { - 'proximal1': proximal1, - 'distal1': distal1, + + table_data.append({ + 'proximal1': marker_1['proximal'], + 'distal1': marker_1['distal'], 'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM", 'lod': lod_score, - 'proximal2': proximal2, - 'distal2': distal2, + 'proximal2': marker_2['proximal'], + 'distal2': marker_2['distal'], 'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM" - } + }) - table_data.append(this_line) + return table_data + +def pairscan_for_table(file_name: str, geno_file: str) -> List: + """ + Read in R/qtl pair-scan results and return as List representing + table row contents + + PARAMETERS: + file_name: The filename containing R/qtl pair-scan results + geno_file: Filename of the genotype file (used to get marker positions) + """ + + # Open the map file with the list of markers/pseudomarkers and create list of marker obs + marker_list = get_marker_list("MAP_" + file_name) + + # Get the list of original markers from the .geno file + original_markers = build_marker_pos_dict(geno_file) + + # Open the file with the actual results and write the results as + # they will be displayed in the results table + table_data = generate_table_rows(file_name, marker_list, original_markers) - return sorted(table_data, key = lambda i: float(i['lod']), reverse=True)[:500] + return sorted(table_data, key=lambda i: float(i['lod']), reverse=True)[:500] def build_marker_pos_dict(genotype_file: str) -> Dict: """Gets list of markers and their positions from .geno file -- cgit v1.2.3