diff options
Diffstat (limited to 'gn3/computations')
-rw-r--r-- | gn3/computations/rqtl.py | 151 |
1 files changed, 140 insertions, 11 deletions
diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 0433b3f..45232e3 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -1,6 +1,7 @@ """Procedures related rqtl computations""" import os -from typing import Dict, List, Union +from bisect import bisect +from typing import Dict, List, Tuple, Union import numpy as np @@ -15,9 +16,7 @@ def generate_rqtl_cmd(rqtl_wrapper_cmd: str, rqtl_wrapper_bool_kwargs: list) -> Dict: """Given the base rqtl_wrapper command and dict of keyword arguments, return the full rqtl_wrapper command and an -output filename generated from a hash of the genotype and phenotype files - - """ +output filename generated from a hash of the genotype and phenotype files""" # Generate a hash from contents of the genotype and phenotype files _hash = get_hash_of_files( @@ -47,11 +46,9 @@ output filename generated from a hash of the genotype and phenotype files } -def process_rqtl_output(file_name: str) -> List: +def process_rqtl_mapping(file_name: str) -> List: """Given an output file name, read in R/qtl results and return - a List of marker objects - - """ + a List of marker objects""" marker_obs = [] # Later I should probably redo this using csv.read to avoid the # awkwardness with removing quotes with [1:-1] @@ -80,12 +77,144 @@ def process_rqtl_output(file_name: str) -> List: return marker_obs +def process_rqtl_pairscan(file_name: str, geno_file: str) -> List: + """Given an output file name, read in R/qtl pair-scan results and return +a list of both the JSON needed for the d3panels figure and a list of results +to be used when generating the results table (which will include marker names)""" + figure_data = pairscan_for_figure(file_name) + table_data = pairscan_for_table(file_name, geno_file) + + return [figure_data, table_data] + +def pairscan_for_figure(file_name: str) -> Dict: + """Given an output file name, read in R/qtl pair-scan results and return + the JSON needed for the d3panels figure""" + figure_data = {} + + # Open the file with the actual results, written as a list of lists + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", file_name), "r") as the_file: + lod_results = [] + for i, line in enumerate(the_file): + if i == 0: # Skip first line + continue + line_items = [item.rstrip('\n') for item in line.split(",")] + lod_results.append(line_items[1:]) # Append all but first item in line + figure_data['lod'] = lod_results -def process_perm_output(file_name: str): + # Open the map file with the list of markers/pseudomarkers and their positions + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", "MAP_" + file_name), "r") as the_file: + chr_list = [] + pos_list = [] + for i, line in enumerate(the_file): + if i == 0: # Skip first line + continue + line_items = [item.rstrip('\n') for item in line.split(",")] + chr_list.append(line_items[1][1:-1]) + pos_list.append(line_items[2]) + figure_data['chr'] = chr_list + figure_data['pos'] = pos_list + + return figure_data + +def pairscan_for_table(file_name: str, geno_file: str) -> List: + """Given an output file name, read in R/qtl pair-scan results and return + a list of results to be used when generating the results table (which will include marker names)""" + table_data = [] + + # Open the map file with the list of markers/pseudomarkers and create list of marker obs + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", "MAP_" + file_name), "r") as the_file: + marker_list = [] + for i, line in enumerate(the_file.readlines()[1:]): + line_items = [item.rstrip('\n') for item in line.split(",")] + this_marker = { + 'name': line_items[0], + 'chr': line_items[1][1:-1], # Strip quotes from beginning and end of chr string + 'pos': line_items[2] + } + + marker_list.append(this_marker) + + # Get the list of original markers from the .geno file + original_markers = build_marker_pos_dict(geno_file) + + # Open the file with the actual results and write the results as + # they will be displayed in the results table + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", file_name), "r") as the_file: + for i, line in enumerate(the_file.readlines()[1:]): + marker_1 = marker_list[i] + proximal1, distal1 = find_nearest_marker(marker_1['chr'], marker_1['pos'], original_markers) + line_items = [item.rstrip('\n') for item in line.split(",")] + for j, item in enumerate(line_items[1:]): + marker_2 = marker_list[j] + proximal2, distal2 = find_nearest_marker(marker_2['chr'], marker_2['pos'], original_markers) + try: + lod_score = f"{float(item):.3f}" + except: + lod_score = f"{item}" + this_line = { + 'proximal1': proximal1, + 'distal1': distal1, + 'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM", + 'lod': lod_score, + 'proximal2': proximal2, + 'distal2': distal2, + 'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM" + } + + table_data.append(this_line) + + return sorted(table_data, key = lambda i: float(i['lod']), reverse=True)[:500] + +def build_marker_pos_dict(genotype_file: str) -> Dict: + """Gets list of markers and their positions from .geno file + + Basically a pared-down version of parse_genotype_file for R/qtl pair-scan""" + + with open(genotype_file, "r") as infile: + contents = infile.readlines() + + # Get all lines after the metadata + lines = tuple(line for line in contents if + ((not line.strip().startswith("#")) and + (not line.strip().startswith("@")) and + (not line.strip() == ""))) + + header_items = lines[0].split("\t") + mb_exists = "Mb" in header_items + pos_column = header_items.index("Mb") if mb_exists else header_items.index("cM") + + the_markers = {} + for line in lines[1:]: # The lines with markers + line_items = line.split("\t") + this_chr = line_items[0] + if this_chr not in the_markers: + the_markers[this_chr] = {} + the_markers[this_chr][str(float(line_items[pos_column]))] = line_items[1] + + return the_markers + +def find_nearest_marker(the_chr: str, the_pos: str, marker_list: Dict) -> Tuple[str, str]: + """Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results), + return the nearest real marker""" + + pos_list = [float(pos) for pos in marker_list[the_chr]] + + # Get the position of the pseudomarker in the list of markers for the chr + the_pos_index = bisect(pos_list, float(the_pos)) + + proximal_marker = marker_list[the_chr][str(pos_list[the_pos_index-1])] + distal_marker = marker_list[the_chr][str(pos_list[the_pos_index])] + + return proximal_marker, distal_marker + +def process_perm_output(file_name: str) -> Tuple[List, float, float]: """Given base filename, read in R/qtl permutation output and calculate - suggestive and significant thresholds + suggestive and significant thresholds""" - """ perm_results = [] with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", "PERM_" + file_name), "r") as the_file: |