diff options
author | zsloan | 2021-09-23 22:33:10 +0000 |
---|---|---|
committer | zsloan | 2021-09-23 22:33:10 +0000 |
commit | 49a9150f2dafee19852d0e51ce089fdd54be7b5f (patch) | |
tree | 9a8d8e31ed51a0d934d31f0027f7c67b4626bba7 /gn3/computations/rqtl.py | |
parent | 23be50a195e648fa567777c4ab679d4c30cf6933 (diff) | |
download | genenetwork3-49a9150f2dafee19852d0e51ce089fdd54be7b5f.tar.gz |
Add functions for getting proximal/distal markers for each pseudomarker position in pair-scan results + return only the sorted top 500 results
Diffstat (limited to 'gn3/computations/rqtl.py')
-rw-r--r-- | gn3/computations/rqtl.py | 62 |
1 files changed, 58 insertions, 4 deletions
diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index ea88c29..d6fcd3f 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -1,5 +1,6 @@ """Procedures related rqtl computations""" import os +from bisect import bisect from typing import Dict, List, Union import numpy as np @@ -148,30 +149,83 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List: marker_list.append(this_marker) + # Get the list of original markers from the .geno file + original_markers = build_marker_pos_list(geno_file) + # Open the file with the actual results and write the results as # they will be displayed in the results table with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output", file_name), "r") as the_file: for i, line in enumerate(the_file.readlines()[1:]): marker_1 = marker_list[i] + proximal1, distal1 = find_nearest_marker(marker_1['chr'], marker_1['pos'], original_markers) line_items = [item.rstrip('\n') for item in line.split(",")] for j, item in enumerate(line_items[1:]): marker_2 = marker_list[j] + proximal2, distal2 = find_nearest_marker(marker_2['chr'], marker_2['pos'], original_markers) try: lod_score = f"{float(item):.3f}" except: lod_score = f"{item}" this_line = { - 'marker1': f"{marker_1['name']}", + 'proximal1': proximal1, + 'distal1': distal1, 'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM", 'lod': lod_score, - 'marker2': f"{marker_2['name']}", + 'proximal2': proximal2, + 'distal2': distal2, 'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM" } - table_data.append(this_line) - return table_data + table_data.append(this_line) + + return sorted(table_data, key = lambda i: float(i['lod']), reverse=True)[:500] + +def build_marker_pos_list(genotype_file): + """ + Gets list of markers and their positions from .geno file + + Basically a pared-down version of parse_genotype_file for R/qtl pair-scan + """ + + with open(genotype_file, "r") as infile: + contents = infile.readlines() + + # Get all lines after the metadata + lines = tuple(line for line in contents if + ((not line.strip().startswith("#")) and + (not line.strip().startswith("@")) and + (not line.strip() == ""))) + + header_items = lines[0].split("\t") + mb_exists = "Mb" in header_items + pos_column = header_items.index("Mb") if mb_exists else header_items.index("cM") + + the_markers = {} + for line in lines[1:]: # The lines with markers + line_items = line.split("\t") + this_chr = line_items[0] + if this_chr not in the_markers: + the_markers[this_chr] = {} + the_markers[this_chr][str(float(line_items[pos_column]))] = line_items[1] + + return the_markers + +def find_nearest_marker(the_chr, the_pos, marker_list): + """ + Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results), + return the nearest real marker + """ + + pos_list = [float(pos) for pos in marker_list[the_chr]] + + # Get the position of the pseudomarker in the list of markers for the chr + the_pos_index = bisect(pos_list, float(the_pos)) + + proximal_marker = marker_list[the_chr][str(pos_list[the_pos_index-1])] + distal_marker = marker_list[the_chr][str(pos_list[the_pos_index])] + return proximal_marker, distal_marker def process_perm_output(file_name: str): """Given base filename, read in R/qtl permutation output and calculate |