aboutsummaryrefslogtreecommitdiff
path: root/gn3/computations/rqtl.py
diff options
context:
space:
mode:
authorzsloan2021-09-23 22:33:10 +0000
committerzsloan2022-03-22 19:08:01 +0000
commita4a2886c16b578dd4f91520bf3fa9a1e9d801edb (patch)
tree6f97e7bda0e4a4220d5089aa174ebe99167b1459 /gn3/computations/rqtl.py
parent64aedf4202efc4e50648d496d2a8eed3fc828b2f (diff)
downloadgenenetwork3-a4a2886c16b578dd4f91520bf3fa9a1e9d801edb.tar.gz
Add functions for getting proximal/distal markers for each pseudomarker position in pair-scan results + return only the sorted top 500 results
Diffstat (limited to 'gn3/computations/rqtl.py')
-rw-r--r--gn3/computations/rqtl.py62
1 files changed, 58 insertions, 4 deletions
diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py
index b05a577..51ac3c7 100644
--- a/gn3/computations/rqtl.py
+++ b/gn3/computations/rqtl.py
@@ -1,5 +1,6 @@
"""Procedures related rqtl computations"""
import os
+from bisect import bisect
from typing import Dict, List, Union
import numpy as np
@@ -148,30 +149,83 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List:
marker_list.append(this_marker)
+ # Get the list of original markers from the .geno file
+ original_markers = build_marker_pos_list(geno_file)
+
# Open the file with the actual results and write the results as
# they will be displayed in the results table
with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
"output", file_name), "r") as the_file:
for i, line in enumerate(the_file.readlines()[1:]):
marker_1 = marker_list[i]
+ proximal1, distal1 = find_nearest_marker(marker_1['chr'], marker_1['pos'], original_markers)
line_items = [item.rstrip('\n') for item in line.split(",")]
for j, item in enumerate(line_items[1:]):
marker_2 = marker_list[j]
+ proximal2, distal2 = find_nearest_marker(marker_2['chr'], marker_2['pos'], original_markers)
try:
lod_score = f"{float(item):.3f}"
except:
lod_score = f"{item}"
this_line = {
- 'marker1': f"{marker_1['name']}",
+ 'proximal1': proximal1,
+ 'distal1': distal1,
'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM",
'lod': lod_score,
- 'marker2': f"{marker_2['name']}",
+ 'proximal2': proximal2,
+ 'distal2': distal2,
'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM"
}
- table_data.append(this_line)
- return table_data
+ table_data.append(this_line)
+
+ return sorted(table_data, key = lambda i: float(i['lod']), reverse=True)[:500]
+
+def build_marker_pos_list(genotype_file):
+ """
+ Gets list of markers and their positions from .geno file
+
+ Basically a pared-down version of parse_genotype_file for R/qtl pair-scan
+ """
+
+ with open(genotype_file, "r") as infile:
+ contents = infile.readlines()
+
+ # Get all lines after the metadata
+ lines = tuple(line for line in contents if
+ ((not line.strip().startswith("#")) and
+ (not line.strip().startswith("@")) and
+ (not line.strip() == "")))
+
+ header_items = lines[0].split("\t")
+ mb_exists = "Mb" in header_items
+ pos_column = header_items.index("Mb") if mb_exists else header_items.index("cM")
+
+ the_markers = {}
+ for line in lines[1:]: # The lines with markers
+ line_items = line.split("\t")
+ this_chr = line_items[0]
+ if this_chr not in the_markers:
+ the_markers[this_chr] = {}
+ the_markers[this_chr][str(float(line_items[pos_column]))] = line_items[1]
+
+ return the_markers
+
+def find_nearest_marker(the_chr, the_pos, marker_list):
+ """
+ Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results),
+ return the nearest real marker
+ """
+
+ pos_list = [float(pos) for pos in marker_list[the_chr]]
+
+ # Get the position of the pseudomarker in the list of markers for the chr
+ the_pos_index = bisect(pos_list, float(the_pos))
+
+ proximal_marker = marker_list[the_chr][str(pos_list[the_pos_index-1])]
+ distal_marker = marker_list[the_chr][str(pos_list[the_pos_index])]
+ return proximal_marker, distal_marker
def process_perm_output(file_name: str):
"""Given base filename, read in R/qtl permutation output and calculate