From a4a2886c16b578dd4f91520bf3fa9a1e9d801edb Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 23 Sep 2021 22:33:10 +0000
Subject: Add functions for getting proximal/distal markers for each
 pseudomarker position in pair-scan results + return only the sorted top 500
 results

---
 gn3/computations/rqtl.py | 62 ++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 58 insertions(+), 4 deletions(-)

(limited to 'gn3/computations/rqtl.py')

diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py
index b05a577..51ac3c7 100644
--- a/gn3/computations/rqtl.py
+++ b/gn3/computations/rqtl.py
@@ -1,5 +1,6 @@
 """Procedures related rqtl computations"""
 import os
+from bisect import bisect
 from typing import Dict, List, Union
 
 import numpy as np
@@ -148,30 +149,83 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List:
 
             marker_list.append(this_marker)
 
+    # Get the list of original markers from the .geno file
+    original_markers = build_marker_pos_list(geno_file)
+
     # Open the file with the actual results and write the results as
     # they will be displayed in the results table
     with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
                            "output", file_name), "r") as the_file:
         for i, line in enumerate(the_file.readlines()[1:]):
             marker_1 = marker_list[i]
+            proximal1, distal1 = find_nearest_marker(marker_1['chr'], marker_1['pos'], original_markers)
             line_items = [item.rstrip('\n') for item in line.split(",")]
             for j, item in enumerate(line_items[1:]):
                 marker_2 = marker_list[j]
+                proximal2, distal2 = find_nearest_marker(marker_2['chr'], marker_2['pos'], original_markers)
                 try:
                     lod_score = f"{float(item):.3f}"
                 except:
                     lod_score = f"{item}"
                 this_line = {
-                    'marker1': f"{marker_1['name']}",
+                    'proximal1': proximal1,
+                    'distal1': distal1,
                     'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM",
                     'lod': lod_score,
-                    'marker2': f"{marker_2['name']}",
+                    'proximal2': proximal2,
+                    'distal2': distal2,
                     'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM"
                 }
-            table_data.append(this_line)
 
-    return table_data
+                table_data.append(this_line)
+
+    return sorted(table_data, key = lambda i: float(i['lod']), reverse=True)[:500]
+
+def build_marker_pos_list(genotype_file):
+    """
+    Gets list of markers and their positions from .geno file
+
+    Basically a pared-down version of parse_genotype_file for R/qtl pair-scan
+    """
+
+    with open(genotype_file, "r") as infile:
+        contents = infile.readlines()
+
+    # Get all lines after the metadata
+    lines = tuple(line for line in contents if
+                  ((not line.strip().startswith("#")) and
+                   (not line.strip().startswith("@")) and
+                   (not line.strip() == "")))
+
+    header_items = lines[0].split("\t")
+    mb_exists = "Mb" in header_items
+    pos_column = header_items.index("Mb") if mb_exists else header_items.index("cM")
+
+    the_markers = {}
+    for line in lines[1:]: # The lines with markers
+        line_items = line.split("\t")
+        this_chr = line_items[0]
+        if this_chr not in the_markers:
+            the_markers[this_chr] = {}
+        the_markers[this_chr][str(float(line_items[pos_column]))] = line_items[1]
+
+    return the_markers
+
+def find_nearest_marker(the_chr, the_pos, marker_list):
+    """
+    Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results),
+    return the nearest real marker
+    """
+
+    pos_list = [float(pos) for pos in marker_list[the_chr]]
+
+    # Get the position of the pseudomarker in the list of markers for the chr
+    the_pos_index = bisect(pos_list, float(the_pos))
+
+    proximal_marker = marker_list[the_chr][str(pos_list[the_pos_index-1])]
+    distal_marker = marker_list[the_chr][str(pos_list[the_pos_index])]
 
+    return proximal_marker, distal_marker
 
 def process_perm_output(file_name: str):
     """Given base filename, read in R/qtl permutation output and calculate
-- 
cgit v1.2.3