aboutsummaryrefslogtreecommitdiff
path: root/gn3/computations
diff options
context:
space:
mode:
Diffstat (limited to 'gn3/computations')
-rw-r--r--gn3/computations/rqtl.py151
1 files changed, 140 insertions, 11 deletions
diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py
index 0433b3f..45232e3 100644
--- a/gn3/computations/rqtl.py
+++ b/gn3/computations/rqtl.py
@@ -1,6 +1,7 @@
"""Procedures related rqtl computations"""
import os
-from typing import Dict, List, Union
+from bisect import bisect
+from typing import Dict, List, Tuple, Union
import numpy as np
@@ -15,9 +16,7 @@ def generate_rqtl_cmd(rqtl_wrapper_cmd: str,
rqtl_wrapper_bool_kwargs: list) -> Dict:
"""Given the base rqtl_wrapper command and
dict of keyword arguments, return the full rqtl_wrapper command and an
-output filename generated from a hash of the genotype and phenotype files
-
- """
+output filename generated from a hash of the genotype and phenotype files"""
# Generate a hash from contents of the genotype and phenotype files
_hash = get_hash_of_files(
@@ -47,11 +46,9 @@ output filename generated from a hash of the genotype and phenotype files
}
-def process_rqtl_output(file_name: str) -> List:
+def process_rqtl_mapping(file_name: str) -> List:
"""Given an output file name, read in R/qtl results and return
- a List of marker objects
-
- """
+ a List of marker objects"""
marker_obs = []
# Later I should probably redo this using csv.read to avoid the
# awkwardness with removing quotes with [1:-1]
@@ -80,12 +77,144 @@ def process_rqtl_output(file_name: str) -> List:
return marker_obs
+def process_rqtl_pairscan(file_name: str, geno_file: str) -> List:
+    """Collect both views of an R/qtl pair-scan result for one output file.
+
+    Returns a two-item list: first the dict built by pairscan_for_figure
+    (the data for the d3panels figure), then the rows built by
+    pairscan_for_table (the results table, which includes marker names)"""
+    return [
+        pairscan_for_figure(file_name),
+        pairscan_for_table(file_name, geno_file),
+    ]
+
+def pairscan_for_figure(file_name: str) -> Dict:
+    """Build the data for the d3panels pair-scan figure from R/qtl output.
+
+    Reads <file_name> and MAP_<file_name> from the "output" directory under
+    the configured TMPDIR (default "/tmp") and returns a dict with the keys
+    'lod' (one list of string cells per result row), 'chr' and 'pos' (one
+    entry per row of the map file)"""
+    output_dir = os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output")
+
+    # Results file: skip the header line and drop the first field of each row
+    with open(os.path.join(output_dir, file_name), "r") as results_file:
+        lod_rows = [
+            [field.rstrip('\n') for field in row.split(",")][1:]
+            for row in results_file.readlines()[1:]
+        ]
-def process_perm_output(file_name: str):
+    # Map file: one row per marker/pseudomarker, again skipping the header line
+    with open(os.path.join(output_dir, "MAP_" + file_name), "r") as map_file:
+        map_rows = [
+            [field.rstrip('\n') for field in row.split(",")]
+            for row in map_file.readlines()[1:]
+        ]
+
+    return {
+        'lod': lod_rows,
+        # [1:-1] drops the first and last character (the quotes) of the chr field
+        'chr': [fields[1][1:-1] for fields in map_rows],
+        'pos': [fields[2] for fields in map_rows],
+    }
+
+def pairscan_for_table(file_name: str, geno_file: str) -> List:
+    """Given an output file name, read in R/qtl pair-scan results and return
+    a list of results to be used when generating the results table (which
+    will include marker names)
+
+    Reads MAP_<file_name> (markers/pseudomarkers with chromosome and position)
+    and <file_name> (the LOD matrix) from the "output" directory under the
+    configured TMPDIR, and looks up the flanking real markers of every
+    position in geno_file.  Returns at most 500 rows, highest LOD first; each
+    row is a dict with the keys proximal1, distal1, pos1, lod, proximal2,
+    distal2 and pos2.  Cells that are not numeric are kept verbatim in 'lod'
+    and sorted after all numeric scores instead of breaking the sort"""
+    output_dir = os.path.join(current_app.config.get("TMPDIR", "/tmp"), "output")
+
+    # Open the map file with the list of markers/pseudomarkers and create list of marker obs
+    with open(os.path.join(output_dir, "MAP_" + file_name), "r") as the_file:
+        marker_list = []
+        for line in the_file.readlines()[1:]:  # Skip the header line
+            line_items = [item.rstrip('\n') for item in line.split(",")]
+            marker_list.append({
+                'name': line_items[0],
+                'chr': line_items[1][1:-1],  # Strip quotes from beginning and end of chr string
+                'pos': line_items[2]
+            })
+
+    # Get the list of original markers from the .geno file
+    original_markers = build_marker_pos_dict(geno_file)
+
+    # Every marker is visited once per matrix cell, so look its flanking
+    # markers up only the first time it is needed
+    flanking_cache: Dict = {}
+
+    def flanking_markers(index: int) -> Tuple[str, str]:
+        if index not in flanking_cache:
+            marker = marker_list[index]
+            flanking_cache[index] = find_nearest_marker(
+                marker['chr'], marker['pos'], original_markers)
+        return flanking_cache[index]
+
+    # Open the file with the actual results and write the results as
+    # they will be displayed in the results table
+    table_data = []
+    with open(os.path.join(output_dir, file_name), "r") as the_file:
+        for i, line in enumerate(the_file.readlines()[1:]):
+            marker_1 = marker_list[i]
+            proximal1, distal1 = flanking_markers(i)
+            pos1 = f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM"
+            line_items = [item.rstrip('\n') for item in line.split(",")]
+            for j, item in enumerate(line_items[1:]):
+                marker_2 = marker_list[j]
+                proximal2, distal2 = flanking_markers(j)
+                try:
+                    lod_score = f"{float(item):.3f}"
+                except ValueError:
+                    # Not a number: show the cell exactly as R/qtl wrote it
+                    lod_score = f"{item}"
+                table_data.append({
+                    'proximal1': proximal1,
+                    'distal1': distal1,
+                    'pos1': pos1,
+                    'lod': lod_score,
+                    'proximal2': proximal2,
+                    'distal2': distal2,
+                    'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM"
+                })
+
+    return sorted(table_data,
+                  key=lambda row: _pairscan_lod_sort_key(row['lod']),
+                  reverse=True)[:500]
+
+
+def _pairscan_lod_sort_key(lod: str) -> float:
+    """Return a numeric sort key for a formatted LOD cell; cells that are not
+    numbers, and NaN, rank below every real score"""
+    try:
+        score = float(lod)
+    except ValueError:
+        return float("-inf")
+    # NaN compares unequal to itself and would make the sort order arbitrary
+    return float("-inf") if score != score else score
+
+def build_marker_pos_dict(genotype_file: str) -> Dict:
+    """Gets list of markers and their positions from .geno file
+
+    Basically a pared-down version of parse_genotype_file for R/qtl pair-scan
+
+    Returns a dict keyed by chromosome (first column); each value maps
+    str(float(position)) to the marker name (second column).  The position
+    is read from the "Mb" column when the header has one, otherwise from the
+    "cM" column; a ValueError is raised when the header has neither.
+
+    NOTE(review): pairscan_for_table labels pseudomarker positions as cM
+    while this prefers Mb -- confirm both use the same unit"""
+
+    with open(genotype_file, "r") as infile:
+        # Keep the header and the marker rows only: drop blank lines and the
+        # metadata lines starting with "#" or "@".  Line endings are removed
+        # so that the last column of the header can still be matched by name
+        lines = [line.rstrip("\r\n") for line in infile
+                 if line.strip() and not line.strip().startswith(("#", "@"))]
+
+    header_items = lines[0].split("\t")
+    pos_column = (header_items.index("Mb") if "Mb" in header_items
+                  else header_items.index("cM"))
+
+    the_markers: Dict = {}
+    for line in lines[1:]:  # The lines with markers
+        line_items = line.split("\t")
+        # Positions are normalised through float() so that lookups done with
+        # str(float(...)) elsewhere find the same key
+        position = str(float(line_items[pos_column]))
+        the_markers.setdefault(line_items[0], {})[position] = line_items[1]
+
+    return the_markers
+
+def find_nearest_marker(the_chr: str, the_pos: str, marker_list: Dict) -> Tuple[str, str]:
+    """Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results),
+    return the names of the real markers flanking it as (proximal, distal)
+
+    marker_list maps chromosome -> {position string: marker name}.  The
+    proximal marker is the last one at or before the_pos and the distal
+    marker is the first one after it.  When the_pos lies before the first or
+    at/after the last marker of the chromosome, that end marker is returned
+    for both.  Raises KeyError for an unknown chromosome"""
+
+    # bisect needs ascending positions, which dict order does not guarantee
+    markers = sorted((float(pos), name) for pos, name in marker_list[the_chr].items())
+    positions = [pos for pos, _ in markers]
+
+    # Get the position of the pseudomarker in the list of markers for the chr
+    the_pos_index = bisect(positions, float(the_pos))
+
+    # Clamp both neighbours so positions outside the marker range neither
+    # wrap around to the other end of the chromosome nor run off the list
+    proximal_marker = markers[max(the_pos_index - 1, 0)][1]
+    distal_marker = markers[min(the_pos_index, len(markers) - 1)][1]
+
+    return proximal_marker, distal_marker
+
+def process_perm_output(file_name: str) -> Tuple[List, float, float]:
"""Given base filename, read in R/qtl permutation output and calculate
- suggestive and significant thresholds
+ suggestive and significant thresholds"""
- """
perm_results = []
with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
"output", "PERM_" + file_name), "r") as the_file: