about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--gn3/computations/rqtl.py222
1 files changed, 146 insertions, 76 deletions
diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py
index 65ee6de..2f9c4a2 100644
--- a/gn3/computations/rqtl.py
+++ b/gn3/computations/rqtl.py
@@ -11,24 +11,34 @@ from gn3.commands import compose_rqtl_cmd
 from gn3.computations.gemma import generate_hash_of_string
 from gn3.fs_helpers import get_hash_of_files
 
-def generate_rqtl_cmd(rqtl_wrapper_cmd: str,
-                      rqtl_wrapper_kwargs: Dict,
-                      rqtl_wrapper_bool_kwargs: list) -> Dict:
+
+def generate_rqtl_cmd(
+    rqtl_wrapper_cmd: str,
+    rqtl_wrapper_kwargs: Dict,
+    rqtl_wrapper_bool_kwargs: list,
+) -> Dict:
     """Given the base rqtl_wrapper command and
-dict of keyword arguments, return the full rqtl_wrapper command and an
-output filename generated from a hash of the genotype and phenotype files"""
+    dict of keyword arguments, return the full rqtl_wrapper command and an
+    output filename generated from a hash of the genotype and phenotype files"""
 
     # Generate a hash from contents of the genotype and phenotype files
     _hash = get_hash_of_files(
-        [v for k, v in rqtl_wrapper_kwargs.items() if k in ["g", "p"]])
+        [v for k, v in rqtl_wrapper_kwargs.items() if k in ["g", "p"]]
+    )
 
     # Append to hash a hash of keyword arguments
     _hash += generate_hash_of_string(
-        ",".join([f"{k}:{v}" for k, v in rqtl_wrapper_kwargs.items() if k not in ["g", "p"]]))
+        ",".join(
+            [
+                f"{k}:{v}"
+                for k, v in rqtl_wrapper_kwargs.items()
+                if k not in ["g", "p"]
+            ]
+        )
+    )
 
     # Append to hash a hash of boolean keyword arguments
-    _hash += generate_hash_of_string(
-        ",".join(rqtl_wrapper_bool_kwargs))
+    _hash += generate_hash_of_string(",".join(rqtl_wrapper_bool_kwargs))
 
     # Temporarily substitute forward-slashes in hash with underscores
     _hash = _hash.replace("/", "_")
@@ -37,12 +47,12 @@ output filename generated from a hash of the genotype and phenotype files"""
     rqtl_wrapper_kwargs["filename"] = _output_filename
 
     return {
-        "output_file":
-        _output_filename,
-        "rqtl_cmd":
-        compose_rqtl_cmd(rqtl_wrapper_cmd=rqtl_wrapper_cmd,
-                         rqtl_wrapper_kwargs=rqtl_wrapper_kwargs,
-                         rqtl_wrapper_bool_kwargs=rqtl_wrapper_bool_kwargs)
+        "output_file": _output_filename,
+        "rqtl_cmd": compose_rqtl_cmd(
+            rqtl_wrapper_cmd=rqtl_wrapper_cmd,
+            rqtl_wrapper_kwargs=rqtl_wrapper_kwargs,
+            rqtl_wrapper_bool_kwargs=rqtl_wrapper_bool_kwargs,
+        ),
     }
 
 
@@ -52,8 +62,13 @@ def process_rqtl_mapping(file_name: str) -> List:
     marker_obs = []
     # Later I should probably redo this using csv.read to avoid the
     # awkwardness with removing quotes with [1:-1]
-    with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
-                           "output", file_name), "r", encoding="utf-8") as the_file:
+    with open(
+        os.path.join(
+            current_app.config.get("TMPDIR", "/tmp"), "output", file_name
+        ),
+        "r",
+        encoding="utf-8",
+    ) as the_file:
         for line in the_file:
             line_items = line.split(",")
             if line_items[1][1:-1] == "chr" or not line_items:
@@ -71,53 +86,70 @@ def process_rqtl_mapping(file_name: str) -> List:
                 "chr": the_chr,
                 "cM": float(line_items[2]),
                 "Mb": float(line_items[2]),
-                "lod_score": float(line_items[3])
+                "lod_score": float(line_items[3]),
             }
             marker_obs.append(this_marker)
 
     return marker_obs
 
+
 def process_rqtl_pairscan(file_name: str, geno_file: str) -> List:
     """Given an output file name, read in R/qtl pair-scan results and return
-a list of both the JSON needed for the d3panels figure and a list of results
-to be used when generating the results table (which will include marker names)"""
+    a list of both the JSON needed for the d3panels figure and a list of results
+    to be used when generating the results table (which will include marker names)"""
     figure_data = pairscan_for_figure(file_name)
     table_data = pairscan_for_table(file_name, geno_file)
 
     return [figure_data, table_data]
 
+
 def pairscan_for_figure(file_name: str) -> Dict:
     """Given an output file name, read in R/qtl pair-scan results and return
     the JSON needed for the d3panels figure"""
     figure_data = {}
 
     # Open the file with the actual results, written as a list of lists
-    with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
-                           "output", file_name), "r") as the_file:
+    with open(
+        os.path.join(
+            current_app.config.get("TMPDIR", "/tmp"), "output", file_name
+        ),
+        "r",
+        encoding="utf8",
+    ) as the_file:
         lod_results = []
         for i, line in enumerate(the_file):
-            if i == 0: # Skip first line
+            if i == 0:  # Skip first line
                 continue
-            line_items = [item.rstrip('\n') for item in line.split(",")]
-            lod_results.append(line_items[1:]) # Append all but first item in line
-        figure_data['lod'] = lod_results
-
-    # Open the map file with the list of markers/pseudomarkers and their positions
-    with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
-                           "output", "MAP_" + file_name), "r") as the_file:
-        chr_list = [] # type: List
-        pos_list = [] # type: List
+            line_items = [item.rstrip("\n") for item in line.split(",")]
+            # Append all but first item in line
+            lod_results.append(line_items[1:])
+        figure_data["lod"] = lod_results
+
+    # Open the map file with the list of markers/pseudomarkers and their
+    # positions
+    with open(
+        os.path.join(
+            current_app.config.get("TMPDIR", "/tmp"),
+            "output",
+            "MAP_" + file_name,
+        ),
+        "r",
+        encoding="utf8",
+    ) as the_file:
+        chr_list = []  # type: List
+        pos_list = []  # type: List
         for i, line in enumerate(the_file):
-            if i == 0: # Skip first line
+            if i == 0:  # Skip first line
                 continue
-            line_items = [item.rstrip('\n') for item in line.split(",")]
+            line_items = [item.rstrip("\n") for item in line.split(",")]
             chr_list.append(line_items[1][1:-1])
             pos_list.append(line_items[2])
-        figure_data['chr'] = chr_list
-        figure_data['pos'] = pos_list
+        figure_data["chr"] = chr_list
+        figure_data["pos"] = pos_list
 
     return figure_data
 
+
 def get_marker_list(map_file: str) -> List:
     """
     Open the map file with the list of markers/pseudomarkers and create list of marker obs
@@ -127,21 +159,31 @@ def get_marker_list(map_file: str) -> List:
     """
 
     marker_list = []
-    with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
-                           "output", map_file), "r") as map_fh:
+    with open(
+        os.path.join(
+            current_app.config.get("TMPDIR", "/tmp"), "output", map_file
+        ),
+        "r",
+        encoding="utf8",
+    ) as map_fh:
         for line in map_fh.readlines()[1:]:
-            line_items = [item.rstrip('\n') for item in line.split(",")]
+            line_items = [item.rstrip("\n") for item in line.split(",")]
             this_marker = {
-                'name': line_items[0],
-                'chr': line_items[1][1:-1], # Strip quotes from beginning and end of chr string
-                'pos': line_items[2]
+                "name": line_items[0],
+                "chr": line_items[1][
+                    1:-1
+                ],  # Strip quotes from beginning and end of chr string
+                "pos": line_items[2],
             }
 
             marker_list.append(this_marker)
 
     return marker_list
 
-def generate_table_rows(results_file: str, marker_list: List, original_markers: Dict) -> List:
+
+def generate_table_rows(
+    results_file: str, marker_list: List, original_markers: Dict
+) -> List:
     """
     Open the file with the actual R/qtl pair-scan results and write them as
     they will be displayed in the results table
@@ -152,38 +194,45 @@ def generate_table_rows(results_file: str, marker_list: List, original_markers:
     original_markers: Dict of markers from the .geno file, for finding proximal/distal
                       markers to each pseudomarker
     """
-
     table_data = []
-    with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
-                           "output", results_file), "r") as the_file:
+    with open(
+        os.path.join(
+            current_app.config.get("TMPDIR", "/tmp"), "output", results_file
+        ),
+        "r",
+        encoding="utf8",
+    ) as the_file:
         for i, line in enumerate(the_file.readlines()[1:]):
             marker_1 = marker_list[i]
-            marker_1['proximal'], marker_1['distal'] = find_nearest_marker(marker_1['chr'],
-                                                                           marker_1['pos'],
-                                                                           original_markers)
-            line_items = [item.rstrip('\n') for item in line.split(",")]
+            marker_1["proximal"], marker_1["distal"] = find_nearest_marker(
+                marker_1["chr"], marker_1["pos"], original_markers
+            )
+            line_items = [item.rstrip("\n") for item in line.split(",")]
             for j, item in enumerate(line_items[1:]):
                 marker_2 = marker_list[j]
-                marker_2['proximal'], marker_2['distal'] = find_nearest_marker(marker_2['chr'],
-                                                                               marker_2['pos'],
-                                                                               original_markers)
+                marker_2["proximal"], marker_2["distal"] = find_nearest_marker(
+                    marker_2["chr"], marker_2["pos"], original_markers
+                )
                 try:
                     lod_score = f"{float(item):.3f}"
                 except ValueError:
                     lod_score = f"{item}"
 
-                table_data.append({
-                    'proximal1': marker_1['proximal'],
-                    'distal1': marker_1['distal'],
-                    'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM",
-                    'lod': lod_score,
-                    'proximal2': marker_2['proximal'],
-                    'distal2': marker_2['distal'],
-                    'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM"
-                })
+                table_data.append(
+                    {
+                        "proximal1": marker_1["proximal"],
+                        "distal1": marker_1["distal"],
+                        "pos1": f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM",
+                        "lod": lod_score,
+                        "proximal2": marker_2["proximal"],
+                        "distal2": marker_2["distal"],
+                        "pos2": f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM",
+                    }
+                )
 
     return table_data
 
+
 def pairscan_for_table(file_name: str, geno_file: str) -> List:
     """
     Read in R/qtl pair-scan results and return as List representing
@@ -204,37 +253,50 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List:
     # they will be displayed in the results table
     table_data = generate_table_rows(file_name, marker_list, original_markers)
 
-    return sorted(table_data, key=lambda i: float(i['lod']), reverse=True)[:500]
+    return sorted(table_data, key=lambda i: float(i["lod"]), reverse=True)[:500]
+
 
 def build_marker_pos_dict(genotype_file: str) -> Dict:
     """Gets list of markers and their positions from .geno file
 
     Basically a pared-down version of parse_genotype_file for R/qtl pair-scan"""
 
-    with open(genotype_file, "r") as infile:
+    with open(genotype_file, "r", encoding="utf8") as infile:
         contents = infile.readlines()
 
     # Get all lines after the metadata
-    lines = tuple(line for line in contents if
-                  ((not line.strip().startswith("#")) and
-                   (not line.strip().startswith("@")) and
-                   (not line.strip() == "")))
+    lines = tuple(
+        line
+        for line in contents
+        if (
+            (not line.strip().startswith("#"))
+            and (not line.strip().startswith("@"))
+            and (not line.strip() == "")
+        )
+    )
 
     header_items = lines[0].split("\t")
     mb_exists = "Mb" in header_items
-    pos_column = header_items.index("Mb") if mb_exists else header_items.index("cM")
+    pos_column = (
+        header_items.index("Mb") if mb_exists else header_items.index("cM")
+    )
 
-    the_markers = {"1": {}} # type: Dict[str, Dict]
-    for line in lines[1:]: # The lines with markers
+    the_markers = {"1": {}}  # type: Dict[str, Dict]
+    for line in lines[1:]:  # The lines with markers
         line_items = line.split("\t")
         this_chr = line_items[0]
         if this_chr not in the_markers:
             the_markers[this_chr] = {}
-        the_markers[this_chr][str(float(line_items[pos_column]))] = line_items[1]
+        the_markers[this_chr][str(float(line_items[pos_column]))] = line_items[
+            1
+        ]
 
     return the_markers
 
-def find_nearest_marker(the_chr: str, the_pos: str, marker_list: Dict) -> Tuple[str, str]:
+
+def find_nearest_marker(
+    the_chr: str, the_pos: str, marker_list: Dict
+) -> Tuple[str, str]:
     """Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results),
     return the nearest real marker"""
 
@@ -243,18 +305,26 @@ def find_nearest_marker(the_chr: str, the_pos: str, marker_list: Dict) -> Tuple[
     # Get the position of the pseudomarker in the list of markers for the chr
     the_pos_index = bisect(pos_list, float(the_pos))
 
-    proximal_marker = marker_list[the_chr][str(pos_list[the_pos_index-1])]
+    proximal_marker = marker_list[the_chr][str(pos_list[the_pos_index - 1])]
     distal_marker = marker_list[the_chr][str(pos_list[the_pos_index])]
 
     return proximal_marker, distal_marker
 
+
 def process_perm_output(file_name: str) -> Tuple[List, float, float]:
     """Given base filename, read in R/qtl permutation output and calculate
     suggestive and significant thresholds"""
 
     perm_results = []
-    with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
-                           "output", "PERM_" + file_name), "r", encoding="utf-8") as the_file:
+    with open(
+        os.path.join(
+            current_app.config.get("TMPDIR", "/tmp"),
+            "output",
+            "PERM_" + file_name,
+        ),
+        "r",
+        encoding="utf-8",
+    ) as the_file:
         for i, line in enumerate(the_file):
             if i == 0:
                 # Skip header line