about summary refs log tree commit diff
path: root/gn3/computations
diff options
context:
space:
mode:
Diffstat (limited to 'gn3/computations')
-rw-r--r--gn3/computations/heatmap.py68
-rw-r--r--gn3/computations/qtlreaper.py92
2 files changed, 153 insertions, 7 deletions
diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
index 3c35029..e0ff05b 100644
--- a/gn3/computations/heatmap.py
+++ b/gn3/computations/heatmap.py
@@ -149,22 +149,22 @@ def heatmap_data(formd, search_result, conn: Any):
 
     def __retrieve_traitlist_and_datalist(threshold, fullname):
         trait = retrieve_trait_info(threshold, fullname, conn)
-        return (
-            trait,
-            export_trait_data(retrieve_trait_data(trait, conn), strainlist))
+        return (trait, retrieve_trait_data(trait, conn))
 
     traits_details = [
         __retrieve_traitlist_and_datalist(threshold, fullname)
         for fullname in search_result]
-    traits_list = map(lambda x: x[0], traits_details)
-    traits_data_list = map(lambda x: x[1], traits_details)
+    traits_list = tuple(x[0] for x in traits_details)
+    traits_data_list = [x[1] for x in traits_details]
+    exported_traits_data_list = tuple(
+        export_trait_data(td, strainlist) for td in traits_data_list)
 
     return {
         "target_description_checked": formd.formdata.getvalue(
             "targetDescriptionCheck", ""),
         "cluster_checked": cluster_checked,
         "slink_data": (
-            slink(cluster_traits(traits_data_list))
+            slink(cluster_traits(exported_traits_data_list))
             if cluster_checked else False),
         "sessionfile": formd.formdata.getvalue("session"),
         "genotype": genotype,
@@ -173,5 +173,59 @@ def heatmap_data(formd, search_result, conn: Any):
         "ppolar": formd.ppolar,
         "mpolar":formd.mpolar,
         "traits_list": traits_list,
-        "traits_data_list": traits_data_list
+        "traits_data_list": traits_data_list,
+        "exported_traits_data_list": exported_traits_data_list
     }
+
+def compute_heatmap_order(
+        slink_data, xoffset: int = 40, neworder: tuple = tuple()):
+    """
+    Compute the data used for drawing the heatmap proper from `slink_data`.
+
+    This function tries to reproduce the creation and update of the `neworder`
+    variable in
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L120
+    and in the `web.webqtl.heatmap.Heatmap.draw` function in GN1
+    """
+    d_1 = (0, 0, 0) # returned from self.draw in lines 391 and 399. This is just a placeholder
+
+    def __order_maker(norder, slnk_dt):
+        if isinstance(slnk_dt[0], int) and isinstance(slnk_dt[1], int):
+            return norder + (
+                (xoffset+20, slnk_dt[0]), (xoffset + 40, slnk_dt[1]))
+
+        if isinstance(slnk_dt[0], int):
+            return norder + ((xoffset + 20, slnk_dt[0]), )
+
+        if isinstance(slnk_dt[1], int):
+            return norder + ((xoffset + d_1[0] + 20, slnk_dt[1]), )
+
+        return __order_maker(__order_maker(norder, slnk_dt[0]), slnk_dt[1])
+
+    return __order_maker(neworder, slink_data)
+
+def retrieve_strains_and_values(orders, strainlist, traits_data_list):
+    """
+    Get the strains and their corresponding values from `strainlist` and
+    `traits_data_list`.
+
+    This migrates the code in
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L215-221
+    """
+    # This feels nasty! There's a lot of mutation of values here, that might
+    # indicate something untoward in the design of this function and its
+    # dependents  ==>  Review
+    strains = []
+    values = []
+    rets = []
+    for order in orders:
+        temp_val = traits_data_list[order[1]]
+        for i, strain in enumerate(strainlist):
+            if temp_val[i] is not None:
+                strains.append(strain)
+                values.append(temp_val[i])
+        rets.append([order, strains[:], values[:]])
+        strains = []
+        values = []
+
+    return rets
diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
new file mode 100644
index 0000000..c058e14
--- /dev/null
+++ b/gn3/computations/qtlreaper.py
@@ -0,0 +1,92 @@
+"""
+This module contains functions to interact with the `qtlreaper` utility for
+computation of QTLs.
+"""
+import os
+import random
+import string
+import subprocess
+from gn3.settings import TMPDIR, REAPER_COMMAND
+
+def random_string(length):
+    """Generate a random string of length `length`."""
+    return "".join(
+        random.choices(
+            string.ascii_letters + string.digits, k=length))
+
+def generate_traits_file(strains, trait_values, traits_filename):
+    """
+    Generate a traits file for use with `qtlreaper`.
+
+    PARAMETERS:
+    strains: A list of strains to use as the headers for the various columns.
+    trait_values: A list of lists of values for each trait and strain.
+    traits_filename: The tab-separated value to put the values in for
+        computation of QTLs.
+    """
+    header = "Trait\t{}\n".format("\t".join(strains))
+    data = [header] + [
+        "T{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
+        for i, t in enumerate(trait_values[:-1])] + [
+        "T{}\t{}".format(len(trait_values), "\t".join([str(i) for i in t]))
+        for t in trait_values[-1:]]
+    with open(traits_filename, "w") as outfile:
+        outfile.writelines(data)
+
+def create_output_directory(path: str):
+    """Create the output directory at `path` if it does not exist."""
+    try:
+        os.mkdir(path)
+    except OSError:
+        pass
+
+def run_reaper(
+        genotype_filename: str, traits_filename: str,
+        other_options: tuple = ("--n_permutations", "1000"),
+        separate_nperm_output: bool = False,
+        output_dir: str = TMPDIR):
+    """
+    Run the QTLReaper command to compute the QTLs.
+
+    PARAMETERS:
+    genotype_filename: The complete path to a genotype file to use in the QTL
+        computation.
+    traits_filename: A path to a file previously generated with the
+        `generate_traits_file` function in this module, to be used in the QTL
+        computation.
+    other_options: Other options to pass to the `qtlreaper` command to modify
+        the QTL computations.
+    separate_nperm_output: A flag indicating whether or not to provide a
+        separate output for the permutations computation. The default is False,
+        which means by default, no separate output file is created.
+    output_dir: A path to the directory where the outputs are put
+
+    RETURNS:
+    The function returns a tuple of the main output file, and the output file
+    for the permutation computations. If the `separate_nperm_output` is `False`,
+    the second value in the tuple returned is `None`.
+
+    RAISES:
+    The function will raise a `subprocess.CalledProcessError` exception in case
+    of any errors running the `qtlreaper` command.
+    """
+    create_output_directory("{}/qtlreaper".format(output_dir))
+    output_filename = "{}/qtlreaper/main_output_{}.txt".format(
+        output_dir, random_string(10))
+    output_list = ["--main_output", output_filename]
+    if separate_nperm_output:
+        permu_output_filename = "{}/qtlreaper/permu_output_{}.txt".format(
+            output_dir, random_string(10))
+        output_list = output_list + ["--permu_output", permu_output_filename]
+    else:
+        permu_output_filename = None
+
+    command_list = [
+        REAPER_COMMAND, "--geno", genotype_filename,
+        *other_options, # this splices the `other_options` list here
+        "--traits", traits_filename,
+        *output_list # this splices the `output_list` list here
+    ]
+
+    subprocess.run(command_list, check=True)
+    return (output_filename, permu_output_filename)