From 983acfdfc523677b4d7501287a000b7fd52a2c39 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Mon, 30 Aug 2021 07:00:38 +0300
Subject: Implement module for interfacing with rust-qtlreaper

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/computations/heatmap.py: move `generate_traits_file` function to new
  module
* gn3/computations/qtlreaper.py: new module to interface with the
  `rust-qtlreaper` utility.
* gn3/settings.py: Provide setting for the path to the `rust-qtlreaper`
  utility
* qtlfilesexport.py: Move `random_string` function to new module. Update to
  use functions in new module.

  Provide a module with functions to be used to interface with
  `rust-qtlreaper`. This module essentially contains all the functions that
  are needed to build the files needed for, and to run the qtlreaper utility.
---
 gn3/computations/qtlreaper.py | 88 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)
 create mode 100644 gn3/computations/qtlreaper.py

(limited to 'gn3/computations/qtlreaper.py')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
new file mode 100644
index 0000000..49d363b
--- /dev/null
+++ b/gn3/computations/qtlreaper.py
@@ -0,0 +1,88 @@
+"""
+This module contains functions to interact with the `qtlreaper` utility for
+computation of QTLs.
+"""
+import os
+import random
+import string
+import subprocess
+from gn3.settings import TMPDIR, REAPER_COMMAND
+
+def random_string(length):
+    """Generate a random string of length `length`."""
+    return "".join(
+        random.choices(
+            string.ascii_letters + string.digits, k=length))
+
+def generate_traits_file(strains, trait_values, traits_filename):
+    """
+    Generate a traits file for use with `qtlreaper`.
+
+    PARAMETERS:
+    strains: A list of strains to use as the headers for the various columns.
+    trait_values: A list of lists of values for each trait and strain.
+    traits_filename: The tab-separated value to put the values in for
+        computation of QTLs.
+    """
+    header = "Traits\t{}\n".format("\t".join(strains))
+    data = [header] + [
+        "T{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
+        for i, t in enumerate(trait_values)]
+    with open(traits_filename, "w") as outfile:
+        outfile.writelines(data)
+
+def create_output_directory(path: str):
+    """Create the output directory at `path` if it does not exist."""
+    try:
+        os.mkdir(path)
+    except OSError:
+        pass
+
+def run_reaper(
+        genotype_filename: str, traits_filename: str,
+        other_options: tuple = ("--n_permutations", 1000),
+        separate_nperm_output: bool = False,
+        output_dir: str = TMPDIR):
+    """
+    Run the QTLReaper command to compute the QTLs.
+
+    PARAMETERS:
+    genotype_filename: The complete path to a genotype file to use in the QTL
+        computation.
+    traits_filename: A path to a file previously generated with the
+        `generate_traits_file` function in this module, to be used in the QTL
+        computation.
+    other_options: Other options to pass to the `qtlreaper` command to modify
+        the QTL computations.
+    separate_nperm_output: A flag indicating whether or not to provide a
+        separate output for the permutations computation. The default is False,
+        which means by default, no separate output file is created.
+    output_dir: A path to the directory where the outputs are put
+
+    RETURNS:
+    The function returns a tuple of the main output file, and the output file
+    for the permutation computations. If the `separate_nperm_output` is `False`,
+    the second value in the tuple returned is `None`.
+
+    RAISES:
+    The function will raise a `subprocess.CalledProcessError` exception in case
+    of any errors running the `qtlreaper` command.
+    """
+    create_output_directory(output_dir)
+    output_filename = "{}/qtlreaper/main_output_{}.txt".format(
+        output_dir, random_string(10))
+    output_list = ["--main_output", output_filename]
+    if separate_nperm_output:
+        permu_output_filename = "{}/qtlreaper/permu_output_{}.txt".format(
+            output_dir, random_string(10))
+        output_list = output_list + ["--permu_output", permu_output_filename]
+    else:
+        permu_output_filename = None
+
+    command_list = [
+        REAPER_COMMAND, "--geno", genotype_filename,
+        *other_options, # this splices the `other_options` list here
+        "--traits", traits_filename, "--main_output", output_filename]
+
+    subprocess.run(command_list, check=True)
+    return (output_filename, permu_output_filename)
-- 
cgit v1.2.3


From bb1fd69fa24cec4ff605450d241601b3f0ced8cb Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Mon, 30 Aug 2021 09:50:44 +0300
Subject: Remove empty line

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* Remove empty line at the end of the traits file
---
 gn3/computations/qtlreaper.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'gn3/computations/qtlreaper.py')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 49d363b..a88659e 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -27,7 +27,9 @@ def generate_traits_file(strains, trait_values, traits_filename):
     header = "Traits\t{}\n".format("\t".join(strains))
     data = [header] + [
         "T{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
-        for i, t in enumerate(trait_values)]
+        for i, t in enumerate(trait_values[:-1])] + [
+        "T{}\t{}".format(len(trait_values), "\t".join([str(i) for i in t]))
+        for t in trait_values[-1:]]
     with open(traits_filename, "w") as outfile:
         outfile.writelines(data)
 
-- 
cgit v1.2.3


From 58f59b8f7df82969b58a604070aec095d17e0501 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Mon, 30 Aug 2021 11:44:37 +0300
Subject: Fix issues with traits file format

* README.md: update header: Traits ==> Trait
* gn3/computations/qtlreaper.py: update header: Traits ==> Trait
* qtlfilesexport.py: Choose only BXD strains

  Rename the first column header from "Traits" to "Trait" to correspond with
  what `rust-qtlreaper` expects.

  Choose only the BXD strains for the proof-of-concept example - this helped
  bring out the fact that the traits file SHOULD NOT contain a strain column
  for a strain that does not exist in the genotype file in consideration.

  If the traits file has a strain column which does not exist in the genotype
  file, then `rust-qtlreaper` fails with a panic, since, from what I can tell,
  it tries to get a value from the genotype file for the non-existent strain,
  which results in a `None` value. Subsequent attempts at running an operation
  on that `None` value lead to the panic.
---
 README.md                     |  4 +++-
 gn3/computations/qtlreaper.py |  2 +-
 qtlfilesexport.py             | 31 ++++++++++++++++++++++++++++++-
 3 files changed, 34 insertions(+), 3 deletions(-)

(limited to 'gn3/computations/qtlreaper.py')

diff --git a/README.md b/README.md
index 0e0e509..b54015f 100644
--- a/README.md
+++ b/README.md
@@ -136,8 +136,10 @@ Under the **"Trait"** column, the traits are numbered from **T1** to **T<n>** wh
 As an example, you could end up with a trait file like the following:
 
 ```txt
-Traits	BXD27	BXD32	DBA/2J	BXD21	...
+Trait	BXD27	BXD32	DBA/2J	BXD21	...
 T1	10.5735	9.27408	9.48255	9.18253	...
 T2	6.4471	6.7191	5.98015	6.68051	...
 ...
 ```
+
+It is very important that the column header names for the strains correspond to the genotype file used.
diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index a88659e..9b13a55 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -24,7 +24,7 @@ def generate_traits_file(strains, trait_values, traits_filename):
     traits_filename: The tab-separated value to put the values in for
         computation of QTLs.
     """
-    header = "Traits\t{}\n".format("\t".join(strains))
+    header = "Trait\t{}\n".format("\t".join(strains))
     data = [header] + [
         "T{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
         for i, t in enumerate(trait_values[:-1])] + [
diff --git a/qtlfilesexport.py b/qtlfilesexport.py
index 0543dc9..adc5e77 100644
--- a/qtlfilesexport.py
+++ b/qtlfilesexport.py
@@ -41,7 +41,36 @@ def main():
         retrieve_trait_info(threshold, fullname, conn)
         for fullname in trait_fullnames()]
     traits_data_list = [retrieve_trait_data(t, conn) for t in traits]
-    strains = list(set([k for td in traits_data_list for k in td["data"].keys()]))
+    # strains = list(set([k for td in traits_data_list for k in td["data"].keys()]))
+    strains = [# Use only the strains in the BXD.geno genotype file
+        "BXD1", "BXD2", "BXD5", "BXD6", "BXD8", "BXD9", "BXD11", "BXD12",
+        "BXD13", "BXD14", "BXD15", "BXD16", "BXD18", "BXD19", "BXD20", "BXD21",
+        "BXD22", "BXD23", "BXD24", "BXD24a", "BXD25", "BXD27", "BXD28", "BXD29",
+        "BXD30", "BXD31", "BXD32", "BXD33", "BXD34", "BXD35", "BXD36", "BXD37",
+        "BXD38", "BXD39", "BXD40", "BXD41", "BXD42", "BXD43", "BXD44", "BXD45",
+        "BXD48", "BXD48a", "BXD49", "BXD50", "BXD51", "BXD52", "BXD53", "BXD54",
+        "BXD55", "BXD56", "BXD59", "BXD60", "BXD61", "BXD62", "BXD63", "BXD64",
+        "BXD65", "BXD65a", "BXD65b", "BXD66", "BXD67", "BXD68", "BXD69",
+        "BXD70", "BXD71", "BXD72", "BXD73", "BXD73a", "BXD73b", "BXD74",
+        "BXD75", "BXD76", "BXD77", "BXD78", "BXD79", "BXD81", "BXD83", "BXD84",
+        "BXD85", "BXD86", "BXD87", "BXD88", "BXD89", "BXD90", "BXD91", "BXD93",
+        "BXD94", "BXD95", "BXD98", "BXD99", "BXD100", "BXD101", "BXD102",
+        "BXD104", "BXD105", "BXD106", "BXD107", "BXD108", "BXD109", "BXD110",
+        "BXD111", "BXD112", "BXD113", "BXD114", "BXD115", "BXD116", "BXD117",
+        "BXD119", "BXD120", "BXD121", "BXD122", "BXD123", "BXD124", "BXD125",
+        "BXD126", "BXD127", "BXD128", "BXD128a", "BXD130", "BXD131", "BXD132",
+        "BXD133", "BXD134", "BXD135", "BXD136", "BXD137", "BXD138", "BXD139",
+        "BXD141", "BXD142", "BXD144", "BXD145", "BXD146", "BXD147", "BXD148",
+        "BXD149", "BXD150", "BXD151", "BXD152", "BXD153", "BXD154", "BXD155",
+        "BXD156", "BXD157", "BXD160", "BXD161", "BXD162", "BXD165", "BXD168",
+        "BXD169", "BXD170", "BXD171", "BXD172", "BXD173", "BXD174", "BXD175",
+        "BXD176", "BXD177", "BXD178", "BXD180", "BXD181", "BXD183", "BXD184",
+        "BXD186", "BXD187", "BXD188", "BXD189", "BXD190", "BXD191", "BXD192",
+        "BXD193", "BXD194", "BXD195", "BXD196", "BXD197", "BXD198", "BXD199",
+        "BXD200", "BXD201", "BXD202", "BXD203", "BXD204", "BXD205", "BXD206",
+        "BXD207", "BXD208", "BXD209", "BXD210", "BXD211", "BXD212", "BXD213",
+        "BXD214", "BXD215", "BXD216", "BXD217", "BXD218", "BXD219", "BXD220"
+    ]
     exported_traits_data_list = [
         export_trait_data(td, strains) for td in traits_data_list]
     slinked = slink(cluster_traits(exported_traits_data_list))
-- 
cgit v1.2.3


From 6c872943597f3664cca77abbdf56f074fc5231e6 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Tue, 31 Aug 2021 06:56:35 +0300
Subject: Fix bugs with `run_reaper` function

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/computations/qtlreaper.py: Fix some bugs
* qtlfilesexport.py: Test out running rust-qtlreaper

  Test out the qtlreaper interface code and fix some bugs caught in the
  process.
---
 gn3/computations/qtlreaper.py | 8 +++++---
 qtlfilesexport.py             | 7 +++++++
 2 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'gn3/computations/qtlreaper.py')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 9b13a55..c058e14 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -42,7 +42,7 @@ def create_output_directory(path: str):
 
 def run_reaper(
         genotype_filename: str, traits_filename: str,
-        other_options: tuple = ("--n_permutations", 1000),
+        other_options: tuple = ("--n_permutations", "1000"),
         separate_nperm_output: bool = False,
         output_dir: str = TMPDIR):
     """
@@ -70,7 +70,7 @@ def run_reaper(
     The function will raise a `subprocess.CalledProcessError` exception in case
     of any errors running the `qtlreaper` command.
     """
-    create_output_directory(output_dir)
+    create_output_directory("{}/qtlreaper".format(output_dir))
     output_filename = "{}/qtlreaper/main_output_{}.txt".format(
         output_dir, random_string(10))
     output_list = ["--main_output", output_filename]
@@ -84,7 +84,9 @@ def run_reaper(
     command_list = [
         REAPER_COMMAND, "--geno", genotype_filename,
         *other_options, # this splices the `other_options` list here
-        "--traits", traits_filename, "--main_output", output_filename]
+        "--traits", traits_filename,
+        *output_list # this splices the `output_list` list here
+    ]
 
     subprocess.run(command_list, check=True)
     return (output_filename, permu_output_filename)
diff --git a/qtlfilesexport.py b/qtlfilesexport.py
index 1db4ab6..799de31 100644
--- a/qtlfilesexport.py
+++ b/qtlfilesexport.py
@@ -9,6 +9,7 @@ replacing the variables in the angled brackets with the appropriate values
 """
 from gn3.computations.slink import slink
 from gn3.db_utils import database_connector
+from gn3.computations.qtlreaper import run_reaper
 from gn3.computations.heatmap import export_trait_data
 from gn3.db.traits import retrieve_trait_data, retrieve_trait_info
 from gn3.db.genotypes import build_genotype_file, load_genotype_samples
@@ -57,5 +58,11 @@ def main():
     generate_traits_file(strains_values, trait_values, traits_filename)
     print("Generated file: {}".format(traits_filename))
 
+    main_output, permutations_output = run_reaper(
+        genotype_filename, traits_filename, separate_nperm_output=True)
+
+    print("Main output: {}, Permutation output: {}".format(
+        main_output, permutations_output))
+
 if __name__ == "__main__":
     main()
-- 
cgit v1.2.3


From 64ce38b45839b6305b009f6e28b0f852409e9bda Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Tue, 31 Aug 2021 10:45:11 +0300
Subject: Parse QTLReaper outputs

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/computations/qtlreaper.py: parse output files
* tests/unit/computations/data/qtlreaper/main_output_sample.txt: sample test
  data
* tests/unit/computations/data/qtlreaper/permu_output_sample.txt: sample test
  data
* tests/unit/computations/test_qtlreaper.py: add tests

  Add code to parse the QTLReaper output data files.
---
 gn3/computations/qtlreaper.py                      | 18 ++++++
 .../data/qtlreaper/main_output_sample.txt          | 11 ++++
 .../data/qtlreaper/permu_output_sample.txt         | 27 ++++++++
 tests/unit/computations/test_qtlreaper.py          | 74 ++++++++++++++++++++++
 4 files changed, 130 insertions(+)
 create mode 100644 tests/unit/computations/data/qtlreaper/main_output_sample.txt
 create mode 100644 tests/unit/computations/data/qtlreaper/permu_output_sample.txt
 create mode 100644 tests/unit/computations/test_qtlreaper.py

(limited to 'gn3/computations/qtlreaper.py')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index c058e14..3b8e4db 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -90,3 +90,21 @@ def run_reaper(
 
     subprocess.run(command_list, check=True)
     return (output_filename, permu_output_filename)
+
+
+def parse_reaper_main_results(results_file):
+    with open(results_file, "r") as infile:
+        lines = infile.readlines()
+
+    def __parse_line(line):
+        items = line.strip().split("\t")
+        return items[0:2] + [float(item) for item in items[2:]]
+
+    header = lines[0].strip().split("\t")
+    return [dict(zip(header, __parse_line(line))) for line in lines[1:]]
+
+def parse_reaper_permutation_results(results_file):
+    with open(results_file, "r") as infile:
+        lines = infile.readlines()
+
+    return [float(line.strip()) for line in lines]
diff --git a/tests/unit/computations/data/qtlreaper/main_output_sample.txt b/tests/unit/computations/data/qtlreaper/main_output_sample.txt
new file mode 100644
index 0000000..12b11b4
--- /dev/null
+++ b/tests/unit/computations/data/qtlreaper/main_output_sample.txt
@@ -0,0 +1,11 @@
+ID	Locus	Chr	cM	Mb	LRS	Additive	pValue
+T1	rs31443144	1	1.500	3.010	0.500	-0.074	1.000
+T1	rs6269442	1	1.500	3.492	0.500	-0.074	1.000
+T1	rs32285189	1	1.630	3.511	0.500	-0.074	1.000
+T1	rs258367496	1	1.630	3.660	0.500	-0.074	1.000
+T1	rs32430919	1	1.750	3.777	0.500	-0.074	1.000
+T1	rs36251697	1	1.880	3.812	0.500	-0.074	1.000
+T1	rs30658298	1	2.010	4.431	0.500	-0.074	1.000
+T1	rs51852623	1	2.010	4.447	0.500	-0.074	1.000
+T1	rs31879829	1	2.140	4.519	0.500	-0.074	1.000
+T1	rs36742481	1	2.140	4.776	0.500	-0.074	1.000
diff --git a/tests/unit/computations/data/qtlreaper/permu_output_sample.txt b/tests/unit/computations/data/qtlreaper/permu_output_sample.txt
new file mode 100644
index 0000000..64cff07
--- /dev/null
+++ b/tests/unit/computations/data/qtlreaper/permu_output_sample.txt
@@ -0,0 +1,27 @@
+4.44174
+5.03825
+5.08167
+5.18119
+5.18578
+5.24563
+5.24619
+5.24619
+5.27961
+5.28228
+5.43903
+5.50188
+5.51694
+5.56830
+5.63874
+5.71346
+5.71936
+5.74275
+5.76764
+5.79815
+5.81671
+5.82775
+5.89659
+5.92117
+5.93396
+5.93396
+5.94957
diff --git a/tests/unit/computations/test_qtlreaper.py b/tests/unit/computations/test_qtlreaper.py
new file mode 100644
index 0000000..ec23664
--- /dev/null
+++ b/tests/unit/computations/test_qtlreaper.py
@@ -0,0 +1,74 @@
+"""Module contains tests for gn3.computations.qtlreaper"""
+import os
+from unittest import TestCase
+from gn3.computations.qtlreaper import (
+    parse_reaper_main_results, parse_reaper_permutation_results)
+
+class TestQTLReaper(TestCase):
+    """Class for testing qtlreaper interface functions."""
+
+    def test_parse_reaper_main_results(self):
+        self.assertEqual(
+            parse_reaper_main_results(
+                "tests/unit/computations/data/qtlreaper/main_output_sample.txt"),
+            [
+                {
+                    "ID": "T1", "Locus": "rs31443144", "Chr": 1, "cM": 1.500,
+                    "Mb": 3.010, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs6269442", "Chr": 1, "cM": 1.500,
+                    "Mb": 3.492, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs32285189", "Chr": 1, "cM": 1.630,
+                    "Mb": 3.511, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs258367496", "Chr": 1, "cM": 1.630,
+                    "Mb": 3.660, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs32430919", "Chr": 1, "cM": 1.750,
+                    "Mb": 3.777, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs36251697", "Chr": 1, "cM": 1.880,
+                    "Mb": 3.812, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs30658298", "Chr": 1, "cM": 2.010,
+                    "Mb": 4.431, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs51852623", "Chr": 1, "cM": 2.010,
+                    "Mb": 4.447, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs31879829", "Chr": 1, "cM": 2.140,
+                    "Mb": 4.519, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs36742481", "Chr": 1, "cM": 2.140,
+                    "Mb": 4.776, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                }
+            ])
+
+    def test_parse_reaper_permutation_results(self):
+        self.assertEqual(
+            parse_reaper_permutation_results(
+            "tests/unit/computations/data/qtlreaper/permu_output_sample.txt"),
+            [4.44174, 5.03825, 5.08167, 5.18119, 5.18578, 5.24563, 5.24619,
+             5.24619, 5.27961, 5.28228, 5.43903, 5.50188, 5.51694, 5.56830,
+             5.63874, 5.71346, 5.71936, 5.74275, 5.76764, 5.79815, 5.81671,
+             5.82775, 5.89659, 5.92117, 5.93396, 5.93396, 5.94957])
-- 
cgit v1.2.3


From b5e1d1176f1bf4f7c0b68b27beb15e99418f1650 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Tue, 31 Aug 2021 11:16:29 +0300
Subject: Fix linting errors, minor bugs and reorganise code

* Fix some linting errors and some minor bugs caught by the linter.
  Move the `random_string` function to separate module for use in multiple
  places in the code.
---
 gn3/computations/heatmap.py               |  7 ++++---
 gn3/computations/qtlreaper.py             | 27 ++++++++++++++-------------
 gn3/db/traits.py                          |  5 ++++-
 gn3/heatmaps/heatmaps.py                  | 25 +++++++++++++++++++------
 gn3/random.py                             | 11 +++++++++++
 tests/unit/computations/test_qtlreaper.py |  5 +++--
 6 files changed, 55 insertions(+), 25 deletions(-)
 create mode 100644 gn3/random.py

(limited to 'gn3/computations/qtlreaper.py')

diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
index 92014cf..1143450 100644
--- a/gn3/computations/heatmap.py
+++ b/gn3/computations/heatmap.py
@@ -6,6 +6,7 @@ generate various kinds of heatmaps.
 from functools import reduce
 from typing import Any, Dict, Sequence
 from gn3.computations.slink import slink
+from gn3.computations.qtlreaper import generate_traits_file
 from gn3.computations.correlations2 import compute_correlation
 from gn3.db.genotypes import build_genotype_file, load_genotype_samples
 from gn3.db.traits import (
@@ -155,14 +156,14 @@ def heatmap_data(traits_names, conn: Any):
         for fullname in traits_names]
     traits_list = tuple(x[0] for x in traits_details)
     traits_data_list = [x[1] for x in traits_details]
-    exported_traits_data_list = tuple(
-        export_trait_data(td, strainlist) for td in traits_data_list)
     genotype_filename = build_genotype_file(traits_list[0]["riset"])
     strainlist = load_genotype_samples(genotype_filename)
+    exported_traits_data_list = tuple(
+        export_trait_data(td, strainlist) for td in traits_data_list)
     slink_data = slink(cluster_traits(exported_traits_data_list))
     ordering_data = compute_heatmap_order(slink_data)
     strains_and_values = retrieve_strains_and_values(
-        orders, strainlist, exported_traits_data_list)
+        ordering_data, strainlist, exported_traits_data_list)
     strains_values = strains_and_values[0][1]
     trait_values = [t[2] for t in strains_and_values]
     traits_filename = generate_traits_filename()
diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 3b8e4db..30c7051 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -3,17 +3,10 @@ This module contains functions to interact with the `qtlreaper` utility for
 computation of QTLs.
 """
 import os
-import random
-import string
 import subprocess
+from gn3.random import random_string
 from gn3.settings import TMPDIR, REAPER_COMMAND
 
-def random_string(length):
-    """Generate a random string of length `length`."""
-    return "".join(
-        random.choices(
-            string.ascii_letters + string.digits, k=length))
-
 def generate_traits_file(strains, trait_values, traits_filename):
     """
     Generate a traits file for use with `qtlreaper`.
@@ -25,11 +18,13 @@ def generate_traits_file(strains, trait_values, traits_filename):
         computation of QTLs.
     """
     header = "Trait\t{}\n".format("\t".join(strains))
-    data = [header] + [
-        "T{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
-        for i, t in enumerate(trait_values[:-1])] + [
-        "T{}\t{}".format(len(trait_values), "\t".join([str(i) for i in t]))
-        for t in trait_values[-1:]]
+    data = (
+        [header] +
+        ["T{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
+         for i, t in enumerate(trait_values[:-1])] +
+        ["T{}\t{}".format(
+            len(trait_values), "\t".join([str(i) for i in t]))
+         for t in trait_values[-1:]])
     with open(traits_filename, "w") as outfile:
         outfile.writelines(data)
 
@@ -93,6 +88,9 @@ def run_reaper(
 
 
 def parse_reaper_main_results(results_file):
+    """
+    Parse the results file of running QTLReaper into a list of dicts.
+    """
     with open(results_file, "r") as infile:
         lines = infile.readlines()
 
@@ -104,6 +102,9 @@ def parse_reaper_main_results(results_file):
     return [dict(zip(header, __parse_line(line))) for line in lines[1:]]
 
 def parse_reaper_permutation_results(results_file):
+    """
+    Parse the results QTLReaper permutations into a list of values.
+    """
     with open(results_file, "r") as infile:
         lines = infile.readlines()
 
diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index ccb101a..bfe887e 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -1,6 +1,8 @@
 """This class contains functions relating to trait data manipulation"""
-from gn3.settings import TMPDIR
+import os
 from typing import Any, Dict, Union, Sequence
+from gn3.settings import TMPDIR
+from gn3.random import random_string
 from gn3.function_helpers import compose
 from gn3.db.datasets import retrieve_trait_dataset
 
@@ -669,5 +671,6 @@ def retrieve_trait_data(trait: dict, conn: Any, strainlist: Sequence[str] = tupl
     return {}
 
 def generate_traits_filename(base_path: str = TMPDIR):
+    """Generate a unique filename for use with generated traits files."""
     return "{}/traits_test_file_{}.txt".format(
         os.path.abspath(base_path), random_string(10))
diff --git a/gn3/heatmaps/heatmaps.py b/gn3/heatmaps/heatmaps.py
index 3bf7917..88f546d 100644
--- a/gn3/heatmaps/heatmaps.py
+++ b/gn3/heatmaps/heatmaps.py
@@ -14,6 +14,19 @@ def generate_random_data(data_stop: float = 2, width: int = 10, height: int = 30
     return [[random.uniform(0,data_stop) for i in range(0, width)]
             for j in range(0, height)]
 
+def generate_random_data2(data_stop: float = 2, width: int = 10, height: int = 30):
+    """
+    This is mostly a utility function to be used to generate random data, useful
+    for development of the heatmap generation code, without access to the actual
+    database data.
+    """
+    return [
+        [{
+            "value": item,
+            "category": random.choice(["C57BL/6J +", "DBA/2J +"])}
+         for item in axis]
+        for axis in generate_random_data(data_stop, width, height)]
+
 def heatmap_x_axis_names():
     return [
         "UCLA_BXDBXH_CARTILAGE_V2::ILM103710672",
@@ -30,13 +43,14 @@ def heatmap_x_axis_names():
 
 # Grey + Blue + Red
 def generate_heatmap():
-    rows = 20
-    data = generate_random_data(height=rows)
-    y = (["%s"%x for x in range(1, rows+1)][:-1] + ["X"]) #replace last item with x for now
+    cols = 20
+    y_axis = (["%s"%x for x in range(1, cols+1)][:-1] + ["X"]) #replace last item with x for now
+    x_axis = heatmap_x_axis_names()
+    data = generate_random_data(height=cols, width=len(x_axis))
     fig = px.imshow(
         data,
-        x=heatmap_x_axis_names(),
-        y=y,
+        x=x_axis,
+        y=y_axis,
         width=500)
     fig.update_traces(xtype="array")
     fig.update_traces(ytype="array")
@@ -49,6 +63,5 @@ def generate_heatmap():
         coloraxis_colorscale=[
             [0.0, '#3B3B3B'], [0.4999999999999999, '#ABABAB'],
             [0.5, '#F5DE11'], [1.0, '#FF0D00']])
-
     fig.write_html("%s/%s"%(heatmap_dir, "test_image.html"))
     return fig
diff --git a/gn3/random.py b/gn3/random.py
new file mode 100644
index 0000000..f0ba574
--- /dev/null
+++ b/gn3/random.py
@@ -0,0 +1,11 @@
+"""
+Functions to generate complex random data.
+"""
+import random
+import string
+
+def random_string(length):
+    """Generate a random string of length `length`."""
+    return "".join(
+        random.choices(
+            string.ascii_letters + string.digits, k=length))
diff --git a/tests/unit/computations/test_qtlreaper.py b/tests/unit/computations/test_qtlreaper.py
index ec23664..6c3b64d 100644
--- a/tests/unit/computations/test_qtlreaper.py
+++ b/tests/unit/computations/test_qtlreaper.py
@@ -1,5 +1,4 @@
 """Module contains tests for gn3.computations.qtlreaper"""
-import os
 from unittest import TestCase
 from gn3.computations.qtlreaper import (
     parse_reaper_main_results, parse_reaper_permutation_results)
@@ -8,6 +7,7 @@ class TestQTLReaper(TestCase):
     """Class for testing qtlreaper interface functions."""
 
     def test_parse_reaper_main_results(self):
+        """Test that the main results file is parsed correctly."""
         self.assertEqual(
             parse_reaper_main_results(
                 "tests/unit/computations/data/qtlreaper/main_output_sample.txt"),
@@ -65,9 +65,10 @@ class TestQTLReaper(TestCase):
             ])
 
     def test_parse_reaper_permutation_results(self):
+        """Test that the permutations results file is parsed correctly."""
         self.assertEqual(
             parse_reaper_permutation_results(
-            "tests/unit/computations/data/qtlreaper/permu_output_sample.txt"),
+                "tests/unit/computations/data/qtlreaper/permu_output_sample.txt"),
             [4.44174, 5.03825, 5.08167, 5.18119, 5.18578, 5.24563, 5.24619,
              5.24619, 5.27961, 5.28228, 5.43903, 5.50188, 5.51694, 5.56830,
              5.63874, 5.71346, 5.71936, 5.74275, 5.76764, 5.79815, 5.81671,
-- 
cgit v1.2.3


From 4ce5695a35e92a704add8d497266bb2986a593f6 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Mon, 6 Sep 2021 06:47:52 +0300
Subject: Handle type-coercion exceptions

* gn3/computations/qtlreaper.py: handle exceptions

  Sometimes, the values being parsed are plain strings and cannot be cast to
  the float types. This commit handles that by casting only those values that
  can be cast to float, and returning the others as strings.
---
 gn3/computations/qtlreaper.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'gn3/computations/qtlreaper.py')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 30c7051..eff2a80 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -94,9 +94,15 @@ def parse_reaper_main_results(results_file):
     with open(results_file, "r") as infile:
         lines = infile.readlines()
 
+    def __parse_column_value(value):
+        try:
+            return float(value)
+        except:
+            return value
+
     def __parse_line(line):
         items = line.strip().split("\t")
-        return items[0:2] + [float(item) for item in items[2:]]
+        return items[0:2] + [__parse_column_value(item) for item in items[2:]]
 
     header = lines[0].strip().split("\t")
     return [dict(zip(header, __parse_line(line))) for line in lines[1:]]
-- 
cgit v1.2.3


From 679a1af832ad9585c7cf72996043edb08e1b0d10 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Mon, 6 Sep 2021 08:06:14 +0300
Subject: Leave "Chr" value as string when parsing

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* The "Chr" value seems to be mostly a name of some sort, despite it being,
  seemingly, a number. This commit parses the "Chr" value as a string.
  It also updates the tests to expect a string, rather than a number, for
  "Chr" values.
---
 gn3/computations/qtlreaper.py             |  5 +++--
 tests/unit/computations/test_qtlreaper.py | 20 ++++++++++----------
 2 files changed, 13 insertions(+), 12 deletions(-)

(limited to 'gn3/computations/qtlreaper.py')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index eff2a80..9b20309 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -94,7 +94,7 @@ def parse_reaper_main_results(results_file):
     with open(results_file, "r") as infile:
         lines = infile.readlines()
 
-    def __parse_column_value(value):
+    def __parse_column_float_value(value):
         try:
             return float(value)
         except:
@@ -102,7 +102,8 @@ def parse_reaper_main_results(results_file):
 
     def __parse_line(line):
         items = line.strip().split("\t")
-        return items[0:2] + [__parse_column_value(item) for item in items[2:]]
+        return items[0:3] + [
+            __parse_column_float_value(item) for item in items[3:]]
 
     header = lines[0].strip().split("\t")
     return [dict(zip(header, __parse_line(line))) for line in lines[1:]]
diff --git a/tests/unit/computations/test_qtlreaper.py b/tests/unit/computations/test_qtlreaper.py
index 6c3b64d..fd3434a 100644
--- a/tests/unit/computations/test_qtlreaper.py
+++ b/tests/unit/computations/test_qtlreaper.py
@@ -13,52 +13,52 @@ class TestQTLReaper(TestCase):
                 "tests/unit/computations/data/qtlreaper/main_output_sample.txt"),
             [
                 {
-                    "ID": "T1", "Locus": "rs31443144", "Chr": 1, "cM": 1.500,
+                    "ID": "T1", "Locus": "rs31443144", "Chr": "1", "cM": 1.500,
                     "Mb": 3.010, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs6269442", "Chr": 1, "cM": 1.500,
+                    "ID": "T1", "Locus": "rs6269442", "Chr": "1", "cM": 1.500,
                     "Mb": 3.492, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs32285189", "Chr": 1, "cM": 1.630,
+                    "ID": "T1", "Locus": "rs32285189", "Chr": "1", "cM": 1.630,
                     "Mb": 3.511, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs258367496", "Chr": 1, "cM": 1.630,
+                    "ID": "T1", "Locus": "rs258367496", "Chr": "1", "cM": 1.630,
                     "Mb": 3.660, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs32430919", "Chr": 1, "cM": 1.750,
+                    "ID": "T1", "Locus": "rs32430919", "Chr": "1", "cM": 1.750,
                     "Mb": 3.777, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs36251697", "Chr": 1, "cM": 1.880,
+                    "ID": "T1", "Locus": "rs36251697", "Chr": "1", "cM": 1.880,
                     "Mb": 3.812, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs30658298", "Chr": 1, "cM": 2.010,
+                    "ID": "T1", "Locus": "rs30658298", "Chr": "1", "cM": 2.010,
                     "Mb": 4.431, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs51852623", "Chr": 1, "cM": 2.010,
+                    "ID": "T1", "Locus": "rs51852623", "Chr": "1", "cM": 2.010,
                     "Mb": 4.447, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs31879829", "Chr": 1, "cM": 2.140,
+                    "ID": "T1", "Locus": "rs31879829", "Chr": "1", "cM": 2.140,
                     "Mb": 4.519, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs36742481", "Chr": 1, "cM": 2.140,
+                    "ID": "T1", "Locus": "rs36742481", "Chr": "1", "cM": 2.140,
                     "Mb": 4.776, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 }
-- 
cgit v1.2.3


From d4943f1d01d89a3928c905f80914a23144126c8e Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Mon, 6 Sep 2021 08:09:20 +0300
Subject: Provide function to organise parsed QTLReaper results

* gn3/computations/qtlreaper.py: Provide a function to organise the results by
  trait for easier use down the line.

* tests/unit/computations/test_qtlreaper.py: provide a test to ensure that the
  organising function works as expected.
---
 gn3/computations/qtlreaper.py             |  25 +++++++
 tests/unit/computations/test_qtlreaper.py | 105 +++++++++++++++++++++++++++++-
 2 files changed, 129 insertions(+), 1 deletion(-)

(limited to 'gn3/computations/qtlreaper.py')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 9b20309..8c0e6de 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -86,6 +86,31 @@ def run_reaper(
     subprocess.run(command_list, check=True)
     return (output_filename, permu_output_filename)
 
+def organise_reaper_main_results(parsed_results):
+    def __organise_by_chromosome(chr_name, items):
+        chr_items = [item for item in items if item["Chr"] == chr_name]
+        return {
+            "Chr": str(chr_name),
+            "loci": [{
+                "Locus": locus["Locus"],
+                "cM": locus["cM"],
+                "Mb": locus["Mb"],
+                "LRS": locus["LRS"],
+                "Additive": locus["Additive"],
+                "pValue": locus["pValue"]
+            } for locus in chr_items]}
+
+    def __organise_by_id(identifier, items):
+        id_items = [item for item in items if item["ID"] == identifier]
+        unique_chromosomes = {item["Chr"] for item in id_items}
+        return {
+            "ID": identifier,
+            "chromosomes": [
+                __organise_by_chromosome(chromo, id_items)
+                for chromo in sorted(unique_chromosomes)]}
+
+    unique_ids = {res["ID"] for res in parsed_results}
+    return [__organise_by_id(_id, parsed_results) for _id in sorted(unique_ids)]
 
 def parse_reaper_main_results(results_file):
     """
diff --git a/tests/unit/computations/test_qtlreaper.py b/tests/unit/computations/test_qtlreaper.py
index fd3434a..1d7347f 100644
--- a/tests/unit/computations/test_qtlreaper.py
+++ b/tests/unit/computations/test_qtlreaper.py
@@ -1,7 +1,9 @@
 """Module contains tests for gn3.computations.qtlreaper"""
 from unittest import TestCase
 from gn3.computations.qtlreaper import (
-    parse_reaper_main_results, parse_reaper_permutation_results)
+    parse_reaper_main_results,
+    organise_reaper_main_results,
+    parse_reaper_permutation_results)
 
 class TestQTLReaper(TestCase):
     """Class for testing qtlreaper interface functions."""
@@ -73,3 +75,104 @@ class TestQTLReaper(TestCase):
              5.24619, 5.27961, 5.28228, 5.43903, 5.50188, 5.51694, 5.56830,
              5.63874, 5.71346, 5.71936, 5.74275, 5.76764, 5.79815, 5.81671,
              5.82775, 5.89659, 5.92117, 5.93396, 5.93396, 5.94957])
+
+    def test_organise_reaper_main_results(self):
+        self.assertEqual(
+            organise_reaper_main_results([
+                {
+                    "ID": "T1", "Locus": "rs31443144", "Chr": 1, "cM": 1.500,
+                    "Mb": 3.010, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs6269442", "Chr": 1, "cM": 1.500,
+                    "Mb": 3.492, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs32285189", "Chr": 1, "cM": 1.630,
+                    "Mb": 3.511, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs258367496", "Chr": 1, "cM": 1.630,
+                    "Mb": 3.660, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs32430919", "Chr": 1, "cM": 1.750,
+                    "Mb": 3.777, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs36251697", "Chr": 1, "cM": 1.880,
+                    "Mb": 3.812, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs30658298", "Chr": 1, "cM": 2.010,
+                    "Mb": 4.431, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs51852623", "Chr": 2, "cM": 2.010,
+                    "Mb": 4.447, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs31879829", "Chr": 2, "cM": 2.140,
+                    "Mb": 4.519, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs36742481", "Chr": 2, "cM": 2.140,
+                    "Mb": 4.776, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                }
+            ]),
+            [{"ID": "T1",
+              "chromosomes": [
+                  {"Chr": "1",
+                   "loci": [
+                       {
+                           "Locus": "rs31443144",  "cM": 1.500, "Mb": 3.010,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       },
+                       {
+                           "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       },
+                       {
+                           "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       },
+                       {
+                           "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       },
+                       {
+                           "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       },
+                       {
+                           "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       },
+                       {
+                           "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       }]},
+                  {"Chr": "2",
+                   "loci": [
+                       {
+                           "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       },
+                       {
+                           "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       },
+                       {
+                           "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       }]}]}])
-- 
cgit v1.2.3


From f360cc62cc156af90d3283ae7b6db9e8250fa43c Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Wed, 8 Sep 2021 10:51:57 +0300
Subject: Remove extraneous text to ease sorting

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* Change the id from 'T<n>' to simply '<n>' to ease sorting of the trait
  results by numerical order rather than string order.
---
 gn3/computations/qtlreaper.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'gn3/computations/qtlreaper.py')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 8c0e6de..ec215e5 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -20,9 +20,9 @@ def generate_traits_file(strains, trait_values, traits_filename):
     header = "Trait\t{}\n".format("\t".join(strains))
     data = (
         [header] +
-        ["T{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
+        ["{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
          for i, t in enumerate(trait_values[:-1])] +
-        ["T{}\t{}".format(
+        ["{}\t{}".format(
             len(trait_values), "\t".join([str(i) for i in t]))
          for t in trait_values[-1:]])
     with open(traits_filename, "w") as outfile:
-- 
cgit v1.2.3


From 3f323734fcf258d28f3f7d33fdc1518ef9ec24a8 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Wed, 8 Sep 2021 10:54:48 +0300
Subject: Parse Chr value as int where possible

* To ease sorting of data by numerical order down the line, parse the "Chr"
  values as integers where possible, leaving non-numeric values as strings.
---
 gn3/computations/qtlreaper.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'gn3/computations/qtlreaper.py')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index ec215e5..02d6572 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -86,11 +86,16 @@ def run_reaper(
     subprocess.run(command_list, check=True)
     return (output_filename, permu_output_filename)
 
+def chromosome_sorter_key_fn(val):
+    if isinstance(val, int):
+        return val
+    return ord(val)
+
 def organise_reaper_main_results(parsed_results):
     def __organise_by_chromosome(chr_name, items):
         chr_items = [item for item in items if item["Chr"] == chr_name]
         return {
-            "Chr": str(chr_name),
+            "Chr": chr_name,
             "loci": [{
                 "Locus": locus["Locus"],
                 "cM": locus["cM"],
@@ -125,9 +130,15 @@ def parse_reaper_main_results(results_file):
         except:
             return value
 
+    def __parse_column_int_value(value):
+        try:
+            return int(value)
+        except:
+            return value
+
     def __parse_line(line):
         items = line.strip().split("\t")
-        return items[0:3] + [
+        return items[0:2] + [__parse_column_int_value(items[2])] + [
             __parse_column_float_value(item) for item in items[3:]]
 
     header = lines[0].strip().split("\t")
-- 
cgit v1.2.3


From a718069c757bea9f7ecbaee25e23bd581750f906 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Wed, 8 Sep 2021 10:56:56 +0300
Subject: Ease search for traits and chromosomes

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* Return a dict of values rather than list for the traits and chromosomes to
  ease searching through the data.
---
 gn3/computations/qtlreaper.py             |  9 ++-
 tests/unit/computations/test_qtlreaper.py | 92 +++++++++++++++----------------
 2 files changed, 52 insertions(+), 49 deletions(-)

(limited to 'gn3/computations/qtlreaper.py')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 02d6572..5180853 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -110,12 +110,15 @@ def organise_reaper_main_results(parsed_results):
         unique_chromosomes = {item["Chr"] for item in id_items}
         return {
             "ID": identifier,
-            "chromosomes": [
+            "chromosomes": {_chr["Chr"]: _chr for _chr in [
                 __organise_by_chromosome(chromo, id_items)
-                for chromo in sorted(unique_chromosomes)]}
+                for chromo in sorted(
+                        unique_chromosomes, key=chromosome_sorter_key_fn)]}}
 
     unique_ids = {res["ID"] for res in parsed_results}
-    return [__organise_by_id(_id, parsed_results) for _id in sorted(unique_ids)]
+    return {
+        trait["ID"]: trait for trait in
+        [__organise_by_id(_id, parsed_results) for _id in sorted(unique_ids)]}
 
 def parse_reaper_main_results(results_file):
     """
diff --git a/tests/unit/computations/test_qtlreaper.py b/tests/unit/computations/test_qtlreaper.py
index 1d7347f..495ed97 100644
--- a/tests/unit/computations/test_qtlreaper.py
+++ b/tests/unit/computations/test_qtlreaper.py
@@ -130,49 +130,49 @@ class TestQTLReaper(TestCase):
                     "pValue": 1.000
                 }
             ]),
-            [{"ID": "T1",
-              "chromosomes": [
-                  {"Chr": "1",
-                   "loci": [
-                       {
-                           "Locus": "rs31443144",  "cM": 1.500, "Mb": 3.010,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       },
-                       {
-                           "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       },
-                       {
-                           "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       },
-                       {
-                           "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       },
-                       {
-                           "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       },
-                       {
-                           "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       },
-                       {
-                           "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       }]},
-                  {"Chr": "2",
-                   "loci": [
-                       {
-                           "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       },
-                       {
-                           "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       },
-                       {
-                           "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       }]}]}])
+            {"T1": {"ID": "T1",
+                    "chromosomes": {
+                        1: {"Chr": 1,
+                            "loci": [
+                                {
+                                    "Locus": "rs31443144",  "cM": 1.500, "Mb": 3.010,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                },
+                                {
+                                    "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                },
+                                {
+                                    "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                },
+                                {
+                                    "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                },
+                                {
+                                    "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                },
+                                {
+                                    "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                },
+                                {
+                                    "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                }]},
+                        2: {"Chr": 2,
+                            "loci": [
+                                {
+                                    "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                },
+                                {
+                                    "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                },
+                                {
+                                    "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                }]}}}})
-- 
cgit v1.2.3


From 1e2357049adc72808fbf8eaac3da9411d3c78c66 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Fri, 17 Sep 2021 11:20:16 +0300
Subject: Fix a number of linting issues

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi
---
 gn3/computations/qtlreaper.py             |  7 ++--
 gn3/db/genotypes.py                       |  2 +-
 gn3/heatmaps.py                           | 54 ++++++++++++-------------------
 tests/unit/computations/test_qtlreaper.py |  3 +-
 tests/unit/test_heatmaps.py               |  6 ++--
 5 files changed, 32 insertions(+), 40 deletions(-)

(limited to 'gn3/computations/qtlreaper.py')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 5180853..377db9b 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -110,9 +110,10 @@ def organise_reaper_main_results(parsed_results):
         unique_chromosomes = {item["Chr"] for item in id_items}
         return {
             "ID": identifier,
-            "chromosomes": {_chr["Chr"]: _chr for _chr in [
-                __organise_by_chromosome(chromo, id_items)
-                for chromo in sorted(
+            "chromosomes": {
+                _chr["Chr"]: _chr for _chr in [
+                    __organise_by_chromosome(chromo, id_items)
+                    for chromo in sorted(
                         unique_chromosomes, key=chromosome_sorter_key_fn)]}}
 
     unique_ids = {res["ID"] for res in parsed_results}
diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py
index b03d55c..9d052d9 100644
--- a/gn3/db/genotypes.py
+++ b/gn3/db/genotypes.py
@@ -174,7 +174,7 @@ def parse_genotype_file(filename: str, parlist: tuple = tuple()):
     geno_obj = dict(labels + header)
     markers = tuple(
         [parse_genotype_marker(line, geno_obj, parlist)
-        for line in data_lines[1:]])
+         for line in data_lines[1:]])
     chromosomes = tuple(
         dict(chromosome) for chromosome in
         build_genotype_chromosomes(geno_obj, markers))
diff --git a/gn3/heatmaps.py b/gn3/heatmaps.py
index 2859dde..c4fc67d 100644
--- a/gn3/heatmaps.py
+++ b/gn3/heatmaps.py
@@ -3,13 +3,13 @@ This module will contain functions to be used in computation of the data used to
 generate various kinds of heatmaps.
 """
 
+from typing import Any, Dict, Sequence
 import numpy as np
 from functools import reduce
 from gn3.settings import TMPDIR
 import plotly.graph_objects as go
 import plotly.figure_factory as ff
 from gn3.random import random_string
-from typing import Any, Dict, Sequence
 from gn3.computations.slink import slink
 from plotly.subplots import make_subplots
 from gn3.computations.correlations2 import compute_correlation
@@ -165,7 +165,7 @@ def build_heatmap(traits_names, conn: Any):
         for fullname in traits_names]
     traits_data_list = [retrieve_trait_data(t, conn) for t in traits]
     genotype_filename = build_genotype_file(traits[0]["riset"])
-    genotype = parse_genotype_file(genotype_filename)
+    # genotype = parse_genotype_file(genotype_filename)
     strains = load_genotype_samples(genotype_filename)
     exported_traits_data_list = [
         export_trait_data(td, strains) for td in traits_data_list]
@@ -183,22 +183,21 @@ def build_heatmap(traits_names, conn: Any):
         [t[2] for t in strains_and_values],
         traits_filename)
 
-    main_output, permutations_output = run_reaper(
+    main_output, _permutations_output = run_reaper(
         genotype_filename, traits_filename, separate_nperm_output=True)
 
     qtlresults = parse_reaper_main_results(main_output)
-    permudata = parse_reaper_permutation_results(permutations_output)
+    # permudata = parse_reaper_permutation_results(permutations_output)
     organised = organise_reaper_main_results(qtlresults)
 
     traits_ids = [# sort numerically, but retain the ids as strings
         str(i) for i in sorted({int(row["ID"]) for row in qtlresults})]
     chromosome_names = sorted(
-        {row["Chr"] for row in qtlresults}, key = chromosome_sorter_key_fn)
-    loci_names = sorted({row["Locus"] for row in qtlresults})
-    ordered_traits_names = {
-        res_id: trait for res_id, trait in
+        {row["Chr"] for row in qtlresults}, key=chromosome_sorter_key_fn)
+    # loci_names = sorted({row["Locus"] for row in qtlresults})
+    ordered_traits_names = dict(
         zip(traits_ids,
-            [traits[idx]["trait_fullname"] for idx in traits_order])}
+            [traits[idx]["trait_fullname"] for idx in traits_order]))
 
     return generate_clustered_heatmap(
         process_traits_data_for_heatmap(
@@ -207,22 +206,11 @@ def build_heatmap(traits_names, conn: Any):
         "single_heatmap_{}".format(random_string(10)),
         y_axis=tuple(
             ordered_traits_names[traits_ids[order]]
-                for order in traits_order),
+            for order in traits_order),
         y_label="Traits",
-        x_axis=[chromo for chromo in chromosome_names],
+        x_axis=chromosome_names,
         x_label="Chromosomes")
 
-    return {
-        "slink_data": slink_data,
-        "ordering_data": ordering_data,
-        "strainlist": strainlist,
-        "genotype_filename": genotype_filename,
-        "traits_list": traits_list,
-        "traits_data_list": traits_data_list,
-        "exported_traits_data_list": exported_traits_data_list,
-        "traits_filename": traits_filename
-    }
-
 def compute_traits_order(slink_data, neworder: tuple = tuple()):
     """
     Compute the order of the traits for clustering from `slink_data`.
@@ -314,7 +302,7 @@ def get_nearest_marker(traits_list, genotype):
     https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L419-L438
     """
     if not genotype["Mbmap"]:
-        return [None] * len(trait_list)
+        return [None] * len(traits_list)
 
     marker_finder = nearest_marker_finder(genotype)
     return [marker_finder(trait) for trait in traits_list]
@@ -340,10 +328,10 @@ def process_traits_data_for_heatmap(data, trait_names, chromosome_names):
     return hdata
 
 def generate_clustered_heatmap(
-        data, clustering_data, image_filename_prefix, x_axis = None,
-        x_label: str = "", y_axis = None, y_label: str = "",
+        data, clustering_data, image_filename_prefix, x_axis=None,
+        x_label: str = "", y_axis=None, y_label: str = "",
         output_dir: str = TMPDIR,
-        colorscale = (
+        colorscale=(
             (0.0, '#5D5D5D'), (0.4999999999999999, '#ABABAB'),
             (0.5, '#F5DE11'), (1.0, '#FF0D00'))):
     """
@@ -357,15 +345,15 @@ def generate_clustered_heatmap(
         shared_yaxes="rows",
         horizontal_spacing=0.001,
         subplot_titles=["distance"] + x_axis,
-        figure = ff.create_dendrogram(
+        figure=ff.create_dendrogram(
             np.array(clustering_data), orientation="right", labels=y_axis))
     hms = [go.Heatmap(
         name=chromo,
-        y = y_axis,
-        z = data_array,
+        y=y_axis,
+        z=data_array,
         showscale=False) for chromo, data_array in zip(x_axis, data)]
-    for i, hm in enumerate(hms):
-        fig.add_trace(hm, row=1, col=(i + 2))
+    for i, heatmap in enumerate(hms):
+        fig.add_trace(heatmap, row=1, col=(i + 2))
 
     fig.update_layout(
         {
@@ -380,8 +368,8 @@ def generate_clustered_heatmap(
     x_axes_layouts = {
         "xaxis{}".format(i+1 if i > 0 else ""): {
             "mirror": False,
-            "showticklabels": True if i==0 else False,
-            "ticks": "outside" if i==0 else ""
+            "showticklabels": True if i == 0 else False,
+            "ticks": "outside" if i == 0 else ""
         }
         for i in range(num_cols)}
 
diff --git a/tests/unit/computations/test_qtlreaper.py b/tests/unit/computations/test_qtlreaper.py
index 1d67827..d420470 100644
--- a/tests/unit/computations/test_qtlreaper.py
+++ b/tests/unit/computations/test_qtlreaper.py
@@ -77,6 +77,7 @@ class TestQTLReaper(TestCase):
              5.82775, 5.89659, 5.92117, 5.93396, 5.93396, 5.94957])
 
     def test_organise_reaper_main_results(self):
+        """Check that results are organised correctly."""
         self.assertEqual(
             organise_reaper_main_results([
                 {
@@ -135,7 +136,7 @@ class TestQTLReaper(TestCase):
                         1: {"Chr": 1,
                             "loci": [
                                 {
-                                    "Locus": "rs31443144",  "cM": 1.500, "Mb": 3.010,
+                                    "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010,
                                     "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
                                 },
                                 {
diff --git a/tests/unit/test_heatmaps.py b/tests/unit/test_heatmaps.py
index f3a81c5..c0a496b 100644
--- a/tests/unit/test_heatmaps.py
+++ b/tests/unit/test_heatmaps.py
@@ -189,6 +189,7 @@ class TestHeatmap(TestCase):
                     retrieve_strains_and_values(orders, slist, tdata), expected)
 
     def test_get_lrs_from_chr(self):
+        """Check that function gets correct LRS values"""
         for trait, chromosome, expected in [
                 [{"chromosomes": {}}, 3, [None]],
                 [{"chromosomes": {3: {"loci": [
@@ -202,6 +203,7 @@ class TestHeatmap(TestCase):
                 self.assertEqual(get_lrs_from_chr(trait, chromosome), expected)
 
     def test_process_traits_data_for_heatmap(self):
+        """Check for correct processing of data for heatmap generation."""
         self.assertEqual(
             process_traits_data_for_heatmap(
                 {"1": {
@@ -210,7 +212,7 @@ class TestHeatmap(TestCase):
                         1: {"Chr": 1,
                             "loci": [
                                 {
-                                    "Locus": "rs31443144",  "cM": 1.500, "Mb": 3.010,
+                                    "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010,
                                     "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
                                 },
                                 {
@@ -257,7 +259,7 @@ class TestHeatmap(TestCase):
                          1: {"Chr": 1,
                              "loci": [
                                  {
-                                     "Locus": "rs31443144",  "cM": 1.500, "Mb": 3.010,
+                                     "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010,
                                      "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
                                  },
                                  {
-- 
cgit v1.2.3


From cd7f301688fd9780df1f842f8bd2b7602775ba1f Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Wed, 22 Sep 2021 07:53:53 +0300
Subject: Fix pylint errors

* Add missing function and module docstrings
* Remove unused imports
* Fix import order
* Rework some code sections to fix issues
* Disable some pylint errors.
---
 gn3/api/heatmaps.py           |  8 ++++++++
 gn3/app.py                    |  5 +++--
 gn3/computations/qtlreaper.py |  8 ++++++++
 gn3/db/genotypes.py           |  1 +
 gn3/db/traits.py              |  2 +-
 gn3/heatmaps.py               | 28 ++++++++++++++++------------
 6 files changed, 37 insertions(+), 15 deletions(-)

(limited to 'gn3/computations/qtlreaper.py')

diff --git a/gn3/api/heatmaps.py b/gn3/api/heatmaps.py
index 1022a35..fe47aee 100644
--- a/gn3/api/heatmaps.py
+++ b/gn3/api/heatmaps.py
@@ -1,3 +1,7 @@
+"""
+Module to hold the entrypoint functions that generate heatmaps
+"""
+
 import io
 from flask import jsonify
 from flask import request
@@ -9,6 +13,10 @@ heatmaps = Blueprint("heatmaps", __name__)
 
 @heatmaps.route("/clustered", methods=("POST",))
 def clustered_heatmaps():
+    """
+    Parses the incoming data and responds with the JSON-serialized plotly figure
+    representing the clustered heatmap.
+    """
     heatmap_request = request.get_json()
     traits_names = heatmap_request.get("traits_names", tuple())
     if len(traits_names) < 2:
diff --git a/gn3/app.py b/gn3/app.py
index 6b4c57e..8badb65 100644
--- a/gn3/app.py
+++ b/gn3/app.py
@@ -3,7 +3,10 @@ import os
 
 from typing import Dict
 from typing import Union
+
 from flask import Flask
+from flask_cors import CORS
+
 from gn3.api.gemma import gemma
 from gn3.api.rqtl import rqtl
 from gn3.api.general import general
@@ -11,8 +14,6 @@ from gn3.api.heatmaps import heatmaps
 from gn3.api.correlation import correlation
 from gn3.api.data_entry import data_entry
 
-from flask_cors import CORS
-
 def create_app(config: Union[Dict, str, None] = None) -> Flask:
     """Create a new flask object"""
     app = Flask(__name__)
diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 377db9b..5d17fed 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -87,11 +87,17 @@ def run_reaper(
     return (output_filename, permu_output_filename)
 
 def chromosome_sorter_key_fn(val):
+    """
+    Useful for sorting the chromosomes
+    """
     if isinstance(val, int):
         return val
     return ord(val)
 
 def organise_reaper_main_results(parsed_results):
+    """
+    Provide the results of running reaper in a format that is easier to use.
+    """
     def __organise_by_chromosome(chr_name, items):
         chr_items = [item for item in items if item["Chr"] == chr_name]
         return {
@@ -129,12 +135,14 @@ def parse_reaper_main_results(results_file):
         lines = infile.readlines()
 
     def __parse_column_float_value(value):
+        # pylint: disable=W0702
         try:
             return float(value)
         except:
             return value
 
     def __parse_column_int_value(value):
+        # pylint: disable=W0702
         try:
             return int(value)
         except:
diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py
index 9d052d9..919c539 100644
--- a/gn3/db/genotypes.py
+++ b/gn3/db/genotypes.py
@@ -115,6 +115,7 @@ def parse_genotype_marker(line: str, geno_obj: dict, parlist: list):
     Reworks
     https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/utility/gen_geno_ob.py#L143-L190
     """
+    # pylint: disable=W0702
     marker_row = [item.strip() for item in line.split("\t")]
     geno_table = {
         geno_obj["mat"]: -1, geno_obj["pat"]: 1, geno_obj["het"]: 0,
diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index bfe887e..747ed27 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -46,7 +46,7 @@ def update_sample_data(conn: Any,
                        count: Union[int, str]):
     """Given the right parameters, update sample-data from the relevant
     table."""
-    # pylint: disable=[R0913, R0914]
+    # pylint: disable=[R0913, R0914, C0103]
     STRAIN_ID_SQL: str = "UPDATE Strain SET Name = %s WHERE Id = %s"
     PUBLISH_DATA_SQL: str = ("UPDATE PublishData SET value = %s "
                              "WHERE StrainId = %s AND Id = %s")
diff --git a/gn3/heatmaps.py b/gn3/heatmaps.py
index cd93b3f..9d82fb2 100644
--- a/gn3/heatmaps.py
+++ b/gn3/heatmaps.py
@@ -3,29 +3,28 @@ This module will contain functions to be used in computation of the data used to
 generate various kinds of heatmaps.
 """
 
+from functools import reduce
 from typing import Any, Dict, Sequence
+
 import numpy as np
-from functools import reduce
-from gn3.settings import TMPDIR
 import plotly.graph_objects as go
 import plotly.figure_factory as ff
+from plotly.subplots import make_subplots
+
+from gn3.settings import TMPDIR
 from gn3.random import random_string
 from gn3.computations.slink import slink
-from plotly.subplots import make_subplots
 from gn3.computations.correlations2 import compute_correlation
 from gn3.db.genotypes import (
-    build_genotype_file, load_genotype_samples, parse_genotype_file)
+    build_genotype_file, load_genotype_samples)
 from gn3.db.traits import (
-    retrieve_trait_data,
-    retrieve_trait_info,
-    generate_traits_filename)
+    retrieve_trait_data, retrieve_trait_info)
 from gn3.computations.qtlreaper import (
     run_reaper,
     generate_traits_file,
     chromosome_sorter_key_fn,
     parse_reaper_main_results,
-    organise_reaper_main_results,
-    parse_reaper_permutation_results)
+    organise_reaper_main_results)
 
 def export_trait_data(
         trait_data: dict, strainlist: Sequence[str], dtype: str = "val",
@@ -159,13 +158,13 @@ def build_heatmap(traits_names, conn: Any):
     PARAMETERS:
     TODO: Elaborate on the parameters here...
     """
+    # pylint: disable=[R0914]
     threshold = 0 # webqtlConfig.PUBLICTHRESH
     traits = [
         retrieve_trait_info(threshold, fullname, conn)
         for fullname in traits_names]
     traits_data_list = [retrieve_trait_data(t, conn) for t in traits]
     genotype_filename = build_genotype_file(traits[0]["riset"])
-    # genotype = parse_genotype_file(genotype_filename)
     strains = load_genotype_samples(genotype_filename)
     exported_traits_data_list = [
         export_trait_data(td, strains) for td in traits_data_list]
@@ -336,6 +335,7 @@ def generate_clustered_heatmap(
     Generate a dendrogram, and heatmaps for each chromosome, and put them all
     into one plot.
     """
+    # pylint: disable=[R0913, R0914]
     num_cols = 1 + len(x_axis)
     fig = make_subplots(
         rows=1,
@@ -359,14 +359,18 @@ def generate_clustered_heatmap(
             "height": 800,
             "xaxis": {
                 "mirror": False,
-                "showgrid": True
+                "showgrid": True,
+                "title": x_label
+            },
+            "yaxis": {
+                "title": y_label
             }
         })
 
     x_axes_layouts = {
         "xaxis{}".format(i+1 if i > 0 else ""): {
             "mirror": False,
-            "showticklabels": True if i == 0 else False,
+            "showticklabels": i == 0,
             "ticks": "outside" if i == 0 else ""
         }
         for i in range(num_cols)}
-- 
cgit v1.2.3


From 71cc35e5178904b512b9007e33be17a36f6656f2 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Wed, 22 Sep 2021 08:36:11 +0300
Subject: Fix typing issues

* Ignore some errors
* Update typing definitions for some portions of code
* Add missing imports
---
 gn3/app.py                    |  2 +-
 gn3/computations/qtlreaper.py |  6 ++++--
 gn3/db/genotypes.py           | 10 ++++++----
 gn3/db/traits.py              |  8 ++++----
 gn3/heatmaps.py               |  8 +++-----
 5 files changed, 18 insertions(+), 16 deletions(-)

(limited to 'gn3/computations/qtlreaper.py')

diff --git a/gn3/app.py b/gn3/app.py
index 8badb65..5e852e1 100644
--- a/gn3/app.py
+++ b/gn3/app.py
@@ -5,7 +5,7 @@ from typing import Dict
 from typing import Union
 
 from flask import Flask
-from flask_cors import CORS
+from flask_cors import CORS # type: ignore
 
 from gn3.api.gemma import gemma
 from gn3.api.rqtl import rqtl
diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 5d17fed..5ddea76 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -4,6 +4,8 @@ computation of QTLs.
 """
 import os
 import subprocess
+from typing import Union
+
 from gn3.random import random_string
 from gn3.settings import TMPDIR, REAPER_COMMAND
 
@@ -70,9 +72,9 @@ def run_reaper(
         output_dir, random_string(10))
     output_list = ["--main_output", output_filename]
     if separate_nperm_output:
-        permu_output_filename = "{}/qtlreaper/permu_output_{}.txt".format(
+        permu_output_filename: Union[None, str] = "{}/qtlreaper/permu_output_{}.txt".format(
             output_dir, random_string(10))
-        output_list = output_list + ["--permu_output", permu_output_filename]
+        output_list = output_list + ["--permu_output", permu_output_filename] # type: ignore[list-item]
     else:
         permu_output_filename = None
 
diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py
index 919c539..9ea9f20 100644
--- a/gn3/db/genotypes.py
+++ b/gn3/db/genotypes.py
@@ -2,6 +2,8 @@
 
 import os
 import gzip
+from typing import Union, TextIO
+
 from gn3.settings import GENOTYPE_FILES
 
 def build_genotype_file(
@@ -44,17 +46,17 @@ def __load_genotype_samples_from_geno(genotype_filename: str):
     """
     gzipped_filename = "{}.gz".format(genotype_filename)
     if os.path.isfile(gzipped_filename):
-        genofile = gzip.open(gzipped_filename)
+        genofile: Union[TextIO, gzip.GzipFile] = gzip.open(gzipped_filename)
     else:
         genofile = open(genotype_filename)
 
     for row in genofile:
         line = row.strip()
-        if (not line) or (line.startswith(("#", "@"))):
+        if (not line) or (line.startswith(("#", "@"))): # type: ignore[arg-type]
             continue
         break
 
-    headers = line.split("\t")
+    headers = line.split("\t" ) # type: ignore[arg-type]
     if headers[3] == "Mb":
         return headers[4:]
     return headers[3:]
@@ -107,7 +109,7 @@ def parse_genotype_header(line: str, parlist: tuple = tuple()):
         ("prgy", prgy),
         ("nprgy", len(prgy)))
 
-def parse_genotype_marker(line: str, geno_obj: dict, parlist: list):
+def parse_genotype_marker(line: str, geno_obj: dict, parlist: tuple):
     """
     Parse a data line in a genotype file
 
diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index 747ed27..4fc47c3 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -63,22 +63,22 @@ def update_sample_data(conn: Any,
     with conn.cursor() as cursor:
         # Update the Strains table
         cursor.execute(STRAIN_ID_SQL, (strain_name, strain_id))
-        updated_strains: int = cursor.rowcount
+        updated_strains = cursor.rowcount
         # Update the PublishData table
         cursor.execute(PUBLISH_DATA_SQL,
                        (None if value == "x" else value,
                         strain_id, publish_data_id))
-        updated_published_data: int = cursor.rowcount
+        updated_published_data = cursor.rowcount
         # Update the PublishSE table
         cursor.execute(PUBLISH_SE_SQL,
                        (None if error == "x" else error,
                         strain_id, publish_data_id))
-        updated_se_data: int = cursor.rowcount
+        updated_se_data = cursor.rowcount
         # Update the NStrain table
         cursor.execute(N_STRAIN_SQL,
                        (None if count == "x" else count,
                         strain_id, publish_data_id))
-        updated_n_strains: int = cursor.rowcount
+        updated_n_strains = cursor.rowcount
     return (updated_strains, updated_published_data,
             updated_se_data, updated_n_strains)
 
diff --git a/gn3/heatmaps.py b/gn3/heatmaps.py
index 9d82fb2..45d0c22 100644
--- a/gn3/heatmaps.py
+++ b/gn3/heatmaps.py
@@ -7,9 +7,9 @@ from functools import reduce
 from typing import Any, Dict, Sequence
 
 import numpy as np
-import plotly.graph_objects as go
-import plotly.figure_factory as ff
-from plotly.subplots import make_subplots
+import plotly.graph_objects as go # type: ignore
+import plotly.figure_factory as ff # type: ignore
+from plotly.subplots import make_subplots # type: ignore
 
 from gn3.settings import TMPDIR
 from gn3.random import random_string
@@ -171,8 +171,6 @@ def build_heatmap(traits_names, conn: Any):
     clustered = cluster_traits(exported_traits_data_list)
     slinked = slink(clustered)
     traits_order = compute_traits_order(slinked)
-    ordered_traits_names = [
-        traits[idx]["trait_fullname"] for idx in traits_order]
     strains_and_values = retrieve_strains_and_values(
         traits_order, strains, exported_traits_data_list)
     traits_filename = "{}/traits_test_file_{}.txt".format(
-- 
cgit v1.2.3


From 56c73324c285d896567268370f3955bbd15754b0 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Wed, 22 Sep 2021 09:02:46 +0300
Subject: Fix more pylint errors

---
 gn3/computations/qtlreaper.py | 3 ++-
 gn3/db/genotypes.py           | 2 +-
 tests/unit/db/test_traits.py  | 1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'gn3/computations/qtlreaper.py')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 5ddea76..8b2893e 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -74,7 +74,8 @@ def run_reaper(
     if separate_nperm_output:
         permu_output_filename: Union[None, str] = "{}/qtlreaper/permu_output_{}.txt".format(
             output_dir, random_string(10))
-        output_list = output_list + ["--permu_output", permu_output_filename] # type: ignore[list-item]
+        output_list = output_list + [
+            "--permu_output", permu_output_filename] # type: ignore[list-item]
     else:
         permu_output_filename = None
 
diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py
index 9ea9f20..9987320 100644
--- a/gn3/db/genotypes.py
+++ b/gn3/db/genotypes.py
@@ -56,7 +56,7 @@ def __load_genotype_samples_from_geno(genotype_filename: str):
             continue
         break
 
-    headers = line.split("\t" ) # type: ignore[arg-type]
+    headers = line.split("\t") # type: ignore[arg-type]
     if headers[3] == "Mb":
         return headers[4:]
     return headers[3:]
diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py
index ee98893..baa2af3 100644
--- a/tests/unit/db/test_traits.py
+++ b/tests/unit/db/test_traits.py
@@ -166,6 +166,7 @@ class TestTraitsDBFunctions(TestCase):
         the right calls.
 
         """
+        # pylint: disable=C0103
         db_mock = mock.MagicMock()
 
         STRAIN_ID_SQL: str = "UPDATE Strain SET Name = %s WHERE Id = %s"
-- 
cgit v1.2.3


From 19783a18c2bc7941fc5980e593f19fb1d18c3623 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 27 Sep 2021 04:48:53 +0300
Subject: Update terminology: `strain` to `sample`

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* Update the terminology used: use `sample` in place of `strain` according to
  Zachary's direction at
  https://github.com/genenetwork/genenetwork3/pull/37#issuecomment-926043306
---
 gn3/computations/parsers.py             | 10 ++---
 gn3/computations/qtlreaper.py           |  8 ++--
 gn3/db/genotypes.py                     |  8 ++--
 gn3/db/traits.py                        | 44 ++++++++++-----------
 gn3/heatmaps.py                         | 62 ++++++++++++++---------------
 tests/unit/computations/test_parsers.py |  4 +-
 tests/unit/test_heatmaps.py             | 70 ++++++++++++++++-----------------
 7 files changed, 103 insertions(+), 103 deletions(-)

(limited to 'gn3/computations/qtlreaper.py')

diff --git a/gn3/computations/parsers.py b/gn3/computations/parsers.py
index 94387ff..1af35d6 100644
--- a/gn3/computations/parsers.py
+++ b/gn3/computations/parsers.py
@@ -14,7 +14,7 @@ def parse_genofile(file_path: str) -> Tuple[List[str],
         'h': 0,
         'u': None,
     }
-    genotypes, strains = [], []
+    genotypes, samples = [], []
     with open(file_path, "r") as _genofile:
         for line in _genofile:
             line = line.strip()
@@ -22,8 +22,8 @@ def parse_genofile(file_path: str) -> Tuple[List[str],
                 continue
             cells = line.split()
             if line.startswith("Chr"):
-                strains = cells[4:]
-                strains = [strain.lower() for strain in strains]
+                samples = cells[4:]
+                samples = [sample.lower() for sample in samples]
                 continue
             values = [__map.get(value.lower(), None) for value in cells[4:]]
             genotype = {
@@ -32,7 +32,7 @@ def parse_genofile(file_path: str) -> Tuple[List[str],
                 "cm": cells[2],
                 "mb": cells[3],
                 "values":  values,
-                "dicvalues": dict(zip(strains, values)),
+                "dicvalues": dict(zip(samples, values)),
             }
             genotypes.append(genotype)
-        return strains, genotypes
+        return samples, genotypes
diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 8b2893e..166d2dd 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -9,17 +9,17 @@ from typing import Union
 from gn3.random import random_string
 from gn3.settings import TMPDIR, REAPER_COMMAND
 
-def generate_traits_file(strains, trait_values, traits_filename):
+def generate_traits_file(samples, trait_values, traits_filename):
     """
     Generate a traits file for use with `qtlreaper`.
 
     PARAMETERS:
-    strains: A list of strains to use as the headers for the various columns.
-    trait_values: A list of lists of values for each trait and strain.
+    samples: A list of samples to use as the headers for the various columns.
+    trait_values: A list of lists of values for each trait and sample.
     traits_filename: The tab-separated value to put the values in for
         computation of QTLs.
     """
-    header = "Trait\t{}\n".format("\t".join(strains))
+    header = "Trait\t{}\n".format("\t".join(samples))
     data = (
         [header] +
         ["{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py
index 9987320..8f18cac 100644
--- a/gn3/db/genotypes.py
+++ b/gn3/db/genotypes.py
@@ -14,16 +14,16 @@ def build_genotype_file(
 
 def load_genotype_samples(genotype_filename: str, file_type: str = "geno"):
     """
-    Load sample of strains from genotype files.
+    Load the list of samples from genotype files.
 
     DESCRIPTION:
-    Traits can contain a varied number of strains, some of which do not exist in
+    Traits can contain a varied number of samples, some of which do not exist in
     certain genotypes. In order to compute QTLs, GEMMAs, etc, we need to ensure
-    to pick only those strains that exist in the genotype under consideration
+    to pick only those samples that exist in the genotype under consideration
     for the traits used in the computation.
 
     This function loads a list of samples from the genotype files for use in
-    filtering out unusable strains.
+    filtering out unusable samples.
 
 
     PARAMETERS:
diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index 4fc47c3..c9d05d7 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -445,7 +445,7 @@ def retrieve_temp_trait_data(trait_info: dict, conn: Any):
             query,
             {"trait_name": trait_info["trait_name"]})
         return [dict(zip(
-            ["strain_name", "value", "se_error", "nstrain", "id"], row))
+            ["sample_name", "value", "se_error", "nstrain", "id"], row))
                 for row in cursor.fetchall()]
     return []
 
@@ -484,7 +484,7 @@ def retrieve_geno_trait_data(trait_info: Dict, conn: Any):
              "species_id": retrieve_species_id(
                  trait_info["db"]["riset"], conn)})
         return [dict(zip(
-            ["strain_name", "value", "se_error", "id"], row))
+            ["sample_name", "value", "se_error", "id"], row))
                 for row in cursor.fetchall()]
     return []
 
@@ -515,7 +515,7 @@ def retrieve_publish_trait_data(trait_info: Dict, conn: Any):
             {"trait_name": trait_info["trait_name"],
              "dataset_id": trait_info["db"]["dataset_id"]})
         return [dict(zip(
-            ["strain_name", "value", "se_error", "nstrain", "id"], row))
+            ["sample_name", "value", "se_error", "nstrain", "id"], row))
                 for row in cursor.fetchall()]
     return []
 
@@ -548,7 +548,7 @@ def retrieve_cellid_trait_data(trait_info: Dict, conn: Any):
              "trait_name": trait_info["trait_name"],
              "dataset_id": trait_info["db"]["dataset_id"]})
         return [dict(zip(
-            ["strain_name", "value", "se_error", "id"], row))
+            ["sample_name", "value", "se_error", "id"], row))
                 for row in cursor.fetchall()]
     return []
 
@@ -577,29 +577,29 @@ def retrieve_probeset_trait_data(trait_info: Dict, conn: Any):
             {"trait_name": trait_info["trait_name"],
              "dataset_name": trait_info["db"]["dataset_name"]})
         return [dict(zip(
-            ["strain_name", "value", "se_error", "id"], row))
+            ["sample_name", "value", "se_error", "id"], row))
                 for row in cursor.fetchall()]
     return []
 
-def with_strainlist_data_setup(strainlist: Sequence[str]):
+def with_samplelist_data_setup(samplelist: Sequence[str]):
     """
-    Build function that computes the trait data from provided list of strains.
+    Build function that computes the trait data from provided list of samples.
 
     PARAMETERS
-    strainlist: (list)
-      A list of strain names
+    samplelist: (list)
+      A list of sample names
 
     RETURNS:
       Returns a function that given some data from the database, computes the
-      strain's value, variance and ndata values, only if the strain is present
-      in the provided `strainlist` variable.
+      sample's value, variance and ndata values, only if the sample is present
+      in the provided `samplelist` variable.
     """
     def setup_fn(tdata):
-        if tdata["strain_name"] in strainlist:
+        if tdata["sample_name"] in samplelist:
             val = tdata["value"]
             if val is not None:
                 return {
-                    "strain_name": tdata["strain_name"],
+                    "sample_name": tdata["sample_name"],
                     "value": val,
                     "variance": tdata["se_error"],
                     "ndata": tdata.get("nstrain", None)
@@ -607,19 +607,19 @@ def with_strainlist_data_setup(strainlist: Sequence[str]):
         return None
     return setup_fn
 
-def without_strainlist_data_setup():
+def without_samplelist_data_setup():
     """
     Build function that computes the trait data.
 
     RETURNS:
       Returns a function that given some data from the database, computes the
-      strain's value, variance and ndata values.
+      sample's value, variance and ndata values.
     """
     def setup_fn(tdata):
         val = tdata["value"]
         if val is not None:
             return {
-                "strain_name": tdata["strain_name"],
+                "sample_name": tdata["sample_name"],
                 "value": val,
                 "variance": tdata["se_error"],
                 "ndata": tdata.get("nstrain", None)
@@ -627,7 +627,7 @@ def without_strainlist_data_setup():
         return None
     return setup_fn
 
-def retrieve_trait_data(trait: dict, conn: Any, strainlist: Sequence[str] = tuple()):
+def retrieve_trait_data(trait: dict, conn: Any, samplelist: Sequence[str] = tuple()):
     """
     Retrieve trait data
 
@@ -650,23 +650,23 @@ def retrieve_trait_data(trait: dict, conn: Any, strainlist: Sequence[str] = tupl
     if results:
         # do something with mysqlid
         mysqlid = results[0]["id"]
-        if strainlist:
+        if samplelist:
             data = [
                 item for item in
-                map(with_strainlist_data_setup(strainlist), results)
+                map(with_samplelist_data_setup(samplelist), results)
                 if item is not None]
         else:
             data = [
                 item for item in
-                map(without_strainlist_data_setup(), results)
+                map(without_samplelist_data_setup(), results)
                 if item is not None]
 
         return {
             "mysqlid": mysqlid,
             "data": dict(map(
                 lambda x: (
-                    x["strain_name"],
-                    {k:v for k, v in x.items() if x != "strain_name"}),
+                    x["sample_name"],
+                    {k:v for k, v in x.items() if x != "sample_name"}),
                 data))}
     return {}
 
diff --git a/gn3/heatmaps.py b/gn3/heatmaps.py
index 45d0c22..b6fc6d3 100644
--- a/gn3/heatmaps.py
+++ b/gn3/heatmaps.py
@@ -27,10 +27,10 @@ from gn3.computations.qtlreaper import (
     organise_reaper_main_results)
 
 def export_trait_data(
-        trait_data: dict, strainlist: Sequence[str], dtype: str = "val",
+        trait_data: dict, samplelist: Sequence[str], dtype: str = "val",
         var_exists: bool = False, n_exists: bool = False):
     """
-    Export data according to `strainlist`. Mostly used in calculating
+    Export data according to `samplelist`. Mostly used in calculating
     correlations.
 
     DESCRIPTION:
@@ -40,8 +40,8 @@ def export_trait_data(
     PARAMETERS
     trait: (dict)
       The dictionary of key-value pairs representing a trait
-    strainlist: (list)
-      A list of strain names
+    samplelist: (list)
+      A list of sample names
     dtype: (str)
       ... verify what this is ...
     var_exists: (bool)
@@ -49,18 +49,18 @@ def export_trait_data(
     n_exists: (bool)
       A flag indicating existence of ndata
     """
-    def __export_all_types(tdata, strain):
+    def __export_all_types(tdata, sample):
         sample_data = []
-        if tdata[strain]["value"]:
-            sample_data.append(tdata[strain]["value"])
+        if tdata[sample]["value"]:
+            sample_data.append(tdata[sample]["value"])
             if var_exists:
-                if tdata[strain]["variance"]:
-                    sample_data.append(tdata[strain]["variance"])
+                if tdata[sample]["variance"]:
+                    sample_data.append(tdata[sample]["variance"])
                 else:
                     sample_data.append(None)
             if n_exists:
-                if tdata[strain]["ndata"]:
-                    sample_data.append(tdata[strain]["ndata"])
+                if tdata[sample]["ndata"]:
+                    sample_data.append(tdata[sample]["ndata"])
                 else:
                     sample_data.append(None)
         else:
@@ -73,17 +73,17 @@ def export_trait_data(
 
         return tuple(sample_data)
 
-    def __exporter(accumulator, strain):
+    def __exporter(accumulator, sample):
         # pylint: disable=[R0911]
-        if strain in trait_data["data"]:
+        if sample in trait_data["data"]:
             if dtype == "val":
-                return accumulator + (trait_data["data"][strain]["value"], )
+                return accumulator + (trait_data["data"][sample]["value"], )
             if dtype == "var":
-                return accumulator + (trait_data["data"][strain]["variance"], )
+                return accumulator + (trait_data["data"][sample]["variance"], )
             if dtype == "N":
-                return accumulator + (trait_data["data"][strain]["ndata"], )
+                return accumulator + (trait_data["data"][sample]["ndata"], )
             if dtype == "all":
-                return accumulator + __export_all_types(trait_data["data"], strain)
+                return accumulator + __export_all_types(trait_data["data"], sample)
             raise KeyError("Type `%s` is incorrect" % dtype)
         if var_exists and n_exists:
             return accumulator + (None, None, None)
@@ -91,7 +91,7 @@ def export_trait_data(
             return accumulator + (None, None)
         return accumulator + (None,)
 
-    return reduce(__exporter, strainlist, tuple())
+    return reduce(__exporter, samplelist, tuple())
 
 def trait_display_name(trait: Dict):
     """
@@ -165,19 +165,19 @@ def build_heatmap(traits_names, conn: Any):
         for fullname in traits_names]
     traits_data_list = [retrieve_trait_data(t, conn) for t in traits]
     genotype_filename = build_genotype_file(traits[0]["riset"])
-    strains = load_genotype_samples(genotype_filename)
+    samples = load_genotype_samples(genotype_filename)
     exported_traits_data_list = [
-        export_trait_data(td, strains) for td in traits_data_list]
+        export_trait_data(td, samples) for td in traits_data_list]
     clustered = cluster_traits(exported_traits_data_list)
     slinked = slink(clustered)
     traits_order = compute_traits_order(slinked)
-    strains_and_values = retrieve_strains_and_values(
-        traits_order, strains, exported_traits_data_list)
+    samples_and_values = retrieve_samples_and_values(
+        traits_order, samples, exported_traits_data_list)
     traits_filename = "{}/traits_test_file_{}.txt".format(
         TMPDIR, random_string(10))
     generate_traits_file(
-        strains_and_values[0][1],
-        [t[2] for t in strains_and_values],
+        samples_and_values[0][1],
+        [t[2] for t in samples_and_values],
         traits_filename)
 
     main_output, _permutations_output = run_reaper(
@@ -229,9 +229,9 @@ def compute_traits_order(slink_data, neworder: tuple = tuple()):
 
     return __order_maker(neworder, slink_data)
 
-def retrieve_strains_and_values(orders, strainlist, traits_data_list):
+def retrieve_samples_and_values(orders, samplelist, traits_data_list):
     """
-    Get the strains and their corresponding values from `strainlist` and
+    Get the samples and their corresponding values from `samplelist` and
     `traits_data_list`.
 
     This migrates the code in
@@ -240,17 +240,17 @@ def retrieve_strains_and_values(orders, strainlist, traits_data_list):
     # This feels nasty! There's a lot of mutation of values here, that might
     # indicate something untoward in the design of this function and its
     # dependents  ==>  Review
-    strains = []
+    samples = []
     values = []
     rets = []
     for order in orders:
         temp_val = traits_data_list[order]
-        for i, strain in enumerate(strainlist):
+        for i, sample in enumerate(samplelist):
             if temp_val[i] is not None:
-                strains.append(strain)
+                samples.append(sample)
                 values.append(temp_val[i])
-        rets.append([order, strains[:], values[:]])
-        strains = []
+        rets.append([order, samples[:], values[:]])
+        samples = []
         values = []
 
     return rets
diff --git a/tests/unit/computations/test_parsers.py b/tests/unit/computations/test_parsers.py
index 19c3067..b51b0bf 100644
--- a/tests/unit/computations/test_parsers.py
+++ b/tests/unit/computations/test_parsers.py
@@ -15,7 +15,7 @@ class TestParsers(unittest.TestCase):
 
     def test_parse_genofile_with_existing_file(self):
         """Test that a genotype file is parsed correctly"""
-        strains = ["bxd1", "bxd2"]
+        samples = ["bxd1", "bxd2"]
         genotypes = [
             {"chr": "1", "locus": "rs31443144",
              "cm": "1.50", "mb": "3.010274",
@@ -51,4 +51,4 @@ class TestParsers(unittest.TestCase):
             "../test_data/genotype.txt"
         ))
         self.assertEqual(parse_genofile(
-            test_genotype_file), (strains, genotypes))
+            test_genotype_file), (samples, genotypes))
diff --git a/tests/unit/test_heatmaps.py b/tests/unit/test_heatmaps.py
index fd91cf9..b54e2f3 100644
--- a/tests/unit/test_heatmaps.py
+++ b/tests/unit/test_heatmaps.py
@@ -5,41 +5,41 @@ from gn3.heatmaps import (
     get_lrs_from_chr,
     export_trait_data,
     compute_traits_order,
-    retrieve_strains_and_values,
+    retrieve_samples_and_values,
     process_traits_data_for_heatmap)
 from tests.unit.sample_test_data import organised_trait_1, organised_trait_2
 
-strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
+samplelist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
 trait_data = {
     "mysqlid": 36688172,
     "data": {
-        "B6cC3-1": {"strain_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None},
-        "BXD1": {"strain_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None},
-        "BXD12": {"strain_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None},
-        "BXD16": {"strain_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None},
-        "BXD19": {"strain_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None},
-        "BXD2": {"strain_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None},
-        "BXD21": {"strain_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None},
-        "BXD24": {"strain_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None},
-        "BXD27": {"strain_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None},
-        "BXD28": {"strain_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None},
-        "BXD32": {"strain_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None},
-        "BXD39": {"strain_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None},
-        "BXD40": {"strain_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None},
-        "BXD42": {"strain_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None},
-        "BXD6": {"strain_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None},
-        "BXH14": {"strain_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None},
-        "BXH19": {"strain_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None},
-        "BXH2": {"strain_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None},
-        "BXH22": {"strain_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None},
-        "BXH4": {"strain_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None},
-        "BXH6": {"strain_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None},
-        "BXH7": {"strain_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None},
-        "BXH8": {"strain_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None},
-        "BXH9": {"strain_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None},
-        "C3H/HeJ": {"strain_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None},
-        "C57BL/6J": {"strain_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None},
-        "DBA/2J": {"strain_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
+        "B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None},
+        "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None},
+        "BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None},
+        "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None},
+        "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None},
+        "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None},
+        "BXD21": {"sample_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None},
+        "BXD24": {"sample_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None},
+        "BXD27": {"sample_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None},
+        "BXD28": {"sample_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None},
+        "BXD32": {"sample_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None},
+        "BXD39": {"sample_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None},
+        "BXD40": {"sample_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None},
+        "BXD42": {"sample_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None},
+        "BXD6": {"sample_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None},
+        "BXH14": {"sample_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None},
+        "BXH19": {"sample_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None},
+        "BXH2": {"sample_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None},
+        "BXH22": {"sample_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None},
+        "BXH4": {"sample_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None},
+        "BXH6": {"sample_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None},
+        "BXH7": {"sample_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None},
+        "BXH8": {"sample_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None},
+        "BXH9": {"sample_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None},
+        "C3H/HeJ": {"sample_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None},
+        "C57BL/6J": {"sample_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None},
+        "DBA/2J": {"sample_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
 
 slinked = (
     (((0, 2, 0.16381088984330505),
@@ -66,7 +66,7 @@ class TestHeatmap(TestCase):
                 ["all", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)]]:
             with self.subTest(dtype=dtype):
                 self.assertEqual(
-                    export_trait_data(trait_data, strainlist, dtype=dtype),
+                    export_trait_data(trait_data, samplelist, dtype=dtype),
                     expected)
 
     def test_export_trait_data_dtype_all_flags(self):
@@ -106,7 +106,7 @@ class TestHeatmap(TestCase):
             with self.subTest(dtype=dtype, vflag=vflag, nflag=nflag):
                 self.assertEqual(
                     export_trait_data(
-                        trait_data, strainlist, dtype=dtype, var_exists=vflag,
+                        trait_data, samplelist, dtype=dtype, var_exists=vflag,
                         n_exists=nflag),
                     expected)
 
@@ -164,8 +164,8 @@ class TestHeatmap(TestCase):
         self.assertEqual(
             compute_traits_order(slinked), (0, 2, 1, 7, 5, 9, 3, 6, 8, 4))
 
-    def test_retrieve_strains_and_values(self):
-        """Test retrieval of strains and values."""
+    def test_retrieve_samples_and_values(self):
+        """Test retrieval of samples and values."""
         for orders, slist, tdata, expected in [
                 [
                     [2],
@@ -185,9 +185,9 @@ class TestHeatmap(TestCase):
                      [6, None, None, 4, None]],
                     [[3, ["s1", "s4"], [6, 4]]]
                 ]]:
-            with self.subTest(strainlist=slist, traitdata=tdata):
+            with self.subTest(samplelist=slist, traitdata=tdata):
                 self.assertEqual(
-                    retrieve_strains_and_values(orders, slist, tdata), expected)
+                    retrieve_samples_and_values(orders, slist, tdata), expected)
 
     def test_get_lrs_from_chr(self):
         """Check that function gets correct LRS values"""
-- 
cgit v1.2.3


From 60d54d8de466c179a93b6d46ad05ec1b9ba5f4a1 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 27 Sep 2021 05:13:19 +0300
Subject: Narrow the exception and add comments

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* Only catch `FileExistsError`, allowing any other exception to pass
  through. This tries to conform a little to the review at
  https://github.com/genenetwork/genenetwork3/pull/37#discussion_r714552696
---
 gn3/computations/qtlreaper.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'gn3/computations/qtlreaper.py')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 166d2dd..d1ff4ac 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -34,7 +34,8 @@ def create_output_directory(path: str):
     """Create the output directory at `path` if it does not exist."""
     try:
         os.mkdir(path)
-    except OSError:
+    except FileExistsError:
+        # If the directory already exists, do nothing.
         pass
 
 def run_reaper(
-- 
cgit v1.2.3