From 1a9d28e6db2140cc7b3491c6dbcf4fc8cd8c09b6 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Tue, 17 Aug 2021 08:47:11 +0300
Subject: Add tests and fix errors caught with tests

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/computations/heatmap.py: fix errors
* tests/unit/computations/test_heatmap.py: new tests

  Add new tests with the expected source data format, and expected results.
  Fix all errors that were caught by running the tests
---
 gn3/computations/heatmap.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
index a0e778a..8a86fe8 100644
--- a/gn3/computations/heatmap.py
+++ b/gn3/computations/heatmap.py
@@ -34,11 +34,11 @@ def export_trait_data(
     """
     def __export_all_types(tdata, strain):
         sample_data = []
-        if tdata[strain]["val"]:
-            sample_data.append(tdata[strain]["val"])
+        if tdata[strain]["value"]:
+            sample_data.append(tdata[strain]["value"])
             if var_exists:
-                if tdata[strain].var:
-                    sample_data.append(tdata[strain]["var"])
+                if tdata[strain]["variance"]:
+                    sample_data.append(tdata[strain]["variance"])
                 else:
                     sample_data.append(None)
             if n_exists:
@@ -58,15 +58,15 @@ def export_trait_data(
 
     def __exporter(accumulator, strain):
         # pylint: disable=[R0911]
-        if trait_data.has_key(strain):
+        if strain in trait_data["data"]:
             if dtype == "val":
-                return accumulator + (trait_data[strain]["val"], )
+                return accumulator + (trait_data["data"][strain]["value"], )
             if dtype == "var":
-                return accumulator + (trait_data[strain]["var"], )
+                return accumulator + (trait_data["data"][strain]["variance"], )
             if dtype == "N":
-                return trait_data[strain]["ndata"]
+                return accumulator + (trait_data["data"][strain]["ndata"], )
             if dtype == "all":
-                return accumulator + __export_all_types(trait_data, strain)
+                return accumulator + __export_all_types(trait_data["data"], strain)
             raise KeyError("Type `%s` is incorrect" % dtype)
         if var_exists and n_exists:
             return accumulator + (None, None, None)
-- 
cgit 1.4.1


From 99bfda81abe76b3bb3f7034cf6cdac21c8d50726 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Tue, 17 Aug 2021 11:05:03 +0300
Subject: Make child sequence a list

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* Since the `slink` function assigns values to the `listscopy` variable and its
  children, this commit ensures that each child sequence is a list to allow for
  the assignment.

  If the child-sequence is a tuple, that would lead to an exception.
---
 gn3/computations/slink.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/slink.py b/gn3/computations/slink.py
index 5953e6b..3d7a576 100644
--- a/gn3/computations/slink.py
+++ b/gn3/computations/slink.py
@@ -161,7 +161,7 @@ def slink(lists):
     try:
         size = len(lists)
         listindexcopy = list(range(size))
-        listscopy = [child[:] for child in lists]
+        listscopy = [list(child[:]) for child in lists]
         init_size = size
         candidate = []
         while init_size > 2:
-- 
cgit 1.4.1


From d491be2057843921cc67bd1c4b1ae612d9f15d34 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Tue, 17 Aug 2021 11:42:20 +0300
Subject: Fix obvious linting errors

* Fix linting errors that do not change the function of the code.
---
 gn3/api/correlation.py           | 4 ++--
 gn3/api/general.py               | 3 ++-
 gn3/computations/correlations.py | 4 ++--
 wsgi.py                          | 6 ++++--
 4 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'gn3/computations')

diff --git a/gn3/api/correlation.py b/gn3/api/correlation.py
index a3e366e..46121f8 100644
--- a/gn3/api/correlation.py
+++ b/gn3/api/correlation.py
@@ -79,7 +79,7 @@ def compute_tissue_corr(corr_method="pearson"):
     target_tissues_dict = tissue_input_data["target_tissues_dict"]
 
     results = compute_tissue_correlation(primary_tissue_dict=primary_tissue_dict,
-                                             target_tissues_data=target_tissues_dict,
-                                             corr_method=corr_method)
+                                         target_tissues_data=target_tissues_dict,
+                                         corr_method=corr_method)
 
     return jsonify(results)
diff --git a/gn3/api/general.py b/gn3/api/general.py
index 86fb7b7..69ec343 100644
--- a/gn3/api/general.py
+++ b/gn3/api/general.py
@@ -13,7 +13,8 @@ general = Blueprint("general", __name__)
 
 @general.route("/version")
 def version():
-  return jsonify("1.0")
+    """Get API version."""
+    return jsonify("1.0")
 
 @general.route("/metadata/upload/", methods=["POST"],
                strict_slashes=False)
diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py
index 1fd3213..8d76c09 100644
--- a/gn3/computations/correlations.py
+++ b/gn3/computations/correlations.py
@@ -341,8 +341,8 @@ def compute_all_lit_correlation(conn, trait_lists: List,
 
 
 def compute_tissue_correlation(primary_tissue_dict: dict,
-                                   target_tissues_data: dict,
-                                   corr_method: str):
+                               target_tissues_data: dict,
+                               corr_method: str):
     """Function acts as an abstraction for tissue_correlation_for_trait\
     required input are target tissue object and primary tissue trait\
     target tissues data contains the trait_symbol_dict and symbol_tissue_vals
diff --git a/wsgi.py b/wsgi.py
index d30bc49..0fcb573 100644
--- a/wsgi.py
+++ b/wsgi.py
@@ -1,9 +1,11 @@
+"""
+WSGI application entry-point.
+"""
 # import main
+from gn3.app import create_app
 
 print("STARTING WSGI APP")
 
-from gn3.app import create_app
-
 app = create_app()
 
 if __name__ == "__main__":
-- 
cgit 1.4.1


From 41fc5136914548710529cbed7ef370dfb5b4a5c8 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Tue, 17 Aug 2021 11:43:32 +0300
Subject: Test the clustering

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/computations/heatmap.py: Fix clustering bugs
* tests/unit/computations/test_heatmap.py: Add new tests. Fix linting issues.

  Test and fix the clustering function.
---
 gn3/computations/heatmap.py             |  14 ++--
 tests/unit/computations/test_heatmap.py | 109 +++++++++++++++++++++++++++++---
 2 files changed, 106 insertions(+), 17 deletions(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
index 8a86fe8..3c35029 100644
--- a/gn3/computations/heatmap.py
+++ b/gn3/computations/heatmap.py
@@ -110,13 +110,13 @@ def cluster_traits(traits_data_list: Sequence[Dict]):
     https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L138-L162
     """
     def __compute_corr(tdata_i, tdata_j):
-        if tdata_j[0] < tdata_i[0]:
-            corr_vals = compute_correlation(tdata_i, tdata_j)
-            corr = corr_vals[0]
-            if (1 - corr) < 0:
-                return 0.0
-            return 1 - corr
-        return 0.0
+        if tdata_i[0] == tdata_j[0]:
+            return 0.0
+        corr_vals = compute_correlation(tdata_i[1], tdata_j[1])
+        corr = corr_vals[0]
+        if (1 - corr) < 0:
+            return 0.0
+        return 1 - corr
 
     def __cluster(tdata_i):
         return tuple(
diff --git a/tests/unit/computations/test_heatmap.py b/tests/unit/computations/test_heatmap.py
index 78303ae..650cb45 100644
--- a/tests/unit/computations/test_heatmap.py
+++ b/tests/unit/computations/test_heatmap.py
@@ -1,9 +1,38 @@
 """Module contains tests for gn3.computations.heatmap"""
 from unittest import TestCase
-from gn3.computations.heatmap import export_trait_data
+from gn3.computations.heatmap import cluster_traits, export_trait_data
 
 strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
-trait_data = {"mysqlid": 36688172, "data": {"B6cC3-1": {"strain_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None}, "BXD1": {"strain_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None}, "BXD12": {"strain_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None}, "BXD16": {"strain_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None}, "BXD19": {"strain_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None}, "BXD2": {"strain_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None}, "BXD21": {"strain_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None}, "BXD24": {"strain_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None}, "BXD27": {"strain_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None}, "BXD28": {"strain_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None}, "BXD32": {"strain_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None}, "BXD39": {"strain_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None}, "BXD40": {"strain_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None}, "BXD42": {"strain_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None}, "BXD6": {"strain_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None}, "BXH14": {"strain_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None}, "BXH19": {"strain_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None}, "BXH2": {"strain_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None}, "BXH22": {"strain_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None}, "BXH4": {"strain_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None}, "BXH6": {"strain_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None}, "BXH7": {"strain_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None}, "BXH8": {"strain_name": "BXH8", "value": 6.77338, "variance": None, 
"ndata": None}, "BXH9": {"strain_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None}, "C3H/HeJ": {"strain_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None}, "C57BL/6J": {"strain_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None}, "DBA/2J": {"strain_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
+trait_data = {
+    "mysqlid": 36688172,
+    "data": {
+        "B6cC3-1": {"strain_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None},
+        "BXD1": {"strain_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None},
+        "BXD12": {"strain_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None},
+        "BXD16": {"strain_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None},
+        "BXD19": {"strain_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None},
+        "BXD2": {"strain_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None},
+        "BXD21": {"strain_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None},
+        "BXD24": {"strain_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None},
+        "BXD27": {"strain_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None},
+        "BXD28": {"strain_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None},
+        "BXD32": {"strain_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None},
+        "BXD39": {"strain_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None},
+        "BXD40": {"strain_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None},
+        "BXD42": {"strain_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None},
+        "BXD6": {"strain_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None},
+        "BXH14": {"strain_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None},
+        "BXH19": {"strain_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None},
+        "BXH2": {"strain_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None},
+        "BXH22": {"strain_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None},
+        "BXH4": {"strain_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None},
+        "BXH6": {"strain_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None},
+        "BXH7": {"strain_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None},
+        "BXH8": {"strain_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None},
+        "BXH9": {"strain_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None},
+        "C3H/HeJ": {"strain_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None},
+        "C57BL/6J": {"strain_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None},
+        "DBA/2J": {"strain_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
 
 class TestHeatmap(TestCase):
     """Class for testing heatmap computation functions"""
@@ -29,10 +58,14 @@ class TestHeatmap(TestCase):
         argument and the different flags set up
         """
         for dtype, vflag, nflag, expected in [
-                ["val", False, False, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["val", False, True, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["val", True, False, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["val", True, True, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", False, False,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", False, True,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", True, False,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", True, True,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
                 ["var", False, False, (None, None, None, None, None, None)],
                 ["var", False, True, (None, None, None, None, None, None)],
                 ["var", True, False, (None, None, None, None, None, None)],
@@ -41,10 +74,17 @@ class TestHeatmap(TestCase):
                 ["N", False, True, (None, None, None, None, None, None)],
                 ["N", True, False, (None, None, None, None, None, None)],
                 ["N", True, True, (None, None, None, None, None, None)],
-                ["all", False, False, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["all", False, True, (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None, 8.30401, None, 7.80944, None)],
-                ["all", True, False, (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None, 8.30401, None, 7.80944, None)],
-                ["all", True, True, (7.51879, None, None, 7.77141, None, None, 8.39265, None, None, 8.17443, None, None, 8.30401, None, None, 7.80944, None, None)]
+                ["all", False, False,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["all", False, True,
+                 (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None,
+                  8.30401, None, 7.80944, None)],
+                ["all", True, False,
+                 (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None,
+                  8.30401, None, 7.80944, None)],
+                ["all", True, True,
+                 (7.51879, None, None, 7.77141, None, None, 8.39265, None, None,
+                  8.17443, None, None, 8.30401, None, None, 7.80944, None, None)]
         ]:
             with self.subTest(dtype=dtype, vflag=vflag, nflag=nflag):
                 self.assertEqual(
@@ -52,3 +92,52 @@ class TestHeatmap(TestCase):
                         trait_data, strainlist, dtype=dtype, var_exists=vflag,
                         n_exists=nflag),
                     expected)
+
+    def test_cluster_traits(self):
+        """
+        Test that the clustering is working as expected.
+        """
+        traits_data_list = [
+            (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944),
+            (6.1427, 6.50588, 7.73705, 6.68328, 7.49293, 7.27398),
+            (8.4211, 8.30581, 9.24076, 8.51173, 9.18455, 8.36077),
+            (10.0904, 10.6509, 9.36716, 9.91202, 8.57444, 10.5731),
+            (10.188, 9.76652, 9.54813, 9.05074, 9.52319, 9.10505),
+            (6.74676, 7.01029, 7.54169, 6.48574, 7.01427, 7.26815),
+            (6.39359, 6.85321, 5.78337, 7.11141, 6.22101, 6.16544),
+            (6.84118, 7.08432, 7.59844, 7.08229, 7.26774, 7.24991),
+            (9.45215, 10.6943, 8.64719, 10.1592, 7.75044, 8.78615),
+            (7.04737, 6.87185, 7.58586, 6.92456, 6.84243, 7.36913)]
+        self.assertEqual(
+            cluster_traits(traits_data_list),
+            ((0.0, 0.20337048635536847, 0.16381088984330505, 1.7388553629398245,
+              1.5025235756329178, 0.6952839500255574, 1.271661230252733,
+              0.2100487290977544, 1.4699690641062024, 0.7934461515867415),
+             (0.20337048635536847, 0.0, 0.2198321044997198, 1.5753041735592204,
+              1.4815755944537086, 0.26087293140686374, 1.6939790104301427,
+              0.06024619831474998, 1.7430082449189215, 0.4497104244247795),
+             (0.16381088984330505, 0.2198321044997198, 0.0, 1.9073926868549234,
+              1.0396738891139845, 0.5278328671176757, 1.6275069061182947,
+              0.2636503792482082, 1.739617877037615, 0.7127042590637039),
+             (1.7388553629398245, 1.5753041735592204, 1.9073926868549234, 0.0,
+              0.9936846292920328, 1.1169999189889366, 0.6007483980555253,
+              1.430209221053372, 0.25879514152086425, 0.9313185954797953),
+             (1.5025235756329178, 1.4815755944537086, 1.0396738891139845,
+              0.9936846292920328, 0.0, 1.027827186339337, 1.1441743109173244,
+              1.4122477962364253, 0.8968250491499363, 1.1683723389247052),
+             (0.6952839500255574, 0.26087293140686374, 0.5278328671176757,
+              1.1169999189889366, 1.027827186339337, 0.0, 1.8420471110023269,
+              0.19179284676938602, 1.4875072385631605, 0.23451785425383564),
+             (1.271661230252733, 1.6939790104301427, 1.6275069061182947,
+              0.6007483980555253, 1.1441743109173244, 1.8420471110023269, 0.0,
+              1.6540234785929928, 0.2140799896286565, 1.7413442197913358),
+             (0.2100487290977544, 0.06024619831474998, 0.2636503792482082,
+              1.430209221053372, 1.4122477962364253, 0.19179284676938602,
+              1.6540234785929928, 0.0, 1.5225640692832796, 0.33370067057028485),
+             (1.4699690641062024, 1.7430082449189215, 1.739617877037615,
+              0.25879514152086425, 0.8968250491499363, 1.4875072385631605,
+              0.2140799896286565, 1.5225640692832796, 0.0, 1.3256191648260216),
+             (0.7934461515867415, 0.4497104244247795, 0.7127042590637039,
+              0.9313185954797953, 1.1683723389247052, 0.23451785425383564,
+              1.7413442197913358, 0.33370067057028485, 1.3256191648260216,
+              0.0)))
-- 
cgit 1.4.1


From ded960e3d32e4d7ebe590deda27fc47175be73d9 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Fri, 20 Aug 2021 13:21:31 +0300
Subject: Add tests for ordering and implement function

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/computations/heatmap.py: implement new ordering function
* tests/unit/computations/test_heatmap.py: add new tests

  Implement the ordering function to migrate the setup of the `neworder`
  variable from GN1 to GN3.

  This migration is incomplete, since some of the code paths depend on the
  value returned from the `web.webqtl.heatmap.Heatmap.draw` function, in the
  form of the `d_1` variable.

  The `d_1` and `xoffset` variables seem to be used for laying out things on
  the drawn heatmap, and might actually end up not being needed in the new
  system, which uses plotly and has other ways of laying out things on the
  drawing.

  For now though, this commit "shims" the presence of these values until the
  use of these variables is confirmed as present or absent in the new GN3
  system.
---
 gn3/computations/heatmap.py             | 28 ++++++++++++++++++++++++++++
 tests/unit/computations/test_heatmap.py | 25 ++++++++++++++++++++++++-
 2 files changed, 52 insertions(+), 1 deletion(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
index 3c35029..1c86261 100644
--- a/gn3/computations/heatmap.py
+++ b/gn3/computations/heatmap.py
@@ -175,3 +175,31 @@ def heatmap_data(formd, search_result, conn: Any):
         "traits_list": traits_list,
         "traits_data_list": traits_data_list
     }
+
+def compute_heatmap_order(
+        slink_data, xoffset: int = 40, neworder: tuple = tuple()):
+    """
+    Compute the data used for drawing the heatmap proper from `slink_data`.
+
+    This function tries to reproduce the creation and update of the `neworder`
+    variable in
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L120
+    and in the `web.webqtl.heatmap.Heatmap.draw` function in GN1
+    """
+    d_1 = (0, 0, 0) # returned from self.draw in lines 391 and 399. This is just a placeholder
+
+    def __order_maker(norder, slnk_dt):
+        print("norder:{}, slnk_dt:{}".format(norder, slnk_dt))
+        if isinstance(slnk_dt[0], int) and isinstance(slnk_dt[1], int):
+            return norder + (
+                (xoffset+20, slnk_dt[0]), (xoffset + 40, slnk_dt[1]))
+
+        if isinstance(slnk_dt[0], int):
+            return norder + ((xoffset + 20, slnk_dt[0]), )
+
+        if isinstance(slnk_dt[1], int):
+            return norder + ((xoffset + d_1[0] + 20, slnk_dt[1]), )
+
+        return __order_maker(__order_maker(norder, slnk_dt[0]), slnk_dt[1])
+
+    return __order_maker(neworder, slink_data)
diff --git a/tests/unit/computations/test_heatmap.py b/tests/unit/computations/test_heatmap.py
index 650cb45..14807bb 100644
--- a/tests/unit/computations/test_heatmap.py
+++ b/tests/unit/computations/test_heatmap.py
@@ -1,6 +1,9 @@
 """Module contains tests for gn3.computations.heatmap"""
 from unittest import TestCase
-from gn3.computations.heatmap import cluster_traits, export_trait_data
+from gn3.computations.heatmap import (
+    cluster_traits,
+    export_trait_data,
+    compute_heatmap_order)
 
 strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
 trait_data = {
@@ -34,6 +37,16 @@ trait_data = {
         "C57BL/6J": {"strain_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None},
         "DBA/2J": {"strain_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
 
+slinked = (
+    (((0, 2, 0.16381088984330505),
+      ((1, 7, 0.06024619831474998), 5, 0.19179284676938602),
+      0.20337048635536847),
+     9,
+     0.23451785425383564),
+    ((3, (6, 8, 0.2140799896286565), 0.25879514152086425),
+     4, 0.8968250491499363),
+    0.9313185954797953)
+
 class TestHeatmap(TestCase):
     """Class for testing heatmap computation functions"""
 
@@ -141,3 +154,13 @@ class TestHeatmap(TestCase):
               0.9313185954797953, 1.1683723389247052, 0.23451785425383564,
               1.7413442197913358, 0.33370067057028485, 1.3256191648260216,
               0.0)))
+
+    def test_compute_heatmap_order(self):
+        """Test the orders."""
+        for xoff, expected in [
+                (40, ((60, 9), (60, 4))),
+                (30, ((50, 9), (50, 4))),
+                (20, ((40, 9), (40, 4)))]:
+            with self.subTest(xoffset=xoff):
+                self.assertEqual(
+                    compute_heatmap_order(slinked, xoffset=xoff), expected)
-- 
cgit 1.4.1


From 8b2c776771d2a70613a1e31d6e6671b612cfbafc Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Fri, 20 Aug 2021 14:10:45 +0300
Subject: Retrieve the strains with valid values

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/computations/heatmap.py: add function to get strains with values
* tests/unit/computations/test_heatmap.py: new tests

  Add function to get the strains whose values are not `None` from the
  `trait_data` object passed in.

  This migrates
  https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L215-221
  into a separate function that can handle that and be tested independently of
  any other code.
---
 gn3/computations/heatmap.py             | 19 +++++++++++++++++++
 tests/unit/computations/test_heatmap.py | 14 +++++++++++++-
 2 files changed, 32 insertions(+), 1 deletion(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
index 1c86261..5a3c619 100644
--- a/gn3/computations/heatmap.py
+++ b/gn3/computations/heatmap.py
@@ -203,3 +203,22 @@ def compute_heatmap_order(
         return __order_maker(__order_maker(norder, slnk_dt[0]), slnk_dt[1])
 
     return __order_maker(neworder, slink_data)
+
+def retrieve_strains_and_values(strainlist, trait_data):
+    """
+    Get the strains and their corresponding values from `strainlist` and
+    `trait_data`.
+
+    This migrates the code in
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L215-221
+    """
+    def __strains_and_values(acc, i):
+        if trait_data[i] is None:
+            return acc
+        if len(acc) == 0:
+            return ((strainlist[i], ), (trait_data[i], ))
+        _strains = acc[0]
+        _vals = acc[1]
+        return (_strains + (strainlist[i], ), _vals + (trait_data[i], ))
+    return reduce(
+        __strains_and_values, range(len(strainlist)), (tuple(), tuple()))
diff --git a/tests/unit/computations/test_heatmap.py b/tests/unit/computations/test_heatmap.py
index 14807bb..686288d 100644
--- a/tests/unit/computations/test_heatmap.py
+++ b/tests/unit/computations/test_heatmap.py
@@ -3,7 +3,8 @@ from unittest import TestCase
 from gn3.computations.heatmap import (
     cluster_traits,
     export_trait_data,
-    compute_heatmap_order)
+    compute_heatmap_order,
+    retrieve_strains_and_values)
 
 strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
 trait_data = {
@@ -164,3 +165,14 @@ class TestHeatmap(TestCase):
             with self.subTest(xoffset=xoff):
                 self.assertEqual(
                     compute_heatmap_order(slinked, xoffset=xoff), expected)
+
+    def test_retrieve_strains_and_values(self):
+        """Test retrieval of strains and values."""
+        for slist, tdata, expected in [
+                [["s1", "s2", "s3", "s4"], [9, None, 5, 4],
+                 (("s1", "s3", "s4"), (9, 5, 4))],
+                [["s1", "s2", "s3", "s4", "s5"], [6, None, None, 4, None],
+                 (("s1", "s4"), (6, 4))]]:
+            with self.subTest(strainlist=slist, traitdata=tdata):
+                self.assertEqual(
+                    retrieve_strains_and_values(slist, tdata), expected)
-- 
cgit 1.4.1


From 96af4e9e32ed167a8d70cf7761b709b1a37bb344 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Fri, 20 Aug 2021 14:14:12 +0300
Subject: Fix typing issue(s) caught by mypy

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/computations/heatmap.py: Use `Sequence` type not `Iterator` type
---
 gn3/computations/heatmap.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
index 5a3c619..c9c2b8a 100644
--- a/gn3/computations/heatmap.py
+++ b/gn3/computations/heatmap.py
@@ -156,8 +156,8 @@ def heatmap_data(formd, search_result, conn: Any):
     traits_details = [
         __retrieve_traitlist_and_datalist(threshold, fullname)
         for fullname in search_result]
-    traits_list = map(lambda x: x[0], traits_details)
-    traits_data_list = map(lambda x: x[1], traits_details)
+    traits_list = tuple(x[0] for x in traits_details)
+    traits_data_list = tuple(x[1] for x in traits_details)
 
     return {
         "target_description_checked": formd.formdata.getvalue(
-- 
cgit 1.4.1


From 557e482c88ba3d44ae7d278b7222f37fa043b4d0 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Fri, 27 Aug 2021 15:47:52 +0300
Subject: Rework strains and trait values retrieval

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* Rework the strains and values retrieval function to correspond more closely
  to how the original code in GN1 works.
---
 gn3/computations/heatmap.py | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
index c9c2b8a..da13ceb 100644
--- a/gn3/computations/heatmap.py
+++ b/gn3/computations/heatmap.py
@@ -204,21 +204,28 @@ def compute_heatmap_order(
 
     return __order_maker(neworder, slink_data)
 
-def retrieve_strains_and_values(strainlist, trait_data):
+def retrieve_strains_and_values(orders, strainlist, traits_data_list):
     """
     Get the strains and their corresponding values from `strainlist` and
-    `trait_data`.
+    `traits_data_list`.
 
     This migrates the code in
     https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L215-221
     """
-    def __strains_and_values(acc, i):
-        if trait_data[i] is None:
-            return acc
-        if len(acc) == 0:
-            return ((strainlist[i], ), (trait_data[i], ))
-        _strains = acc[0]
-        _vals = acc[1]
-        return (_strains + (strainlist[i], ), _vals + (trait_data[i], ))
-    return reduce(
-        __strains_and_values, range(len(strainlist)), (tuple(), tuple()))
+    # This feels nasty! There's a lot of mutation of values here, that might
+    # indicate something untoward in the design of this function and its
+    # dependents  ==>  Review
+    strains = []
+    values = []
+    rets = []
+    for order in orders:
+        temp_val = traits_data_list[order[1]]
+        for i in range(len(strainlist)):
+            if temp_val[i] != None:
+                strains.append(strainlist[i])
+                values.append(temp_val[i])
+        rets.append([order, strains[:], values[:]])
+        strains = []
+        values = []
+
+    return rets
-- 
cgit 1.4.1


From 1a3901b174d00af8fa7f5ae78b810de66024b5ab Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Fri, 27 Aug 2021 15:49:53 +0300
Subject: Export trait data to file

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* Provide a function to export the given strains and traits data into a traits
  file for use with `rust-qtlreaper`.
---
 gn3/computations/heatmap.py | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'gn3/computations')

diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
index da13ceb..2f92048 100644
--- a/gn3/computations/heatmap.py
+++ b/gn3/computations/heatmap.py
@@ -229,3 +229,11 @@ def retrieve_strains_and_values(orders, strainlist, traits_data_list):
         values = []
 
     return rets
+
+def generate_traits_file(strains, trait_values, traits_filename):
+    header = "Traits\t{}\n".format("\t".join(strains))
+    data = [header] + [
+        "T{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
+        for i,t in enumerate(trait_values)]
+    with open(traits_filename, "w") as outfile:
+        outfile.writelines(data)
-- 
cgit 1.4.1


From 28fde00ee2835d404157652548a4265be3accede Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Fri, 27 Aug 2021 15:51:27 +0300
Subject: Provide intermediate data in final results

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* Seeing as not every requirement/feature has been migrated over at this time,
  this commit just provides all the intermediate data representations in the
  final return of the function for later use down the line.
---
 gn3/computations/heatmap.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
index 2f92048..3e96ed2 100644
--- a/gn3/computations/heatmap.py
+++ b/gn3/computations/heatmap.py
@@ -149,22 +149,22 @@ def heatmap_data(formd, search_result, conn: Any):
 
     def __retrieve_traitlist_and_datalist(threshold, fullname):
         trait = retrieve_trait_info(threshold, fullname, conn)
-        return (
-            trait,
-            export_trait_data(retrieve_trait_data(trait, conn), strainlist))
+        return (trait, retrieve_trait_data(trait, conn))
 
     traits_details = [
         __retrieve_traitlist_and_datalist(threshold, fullname)
         for fullname in search_result]
     traits_list = tuple(x[0] for x in traits_details)
-    traits_data_list = tuple(x[1] for x in traits_details)
+    traits_data_list = [x[1] for x in traits_details]
+    exported_traits_data_list = tuple(
+        export_trait_data(td, strainlist) for x in traits_data_list)
 
     return {
         "target_description_checked": formd.formdata.getvalue(
             "targetDescriptionCheck", ""),
         "cluster_checked": cluster_checked,
         "slink_data": (
-            slink(cluster_traits(traits_data_list))
+            slink(cluster_traits(exported_traits_data_list))
             if cluster_checked else False),
         "sessionfile": formd.formdata.getvalue("session"),
         "genotype": genotype,
@@ -173,7 +173,8 @@ def heatmap_data(formd, search_result, conn: Any):
         "ppolar": formd.ppolar,
         "mpolar":formd.mpolar,
         "traits_list": traits_list,
-        "traits_data_list": traits_data_list
+        "traits_data_list": traits_data_list,
+        "exported_traits_data_list": exported_traits_data_list
     }
 
 def compute_heatmap_order(
-- 
cgit 1.4.1


From 983acfdfc523677b4d7501287a000b7fd52a2c39 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Mon, 30 Aug 2021 07:00:38 +0300
Subject: Implement module for interfacing with rust-qtlreaper

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/computations/heatmap.py: move `generate_traits_file` function to new
  module
* gn3/computations/qtlreaper.py: new module to interface with the
  `rust-qtlreaper` utility.
* gn3/settings.py: Provide setting for the path to the `rust-qtlreaper`
  utility
* qtlfilesexport.py: Move `random_string` function to new module. Update to
  use functions in new module.

  Provide a module with functions to be used to interface with
  `rust-qtlreaper`. This module essentially contains all the functions that
  are needed to build the files needed for, and to run the qtlreaper utility.
---
 gn3/computations/heatmap.py   |  8 ----
 gn3/computations/qtlreaper.py | 88 +++++++++++++++++++++++++++++++++++++++++++
 gn3/settings.py               |  3 ++
 qtlfilesexport.py             | 10 +----
 4 files changed, 92 insertions(+), 17 deletions(-)
 create mode 100644 gn3/computations/qtlreaper.py

(limited to 'gn3/computations')

diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
index 3e96ed2..dcd64b1 100644
--- a/gn3/computations/heatmap.py
+++ b/gn3/computations/heatmap.py
@@ -230,11 +230,3 @@ def retrieve_strains_and_values(orders, strainlist, traits_data_list):
         values = []
 
     return rets
-
-def generate_traits_file(strains, trait_values, traits_filename):
-    header = "Traits\t{}\n".format("\t".join(strains))
-    data = [header] + [
-        "T{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
-        for i,t in enumerate(trait_values)]
-    with open(traits_filename, "w") as outfile:
-        outfile.writelines(data)
diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
new file mode 100644
index 0000000..49d363b
--- /dev/null
+++ b/gn3/computations/qtlreaper.py
@@ -0,0 +1,88 @@
+"""
+This module contains functions to interact with the `qtlreaper` utility for
+computation of QTLs.
+"""
+import os
+import random
+import string
+import subprocess
+from gn3.settings import TMPDIR, REAPER_COMMAND
+
+def random_string(length):
+    """Generate a random string of length `length`."""
+    return "".join(
+        random.choices(
+            string.ascii_letters + string.digits, k=length))
+
+def generate_traits_file(strains, trait_values, traits_filename):
+    """
+    Generate a traits file for use with `qtlreaper`.
+
+    PARAMETERS:
+    strains: A list of strains to use as the headers for the various columns.
+    trait_values: A list of lists of values for each trait and strain.
+    traits_filename: The tab-separated value to put the values in for
+        computation of QTLs.
+    """
+    header = "Traits\t{}\n".format("\t".join(strains))
+    data = [header] + [
+        "T{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
+        for i, t in enumerate(trait_values)]
+    with open(traits_filename, "w") as outfile:
+        outfile.writelines(data)
+
+def create_output_directory(path: str):
+    """Create the output directory at `path` if it does not exist."""
+    try:
+        os.mkdir(path)
+    except OSError:
+        pass
+
+def run_reaper(
+        genotype_filename: str, traits_filename: str,
+        other_options: tuple = ("--n_permutations", 1000),
+        separate_nperm_output: bool = False,
+        output_dir: str = TMPDIR):
+    """
+    Run the QTLReaper command to compute the QTLs.
+
+    PARAMETERS:
+    genotype_filename: The complete path to a genotype file to use in the QTL
+        computation.
+    traits_filename: A path to a file previously generated with the
+        `generate_traits_file` function in this module, to be used in the QTL
+        computation.
+    other_options: Other options to pass to the `qtlreaper` command to modify
+        the QTL computations.
+    separate_nperm_output: A flag indicating whether or not to provide a
+        separate output for the permutations computation. The default is False,
+        which means by default, no separate output file is created.
+    output_dir: A path to the directory where the outputs are put
+
+    RETURNS:
+    The function returns a tuple of the main output file, and the output file
+    for the permutation computations. If the `separate_nperm_output` is `False`,
+    the second value in the tuple returned is `None`.
+
+    RAISES:
+    The function will raise a `subprocess.CalledProcessError` exception in case
+    of any errors running the `qtlreaper` command.
+    """
+    create_output_directory(output_dir)
+    output_filename = "{}/qtlreaper/main_output_{}.txt".format(
+        output_dir, random_string(10))
+    output_list = ["--main_output", output_filename]
+    if separate_nperm_output:
+        permu_output_filename = "{}/qtlreaper/permu_output_{}.txt".format(
+            output_dir, random_string(10))
+        output_list = output_list + ["--permu_output", permu_output_filename]
+    else:
+        permu_output_filename = None
+
+    command_list = [
+        REAPER_COMMAND, "--geno", genotype_filename,
+        *other_options, # this splices the `other_options` list here
+        "--traits", traits_filename, "--main_output", output_filename]
+
+    subprocess.run(command_list, check=True)
+    return (output_filename, permu_output_filename)
diff --git a/gn3/settings.py b/gn3/settings.py
index f4866d5..d137370 100644
--- a/gn3/settings.py
+++ b/gn3/settings.py
@@ -24,3 +24,6 @@ GN2_BASE_URL = "http://www.genenetwork.org/"
 
 # biweight script
 BIWEIGHT_RSCRIPT = "~/genenetwork3/scripts/calculate_biweight.R"
+
+# qtlreaper command
+REAPER_COMMAND = "{}/bin/qtlreaper".format(os.environ.get("GUIX_ENVIRONMENT"))
diff --git a/qtlfilesexport.py b/qtlfilesexport.py
index 2e7c9c2..0543dc9 100644
--- a/qtlfilesexport.py
+++ b/qtlfilesexport.py
@@ -7,16 +7,14 @@ Run with:
 
 replacing the variables in the angled brackets with the appropriate values
 """
-import random
-import string
 from gn3.computations.slink import slink
 from gn3.db_utils import database_connector
 from gn3.computations.heatmap import export_trait_data
 from gn3.db.traits import retrieve_trait_data, retrieve_trait_info
+from gn3.computations.qtlreaper import random_string, generate_traits_file
 from gn3.computations.heatmap import (
     cluster_traits,
     compute_heatmap_order,
-    generate_traits_file,
     retrieve_strains_and_values)
 
 TMPDIR = "tmp/qtltests"
@@ -35,11 +33,6 @@ def trait_fullnames():
         "UCLA_BXDBXH_CARTILAGE_V2::ILM4200064",
         "UCLA_BXDBXH_CARTILAGE_V2::ILM3140463"]
 
-def random_string(length):
-    return "".join(
-        random.choices(
-            string.ascii_letters + string.digits, k=length))
-
 def main():
     """entrypoint function"""
     conn = database_connector()[0]
@@ -56,7 +49,6 @@ def main():
     strains_and_values = retrieve_strains_and_values(
         orders, strains, exported_traits_data_list)
     strains_values = strains_and_values[0][1]
-    strains_values2 = strains_and_values[1][1]
     trait_values = [t[2] for t in strains_and_values]
     traits_filename = "{}/traits_test_file_{}.txt".format(
         TMPDIR, random_string(10))
-- 
cgit 1.4.1


From b95ad3bd2ce8bc22d1dcadefdf76c43f28309984 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Mon, 30 Aug 2021 07:05:49 +0300
Subject: Fix some linting errors and minor bugs.

---
 gn3/computations/heatmap.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
index dcd64b1..e0ff05b 100644
--- a/gn3/computations/heatmap.py
+++ b/gn3/computations/heatmap.py
@@ -157,7 +157,7 @@ def heatmap_data(formd, search_result, conn: Any):
     traits_list = tuple(x[0] for x in traits_details)
     traits_data_list = [x[1] for x in traits_details]
     exported_traits_data_list = tuple(
-        export_trait_data(td, strainlist) for x in traits_data_list)
+        export_trait_data(td, strainlist) for td in traits_data_list)
 
     return {
         "target_description_checked": formd.formdata.getvalue(
@@ -190,7 +190,6 @@ def compute_heatmap_order(
     d_1 = (0, 0, 0) # returned from self.draw in lines 391 and 399. This is just a placeholder
 
     def __order_maker(norder, slnk_dt):
-        print("norder:{}, slnk_dt:{}".format(norder, slnk_dt))
         if isinstance(slnk_dt[0], int) and isinstance(slnk_dt[1], int):
             return norder + (
                 (xoffset+20, slnk_dt[0]), (xoffset + 40, slnk_dt[1]))
@@ -221,9 +220,9 @@ def retrieve_strains_and_values(orders, strainlist, traits_data_list):
     rets = []
     for order in orders:
         temp_val = traits_data_list[order[1]]
-        for i in range(len(strainlist)):
-            if temp_val[i] != None:
-                strains.append(strainlist[i])
+        for i, strain in enumerate(strainlist):
+            if temp_val[i] is not None:
+                strains.append(strain)
                 values.append(temp_val[i])
         rets.append([order, strains[:], values[:]])
         strains = []
-- 
cgit 1.4.1


From bb1fd69fa24cec4ff605450d241601b3f0ced8cb Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Mon, 30 Aug 2021 09:50:44 +0300
Subject: Remove empty line

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* Remove empty line at the end of the traits file
---
 gn3/computations/qtlreaper.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 49d363b..a88659e 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -27,7 +27,9 @@ def generate_traits_file(strains, trait_values, traits_filename):
     header = "Traits\t{}\n".format("\t".join(strains))
     data = [header] + [
         "T{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
-        for i, t in enumerate(trait_values)]
+        for i, t in enumerate(trait_values[:-1])] + [
+        "T{}\t{}".format(len(trait_values), "\t".join([str(i) for i in t]))
+        for t in trait_values[-1:]]
     with open(traits_filename, "w") as outfile:
         outfile.writelines(data)
 
-- 
cgit 1.4.1


From 58f59b8f7df82969b58a604070aec095d17e0501 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Mon, 30 Aug 2021 11:44:37 +0300
Subject: Fix issues with traits file format

* README.md: update header: Traits ==> Trait
* gn3/computations/qtlreaper.py: update header: Traits ==> Trait
* qtlfilesexport.py: Choose only BXD strains

  Rename the first column header from "Traits" to "Trait" to correspond with
  what `rust-qtlreaper` expects.

  Choose only the BXD strains for the proof-of-concept example - this helped
  bring out the fact that the traits file SHOULD NOT contain a strain column
  for a strain that does not exist in the genotype file in consideration.

  If the traits file has a strain column which does not exist in the genotype
  file, then `rust-qtlreaper` fails with a panic, since, from what I can tell,
  it tries to get a value from the genotype file for the non-existent strain,
  which results in a `None` value. Subsequent attempts at running an operation
  on that `None` value lead to the panic.
---
 README.md                     |  4 +++-
 gn3/computations/qtlreaper.py |  2 +-
 qtlfilesexport.py             | 31 ++++++++++++++++++++++++++++++-
 3 files changed, 34 insertions(+), 3 deletions(-)

(limited to 'gn3/computations')

diff --git a/README.md b/README.md
index 0e0e509..b54015f 100644
--- a/README.md
+++ b/README.md
@@ -136,8 +136,10 @@ Under the **"Trait"** column, the traits are numbered from **T1** to **T<n>** wh
 As an example, you could end up with a trait file like the following:
 
 ```txt
-Traits	BXD27	BXD32	DBA/2J	BXD21	...
+Trait	BXD27	BXD32	DBA/2J	BXD21	...
 T1	10.5735	9.27408	9.48255	9.18253	...
 T2	6.4471	6.7191	5.98015	6.68051	...
 ...
 ```
+
+It is very important that the column header names for the strains correspond to the genotype file used.
diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index a88659e..9b13a55 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -24,7 +24,7 @@ def generate_traits_file(strains, trait_values, traits_filename):
     traits_filename: The tab-separated value to put the values in for
         computation of QTLs.
     """
-    header = "Traits\t{}\n".format("\t".join(strains))
+    header = "Trait\t{}\n".format("\t".join(strains))
     data = [header] + [
         "T{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
         for i, t in enumerate(trait_values[:-1])] + [
diff --git a/qtlfilesexport.py b/qtlfilesexport.py
index 0543dc9..adc5e77 100644
--- a/qtlfilesexport.py
+++ b/qtlfilesexport.py
@@ -41,7 +41,36 @@ def main():
         retrieve_trait_info(threshold, fullname, conn)
         for fullname in trait_fullnames()]
     traits_data_list = [retrieve_trait_data(t, conn) for t in traits]
-    strains = list(set([k for td in traits_data_list for k in td["data"].keys()]))
+    # strains = list(set([k for td in traits_data_list for k in td["data"].keys()]))
+    strains = [# Use only the strains in the BXD.geno genotype file
+        "BXD1", "BXD2", "BXD5", "BXD6", "BXD8", "BXD9", "BXD11", "BXD12",
+        "BXD13", "BXD14", "BXD15", "BXD16", "BXD18", "BXD19", "BXD20", "BXD21",
+        "BXD22", "BXD23", "BXD24", "BXD24a", "BXD25", "BXD27", "BXD28", "BXD29",
+        "BXD30", "BXD31", "BXD32", "BXD33", "BXD34", "BXD35", "BXD36", "BXD37",
+        "BXD38", "BXD39", "BXD40", "BXD41", "BXD42", "BXD43", "BXD44", "BXD45",
+        "BXD48", "BXD48a", "BXD49", "BXD50", "BXD51", "BXD52", "BXD53", "BXD54",
+        "BXD55", "BXD56", "BXD59", "BXD60", "BXD61", "BXD62", "BXD63", "BXD64",
+        "BXD65", "BXD65a", "BXD65b", "BXD66", "BXD67", "BXD68", "BXD69",
+        "BXD70", "BXD71", "BXD72", "BXD73", "BXD73a", "BXD73b", "BXD74",
+        "BXD75", "BXD76", "BXD77", "BXD78", "BXD79", "BXD81", "BXD83", "BXD84",
+        "BXD85", "BXD86", "BXD87", "BXD88", "BXD89", "BXD90", "BXD91", "BXD93",
+        "BXD94", "BXD95", "BXD98", "BXD99", "BXD100", "BXD101", "BXD102",
+        "BXD104", "BXD105", "BXD106", "BXD107", "BXD108", "BXD109", "BXD110",
+        "BXD111", "BXD112", "BXD113", "BXD114", "BXD115", "BXD116", "BXD117",
+        "BXD119", "BXD120", "BXD121", "BXD122", "BXD123", "BXD124", "BXD125",
+        "BXD126", "BXD127", "BXD128", "BXD128a", "BXD130", "BXD131", "BXD132",
+        "BXD133", "BXD134", "BXD135", "BXD136", "BXD137", "BXD138", "BXD139",
+        "BXD141", "BXD142", "BXD144", "BXD145", "BXD146", "BXD147", "BXD148",
+        "BXD149", "BXD150", "BXD151", "BXD152", "BXD153", "BXD154", "BXD155",
+        "BXD156", "BXD157", "BXD160", "BXD161", "BXD162", "BXD165", "BXD168",
+        "BXD169", "BXD170", "BXD171", "BXD172", "BXD173", "BXD174", "BXD175",
+        "BXD176", "BXD177", "BXD178", "BXD180", "BXD181", "BXD183", "BXD184",
+        "BXD186", "BXD187", "BXD188", "BXD189", "BXD190", "BXD191", "BXD192",
+        "BXD193", "BXD194", "BXD195", "BXD196", "BXD197", "BXD198", "BXD199",
+        "BXD200", "BXD201", "BXD202", "BXD203", "BXD204", "BXD205", "BXD206",
+        "BXD207", "BXD208", "BXD209", "BXD210", "BXD211", "BXD212", "BXD213",
+        "BXD214", "BXD215", "BXD216", "BXD217", "BXD218", "BXD219", "BXD220"
+    ]
     exported_traits_data_list = [
         export_trait_data(td, strains) for td in traits_data_list]
     slinked = slink(cluster_traits(exported_traits_data_list))
-- 
cgit 1.4.1


From 6c872943597f3664cca77abbdf56f074fc5231e6 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Tue, 31 Aug 2021 06:56:35 +0300
Subject: Fix bugs with `run_reaper` function

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/computations/qtlreaper.py: Fix some bugs
* qtlfilesexport.py: Test out running rust-qtlreaper

  Test out the qtlreaper interface code and fix some bugs caught in the
  process.
---
 gn3/computations/qtlreaper.py | 8 +++++---
 qtlfilesexport.py             | 7 +++++++
 2 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 9b13a55..c058e14 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -42,7 +42,7 @@ def create_output_directory(path: str):
 
 def run_reaper(
         genotype_filename: str, traits_filename: str,
-        other_options: tuple = ("--n_permutations", 1000),
+        other_options: tuple = ("--n_permutations", "1000"),
         separate_nperm_output: bool = False,
         output_dir: str = TMPDIR):
     """
@@ -70,7 +70,7 @@ def run_reaper(
     The function will raise a `subprocess.CalledProcessError` exception in case
     of any errors running the `qtlreaper` command.
     """
-    create_output_directory(output_dir)
+    create_output_directory("{}/qtlreaper".format(output_dir))
     output_filename = "{}/qtlreaper/main_output_{}.txt".format(
         output_dir, random_string(10))
     output_list = ["--main_output", output_filename]
@@ -84,7 +84,9 @@ def run_reaper(
     command_list = [
         REAPER_COMMAND, "--geno", genotype_filename,
         *other_options, # this splices the `other_options` list here
-        "--traits", traits_filename, "--main_output", output_filename]
+        "--traits", traits_filename,
+        *output_list # this splices the `output_list` list here
+    ]
 
     subprocess.run(command_list, check=True)
     return (output_filename, permu_output_filename)
diff --git a/qtlfilesexport.py b/qtlfilesexport.py
index 1db4ab6..799de31 100644
--- a/qtlfilesexport.py
+++ b/qtlfilesexport.py
@@ -9,6 +9,7 @@ replacing the variables in the angled brackets with the appropriate values
 """
 from gn3.computations.slink import slink
 from gn3.db_utils import database_connector
+from gn3.computations.qtlreaper import run_reaper
 from gn3.computations.heatmap import export_trait_data
 from gn3.db.traits import retrieve_trait_data, retrieve_trait_info
 from gn3.db.genotypes import build_genotype_file, load_genotype_samples
@@ -57,5 +58,11 @@ def main():
     generate_traits_file(strains_values, trait_values, traits_filename)
     print("Generated file: {}".format(traits_filename))
 
+    main_output, permutations_output = run_reaper(
+        genotype_filename, traits_filename, separate_nperm_output=True)
+
+    print("Main output: {}, Permutation output: {}".format(
+        main_output, permutations_output))
+
 if __name__ == "__main__":
     main()
-- 
cgit 1.4.1


From 64ce38b45839b6305b009f6e28b0f852409e9bda Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Tue, 31 Aug 2021 10:45:11 +0300
Subject: Parse QTLReaper outputs

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/computations/qtlreaper.py: parse output files
* tests/unit/computations/data/qtlreaper/main_output_sample.txt: sample test
  data
* tests/unit/computations/data/qtlreaper/permu_output_sample.txt: sample test
  data
* tests/unit/computations/test_qtlreaper.py: add tests

  Add code to parse the QTLReaper output data files.
---
 gn3/computations/qtlreaper.py                      | 18 ++++++
 .../data/qtlreaper/main_output_sample.txt          | 11 ++++
 .../data/qtlreaper/permu_output_sample.txt         | 27 ++++++++
 tests/unit/computations/test_qtlreaper.py          | 74 ++++++++++++++++++++++
 4 files changed, 130 insertions(+)
 create mode 100644 tests/unit/computations/data/qtlreaper/main_output_sample.txt
 create mode 100644 tests/unit/computations/data/qtlreaper/permu_output_sample.txt
 create mode 100644 tests/unit/computations/test_qtlreaper.py

(limited to 'gn3/computations')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index c058e14..3b8e4db 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -90,3 +90,21 @@ def run_reaper(
 
     subprocess.run(command_list, check=True)
     return (output_filename, permu_output_filename)
+
+
+def parse_reaper_main_results(results_file):
+    with open(results_file, "r") as infile:
+        lines = infile.readlines()
+
+    def __parse_line(line):
+        items = line.strip().split("\t")
+        return items[0:2] + [float(item) for item in items[2:]]
+
+    header = lines[0].strip().split("\t")
+    return [dict(zip(header, __parse_line(line))) for line in lines[1:]]
+
+def parse_reaper_permutation_results(results_file):
+    with open(results_file, "r") as infile:
+        lines = infile.readlines()
+
+    return [float(line.strip()) for line in lines]
diff --git a/tests/unit/computations/data/qtlreaper/main_output_sample.txt b/tests/unit/computations/data/qtlreaper/main_output_sample.txt
new file mode 100644
index 0000000..12b11b4
--- /dev/null
+++ b/tests/unit/computations/data/qtlreaper/main_output_sample.txt
@@ -0,0 +1,11 @@
+ID	Locus	Chr	cM	Mb	LRS	Additive	pValue
+T1	rs31443144	1	1.500	3.010	0.500	-0.074	1.000
+T1	rs6269442	1	1.500	3.492	0.500	-0.074	1.000
+T1	rs32285189	1	1.630	3.511	0.500	-0.074	1.000
+T1	rs258367496	1	1.630	3.660	0.500	-0.074	1.000
+T1	rs32430919	1	1.750	3.777	0.500	-0.074	1.000
+T1	rs36251697	1	1.880	3.812	0.500	-0.074	1.000
+T1	rs30658298	1	2.010	4.431	0.500	-0.074	1.000
+T1	rs51852623	1	2.010	4.447	0.500	-0.074	1.000
+T1	rs31879829	1	2.140	4.519	0.500	-0.074	1.000
+T1	rs36742481	1	2.140	4.776	0.500	-0.074	1.000
diff --git a/tests/unit/computations/data/qtlreaper/permu_output_sample.txt b/tests/unit/computations/data/qtlreaper/permu_output_sample.txt
new file mode 100644
index 0000000..64cff07
--- /dev/null
+++ b/tests/unit/computations/data/qtlreaper/permu_output_sample.txt
@@ -0,0 +1,27 @@
+4.44174
+5.03825
+5.08167
+5.18119
+5.18578
+5.24563
+5.24619
+5.24619
+5.27961
+5.28228
+5.43903
+5.50188
+5.51694
+5.56830
+5.63874
+5.71346
+5.71936
+5.74275
+5.76764
+5.79815
+5.81671
+5.82775
+5.89659
+5.92117
+5.93396
+5.93396
+5.94957
diff --git a/tests/unit/computations/test_qtlreaper.py b/tests/unit/computations/test_qtlreaper.py
new file mode 100644
index 0000000..ec23664
--- /dev/null
+++ b/tests/unit/computations/test_qtlreaper.py
@@ -0,0 +1,74 @@
+"""Module contains tests for gn3.computations.qtlreaper"""
+import os
+from unittest import TestCase
+from gn3.computations.qtlreaper import (
+    parse_reaper_main_results, parse_reaper_permutation_results)
+
+class TestQTLReaper(TestCase):
+    """Class for testing qtlreaper interface functions."""
+
+    def test_parse_reaper_main_results(self):
+        self.assertEqual(
+            parse_reaper_main_results(
+                "tests/unit/computations/data/qtlreaper/main_output_sample.txt"),
+            [
+                {
+                    "ID": "T1", "Locus": "rs31443144", "Chr": 1, "cM": 1.500,
+                    "Mb": 3.010, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs6269442", "Chr": 1, "cM": 1.500,
+                    "Mb": 3.492, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs32285189", "Chr": 1, "cM": 1.630,
+                    "Mb": 3.511, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs258367496", "Chr": 1, "cM": 1.630,
+                    "Mb": 3.660, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs32430919", "Chr": 1, "cM": 1.750,
+                    "Mb": 3.777, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs36251697", "Chr": 1, "cM": 1.880,
+                    "Mb": 3.812, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs30658298", "Chr": 1, "cM": 2.010,
+                    "Mb": 4.431, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs51852623", "Chr": 1, "cM": 2.010,
+                    "Mb": 4.447, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs31879829", "Chr": 1, "cM": 2.140,
+                    "Mb": 4.519, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs36742481", "Chr": 1, "cM": 2.140,
+                    "Mb": 4.776, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                }
+            ])
+
+    def test_parse_reaper_permutation_results(self):
+        self.assertEqual(
+            parse_reaper_permutation_results(
+            "tests/unit/computations/data/qtlreaper/permu_output_sample.txt"),
+            [4.44174, 5.03825, 5.08167, 5.18119, 5.18578, 5.24563, 5.24619,
+             5.24619, 5.27961, 5.28228, 5.43903, 5.50188, 5.51694, 5.56830,
+             5.63874, 5.71346, 5.71936, 5.74275, 5.76764, 5.79815, 5.81671,
+             5.82775, 5.89659, 5.92117, 5.93396, 5.93396, 5.94957])
-- 
cgit 1.4.1


From e441509a59c20a051fd5ab94710513f1968a5e02 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Tue, 31 Aug 2021 10:50:56 +0300
Subject: Update `heatmap_data` function: remove extraneous data

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/computations/heatmap.py: update function
* gn3/db/traits.py: new function

  Remove extraneous data and arguments from the function.
  - Load the genotype file
  - Generate traits file
  - Provide both raw traits data, and exported traits data in return
---
 gn3/computations/heatmap.py | 42 ++++++++++++++++++++++--------------------
 gn3/db/traits.py            |  5 +++++
 2 files changed, 27 insertions(+), 20 deletions(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
index e0ff05b..92014cf 100644
--- a/gn3/computations/heatmap.py
+++ b/gn3/computations/heatmap.py
@@ -6,8 +6,12 @@ generate various kinds of heatmaps.
 from functools import reduce
 from typing import Any, Dict, Sequence
 from gn3.computations.slink import slink
-from gn3.db.traits import retrieve_trait_data, retrieve_trait_info
 from gn3.computations.correlations2 import compute_correlation
+from gn3.db.genotypes import build_genotype_file, load_genotype_samples
+from gn3.db.traits import (
+    retrieve_trait_data,
+    retrieve_trait_info,
+    generate_traits_filename)
 
 def export_trait_data(
         trait_data: dict, strainlist: Sequence[str], dtype: str = "val",
@@ -125,7 +129,7 @@ def cluster_traits(traits_data_list: Sequence[Dict]):
 
     return tuple(__cluster(tdata_i) for tdata_i in enumerate(traits_data_list))
 
-def heatmap_data(formd, search_result, conn: Any):
+def heatmap_data(traits_names, conn: Any):
     """
     heatmap function
 
@@ -142,39 +146,37 @@ def heatmap_data(formd, search_result, conn: Any):
     TODO: Elaborate on the parameters here...
     """
     threshold = 0 # webqtlConfig.PUBLICTHRESH
-    cluster_checked = formd.formdata.getvalue("clusterCheck", "")
-    strainlist = [
-        strain for strain in formd.strainlist if strain not in formd.parlist]
-    genotype = formd.genotype
-
     def __retrieve_traitlist_and_datalist(threshold, fullname):
         trait = retrieve_trait_info(threshold, fullname, conn)
         return (trait, retrieve_trait_data(trait, conn))
 
     traits_details = [
         __retrieve_traitlist_and_datalist(threshold, fullname)
-        for fullname in search_result]
+        for fullname in traits_names]
     traits_list = tuple(x[0] for x in traits_details)
     traits_data_list = [x[1] for x in traits_details]
     exported_traits_data_list = tuple(
         export_trait_data(td, strainlist) for td in traits_data_list)
+    genotype_filename = build_genotype_file(traits_list[0]["riset"])
+    strainlist = load_genotype_samples(genotype_filename)
+    slink_data = slink(cluster_traits(exported_traits_data_list))
+    ordering_data = compute_heatmap_order(slink_data)
+    strains_and_values = retrieve_strains_and_values(
+        orders, strainlist, exported_traits_data_list)
+    strains_values = strains_and_values[0][1]
+    trait_values = [t[2] for t in strains_and_values]
+    traits_filename = generate_traits_filename()
+    generate_traits_file(strains_values, trait_values, traits_filename)
 
     return {
-        "target_description_checked": formd.formdata.getvalue(
-            "targetDescriptionCheck", ""),
-        "cluster_checked": cluster_checked,
-        "slink_data": (
-            slink(cluster_traits(exported_traits_data_list))
-            if cluster_checked else False),
-        "sessionfile": formd.formdata.getvalue("session"),
-        "genotype": genotype,
-        "nLoci": sum(map(len, genotype)),
+        "slink_data": slink_data,
+        "ordering_data": ordering_data,
         "strainlist": strainlist,
-        "ppolar": formd.ppolar,
-        "mpolar":formd.mpolar,
+        "genotype_filename": genotype_filename,
         "traits_list": traits_list,
         "traits_data_list": traits_data_list,
-        "exported_traits_data_list": exported_traits_data_list
+        "exported_traits_data_list": exported_traits_data_list,
+        "traits_filename": traits_filename
     }
 
 def compute_heatmap_order(
diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index 1031e44..ccb101a 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -1,4 +1,5 @@
 """This class contains functions relating to trait data manipulation"""
+from gn3.settings import TMPDIR
 from typing import Any, Dict, Union, Sequence
 from gn3.function_helpers import compose
 from gn3.db.datasets import retrieve_trait_dataset
@@ -666,3 +667,7 @@ def retrieve_trait_data(trait: dict, conn: Any, strainlist: Sequence[str] = tupl
                     {k:v for k, v in x.items() if x != "strain_name"}),
                 data))}
     return {}
+
+def generate_traits_filename(base_path: str = TMPDIR):
+    return "{}/traits_test_file_{}.txt".format(
+        os.path.abspath(base_path), random_string(10))
-- 
cgit 1.4.1


From b5e1d1176f1bf4f7c0b68b27beb15e99418f1650 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Tue, 31 Aug 2021 11:16:29 +0300
Subject: Fix linting errors, minor bugs and reorganise code

* Fix some linting errors and some minor bugs caught by the linter.
  Move the `random_string` function to a separate module for use in multiple
  places in the code.
---
 gn3/computations/heatmap.py               |  7 ++++---
 gn3/computations/qtlreaper.py             | 27 ++++++++++++++-------------
 gn3/db/traits.py                          |  5 ++++-
 gn3/heatmaps/heatmaps.py                  | 25 +++++++++++++++++++------
 gn3/random.py                             | 11 +++++++++++
 tests/unit/computations/test_qtlreaper.py |  5 +++--
 6 files changed, 55 insertions(+), 25 deletions(-)
 create mode 100644 gn3/random.py

(limited to 'gn3/computations')

diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
index 92014cf..1143450 100644
--- a/gn3/computations/heatmap.py
+++ b/gn3/computations/heatmap.py
@@ -6,6 +6,7 @@ generate various kinds of heatmaps.
 from functools import reduce
 from typing import Any, Dict, Sequence
 from gn3.computations.slink import slink
+from gn3.computations.qtlreaper import generate_traits_file
 from gn3.computations.correlations2 import compute_correlation
 from gn3.db.genotypes import build_genotype_file, load_genotype_samples
 from gn3.db.traits import (
@@ -155,14 +156,14 @@ def heatmap_data(traits_names, conn: Any):
         for fullname in traits_names]
     traits_list = tuple(x[0] for x in traits_details)
     traits_data_list = [x[1] for x in traits_details]
-    exported_traits_data_list = tuple(
-        export_trait_data(td, strainlist) for td in traits_data_list)
     genotype_filename = build_genotype_file(traits_list[0]["riset"])
     strainlist = load_genotype_samples(genotype_filename)
+    exported_traits_data_list = tuple(
+        export_trait_data(td, strainlist) for td in traits_data_list)
     slink_data = slink(cluster_traits(exported_traits_data_list))
     ordering_data = compute_heatmap_order(slink_data)
     strains_and_values = retrieve_strains_and_values(
-        orders, strainlist, exported_traits_data_list)
+        ordering_data, strainlist, exported_traits_data_list)
     strains_values = strains_and_values[0][1]
     trait_values = [t[2] for t in strains_and_values]
     traits_filename = generate_traits_filename()
diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 3b8e4db..30c7051 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -3,17 +3,10 @@ This module contains functions to interact with the `qtlreaper` utility for
 computation of QTLs.
 """
 import os
-import random
-import string
 import subprocess
+from gn3.random import random_string
 from gn3.settings import TMPDIR, REAPER_COMMAND
 
-def random_string(length):
-    """Generate a random string of length `length`."""
-    return "".join(
-        random.choices(
-            string.ascii_letters + string.digits, k=length))
-
 def generate_traits_file(strains, trait_values, traits_filename):
     """
     Generate a traits file for use with `qtlreaper`.
@@ -25,11 +18,13 @@ def generate_traits_file(strains, trait_values, traits_filename):
         computation of QTLs.
     """
     header = "Trait\t{}\n".format("\t".join(strains))
-    data = [header] + [
-        "T{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
-        for i, t in enumerate(trait_values[:-1])] + [
-        "T{}\t{}".format(len(trait_values), "\t".join([str(i) for i in t]))
-        for t in trait_values[-1:]]
+    data = (
+        [header] +
+        ["T{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
+         for i, t in enumerate(trait_values[:-1])] +
+        ["T{}\t{}".format(
+            len(trait_values), "\t".join([str(i) for i in t]))
+         for t in trait_values[-1:]])
     with open(traits_filename, "w") as outfile:
         outfile.writelines(data)
 
@@ -93,6 +88,9 @@ def run_reaper(
 
 
 def parse_reaper_main_results(results_file):
+    """
+    Parse the results file of running QTLReaper into a list of dicts.
+    """
     with open(results_file, "r") as infile:
         lines = infile.readlines()
 
@@ -104,6 +102,9 @@ def parse_reaper_main_results(results_file):
     return [dict(zip(header, __parse_line(line))) for line in lines[1:]]
 
 def parse_reaper_permutation_results(results_file):
+    """
+    Parse the results QTLReaper permutations into a list of values.
+    """
     with open(results_file, "r") as infile:
         lines = infile.readlines()
 
diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index ccb101a..bfe887e 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -1,6 +1,8 @@
 """This class contains functions relating to trait data manipulation"""
-from gn3.settings import TMPDIR
+import os
 from typing import Any, Dict, Union, Sequence
+from gn3.settings import TMPDIR
+from gn3.random import random_string
 from gn3.function_helpers import compose
 from gn3.db.datasets import retrieve_trait_dataset
 
@@ -669,5 +671,6 @@ def retrieve_trait_data(trait: dict, conn: Any, strainlist: Sequence[str] = tupl
     return {}
 
 def generate_traits_filename(base_path: str = TMPDIR):
+    """Generate a unique filename for use with generated traits files."""
     return "{}/traits_test_file_{}.txt".format(
         os.path.abspath(base_path), random_string(10))
diff --git a/gn3/heatmaps/heatmaps.py b/gn3/heatmaps/heatmaps.py
index 3bf7917..88f546d 100644
--- a/gn3/heatmaps/heatmaps.py
+++ b/gn3/heatmaps/heatmaps.py
@@ -14,6 +14,19 @@ def generate_random_data(data_stop: float = 2, width: int = 10, height: int = 30
     return [[random.uniform(0,data_stop) for i in range(0, width)]
             for j in range(0, height)]
 
+def generate_random_data2(data_stop: float = 2, width: int = 10, height: int = 30):
+    """
+    This is mostly a utility function to be used to generate random data, useful
+    for development of the heatmap generation code, without access to the actual
+    database data.
+    """
+    return [
+        [{
+            "value": item,
+            "category": random.choice(["C57BL/6J +", "DBA/2J +"])}
+         for item in axis]
+        for axis in generate_random_data(data_stop, width, height)]
+
 def heatmap_x_axis_names():
     return [
         "UCLA_BXDBXH_CARTILAGE_V2::ILM103710672",
@@ -30,13 +43,14 @@ def heatmap_x_axis_names():
 
 # Grey + Blue + Red
 def generate_heatmap():
-    rows = 20
-    data = generate_random_data(height=rows)
-    y = (["%s"%x for x in range(1, rows+1)][:-1] + ["X"]) #replace last item with x for now
+    cols = 20
+    y_axis = (["%s"%x for x in range(1, cols+1)][:-1] + ["X"]) #replace last item with x for now
+    x_axis = heatmap_x_axis_names()
+    data = generate_random_data(height=cols, width=len(x_axis))
     fig = px.imshow(
         data,
-        x=heatmap_x_axis_names(),
-        y=y,
+        x=x_axis,
+        y=y_axis,
         width=500)
     fig.update_traces(xtype="array")
     fig.update_traces(ytype="array")
@@ -49,6 +63,5 @@ def generate_heatmap():
         coloraxis_colorscale=[
             [0.0, '#3B3B3B'], [0.4999999999999999, '#ABABAB'],
             [0.5, '#F5DE11'], [1.0, '#FF0D00']])
-
     fig.write_html("%s/%s"%(heatmap_dir, "test_image.html"))
     return fig
diff --git a/gn3/random.py b/gn3/random.py
new file mode 100644
index 0000000..f0ba574
--- /dev/null
+++ b/gn3/random.py
@@ -0,0 +1,11 @@
+"""
+Functions to generate complex random data.
+"""
+import random
+import string
+
+def random_string(length):
+    """Generate a random string of length `length`."""
+    return "".join(
+        random.choices(
+            string.ascii_letters + string.digits, k=length))
diff --git a/tests/unit/computations/test_qtlreaper.py b/tests/unit/computations/test_qtlreaper.py
index ec23664..6c3b64d 100644
--- a/tests/unit/computations/test_qtlreaper.py
+++ b/tests/unit/computations/test_qtlreaper.py
@@ -1,5 +1,4 @@
 """Module contains tests for gn3.computations.qtlreaper"""
-import os
 from unittest import TestCase
 from gn3.computations.qtlreaper import (
     parse_reaper_main_results, parse_reaper_permutation_results)
@@ -8,6 +7,7 @@ class TestQTLReaper(TestCase):
     """Class for testing qtlreaper interface functions."""
 
     def test_parse_reaper_main_results(self):
+        """Test that the main results file is parsed correctly."""
         self.assertEqual(
             parse_reaper_main_results(
                 "tests/unit/computations/data/qtlreaper/main_output_sample.txt"),
@@ -65,9 +65,10 @@ class TestQTLReaper(TestCase):
             ])
 
     def test_parse_reaper_permutation_results(self):
+        """Test that the permutations results file is parsed correctly."""
         self.assertEqual(
             parse_reaper_permutation_results(
-            "tests/unit/computations/data/qtlreaper/permu_output_sample.txt"),
+                "tests/unit/computations/data/qtlreaper/permu_output_sample.txt"),
             [4.44174, 5.03825, 5.08167, 5.18119, 5.18578, 5.24563, 5.24619,
              5.24619, 5.27961, 5.28228, 5.43903, 5.50188, 5.51694, 5.56830,
              5.63874, 5.71346, 5.71936, 5.74275, 5.76764, 5.79815, 5.81671,
-- 
cgit 1.4.1


From 608ff9c6ff668d18f0c42aebf658ef80b517a6de Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Mon, 6 Sep 2021 06:45:18 +0300
Subject: Find nearest marker

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* Migrate the `web.webqtl.heatmap.Heatmap.getNearestMarker` function in GN1 to
  GN3.
---
 gn3/computations/heatmap.py | 49 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 48 insertions(+), 1 deletion(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
index 1143450..ccce385 100644
--- a/gn3/computations/heatmap.py
+++ b/gn3/computations/heatmap.py
@@ -30,7 +30,7 @@ def export_trait_data(
       The dictionary of key-value pairs representing a trait
     strainlist: (list)
       A list of strain names
-    type: (str)
+    dtype: (str)
       ... verify what this is ...
     var_exists: (bool)
       A flag indicating existence of variance
@@ -232,3 +232,50 @@ def retrieve_strains_and_values(orders, strainlist, traits_data_list):
         values = []
 
     return rets
+
+def nearest_marker_finder(genotype):
+    """
+    Returns a function to be used with `genotype` to compute the nearest marker
+    to the trait passed to the returned function.
+
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L425-434
+    """
+    def __compute_distances(chromo, trait):
+        loci = chromo.get("loci", None)
+        if not loci:
+            return None
+        return tuple(
+            {
+                "name": locus["name"],
+                "distance": abs(locus["Mb"] - trait["mb"])
+            } for locus in loci)
+
+    def __finder(trait):
+        _chrs = tuple(
+            _chr for _chr in genotype["chromosomes"]
+            if str(_chr["name"]) == str(trait["chr"]))
+        if len(_chrs) == 0:
+            return None
+        distances = tuple(
+            distance for dists in
+            filter(
+                lambda x: x is not None,
+                (__compute_distances(_chr, trait) for _chr in _chrs))
+            for distance in dists)
+        nearest = min(distances, key=lambda d: d["distance"])
+        return nearest["name"]
+    return __finder
+
+def get_nearest_marker(traits_list, genotype):
+    """
+    Retrieves the nearest marker for each of the traits in the list.
+
+    DESCRIPTION:
+    This migrates the code in
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L419-L438
+    """
+    if not genotype["Mbmap"]:
+        return [None] * len(trait_list)
+
+    marker_finder = nearest_marker_finder(genotype)
+    return [marker_finder(trait) for trait in traits_list]
-- 
cgit 1.4.1


From 4ce5695a35e92a704add8d497266bb2986a593f6 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Mon, 6 Sep 2021 06:47:52 +0300
Subject: Handle type-coercion exceptions

* gn3/computations/qtlreaper.py: handle exceptions

  Sometimes, the values being parsed are plain strings and cannot be cast to
  the float types. This commit handles that by casting only those values that
  can be cast to float, and returning the others as strings.
---
 gn3/computations/qtlreaper.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 30c7051..eff2a80 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -94,9 +94,15 @@ def parse_reaper_main_results(results_file):
     with open(results_file, "r") as infile:
         lines = infile.readlines()
 
+    def __parse_column_value(value):
+        try:
+            return float(value)
+        except:
+            return value
+
     def __parse_line(line):
         items = line.strip().split("\t")
-        return items[0:2] + [float(item) for item in items[2:]]
+        return items[0:2] + [__parse_column_value(item) for item in items[2:]]
 
     header = lines[0].strip().split("\t")
     return [dict(zip(header, __parse_line(line))) for line in lines[1:]]
-- 
cgit 1.4.1


From 679a1af832ad9585c7cf72996043edb08e1b0d10 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Mon, 6 Sep 2021 08:06:14 +0300
Subject: Leave "Chr" value as string when parsing

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* The "Chr" value seems to be mostly a name of some sort, despite seemingly
  being a number. This commit parses the "Chr" value as a string.
  It also updates the tests to expect a string, rather than a number, for
  "Chr" values.
---
 gn3/computations/qtlreaper.py             |  5 +++--
 tests/unit/computations/test_qtlreaper.py | 20 ++++++++++----------
 2 files changed, 13 insertions(+), 12 deletions(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index eff2a80..9b20309 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -94,7 +94,7 @@ def parse_reaper_main_results(results_file):
     with open(results_file, "r") as infile:
         lines = infile.readlines()
 
-    def __parse_column_value(value):
+    def __parse_column_float_value(value):
         try:
             return float(value)
         except:
@@ -102,7 +102,8 @@ def parse_reaper_main_results(results_file):
 
     def __parse_line(line):
         items = line.strip().split("\t")
-        return items[0:2] + [__parse_column_value(item) for item in items[2:]]
+        return items[0:3] + [
+            __parse_column_float_value(item) for item in items[3:]]
 
     header = lines[0].strip().split("\t")
     return [dict(zip(header, __parse_line(line))) for line in lines[1:]]
diff --git a/tests/unit/computations/test_qtlreaper.py b/tests/unit/computations/test_qtlreaper.py
index 6c3b64d..fd3434a 100644
--- a/tests/unit/computations/test_qtlreaper.py
+++ b/tests/unit/computations/test_qtlreaper.py
@@ -13,52 +13,52 @@ class TestQTLReaper(TestCase):
                 "tests/unit/computations/data/qtlreaper/main_output_sample.txt"),
             [
                 {
-                    "ID": "T1", "Locus": "rs31443144", "Chr": 1, "cM": 1.500,
+                    "ID": "T1", "Locus": "rs31443144", "Chr": "1", "cM": 1.500,
                     "Mb": 3.010, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs6269442", "Chr": 1, "cM": 1.500,
+                    "ID": "T1", "Locus": "rs6269442", "Chr": "1", "cM": 1.500,
                     "Mb": 3.492, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs32285189", "Chr": 1, "cM": 1.630,
+                    "ID": "T1", "Locus": "rs32285189", "Chr": "1", "cM": 1.630,
                     "Mb": 3.511, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs258367496", "Chr": 1, "cM": 1.630,
+                    "ID": "T1", "Locus": "rs258367496", "Chr": "1", "cM": 1.630,
                     "Mb": 3.660, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs32430919", "Chr": 1, "cM": 1.750,
+                    "ID": "T1", "Locus": "rs32430919", "Chr": "1", "cM": 1.750,
                     "Mb": 3.777, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs36251697", "Chr": 1, "cM": 1.880,
+                    "ID": "T1", "Locus": "rs36251697", "Chr": "1", "cM": 1.880,
                     "Mb": 3.812, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs30658298", "Chr": 1, "cM": 2.010,
+                    "ID": "T1", "Locus": "rs30658298", "Chr": "1", "cM": 2.010,
                     "Mb": 4.431, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs51852623", "Chr": 1, "cM": 2.010,
+                    "ID": "T1", "Locus": "rs51852623", "Chr": "1", "cM": 2.010,
                     "Mb": 4.447, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs31879829", "Chr": 1, "cM": 2.140,
+                    "ID": "T1", "Locus": "rs31879829", "Chr": "1", "cM": 2.140,
                     "Mb": 4.519, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs36742481", "Chr": 1, "cM": 2.140,
+                    "ID": "T1", "Locus": "rs36742481", "Chr": "1", "cM": 2.140,
                     "Mb": 4.776, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 }
-- 
cgit 1.4.1


From d4943f1d01d89a3928c905f80914a23144126c8e Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Mon, 6 Sep 2021 08:09:20 +0300
Subject: Provide function to organise parsed QTLReaper results

* gn3/computations/qtlreaper.py: Provide a function to organise the results by
  trait for easier use down the line.

* tests/unit/computations/test_qtlreaper.py: provide a test to ensure that the
  organising function works as expected.
---
 gn3/computations/qtlreaper.py             |  25 +++++++
 tests/unit/computations/test_qtlreaper.py | 105 +++++++++++++++++++++++++++++-
 2 files changed, 129 insertions(+), 1 deletion(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 9b20309..8c0e6de 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -86,6 +86,31 @@ def run_reaper(
     subprocess.run(command_list, check=True)
     return (output_filename, permu_output_filename)
 
+def organise_reaper_main_results(parsed_results):
+    def __organise_by_chromosome(chr_name, items):
+        chr_items = [item for item in items if item["Chr"] == chr_name]
+        return {
+            "Chr": str(chr_name),
+            "loci": [{
+                "Locus": locus["Locus"],
+                "cM": locus["cM"],
+                "Mb": locus["Mb"],
+                "LRS": locus["LRS"],
+                "Additive": locus["Additive"],
+                "pValue": locus["pValue"]
+            } for locus in chr_items]}
+
+    def __organise_by_id(identifier, items):
+        id_items = [item for item in items if item["ID"] == identifier]
+        unique_chromosomes = {item["Chr"] for item in id_items}
+        return {
+            "ID": identifier,
+            "chromosomes": [
+                __organise_by_chromosome(chromo, id_items)
+                for chromo in sorted(unique_chromosomes)]}
+
+    unique_ids = {res["ID"] for res in parsed_results}
+    return [__organise_by_id(_id, parsed_results) for _id in sorted(unique_ids)]
 
 def parse_reaper_main_results(results_file):
     """
diff --git a/tests/unit/computations/test_qtlreaper.py b/tests/unit/computations/test_qtlreaper.py
index fd3434a..1d7347f 100644
--- a/tests/unit/computations/test_qtlreaper.py
+++ b/tests/unit/computations/test_qtlreaper.py
@@ -1,7 +1,9 @@
 """Module contains tests for gn3.computations.qtlreaper"""
 from unittest import TestCase
 from gn3.computations.qtlreaper import (
-    parse_reaper_main_results, parse_reaper_permutation_results)
+    parse_reaper_main_results,
+    organise_reaper_main_results,
+    parse_reaper_permutation_results)
 
 class TestQTLReaper(TestCase):
     """Class for testing qtlreaper interface functions."""
@@ -73,3 +75,104 @@ class TestQTLReaper(TestCase):
              5.24619, 5.27961, 5.28228, 5.43903, 5.50188, 5.51694, 5.56830,
              5.63874, 5.71346, 5.71936, 5.74275, 5.76764, 5.79815, 5.81671,
              5.82775, 5.89659, 5.92117, 5.93396, 5.93396, 5.94957])
+
+    def test_organise_reaper_main_results(self):
+        self.assertEqual(
+            organise_reaper_main_results([
+                {
+                    "ID": "T1", "Locus": "rs31443144", "Chr": 1, "cM": 1.500,
+                    "Mb": 3.010, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs6269442", "Chr": 1, "cM": 1.500,
+                    "Mb": 3.492, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs32285189", "Chr": 1, "cM": 1.630,
+                    "Mb": 3.511, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs258367496", "Chr": 1, "cM": 1.630,
+                    "Mb": 3.660, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs32430919", "Chr": 1, "cM": 1.750,
+                    "Mb": 3.777, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs36251697", "Chr": 1, "cM": 1.880,
+                    "Mb": 3.812, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs30658298", "Chr": 1, "cM": 2.010,
+                    "Mb": 4.431, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs51852623", "Chr": 2, "cM": 2.010,
+                    "Mb": 4.447, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs31879829", "Chr": 2, "cM": 2.140,
+                    "Mb": 4.519, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs36742481", "Chr": 2, "cM": 2.140,
+                    "Mb": 4.776, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                }
+            ]),
+            [{"ID": "T1",
+              "chromosomes": [
+                  {"Chr": "1",
+                   "loci": [
+                       {
+                           "Locus": "rs31443144",  "cM": 1.500, "Mb": 3.010,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       },
+                       {
+                           "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       },
+                       {
+                           "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       },
+                       {
+                           "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       },
+                       {
+                           "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       },
+                       {
+                           "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       },
+                       {
+                           "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       }]},
+                  {"Chr": "2",
+                   "loci": [
+                       {
+                           "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       },
+                       {
+                           "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       },
+                       {
+                           "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       }]}]}])
-- 
cgit 1.4.1


From 31ca02d1f095c2cc667e5b7d49131d702982f321 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Wed, 8 Sep 2021 06:52:01 +0300
Subject: Fix the traits order computations for clustering

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/computations/heatmap.py: Fix ordering function
* tests/unit/computations/test_heatmap.py: update test

  The order of the traits is important for the clustering algorithm, since the
  clustering seems to use the distance of one trait from another to determine
  how to order them.

  This commit also gets rid of the xoffset argument that is not important to
  the ordering, and was used in the older GN1 to determine how to draw the
  clustering lines.
---
 gn3/computations/heatmap.py             | 16 ++++++----------
 tests/unit/computations/test_heatmap.py | 11 +++--------
 2 files changed, 9 insertions(+), 18 deletions(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
index ccce385..8727c92 100644
--- a/gn3/computations/heatmap.py
+++ b/gn3/computations/heatmap.py
@@ -180,28 +180,24 @@ def heatmap_data(traits_names, conn: Any):
         "traits_filename": traits_filename
     }
 
-def compute_heatmap_order(
-        slink_data, xoffset: int = 40, neworder: tuple = tuple()):
+def compute_traits_order(slink_data, neworder: tuple = tuple()):
     """
-    Compute the data used for drawing the heatmap proper from `slink_data`.
+    Compute the order of the traits for clustering from `slink_data`.
 
     This function tries to reproduce the creation and update of the `neworder`
     variable in
     https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L120
     and in the `web.webqtl.heatmap.Heatmap.draw` function in GN1
     """
-    d_1 = (0, 0, 0) # returned from self.draw in lines 391 and 399. This is just a placeholder
-
     def __order_maker(norder, slnk_dt):
         if isinstance(slnk_dt[0], int) and isinstance(slnk_dt[1], int):
-            return norder + (
-                (xoffset+20, slnk_dt[0]), (xoffset + 40, slnk_dt[1]))
+            return norder + (slnk_dt[0], slnk_dt[1])
 
         if isinstance(slnk_dt[0], int):
-            return norder + ((xoffset + 20, slnk_dt[0]), )
+            return __order_maker((norder + (slnk_dt[0], )), slnk_dt[1])
 
         if isinstance(slnk_dt[1], int):
-            return norder + ((xoffset + d_1[0] + 20, slnk_dt[1]), )
+            return __order_maker(norder, slnk_dt[0]) + (slnk_dt[1], )
 
         return __order_maker(__order_maker(norder, slnk_dt[0]), slnk_dt[1])
 
@@ -222,7 +218,7 @@ def retrieve_strains_and_values(orders, strainlist, traits_data_list):
     values = []
     rets = []
     for order in orders:
-        temp_val = traits_data_list[order[1]]
+        temp_val = traits_data_list[order]
         for i, strain in enumerate(strainlist):
             if temp_val[i] is not None:
                 strains.append(strain)
diff --git a/tests/unit/computations/test_heatmap.py b/tests/unit/computations/test_heatmap.py
index 87f8e45..f1bbefc 100644
--- a/tests/unit/computations/test_heatmap.py
+++ b/tests/unit/computations/test_heatmap.py
@@ -3,7 +3,7 @@ from unittest import TestCase
 from gn3.computations.heatmap import (
     cluster_traits,
     export_trait_data,
-    compute_heatmap_order,
+    compute_traits_order,
     retrieve_strains_and_values)
 
 strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
@@ -158,13 +158,8 @@ class TestHeatmap(TestCase):
 
     def test_compute_heatmap_order(self):
         """Test the orders."""
-        for xoff, expected in [
-                (40, ((60, 9), (60, 4))),
-                (30, ((50, 9), (50, 4))),
-                (20, ((40, 9), (40, 4)))]:
-            with self.subTest(xoffset=xoff):
-                self.assertEqual(
-                    compute_heatmap_order(slinked, xoffset=xoff), expected)
+        self.assertEqual(
+            compute_traits_order(slinked), (0, 2, 1, 7, 5, 9, 3, 6, 8, 4))
 
     def test_retrieve_strains_and_values(self):
         """Test retrieval of strains and values."""
-- 
cgit 1.4.1


From f360cc62cc156af90d3283ae7b6db9e8250fa43c Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Wed, 8 Sep 2021 10:51:57 +0300
Subject: Remove extraneous text to ease sorting

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* Change the id from 'T<n>' to simply '<n>' to ease sorting of the trait
  results by numerical order rather than string order.
---
 gn3/computations/qtlreaper.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 8c0e6de..ec215e5 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -20,9 +20,9 @@ def generate_traits_file(strains, trait_values, traits_filename):
     header = "Trait\t{}\n".format("\t".join(strains))
     data = (
         [header] +
-        ["T{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
+        ["{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
          for i, t in enumerate(trait_values[:-1])] +
-        ["T{}\t{}".format(
+        ["{}\t{}".format(
             len(trait_values), "\t".join([str(i) for i in t]))
          for t in trait_values[-1:]])
     with open(traits_filename, "w") as outfile:
-- 
cgit 1.4.1


From 3f323734fcf258d28f3f7d33fdc1518ef9ec24a8 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Wed, 8 Sep 2021 10:54:48 +0300
Subject: Parse Chr value as int where possible

* To ease sorting of data by numerical order down the line, parse the "Chr"
  values as integers where possible.
---
 gn3/computations/qtlreaper.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index ec215e5..02d6572 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -86,11 +86,16 @@ def run_reaper(
     subprocess.run(command_list, check=True)
     return (output_filename, permu_output_filename)
 
+def chromosome_sorter_key_fn(val):
+    if isinstance(val, int):
+        return val
+    return ord(val)
+
 def organise_reaper_main_results(parsed_results):
     def __organise_by_chromosome(chr_name, items):
         chr_items = [item for item in items if item["Chr"] == chr_name]
         return {
-            "Chr": str(chr_name),
+            "Chr": chr_name,
             "loci": [{
                 "Locus": locus["Locus"],
                 "cM": locus["cM"],
@@ -125,9 +130,15 @@ def parse_reaper_main_results(results_file):
         except:
             return value
 
+    def __parse_column_int_value(value):
+        try:
+            return int(value)
+        except:
+            return value
+
     def __parse_line(line):
         items = line.strip().split("\t")
-        return items[0:3] + [
+        return items[0:2] + [__parse_column_int_value(items[2])] + [
             __parse_column_float_value(item) for item in items[3:]]
 
     header = lines[0].strip().split("\t")
-- 
cgit 1.4.1


From a718069c757bea9f7ecbaee25e23bd581750f906 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Wed, 8 Sep 2021 10:56:56 +0300
Subject: Ease search for traits and chromosomes

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* Return a dict of values rather than a list for the traits and chromosomes to
  ease searching through the data.
---
 gn3/computations/qtlreaper.py             |  9 ++-
 tests/unit/computations/test_qtlreaper.py | 92 +++++++++++++++----------------
 2 files changed, 52 insertions(+), 49 deletions(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 02d6572..5180853 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -110,12 +110,15 @@ def organise_reaper_main_results(parsed_results):
         unique_chromosomes = {item["Chr"] for item in id_items}
         return {
             "ID": identifier,
-            "chromosomes": [
+            "chromosomes": {_chr["Chr"]: _chr for _chr in [
                 __organise_by_chromosome(chromo, id_items)
-                for chromo in sorted(unique_chromosomes)]}
+                for chromo in sorted(
+                        unique_chromosomes, key=chromosome_sorter_key_fn)]}}
 
     unique_ids = {res["ID"] for res in parsed_results}
-    return [__organise_by_id(_id, parsed_results) for _id in sorted(unique_ids)]
+    return {
+        trait["ID"]: trait for trait in
+        [__organise_by_id(_id, parsed_results) for _id in sorted(unique_ids)]}
 
 def parse_reaper_main_results(results_file):
     """
diff --git a/tests/unit/computations/test_qtlreaper.py b/tests/unit/computations/test_qtlreaper.py
index 1d7347f..495ed97 100644
--- a/tests/unit/computations/test_qtlreaper.py
+++ b/tests/unit/computations/test_qtlreaper.py
@@ -130,49 +130,49 @@ class TestQTLReaper(TestCase):
                     "pValue": 1.000
                 }
             ]),
-            [{"ID": "T1",
-              "chromosomes": [
-                  {"Chr": "1",
-                   "loci": [
-                       {
-                           "Locus": "rs31443144",  "cM": 1.500, "Mb": 3.010,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       },
-                       {
-                           "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       },
-                       {
-                           "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       },
-                       {
-                           "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       },
-                       {
-                           "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       },
-                       {
-                           "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       },
-                       {
-                           "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       }]},
-                  {"Chr": "2",
-                   "loci": [
-                       {
-                           "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       },
-                       {
-                           "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       },
-                       {
-                           "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       }]}]}])
+            {"T1": {"ID": "T1",
+                    "chromosomes": {
+                        1: {"Chr": 1,
+                            "loci": [
+                                {
+                                    "Locus": "rs31443144",  "cM": 1.500, "Mb": 3.010,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                },
+                                {
+                                    "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                },
+                                {
+                                    "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                },
+                                {
+                                    "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                },
+                                {
+                                    "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                },
+                                {
+                                    "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                },
+                                {
+                                    "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                }]},
+                        2: {"Chr": 2,
+                            "loci": [
+                                {
+                                    "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                },
+                                {
+                                    "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                },
+                                {
+                                    "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                }]}}}})
-- 
cgit 1.4.1


From e3e18950cfcdec918429dcbb5d5ed2e9616b7a20 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Wed, 15 Sep 2021 11:19:56 +0300
Subject: Reorganise modules

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* The heatmap generation does not fall cleanly within the computations or db
  modules. This commit moves it to the higher-level gn3 module.
---
 gn3/computations/heatmap.py             | 277 -----------------------------
 gn3/heatmaps.py                         | 302 ++++++++++++++++++++++++++++++++
 gn3/heatmaps/heatmaps.py                |  67 -------
 tests/unit/computations/test_heatmap.py | 187 --------------------
 tests/unit/test_heatmaps.py             | 187 ++++++++++++++++++++
 5 files changed, 489 insertions(+), 531 deletions(-)
 delete mode 100644 gn3/computations/heatmap.py
 create mode 100644 gn3/heatmaps.py
 delete mode 100644 gn3/heatmaps/heatmaps.py
 delete mode 100644 tests/unit/computations/test_heatmap.py
 create mode 100644 tests/unit/test_heatmaps.py

(limited to 'gn3/computations')

diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
deleted file mode 100644
index 8727c92..0000000
--- a/gn3/computations/heatmap.py
+++ /dev/null
@@ -1,277 +0,0 @@
-"""
-This module will contain functions to be used in computation of the data used to
-generate various kinds of heatmaps.
-"""
-
-from functools import reduce
-from typing import Any, Dict, Sequence
-from gn3.computations.slink import slink
-from gn3.computations.qtlreaper import generate_traits_file
-from gn3.computations.correlations2 import compute_correlation
-from gn3.db.genotypes import build_genotype_file, load_genotype_samples
-from gn3.db.traits import (
-    retrieve_trait_data,
-    retrieve_trait_info,
-    generate_traits_filename)
-
-def export_trait_data(
-        trait_data: dict, strainlist: Sequence[str], dtype: str = "val",
-        var_exists: bool = False, n_exists: bool = False):
-    """
-    Export data according to `strainlist`. Mostly used in calculating
-    correlations.
-
-    DESCRIPTION:
-    Migrated from
-    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L166-L211
-
-    PARAMETERS
-    trait: (dict)
-      The dictionary of key-value pairs representing a trait
-    strainlist: (list)
-      A list of strain names
-    dtype: (str)
-      ... verify what this is ...
-    var_exists: (bool)
-      A flag indicating existence of variance
-    n_exists: (bool)
-      A flag indicating existence of ndata
-    """
-    def __export_all_types(tdata, strain):
-        sample_data = []
-        if tdata[strain]["value"]:
-            sample_data.append(tdata[strain]["value"])
-            if var_exists:
-                if tdata[strain]["variance"]:
-                    sample_data.append(tdata[strain]["variance"])
-                else:
-                    sample_data.append(None)
-            if n_exists:
-                if tdata[strain]["ndata"]:
-                    sample_data.append(tdata[strain]["ndata"])
-                else:
-                    sample_data.append(None)
-        else:
-            if var_exists and n_exists:
-                sample_data += [None, None, None]
-            elif var_exists or n_exists:
-                sample_data += [None, None]
-            else:
-                sample_data.append(None)
-
-        return tuple(sample_data)
-
-    def __exporter(accumulator, strain):
-        # pylint: disable=[R0911]
-        if strain in trait_data["data"]:
-            if dtype == "val":
-                return accumulator + (trait_data["data"][strain]["value"], )
-            if dtype == "var":
-                return accumulator + (trait_data["data"][strain]["variance"], )
-            if dtype == "N":
-                return accumulator + (trait_data["data"][strain]["ndata"], )
-            if dtype == "all":
-                return accumulator + __export_all_types(trait_data["data"], strain)
-            raise KeyError("Type `%s` is incorrect" % dtype)
-        if var_exists and n_exists:
-            return accumulator + (None, None, None)
-        if var_exists or n_exists:
-            return accumulator + (None, None)
-        return accumulator + (None,)
-
-    return reduce(__exporter, strainlist, tuple())
-
-def trait_display_name(trait: Dict):
-    """
-    Given a trait, return a name to use to display the trait on a heatmap.
-
-    DESCRIPTION
-    Migrated from
-    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L141-L157
-    """
-    if trait.get("db", None) and trait.get("trait_name", None):
-        if trait["db"]["dataset_type"] == "Temp":
-            desc = trait["description"]
-            if desc.find("PCA") >= 0:
-                return "%s::%s" % (
-                    trait["db"]["displayname"],
-                    desc[desc.rindex(':')+1:].strip())
-            return "%s::%s" % (
-                trait["db"]["displayname"],
-                desc[:desc.index('entered')].strip())
-        prefix = "%s::%s" % (
-            trait["db"]["dataset_name"], trait["trait_name"])
-        if trait["cellid"]:
-            return "%s::%s" % (prefix, trait["cellid"])
-        return prefix
-    return trait["description"]
-
-def cluster_traits(traits_data_list: Sequence[Dict]):
-    """
-    Clusters the trait values.
-
-    DESCRIPTION
-    Attempts to replicate the clustering of the traits, as done at
-    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L138-L162
-    """
-    def __compute_corr(tdata_i, tdata_j):
-        if tdata_i[0] == tdata_j[0]:
-            return 0.0
-        corr_vals = compute_correlation(tdata_i[1], tdata_j[1])
-        corr = corr_vals[0]
-        if (1 - corr) < 0:
-            return 0.0
-        return 1 - corr
-
-    def __cluster(tdata_i):
-        return tuple(
-            __compute_corr(tdata_i, tdata_j)
-            for tdata_j in enumerate(traits_data_list))
-
-    return tuple(__cluster(tdata_i) for tdata_i in enumerate(traits_data_list))
-
-def heatmap_data(traits_names, conn: Any):
-    """
-    heatmap function
-
-    DESCRIPTION
-    This function is an attempt to reproduce the initialisation at
-    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L46-L64
-    and also the clustering and slink computations at
-    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L138-L165
-    with the help of the `gn3.computations.heatmap.cluster_traits` function.
-
-    It does not try to actually draw the heatmap image.
-
-    PARAMETERS:
-    TODO: Elaborate on the parameters here...
-    """
-    threshold = 0 # webqtlConfig.PUBLICTHRESH
-    def __retrieve_traitlist_and_datalist(threshold, fullname):
-        trait = retrieve_trait_info(threshold, fullname, conn)
-        return (trait, retrieve_trait_data(trait, conn))
-
-    traits_details = [
-        __retrieve_traitlist_and_datalist(threshold, fullname)
-        for fullname in traits_names]
-    traits_list = tuple(x[0] for x in traits_details)
-    traits_data_list = [x[1] for x in traits_details]
-    genotype_filename = build_genotype_file(traits_list[0]["riset"])
-    strainlist = load_genotype_samples(genotype_filename)
-    exported_traits_data_list = tuple(
-        export_trait_data(td, strainlist) for td in traits_data_list)
-    slink_data = slink(cluster_traits(exported_traits_data_list))
-    ordering_data = compute_heatmap_order(slink_data)
-    strains_and_values = retrieve_strains_and_values(
-        ordering_data, strainlist, exported_traits_data_list)
-    strains_values = strains_and_values[0][1]
-    trait_values = [t[2] for t in strains_and_values]
-    traits_filename = generate_traits_filename()
-    generate_traits_file(strains_values, trait_values, traits_filename)
-
-    return {
-        "slink_data": slink_data,
-        "ordering_data": ordering_data,
-        "strainlist": strainlist,
-        "genotype_filename": genotype_filename,
-        "traits_list": traits_list,
-        "traits_data_list": traits_data_list,
-        "exported_traits_data_list": exported_traits_data_list,
-        "traits_filename": traits_filename
-    }
-
-def compute_traits_order(slink_data, neworder: tuple = tuple()):
-    """
-    Compute the order of the traits for clustering from `slink_data`.
-
-    This function tries to reproduce the creation and update of the `neworder`
-    variable in
-    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L120
-    and in the `web.webqtl.heatmap.Heatmap.draw` function in GN1
-    """
-    def __order_maker(norder, slnk_dt):
-        if isinstance(slnk_dt[0], int) and isinstance(slnk_dt[1], int):
-            return norder + (slnk_dt[0], slnk_dt[1])
-
-        if isinstance(slnk_dt[0], int):
-            return __order_maker((norder + (slnk_dt[0], )), slnk_dt[1])
-
-        if isinstance(slnk_dt[1], int):
-            return __order_maker(norder, slnk_dt[0]) + (slnk_dt[1], )
-
-        return __order_maker(__order_maker(norder, slnk_dt[0]), slnk_dt[1])
-
-    return __order_maker(neworder, slink_data)
-
-def retrieve_strains_and_values(orders, strainlist, traits_data_list):
-    """
-    Get the strains and their corresponding values from `strainlist` and
-    `traits_data_list`.
-
-    This migrates the code in
-    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L215-221
-    """
-    # This feels nasty! There's a lot of mutation of values here, that might
-    # indicate something untoward in the design of this function and its
-    # dependents  ==>  Review
-    strains = []
-    values = []
-    rets = []
-    for order in orders:
-        temp_val = traits_data_list[order]
-        for i, strain in enumerate(strainlist):
-            if temp_val[i] is not None:
-                strains.append(strain)
-                values.append(temp_val[i])
-        rets.append([order, strains[:], values[:]])
-        strains = []
-        values = []
-
-    return rets
-
-def nearest_marker_finder(genotype):
-    """
-    Returns a function to be used with `genotype` to compute the nearest marker
-    to the trait passed to the returned function.
-
-    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L425-434
-    """
-    def __compute_distances(chromo, trait):
-        loci = chromo.get("loci", None)
-        if not loci:
-            return None
-        return tuple(
-            {
-                "name": locus["name"],
-                "distance": abs(locus["Mb"] - trait["mb"])
-            } for locus in loci)
-
-    def __finder(trait):
-        _chrs = tuple(
-            _chr for _chr in genotype["chromosomes"]
-            if str(_chr["name"]) == str(trait["chr"]))
-        if len(_chrs) == 0:
-            return None
-        distances = tuple(
-            distance for dists in
-            filter(
-                lambda x: x is not None,
-                (__compute_distances(_chr, trait) for _chr in _chrs))
-            for distance in dists)
-        nearest = min(distances, key=lambda d: d["distance"])
-        return nearest["name"]
-    return __finder
-
-def get_nearest_marker(traits_list, genotype):
-    """
-    Retrieves the nearest marker for each of the traits in the list.
-
-    DESCRIPTION:
-    This migrates the code in
-    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L419-L438
-    """
-    if not genotype["Mbmap"]:
-        return [None] * len(trait_list)
-
-    marker_finder = nearest_marker_finder(genotype)
-    return [marker_finder(trait) for trait in traits_list]
diff --git a/gn3/heatmaps.py b/gn3/heatmaps.py
new file mode 100644
index 0000000..198fb45
--- /dev/null
+++ b/gn3/heatmaps.py
@@ -0,0 +1,302 @@
+"""
+This module will contain functions to be used in computation of the data used to
+generate various kinds of heatmaps.
+"""
+
+from functools import reduce
+from typing import Any, Dict, Sequence
+from gn3.computations.slink import slink
+from gn3.computations.qtlreaper import generate_traits_file
+from gn3.computations.correlations2 import compute_correlation
+from gn3.db.genotypes import build_genotype_file, load_genotype_samples
+from gn3.db.traits import (
+    retrieve_trait_data,
+    retrieve_trait_info,
+    generate_traits_filename)
+
+def export_trait_data(
+        trait_data: dict, strainlist: Sequence[str], dtype: str = "val",
+        var_exists: bool = False, n_exists: bool = False):
+    """
+    Export data according to `strainlist`. Mostly used in calculating
+    correlations.
+
+    DESCRIPTION:
+    Migrated from
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L166-L211
+
+    PARAMETERS
+    trait: (dict)
+      The dictionary of key-value pairs representing a trait
+    strainlist: (list)
+      A list of strain names
+    dtype: (str)
+      ... verify what this is ...
+    var_exists: (bool)
+      A flag indicating existence of variance
+    n_exists: (bool)
+      A flag indicating existence of ndata
+    """
+    def __export_all_types(tdata, strain):
+        sample_data = []
+        if tdata[strain]["value"]:
+            sample_data.append(tdata[strain]["value"])
+            if var_exists:
+                if tdata[strain]["variance"]:
+                    sample_data.append(tdata[strain]["variance"])
+                else:
+                    sample_data.append(None)
+            if n_exists:
+                if tdata[strain]["ndata"]:
+                    sample_data.append(tdata[strain]["ndata"])
+                else:
+                    sample_data.append(None)
+        else:
+            if var_exists and n_exists:
+                sample_data += [None, None, None]
+            elif var_exists or n_exists:
+                sample_data += [None, None]
+            else:
+                sample_data.append(None)
+
+        return tuple(sample_data)
+
+    def __exporter(accumulator, strain):
+        # pylint: disable=[R0911]
+        if strain in trait_data["data"]:
+            if dtype == "val":
+                return accumulator + (trait_data["data"][strain]["value"], )
+            if dtype == "var":
+                return accumulator + (trait_data["data"][strain]["variance"], )
+            if dtype == "N":
+                return accumulator + (trait_data["data"][strain]["ndata"], )
+            if dtype == "all":
+                return accumulator + __export_all_types(trait_data["data"], strain)
+            raise KeyError("Type `%s` is incorrect" % dtype)
+        if var_exists and n_exists:
+            return accumulator + (None, None, None)
+        if var_exists or n_exists:
+            return accumulator + (None, None)
+        return accumulator + (None,)
+
+    return reduce(__exporter, strainlist, tuple())
+
+def trait_display_name(trait: Dict):
+    """
+    Given a trait, return a name to use to display the trait on a heatmap.
+
+    DESCRIPTION
+    Migrated from
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L141-L157
+    """
+    if trait.get("db", None) and trait.get("trait_name", None):
+        if trait["db"]["dataset_type"] == "Temp":
+            desc = trait["description"]
+            if desc.find("PCA") >= 0:
+                return "%s::%s" % (
+                    trait["db"]["displayname"],
+                    desc[desc.rindex(':')+1:].strip())
+            return "%s::%s" % (
+                trait["db"]["displayname"],
+                desc[:desc.index('entered')].strip())
+        prefix = "%s::%s" % (
+            trait["db"]["dataset_name"], trait["trait_name"])
+        if trait["cellid"]:
+            return "%s::%s" % (prefix, trait["cellid"])
+        return prefix
+    return trait["description"]
+
+def cluster_traits(traits_data_list: Sequence[Dict]):
+    """
+    Clusters the trait values.
+
+    DESCRIPTION
+    Attempts to replicate the clustering of the traits, as done at
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L138-L162
+    """
+    def __compute_corr(tdata_i, tdata_j):
+        if tdata_i[0] == tdata_j[0]:
+            return 0.0
+        corr_vals = compute_correlation(tdata_i[1], tdata_j[1])
+        corr = corr_vals[0]
+        if (1 - corr) < 0:
+            return 0.0
+        return 1 - corr
+
+    def __cluster(tdata_i):
+        return tuple(
+            __compute_corr(tdata_i, tdata_j)
+            for tdata_j in enumerate(traits_data_list))
+
+    return tuple(__cluster(tdata_i) for tdata_i in enumerate(traits_data_list))
+
+def heatmap_data(traits_names, conn: Any):
+    """
+    heatmap function
+
+    DESCRIPTION
+    This function is an attempt to reproduce the initialisation at
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L46-L64
+    and also the clustering and slink computations at
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L138-L165
+    with the help of the `gn3.computations.heatmap.cluster_traits` function.
+
+    It does not try to actually draw the heatmap image.
+
+    PARAMETERS:
+    TODO: Elaborate on the parameters here...
+    """
+    threshold = 0 # webqtlConfig.PUBLICTHRESH
+    def __retrieve_traitlist_and_datalist(threshold, fullname):
+        trait = retrieve_trait_info(threshold, fullname, conn)
+        return (trait, retrieve_trait_data(trait, conn))
+
+    traits_details = [
+        __retrieve_traitlist_and_datalist(threshold, fullname)
+        for fullname in traits_names]
+    traits_list = tuple(x[0] for x in traits_details)
+    traits_data_list = [x[1] for x in traits_details]
+    genotype_filename = build_genotype_file(traits_list[0]["riset"])
+    strainlist = load_genotype_samples(genotype_filename)
+    exported_traits_data_list = tuple(
+        export_trait_data(td, strainlist) for td in traits_data_list)
+    slink_data = slink(cluster_traits(exported_traits_data_list))
+    ordering_data = compute_heatmap_order(slink_data)
+    strains_and_values = retrieve_strains_and_values(
+        ordering_data, strainlist, exported_traits_data_list)
+    strains_values = strains_and_values[0][1]
+    trait_values = [t[2] for t in strains_and_values]
+    traits_filename = generate_traits_filename()
+    generate_traits_file(strains_values, trait_values, traits_filename)
+
+    return {
+        "slink_data": slink_data,
+        "ordering_data": ordering_data,
+        "strainlist": strainlist,
+        "genotype_filename": genotype_filename,
+        "traits_list": traits_list,
+        "traits_data_list": traits_data_list,
+        "exported_traits_data_list": exported_traits_data_list,
+        "traits_filename": traits_filename
+    }
+
+def compute_traits_order(slink_data, neworder: tuple = tuple()):
+    """
+    Compute the order of the traits for clustering from `slink_data`.
+
+    This function tries to reproduce the creation and update of the `neworder`
+    variable in
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L120
+    and in the `web.webqtl.heatmap.Heatmap.draw` function in GN1
+    """
+    def __order_maker(norder, slnk_dt):
+        if isinstance(slnk_dt[0], int) and isinstance(slnk_dt[1], int):
+            return norder + (slnk_dt[0], slnk_dt[1])
+
+        if isinstance(slnk_dt[0], int):
+            return __order_maker((norder + (slnk_dt[0], )), slnk_dt[1])
+
+        if isinstance(slnk_dt[1], int):
+            return __order_maker(norder, slnk_dt[0]) + (slnk_dt[1], )
+
+        return __order_maker(__order_maker(norder, slnk_dt[0]), slnk_dt[1])
+
+    return __order_maker(neworder, slink_data)
+
+def retrieve_strains_and_values(orders, strainlist, traits_data_list):
+    """
+    Get the strains and their corresponding values from `strainlist` and
+    `traits_data_list`.
+
+    This migrates the code in
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L215-221
+    """
+    # This feels nasty! There's a lot of mutation of values here, that might
+    # indicate something untoward in the design of this function and its
+    # dependents  ==>  Review
+    strains = []
+    values = []
+    rets = []
+    for order in orders:
+        temp_val = traits_data_list[order]
+        for i, strain in enumerate(strainlist):
+            if temp_val[i] is not None:
+                strains.append(strain)
+                values.append(temp_val[i])
+        rets.append([order, strains[:], values[:]])
+        strains = []
+        values = []
+
+    return rets
+
+def nearest_marker_finder(genotype):
+    """
+    Returns a function to be used with `genotype` to compute the nearest marker
+    to the trait passed to the returned function.
+
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L425-434
+    """
+    def __compute_distances(chromo, trait):
+        loci = chromo.get("loci", None)
+        if not loci:
+            return None
+        return tuple(
+            {
+                "name": locus["name"],
+                "distance": abs(locus["Mb"] - trait["mb"])
+            } for locus in loci)
+
+    def __finder(trait):
+        _chrs = tuple(
+            _chr for _chr in genotype["chromosomes"]
+            if str(_chr["name"]) == str(trait["chr"]))
+        if len(_chrs) == 0:
+            return None
+        distances = tuple(
+            distance for dists in
+            filter(
+                lambda x: x is not None,
+                (__compute_distances(_chr, trait) for _chr in _chrs))
+            for distance in dists)
+        nearest = min(distances, key=lambda d: d["distance"])
+        return nearest["name"]
+    return __finder
+
+def get_nearest_marker(traits_list, genotype):
+    """
+    Retrieves the nearest marker for each of the traits in the list.
+
+    DESCRIPTION:
+    This migrates the code in
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L419-L438
+    """
+    if not genotype["Mbmap"]:
+        return [None] * len(trait_list)
+
+    marker_finder = nearest_marker_finder(genotype)
+    return [marker_finder(trait) for trait in traits_list]
+
+# # Grey + Blue + Red
+# def generate_heatmap():
+#     cols = 20
+#     y_axis = (["%s"%x for x in range(1, cols+1)][:-1] + ["X"]) #replace last item with x for now
+#     x_axis = heatmap_x_axis_names()
+#     data = generate_random_data(height=cols, width=len(x_axis))
+#     fig = px.imshow(
+#         data,
+#         x=x_axis,
+#         y=y_axis,
+#         width=500)
+#     fig.update_traces(xtype="array")
+#     fig.update_traces(ytype="array")
+#     # fig.update_traces(xgap=10)
+#     fig.update_xaxes(
+#         visible=True,
+#         title_text="Traits",
+#         title_font_size=16)
+#     fig.update_layout(
+#         coloraxis_colorscale=[
+#             [0.0, '#3B3B3B'], [0.4999999999999999, '#ABABAB'],
+#             [0.5, '#F5DE11'], [1.0, '#FF0D00']])
+#     fig.write_html("%s/%s"%(heatmap_dir, "test_image.html"))
+#     return fig
diff --git a/gn3/heatmaps/heatmaps.py b/gn3/heatmaps/heatmaps.py
deleted file mode 100644
index 88f546d..0000000
--- a/gn3/heatmaps/heatmaps.py
+++ /dev/null
@@ -1,67 +0,0 @@
-import random
-import plotly.express as px
-
-#### Remove these ####
-
-heatmap_dir = "heatmap_images"
-
-def generate_random_data(data_stop: float = 2, width: int = 10, height: int = 30):
-    """
-    This is mostly a utility function to be used to generate random data, useful
-    for development of the heatmap generation code, without access to the actual
-    database data.
-    """
-    return [[random.uniform(0,data_stop) for i in range(0, width)]
-            for j in range(0, height)]
-
-def generate_random_data2(data_stop: float = 2, width: int = 10, height: int = 30):
-    """
-    This is mostly a utility function to be used to generate random data, useful
-    for development of the heatmap generation code, without access to the actual
-    database data.
-    """
-    return [
-        [{
-            "value": item,
-            "category": random.choice(["C57BL/6J +", "DBA/2J +"])}
-         for item in axis]
-        for axis in generate_random_data(data_stop, width, height)]
-
-def heatmap_x_axis_names():
-    return [
-        "UCLA_BXDBXH_CARTILAGE_V2::ILM103710672",
-        "UCLA_BXDBXH_CARTILAGE_V2::ILM2260338",
-        "UCLA_BXDBXH_CARTILAGE_V2::ILM3140576",
-        "UCLA_BXDBXH_CARTILAGE_V2::ILM5670577",
-        "UCLA_BXDBXH_CARTILAGE_V2::ILM2070121",
-        "UCLA_BXDBXH_CARTILAGE_V2::ILM103990541",
-        "UCLA_BXDBXH_CARTILAGE_V2::ILM1190722",
-        "UCLA_BXDBXH_CARTILAGE_V2::ILM6590722",
-        "UCLA_BXDBXH_CARTILAGE_V2::ILM4200064",
-        "UCLA_BXDBXH_CARTILAGE_V2::ILM3140463"]
-#### END: Remove these ####
-
-# Grey + Blue + Red
-def generate_heatmap():
-    cols = 20
-    y_axis = (["%s"%x for x in range(1, cols+1)][:-1] + ["X"]) #replace last item with x for now
-    x_axis = heatmap_x_axis_names()
-    data = generate_random_data(height=cols, width=len(x_axis))
-    fig = px.imshow(
-        data,
-        x=x_axis,
-        y=y_axis,
-        width=500)
-    fig.update_traces(xtype="array")
-    fig.update_traces(ytype="array")
-    # fig.update_traces(xgap=10)
-    fig.update_xaxes(
-        visible=True,
-        title_text="Traits",
-        title_font_size=16)
-    fig.update_layout(
-        coloraxis_colorscale=[
-            [0.0, '#3B3B3B'], [0.4999999999999999, '#ABABAB'],
-            [0.5, '#F5DE11'], [1.0, '#FF0D00']])
-    fig.write_html("%s/%s"%(heatmap_dir, "test_image.html"))
-    return fig
diff --git a/tests/unit/computations/test_heatmap.py b/tests/unit/computations/test_heatmap.py
deleted file mode 100644
index 156af45..0000000
--- a/tests/unit/computations/test_heatmap.py
+++ /dev/null
@@ -1,187 +0,0 @@
-"""Module contains tests for gn3.computations.heatmap"""
-from unittest import TestCase
-from gn3.computations.heatmap import (
-    cluster_traits,
-    export_trait_data,
-    compute_traits_order,
-    retrieve_strains_and_values)
-
-strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
-trait_data = {
-    "mysqlid": 36688172,
-    "data": {
-        "B6cC3-1": {"strain_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None},
-        "BXD1": {"strain_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None},
-        "BXD12": {"strain_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None},
-        "BXD16": {"strain_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None},
-        "BXD19": {"strain_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None},
-        "BXD2": {"strain_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None},
-        "BXD21": {"strain_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None},
-        "BXD24": {"strain_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None},
-        "BXD27": {"strain_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None},
-        "BXD28": {"strain_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None},
-        "BXD32": {"strain_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None},
-        "BXD39": {"strain_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None},
-        "BXD40": {"strain_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None},
-        "BXD42": {"strain_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None},
-        "BXD6": {"strain_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None},
-        "BXH14": {"strain_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None},
-        "BXH19": {"strain_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None},
-        "BXH2": {"strain_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None},
-        "BXH22": {"strain_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None},
-        "BXH4": {"strain_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None},
-        "BXH6": {"strain_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None},
-        "BXH7": {"strain_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None},
-        "BXH8": {"strain_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None},
-        "BXH9": {"strain_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None},
-        "C3H/HeJ": {"strain_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None},
-        "C57BL/6J": {"strain_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None},
-        "DBA/2J": {"strain_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
-
-slinked = (
-    (((0, 2, 0.16381088984330505),
-      ((1, 7, 0.06024619831474998), 5, 0.19179284676938602),
-      0.20337048635536847),
-     9,
-     0.23451785425383564),
-    ((3, (6, 8, 0.2140799896286565), 0.25879514152086425),
-     4, 0.8968250491499363),
-    0.9313185954797953)
-
-class TestHeatmap(TestCase):
-    """Class for testing heatmap computation functions"""
-
-    def test_export_trait_data_dtype(self):
-        """
-        Test `export_trait_data` with different values for the `dtype` keyword
-        argument
-        """
-        for dtype, expected in [
-                ["val", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["var", (None, None, None, None, None, None)],
-                ["N", (None, None, None, None, None, None)],
-                ["all", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)]]:
-            with self.subTest(dtype=dtype):
-                self.assertEqual(
-                    export_trait_data(trait_data, strainlist, dtype=dtype),
-                    expected)
-
-    def test_export_trait_data_dtype_all_flags(self):
-        """
-        Test `export_trait_data` with different values for the `dtype` keyword
-        argument and the different flags set up
-        """
-        for dtype, vflag, nflag, expected in [
-                ["val", False, False,
-                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["val", False, True,
-                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["val", True, False,
-                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["val", True, True,
-                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["var", False, False, (None, None, None, None, None, None)],
-                ["var", False, True, (None, None, None, None, None, None)],
-                ["var", True, False, (None, None, None, None, None, None)],
-                ["var", True, True, (None, None, None, None, None, None)],
-                ["N", False, False, (None, None, None, None, None, None)],
-                ["N", False, True, (None, None, None, None, None, None)],
-                ["N", True, False, (None, None, None, None, None, None)],
-                ["N", True, True, (None, None, None, None, None, None)],
-                ["all", False, False,
-                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["all", False, True,
-                 (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None,
-                  8.30401, None, 7.80944, None)],
-                ["all", True, False,
-                 (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None,
-                  8.30401, None, 7.80944, None)],
-                ["all", True, True,
-                 (7.51879, None, None, 7.77141, None, None, 8.39265, None, None,
-                  8.17443, None, None, 8.30401, None, None, 7.80944, None, None)]
-        ]:
-            with self.subTest(dtype=dtype, vflag=vflag, nflag=nflag):
-                self.assertEqual(
-                    export_trait_data(
-                        trait_data, strainlist, dtype=dtype, var_exists=vflag,
-                        n_exists=nflag),
-                    expected)
-
-    def test_cluster_traits(self):
-        """
-        Test that the clustering is working as expected.
-        """
-        traits_data_list = [
-            (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944),
-            (6.1427, 6.50588, 7.73705, 6.68328, 7.49293, 7.27398),
-            (8.4211, 8.30581, 9.24076, 8.51173, 9.18455, 8.36077),
-            (10.0904, 10.6509, 9.36716, 9.91202, 8.57444, 10.5731),
-            (10.188, 9.76652, 9.54813, 9.05074, 9.52319, 9.10505),
-            (6.74676, 7.01029, 7.54169, 6.48574, 7.01427, 7.26815),
-            (6.39359, 6.85321, 5.78337, 7.11141, 6.22101, 6.16544),
-            (6.84118, 7.08432, 7.59844, 7.08229, 7.26774, 7.24991),
-            (9.45215, 10.6943, 8.64719, 10.1592, 7.75044, 8.78615),
-            (7.04737, 6.87185, 7.58586, 6.92456, 6.84243, 7.36913)]
-        self.assertEqual(
-            cluster_traits(traits_data_list),
-            ((0.0, 0.20337048635536847, 0.16381088984330505, 1.7388553629398245,
-              1.5025235756329178, 0.6952839500255574, 1.271661230252733,
-              0.2100487290977544, 1.4699690641062024, 0.7934461515867415),
-             (0.20337048635536847, 0.0, 0.2198321044997198, 1.5753041735592204,
-              1.4815755944537086, 0.26087293140686374, 1.6939790104301427,
-              0.06024619831474998, 1.7430082449189215, 0.4497104244247795),
-             (0.16381088984330505, 0.2198321044997198, 0.0, 1.9073926868549234,
-              1.0396738891139845, 0.5278328671176757, 1.6275069061182947,
-              0.2636503792482082, 1.739617877037615, 0.7127042590637039),
-             (1.7388553629398245, 1.5753041735592204, 1.9073926868549234, 0.0,
-              0.9936846292920328, 1.1169999189889366, 0.6007483980555253,
-              1.430209221053372, 0.25879514152086425, 0.9313185954797953),
-             (1.5025235756329178, 1.4815755944537086, 1.0396738891139845,
-              0.9936846292920328, 0.0, 1.027827186339337, 1.1441743109173244,
-              1.4122477962364253, 0.8968250491499363, 1.1683723389247052),
-             (0.6952839500255574, 0.26087293140686374, 0.5278328671176757,
-              1.1169999189889366, 1.027827186339337, 0.0, 1.8420471110023269,
-              0.19179284676938602, 1.4875072385631605, 0.23451785425383564),
-             (1.271661230252733, 1.6939790104301427, 1.6275069061182947,
-              0.6007483980555253, 1.1441743109173244, 1.8420471110023269, 0.0,
-              1.6540234785929928, 0.2140799896286565, 1.7413442197913358),
-             (0.2100487290977544, 0.06024619831474998, 0.2636503792482082,
-              1.430209221053372, 1.4122477962364253, 0.19179284676938602,
-              1.6540234785929928, 0.0, 1.5225640692832796, 0.33370067057028485),
-             (1.4699690641062024, 1.7430082449189215, 1.739617877037615,
-              0.25879514152086425, 0.8968250491499363, 1.4875072385631605,
-              0.2140799896286565, 1.5225640692832796, 0.0, 1.3256191648260216),
-             (0.7934461515867415, 0.4497104244247795, 0.7127042590637039,
-              0.9313185954797953, 1.1683723389247052, 0.23451785425383564,
-              1.7413442197913358, 0.33370067057028485, 1.3256191648260216,
-              0.0)))
-
-    def test_compute_heatmap_order(self):
-        """Test the orders."""
-        self.assertEqual(
-            compute_traits_order(slinked), (0, 2, 1, 7, 5, 9, 3, 6, 8, 4))
-
-    def test_retrieve_strains_and_values(self):
-        """Test retrieval of strains and values."""
-        for orders, slist, tdata, expected in [
-                [
-                    [2],
-                    ["s1", "s2", "s3", "s4"],
-                    [[2, 9, 6, None, 4],
-                     [7, 5, None, None, 4],
-                     [9, None, 5, 4, 7],
-                     [6, None, None, 4, None]],
-                    [[2, ["s1", "s3", "s4"], [9, 5, 4]]]
-                ],
-                [
-                    [3],
-                    ["s1", "s2", "s3", "s4", "s5"],
-                    [[2, 9, 6, None, 4],
-                     [7, 5, None, None, 4],
-                     [9, None, 5, 4, 7],
-                     [6, None, None, 4, None]],
-                    [[3, ["s1", "s4"], [6, 4]]]
-                ]]:
-            with self.subTest(strainlist=slist, traitdata=tdata):
-                self.assertEqual(
-                    retrieve_strains_and_values(orders, slist, tdata), expected)
diff --git a/tests/unit/test_heatmaps.py b/tests/unit/test_heatmaps.py
new file mode 100644
index 0000000..265d5a8
--- /dev/null
+++ b/tests/unit/test_heatmaps.py
@@ -0,0 +1,187 @@
+"""Module contains tests for gn3.heatmaps.heatmaps"""
+from unittest import TestCase
+from gn3.heatmaps import (
+    cluster_traits,
+    export_trait_data,
+    compute_traits_order,
+    retrieve_strains_and_values)
+
+strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
+trait_data = {
+    "mysqlid": 36688172,
+    "data": {
+        "B6cC3-1": {"strain_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None},
+        "BXD1": {"strain_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None},
+        "BXD12": {"strain_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None},
+        "BXD16": {"strain_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None},
+        "BXD19": {"strain_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None},
+        "BXD2": {"strain_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None},
+        "BXD21": {"strain_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None},
+        "BXD24": {"strain_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None},
+        "BXD27": {"strain_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None},
+        "BXD28": {"strain_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None},
+        "BXD32": {"strain_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None},
+        "BXD39": {"strain_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None},
+        "BXD40": {"strain_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None},
+        "BXD42": {"strain_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None},
+        "BXD6": {"strain_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None},
+        "BXH14": {"strain_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None},
+        "BXH19": {"strain_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None},
+        "BXH2": {"strain_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None},
+        "BXH22": {"strain_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None},
+        "BXH4": {"strain_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None},
+        "BXH6": {"strain_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None},
+        "BXH7": {"strain_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None},
+        "BXH8": {"strain_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None},
+        "BXH9": {"strain_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None},
+        "C3H/HeJ": {"strain_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None},
+        "C57BL/6J": {"strain_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None},
+        "DBA/2J": {"strain_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
+
+slinked = (
+    (((0, 2, 0.16381088984330505),
+      ((1, 7, 0.06024619831474998), 5, 0.19179284676938602),
+      0.20337048635536847),
+     9,
+     0.23451785425383564),
+    ((3, (6, 8, 0.2140799896286565), 0.25879514152086425),
+     4, 0.8968250491499363),
+    0.9313185954797953)
+
+class TestHeatmap(TestCase):
+    """Class for testing heatmap computation functions"""
+
+    def test_export_trait_data_dtype(self):
+        """
+        Test `export_trait_data` with different values for the `dtype` keyword
+        argument
+        """
+        for dtype, expected in [
+                ["val", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["var", (None, None, None, None, None, None)],
+                ["N", (None, None, None, None, None, None)],
+                ["all", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)]]:
+            with self.subTest(dtype=dtype):
+                self.assertEqual(
+                    export_trait_data(trait_data, strainlist, dtype=dtype),
+                    expected)
+
+    def test_export_trait_data_dtype_all_flags(self):
+        """
+        Test `export_trait_data` with different values for the `dtype` keyword
+        argument and the different flags set up
+        """
+        for dtype, vflag, nflag, expected in [
+                ["val", False, False,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", False, True,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", True, False,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", True, True,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["var", False, False, (None, None, None, None, None, None)],
+                ["var", False, True, (None, None, None, None, None, None)],
+                ["var", True, False, (None, None, None, None, None, None)],
+                ["var", True, True, (None, None, None, None, None, None)],
+                ["N", False, False, (None, None, None, None, None, None)],
+                ["N", False, True, (None, None, None, None, None, None)],
+                ["N", True, False, (None, None, None, None, None, None)],
+                ["N", True, True, (None, None, None, None, None, None)],
+                ["all", False, False,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["all", False, True,
+                 (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None,
+                  8.30401, None, 7.80944, None)],
+                ["all", True, False,
+                 (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None,
+                  8.30401, None, 7.80944, None)],
+                ["all", True, True,
+                 (7.51879, None, None, 7.77141, None, None, 8.39265, None, None,
+                  8.17443, None, None, 8.30401, None, None, 7.80944, None, None)]
+        ]:
+            with self.subTest(dtype=dtype, vflag=vflag, nflag=nflag):
+                self.assertEqual(
+                    export_trait_data(
+                        trait_data, strainlist, dtype=dtype, var_exists=vflag,
+                        n_exists=nflag),
+                    expected)
+
+    def test_cluster_traits(self):
+        """
+        Test that the clustering is working as expected.
+        """
+        traits_data_list = [
+            (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944),
+            (6.1427, 6.50588, 7.73705, 6.68328, 7.49293, 7.27398),
+            (8.4211, 8.30581, 9.24076, 8.51173, 9.18455, 8.36077),
+            (10.0904, 10.6509, 9.36716, 9.91202, 8.57444, 10.5731),
+            (10.188, 9.76652, 9.54813, 9.05074, 9.52319, 9.10505),
+            (6.74676, 7.01029, 7.54169, 6.48574, 7.01427, 7.26815),
+            (6.39359, 6.85321, 5.78337, 7.11141, 6.22101, 6.16544),
+            (6.84118, 7.08432, 7.59844, 7.08229, 7.26774, 7.24991),
+            (9.45215, 10.6943, 8.64719, 10.1592, 7.75044, 8.78615),
+            (7.04737, 6.87185, 7.58586, 6.92456, 6.84243, 7.36913)]
+        self.assertEqual(
+            cluster_traits(traits_data_list),
+            ((0.0, 0.20337048635536847, 0.16381088984330505, 1.7388553629398245,
+              1.5025235756329178, 0.6952839500255574, 1.271661230252733,
+              0.2100487290977544, 1.4699690641062024, 0.7934461515867415),
+             (0.20337048635536847, 0.0, 0.2198321044997198, 1.5753041735592204,
+              1.4815755944537086, 0.26087293140686374, 1.6939790104301427,
+              0.06024619831474998, 1.7430082449189215, 0.4497104244247795),
+             (0.16381088984330505, 0.2198321044997198, 0.0, 1.9073926868549234,
+              1.0396738891139845, 0.5278328671176757, 1.6275069061182947,
+              0.2636503792482082, 1.739617877037615, 0.7127042590637039),
+             (1.7388553629398245, 1.5753041735592204, 1.9073926868549234, 0.0,
+              0.9936846292920328, 1.1169999189889366, 0.6007483980555253,
+              1.430209221053372, 0.25879514152086425, 0.9313185954797953),
+             (1.5025235756329178, 1.4815755944537086, 1.0396738891139845,
+              0.9936846292920328, 0.0, 1.027827186339337, 1.1441743109173244,
+              1.4122477962364253, 0.8968250491499363, 1.1683723389247052),
+             (0.6952839500255574, 0.26087293140686374, 0.5278328671176757,
+              1.1169999189889366, 1.027827186339337, 0.0, 1.8420471110023269,
+              0.19179284676938602, 1.4875072385631605, 0.23451785425383564),
+             (1.271661230252733, 1.6939790104301427, 1.6275069061182947,
+              0.6007483980555253, 1.1441743109173244, 1.8420471110023269, 0.0,
+              1.6540234785929928, 0.2140799896286565, 1.7413442197913358),
+             (0.2100487290977544, 0.06024619831474998, 0.2636503792482082,
+              1.430209221053372, 1.4122477962364253, 0.19179284676938602,
+              1.6540234785929928, 0.0, 1.5225640692832796, 0.33370067057028485),
+             (1.4699690641062024, 1.7430082449189215, 1.739617877037615,
+              0.25879514152086425, 0.8968250491499363, 1.4875072385631605,
+              0.2140799896286565, 1.5225640692832796, 0.0, 1.3256191648260216),
+             (0.7934461515867415, 0.4497104244247795, 0.7127042590637039,
+              0.9313185954797953, 1.1683723389247052, 0.23451785425383564,
+              1.7413442197913358, 0.33370067057028485, 1.3256191648260216,
+              0.0)))
+
+    def test_compute_heatmap_order(self):
+        """Test the orders."""
+        self.assertEqual(
+            compute_traits_order(slinked), (0, 2, 1, 7, 5, 9, 3, 6, 8, 4))
+
+    def test_retrieve_strains_and_values(self):
+        """Test retrieval of strains and values."""
+        for orders, slist, tdata, expected in [
+                [
+                    [2],
+                    ["s1", "s2", "s3", "s4"],
+                    [[2, 9, 6, None, 4],
+                     [7, 5, None, None, 4],
+                     [9, None, 5, 4, 7],
+                     [6, None, None, 4, None]],
+                    [[2, ["s1", "s3", "s4"], [9, 5, 4]]]
+                ],
+                [
+                    [3],
+                    ["s1", "s2", "s3", "s4", "s5"],
+                    [[2, 9, 6, None, 4],
+                     [7, 5, None, None, 4],
+                     [9, None, 5, 4, 7],
+                     [6, None, None, 4, None]],
+                    [[3, ["s1", "s4"], [6, 4]]]
+                ]]:
+            with self.subTest(strainlist=slist, traitdata=tdata):
+                self.assertEqual(
+                    retrieve_strains_and_values(orders, slist, tdata), expected)
-- 
cgit 1.4.1


From 1e2357049adc72808fbf8eaac3da9411d3c78c66 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Fri, 17 Sep 2021 11:20:16 +0300
Subject: Fix a number of linting issues

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi
---
 gn3/computations/qtlreaper.py             |  7 ++--
 gn3/db/genotypes.py                       |  2 +-
 gn3/heatmaps.py                           | 54 ++++++++++++-------------------
 tests/unit/computations/test_qtlreaper.py |  3 +-
 tests/unit/test_heatmaps.py               |  6 ++--
 5 files changed, 32 insertions(+), 40 deletions(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 5180853..377db9b 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -110,9 +110,10 @@ def organise_reaper_main_results(parsed_results):
         unique_chromosomes = {item["Chr"] for item in id_items}
         return {
             "ID": identifier,
-            "chromosomes": {_chr["Chr"]: _chr for _chr in [
-                __organise_by_chromosome(chromo, id_items)
-                for chromo in sorted(
+            "chromosomes": {
+                _chr["Chr"]: _chr for _chr in [
+                    __organise_by_chromosome(chromo, id_items)
+                    for chromo in sorted(
                         unique_chromosomes, key=chromosome_sorter_key_fn)]}}
 
     unique_ids = {res["ID"] for res in parsed_results}
diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py
index b03d55c..9d052d9 100644
--- a/gn3/db/genotypes.py
+++ b/gn3/db/genotypes.py
@@ -174,7 +174,7 @@ def parse_genotype_file(filename: str, parlist: tuple = tuple()):
     geno_obj = dict(labels + header)
     markers = tuple(
         [parse_genotype_marker(line, geno_obj, parlist)
-        for line in data_lines[1:]])
+         for line in data_lines[1:]])
     chromosomes = tuple(
         dict(chromosome) for chromosome in
         build_genotype_chromosomes(geno_obj, markers))
diff --git a/gn3/heatmaps.py b/gn3/heatmaps.py
index 2859dde..c4fc67d 100644
--- a/gn3/heatmaps.py
+++ b/gn3/heatmaps.py
@@ -3,13 +3,13 @@ This module will contain functions to be used in computation of the data used to
 generate various kinds of heatmaps.
 """
 
+from typing import Any, Dict, Sequence
 import numpy as np
 from functools import reduce
 from gn3.settings import TMPDIR
 import plotly.graph_objects as go
 import plotly.figure_factory as ff
 from gn3.random import random_string
-from typing import Any, Dict, Sequence
 from gn3.computations.slink import slink
 from plotly.subplots import make_subplots
 from gn3.computations.correlations2 import compute_correlation
@@ -165,7 +165,7 @@ def build_heatmap(traits_names, conn: Any):
         for fullname in traits_names]
     traits_data_list = [retrieve_trait_data(t, conn) for t in traits]
     genotype_filename = build_genotype_file(traits[0]["riset"])
-    genotype = parse_genotype_file(genotype_filename)
+    # genotype = parse_genotype_file(genotype_filename)
     strains = load_genotype_samples(genotype_filename)
     exported_traits_data_list = [
         export_trait_data(td, strains) for td in traits_data_list]
@@ -183,22 +183,21 @@ def build_heatmap(traits_names, conn: Any):
         [t[2] for t in strains_and_values],
         traits_filename)
 
-    main_output, permutations_output = run_reaper(
+    main_output, _permutations_output = run_reaper(
         genotype_filename, traits_filename, separate_nperm_output=True)
 
     qtlresults = parse_reaper_main_results(main_output)
-    permudata = parse_reaper_permutation_results(permutations_output)
+    # permudata = parse_reaper_permutation_results(permutations_output)
     organised = organise_reaper_main_results(qtlresults)
 
     traits_ids = [# sort numerically, but retain the ids as strings
         str(i) for i in sorted({int(row["ID"]) for row in qtlresults})]
     chromosome_names = sorted(
-        {row["Chr"] for row in qtlresults}, key = chromosome_sorter_key_fn)
-    loci_names = sorted({row["Locus"] for row in qtlresults})
-    ordered_traits_names = {
-        res_id: trait for res_id, trait in
+        {row["Chr"] for row in qtlresults}, key=chromosome_sorter_key_fn)
+    # loci_names = sorted({row["Locus"] for row in qtlresults})
+    ordered_traits_names = dict(
         zip(traits_ids,
-            [traits[idx]["trait_fullname"] for idx in traits_order])}
+            [traits[idx]["trait_fullname"] for idx in traits_order]))
 
     return generate_clustered_heatmap(
         process_traits_data_for_heatmap(
@@ -207,22 +206,11 @@ def build_heatmap(traits_names, conn: Any):
         "single_heatmap_{}".format(random_string(10)),
         y_axis=tuple(
             ordered_traits_names[traits_ids[order]]
-                for order in traits_order),
+            for order in traits_order),
         y_label="Traits",
-        x_axis=[chromo for chromo in chromosome_names],
+        x_axis=chromosome_names,
         x_label="Chromosomes")
 
-    return {
-        "slink_data": slink_data,
-        "ordering_data": ordering_data,
-        "strainlist": strainlist,
-        "genotype_filename": genotype_filename,
-        "traits_list": traits_list,
-        "traits_data_list": traits_data_list,
-        "exported_traits_data_list": exported_traits_data_list,
-        "traits_filename": traits_filename
-    }
-
 def compute_traits_order(slink_data, neworder: tuple = tuple()):
     """
     Compute the order of the traits for clustering from `slink_data`.
@@ -314,7 +302,7 @@ def get_nearest_marker(traits_list, genotype):
     https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L419-L438
     """
     if not genotype["Mbmap"]:
-        return [None] * len(trait_list)
+        return [None] * len(traits_list)
 
     marker_finder = nearest_marker_finder(genotype)
     return [marker_finder(trait) for trait in traits_list]
@@ -340,10 +328,10 @@ def process_traits_data_for_heatmap(data, trait_names, chromosome_names):
     return hdata
 
 def generate_clustered_heatmap(
-        data, clustering_data, image_filename_prefix, x_axis = None,
-        x_label: str = "", y_axis = None, y_label: str = "",
+        data, clustering_data, image_filename_prefix, x_axis=None,
+        x_label: str = "", y_axis=None, y_label: str = "",
         output_dir: str = TMPDIR,
-        colorscale = (
+        colorscale=(
             (0.0, '#5D5D5D'), (0.4999999999999999, '#ABABAB'),
             (0.5, '#F5DE11'), (1.0, '#FF0D00'))):
     """
@@ -357,15 +345,15 @@ def generate_clustered_heatmap(
         shared_yaxes="rows",
         horizontal_spacing=0.001,
         subplot_titles=["distance"] + x_axis,
-        figure = ff.create_dendrogram(
+        figure=ff.create_dendrogram(
             np.array(clustering_data), orientation="right", labels=y_axis))
     hms = [go.Heatmap(
         name=chromo,
-        y = y_axis,
-        z = data_array,
+        y=y_axis,
+        z=data_array,
         showscale=False) for chromo, data_array in zip(x_axis, data)]
-    for i, hm in enumerate(hms):
-        fig.add_trace(hm, row=1, col=(i + 2))
+    for i, heatmap in enumerate(hms):
+        fig.add_trace(heatmap, row=1, col=(i + 2))
 
     fig.update_layout(
         {
@@ -380,8 +368,8 @@ def generate_clustered_heatmap(
     x_axes_layouts = {
         "xaxis{}".format(i+1 if i > 0 else ""): {
             "mirror": False,
-            "showticklabels": True if i==0 else False,
-            "ticks": "outside" if i==0 else ""
+            "showticklabels": True if i == 0 else False,
+            "ticks": "outside" if i == 0 else ""
         }
         for i in range(num_cols)}
 
diff --git a/tests/unit/computations/test_qtlreaper.py b/tests/unit/computations/test_qtlreaper.py
index 1d67827..d420470 100644
--- a/tests/unit/computations/test_qtlreaper.py
+++ b/tests/unit/computations/test_qtlreaper.py
@@ -77,6 +77,7 @@ class TestQTLReaper(TestCase):
              5.82775, 5.89659, 5.92117, 5.93396, 5.93396, 5.94957])
 
     def test_organise_reaper_main_results(self):
+        """Check that results are organised correctly."""
         self.assertEqual(
             organise_reaper_main_results([
                 {
@@ -135,7 +136,7 @@ class TestQTLReaper(TestCase):
                         1: {"Chr": 1,
                             "loci": [
                                 {
-                                    "Locus": "rs31443144",  "cM": 1.500, "Mb": 3.010,
+                                    "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010,
                                     "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
                                 },
                                 {
diff --git a/tests/unit/test_heatmaps.py b/tests/unit/test_heatmaps.py
index f3a81c5..c0a496b 100644
--- a/tests/unit/test_heatmaps.py
+++ b/tests/unit/test_heatmaps.py
@@ -189,6 +189,7 @@ class TestHeatmap(TestCase):
                     retrieve_strains_and_values(orders, slist, tdata), expected)
 
     def test_get_lrs_from_chr(self):
+        """Check that function gets correct LRS values"""
         for trait, chromosome, expected in [
                 [{"chromosomes": {}}, 3, [None]],
                 [{"chromosomes": {3: {"loci": [
@@ -202,6 +203,7 @@ class TestHeatmap(TestCase):
                 self.assertEqual(get_lrs_from_chr(trait, chromosome), expected)
 
     def test_process_traits_data_for_heatmap(self):
+        """Check for correct processing of data for heatmap generation."""
         self.assertEqual(
             process_traits_data_for_heatmap(
                 {"1": {
@@ -210,7 +212,7 @@ class TestHeatmap(TestCase):
                         1: {"Chr": 1,
                             "loci": [
                                 {
-                                    "Locus": "rs31443144",  "cM": 1.500, "Mb": 3.010,
+                                    "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010,
                                     "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
                                 },
                                 {
@@ -257,7 +259,7 @@ class TestHeatmap(TestCase):
                          1: {"Chr": 1,
                              "loci": [
                                  {
-                                     "Locus": "rs31443144",  "cM": 1.500, "Mb": 3.010,
+                                     "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010,
                                      "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
                                  },
                                  {
-- 
cgit 1.4.1


From cd7f301688fd9780df1f842f8bd2b7602775ba1f Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Wed, 22 Sep 2021 07:53:53 +0300
Subject: Fix pylint errors

* Add missing function and module docstrings
* Remove unused imports
* Fix import order
* Rework some code sections to fix issues
* Disable some pylint errors.
---
 gn3/api/heatmaps.py           |  8 ++++++++
 gn3/app.py                    |  5 +++--
 gn3/computations/qtlreaper.py |  8 ++++++++
 gn3/db/genotypes.py           |  1 +
 gn3/db/traits.py              |  2 +-
 gn3/heatmaps.py               | 28 ++++++++++++++++------------
 6 files changed, 37 insertions(+), 15 deletions(-)

(limited to 'gn3/computations')

diff --git a/gn3/api/heatmaps.py b/gn3/api/heatmaps.py
index 1022a35..fe47aee 100644
--- a/gn3/api/heatmaps.py
+++ b/gn3/api/heatmaps.py
@@ -1,3 +1,7 @@
+"""
+Module to hold the entrypoint functions that generate heatmaps
+"""
+
 import io
 from flask import jsonify
 from flask import request
@@ -9,6 +13,10 @@ heatmaps = Blueprint("heatmaps", __name__)
 
 @heatmaps.route("/clustered", methods=("POST",))
 def clustered_heatmaps():
+    """
+    Parses the incoming data and responds with the JSON-serialized plotly figure
+    representing the clustered heatmap.
+    """
     heatmap_request = request.get_json()
     traits_names = heatmap_request.get("traits_names", tuple())
     if len(traits_names) < 2:
diff --git a/gn3/app.py b/gn3/app.py
index 6b4c57e..8badb65 100644
--- a/gn3/app.py
+++ b/gn3/app.py
@@ -3,7 +3,10 @@ import os
 
 from typing import Dict
 from typing import Union
+
 from flask import Flask
+from flask_cors import CORS
+
 from gn3.api.gemma import gemma
 from gn3.api.rqtl import rqtl
 from gn3.api.general import general
@@ -11,8 +14,6 @@ from gn3.api.heatmaps import heatmaps
 from gn3.api.correlation import correlation
 from gn3.api.data_entry import data_entry
 
-from flask_cors import CORS
-
 def create_app(config: Union[Dict, str, None] = None) -> Flask:
     """Create a new flask object"""
     app = Flask(__name__)
diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 377db9b..5d17fed 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -87,11 +87,17 @@ def run_reaper(
     return (output_filename, permu_output_filename)
 
 def chromosome_sorter_key_fn(val):
+    """
+    Useful for sorting the chromosomes
+    """
     if isinstance(val, int):
         return val
     return ord(val)
 
 def organise_reaper_main_results(parsed_results):
+    """
+    Provide the results of running reaper in a format that is easier to use.
+    """
     def __organise_by_chromosome(chr_name, items):
         chr_items = [item for item in items if item["Chr"] == chr_name]
         return {
@@ -129,12 +135,14 @@ def parse_reaper_main_results(results_file):
         lines = infile.readlines()
 
     def __parse_column_float_value(value):
+        # pylint: disable=W0702
         try:
             return float(value)
         except:
             return value
 
     def __parse_column_int_value(value):
+        # pylint: disable=W0702
         try:
             return int(value)
         except:
diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py
index 9d052d9..919c539 100644
--- a/gn3/db/genotypes.py
+++ b/gn3/db/genotypes.py
@@ -115,6 +115,7 @@ def parse_genotype_marker(line: str, geno_obj: dict, parlist: list):
     Reworks
     https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/utility/gen_geno_ob.py#L143-L190
     """
+    # pylint: disable=W0702
     marker_row = [item.strip() for item in line.split("\t")]
     geno_table = {
         geno_obj["mat"]: -1, geno_obj["pat"]: 1, geno_obj["het"]: 0,
diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index bfe887e..747ed27 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -46,7 +46,7 @@ def update_sample_data(conn: Any,
                        count: Union[int, str]):
     """Given the right parameters, update sample-data from the relevant
     table."""
-    # pylint: disable=[R0913, R0914]
+    # pylint: disable=[R0913, R0914, C0103]
     STRAIN_ID_SQL: str = "UPDATE Strain SET Name = %s WHERE Id = %s"
     PUBLISH_DATA_SQL: str = ("UPDATE PublishData SET value = %s "
                              "WHERE StrainId = %s AND Id = %s")
diff --git a/gn3/heatmaps.py b/gn3/heatmaps.py
index cd93b3f..9d82fb2 100644
--- a/gn3/heatmaps.py
+++ b/gn3/heatmaps.py
@@ -3,29 +3,28 @@ This module will contain functions to be used in computation of the data used to
 generate various kinds of heatmaps.
 """
 
+from functools import reduce
 from typing import Any, Dict, Sequence
+
 import numpy as np
-from functools import reduce
-from gn3.settings import TMPDIR
 import plotly.graph_objects as go
 import plotly.figure_factory as ff
+from plotly.subplots import make_subplots
+
+from gn3.settings import TMPDIR
 from gn3.random import random_string
 from gn3.computations.slink import slink
-from plotly.subplots import make_subplots
 from gn3.computations.correlations2 import compute_correlation
 from gn3.db.genotypes import (
-    build_genotype_file, load_genotype_samples, parse_genotype_file)
+    build_genotype_file, load_genotype_samples)
 from gn3.db.traits import (
-    retrieve_trait_data,
-    retrieve_trait_info,
-    generate_traits_filename)
+    retrieve_trait_data, retrieve_trait_info)
 from gn3.computations.qtlreaper import (
     run_reaper,
     generate_traits_file,
     chromosome_sorter_key_fn,
     parse_reaper_main_results,
-    organise_reaper_main_results,
-    parse_reaper_permutation_results)
+    organise_reaper_main_results)
 
 def export_trait_data(
         trait_data: dict, strainlist: Sequence[str], dtype: str = "val",
@@ -159,13 +158,13 @@ def build_heatmap(traits_names, conn: Any):
     PARAMETERS:
     TODO: Elaborate on the parameters here...
     """
+    # pylint: disable=[R0914]
     threshold = 0 # webqtlConfig.PUBLICTHRESH
     traits = [
         retrieve_trait_info(threshold, fullname, conn)
         for fullname in traits_names]
     traits_data_list = [retrieve_trait_data(t, conn) for t in traits]
     genotype_filename = build_genotype_file(traits[0]["riset"])
-    # genotype = parse_genotype_file(genotype_filename)
     strains = load_genotype_samples(genotype_filename)
     exported_traits_data_list = [
         export_trait_data(td, strains) for td in traits_data_list]
@@ -336,6 +335,7 @@ def generate_clustered_heatmap(
     Generate a dendrogram, and heatmaps for each chromosome, and put them all
     into one plot.
     """
+    # pylint: disable=[R0913, R0914]
     num_cols = 1 + len(x_axis)
     fig = make_subplots(
         rows=1,
@@ -359,14 +359,18 @@ def generate_clustered_heatmap(
             "height": 800,
             "xaxis": {
                 "mirror": False,
-                "showgrid": True
+                "showgrid": True,
+                "title": x_label
+            },
+            "yaxis": {
+                "title": y_label
             }
         })
 
     x_axes_layouts = {
         "xaxis{}".format(i+1 if i > 0 else ""): {
             "mirror": False,
-            "showticklabels": True if i == 0 else False,
+            "showticklabels": i == 0,
             "ticks": "outside" if i == 0 else ""
         }
         for i in range(num_cols)}
-- 
cgit 1.4.1


From 71cc35e5178904b512b9007e33be17a36f6656f2 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Wed, 22 Sep 2021 08:36:11 +0300
Subject: Fix typing issues

* Ignore some errors
* Update typing definitions for some portions of code
* Add missing imports
---
 gn3/app.py                    |  2 +-
 gn3/computations/qtlreaper.py |  6 ++++--
 gn3/db/genotypes.py           | 10 ++++++----
 gn3/db/traits.py              |  8 ++++----
 gn3/heatmaps.py               |  8 +++-----
 5 files changed, 18 insertions(+), 16 deletions(-)

(limited to 'gn3/computations')

diff --git a/gn3/app.py b/gn3/app.py
index 8badb65..5e852e1 100644
--- a/gn3/app.py
+++ b/gn3/app.py
@@ -5,7 +5,7 @@ from typing import Dict
 from typing import Union
 
 from flask import Flask
-from flask_cors import CORS
+from flask_cors import CORS # type: ignore
 
 from gn3.api.gemma import gemma
 from gn3.api.rqtl import rqtl
diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 5d17fed..5ddea76 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -4,6 +4,8 @@ computation of QTLs.
 """
 import os
 import subprocess
+from typing import Union
+
 from gn3.random import random_string
 from gn3.settings import TMPDIR, REAPER_COMMAND
 
@@ -70,9 +72,9 @@ def run_reaper(
         output_dir, random_string(10))
     output_list = ["--main_output", output_filename]
     if separate_nperm_output:
-        permu_output_filename = "{}/qtlreaper/permu_output_{}.txt".format(
+        permu_output_filename: Union[None, str] = "{}/qtlreaper/permu_output_{}.txt".format(
             output_dir, random_string(10))
-        output_list = output_list + ["--permu_output", permu_output_filename]
+        output_list = output_list + ["--permu_output", permu_output_filename] # type: ignore[list-item]
     else:
         permu_output_filename = None
 
diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py
index 919c539..9ea9f20 100644
--- a/gn3/db/genotypes.py
+++ b/gn3/db/genotypes.py
@@ -2,6 +2,8 @@
 
 import os
 import gzip
+from typing import Union, TextIO
+
 from gn3.settings import GENOTYPE_FILES
 
 def build_genotype_file(
@@ -44,17 +46,17 @@ def __load_genotype_samples_from_geno(genotype_filename: str):
     """
     gzipped_filename = "{}.gz".format(genotype_filename)
     if os.path.isfile(gzipped_filename):
-        genofile = gzip.open(gzipped_filename)
+        genofile: Union[TextIO, gzip.GzipFile] = gzip.open(gzipped_filename)
     else:
         genofile = open(genotype_filename)
 
     for row in genofile:
         line = row.strip()
-        if (not line) or (line.startswith(("#", "@"))):
+        if (not line) or (line.startswith(("#", "@"))): # type: ignore[arg-type]
             continue
         break
 
-    headers = line.split("\t")
+    headers = line.split("\t" ) # type: ignore[arg-type]
     if headers[3] == "Mb":
         return headers[4:]
     return headers[3:]
@@ -107,7 +109,7 @@ def parse_genotype_header(line: str, parlist: tuple = tuple()):
         ("prgy", prgy),
         ("nprgy", len(prgy)))
 
-def parse_genotype_marker(line: str, geno_obj: dict, parlist: list):
+def parse_genotype_marker(line: str, geno_obj: dict, parlist: tuple):
     """
     Parse a data line in a genotype file
 
diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index 747ed27..4fc47c3 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -63,22 +63,22 @@ def update_sample_data(conn: Any,
     with conn.cursor() as cursor:
         # Update the Strains table
         cursor.execute(STRAIN_ID_SQL, (strain_name, strain_id))
-        updated_strains: int = cursor.rowcount
+        updated_strains = cursor.rowcount
         # Update the PublishData table
         cursor.execute(PUBLISH_DATA_SQL,
                        (None if value == "x" else value,
                         strain_id, publish_data_id))
-        updated_published_data: int = cursor.rowcount
+        updated_published_data = cursor.rowcount
         # Update the PublishSE table
         cursor.execute(PUBLISH_SE_SQL,
                        (None if error == "x" else error,
                         strain_id, publish_data_id))
-        updated_se_data: int = cursor.rowcount
+        updated_se_data = cursor.rowcount
         # Update the NStrain table
         cursor.execute(N_STRAIN_SQL,
                        (None if count == "x" else count,
                         strain_id, publish_data_id))
-        updated_n_strains: int = cursor.rowcount
+        updated_n_strains = cursor.rowcount
     return (updated_strains, updated_published_data,
             updated_se_data, updated_n_strains)
 
diff --git a/gn3/heatmaps.py b/gn3/heatmaps.py
index 9d82fb2..45d0c22 100644
--- a/gn3/heatmaps.py
+++ b/gn3/heatmaps.py
@@ -7,9 +7,9 @@ from functools import reduce
 from typing import Any, Dict, Sequence
 
 import numpy as np
-import plotly.graph_objects as go
-import plotly.figure_factory as ff
-from plotly.subplots import make_subplots
+import plotly.graph_objects as go # type: ignore
+import plotly.figure_factory as ff # type: ignore
+from plotly.subplots import make_subplots # type: ignore
 
 from gn3.settings import TMPDIR
 from gn3.random import random_string
@@ -171,8 +171,6 @@ def build_heatmap(traits_names, conn: Any):
     clustered = cluster_traits(exported_traits_data_list)
     slinked = slink(clustered)
     traits_order = compute_traits_order(slinked)
-    ordered_traits_names = [
-        traits[idx]["trait_fullname"] for idx in traits_order]
     strains_and_values = retrieve_strains_and_values(
         traits_order, strains, exported_traits_data_list)
     traits_filename = "{}/traits_test_file_{}.txt".format(
-- 
cgit 1.4.1


From 56c73324c285d896567268370f3955bbd15754b0 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Wed, 22 Sep 2021 09:02:46 +0300
Subject: Fix more pylint errors

---
 gn3/computations/qtlreaper.py | 3 ++-
 gn3/db/genotypes.py           | 2 +-
 tests/unit/db/test_traits.py  | 1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 5ddea76..8b2893e 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -74,7 +74,8 @@ def run_reaper(
     if separate_nperm_output:
         permu_output_filename: Union[None, str] = "{}/qtlreaper/permu_output_{}.txt".format(
             output_dir, random_string(10))
-        output_list = output_list + ["--permu_output", permu_output_filename] # type: ignore[list-item]
+        output_list = output_list + [
+            "--permu_output", permu_output_filename] # type: ignore[list-item]
     else:
         permu_output_filename = None
 
diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py
index 9ea9f20..9987320 100644
--- a/gn3/db/genotypes.py
+++ b/gn3/db/genotypes.py
@@ -56,7 +56,7 @@ def __load_genotype_samples_from_geno(genotype_filename: str):
             continue
         break
 
-    headers = line.split("\t" ) # type: ignore[arg-type]
+    headers = line.split("\t") # type: ignore[arg-type]
     if headers[3] == "Mb":
         return headers[4:]
     return headers[3:]
diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py
index ee98893..baa2af3 100644
--- a/tests/unit/db/test_traits.py
+++ b/tests/unit/db/test_traits.py
@@ -166,6 +166,7 @@ class TestTraitsDBFunctions(TestCase):
         the right calls.
 
         """
+        # pylint: disable=C0103
         db_mock = mock.MagicMock()
 
         STRAIN_ID_SQL: str = "UPDATE Strain SET Name = %s WHERE Id = %s"
-- 
cgit 1.4.1


From 19783a18c2bc7941fc5980e593f19fb1d18c3623 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 27 Sep 2021 04:48:53 +0300
Subject: Update terminology: `strain` to `sample`

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* Update the terminology used: use `sample` in place of `strain` according to
  Zachary's direction at
  https://github.com/genenetwork/genenetwork3/pull/37#issuecomment-926043306
---
 gn3/computations/parsers.py             | 10 ++---
 gn3/computations/qtlreaper.py           |  8 ++--
 gn3/db/genotypes.py                     |  8 ++--
 gn3/db/traits.py                        | 44 ++++++++++-----------
 gn3/heatmaps.py                         | 62 ++++++++++++++---------------
 tests/unit/computations/test_parsers.py |  4 +-
 tests/unit/test_heatmaps.py             | 70 ++++++++++++++++-----------------
 7 files changed, 103 insertions(+), 103 deletions(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/parsers.py b/gn3/computations/parsers.py
index 94387ff..1af35d6 100644
--- a/gn3/computations/parsers.py
+++ b/gn3/computations/parsers.py
@@ -14,7 +14,7 @@ def parse_genofile(file_path: str) -> Tuple[List[str],
         'h': 0,
         'u': None,
     }
-    genotypes, strains = [], []
+    genotypes, samples = [], []
     with open(file_path, "r") as _genofile:
         for line in _genofile:
             line = line.strip()
@@ -22,8 +22,8 @@ def parse_genofile(file_path: str) -> Tuple[List[str],
                 continue
             cells = line.split()
             if line.startswith("Chr"):
-                strains = cells[4:]
-                strains = [strain.lower() for strain in strains]
+                samples = cells[4:]
+                samples = [sample.lower() for sample in samples]
                 continue
             values = [__map.get(value.lower(), None) for value in cells[4:]]
             genotype = {
@@ -32,7 +32,7 @@ def parse_genofile(file_path: str) -> Tuple[List[str],
                 "cm": cells[2],
                 "mb": cells[3],
                 "values":  values,
-                "dicvalues": dict(zip(strains, values)),
+                "dicvalues": dict(zip(samples, values)),
             }
             genotypes.append(genotype)
-        return strains, genotypes
+        return samples, genotypes
diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 8b2893e..166d2dd 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -9,17 +9,17 @@ from typing import Union
 from gn3.random import random_string
 from gn3.settings import TMPDIR, REAPER_COMMAND
 
-def generate_traits_file(strains, trait_values, traits_filename):
+def generate_traits_file(samples, trait_values, traits_filename):
     """
     Generate a traits file for use with `qtlreaper`.
 
     PARAMETERS:
-    strains: A list of strains to use as the headers for the various columns.
-    trait_values: A list of lists of values for each trait and strain.
+    samples: A list of samples to use as the headers for the various columns.
+    trait_values: A list of lists of values for each trait and sample.
     traits_filename: The tab-separated value to put the values in for
         computation of QTLs.
     """
-    header = "Trait\t{}\n".format("\t".join(strains))
+    header = "Trait\t{}\n".format("\t".join(samples))
     data = (
         [header] +
         ["{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py
index 9987320..8f18cac 100644
--- a/gn3/db/genotypes.py
+++ b/gn3/db/genotypes.py
@@ -14,16 +14,16 @@ def build_genotype_file(
 
 def load_genotype_samples(genotype_filename: str, file_type: str = "geno"):
     """
-    Load sample of strains from genotype files.
+    Load the list of samples from genotype files.
 
     DESCRIPTION:
-    Traits can contain a varied number of strains, some of which do not exist in
+    Traits can contain a varied number of samples, some of which do not exist in
     certain genotypes. In order to compute QTLs, GEMMAs, etc, we need to ensure
-    to pick only those strains that exist in the genotype under consideration
+    to pick only those samples that exist in the genotype under consideration
     for the traits used in the computation.
 
     This function loads a list of samples from the genotype files for use in
-    filtering out unusable strains.
+    filtering out unusable samples.
 
 
     PARAMETERS:
diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index 4fc47c3..c9d05d7 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -445,7 +445,7 @@ def retrieve_temp_trait_data(trait_info: dict, conn: Any):
             query,
             {"trait_name": trait_info["trait_name"]})
         return [dict(zip(
-            ["strain_name", "value", "se_error", "nstrain", "id"], row))
+            ["sample_name", "value", "se_error", "nstrain", "id"], row))
                 for row in cursor.fetchall()]
     return []
 
@@ -484,7 +484,7 @@ def retrieve_geno_trait_data(trait_info: Dict, conn: Any):
              "species_id": retrieve_species_id(
                  trait_info["db"]["riset"], conn)})
         return [dict(zip(
-            ["strain_name", "value", "se_error", "id"], row))
+            ["sample_name", "value", "se_error", "id"], row))
                 for row in cursor.fetchall()]
     return []
 
@@ -515,7 +515,7 @@ def retrieve_publish_trait_data(trait_info: Dict, conn: Any):
             {"trait_name": trait_info["trait_name"],
              "dataset_id": trait_info["db"]["dataset_id"]})
         return [dict(zip(
-            ["strain_name", "value", "se_error", "nstrain", "id"], row))
+            ["sample_name", "value", "se_error", "nstrain", "id"], row))
                 for row in cursor.fetchall()]
     return []
 
@@ -548,7 +548,7 @@ def retrieve_cellid_trait_data(trait_info: Dict, conn: Any):
              "trait_name": trait_info["trait_name"],
              "dataset_id": trait_info["db"]["dataset_id"]})
         return [dict(zip(
-            ["strain_name", "value", "se_error", "id"], row))
+            ["sample_name", "value", "se_error", "id"], row))
                 for row in cursor.fetchall()]
     return []
 
@@ -577,29 +577,29 @@ def retrieve_probeset_trait_data(trait_info: Dict, conn: Any):
             {"trait_name": trait_info["trait_name"],
              "dataset_name": trait_info["db"]["dataset_name"]})
         return [dict(zip(
-            ["strain_name", "value", "se_error", "id"], row))
+            ["sample_name", "value", "se_error", "id"], row))
                 for row in cursor.fetchall()]
     return []
 
-def with_strainlist_data_setup(strainlist: Sequence[str]):
+def with_samplelist_data_setup(samplelist: Sequence[str]):
     """
-    Build function that computes the trait data from provided list of strains.
+    Build function that computes the trait data from provided list of samples.
 
     PARAMETERS
-    strainlist: (list)
-      A list of strain names
+    samplelist: (list)
+      A list of sample names
 
     RETURNS:
       Returns a function that given some data from the database, computes the
-      strain's value, variance and ndata values, only if the strain is present
-      in the provided `strainlist` variable.
+      sample's value, variance and ndata values, only if the sample is present
+      in the provided `samplelist` variable.
     """
     def setup_fn(tdata):
-        if tdata["strain_name"] in strainlist:
+        if tdata["sample_name"] in samplelist:
             val = tdata["value"]
             if val is not None:
                 return {
-                    "strain_name": tdata["strain_name"],
+                    "sample_name": tdata["sample_name"],
                     "value": val,
                     "variance": tdata["se_error"],
                     "ndata": tdata.get("nstrain", None)
@@ -607,19 +607,19 @@ def with_strainlist_data_setup(strainlist: Sequence[str]):
         return None
     return setup_fn
 
-def without_strainlist_data_setup():
+def without_samplelist_data_setup():
     """
     Build function that computes the trait data.
 
     RETURNS:
       Returns a function that given some data from the database, computes the
-      strain's value, variance and ndata values.
+      sample's value, variance and ndata values.
     """
     def setup_fn(tdata):
         val = tdata["value"]
         if val is not None:
             return {
-                "strain_name": tdata["strain_name"],
+                "sample_name": tdata["sample_name"],
                 "value": val,
                 "variance": tdata["se_error"],
                 "ndata": tdata.get("nstrain", None)
@@ -627,7 +627,7 @@ def without_strainlist_data_setup():
         return None
     return setup_fn
 
-def retrieve_trait_data(trait: dict, conn: Any, strainlist: Sequence[str] = tuple()):
+def retrieve_trait_data(trait: dict, conn: Any, samplelist: Sequence[str] = tuple()):
     """
     Retrieve trait data
 
@@ -650,23 +650,23 @@ def retrieve_trait_data(trait: dict, conn: Any, strainlist: Sequence[str] = tupl
     if results:
         # do something with mysqlid
         mysqlid = results[0]["id"]
-        if strainlist:
+        if samplelist:
             data = [
                 item for item in
-                map(with_strainlist_data_setup(strainlist), results)
+                map(with_samplelist_data_setup(samplelist), results)
                 if item is not None]
         else:
             data = [
                 item for item in
-                map(without_strainlist_data_setup(), results)
+                map(without_samplelist_data_setup(), results)
                 if item is not None]
 
         return {
             "mysqlid": mysqlid,
             "data": dict(map(
                 lambda x: (
-                    x["strain_name"],
-                    {k:v for k, v in x.items() if x != "strain_name"}),
+                    x["sample_name"],
+                    {k:v for k, v in x.items() if x != "sample_name"}),
                 data))}
     return {}
 
diff --git a/gn3/heatmaps.py b/gn3/heatmaps.py
index 45d0c22..b6fc6d3 100644
--- a/gn3/heatmaps.py
+++ b/gn3/heatmaps.py
@@ -27,10 +27,10 @@ from gn3.computations.qtlreaper import (
     organise_reaper_main_results)
 
 def export_trait_data(
-        trait_data: dict, strainlist: Sequence[str], dtype: str = "val",
+        trait_data: dict, samplelist: Sequence[str], dtype: str = "val",
         var_exists: bool = False, n_exists: bool = False):
     """
-    Export data according to `strainlist`. Mostly used in calculating
+    Export data according to `samplelist`. Mostly used in calculating
     correlations.
 
     DESCRIPTION:
@@ -40,8 +40,8 @@ def export_trait_data(
     PARAMETERS
     trait: (dict)
       The dictionary of key-value pairs representing a trait
-    strainlist: (list)
-      A list of strain names
+    samplelist: (list)
+      A list of sample names
     dtype: (str)
       ... verify what this is ...
     var_exists: (bool)
@@ -49,18 +49,18 @@ def export_trait_data(
     n_exists: (bool)
       A flag indicating existence of ndata
     """
-    def __export_all_types(tdata, strain):
+    def __export_all_types(tdata, sample):
         sample_data = []
-        if tdata[strain]["value"]:
-            sample_data.append(tdata[strain]["value"])
+        if tdata[sample]["value"]:
+            sample_data.append(tdata[sample]["value"])
             if var_exists:
-                if tdata[strain]["variance"]:
-                    sample_data.append(tdata[strain]["variance"])
+                if tdata[sample]["variance"]:
+                    sample_data.append(tdata[sample]["variance"])
                 else:
                     sample_data.append(None)
             if n_exists:
-                if tdata[strain]["ndata"]:
-                    sample_data.append(tdata[strain]["ndata"])
+                if tdata[sample]["ndata"]:
+                    sample_data.append(tdata[sample]["ndata"])
                 else:
                     sample_data.append(None)
         else:
@@ -73,17 +73,17 @@ def export_trait_data(
 
         return tuple(sample_data)
 
-    def __exporter(accumulator, strain):
+    def __exporter(accumulator, sample):
         # pylint: disable=[R0911]
-        if strain in trait_data["data"]:
+        if sample in trait_data["data"]:
             if dtype == "val":
-                return accumulator + (trait_data["data"][strain]["value"], )
+                return accumulator + (trait_data["data"][sample]["value"], )
             if dtype == "var":
-                return accumulator + (trait_data["data"][strain]["variance"], )
+                return accumulator + (trait_data["data"][sample]["variance"], )
             if dtype == "N":
-                return accumulator + (trait_data["data"][strain]["ndata"], )
+                return accumulator + (trait_data["data"][sample]["ndata"], )
             if dtype == "all":
-                return accumulator + __export_all_types(trait_data["data"], strain)
+                return accumulator + __export_all_types(trait_data["data"], sample)
             raise KeyError("Type `%s` is incorrect" % dtype)
         if var_exists and n_exists:
             return accumulator + (None, None, None)
@@ -91,7 +91,7 @@ def export_trait_data(
             return accumulator + (None, None)
         return accumulator + (None,)
 
-    return reduce(__exporter, strainlist, tuple())
+    return reduce(__exporter, samplelist, tuple())
 
 def trait_display_name(trait: Dict):
     """
@@ -165,19 +165,19 @@ def build_heatmap(traits_names, conn: Any):
         for fullname in traits_names]
     traits_data_list = [retrieve_trait_data(t, conn) for t in traits]
     genotype_filename = build_genotype_file(traits[0]["riset"])
-    strains = load_genotype_samples(genotype_filename)
+    samples = load_genotype_samples(genotype_filename)
     exported_traits_data_list = [
-        export_trait_data(td, strains) for td in traits_data_list]
+        export_trait_data(td, samples) for td in traits_data_list]
     clustered = cluster_traits(exported_traits_data_list)
     slinked = slink(clustered)
     traits_order = compute_traits_order(slinked)
-    strains_and_values = retrieve_strains_and_values(
-        traits_order, strains, exported_traits_data_list)
+    samples_and_values = retrieve_samples_and_values(
+        traits_order, samples, exported_traits_data_list)
     traits_filename = "{}/traits_test_file_{}.txt".format(
         TMPDIR, random_string(10))
     generate_traits_file(
-        strains_and_values[0][1],
-        [t[2] for t in strains_and_values],
+        samples_and_values[0][1],
+        [t[2] for t in samples_and_values],
         traits_filename)
 
     main_output, _permutations_output = run_reaper(
@@ -229,9 +229,9 @@ def compute_traits_order(slink_data, neworder: tuple = tuple()):
 
     return __order_maker(neworder, slink_data)
 
-def retrieve_strains_and_values(orders, strainlist, traits_data_list):
+def retrieve_samples_and_values(orders, samplelist, traits_data_list):
     """
-    Get the strains and their corresponding values from `strainlist` and
+    Get the samples and their corresponding values from `samplelist` and
     `traits_data_list`.
 
     This migrates the code in
@@ -240,17 +240,17 @@ def retrieve_strains_and_values(orders, strainlist, traits_data_list):
     # This feels nasty! There's a lot of mutation of values here, that might
     # indicate something untoward in the design of this function and its
     # dependents  ==>  Review
-    strains = []
+    samples = []
     values = []
     rets = []
     for order in orders:
         temp_val = traits_data_list[order]
-        for i, strain in enumerate(strainlist):
+        for i, sample in enumerate(samplelist):
             if temp_val[i] is not None:
-                strains.append(strain)
+                samples.append(sample)
                 values.append(temp_val[i])
-        rets.append([order, strains[:], values[:]])
-        strains = []
+        rets.append([order, samples[:], values[:]])
+        samples = []
         values = []
 
     return rets
diff --git a/tests/unit/computations/test_parsers.py b/tests/unit/computations/test_parsers.py
index 19c3067..b51b0bf 100644
--- a/tests/unit/computations/test_parsers.py
+++ b/tests/unit/computations/test_parsers.py
@@ -15,7 +15,7 @@ class TestParsers(unittest.TestCase):
 
     def test_parse_genofile_with_existing_file(self):
         """Test that a genotype file is parsed correctly"""
-        strains = ["bxd1", "bxd2"]
+        samples = ["bxd1", "bxd2"]
         genotypes = [
             {"chr": "1", "locus": "rs31443144",
              "cm": "1.50", "mb": "3.010274",
@@ -51,4 +51,4 @@ class TestParsers(unittest.TestCase):
             "../test_data/genotype.txt"
         ))
         self.assertEqual(parse_genofile(
-            test_genotype_file), (strains, genotypes))
+            test_genotype_file), (samples, genotypes))
diff --git a/tests/unit/test_heatmaps.py b/tests/unit/test_heatmaps.py
index fd91cf9..b54e2f3 100644
--- a/tests/unit/test_heatmaps.py
+++ b/tests/unit/test_heatmaps.py
@@ -5,41 +5,41 @@ from gn3.heatmaps import (
     get_lrs_from_chr,
     export_trait_data,
     compute_traits_order,
-    retrieve_strains_and_values,
+    retrieve_samples_and_values,
     process_traits_data_for_heatmap)
 from tests.unit.sample_test_data import organised_trait_1, organised_trait_2
 
-strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
+samplelist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
 trait_data = {
     "mysqlid": 36688172,
     "data": {
-        "B6cC3-1": {"strain_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None},
-        "BXD1": {"strain_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None},
-        "BXD12": {"strain_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None},
-        "BXD16": {"strain_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None},
-        "BXD19": {"strain_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None},
-        "BXD2": {"strain_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None},
-        "BXD21": {"strain_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None},
-        "BXD24": {"strain_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None},
-        "BXD27": {"strain_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None},
-        "BXD28": {"strain_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None},
-        "BXD32": {"strain_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None},
-        "BXD39": {"strain_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None},
-        "BXD40": {"strain_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None},
-        "BXD42": {"strain_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None},
-        "BXD6": {"strain_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None},
-        "BXH14": {"strain_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None},
-        "BXH19": {"strain_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None},
-        "BXH2": {"strain_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None},
-        "BXH22": {"strain_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None},
-        "BXH4": {"strain_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None},
-        "BXH6": {"strain_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None},
-        "BXH7": {"strain_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None},
-        "BXH8": {"strain_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None},
-        "BXH9": {"strain_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None},
-        "C3H/HeJ": {"strain_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None},
-        "C57BL/6J": {"strain_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None},
-        "DBA/2J": {"strain_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
+        "B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None},
+        "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None},
+        "BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None},
+        "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None},
+        "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None},
+        "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None},
+        "BXD21": {"sample_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None},
+        "BXD24": {"sample_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None},
+        "BXD27": {"sample_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None},
+        "BXD28": {"sample_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None},
+        "BXD32": {"sample_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None},
+        "BXD39": {"sample_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None},
+        "BXD40": {"sample_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None},
+        "BXD42": {"sample_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None},
+        "BXD6": {"sample_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None},
+        "BXH14": {"sample_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None},
+        "BXH19": {"sample_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None},
+        "BXH2": {"sample_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None},
+        "BXH22": {"sample_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None},
+        "BXH4": {"sample_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None},
+        "BXH6": {"sample_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None},
+        "BXH7": {"sample_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None},
+        "BXH8": {"sample_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None},
+        "BXH9": {"sample_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None},
+        "C3H/HeJ": {"sample_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None},
+        "C57BL/6J": {"sample_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None},
+        "DBA/2J": {"sample_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
 
 slinked = (
     (((0, 2, 0.16381088984330505),
@@ -66,7 +66,7 @@ class TestHeatmap(TestCase):
                 ["all", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)]]:
             with self.subTest(dtype=dtype):
                 self.assertEqual(
-                    export_trait_data(trait_data, strainlist, dtype=dtype),
+                    export_trait_data(trait_data, samplelist, dtype=dtype),
                     expected)
 
     def test_export_trait_data_dtype_all_flags(self):
@@ -106,7 +106,7 @@ class TestHeatmap(TestCase):
             with self.subTest(dtype=dtype, vflag=vflag, nflag=nflag):
                 self.assertEqual(
                     export_trait_data(
-                        trait_data, strainlist, dtype=dtype, var_exists=vflag,
+                        trait_data, samplelist, dtype=dtype, var_exists=vflag,
                         n_exists=nflag),
                     expected)
 
@@ -164,8 +164,8 @@ class TestHeatmap(TestCase):
         self.assertEqual(
             compute_traits_order(slinked), (0, 2, 1, 7, 5, 9, 3, 6, 8, 4))
 
-    def test_retrieve_strains_and_values(self):
-        """Test retrieval of strains and values."""
+    def test_retrieve_samples_and_values(self):
+        """Test retrieval of samples and values."""
         for orders, slist, tdata, expected in [
                 [
                     [2],
@@ -185,9 +185,9 @@ class TestHeatmap(TestCase):
                      [6, None, None, 4, None]],
                     [[3, ["s1", "s4"], [6, 4]]]
                 ]]:
-            with self.subTest(strainlist=slist, traitdata=tdata):
+            with self.subTest(samplelist=slist, traitdata=tdata):
                 self.assertEqual(
-                    retrieve_strains_and_values(orders, slist, tdata), expected)
+                    retrieve_samples_and_values(orders, slist, tdata), expected)
 
     def test_get_lrs_from_chr(self):
         """Check that function gets correct LRS values"""
-- 
cgit 1.4.1


From 60d54d8de466c179a93b6d46ad05ec1b9ba5f4a1 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 27 Sep 2021 05:13:19 +0300
Subject: Narrow the exception and add comments

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* Only catch `FileExistsError`, allowing any other exception to pass
  through. This tries to conform a little to the review at
  https://github.com/genenetwork/genenetwork3/pull/37#discussion_r714552696
---
 gn3/computations/qtlreaper.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 166d2dd..d1ff4ac 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -34,7 +34,8 @@ def create_output_directory(path: str):
     """Create the output directory at `path` if it does not exist."""
     try:
         os.mkdir(path)
-    except OSError:
+    except FileExistsError:
+        # If the directory already exists, do nothing.
         pass
 
 def run_reaper(
-- 
cgit 1.4.1