From 1a9d28e6db2140cc7b3491c6dbcf4fc8cd8c09b6 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Tue, 17 Aug 2021 08:47:11 +0300
Subject: Add tests and fix errors caught with tests

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/computations/heatmap.py: fix errors
* tests/unit/computations/test_heatmap.py: new tests

  Add new tests with the expected source data format, and expected results.
  Fix all errors that were caught by running the tests.
---
 tests/unit/computations/test_heatmap.py | 54 +++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 tests/unit/computations/test_heatmap.py

(limited to 'tests/unit/computations')

diff --git a/tests/unit/computations/test_heatmap.py b/tests/unit/computations/test_heatmap.py
new file mode 100644
index 0000000..78303ae
--- /dev/null
+++ b/tests/unit/computations/test_heatmap.py
@@ -0,0 +1,54 @@
+"""Module contains tests for gn3.computations.heatmap"""
+from unittest import TestCase
+from gn3.computations.heatmap import export_trait_data
+
+strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
+trait_data = {"mysqlid": 36688172, "data": {"B6cC3-1": {"strain_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None}, "BXD1": {"strain_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None}, "BXD12": {"strain_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None}, "BXD16": {"strain_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None}, "BXD19": {"strain_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None}, "BXD2": {"strain_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None}, "BXD21": {"strain_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None}, "BXD24": {"strain_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None}, "BXD27": {"strain_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None}, "BXD28": {"strain_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None}, "BXD32": {"strain_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None}, "BXD39": {"strain_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None}, "BXD40": {"strain_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None}, "BXD42": {"strain_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None}, "BXD6": {"strain_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None}, "BXH14": {"strain_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None}, "BXH19": {"strain_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None}, "BXH2": {"strain_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None}, "BXH22": {"strain_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None}, "BXH4": {"strain_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None}, "BXH6": {"strain_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None}, "BXH7": {"strain_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None}, "BXH8": {"strain_name": "BXH8", "value": 6.77338, "variance": None, 
"ndata": None}, "BXH9": {"strain_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None}, "C3H/HeJ": {"strain_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None}, "C57BL/6J": {"strain_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None}, "DBA/2J": {"strain_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
+
+class TestHeatmap(TestCase):
+    """Class for testing heatmap computation functions"""
+
+    def test_export_trait_data_dtype(self):
+        """
+        Test `export_trait_data` with different values for the `dtype` keyword
+        argument
+        """
+        for dtype, expected in [
+                ["val", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["var", (None, None, None, None, None, None)],
+                ["N", (None, None, None, None, None, None)],
+                ["all", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)]]:
+            with self.subTest(dtype=dtype):
+                self.assertEqual(
+                    export_trait_data(trait_data, strainlist, dtype=dtype),
+                    expected)
+
+    def test_export_trait_data_dtype_all_flags(self):
+        """
+        Test `export_trait_data` with different values for the `dtype` keyword
+        argument and the different flags set up
+        """
+        for dtype, vflag, nflag, expected in [
+                ["val", False, False, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", False, True, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", True, False, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", True, True, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["var", False, False, (None, None, None, None, None, None)],
+                ["var", False, True, (None, None, None, None, None, None)],
+                ["var", True, False, (None, None, None, None, None, None)],
+                ["var", True, True, (None, None, None, None, None, None)],
+                ["N", False, False, (None, None, None, None, None, None)],
+                ["N", False, True, (None, None, None, None, None, None)],
+                ["N", True, False, (None, None, None, None, None, None)],
+                ["N", True, True, (None, None, None, None, None, None)],
+                ["all", False, False, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["all", False, True, (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None, 8.30401, None, 7.80944, None)],
+                ["all", True, False, (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None, 8.30401, None, 7.80944, None)],
+                ["all", True, True, (7.51879, None, None, 7.77141, None, None, 8.39265, None, None, 8.17443, None, None, 8.30401, None, None, 7.80944, None, None)]
+        ]:
+            with self.subTest(dtype=dtype, vflag=vflag, nflag=nflag):
+                self.assertEqual(
+                    export_trait_data(
+                        trait_data, strainlist, dtype=dtype, var_exists=vflag,
+                        n_exists=nflag),
+                    expected)
-- 
cgit v1.2.3


From 41fc5136914548710529cbed7ef370dfb5b4a5c8 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Tue, 17 Aug 2021 11:43:32 +0300
Subject: Test the clustering

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/computations/heatmap.py: Fix clustering bugs
* tests/unit/computations/test_heatmap.py: Add new tests. Fix linting issues.

  Test and fix the clustering function.
---
 gn3/computations/heatmap.py             |  14 ++--
 tests/unit/computations/test_heatmap.py | 109 +++++++++++++++++++++++++++++---
 2 files changed, 106 insertions(+), 17 deletions(-)

(limited to 'tests/unit/computations')

diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
index 8a86fe8..3c35029 100644
--- a/gn3/computations/heatmap.py
+++ b/gn3/computations/heatmap.py
@@ -110,13 +110,13 @@ def cluster_traits(traits_data_list: Sequence[Dict]):
     https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L138-L162
     """
     def __compute_corr(tdata_i, tdata_j):
-        if tdata_j[0] < tdata_i[0]:
-            corr_vals = compute_correlation(tdata_i, tdata_j)
-            corr = corr_vals[0]
-            if (1 - corr) < 0:
-                return 0.0
-            return 1 - corr
-        return 0.0
+        if tdata_i[0] == tdata_j[0]:
+            return 0.0
+        corr_vals = compute_correlation(tdata_i[1], tdata_j[1])
+        corr = corr_vals[0]
+        if (1 - corr) < 0:
+            return 0.0
+        return 1 - corr
 
     def __cluster(tdata_i):
         return tuple(
diff --git a/tests/unit/computations/test_heatmap.py b/tests/unit/computations/test_heatmap.py
index 78303ae..650cb45 100644
--- a/tests/unit/computations/test_heatmap.py
+++ b/tests/unit/computations/test_heatmap.py
@@ -1,9 +1,38 @@
 """Module contains tests for gn3.computations.heatmap"""
 from unittest import TestCase
-from gn3.computations.heatmap import export_trait_data
+from gn3.computations.heatmap import cluster_traits, export_trait_data
 
 strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
-trait_data = {"mysqlid": 36688172, "data": {"B6cC3-1": {"strain_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None}, "BXD1": {"strain_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None}, "BXD12": {"strain_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None}, "BXD16": {"strain_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None}, "BXD19": {"strain_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None}, "BXD2": {"strain_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None}, "BXD21": {"strain_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None}, "BXD24": {"strain_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None}, "BXD27": {"strain_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None}, "BXD28": {"strain_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None}, "BXD32": {"strain_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None}, "BXD39": {"strain_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None}, "BXD40": {"strain_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None}, "BXD42": {"strain_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None}, "BXD6": {"strain_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None}, "BXH14": {"strain_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None}, "BXH19": {"strain_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None}, "BXH2": {"strain_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None}, "BXH22": {"strain_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None}, "BXH4": {"strain_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None}, "BXH6": {"strain_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None}, "BXH7": {"strain_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None}, "BXH8": {"strain_name": "BXH8", "value": 6.77338, "variance": None, 
"ndata": None}, "BXH9": {"strain_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None}, "C3H/HeJ": {"strain_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None}, "C57BL/6J": {"strain_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None}, "DBA/2J": {"strain_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
+trait_data = {
+    "mysqlid": 36688172,
+    "data": {
+        "B6cC3-1": {"strain_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None},
+        "BXD1": {"strain_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None},
+        "BXD12": {"strain_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None},
+        "BXD16": {"strain_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None},
+        "BXD19": {"strain_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None},
+        "BXD2": {"strain_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None},
+        "BXD21": {"strain_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None},
+        "BXD24": {"strain_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None},
+        "BXD27": {"strain_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None},
+        "BXD28": {"strain_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None},
+        "BXD32": {"strain_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None},
+        "BXD39": {"strain_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None},
+        "BXD40": {"strain_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None},
+        "BXD42": {"strain_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None},
+        "BXD6": {"strain_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None},
+        "BXH14": {"strain_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None},
+        "BXH19": {"strain_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None},
+        "BXH2": {"strain_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None},
+        "BXH22": {"strain_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None},
+        "BXH4": {"strain_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None},
+        "BXH6": {"strain_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None},
+        "BXH7": {"strain_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None},
+        "BXH8": {"strain_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None},
+        "BXH9": {"strain_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None},
+        "C3H/HeJ": {"strain_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None},
+        "C57BL/6J": {"strain_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None},
+        "DBA/2J": {"strain_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
 
 class TestHeatmap(TestCase):
     """Class for testing heatmap computation functions"""
@@ -29,10 +58,14 @@ class TestHeatmap(TestCase):
         argument and the different flags set up
         """
         for dtype, vflag, nflag, expected in [
-                ["val", False, False, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["val", False, True, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["val", True, False, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["val", True, True, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", False, False,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", False, True,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", True, False,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", True, True,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
                 ["var", False, False, (None, None, None, None, None, None)],
                 ["var", False, True, (None, None, None, None, None, None)],
                 ["var", True, False, (None, None, None, None, None, None)],
@@ -41,10 +74,17 @@ class TestHeatmap(TestCase):
                 ["N", False, True, (None, None, None, None, None, None)],
                 ["N", True, False, (None, None, None, None, None, None)],
                 ["N", True, True, (None, None, None, None, None, None)],
-                ["all", False, False, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["all", False, True, (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None, 8.30401, None, 7.80944, None)],
-                ["all", True, False, (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None, 8.30401, None, 7.80944, None)],
-                ["all", True, True, (7.51879, None, None, 7.77141, None, None, 8.39265, None, None, 8.17443, None, None, 8.30401, None, None, 7.80944, None, None)]
+                ["all", False, False,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["all", False, True,
+                 (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None,
+                  8.30401, None, 7.80944, None)],
+                ["all", True, False,
+                 (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None,
+                  8.30401, None, 7.80944, None)],
+                ["all", True, True,
+                 (7.51879, None, None, 7.77141, None, None, 8.39265, None, None,
+                  8.17443, None, None, 8.30401, None, None, 7.80944, None, None)]
         ]:
             with self.subTest(dtype=dtype, vflag=vflag, nflag=nflag):
                 self.assertEqual(
@@ -52,3 +92,52 @@ class TestHeatmap(TestCase):
                         trait_data, strainlist, dtype=dtype, var_exists=vflag,
                         n_exists=nflag),
                     expected)
+
+    def test_cluster_traits(self):
+        """
+        Test that the clustering is working as expected.
+        """
+        traits_data_list = [
+            (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944),
+            (6.1427, 6.50588, 7.73705, 6.68328, 7.49293, 7.27398),
+            (8.4211, 8.30581, 9.24076, 8.51173, 9.18455, 8.36077),
+            (10.0904, 10.6509, 9.36716, 9.91202, 8.57444, 10.5731),
+            (10.188, 9.76652, 9.54813, 9.05074, 9.52319, 9.10505),
+            (6.74676, 7.01029, 7.54169, 6.48574, 7.01427, 7.26815),
+            (6.39359, 6.85321, 5.78337, 7.11141, 6.22101, 6.16544),
+            (6.84118, 7.08432, 7.59844, 7.08229, 7.26774, 7.24991),
+            (9.45215, 10.6943, 8.64719, 10.1592, 7.75044, 8.78615),
+            (7.04737, 6.87185, 7.58586, 6.92456, 6.84243, 7.36913)]
+        self.assertEqual(
+            cluster_traits(traits_data_list),
+            ((0.0, 0.20337048635536847, 0.16381088984330505, 1.7388553629398245,
+              1.5025235756329178, 0.6952839500255574, 1.271661230252733,
+              0.2100487290977544, 1.4699690641062024, 0.7934461515867415),
+             (0.20337048635536847, 0.0, 0.2198321044997198, 1.5753041735592204,
+              1.4815755944537086, 0.26087293140686374, 1.6939790104301427,
+              0.06024619831474998, 1.7430082449189215, 0.4497104244247795),
+             (0.16381088984330505, 0.2198321044997198, 0.0, 1.9073926868549234,
+              1.0396738891139845, 0.5278328671176757, 1.6275069061182947,
+              0.2636503792482082, 1.739617877037615, 0.7127042590637039),
+             (1.7388553629398245, 1.5753041735592204, 1.9073926868549234, 0.0,
+              0.9936846292920328, 1.1169999189889366, 0.6007483980555253,
+              1.430209221053372, 0.25879514152086425, 0.9313185954797953),
+             (1.5025235756329178, 1.4815755944537086, 1.0396738891139845,
+              0.9936846292920328, 0.0, 1.027827186339337, 1.1441743109173244,
+              1.4122477962364253, 0.8968250491499363, 1.1683723389247052),
+             (0.6952839500255574, 0.26087293140686374, 0.5278328671176757,
+              1.1169999189889366, 1.027827186339337, 0.0, 1.8420471110023269,
+              0.19179284676938602, 1.4875072385631605, 0.23451785425383564),
+             (1.271661230252733, 1.6939790104301427, 1.6275069061182947,
+              0.6007483980555253, 1.1441743109173244, 1.8420471110023269, 0.0,
+              1.6540234785929928, 0.2140799896286565, 1.7413442197913358),
+             (0.2100487290977544, 0.06024619831474998, 0.2636503792482082,
+              1.430209221053372, 1.4122477962364253, 0.19179284676938602,
+              1.6540234785929928, 0.0, 1.5225640692832796, 0.33370067057028485),
+             (1.4699690641062024, 1.7430082449189215, 1.739617877037615,
+              0.25879514152086425, 0.8968250491499363, 1.4875072385631605,
+              0.2140799896286565, 1.5225640692832796, 0.0, 1.3256191648260216),
+             (0.7934461515867415, 0.4497104244247795, 0.7127042590637039,
+              0.9313185954797953, 1.1683723389247052, 0.23451785425383564,
+              1.7413442197913358, 0.33370067057028485, 1.3256191648260216,
+              0.0)))
-- 
cgit v1.2.3


From ded960e3d32e4d7ebe590deda27fc47175be73d9 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Fri, 20 Aug 2021 13:21:31 +0300
Subject: Add tests for ordering and implement function

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/computations/heatmap.py: implement new ordering function
* tests/unit/computations/test_heatmap.py: add new tests

  Implement the ordering function to migrate the setup of the `neworder`
  variable from GN1 to GN3.

  This migration is incomplete, since there is a dependence on the value
  returned from the `web.webqtl.heatmap.Heatmap.draw` function, in the form of
  the `d_1` variable, in some of the paths.

  The thing is, this `d_1` variable, and the `xoffset` variable seem to be
  used for laying out things on the drawn heatmap, and might actually end up
  not being needed for the new system using plotly, which has other ways of
  laying out things on the drawing.

  For now though, this commit "shims" the presence of these values until
  the use of these variables is confirmed as present or absent in the new GN3
  system.
---
 gn3/computations/heatmap.py             | 28 ++++++++++++++++++++++++++++
 tests/unit/computations/test_heatmap.py | 25 ++++++++++++++++++++++++-
 2 files changed, 52 insertions(+), 1 deletion(-)

(limited to 'tests/unit/computations')

diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
index 3c35029..1c86261 100644
--- a/gn3/computations/heatmap.py
+++ b/gn3/computations/heatmap.py
@@ -175,3 +175,31 @@ def heatmap_data(formd, search_result, conn: Any):
         "traits_list": traits_list,
         "traits_data_list": traits_data_list
     }
+
+def compute_heatmap_order(
+        slink_data, xoffset: int = 40, neworder: tuple = tuple()):
+    """
+    Compute the data used for drawing the heatmap proper from `slink_data`.
+
+    This function tries to reproduce the creation and update of the `neworder`
+    variable in
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L120
+    and in the `web.webqtl.heatmap.Heatmap.draw` function in GN1
+    """
+    d_1 = (0, 0, 0) # returned from self.draw in lines 391 and 399. This is just a placeholder
+
+    def __order_maker(norder, slnk_dt):
+        print("norder:{}, slnk_dt:{}".format(norder, slnk_dt))
+        if isinstance(slnk_dt[0], int) and isinstance(slnk_dt[1], int):
+            return norder + (
+                (xoffset+20, slnk_dt[0]), (xoffset + 40, slnk_dt[1]))
+
+        if isinstance(slnk_dt[0], int):
+            return norder + ((xoffset + 20, slnk_dt[0]), )
+
+        if isinstance(slnk_dt[1], int):
+            return norder + ((xoffset + d_1[0] + 20, slnk_dt[1]), )
+
+        return __order_maker(__order_maker(norder, slnk_dt[0]), slnk_dt[1])
+
+    return __order_maker(neworder, slink_data)
diff --git a/tests/unit/computations/test_heatmap.py b/tests/unit/computations/test_heatmap.py
index 650cb45..14807bb 100644
--- a/tests/unit/computations/test_heatmap.py
+++ b/tests/unit/computations/test_heatmap.py
@@ -1,6 +1,9 @@
 """Module contains tests for gn3.computations.heatmap"""
 from unittest import TestCase
-from gn3.computations.heatmap import cluster_traits, export_trait_data
+from gn3.computations.heatmap import (
+    cluster_traits,
+    export_trait_data,
+    compute_heatmap_order)
 
 strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
 trait_data = {
@@ -34,6 +37,16 @@ trait_data = {
         "C57BL/6J": {"strain_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None},
         "DBA/2J": {"strain_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
 
+slinked = (
+    (((0, 2, 0.16381088984330505),
+      ((1, 7, 0.06024619831474998), 5, 0.19179284676938602),
+      0.20337048635536847),
+     9,
+     0.23451785425383564),
+    ((3, (6, 8, 0.2140799896286565), 0.25879514152086425),
+     4, 0.8968250491499363),
+    0.9313185954797953)
+
 class TestHeatmap(TestCase):
     """Class for testing heatmap computation functions"""
 
@@ -141,3 +154,13 @@ class TestHeatmap(TestCase):
               0.9313185954797953, 1.1683723389247052, 0.23451785425383564,
               1.7413442197913358, 0.33370067057028485, 1.3256191648260216,
               0.0)))
+
+    def test_compute_heatmap_order(self):
+        """Test the orders."""
+        for xoff, expected in [
+                (40, ((60, 9), (60, 4))),
+                (30, ((50, 9), (50, 4))),
+                (20, ((40, 9), (40, 4)))]:
+            with self.subTest(xoffset=xoff):
+                self.assertEqual(
+                    compute_heatmap_order(slinked, xoffset=xoff), expected)
-- 
cgit v1.2.3


From 8b2c776771d2a70613a1e31d6e6671b612cfbafc Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Fri, 20 Aug 2021 14:10:45 +0300
Subject: Retrieve the strains with valid values

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/computations/heatmap.py: add function to get strains with values
* tests/unit/computations/test_heatmap.py: new tests

  Add function to get the strains whose values are not `None` from the
  `trait_data` object passed in.

  This migrates
  https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L215-221
  into a separate function that can handle that and be tested independently of
  any other code.
---
 gn3/computations/heatmap.py             | 19 +++++++++++++++++++
 tests/unit/computations/test_heatmap.py | 14 +++++++++++++-
 2 files changed, 32 insertions(+), 1 deletion(-)

(limited to 'tests/unit/computations')

diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
index 1c86261..5a3c619 100644
--- a/gn3/computations/heatmap.py
+++ b/gn3/computations/heatmap.py
@@ -203,3 +203,22 @@ def compute_heatmap_order(
         return __order_maker(__order_maker(norder, slnk_dt[0]), slnk_dt[1])
 
     return __order_maker(neworder, slink_data)
+
+def retrieve_strains_and_values(strainlist, trait_data):
+    """
+    Get the strains and their corresponding values from `strainlist` and
+    `trait_data`.
+
+    This migrates the code in
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L215-221
+    """
+    def __strains_and_values(acc, i):
+        if trait_data[i] is None:
+            return acc
+        if len(acc) == 0:
+            return ((strainlist[i], ), (trait_data[i], ))
+        _strains = acc[0]
+        _vals = acc[1]
+        return (_strains + (strainlist[i], ), _vals + (trait_data[i], ))
+    return reduce(
+        __strains_and_values, range(len(strainlist)), (tuple(), tuple()))
diff --git a/tests/unit/computations/test_heatmap.py b/tests/unit/computations/test_heatmap.py
index 14807bb..686288d 100644
--- a/tests/unit/computations/test_heatmap.py
+++ b/tests/unit/computations/test_heatmap.py
@@ -3,7 +3,8 @@ from unittest import TestCase
 from gn3.computations.heatmap import (
     cluster_traits,
     export_trait_data,
-    compute_heatmap_order)
+    compute_heatmap_order,
+    retrieve_strains_and_values)
 
 strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
 trait_data = {
@@ -164,3 +165,14 @@ class TestHeatmap(TestCase):
             with self.subTest(xoffset=xoff):
                 self.assertEqual(
                     compute_heatmap_order(slinked, xoffset=xoff), expected)
+
+    def test_retrieve_strains_and_values(self):
+        """Test retrieval of strains and values."""
+        for slist, tdata, expected in [
+                [["s1", "s2", "s3", "s4"], [9, None, 5, 4],
+                 (("s1", "s3", "s4"), (9, 5, 4))],
+                [["s1", "s2", "s3", "s4", "s5"], [6, None, None, 4, None],
+                 (("s1", "s4"), (6, 4))]]:
+            with self.subTest(strainlist=slist, traitdata=tdata):
+                self.assertEqual(
+                    retrieve_strains_and_values(slist, tdata), expected)
-- 
cgit v1.2.3


From 64ce38b45839b6305b009f6e28b0f852409e9bda Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Tue, 31 Aug 2021 10:45:11 +0300
Subject: Parse QTLReaper outputs

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/computations/qtlreaper.py: parse output files
* tests/unit/computations/data/qtlreaper/main_output_sample.txt: sample test
  data
* tests/unit/computations/data/qtlreaper/permu_output_sample.txt: sample test
  data
* tests/unit/computations/test_qtlreaper.py: add tests

  Add code to parse the QTLReaper output data files.
---
 gn3/computations/qtlreaper.py                      | 18 ++++++
 .../data/qtlreaper/main_output_sample.txt          | 11 ++++
 .../data/qtlreaper/permu_output_sample.txt         | 27 ++++++++
 tests/unit/computations/test_qtlreaper.py          | 74 ++++++++++++++++++++++
 4 files changed, 130 insertions(+)
 create mode 100644 tests/unit/computations/data/qtlreaper/main_output_sample.txt
 create mode 100644 tests/unit/computations/data/qtlreaper/permu_output_sample.txt
 create mode 100644 tests/unit/computations/test_qtlreaper.py

(limited to 'tests/unit/computations')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index c058e14..3b8e4db 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -90,3 +90,21 @@ def run_reaper(
 
     subprocess.run(command_list, check=True)
     return (output_filename, permu_output_filename)
+
+
+def parse_reaper_main_results(results_file):
+    with open(results_file, "r") as infile:
+        lines = infile.readlines()
+
+    def __parse_line(line):
+        items = line.strip().split("\t")
+        return items[0:2] + [float(item) for item in items[2:]]
+
+    header = lines[0].strip().split("\t")
+    return [dict(zip(header, __parse_line(line))) for line in lines[1:]]
+
+def parse_reaper_permutation_results(results_file):
+    with open(results_file, "r") as infile:
+        lines = infile.readlines()
+
+    return [float(line.strip()) for line in lines]
diff --git a/tests/unit/computations/data/qtlreaper/main_output_sample.txt b/tests/unit/computations/data/qtlreaper/main_output_sample.txt
new file mode 100644
index 0000000..12b11b4
--- /dev/null
+++ b/tests/unit/computations/data/qtlreaper/main_output_sample.txt
@@ -0,0 +1,11 @@
+ID	Locus	Chr	cM	Mb	LRS	Additive	pValue
+T1	rs31443144	1	1.500	3.010	0.500	-0.074	1.000
+T1	rs6269442	1	1.500	3.492	0.500	-0.074	1.000
+T1	rs32285189	1	1.630	3.511	0.500	-0.074	1.000
+T1	rs258367496	1	1.630	3.660	0.500	-0.074	1.000
+T1	rs32430919	1	1.750	3.777	0.500	-0.074	1.000
+T1	rs36251697	1	1.880	3.812	0.500	-0.074	1.000
+T1	rs30658298	1	2.010	4.431	0.500	-0.074	1.000
+T1	rs51852623	1	2.010	4.447	0.500	-0.074	1.000
+T1	rs31879829	1	2.140	4.519	0.500	-0.074	1.000
+T1	rs36742481	1	2.140	4.776	0.500	-0.074	1.000
diff --git a/tests/unit/computations/data/qtlreaper/permu_output_sample.txt b/tests/unit/computations/data/qtlreaper/permu_output_sample.txt
new file mode 100644
index 0000000..64cff07
--- /dev/null
+++ b/tests/unit/computations/data/qtlreaper/permu_output_sample.txt
@@ -0,0 +1,27 @@
+4.44174
+5.03825
+5.08167
+5.18119
+5.18578
+5.24563
+5.24619
+5.24619
+5.27961
+5.28228
+5.43903
+5.50188
+5.51694
+5.56830
+5.63874
+5.71346
+5.71936
+5.74275
+5.76764
+5.79815
+5.81671
+5.82775
+5.89659
+5.92117
+5.93396
+5.93396
+5.94957
diff --git a/tests/unit/computations/test_qtlreaper.py b/tests/unit/computations/test_qtlreaper.py
new file mode 100644
index 0000000..ec23664
--- /dev/null
+++ b/tests/unit/computations/test_qtlreaper.py
@@ -0,0 +1,74 @@
+"""Module contains tests for gn3.computations.qtlreaper"""
+import os
+from unittest import TestCase
+from gn3.computations.qtlreaper import (
+    parse_reaper_main_results, parse_reaper_permutation_results)
+
+class TestQTLReaper(TestCase):
+    """Class for testing qtlreaper interface functions."""
+
+    def test_parse_reaper_main_results(self):
+        self.assertEqual(
+            parse_reaper_main_results(
+                "tests/unit/computations/data/qtlreaper/main_output_sample.txt"),
+            [
+                {
+                    "ID": "T1", "Locus": "rs31443144", "Chr": 1, "cM": 1.500,
+                    "Mb": 3.010, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs6269442", "Chr": 1, "cM": 1.500,
+                    "Mb": 3.492, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs32285189", "Chr": 1, "cM": 1.630,
+                    "Mb": 3.511, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs258367496", "Chr": 1, "cM": 1.630,
+                    "Mb": 3.660, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs32430919", "Chr": 1, "cM": 1.750,
+                    "Mb": 3.777, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs36251697", "Chr": 1, "cM": 1.880,
+                    "Mb": 3.812, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs30658298", "Chr": 1, "cM": 2.010,
+                    "Mb": 4.431, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs51852623", "Chr": 1, "cM": 2.010,
+                    "Mb": 4.447, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs31879829", "Chr": 1, "cM": 2.140,
+                    "Mb": 4.519, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs36742481", "Chr": 1, "cM": 2.140,
+                    "Mb": 4.776, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                }
+            ])
+
+    def test_parse_reaper_permutation_results(self):
+        self.assertEqual(
+            parse_reaper_permutation_results(
+            "tests/unit/computations/data/qtlreaper/permu_output_sample.txt"),
+            [4.44174, 5.03825, 5.08167, 5.18119, 5.18578, 5.24563, 5.24619,
+             5.24619, 5.27961, 5.28228, 5.43903, 5.50188, 5.51694, 5.56830,
+             5.63874, 5.71346, 5.71936, 5.74275, 5.76764, 5.79815, 5.81671,
+             5.82775, 5.89659, 5.92117, 5.93396, 5.93396, 5.94957])
-- 
cgit v1.2.3


From c3f8013347e3e8850c90cb787edb2bec1f367f7d Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Tue, 31 Aug 2021 10:48:30 +0300
Subject: Fix test

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* The number of the arguments to the function changed, and so the tests for
  the function needed to be updated.
---
 tests/unit/computations/test_heatmap.py | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

(limited to 'tests/unit/computations')

diff --git a/tests/unit/computations/test_heatmap.py b/tests/unit/computations/test_heatmap.py
index 686288d..87f8e45 100644
--- a/tests/unit/computations/test_heatmap.py
+++ b/tests/unit/computations/test_heatmap.py
@@ -168,11 +168,25 @@ class TestHeatmap(TestCase):
 
     def test_retrieve_strains_and_values(self):
         """Test retrieval of strains and values."""
-        for slist, tdata, expected in [
-                [["s1", "s2", "s3", "s4"], [9, None, 5, 4],
-                 (("s1", "s3", "s4"), (9, 5, 4))],
-                [["s1", "s2", "s3", "s4", "s5"], [6, None, None, 4, None],
-                 (("s1", "s4"), (6, 4))]]:
+        for orders, slist, tdata, expected in [
+                [
+                    [(60, 2)],
+                    ["s1", "s2", "s3", "s4"],
+                    [[2, 9, 6, None, 4],
+                     [7, 5, None, None, 4],
+                     [9, None, 5, 4, 7],
+                     [6, None, None, 4, None]],
+                    [[(60, 2), ["s1", "s3", "s4"], [9, 5, 4]]]
+                ],
+                [
+                    [(60, 3)],
+                    ["s1", "s2", "s3", "s4", "s5"],
+                    [[2, 9, 6, None, 4],
+                     [7, 5, None, None, 4],
+                     [9, None, 5, 4, 7],
+                     [6, None, None, 4, None]],
+                    [[(60, 3), ["s1", "s4"], [6, 4]]]
+                ]]:
             with self.subTest(strainlist=slist, traitdata=tdata):
                 self.assertEqual(
-                    retrieve_strains_and_values(slist, tdata), expected)
+                    retrieve_strains_and_values(orders, slist, tdata), expected)
-- 
cgit v1.2.3


From b5e1d1176f1bf4f7c0b68b27beb15e99418f1650 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Tue, 31 Aug 2021 11:16:29 +0300
Subject: Fix linting errors, minor bugs and reorganise code

* Fix some linting errors and some minor bugs caught by the linter.
  Move the `random_string` function to a separate module for use in multiple
  places in the code.
---
 gn3/computations/heatmap.py               |  7 ++++---
 gn3/computations/qtlreaper.py             | 27 ++++++++++++++-------------
 gn3/db/traits.py                          |  5 ++++-
 gn3/heatmaps/heatmaps.py                  | 25 +++++++++++++++++++------
 gn3/random.py                             | 11 +++++++++++
 tests/unit/computations/test_qtlreaper.py |  5 +++--
 6 files changed, 55 insertions(+), 25 deletions(-)
 create mode 100644 gn3/random.py

(limited to 'tests/unit/computations')

diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
index 92014cf..1143450 100644
--- a/gn3/computations/heatmap.py
+++ b/gn3/computations/heatmap.py
@@ -6,6 +6,7 @@ generate various kinds of heatmaps.
 from functools import reduce
 from typing import Any, Dict, Sequence
 from gn3.computations.slink import slink
+from gn3.computations.qtlreaper import generate_traits_file
 from gn3.computations.correlations2 import compute_correlation
 from gn3.db.genotypes import build_genotype_file, load_genotype_samples
 from gn3.db.traits import (
@@ -155,14 +156,14 @@ def heatmap_data(traits_names, conn: Any):
         for fullname in traits_names]
     traits_list = tuple(x[0] for x in traits_details)
     traits_data_list = [x[1] for x in traits_details]
-    exported_traits_data_list = tuple(
-        export_trait_data(td, strainlist) for td in traits_data_list)
     genotype_filename = build_genotype_file(traits_list[0]["riset"])
     strainlist = load_genotype_samples(genotype_filename)
+    exported_traits_data_list = tuple(
+        export_trait_data(td, strainlist) for td in traits_data_list)
     slink_data = slink(cluster_traits(exported_traits_data_list))
     ordering_data = compute_heatmap_order(slink_data)
     strains_and_values = retrieve_strains_and_values(
-        orders, strainlist, exported_traits_data_list)
+        ordering_data, strainlist, exported_traits_data_list)
     strains_values = strains_and_values[0][1]
     trait_values = [t[2] for t in strains_and_values]
     traits_filename = generate_traits_filename()
diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 3b8e4db..30c7051 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -3,17 +3,10 @@ This module contains functions to interact with the `qtlreaper` utility for
 computation of QTLs.
 """
 import os
-import random
-import string
 import subprocess
+from gn3.random import random_string
 from gn3.settings import TMPDIR, REAPER_COMMAND
 
-def random_string(length):
-    """Generate a random string of length `length`."""
-    return "".join(
-        random.choices(
-            string.ascii_letters + string.digits, k=length))
-
 def generate_traits_file(strains, trait_values, traits_filename):
     """
     Generate a traits file for use with `qtlreaper`.
@@ -25,11 +18,13 @@ def generate_traits_file(strains, trait_values, traits_filename):
         computation of QTLs.
     """
     header = "Trait\t{}\n".format("\t".join(strains))
-    data = [header] + [
-        "T{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
-        for i, t in enumerate(trait_values[:-1])] + [
-        "T{}\t{}".format(len(trait_values), "\t".join([str(i) for i in t]))
-        for t in trait_values[-1:]]
+    data = (
+        [header] +
+        ["T{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
+         for i, t in enumerate(trait_values[:-1])] +
+        ["T{}\t{}".format(
+            len(trait_values), "\t".join([str(i) for i in t]))
+         for t in trait_values[-1:]])
     with open(traits_filename, "w") as outfile:
         outfile.writelines(data)
 
@@ -93,6 +88,9 @@ def run_reaper(
 
 
 def parse_reaper_main_results(results_file):
+    """
+    Parse the results file of running QTLReaper into a list of dicts.
+    """
     with open(results_file, "r") as infile:
         lines = infile.readlines()
 
@@ -104,6 +102,9 @@ def parse_reaper_main_results(results_file):
     return [dict(zip(header, __parse_line(line))) for line in lines[1:]]
 
 def parse_reaper_permutation_results(results_file):
+    """
+    Parse the results QTLReaper permutations into a list of values.
+    """
     with open(results_file, "r") as infile:
         lines = infile.readlines()
 
diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index ccb101a..bfe887e 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -1,6 +1,8 @@
 """This class contains functions relating to trait data manipulation"""
-from gn3.settings import TMPDIR
+import os
 from typing import Any, Dict, Union, Sequence
+from gn3.settings import TMPDIR
+from gn3.random import random_string
 from gn3.function_helpers import compose
 from gn3.db.datasets import retrieve_trait_dataset
 
@@ -669,5 +671,6 @@ def retrieve_trait_data(trait: dict, conn: Any, strainlist: Sequence[str] = tupl
     return {}
 
 def generate_traits_filename(base_path: str = TMPDIR):
+    """Generate a unique filename for use with generated traits files."""
     return "{}/traits_test_file_{}.txt".format(
         os.path.abspath(base_path), random_string(10))
diff --git a/gn3/heatmaps/heatmaps.py b/gn3/heatmaps/heatmaps.py
index 3bf7917..88f546d 100644
--- a/gn3/heatmaps/heatmaps.py
+++ b/gn3/heatmaps/heatmaps.py
@@ -14,6 +14,19 @@ def generate_random_data(data_stop: float = 2, width: int = 10, height: int = 30
     return [[random.uniform(0,data_stop) for i in range(0, width)]
             for j in range(0, height)]
 
+def generate_random_data2(data_stop: float = 2, width: int = 10, height: int = 30):
+    """
+    This is mostly a utility function to be used to generate random data, useful
+    for development of the heatmap generation code, without access to the actual
+    database data.
+    """
+    return [
+        [{
+            "value": item,
+            "category": random.choice(["C57BL/6J +", "DBA/2J +"])}
+         for item in axis]
+        for axis in generate_random_data(data_stop, width, height)]
+
 def heatmap_x_axis_names():
     return [
         "UCLA_BXDBXH_CARTILAGE_V2::ILM103710672",
@@ -30,13 +43,14 @@ def heatmap_x_axis_names():
 
 # Grey + Blue + Red
 def generate_heatmap():
-    rows = 20
-    data = generate_random_data(height=rows)
-    y = (["%s"%x for x in range(1, rows+1)][:-1] + ["X"]) #replace last item with x for now
+    cols = 20
+    y_axis = (["%s"%x for x in range(1, cols+1)][:-1] + ["X"]) #replace last item with x for now
+    x_axis = heatmap_x_axis_names()
+    data = generate_random_data(height=cols, width=len(x_axis))
     fig = px.imshow(
         data,
-        x=heatmap_x_axis_names(),
-        y=y,
+        x=x_axis,
+        y=y_axis,
         width=500)
     fig.update_traces(xtype="array")
     fig.update_traces(ytype="array")
@@ -49,6 +63,5 @@ def generate_heatmap():
         coloraxis_colorscale=[
             [0.0, '#3B3B3B'], [0.4999999999999999, '#ABABAB'],
             [0.5, '#F5DE11'], [1.0, '#FF0D00']])
-
     fig.write_html("%s/%s"%(heatmap_dir, "test_image.html"))
     return fig
diff --git a/gn3/random.py b/gn3/random.py
new file mode 100644
index 0000000..f0ba574
--- /dev/null
+++ b/gn3/random.py
@@ -0,0 +1,11 @@
+"""
+Functions to generate complex random data.
+"""
+import random
+import string
+
+def random_string(length):
+    """Generate a random string of length `length`."""
+    return "".join(
+        random.choices(
+            string.ascii_letters + string.digits, k=length))
diff --git a/tests/unit/computations/test_qtlreaper.py b/tests/unit/computations/test_qtlreaper.py
index ec23664..6c3b64d 100644
--- a/tests/unit/computations/test_qtlreaper.py
+++ b/tests/unit/computations/test_qtlreaper.py
@@ -1,5 +1,4 @@
 """Module contains tests for gn3.computations.qtlreaper"""
-import os
 from unittest import TestCase
 from gn3.computations.qtlreaper import (
     parse_reaper_main_results, parse_reaper_permutation_results)
@@ -8,6 +7,7 @@ class TestQTLReaper(TestCase):
     """Class for testing qtlreaper interface functions."""
 
     def test_parse_reaper_main_results(self):
+        """Test that the main results file is parsed correctly."""
         self.assertEqual(
             parse_reaper_main_results(
                 "tests/unit/computations/data/qtlreaper/main_output_sample.txt"),
@@ -65,9 +65,10 @@ class TestQTLReaper(TestCase):
             ])
 
     def test_parse_reaper_permutation_results(self):
+        """Test that the permutations results file is parsed correctly."""
         self.assertEqual(
             parse_reaper_permutation_results(
-            "tests/unit/computations/data/qtlreaper/permu_output_sample.txt"),
+                "tests/unit/computations/data/qtlreaper/permu_output_sample.txt"),
             [4.44174, 5.03825, 5.08167, 5.18119, 5.18578, 5.24563, 5.24619,
              5.24619, 5.27961, 5.28228, 5.43903, 5.50188, 5.51694, 5.56830,
              5.63874, 5.71346, 5.71936, 5.74275, 5.76764, 5.79815, 5.81671,
-- 
cgit v1.2.3


From 679a1af832ad9585c7cf72996043edb08e1b0d10 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Mon, 6 Sep 2021 08:06:14 +0300
Subject: Leave "Chr" value as string when parsing

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* The "Chr" value seems to be mostly a name of some sort, despite seemingly
  being a number. This commit parses the "Chr" value as a string.
  It also updates the tests to expect a string, rather than a number, for
  "Chr" values.
---
 gn3/computations/qtlreaper.py             |  5 +++--
 tests/unit/computations/test_qtlreaper.py | 20 ++++++++++----------
 2 files changed, 13 insertions(+), 12 deletions(-)

(limited to 'tests/unit/computations')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index eff2a80..9b20309 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -94,7 +94,7 @@ def parse_reaper_main_results(results_file):
     with open(results_file, "r") as infile:
         lines = infile.readlines()
 
-    def __parse_column_value(value):
+    def __parse_column_float_value(value):
         try:
             return float(value)
         except:
@@ -102,7 +102,8 @@ def parse_reaper_main_results(results_file):
 
     def __parse_line(line):
         items = line.strip().split("\t")
-        return items[0:2] + [__parse_column_value(item) for item in items[2:]]
+        return items[0:3] + [
+            __parse_column_float_value(item) for item in items[3:]]
 
     header = lines[0].strip().split("\t")
     return [dict(zip(header, __parse_line(line))) for line in lines[1:]]
diff --git a/tests/unit/computations/test_qtlreaper.py b/tests/unit/computations/test_qtlreaper.py
index 6c3b64d..fd3434a 100644
--- a/tests/unit/computations/test_qtlreaper.py
+++ b/tests/unit/computations/test_qtlreaper.py
@@ -13,52 +13,52 @@ class TestQTLReaper(TestCase):
                 "tests/unit/computations/data/qtlreaper/main_output_sample.txt"),
             [
                 {
-                    "ID": "T1", "Locus": "rs31443144", "Chr": 1, "cM": 1.500,
+                    "ID": "T1", "Locus": "rs31443144", "Chr": "1", "cM": 1.500,
                     "Mb": 3.010, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs6269442", "Chr": 1, "cM": 1.500,
+                    "ID": "T1", "Locus": "rs6269442", "Chr": "1", "cM": 1.500,
                     "Mb": 3.492, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs32285189", "Chr": 1, "cM": 1.630,
+                    "ID": "T1", "Locus": "rs32285189", "Chr": "1", "cM": 1.630,
                     "Mb": 3.511, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs258367496", "Chr": 1, "cM": 1.630,
+                    "ID": "T1", "Locus": "rs258367496", "Chr": "1", "cM": 1.630,
                     "Mb": 3.660, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs32430919", "Chr": 1, "cM": 1.750,
+                    "ID": "T1", "Locus": "rs32430919", "Chr": "1", "cM": 1.750,
                     "Mb": 3.777, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs36251697", "Chr": 1, "cM": 1.880,
+                    "ID": "T1", "Locus": "rs36251697", "Chr": "1", "cM": 1.880,
                     "Mb": 3.812, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs30658298", "Chr": 1, "cM": 2.010,
+                    "ID": "T1", "Locus": "rs30658298", "Chr": "1", "cM": 2.010,
                     "Mb": 4.431, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs51852623", "Chr": 1, "cM": 2.010,
+                    "ID": "T1", "Locus": "rs51852623", "Chr": "1", "cM": 2.010,
                     "Mb": 4.447, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs31879829", "Chr": 1, "cM": 2.140,
+                    "ID": "T1", "Locus": "rs31879829", "Chr": "1", "cM": 2.140,
                     "Mb": 4.519, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs36742481", "Chr": 1, "cM": 2.140,
+                    "ID": "T1", "Locus": "rs36742481", "Chr": "1", "cM": 2.140,
                     "Mb": 4.776, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 }
-- 
cgit v1.2.3


From d4943f1d01d89a3928c905f80914a23144126c8e Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Mon, 6 Sep 2021 08:09:20 +0300
Subject: Provide function to organise parsed QTLReaper results

* gn3/computations/qtlreaper.py: Provide a function to organise the results by
  trait for easier use down the line.

* tests/unit/computations/test_qtlreaper.py: provide a test to ensure that the
  organising function works as expected.
---
 gn3/computations/qtlreaper.py             |  25 +++++++
 tests/unit/computations/test_qtlreaper.py | 105 +++++++++++++++++++++++++++++-
 2 files changed, 129 insertions(+), 1 deletion(-)

(limited to 'tests/unit/computations')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 9b20309..8c0e6de 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -86,6 +86,31 @@ def run_reaper(
     subprocess.run(command_list, check=True)
     return (output_filename, permu_output_filename)
 
+def organise_reaper_main_results(parsed_results):
+    def __organise_by_chromosome(chr_name, items):
+        chr_items = [item for item in items if item["Chr"] == chr_name]
+        return {
+            "Chr": str(chr_name),
+            "loci": [{
+                "Locus": locus["Locus"],
+                "cM": locus["cM"],
+                "Mb": locus["Mb"],
+                "LRS": locus["LRS"],
+                "Additive": locus["Additive"],
+                "pValue": locus["pValue"]
+            } for locus in chr_items]}
+
+    def __organise_by_id(identifier, items):
+        id_items = [item for item in items if item["ID"] == identifier]
+        unique_chromosomes = {item["Chr"] for item in id_items}
+        return {
+            "ID": identifier,
+            "chromosomes": [
+                __organise_by_chromosome(chromo, id_items)
+                for chromo in sorted(unique_chromosomes)]}
+
+    unique_ids = {res["ID"] for res in parsed_results}
+    return [__organise_by_id(_id, parsed_results) for _id in sorted(unique_ids)]
 
 def parse_reaper_main_results(results_file):
     """
diff --git a/tests/unit/computations/test_qtlreaper.py b/tests/unit/computations/test_qtlreaper.py
index fd3434a..1d7347f 100644
--- a/tests/unit/computations/test_qtlreaper.py
+++ b/tests/unit/computations/test_qtlreaper.py
@@ -1,7 +1,9 @@
 """Module contains tests for gn3.computations.qtlreaper"""
 from unittest import TestCase
 from gn3.computations.qtlreaper import (
-    parse_reaper_main_results, parse_reaper_permutation_results)
+    parse_reaper_main_results,
+    organise_reaper_main_results,
+    parse_reaper_permutation_results)
 
 class TestQTLReaper(TestCase):
     """Class for testing qtlreaper interface functions."""
@@ -73,3 +75,104 @@ class TestQTLReaper(TestCase):
              5.24619, 5.27961, 5.28228, 5.43903, 5.50188, 5.51694, 5.56830,
              5.63874, 5.71346, 5.71936, 5.74275, 5.76764, 5.79815, 5.81671,
              5.82775, 5.89659, 5.92117, 5.93396, 5.93396, 5.94957])
+
+    def test_organise_reaper_main_results(self):
+        self.assertEqual(
+            organise_reaper_main_results([
+                {
+                    "ID": "T1", "Locus": "rs31443144", "Chr": 1, "cM": 1.500,
+                    "Mb": 3.010, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs6269442", "Chr": 1, "cM": 1.500,
+                    "Mb": 3.492, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs32285189", "Chr": 1, "cM": 1.630,
+                    "Mb": 3.511, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs258367496", "Chr": 1, "cM": 1.630,
+                    "Mb": 3.660, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs32430919", "Chr": 1, "cM": 1.750,
+                    "Mb": 3.777, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs36251697", "Chr": 1, "cM": 1.880,
+                    "Mb": 3.812, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs30658298", "Chr": 1, "cM": 2.010,
+                    "Mb": 4.431, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs51852623", "Chr": 2, "cM": 2.010,
+                    "Mb": 4.447, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs31879829", "Chr": 2, "cM": 2.140,
+                    "Mb": 4.519, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                },
+                {
+                    "ID": "T1", "Locus": "rs36742481", "Chr": 2, "cM": 2.140,
+                    "Mb": 4.776, "LRS": 0.500, "Additive": -0.074,
+                    "pValue": 1.000
+                }
+            ]),
+            [{"ID": "T1",
+              "chromosomes": [
+                  {"Chr": "1",
+                   "loci": [
+                       {
+                           "Locus": "rs31443144",  "cM": 1.500, "Mb": 3.010,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       },
+                       {
+                           "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       },
+                       {
+                           "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       },
+                       {
+                           "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       },
+                       {
+                           "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       },
+                       {
+                           "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       },
+                       {
+                           "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       }]},
+                  {"Chr": "2",
+                   "loci": [
+                       {
+                           "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       },
+                       {
+                           "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       },
+                       {
+                           "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776,
+                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                       }]}]}])
-- 
cgit v1.2.3


From 31ca02d1f095c2cc667e5b7d49131d702982f321 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Wed, 8 Sep 2021 06:52:01 +0300
Subject: Fix the traits order computations for clustering

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/computations/heatmap.py: Fix ordering function
* tests/unit/computations/test_heatmap.py: update test

  The order of the traits is important for the clustering algorithm, since the
  clustering seems to use the distance of one trait from another to determine
  how to order them.

  This commit also gets rid of the xoffset argument that is not important to
  the ordering, and was used in the older GN1 to determine how to draw the
  clustering lines.
---
 gn3/computations/heatmap.py             | 16 ++++++----------
 tests/unit/computations/test_heatmap.py | 11 +++--------
 2 files changed, 9 insertions(+), 18 deletions(-)

(limited to 'tests/unit/computations')

diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
index ccce385..8727c92 100644
--- a/gn3/computations/heatmap.py
+++ b/gn3/computations/heatmap.py
@@ -180,28 +180,24 @@ def heatmap_data(traits_names, conn: Any):
         "traits_filename": traits_filename
     }
 
-def compute_heatmap_order(
-        slink_data, xoffset: int = 40, neworder: tuple = tuple()):
+def compute_traits_order(slink_data, neworder: tuple = tuple()):
     """
-    Compute the data used for drawing the heatmap proper from `slink_data`.
+    Compute the order of the traits for clustering from `slink_data`.
 
     This function tries to reproduce the creation and update of the `neworder`
     variable in
     https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L120
     and in the `web.webqtl.heatmap.Heatmap.draw` function in GN1
     """
-    d_1 = (0, 0, 0) # returned from self.draw in lines 391 and 399. This is just a placeholder
-
     def __order_maker(norder, slnk_dt):
         if isinstance(slnk_dt[0], int) and isinstance(slnk_dt[1], int):
-            return norder + (
-                (xoffset+20, slnk_dt[0]), (xoffset + 40, slnk_dt[1]))
+            return norder + (slnk_dt[0], slnk_dt[1])
 
         if isinstance(slnk_dt[0], int):
-            return norder + ((xoffset + 20, slnk_dt[0]), )
+            return __order_maker((norder + (slnk_dt[0], )), slnk_dt[1])
 
         if isinstance(slnk_dt[1], int):
-            return norder + ((xoffset + d_1[0] + 20, slnk_dt[1]), )
+            return __order_maker(norder, slnk_dt[0]) + (slnk_dt[1], )
 
         return __order_maker(__order_maker(norder, slnk_dt[0]), slnk_dt[1])
 
@@ -222,7 +218,7 @@ def retrieve_strains_and_values(orders, strainlist, traits_data_list):
     values = []
     rets = []
     for order in orders:
-        temp_val = traits_data_list[order[1]]
+        temp_val = traits_data_list[order]
         for i, strain in enumerate(strainlist):
             if temp_val[i] is not None:
                 strains.append(strain)
diff --git a/tests/unit/computations/test_heatmap.py b/tests/unit/computations/test_heatmap.py
index 87f8e45..f1bbefc 100644
--- a/tests/unit/computations/test_heatmap.py
+++ b/tests/unit/computations/test_heatmap.py
@@ -3,7 +3,7 @@ from unittest import TestCase
 from gn3.computations.heatmap import (
     cluster_traits,
     export_trait_data,
-    compute_heatmap_order,
+    compute_traits_order,
     retrieve_strains_and_values)
 
 strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
@@ -158,13 +158,8 @@ class TestHeatmap(TestCase):
 
     def test_compute_heatmap_order(self):
         """Test the orders."""
-        for xoff, expected in [
-                (40, ((60, 9), (60, 4))),
-                (30, ((50, 9), (50, 4))),
-                (20, ((40, 9), (40, 4)))]:
-            with self.subTest(xoffset=xoff):
-                self.assertEqual(
-                    compute_heatmap_order(slinked, xoffset=xoff), expected)
+        self.assertEqual(
+            compute_traits_order(slinked), (0, 2, 1, 7, 5, 9, 3, 6, 8, 4))
 
     def test_retrieve_strains_and_values(self):
         """Test retrieval of strains and values."""
-- 
cgit v1.2.3


From a718069c757bea9f7ecbaee25e23bd581750f906 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Wed, 8 Sep 2021 10:56:56 +0300
Subject: Ease search for traits and chromosomes

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* Return a dict of values, rather than a list, for the traits and chromosomes
  to ease searching through the data.
---
 gn3/computations/qtlreaper.py             |  9 ++-
 tests/unit/computations/test_qtlreaper.py | 92 +++++++++++++++----------------
 2 files changed, 52 insertions(+), 49 deletions(-)

(limited to 'tests/unit/computations')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 02d6572..5180853 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -110,12 +110,15 @@ def organise_reaper_main_results(parsed_results):
         unique_chromosomes = {item["Chr"] for item in id_items}
         return {
             "ID": identifier,
-            "chromosomes": [
+            "chromosomes": {_chr["Chr"]: _chr for _chr in [
                 __organise_by_chromosome(chromo, id_items)
-                for chromo in sorted(unique_chromosomes)]}
+                for chromo in sorted(
+                        unique_chromosomes, key=chromosome_sorter_key_fn)]}}
 
     unique_ids = {res["ID"] for res in parsed_results}
-    return [__organise_by_id(_id, parsed_results) for _id in sorted(unique_ids)]
+    return {
+        trait["ID"]: trait for trait in
+        [__organise_by_id(_id, parsed_results) for _id in sorted(unique_ids)]}
 
 def parse_reaper_main_results(results_file):
     """
diff --git a/tests/unit/computations/test_qtlreaper.py b/tests/unit/computations/test_qtlreaper.py
index 1d7347f..495ed97 100644
--- a/tests/unit/computations/test_qtlreaper.py
+++ b/tests/unit/computations/test_qtlreaper.py
@@ -130,49 +130,49 @@ class TestQTLReaper(TestCase):
                     "pValue": 1.000
                 }
             ]),
-            [{"ID": "T1",
-              "chromosomes": [
-                  {"Chr": "1",
-                   "loci": [
-                       {
-                           "Locus": "rs31443144",  "cM": 1.500, "Mb": 3.010,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       },
-                       {
-                           "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       },
-                       {
-                           "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       },
-                       {
-                           "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       },
-                       {
-                           "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       },
-                       {
-                           "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       },
-                       {
-                           "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       }]},
-                  {"Chr": "2",
-                   "loci": [
-                       {
-                           "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       },
-                       {
-                           "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       },
-                       {
-                           "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776,
-                           "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                       }]}]}])
+            {"T1": {"ID": "T1",
+                    "chromosomes": {
+                        1: {"Chr": 1,
+                            "loci": [
+                                {
+                                    "Locus": "rs31443144",  "cM": 1.500, "Mb": 3.010,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                },
+                                {
+                                    "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                },
+                                {
+                                    "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                },
+                                {
+                                    "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                },
+                                {
+                                    "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                },
+                                {
+                                    "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                },
+                                {
+                                    "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                }]},
+                        2: {"Chr": 2,
+                            "loci": [
+                                {
+                                    "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                },
+                                {
+                                    "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                },
+                                {
+                                    "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776,
+                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                                }]}}}})
-- 
cgit v1.2.3


From f17b489c8eb94050b81b1a59fb43954d036f7c38 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Wed, 15 Sep 2021 06:01:44 +0300
Subject: Fix format of arguments and expected values

* tests/unit/computations/test_heatmap.py: ordering is no longer provided as
  a list of tuples; the ordering values are just a list of numbers now. This
  commit updates the test to take this into consideration.

* tests/unit/computations/test_qtlreaper.py: the 'Chr' value, if numeric, is
  represented by an actual number, not a string. This commit updates the code
  to take this into consideration.
---
 tests/unit/computations/test_heatmap.py   |  8 ++++----
 tests/unit/computations/test_qtlreaper.py | 20 ++++++++++----------
 2 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'tests/unit/computations')

diff --git a/tests/unit/computations/test_heatmap.py b/tests/unit/computations/test_heatmap.py
index f1bbefc..156af45 100644
--- a/tests/unit/computations/test_heatmap.py
+++ b/tests/unit/computations/test_heatmap.py
@@ -165,22 +165,22 @@ class TestHeatmap(TestCase):
         """Test retrieval of strains and values."""
         for orders, slist, tdata, expected in [
                 [
-                    [(60, 2)],
+                    [2],
                     ["s1", "s2", "s3", "s4"],
                     [[2, 9, 6, None, 4],
                      [7, 5, None, None, 4],
                      [9, None, 5, 4, 7],
                      [6, None, None, 4, None]],
-                    [[(60, 2), ["s1", "s3", "s4"], [9, 5, 4]]]
+                    [[2, ["s1", "s3", "s4"], [9, 5, 4]]]
                 ],
                 [
-                    [(60, 3)],
+                    [3],
                     ["s1", "s2", "s3", "s4", "s5"],
                     [[2, 9, 6, None, 4],
                      [7, 5, None, None, 4],
                      [9, None, 5, 4, 7],
                      [6, None, None, 4, None]],
-                    [[(60, 3), ["s1", "s4"], [6, 4]]]
+                    [[3, ["s1", "s4"], [6, 4]]]
                 ]]:
             with self.subTest(strainlist=slist, traitdata=tdata):
                 self.assertEqual(
diff --git a/tests/unit/computations/test_qtlreaper.py b/tests/unit/computations/test_qtlreaper.py
index 495ed97..1d67827 100644
--- a/tests/unit/computations/test_qtlreaper.py
+++ b/tests/unit/computations/test_qtlreaper.py
@@ -15,52 +15,52 @@ class TestQTLReaper(TestCase):
                 "tests/unit/computations/data/qtlreaper/main_output_sample.txt"),
             [
                 {
-                    "ID": "T1", "Locus": "rs31443144", "Chr": "1", "cM": 1.500,
+                    "ID": "T1", "Locus": "rs31443144", "Chr": 1, "cM": 1.500,
                     "Mb": 3.010, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs6269442", "Chr": "1", "cM": 1.500,
+                    "ID": "T1", "Locus": "rs6269442", "Chr": 1, "cM": 1.500,
                     "Mb": 3.492, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs32285189", "Chr": "1", "cM": 1.630,
+                    "ID": "T1", "Locus": "rs32285189", "Chr": 1, "cM": 1.630,
                     "Mb": 3.511, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs258367496", "Chr": "1", "cM": 1.630,
+                    "ID": "T1", "Locus": "rs258367496", "Chr": 1, "cM": 1.630,
                     "Mb": 3.660, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs32430919", "Chr": "1", "cM": 1.750,
+                    "ID": "T1", "Locus": "rs32430919", "Chr": 1, "cM": 1.750,
                     "Mb": 3.777, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs36251697", "Chr": "1", "cM": 1.880,
+                    "ID": "T1", "Locus": "rs36251697", "Chr": 1, "cM": 1.880,
                     "Mb": 3.812, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs30658298", "Chr": "1", "cM": 2.010,
+                    "ID": "T1", "Locus": "rs30658298", "Chr": 1, "cM": 2.010,
                     "Mb": 4.431, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs51852623", "Chr": "1", "cM": 2.010,
+                    "ID": "T1", "Locus": "rs51852623", "Chr": 1, "cM": 2.010,
                     "Mb": 4.447, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs31879829", "Chr": "1", "cM": 2.140,
+                    "ID": "T1", "Locus": "rs31879829", "Chr": 1, "cM": 2.140,
                     "Mb": 4.519, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs36742481", "Chr": "1", "cM": 2.140,
+                    "ID": "T1", "Locus": "rs36742481", "Chr": 1, "cM": 2.140,
                     "Mb": 4.776, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 }
-- 
cgit v1.2.3


From e3e18950cfcdec918429dcbb5d5ed2e9616b7a20 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Wed, 15 Sep 2021 11:19:56 +0300
Subject: Reorganise modules

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* The heatmap generation does not fall cleanly within the computations or db
  modules. This commit moves it to the higher-level gn3 module.
---
 gn3/computations/heatmap.py             | 277 -----------------------------
 gn3/heatmaps.py                         | 302 ++++++++++++++++++++++++++++++++
 gn3/heatmaps/heatmaps.py                |  67 -------
 tests/unit/computations/test_heatmap.py | 187 --------------------
 tests/unit/test_heatmaps.py             | 187 ++++++++++++++++++++
 5 files changed, 489 insertions(+), 531 deletions(-)
 delete mode 100644 gn3/computations/heatmap.py
 create mode 100644 gn3/heatmaps.py
 delete mode 100644 gn3/heatmaps/heatmaps.py
 delete mode 100644 tests/unit/computations/test_heatmap.py
 create mode 100644 tests/unit/test_heatmaps.py

(limited to 'tests/unit/computations')

diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
deleted file mode 100644
index 8727c92..0000000
--- a/gn3/computations/heatmap.py
+++ /dev/null
@@ -1,277 +0,0 @@
-"""
-This module will contain functions to be used in computation of the data used to
-generate various kinds of heatmaps.
-"""
-
-from functools import reduce
-from typing import Any, Dict, Sequence
-from gn3.computations.slink import slink
-from gn3.computations.qtlreaper import generate_traits_file
-from gn3.computations.correlations2 import compute_correlation
-from gn3.db.genotypes import build_genotype_file, load_genotype_samples
-from gn3.db.traits import (
-    retrieve_trait_data,
-    retrieve_trait_info,
-    generate_traits_filename)
-
-def export_trait_data(
-        trait_data: dict, strainlist: Sequence[str], dtype: str = "val",
-        var_exists: bool = False, n_exists: bool = False):
-    """
-    Export data according to `strainlist`. Mostly used in calculating
-    correlations.
-
-    DESCRIPTION:
-    Migrated from
-    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L166-L211
-
-    PARAMETERS
-    trait: (dict)
-      The dictionary of key-value pairs representing a trait
-    strainlist: (list)
-      A list of strain names
-    dtype: (str)
-      ... verify what this is ...
-    var_exists: (bool)
-      A flag indicating existence of variance
-    n_exists: (bool)
-      A flag indicating existence of ndata
-    """
-    def __export_all_types(tdata, strain):
-        sample_data = []
-        if tdata[strain]["value"]:
-            sample_data.append(tdata[strain]["value"])
-            if var_exists:
-                if tdata[strain]["variance"]:
-                    sample_data.append(tdata[strain]["variance"])
-                else:
-                    sample_data.append(None)
-            if n_exists:
-                if tdata[strain]["ndata"]:
-                    sample_data.append(tdata[strain]["ndata"])
-                else:
-                    sample_data.append(None)
-        else:
-            if var_exists and n_exists:
-                sample_data += [None, None, None]
-            elif var_exists or n_exists:
-                sample_data += [None, None]
-            else:
-                sample_data.append(None)
-
-        return tuple(sample_data)
-
-    def __exporter(accumulator, strain):
-        # pylint: disable=[R0911]
-        if strain in trait_data["data"]:
-            if dtype == "val":
-                return accumulator + (trait_data["data"][strain]["value"], )
-            if dtype == "var":
-                return accumulator + (trait_data["data"][strain]["variance"], )
-            if dtype == "N":
-                return accumulator + (trait_data["data"][strain]["ndata"], )
-            if dtype == "all":
-                return accumulator + __export_all_types(trait_data["data"], strain)
-            raise KeyError("Type `%s` is incorrect" % dtype)
-        if var_exists and n_exists:
-            return accumulator + (None, None, None)
-        if var_exists or n_exists:
-            return accumulator + (None, None)
-        return accumulator + (None,)
-
-    return reduce(__exporter, strainlist, tuple())
-
-def trait_display_name(trait: Dict):
-    """
-    Given a trait, return a name to use to display the trait on a heatmap.
-
-    DESCRIPTION
-    Migrated from
-    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L141-L157
-    """
-    if trait.get("db", None) and trait.get("trait_name", None):
-        if trait["db"]["dataset_type"] == "Temp":
-            desc = trait["description"]
-            if desc.find("PCA") >= 0:
-                return "%s::%s" % (
-                    trait["db"]["displayname"],
-                    desc[desc.rindex(':')+1:].strip())
-            return "%s::%s" % (
-                trait["db"]["displayname"],
-                desc[:desc.index('entered')].strip())
-        prefix = "%s::%s" % (
-            trait["db"]["dataset_name"], trait["trait_name"])
-        if trait["cellid"]:
-            return "%s::%s" % (prefix, trait["cellid"])
-        return prefix
-    return trait["description"]
-
-def cluster_traits(traits_data_list: Sequence[Dict]):
-    """
-    Clusters the trait values.
-
-    DESCRIPTION
-    Attempts to replicate the clustering of the traits, as done at
-    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L138-L162
-    """
-    def __compute_corr(tdata_i, tdata_j):
-        if tdata_i[0] == tdata_j[0]:
-            return 0.0
-        corr_vals = compute_correlation(tdata_i[1], tdata_j[1])
-        corr = corr_vals[0]
-        if (1 - corr) < 0:
-            return 0.0
-        return 1 - corr
-
-    def __cluster(tdata_i):
-        return tuple(
-            __compute_corr(tdata_i, tdata_j)
-            for tdata_j in enumerate(traits_data_list))
-
-    return tuple(__cluster(tdata_i) for tdata_i in enumerate(traits_data_list))
-
-def heatmap_data(traits_names, conn: Any):
-    """
-    heatmap function
-
-    DESCRIPTION
-    This function is an attempt to reproduce the initialisation at
-    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L46-L64
-    and also the clustering and slink computations at
-    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L138-L165
-    with the help of the `gn3.computations.heatmap.cluster_traits` function.
-
-    It does not try to actually draw the heatmap image.
-
-    PARAMETERS:
-    TODO: Elaborate on the parameters here...
-    """
-    threshold = 0 # webqtlConfig.PUBLICTHRESH
-    def __retrieve_traitlist_and_datalist(threshold, fullname):
-        trait = retrieve_trait_info(threshold, fullname, conn)
-        return (trait, retrieve_trait_data(trait, conn))
-
-    traits_details = [
-        __retrieve_traitlist_and_datalist(threshold, fullname)
-        for fullname in traits_names]
-    traits_list = tuple(x[0] for x in traits_details)
-    traits_data_list = [x[1] for x in traits_details]
-    genotype_filename = build_genotype_file(traits_list[0]["riset"])
-    strainlist = load_genotype_samples(genotype_filename)
-    exported_traits_data_list = tuple(
-        export_trait_data(td, strainlist) for td in traits_data_list)
-    slink_data = slink(cluster_traits(exported_traits_data_list))
-    ordering_data = compute_heatmap_order(slink_data)
-    strains_and_values = retrieve_strains_and_values(
-        ordering_data, strainlist, exported_traits_data_list)
-    strains_values = strains_and_values[0][1]
-    trait_values = [t[2] for t in strains_and_values]
-    traits_filename = generate_traits_filename()
-    generate_traits_file(strains_values, trait_values, traits_filename)
-
-    return {
-        "slink_data": slink_data,
-        "ordering_data": ordering_data,
-        "strainlist": strainlist,
-        "genotype_filename": genotype_filename,
-        "traits_list": traits_list,
-        "traits_data_list": traits_data_list,
-        "exported_traits_data_list": exported_traits_data_list,
-        "traits_filename": traits_filename
-    }
-
-def compute_traits_order(slink_data, neworder: tuple = tuple()):
-    """
-    Compute the order of the traits for clustering from `slink_data`.
-
-    This function tries to reproduce the creation and update of the `neworder`
-    variable in
-    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L120
-    and in the `web.webqtl.heatmap.Heatmap.draw` function in GN1
-    """
-    def __order_maker(norder, slnk_dt):
-        if isinstance(slnk_dt[0], int) and isinstance(slnk_dt[1], int):
-            return norder + (slnk_dt[0], slnk_dt[1])
-
-        if isinstance(slnk_dt[0], int):
-            return __order_maker((norder + (slnk_dt[0], )), slnk_dt[1])
-
-        if isinstance(slnk_dt[1], int):
-            return __order_maker(norder, slnk_dt[0]) + (slnk_dt[1], )
-
-        return __order_maker(__order_maker(norder, slnk_dt[0]), slnk_dt[1])
-
-    return __order_maker(neworder, slink_data)
-
-def retrieve_strains_and_values(orders, strainlist, traits_data_list):
-    """
-    Get the strains and their corresponding values from `strainlist` and
-    `traits_data_list`.
-
-    This migrates the code in
-    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L215-221
-    """
-    # This feels nasty! There's a lot of mutation of values here, that might
-    # indicate something untoward in the design of this function and its
-    # dependents  ==>  Review
-    strains = []
-    values = []
-    rets = []
-    for order in orders:
-        temp_val = traits_data_list[order]
-        for i, strain in enumerate(strainlist):
-            if temp_val[i] is not None:
-                strains.append(strain)
-                values.append(temp_val[i])
-        rets.append([order, strains[:], values[:]])
-        strains = []
-        values = []
-
-    return rets
-
-def nearest_marker_finder(genotype):
-    """
-    Returns a function to be used with `genotype` to compute the nearest marker
-    to the trait passed to the returned function.
-
-    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L425-434
-    """
-    def __compute_distances(chromo, trait):
-        loci = chromo.get("loci", None)
-        if not loci:
-            return None
-        return tuple(
-            {
-                "name": locus["name"],
-                "distance": abs(locus["Mb"] - trait["mb"])
-            } for locus in loci)
-
-    def __finder(trait):
-        _chrs = tuple(
-            _chr for _chr in genotype["chromosomes"]
-            if str(_chr["name"]) == str(trait["chr"]))
-        if len(_chrs) == 0:
-            return None
-        distances = tuple(
-            distance for dists in
-            filter(
-                lambda x: x is not None,
-                (__compute_distances(_chr, trait) for _chr in _chrs))
-            for distance in dists)
-        nearest = min(distances, key=lambda d: d["distance"])
-        return nearest["name"]
-    return __finder
-
-def get_nearest_marker(traits_list, genotype):
-    """
-    Retrieves the nearest marker for each of the traits in the list.
-
-    DESCRIPTION:
-    This migrates the code in
-    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L419-L438
-    """
-    if not genotype["Mbmap"]:
-        return [None] * len(trait_list)
-
-    marker_finder = nearest_marker_finder(genotype)
-    return [marker_finder(trait) for trait in traits_list]
diff --git a/gn3/heatmaps.py b/gn3/heatmaps.py
new file mode 100644
index 0000000..198fb45
--- /dev/null
+++ b/gn3/heatmaps.py
@@ -0,0 +1,302 @@
+"""
+This module will contain functions to be used in computation of the data used to
+generate various kinds of heatmaps.
+"""
+
+from functools import reduce
+from typing import Any, Dict, Sequence
+from gn3.computations.slink import slink
+from gn3.computations.qtlreaper import generate_traits_file
+from gn3.computations.correlations2 import compute_correlation
+from gn3.db.genotypes import build_genotype_file, load_genotype_samples
+from gn3.db.traits import (
+    retrieve_trait_data,
+    retrieve_trait_info,
+    generate_traits_filename)
+
+def export_trait_data(
+        trait_data: dict, strainlist: Sequence[str], dtype: str = "val",
+        var_exists: bool = False, n_exists: bool = False):
+    """
+    Export data according to `strainlist`. Mostly used in calculating
+    correlations.
+
+    DESCRIPTION:
+    Migrated from
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L166-L211
+
+    PARAMETERS
+    trait: (dict)
+      The dictionary of key-value pairs representing a trait
+    strainlist: (list)
+      A list of strain names
+    dtype: (str)
+      ... verify what this is ...
+    var_exists: (bool)
+      A flag indicating existence of variance
+    n_exists: (bool)
+      A flag indicating existence of ndata
+    """
+    def __export_all_types(tdata, strain):
+        sample_data = []
+        if tdata[strain]["value"]:
+            sample_data.append(tdata[strain]["value"])
+            if var_exists:
+                if tdata[strain]["variance"]:
+                    sample_data.append(tdata[strain]["variance"])
+                else:
+                    sample_data.append(None)
+            if n_exists:
+                if tdata[strain]["ndata"]:
+                    sample_data.append(tdata[strain]["ndata"])
+                else:
+                    sample_data.append(None)
+        else:
+            if var_exists and n_exists:
+                sample_data += [None, None, None]
+            elif var_exists or n_exists:
+                sample_data += [None, None]
+            else:
+                sample_data.append(None)
+
+        return tuple(sample_data)
+
+    def __exporter(accumulator, strain):
+        # pylint: disable=[R0911]
+        if strain in trait_data["data"]:
+            if dtype == "val":
+                return accumulator + (trait_data["data"][strain]["value"], )
+            if dtype == "var":
+                return accumulator + (trait_data["data"][strain]["variance"], )
+            if dtype == "N":
+                return accumulator + (trait_data["data"][strain]["ndata"], )
+            if dtype == "all":
+                return accumulator + __export_all_types(trait_data["data"], strain)
+            raise KeyError("Type `%s` is incorrect" % dtype)
+        if var_exists and n_exists:
+            return accumulator + (None, None, None)
+        if var_exists or n_exists:
+            return accumulator + (None, None)
+        return accumulator + (None,)
+
+    return reduce(__exporter, strainlist, tuple())
+
+def trait_display_name(trait: Dict):
+    """
+    Given a trait, return a name to use to display the trait on a heatmap.
+
+    DESCRIPTION
+    Migrated from
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L141-L157
+    """
+    if trait.get("db", None) and trait.get("trait_name", None):
+        if trait["db"]["dataset_type"] == "Temp":
+            desc = trait["description"]
+            if desc.find("PCA") >= 0:
+                return "%s::%s" % (
+                    trait["db"]["displayname"],
+                    desc[desc.rindex(':')+1:].strip())
+            return "%s::%s" % (
+                trait["db"]["displayname"],
+                desc[:desc.index('entered')].strip())
+        prefix = "%s::%s" % (
+            trait["db"]["dataset_name"], trait["trait_name"])
+        if trait["cellid"]:
+            return "%s::%s" % (prefix, trait["cellid"])
+        return prefix
+    return trait["description"]
+
+def cluster_traits(traits_data_list: Sequence[Dict]):
+    """
+    Clusters the trait values.
+
+    DESCRIPTION
+    Attempts to replicate the clustering of the traits, as done at
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L138-L162
+    """
+    def __compute_corr(tdata_i, tdata_j):
+        if tdata_i[0] == tdata_j[0]:
+            return 0.0
+        corr_vals = compute_correlation(tdata_i[1], tdata_j[1])
+        corr = corr_vals[0]
+        if (1 - corr) < 0:
+            return 0.0
+        return 1 - corr
+
+    def __cluster(tdata_i):
+        return tuple(
+            __compute_corr(tdata_i, tdata_j)
+            for tdata_j in enumerate(traits_data_list))
+
+    return tuple(__cluster(tdata_i) for tdata_i in enumerate(traits_data_list))
+
+def heatmap_data(traits_names, conn: Any):
+    """
+    heatmap function
+
+    DESCRIPTION
+    This function is an attempt to reproduce the initialisation at
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L46-L64
+    and also the clustering and slink computations at
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L138-L165
+    with the help of the `gn3.heatmaps.cluster_traits` function.
+
+    It does not try to actually draw the heatmap image.
+
+    PARAMETERS:
+    TODO: Elaborate on the parameters here...
+    """
+    threshold = 0 # webqtlConfig.PUBLICTHRESH
+    def __retrieve_traitlist_and_datalist(threshold, fullname):
+        trait = retrieve_trait_info(threshold, fullname, conn)
+        return (trait, retrieve_trait_data(trait, conn))
+
+    traits_details = [
+        __retrieve_traitlist_and_datalist(threshold, fullname)
+        for fullname in traits_names]
+    traits_list = tuple(x[0] for x in traits_details)
+    traits_data_list = [x[1] for x in traits_details]
+    genotype_filename = build_genotype_file(traits_list[0]["riset"])
+    strainlist = load_genotype_samples(genotype_filename)
+    exported_traits_data_list = tuple(
+        export_trait_data(td, strainlist) for td in traits_data_list)
+    slink_data = slink(cluster_traits(exported_traits_data_list))
+    ordering_data = compute_heatmap_order(slink_data)
+    strains_and_values = retrieve_strains_and_values(
+        ordering_data, strainlist, exported_traits_data_list)
+    strains_values = strains_and_values[0][1]
+    trait_values = [t[2] for t in strains_and_values]
+    traits_filename = generate_traits_filename()
+    generate_traits_file(strains_values, trait_values, traits_filename)
+
+    return {
+        "slink_data": slink_data,
+        "ordering_data": ordering_data,
+        "strainlist": strainlist,
+        "genotype_filename": genotype_filename,
+        "traits_list": traits_list,
+        "traits_data_list": traits_data_list,
+        "exported_traits_data_list": exported_traits_data_list,
+        "traits_filename": traits_filename
+    }
+
+def compute_traits_order(slink_data, neworder: tuple = tuple()):
+    """
+    Compute the order of the traits for clustering from `slink_data`.
+
+    This function tries to reproduce the creation and update of the `neworder`
+    variable in
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L120
+    and in the `web.webqtl.heatmap.Heatmap.draw` function in GN1
+    """
+    def __order_maker(norder, slnk_dt):
+        if isinstance(slnk_dt[0], int) and isinstance(slnk_dt[1], int):
+            return norder + (slnk_dt[0], slnk_dt[1])
+
+        if isinstance(slnk_dt[0], int):
+            return __order_maker((norder + (slnk_dt[0], )), slnk_dt[1])
+
+        if isinstance(slnk_dt[1], int):
+            return __order_maker(norder, slnk_dt[0]) + (slnk_dt[1], )
+
+        return __order_maker(__order_maker(norder, slnk_dt[0]), slnk_dt[1])
+
+    return __order_maker(neworder, slink_data)
+
+def retrieve_strains_and_values(orders, strainlist, traits_data_list):
+    """
+    Get the strains and their corresponding values from `strainlist` and
+    `traits_data_list`.
+
+    This migrates the code in
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L215-221
+    """
+    # This feels nasty! There's a lot of mutation of values here, that might
+    # indicate something untoward in the design of this function and its
+    # dependents  ==>  Review
+    strains = []
+    values = []
+    rets = []
+    for order in orders:
+        temp_val = traits_data_list[order]
+        for i, strain in enumerate(strainlist):
+            if temp_val[i] is not None:
+                strains.append(strain)
+                values.append(temp_val[i])
+        rets.append([order, strains[:], values[:]])
+        strains = []
+        values = []
+
+    return rets
+
+def nearest_marker_finder(genotype):
+    """
+    Returns a function to be used with `genotype` to compute the nearest marker
+    to the trait passed to the returned function.
+
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L425-434
+    """
+    def __compute_distances(chromo, trait):
+        loci = chromo.get("loci", None)
+        if not loci:
+            return None
+        return tuple(
+            {
+                "name": locus["name"],
+                "distance": abs(locus["Mb"] - trait["mb"])
+            } for locus in loci)
+
+    def __finder(trait):
+        _chrs = tuple(
+            _chr for _chr in genotype["chromosomes"]
+            if str(_chr["name"]) == str(trait["chr"]))
+        if len(_chrs) == 0:
+            return None
+        distances = tuple(
+            distance for dists in
+            filter(
+                lambda x: x is not None,
+                (__compute_distances(_chr, trait) for _chr in _chrs))
+            for distance in dists)
+        nearest = min(distances, key=lambda d: d["distance"])
+        return nearest["name"]
+    return __finder
+
+def get_nearest_marker(traits_list, genotype):
+    """
+    Retrieves the nearest marker for each of the traits in the list.
+
+    DESCRIPTION:
+    This migrates the code in
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L419-L438
+    """
+    if not genotype["Mbmap"]:
+        return [None] * len(trait_list)
+
+    marker_finder = nearest_marker_finder(genotype)
+    return [marker_finder(trait) for trait in traits_list]
+
+# # Grey + Blue + Red
+# def generate_heatmap():
+#     cols = 20
+#     y_axis = (["%s"%x for x in range(1, cols+1)][:-1] + ["X"]) #replace last item with x for now
+#     x_axis = heatmap_x_axis_names()
+#     data = generate_random_data(height=cols, width=len(x_axis))
+#     fig = px.imshow(
+#         data,
+#         x=x_axis,
+#         y=y_axis,
+#         width=500)
+#     fig.update_traces(xtype="array")
+#     fig.update_traces(ytype="array")
+#     # fig.update_traces(xgap=10)
+#     fig.update_xaxes(
+#         visible=True,
+#         title_text="Traits",
+#         title_font_size=16)
+#     fig.update_layout(
+#         coloraxis_colorscale=[
+#             [0.0, '#3B3B3B'], [0.4999999999999999, '#ABABAB'],
+#             [0.5, '#F5DE11'], [1.0, '#FF0D00']])
+#     fig.write_html("%s/%s"%(heatmap_dir, "test_image.html"))
+#     return fig
diff --git a/gn3/heatmaps/heatmaps.py b/gn3/heatmaps/heatmaps.py
deleted file mode 100644
index 88f546d..0000000
--- a/gn3/heatmaps/heatmaps.py
+++ /dev/null
@@ -1,67 +0,0 @@
-import random
-import plotly.express as px
-
-#### Remove these ####
-
-heatmap_dir = "heatmap_images"
-
-def generate_random_data(data_stop: float = 2, width: int = 10, height: int = 30):
-    """
-    This is mostly a utility function to be used to generate random data, useful
-    for development of the heatmap generation code, without access to the actual
-    database data.
-    """
-    return [[random.uniform(0,data_stop) for i in range(0, width)]
-            for j in range(0, height)]
-
-def generate_random_data2(data_stop: float = 2, width: int = 10, height: int = 30):
-    """
-    This is mostly a utility function to be used to generate random data, useful
-    for development of the heatmap generation code, without access to the actual
-    database data.
-    """
-    return [
-        [{
-            "value": item,
-            "category": random.choice(["C57BL/6J +", "DBA/2J +"])}
-         for item in axis]
-        for axis in generate_random_data(data_stop, width, height)]
-
-def heatmap_x_axis_names():
-    return [
-        "UCLA_BXDBXH_CARTILAGE_V2::ILM103710672",
-        "UCLA_BXDBXH_CARTILAGE_V2::ILM2260338",
-        "UCLA_BXDBXH_CARTILAGE_V2::ILM3140576",
-        "UCLA_BXDBXH_CARTILAGE_V2::ILM5670577",
-        "UCLA_BXDBXH_CARTILAGE_V2::ILM2070121",
-        "UCLA_BXDBXH_CARTILAGE_V2::ILM103990541",
-        "UCLA_BXDBXH_CARTILAGE_V2::ILM1190722",
-        "UCLA_BXDBXH_CARTILAGE_V2::ILM6590722",
-        "UCLA_BXDBXH_CARTILAGE_V2::ILM4200064",
-        "UCLA_BXDBXH_CARTILAGE_V2::ILM3140463"]
-#### END: Remove these ####
-
-# Grey + Blue + Red
-def generate_heatmap():
-    cols = 20
-    y_axis = (["%s"%x for x in range(1, cols+1)][:-1] + ["X"]) #replace last item with x for now
-    x_axis = heatmap_x_axis_names()
-    data = generate_random_data(height=cols, width=len(x_axis))
-    fig = px.imshow(
-        data,
-        x=x_axis,
-        y=y_axis,
-        width=500)
-    fig.update_traces(xtype="array")
-    fig.update_traces(ytype="array")
-    # fig.update_traces(xgap=10)
-    fig.update_xaxes(
-        visible=True,
-        title_text="Traits",
-        title_font_size=16)
-    fig.update_layout(
-        coloraxis_colorscale=[
-            [0.0, '#3B3B3B'], [0.4999999999999999, '#ABABAB'],
-            [0.5, '#F5DE11'], [1.0, '#FF0D00']])
-    fig.write_html("%s/%s"%(heatmap_dir, "test_image.html"))
-    return fig
diff --git a/tests/unit/computations/test_heatmap.py b/tests/unit/computations/test_heatmap.py
deleted file mode 100644
index 156af45..0000000
--- a/tests/unit/computations/test_heatmap.py
+++ /dev/null
@@ -1,187 +0,0 @@
-"""Module contains tests for gn3.computations.heatmap"""
-from unittest import TestCase
-from gn3.computations.heatmap import (
-    cluster_traits,
-    export_trait_data,
-    compute_traits_order,
-    retrieve_strains_and_values)
-
-strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
-trait_data = {
-    "mysqlid": 36688172,
-    "data": {
-        "B6cC3-1": {"strain_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None},
-        "BXD1": {"strain_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None},
-        "BXD12": {"strain_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None},
-        "BXD16": {"strain_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None},
-        "BXD19": {"strain_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None},
-        "BXD2": {"strain_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None},
-        "BXD21": {"strain_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None},
-        "BXD24": {"strain_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None},
-        "BXD27": {"strain_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None},
-        "BXD28": {"strain_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None},
-        "BXD32": {"strain_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None},
-        "BXD39": {"strain_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None},
-        "BXD40": {"strain_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None},
-        "BXD42": {"strain_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None},
-        "BXD6": {"strain_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None},
-        "BXH14": {"strain_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None},
-        "BXH19": {"strain_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None},
-        "BXH2": {"strain_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None},
-        "BXH22": {"strain_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None},
-        "BXH4": {"strain_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None},
-        "BXH6": {"strain_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None},
-        "BXH7": {"strain_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None},
-        "BXH8": {"strain_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None},
-        "BXH9": {"strain_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None},
-        "C3H/HeJ": {"strain_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None},
-        "C57BL/6J": {"strain_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None},
-        "DBA/2J": {"strain_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
-
-slinked = (
-    (((0, 2, 0.16381088984330505),
-      ((1, 7, 0.06024619831474998), 5, 0.19179284676938602),
-      0.20337048635536847),
-     9,
-     0.23451785425383564),
-    ((3, (6, 8, 0.2140799896286565), 0.25879514152086425),
-     4, 0.8968250491499363),
-    0.9313185954797953)
-
-class TestHeatmap(TestCase):
-    """Class for testing heatmap computation functions"""
-
-    def test_export_trait_data_dtype(self):
-        """
-        Test `export_trait_data` with different values for the `dtype` keyword
-        argument
-        """
-        for dtype, expected in [
-                ["val", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["var", (None, None, None, None, None, None)],
-                ["N", (None, None, None, None, None, None)],
-                ["all", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)]]:
-            with self.subTest(dtype=dtype):
-                self.assertEqual(
-                    export_trait_data(trait_data, strainlist, dtype=dtype),
-                    expected)
-
-    def test_export_trait_data_dtype_all_flags(self):
-        """
-        Test `export_trait_data` with different values for the `dtype` keyword
-        argument and the different flags set up
-        """
-        for dtype, vflag, nflag, expected in [
-                ["val", False, False,
-                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["val", False, True,
-                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["val", True, False,
-                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["val", True, True,
-                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["var", False, False, (None, None, None, None, None, None)],
-                ["var", False, True, (None, None, None, None, None, None)],
-                ["var", True, False, (None, None, None, None, None, None)],
-                ["var", True, True, (None, None, None, None, None, None)],
-                ["N", False, False, (None, None, None, None, None, None)],
-                ["N", False, True, (None, None, None, None, None, None)],
-                ["N", True, False, (None, None, None, None, None, None)],
-                ["N", True, True, (None, None, None, None, None, None)],
-                ["all", False, False,
-                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["all", False, True,
-                 (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None,
-                  8.30401, None, 7.80944, None)],
-                ["all", True, False,
-                 (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None,
-                  8.30401, None, 7.80944, None)],
-                ["all", True, True,
-                 (7.51879, None, None, 7.77141, None, None, 8.39265, None, None,
-                  8.17443, None, None, 8.30401, None, None, 7.80944, None, None)]
-        ]:
-            with self.subTest(dtype=dtype, vflag=vflag, nflag=nflag):
-                self.assertEqual(
-                    export_trait_data(
-                        trait_data, strainlist, dtype=dtype, var_exists=vflag,
-                        n_exists=nflag),
-                    expected)
-
-    def test_cluster_traits(self):
-        """
-        Test that the clustering is working as expected.
-        """
-        traits_data_list = [
-            (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944),
-            (6.1427, 6.50588, 7.73705, 6.68328, 7.49293, 7.27398),
-            (8.4211, 8.30581, 9.24076, 8.51173, 9.18455, 8.36077),
-            (10.0904, 10.6509, 9.36716, 9.91202, 8.57444, 10.5731),
-            (10.188, 9.76652, 9.54813, 9.05074, 9.52319, 9.10505),
-            (6.74676, 7.01029, 7.54169, 6.48574, 7.01427, 7.26815),
-            (6.39359, 6.85321, 5.78337, 7.11141, 6.22101, 6.16544),
-            (6.84118, 7.08432, 7.59844, 7.08229, 7.26774, 7.24991),
-            (9.45215, 10.6943, 8.64719, 10.1592, 7.75044, 8.78615),
-            (7.04737, 6.87185, 7.58586, 6.92456, 6.84243, 7.36913)]
-        self.assertEqual(
-            cluster_traits(traits_data_list),
-            ((0.0, 0.20337048635536847, 0.16381088984330505, 1.7388553629398245,
-              1.5025235756329178, 0.6952839500255574, 1.271661230252733,
-              0.2100487290977544, 1.4699690641062024, 0.7934461515867415),
-             (0.20337048635536847, 0.0, 0.2198321044997198, 1.5753041735592204,
-              1.4815755944537086, 0.26087293140686374, 1.6939790104301427,
-              0.06024619831474998, 1.7430082449189215, 0.4497104244247795),
-             (0.16381088984330505, 0.2198321044997198, 0.0, 1.9073926868549234,
-              1.0396738891139845, 0.5278328671176757, 1.6275069061182947,
-              0.2636503792482082, 1.739617877037615, 0.7127042590637039),
-             (1.7388553629398245, 1.5753041735592204, 1.9073926868549234, 0.0,
-              0.9936846292920328, 1.1169999189889366, 0.6007483980555253,
-              1.430209221053372, 0.25879514152086425, 0.9313185954797953),
-             (1.5025235756329178, 1.4815755944537086, 1.0396738891139845,
-              0.9936846292920328, 0.0, 1.027827186339337, 1.1441743109173244,
-              1.4122477962364253, 0.8968250491499363, 1.1683723389247052),
-             (0.6952839500255574, 0.26087293140686374, 0.5278328671176757,
-              1.1169999189889366, 1.027827186339337, 0.0, 1.8420471110023269,
-              0.19179284676938602, 1.4875072385631605, 0.23451785425383564),
-             (1.271661230252733, 1.6939790104301427, 1.6275069061182947,
-              0.6007483980555253, 1.1441743109173244, 1.8420471110023269, 0.0,
-              1.6540234785929928, 0.2140799896286565, 1.7413442197913358),
-             (0.2100487290977544, 0.06024619831474998, 0.2636503792482082,
-              1.430209221053372, 1.4122477962364253, 0.19179284676938602,
-              1.6540234785929928, 0.0, 1.5225640692832796, 0.33370067057028485),
-             (1.4699690641062024, 1.7430082449189215, 1.739617877037615,
-              0.25879514152086425, 0.8968250491499363, 1.4875072385631605,
-              0.2140799896286565, 1.5225640692832796, 0.0, 1.3256191648260216),
-             (0.7934461515867415, 0.4497104244247795, 0.7127042590637039,
-              0.9313185954797953, 1.1683723389247052, 0.23451785425383564,
-              1.7413442197913358, 0.33370067057028485, 1.3256191648260216,
-              0.0)))
-
-    def test_compute_heatmap_order(self):
-        """Test the orders."""
-        self.assertEqual(
-            compute_traits_order(slinked), (0, 2, 1, 7, 5, 9, 3, 6, 8, 4))
-
-    def test_retrieve_strains_and_values(self):
-        """Test retrieval of strains and values."""
-        for orders, slist, tdata, expected in [
-                [
-                    [2],
-                    ["s1", "s2", "s3", "s4"],
-                    [[2, 9, 6, None, 4],
-                     [7, 5, None, None, 4],
-                     [9, None, 5, 4, 7],
-                     [6, None, None, 4, None]],
-                    [[2, ["s1", "s3", "s4"], [9, 5, 4]]]
-                ],
-                [
-                    [3],
-                    ["s1", "s2", "s3", "s4", "s5"],
-                    [[2, 9, 6, None, 4],
-                     [7, 5, None, None, 4],
-                     [9, None, 5, 4, 7],
-                     [6, None, None, 4, None]],
-                    [[3, ["s1", "s4"], [6, 4]]]
-                ]]:
-            with self.subTest(strainlist=slist, traitdata=tdata):
-                self.assertEqual(
-                    retrieve_strains_and_values(orders, slist, tdata), expected)
diff --git a/tests/unit/test_heatmaps.py b/tests/unit/test_heatmaps.py
new file mode 100644
index 0000000..265d5a8
--- /dev/null
+++ b/tests/unit/test_heatmaps.py
@@ -0,0 +1,187 @@
+"""Module contains tests for gn3.heatmaps.heatmaps"""
+from unittest import TestCase
+from gn3.heatmaps import (
+    cluster_traits,
+    export_trait_data,
+    compute_traits_order,
+    retrieve_strains_and_values)
+
+strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
+trait_data = {
+    "mysqlid": 36688172,
+    "data": {
+        "B6cC3-1": {"strain_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None},
+        "BXD1": {"strain_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None},
+        "BXD12": {"strain_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None},
+        "BXD16": {"strain_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None},
+        "BXD19": {"strain_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None},
+        "BXD2": {"strain_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None},
+        "BXD21": {"strain_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None},
+        "BXD24": {"strain_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None},
+        "BXD27": {"strain_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None},
+        "BXD28": {"strain_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None},
+        "BXD32": {"strain_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None},
+        "BXD39": {"strain_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None},
+        "BXD40": {"strain_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None},
+        "BXD42": {"strain_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None},
+        "BXD6": {"strain_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None},
+        "BXH14": {"strain_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None},
+        "BXH19": {"strain_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None},
+        "BXH2": {"strain_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None},
+        "BXH22": {"strain_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None},
+        "BXH4": {"strain_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None},
+        "BXH6": {"strain_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None},
+        "BXH7": {"strain_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None},
+        "BXH8": {"strain_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None},
+        "BXH9": {"strain_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None},
+        "C3H/HeJ": {"strain_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None},
+        "C57BL/6J": {"strain_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None},
+        "DBA/2J": {"strain_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
+
+slinked = (
+    (((0, 2, 0.16381088984330505),
+      ((1, 7, 0.06024619831474998), 5, 0.19179284676938602),
+      0.20337048635536847),
+     9,
+     0.23451785425383564),
+    ((3, (6, 8, 0.2140799896286565), 0.25879514152086425),
+     4, 0.8968250491499363),
+    0.9313185954797953)
+
+class TestHeatmap(TestCase):
+    """Class for testing heatmap computation functions"""
+
+    def test_export_trait_data_dtype(self):
+        """
+        Test `export_trait_data` with different values for the `dtype` keyword
+        argument
+        """
+        for dtype, expected in [
+                ["val", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["var", (None, None, None, None, None, None)],
+                ["N", (None, None, None, None, None, None)],
+                ["all", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)]]:
+            with self.subTest(dtype=dtype):
+                self.assertEqual(
+                    export_trait_data(trait_data, strainlist, dtype=dtype),
+                    expected)
+
+    def test_export_trait_data_dtype_all_flags(self):
+        """
+        Test `export_trait_data` with different values for the `dtype` keyword
+        argument and the different flags set up
+        """
+        for dtype, vflag, nflag, expected in [
+                ["val", False, False,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", False, True,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", True, False,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", True, True,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["var", False, False, (None, None, None, None, None, None)],
+                ["var", False, True, (None, None, None, None, None, None)],
+                ["var", True, False, (None, None, None, None, None, None)],
+                ["var", True, True, (None, None, None, None, None, None)],
+                ["N", False, False, (None, None, None, None, None, None)],
+                ["N", False, True, (None, None, None, None, None, None)],
+                ["N", True, False, (None, None, None, None, None, None)],
+                ["N", True, True, (None, None, None, None, None, None)],
+                ["all", False, False,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["all", False, True,
+                 (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None,
+                  8.30401, None, 7.80944, None)],
+                ["all", True, False,
+                 (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None,
+                  8.30401, None, 7.80944, None)],
+                ["all", True, True,
+                 (7.51879, None, None, 7.77141, None, None, 8.39265, None, None,
+                  8.17443, None, None, 8.30401, None, None, 7.80944, None, None)]
+        ]:
+            with self.subTest(dtype=dtype, vflag=vflag, nflag=nflag):
+                self.assertEqual(
+                    export_trait_data(
+                        trait_data, strainlist, dtype=dtype, var_exists=vflag,
+                        n_exists=nflag),
+                    expected)
+
+    def test_cluster_traits(self):
+        """
+        Test that the clustering is working as expected.
+        """
+        traits_data_list = [
+            (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944),
+            (6.1427, 6.50588, 7.73705, 6.68328, 7.49293, 7.27398),
+            (8.4211, 8.30581, 9.24076, 8.51173, 9.18455, 8.36077),
+            (10.0904, 10.6509, 9.36716, 9.91202, 8.57444, 10.5731),
+            (10.188, 9.76652, 9.54813, 9.05074, 9.52319, 9.10505),
+            (6.74676, 7.01029, 7.54169, 6.48574, 7.01427, 7.26815),
+            (6.39359, 6.85321, 5.78337, 7.11141, 6.22101, 6.16544),
+            (6.84118, 7.08432, 7.59844, 7.08229, 7.26774, 7.24991),
+            (9.45215, 10.6943, 8.64719, 10.1592, 7.75044, 8.78615),
+            (7.04737, 6.87185, 7.58586, 6.92456, 6.84243, 7.36913)]
+        self.assertEqual(
+            cluster_traits(traits_data_list),
+            ((0.0, 0.20337048635536847, 0.16381088984330505, 1.7388553629398245,
+              1.5025235756329178, 0.6952839500255574, 1.271661230252733,
+              0.2100487290977544, 1.4699690641062024, 0.7934461515867415),
+             (0.20337048635536847, 0.0, 0.2198321044997198, 1.5753041735592204,
+              1.4815755944537086, 0.26087293140686374, 1.6939790104301427,
+              0.06024619831474998, 1.7430082449189215, 0.4497104244247795),
+             (0.16381088984330505, 0.2198321044997198, 0.0, 1.9073926868549234,
+              1.0396738891139845, 0.5278328671176757, 1.6275069061182947,
+              0.2636503792482082, 1.739617877037615, 0.7127042590637039),
+             (1.7388553629398245, 1.5753041735592204, 1.9073926868549234, 0.0,
+              0.9936846292920328, 1.1169999189889366, 0.6007483980555253,
+              1.430209221053372, 0.25879514152086425, 0.9313185954797953),
+             (1.5025235756329178, 1.4815755944537086, 1.0396738891139845,
+              0.9936846292920328, 0.0, 1.027827186339337, 1.1441743109173244,
+              1.4122477962364253, 0.8968250491499363, 1.1683723389247052),
+             (0.6952839500255574, 0.26087293140686374, 0.5278328671176757,
+              1.1169999189889366, 1.027827186339337, 0.0, 1.8420471110023269,
+              0.19179284676938602, 1.4875072385631605, 0.23451785425383564),
+             (1.271661230252733, 1.6939790104301427, 1.6275069061182947,
+              0.6007483980555253, 1.1441743109173244, 1.8420471110023269, 0.0,
+              1.6540234785929928, 0.2140799896286565, 1.7413442197913358),
+             (0.2100487290977544, 0.06024619831474998, 0.2636503792482082,
+              1.430209221053372, 1.4122477962364253, 0.19179284676938602,
+              1.6540234785929928, 0.0, 1.5225640692832796, 0.33370067057028485),
+             (1.4699690641062024, 1.7430082449189215, 1.739617877037615,
+              0.25879514152086425, 0.8968250491499363, 1.4875072385631605,
+              0.2140799896286565, 1.5225640692832796, 0.0, 1.3256191648260216),
+             (0.7934461515867415, 0.4497104244247795, 0.7127042590637039,
+              0.9313185954797953, 1.1683723389247052, 0.23451785425383564,
+              1.7413442197913358, 0.33370067057028485, 1.3256191648260216,
+              0.0)))
+
+    def test_compute_heatmap_order(self):
+        """Test the orders."""
+        self.assertEqual(
+            compute_traits_order(slinked), (0, 2, 1, 7, 5, 9, 3, 6, 8, 4))
+
+    def test_retrieve_strains_and_values(self):
+        """Test retrieval of strains and values."""
+        for orders, slist, tdata, expected in [
+                [
+                    [2],
+                    ["s1", "s2", "s3", "s4"],
+                    [[2, 9, 6, None, 4],
+                     [7, 5, None, None, 4],
+                     [9, None, 5, 4, 7],
+                     [6, None, None, 4, None]],
+                    [[2, ["s1", "s3", "s4"], [9, 5, 4]]]
+                ],
+                [
+                    [3],
+                    ["s1", "s2", "s3", "s4", "s5"],
+                    [[2, 9, 6, None, 4],
+                     [7, 5, None, None, 4],
+                     [9, None, 5, 4, 7],
+                     [6, None, None, 4, None]],
+                    [[3, ["s1", "s4"], [6, 4]]]
+                ]]:
+            with self.subTest(strainlist=slist, traitdata=tdata):
+                self.assertEqual(
+                    retrieve_strains_and_values(orders, slist, tdata), expected)
-- 
cgit v1.2.3


From 1e2357049adc72808fbf8eaac3da9411d3c78c66 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Fri, 17 Sep 2021 11:20:16 +0300
Subject: Fix a number of linting issues

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi
---
 gn3/computations/qtlreaper.py             |  7 ++--
 gn3/db/genotypes.py                       |  2 +-
 gn3/heatmaps.py                           | 54 ++++++++++++-------------------
 tests/unit/computations/test_qtlreaper.py |  3 +-
 tests/unit/test_heatmaps.py               |  6 ++--
 5 files changed, 32 insertions(+), 40 deletions(-)

(limited to 'tests/unit/computations')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 5180853..377db9b 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -110,9 +110,10 @@ def organise_reaper_main_results(parsed_results):
         unique_chromosomes = {item["Chr"] for item in id_items}
         return {
             "ID": identifier,
-            "chromosomes": {_chr["Chr"]: _chr for _chr in [
-                __organise_by_chromosome(chromo, id_items)
-                for chromo in sorted(
+            "chromosomes": {
+                _chr["Chr"]: _chr for _chr in [
+                    __organise_by_chromosome(chromo, id_items)
+                    for chromo in sorted(
                         unique_chromosomes, key=chromosome_sorter_key_fn)]}}
 
     unique_ids = {res["ID"] for res in parsed_results}
diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py
index b03d55c..9d052d9 100644
--- a/gn3/db/genotypes.py
+++ b/gn3/db/genotypes.py
@@ -174,7 +174,7 @@ def parse_genotype_file(filename: str, parlist: tuple = tuple()):
     geno_obj = dict(labels + header)
     markers = tuple(
         [parse_genotype_marker(line, geno_obj, parlist)
-        for line in data_lines[1:]])
+         for line in data_lines[1:]])
     chromosomes = tuple(
         dict(chromosome) for chromosome in
         build_genotype_chromosomes(geno_obj, markers))
diff --git a/gn3/heatmaps.py b/gn3/heatmaps.py
index 2859dde..c4fc67d 100644
--- a/gn3/heatmaps.py
+++ b/gn3/heatmaps.py
@@ -3,13 +3,13 @@ This module will contain functions to be used in computation of the data used to
 generate various kinds of heatmaps.
 """
 
+from typing import Any, Dict, Sequence
 import numpy as np
 from functools import reduce
 from gn3.settings import TMPDIR
 import plotly.graph_objects as go
 import plotly.figure_factory as ff
 from gn3.random import random_string
-from typing import Any, Dict, Sequence
 from gn3.computations.slink import slink
 from plotly.subplots import make_subplots
 from gn3.computations.correlations2 import compute_correlation
@@ -165,7 +165,7 @@ def build_heatmap(traits_names, conn: Any):
         for fullname in traits_names]
     traits_data_list = [retrieve_trait_data(t, conn) for t in traits]
     genotype_filename = build_genotype_file(traits[0]["riset"])
-    genotype = parse_genotype_file(genotype_filename)
+    # genotype = parse_genotype_file(genotype_filename)
     strains = load_genotype_samples(genotype_filename)
     exported_traits_data_list = [
         export_trait_data(td, strains) for td in traits_data_list]
@@ -183,22 +183,21 @@ def build_heatmap(traits_names, conn: Any):
         [t[2] for t in strains_and_values],
         traits_filename)
 
-    main_output, permutations_output = run_reaper(
+    main_output, _permutations_output = run_reaper(
         genotype_filename, traits_filename, separate_nperm_output=True)
 
     qtlresults = parse_reaper_main_results(main_output)
-    permudata = parse_reaper_permutation_results(permutations_output)
+    # permudata = parse_reaper_permutation_results(permutations_output)
     organised = organise_reaper_main_results(qtlresults)
 
     traits_ids = [# sort numerically, but retain the ids as strings
         str(i) for i in sorted({int(row["ID"]) for row in qtlresults})]
     chromosome_names = sorted(
-        {row["Chr"] for row in qtlresults}, key = chromosome_sorter_key_fn)
-    loci_names = sorted({row["Locus"] for row in qtlresults})
-    ordered_traits_names = {
-        res_id: trait for res_id, trait in
+        {row["Chr"] for row in qtlresults}, key=chromosome_sorter_key_fn)
+    # loci_names = sorted({row["Locus"] for row in qtlresults})
+    ordered_traits_names = dict(
         zip(traits_ids,
-            [traits[idx]["trait_fullname"] for idx in traits_order])}
+            [traits[idx]["trait_fullname"] for idx in traits_order]))
 
     return generate_clustered_heatmap(
         process_traits_data_for_heatmap(
@@ -207,22 +206,11 @@ def build_heatmap(traits_names, conn: Any):
         "single_heatmap_{}".format(random_string(10)),
         y_axis=tuple(
             ordered_traits_names[traits_ids[order]]
-                for order in traits_order),
+            for order in traits_order),
         y_label="Traits",
-        x_axis=[chromo for chromo in chromosome_names],
+        x_axis=chromosome_names,
         x_label="Chromosomes")
 
-    return {
-        "slink_data": slink_data,
-        "ordering_data": ordering_data,
-        "strainlist": strainlist,
-        "genotype_filename": genotype_filename,
-        "traits_list": traits_list,
-        "traits_data_list": traits_data_list,
-        "exported_traits_data_list": exported_traits_data_list,
-        "traits_filename": traits_filename
-    }
-
 def compute_traits_order(slink_data, neworder: tuple = tuple()):
     """
     Compute the order of the traits for clustering from `slink_data`.
@@ -314,7 +302,7 @@ def get_nearest_marker(traits_list, genotype):
     https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L419-L438
     """
     if not genotype["Mbmap"]:
-        return [None] * len(trait_list)
+        return [None] * len(traits_list)
 
     marker_finder = nearest_marker_finder(genotype)
     return [marker_finder(trait) for trait in traits_list]
@@ -340,10 +328,10 @@ def process_traits_data_for_heatmap(data, trait_names, chromosome_names):
     return hdata
 
 def generate_clustered_heatmap(
-        data, clustering_data, image_filename_prefix, x_axis = None,
-        x_label: str = "", y_axis = None, y_label: str = "",
+        data, clustering_data, image_filename_prefix, x_axis=None,
+        x_label: str = "", y_axis=None, y_label: str = "",
         output_dir: str = TMPDIR,
-        colorscale = (
+        colorscale=(
             (0.0, '#5D5D5D'), (0.4999999999999999, '#ABABAB'),
             (0.5, '#F5DE11'), (1.0, '#FF0D00'))):
     """
@@ -357,15 +345,15 @@ def generate_clustered_heatmap(
         shared_yaxes="rows",
         horizontal_spacing=0.001,
         subplot_titles=["distance"] + x_axis,
-        figure = ff.create_dendrogram(
+        figure=ff.create_dendrogram(
             np.array(clustering_data), orientation="right", labels=y_axis))
     hms = [go.Heatmap(
         name=chromo,
-        y = y_axis,
-        z = data_array,
+        y=y_axis,
+        z=data_array,
         showscale=False) for chromo, data_array in zip(x_axis, data)]
-    for i, hm in enumerate(hms):
-        fig.add_trace(hm, row=1, col=(i + 2))
+    for i, heatmap in enumerate(hms):
+        fig.add_trace(heatmap, row=1, col=(i + 2))
 
     fig.update_layout(
         {
@@ -380,8 +368,8 @@ def generate_clustered_heatmap(
     x_axes_layouts = {
         "xaxis{}".format(i+1 if i > 0 else ""): {
             "mirror": False,
-            "showticklabels": True if i==0 else False,
-            "ticks": "outside" if i==0 else ""
+            "showticklabels": True if i == 0 else False,
+            "ticks": "outside" if i == 0 else ""
         }
         for i in range(num_cols)}
 
diff --git a/tests/unit/computations/test_qtlreaper.py b/tests/unit/computations/test_qtlreaper.py
index 1d67827..d420470 100644
--- a/tests/unit/computations/test_qtlreaper.py
+++ b/tests/unit/computations/test_qtlreaper.py
@@ -77,6 +77,7 @@ class TestQTLReaper(TestCase):
              5.82775, 5.89659, 5.92117, 5.93396, 5.93396, 5.94957])
 
     def test_organise_reaper_main_results(self):
+        """Check that results are organised correctly."""
         self.assertEqual(
             organise_reaper_main_results([
                 {
@@ -135,7 +136,7 @@ class TestQTLReaper(TestCase):
                         1: {"Chr": 1,
                             "loci": [
                                 {
-                                    "Locus": "rs31443144",  "cM": 1.500, "Mb": 3.010,
+                                    "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010,
                                     "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
                                 },
                                 {
diff --git a/tests/unit/test_heatmaps.py b/tests/unit/test_heatmaps.py
index f3a81c5..c0a496b 100644
--- a/tests/unit/test_heatmaps.py
+++ b/tests/unit/test_heatmaps.py
@@ -189,6 +189,7 @@ class TestHeatmap(TestCase):
                     retrieve_strains_and_values(orders, slist, tdata), expected)
 
     def test_get_lrs_from_chr(self):
+        """Check that function gets correct LRS values"""
         for trait, chromosome, expected in [
                 [{"chromosomes": {}}, 3, [None]],
                 [{"chromosomes": {3: {"loci": [
@@ -202,6 +203,7 @@ class TestHeatmap(TestCase):
                 self.assertEqual(get_lrs_from_chr(trait, chromosome), expected)
 
     def test_process_traits_data_for_heatmap(self):
+        """Check for correct processing of data for heatmap generation."""
         self.assertEqual(
             process_traits_data_for_heatmap(
                 {"1": {
@@ -210,7 +212,7 @@ class TestHeatmap(TestCase):
                         1: {"Chr": 1,
                             "loci": [
                                 {
-                                    "Locus": "rs31443144",  "cM": 1.500, "Mb": 3.010,
+                                    "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010,
                                     "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
                                 },
                                 {
@@ -257,7 +259,7 @@ class TestHeatmap(TestCase):
                          1: {"Chr": 1,
                              "loci": [
                                  {
-                                     "Locus": "rs31443144",  "cM": 1.500, "Mb": 3.010,
+                                     "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010,
                                      "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
                                  },
                                  {
-- 
cgit v1.2.3


From 95c5c0e73bffbf0287a17309e703063ee54d25ba Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Thu, 23 Sep 2021 03:45:19 +0300
Subject: Refactor: Move common sample data to separate file

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* Move common sample test data into a separate file where it can be imported
  from, to prevent pylint error R0801 which proved tricky to silence in any
  other way.
---
 tests/unit/computations/test_qtlreaper.py |  68 ++++--------------
 tests/unit/db/test_traits.py              |  15 ++--
 tests/unit/sample_test_data.py            | 111 ++++++++++++++++++++++++++++++
 tests/unit/test_heatmaps.py               |  96 +-------------------------
 4 files changed, 134 insertions(+), 156 deletions(-)
 create mode 100644 tests/unit/sample_test_data.py

(limited to 'tests/unit/computations')

diff --git a/tests/unit/computations/test_qtlreaper.py b/tests/unit/computations/test_qtlreaper.py
index d420470..742d106 100644
--- a/tests/unit/computations/test_qtlreaper.py
+++ b/tests/unit/computations/test_qtlreaper.py
@@ -4,6 +4,7 @@ from gn3.computations.qtlreaper import (
     parse_reaper_main_results,
     organise_reaper_main_results,
     parse_reaper_permutation_results)
+from tests.unit.sample_test_data import organised_trait_1
 
 class TestQTLReaper(TestCase):
     """Class for testing qtlreaper interface functions."""
@@ -81,99 +82,54 @@ class TestQTLReaper(TestCase):
         self.assertEqual(
             organise_reaper_main_results([
                 {
-                    "ID": "T1", "Locus": "rs31443144", "Chr": 1, "cM": 1.500,
+                    "ID": "1", "Locus": "rs31443144", "Chr": 1, "cM": 1.500,
                     "Mb": 3.010, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs6269442", "Chr": 1, "cM": 1.500,
+                    "ID": "1", "Locus": "rs6269442", "Chr": 1, "cM": 1.500,
                     "Mb": 3.492, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs32285189", "Chr": 1, "cM": 1.630,
+                    "ID": "1", "Locus": "rs32285189", "Chr": 1, "cM": 1.630,
                     "Mb": 3.511, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs258367496", "Chr": 1, "cM": 1.630,
+                    "ID": "1", "Locus": "rs258367496", "Chr": 1, "cM": 1.630,
                     "Mb": 3.660, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs32430919", "Chr": 1, "cM": 1.750,
+                    "ID": "1", "Locus": "rs32430919", "Chr": 1, "cM": 1.750,
                     "Mb": 3.777, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs36251697", "Chr": 1, "cM": 1.880,
+                    "ID": "1", "Locus": "rs36251697", "Chr": 1, "cM": 1.880,
                     "Mb": 3.812, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs30658298", "Chr": 1, "cM": 2.010,
+                    "ID": "1", "Locus": "rs30658298", "Chr": 1, "cM": 2.010,
                     "Mb": 4.431, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs51852623", "Chr": 2, "cM": 2.010,
+                    "ID": "1", "Locus": "rs51852623", "Chr": 2, "cM": 2.010,
                     "Mb": 4.447, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs31879829", "Chr": 2, "cM": 2.140,
+                    "ID": "1", "Locus": "rs31879829", "Chr": 2, "cM": 2.140,
                     "Mb": 4.519, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs36742481", "Chr": 2, "cM": 2.140,
+                    "ID": "1", "Locus": "rs36742481", "Chr": 2, "cM": 2.140,
                     "Mb": 4.776, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 }
             ]),
-            {"T1": {"ID": "T1",
-                    "chromosomes": {
-                        1: {"Chr": 1,
-                            "loci": [
-                                {
-                                    "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                }]},
-                        2: {"Chr": 2,
-                            "loci": [
-                                {
-                                    "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                }]}}}})
+            organised_trait_1)
diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py
index baa2af3..8af8e82 100644
--- a/tests/unit/db/test_traits.py
+++ b/tests/unit/db/test_traits.py
@@ -170,12 +170,15 @@ class TestTraitsDBFunctions(TestCase):
         db_mock = mock.MagicMock()
 
         STRAIN_ID_SQL: str = "UPDATE Strain SET Name = %s WHERE Id = %s"
-        PUBLISH_DATA_SQL: str = ("UPDATE PublishData SET value = %s "
-                                 "WHERE StrainId = %s AND Id = %s")
-        PUBLISH_SE_SQL: str = ("UPDATE PublishSE SET error = %s "
-                               "WHERE StrainId = %s AND DataId = %s")
-        N_STRAIN_SQL: str = ("UPDATE NStrain SET count = %s "
-                             "WHERE StrainId = %s AND DataId = %s")
+        PUBLISH_DATA_SQL: str = (
+            "UPDATE PublishData SET value = %s "
+            "WHERE StrainId = %s AND Id = %s")
+        PUBLISH_SE_SQL: str = (
+            "UPDATE PublishSE SET error = %s "
+            "WHERE StrainId = %s AND DataId = %s")
+        N_STRAIN_SQL: str = (
+            "UPDATE NStrain SET count = %s "
+            "WHERE StrainId = %s AND DataId = %s")
 
         with db_mock.cursor() as cursor:
             type(cursor).rowcount = 1
diff --git a/tests/unit/sample_test_data.py b/tests/unit/sample_test_data.py
new file mode 100644
index 0000000..407d074
--- /dev/null
+++ b/tests/unit/sample_test_data.py
@@ -0,0 +1,111 @@
+"""
+This module holds a collection of sample data variables, used in more than one
+test.
+
+This is mostly to avoid the `duplicate-code` pylint error that gets raised if
+the same data is defined in more than one file. It has been found that adding
+the `# pylint: disable=R0801` or `# pylint: disable=duplicate-code` to the top
+of the file seems to not work as expected.
+
+Adding these same declarations to .pylintrc is not an option, since that,
+seemingly, would deactivate the warnings for all code in the project: We do not
+want that.
+"""
+
+organised_trait_1 = {
+    "1": {
+        "ID": "1",
+        "chromosomes": {
+            1: {"Chr": 1,
+                "loci": [
+                    {
+                        "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    }]},
+            2: {"Chr": 2,
+                "loci": [
+                    {
+                        "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    }]}}}}
+
+organised_trait_2 = {
+    "2": {
+        "ID": "2",
+        "chromosomes": {
+            1: {"Chr": 1,
+                "loci": [
+                    {
+                        "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    }]},
+            2: {"Chr": 2,
+                "loci": [
+                    {
+                        "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776,
+                        "LRS": 0.579, "Additive": -0.074, "pValue": 1.000
+                    }]}}}}
diff --git a/tests/unit/test_heatmaps.py b/tests/unit/test_heatmaps.py
index c0a496b..fd91cf9 100644
--- a/tests/unit/test_heatmaps.py
+++ b/tests/unit/test_heatmaps.py
@@ -7,6 +7,7 @@ from gn3.heatmaps import (
     compute_traits_order,
     retrieve_strains_and_values,
     process_traits_data_for_heatmap)
+from tests.unit.sample_test_data import organised_trait_1, organised_trait_2
 
 strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
 trait_data = {
@@ -206,100 +207,7 @@ class TestHeatmap(TestCase):
         """Check for correct processing of data for heatmap generation."""
         self.assertEqual(
             process_traits_data_for_heatmap(
-                {"1": {
-                    "ID": "T1",
-                    "chromosomes": {
-                        1: {"Chr": 1,
-                            "loci": [
-                                {
-                                    "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                }]},
-                        2: {"Chr": 2,
-                            "loci": [
-                                {
-                                    "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                }]}}},
-                 "2": {
-                     "ID": "T1",
-                     "chromosomes": {
-                         1: {"Chr": 1,
-                             "loci": [
-                                 {
-                                     "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010,
-                                     "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                 },
-                                 {
-                                     "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492,
-                                     "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                 },
-                                 {
-                                     "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511,
-                                     "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                 },
-                                 {
-                                     "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660,
-                                     "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                 },
-                                 {
-                                     "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777,
-                                     "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                 },
-                                 {
-                                     "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812,
-                                     "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                 },
-                                 {
-                                     "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431,
-                                     "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                 }]},
-                         2: {"Chr": 2,
-                             "loci": [
-                                 {
-                                     "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447,
-                                     "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                 },
-                                 {
-                                     "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519,
-                                     "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                 },
-                                 {
-                                     "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776,
-                                     "LRS": 0.579, "Additive": -0.074, "pValue": 1.000
-                                 }]}}}},
+                {**organised_trait_1, **organised_trait_2},
                 ["2", "1"],
                 [1, 2]),
             [[[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
-- 
cgit v1.2.3


From 19783a18c2bc7941fc5980e593f19fb1d18c3623 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 27 Sep 2021 04:48:53 +0300
Subject: Update terminology: `strain` to `sample`

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* Update the terminology used: use `sample` in place of `strain` according to
  Zachary's direction at
  https://github.com/genenetwork/genenetwork3/pull/37#issuecomment-926043306
---
 gn3/computations/parsers.py             | 10 ++---
 gn3/computations/qtlreaper.py           |  8 ++--
 gn3/db/genotypes.py                     |  8 ++--
 gn3/db/traits.py                        | 44 ++++++++++-----------
 gn3/heatmaps.py                         | 62 ++++++++++++++---------------
 tests/unit/computations/test_parsers.py |  4 +-
 tests/unit/test_heatmaps.py             | 70 ++++++++++++++++-----------------
 7 files changed, 103 insertions(+), 103 deletions(-)

(limited to 'tests/unit/computations')

diff --git a/gn3/computations/parsers.py b/gn3/computations/parsers.py
index 94387ff..1af35d6 100644
--- a/gn3/computations/parsers.py
+++ b/gn3/computations/parsers.py
@@ -14,7 +14,7 @@ def parse_genofile(file_path: str) -> Tuple[List[str],
         'h': 0,
         'u': None,
     }
-    genotypes, strains = [], []
+    genotypes, samples = [], []
     with open(file_path, "r") as _genofile:
         for line in _genofile:
             line = line.strip()
@@ -22,8 +22,8 @@ def parse_genofile(file_path: str) -> Tuple[List[str],
                 continue
             cells = line.split()
             if line.startswith("Chr"):
-                strains = cells[4:]
-                strains = [strain.lower() for strain in strains]
+                samples = cells[4:]
+                samples = [sample.lower() for sample in samples]
                 continue
             values = [__map.get(value.lower(), None) for value in cells[4:]]
             genotype = {
@@ -32,7 +32,7 @@ def parse_genofile(file_path: str) -> Tuple[List[str],
                 "cm": cells[2],
                 "mb": cells[3],
                 "values":  values,
-                "dicvalues": dict(zip(strains, values)),
+                "dicvalues": dict(zip(samples, values)),
             }
             genotypes.append(genotype)
-        return strains, genotypes
+        return samples, genotypes
diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 8b2893e..166d2dd 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -9,17 +9,17 @@ from typing import Union
 from gn3.random import random_string
 from gn3.settings import TMPDIR, REAPER_COMMAND
 
-def generate_traits_file(strains, trait_values, traits_filename):
+def generate_traits_file(samples, trait_values, traits_filename):
     """
     Generate a traits file for use with `qtlreaper`.
 
     PARAMETERS:
-    strains: A list of strains to use as the headers for the various columns.
-    trait_values: A list of lists of values for each trait and strain.
+    samples: A list of samples to use as the headers for the various columns.
+    trait_values: A list of lists of values for each trait and sample.
     traits_filename: The tab-separated value to put the values in for
         computation of QTLs.
     """
-    header = "Trait\t{}\n".format("\t".join(strains))
+    header = "Trait\t{}\n".format("\t".join(samples))
     data = (
         [header] +
         ["{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py
index 9987320..8f18cac 100644
--- a/gn3/db/genotypes.py
+++ b/gn3/db/genotypes.py
@@ -14,16 +14,16 @@ def build_genotype_file(
 
 def load_genotype_samples(genotype_filename: str, file_type: str = "geno"):
     """
-    Load sample of strains from genotype files.
+    Load the list of samples from genotype files.
 
     DESCRIPTION:
-    Traits can contain a varied number of strains, some of which do not exist in
+    Traits can contain a varied number of samples, some of which do not exist in
     certain genotypes. In order to compute QTLs, GEMMAs, etc, we need to ensure
-    to pick only those strains that exist in the genotype under consideration
+    to pick only those samples that exist in the genotype under consideration
     for the traits used in the computation.
 
     This function loads a list of samples from the genotype files for use in
-    filtering out unusable strains.
+    filtering out unusable samples.
 
 
     PARAMETERS:
diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index 4fc47c3..c9d05d7 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -445,7 +445,7 @@ def retrieve_temp_trait_data(trait_info: dict, conn: Any):
             query,
             {"trait_name": trait_info["trait_name"]})
         return [dict(zip(
-            ["strain_name", "value", "se_error", "nstrain", "id"], row))
+            ["sample_name", "value", "se_error", "nstrain", "id"], row))
                 for row in cursor.fetchall()]
     return []
 
@@ -484,7 +484,7 @@ def retrieve_geno_trait_data(trait_info: Dict, conn: Any):
              "species_id": retrieve_species_id(
                  trait_info["db"]["riset"], conn)})
         return [dict(zip(
-            ["strain_name", "value", "se_error", "id"], row))
+            ["sample_name", "value", "se_error", "id"], row))
                 for row in cursor.fetchall()]
     return []
 
@@ -515,7 +515,7 @@ def retrieve_publish_trait_data(trait_info: Dict, conn: Any):
             {"trait_name": trait_info["trait_name"],
              "dataset_id": trait_info["db"]["dataset_id"]})
         return [dict(zip(
-            ["strain_name", "value", "se_error", "nstrain", "id"], row))
+            ["sample_name", "value", "se_error", "nstrain", "id"], row))
                 for row in cursor.fetchall()]
     return []
 
@@ -548,7 +548,7 @@ def retrieve_cellid_trait_data(trait_info: Dict, conn: Any):
              "trait_name": trait_info["trait_name"],
              "dataset_id": trait_info["db"]["dataset_id"]})
         return [dict(zip(
-            ["strain_name", "value", "se_error", "id"], row))
+            ["sample_name", "value", "se_error", "id"], row))
                 for row in cursor.fetchall()]
     return []
 
@@ -577,29 +577,29 @@ def retrieve_probeset_trait_data(trait_info: Dict, conn: Any):
             {"trait_name": trait_info["trait_name"],
              "dataset_name": trait_info["db"]["dataset_name"]})
         return [dict(zip(
-            ["strain_name", "value", "se_error", "id"], row))
+            ["sample_name", "value", "se_error", "id"], row))
                 for row in cursor.fetchall()]
     return []
 
-def with_strainlist_data_setup(strainlist: Sequence[str]):
+def with_samplelist_data_setup(samplelist: Sequence[str]):
     """
-    Build function that computes the trait data from provided list of strains.
+    Build function that computes the trait data from provided list of samples.
 
     PARAMETERS
-    strainlist: (list)
-      A list of strain names
+    samplelist: (list)
+      A list of sample names
 
     RETURNS:
       Returns a function that given some data from the database, computes the
-      strain's value, variance and ndata values, only if the strain is present
-      in the provided `strainlist` variable.
+      sample's value, variance and ndata values, only if the sample is present
+      in the provided `samplelist` variable.
     """
     def setup_fn(tdata):
-        if tdata["strain_name"] in strainlist:
+        if tdata["sample_name"] in samplelist:
             val = tdata["value"]
             if val is not None:
                 return {
-                    "strain_name": tdata["strain_name"],
+                    "sample_name": tdata["sample_name"],
                     "value": val,
                     "variance": tdata["se_error"],
                     "ndata": tdata.get("nstrain", None)
@@ -607,19 +607,19 @@ def with_strainlist_data_setup(strainlist: Sequence[str]):
         return None
     return setup_fn
 
-def without_strainlist_data_setup():
+def without_samplelist_data_setup():
     """
     Build function that computes the trait data.
 
     RETURNS:
       Returns a function that given some data from the database, computes the
-      strain's value, variance and ndata values.
+      sample's value, variance and ndata values.
     """
     def setup_fn(tdata):
         val = tdata["value"]
         if val is not None:
             return {
-                "strain_name": tdata["strain_name"],
+                "sample_name": tdata["sample_name"],
                 "value": val,
                 "variance": tdata["se_error"],
                 "ndata": tdata.get("nstrain", None)
@@ -627,7 +627,7 @@ def without_strainlist_data_setup():
         return None
     return setup_fn
 
-def retrieve_trait_data(trait: dict, conn: Any, strainlist: Sequence[str] = tuple()):
+def retrieve_trait_data(trait: dict, conn: Any, samplelist: Sequence[str] = tuple()):
     """
     Retrieve trait data
 
@@ -650,23 +650,23 @@ def retrieve_trait_data(trait: dict, conn: Any, strainlist: Sequence[str] = tupl
     if results:
         # do something with mysqlid
         mysqlid = results[0]["id"]
-        if strainlist:
+        if samplelist:
             data = [
                 item for item in
-                map(with_strainlist_data_setup(strainlist), results)
+                map(with_samplelist_data_setup(samplelist), results)
                 if item is not None]
         else:
             data = [
                 item for item in
-                map(without_strainlist_data_setup(), results)
+                map(without_samplelist_data_setup(), results)
                 if item is not None]
 
         return {
             "mysqlid": mysqlid,
             "data": dict(map(
                 lambda x: (
-                    x["strain_name"],
-                    {k:v for k, v in x.items() if x != "strain_name"}),
+                    x["sample_name"],
+                    {k:v for k, v in x.items() if x != "sample_name"}),
                 data))}
     return {}
 
diff --git a/gn3/heatmaps.py b/gn3/heatmaps.py
index 45d0c22..b6fc6d3 100644
--- a/gn3/heatmaps.py
+++ b/gn3/heatmaps.py
@@ -27,10 +27,10 @@ from gn3.computations.qtlreaper import (
     organise_reaper_main_results)
 
 def export_trait_data(
-        trait_data: dict, strainlist: Sequence[str], dtype: str = "val",
+        trait_data: dict, samplelist: Sequence[str], dtype: str = "val",
         var_exists: bool = False, n_exists: bool = False):
     """
-    Export data according to `strainlist`. Mostly used in calculating
+    Export data according to `samplelist`. Mostly used in calculating
     correlations.
 
     DESCRIPTION:
@@ -40,8 +40,8 @@ def export_trait_data(
     PARAMETERS
     trait: (dict)
       The dictionary of key-value pairs representing a trait
-    strainlist: (list)
-      A list of strain names
+    samplelist: (list)
+      A list of sample names
     dtype: (str)
       ... verify what this is ...
     var_exists: (bool)
@@ -49,18 +49,18 @@ def export_trait_data(
     n_exists: (bool)
       A flag indicating existence of ndata
     """
-    def __export_all_types(tdata, strain):
+    def __export_all_types(tdata, sample):
         sample_data = []
-        if tdata[strain]["value"]:
-            sample_data.append(tdata[strain]["value"])
+        if tdata[sample]["value"]:
+            sample_data.append(tdata[sample]["value"])
             if var_exists:
-                if tdata[strain]["variance"]:
-                    sample_data.append(tdata[strain]["variance"])
+                if tdata[sample]["variance"]:
+                    sample_data.append(tdata[sample]["variance"])
                 else:
                     sample_data.append(None)
             if n_exists:
-                if tdata[strain]["ndata"]:
-                    sample_data.append(tdata[strain]["ndata"])
+                if tdata[sample]["ndata"]:
+                    sample_data.append(tdata[sample]["ndata"])
                 else:
                     sample_data.append(None)
         else:
@@ -73,17 +73,17 @@ def export_trait_data(
 
         return tuple(sample_data)
 
-    def __exporter(accumulator, strain):
+    def __exporter(accumulator, sample):
         # pylint: disable=[R0911]
-        if strain in trait_data["data"]:
+        if sample in trait_data["data"]:
             if dtype == "val":
-                return accumulator + (trait_data["data"][strain]["value"], )
+                return accumulator + (trait_data["data"][sample]["value"], )
             if dtype == "var":
-                return accumulator + (trait_data["data"][strain]["variance"], )
+                return accumulator + (trait_data["data"][sample]["variance"], )
             if dtype == "N":
-                return accumulator + (trait_data["data"][strain]["ndata"], )
+                return accumulator + (trait_data["data"][sample]["ndata"], )
             if dtype == "all":
-                return accumulator + __export_all_types(trait_data["data"], strain)
+                return accumulator + __export_all_types(trait_data["data"], sample)
             raise KeyError("Type `%s` is incorrect" % dtype)
         if var_exists and n_exists:
             return accumulator + (None, None, None)
@@ -91,7 +91,7 @@ def export_trait_data(
             return accumulator + (None, None)
         return accumulator + (None,)
 
-    return reduce(__exporter, strainlist, tuple())
+    return reduce(__exporter, samplelist, tuple())
 
 def trait_display_name(trait: Dict):
     """
@@ -165,19 +165,19 @@ def build_heatmap(traits_names, conn: Any):
         for fullname in traits_names]
     traits_data_list = [retrieve_trait_data(t, conn) for t in traits]
     genotype_filename = build_genotype_file(traits[0]["riset"])
-    strains = load_genotype_samples(genotype_filename)
+    samples = load_genotype_samples(genotype_filename)
     exported_traits_data_list = [
-        export_trait_data(td, strains) for td in traits_data_list]
+        export_trait_data(td, samples) for td in traits_data_list]
     clustered = cluster_traits(exported_traits_data_list)
     slinked = slink(clustered)
     traits_order = compute_traits_order(slinked)
-    strains_and_values = retrieve_strains_and_values(
-        traits_order, strains, exported_traits_data_list)
+    samples_and_values = retrieve_samples_and_values(
+        traits_order, samples, exported_traits_data_list)
     traits_filename = "{}/traits_test_file_{}.txt".format(
         TMPDIR, random_string(10))
     generate_traits_file(
-        strains_and_values[0][1],
-        [t[2] for t in strains_and_values],
+        samples_and_values[0][1],
+        [t[2] for t in samples_and_values],
         traits_filename)
 
     main_output, _permutations_output = run_reaper(
@@ -229,9 +229,9 @@ def compute_traits_order(slink_data, neworder: tuple = tuple()):
 
     return __order_maker(neworder, slink_data)
 
-def retrieve_strains_and_values(orders, strainlist, traits_data_list):
+def retrieve_samples_and_values(orders, samplelist, traits_data_list):
     """
-    Get the strains and their corresponding values from `strainlist` and
+    Get the samples and their corresponding values from `samplelist` and
     `traits_data_list`.
 
     This migrates the code in
@@ -240,17 +240,17 @@ def retrieve_strains_and_values(orders, strainlist, traits_data_list):
     # This feels nasty! There's a lot of mutation of values here, that might
     # indicate something untoward in the design of this function and its
     # dependents  ==>  Review
-    strains = []
+    samples = []
     values = []
     rets = []
     for order in orders:
         temp_val = traits_data_list[order]
-        for i, strain in enumerate(strainlist):
+        for i, sample in enumerate(samplelist):
             if temp_val[i] is not None:
-                strains.append(strain)
+                samples.append(sample)
                 values.append(temp_val[i])
-        rets.append([order, strains[:], values[:]])
-        strains = []
+        rets.append([order, samples[:], values[:]])
+        samples = []
         values = []
 
     return rets
diff --git a/tests/unit/computations/test_parsers.py b/tests/unit/computations/test_parsers.py
index 19c3067..b51b0bf 100644
--- a/tests/unit/computations/test_parsers.py
+++ b/tests/unit/computations/test_parsers.py
@@ -15,7 +15,7 @@ class TestParsers(unittest.TestCase):
 
     def test_parse_genofile_with_existing_file(self):
         """Test that a genotype file is parsed correctly"""
-        strains = ["bxd1", "bxd2"]
+        samples = ["bxd1", "bxd2"]
         genotypes = [
             {"chr": "1", "locus": "rs31443144",
              "cm": "1.50", "mb": "3.010274",
@@ -51,4 +51,4 @@ class TestParsers(unittest.TestCase):
             "../test_data/genotype.txt"
         ))
         self.assertEqual(parse_genofile(
-            test_genotype_file), (strains, genotypes))
+            test_genotype_file), (samples, genotypes))
diff --git a/tests/unit/test_heatmaps.py b/tests/unit/test_heatmaps.py
index fd91cf9..b54e2f3 100644
--- a/tests/unit/test_heatmaps.py
+++ b/tests/unit/test_heatmaps.py
@@ -5,41 +5,41 @@ from gn3.heatmaps import (
     get_lrs_from_chr,
     export_trait_data,
     compute_traits_order,
-    retrieve_strains_and_values,
+    retrieve_samples_and_values,
     process_traits_data_for_heatmap)
 from tests.unit.sample_test_data import organised_trait_1, organised_trait_2
 
-strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
+samplelist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
 trait_data = {
     "mysqlid": 36688172,
     "data": {
-        "B6cC3-1": {"strain_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None},
-        "BXD1": {"strain_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None},
-        "BXD12": {"strain_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None},
-        "BXD16": {"strain_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None},
-        "BXD19": {"strain_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None},
-        "BXD2": {"strain_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None},
-        "BXD21": {"strain_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None},
-        "BXD24": {"strain_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None},
-        "BXD27": {"strain_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None},
-        "BXD28": {"strain_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None},
-        "BXD32": {"strain_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None},
-        "BXD39": {"strain_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None},
-        "BXD40": {"strain_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None},
-        "BXD42": {"strain_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None},
-        "BXD6": {"strain_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None},
-        "BXH14": {"strain_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None},
-        "BXH19": {"strain_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None},
-        "BXH2": {"strain_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None},
-        "BXH22": {"strain_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None},
-        "BXH4": {"strain_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None},
-        "BXH6": {"strain_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None},
-        "BXH7": {"strain_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None},
-        "BXH8": {"strain_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None},
-        "BXH9": {"strain_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None},
-        "C3H/HeJ": {"strain_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None},
-        "C57BL/6J": {"strain_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None},
-        "DBA/2J": {"strain_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
+        "B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None},
+        "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None},
+        "BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None},
+        "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None},
+        "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None},
+        "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None},
+        "BXD21": {"sample_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None},
+        "BXD24": {"sample_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None},
+        "BXD27": {"sample_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None},
+        "BXD28": {"sample_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None},
+        "BXD32": {"sample_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None},
+        "BXD39": {"sample_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None},
+        "BXD40": {"sample_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None},
+        "BXD42": {"sample_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None},
+        "BXD6": {"sample_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None},
+        "BXH14": {"sample_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None},
+        "BXH19": {"sample_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None},
+        "BXH2": {"sample_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None},
+        "BXH22": {"sample_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None},
+        "BXH4": {"sample_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None},
+        "BXH6": {"sample_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None},
+        "BXH7": {"sample_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None},
+        "BXH8": {"sample_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None},
+        "BXH9": {"sample_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None},
+        "C3H/HeJ": {"sample_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None},
+        "C57BL/6J": {"sample_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None},
+        "DBA/2J": {"sample_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
 
 slinked = (
     (((0, 2, 0.16381088984330505),
@@ -66,7 +66,7 @@ class TestHeatmap(TestCase):
                 ["all", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)]]:
             with self.subTest(dtype=dtype):
                 self.assertEqual(
-                    export_trait_data(trait_data, strainlist, dtype=dtype),
+                    export_trait_data(trait_data, samplelist, dtype=dtype),
                     expected)
 
     def test_export_trait_data_dtype_all_flags(self):
@@ -106,7 +106,7 @@ class TestHeatmap(TestCase):
             with self.subTest(dtype=dtype, vflag=vflag, nflag=nflag):
                 self.assertEqual(
                     export_trait_data(
-                        trait_data, strainlist, dtype=dtype, var_exists=vflag,
+                        trait_data, samplelist, dtype=dtype, var_exists=vflag,
                         n_exists=nflag),
                     expected)
 
@@ -164,8 +164,8 @@ class TestHeatmap(TestCase):
         self.assertEqual(
             compute_traits_order(slinked), (0, 2, 1, 7, 5, 9, 3, 6, 8, 4))
 
-    def test_retrieve_strains_and_values(self):
-        """Test retrieval of strains and values."""
+    def test_retrieve_samples_and_values(self):
+        """Test retrieval of samples and values."""
         for orders, slist, tdata, expected in [
                 [
                     [2],
@@ -185,9 +185,9 @@ class TestHeatmap(TestCase):
                      [6, None, None, 4, None]],
                     [[3, ["s1", "s4"], [6, 4]]]
                 ]]:
-            with self.subTest(strainlist=slist, traitdata=tdata):
+            with self.subTest(samplelist=slist, traitdata=tdata):
                 self.assertEqual(
-                    retrieve_strains_and_values(orders, slist, tdata), expected)
+                    retrieve_samples_and_values(orders, slist, tdata), expected)
 
     def test_get_lrs_from_chr(self):
         """Check that function gets correct LRS values"""
-- 
cgit v1.2.3