From 3d1595d1fcc5085621ef8ce41a007d283595c7c6 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Thu, 12 Aug 2021 17:21:44 +0300
Subject: Initialise heatmap generation module

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/heatmaps/heatmaps.py: Initialise the module with some code to be used to
  test out plotly features on the command-line.
* guix.scm: Add `python-plotly` and `python-pandas` as dependencies.
---
 gn3/heatmaps/heatmaps.py | 54 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 gn3/heatmaps/heatmaps.py

(limited to 'gn3')

diff --git a/gn3/heatmaps/heatmaps.py b/gn3/heatmaps/heatmaps.py
new file mode 100644
index 0000000..3bf7917
--- /dev/null
+++ b/gn3/heatmaps/heatmaps.py
@@ -0,0 +1,54 @@
+import random
+import plotly.express as px
+
+#### Remove these ####
+
+heatmap_dir = "heatmap_images"
+
+def generate_random_data(data_stop: float = 2, width: int = 10, height: int = 30):
+    """
+    This is mostly a utility function to be used to generate random data, useful
+    for development of the heatmap generation code, without access to the actual
+    database data.
+    """
+    return [[random.uniform(0,data_stop) for i in range(0, width)]
+            for j in range(0, height)]
+
+def heatmap_x_axis_names():
+    return [
+        "UCLA_BXDBXH_CARTILAGE_V2::ILM103710672",
+        "UCLA_BXDBXH_CARTILAGE_V2::ILM2260338",
+        "UCLA_BXDBXH_CARTILAGE_V2::ILM3140576",
+        "UCLA_BXDBXH_CARTILAGE_V2::ILM5670577",
+        "UCLA_BXDBXH_CARTILAGE_V2::ILM2070121",
+        "UCLA_BXDBXH_CARTILAGE_V2::ILM103990541",
+        "UCLA_BXDBXH_CARTILAGE_V2::ILM1190722",
+        "UCLA_BXDBXH_CARTILAGE_V2::ILM6590722",
+        "UCLA_BXDBXH_CARTILAGE_V2::ILM4200064",
+        "UCLA_BXDBXH_CARTILAGE_V2::ILM3140463"]
+#### END: Remove these ####
+
+# Grey + Blue + Red
+def generate_heatmap():
+    rows = 20
+    data = generate_random_data(height=rows)
+    y = (["%s"%x for x in range(1, rows+1)][:-1] + ["X"]) #replace last item with x for now
+    fig = px.imshow(
+        data,
+        x=heatmap_x_axis_names(),
+        y=y,
+        width=500)
+    fig.update_traces(xtype="array")
+    fig.update_traces(ytype="array")
+    # fig.update_traces(xgap=10)
+    fig.update_xaxes(
+        visible=True,
+        title_text="Traits",
+        title_font_size=16)
+    fig.update_layout(
+        coloraxis_colorscale=[
+            [0.0, '#3B3B3B'], [0.4999999999999999, '#ABABAB'],
+            [0.5, '#F5DE11'], [1.0, '#FF0D00']])
+
+    fig.write_html("%s/%s"%(heatmap_dir, "test_image.html"))
+    return fig
-- 
cgit 1.4.1


From 0fbbfa9181229a89da566adcf4bed24281da1e80 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Tue, 17 Aug 2021 08:44:20 +0300
Subject: Fix errors: add in missing parentheses

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* Call the `cursor.fetchone()` function to get results. Without the
  parentheses, the code was trying to use the function itself as the results,
  which was a bug, and would lead to failure.
---
 gn3/db/datasets.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'gn3')

diff --git a/gn3/db/datasets.py b/gn3/db/datasets.py
index 53d6811..4a05499 100644
--- a/gn3/db/datasets.py
+++ b/gn3/db/datasets.py
@@ -25,7 +25,7 @@ def retrieve_probeset_trait_dataset_name(
         return dict(zip(
             ["dataset_id", "dataset_name", "dataset_fullname",
              "dataset_shortname", "dataset_datascale"],
-            cursor.fetchone))
+            cursor.fetchone()))
 
 def retrieve_publish_trait_dataset_name(
         threshold: int, name: str, connection: Any):
@@ -49,7 +49,7 @@ def retrieve_publish_trait_dataset_name(
         return dict(zip(
             ["dataset_id", "dataset_name", "dataset_fullname",
              "dataset_shortname"],
-            cursor.fetchone))
+            cursor.fetchone()))
 
 def retrieve_geno_trait_dataset_name(
         threshold: int, name: str, connection: Any):
@@ -73,7 +73,7 @@ def retrieve_geno_trait_dataset_name(
         return dict(zip(
             ["dataset_id", "dataset_name", "dataset_fullname",
              "dataset_shortname"],
-            cursor.fetchone))
+            cursor.fetchone()))
 
 def retrieve_temp_trait_dataset_name(
         threshold: int, name: str, connection: Any):
@@ -97,7 +97,7 @@ def retrieve_temp_trait_dataset_name(
         return dict(zip(
             ["dataset_id", "dataset_name", "dataset_fullname",
              "dataset_shortname"],
-            cursor.fetchone))
+            cursor.fetchone()))
 
 def retrieve_dataset_name(
         trait_type: str, threshold: int, trait_name: str, dataset_name: str,
-- 
cgit 1.4.1


From fc233942e9118a341001f3357a5d2b1c65187736 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Tue, 17 Aug 2021 08:47:11 +0300
Subject: Add tests and fix errors caught with tests

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/computations/heatmap.py: fix errors
* tests/unit/computations/test_heatmap.py: new tests

  Add new tests with the expected source data format, and expected results.
  Fix all errors that were caught by running the tests
---
 gn3/computations/heatmap.py             | 18 +++++------
 tests/unit/computations/test_heatmap.py | 54 +++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+), 9 deletions(-)
 create mode 100644 tests/unit/computations/test_heatmap.py

(limited to 'gn3')

diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
index a0e778a..8a86fe8 100644
--- a/gn3/computations/heatmap.py
+++ b/gn3/computations/heatmap.py
@@ -34,11 +34,11 @@ def export_trait_data(
     """
     def __export_all_types(tdata, strain):
         sample_data = []
-        if tdata[strain]["val"]:
-            sample_data.append(tdata[strain]["val"])
+        if tdata[strain]["value"]:
+            sample_data.append(tdata[strain]["value"])
             if var_exists:
-                if tdata[strain].var:
-                    sample_data.append(tdata[strain]["var"])
+                if tdata[strain]["variance"]:
+                    sample_data.append(tdata[strain]["variance"])
                 else:
                     sample_data.append(None)
             if n_exists:
@@ -58,15 +58,15 @@ def export_trait_data(
 
     def __exporter(accumulator, strain):
         # pylint: disable=[R0911]
-        if trait_data.has_key(strain):
+        if strain in trait_data["data"]:
             if dtype == "val":
-                return accumulator + (trait_data[strain]["val"], )
+                return accumulator + (trait_data["data"][strain]["value"], )
             if dtype == "var":
-                return accumulator + (trait_data[strain]["var"], )
+                return accumulator + (trait_data["data"][strain]["variance"], )
             if dtype == "N":
-                return trait_data[strain]["ndata"]
+                return accumulator + (trait_data["data"][strain]["ndata"], )
             if dtype == "all":
-                return accumulator + __export_all_types(trait_data, strain)
+                return accumulator + __export_all_types(trait_data["data"], strain)
             raise KeyError("Type `%s` is incorrect" % dtype)
         if var_exists and n_exists:
             return accumulator + (None, None, None)
diff --git a/tests/unit/computations/test_heatmap.py b/tests/unit/computations/test_heatmap.py
new file mode 100644
index 0000000..78303ae
--- /dev/null
+++ b/tests/unit/computations/test_heatmap.py
@@ -0,0 +1,54 @@
+"""Module contains tests for gn3.computations.heatmap"""
+from unittest import TestCase
+from gn3.computations.heatmap import export_trait_data
+
+strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
+trait_data = {"mysqlid": 36688172, "data": {"B6cC3-1": {"strain_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None}, "BXD1": {"strain_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None}, "BXD12": {"strain_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None}, "BXD16": {"strain_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None}, "BXD19": {"strain_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None}, "BXD2": {"strain_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None}, "BXD21": {"strain_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None}, "BXD24": {"strain_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None}, "BXD27": {"strain_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None}, "BXD28": {"strain_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None}, "BXD32": {"strain_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None}, "BXD39": {"strain_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None}, "BXD40": {"strain_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None}, "BXD42": {"strain_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None}, "BXD6": {"strain_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None}, "BXH14": {"strain_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None}, "BXH19": {"strain_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None}, "BXH2": {"strain_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None}, "BXH22": {"strain_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None}, "BXH4": {"strain_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None}, "BXH6": {"strain_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None}, "BXH7": {"strain_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None}, "BXH8": {"strain_name": "BXH8", "value": 6.77338, "variance": None, 
"ndata": None}, "BXH9": {"strain_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None}, "C3H/HeJ": {"strain_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None}, "C57BL/6J": {"strain_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None}, "DBA/2J": {"strain_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
+
+class TestHeatmap(TestCase):
+    """Class for testing heatmap computation functions"""
+
+    def test_export_trait_data_dtype(self):
+        """
+        Test `export_trait_data` with different values for the `dtype` keyword
+        argument
+        """
+        for dtype, expected in [
+                ["val", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["var", (None, None, None, None, None, None)],
+                ["N", (None, None, None, None, None, None)],
+                ["all", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)]]:
+            with self.subTest(dtype=dtype):
+                self.assertEqual(
+                    export_trait_data(trait_data, strainlist, dtype=dtype),
+                    expected)
+
+    def test_export_trait_data_dtype_all_flags(self):
+        """
+        Test `export_trait_data` with different values for the `dtype` keyword
+        argument and the different flags set up
+        """
+        for dtype, vflag, nflag, expected in [
+                ["val", False, False, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", False, True, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", True, False, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", True, True, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["var", False, False, (None, None, None, None, None, None)],
+                ["var", False, True, (None, None, None, None, None, None)],
+                ["var", True, False, (None, None, None, None, None, None)],
+                ["var", True, True, (None, None, None, None, None, None)],
+                ["N", False, False, (None, None, None, None, None, None)],
+                ["N", False, True, (None, None, None, None, None, None)],
+                ["N", True, False, (None, None, None, None, None, None)],
+                ["N", True, True, (None, None, None, None, None, None)],
+                ["all", False, False, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["all", False, True, (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None, 8.30401, None, 7.80944, None)],
+                ["all", True, False, (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None, 8.30401, None, 7.80944, None)],
+                ["all", True, True, (7.51879, None, None, 7.77141, None, None, 8.39265, None, None, 8.17443, None, None, 8.30401, None, None, 7.80944, None, None)]
+        ]:
+            with self.subTest(dtype=dtype, vflag=vflag, nflag=nflag):
+                self.assertEqual(
+                    export_trait_data(
+                        trait_data, strainlist, dtype=dtype, var_exists=vflag,
+                        n_exists=nflag),
+                    expected)
-- 
cgit 1.4.1


From 45bc6cbbf1878c9271d410f37e24ad44f5100c2f Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Tue, 17 Aug 2021 08:49:14 +0300
Subject: Provide top-level `riset` key-value pair

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* Provide the expected, top-level `riset` key-value pair and eliminate the
  redundant key-value pair.
---
 gn3/db/traits.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'gn3')

diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index 6ea24be..1031e44 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -418,9 +418,9 @@ def retrieve_trait_info(
         conn)
     if trait_info["haveinfo"]:
         return {
-            **trait_post_processing_functions_table[trait_dataset_type](trait_info),
-            "db": {**trait["db"], **trait_dataset},
-            "riset": trait_dataset["riset"]
+            **trait_post_processing_functions_table[trait_dataset_type](
+                {**trait_info, "riset": trait_dataset["riset"]}),
+            "db": {**trait["db"], **trait_dataset}
         }
     return trait_info
 
-- 
cgit 1.4.1


From d10ee60d2200eefb29a22b0a84cd19569235b354 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Tue, 17 Aug 2021 11:05:03 +0300
Subject: Make child sequence a list

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* Since the `slink` function assigns values to the `listscopy` variable and its
  children, this commit ensures that each child sequence is a list to allow
  for the assignment.

  If the child-sequence is a tuple, that would lead to an exception.
---
 gn3/computations/slink.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'gn3')

diff --git a/gn3/computations/slink.py b/gn3/computations/slink.py
index 5953e6b..3d7a576 100644
--- a/gn3/computations/slink.py
+++ b/gn3/computations/slink.py
@@ -161,7 +161,7 @@ def slink(lists):
     try:
         size = len(lists)
         listindexcopy = list(range(size))
-        listscopy = [child[:] for child in lists]
+        listscopy = [list(child[:]) for child in lists]
         init_size = size
         candidate = []
         while init_size > 2:
-- 
cgit 1.4.1


From d54e2c4b48b24ebbccc8b2ae183fffd645e21344 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Tue, 17 Aug 2021 11:42:20 +0300
Subject: Fix obvious linting errors

* Fix linting errors that do not change the function of the code.
---
 gn3/api/correlation.py           | 4 ++--
 gn3/api/general.py               | 3 ++-
 gn3/computations/correlations.py | 4 ++--
 wsgi.py                          | 6 ++++--
 4 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'gn3')

diff --git a/gn3/api/correlation.py b/gn3/api/correlation.py
index a3e366e..46121f8 100644
--- a/gn3/api/correlation.py
+++ b/gn3/api/correlation.py
@@ -79,7 +79,7 @@ def compute_tissue_corr(corr_method="pearson"):
     target_tissues_dict = tissue_input_data["target_tissues_dict"]
 
     results = compute_tissue_correlation(primary_tissue_dict=primary_tissue_dict,
-                                             target_tissues_data=target_tissues_dict,
-                                             corr_method=corr_method)
+                                         target_tissues_data=target_tissues_dict,
+                                         corr_method=corr_method)
 
     return jsonify(results)
diff --git a/gn3/api/general.py b/gn3/api/general.py
index 86fb7b7..69ec343 100644
--- a/gn3/api/general.py
+++ b/gn3/api/general.py
@@ -13,7 +13,8 @@ general = Blueprint("general", __name__)
 
 @general.route("/version")
 def version():
-  return jsonify("1.0")
+    """Get API version."""
+    return jsonify("1.0")
 
 @general.route("/metadata/upload/", methods=["POST"],
                strict_slashes=False)
diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py
index 1fd3213..8d76c09 100644
--- a/gn3/computations/correlations.py
+++ b/gn3/computations/correlations.py
@@ -341,8 +341,8 @@ def compute_all_lit_correlation(conn, trait_lists: List,
 
 
 def compute_tissue_correlation(primary_tissue_dict: dict,
-                                   target_tissues_data: dict,
-                                   corr_method: str):
+                               target_tissues_data: dict,
+                               corr_method: str):
     """Function acts as an abstraction for tissue_correlation_for_trait\
     required input are target tissue object and primary tissue trait\
     target tissues data contains the trait_symbol_dict and symbol_tissue_vals
diff --git a/wsgi.py b/wsgi.py
index d30bc49..0fcb573 100644
--- a/wsgi.py
+++ b/wsgi.py
@@ -1,9 +1,11 @@
+"""
+WSGI application entry-point.
+"""
 # import main
+from gn3.app import create_app
 
 print("STARTING WSGI APP")
 
-from gn3.app import create_app
-
 app = create_app()
 
 if __name__ == "__main__":
-- 
cgit 1.4.1


From f8be3a85567cc17d50a01382eb10cb3b05436214 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Tue, 17 Aug 2021 11:43:32 +0300
Subject: Test the clustering

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/computations/heatmap.py: Fix clustering bugs
* tests/unit/computations/test_heatmap.py: Add new tests. Fix linting issues.

  Test and fix the clustering function.
---
 gn3/computations/heatmap.py             |  14 ++--
 tests/unit/computations/test_heatmap.py | 109 +++++++++++++++++++++++++++++---
 2 files changed, 106 insertions(+), 17 deletions(-)

(limited to 'gn3')

diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
index 8a86fe8..3c35029 100644
--- a/gn3/computations/heatmap.py
+++ b/gn3/computations/heatmap.py
@@ -110,13 +110,13 @@ def cluster_traits(traits_data_list: Sequence[Dict]):
     https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L138-L162
     """
     def __compute_corr(tdata_i, tdata_j):
-        if tdata_j[0] < tdata_i[0]:
-            corr_vals = compute_correlation(tdata_i, tdata_j)
-            corr = corr_vals[0]
-            if (1 - corr) < 0:
-                return 0.0
-            return 1 - corr
-        return 0.0
+        if tdata_i[0] == tdata_j[0]:
+            return 0.0
+        corr_vals = compute_correlation(tdata_i[1], tdata_j[1])
+        corr = corr_vals[0]
+        if (1 - corr) < 0:
+            return 0.0
+        return 1 - corr
 
     def __cluster(tdata_i):
         return tuple(
diff --git a/tests/unit/computations/test_heatmap.py b/tests/unit/computations/test_heatmap.py
index 78303ae..650cb45 100644
--- a/tests/unit/computations/test_heatmap.py
+++ b/tests/unit/computations/test_heatmap.py
@@ -1,9 +1,38 @@
 """Module contains tests for gn3.computations.heatmap"""
 from unittest import TestCase
-from gn3.computations.heatmap import export_trait_data
+from gn3.computations.heatmap import cluster_traits, export_trait_data
 
 strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
-trait_data = {"mysqlid": 36688172, "data": {"B6cC3-1": {"strain_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None}, "BXD1": {"strain_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None}, "BXD12": {"strain_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None}, "BXD16": {"strain_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None}, "BXD19": {"strain_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None}, "BXD2": {"strain_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None}, "BXD21": {"strain_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None}, "BXD24": {"strain_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None}, "BXD27": {"strain_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None}, "BXD28": {"strain_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None}, "BXD32": {"strain_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None}, "BXD39": {"strain_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None}, "BXD40": {"strain_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None}, "BXD42": {"strain_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None}, "BXD6": {"strain_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None}, "BXH14": {"strain_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None}, "BXH19": {"strain_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None}, "BXH2": {"strain_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None}, "BXH22": {"strain_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None}, "BXH4": {"strain_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None}, "BXH6": {"strain_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None}, "BXH7": {"strain_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None}, "BXH8": {"strain_name": "BXH8", "value": 6.77338, "variance": None, 
"ndata": None}, "BXH9": {"strain_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None}, "C3H/HeJ": {"strain_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None}, "C57BL/6J": {"strain_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None}, "DBA/2J": {"strain_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
+trait_data = {
+    "mysqlid": 36688172,
+    "data": {
+        "B6cC3-1": {"strain_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None},
+        "BXD1": {"strain_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None},
+        "BXD12": {"strain_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None},
+        "BXD16": {"strain_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None},
+        "BXD19": {"strain_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None},
+        "BXD2": {"strain_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None},
+        "BXD21": {"strain_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None},
+        "BXD24": {"strain_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None},
+        "BXD27": {"strain_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None},
+        "BXD28": {"strain_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None},
+        "BXD32": {"strain_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None},
+        "BXD39": {"strain_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None},
+        "BXD40": {"strain_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None},
+        "BXD42": {"strain_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None},
+        "BXD6": {"strain_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None},
+        "BXH14": {"strain_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None},
+        "BXH19": {"strain_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None},
+        "BXH2": {"strain_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None},
+        "BXH22": {"strain_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None},
+        "BXH4": {"strain_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None},
+        "BXH6": {"strain_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None},
+        "BXH7": {"strain_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None},
+        "BXH8": {"strain_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None},
+        "BXH9": {"strain_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None},
+        "C3H/HeJ": {"strain_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None},
+        "C57BL/6J": {"strain_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None},
+        "DBA/2J": {"strain_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
 
 class TestHeatmap(TestCase):
     """Class for testing heatmap computation functions"""
@@ -29,10 +58,14 @@ class TestHeatmap(TestCase):
         argument and the different flags set up
         """
         for dtype, vflag, nflag, expected in [
-                ["val", False, False, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["val", False, True, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["val", True, False, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["val", True, True, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", False, False,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", False, True,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", True, False,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", True, True,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
                 ["var", False, False, (None, None, None, None, None, None)],
                 ["var", False, True, (None, None, None, None, None, None)],
                 ["var", True, False, (None, None, None, None, None, None)],
@@ -41,10 +74,17 @@ class TestHeatmap(TestCase):
                 ["N", False, True, (None, None, None, None, None, None)],
                 ["N", True, False, (None, None, None, None, None, None)],
                 ["N", True, True, (None, None, None, None, None, None)],
-                ["all", False, False, (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["all", False, True, (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None, 8.30401, None, 7.80944, None)],
-                ["all", True, False, (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None, 8.30401, None, 7.80944, None)],
-                ["all", True, True, (7.51879, None, None, 7.77141, None, None, 8.39265, None, None, 8.17443, None, None, 8.30401, None, None, 7.80944, None, None)]
+                ["all", False, False,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["all", False, True,
+                 (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None,
+                  8.30401, None, 7.80944, None)],
+                ["all", True, False,
+                 (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None,
+                  8.30401, None, 7.80944, None)],
+                ["all", True, True,
+                 (7.51879, None, None, 7.77141, None, None, 8.39265, None, None,
+                  8.17443, None, None, 8.30401, None, None, 7.80944, None, None)]
         ]:
             with self.subTest(dtype=dtype, vflag=vflag, nflag=nflag):
                 self.assertEqual(
@@ -52,3 +92,52 @@ class TestHeatmap(TestCase):
                         trait_data, strainlist, dtype=dtype, var_exists=vflag,
                         n_exists=nflag),
                     expected)
+
+    def test_cluster_traits(self):
+        """
+        Test that the clustering is working as expected.
+        """
+        traits_data_list = [
+            (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944),
+            (6.1427, 6.50588, 7.73705, 6.68328, 7.49293, 7.27398),
+            (8.4211, 8.30581, 9.24076, 8.51173, 9.18455, 8.36077),
+            (10.0904, 10.6509, 9.36716, 9.91202, 8.57444, 10.5731),
+            (10.188, 9.76652, 9.54813, 9.05074, 9.52319, 9.10505),
+            (6.74676, 7.01029, 7.54169, 6.48574, 7.01427, 7.26815),
+            (6.39359, 6.85321, 5.78337, 7.11141, 6.22101, 6.16544),
+            (6.84118, 7.08432, 7.59844, 7.08229, 7.26774, 7.24991),
+            (9.45215, 10.6943, 8.64719, 10.1592, 7.75044, 8.78615),
+            (7.04737, 6.87185, 7.58586, 6.92456, 6.84243, 7.36913)]
+        self.assertEqual(
+            cluster_traits(traits_data_list),
+            ((0.0, 0.20337048635536847, 0.16381088984330505, 1.7388553629398245,
+              1.5025235756329178, 0.6952839500255574, 1.271661230252733,
+              0.2100487290977544, 1.4699690641062024, 0.7934461515867415),
+             (0.20337048635536847, 0.0, 0.2198321044997198, 1.5753041735592204,
+              1.4815755944537086, 0.26087293140686374, 1.6939790104301427,
+              0.06024619831474998, 1.7430082449189215, 0.4497104244247795),
+             (0.16381088984330505, 0.2198321044997198, 0.0, 1.9073926868549234,
+              1.0396738891139845, 0.5278328671176757, 1.6275069061182947,
+              0.2636503792482082, 1.739617877037615, 0.7127042590637039),
+             (1.7388553629398245, 1.5753041735592204, 1.9073926868549234, 0.0,
+              0.9936846292920328, 1.1169999189889366, 0.6007483980555253,
+              1.430209221053372, 0.25879514152086425, 0.9313185954797953),
+             (1.5025235756329178, 1.4815755944537086, 1.0396738891139845,
+              0.9936846292920328, 0.0, 1.027827186339337, 1.1441743109173244,
+              1.4122477962364253, 0.8968250491499363, 1.1683723389247052),
+             (0.6952839500255574, 0.26087293140686374, 0.5278328671176757,
+              1.1169999189889366, 1.027827186339337, 0.0, 1.8420471110023269,
+              0.19179284676938602, 1.4875072385631605, 0.23451785425383564),
+             (1.271661230252733, 1.6939790104301427, 1.6275069061182947,
+              0.6007483980555253, 1.1441743109173244, 1.8420471110023269, 0.0,
+              1.6540234785929928, 0.2140799896286565, 1.7413442197913358),
+             (0.2100487290977544, 0.06024619831474998, 0.2636503792482082,
+              1.430209221053372, 1.4122477962364253, 0.19179284676938602,
+              1.6540234785929928, 0.0, 1.5225640692832796, 0.33370067057028485),
+             (1.4699690641062024, 1.7430082449189215, 1.739617877037615,
+              0.25879514152086425, 0.8968250491499363, 1.4875072385631605,
+              0.2140799896286565, 1.5225640692832796, 0.0, 1.3256191648260216),
+             (0.7934461515867415, 0.4497104244247795, 0.7127042590637039,
+              0.9313185954797953, 1.1683723389247052, 0.23451785425383564,
+              1.7413442197913358, 0.33370067057028485, 1.3256191648260216,
+              0.0)))
-- 
cgit 1.4.1


From c9ee473ff7797f6bbd7507eb55c772a3a646acee Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Fri, 20 Aug 2021 09:04:12 +0300
Subject: Minor correlation fixes (#36)

* fix key error for (*tissue_cor) tissue correlation

* update tests for tissue correlation

* rename speed_compute to fast_compute

* pep8 formatting
---
 gn3/computations/correlations.py            | 15 +++++++--------
 tests/unit/computations/test_correlation.py |  8 ++++----
 2 files changed, 11 insertions(+), 12 deletions(-)

(limited to 'gn3')

diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py
index 8d76c09..bb13ff1 100644
--- a/gn3/computations/correlations.py
+++ b/gn3/computations/correlations.py
@@ -124,9 +124,9 @@ def filter_shared_sample_keys(this_samplelist,
     return (this_vals, target_vals)
 
 
-def speed_compute_all_sample_correlation(this_trait,
-                                         target_dataset,
-                                         corr_method="pearson") -> List:
+def fast_compute_all_sample_correlation(this_trait,
+                                        target_dataset,
+                                        corr_method="pearson") -> List:
     """Given a trait data sample-list and target__datasets compute all sample
     correlation
     this functions uses multiprocessing if not use the normal fun
@@ -362,8 +362,7 @@ def compute_tissue_correlation(primary_tissue_dict: dict,
             target_tissues_values=target_tissue_vals,
             trait_id=trait_id,
             corr_method=corr_method)
-        tissue_result_dict = {trait_id: tissue_result}
-        tissues_results.append(tissue_result_dict)
+        tissues_results.append(tissue_result)
     return sorted(
         tissues_results,
         key=lambda trait_name: -abs(list(trait_name.values())[0]["tissue_corr"]))
@@ -386,9 +385,9 @@ def process_trait_symbol_dict(trait_symbol_dict, symbol_tissue_vals_dict) -> Lis
     return traits_tissue_vals
 
 
-def speed_compute_tissue_correlation(primary_tissue_dict: dict,
-                                     target_tissues_data: dict,
-                                     corr_method: str):
+def fast_compute_tissue_correlation(primary_tissue_dict: dict,
+                                    target_tissues_data: dict,
+                                    corr_method: str):
     """Experimental function that uses multiprocessing for computing tissue
     correlation
 
diff --git a/tests/unit/computations/test_correlation.py b/tests/unit/computations/test_correlation.py
index f2d65bd..fc52ec1 100644
--- a/tests/unit/computations/test_correlation.py
+++ b/tests/unit/computations/test_correlation.py
@@ -406,10 +406,10 @@ class TestCorrelation(TestCase):
         target_tissue_data = {"trait_symbol_dict": target_trait_symbol,
                               "symbol_tissue_vals_dict": target_symbol_tissue_vals}
 
-        mock_tissue_corr.side_effect = [{"tissue_corr": -0.5, "tissue_p_val": 0.9,
-                                         "tissue_number": 3},
-                                        {"tissue_corr": 1.11, "tissue_p_val": 0.2,
-                                         "tissue_number": 3}]
+        mock_tissue_corr.side_effect = [{"1418702_a_at": {"tissue_corr": -0.5, "tissue_p_val": 0.9,
+                                                          "tissue_number": 3}},
+                                        {"1412_at": {"tissue_corr": 1.11, "tissue_p_val": 0.2,
+                                                     "tissue_number": 3}}]
 
         expected_results = [{"1412_at":
                              {"tissue_corr": 1.11, "tissue_p_val": 0.2, "tissue_number": 3}},
-- 
cgit 1.4.1