From 157df453cdb84591cb44af9f1d2677cd0b2c0380 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 18 Oct 2021 12:17:11 +0300
Subject: Move 'export_trait_data' to 'gn3.db.traits' module

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* gn3/db/traits.py: Move function `export_trait_data` here
* gn3/heatmaps.py: Remove function `export_trait_data`
* tests/unit/db/test_traits.py: Move function `export_trait_data` tests here
* tests/unit/test_heatmaps.py: Remove function `export_trait_data` tests from here

  Function `export_trait_data` relates more closely to traits and is
  used in more than just the `gn3.heatmaps` module. This commit moves the
  relevant code over to the `gn3.db.traits` module and also moves its tests to
  the corresponding test module.
---
 gn3/db/traits.py             | 69 ++++++++++++++++++++++++++++++++++
 gn3/heatmaps.py              | 67 +--------------------------------
 tests/unit/db/test_traits.py | 89 ++++++++++++++++++++++++++++++++++++++++++++
 tests/unit/test_heatmaps.py  | 87 -------------------------------------------
 4 files changed, 159 insertions(+), 153 deletions(-)

diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index f2673c8..1e29aff 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -1,12 +1,81 @@
 """This class contains functions relating to trait data manipulation"""
 import os
+from functools import reduce
 from typing import Any, Dict, Union, Sequence
+
 from gn3.settings import TMPDIR
 from gn3.random import random_string
 from gn3.function_helpers import compose
 from gn3.db.datasets import retrieve_trait_dataset
 
 
+def export_trait_data(
+        trait_data: dict, samplelist: Sequence[str], dtype: str = "val",
+        var_exists: bool = False, n_exists: bool = False):
+    """
+    Export data according to `samplelist`. Mostly used in calculating
+    correlations.
+
+    DESCRIPTION:
+    Migrated from
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L166-L211
+
+    PARAMETERS
+    trait_data: (dict)
+      The dictionary of key-value pairs representing a trait
+    samplelist: (list)
+      A list of sample names
+    dtype: (str)
+      Selects what to export per sample: "val", "var", "N" or "all"
+    var_exists: (bool)
+      A flag indicating existence of variance
+    n_exists: (bool)
+      A flag indicating existence of ndata
+    """
+    def __export_all_types(tdata, sample):
+        sample_data = []
+        if tdata[sample]["value"]:
+            sample_data.append(tdata[sample]["value"])
+            if var_exists:
+                if tdata[sample]["variance"]:
+                    sample_data.append(tdata[sample]["variance"])
+                else:
+                    sample_data.append(None)
+            if n_exists:
+                if tdata[sample]["ndata"]:
+                    sample_data.append(tdata[sample]["ndata"])
+                else:
+                    sample_data.append(None)
+        else:
+            if var_exists and n_exists:
+                sample_data += [None, None, None]
+            elif var_exists or n_exists:
+                sample_data += [None, None]
+            else:
+                sample_data.append(None)
+
+        return tuple(sample_data)
+
+    def __exporter(accumulator, sample):
+        # pylint: disable=[R0911]
+        if sample in trait_data["data"]:
+            if dtype == "val":
+                return accumulator + (trait_data["data"][sample]["value"], )
+            if dtype == "var":
+                return accumulator + (trait_data["data"][sample]["variance"], )
+            if dtype == "N":
+                return accumulator + (trait_data["data"][sample]["ndata"], )
+            if dtype == "all":
+                return accumulator + __export_all_types(trait_data["data"], sample)
+            raise KeyError("Type `%s` is incorrect" % dtype)
+        if var_exists and n_exists:
+            return accumulator + (None, None, None)
+        if var_exists or n_exists:
+            return accumulator + (None, None)
+        return accumulator + (None,)
+
+    return reduce(__exporter, samplelist, tuple())
+
 def get_trait_csv_sample_data(conn: Any,
                               trait_name: int, phenotype_id: int):
     """Fetch a trait and return it as a csv string"""
diff --git a/gn3/heatmaps.py b/gn3/heatmaps.py
index adbfbc6..3b94e88 100644
--- a/gn3/heatmaps.py
+++ b/gn3/heatmaps.py
@@ -14,6 +14,7 @@ from plotly.subplots import make_subplots # type: ignore
 from gn3.settings import TMPDIR
 from gn3.random import random_string
 from gn3.computations.slink import slink
+from gn3.db.traits import export_trait_data
 from gn3.computations.correlations2 import compute_correlation
 from gn3.db.genotypes import (
     build_genotype_file, load_genotype_samples)
@@ -26,72 +27,6 @@ from gn3.computations.qtlreaper import (
     parse_reaper_main_results,
     organise_reaper_main_results)
 
-def export_trait_data(
-        trait_data: dict, samplelist: Sequence[str], dtype: str = "val",
-        var_exists: bool = False, n_exists: bool = False):
-    """
-    Export data according to `samplelist`. Mostly used in calculating
-    correlations.
-
-    DESCRIPTION:
-    Migrated from
-    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L166-L211
-
-    PARAMETERS
-    trait: (dict)
-      The dictionary of key-value pairs representing a trait
-    samplelist: (list)
-      A list of sample names
-    dtype: (str)
-      ... verify what this is ...
-    var_exists: (bool)
-      A flag indicating existence of variance
-    n_exists: (bool)
-      A flag indicating existence of ndata
-    """
-    def __export_all_types(tdata, sample):
-        sample_data = []
-        if tdata[sample]["value"]:
-            sample_data.append(tdata[sample]["value"])
-            if var_exists:
-                if tdata[sample]["variance"]:
-                    sample_data.append(tdata[sample]["variance"])
-                else:
-                    sample_data.append(None)
-            if n_exists:
-                if tdata[sample]["ndata"]:
-                    sample_data.append(tdata[sample]["ndata"])
-                else:
-                    sample_data.append(None)
-        else:
-            if var_exists and n_exists:
-                sample_data += [None, None, None]
-            elif var_exists or n_exists:
-                sample_data += [None, None]
-            else:
-                sample_data.append(None)
-
-        return tuple(sample_data)
-
-    def __exporter(accumulator, sample):
-        # pylint: disable=[R0911]
-        if sample in trait_data["data"]:
-            if dtype == "val":
-                return accumulator + (trait_data["data"][sample]["value"], )
-            if dtype == "var":
-                return accumulator + (trait_data["data"][sample]["variance"], )
-            if dtype == "N":
-                return accumulator + (trait_data["data"][sample]["ndata"], )
-            if dtype == "all":
-                return accumulator + __export_all_types(trait_data["data"], sample)
-            raise KeyError("Type `%s` is incorrect" % dtype)
-        if var_exists and n_exists:
-            return accumulator + (None, None, None)
-        if var_exists or n_exists:
-            return accumulator + (None, None)
-        return accumulator + (None,)
-
-    return reduce(__exporter, samplelist, tuple())
 
 def trait_display_name(trait: Dict):
     """
diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py
index 8af8e82..0c4ef78 100644
--- a/tests/unit/db/test_traits.py
+++ b/tests/unit/db/test_traits.py
@@ -2,6 +2,7 @@
 from unittest import mock, TestCase
 from gn3.db.traits import (
     build_trait_name,
+    export_trait_data,
     set_haveinfo_field,
     update_sample_data,
     retrieve_trait_info,
@@ -12,6 +13,38 @@ from gn3.db.traits import (
     retrieve_publish_trait_info,
     retrieve_probeset_trait_info)
 
+samplelist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
+trait_data = {
+    "mysqlid": 36688172,
+    "data": {
+        "B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None},
+        "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None},
+        "BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None},
+        "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None},
+        "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None},
+        "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None},
+        "BXD21": {"sample_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None},
+        "BXD24": {"sample_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None},
+        "BXD27": {"sample_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None},
+        "BXD28": {"sample_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None},
+        "BXD32": {"sample_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None},
+        "BXD39": {"sample_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None},
+        "BXD40": {"sample_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None},
+        "BXD42": {"sample_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None},
+        "BXD6": {"sample_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None},
+        "BXH14": {"sample_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None},
+        "BXH19": {"sample_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None},
+        "BXH2": {"sample_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None},
+        "BXH22": {"sample_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None},
+        "BXH4": {"sample_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None},
+        "BXH6": {"sample_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None},
+        "BXH7": {"sample_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None},
+        "BXH8": {"sample_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None},
+        "BXH9": {"sample_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None},
+        "C3H/HeJ": {"sample_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None},
+        "C57BL/6J": {"sample_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None},
+        "DBA/2J": {"sample_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
+
 class TestTraitsDBFunctions(TestCase):
     "Test cases for traits functions"
 
@@ -226,3 +259,59 @@ class TestTraitsDBFunctions(TestCase):
             with self.subTest(trait_info=trait_info, expected=expected):
                 self.assertEqual(
                     set_confidential_field(trait_type, trait_info), expected)
+
+    def test_export_trait_data_dtype(self):
+        """
+        Test `export_trait_data` with different values for the `dtype` keyword
+        argument
+        """
+        for dtype, expected in [
+                ["val", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["var", (None, None, None, None, None, None)],
+                ["N", (None, None, None, None, None, None)],
+                ["all", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)]]:
+            with self.subTest(dtype=dtype):
+                self.assertEqual(
+                    export_trait_data(trait_data, samplelist, dtype=dtype),
+                    expected)
+
+    def test_export_trait_data_dtype_all_flags(self):
+        """
+        Test `export_trait_data` with different values for the `dtype` keyword
+        argument and the different flags set up
+        """
+        for dtype, vflag, nflag, expected in [
+                ["val", False, False,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", False, True,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", True, False,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", True, True,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["var", False, False, (None, None, None, None, None, None)],
+                ["var", False, True, (None, None, None, None, None, None)],
+                ["var", True, False, (None, None, None, None, None, None)],
+                ["var", True, True, (None, None, None, None, None, None)],
+                ["N", False, False, (None, None, None, None, None, None)],
+                ["N", False, True, (None, None, None, None, None, None)],
+                ["N", True, False, (None, None, None, None, None, None)],
+                ["N", True, True, (None, None, None, None, None, None)],
+                ["all", False, False,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["all", False, True,
+                 (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None,
+                  8.30401, None, 7.80944, None)],
+                ["all", True, False,
+                 (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None,
+                  8.30401, None, 7.80944, None)],
+                ["all", True, True,
+                 (7.51879, None, None, 7.77141, None, None, 8.39265, None, None,
+                  8.17443, None, None, 8.30401, None, None, 7.80944, None, None)]
+        ]:
+            with self.subTest(dtype=dtype, vflag=vflag, nflag=nflag):
+                self.assertEqual(
+                    export_trait_data(
+                        trait_data, samplelist, dtype=dtype, var_exists=vflag,
+                        n_exists=nflag),
+                    expected)
diff --git a/tests/unit/test_heatmaps.py b/tests/unit/test_heatmaps.py
index 7b66688..03fd4a6 100644
--- a/tests/unit/test_heatmaps.py
+++ b/tests/unit/test_heatmaps.py
@@ -4,43 +4,12 @@ from gn3.heatmaps import (
     cluster_traits,
     get_loci_names,
     get_lrs_from_chr,
-    export_trait_data,
     compute_traits_order,
     retrieve_samples_and_values,
     process_traits_data_for_heatmap)
 from tests.unit.sample_test_data import organised_trait_1, organised_trait_2
 
 samplelist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
-trait_data = {
-    "mysqlid": 36688172,
-    "data": {
-        "B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None},
-        "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None},
-        "BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None},
-        "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None},
-        "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None},
-        "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None},
-        "BXD21": {"sample_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None},
-        "BXD24": {"sample_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None},
-        "BXD27": {"sample_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None},
-        "BXD28": {"sample_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None},
-        "BXD32": {"sample_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None},
-        "BXD39": {"sample_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None},
-        "BXD40": {"sample_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None},
-        "BXD42": {"sample_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None},
-        "BXD6": {"sample_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None},
-        "BXH14": {"sample_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None},
-        "BXH19": {"sample_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None},
-        "BXH2": {"sample_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None},
-        "BXH22": {"sample_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None},
-        "BXH4": {"sample_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None},
-        "BXH6": {"sample_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None},
-        "BXH7": {"sample_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None},
-        "BXH8": {"sample_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None},
-        "BXH9": {"sample_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None},
-        "C3H/HeJ": {"sample_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None},
-        "C57BL/6J": {"sample_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None},
-        "DBA/2J": {"sample_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
 
 slinked = (
     (((0, 2, 0.16381088984330505),
@@ -55,62 +24,6 @@ slinked = (
 class TestHeatmap(TestCase):
     """Class for testing heatmap computation functions"""
 
-    def test_export_trait_data_dtype(self):
-        """
-        Test `export_trait_data` with different values for the `dtype` keyword
-        argument
-        """
-        for dtype, expected in [
-                ["val", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["var", (None, None, None, None, None, None)],
-                ["N", (None, None, None, None, None, None)],
-                ["all", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)]]:
-            with self.subTest(dtype=dtype):
-                self.assertEqual(
-                    export_trait_data(trait_data, samplelist, dtype=dtype),
-                    expected)
-
-    def test_export_trait_data_dtype_all_flags(self):
-        """
-        Test `export_trait_data` with different values for the `dtype` keyword
-        argument and the different flags set up
-        """
-        for dtype, vflag, nflag, expected in [
-                ["val", False, False,
-                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["val", False, True,
-                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["val", True, False,
-                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["val", True, True,
-                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["var", False, False, (None, None, None, None, None, None)],
-                ["var", False, True, (None, None, None, None, None, None)],
-                ["var", True, False, (None, None, None, None, None, None)],
-                ["var", True, True, (None, None, None, None, None, None)],
-                ["N", False, False, (None, None, None, None, None, None)],
-                ["N", False, True, (None, None, None, None, None, None)],
-                ["N", True, False, (None, None, None, None, None, None)],
-                ["N", True, True, (None, None, None, None, None, None)],
-                ["all", False, False,
-                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["all", False, True,
-                 (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None,
-                  8.30401, None, 7.80944, None)],
-                ["all", True, False,
-                 (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None,
-                  8.30401, None, 7.80944, None)],
-                ["all", True, True,
-                 (7.51879, None, None, 7.77141, None, None, 8.39265, None, None,
-                  8.17443, None, None, 8.30401, None, None, 7.80944, None, None)]
-        ]:
-            with self.subTest(dtype=dtype, vflag=vflag, nflag=nflag):
-                self.assertEqual(
-                    export_trait_data(
-                        trait_data, samplelist, dtype=dtype, var_exists=vflag,
-                        n_exists=nflag),
-                    expected)
-
     def test_cluster_traits(self):
         """
         Test that the clustering is working as expected.
-- 
cgit 1.4.1