From 27cca4c118cba6a5f8e8b03d152070f83a44a9e5 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 18 Oct 2021 05:47:45 +0300
Subject: Migrate `export_informative` function

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* gn3/partial_correlations.py: Implement a mostly bug-compatible
  `export_informative` function as part of migrating the partial correlations
  feature over to GN3 from GN1.
* tests/unit/test_partial_correlations.py: Implement tests to ensure the code
  works in a similar manner to the one in GN1.
---
 tests/unit/test_partial_correlations.py | 92 +++++++++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)
 create mode 100644 tests/unit/test_partial_correlations.py

(limited to 'tests')

diff --git a/tests/unit/test_partial_correlations.py b/tests/unit/test_partial_correlations.py
new file mode 100644
index 0000000..6eea078
--- /dev/null
+++ b/tests/unit/test_partial_correlations.py
@@ -0,0 +1,92 @@
+"""Module contains tests for gn3.partial_correlations"""
+
+from unittest import TestCase
+from gn3.partial_correlations import export_informative
+
+class TestPartialCorrelations(TestCase):
+    """Class for testing partial correlations computation functions"""
+
+    def test_export_informative(self):
+        """Test that the function exports appropriate data."""
+        for trait_data, inc_var, expected in [
+                [{"data": {
+                    "sample1": {
+                        "sample_name": "sample1", "value": 9, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample2": {
+                        "sample_name": "sample2", "value": 8, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample3": {
+                        "sample_name": "sample3", "value": 7, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample4": {
+                        "sample_name": "sample4", "value": 6, "variance": None,
+                        "ndata": 13
+                    },
+                }}, 0, (
+                    ("sample1", "sample2", "sample3", "sample4"), (9, 8, 7, 6),
+                    (None, None, None, None))],
+                [{"data": {
+                    "sample1": {
+                        "sample_name": "sample1", "value": 9, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample2": {
+                        "sample_name": "sample2", "value": 8, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample3": {
+                        "sample_name": "sample3", "value": None, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample4": {
+                        "sample_name": "sample4", "value": 6, "variance": None,
+                        "ndata": 13
+                    },
+                }}, 0, (
+                    ("sample1", "sample2", "sample4"), (9, 8, 6),
+                    (None, None, None))],
+                [{"data": {
+                    "sample1": {
+                        "sample_name": "sample1", "value": 9, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample2": {
+                        "sample_name": "sample2", "value": 8, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample3": {
+                        "sample_name": "sample3", "value": 7, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample4": {
+                        "sample_name": "sample4", "value": 6, "variance": None,
+                        "ndata": 13
+                    },
+                }}, True, (tuple(), tuple(), tuple())],
+                [{"data": {
+                    "sample1": {
+                        "sample_name": "sample1", "value": 9, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample2": {
+                        "sample_name": "sample2", "value": 8, "variance": 0.657,
+                        "ndata": 13
+                    },
+                    "sample3": {
+                        "sample_name": "sample3", "value": 7, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample4": {
+                        "sample_name": "sample4", "value": 6, "variance": None,
+                        "ndata": 13
+                    },
+                }}, 0, (
+                    ("sample1", "sample2", "sample3", "sample4"), (9, 8, 7, 6),
+                    (None, 0.657, None, None))]]:
+            with self.subTest(trait_data=trait_data):
+                self.assertEqual(
+                    export_informative(trait_data, inc_var), expected)
-- 
cgit 1.4.1


From 157df453cdb84591cb44af9f1d2677cd0b2c0380 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 18 Oct 2021 12:17:11 +0300
Subject: Move 'export_trait_data' to 'gn3.db.traits' module

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* gn3/db/traits.py: Move function `export_trait_data` here
* gn3/heatmaps.py: Remove function `export_trait_data`
* tests/unit/db/test_traits.py: Move function `export_trait_data` tests here
* tests/unit/test_heatmaps.py: Remove function `export_trait_data` tests from here

  Function `export_trait_data` more closely corresponds to the traits and is
  used in more than just the `gn3.heatmaps` module. This commit moves the
  relevant code over to the `gn3.db.traits` module and also moves the tests to
  the corresponding test module.
---
 gn3/db/traits.py             | 69 ++++++++++++++++++++++++++++++++++
 gn3/heatmaps.py              | 67 +--------------------------------
 tests/unit/db/test_traits.py | 89 ++++++++++++++++++++++++++++++++++++++++++++
 tests/unit/test_heatmaps.py  | 87 -------------------------------------------
 4 files changed, 159 insertions(+), 153 deletions(-)

(limited to 'tests')

diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index f2673c8..1e29aff 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -1,12 +1,81 @@
 """This class contains functions relating to trait data manipulation"""
 import os
+from functools import reduce
 from typing import Any, Dict, Union, Sequence
+
 from gn3.settings import TMPDIR
 from gn3.random import random_string
 from gn3.function_helpers import compose
 from gn3.db.datasets import retrieve_trait_dataset
 
 
+def export_trait_data(
+        trait_data: dict, samplelist: Sequence[str], dtype: str = "val",
+        var_exists: bool = False, n_exists: bool = False):
+    """
+    Export data according to `samplelist`. Mostly used in calculating
+    correlations.
+
+    DESCRIPTION:
+    Migrated from
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L166-L211
+
+    PARAMETERS
+    trait_data: (dict)
+      The dictionary of key-value pairs representing a trait
+    samplelist: (list)
+      A list of sample names
+    dtype: (str)
+      The field to export: one of "val", "var", "N" or "all"
+    var_exists: (bool)
+      A flag indicating existence of variance
+    n_exists: (bool)
+      A flag indicating existence of ndata
+    """
+    def __export_all_types(tdata, sample):
+        sample_data = []
+        if tdata[sample]["value"]:
+            sample_data.append(tdata[sample]["value"])
+            if var_exists:
+                if tdata[sample]["variance"]:
+                    sample_data.append(tdata[sample]["variance"])
+                else:
+                    sample_data.append(None)
+            if n_exists:
+                if tdata[sample]["ndata"]:
+                    sample_data.append(tdata[sample]["ndata"])
+                else:
+                    sample_data.append(None)
+        else:
+            if var_exists and n_exists:
+                sample_data += [None, None, None]
+            elif var_exists or n_exists:
+                sample_data += [None, None]
+            else:
+                sample_data.append(None)
+
+        return tuple(sample_data)
+
+    def __exporter(accumulator, sample):
+        # pylint: disable=[R0911]
+        if sample in trait_data["data"]:
+            if dtype == "val":
+                return accumulator + (trait_data["data"][sample]["value"], )
+            if dtype == "var":
+                return accumulator + (trait_data["data"][sample]["variance"], )
+            if dtype == "N":
+                return accumulator + (trait_data["data"][sample]["ndata"], )
+            if dtype == "all":
+                return accumulator + __export_all_types(trait_data["data"], sample)
+            raise KeyError("Type `%s` is incorrect" % dtype)
+        if var_exists and n_exists:
+            return accumulator + (None, None, None)
+        if var_exists or n_exists:
+            return accumulator + (None, None)
+        return accumulator + (None,)
+
+    return reduce(__exporter, samplelist, tuple())
+
 def get_trait_csv_sample_data(conn: Any,
                               trait_name: int, phenotype_id: int):
     """Fetch a trait and return it as a csv string"""
diff --git a/gn3/heatmaps.py b/gn3/heatmaps.py
index adbfbc6..3b94e88 100644
--- a/gn3/heatmaps.py
+++ b/gn3/heatmaps.py
@@ -14,6 +14,7 @@ from plotly.subplots import make_subplots # type: ignore
 from gn3.settings import TMPDIR
 from gn3.random import random_string
 from gn3.computations.slink import slink
+from gn3.db.traits import export_trait_data
 from gn3.computations.correlations2 import compute_correlation
 from gn3.db.genotypes import (
     build_genotype_file, load_genotype_samples)
@@ -26,72 +27,6 @@ from gn3.computations.qtlreaper import (
     parse_reaper_main_results,
     organise_reaper_main_results)
 
-def export_trait_data(
-        trait_data: dict, samplelist: Sequence[str], dtype: str = "val",
-        var_exists: bool = False, n_exists: bool = False):
-    """
-    Export data according to `samplelist`. Mostly used in calculating
-    correlations.
-
-    DESCRIPTION:
-    Migrated from
-    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L166-L211
-
-    PARAMETERS
-    trait: (dict)
-      The dictionary of key-value pairs representing a trait
-    samplelist: (list)
-      A list of sample names
-    dtype: (str)
-      ... verify what this is ...
-    var_exists: (bool)
-      A flag indicating existence of variance
-    n_exists: (bool)
-      A flag indicating existence of ndata
-    """
-    def __export_all_types(tdata, sample):
-        sample_data = []
-        if tdata[sample]["value"]:
-            sample_data.append(tdata[sample]["value"])
-            if var_exists:
-                if tdata[sample]["variance"]:
-                    sample_data.append(tdata[sample]["variance"])
-                else:
-                    sample_data.append(None)
-            if n_exists:
-                if tdata[sample]["ndata"]:
-                    sample_data.append(tdata[sample]["ndata"])
-                else:
-                    sample_data.append(None)
-        else:
-            if var_exists and n_exists:
-                sample_data += [None, None, None]
-            elif var_exists or n_exists:
-                sample_data += [None, None]
-            else:
-                sample_data.append(None)
-
-        return tuple(sample_data)
-
-    def __exporter(accumulator, sample):
-        # pylint: disable=[R0911]
-        if sample in trait_data["data"]:
-            if dtype == "val":
-                return accumulator + (trait_data["data"][sample]["value"], )
-            if dtype == "var":
-                return accumulator + (trait_data["data"][sample]["variance"], )
-            if dtype == "N":
-                return accumulator + (trait_data["data"][sample]["ndata"], )
-            if dtype == "all":
-                return accumulator + __export_all_types(trait_data["data"], sample)
-            raise KeyError("Type `%s` is incorrect" % dtype)
-        if var_exists and n_exists:
-            return accumulator + (None, None, None)
-        if var_exists or n_exists:
-            return accumulator + (None, None)
-        return accumulator + (None,)
-
-    return reduce(__exporter, samplelist, tuple())
 
 def trait_display_name(trait: Dict):
     """
diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py
index 8af8e82..0c4ef78 100644
--- a/tests/unit/db/test_traits.py
+++ b/tests/unit/db/test_traits.py
@@ -2,6 +2,7 @@
 from unittest import mock, TestCase
 from gn3.db.traits import (
     build_trait_name,
+    export_trait_data,
     set_haveinfo_field,
     update_sample_data,
     retrieve_trait_info,
@@ -12,6 +13,38 @@ from gn3.db.traits import (
     retrieve_publish_trait_info,
     retrieve_probeset_trait_info)
 
+samplelist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
+trait_data = {
+    "mysqlid": 36688172,
+    "data": {
+        "B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None},
+        "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None},
+        "BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None},
+        "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None},
+        "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None},
+        "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None},
+        "BXD21": {"sample_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None},
+        "BXD24": {"sample_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None},
+        "BXD27": {"sample_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None},
+        "BXD28": {"sample_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None},
+        "BXD32": {"sample_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None},
+        "BXD39": {"sample_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None},
+        "BXD40": {"sample_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None},
+        "BXD42": {"sample_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None},
+        "BXD6": {"sample_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None},
+        "BXH14": {"sample_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None},
+        "BXH19": {"sample_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None},
+        "BXH2": {"sample_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None},
+        "BXH22": {"sample_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None},
+        "BXH4": {"sample_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None},
+        "BXH6": {"sample_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None},
+        "BXH7": {"sample_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None},
+        "BXH8": {"sample_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None},
+        "BXH9": {"sample_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None},
+        "C3H/HeJ": {"sample_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None},
+        "C57BL/6J": {"sample_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None},
+        "DBA/2J": {"sample_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
+
 class TestTraitsDBFunctions(TestCase):
     "Test cases for traits functions"
 
@@ -226,3 +259,59 @@ class TestTraitsDBFunctions(TestCase):
             with self.subTest(trait_info=trait_info, expected=expected):
                 self.assertEqual(
                     set_confidential_field(trait_type, trait_info), expected)
+
+    def test_export_trait_data_dtype(self):
+        """
+        Test `export_trait_data` with different values for the `dtype` keyword
+        argument
+        """
+        for dtype, expected in [
+                ["val", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["var", (None, None, None, None, None, None)],
+                ["N", (None, None, None, None, None, None)],
+                ["all", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)]]:
+            with self.subTest(dtype=dtype):
+                self.assertEqual(
+                    export_trait_data(trait_data, samplelist, dtype=dtype),
+                    expected)
+
+    def test_export_trait_data_dtype_all_flags(self):
+        """
+        Test `export_trait_data` with different values for the `dtype` keyword
+        argument and the different flags set up
+        """
+        for dtype, vflag, nflag, expected in [
+                ["val", False, False,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", False, True,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", True, False,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["val", True, True,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["var", False, False, (None, None, None, None, None, None)],
+                ["var", False, True, (None, None, None, None, None, None)],
+                ["var", True, False, (None, None, None, None, None, None)],
+                ["var", True, True, (None, None, None, None, None, None)],
+                ["N", False, False, (None, None, None, None, None, None)],
+                ["N", False, True, (None, None, None, None, None, None)],
+                ["N", True, False, (None, None, None, None, None, None)],
+                ["N", True, True, (None, None, None, None, None, None)],
+                ["all", False, False,
+                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+                ["all", False, True,
+                 (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None,
+                  8.30401, None, 7.80944, None)],
+                ["all", True, False,
+                 (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None,
+                  8.30401, None, 7.80944, None)],
+                ["all", True, True,
+                 (7.51879, None, None, 7.77141, None, None, 8.39265, None, None,
+                  8.17443, None, None, 8.30401, None, None, 7.80944, None, None)]
+        ]:
+            with self.subTest(dtype=dtype, vflag=vflag, nflag=nflag):
+                self.assertEqual(
+                    export_trait_data(
+                        trait_data, samplelist, dtype=dtype, var_exists=vflag,
+                        n_exists=nflag),
+                    expected)
diff --git a/tests/unit/test_heatmaps.py b/tests/unit/test_heatmaps.py
index 7b66688..03fd4a6 100644
--- a/tests/unit/test_heatmaps.py
+++ b/tests/unit/test_heatmaps.py
@@ -4,43 +4,12 @@ from gn3.heatmaps import (
     cluster_traits,
     get_loci_names,
     get_lrs_from_chr,
-    export_trait_data,
     compute_traits_order,
     retrieve_samples_and_values,
     process_traits_data_for_heatmap)
 from tests.unit.sample_test_data import organised_trait_1, organised_trait_2
 
 samplelist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
-trait_data = {
-    "mysqlid": 36688172,
-    "data": {
-        "B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None},
-        "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None},
-        "BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None},
-        "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None},
-        "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None},
-        "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None},
-        "BXD21": {"sample_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None},
-        "BXD24": {"sample_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None},
-        "BXD27": {"sample_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None},
-        "BXD28": {"sample_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None},
-        "BXD32": {"sample_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None},
-        "BXD39": {"sample_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None},
-        "BXD40": {"sample_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None},
-        "BXD42": {"sample_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None},
-        "BXD6": {"sample_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None},
-        "BXH14": {"sample_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None},
-        "BXH19": {"sample_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None},
-        "BXH2": {"sample_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None},
-        "BXH22": {"sample_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None},
-        "BXH4": {"sample_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None},
-        "BXH6": {"sample_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None},
-        "BXH7": {"sample_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None},
-        "BXH8": {"sample_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None},
-        "BXH9": {"sample_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None},
-        "C3H/HeJ": {"sample_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None},
-        "C57BL/6J": {"sample_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None},
-        "DBA/2J": {"sample_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
 
 slinked = (
     (((0, 2, 0.16381088984330505),
@@ -55,62 +24,6 @@ slinked = (
 class TestHeatmap(TestCase):
     """Class for testing heatmap computation functions"""
 
-    def test_export_trait_data_dtype(self):
-        """
-        Test `export_trait_data` with different values for the `dtype` keyword
-        argument
-        """
-        for dtype, expected in [
-                ["val", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["var", (None, None, None, None, None, None)],
-                ["N", (None, None, None, None, None, None)],
-                ["all", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)]]:
-            with self.subTest(dtype=dtype):
-                self.assertEqual(
-                    export_trait_data(trait_data, samplelist, dtype=dtype),
-                    expected)
-
-    def test_export_trait_data_dtype_all_flags(self):
-        """
-        Test `export_trait_data` with different values for the `dtype` keyword
-        argument and the different flags set up
-        """
-        for dtype, vflag, nflag, expected in [
-                ["val", False, False,
-                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["val", False, True,
-                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["val", True, False,
-                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["val", True, True,
-                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["var", False, False, (None, None, None, None, None, None)],
-                ["var", False, True, (None, None, None, None, None, None)],
-                ["var", True, False, (None, None, None, None, None, None)],
-                ["var", True, True, (None, None, None, None, None, None)],
-                ["N", False, False, (None, None, None, None, None, None)],
-                ["N", False, True, (None, None, None, None, None, None)],
-                ["N", True, False, (None, None, None, None, None, None)],
-                ["N", True, True, (None, None, None, None, None, None)],
-                ["all", False, False,
-                 (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
-                ["all", False, True,
-                 (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None,
-                  8.30401, None, 7.80944, None)],
-                ["all", True, False,
-                 (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None,
-                  8.30401, None, 7.80944, None)],
-                ["all", True, True,
-                 (7.51879, None, None, 7.77141, None, None, 8.39265, None, None,
-                  8.17443, None, None, 8.30401, None, None, 7.80944, None, None)]
-        ]:
-            with self.subTest(dtype=dtype, vflag=vflag, nflag=nflag):
-                self.assertEqual(
-                    export_trait_data(
-                        trait_data, samplelist, dtype=dtype, var_exists=vflag,
-                        n_exists=nflag),
-                    expected)
-
     def test_cluster_traits(self):
         """
         Test that the clustering is working as expected.
-- 
cgit 1.4.1


From 94ca79045baf978d6aab964c7c70b84911c1124f Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 18 Oct 2021 12:27:32 +0300
Subject: Move `export_informative` function to `gn3.db.traits` module

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* gn3/db/traits.py: Move `export_informative` function here
* gn3/partial_correlations.py: Remove `export_informative` function
* tests/unit/db/test_traits.py: Move `export_informative` function tests here
* tests/unit/test_partial_correlations.py: Remove `export_informative`
  function tests

  The `export_informative` function relates more to the traits than to the
  partial correlations, and could find use in more than just the partial
  correlations stuff. This commit moves the function to the more
  traits-specific `gn3.db.traits` module.
---
 gn3/db/traits.py                        | 24 +++++++++
 gn3/partial_correlations.py             | 24 ---------
 tests/unit/db/test_traits.py            | 86 ++++++++++++++++++++++++++++++++
 tests/unit/test_partial_correlations.py | 87 +--------------------------------
 4 files changed, 111 insertions(+), 110 deletions(-)

(limited to 'tests')

diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index 1e29aff..1c6aaa7 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -743,3 +743,27 @@ def generate_traits_filename(base_path: str = TMPDIR):
     """Generate a unique filename for use with generated traits files."""
     return "{}/traits_test_file_{}.txt".format(
         os.path.abspath(base_path), random_string(10))
+
+def export_informative(trait_data: dict, inc_var: bool = False) -> tuple:
+    """
+    Export informative strain
+
+    This is a migration of the `exportInformative` function in
+    web/webqtl/base/webqtlTrait.py module in GeneNetwork1.
+
+    There is a chance that the original implementation has a bug, especially
+    dealing with the `inc_var` value. If the `inc_var` value is meant to control
+    the inclusion of the `variance` value, then the current implementation, and
+    that one in GN1 have a bug.
+    """
+    def __exporter__(acc, data_item):
+        if not inc_var or data_item["variance"] is not None:
+            return (
+                acc[0] + (data_item["sample_name"],),
+                acc[1] + (data_item["value"],),
+                acc[2] + (data_item["variance"],))
+        return acc
+    return reduce(
+        __exporter__,
+        filter(lambda td: td["value"] is not None, trait_data["data"].values()),
+        (tuple(), tuple(), tuple()))
diff --git a/gn3/partial_correlations.py b/gn3/partial_correlations.py
index 8c37886..df390ed 100644
--- a/gn3/partial_correlations.py
+++ b/gn3/partial_correlations.py
@@ -6,27 +6,3 @@ GeneNetwork1.
 """
 
 from functools import reduce
-
-def export_informative(trait_data: dict, inc_var: bool = False) -> tuple:
-    """
-    Export informative strain
-
-    This is a migration of the `exportInformative` function in
-    web/webqtl/base/webqtlTrait.py module in GeneNetwork1.
-
-    There is a chance that the original implementation has a bug, especially
-    dealing with the `inc_var` value. It the `inc_var` value is meant to control
-    the inclusion of the `variance` value, then the current implementation, and
-    that one in GN1 have a bug.
-    """
-    def __exporter__(acc, data_item):
-        if not inc_var or data_item["variance"] is not None:
-            return (
-                acc[0] + (data_item["sample_name"],),
-                acc[1] + (data_item["value"],),
-                acc[2] + (data_item["variance"],))
-        return acc
-    return reduce(
-        __exporter__,
-        filter(lambda td: td["value"] is not None, trait_data["data"].values()),
-        (tuple(), tuple(), tuple()))
diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py
index 0c4ef78..67f0c6f 100644
--- a/tests/unit/db/test_traits.py
+++ b/tests/unit/db/test_traits.py
@@ -3,6 +3,7 @@ from unittest import mock, TestCase
 from gn3.db.traits import (
     build_trait_name,
     export_trait_data,
+    export_informative,
     set_haveinfo_field,
     update_sample_data,
     retrieve_trait_info,
@@ -315,3 +316,88 @@ class TestTraitsDBFunctions(TestCase):
                         trait_data, samplelist, dtype=dtype, var_exists=vflag,
                         n_exists=nflag),
                     expected)
+
+    def test_export_informative(self):
+        """Test that the function exports appropriate data."""
+        for trait_data, inc_var, expected in [
+                [{"data": {
+                    "sample1": {
+                        "sample_name": "sample1", "value": 9, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample2": {
+                        "sample_name": "sample2", "value": 8, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample3": {
+                        "sample_name": "sample3", "value": 7, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample4": {
+                        "sample_name": "sample4", "value": 6, "variance": None,
+                        "ndata": 13
+                    },
+                }}, 0, (
+                    ("sample1", "sample2", "sample3", "sample4"), (9, 8, 7, 6),
+                    (None, None, None, None))],
+                [{"data": {
+                    "sample1": {
+                        "sample_name": "sample1", "value": 9, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample2": {
+                        "sample_name": "sample2", "value": 8, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample3": {
+                        "sample_name": "sample3", "value": None, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample4": {
+                        "sample_name": "sample4", "value": 6, "variance": None,
+                        "ndata": 13
+                    },
+                }}, 0, (
+                    ("sample1", "sample2", "sample4"), (9, 8, 6),
+                    (None, None, None))],
+                [{"data": {
+                    "sample1": {
+                        "sample_name": "sample1", "value": 9, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample2": {
+                        "sample_name": "sample2", "value": 8, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample3": {
+                        "sample_name": "sample3", "value": 7, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample4": {
+                        "sample_name": "sample4", "value": 6, "variance": None,
+                        "ndata": 13
+                    },
+                }}, True, (tuple(), tuple(), tuple())],
+                [{"data": {
+                    "sample1": {
+                        "sample_name": "sample1", "value": 9, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample2": {
+                        "sample_name": "sample2", "value": 8, "variance": 0.657,
+                        "ndata": 13
+                    },
+                    "sample3": {
+                        "sample_name": "sample3", "value": 7, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample4": {
+                        "sample_name": "sample4", "value": 6, "variance": None,
+                        "ndata": 13
+                    },
+                }}, 0, (
+                    ("sample1", "sample2", "sample3", "sample4"), (9, 8, 7, 6),
+                    (None, 0.657, None, None))]]:
+            with self.subTest(trait_data=trait_data):
+                self.assertEqual(
+                    export_informative(trait_data, inc_var), expected)
diff --git a/tests/unit/test_partial_correlations.py b/tests/unit/test_partial_correlations.py
index 6eea078..f204d4f 100644
--- a/tests/unit/test_partial_correlations.py
+++ b/tests/unit/test_partial_correlations.py
@@ -1,92 +1,7 @@
 """Module contains tests for gn3.partial_correlations"""
 
 from unittest import TestCase
-from gn3.partial_correlations import export_informative
+
 
 class TestPartialCorrelations(TestCase):
     """Class for testing partial correlations computation functions"""
-
-    def test_export_informative(self):
-        """Test that the function exports appropriate data."""
-        for trait_data, inc_var, expected in [
-                [{"data": {
-                    "sample1": {
-                        "sample_name": "sample1", "value": 9, "variance": None,
-                        "ndata": 13
-                    },
-                    "sample2": {
-                        "sample_name": "sample2", "value": 8, "variance": None,
-                        "ndata": 13
-                    },
-                    "sample3": {
-                        "sample_name": "sample3", "value": 7, "variance": None,
-                        "ndata": 13
-                    },
-                    "sample4": {
-                        "sample_name": "sample4", "value": 6, "variance": None,
-                        "ndata": 13
-                    },
-                }}, 0, (
-                    ("sample1", "sample2", "sample3", "sample4"), (9, 8, 7, 6),
-                    (None, None, None, None))],
-                [{"data": {
-                    "sample1": {
-                        "sample_name": "sample1", "value": 9, "variance": None,
-                        "ndata": 13
-                    },
-                    "sample2": {
-                        "sample_name": "sample2", "value": 8, "variance": None,
-                        "ndata": 13
-                    },
-                    "sample3": {
-                        "sample_name": "sample3", "value": None, "variance": None,
-                        "ndata": 13
-                    },
-                    "sample4": {
-                        "sample_name": "sample4", "value": 6, "variance": None,
-                        "ndata": 13
-                    },
-                }}, 0, (
-                    ("sample1", "sample2", "sample4"), (9, 8, 6),
-                    (None, None, None))],
-                [{"data": {
-                    "sample1": {
-                        "sample_name": "sample1", "value": 9, "variance": None,
-                        "ndata": 13
-                    },
-                    "sample2": {
-                        "sample_name": "sample2", "value": 8, "variance": None,
-                        "ndata": 13
-                    },
-                    "sample3": {
-                        "sample_name": "sample3", "value": 7, "variance": None,
-                        "ndata": 13
-                    },
-                    "sample4": {
-                        "sample_name": "sample4", "value": 6, "variance": None,
-                        "ndata": 13
-                    },
-                }}, True, (tuple(), tuple(), tuple())],
-                [{"data": {
-                    "sample1": {
-                        "sample_name": "sample1", "value": 9, "variance": None,
-                        "ndata": 13
-                    },
-                    "sample2": {
-                        "sample_name": "sample2", "value": 8, "variance": 0.657,
-                        "ndata": 13
-                    },
-                    "sample3": {
-                        "sample_name": "sample3", "value": 7, "variance": None,
-                        "ndata": 13
-                    },
-                    "sample4": {
-                        "sample_name": "sample4", "value": 6, "variance": None,
-                        "ndata": 13
-                    },
-                }}, 0, (
-                    ("sample1", "sample2", "sample3", "sample4"), (9, 8, 7, 6),
-                    (None, 0.657, None, None))]]:
-            with self.subTest(trait_data=trait_data):
-                self.assertEqual(
-                    export_informative(trait_data, inc_var), expected)
-- 
cgit 1.4.1


From 1544776b072d7240773cf14d423078841e4c1a07 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 18 Oct 2021 14:14:04 +0300
Subject: Implement `control_samples` function as is in GN1

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* gn3/partial_correlations.py: Implement `control_samples` function
* tests/unit/test_partial_correlations.py: add tests for `control_samples`
  function

  Implement the function `control_samples` and make it mostly bug-compatible
  with the `web/webqtl/correlation/correlationFunction.controlStrain` function
  in GN1.

  This implementation in GN3 does not do any calls to the database. It will
  rely on other functions to provide the data from the database to it.
---
 gn3/partial_correlations.py             | 38 ++++++++++++++++
 tests/unit/test_partial_correlations.py | 80 +++++++++++++++++++++++++++++++++
 2 files changed, 118 insertions(+)

(limited to 'tests')

diff --git a/gn3/partial_correlations.py b/gn3/partial_correlations.py
index df390ed..99521c6 100644
--- a/gn3/partial_correlations.py
+++ b/gn3/partial_correlations.py
@@ -5,4 +5,42 @@ It is an attempt to migrate over the partial correlations feature from
 GeneNetwork1.
 """
 
+from typing import Sequence
 from functools import reduce
+
+def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]):
+    """
+    Fetches data for the control traits.
+
+    This migrates `web/webqtl/correlation/correlationFunction.controlStrain` in
+    GN1, with a few modifications to the arguments passed in.
+
+    PARAMETERS:
+    controls: A sequence of traits, each with a `data` map of sample names
+        to sample data. Equivalent to the `cvals` value in the GN1 function.
+    sampleslist: A list of samples. Equivalent to `strainlst` in the
+        corresponding source function in GN1
+    """
+    def __process_control__(trait_data):
+        def __process_sample__(acc, sample):
+            if sample in trait_data["data"].keys():
+                sample_item = trait_data["data"][sample]
+                val = sample_item["value"]
+                if val is not None:
+                    return (
+                        acc[0] + (sample,),
+                        acc[1] + (val,),
+                        acc[2] + (sample_item["variance"],))
+            return acc
+        return reduce(
+            __process_sample__, sampleslist, (tuple(), tuple(), tuple()))
+
+    return reduce(
+        lambda acc, item: (
+            acc[0] + (item[0],),
+            acc[1] + (item[1],),
+            acc[2] + (item[2],),
+            acc[3] + (len(item[0]),),
+        ),
+        [__process_control__(trait_data) for trait_data in controls],
+        (tuple(), tuple(), tuple(), tuple()))
diff --git a/tests/unit/test_partial_correlations.py b/tests/unit/test_partial_correlations.py
index f204d4f..0083ef7 100644
--- a/tests/unit/test_partial_correlations.py
+++ b/tests/unit/test_partial_correlations.py
@@ -1,7 +1,87 @@
 """Module contains tests for gn3.partial_correlations"""
 
 from unittest import TestCase
+from gn3.partial_correlations import control_samples
 
+sampleslist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
+control_traits = (
+    {
+        "mysqlid": 36688172,
+        "data": {
+            "B6cC3-1": {
+                "sample_name": "B6cC3-1", "value": 7.51879, "variance": None,
+                "ndata": None},
+            "BXD1": {
+                "sample_name": "BXD1", "value": 7.77141, "variance": None,
+                "ndata": None},
+            "BXD12": {
+                "sample_name": "BXD12", "value": 8.39265, "variance": None,
+                "ndata": None},
+            "BXD16": {
+                "sample_name": "BXD16", "value": 8.17443, "variance": None,
+                "ndata": None},
+            "BXD19": {
+                "sample_name": "BXD19", "value": 8.30401, "variance": None,
+                "ndata": None},
+            "BXD2": {
+                "sample_name": "BXD2", "value": 7.80944, "variance": None,
+                "ndata": None}}},
+    {
+        "mysqlid": 36688172,
+        "data": {
+            "B6cC3-21": {
+                "sample_name": "B6cC3-1", "value": 7.51879, "variance": None,
+                "ndata": None},
+            "BXD21": {
+                "sample_name": "BXD1", "value": 7.77141, "variance": None,
+                "ndata": None},
+            "BXD12": {
+                "sample_name": "BXD12", "value": 8.39265, "variance": None,
+                "ndata": None},
+            "BXD16": {
+                "sample_name": "BXD16", "value": 8.17443, "variance": None,
+                "ndata": None},
+            "BXD19": {
+                "sample_name": "BXD19", "value": 8.30401, "variance": None,
+                "ndata": None},
+            "BXD2": {
+                "sample_name": "BXD2", "value": 7.80944, "variance": None,
+                "ndata": None}}},
+    {
+        "mysqlid": 36688172,
+        "data": {
+            "B6cC3-1": {
+                "sample_name": "B6cC3-1", "value": 7.51879, "variance": None,
+                "ndata": None},
+            "BXD1": {
+                "sample_name": "BXD1", "value": 7.77141, "variance": None,
+                "ndata": None},
+            "BXD12": {
+                "sample_name": "BXD12", "value": None, "variance": None,
+                "ndata": None},
+            "BXD16": {
+                "sample_name": "BXD16", "value": None, "variance": None,
+                "ndata": None},
+            "BXD19": {
+                "sample_name": "BXD19", "value": None, "variance": None,
+                "ndata": None},
+            "BXD2": {
+                "sample_name": "BXD2", "value": 7.80944, "variance": None,
+                "ndata": None}}})
 
 class TestPartialCorrelations(TestCase):
     """Class for testing partial correlations computation functions"""
+
+    def test_control_samples(self):
+        """Test that the control_samples works as expected."""
+        self.assertEqual(
+            control_samples(control_traits, sampleslist),
+            ((("B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"),
+              ("BXD12", "BXD16", "BXD19", "BXD2"),
+              ("B6cC3-1", "BXD1", "BXD2")),
+             ((7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944),
+              (8.39265, 8.17443, 8.30401, 7.80944),
+              (7.51879, 7.77141, 7.80944)),
+             ((None, None, None, None, None, None), (None, None, None, None),
+              (None, None, None)),
+             (6, 4, 3)))
-- 
cgit 1.4.1


From c5355c5db72fdec9e7e360ceec19d5d50d15ce00 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 18 Oct 2021 14:31:51 +0300
Subject: Disable pylint issue

* Disable minor pylint issue.
---
 tests/unit/db/test_traits.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tests')

diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py
index 67f0c6f..4aa9389 100644
--- a/tests/unit/db/test_traits.py
+++ b/tests/unit/db/test_traits.py
@@ -319,6 +319,7 @@ class TestTraitsDBFunctions(TestCase):
 
     def test_export_informative(self):
         """Test that the function exports appropriate data."""
+        # pylint: disable=W0621
         for trait_data, inc_var, expected in [
                 [{"data": {
                     "sample1": {
-- 
cgit 1.4.1


From 3304fa682924b8f6bff5126ecf2fb58f4201b968 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Tue, 19 Oct 2021 09:16:38 +0300
Subject: Implement `dictify_by_samples`

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* gn3/partial_correlations.py: implement `dictify_by_samples` function
* tests/unit/test_partial_correlations.py: implement tests for
  `dictify_by_samples` function

  Implement the `dictify_by_samples` function as a partial migration of the
  `web.webqtl.correlation.correlationFunction.fixStrains` function from GN1.
---
 gn3/partial_correlations.py             | 16 +++++++++++++
 tests/unit/test_partial_correlations.py | 42 ++++++++++++++++++++++++++++++++-
 2 files changed, 57 insertions(+), 1 deletion(-)

(limited to 'tests')

diff --git a/gn3/partial_correlations.py b/gn3/partial_correlations.py
index 99521c6..4db4807 100644
--- a/gn3/partial_correlations.py
+++ b/gn3/partial_correlations.py
@@ -44,3 +44,19 @@ def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]):
         ),
         [__process_control__(trait_data) for trait_data in controls],
         (tuple(), tuple(), tuple(), tuple()))
+
+def dictify_by_samples(samples_vals_vars: Sequence[Sequence]) -> dict:
+    """
+    Build a sequence of dictionaries from a sequence of separate sequences of
+    samples, values and variances.
+
+    This is a partial migration of
+    `web.webqtl.correlation.correlationFunction.fixStrains` function in GN1.
+    This implementation extracts code that will find common use, and that will
+    find use in more than one place.
+    """
+    return tuple(
+        {
+            sample: {"sample_name": sample, "value": val, "variance": var}
+            for sample, val, var in zip(*trait_line)
+        } for trait_line in zip(*(samples_vals_vars[0:3])))
diff --git a/tests/unit/test_partial_correlations.py b/tests/unit/test_partial_correlations.py
index 0083ef7..6302f74 100644
--- a/tests/unit/test_partial_correlations.py
+++ b/tests/unit/test_partial_correlations.py
@@ -1,7 +1,7 @@
 """Module contains tests for gn3.partial_correlations"""
 
 from unittest import TestCase
-from gn3.partial_correlations import control_samples
+from gn3.partial_correlations import control_samples, dictify_by_samples
 
 sampleslist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
 control_traits = (
@@ -85,3 +85,43 @@ class TestPartialCorrelations(TestCase):
              ((None, None, None, None, None, None), (None, None, None, None),
               (None, None, None)),
              (6, 4, 3)))
+
+    def test_dictify_by_samples(self):
+        """
+        Given:
+            a sequence of sequences with sample names, values and variances, as
+            in the output of `gn3.partial_correlations.control_samples` or
+            the output of `gn3.db.traits.export_informative`
+        When:
+            the sequence is passed as an argument into the
+            `gn3.partial_correlations.dictify_by_samples` function
+        Then:
+            return a sequence of dicts with keys being the values of the sample
+            names, and each of whose values being sub-dicts with the keys
+            'sample_name', 'value' and 'variance' whose values correspond to the
+            values passed in.
+        """
+        self.assertEqual(
+            dictify_by_samples(
+                ((("B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"),
+                  ("BXD12", "BXD16", "BXD19", "BXD2"),
+                  ("B6cC3-1", "BXD1", "BXD2")),
+                 ((7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944),
+                  (8.39265, 8.17443, 8.30401, 7.80944),
+                  (7.51879, 7.77141, 7.80944)),
+                 ((None, None, None, None, None, None), (None, None, None, None),
+                  (None, None, None)),
+                 (6, 4, 3))),
+            ({"B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None},
+              "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None},
+              "BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None},
+              "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None},
+              "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None},
+              "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None}},
+             {"BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None},
+              "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None},
+              "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None},
+              "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None}},
+             {"B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None},
+              "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None},
+              "BXD2": {"sample_name": "BXD2", "value":  7.80944, "variance": None}}))
-- 
cgit 1.4.1


From efb9896464f969de4fe8fcaee21a19ac1d881fa2 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Tue, 19 Oct 2021 10:31:24 +0300
Subject: Implement remaining `fix_samples` functionality

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* gn3/partial_correlations.py: implement `fix_samples` function
* tests/unit/test_partial_correlations.py: implement tests for `fix_samples`
  function

  Implement the remaining partial migration for the
  `web.webqtl.correlation.correlationFunction.fixStrain` function in GN1.
---
 gn3/partial_correlations.py             | 30 +++++++++++++++++--
 tests/unit/test_partial_correlations.py | 52 ++++++++++++++++++++++++---------
 2 files changed, 66 insertions(+), 16 deletions(-)

(limited to 'tests')

diff --git a/gn3/partial_correlations.py b/gn3/partial_correlations.py
index 4db4807..c556d10 100644
--- a/gn3/partial_correlations.py
+++ b/gn3/partial_correlations.py
@@ -5,8 +5,8 @@ It is an attempt to migrate over the partial correlations feature from
 GeneNetwork1.
 """
 
-from typing import Sequence
 from functools import reduce
+from typing import Any, Sequence
 
 def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]):
     """
@@ -45,7 +45,7 @@ def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]):
         [__process_control__(trait_data) for trait_data in controls],
         (tuple(), tuple(), tuple(), tuple()))
 
-def dictify_by_samples(samples_vals_vars: Sequence[Sequence]) -> dict:
+def dictify_by_samples(samples_vals_vars: Sequence[Sequence]) -> Sequence[dict]:
     """
     Build a sequence of dictionaries from a sequence of separate sequences of
     samples, values and variances.
@@ -60,3 +60,29 @@ def dictify_by_samples(samples_vals_vars: Sequence[Sequence]) -> dict:
             sample: {"sample_name": sample, "value": val, "variance": var}
             for sample, val, var in zip(*trait_line)
         } for trait_line in zip(*(samples_vals_vars[0:3])))
+
+def fix_samples(primary_trait: dict, control_traits: Sequence[dict]) -> Sequence[Sequence[Any]]:
+    """
+    Corrects sample_names, values and variance such that they all contain only
+    those samples that are common to the reference trait and all control traits.
+
+    This is a partial migration of the
+    `web.webqtl.correlation.correlationFunction.fixStrain` function in GN1.
+    """
+    primary_samples = tuple(
+        present[0] for present in
+        ((sample, all(sample in control.keys() for control in control_traits))
+         for sample in primary_trait.keys())
+        if present[1])
+    control_vals_vars: tuple = reduce(
+        lambda acc, x: (acc[0] + (x[0],), acc[1] + (x[1],)),
+        ((item["value"], item["variance"])
+         for sublist in [tuple(control.values()) for control in control_traits]
+         for item in sublist),
+        (tuple(), tuple()))
+    return (
+        primary_samples,
+        tuple(primary_trait[sample]["value"] for sample in primary_samples),
+        control_vals_vars[0],
+        tuple(primary_trait[sample]["variance"] for sample in primary_samples),
+        control_vals_vars[1])
diff --git a/tests/unit/test_partial_correlations.py b/tests/unit/test_partial_correlations.py
index 6302f74..7631a71 100644
--- a/tests/unit/test_partial_correlations.py
+++ b/tests/unit/test_partial_correlations.py
@@ -1,7 +1,10 @@
 """Module contains tests for gn3.partial_correlations"""
 
 from unittest import TestCase
-from gn3.partial_correlations import control_samples, dictify_by_samples
+from gn3.partial_correlations import (
+    fix_samples,
+    control_samples,
+    dictify_by_samples)
 
 sampleslist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
 control_traits = (
@@ -69,6 +72,21 @@ control_traits = (
                 "sample_name": "BXD2", "value": 7.80944, "variance": None,
                 "ndata": None}}})
 
+dictified_control_samples = (
+    {"B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None},
+     "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None},
+     "BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None},
+     "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None},
+     "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None},
+     "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None}},
+    {"BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None},
+     "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None},
+     "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None},
+     "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None}},
+    {"B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None},
+     "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None},
+     "BXD2": {"sample_name": "BXD2", "value":  7.80944, "variance": None}})
+
 class TestPartialCorrelations(TestCase):
     """Class for testing partial correlations computation functions"""
 
@@ -112,16 +130,22 @@ class TestPartialCorrelations(TestCase):
                  ((None, None, None, None, None, None), (None, None, None, None),
                   (None, None, None)),
                  (6, 4, 3))),
-            ({"B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None},
-              "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None},
-              "BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None},
-              "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None},
-              "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None},
-              "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None}},
-             {"BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None},
-              "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None},
-              "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None},
-              "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None}},
-             {"B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None},
-              "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None},
-              "BXD2": {"sample_name": "BXD2", "value":  7.80944, "variance": None}}))
+            dictified_control_samples)
+
+    def test_fix_samples(self):
+        """Test that fix_samples fixes the values"""
+        self.assertEqual(
+            fix_samples(
+                {"B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879,
+                             "variance": None},
+                 "BXD1": {"sample_name": "BXD1", "value": 7.77141,
+                          "variance": None},
+                 "BXD2": {"sample_name": "BXD2", "value":  7.80944,
+                          "variance": None}},
+                dictified_control_samples),
+            (("BXD2",), (7.80944,),
+             (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944, 8.39265,
+              8.17443, 8.30401, 7.80944, 7.51879, 7.77141, 7.80944),
+             (None,),
+             (None, None, None, None, None, None, None, None, None, None, None,
+              None, None)))
-- 
cgit 1.4.1


From 6818670686de86c86b6c1aa372135ab6c22af156 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Thu, 21 Oct 2021 07:11:41 +0300
Subject: Document tests better

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* Document the tests better to help with understanding what each test checks
  for.
---
 tests/unit/test_partial_correlations.py | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

(limited to 'tests')

diff --git a/tests/unit/test_partial_correlations.py b/tests/unit/test_partial_correlations.py
index 7631a71..c591c8f 100644
--- a/tests/unit/test_partial_correlations.py
+++ b/tests/unit/test_partial_correlations.py
@@ -106,6 +106,8 @@ class TestPartialCorrelations(TestCase):
 
     def test_dictify_by_samples(self):
         """
+        Test that `dictify_by_samples` generates the appropriate dict
+
         Given:
             a sequence of sequences with sample names, values and variances, as
             in the output of `gn3.partial_correlations.control_samples` or
@@ -133,7 +135,34 @@ class TestPartialCorrelations(TestCase):
             dictified_control_samples)
 
     def test_fix_samples(self):
-        """Test that fix_samples fixes the values"""
+        """
+        Test that `fix_samples` returns only the common samples
+
+        Given:
+            - A primary trait
+            - A sequence of control samples
+        When:
+            - The two arguments are passed to `fix_samples`
+        Then:
+            - Only the names of the samples present in the primary trait that
+              are also present in ALL the control traits are present in the
+              return value
+            - Only the values of the samples present in the primary trait that
+              are also present in ALL the control traits are present in the
+              return value
+            - ALL the values for ALL the control traits are present in the
+              return value
+            - Only the variances of the samples present in the primary trait
+              that are also present in ALL the control traits are present in the
+              return value
+            - ALL the variances for ALL the control traits are present in the
+              return value
+            - The return value is a tuple of the above items, in the following
+              order:
+                ((sample_names, ...), (primary_trait_values, ...),
+                 (control_traits_values, ...), (primary_trait_variances, ...),
+                 (control_traits_variances, ...))
+        """
         self.assertEqual(
             fix_samples(
                 {"B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879,
-- 
cgit 1.4.1


From cad4649d19001f62ef592dedf09f3ac53744962a Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Thu, 21 Oct 2021 09:00:16 +0300
Subject: Implement `find_identical_traits` function

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* gn3/partial_correlations.py: implement function `find_identical_traits`
* tests/unit/test_partial_correlations.py: implement tests for function
  `find_identical_traits`

  Migrate the `web.webqtl.correlation.correlationFunction.findIdenticalTraits`
  function from GN1 to here, adding tests to ensure the migrated function is
  bug-compatible with the original.
---
 gn3/partial_correlations.py             | 38 ++++++++++++++++++++++++++++++++-
 tests/unit/test_partial_correlations.py | 33 +++++++++++++++++++++++++++-
 2 files changed, 69 insertions(+), 2 deletions(-)

(limited to 'tests')

diff --git a/gn3/partial_correlations.py b/gn3/partial_correlations.py
index c556d10..1fb0ccc 100644
--- a/gn3/partial_correlations.py
+++ b/gn3/partial_correlations.py
@@ -6,7 +6,7 @@ GeneNetwork1.
 """
 
 from functools import reduce
-from typing import Any, Sequence
+from typing import Any, Tuple, Sequence
 
 def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]):
     """
@@ -86,3 +86,39 @@ def fix_samples(primary_trait: dict, control_traits: Sequence[dict]) -> Sequence
         control_vals_vars[0],
         tuple(primary_trait[sample]["variance"] for sample in primary_samples),
         control_vals_vars[1])
+
+def find_identical_traits(
+        primary_name: str, primary_value: float, control_names: Tuple[str, ...],
+        control_values: Tuple[float, ...]) -> Tuple[str, ...]:
+    """
+    Find traits that have the same value when the values are considered to
+    3 decimal places.
+
+    This is a migration of the
+    `web.webqtl.correlation.correlationFunction.findIdenticalTraits` function in
+    GN1.
+    """
+    def __merge_identicals__(
+            acc: Tuple[str, ...],
+            ident: Tuple[str, Tuple[str, ...]]) -> Tuple[str, ...]:
+        return acc + ident[1]
+
+    def __dictify_controls__(acc, control_item):
+        ckey = "{:.3f}".format(control_item[0])
+        return {**acc, ckey: acc.get(ckey, tuple()) + (control_item[1],)}
+
+    return (reduce(## for identical control traits
+        __merge_identicals__,
+        (item for item in reduce(# type: ignore[var-annotated]
+            __dictify_controls__, zip(control_values, control_names),
+            {}).items() if len(item[1]) > 1),
+        tuple())
+            or
+            reduce(## If no identical control traits, try primary and controls
+                __merge_identicals__,
+                (item for item in reduce(# type: ignore[var-annotated]
+                    __dictify_controls__,
+                    zip((primary_value,) + control_values,
+                        (primary_name,) + control_names), {}).items()
+                 if len(item[1]) > 1),
+                tuple()))
diff --git a/tests/unit/test_partial_correlations.py b/tests/unit/test_partial_correlations.py
index c591c8f..60e54c1 100644
--- a/tests/unit/test_partial_correlations.py
+++ b/tests/unit/test_partial_correlations.py
@@ -4,7 +4,8 @@ from unittest import TestCase
 from gn3.partial_correlations import (
     fix_samples,
     control_samples,
-    dictify_by_samples)
+    dictify_by_samples,
+    find_identical_traits)
 
 sampleslist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
 control_traits = (
@@ -178,3 +179,33 @@ class TestPartialCorrelations(TestCase):
              (None,),
              (None, None, None, None, None, None, None, None, None, None, None,
               None, None)))
+
+    def test_find_identical_traits(self):
+        """
+        Test `gn3.partial_correlations.find_identical_traits`.
+
+        Given:
+            - the name of a primary trait
+            - the value of a primary trait
+            - a sequence of names of control traits
+            - a sequence of values of control traits
+        When:
+            - the arguments above are passed to the `find_identical_traits`
+              function
+        Then:
+            - Return ALL trait names that have the same value when up to three
+              decimal places are considered
+        """
+        for primn, primv, contn, contv, expected in (
+                ("pt", 12.98395, ("ct0", "ct1", "ct2"),
+                 (0.1234, 2.3456, 3.4567), tuple()),
+                ("pt", 12.98395, ("ct0", "ct1", "ct2"),
+                 (12.98354, 2.3456, 3.4567), ("pt", "ct0")),
+                ("pt", 12.98395, ("ct0", "ct1", "ct2", "ct3"),
+                 (0.1234, 2.3456, 0.1233, 4.5678), ("ct0", "ct2"))
+        ):
+            with self.subTest(
+                    primary_name=primn, primary_value=primv,
+                    control_names=contn, control_values=contv):
+                self.assertEqual(
+                    find_identical_traits(primn, primv, contn, contv), expected)
-- 
cgit 1.4.1


From 783f302c5d4729eb0b5fb6ba79180b7cd97764a5 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 25 Oct 2021 19:12:24 +0300
Subject: Implement `partition_all` function

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* gn3/data_helpers.py: new function (partition_all)
* tests/unit/test_data_helpers.py: tests for function
  `gn3.data_helpers.partition_all`

  As part of migrating some functions that access the database, this commit
  extracts generic processes that can be accomplished on data, and implements
  the `partition_all` function, that is equivalent to Clojure's
  `partition-all` function.
---
 gn3/data_helpers.py             | 25 +++++++++++++++++++++++++
 tests/unit/test_data_helpers.py | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+)
 create mode 100644 gn3/data_helpers.py
 create mode 100644 tests/unit/test_data_helpers.py

(limited to 'tests')

diff --git a/gn3/data_helpers.py b/gn3/data_helpers.py
new file mode 100644
index 0000000..f0d971e
--- /dev/null
+++ b/gn3/data_helpers.py
@@ -0,0 +1,25 @@
+"""
+This module will hold generic functions that can operate on a wide array of
+data structures.
+"""
+
+from math import ceil
+from functools import reduce
+from typing import Any, Tuple, Sequence
+
+def partition_all(num: int, items: Sequence[Any]) -> Tuple[Tuple[Any, ...], ...]:
+    """
+    Split the sequence `items` into consecutive sections of `num` items each,
+    returned as a tuple of tuples; the last section may have fewer items.
+
+    This is an approximation of Clojure's `partition-all` function.
+    """
+    def __compute_start_stop__(acc, iteration):
+        start = iteration * num
+        return acc + ((start, start + num),)
+
+    iterations = range(ceil(len(items) / num))
+    return tuple([# type: ignore[misc]
+        tuple(items[start:stop]) for start, stop # type: ignore[has-type]
+        in reduce(
+            __compute_start_stop__, iterations, tuple())])
diff --git a/tests/unit/test_data_helpers.py b/tests/unit/test_data_helpers.py
new file mode 100644
index 0000000..1eec3cc
--- /dev/null
+++ b/tests/unit/test_data_helpers.py
@@ -0,0 +1,37 @@
+"""
+Test functions in gn3.data_helpers
+"""
+
+from unittest import TestCase
+
+from gn3.data_helpers import partition_all
+
+class TestDataHelpers(TestCase):
+    """
+    Test functions in gn3.data_helpers
+    """
+
+    def test_partition_all(self):
+        """
+        Test that `gn3.data_helpers.partition_all` partitions sequences as expected.
+
+        Given:
+            - `num`: The number of items per partition
+            - `items`: A sequence of items
+        When:
+            - The arguments above are passed to `gn3.data_helpers.partition_all`
+        Then:
+            - Return a tuple of partitions (tuples), each of which has `num`
+              items in the same order as those in `items`, save for the last
+              partition which might have fewer items than `num`.
+        """
+        for count, items, expected in (
+                (1, [0, 1, 2, 3], ((0,), (1,), (2,), (3,))),
+                (3, (0, 1, 2, 3, 4, 5, 6, 7, 8, 9),
+                 ((0, 1, 2), (3, 4, 5), (6, 7, 8), (9, ))),
+                (4, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
+                 ((0, 1, 2, 3), (4, 5, 6, 7), (8, 9))),
+                (13, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
+                 ((0, 1, 2, 3, 4, 5, 6, 7, 8, 9), ))):
+            with self.subTest(n=count, items=items):
+                self.assertEqual(partition_all(count, items), expected)
-- 
cgit 1.4.1