about summary refs log tree commit diff
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2021-10-18 14:14:04 +0300
committer BonfaceKilz 2021-10-19 16:33:32 +0300
commit 38a0b65d234c0019ba14814adf69e09493082298 (patch)
tree 49669bf3a2e3a5f11f539c105fdcd0922352e94b
parent 42c56d330fdb51820c0fdcbb0b4376ff568ea009 (diff)
download genenetwork3-38a0b65d234c0019ba14814adf69e09493082298.tar.gz
Implement `control_samples` function as is in GN1
Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* gn3/partial_correlations.py: Implement `control_samples` function
* tests/unit/test_partial_correlations.py: add tests for `control_samples`
  function

  Implement the function `control_samples` and make it mostly bug-compatible
  with the `web/webqtl/correlation/correlationFunction.controlStrain` function
  in GN1.

  This implementation in GN3 does not make any calls to the database. It
  relies on other functions to fetch the data from the database and pass it in.
-rw-r--r-- gn3/partial_correlations.py 38
-rw-r--r-- tests/unit/test_partial_correlations.py 80
2 files changed, 118 insertions, 0 deletions
diff --git a/gn3/partial_correlations.py b/gn3/partial_correlations.py
index df390ed..99521c6 100644
--- a/gn3/partial_correlations.py
+++ b/gn3/partial_correlations.py
@@ -5,4 +5,42 @@ It is an attempt to migrate over the partial correlations feature from
 GeneNetwork1.
 """
 
+from typing import Sequence
 from functools import reduce
+
+def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]):
+    """
+    Fetches data for the control traits.
+
+    This migrates `web/webqtl/correlation/correlationFunction.controlStrain` in
+    GN1, with a few modifications to the arguments passed in.
+
+    PARAMETERS:
+    controls: A sequence of control trait dicts. Each dict is read through its
+        "data" key, which maps a sample name to a dict that has at least the
+        "value" and "variance" keys. Equivalent to the `cvals` value in the
+        corresponding source function in GN1.
+    sampleslist: A list of samples. Equivalent to `strainlst` in the
+        corresponding source function in GN1
+
+    RETURNS:
+    A 4-tuple of tuples, each with one entry per control trait, in the order
+    of `controls`: (sample names, values, variances, sample counts). For each
+    trait, only the samples of `sampleslist` that are present in the trait's
+    data with a value that is not None are kept, in `sampleslist` order. The
+    last tuple holds the number of samples kept for each trait.
+    """
+    def __process_control__(trait_data):
+        # Builds the (sample names, values, variances) triple for one trait.
+        def __process_sample__(acc, sample):
+            # A sample is kept only if the trait has an entry for it and that
+            # entry's value is not None; otherwise `acc` is returned as-is.
+            if sample in trait_data["data"].keys():
+                sample_item = trait_data["data"][sample]
+                val = sample_item["value"]
+                if val is not None:
+                    return (
+                        acc[0] + (sample,),
+                        acc[1] + (val,),
+                        # The variance is not filtered, so it may be None.
+                        acc[2] + (sample_item["variance"],))
+            return acc
+        # Folding over `sampleslist` keeps the retained samples in its order.
+        return reduce(
+            __process_sample__, sampleslist, (tuple(), tuple(), tuple()))
+
+    # Regroup the per-trait triples into parallel tuples, and record how many
+    # samples each trait kept as a fourth tuple.
+    return reduce(
+        lambda acc, item: (
+            acc[0] + (item[0],),
+            acc[1] + (item[1],),
+            acc[2] + (item[2],),
+            acc[3] + (len(item[0]),),
+        ),
+        [__process_control__(trait_data) for trait_data in controls],
+        (tuple(), tuple(), tuple(), tuple()))
diff --git a/tests/unit/test_partial_correlations.py b/tests/unit/test_partial_correlations.py
index f204d4f..0083ef7 100644
--- a/tests/unit/test_partial_correlations.py
+++ b/tests/unit/test_partial_correlations.py
@@ -1,7 +1,87 @@
 """Module contains tests for gn3.partial_correlations"""
 
 from unittest import TestCase
+from gn3.partial_correlations import control_samples
 
+# Sample names passed as the second argument in the test below.
+sampleslist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
+# Three control traits, each shaped as {"mysqlid": ..., "data": {name: {...}}}:
+#   1. every name in `sampleslist` is a "data" key with a non-None value;
+#   2. two "data" keys ("B6cC3-21", "BXD21") do not appear in `sampleslist`;
+#   3. three samples ("BXD12", "BXD16", "BXD19") have a value of None.
+# NOTE(review): all three traits share the same "mysqlid", and in the second
+# trait two "sample_name" fields differ from their "data" keys -- presumably
+# placeholder data; confirm.
+control_traits = (
+    {
+        "mysqlid": 36688172,
+        "data": {
+            "B6cC3-1": {
+                "sample_name": "B6cC3-1", "value": 7.51879, "variance": None,
+                "ndata": None},
+            "BXD1": {
+                "sample_name": "BXD1", "value": 7.77141, "variance": None,
+                "ndata": None},
+            "BXD12": {
+                "sample_name": "BXD12", "value": 8.39265, "variance": None,
+                "ndata": None},
+            "BXD16": {
+                "sample_name": "BXD16", "value": 8.17443, "variance": None,
+                "ndata": None},
+            "BXD19": {
+                "sample_name": "BXD19", "value": 8.30401, "variance": None,
+                "ndata": None},
+            "BXD2": {
+                "sample_name": "BXD2", "value": 7.80944, "variance": None,
+                "ndata": None}}},
+    {
+        "mysqlid": 36688172,
+        "data": {
+            "B6cC3-21": {
+                "sample_name": "B6cC3-1", "value": 7.51879, "variance": None,
+                "ndata": None},
+            "BXD21": {
+                "sample_name": "BXD1", "value": 7.77141, "variance": None,
+                "ndata": None},
+            "BXD12": {
+                "sample_name": "BXD12", "value": 8.39265, "variance": None,
+                "ndata": None},
+            "BXD16": {
+                "sample_name": "BXD16", "value": 8.17443, "variance": None,
+                "ndata": None},
+            "BXD19": {
+                "sample_name": "BXD19", "value": 8.30401, "variance": None,
+                "ndata": None},
+            "BXD2": {
+                "sample_name": "BXD2", "value": 7.80944, "variance": None,
+                "ndata": None}}},
+    {
+        "mysqlid": 36688172,
+        "data": {
+            "B6cC3-1": {
+                "sample_name": "B6cC3-1", "value": 7.51879, "variance": None,
+                "ndata": None},
+            "BXD1": {
+                "sample_name": "BXD1", "value": 7.77141, "variance": None,
+                "ndata": None},
+            "BXD12": {
+                "sample_name": "BXD12", "value": None, "variance": None,
+                "ndata": None},
+            "BXD16": {
+                "sample_name": "BXD16", "value": None, "variance": None,
+                "ndata": None},
+            "BXD19": {
+                "sample_name": "BXD19", "value": None, "variance": None,
+                "ndata": None},
+            "BXD2": {
+                "sample_name": "BXD2", "value": 7.80944, "variance": None,
+                "ndata": None}}})
 
 class TestPartialCorrelations(TestCase):
     """Class for testing partial correlations computation functions"""
+
+    def test_control_samples(self):
+        """Test that the control_samples works as expected."""
+        # The expected value is four parallel tuples with one entry per
+        # control trait: sample names, values, variances, and the number of
+        # samples, which here is 6, 4 and 3 for the three traits.
+        self.assertEqual(
+            control_samples(control_traits, sampleslist),
+            ((("B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"),
+              ("BXD12", "BXD16", "BXD19", "BXD2"),
+              ("B6cC3-1", "BXD1", "BXD2")),
+             ((7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944),
+              (8.39265, 8.17443, 8.30401, 7.80944),
+              (7.51879, 7.77141, 7.80944)),
+             ((None, None, None, None, None, None), (None, None, None, None),
+              (None, None, None)),
+             (6, 4, 3)))