From a3d4bc848caa8021e14282bab1a13ca7aadeb82d Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Tue, 19 Oct 2021 10:31:24 +0300
Subject: Implement remaining `fix_samples` functionality

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* gn3/partial_correlations.py: implement `fix_samples` function
* tests/unit/test_partial_correlations.py: implement tests for `fix_samples`
  function

  Implement the remaining partial migration for the
  `web.webqtl.correlation.correlationFunction.fixStrain` function in GN1.
---
 gn3/partial_correlations.py | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

(limited to 'gn3')

diff --git a/gn3/partial_correlations.py b/gn3/partial_correlations.py
index 4db4807..c556d10 100644
--- a/gn3/partial_correlations.py
+++ b/gn3/partial_correlations.py
@@ -5,8 +5,8 @@ It is an attempt to migrate over the partial correlations feature from
 GeneNetwork1.
 """
 
-from typing import Sequence
 from functools import reduce
+from typing import Any, Sequence
 
 def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]):
     """
@@ -45,7 +45,7 @@ def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]):
         [__process_control__(trait_data) for trait_data in controls],
         (tuple(), tuple(), tuple(), tuple()))
 
-def dictify_by_samples(samples_vals_vars: Sequence[Sequence]) -> dict:
+def dictify_by_samples(samples_vals_vars: Sequence[Sequence]) -> Sequence[dict]:
     """
     Build a sequence of dictionaries from a sequence of separate sequences of
     samples, values and variances.
@@ -60,3 +60,29 @@ def dictify_by_samples(samples_vals_vars: Sequence[Sequence]) -> dict:
             sample: {"sample_name": sample, "value": val, "variance": var}
             for sample, val, var in zip(*trait_line)
         } for trait_line in zip(*(samples_vals_vars[0:3])))
+
+def fix_samples(primary_trait: dict, control_traits: Sequence[dict]) -> Sequence[Sequence[Any]]:
+    """
+    Return `(samples, primary values, control values, primary variances,
+    control variances)`; `samples` are the `primary_trait` keys that are also
+    keys of every control trait. NOTE(review): control values/variances are
+    flattened from ALL control entries, unfiltered by `samples` -- confirm
+    against GN1's `web.webqtl.correlation.correlationFunction.fixStrain`.
+    """
+    primary_samples = tuple(
+        present[0] for present in
+        ((sample, all(sample in control.keys() for control in control_traits))
+         for sample in primary_trait.keys())
+        if present[1])
+    control_vals_vars: tuple = reduce(
+        lambda acc, x: (acc[0] + (x[0],), acc[1] + (x[1],)),
+        ((item["value"], item["variance"])
+         for sublist in [tuple(control.values()) for control in control_traits]
+         for item in sublist),
+        (tuple(), tuple()))
+    return (
+        primary_samples,
+        tuple(primary_trait[sample]["value"] for sample in primary_samples),
+        control_vals_vars[0],
+        tuple(primary_trait[sample]["variance"] for sample in primary_samples),
+        control_vals_vars[1])
-- 
cgit v1.2.3