aboutsummaryrefslogtreecommitdiff
path: root/gn3/partial_correlations.py
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2021-10-19 10:31:24 +0300
committerBonfaceKilz2021-10-19 16:33:32 +0300
commita3d4bc848caa8021e14282bab1a13ca7aadeb82d (patch)
treee1b3c6a22a47834a50d6eaaf70544b4dce4a335e /gn3/partial_correlations.py
parentb829bf6f5a26edaa57acde0c4a21e2c24d695e87 (diff)
downloadgenenetwork3-a3d4bc848caa8021e14282bab1a13ca7aadeb82d.tar.gz
Implement remaining `fix_samples` functionality
Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * gn3/partial_correlations.py: implement `fix_samples` function * tests/unit/test_partial_correlations.py: implement tests for `fix_samples` function Implement the remaining partial migration for the `web.webqtl.correlation.correlationFunction.fixStrain` function in GN1.
Diffstat (limited to 'gn3/partial_correlations.py')
-rw-r--r--gn3/partial_correlations.py30
1 files changed, 28 insertions, 2 deletions
diff --git a/gn3/partial_correlations.py b/gn3/partial_correlations.py
index 4db4807..c556d10 100644
--- a/gn3/partial_correlations.py
+++ b/gn3/partial_correlations.py
@@ -5,8 +5,8 @@ It is an attempt to migrate over the partial correlations feature from
GeneNetwork1.
"""
-from typing import Sequence
from functools import reduce
+from typing import Any, Sequence
def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]):
"""
@@ -45,7 +45,7 @@ def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]):
[__process_control__(trait_data) for trait_data in controls],
(tuple(), tuple(), tuple(), tuple()))
-def dictify_by_samples(samples_vals_vars: Sequence[Sequence]) -> dict:
+def dictify_by_samples(samples_vals_vars: Sequence[Sequence]) -> Sequence[dict]:
"""
Build a sequence of dictionaries from a sequence of separate sequences of
samples, values and variances.
@@ -60,3 +60,29 @@ def dictify_by_samples(samples_vals_vars: Sequence[Sequence]) -> dict:
sample: {"sample_name": sample, "value": val, "variance": var}
for sample, val, var in zip(*trait_line)
} for trait_line in zip(*(samples_vals_vars[0:3])))
+
def fix_samples(primary_trait: dict, control_traits: Sequence[dict]) -> Sequence[Sequence[Any]]:
    """
    Restrict the primary trait's data to the samples shared with every control
    trait, and collect the control traits' values and variances.

    `primary_trait` and each item of `control_traits` are mappings from a
    sample name to a dict holding (at least) the keys "value" and "variance".

    Returns a 5-tuple, in this order:
        (sample_names, primary_values, control_values, primary_variances,
         control_variances)
    where:
        - `sample_names` are the primary trait's samples that are present in
          ALL the control traits, in the primary trait's iteration order. With
          no control traits, every primary sample qualifies.
        - `primary_values` and `primary_variances` are the primary trait's
          "value" and "variance" entries for exactly those samples.
        - `control_values` and `control_variances` are flat tuples holding the
          "value" and "variance" of EVERY entry of EVERY control trait (trait
          by trait, each in its own iteration order). They are NOT filtered
          down to the common samples.

    Raises `KeyError` if an entry that gets read lacks "value" or "variance".

    This is a partial migration of the
    `web.webqtl.correlation.correlationFunction.fixStrain` function in GN1.
    """
    # Keep only the primary samples that occur in each and every control trait.
    primary_samples = tuple(
        sample for sample in primary_trait
        if all(sample in control for control in control_traits))
    # Flatten all control entries once; building the result tuples from this
    # is linear, unlike growing tuples one element at a time in a fold.
    control_items = tuple(
        item for control in control_traits for item in control.values())
    return (
        primary_samples,
        tuple(primary_trait[sample]["value"] for sample in primary_samples),
        tuple(item["value"] for item in control_items),
        tuple(primary_trait[sample]["variance"] for sample in primary_samples),
        tuple(item["variance"] for item in control_items))