From a3d4bc848caa8021e14282bab1a13ca7aadeb82d Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Tue, 19 Oct 2021 10:31:24 +0300 Subject: Implement remaining `fix_samples` functionality Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * gn3/partial_correlations.py: implement `fix_samples` function * tests/unit/test_partial_correlations.py: implement tests for `fix_samples` function Implement the remaining partial migration for the `web.webqtl.correlation.correlationFunction.fixStrain` function in GN1. --- gn3/partial_correlations.py | 30 +++++++++++++++++-- tests/unit/test_partial_correlations.py | 52 ++++++++++++++++++++++++--------- 2 files changed, 66 insertions(+), 16 deletions(-) diff --git a/gn3/partial_correlations.py b/gn3/partial_correlations.py index 4db4807..c556d10 100644 --- a/gn3/partial_correlations.py +++ b/gn3/partial_correlations.py @@ -5,8 +5,8 @@ It is an attempt to migrate over the partial correlations feature from GeneNetwork1. """ -from typing import Sequence from functools import reduce +from typing import Any, Sequence def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]): """ @@ -45,7 +45,7 @@ def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]): [__process_control__(trait_data) for trait_data in controls], (tuple(), tuple(), tuple(), tuple())) -def dictify_by_samples(samples_vals_vars: Sequence[Sequence]) -> dict: +def dictify_by_samples(samples_vals_vars: Sequence[Sequence]) -> Sequence[dict]: """ Build a sequence of dictionaries from a sequence of separate sequences of samples, values and variances. @@ -60,3 +60,29 @@ def dictify_by_samples(samples_vals_vars: Sequence[Sequence]) -> dict: sample: {"sample_name": sample, "value": val, "variance": var} for sample, val, var in zip(*trait_line) } for trait_line in zip(*(samples_vals_vars[0:3]))) + +def fix_samples(primary_trait: dict, control_traits: Sequence[dict]) -> Sequence[Sequence[Any]]: + """ + Corrects sample_names, values and variance such that they all contain only + those samples that are common to the reference trait and all control traits. + + This is a partial migration of the + `web.webqtl.correlation.correlationFunction.fixStrain` function in GN1. + """ + primary_samples = tuple( + present[0] for present in + ((sample, all(sample in control.keys() for control in control_traits)) + for sample in primary_trait.keys()) + if present[1]) + control_vals_vars: tuple = reduce( + lambda acc, x: (acc[0] + (x[0],), acc[1] + (x[1],)), + ((item["value"], item["variance"]) + for sublist in [tuple(control.values()) for control in control_traits] + for item in sublist), + (tuple(), tuple())) + return ( + primary_samples, + tuple(primary_trait[sample]["value"] for sample in primary_samples), + control_vals_vars[0], + tuple(primary_trait[sample]["variance"] for sample in primary_samples), + control_vals_vars[1]) diff --git a/tests/unit/test_partial_correlations.py b/tests/unit/test_partial_correlations.py index 6302f74..7631a71 100644 --- a/tests/unit/test_partial_correlations.py +++ b/tests/unit/test_partial_correlations.py @@ -1,7 +1,10 @@ """Module contains tests for gn3.partial_correlations""" from unittest import TestCase -from gn3.partial_correlations import control_samples, dictify_by_samples +from gn3.partial_correlations import ( + fix_samples, + control_samples, + dictify_by_samples) sampleslist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"] control_traits = ( @@ -69,6 +72,21 @@ control_traits = ( "sample_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None}}}) +dictified_control_samples = ( + {"B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None}, + "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None}, + "BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None}, + "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None}, + "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None}, + "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None}}, + {"BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None}, + "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None}, + "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None}, + "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None}}, + {"B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None}, + "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None}, + "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None}}) + class TestPartialCorrelations(TestCase): """Class for testing partial correlations computation functions""" @@ -112,16 +130,22 @@ class TestPartialCorrelations(TestCase): ((None, None, None, None, None, None), (None, None, None, None), (None, None, None)), (6, 4, 3))), - ({"B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None}, - "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None}, - "BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None}, - "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None}, - "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None}, - "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None}}, - {"BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None}, - "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None}, - "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None}, - "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None}}, - {"B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None}, - "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None}, - "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None}})) + dictified_control_samples) + + def test_fix_samples(self): + """Test that fix_samples fixes the values""" + self.assertEqual( + fix_samples( + {"B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, + "variance": None}, + "BXD1": {"sample_name": "BXD1", "value": 7.77141, + "variance": None}, + "BXD2": {"sample_name": "BXD2", "value": 7.80944, + "variance": None}}, + dictified_control_samples), + (("BXD2",), (7.80944,), + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944, 8.39265, + 8.17443, 8.30401, 7.80944, 7.51879, 7.77141, 7.80944), + (None,), + (None, None, None, None, None, None, None, None, None, None, None, + None, None))) -- cgit v1.2.3