From 85e369311b60faa2490f25c88a2ef87042b91738 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 30 May 2022 09:41:33 +0300 Subject: Pass trait data as args to `fix_samples` and fix some bugs The `fix_samples` function works on the trait data, not the basic trait info. This commit fixes the arguments passed to the function, and also some bugs in the function. --- gn3/computations/partial_correlations.py | 22 +++++++---- .../unit/computations/test_partial_correlations.py | 44 ++++++++++++---------- 2 files changed, 40 insertions(+), 26 deletions(-) diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py index f316f67..597c5a3 100644 --- a/gn3/computations/partial_correlations.py +++ b/gn3/computations/partial_correlations.py @@ -70,7 +70,9 @@ def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]): [__process_control__(trait_data) for trait_data in controls], (tuple(), tuple(), tuple(), tuple())) -def fix_samples(primary_trait: dict, control_traits: Sequence[dict]) -> Sequence[Sequence[Any]]: +def fix_samples( + primary_trait_data: dict, + control_traits_data: Sequence[dict]) -> Sequence[Sequence[Any]]: """ Corrects sample_names, values and variance such that they all contain only those samples that are common to the reference trait and all control traits. 
@@ -80,20 +82,26 @@ def fix_samples(primary_trait: dict, control_traits: Sequence[dict]) -> Sequence """ primary_samples = tuple( present[0] for present in - ((sample, all(sample in control.keys() for control in control_traits)) - for sample in primary_trait.keys()) + ((sample, + all(sample in control["data"].keys() + for control in control_traits_data)) + for sample in primary_trait_data["data"].keys()) if present[1]) control_vals_vars: tuple = reduce( lambda acc, x: (acc[0] + (x[0],), acc[1] + (x[1],)), ((item["value"], item["variance"]) - for sublist in [tuple(control.values()) for control in control_traits] + for sublist in [ + tuple(control["data"].values()) + for control in control_traits_data] for item in sublist), (tuple(), tuple())) return ( primary_samples, - tuple(primary_trait[sample]["value"] for sample in primary_samples), + tuple(primary_trait_data["data"][sample]["value"] + for sample in primary_samples), control_vals_vars[0], - tuple(primary_trait[sample]["variance"] for sample in primary_samples), + tuple(primary_trait_data["data"][sample]["variance"] + for sample in primary_samples), control_vals_vars[1]) def find_identical_traits( @@ -628,7 +636,7 @@ def check_for_common_errors(# pylint: disable=[R0914] fixed_primary_vals, fixed_control_vals, _primary_variances, - _cntrl_variances) = fix_samples(primary_trait, cntrl_traits) + _cntrl_variances) = fix_samples(primary_trait_data, cntrl_traits_data) if len(common_primary_control_samples) < corr_min_informative: return { diff --git a/tests/unit/computations/test_partial_correlations.py b/tests/unit/computations/test_partial_correlations.py index 81ddbcc..20cd6bc 100644 --- a/tests/unit/computations/test_partial_correlations.py +++ b/tests/unit/computations/test_partial_correlations.py @@ -81,19 +81,24 @@ control_traits = ( "ndata": None}}}) dictified_control_samples = ( - {"B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None}, - "BXD1": {"sample_name": "BXD1", "value": 7.77141, 
"variance": None}, - "BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None}, - "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None}, - "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None}, - "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None}}, - {"BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None}, - "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None}, - "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None}, - "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None}}, - {"B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None}, - "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None}, - "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None}}) + {"data": { + "B6cC3-1": { + "sample_name": "B6cC3-1", "value": 7.51879,"variance": None}, + "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None}, + "BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None}, + "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None}, + "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None}, + "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None}}}, + {"data": { + "BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None}, + "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None}, + "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None}, + "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None}}}, + {"data": { + "B6cC3-1": { + "sample_name": "B6cC3-1", "value": 7.51879, "variance": None}, + "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None}, + "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None}}}) class TestPartialCorrelations(TestCase): """Class for testing partial correlations computation functions""" @@ -145,12 +150,13 @@ class TestPartialCorrelations(TestCase): """ 
self.assertEqual( fix_samples( - {"B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, - "variance": None}, - "BXD1": {"sample_name": "BXD1", "value": 7.77141, - "variance": None}, - "BXD2": {"sample_name": "BXD2", "value": 7.80944, - "variance": None}}, + {"data": {"B6cC3-1": {"sample_name": "B6cC3-1", + "value": 7.51879, + "variance": None}, + "BXD1": {"sample_name": "BXD1", "value": 7.77141, + "variance": None}, + "BXD2": {"sample_name": "BXD2", "value": 7.80944, + "variance": None}}}, dictified_control_samples), (("BXD2",), (7.80944,), (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944, 8.39265, -- cgit v1.2.3