From 1544776b072d7240773cf14d423078841e4c1a07 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 18 Oct 2021 14:14:04 +0300 Subject: Implement `control_samples` function as is in GN1 Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * gn3/partial_correlations.py: Implement `control_samples` function * tests/unit/test_partial_correlations.py: add tests for `control_samples` function Implement the function `control_samples` and make it mostly bug-compatible with the `web/webqtl/correlation/correlationFunction.controlStrain` function in GN1. This implementation in GN3 does not do any calls to the database. It will rely on other functions to provide the data from the database to it. --- gn3/partial_correlations.py | 38 ++++++++++++++++ tests/unit/test_partial_correlations.py | 80 +++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+) diff --git a/gn3/partial_correlations.py b/gn3/partial_correlations.py index df390ed..99521c6 100644 --- a/gn3/partial_correlations.py +++ b/gn3/partial_correlations.py @@ -5,4 +5,42 @@ It is an attempt to migrate over the partial correlations feature from GeneNetwork1. """ +from typing import Sequence from functools import reduce + +def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]): + """ + Fetches data for the control traits. + + This migrates `web/webqtl/correlation/correlationFunction.controlStrain` in + GN1, with a few modifications to the arguments passed in. + + PARAMETERS: + controls: A map of sample names to trait data. Equivalent to the `cvals` + value in the corresponding source function in GN1. + sampleslist: A list of samples. Equivalent to `strainlst` in the + corresponding source function in GN1 + """ + def __process_control__(trait_data): + def __process_sample__(acc, sample): + if sample in trait_data["data"].keys(): + sample_item = trait_data["data"][sample] + val = sample_item["value"] + if val is not None: + return ( + acc[0] + (sample,), + acc[1] + (val,), + acc[2] + (sample_item["variance"],)) + return acc + return reduce( + __process_sample__, sampleslist, (tuple(), tuple(), tuple())) + + return reduce( + lambda acc, item: ( + acc[0] + (item[0],), + acc[1] + (item[1],), + acc[2] + (item[2],), + acc[3] + (len(item[0]),), + ), + [__process_control__(trait_data) for trait_data in controls], + (tuple(), tuple(), tuple(), tuple())) diff --git a/tests/unit/test_partial_correlations.py b/tests/unit/test_partial_correlations.py index f204d4f..0083ef7 100644 --- a/tests/unit/test_partial_correlations.py +++ b/tests/unit/test_partial_correlations.py @@ -1,7 +1,87 @@ """Module contains tests for gn3.partial_correlations""" from unittest import TestCase +from gn3.partial_correlations import control_samples +sampleslist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"] +control_traits = ( + { + "mysqlid": 36688172, + "data": { + "B6cC3-1": { + "sample_name": "B6cC3-1", "value": 7.51879, "variance": None, + "ndata": None}, + "BXD1": { + "sample_name": "BXD1", "value": 7.77141, "variance": None, + "ndata": None}, + "BXD12": { + "sample_name": "BXD12", "value": 8.39265, "variance": None, + "ndata": None}, + "BXD16": { + "sample_name": "BXD16", "value": 8.17443, "variance": None, + "ndata": None}, + "BXD19": { + "sample_name": "BXD19", "value": 8.30401, "variance": None, + "ndata": None}, + "BXD2": { + "sample_name": "BXD2", "value": 7.80944, "variance": None, + "ndata": None}}}, + { + "mysqlid": 36688172, + "data": { + "B6cC3-21": { + "sample_name": "B6cC3-1", "value": 7.51879, "variance": None, + "ndata": None}, + "BXD21": { + "sample_name": "BXD1", "value": 7.77141, "variance": None, + "ndata": None}, + "BXD12": { + "sample_name": "BXD12", "value": 8.39265, "variance": None, + "ndata": None}, + "BXD16": { + "sample_name": "BXD16", "value": 8.17443, "variance": None, + "ndata": None}, + "BXD19": { + "sample_name": "BXD19", "value": 8.30401, "variance": None, + "ndata": None}, + "BXD2": { + "sample_name": "BXD2", "value": 7.80944, "variance": None, + "ndata": None}}}, + { + "mysqlid": 36688172, + "data": { + "B6cC3-1": { + "sample_name": "B6cC3-1", "value": 7.51879, "variance": None, + "ndata": None}, + "BXD1": { + "sample_name": "BXD1", "value": 7.77141, "variance": None, + "ndata": None}, + "BXD12": { + "sample_name": "BXD12", "value": None, "variance": None, + "ndata": None}, + "BXD16": { + "sample_name": "BXD16", "value": None, "variance": None, + "ndata": None}, + "BXD19": { + "sample_name": "BXD19", "value": None, "variance": None, + "ndata": None}, + "BXD2": { + "sample_name": "BXD2", "value": 7.80944, "variance": None, + "ndata": None}}}) class TestPartialCorrelations(TestCase): """Class for testing partial correlations computation functions""" + + def test_control_samples(self): + """Test that the control_samples works as expected.""" + self.assertEqual( + control_samples(control_traits, sampleslist), + ((("B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"), + ("BXD12", "BXD16", "BXD19", "BXD2"), + ("B6cC3-1", "BXD1", "BXD2")), + ((7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944), + (8.39265, 8.17443, 8.30401, 7.80944), + (7.51879, 7.77141, 7.80944)), + ((None, None, None, None, None, None), (None, None, None, None), + (None, None, None)), + (6, 4, 3))) -- cgit v1.2.3