From 679c3edd08453d2f1ef09b3461fd8d0b038b3adf Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 18 Oct 2021 12:17:11 +0300 Subject: Move 'export_trait_data' to 'gn3.db.traits' module Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * gn3/db/traits.py: Move function `export_trait_data` here * gn3/heatmaps.py: Remove function `export_trait_data` * tests/unit/db/test_traits.py: Move function `export_trait_data` tests here * tests/unit/test_heatmaps.py: Remove function `export_trait_data` here Function `export_trait_data` more closely corresponds to the traits and is used in more than just the `gn3.heatmaps` module. This commit moves the relevant code over to the `gn3.db.traits` module and also moves the tests to the corresponding tests modules. --- tests/unit/db/test_traits.py | 89 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) (limited to 'tests/unit/db') diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 8af8e82..0c4ef78 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -2,6 +2,7 @@ from unittest import mock, TestCase from gn3.db.traits import ( build_trait_name, + export_trait_data, set_haveinfo_field, update_sample_data, retrieve_trait_info, @@ -12,6 +13,38 @@ from gn3.db.traits import ( retrieve_publish_trait_info, retrieve_probeset_trait_info) +samplelist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"] +trait_data = { + "mysqlid": 36688172, + "data": { + "B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None}, + "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None}, + "BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None}, + "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None}, + "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None, 
"ndata": None}, + "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None}, + "BXD21": {"sample_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None}, + "BXD24": {"sample_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None}, + "BXD27": {"sample_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None}, + "BXD28": {"sample_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None}, + "BXD32": {"sample_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None}, + "BXD39": {"sample_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None}, + "BXD40": {"sample_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None}, + "BXD42": {"sample_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None}, + "BXD6": {"sample_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None}, + "BXH14": {"sample_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None}, + "BXH19": {"sample_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None}, + "BXH2": {"sample_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None}, + "BXH22": {"sample_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None}, + "BXH4": {"sample_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None}, + "BXH6": {"sample_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None}, + "BXH7": {"sample_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None}, + "BXH8": {"sample_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None}, + "BXH9": {"sample_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None}, + "C3H/HeJ": {"sample_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None}, + "C57BL/6J": {"sample_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None}, + "DBA/2J": {"sample_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}} + class TestTraitsDBFunctions(TestCase): "Test 
cases for traits functions" @@ -226,3 +259,59 @@ class TestTraitsDBFunctions(TestCase): with self.subTest(trait_info=trait_info, expected=expected): self.assertEqual( set_confidential_field(trait_type, trait_info), expected) + + def test_export_trait_data_dtype(self): + """ + Test `export_trait_data` with different values for the `dtype` keyword + argument + """ + for dtype, expected in [ + ["val", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["var", (None, None, None, None, None, None)], + ["N", (None, None, None, None, None, None)], + ["all", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)]]: + with self.subTest(dtype=dtype): + self.assertEqual( + export_trait_data(trait_data, samplelist, dtype=dtype), + expected) + + def test_export_trait_data_dtype_all_flags(self): + """ + Test `export_trait_data` with different values for the `dtype` keyword + argument and the different flags set up + """ + for dtype, vflag, nflag, expected in [ + ["val", False, False, + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["val", False, True, + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["val", True, False, + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["val", True, True, + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["var", False, False, (None, None, None, None, None, None)], + ["var", False, True, (None, None, None, None, None, None)], + ["var", True, False, (None, None, None, None, None, None)], + ["var", True, True, (None, None, None, None, None, None)], + ["N", False, False, (None, None, None, None, None, None)], + ["N", False, True, (None, None, None, None, None, None)], + ["N", True, False, (None, None, None, None, None, None)], + ["N", True, True, (None, None, None, None, None, None)], + ["all", False, False, + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["all", False, True, + (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None, + 8.30401, None, 7.80944, None)], + ["all", 
True, False, + (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None, + 8.30401, None, 7.80944, None)], + ["all", True, True, + (7.51879, None, None, 7.77141, None, None, 8.39265, None, None, + 8.17443, None, None, 8.30401, None, None, 7.80944, None, None)] + ]: + with self.subTest(dtype=dtype, vflag=vflag, nflag=nflag): + self.assertEqual( + export_trait_data( + trait_data, samplelist, dtype=dtype, var_exists=vflag, + n_exists=nflag), + expected) -- cgit v1.2.3 From 42c56d330fdb51820c0fdcbb0b4376ff568ea009 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 18 Oct 2021 12:27:32 +0300 Subject: Move `export_informative` function to `gn3.db.traits` module Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * gn3/db/traits.py: Move `export_informative` function here * gn3/partial_correlations.py: Remove `export_informative` function * tests/unit/db/test_traits.py: Move `export_informative` function tests here * tests/unit/test_partial_correlations.py: Remove `export_informative` function tests The `export_informative` function relates more to the traits than to the partial correlations, and could find use in more than just the partial correlations stuff. This commit moves the function to the more traits-specific `gn3.db.traits` module. 
--- gn3/db/traits.py | 24 +++++++++ gn3/partial_correlations.py | 24 --------- tests/unit/db/test_traits.py | 86 ++++++++++++++++++++++++++++++++ tests/unit/test_partial_correlations.py | 87 +-------------------------------- 4 files changed, 111 insertions(+), 110 deletions(-) (limited to 'tests/unit/db') diff --git a/gn3/db/traits.py b/gn3/db/traits.py index 1e29aff..1c6aaa7 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -743,3 +743,27 @@ def generate_traits_filename(base_path: str = TMPDIR): """Generate a unique filename for use with generated traits files.""" return "{}/traits_test_file_{}.txt".format( os.path.abspath(base_path), random_string(10)) + +def export_informative(trait_data: dict, inc_var: bool = False) -> tuple: + """ + Export informative strain + + This is a migration of the `exportInformative` function in + web/webqtl/base/webqtlTrait.py module in GeneNetwork1. + + There is a chance that the original implementation has a bug, especially + dealing with the `inc_var` value. If the `inc_var` value is meant to control + the inclusion of the `variance` value, then the current implementation, and + that one in GN1 have a bug. + """ + def __exporter__(acc, data_item): + if not inc_var or data_item["variance"] is not None: + return ( + acc[0] + (data_item["sample_name"],), + acc[1] + (data_item["value"],), + acc[2] + (data_item["variance"],)) + return acc + return reduce( + __exporter__, + filter(lambda td: td["value"] is not None, trait_data["data"].values()), + (tuple(), tuple(), tuple())) diff --git a/gn3/partial_correlations.py b/gn3/partial_correlations.py index 8c37886..df390ed 100644 --- a/gn3/partial_correlations.py +++ b/gn3/partial_correlations.py @@ -6,27 +6,3 @@ GeneNetwork1. """ from functools import reduce - -def export_informative(trait_data: dict, inc_var: bool = False) -> tuple: - """ - Export informative strain - - This is a migration of the `exportInformative` function in - web/webqtl/base/webqtlTrait.py module in GeneNetwork1. 
- - There is a chance that the original implementation has a bug, especially - dealing with the `inc_var` value. It the `inc_var` value is meant to control - the inclusion of the `variance` value, then the current implementation, and - that one in GN1 have a bug. - """ - def __exporter__(acc, data_item): - if not inc_var or data_item["variance"] is not None: - return ( - acc[0] + (data_item["sample_name"],), - acc[1] + (data_item["value"],), - acc[2] + (data_item["variance"],)) - return acc - return reduce( - __exporter__, - filter(lambda td: td["value"] is not None, trait_data["data"].values()), - (tuple(), tuple(), tuple())) diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 0c4ef78..67f0c6f 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -3,6 +3,7 @@ from unittest import mock, TestCase from gn3.db.traits import ( build_trait_name, export_trait_data, + export_informative, set_haveinfo_field, update_sample_data, retrieve_trait_info, @@ -315,3 +316,88 @@ class TestTraitsDBFunctions(TestCase): trait_data, samplelist, dtype=dtype, var_exists=vflag, n_exists=nflag), expected) + + def test_export_informative(self): + """Test that the function exports appropriate data.""" + for trait_data, inc_var, expected in [ + [{"data": { + "sample1": { + "sample_name": "sample1", "value": 9, "variance": None, + "ndata": 13 + }, + "sample2": { + "sample_name": "sample2", "value": 8, "variance": None, + "ndata": 13 + }, + "sample3": { + "sample_name": "sample3", "value": 7, "variance": None, + "ndata": 13 + }, + "sample4": { + "sample_name": "sample4", "value": 6, "variance": None, + "ndata": 13 + }, + }}, 0, ( + ("sample1", "sample2", "sample3", "sample4"), (9, 8, 7, 6), + (None, None, None, None))], + [{"data": { + "sample1": { + "sample_name": "sample1", "value": 9, "variance": None, + "ndata": 13 + }, + "sample2": { + "sample_name": "sample2", "value": 8, "variance": None, + "ndata": 13 + }, + "sample3": { + 
"sample_name": "sample3", "value": None, "variance": None, + "ndata": 13 + }, + "sample4": { + "sample_name": "sample4", "value": 6, "variance": None, + "ndata": 13 + }, + }}, 0, ( + ("sample1", "sample2", "sample4"), (9, 8, 6), + (None, None, None))], + [{"data": { + "sample1": { + "sample_name": "sample1", "value": 9, "variance": None, + "ndata": 13 + }, + "sample2": { + "sample_name": "sample2", "value": 8, "variance": None, + "ndata": 13 + }, + "sample3": { + "sample_name": "sample3", "value": 7, "variance": None, + "ndata": 13 + }, + "sample4": { + "sample_name": "sample4", "value": 6, "variance": None, + "ndata": 13 + }, + }}, True, (tuple(), tuple(), tuple())], + [{"data": { + "sample1": { + "sample_name": "sample1", "value": 9, "variance": None, + "ndata": 13 + }, + "sample2": { + "sample_name": "sample2", "value": 8, "variance": 0.657, + "ndata": 13 + }, + "sample3": { + "sample_name": "sample3", "value": 7, "variance": None, + "ndata": 13 + }, + "sample4": { + "sample_name": "sample4", "value": 6, "variance": None, + "ndata": 13 + }, + }}, 0, ( + ("sample1", "sample2", "sample3", "sample4"), (9, 8, 7, 6), + (None, 0.657, None, None))]]: + with self.subTest(trait_data=trait_data): + self.assertEqual( + export_informative(trait_data, inc_var), expected) diff --git a/tests/unit/test_partial_correlations.py b/tests/unit/test_partial_correlations.py index 6eea078..f204d4f 100644 --- a/tests/unit/test_partial_correlations.py +++ b/tests/unit/test_partial_correlations.py @@ -1,92 +1,7 @@ """Module contains tests for gn3.partial_correlations""" from unittest import TestCase -from gn3.partial_correlations import export_informative + class TestPartialCorrelations(TestCase): """Class for testing partial correlations computation functions""" - - def test_export_informative(self): - """Test that the function exports appropriate data.""" - for trait_data, inc_var, expected in [ - [{"data": { - "sample1": { - "sample_name": "sample1", "value": 9, "variance": None, - 
"ndata": 13 - }, - "sample2": { - "sample_name": "sample2", "value": 8, "variance": None, - "ndata": 13 - }, - "sample3": { - "sample_name": "sample3", "value": 7, "variance": None, - "ndata": 13 - }, - "sample4": { - "sample_name": "sample4", "value": 6, "variance": None, - "ndata": 13 - }, - }}, 0, ( - ("sample1", "sample2", "sample3", "sample4"), (9, 8, 7, 6), - (None, None, None, None))], - [{"data": { - "sample1": { - "sample_name": "sample1", "value": 9, "variance": None, - "ndata": 13 - }, - "sample2": { - "sample_name": "sample2", "value": 8, "variance": None, - "ndata": 13 - }, - "sample3": { - "sample_name": "sample3", "value": None, "variance": None, - "ndata": 13 - }, - "sample4": { - "sample_name": "sample4", "value": 6, "variance": None, - "ndata": 13 - }, - }}, 0, ( - ("sample1", "sample2", "sample4"), (9, 8, 6), - (None, None, None))], - [{"data": { - "sample1": { - "sample_name": "sample1", "value": 9, "variance": None, - "ndata": 13 - }, - "sample2": { - "sample_name": "sample2", "value": 8, "variance": None, - "ndata": 13 - }, - "sample3": { - "sample_name": "sample3", "value": 7, "variance": None, - "ndata": 13 - }, - "sample4": { - "sample_name": "sample4", "value": 6, "variance": None, - "ndata": 13 - }, - }}, True, (tuple(), tuple(), tuple())], - [{"data": { - "sample1": { - "sample_name": "sample1", "value": 9, "variance": None, - "ndata": 13 - }, - "sample2": { - "sample_name": "sample2", "value": 8, "variance": 0.657, - "ndata": 13 - }, - "sample3": { - "sample_name": "sample3", "value": 7, "variance": None, - "ndata": 13 - }, - "sample4": { - "sample_name": "sample4", "value": 6, "variance": None, - "ndata": 13 - }, - }}, 0, ( - ("sample1", "sample2", "sample3", "sample4"), (9, 8, 7, 6), - (None, 0.657, None, None))]]: - with self.subTest(trait_data=trait_data): - self.assertEqual( - export_informative(trait_data, inc_var), expected) -- cgit v1.2.3 From a44acad05fb286b9a2e797982d01841a1e817860 Mon Sep 17 00:00:00 2001 From: Frederick 
Muriuki Muriithi Date: Mon, 18 Oct 2021 14:31:51 +0300 Subject: Disable pylint issue * Disable minor pylint issue. --- tests/unit/db/test_traits.py | 1 + 1 file changed, 1 insertion(+) (limited to 'tests/unit/db') diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 67f0c6f..4aa9389 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -319,6 +319,7 @@ class TestTraitsDBFunctions(TestCase): def test_export_informative(self): """Test that the function exports appropriate data.""" + # pylint: disable=W0621 for trait_data, inc_var, expected in [ [{"data": { "sample1": { -- cgit v1.2.3