From 679c3edd08453d2f1ef09b3461fd8d0b038b3adf Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 18 Oct 2021 12:17:11 +0300 Subject: Move 'export_trait_data' to 'gn3.db.traits' module Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * gn3/db/traits.py: Move function `export_trait_data` here * gn3/heatmaps.py: Remove function `export_trait_data` * tests/unit/db/test_traits.py: Move function `export_trait_data` tests here * tests/unit/test_heatmaps.py: Remove function `export_trait_data` here Function `export_trait_data` more closely corresponds to the traits and is used in more than just the `gn3.heatmaps` module. This commit moves the relevant code over to the `gn3.db.traits` module and also moves the tests to the corresponding tests modules. --- tests/unit/db/test_traits.py | 89 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) (limited to 'tests/unit/db') diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 8af8e82..0c4ef78 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -2,6 +2,7 @@ from unittest import mock, TestCase from gn3.db.traits import ( build_trait_name, + export_trait_data, set_haveinfo_field, update_sample_data, retrieve_trait_info, @@ -12,6 +13,38 @@ from gn3.db.traits import ( retrieve_publish_trait_info, retrieve_probeset_trait_info) +samplelist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"] +trait_data = { + "mysqlid": 36688172, + "data": { + "B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None}, + "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None}, + "BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None}, + "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None}, + "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None}, + "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None}, + "BXD21": {"sample_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None}, + "BXD24": {"sample_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None}, + "BXD27": {"sample_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None}, + "BXD28": {"sample_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None}, + "BXD32": {"sample_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None}, + "BXD39": {"sample_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None}, + "BXD40": {"sample_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None}, + "BXD42": {"sample_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None}, + "BXD6": {"sample_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None}, + "BXH14": {"sample_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None}, + "BXH19": {"sample_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None}, + "BXH2": {"sample_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None}, + "BXH22": {"sample_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None}, + "BXH4": {"sample_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None}, + "BXH6": {"sample_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None}, + "BXH7": {"sample_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None}, + "BXH8": {"sample_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None}, + "BXH9": {"sample_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None}, + "C3H/HeJ": {"sample_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None}, + "C57BL/6J": {"sample_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None}, + "DBA/2J": {"sample_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}} + class TestTraitsDBFunctions(TestCase): "Test cases for traits functions" @@ -226,3 +259,59 @@ class TestTraitsDBFunctions(TestCase): with self.subTest(trait_info=trait_info, expected=expected): self.assertEqual( set_confidential_field(trait_type, trait_info), expected) + + def test_export_trait_data_dtype(self): + """ + Test `export_trait_data` with different values for the `dtype` keyword + argument + """ + for dtype, expected in [ + ["val", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["var", (None, None, None, None, None, None)], + ["N", (None, None, None, None, None, None)], + ["all", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)]]: + with self.subTest(dtype=dtype): + self.assertEqual( + export_trait_data(trait_data, samplelist, dtype=dtype), + expected) + + def test_export_trait_data_dtype_all_flags(self): + """ + Test `export_trait_data` with different values for the `dtype` keyword + argument and the different flags set up + """ + for dtype, vflag, nflag, expected in [ + ["val", False, False, + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["val", False, True, + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["val", True, False, + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["val", True, True, + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["var", False, False, (None, None, None, None, None, None)], + ["var", False, True, (None, None, None, None, None, None)], + ["var", True, False, (None, None, None, None, None, None)], + ["var", True, True, (None, None, None, None, None, None)], + ["N", False, False, (None, None, None, None, None, None)], + ["N", False, True, (None, None, None, None, None, None)], + ["N", True, False, (None, None, None, None, None, None)], + ["N", True, True, (None, None, None, None, None, None)], + ["all", False, False, + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["all", False, True, + (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None, + 8.30401, None, 7.80944, None)], + ["all", True, False, + (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None, + 8.30401, None, 7.80944, None)], + ["all", True, True, + (7.51879, None, None, 7.77141, None, None, 8.39265, None, None, + 8.17443, None, None, 8.30401, None, None, 7.80944, None, None)] + ]: + with self.subTest(dtype=dtype, vflag=vflag, nflag=nflag): + self.assertEqual( + export_trait_data( + trait_data, samplelist, dtype=dtype, var_exists=vflag, + n_exists=nflag), + expected) -- cgit v1.2.3 From 42c56d330fdb51820c0fdcbb0b4376ff568ea009 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 18 Oct 2021 12:27:32 +0300 Subject: Move `export_informative` function to `gn3.db.traits` module Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * gn3/db/traits.py: Move `export_informative` function here * gn3/partial_correlations.py: Remove `export_informative` function * tests/unit/db/test_traits.py: Move `export_informative` function tests here * tests/unit/test_partial_correlations.py: Remove `export_informative` function tests The `export_informative` function relates more to the traits than to the partial correlations, and could find use in more than just the partial correlations stuff. This commit moves the function to the more traits-specific `gn3.db.traits` module. --- gn3/db/traits.py | 24 +++++++++ gn3/partial_correlations.py | 24 --------- tests/unit/db/test_traits.py | 86 ++++++++++++++++++++++++++++++++ tests/unit/test_partial_correlations.py | 87 +-------------------------------- 4 files changed, 111 insertions(+), 110 deletions(-) (limited to 'tests/unit/db') diff --git a/gn3/db/traits.py b/gn3/db/traits.py index 1e29aff..1c6aaa7 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -743,3 +743,27 @@ def generate_traits_filename(base_path: str = TMPDIR): """Generate a unique filename for use with generated traits files.""" return "{}/traits_test_file_{}.txt".format( os.path.abspath(base_path), random_string(10)) + +def export_informative(trait_data: dict, inc_var: bool = False) -> tuple: + """ + Export informative strain + + This is a migration of the `exportInformative` function in + web/webqtl/base/webqtlTrait.py module in GeneNetwork1. + + There is a chance that the original implementation has a bug, especially + dealing with the `inc_var` value. It the `inc_var` value is meant to control + the inclusion of the `variance` value, then the current implementation, and + that one in GN1 have a bug. + """ + def __exporter__(acc, data_item): + if not inc_var or data_item["variance"] is not None: + return ( + acc[0] + (data_item["sample_name"],), + acc[1] + (data_item["value"],), + acc[2] + (data_item["variance"],)) + return acc + return reduce( + __exporter__, + filter(lambda td: td["value"] is not None, trait_data["data"].values()), + (tuple(), tuple(), tuple())) diff --git a/gn3/partial_correlations.py b/gn3/partial_correlations.py index 8c37886..df390ed 100644 --- a/gn3/partial_correlations.py +++ b/gn3/partial_correlations.py @@ -6,27 +6,3 @@ GeneNetwork1. """ from functools import reduce - -def export_informative(trait_data: dict, inc_var: bool = False) -> tuple: - """ - Export informative strain - - This is a migration of the `exportInformative` function in - web/webqtl/base/webqtlTrait.py module in GeneNetwork1. - - There is a chance that the original implementation has a bug, especially - dealing with the `inc_var` value. It the `inc_var` value is meant to control - the inclusion of the `variance` value, then the current implementation, and - that one in GN1 have a bug. - """ - def __exporter__(acc, data_item): - if not inc_var or data_item["variance"] is not None: - return ( - acc[0] + (data_item["sample_name"],), - acc[1] + (data_item["value"],), - acc[2] + (data_item["variance"],)) - return acc - return reduce( - __exporter__, - filter(lambda td: td["value"] is not None, trait_data["data"].values()), - (tuple(), tuple(), tuple())) diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 0c4ef78..67f0c6f 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -3,6 +3,7 @@ from unittest import mock, TestCase from gn3.db.traits import ( build_trait_name, export_trait_data, + export_informative, set_haveinfo_field, update_sample_data, retrieve_trait_info, @@ -315,3 +316,88 @@ class TestTraitsDBFunctions(TestCase): trait_data, samplelist, dtype=dtype, var_exists=vflag, n_exists=nflag), expected) + + def test_export_informative(self): + """Test that the function exports appropriate data.""" + for trait_data, inc_var, expected in [ + [{"data": { + "sample1": { + "sample_name": "sample1", "value": 9, "variance": None, + "ndata": 13 + }, + "sample2": { + "sample_name": "sample2", "value": 8, "variance": None, + "ndata": 13 + }, + "sample3": { + "sample_name": "sample3", "value": 7, "variance": None, + "ndata": 13 + }, + "sample4": { + "sample_name": "sample4", "value": 6, "variance": None, + "ndata": 13 + }, + }}, 0, ( + ("sample1", "sample2", "sample3", "sample4"), (9, 8, 7, 6), + (None, None, None, None))], + [{"data": { + "sample1": { + "sample_name": "sample1", "value": 9, "variance": None, + "ndata": 13 + }, + "sample2": { + "sample_name": "sample2", "value": 8, "variance": None, + "ndata": 13 + }, + "sample3": { + "sample_name": "sample3", "value": None, "variance": None, + "ndata": 13 + }, + "sample4": { + "sample_name": "sample4", "value": 6, "variance": None, + "ndata": 13 + }, + }}, 0, ( + ("sample1", "sample2", "sample4"), (9, 8, 6), + (None, None, None))], + [{"data": { + "sample1": { + "sample_name": "sample1", "value": 9, "variance": None, + "ndata": 13 + }, + "sample2": { + "sample_name": "sample2", "value": 8, "variance": None, + "ndata": 13 + }, + "sample3": { + "sample_name": "sample3", "value": 7, "variance": None, + "ndata": 13 + }, + "sample4": { + "sample_name": "sample4", "value": 6, "variance": None, + "ndata": 13 + }, + }}, True, (tuple(), tuple(), tuple())], + [{"data": { + "sample1": { + "sample_name": "sample1", "value": 9, "variance": None, + "ndata": 13 + }, + "sample2": { + "sample_name": "sample2", "value": 8, "variance": 0.657, + "ndata": 13 + }, + "sample3": { + "sample_name": "sample3", "value": 7, "variance": None, + "ndata": 13 + }, + "sample4": { + "sample_name": "sample4", "value": 6, "variance": None, + "ndata": 13 + }, + }}, 0, ( + ("sample1", "sample2", "sample3", "sample4"), (9, 8, 7, 6), + (None, 0.657, None, None))]]: + with self.subTest(trait_data=trait_data): + self.assertEqual( + export_informative(trait_data, inc_var), expected) diff --git a/tests/unit/test_partial_correlations.py b/tests/unit/test_partial_correlations.py index 6eea078..f204d4f 100644 --- a/tests/unit/test_partial_correlations.py +++ b/tests/unit/test_partial_correlations.py @@ -1,92 +1,7 @@ """Module contains tests for gn3.partial_correlations""" from unittest import TestCase -from gn3.partial_correlations import export_informative + class TestPartialCorrelations(TestCase): """Class for testing partial correlations computation functions""" - - def test_export_informative(self): - """Test that the function exports appropriate data.""" - for trait_data, inc_var, expected in [ - [{"data": { - "sample1": { - "sample_name": "sample1", "value": 9, "variance": None, - "ndata": 13 - }, - "sample2": { - "sample_name": "sample2", "value": 8, "variance": None, - "ndata": 13 - }, - "sample3": { - "sample_name": "sample3", "value": 7, "variance": None, - "ndata": 13 - }, - "sample4": { - "sample_name": "sample4", "value": 6, "variance": None, - "ndata": 13 - }, - }}, 0, ( - ("sample1", "sample2", "sample3", "sample4"), (9, 8, 7, 6), - (None, None, None, None))], - [{"data": { - "sample1": { - "sample_name": "sample1", "value": 9, "variance": None, - "ndata": 13 - }, - "sample2": { - "sample_name": "sample2", "value": 8, "variance": None, - "ndata": 13 - }, - "sample3": { - "sample_name": "sample3", "value": None, "variance": None, - "ndata": 13 - }, - "sample4": { - "sample_name": "sample4", "value": 6, "variance": None, - "ndata": 13 - }, - }}, 0, ( - ("sample1", "sample2", "sample4"), (9, 8, 6), - (None, None, None))], - [{"data": { - "sample1": { - "sample_name": "sample1", "value": 9, "variance": None, - "ndata": 13 - }, - "sample2": { - "sample_name": "sample2", "value": 8, "variance": None, - "ndata": 13 - }, - "sample3": { - "sample_name": "sample3", "value": 7, "variance": None, - "ndata": 13 - }, - "sample4": { - "sample_name": "sample4", "value": 6, "variance": None, - "ndata": 13 - }, - }}, True, (tuple(), tuple(), tuple())], - [{"data": { - "sample1": { - "sample_name": "sample1", "value": 9, "variance": None, - "ndata": 13 - }, - "sample2": { - "sample_name": "sample2", "value": 8, "variance": 0.657, - "ndata": 13 - }, - "sample3": { - "sample_name": "sample3", "value": 7, "variance": None, - "ndata": 13 - }, - "sample4": { - "sample_name": "sample4", "value": 6, "variance": None, - "ndata": 13 - }, - }}, 0, ( - ("sample1", "sample2", "sample3", "sample4"), (9, 8, 7, 6), - (None, 0.657, None, None))]]: - with self.subTest(trait_data=trait_data): - self.assertEqual( - export_informative(trait_data, inc_var), expected) -- cgit v1.2.3 From a44acad05fb286b9a2e797982d01841a1e817860 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 18 Oct 2021 14:31:51 +0300 Subject: Disable pylint issue * Disable minor pylint issue. --- tests/unit/db/test_traits.py | 1 + 1 file changed, 1 insertion(+) (limited to 'tests/unit/db') diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 67f0c6f..4aa9389 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -319,6 +319,7 @@ class TestTraitsDBFunctions(TestCase): def test_export_informative(self): """Test that the function exports appropriate data.""" + # pylint: disable=W0621 for trait_data, inc_var, expected in [ [{"data": { "sample1": { -- cgit v1.2.3 From 575da0baf4468d27782c73b19995b3adb934ba70 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 22 Nov 2021 13:56:03 +0300 Subject: Add test to query builders Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * Add some tests for the query builders to ensure that the queries are built up correctly. --- gn3/db/correlations.py | 78 +++++++++++++++++---------------- tests/unit/db/test_correlation.py | 90 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+), 36 deletions(-) create mode 100644 tests/unit/db/test_correlation.py (limited to 'tests/unit/db') diff --git a/gn3/db/correlations.py b/gn3/db/correlations.py index ff570b4..7daff87 100644 --- a/gn3/db/correlations.py +++ b/gn3/db/correlations.py @@ -402,6 +402,43 @@ def fetch_sample_ids( species_name=species) return cursor.fetchall() +def build_query_sgo_lit_corr( + db_type: str, temp_table: str, sample_id_columns: str, + joins: Tuple[str, ...]) -> str: + """ + Build query for `SGO Literature Correlation` data, when querying the given + `temp_table` temporary table. + """ + return ( + (f"SELECT {db_type}.Name, {temp_table}.value, " + + sample_id_columns + + f" FROM ({db_type}, {db_type}XRef, {db_type}Freeze) " + + f"LEFT JOIN {temp_table} ON {temp_table}.GeneId2=ProbeSet.GeneId " + + " ".join(joins) + + f" WHERE ProbeSet.GeneId IS NOT NULL " + + f"AND {temp_table}.value IS NOT NULL " + + f"AND {db_type}XRef.{db_type}FreezeId = {db_type}Freeze.Id " + + f"AND {db_type}Freeze.Name = %(db_name)s " + + f"AND {db_type}.Id = {db_type}XRef.{db_type}Id " + + f"ORDER BY {db_type}.Id"), + 2) + +def build_query_tissue_corr(db_type, temp_table, sample_id_columns, joins): + return ( + (f"SELECT {db_type}.Name, {temp_table}.Correlation, " + + f"{temp_table}.PValue, " + + sample_id_columns + + f" FROM ({db_type}, {db_type}XRef, {db_type}Freeze) " + + f"LEFT JOIN {temp_table} ON {temp_table}.Symbol=ProbeSet.Symbol " + + " ".join(joins) + + f" WHERE ProbeSet.Symbol IS NOT NULL " + + f"AND {temp_table}.Correlation IS NOT NULL " + + f"AND {db_type}XRef.{db_type}FreezeId = {db_type}Freeze.Id " + + f"AND {db_type}Freeze.Name = %(db_name)s " + + f"AND {db_type}.Id = {db_type}XRef.{db_type}Id " + f"ORDER BY {db_type}.Id"), + 3) + def fetch_all_database_data( conn: Any, species: str, gene_id: int, gene_symbol: str, samples: Tuple[str, ...], db_type: str, db_name: str, method: str, @@ -411,37 +448,6 @@ def fetch_all_database_data( `web.webqtl.correlation.CorrelationPage.fetchAllDatabaseData` function in GeneNetwork1. """ - def __build_query_sgo_lit__(temp_table, sample_id_columns, joins): - return ( - (f"SELECT {db_type}.Name, {temp_table}.value " + - sample_id_columns + - f" FROM ({db_type}, {db_type}XRef, {db_type}Freeze) " + - f"LEFT JOIN {temp_table} ON {temp_table}.GeneId2=ProbeSet.GeneId " + - " ".join(joins) + - f" WHERE ProbeSet.GeneId IS NOT NULL " + - f"AND {temp_table}.value IS NOT NULL " + - f"AND {db_type}XRef.{db_type}FreezeId = {db_type}Freeze.Id " + - f"AND {db_type}Freeze.Name = %(db_name)s " + - f"AND {db_type}.Id = {db_type}XRef.{db_type}Id " + - f"ORDER BY {db_type}.Id"), - 2) - - def __build_query_tissue_corr__(temp_table, sample_id_columns, joins): - return ( - (f"SELECT {db_type}.Name, {temp_table}.Correlation, " + - f"{temp_table}.PValue, " + - sample_id_columns + - f" FROM ({db_type}, {db_type}XRef, {db_type}Freeze) " + - f"LEFT JOIN {temp_table} ON {temp_table}.Symbol=ProbeSet.Symbol " + - " ".join(joins) + - f" WHERE ProbeSet.Symbol IS NOT NULL " + - f"AND {temp_table}.Correlation IS NOT NULL " + - f"AND {db_type}XRef.{db_type}FreezeId = {db_type}Freeze.Id " + - f"AND {db_type}Freeze.Name = %(db_name)s " + - f"AND {db_type}.Id = {db_type}XRef.%sId " - f"ORDER BY {db_type}.Id"), - 3) - def __build_query__(sample_ids, temp_table): sample_id_columns = ", ".join(f"T{smpl}.value" for smpl in samples_ids) if db_type == "Publish": @@ -460,17 +466,17 @@ def fetch_all_database_data( 1) if temp_table is not None: joins = tuple( - ("LEFT JOIN {db_type}Data AS T{item} " - "ON T{item}.Id = {db_type}XRef.DataId " - "AND T{item}.StrainId=%(T{item}_sample_id)s") + (f"LEFT JOIN {db_type}Data AS T{item} " + f"ON T{item}.Id = {db_type}XRef.DataId " + f"AND T{item}.StrainId=%(T{item}_sample_id)s") for item in sample_ids) if method.lower() == "sgo literature correlation": - return __build_query_sgo_lit__( + return build_query_sgo_lit_corr( sample_ids, temp_table, sample_id_columns) if method.lower() in ( "tissue correlation, pearson's r", "tissue correlation, spearman's rho"): - return __build_query_tissue_corr__( + return build_query_tissue_corr( sample_ids, temp_table, sample_id_columns) joins = tuple( (f"LEFT JOIN {db_type}Data AS T{item} " diff --git a/tests/unit/db/test_correlation.py b/tests/unit/db/test_correlation.py new file mode 100644 index 0000000..866d28d --- /dev/null +++ b/tests/unit/db/test_correlation.py @@ -0,0 +1,90 @@ +""" +Tests for the gn3.db.correlations module +""" + +from unittest import TestCase + +from gn3.db.correlations import ( + build_query_sgo_lit_corr, + build_query_tissue_corr) + +class TestCorrelation(TestCase): + """Test cases for correlation data fetching functions""" + maxDiff = None + + def test_build_query_sgo_lit_corr(self): + self.assertEqual( + build_query_sgo_lit_corr( + "Probeset", + "temp_table_xy45i7wd", + "T1.value, T2.value, T3.value", + (("LEFT JOIN ProbesetData AS T1 " + "ON T1.Id = ProbesetXRef.DataId " + "AND T1.StrainId=%(T1_sample_id)s"), + ( + "LEFT JOIN ProbesetData AS T2 " + "ON T2.Id = ProbesetXRef.DataId " + "AND T2.StrainId=%(T2_sample_id)s"), + ( + "LEFT JOIN ProbesetData AS T3 " + "ON T3.Id = ProbesetXRef.DataId " + "AND T3.StrainId=%(T3_sample_id)s"))), + (("SELECT Probeset.Name, temp_table_xy45i7wd.value, " + "T1.value, T2.value, T3.value " + "FROM (Probeset, ProbesetXRef, ProbesetFreeze) " + "LEFT JOIN temp_table_xy45i7wd ON temp_table_xy45i7wd.GeneId2=ProbeSet.GeneId " + "LEFT JOIN ProbesetData AS T1 " + "ON T1.Id = ProbesetXRef.DataId " + "AND T1.StrainId=%(T1_sample_id)s " + "LEFT JOIN ProbesetData AS T2 " + "ON T2.Id = ProbesetXRef.DataId " + "AND T2.StrainId=%(T2_sample_id)s " + "LEFT JOIN ProbesetData AS T3 " + "ON T3.Id = ProbesetXRef.DataId " + "AND T3.StrainId=%(T3_sample_id)s " + "WHERE ProbeSet.GeneId IS NOT NULL " + "AND temp_table_xy45i7wd.value IS NOT NULL " + "AND ProbesetXRef.ProbesetFreezeId = ProbesetFreeze.Id " + "AND ProbesetFreeze.Name = %(db_name)s " + "AND Probeset.Id = ProbesetXRef.ProbesetId " + "ORDER BY Probeset.Id"), + 2)) + + def test_build_query_tissue_corr(self): + self.assertEqual( + build_query_tissue_corr( + "Probeset", + "temp_table_xy45i7wd", + "T1.value, T2.value, T3.value", + (("LEFT JOIN ProbesetData AS T1 " + "ON T1.Id = ProbesetXRef.DataId " + "AND T1.StrainId=%(T1_sample_id)s"), + ( + "LEFT JOIN ProbesetData AS T2 " + "ON T2.Id = ProbesetXRef.DataId " + "AND T2.StrainId=%(T2_sample_id)s"), + ( + "LEFT JOIN ProbesetData AS T3 " + "ON T3.Id = ProbesetXRef.DataId " + "AND T3.StrainId=%(T3_sample_id)s"))), + (("SELECT Probeset.Name, temp_table_xy45i7wd.Correlation, " + "temp_table_xy45i7wd.PValue, " + "T1.value, T2.value, T3.value " + "FROM (Probeset, ProbesetXRef, ProbesetFreeze) " + "LEFT JOIN temp_table_xy45i7wd ON temp_table_xy45i7wd.Symbol=ProbeSet.Symbol " + "LEFT JOIN ProbesetData AS T1 " + "ON T1.Id = ProbesetXRef.DataId " + "AND T1.StrainId=%(T1_sample_id)s " + "LEFT JOIN ProbesetData AS T2 " + "ON T2.Id = ProbesetXRef.DataId " + "AND T2.StrainId=%(T2_sample_id)s " + "LEFT JOIN ProbesetData AS T3 " + "ON T3.Id = ProbesetXRef.DataId " + "AND T3.StrainId=%(T3_sample_id)s " + "WHERE ProbeSet.Symbol IS NOT NULL " + "AND temp_table_xy45i7wd.Correlation IS NOT NULL " + "AND ProbesetXRef.ProbesetFreezeId = ProbesetFreeze.Id " + "AND ProbesetFreeze.Name = %(db_name)s " + "AND Probeset.Id = ProbesetXRef.ProbesetId " + "ORDER BY Probeset.Id"), + 3)) -- cgit v1.2.3 From df4ed9183f3efd89d54bba1a144c48475f4b8169 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Tue, 23 Nov 2021 12:34:44 +0300 Subject: Fix a myriad of linting errors * Fix linting errors like: - Unused variables - Undeclared variable errors (mostly caused by typos, and wrong names) - Missing documentation strings for functions etc. --- gn3/computations/partial_correlations.py | 4 +++- gn3/db/correlations.py | 24 ++++++++++++------------ tests/unit/db/test_correlation.py | 6 ++++++ 3 files changed, 21 insertions(+), 13 deletions(-) (limited to 'tests/unit/db') diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py index 4bd26a2..f43c4d4 100644 --- a/gn3/computations/partial_correlations.py +++ b/gn3/computations/partial_correlations.py @@ -200,11 +200,13 @@ def good_dataset_samples_indexes( samples_from_file.index(good) for good in set(samples).intersection(set(samples_from_file)))) -def compute_partial_correlations_fast(# pylint: disable=[R0913, R0914] +def partial_correlations_fast(# pylint: disable=[R0913, R0914] samples, primary_vals, control_vals, database_filename, fetched_correlations, method: str, correlation_type: str) -> Tuple[ float, Tuple[float, ...]]: """ + Computes partial correlation coefficients using data from a CSV file. + This is a partial migration of the `web.webqtl.correlation.PartialCorrDBPage.getPartialCorrelationsFast` function in GeneNetwork1. diff --git a/gn3/db/correlations.py b/gn3/db/correlations.py index 5c3e7b8..a1daa3c 100644 --- a/gn3/db/correlations.py +++ b/gn3/db/correlations.py @@ -398,8 +398,8 @@ def fetch_sample_ids( "AND Species.name=%(species_name)s") with conn.cursor() as cursor: cursor.execute( - query, samples_names=tuple(samples), - species_name=species) + query, samples_names=tuple(sample_names), + species_name=species_name) return cursor.fetchall() def build_query_sgo_lit_corr( @@ -419,7 +419,7 @@ def build_query_sgo_lit_corr( f" FROM ({db_type}, {db_type}XRef, {db_type}Freeze) " + f"LEFT JOIN {temp_table} ON {temp_table}.GeneId2=ProbeSet.GeneId " + " ".join(joins) + - f" WHERE ProbeSet.GeneId IS NOT NULL " + + " WHERE ProbeSet.GeneId IS NOT NULL " + f"AND {temp_table}.value IS NOT NULL " + f"AND {db_type}XRef.{db_type}FreezeId = {db_type}Freeze.Id " + f"AND {db_type}Freeze.Name = %(db_name)s " + @@ -443,7 +443,7 @@ def build_query_tissue_corr(db_type, temp_table, sample_id_columns, joins): f" FROM ({db_type}, {db_type}XRef, {db_type}Freeze) " + f"LEFT JOIN {temp_table} ON {temp_table}.Symbol=ProbeSet.Symbol " + " ".join(joins) + - f" WHERE ProbeSet.Symbol IS NOT NULL " + + " WHERE ProbeSet.Symbol IS NOT NULL " + f"AND {temp_table}.Correlation IS NOT NULL " + f"AND {db_type}XRef.{db_type}FreezeId = {db_type}Freeze.Id " + f"AND {db_type}Freeze.Name = %(db_name)s " + @@ -451,17 +451,17 @@ def build_query_tissue_corr(db_type, temp_table, sample_id_columns, joins): f"ORDER BY {db_type}.Id"), 3) -def fetch_all_database_data( - conn: Any, species: str, gene_id: int, gene_symbol: str, +def fetch_all_database_data(# pylint: disable=[R0913, R0914] + conn: Any, species: str, gene_id: int, trait_symbol: str, samples: Tuple[str, ...], db_type: str, db_name: str, method: str, - returnNumber: int, tissueProbeSetFreezeId: int) -> Tuple[Any, Any]: + return_number: int, probeset_freeze_id: int) -> Tuple[Any, Any]: """ This is a migration of the `web.webqtl.correlation.CorrelationPage.fetchAllDatabaseData` function in GeneNetwork1. """ def __build_query__(sample_ids, temp_table): - sample_id_columns = ", ".join(f"T{smpl}.value" for smpl in samples_ids) + sample_id_columns = ", ".join(f"T{smpl}.value" for smpl in sample_ids) if db_type == "Publish": joins = tuple( ("LEFT JOIN PublishData AS T{item} " @@ -484,12 +484,12 @@ def fetch_all_database_data( for item in sample_ids) if method.lower() == "sgo literature correlation": return build_query_sgo_lit_corr( - sample_ids, temp_table, sample_id_columns) + sample_ids, temp_table, sample_id_columns, joins) if method.lower() in ( "tissue correlation, pearson's r", "tissue correlation, spearman's rho"): return build_query_tissue_corr( - sample_ids, temp_table, sample_id_columns) + sample_ids, temp_table, sample_id_columns, joins) joins = tuple( (f"LEFT JOIN {db_type}Data AS T{item} " f"ON T{item}.Id = {db_type}XRef.DataId " @@ -513,7 +513,7 @@ def fetch_all_database_data( cursor.execute( query, db_name=db_name, **{f"T{item}_sample_id": item for item in sample_ids}) - return cursor.fetchall() + return (cursor.fetchall(), data_start_pos) sample_ids = tuple( # look into graduating this to an argument and removing the `samples` @@ -543,4 +543,4 @@ def fetch_all_database_data( with conn.cursor() as cursor: cursor.execute(f"DROP TEMPORARY TABLE {temp_table}") - return trait_database, data_start_pos + return (tuple(item[0] for item in trait_database), trait_database[0][1]) diff --git a/tests/unit/db/test_correlation.py b/tests/unit/db/test_correlation.py index 866d28d..3f940b2 100644 --- a/tests/unit/db/test_correlation.py +++ b/tests/unit/db/test_correlation.py @@ -13,6 +13,9 @@ class TestCorrelation(TestCase): maxDiff = None def test_build_query_sgo_lit_corr(self): + """ + Test that the literature correlation query is built correctly. + """ self.assertEqual( build_query_sgo_lit_corr( "Probeset", @@ -51,6 +54,9 @@ class TestCorrelation(TestCase): 2)) def test_build_query_tissue_corr(self): + """ + Test that the tissue correlation query is built correctly. + """ self.assertEqual( build_query_tissue_corr( "Probeset", -- cgit v1.2.3