From 157df453cdb84591cb44af9f1d2677cd0b2c0380 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 18 Oct 2021 12:17:11 +0300 Subject: Move 'export_trait_data' to 'gn3.db.traits' module Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * gn3/db/traits.py: Move function `export_trait_data` here * gn3/heatmaps.py: Remove function `export_trait_data` * tests/unit/db/test_traits.py: Move function `export_trait_data` tests here * tests/unit/test_heatmaps.py: Remove function `export_trait_data` here Function `export_trait_data` more closely corresponds to the traits and is used in more than just the `gn3.heatmaps` module. This commit moves the relevant code over to the `gn3.db.traits` module and also moves the tests to the corresponding tests modules. --- gn3/db/traits.py | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'gn3/db') diff --git a/gn3/db/traits.py b/gn3/db/traits.py index f2673c8..1e29aff 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -1,12 +1,81 @@ """This class contains functions relating to trait data manipulation""" import os +from functools import reduce from typing import Any, Dict, Union, Sequence + from gn3.settings import TMPDIR from gn3.random import random_string from gn3.function_helpers import compose from gn3.db.datasets import retrieve_trait_dataset +def export_trait_data( + trait_data: dict, samplelist: Sequence[str], dtype: str = "val", + var_exists: bool = False, n_exists: bool = False): + """ + Export data according to `samplelist`. Mostly used in calculating + correlations. + + DESCRIPTION: + Migrated from + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L166-L211 + + PARAMETERS + trait: (dict) + The dictionary of key-value pairs representing a trait + samplelist: (list) + A list of sample names + dtype: (str) + ... verify what this is ... 
+ var_exists: (bool) + A flag indicating existence of variance + n_exists: (bool) + A flag indicating existence of ndata + """ + def __export_all_types(tdata, sample): + sample_data = [] + if tdata[sample]["value"]: + sample_data.append(tdata[sample]["value"]) + if var_exists: + if tdata[sample]["variance"]: + sample_data.append(tdata[sample]["variance"]) + else: + sample_data.append(None) + if n_exists: + if tdata[sample]["ndata"]: + sample_data.append(tdata[sample]["ndata"]) + else: + sample_data.append(None) + else: + if var_exists and n_exists: + sample_data += [None, None, None] + elif var_exists or n_exists: + sample_data += [None, None] + else: + sample_data.append(None) + + return tuple(sample_data) + + def __exporter(accumulator, sample): + # pylint: disable=[R0911] + if sample in trait_data["data"]: + if dtype == "val": + return accumulator + (trait_data["data"][sample]["value"], ) + if dtype == "var": + return accumulator + (trait_data["data"][sample]["variance"], ) + if dtype == "N": + return accumulator + (trait_data["data"][sample]["ndata"], ) + if dtype == "all": + return accumulator + __export_all_types(trait_data["data"], sample) + raise KeyError("Type `%s` is incorrect" % dtype) + if var_exists and n_exists: + return accumulator + (None, None, None) + if var_exists or n_exists: + return accumulator + (None, None) + return accumulator + (None,) + + return reduce(__exporter, samplelist, tuple()) + def get_trait_csv_sample_data(conn: Any, trait_name: int, phenotype_id: int): """Fetch a trait and return it as a csv string""" -- cgit v1.2.3 From 94ca79045baf978d6aab964c7c70b84911c1124f Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 18 Oct 2021 12:27:32 +0300 Subject: Move `export_informative` function to `gn3.db.traits` module Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * gn3/db/traits.py: Move `export_informative` function here * 
gn3/partial_correlations.py: Remove `export_informative` function * tests/unit/db/test_traits.py: Move `export_informative` function tests here * tests/unit/test_partial_correlations.py: Remove `export_informative` function tests The `export_informative` function relates more to the traits than to the partial correlations, and could find use in more than just the partial correlations stuff. This commit moves the function to the more traits-specific `gn3.db.traits` module. --- gn3/db/traits.py | 24 +++++++++ gn3/partial_correlations.py | 24 --------- tests/unit/db/test_traits.py | 86 ++++++++++++++++++++++++++++++++ tests/unit/test_partial_correlations.py | 87 +-------------------------------- 4 files changed, 111 insertions(+), 110 deletions(-) (limited to 'gn3/db') diff --git a/gn3/db/traits.py b/gn3/db/traits.py index 1e29aff..1c6aaa7 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -743,3 +743,27 @@ def generate_traits_filename(base_path: str = TMPDIR): """Generate a unique filename for use with generated traits files.""" return "{}/traits_test_file_{}.txt".format( os.path.abspath(base_path), random_string(10)) + +def export_informative(trait_data: dict, inc_var: bool = False) -> tuple: + """ + Export informative strain + + This is a migration of the `exportInformative` function in + web/webqtl/base/webqtlTrait.py module in GeneNetwork1. + + There is a chance that the original implementation has a bug, especially + dealing with the `inc_var` value. It the `inc_var` value is meant to control + the inclusion of the `variance` value, then the current implementation, and + that one in GN1 have a bug. 
+ """ + def __exporter__(acc, data_item): + if not inc_var or data_item["variance"] is not None: + return ( + acc[0] + (data_item["sample_name"],), + acc[1] + (data_item["value"],), + acc[2] + (data_item["variance"],)) + return acc + return reduce( + __exporter__, + filter(lambda td: td["value"] is not None, trait_data["data"].values()), + (tuple(), tuple(), tuple())) diff --git a/gn3/partial_correlations.py b/gn3/partial_correlations.py index 8c37886..df390ed 100644 --- a/gn3/partial_correlations.py +++ b/gn3/partial_correlations.py @@ -6,27 +6,3 @@ GeneNetwork1. """ from functools import reduce - -def export_informative(trait_data: dict, inc_var: bool = False) -> tuple: - """ - Export informative strain - - This is a migration of the `exportInformative` function in - web/webqtl/base/webqtlTrait.py module in GeneNetwork1. - - There is a chance that the original implementation has a bug, especially - dealing with the `inc_var` value. It the `inc_var` value is meant to control - the inclusion of the `variance` value, then the current implementation, and - that one in GN1 have a bug. 
- """ - def __exporter__(acc, data_item): - if not inc_var or data_item["variance"] is not None: - return ( - acc[0] + (data_item["sample_name"],), - acc[1] + (data_item["value"],), - acc[2] + (data_item["variance"],)) - return acc - return reduce( - __exporter__, - filter(lambda td: td["value"] is not None, trait_data["data"].values()), - (tuple(), tuple(), tuple())) diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 0c4ef78..67f0c6f 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -3,6 +3,7 @@ from unittest import mock, TestCase from gn3.db.traits import ( build_trait_name, export_trait_data, + export_informative, set_haveinfo_field, update_sample_data, retrieve_trait_info, @@ -315,3 +316,88 @@ class TestTraitsDBFunctions(TestCase): trait_data, samplelist, dtype=dtype, var_exists=vflag, n_exists=nflag), expected) + + def test_export_informative(self): + """Test that the function exports appropriate data.""" + for trait_data, inc_var, expected in [ + [{"data": { + "sample1": { + "sample_name": "sample1", "value": 9, "variance": None, + "ndata": 13 + }, + "sample2": { + "sample_name": "sample2", "value": 8, "variance": None, + "ndata": 13 + }, + "sample3": { + "sample_name": "sample3", "value": 7, "variance": None, + "ndata": 13 + }, + "sample4": { + "sample_name": "sample4", "value": 6, "variance": None, + "ndata": 13 + }, + }}, 0, ( + ("sample1", "sample2", "sample3", "sample4"), (9, 8, 7, 6), + (None, None, None, None))], + [{"data": { + "sample1": { + "sample_name": "sample1", "value": 9, "variance": None, + "ndata": 13 + }, + "sample2": { + "sample_name": "sample2", "value": 8, "variance": None, + "ndata": 13 + }, + "sample3": { + "sample_name": "sample3", "value": None, "variance": None, + "ndata": 13 + }, + "sample4": { + "sample_name": "sample4", "value": 6, "variance": None, + "ndata": 13 + }, + }}, 0, ( + ("sample1", "sample2", "sample4"), (9, 8, 6), + (None, None, None))], + [{"data": { + 
"sample1": { + "sample_name": "sample1", "value": 9, "variance": None, + "ndata": 13 + }, + "sample2": { + "sample_name": "sample2", "value": 8, "variance": None, + "ndata": 13 + }, + "sample3": { + "sample_name": "sample3", "value": 7, "variance": None, + "ndata": 13 + }, + "sample4": { + "sample_name": "sample4", "value": 6, "variance": None, + "ndata": 13 + }, + }}, True, (tuple(), tuple(), tuple())], + [{"data": { + "sample1": { + "sample_name": "sample1", "value": 9, "variance": None, + "ndata": 13 + }, + "sample2": { + "sample_name": "sample2", "value": 8, "variance": 0.657, + "ndata": 13 + }, + "sample3": { + "sample_name": "sample3", "value": 7, "variance": None, + "ndata": 13 + }, + "sample4": { + "sample_name": "sample4", "value": 6, "variance": None, + "ndata": 13 + }, + }}, 0, ( + ("sample1", "sample2", "sample3", "sample4"), (9, 8, 7, 6), + (None, 0.657, None, None))]]: + with self.subTest(trait_data=trait_data): + self.assertEqual( + export_informative(trait_data, inc_var), expected) diff --git a/tests/unit/test_partial_correlations.py b/tests/unit/test_partial_correlations.py index 6eea078..f204d4f 100644 --- a/tests/unit/test_partial_correlations.py +++ b/tests/unit/test_partial_correlations.py @@ -1,92 +1,7 @@ """Module contains tests for gn3.partial_correlations""" from unittest import TestCase -from gn3.partial_correlations import export_informative + class TestPartialCorrelations(TestCase): """Class for testing partial correlations computation functions""" - - def test_export_informative(self): - """Test that the function exports appropriate data.""" - for trait_data, inc_var, expected in [ - [{"data": { - "sample1": { - "sample_name": "sample1", "value": 9, "variance": None, - "ndata": 13 - }, - "sample2": { - "sample_name": "sample2", "value": 8, "variance": None, - "ndata": 13 - }, - "sample3": { - "sample_name": "sample3", "value": 7, "variance": None, - "ndata": 13 - }, - "sample4": { - "sample_name": "sample4", "value": 6, "variance": 
None, - "ndata": 13 - }, - }}, 0, ( - ("sample1", "sample2", "sample3", "sample4"), (9, 8, 7, 6), - (None, None, None, None))], - [{"data": { - "sample1": { - "sample_name": "sample1", "value": 9, "variance": None, - "ndata": 13 - }, - "sample2": { - "sample_name": "sample2", "value": 8, "variance": None, - "ndata": 13 - }, - "sample3": { - "sample_name": "sample3", "value": None, "variance": None, - "ndata": 13 - }, - "sample4": { - "sample_name": "sample4", "value": 6, "variance": None, - "ndata": 13 - }, - }}, 0, ( - ("sample1", "sample2", "sample4"), (9, 8, 6), - (None, None, None))], - [{"data": { - "sample1": { - "sample_name": "sample1", "value": 9, "variance": None, - "ndata": 13 - }, - "sample2": { - "sample_name": "sample2", "value": 8, "variance": None, - "ndata": 13 - }, - "sample3": { - "sample_name": "sample3", "value": 7, "variance": None, - "ndata": 13 - }, - "sample4": { - "sample_name": "sample4", "value": 6, "variance": None, - "ndata": 13 - }, - }}, True, (tuple(), tuple(), tuple())], - [{"data": { - "sample1": { - "sample_name": "sample1", "value": 9, "variance": None, - "ndata": 13 - }, - "sample2": { - "sample_name": "sample2", "value": 8, "variance": 0.657, - "ndata": 13 - }, - "sample3": { - "sample_name": "sample3", "value": 7, "variance": None, - "ndata": 13 - }, - "sample4": { - "sample_name": "sample4", "value": 6, "variance": None, - "ndata": 13 - }, - }}, 0, ( - ("sample1", "sample2", "sample3", "sample4"), (9, 8, 7, 6), - (None, 0.657, None, None))]]: - with self.subTest(trait_data=trait_data): - self.assertEqual( - export_informative(trait_data, inc_var), expected) -- cgit v1.2.3 From 41936d0a486ef54bf4fc049c2b4d85dca43ab761 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Thu, 21 Oct 2021 09:36:36 +0300 Subject: Implement `translate_to_mouse_gene_id` function Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * Migrate the 
`web.webqtl.correlation/CorrelationPage.translateToMouseGeneID` function in GN1 to GN3. This is a function that retrieves data from the database, and therefore uses a system outside of our code, therefore, the function does not have a corresponding unit test. This kind of function will probably need to be tested at the integration or system tests level, where we test that our code interacts correctly with any and all external systems that it should. --- gn3/db/species.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'gn3/db') diff --git a/gn3/db/species.py b/gn3/db/species.py index 0deae4e..1e5015f 100644 --- a/gn3/db/species.py +++ b/gn3/db/species.py @@ -30,3 +30,34 @@ def get_chromosome(name: str, is_species: bool, conn: Any) -> Optional[Tuple]: with conn.cursor() as cursor: cursor.execute(_sql) return cursor.fetchall() + +def translate_to_mouse_gene_id(species: str, geneid: int, conn: Any) -> int: + """ + Translate rat or human geneid to mouse geneid + + This is a migration of the + `web.webqtl.correlation/CorrelationPage.translateToMouseGeneID` function in + GN1 + """ + assert species in ("rat", "mouse", "human"), "Invalid species" + if geneid is None: + return 0 + + if species == "mouse": + return geneid + + with conn.cursor as cursor: + if species == "rat": + cursor.execute( + "SELECT mouse FROM GeneIDXRef WHERE rat = %s", geneid) + rat_geneid = cursor.fetchone() + if rat_geneid: + return rat_geneid[0] + + cursor.execute( + "SELECT mouse FROM GeneIDXRef WHERE human = %s", geneid) + human_geneid = cursor.fetchone() + if human_geneid: + return human_geneid[0] + + return 0 # default if all else fails -- cgit v1.2.3 From df8185078a52c89cc5a75ff9be413a236da29a6e Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 25 Oct 2021 09:31:58 +0300 Subject: Implement `get_filename` for correlations Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * 
Implement `get_filename` for the correlations, to be used to determine whether to do fast or normal correlations. This is a migration of the `web.webqtl.correlation.CorrelationPage.getFileName` function in GN1 --- gn3/db/correlations.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 gn3/db/correlations.py (limited to 'gn3/db') diff --git a/gn3/db/correlations.py b/gn3/db/correlations.py new file mode 100644 index 0000000..fa8e7ca --- /dev/null +++ b/gn3/db/correlations.py @@ -0,0 +1,26 @@ +""" +This module will hold functions that are used in the (partial) correlations +feature to access the database to retrieve data needed for computations. +""" + +from typing import Any +def get_filename(target_db_name: str, conn: Any) -> str: + """ + Retrieve the name of the reference database file with which correlations are + computed. + + This is a migration of the + `web.webqtl.correlation.CorrelationPage.getFileName` function in + GeneNetwork1. + """ + with conn.cursor() as cursor: + cursor.execute( + "SELECT Id, FullName from ProbeSetFreeze WHERE Name-%s", + target_db_name) + result = cursor.fetchone() + if result: + return "ProbeSetFreezeId_{tid}_FullName_{fname}.txt".format( + tid=result[0], + fname=result[1].replace(' ', '_').replace('/', '_')) + + return "" -- cgit v1.2.3 From 0814eea6b57e45d4337424e63c164d204d03b64d Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 25 Oct 2021 12:38:24 +0300 Subject: Implement `fetch_literature_correlations` and dependencies Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * Migrate: * `web.webqtl.correlation.CorrelationPage.getTempLiteratureTable` * `web.webqtl.correlation.CorrelationPage.fetchLitCorrelations` from GeneNetwork1. The first function creates and populates a temporary table with the literature correlations data. 
The second function uses the data in the newly created temporary table to link the trait with the correlation value. --- gn3/db/correlations.py | 113 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) (limited to 'gn3/db') diff --git a/gn3/db/correlations.py b/gn3/db/correlations.py index fa8e7ca..67cfef9 100644 --- a/gn3/db/correlations.py +++ b/gn3/db/correlations.py @@ -4,6 +4,10 @@ feature to access the database to retrieve data needed for computations. """ from typing import Any + +from gn3.random import random_string +from gn3.db.species import translate_to_mouse_gene_id + def get_filename(target_db_name: str, conn: Any) -> str: """ Retrieve the name of the reference database file with which correlations are @@ -24,3 +28,112 @@ def get_filename(target_db_name: str, conn: Any) -> str: fname=result[1].replace(' ', '_').replace('/', '_')) return "" + +def build_temporary_literature_table( + species: str, gene_id: int, return_number: int, conn: Any) -> str: + """ + Build and populate a temporary table to hold the literature correlation data + to be used in computations. + + "This is a migration of the + `web.webqtl.correlation.CorrelationPage.getTempLiteratureTable` function in + GeneNetwork1. 
+ """ + def __translated_species_id(row, cursor): + if species == "mouse": + return row[1] + query = { + "rat": "SELECT rat FROM GeneIDXRef WHERE mouse=%s", + "human": "SELECT human FROM GeneIDXRef WHERE mouse=%d"} + if species in query.keys(): + cursor.execute(query[species], row[1]) + record = cursor.fetchone() + if record: + return record[0] + return None + return None + + temp_table_name = f"TOPLITERATURE{random_string(8)}" + with conn.cursor as cursor: + mouse_geneid = translate_to_mouse_gene_id(species, gene_id, conn) + data_query = ( + "SELECT GeneId1, GeneId2, value FROM LCorrRamin3 " + "WHERE GeneId1 = %(mouse_gene_id)s " + "UNION ALL " + "SELECT GeneId2, GeneId1, value FROM LCorrRamin3 " + "WHERE GeneId2 = %(mouse_gene_id)s " + "AND GeneId1 != %(mouse_gene_id)s") + cursor.execute( + (f"CREATE TEMPORARY TABLE {temp_table_name} (" + "GeneId1 int(12) unsigned, " + "GeneId2 int(12) unsigned PRIMARY KEY, " + "value double)")) + cursor.execute(data_query, mouse_gene_id=mouse_geneid) + literature_data = [ + {"GeneId1": row[0], "GeneId2": row[1], "value": row[2]} + for row in cursor.fetchall() + if __translated_species_id(row, cursor)] + + cursor.execute( + (f"INSERT INTO {temp_table_name} " + "VALUES (%(GeneId1)s, %(GeneId2)s, %(value)s)"), + literature_data[0:(2 * return_number)]) + + return temp_table_name + +def fetch_geno_literature_correlations(temp_table: str) -> str: + """ + Helper function for `fetch_literature_correlations` below, to build query + for `Geno*` tables. 
+ """ + return ( + f"SELECT Geno.Name, {temp_table}.value " + "FROM Geno, GenoXRef, GenoFreeze " + f"LEFT JOIN {temp_table} ON {temp_table}.GeneId2=ProbeSet.GeneId " + "WHERE ProbeSet.GeneId IS NOT NULL " + f"AND {temp_table}.value IS NOT NULL " + "AND GenoXRef.GenoFreezeId = GenoFreeze.Id " + "AND GenoFreeze.Name = %(db_name)s " + "AND Geno.Id=GenoXRef.GenoId " + "ORDER BY Geno.Id") + +def fetch_probeset_literature_correlations(temp_table: str) -> str: + """ + Helper function for `fetch_literature_correlations` below, to build query + for `ProbeSet*` tables. + """ + return ( + f"SELECT ProbeSet.Name, {temp_table}.value " + "FROM ProbeSet, ProbeSetXRef, ProbeSetFreeze " + "LEFT JOIN {temp_table} ON {temp_table}.GeneId2=ProbeSet.GeneId " + "WHERE ProbeSet.GeneId IS NOT NULL " + "AND {temp_table}.value IS NOT NULL " + "AND ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id " + "AND ProbeSetFreeze.Name = %(db_name)s " + "AND ProbeSet.Id=ProbeSetXRef.ProbeSetId " + "ORDER BY ProbeSet.Id") + +def fetch_literature_correlations( + species: str, gene_id: int, dataset: dict, return_number: int, + conn: Any) -> dict: + """ + Gather the literature correlation data and pair it with trait id string(s). + + This is a migration of the + `web.webqtl.correlation.CorrelationPage.fetchLitCorrelations` function in + GeneNetwork1. 
+ """ + temp_table = build_temporary_literature_table( + species, gene_id, return_number, conn) + query_fns = { + "Geno": fetch_geno_literature_correlations, + # "Temp": fetch_temp_literature_correlations, + # "Publish": fetch_publish_literature_correlations, + "ProbeSet": fetch_probeset_literature_correlations} + with conn.cursor as cursor: + cursor.execute( + query_fns[dataset["dataset_type"]](temp_table), + db_name=dataset["dataset_name"]) + results = cursor.fetchall() + cursor.execute("DROP TEMPORARY TABLE %s", temp_table) + return dict(results) # {trait_name: lit_corr for trait_name, lit_corr in results} -- cgit v1.2.3 From c13afb3af166d2b01e4f9fd9b09bb231f0a63cb1 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 25 Oct 2021 19:19:54 +0300 Subject: Start implementation of `fetch_tissue_correlations` and dependencies * compare_tissue_correlation_absolute_values: New function. Complete. Used for sorting of tissue correlation values * fetch_symbol_value_pair_dict: New function. Complete. Maps gene symbols to tissue expression data * fetch_gene_symbol_tissue_value_dict: New function. Complete. Wrapper for `gn3.db.correlations.fetch_symbol_value_pair_dict` function * fetch_tissue_probeset_xref_info: New function. Complete. Retrieves the Probeset XRef information for tissues from the database. * correlations_of_all_tissue_traits: Stub. Dependencies not completed yet. * build_temporary_tissue_correlations_table: Stub. Dependencies not completed yet. * fetch_tissue_correlations: New function. Incomplete. This function calls (a) stub(s) function(s) which is/are under development still. 
--- gn3/db/correlations.py | 183 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 181 insertions(+), 2 deletions(-) (limited to 'gn3/db') diff --git a/gn3/db/correlations.py b/gn3/db/correlations.py index 67cfef9..87ab082 100644 --- a/gn3/db/correlations.py +++ b/gn3/db/correlations.py @@ -3,9 +3,11 @@ This module will hold functions that are used in the (partial) correlations feature to access the database to retrieve data needed for computations. """ -from typing import Any +from functools import reduce +from typing import Any, Dict, Tuple from gn3.random import random_string +from gn3.data_helpers import partition_all from gn3.db.species import translate_to_mouse_gene_id def get_filename(target_db_name: str, conn: Any) -> str: @@ -136,4 +138,181 @@ def fetch_literature_correlations( db_name=dataset["dataset_name"]) results = cursor.fetchall() cursor.execute("DROP TEMPORARY TABLE %s", temp_table) - return dict(results) # {trait_name: lit_corr for trait_name, lit_corr in results} + return dict(results) + +def compare_tissue_correlation_absolute_values(val1, val2): + """ + Comparison function for use when sorting tissue correlation values. + + This is a partial migration of the + `web.webqtl.correlation.CorrelationPage.getTempTissueCorrTable` function in + GeneNetwork1.""" + try: + if abs(val1) < abs(val2): + return 1 + if abs(val1) == abs(val2): + return 0 + return -1 + except TypeError: + return 0 + +def fetch_symbol_value_pair_dict( + symbol_list: Tuple[str, ...], data_id_dict: dict, + conn: Any) -> Dict[str, Tuple[float, ...]]: + """ + Map each gene symbols to the corresponding tissue expression data. + + This is a migration of the + `web.webqtl.correlation.correlationFunction.getSymbolValuePairDict` function + in GeneNetwork1. 
+ """ + data_ids = { + symbol: data_id_dict.get(symbol) for symbol in symbol_list + if data_id_dict.get(symbol) is not None + } + query = "SELECT Id, value FROM TissueProbeSetData WHERE Id IN %(data_ids)s" + with conn.cursor() as cursor: + cursor.execute( + query, + data_ids=tuple(data_ids.values())) + value_results = cursor.fetchall() + return { + key: tuple(row[1] for row in value_results if row[0] == key) + for key in data_ids.keys() + } + + return {} + +def fetch_gene_symbol_tissue_value_dict( + symbol_list: Tuple[str, ...], data_id_dict: dict, conn: Any, + limit_num: int = 1000) -> dict:#getGeneSymbolTissueValueDict + """ + Wrapper function for `gn3.db.correlations.fetch_symbol_value_pair_dict`. + + This is a migrations of the + `web.webqtl.correlation.correlationFunction.getGeneSymbolTissueValueDict` in + GeneNetwork1. + """ + count = len(symbol_list) + if count != 0 and count <= limit_num: + return fetch_symbol_value_pair_dict(symbol_list, data_id_dict, conn) + + if count > limit_num: + return { + key: value for dct in [ + fetch_symbol_value_pair_dict(sl, data_id_dict, conn) + for sl in partition_all(limit_num, symbol_list)] + for key, value in dct.items() + } + + return {} + +def fetch_tissue_probeset_xref_info( + gene_name_list: Tuple[str, ...], probeset_freeze_id: int, + conn: Any) -> Tuple[tuple, dict, dict, dict, dict, dict, dict]: + """ + Retrieve the ProbeSet XRef information for tissues. 
+ + This is a migration of the + `web.webqtl.correlation.correlationFunction.getTissueProbeSetXRefInfo` + function in GeneNetwork1.""" + with conn.cursor() as cursor: + if len(gene_name_list) == 0: + query = ( + "SELECT t.Symbol, t.GeneId, t.DataId, t.Chr, t.Mb, " + "t.description, t.Probe_Target_Description " + "FROM " + "(" + " SELECT Symbol, max(Mean) AS maxmean " + " FROM TissueProbeSetXRef " + " WHERE TissueProbeSetFreezeId=%(probeset_freeze_id)s " + " AND Symbol != '' " + " AND Symbol IS NOT NULL " + " GROUP BY Symbol" + ") AS x " + "INNER JOIN TissueProbeSetXRef AS t ON t.Symbol = x.Symbol " + "AND t.Mean = x.maxmean") + cursor.execute(query, probeset_freeze_id=probeset_freeze_id) + else: + query = ( + "SELECT t.Symbol, t.GeneId, t.DataId, t.Chr, t.Mb, " + "t.description, t.Probe_Target_Description " + "FROM " + "(" + " SELECT Symbol, max(Mean) AS maxmean " + " FROM TissueProbeSetXRef " + " WHERE TissueProbeSetFreezeId=%(probeset_freeze_id)s " + " AND Symbol in %(symbols)s " + " GROUP BY Symbol" + ") AS x " + "INNER JOIN TissueProbeSetXRef AS t ON t.Symbol = x.Symbol " + "AND t.Mean = x.maxmean") + cursor.execute( + query, probeset_freeze_id=probeset_freeze_id, + symbols=tuple(gene_name_list)) + + results = cursor.fetchall() + + return reduce( + lambda acc, item: ( + acc[0] + (item[0],), + {**acc[1], item[0].lower(): item[1]}, + {**acc[1], item[0].lower(): item[2]}, + {**acc[1], item[0].lower(): item[3]}, + {**acc[1], item[0].lower(): item[4]}, + {**acc[1], item[0].lower(): item[5]}, + {**acc[1], item[0].lower(): item[6]}), + results or tuple(), + (tuple(), {}, {}, {}, {}, {}, {})) + +def correlations_of_all_tissue_traits() -> Tuple[dict, dict]: + """ + This is a migration of the + `web.webqtl.correlation.CorrelationPage.calculateCorrOfAllTissueTrait` + function in GeneNetwork1. 
+ """ + raise Exception("Unimplemented!!!") + return ({}, {}) + +def build_temporary_tissue_correlations_table( + trait_symbol: str, probeset_freeze_id: int, method: str, + return_number: int, conn: Any) -> str: + """ + Build a temporary table to hold the tissue correlations data. + + This is a migration of the + `web.webqtl.correlation.CorrelationPage.getTempTissueCorrTable` function in + GeneNetwork1.""" + raise Exception("Unimplemented!!!") + return "" + +def fetch_tissue_correlations( + dataset: dict, trait_symbol: str, probeset_freeze_id: int, method: str, + return_number: int, conn: Any) -> dict: + """ + Pair tissue correlations data with a trait id string. + + This is a migration of the + `web.webqtl.correlation.CorrelationPage.fetchTissueCorrelations` function in + GeneNetwork1. + """ + temp_table = build_temporary_tissue_correlations_table( + trait_symbol, probeset_freeze_id, method, return_number, conn) + with conn.cursor() as cursor: + cursor.execute( + ( + f"SELECT ProbeSet.Name, {temp_table}.Correlation, " + f"{temp_table}.PValue " + "FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) " + "LEFT JOIN {temp_table} ON {temp_table}.Symbol=ProbeSet.Symbol " + "WHERE ProbeSetFreeze.Name = %(db_name) " + "AND ProbeSetFreeze.Id=ProbeSetXRef.ProbeSetFreezeId " + "AND ProbeSet.Id = ProbeSetXRef.ProbeSetId " + "AND ProbeSet.Symbol IS NOT NULL " + "AND %s.Correlation IS NOT NULL"), + db_name=dataset["dataset_name"]) + results = cursor.fetchall() + cursor.execute("DROP TEMPORARY TABLE %s", temp_table) + return { + trait_name: (tiss_corr, tiss_p_val) + for trait_name, tiss_corr, tiss_p_val in results} -- cgit v1.2.3 From 42dee16ec8a7d7620367dd31481999bfca9313db Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Tue, 26 Oct 2021 08:59:30 +0300 Subject: Implement `fetch_gene_symbol_tissue_value_dict_for_trait` Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * Implement 
`fetch_gene_symbol_tissue_value_dict_for_trait` function which is a migration of the `web.webqtl.correlation.correlationFunction.getGeneSymbolTissueValueDictForTrait` function in GeneNetwork1. --- gn3/db/correlations.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'gn3/db') diff --git a/gn3/db/correlations.py b/gn3/db/correlations.py index 87ab082..cae8080 100644 --- a/gn3/db/correlations.py +++ b/gn3/db/correlations.py @@ -266,7 +266,22 @@ def fetch_tissue_probeset_xref_info( (tuple(), {}, {}, {}, {}, {}, {})) def correlations_of_all_tissue_traits() -> Tuple[dict, dict]: +def fetch_gene_symbol_tissue_value_dict_for_trait( + gene_name_list: Tuple[str, ...], probeset_freeze_id: int, + conn: Any) -> dict: + """ + Fetches a map of the gene symbols to the tissue values. + + This is a migration of the + `web.webqtl.correlation.correlationFunction.getGeneSymbolTissueValueDictForTrait` + function in GeneNetwork1. """ + xref_info = fetch_tissue_probeset_xref_info( + gene_name_list, probeset_freeze_id, conn) + if xref_info[0]: + return fetch_gene_symbol_tissue_value_dict(xref_info[0], xref_info[2], conn) + return {} + This is a migration of the `web.webqtl.correlation.CorrelationPage.calculateCorrOfAllTissueTrait` function in GeneNetwork1. -- cgit v1.2.3 From d6e392c2488421ae04b4ffd5de26be40ed86a9b3 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Tue, 26 Oct 2021 09:17:52 +0300 Subject: Complete `correlations_of_all_tissue_traits` Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * Complete the implementation of the `correlations_of_all_tissue_traits` function by providing a call to a non-implemented function. 
--- gn3/db/correlations.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'gn3/db') diff --git a/gn3/db/correlations.py b/gn3/db/correlations.py index cae8080..f43b8a5 100644 --- a/gn3/db/correlations.py +++ b/gn3/db/correlations.py @@ -265,7 +265,6 @@ def fetch_tissue_probeset_xref_info( results or tuple(), (tuple(), {}, {}, {}, {}, {}, {})) -def correlations_of_all_tissue_traits() -> Tuple[dict, dict]: def fetch_gene_symbol_tissue_value_dict_for_trait( gene_name_list: Tuple[str, ...], probeset_freeze_id: int, conn: Any) -> dict: @@ -282,12 +281,25 @@ def fetch_gene_symbol_tissue_value_dict_for_trait( return fetch_gene_symbol_tissue_value_dict(xref_info[0], xref_info[2], conn) return {} +def correlations_of_all_tissue_traits( + trait_symbol: str, probeset_freeze_id: int, + method: str, conn: Any) -> Tuple[dict, dict]: + """ + Computes and returns the correlation of all tissue traits. + This is a migration of the - `web.webqtl.correlation.CorrelationPage.calculateCorrOfAllTissueTrait` + `web.webqtl.correlation.correlationFunction.calculateCorrOfAllTissueTrait` function in GeneNetwork1. 
""" - raise Exception("Unimplemented!!!") - return ({}, {}) + primary_trait_symbol_value_dict = fetch_gene_symbol_tissue_value_dict_for_trait( + (trait_symbol,), probeset_freeze_id, conn) + primary_trait_value = primary_trait_symbol_value_dict.vlaues()[0] + symbol_value_dict = fetch_gene_symbol_tissue_value_dict_for_trait( + tuple(), probeset_freeze_id, conn) + if method == "1": + return batch_computed_tissue_correlation( + primaryTraitValue,SymbolValueDict,method='spearman') + return batch_computed_tissue_correlation(primaryTraitValue,SymbolValueDict) def build_temporary_tissue_correlations_table( trait_symbol: str, probeset_freeze_id: int, method: str, @@ -298,6 +310,8 @@ def build_temporary_tissue_correlations_table( This is a migration of the `web.webqtl.correlation.CorrelationPage.getTempTissueCorrTable` function in GeneNetwork1.""" + symbol_corr_dict, symbol_p_value_dict = correlations_of_all_tissue_traits( + trait_symbol, probeset_freeze_id, method, conn) raise Exception("Unimplemented!!!") return "" -- cgit v1.2.3 From 5079e5077adafdbfd0b7e7c0ef12431e9aed443d Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Tue, 26 Oct 2021 09:23:48 +0300 Subject: Stub out `batch_computed_tissue_correlation` function Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * Stub out `batch_computed_tissue_correlation` function to be used in implementing the function down the line. 
--- gn3/db/correlations.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'gn3/db') diff --git a/gn3/db/correlations.py b/gn3/db/correlations.py index f43b8a5..54d3079 100644 --- a/gn3/db/correlations.py +++ b/gn3/db/correlations.py @@ -281,6 +281,14 @@ def fetch_gene_symbol_tissue_value_dict_for_trait( return fetch_gene_symbol_tissue_value_dict(xref_info[0], xref_info[2], conn) return {} +def batch_computed_tissue_correlation( + trait_value: str, symbol_value_dict: dict, + method: str = "pearson") -> Tuple[dict, dict]: + """ + `web.webqtl.correlation.correlationFunction.batchCalTissueCorr`""" + raise Exception("Not implemented!") + return ({}, {}) + def correlations_of_all_tissue_traits( trait_symbol: str, probeset_freeze_id: int, method: str, conn: Any) -> Tuple[dict, dict]: -- cgit v1.2.3 From 84aaf880f32f5293e5e4f1c74a3f284e3c95df2f Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Wed, 27 Oct 2021 10:24:28 +0300 Subject: Remove if clauses: replace with dict Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * Remove the if clauses to simplify the code flow: use a dictionary of queries and select the appropriate query from the dictionary instead. 
--- gn3/db/species.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'gn3/db') diff --git a/gn3/db/species.py b/gn3/db/species.py index 1e5015f..abcbf64 100644 --- a/gn3/db/species.py +++ b/gn3/db/species.py @@ -47,17 +47,13 @@ def translate_to_mouse_gene_id(species: str, geneid: int, conn: Any) -> int: return geneid with conn.cursor as cursor: - if species == "rat": - cursor.execute( - "SELECT mouse FROM GeneIDXRef WHERE rat = %s", geneid) - rat_geneid = cursor.fetchone() - if rat_geneid: - return rat_geneid[0] - - cursor.execute( - "SELECT mouse FROM GeneIDXRef WHERE human = %s", geneid) - human_geneid = cursor.fetchone() - if human_geneid: - return human_geneid[0] + query = { + "rat": "SELECT mouse FROM GeneIDXRef WHERE rat = %s" + "human": "SELECT mouse FROM GeneIDXRef WHERE human = %s" + } + cursor.execute(query[species], geneid) + translated_gene_id = cursor.fetchone() + if translated_gene_id: + return translated_gene_id[0] return 0 # default if all else fails -- cgit v1.2.3 From 28b0ced4ec13451c5c7323ed5135d126f296836a Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Fri, 29 Oct 2021 04:55:30 +0300 Subject: Move the function to computations module Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * The function `batch_computed_tissue_correlation` is a pure computations function with no expressions accessing the database, as far as I can tell, therefore, this commit moves the function over to the gn3.computations.partial_correlations module that holds the pure computation functions. 
--- gn3/computations/partial_correlations.py | 8 ++++++++ gn3/db/correlations.py | 8 -------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'gn3/db') diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py index 1fb0ccc..b3de31c 100644 --- a/gn3/computations/partial_correlations.py +++ b/gn3/computations/partial_correlations.py @@ -122,3 +122,11 @@ def find_identical_traits( (primary_name,) + control_names), {}).items() if len(item[1]) > 1), tuple())) + +def batch_computed_tissue_correlation( + trait_value: str, symbol_value_dict: dict, + method: str = "pearson") -> Tuple[dict, dict]: + """ + `web.webqtl.correlation.correlationFunction.batchCalTissueCorr`""" + raise Exception("Not implemented!") + return ({}, {}) diff --git a/gn3/db/correlations.py b/gn3/db/correlations.py index 54d3079..f43b8a5 100644 --- a/gn3/db/correlations.py +++ b/gn3/db/correlations.py @@ -281,14 +281,6 @@ def fetch_gene_symbol_tissue_value_dict_for_trait( return fetch_gene_symbol_tissue_value_dict(xref_info[0], xref_info[2], conn) return {} -def batch_computed_tissue_correlation( - trait_value: str, symbol_value_dict: dict, - method: str = "pearson") -> Tuple[dict, dict]: - """ - `web.webqtl.correlation.correlationFunction.batchCalTissueCorr`""" - raise Exception("Not implemented!") - return ({}, {}) - def correlations_of_all_tissue_traits( trait_symbol: str, probeset_freeze_id: int, method: str, conn: Any) -> Tuple[dict, dict]: -- cgit v1.2.3 From a85db849660a63b09e5c40f7753d861f47eaaaeb Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Fri, 29 Oct 2021 06:37:24 +0300 Subject: Add missing comma Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi --- gn3/db/species.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gn3/db') diff --git a/gn3/db/species.py b/gn3/db/species.py index abcbf64..702a9a8 100644 --- a/gn3/db/species.py +++ 
b/gn3/db/species.py @@ -48,7 +48,7 @@ def translate_to_mouse_gene_id(species: str, geneid: int, conn: Any) -> int: with conn.cursor as cursor: query = { - "rat": "SELECT mouse FROM GeneIDXRef WHERE rat = %s" + "rat": "SELECT mouse FROM GeneIDXRef WHERE rat = %s", "human": "SELECT mouse FROM GeneIDXRef WHERE human = %s" } cursor.execute(query[species], geneid) -- cgit v1.2.3 From 5a9db2162a0a694a76a256996bb296ff06c75126 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Fri, 29 Oct 2021 06:59:57 +0300 Subject: Move `correlations_of_all_tissue_traits` Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * gn3/computations/partial_correlations.py: new function (`correlations_of_all_tissue_traits`). * gn3/db/correlations.py: delete function (`correlations_of_all_tissue_traits`). Move the function to `gn3.computations.partial_correlations` module and comment out the db-access code. Rework it to receive, as arguments, the data it previously fetched from the database, and add comments on future rework to get the function working again. --- gn3/computations/partial_correlations.py | 27 +++++++++++++++++++++++++++ gn3/db/correlations.py | 20 -------------------- 2 files changed, 27 insertions(+), 20 deletions(-) (limited to 'gn3/db') diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py index e73edfd..4ba2ba4 100644 --- a/gn3/computations/partial_correlations.py +++ b/gn3/computations/partial_correlations.py @@ -157,3 +157,30 @@ def batch_computed_tissue_correlation( `web.webqtl.correlation.correlationFunction.batchCalTissueCorr`""" raise Exception("Not implemented!") return ({}, {}) + +def correlations_of_all_tissue_traits( + primary_trait_symbol_value_dict: dict, symbol_value_dict: dict, + method: str) -> Tuple[dict, dict]: + """ + Computes and returns the correlation of all tissue traits. 
+ + This is a migration of the + `web.webqtl.correlation.correlationFunction.calculateCorrOfAllTissueTrait` + function in GeneNetwork1. + """ + # The section below existed in the original function, but with the migration + # and the proposed rework (in the near future), the values from the database + # should be passed into this function, rather than have the function fetch + # the data for itself. + # --------------------------------------------------- + # primary_trait_symbol_value_dict = fetch_gene_symbol_tissue_value_dict_for_trait( + # (trait_symbol,), probeset_freeze_id, conn) + # primary_trait_values = primary_trait_symbol_value_dict.vlaues()[0] + # symbol_value_dict = fetch_gene_symbol_tissue_value_dict_for_trait( + # tuple(), probeset_freeze_id, conn) + # --------------------------------------------------- + # We might end up actually getting rid of this function all together as the + # rework is done. + primary_trait_values = primary_trait_symbol_value_dict.values()[0] + return batch_computed_tissue_correlation( + primary_trait_values, symbol_value_dict, method) diff --git a/gn3/db/correlations.py b/gn3/db/correlations.py index f43b8a5..39ed499 100644 --- a/gn3/db/correlations.py +++ b/gn3/db/correlations.py @@ -281,26 +281,6 @@ def fetch_gene_symbol_tissue_value_dict_for_trait( return fetch_gene_symbol_tissue_value_dict(xref_info[0], xref_info[2], conn) return {} -def correlations_of_all_tissue_traits( - trait_symbol: str, probeset_freeze_id: int, - method: str, conn: Any) -> Tuple[dict, dict]: - """ - Computes and returns the correlation of all tissue traits. - - This is a migration of the - `web.webqtl.correlation.correlationFunction.calculateCorrOfAllTissueTrait` - function in GeneNetwork1. 
- """ - primary_trait_symbol_value_dict = fetch_gene_symbol_tissue_value_dict_for_trait( - (trait_symbol,), probeset_freeze_id, conn) - primary_trait_value = primary_trait_symbol_value_dict.vlaues()[0] - symbol_value_dict = fetch_gene_symbol_tissue_value_dict_for_trait( - tuple(), probeset_freeze_id, conn) - if method == "1": - return batch_computed_tissue_correlation( - primaryTraitValue,SymbolValueDict,method='spearman') - return batch_computed_tissue_correlation(primaryTraitValue,SymbolValueDict) - def build_temporary_tissue_correlations_table( trait_symbol: str, probeset_freeze_id: int, method: str, return_number: int, conn: Any) -> str: -- cgit v1.2.3 From 773c0896ccbed12170be2b5aed4554ab86d923b5 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Fri, 29 Oct 2021 08:00:27 +0300 Subject: Complete `build_temporary_tissue_correlations_table` Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * gn3/computations/partial_correlations.py: Remove comments after updating usage of the function at call point * gn3/db/correlations.py: Complete the implementation of the `build_temporary_tissue_correlations_table` function --- gn3/computations/partial_correlations.py | 13 ------------ gn3/db/correlations.py | 36 +++++++++++++++++++++++++++++--- 2 files changed, 33 insertions(+), 16 deletions(-) (limited to 'gn3/db') diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py index d095185..5777a0b 100644 --- a/gn3/computations/partial_correlations.py +++ b/gn3/computations/partial_correlations.py @@ -173,19 +173,6 @@ def correlations_of_all_tissue_traits( `web.webqtl.correlation.correlationFunction.calculateCorrOfAllTissueTrait` function in GeneNetwork1. 
""" - # The section below existed in the original function, but with the migration - # and the proposed rework (in the near future), the values from the database - # should be passed into this function, rather than have the function fetch - # the data for itself. - # --------------------------------------------------- - # primary_trait_symbol_value_dict = fetch_gene_symbol_tissue_value_dict_for_trait( - # (trait_symbol,), probeset_freeze_id, conn) - # primary_trait_values = primary_trait_symbol_value_dict.vlaues()[0] - # symbol_value_dict = fetch_gene_symbol_tissue_value_dict_for_trait( - # tuple(), probeset_freeze_id, conn) - # --------------------------------------------------- - # We might end up actually getting rid of this function all together as the - # rework is done. primary_trait_values = primary_trait_symbol_value_dict.values()[0] return batch_computed_tissue_correlation( primary_trait_values, symbol_value_dict, method) diff --git a/gn3/db/correlations.py b/gn3/db/correlations.py index 39ed499..28f050a 100644 --- a/gn3/db/correlations.py +++ b/gn3/db/correlations.py @@ -290,10 +290,40 @@ def build_temporary_tissue_correlations_table( This is a migration of the `web.webqtl.correlation.CorrelationPage.getTempTissueCorrTable` function in GeneNetwork1.""" + # We should probably pass the `correlations_of_all_tissue_traits` function + # as an argument to this function and get rid of the two lines immediately + # following this comment. 
+ from gn3.computations.partial_correlations import correlations_of_all_tissue_traits symbol_corr_dict, symbol_p_value_dict = correlations_of_all_tissue_traits( - trait_symbol, probeset_freeze_id, method, conn) - raise Exception("Unimplemented!!!") - return "" + fetch_gene_symbol_tissue_value_dict_for_trait( + (trait_symbol,), probeset_freeze_id, conn), + fetch_gene_symbol_tissue_value_dict_for_trait( + tuple(), probeset_freeze_id, conn), + method) + + symbol_corr_list = sorted( + symbol_corr_dict.items(), + key=compare_tissue_correlation_absolute_values) + + temp_table_name = f"TOPTISSUE{random_string(8)}" + create_query = ( + "CREATE TEMPORARY TABLE {temp_table_name}" + "(Symbol varchar(100) PRIMARY KEY, Correlation float, PValue float)") + insert_query = ( + f"INSERT INTO {temp_table_name}(Symbol, Correlation, PValue) " + " VALUES (%(symbol)s, %(correlation)s, %(pvalue)s)") + + with conn.cursor() as cursor: + cursor.execute(create_query) + cursor.execute( + insert_query, + tuple({ + "symbol": symbol, + "correlation": corr, + "pvalue": symbol_p_value_dict[symbol] + } for symbol, corr in symbol_corr_list[0: 2 * return_number])) + + return temp_table_name def fetch_tissue_correlations( dataset: dict, trait_symbol: str, probeset_freeze_id: int, method: str, -- cgit v1.2.3 From 307a83b897b9ece7c9dd1af49bdedc9e1320eb61 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Fri, 29 Oct 2021 08:25:13 +0300 Subject: Rework sorting: remove `compare_tissue_correlation_absolute_values` Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * gn3/db/correlations.py: Remove the `compare_tissue_correlation_absolute_values` function which is no longer needed. 
--- gn3/db/correlations.py | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) (limited to 'gn3/db') diff --git a/gn3/db/correlations.py b/gn3/db/correlations.py index 28f050a..d7954e5 100644 --- a/gn3/db/correlations.py +++ b/gn3/db/correlations.py @@ -140,22 +140,6 @@ def fetch_literature_correlations( cursor.execute("DROP TEMPORARY TABLE %s", temp_table) return dict(results) -def compare_tissue_correlation_absolute_values(val1, val2): - """ - Comparison function for use when sorting tissue correlation values. - - This is a partial migration of the - `web.webqtl.correlation.CorrelationPage.getTempTissueCorrTable` function in - GeneNetwork1.""" - try: - if abs(val1) < abs(val2): - return 1 - if abs(val1) == abs(val2): - return 0 - return -1 - except TypeError: - return 0 - def fetch_symbol_value_pair_dict( symbol_list: Tuple[str, ...], data_id_dict: dict, conn: Any) -> Dict[str, Tuple[float, ...]]: @@ -302,8 +286,7 @@ def build_temporary_tissue_correlations_table( method) symbol_corr_list = sorted( - symbol_corr_dict.items(), - key=compare_tissue_correlation_absolute_values) + symbol_corr_dict.items(), key=lambda key_val: key_val[1]) temp_table_name = f"TOPTISSUE{random_string(8)}" create_query = ( -- cgit v1.2.3 From 9ceb958273b8d86d220fa0d2f040fcb4a8233586 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Fri, 29 Oct 2021 08:28:19 +0300 Subject: Fix linting and typing errors Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi --- gn3/computations/partial_correlations.py | 2 +- gn3/db/correlations.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'gn3/db') diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py index 5777a0b..fce6ad2 100644 --- a/gn3/computations/partial_correlations.py +++ b/gn3/computations/partial_correlations.py @@ -173,6 +173,6 @@ def correlations_of_all_tissue_traits( 
`web.webqtl.correlation.correlationFunction.calculateCorrOfAllTissueTrait` function in GeneNetwork1. """ - primary_trait_values = primary_trait_symbol_value_dict.values()[0] + primary_trait_values = tuple(primary_trait_symbol_value_dict.values())[0] return batch_computed_tissue_correlation( primary_trait_values, symbol_value_dict, method) diff --git a/gn3/db/correlations.py b/gn3/db/correlations.py index d7954e5..d94759a 100644 --- a/gn3/db/correlations.py +++ b/gn3/db/correlations.py @@ -10,6 +10,8 @@ from gn3.random import random_string from gn3.data_helpers import partition_all from gn3.db.species import translate_to_mouse_gene_id +from gn3.computations.partial_correlations import correlations_of_all_tissue_traits + def get_filename(target_db_name: str, conn: Any) -> str: """ Retrieve the name of the reference database file with which correlations are @@ -275,9 +277,8 @@ def build_temporary_tissue_correlations_table( `web.webqtl.correlation.CorrelationPage.getTempTissueCorrTable` function in GeneNetwork1.""" # We should probably pass the `correlations_of_all_tissue_traits` function - # as an argument to this function and get rid of the two lines immediately + # as an argument to this function and get rid of the one call immediately # following this comment. 
- from gn3.computations.partial_correlations import correlations_of_all_tissue_traits symbol_corr_dict, symbol_p_value_dict = correlations_of_all_tissue_traits( fetch_gene_symbol_tissue_value_dict_for_trait( (trait_symbol,), probeset_freeze_id, conn), @@ -308,7 +309,7 @@ def build_temporary_tissue_correlations_table( return temp_table_name -def fetch_tissue_correlations( +def fetch_tissue_correlations(# pylint: disable=R0913 dataset: dict, trait_symbol: str, probeset_freeze_id: int, method: str, return_number: int, conn: Any) -> dict: """ -- cgit v1.2.3 From 4a6be7e1b6514f3c7db8c672970b27e27ecde305 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 1 Nov 2021 06:01:58 +0300 Subject: Add some condition checking functions Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * Add the `check_for_literature_info` and `check_symbol_for_tissue_correlation` functions to check for the presence of specific data. --- gn3/db/correlations.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'gn3/db') diff --git a/gn3/db/correlations.py b/gn3/db/correlations.py index d94759a..06b3310 100644 --- a/gn3/db/correlations.py +++ b/gn3/db/correlations.py @@ -339,3 +339,43 @@ def fetch_tissue_correlations(# pylint: disable=R0913 return { trait_name: (tiss_corr, tiss_p_val) for trait_name, tiss_corr, tiss_p_val in results} + +def check_for_literature_info(conn: Any, geneid: int) -> bool: + """ + Checks the database to find out whether the trait with `geneid` has any + associated literature. + + This is a migration of the + `web.webqtl.correlation.CorrelationPage.checkForLitInfo` function in + GeneNetwork1. 
+ """ + query = "SELECT 1 FROM LCorrRamin3 WHERE GeneId1=%s LIMIT 1" + with conn.cursor() as cursor: + cursor.execute(query, geneid) + result = cursor.fetchone() + if result: + return True + + return False + +def check_symbol_for_tissue_correlation( + conn: Any, tissue_probeset_freeze_id: int, symbol: str = "") -> bool: + """ + Checks whether a symbol has any associated tissue correlations. + + This is a migration of the + `web.webqtl.correlation.CorrelationPage.checkSymbolForTissueCorr` function + in GeneNetwork1. + """ + query = ( + "SELECT 1 FROM TissueProbeSetXRef " + "WHERE TissueProbeSetFreezeId=%(probeset_freeze_id)s " + "AND Symbol=%(symbol)s LIMIT 1") + with conn.cursor() as cursor: + cursor.execute( + query, probeset_freeze_id=tissue_probeset_freeze_id, symbol=symbol) + result = cursor.fetchone() + if result: + return True + + return False -- cgit v1.2.3