From 41936d0a486ef54bf4fc049c2b4d85dca43ab761 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Thu, 21 Oct 2021 09:36:36 +0300 Subject: Implement `translate_to_mouse_gene_id` function Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * Migrate the `web.webqtl.correlation/CorrelationPage.translateToMouseGeneID` function in GN1 to GN3. This is a function that retrieves data from the database, and therefore uses a system outside of our code, therefore, the function does not have a corresponding unit test. This kind of function will probably need to be tested at the integration or system tests level, where we test that our code interacts correcly with any and all external systems that it should. --- gn3/db/species.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'gn3/db/species.py') diff --git a/gn3/db/species.py b/gn3/db/species.py index 0deae4e..1e5015f 100644 --- a/gn3/db/species.py +++ b/gn3/db/species.py @@ -30,3 +30,34 @@ def get_chromosome(name: str, is_species: bool, conn: Any) -> Optional[Tuple]: with conn.cursor() as cursor: cursor.execute(_sql) return cursor.fetchall() + +def translate_to_mouse_gene_id(species: str, geneid: int, conn: Any) -> int: + """ + Translate rat or human geneid to mouse geneid + + This is a migration of the + `web.webqtl.correlation/CorrelationPage.translateToMouseGeneID` function in + GN1 + """ + assert species in ("rat", "mouse", "human"), "Invalid species" + if geneid is None: + return 0 + + if species == "mouse": + return geneid + + with conn.cursor as cursor: + if species == "rat": + cursor.execute( + "SELECT mouse FROM GeneIDXRef WHERE rat = %s", geneid) + rat_geneid = cursor.fetchone() + if rat_geneid: + return rat_geneid[0] + + cursor.execute( + "SELECT mouse FROM GeneIDXRef WHERE human = %s", geneid) + human_geneid = cursor.fetchone() + if human_geneid: + return human_geneid[0] + + return 0 # default if all else fails -- cgit v1.2.3 From 84aaf880f32f5293e5e4f1c74a3f284e3c95df2f Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Wed, 27 Oct 2021 10:24:28 +0300 Subject: Remove if clauses: replace with dict Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * Remove the if clauses to simplify the code flow: use a dictionary of queries and select the appropriate query from the dictionary instead. --- gn3/db/species.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'gn3/db/species.py') diff --git a/gn3/db/species.py b/gn3/db/species.py index 1e5015f..abcbf64 100644 --- a/gn3/db/species.py +++ b/gn3/db/species.py @@ -47,17 +47,13 @@ def translate_to_mouse_gene_id(species: str, geneid: int, conn: Any) -> int: return geneid with conn.cursor as cursor: - if species == "rat": - cursor.execute( - "SELECT mouse FROM GeneIDXRef WHERE rat = %s", geneid) - rat_geneid = cursor.fetchone() - if rat_geneid: - return rat_geneid[0] - - cursor.execute( - "SELECT mouse FROM GeneIDXRef WHERE human = %s", geneid) - human_geneid = cursor.fetchone() - if human_geneid: - return human_geneid[0] + query = { + "rat": "SELECT mouse FROM GeneIDXRef WHERE rat = %s" + "human": "SELECT mouse FROM GeneIDXRef WHERE human = %s" + } + cursor.execute(query[species], geneid) + translated_gene_id = cursor.fetchone() + if translated_gene_id: + return translated_gene_id[0] return 0 # default if all else fails -- cgit v1.2.3 From a85db849660a63b09e5c40f7753d861f47eaaaeb Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Fri, 29 Oct 2021 06:37:24 +0300 Subject: Add missing comma Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi --- gn3/db/species.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gn3/db/species.py') diff --git a/gn3/db/species.py b/gn3/db/species.py index abcbf64..702a9a8 100644 --- a/gn3/db/species.py +++ b/gn3/db/species.py @@ -48,7 +48,7 @@ def translate_to_mouse_gene_id(species: str, geneid: int, conn: Any) -> int: with conn.cursor as cursor: query = { - "rat": "SELECT mouse FROM GeneIDXRef WHERE rat = %s" + "rat": "SELECT mouse FROM GeneIDXRef WHERE rat = %s", "human": "SELECT mouse FROM GeneIDXRef WHERE human = %s" } cursor.execute(query[species], geneid) -- cgit v1.2.3 From 40f264876f2309fcccfcd3d04a2999bdf3fa5d98 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Wed, 24 Nov 2021 05:46:43 +0300 Subject: Retrieve the species name given the group Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * Migrate the `web.webqtl.dbFunction.webqtlDatabaseFunction.retrieveSpecies` in GeneNetwork1 to `gn3.db.species.species_name` in GeneNetwork3 to enable the retrieval of the species name, given the group name (formerly RISet). --- gn3/db/species.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'gn3/db/species.py') diff --git a/gn3/db/species.py b/gn3/db/species.py index 702a9a8..20170ba 100644 --- a/gn3/db/species.py +++ b/gn3/db/species.py @@ -57,3 +57,20 @@ def translate_to_mouse_gene_id(species: str, geneid: int, conn: Any) -> int: return translated_gene_id[0] return 0 # default if all else fails + +def species_name(conn: Any, group: str) -> str: + """ + Retrieve the name of the species, given the group (RISet). + + This is a migration of the + `web.webqtl.dbFunction.webqtlDatabaseFunction.retrieveSpecies` function in + GeneNetwork1. + """ + with conn.cursor() as cursor: + cursor.execute( + ("SELECT Species.Name FROM Species, InbredSet " + "WHERE InbredSet.Name = %(group_name)s " + "AND InbredSet.SpeciesId = Species.Id"), + group_name=group_name) + return cursor.fetchone()[0] + return None -- cgit v1.2.3 From f88d8e4446c8c9d6693197a452669e0e9c8ea812 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Fri, 26 Nov 2021 11:44:53 +0300 Subject: Fix query parametrisation Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * Pass parameters to the query the way the MySQL driver expects. --- gn3/db/correlations.py | 16 ++++++++++------ gn3/db/species.py | 2 +- 2 files changed, 11 insertions(+), 7 deletions(-) (limited to 'gn3/db/species.py') diff --git a/gn3/db/correlations.py b/gn3/db/correlations.py index a1daa3c..c838597 100644 --- a/gn3/db/correlations.py +++ b/gn3/db/correlations.py @@ -23,8 +23,8 @@ def get_filename(target_db_name: str, conn: Any) -> str: """ with conn.cursor() as cursor: cursor.execute( - "SELECT Id, FullName from ProbeSetFreeze WHERE Name-%s", - target_db_name) + "SELECT Id, FullName from ProbeSetFreeze WHERE Name=%s", + (target_db_name,)) result = cursor.fetchone() if result: return "ProbeSetFreezeId_{tid}_FullName_{fname}.txt".format( @@ -398,9 +398,12 @@ def fetch_sample_ids( "AND Species.name=%(species_name)s") with conn.cursor() as cursor: cursor.execute( - query, samples_names=tuple(sample_names), - species_name=species_name) return cursor.fetchall() + query, + { + "samples_names": tuple(sample_names), + "species_name": species_name + }) def build_query_sgo_lit_corr( db_type: str, temp_table: str, sample_id_columns: str, @@ -511,8 +514,9 @@ def fetch_all_database_data(# pylint: disable=[R0913, R0914] query, data_start_pos = __build_query__(sample_ids, temp_table) with conn.cursor() as cursor: cursor.execute( - query, db_name=db_name, - **{f"T{item}_sample_id": item for item in sample_ids}) + query, + {"db_name": db_name, + **{f"T{item}_sample_id": item for item in sample_ids}}) return (cursor.fetchall(), data_start_pos) sample_ids = tuple( diff --git a/gn3/db/species.py b/gn3/db/species.py index 20170ba..5b8e096 100644 --- a/gn3/db/species.py +++ b/gn3/db/species.py @@ -71,6 +71,6 @@ def species_name(conn: Any, group: str) -> str: ("SELECT Species.Name FROM Species, InbredSet " "WHERE InbredSet.Name = %(group_name)s " "AND InbredSet.SpeciesId = Species.Id"), - group_name=group_name) + {"group_name": group}) return cursor.fetchone()[0] return None -- cgit v1.2.3