From 02791f15d6b4940ae8be07fe9d4f8487d8291c78 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Wed, 28 Jul 2021 09:42:13 +0300 Subject: Retrieve 'ProbeSet' trait name Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * gn3/db/traits.py: new function (retrieve_probeset_trait_name) * tests/unit/db/test_traits.py: test(s) for new function Add a function to retrieve the name of a 'ProbeSet' trait in a manner similar to genenetwork1's retrieval of the same, as implemented here https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlDataset.py#L140-154 Unlike in genenetwork1, we do not mutate an object, instead, we return the values as retrieved from the database, and the caller will deal with the returned values as appropriate. --- gn3/db/traits.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'gn3/db') diff --git a/gn3/db/traits.py b/gn3/db/traits.py index 4860a07..37b111e 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -90,3 +90,21 @@ def insert_publication(pubmed_id: int, publication: Optional[Dict], ", ".join(['%s'] * len(publication)))) with conn.cursor() as cursor: cursor.execute(insert_query, tuple(publication.values())) + +def retrieve_probeset_trait_name(threshold, name, connection): + """ + Retrieve the name for a Probeset trait + + This is extracted from the `webqtlDataset.retrieveName` function, + specifically the section dealing with 'ProbeSet' type traits + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlDataset.py#L140-154""" + query = ( + 'SELECT Id, Name, FullName, ShortName, DataScale ' + 'FROM ProbeSetFreeze ' + 'WHERE ' + 'public > %(threshold)s ' + 'AND ' + '(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)') + with connection.cursor() as cursor: + cursor.execute(query, {"threshold": threshold, "name": name}) + return cursor.fetchone() -- cgit v1.2.3 From 8d7f8eec5b5d84937e453c9b02de0bd1b1727265 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Wed, 28 Jul 2021 10:20:18 +0300 Subject: Make name retrieval more general Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * gn3/db/traits.py: make function more general * tests/unit/db/test_traits.py: parametrize the tests Make the name retrieval more general for the different types of traits by changing the column specification and table as appropriate. --- gn3/db/traits.py | 26 +++++++++++++++----------- tests/unit/db/test_traits.py | 40 ++++++++++++++++++++++++++-------------- 2 files changed, 41 insertions(+), 25 deletions(-) (limited to 'gn3/db') diff --git a/gn3/db/traits.py b/gn3/db/traits.py index 37b111e..fddb8be 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -91,20 +91,24 @@ def insert_publication(pubmed_id: int, publication: Optional[Dict], with conn.cursor() as cursor: cursor.execute(insert_query, tuple(publication.values())) -def retrieve_probeset_trait_name(threshold, name, connection): +def retrieve_type_trait_name(trait_type, threshold, name, connection): """ - Retrieve the name for a Probeset trait + Retrieve the name of a trait given the trait's name - This is extracted from the `webqtlDataset.retrieveName` function, - specifically the section dealing with 'ProbeSet' type traits - https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlDataset.py#L140-154""" + This is extracted from the `webqtlDataset.retrieveName` function as is + implemented at + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlDataset.py#L140-L169 + """ + columns = "Id, Name, FullName, ShortName{}".format( + ", DataScale" if trait_type == "ProbeSet" else "") query = ( - 'SELECT Id, Name, FullName, ShortName, DataScale ' - 'FROM ProbeSetFreeze ' - 'WHERE ' - 'public > %(threshold)s ' - 'AND ' - '(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)') + "SELECT {columns} " + "FROM {trait_type}Freeze " + "WHERE " + "public > %(threshold)s " + "AND " + "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)").format( + columns=columns, trait_type=trait_type) with connection.cursor() as cursor: cursor.execute(query, {"threshold": threshold, "name": name}) return cursor.fetchone() diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 6d2ba4d..95c5b27 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -1,22 +1,34 @@ """Tests for gn3/db/traits.py""" from unittest import mock, TestCase -from gn3.db.traits import retrieve_probeset_trait_name +from gn3.db.traits import retrieve_type_trait_name class TestTraitsDBFunctions(TestCase): "Test cases for traits functions" def test_retrieve_probeset_trait_name(self): """Test that the function is called correctly.""" - db_mock = mock.MagicMock() - with db_mock.cursor() as cursor: - cursor.fetchone.return_value = ( - "testName", "testNameFull", "testNameShort", "dataScale") - self.assertEqual( - retrieve_probeset_trait_name(9, "testName", db_mock), - ("testName", "testNameFull", "testNameShort", "dataScale")) - cursor.execute.assert_called_once_with( - "SELECT Id, Name, FullName, ShortName, DataScale " - "FROM ProbeSetFreeze " - "WHERE public > %(threshold)s AND " - "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)", - {"threshold": 9, "name": "testName"}) + for trait_type, thresh, trait_name, columns in [ + ["ProbeSet", 9, "testName", + "Id, Name, FullName, ShortName, DataScale"], + ["Geno", 3, "genoTraitName", "Id, Name, FullName, ShortName"], + ["Publish", 6, "publishTraitName", + "Id, Name, FullName, ShortName"], + ["Temp", 4, "tempTraitName", "Id, Name, FullName, ShortName"]]: + db_mock = mock.MagicMock() + with self.subTest(trait_type=trait_type): + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = ( + "testName", "testNameFull", "testNameShort", + "dataScale") + self.assertEqual( + retrieve_type_trait_name( + trait_type, thresh, trait_name, db_mock), + ("testName", "testNameFull", "testNameShort", + "dataScale")) + cursor.execute.assert_called_once_with( + "SELECT {cols} " + "FROM {ttype}Freeze " + "WHERE public > %(threshold)s AND " + "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)".format( + cols=columns, ttype=trait_type), + {"threshold": thresh, "name": trait_name}) -- cgit v1.2.3 From 00579657abf5f9cadda1a9a479cae63ace28820c Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Wed, 28 Jul 2021 12:32:43 +0300 Subject: Retrieve trait information Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * gn3/db/traits.py: add functions to retrieve traits information * tests/unit/db/test_traits.py: add tests for new function Add functions to retrieve traits information as is done in genenetwork1 https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L397-L456 At this point, the data retrieval functions are probably incomplete, as there is more of the `retrieveInfo` function in GN1 that has not been considered as of this commit. --- gn3/db/traits.py | 133 ++++++++++++++++++++++++++++++++++++++++++- tests/unit/db/test_traits.py | 92 ++++++++++++++++++++++++++++-- 2 files changed, 218 insertions(+), 7 deletions(-) (limited to 'gn3/db') diff --git a/gn3/db/traits.py b/gn3/db/traits.py index fddb8be..3c62df8 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -91,7 +91,7 @@ def insert_publication(pubmed_id: int, publication: Optional[Dict], with conn.cursor() as cursor: cursor.execute(insert_query, tuple(publication.values())) -def retrieve_type_trait_name(trait_type, threshold, name, connection): +def retrieve_trait_dataset_name(trait_type, threshold, name, connection): """ Retrieve the name of a trait given the trait's name @@ -112,3 +112,134 @@ def retrieve_type_trait_name(trait_type, threshold, name, connection): with connection.cursor() as cursor: cursor.execute(query, {"threshold": threshold, "name": name}) return cursor.fetchone() + +PUBLISH_TRAIT_INFO_QUERY = ( + "SELECT " + "PublishXRef.Id, Publication.PubMed_ID, " + "Phenotype.Pre_publication_description, " + "Phenotype.Post_publication_description, " + "Phenotype.Original_description, " + "Phenotype.Pre_publication_abbreviation, " + "Phenotype.Post_publication_abbreviation, " + "Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, " + "Phenotype.Authorized_Users, CAST(Publication.Authors AS BINARY), " + "Publication.Title, Publication.Abstract, Publication.Journal, " + "Publication.Volume, Publication.Pages, Publication.Month, " + "Publication.Year, PublishXRef.Sequence, Phenotype.Units, " + "PublishXRef.comments " + "FROM " + "PublishXRef, Publication, Phenotype, PublishFreeze " + "WHERE " + "PublishXRef.Id = %(trait_name)s AND " + "Phenotype.Id = PublishXRef.PhenotypeId AND " + "Publication.Id = PublishXRef.PublicationId AND " + "PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND " + "PublishFreeze.Id =%(trait_dataset_id)s") + +def retrieve_publish_trait_info(trait_data_source, conn): + """Retrieve trait information for type `Publish` traits. + + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L399-L421""" + with conn.cursor() as cursor: + cursor.execute( + PUBLISH_TRAIT_INFO_QUERY, + { + k:v for k, v in trait_data_source.items() + if k in ["trait_name", "trait_dataset_id"] + }) + return cursor.fetchone() + +PROBESET_TRAIT_INFO_QUERY = ( + "SELECT " + "ProbeSet.name, ProbeSet.symbol, ProbeSet.description, " + "ProbeSet.probe_target_description, ProbeSet.chr, ProbeSet.mb, " + "ProbeSet.alias, ProbeSet.geneid, ProbeSet.genbankid, ProbeSet.unigeneid, " + "ProbeSet.omim, ProbeSet.refseq_transcriptid, ProbeSet.blatseq, " + "ProbeSet.targetseq, ProbeSet.chipid, ProbeSet.comments, " + "ProbeSet.strand_probe, ProbeSet.strand_gene, " + "ProbeSet.probe_set_target_region, ProbeSet.proteinid, " + "ProbeSet.probe_set_specificity, ProbeSet.probe_set_blat_score, " + "ProbeSet.probe_set_blat_mb_start, ProbeSet.probe_set_blat_mb_end, " + "ProbeSet.probe_set_strand, ProbeSet.probe_set_note_by_rw, " + "ProbeSet.flag " + "FROM " + "ProbeSet, ProbeSetFreeze, ProbeSetXRef " + "WHERE " + "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND " + "ProbeSetXRef.ProbeSetId = ProbeSet.Id AND " + "ProbeSetFreeze.Name = %(trait_dataset_name)s AND " + "ProbeSet.Name = %(trait_name)s") + +def retrieve_probeset_trait_info(trait_data_source, conn): + """Retrieve trait information for type `ProbeSet` traits. + + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L424-L435""" + with conn.cursor() as cursor: + cursor.execute( + PROBESET_TRAIT_INFO_QUERY, + { + k:v for k, v in trait_data_source.items() + if k in ["trait_name", "trait_dataset_name"] + }) + return cursor.fetchone() + +GENO_TRAIT_INFO_QUERY = ( + "SELECT " + "Geno.name, Geno.chr, Geno.mb, Geno.source2, Geno.sequence " + "FROM " + "Geno, GenoFreeze, GenoXRef " + "WHERE " + "GenoXRef.GenoFreezeId = GenoFreeze.Id AND GenoXRef.GenoId = Geno.Id AND " + "GenoFreeze.Name = %(trait_dataset_name)s AND Geno.Name = %(trait_name)s") + +def retrieve_geno_trait_info(trait_data_source, conn): + """Retrieve trait information for type `Geno` traits. + + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L438-L449""" + with conn.cursor() as cursor: + cursor.execute( + GENO_TRAIT_INFO_QUERY, + { + k:v for k, v in trait_data_source.items() + if k in ["trait_name", "trait_dataset_name"] + }) + return cursor.fetchone() + +TEMP_TRAIT_INFO_QUERY = ( + "SELECT name, description FROM Temp " + "WHERE Name = %(trait_name)s") + +def retrieve_temp_trait_info(trait_data_source, conn): + """Retrieve trait information for type `Temp` traits. + + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L450-452""" + with conn.cursor() as cursor: + cursor.execute( + TEMP_TRAIT_INFO_QUERY, + { + k:v for k, v in trait_data_source.items() + if k in ["trait_name"] + }) + return cursor.fetchone() + +def retrieve_trait_info( + trait_type, trait_name, trait_dataset_id, trait_dataset_name, conn): + """Retrieves the trait information. + + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L397-L456 + + This function, or the dependent functions, might be incomplete as they are + currently.""" + trait_info_function_table = { + "Publish": retrieve_publish_trait_info, + "ProbeSet": retrieve_probeset_trait_info, + "Geno": retrieve_geno_trait_info, + "Temp": retrieve_temp_trait_info + } + return trait_info_function_table[trait_type]( + { + "trait_name": trait_name, + "trait_dataset_id": trait_dataset_id, + "trait_dataset_name":trait_dataset_name + }, + conn) diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 95c5b27..e3c5c28 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -1,13 +1,24 @@ """Tests for gn3/db/traits.py""" from unittest import mock, TestCase -from gn3.db.traits import retrieve_type_trait_name +from gn3.db.traits import ( + GENO_TRAIT_INFO_QUERY, + TEMP_TRAIT_INFO_QUERY, + PUBLISH_TRAIT_INFO_QUERY, + PROBESET_TRAIT_INFO_QUERY) +from gn3.db.traits import ( + retrieve_trait_info, + retrieve_geno_trait_info, + retrieve_temp_trait_info, + retrieve_trait_dataset_name, + retrieve_publish_trait_info, + retrieve_probeset_trait_info) class TestTraitsDBFunctions(TestCase): "Test cases for traits functions" - def test_retrieve_probeset_trait_name(self): + def test_retrieve_trait_dataset_name(self): """Test that the function is called correctly.""" - for trait_type, thresh, trait_name, columns in [ + for trait_type, thresh, trait_dataset_name, columns in [ ["ProbeSet", 9, "testName", "Id, Name, FullName, ShortName, DataScale"], ["Geno", 3, "genoTraitName", "Id, Name, FullName, ShortName"], @@ -21,8 +32,8 @@ class TestTraitsDBFunctions(TestCase): "testName", "testNameFull", "testNameShort", "dataScale") self.assertEqual( - retrieve_type_trait_name( - trait_type, thresh, trait_name, db_mock), + retrieve_trait_dataset_name( + trait_type, thresh, trait_dataset_name, db_mock), ("testName", "testNameFull", "testNameShort", "dataScale")) cursor.execute.assert_called_once_with( @@ -31,4 +42,73 @@ class TestTraitsDBFunctions(TestCase): "WHERE public > %(threshold)s AND " "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)".format( cols=columns, ttype=trait_type), - {"threshold": thresh, "name": trait_name}) + {"threshold": thresh, "name": trait_dataset_name}) + + def test_retrieve_publish_trait_info(self): + """Test retrieval of type `Publish` traits.""" + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = tuple() + trait_source = { + "trait_name": "PublishTraitName", "trait_dataset_id": 1} + self.assertEqual( + retrieve_publish_trait_info( + trait_source, + db_mock), + tuple()) + cursor.execute.assert_called_once_with( + PUBLISH_TRAIT_INFO_QUERY, trait_source) + + def test_retrieve_probeset_trait_info(self): + """Test retrieval of type `Probeset` traits.""" + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = tuple() + trait_source = { + "trait_name": "ProbeSetTraitName", + "trait_dataset_name": "ProbeSetDatasetTraitName"} + self.assertEqual( + retrieve_probeset_trait_info(trait_source, db_mock), tuple()) + cursor.execute.assert_called_once_with( + PROBESET_TRAIT_INFO_QUERY, trait_source) + + def test_retrieve_geno_trait_info(self): + """Test retrieval of type `Geno` traits.""" + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = tuple() + trait_source = { + "trait_name": "GenoTraitName", + "trait_dataset_name": "GenoDatasetTraitName"} + self.assertEqual( + retrieve_geno_trait_info(trait_source, db_mock), tuple()) + cursor.execute.assert_called_once_with( + GENO_TRAIT_INFO_QUERY, trait_source) + + def test_retrieve_temp_trait_info(self): + """Test retrieval of type `Temp` traits.""" + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = tuple() + trait_source = {"trait_name": "TempTraitName"} + self.assertEqual( + retrieve_temp_trait_info(trait_source, db_mock), tuple()) + cursor.execute.assert_called_once_with( + TEMP_TRAIT_INFO_QUERY, trait_source) + + def test_retrieve_trait_info(self): + """Test that information on traits is retrieved as appropriate.""" + for trait_type, trait_name, trait_dataset_id, trait_dataset_name, in [ + ["Publish", "PublishTraitName", 1, "PublishDatasetTraitName"], + ["ProbeSet", "ProbeSetTraitName", 2, "ProbeSetDatasetTraitName"], + ["Geno", "GenoTraitName", 3, "GenoDatasetTraitName"], + ["Temp", "TempTraitName", 4, "TempDatasetTraitName"]]: + db_mock = mock.MagicMock() + with self.subTest(trait_type=trait_type): + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = tuple() + self.assertEqual( + retrieve_trait_info( + trait_type, trait_name, trait_dataset_id, + trait_dataset_name, db_mock), + tuple()) -- cgit v1.2.3 From 13680aa9206e2302760180bab3254182f11dde68 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Thu, 29 Jul 2021 12:28:21 +0300 Subject: Add type annotations to the function Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * Add some type annotations to the functions to reduce the chances of bugs creeping into the code. --- gn3/db/traits.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'gn3/db') diff --git a/gn3/db/traits.py b/gn3/db/traits.py index 3c62df8..f18e16a 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -91,7 +91,8 @@ def insert_publication(pubmed_id: int, publication: Optional[Dict], with conn.cursor() as cursor: cursor.execute(insert_query, tuple(publication.values())) -def retrieve_trait_dataset_name(trait_type, threshold, name, connection): +def retrieve_trait_dataset_name( + trait_type: str, threshold: int, name: str, connection: Any): """ Retrieve the name of a trait given the trait's name @@ -136,7 +137,7 @@ PUBLISH_TRAIT_INFO_QUERY = ( "PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND " "PublishFreeze.Id =%(trait_dataset_id)s") -def retrieve_publish_trait_info(trait_data_source, conn): +def retrieve_publish_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `Publish` traits. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L399-L421""" @@ -170,7 +171,7 @@ PROBESET_TRAIT_INFO_QUERY = ( "ProbeSetFreeze.Name = %(trait_dataset_name)s AND " "ProbeSet.Name = %(trait_name)s") -def retrieve_probeset_trait_info(trait_data_source, conn): +def retrieve_probeset_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `ProbeSet` traits. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L424-L435""" @@ -192,7 +193,7 @@ GENO_TRAIT_INFO_QUERY = ( "GenoXRef.GenoFreezeId = GenoFreeze.Id AND GenoXRef.GenoId = Geno.Id AND " "GenoFreeze.Name = %(trait_dataset_name)s AND Geno.Name = %(trait_name)s") -def retrieve_geno_trait_info(trait_data_source, conn): +def retrieve_geno_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `Geno` traits. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L438-L449""" @@ -209,7 +210,7 @@ TEMP_TRAIT_INFO_QUERY = ( "SELECT name, description FROM Temp " "WHERE Name = %(trait_name)s") -def retrieve_temp_trait_info(trait_data_source, conn): +def retrieve_temp_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `Temp` traits. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L450-452""" @@ -223,7 +224,8 @@ def retrieve_temp_trait_info(trait_data_source, conn): return cursor.fetchone() def retrieve_trait_info( - trait_type, trait_name, trait_dataset_id, trait_dataset_name, conn): + trait_type: str, trait_name: str, trait_dataset_id: int, + trait_dataset_name: str, conn: Any): """Retrieves the trait information. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L397-L456 -- cgit v1.2.3