From 1812e3eb2d230bf8d6ac043d5ed85ad1d8027f5f Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Wed, 28 Jul 2021 09:42:13 +0300 Subject: Retrieve 'ProbeSet' trait name Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * gn3/db/traits.py: new function (retrieve_probeset_trait_name) * tests/unit/db/test_traits.py: test(s) for new function Add a function to retrieve the name of a 'ProbeSet' trait in a manner similar to genenetwork1's retrieval of the same, as implemented here https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlDataset.py#L140-154 Unlike in genenetwork1, we do not mutate an object, instead, we return the values as retrieved from the database, and the caller will deal with the returned values as appropriate. --- tests/unit/db/test_traits.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 tests/unit/db/test_traits.py (limited to 'tests/unit') diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py new file mode 100644 index 0000000..6d2ba4d --- /dev/null +++ b/tests/unit/db/test_traits.py @@ -0,0 +1,22 @@ +"""Tests for gn3/db/traits.py""" +from unittest import mock, TestCase +from gn3.db.traits import retrieve_probeset_trait_name + +class TestTraitsDBFunctions(TestCase): + "Test cases for traits functions" + + def test_retrieve_probeset_trait_name(self): + """Test that the function is called correctly.""" + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = ( + "testName", "testNameFull", "testNameShort", "dataScale") + self.assertEqual( + retrieve_probeset_trait_name(9, "testName", db_mock), + ("testName", "testNameFull", "testNameShort", "dataScale")) + cursor.execute.assert_called_once_with( + "SELECT Id, Name, FullName, ShortName, DataScale " + "FROM ProbeSetFreeze " + "WHERE public > %(threshold)s AND " + "(Name = %(name)s OR FullName = %(name)s OR ShortName = 
%(name)s)", + {"threshold": 9, "name": "testName"}) -- cgit v1.2.3 From 9b66f428f341bc047030126ba1e4cc405a34570c Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Wed, 28 Jul 2021 10:20:18 +0300 Subject: Make name retrieval more general Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * gn3/db/traits.py: make function more general * tests/unit/db/test_traits.py: parametrize the tests Make the name retrieval more general for the different types of traits by changing the column specification and table as appropriate. --- gn3/db/traits.py | 26 +++++++++++++++----------- tests/unit/db/test_traits.py | 40 ++++++++++++++++++++++++++-------------- 2 files changed, 41 insertions(+), 25 deletions(-) (limited to 'tests/unit') diff --git a/gn3/db/traits.py b/gn3/db/traits.py index 37b111e..fddb8be 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -91,20 +91,24 @@ def insert_publication(pubmed_id: int, publication: Optional[Dict], with conn.cursor() as cursor: cursor.execute(insert_query, tuple(publication.values())) -def retrieve_probeset_trait_name(threshold, name, connection): +def retrieve_type_trait_name(trait_type, threshold, name, connection): """ - Retrieve the name for a Probeset trait + Retrieve the name of a trait given the trait's name - This is extracted from the `webqtlDataset.retrieveName` function, - specifically the section dealing with 'ProbeSet' type traits - https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlDataset.py#L140-154""" + This is extracted from the `webqtlDataset.retrieveName` function as is + implemented at + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlDataset.py#L140-L169 + """ + columns = "Id, Name, FullName, ShortName{}".format( + ", DataScale" if trait_type == "ProbeSet" else "") query = ( - 'SELECT Id, Name, FullName, ShortName, DataScale ' - 'FROM ProbeSetFreeze ' - 'WHERE ' - 'public > %(threshold)s 
' - 'AND ' - '(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)') + "SELECT {columns} " + "FROM {trait_type}Freeze " + "WHERE " + "public > %(threshold)s " + "AND " + "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)").format( + columns=columns, trait_type=trait_type) with connection.cursor() as cursor: cursor.execute(query, {"threshold": threshold, "name": name}) return cursor.fetchone() diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 6d2ba4d..95c5b27 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -1,22 +1,34 @@ """Tests for gn3/db/traits.py""" from unittest import mock, TestCase -from gn3.db.traits import retrieve_probeset_trait_name +from gn3.db.traits import retrieve_type_trait_name class TestTraitsDBFunctions(TestCase): "Test cases for traits functions" def test_retrieve_probeset_trait_name(self): """Test that the function is called correctly.""" - db_mock = mock.MagicMock() - with db_mock.cursor() as cursor: - cursor.fetchone.return_value = ( - "testName", "testNameFull", "testNameShort", "dataScale") - self.assertEqual( - retrieve_probeset_trait_name(9, "testName", db_mock), - ("testName", "testNameFull", "testNameShort", "dataScale")) - cursor.execute.assert_called_once_with( - "SELECT Id, Name, FullName, ShortName, DataScale " - "FROM ProbeSetFreeze " - "WHERE public > %(threshold)s AND " - "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)", - {"threshold": 9, "name": "testName"}) + for trait_type, thresh, trait_name, columns in [ + ["ProbeSet", 9, "testName", + "Id, Name, FullName, ShortName, DataScale"], + ["Geno", 3, "genoTraitName", "Id, Name, FullName, ShortName"], + ["Publish", 6, "publishTraitName", + "Id, Name, FullName, ShortName"], + ["Temp", 4, "tempTraitName", "Id, Name, FullName, ShortName"]]: + db_mock = mock.MagicMock() + with self.subTest(trait_type=trait_type): + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = ( 
+ "testName", "testNameFull", "testNameShort", + "dataScale") + self.assertEqual( + retrieve_type_trait_name( + trait_type, thresh, trait_name, db_mock), + ("testName", "testNameFull", "testNameShort", + "dataScale")) + cursor.execute.assert_called_once_with( + "SELECT {cols} " + "FROM {ttype}Freeze " + "WHERE public > %(threshold)s AND " + "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)".format( + cols=columns, ttype=trait_type), + {"threshold": thresh, "name": trait_name}) -- cgit v1.2.3 From cdf5887506a0b035f5a51f9538815ff77cb71cdc Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Wed, 28 Jul 2021 12:32:43 +0300 Subject: Retrieve trait information Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * gn3/db/traits.py: add functions to retrieve traits information * tests/unit/db/test_traits.py: add tests for new function Add functions to retrieve traits information as is done in genenetwork1 https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L397-L456 At this point, the data retrieval functions are probably incomplete, as there is more of the `retrieveInfo` function in GN1 that has not been considered as of this commit. 
--- gn3/db/traits.py | 133 ++++++++++++++++++++++++++++++++++++++++++- tests/unit/db/test_traits.py | 92 ++++++++++++++++++++++++++++-- 2 files changed, 218 insertions(+), 7 deletions(-) (limited to 'tests/unit') diff --git a/gn3/db/traits.py b/gn3/db/traits.py index fddb8be..3c62df8 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -91,7 +91,7 @@ def insert_publication(pubmed_id: int, publication: Optional[Dict], with conn.cursor() as cursor: cursor.execute(insert_query, tuple(publication.values())) -def retrieve_type_trait_name(trait_type, threshold, name, connection): +def retrieve_trait_dataset_name(trait_type, threshold, name, connection): """ Retrieve the name of a trait given the trait's name @@ -112,3 +112,134 @@ def retrieve_type_trait_name(trait_type, threshold, name, connection): with connection.cursor() as cursor: cursor.execute(query, {"threshold": threshold, "name": name}) return cursor.fetchone() + +PUBLISH_TRAIT_INFO_QUERY = ( + "SELECT " + "PublishXRef.Id, Publication.PubMed_ID, " + "Phenotype.Pre_publication_description, " + "Phenotype.Post_publication_description, " + "Phenotype.Original_description, " + "Phenotype.Pre_publication_abbreviation, " + "Phenotype.Post_publication_abbreviation, " + "Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, " + "Phenotype.Authorized_Users, CAST(Publication.Authors AS BINARY), " + "Publication.Title, Publication.Abstract, Publication.Journal, " + "Publication.Volume, Publication.Pages, Publication.Month, " + "Publication.Year, PublishXRef.Sequence, Phenotype.Units, " + "PublishXRef.comments " + "FROM " + "PublishXRef, Publication, Phenotype, PublishFreeze " + "WHERE " + "PublishXRef.Id = %(trait_name)s AND " + "Phenotype.Id = PublishXRef.PhenotypeId AND " + "Publication.Id = PublishXRef.PublicationId AND " + "PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND " + "PublishFreeze.Id =%(trait_dataset_id)s") + +def retrieve_publish_trait_info(trait_data_source, conn): + """Retrieve trait 
information for type `Publish` traits. + + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L399-L421""" + with conn.cursor() as cursor: + cursor.execute( + PUBLISH_TRAIT_INFO_QUERY, + { + k:v for k, v in trait_data_source.items() + if k in ["trait_name", "trait_dataset_id"] + }) + return cursor.fetchone() + +PROBESET_TRAIT_INFO_QUERY = ( + "SELECT " + "ProbeSet.name, ProbeSet.symbol, ProbeSet.description, " + "ProbeSet.probe_target_description, ProbeSet.chr, ProbeSet.mb, " + "ProbeSet.alias, ProbeSet.geneid, ProbeSet.genbankid, ProbeSet.unigeneid, " + "ProbeSet.omim, ProbeSet.refseq_transcriptid, ProbeSet.blatseq, " + "ProbeSet.targetseq, ProbeSet.chipid, ProbeSet.comments, " + "ProbeSet.strand_probe, ProbeSet.strand_gene, " + "ProbeSet.probe_set_target_region, ProbeSet.proteinid, " + "ProbeSet.probe_set_specificity, ProbeSet.probe_set_blat_score, " + "ProbeSet.probe_set_blat_mb_start, ProbeSet.probe_set_blat_mb_end, " + "ProbeSet.probe_set_strand, ProbeSet.probe_set_note_by_rw, " + "ProbeSet.flag " + "FROM " + "ProbeSet, ProbeSetFreeze, ProbeSetXRef " + "WHERE " + "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND " + "ProbeSetXRef.ProbeSetId = ProbeSet.Id AND " + "ProbeSetFreeze.Name = %(trait_dataset_name)s AND " + "ProbeSet.Name = %(trait_name)s") + +def retrieve_probeset_trait_info(trait_data_source, conn): + """Retrieve trait information for type `ProbeSet` traits. 
+ + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L424-L435""" + with conn.cursor() as cursor: + cursor.execute( + PROBESET_TRAIT_INFO_QUERY, + { + k:v for k, v in trait_data_source.items() + if k in ["trait_name", "trait_dataset_name"] + }) + return cursor.fetchone() + +GENO_TRAIT_INFO_QUERY = ( + "SELECT " + "Geno.name, Geno.chr, Geno.mb, Geno.source2, Geno.sequence " + "FROM " + "Geno, GenoFreeze, GenoXRef " + "WHERE " + "GenoXRef.GenoFreezeId = GenoFreeze.Id AND GenoXRef.GenoId = Geno.Id AND " + "GenoFreeze.Name = %(trait_dataset_name)s AND Geno.Name = %(trait_name)s") + +def retrieve_geno_trait_info(trait_data_source, conn): + """Retrieve trait information for type `Geno` traits. + + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L438-L449""" + with conn.cursor() as cursor: + cursor.execute( + GENO_TRAIT_INFO_QUERY, + { + k:v for k, v in trait_data_source.items() + if k in ["trait_name", "trait_dataset_name"] + }) + return cursor.fetchone() + +TEMP_TRAIT_INFO_QUERY = ( + "SELECT name, description FROM Temp " + "WHERE Name = %(trait_name)s") + +def retrieve_temp_trait_info(trait_data_source, conn): + """Retrieve trait information for type `Temp` traits. + + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L450-452""" + with conn.cursor() as cursor: + cursor.execute( + TEMP_TRAIT_INFO_QUERY, + { + k:v for k, v in trait_data_source.items() + if k in ["trait_name"] + }) + return cursor.fetchone() + +def retrieve_trait_info( + trait_type, trait_name, trait_dataset_id, trait_dataset_name, conn): + """Retrieves the trait information. 
+ + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L397-L456 + + This function, or the dependent functions, might be incomplete as they are + currently.""" + trait_info_function_table = { + "Publish": retrieve_publish_trait_info, + "ProbeSet": retrieve_probeset_trait_info, + "Geno": retrieve_geno_trait_info, + "Temp": retrieve_temp_trait_info + } + return trait_info_function_table[trait_type]( + { + "trait_name": trait_name, + "trait_dataset_id": trait_dataset_id, + "trait_dataset_name":trait_dataset_name + }, + conn) diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 95c5b27..e3c5c28 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -1,13 +1,24 @@ """Tests for gn3/db/traits.py""" from unittest import mock, TestCase -from gn3.db.traits import retrieve_type_trait_name +from gn3.db.traits import ( + GENO_TRAIT_INFO_QUERY, + TEMP_TRAIT_INFO_QUERY, + PUBLISH_TRAIT_INFO_QUERY, + PROBESET_TRAIT_INFO_QUERY) +from gn3.db.traits import ( + retrieve_trait_info, + retrieve_geno_trait_info, + retrieve_temp_trait_info, + retrieve_trait_dataset_name, + retrieve_publish_trait_info, + retrieve_probeset_trait_info) class TestTraitsDBFunctions(TestCase): "Test cases for traits functions" - def test_retrieve_probeset_trait_name(self): + def test_retrieve_trait_dataset_name(self): """Test that the function is called correctly.""" - for trait_type, thresh, trait_name, columns in [ + for trait_type, thresh, trait_dataset_name, columns in [ ["ProbeSet", 9, "testName", "Id, Name, FullName, ShortName, DataScale"], ["Geno", 3, "genoTraitName", "Id, Name, FullName, ShortName"], @@ -21,8 +32,8 @@ class TestTraitsDBFunctions(TestCase): "testName", "testNameFull", "testNameShort", "dataScale") self.assertEqual( - retrieve_type_trait_name( - trait_type, thresh, trait_name, db_mock), + retrieve_trait_dataset_name( + trait_type, thresh, trait_dataset_name, db_mock), ("testName", "testNameFull", 
"testNameShort", "dataScale")) cursor.execute.assert_called_once_with( @@ -31,4 +42,73 @@ class TestTraitsDBFunctions(TestCase): "WHERE public > %(threshold)s AND " "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)".format( cols=columns, ttype=trait_type), - {"threshold": thresh, "name": trait_name}) + {"threshold": thresh, "name": trait_dataset_name}) + + def test_retrieve_publish_trait_info(self): + """Test retrieval of type `Publish` traits.""" + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = tuple() + trait_source = { + "trait_name": "PublishTraitName", "trait_dataset_id": 1} + self.assertEqual( + retrieve_publish_trait_info( + trait_source, + db_mock), + tuple()) + cursor.execute.assert_called_once_with( + PUBLISH_TRAIT_INFO_QUERY, trait_source) + + def test_retrieve_probeset_trait_info(self): + """Test retrieval of type `Probeset` traits.""" + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = tuple() + trait_source = { + "trait_name": "ProbeSetTraitName", + "trait_dataset_name": "ProbeSetDatasetTraitName"} + self.assertEqual( + retrieve_probeset_trait_info(trait_source, db_mock), tuple()) + cursor.execute.assert_called_once_with( + PROBESET_TRAIT_INFO_QUERY, trait_source) + + def test_retrieve_geno_trait_info(self): + """Test retrieval of type `Geno` traits.""" + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = tuple() + trait_source = { + "trait_name": "GenoTraitName", + "trait_dataset_name": "GenoDatasetTraitName"} + self.assertEqual( + retrieve_geno_trait_info(trait_source, db_mock), tuple()) + cursor.execute.assert_called_once_with( + GENO_TRAIT_INFO_QUERY, trait_source) + + def test_retrieve_temp_trait_info(self): + """Test retrieval of type `Temp` traits.""" + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = tuple() + trait_source = {"trait_name": 
"TempTraitName"} + self.assertEqual( + retrieve_temp_trait_info(trait_source, db_mock), tuple()) + cursor.execute.assert_called_once_with( + TEMP_TRAIT_INFO_QUERY, trait_source) + + def test_retrieve_trait_info(self): + """Test that information on traits is retrieved as appropriate.""" + for trait_type, trait_name, trait_dataset_id, trait_dataset_name, in [ + ["Publish", "PublishTraitName", 1, "PublishDatasetTraitName"], + ["ProbeSet", "ProbeSetTraitName", 2, "ProbeSetDatasetTraitName"], + ["Geno", "GenoTraitName", 3, "GenoDatasetTraitName"], + ["Temp", "TempTraitName", 4, "TempDatasetTraitName"]]: + db_mock = mock.MagicMock() + with self.subTest(trait_type=trait_type): + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = tuple() + self.assertEqual( + retrieve_trait_info( + trait_type, trait_name, trait_dataset_id, + trait_dataset_name, db_mock), + tuple()) -- cgit v1.2.3 From c4f362d9a9b83f4fc6fadde0989663dd34fb0b07 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Fri, 30 Jul 2021 08:29:38 +0300 Subject: Return dict from query functions Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * gn3/db/traits.py: return dicts rather than tuples/list * tests/unit/db/test_traits.py: Update tests Return dicts with the key-value pairs set up so as to ease with the data manipulation down the pipeline. This is also useful to help with the retrieval of all other extra information that was left out in the first iteration. This commit also updates the tests by ensuring they expect dicts rather than tuples. 
--- gn3/db/traits.py | 141 +++++++++++++++++++++++-------------------- tests/unit/db/test_traits.py | 83 +++++++++++++++++++------ 2 files changed, 140 insertions(+), 84 deletions(-) (limited to 'tests/unit') diff --git a/gn3/db/traits.py b/gn3/db/traits.py index ae1939a..9742fa2 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -100,119 +100,128 @@ def retrieve_trait_dataset_name( cursor.execute(query, {"threshold": threshold, "name": name}) return cursor.fetchone() -PUBLISH_TRAIT_INFO_QUERY = ( - "SELECT " - "PublishXRef.Id, Publication.PubMed_ID, " - "Phenotype.Pre_publication_description, " - "Phenotype.Post_publication_description, " - "Phenotype.Original_description, " - "Phenotype.Pre_publication_abbreviation, " - "Phenotype.Post_publication_abbreviation, " - "Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, " - "Phenotype.Authorized_Users, CAST(Publication.Authors AS BINARY), " - "Publication.Title, Publication.Abstract, Publication.Journal, " - "Publication.Volume, Publication.Pages, Publication.Month, " - "Publication.Year, PublishXRef.Sequence, Phenotype.Units, " - "PublishXRef.comments " - "FROM " - "PublishXRef, Publication, Phenotype, PublishFreeze " - "WHERE " - "PublishXRef.Id = %(trait_name)s AND " - "Phenotype.Id = PublishXRef.PhenotypeId AND " - "Publication.Id = PublishXRef.PublicationId AND " - "PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND " - "PublishFreeze.Id =%(trait_dataset_id)s") - def retrieve_publish_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `Publish` traits. 
https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L399-L421""" + keys = ( + "Id", "PubMed_ID", "Pre_publication_description", + "Post_publication_description", "Original_description", + "Pre_publication_abbreviation", "Post_publication_abbreviation", + "Lab_code", "Submitter", "Owner", "Authorized_Users", "Authors", + "Title", "Abstract", "Journal", "Volume", "Pages", "Month", "Year", + "Sequence", "Units", "comments") + columns = ( + "PublishXRef.Id, Publication.PubMed_ID, " + "Phenotype.Pre_publication_description, " + "Phenotype.Post_publication_description, " + "Phenotype.Original_description, " + "Phenotype.Pre_publication_abbreviation, " + "Phenotype.Post_publication_abbreviation, " + "Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, " + "Phenotype.Authorized_Users, CAST(Publication.Authors AS BINARY), " + "Publication.Title, Publication.Abstract, Publication.Journal, " + "Publication.Volume, Publication.Pages, Publication.Month, " + "Publication.Year, PublishXRef.Sequence, Phenotype.Units, " + "PublishXRef.comments") + query = ( + "SELECT " + "{columns} " + "FROM " + "PublishXRef, Publication, Phenotype, PublishFreeze " + "WHERE " + "PublishXRef.Id = %(trait_name)s AND " + "Phenotype.Id = PublishXRef.PhenotypeId AND " + "Publication.Id = PublishXRef.PublicationId AND " + "PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND " + "PublishFreeze.Id =%(trait_dataset_id)s").format( + columns = columns) with conn.cursor() as cursor: cursor.execute( - PUBLISH_TRAIT_INFO_QUERY, + query, { k:v for k, v in trait_data_source.items() if k in ["trait_name", "trait_dataset_id"] }) - return cursor.fetchone() - -PROBESET_TRAIT_INFO_QUERY = ( - "SELECT " - "ProbeSet.name, ProbeSet.symbol, ProbeSet.description, " - "ProbeSet.probe_target_description, ProbeSet.chr, ProbeSet.mb, " - "ProbeSet.alias, ProbeSet.geneid, ProbeSet.genbankid, ProbeSet.unigeneid, " - "ProbeSet.omim, ProbeSet.refseq_transcriptid, ProbeSet.blatseq, " - 
"ProbeSet.targetseq, ProbeSet.chipid, ProbeSet.comments, " - "ProbeSet.strand_probe, ProbeSet.strand_gene, " - "ProbeSet.probe_set_target_region, ProbeSet.proteinid, " - "ProbeSet.probe_set_specificity, ProbeSet.probe_set_blat_score, " - "ProbeSet.probe_set_blat_mb_start, ProbeSet.probe_set_blat_mb_end, " - "ProbeSet.probe_set_strand, ProbeSet.probe_set_note_by_rw, " - "ProbeSet.flag " - "FROM " - "ProbeSet, ProbeSetFreeze, ProbeSetXRef " - "WHERE " - "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND " - "ProbeSetXRef.ProbeSetId = ProbeSet.Id AND " - "ProbeSetFreeze.Name = %(trait_dataset_name)s AND " - "ProbeSet.Name = %(trait_name)s") + return dict(zip((k.lower() for k in keys), cursor.fetchone())) def retrieve_probeset_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `ProbeSet` traits. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L424-L435""" + keys = ( + "name", "symbol", "description", "probe_target_description", "chr", + "mb", "alias", "geneid", "genbankid", "unigeneid", "omim", + "refseq_transcriptid", "blatseq", "targetseq", "chipid", "comments", + "strand_probe", "strand_gene", "probe_set_target_region", "proteinid", + "probe_set_specificity", "probe_set_blat_score", + "probe_set_blat_mb_start", "probe_set_blat_mb_end", "probe_set_strand", + "probe_set_note_by_rw", "flag") + query = ( + "SELECT " + "{columns} " + "FROM " + "ProbeSet, ProbeSetFreeze, ProbeSetXRef " + "WHERE " + "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND " + "ProbeSetXRef.ProbeSetId = ProbeSet.Id AND " + "ProbeSetFreeze.Name = %(trait_dataset_name)s AND " + "ProbeSet.Name = %(trait_name)s").format( + columns = ", ".join(["ProbeSet.{}".format(x) for x in keys])) with conn.cursor() as cursor: cursor.execute( - PROBESET_TRAIT_INFO_QUERY, + query, { k:v for k, v in trait_data_source.items() if k in ["trait_name", "trait_dataset_name"] }) - return cursor.fetchone() - -GENO_TRAIT_INFO_QUERY = 
( - "SELECT " - "Geno.name, Geno.chr, Geno.mb, Geno.source2, Geno.sequence " - "FROM " - "Geno, GenoFreeze, GenoXRef " - "WHERE " - "GenoXRef.GenoFreezeId = GenoFreeze.Id AND GenoXRef.GenoId = Geno.Id AND " - "GenoFreeze.Name = %(trait_dataset_name)s AND Geno.Name = %(trait_name)s") + return dict(zip(keys, cursor.fetchone())) def retrieve_geno_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `Geno` traits. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L438-L449""" + keys = ("name", "chr", "mb", "source2", "sequence") + query = ( + "SELECT " + "{columns} " + "FROM " + "Geno, GenoFreeze, GenoXRef " + "WHERE " + "GenoXRef.GenoFreezeId = GenoFreeze.Id AND GenoXRef.GenoId = Geno.Id AND " + "GenoFreeze.Name = %(trait_dataset_name)s AND " + "Geno.Name = %(trait_name)s").format( + columns = ", ".join(["Geno.{}".format(x) for x in keys])) with conn.cursor() as cursor: cursor.execute( - GENO_TRAIT_INFO_QUERY, + query, { k:v for k, v in trait_data_source.items() if k in ["trait_name", "trait_dataset_name"] }) - return cursor.fetchone() - -TEMP_TRAIT_INFO_QUERY = ( - "SELECT name, description FROM Temp " - "WHERE Name = %(trait_name)s") + return dict(zip(keys, cursor.fetchone())) def retrieve_temp_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `Temp` traits. 
https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L450-452""" + keys = ("name", "description") + query = ( + "SELECT {columns} FROM Temp " + "WHERE Name = %(trait_name)s").format(columns = ", ".join(keys)) with conn.cursor() as cursor: cursor.execute( - TEMP_TRAIT_INFO_QUERY, + query, { k:v for k, v in trait_data_source.items() if k in ["trait_name"] }) - return cursor.fetchone() + return dict(zip(keys, cursor.fetchone())) def retrieve_trait_info( trait_type: str, trait_name: str, trait_dataset_id: int, - trait_dataset_name: str, conn: Any): + trait_dataset_name: str, conn: Any, QTL = None): """Retrieves the trait information. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L397-L456 diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index c8f28b5..393983d 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -1,10 +1,5 @@ """Tests for gn3/db/traits.py""" from unittest import mock, TestCase -from gn3.db.traits import ( - GENO_TRAIT_INFO_QUERY, - TEMP_TRAIT_INFO_QUERY, - PUBLISH_TRAIT_INFO_QUERY, - PROBESET_TRAIT_INFO_QUERY) from gn3.db.traits import ( retrieve_trait_info, retrieve_geno_trait_info, @@ -14,7 +9,6 @@ from gn3.db.traits import ( retrieve_probeset_trait_info, update_sample_data) - class TestTraitsDBFunctions(TestCase): "Test cases for traits functions" @@ -54,12 +48,32 @@ class TestTraitsDBFunctions(TestCase): trait_source = { "trait_name": "PublishTraitName", "trait_dataset_id": 1} self.assertEqual( - retrieve_publish_trait_info( - trait_source, - db_mock), - tuple()) + retrieve_publish_trait_info(trait_source, db_mock), {}) cursor.execute.assert_called_once_with( - PUBLISH_TRAIT_INFO_QUERY, trait_source) + ("SELECT " + "PublishXRef.Id, Publication.PubMed_ID," + " Phenotype.Pre_publication_description," + " Phenotype.Post_publication_description," + " Phenotype.Original_description," + " Phenotype.Pre_publication_abbreviation," + 
" Phenotype.Post_publication_abbreviation," + " Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner," + " Phenotype.Authorized_Users," + " CAST(Publication.Authors AS BINARY)," + " Publication.Title, Publication.Abstract," + " Publication.Journal," + " Publication.Volume, Publication.Pages, Publication.Month," + " Publication.Year, PublishXRef.Sequence, Phenotype.Units," + " PublishXRef.comments" + " FROM" + " PublishXRef, Publication, Phenotype, PublishFreeze" + " WHERE" + " PublishXRef.Id = %(trait_name)s " + " AND Phenotype.Id = PublishXRef.PhenotypeId" + " AND Publication.Id = PublishXRef.PublicationId" + " AND PublishXRef.InbredSetId = PublishFreeze.InbredSetId" + " AND PublishFreeze.Id =%(trait_dataset_id)s"), + trait_source) def test_retrieve_probeset_trait_info(self): """Test retrieval of type `Probeset` traits.""" @@ -70,9 +84,31 @@ class TestTraitsDBFunctions(TestCase): "trait_name": "ProbeSetTraitName", "trait_dataset_name": "ProbeSetDatasetTraitName"} self.assertEqual( - retrieve_probeset_trait_info(trait_source, db_mock), tuple()) + retrieve_probeset_trait_info(trait_source, db_mock), {}) cursor.execute.assert_called_once_with( - PROBESET_TRAIT_INFO_QUERY, trait_source) + ( + "SELECT " + "ProbeSet.name, ProbeSet.symbol, ProbeSet.description, " + "ProbeSet.probe_target_description, ProbeSet.chr, " + "ProbeSet.mb, ProbeSet.alias, ProbeSet.geneid, " + "ProbeSet.genbankid, ProbeSet.unigeneid, ProbeSet.omim, " + "ProbeSet.refseq_transcriptid, ProbeSet.blatseq, " + "ProbeSet.targetseq, ProbeSet.chipid, ProbeSet.comments, " + "ProbeSet.strand_probe, ProbeSet.strand_gene, " + "ProbeSet.probe_set_target_region, ProbeSet.proteinid, " + "ProbeSet.probe_set_specificity, " + "ProbeSet.probe_set_blat_score, " + "ProbeSet.probe_set_blat_mb_start, " + "ProbeSet.probe_set_blat_mb_end, " + "ProbeSet.probe_set_strand, ProbeSet.probe_set_note_by_rw, " + "ProbeSet.flag " + "FROM " + "ProbeSet, ProbeSetFreeze, ProbeSetXRef " + "WHERE " + "ProbeSetXRef.ProbeSetFreezeId 
= ProbeSetFreeze.Id " + "AND ProbeSetXRef.ProbeSetId = ProbeSet.Id " + "AND ProbeSetFreeze.Name = %(trait_dataset_name)s " + "AND ProbeSet.Name = %(trait_name)s"), trait_source) def test_retrieve_geno_trait_info(self): """Test retrieval of type `Geno` traits.""" @@ -83,9 +119,19 @@ class TestTraitsDBFunctions(TestCase): "trait_name": "GenoTraitName", "trait_dataset_name": "GenoDatasetTraitName"} self.assertEqual( - retrieve_geno_trait_info(trait_source, db_mock), tuple()) + retrieve_geno_trait_info(trait_source, db_mock), {}) cursor.execute.assert_called_once_with( - GENO_TRAIT_INFO_QUERY, trait_source) + ( + "SELECT " + "Geno.name, Geno.chr, Geno.mb, Geno.source2, Geno.sequence " + "FROM " + "Geno, GenoFreeze, GenoXRef " + "WHERE " + "GenoXRef.GenoFreezeId = GenoFreeze.Id " + "AND GenoXRef.GenoId = Geno.Id " + "AND GenoFreeze.Name = %(trait_dataset_name)s " + "AND Geno.Name = %(trait_name)s"), + trait_source) def test_retrieve_temp_trait_info(self): """Test retrieval of type `Temp` traits.""" @@ -94,9 +140,10 @@ class TestTraitsDBFunctions(TestCase): cursor.fetchone.return_value = tuple() trait_source = {"trait_name": "TempTraitName"} self.assertEqual( - retrieve_temp_trait_info(trait_source, db_mock), tuple()) + retrieve_temp_trait_info(trait_source, db_mock), {}) cursor.execute.assert_called_once_with( - TEMP_TRAIT_INFO_QUERY, trait_source) + "SELECT name, description FROM Temp WHERE Name = %(trait_name)s", + trait_source) def test_retrieve_trait_info(self): """Test that information on traits is retrieved as appropriate.""" @@ -113,7 +160,7 @@ class TestTraitsDBFunctions(TestCase): retrieve_trait_info( trait_type, trait_name, trait_dataset_id, trait_dataset_name, db_mock), - tuple()) + {}) def test_update_sample_data(self): """Test that the SQL queries when calling update_sample_data are called with -- cgit v1.2.3 From beec957107298eef2b2a825ba0a744e4e95b0dcd Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Fri, 30 Jul 2021 10:30:55 +0300 Subject: 
Remove extra space Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * Remove extra space that was causing the test to fail. --- tests/unit/db/test_traits.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tests/unit') diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 393983d..2a52ea6 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -68,7 +68,7 @@ class TestTraitsDBFunctions(TestCase): " FROM" " PublishXRef, Publication, Phenotype, PublishFreeze" " WHERE" - " PublishXRef.Id = %(trait_name)s " + " PublishXRef.Id = %(trait_name)s" " AND Phenotype.Id = PublishXRef.PhenotypeId" " AND Publication.Id = PublishXRef.PublicationId" " AND PublishXRef.InbredSetId = PublishFreeze.InbredSetId" -- cgit v1.2.3 From 7ab35adb5afb242092e25763474c747e267ded81 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Wed, 4 Aug 2021 09:43:20 +0300 Subject: Update tests Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * Update the tests to deal with changes in the code.
--- tests/unit/db/test_traits.py | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) (limited to 'tests/unit') diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 2a52ea6..3840dd1 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -14,13 +14,15 @@ class TestTraitsDBFunctions(TestCase): def test_retrieve_trait_dataset_name(self): """Test that the function is called correctly.""" - for trait_type, thresh, trait_dataset_name, columns in [ + for trait_type, thresh, trait_dataset_name, columns, table in [ ["ProbeSet", 9, "testName", - "Id, Name, FullName, ShortName, DataScale"], - ["Geno", 3, "genoTraitName", "Id, Name, FullName, ShortName"], + "Id, Name, FullName, ShortName, DataScale", "ProbeSetFreeze"], + ["Geno", 3, "genoTraitName", "Id, Name, FullName, ShortName", + "GenoFreeze"], ["Publish", 6, "publishTraitName", - "Id, Name, FullName, ShortName"], - ["Temp", 4, "tempTraitName", "Id, Name, FullName, ShortName"]]: + "Id, Name, FullName, ShortName", "PublishFreeze"], + ["Temp", 4, "tempTraitName", "Id, Name, FullName, ShortName", + "TempFreeze"]]: db_mock = mock.MagicMock() with self.subTest(trait_type=trait_type): with db_mock.cursor() as cursor: @@ -33,12 +35,13 @@ class TestTraitsDBFunctions(TestCase): ("testName", "testNameFull", "testNameShort", "dataScale")) cursor.execute.assert_called_once_with( - "SELECT {cols} " - "FROM {ttype}Freeze " + "SELECT %(columns)s " + "FROM %(table)s " "WHERE public > %(threshold)s AND " "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)".format( cols=columns, ttype=trait_type), - {"threshold": thresh, "name": trait_dataset_name}) + {"threshold": thresh, "name": trait_dataset_name, + "table": table, "columns": columns}) def test_retrieve_publish_trait_info(self): """Test retrieval of type `Publish` traits.""" @@ -147,11 +150,16 @@ class TestTraitsDBFunctions(TestCase): def test_retrieve_trait_info(self): """Test that 
information on traits is retrieved as appropriate.""" - for trait_type, trait_name, trait_dataset_id, trait_dataset_name, in [ - ["Publish", "PublishTraitName", 1, "PublishDatasetTraitName"], - ["ProbeSet", "ProbeSetTraitName", 2, "ProbeSetDatasetTraitName"], - ["Geno", "GenoTraitName", 3, "GenoDatasetTraitName"], - ["Temp", "TempTraitName", 4, "TempDatasetTraitName"]]: + for trait_type, trait_name, trait_dataset_id, trait_dataset_name, expected in [ + ["Publish", "PublishTraitName", 1, "PublishDatasetTraitName", + {"haveinfo": 0, "homologeneid": None, "type": "Publish", + "confidential": 0}], + ["ProbeSet", "ProbeSetTraitName", 2, "ProbeSetDatasetTraitName", + {"haveinfo": 0, "homologeneid": None, "type": "ProbeSet"}], + ["Geno", "GenoTraitName", 3, "GenoDatasetTraitName", + {"haveinfo": 0, "homologeneid": None, "type": "Geno"}], + ["Temp", "TempTraitName", 4, "TempDatasetTraitName", + {"haveinfo": 0, "homologeneid": None, "type": "Temp"}]]: db_mock = mock.MagicMock() with self.subTest(trait_type=trait_type): with db_mock.cursor() as cursor: @@ -160,7 +168,7 @@ class TestTraitsDBFunctions(TestCase): retrieve_trait_info( trait_type, trait_name, trait_dataset_id, trait_dataset_name, db_mock), - {}) + expected) def test_update_sample_data(self): """Test that the SQL queries when calling update_sample_data are called with -- cgit v1.2.3 From 53a8b6aa977bc6c051625a812009184f78da597d Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Wed, 4 Aug 2021 10:05:33 +0300 Subject: Add tests for post-processing functions Issues: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * Add missing tests for some post-processing functions --- gn3/db/traits.py | 13 ++++++++----- tests/unit/db/test_traits.py | 39 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 45 insertions(+), 7 deletions(-) (limited to 'tests/unit') diff --git a/gn3/db/traits.py b/gn3/db/traits.py index 902eb8b..ce6298f 100644 --- 
a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -160,11 +160,14 @@ def set_confidential_field(trait_info): """Post processing function for 'Publish' trait types. It sets the value for the 'confidential' key.""" - return { - **trait_info, - "confidential": 1 if ( - trait_info.get("pre_publication_description", None) - and not trait_info.get("pubmed_id", None)) else 0} + if trait_info["type"] == "Publish": + return { + **trait_info, + "confidential": 1 if ( + trait_info.get("pre_publication_description", None) + and not trait_info.get("pubmed_id", None)) else 0} + else: + return trait_info def retrieve_probeset_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `ProbeSet` traits. diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 3840dd1..7e8b29c 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -1,13 +1,16 @@ """Tests for gn3/db/traits.py""" from unittest import mock, TestCase from gn3.db.traits import ( + set_haveinfo_field, + update_sample_data, retrieve_trait_info, + set_confidential_field, + set_homologene_id_field, retrieve_geno_trait_info, retrieve_temp_trait_info, retrieve_trait_dataset_name, retrieve_publish_trait_info, - retrieve_probeset_trait_info, - update_sample_data) + retrieve_probeset_trait_info) class TestTraitsDBFunctions(TestCase): "Test cases for traits functions" @@ -198,3 +201,35 @@ class TestTraitsDBFunctions(TestCase): mock.call(PUBLISH_SE_SQL, (2.3, 10, 8967049)), mock.call(N_STRAIN_SQL, (2, 10, 8967049))] ) + + def test_set_haveinfo_field(self): + for trait_info, expected in [ + [{}, {"haveinfo": 0}], + [{"k1": "v1"}, {"k1": "v1", "haveinfo": 1}]]: + with self.subTest(trait_info=trait_info, expected=expected): + self.assertEqual(set_haveinfo_field(trait_info), expected) + + def test_set_homologene_id_field(self): + for trait_info, expected in [ + [{"type": "Publish"}, + {"type": "Publish", "homologeneid": None}], + [{"type": "ProbeSet"}, + 
{"type": "ProbeSet", "homologeneid": None}], + [{"type": "Geno"}, {"type": "Geno", "homologeneid": None}], + [{"type": "Temp"}, {"type": "Temp", "homologeneid": None}]]: + db_mock = mock.MagicMock() + with self.subTest(trait_info=trait_info, expected=expected): + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = () + self.assertEqual( + set_homologene_id_field(trait_info, db_mock), expected) + + def test_set_confidential_field(self): + for trait_info, expected in [ + [{"type": "Publish"}, {"type": "Publish", "confidential": 0}], + [{"type": "ProbeSet"}, {"type": "ProbeSet"}], + [{"type": "Geno"}, {"type": "Geno"}], + [{"type": "Temp"}, {"type": "Temp"}]]: + with self.subTest(trait_info=trait_info, expected=expected): + self.assertEqual( + set_confidential_field(trait_info), expected) -- cgit v1.2.3 From 0d7ebb87e2995207f23bc8b8e05e64aaab50b48d Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Wed, 4 Aug 2021 11:27:24 +0300 Subject: Retrieve the RISet and RISet ID values Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * Retrieve the RISet and RISet ID values from the database. --- gn3/db/traits.py | 57 +++++++++++++++++++++++ tests/unit/db/test_traits.py | 105 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 162 insertions(+) (limited to 'tests/unit') diff --git a/gn3/db/traits.py b/gn3/db/traits.py index ce6298f..ea35d7e 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -286,6 +286,62 @@ def set_homologene_id_field(trait_info, conn): } return functions_table[trait_info["type"]](trait_info) +def set_geno_riset_fields(name, conn): + """ + Retrieve the RISet, and RISetID values for various Geno trait types. 
+ """ + query = ( + "SELECT InbredSet.Name, InbredSet.Id " + "FROM InbredSet, GenoFreeze " + "WHERE GenoFreeze.InbredSetId = InbredSet.Id " + "AND GenoFreeze.Name = %(name)s") + with conn.cursor() as cursor: + return cursor.execute(query, {"name": name}) + +def set_publish_riset_fields(name, conn): + """ + Retrieve the RISet, and RISetID values for various Publish trait types. + """ + query = ( + "SELECT InbredSet.Name, InbredSet.Id " + "FROM InbredSet, PublishFreeze " + "WHERE PublishFreeze.InbredSetId = InbredSet.Id " + "AND PublishFreeze.Name = %(name)s") + with conn.cursor() as cursor: + return cursor.execute(query, {"name": name}) + +def set_probeset_riset_fields(name, conn): + """ + Retrieve the RISet, and RISetID values for various ProbeSet trait types. + """ + query = ( + "SELECT InbredSet.Name, InbredSet.Id " + "FROM InbredSet, ProbeSetFreeze, ProbeFreeze " + "WHERE ProbeFreeze.InbredSetId = InbredSet.Id " + "AND ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId " + "AND ProbeSetFreeze.Name = %(name)s") + with conn.cursor() as cursor: + return cursor.execute(query, {"name": name}) + +def set_riset_fields(trait_info, conn): + """ + Retrieve the RISet, and RISetID values for various trait types. 
+ """ + riset_functions_map = { + "Temp": lambda ti, con: (None, None), + "Geno": set_geno_riset_fields, + "Publish": set_publish_riset_fields, + "ProbeSet": set_probeset_riset_fields + } + if not trait_info.get("haveinfo", None): + return trait_info + + riset, riid = riset_functions_map[trait_info["type"]]( + trait_info["name"], conn) + return { + **trait_info, "risetid": riid, + "riset": "BXD" if riset == "BXD300" else riset} + def retrieve_trait_info( trait_type: str, trait_name: str, trait_dataset_id: int, trait_dataset_name: str, conn: Any, QTL=None): @@ -303,6 +359,7 @@ def retrieve_trait_info( } common_post_processing_fn = compose( + lambda ti: set_riset_fields(ti, conn), lambda ti: set_homologene_id_field(ti, conn), lambda ti: {"type": trait_type, **ti}, set_haveinfo_field) diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 7e8b29c..2445d26 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -1,13 +1,17 @@ """Tests for gn3/db/traits.py""" from unittest import mock, TestCase from gn3.db.traits import ( + set_riset_fields, set_haveinfo_field, update_sample_data, retrieve_trait_info, + set_geno_riset_fields, set_confidential_field, set_homologene_id_field, retrieve_geno_trait_info, retrieve_temp_trait_info, + set_publish_riset_fields, + set_probeset_riset_fields, retrieve_trait_dataset_name, retrieve_publish_trait_info, retrieve_probeset_trait_info) @@ -233,3 +237,104 @@ class TestTraitsDBFunctions(TestCase): with self.subTest(trait_info=trait_info, expected=expected): self.assertEqual( set_confidential_field(trait_info), expected) + + def test_set_geno_riset_fields(self): + """ + Test that the `riset` and `riset_id` fields are retrieved appropriately + for the 'Geno' trait type. 
+ """ + for trait_name, expected in [ + ["testGenoName", ()]]: + db_mock = mock.MagicMock() + with self.subTest(trait_name=trait_name, expected=expected): + with db_mock.cursor() as cursor: + cursor.execute.return_value = () + self.assertEqual( + set_geno_riset_fields(trait_name, db_mock), expected) + cursor.execute.assert_called_once_with( + ( + "SELECT InbredSet.Name, InbredSet.Id" + " FROM InbredSet, GenoFreeze" + " WHERE GenoFreeze.InbredSetId = InbredSet.Id" + " AND GenoFreeze.Name = %(name)s"), + {"name": trait_name}) + + + def test_set_publish_riset_fields(self): + """ + Test that the `riset` and `riset_id` fields are retrieved appropriately + for the 'Publish' trait type. + """ + for trait_name, expected in [ + ["testPublishName", ()]]: + db_mock = mock.MagicMock() + with self.subTest(trait_name=trait_name, expected=expected): + with db_mock.cursor() as cursor: + cursor.execute.return_value = () + self.assertEqual( + set_publish_riset_fields(trait_name, db_mock), expected) + cursor.execute.assert_called_once_with( + ( + "SELECT InbredSet.Name, InbredSet.Id" + " FROM InbredSet, PublishFreeze" + " WHERE PublishFreeze.InbredSetId = InbredSet.Id" + " AND PublishFreeze.Name = %(name)s"), + {"name": trait_name}) + + + def test_set_probeset_riset_fields(self): + """ + Test that the `riset` and `riset_id` fields are retrieved appropriately + for the 'ProbeSet' trait type. 
+ """ + for trait_name, expected in [ + ["testProbeSetName", ()]]: + db_mock = mock.MagicMock() + with self.subTest(trait_name=trait_name, expected=expected): + with db_mock.cursor() as cursor: + cursor.execute.return_value = () + self.assertEqual( + set_probeset_riset_fields(trait_name, db_mock), expected) + cursor.execute.assert_called_once_with( + ( + "SELECT InbredSet.Name, InbredSet.Id" + " FROM InbredSet, ProbeSetFreeze, ProbeFreeze" + " WHERE ProbeFreeze.InbredSetId = InbredSet.Id" + " AND ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId" + " AND ProbeSetFreeze.Name = %(name)s"), + {"name": trait_name}) + + def test_set_riset_fields(self): + """ + Test that the riset fields are set up correctly for the different trait + types. + """ + for trait_info, expected in [ + [{}, {}], + [{"haveinfo": 0, "type": "Publish"}, + {"haveinfo": 0, "type": "Publish"}], + [{"haveinfo": 0, "type": "ProbeSet"}, + {"haveinfo": 0, "type": "ProbeSet"}], + [{"haveinfo": 0, "type": "Geno"}, + {"haveinfo": 0, "type": "Geno"}], + [{"haveinfo": 0, "type": "Temp"}, + {"haveinfo": 0, "type": "Temp"}], + [{"haveinfo": 1, "type": "Publish", "name": "test"}, + {"haveinfo": 1, "type": "Publish", "name": "test", + "riset": "riset_name", "risetid": 0}], + [{"haveinfo": 1, "type": "ProbeSet", "name": "test"}, + {"haveinfo": 1, "type": "ProbeSet", "name": "test", + "riset": "riset_name", "risetid": 0}], + [{"haveinfo": 1, "type": "Geno", "name": "test"}, + {"haveinfo": 1, "type": "Geno", "name": "test", + "riset": "riset_name", "risetid": 0}], + [{"haveinfo": 1, "type": "Temp", "name": "test"}, + {"haveinfo": 1, "type": "Temp", "name": "test", "riset": None, + "risetid": None}] + ]: + db_mock = mock.MagicMock() + with self.subTest(trait_info=trait_info, expected=expected): + with db_mock.cursor() as cursor: + cursor.execute.return_value = ("riset_name", 0) + self.assertEqual( + set_riset_fields(trait_info, db_mock), expected) -- cgit v1.2.3 From f712da630c1a3642cb44b62c4b2b857373cd78d7 Mon Sep 17 
00:00:00 2001 From: Muriithi Frederick Muriuki Date: Wed, 4 Aug 2021 11:30:44 +0300 Subject: Fix issues caught by pylint * gn3/computations/slink.py: remove unused imports * gn3/db/traits.py: remove unnecessary `else` clauses * tests/unit/db/test_traits.py: add docstrings for functions --- gn3/computations/slink.py | 2 +- gn3/db/traits.py | 6 +++--- tests/unit/db/test_traits.py | 3 +++ 3 files changed, 7 insertions(+), 4 deletions(-) (limited to 'tests/unit') diff --git a/gn3/computations/slink.py b/gn3/computations/slink.py index 23d3d88..5953e6b 100644 --- a/gn3/computations/slink.py +++ b/gn3/computations/slink.py @@ -7,7 +7,7 @@ slink: TODO: Describe what the function does... """ import logging -from typing import List, Tuple, Union, Sequence +from typing import Union, Sequence NumType = Union[int, float] SeqOfNums = Sequence[NumType] diff --git a/gn3/db/traits.py b/gn3/db/traits.py index ea35d7e..29c91a6 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -166,8 +166,7 @@ def set_confidential_field(trait_info): "confidential": 1 if ( trait_info.get("pre_publication_description", None) and not trait_info.get("pubmed_id", None)) else 0} - else: - return trait_info + return trait_info def retrieve_probeset_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `ProbeSet` traits. @@ -344,13 +343,14 @@ def set_riset_fields(trait_info, conn): def retrieve_trait_info( trait_type: str, trait_name: str, trait_dataset_id: int, - trait_dataset_name: str, conn: Any, QTL=None): + trait_dataset_name: str, conn: Any, qtl=None): """Retrieves the trait information. 
https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L397-L456 This function, or the dependent functions, might be incomplete as they are currently.""" + # pylint: disable=[R0913] trait_info_function_table = { "Publish": retrieve_publish_trait_info, "ProbeSet": retrieve_probeset_trait_info, diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 2445d26..1c481a2 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -207,6 +207,7 @@ class TestTraitsDBFunctions(TestCase): ) def test_set_haveinfo_field(self): + """Test that the `haveinfo` field is set up correctly""" for trait_info, expected in [ [{}, {"haveinfo": 0}], [{"k1": "v1"}, {"k1": "v1", "haveinfo": 1}]]: @@ -214,6 +215,7 @@ class TestTraitsDBFunctions(TestCase): self.assertEqual(set_haveinfo_field(trait_info), expected) def test_set_homologene_id_field(self): + """Test that the `homologene_id` field is set up correctly""" for trait_info, expected in [ [{"type": "Publish"}, {"type": "Publish", "homologeneid": None}], @@ -229,6 +231,7 @@ class TestTraitsDBFunctions(TestCase): set_homologene_id_field(trait_info, db_mock), expected) def test_set_confidential_field(self): + """Test that the `confidential` field is set up correctly""" for trait_info, expected in [ [{"type": "Publish"}, {"type": "Publish", "confidential": 0}], [{"type": "ProbeSet"}, {"type": "ProbeSet"}], -- cgit v1.2.3 From 76ba5296c66e131301a9fdb692c3b2623f3331ed Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Thu, 5 Aug 2021 08:40:49 +0300 Subject: Build up trait_name items from full name Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * The full name of the traits from search contains multiple parts to it, and as such, we use it to retrieve the appropriate data and set it up in the final trait_info dictionary that is produced. 
--- gn3/db/traits.py | 16 +++++++++++++-- tests/unit/db/test_traits.py | 46 ++++++++++++++++++++++++++++++++++++-------- 2 files changed, 52 insertions(+), 10 deletions(-) (limited to 'tests/unit') diff --git a/gn3/db/traits.py b/gn3/db/traits.py index 29c91a6..9f89510 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -341,8 +341,18 @@ def set_riset_fields(trait_info, conn): **trait_info, "risetid": riid, "riset": "BXD" if riset == "BXD300" else riset} +def build_trait_name(trait_fullname): + name_parts = trait_fullname.split("::") + assert len(name_parts) >= 2, "Name format error" + return { + "trait_db": name_parts[0], + "trait_fullname": trait_fullname, + "trait_name": name_parts[1], + "cellid": name_parts[2] if len(name_parts) == 3 else "" + } + def retrieve_trait_info( - trait_type: str, trait_name: str, trait_dataset_id: int, + trait_type: str, trait_full_name: str, trait_dataset_id: int, trait_dataset_name: str, conn: Any, qtl=None): """Retrieves the trait information. @@ -351,6 +361,7 @@ def retrieve_trait_info( This function, or the dependent functions, might be incomplete as they are currently.""" # pylint: disable=[R0913] + trait = build_trait_name(trait_full_name) trait_info_function_table = { "Publish": retrieve_publish_trait_info, "ProbeSet": retrieve_probeset_trait_info, @@ -362,6 +373,7 @@ def retrieve_trait_info( lambda ti: set_riset_fields(ti, conn), lambda ti: set_homologene_id_field(ti, conn), lambda ti: {"type": trait_type, **ti}, + lambda ti: {**ti, **trait}, set_haveinfo_field) trait_post_processing_functions_table = { @@ -377,7 +389,7 @@ def retrieve_trait_info( return retrieve_info( { - "trait_name": trait_name, + "trait_name": trait["trait_name"], "trait_dataset_id": trait_dataset_id, "trait_dataset_name":trait_dataset_name }, diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 1c481a2..39d7a31 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -1,6 +1,7 @@ """Tests for 
gn3/db/traits.py""" from unittest import mock, TestCase from gn3.db.traits import ( + build_trait_name, set_riset_fields, set_haveinfo_field, update_sample_data, @@ -155,18 +156,47 @@ class TestTraitsDBFunctions(TestCase): "SELECT name, description FROM Temp WHERE Name = %(trait_name)s", trait_source) + def test_build_trait_name_with_good_fullnames(self): + for fullname, expected in [ + ["testdb::testname", + {"trait_db": "testdb", "trait_name": "testname", "cellid": "", + "trait_fullname": "testdb::testname"}], + ["testdb::testname::testcell", + {"trait_db": "testdb", "trait_name": "testname", + "cellid": "testcell", + "trait_fullname": "testdb::testname::testcell"}]]: + with self.subTest(fullname=fullname): + self.assertEqual(build_trait_name(fullname), expected) + + def test_build_trait_name_with_bad_fullnames(self): + for fullname in ["", "test", "test:test"]: + with self.subTest(fullname=fullname): + with self.assertRaises(AssertionError, msg="Name format error"): + build_trait_name(fullname) + def test_retrieve_trait_info(self): """Test that information on traits is retrieved as appropriate.""" for trait_type, trait_name, trait_dataset_id, trait_dataset_name, expected in [ - ["Publish", "PublishTraitName", 1, "PublishDatasetTraitName", + ["Publish", "pubDb::PublishTraitName::pubCell", 1, + "PublishDatasetTraitName", {"haveinfo": 0, "homologeneid": None, "type": "Publish", - "confidential": 0}], - ["ProbeSet", "ProbeSetTraitName", 2, "ProbeSetDatasetTraitName", - {"haveinfo": 0, "homologeneid": None, "type": "ProbeSet"}], - ["Geno", "GenoTraitName", 3, "GenoDatasetTraitName", - {"haveinfo": 0, "homologeneid": None, "type": "Geno"}], - ["Temp", "TempTraitName", 4, "TempDatasetTraitName", - {"haveinfo": 0, "homologeneid": None, "type": "Temp"}]]: + "confidential": 0, "trait_db": "pubDb", + "trait_name": "PublishTraitName", "cellid": "pubCell", + "trait_fullname": "pubDb::PublishTraitName::pubCell"}], + ["ProbeSet", "prbDb::ProbeSetTraitName::prbCell", 2, + 
"ProbeSetDatasetTraitName", + {"haveinfo": 0, "homologeneid": None, "type": "ProbeSet", + "trait_fullname": "prbDb::ProbeSetTraitName::prbCell", + "trait_db": "prbDb", "trait_name": "ProbeSetTraitName", + "cellid": "prbCell"}], + ["Geno", "genDb::GenoTraitName", 3, "GenoDatasetTraitName", + {"haveinfo": 0, "homologeneid": None, "type": "Geno", + "trait_fullname": "genDb::GenoTraitName", "trait_db": "genDb", + "trait_name": "GenoTraitName", "cellid": ""}], + ["Temp", "tmpDb::TempTraitName", 4, "TempDatasetTraitName", + {"haveinfo": 0, "homologeneid": None, "type": "Temp", + "trait_fullname": "tmpDb::TempTraitName", "trait_db": "tmpDb", + "trait_name": "TempTraitName", "cellid": ""}]]: db_mock = mock.MagicMock() with self.subTest(trait_type=trait_type): with db_mock.cursor() as cursor: -- cgit v1.2.3 From f1876f8b9939a9b863dc88aab8d3fed3c16ad4e1 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Thu, 5 Aug 2021 13:08:57 +0300 Subject: Reorganise the database code Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * Reorganise the code to separate the datasets from the traits, and to more closely conform to the same flow as that in GN1 --- gn3/db/datasets.py | 251 +++++++++++++++++++++++++++++++++++++++++ gn3/db/traits.py | 171 ++++++++++++---------------- tests/unit/db/test_datasets.py | 133 ++++++++++++++++++++++ tests/unit/db/test_traits.py | 196 +++++--------------------------- 4 files changed, 485 insertions(+), 266 deletions(-) create mode 100644 gn3/db/datasets.py create mode 100644 tests/unit/db/test_datasets.py (limited to 'tests/unit') diff --git a/gn3/db/datasets.py b/gn3/db/datasets.py new file mode 100644 index 0000000..3ad50f6 --- /dev/null +++ b/gn3/db/datasets.py @@ -0,0 +1,251 @@ +from typing import Any, Dict, Union + +def retrieve_probeset_trait_dataset_name( + threshold: int, name: str, connection: Any): + query = ( + "SELECT Id, Name, FullName, ShortName, DataScale " + "FROM 
ProbeSetFreeze " + "WHERE " + "public > %(threshold)s " + "AND " + "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)") + with connection.cursor() as cursor: + cursor.execute( + query, + { + "threshold": threshold, + "name": name + }) + return dict(zip( + ["dataset_id", "dataset_name", "dataset_fullname", + "dataset_shortname", "dataset_datascale"], + cursor.fetchone)) + +def retrieve_publish_trait_dataset_name(threshold: int, name: str, connection: Any): + query = ( + "SELECT Id, Name, FullName, ShortName " + "FROM PublishFreeze " + "WHERE " + "public > %(threshold)s " + "AND " + "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)") + with connection.cursor() as cursor: + cursor.execute( + query, + { + "threshold": threshold, + "name": name + }) + return dict(zip( + ["dataset_id", "dataset_name", "dataset_fullname", + "dataset_shortname"], + cursor.fetchone)) + +def retrieve_geno_trait_dataset_name(threshold: int, name: str, connection: Any): + query = ( + "SELECT Id, Name, FullName, ShortName " + "FROM GenoFreeze " + "WHERE " + "public > %(threshold)s " + "AND " + "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)") + with connection.cursor() as cursor: + cursor.execute( + query, + { + "threshold": threshold, + "name": name + }) + return dict(zip( + ["dataset_id", "dataset_name", "dataset_fullname", + "dataset_shortname"], + cursor.fetchone)) + +def retrieve_temp_trait_dataset_name(threshold: int, name: str, connection: Any): + query = ( + "SELECT Id, Name, FullName, ShortName " + "FROM TempFreeze " + "WHERE " + "public > %(threshold)s " + "AND " + "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)") + with connection.cursor() as cursor: + cursor.execute( + query, + { + "threshold": threshold, + "name": name + }) + return dict(zip( + ["dataset_id", "dataset_name", "dataset_fullname", + "dataset_shortname"], + cursor.fetchone)) + +def retrieve_dataset_name( + trait_type: str, threshold: int, trait_name: 
str, dataset_name: str, + conn: Any): + """ + Retrieve the name of a trait given the trait's name + + This is extracted from the `webqtlDataset.retrieveName` function as is + implemented at + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlDataset.py#L140-L169 + """ + fn_map = { + "ProbeSet": retrieve_probeset_trait_dataset_name, + "Publish": retrieve_publish_trait_dataset_name, + "Geno": retrieve_geno_trait_dataset_name, + "Temp": retrieve_temp_trait_dataset_name} + if trait_type == "Temp": + return retrieve_temp_trait_dataset_name(threshold, trait_name, conn) + return fn_map[trait_type](threshold, dataset_name, conn) + + +def retrieve_geno_riset_fields(name, conn): + """ + Retrieve the RISet, and RISetID values for various Geno trait types. + """ + query = ( + "SELECT InbredSet.Name, InbredSet.Id " + "FROM InbredSet, GenoFreeze " + "WHERE GenoFreeze.InbredSetId = InbredSet.Id " + "AND GenoFreeze.Name = %(name)s") + with conn.cursor() as cursor: + cursor.execute(query, {"name": name}) + return dict(zip(["riset", "risetid"], cursor.fetchone())) + return {} + +def retrieve_publish_riset_fields(name, conn): + """ + Retrieve the RISet, and RISetID values for various Publish trait types. + """ + query = ( + "SELECT InbredSet.Name, InbredSet.Id " + "FROM InbredSet, PublishFreeze " + "WHERE PublishFreeze.InbredSetId = InbredSet.Id " + "AND PublishFreeze.Name = %(name)s") + with conn.cursor() as cursor: + cursor.execute(query, {"name": name}) + return dict(zip(["riset", "risetid"], cursor.fetchone())) + return {} + +def retrieve_probeset_riset_fields(name, conn): + """ + Retrieve the RISet, and RISetID values for various ProbeSet trait types. 
+ """ + query = ( + "SELECT InbredSet.Name, InbredSet.Id " + "FROM InbredSet, ProbeSetFreeze, ProbeFreeze " + "WHERE ProbeFreeze.InbredSetId = InbredSet.Id " + "AND ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId " + "AND ProbeSetFreeze.Name = %(name)s") + with conn.cursor() as cursor: + cursor.execute(query, {"name": name}) + return dict(zip(["riset", "risetid"], cursor.fetchone())) + return {} + +def retrieve_temp_riset_fields(name, conn): + query = ( + "SELECT InbredSet.Name, InbredSet.Id " + "FROM InbredSet, Temp " + "WHERE Temp.InbredSetId = InbredSet.Id " + "AND Temp.Name = %(name)s") + with conn.cursor() as cursor: + cursor.execute(query, {"name": name}) + return dict(zip(["riset", "risetid"], cursor.fetchone())) + return {} + +def retrieve_riset_fields(trait_type, trait_name, dataset_info, conn): + """ + Retrieve the RISet, and RISetID values for various trait types. + """ + riset_fns_map = { + "Geno": retrieve_geno_riset_fields, + "Publish": retrieve_publish_riset_fields, + "ProbeSet": retrieve_probeset_riset_fields + } + + if trait_type == "Temp": + riset_info = retrieve_temp_riset_fields(trait_name, conn) + else: + riset_info = riset_fns_map[trait_type](dataset_info["dataset_name"], conn) + + return { + **dataset_info, + **riset_info, + "riset": ( + "BXD" if riset_info.get("riset") == "BXD300" + else riset_info.get("riset", "")) + } + +def retrieve_temp_trait_dataset(): + return { + "searchfield": ["name", "description"], + "disfield": ["name", "description"], + "type": "Temp", + "dataset_id": 1, + "fullname": "Temporary Storage", + "shortname": "Temp" + } + +def retrieve_geno_trait_dataset(): + return { + "searchfield": ["name","chr"], + "disfield": ["name","chr","mb", "source2", "sequence"], + "type": "Geno" + } + +def retrieve_publish_trait_dataset(): + return { + "searchfield": [ + "name", "post_publication_description", "abstract", "title", + "authors"], + "disfield": [ + "name","pubmed_id", "pre_publication_description", + 
"post_publication_description", "original_description", + "pre_publication_abbreviation", "post_publication_abbreviation", + "lab_code", "submitter", "owner", "authorized_users", + "authors","title","abstract", "journal","volume","pages","month", + "year","sequence", "units", "comments"], + "type": "Publish" + } + +def retrieve_probeset_trait_dataset(): + return { + "searchfield": [ + "name", "description", "probe_target_description", "symbol", + "alias", "genbankid", "unigeneid", "omim", "refseq_transcriptid", + "probe_set_specificity", "probe_set_blat_score"], + "disfield": [ + "name", "symbol", "description", "probe_target_description", "chr", + "mb", "alias", "geneid", "genbankid", "unigeneid", "omim", + "refseq_transcriptid", "blatseq", "targetseq", "chipid", "comments", + "strand_probe", "strand_gene", "probe_set_target_region", + "proteinid", "probe_set_specificity", "probe_set_blat_score", + "probe_set_blat_mb_start", "probe_set_blat_mb_end", + "probe_set_strand", "probe_set_note_by_rw", "flag"], + "type": "ProbeSet" + } + +def retrieve_trait_dataset(trait_type, trait, threshold, conn): + dataset_fns = { + "Temp": retrieve_temp_trait_dataset, + "Geno": retrieve_geno_trait_dataset, + "Publish": retrieve_publish_trait_dataset, + "ProbeSet": retrieve_probeset_trait_dataset + } + dataset_name_info = { + "dataset_id": None, + "dataset_name": trait["db"]["dataset_name"], + **retrieve_dataset_name( + trait_type, threshold, trait["trait_name"], trait["db"]["dataset_name"], + conn) + } + riset = retrieve_riset_fields( + trait_type, trait["trait_name"], dataset_name_info, conn) + return { + "display_name": dataset_name_info["dataset_name"], + **dataset_name_info, + **dataset_fns[trait_type](), + **riset + } diff --git a/gn3/db/traits.py b/gn3/db/traits.py index 9f89510..85cccfa 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -1,6 +1,7 @@ """This class contains functions relating to trait data manipulation""" from typing import Any, Dict, Union from 
gn3.function_helpers import compose +from gn3.db.datasets import retrieve_trait_dataset def get_trait_csv_sample_data(conn: Any, @@ -77,41 +78,6 @@ def update_sample_data(conn: Any, return (updated_strains, updated_published_data, updated_se_data, updated_n_strains) -def retrieve_trait_dataset_name( - trait_type: str, threshold: int, name: str, connection: Any): - """ - Retrieve the name of a trait given the trait's name - - This is extracted from the `webqtlDataset.retrieveName` function as is - implemented at - https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlDataset.py#L140-L169 - """ - table_map = { - "ProbeSet": "ProbeSetFreeze", - "Publish": "PublishFreeze", - "Geno": "GenoFreeze", - "Temp": "TempFreeze"} - columns = "Id, Name, FullName, ShortName{}".format( - ", DataScale" if trait_type == "ProbeSet" else "") - query = ( - "SELECT %(columns)s " - "FROM %(table)s " - "WHERE " - "public > %(threshold)s " - "AND " - "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)") - with connection.cursor() as cursor: - cursor.execute( - query, - { - "table": table_map[trait_type], - "columns": columns, - "threshold": threshold, - "name": name - }) - return cursor.fetchone() - - def retrieve_publish_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `Publish` traits. @@ -156,11 +122,11 @@ def retrieve_publish_trait_info(trait_data_source: Dict[str, Any], conn: Any): }) return dict(zip([k.lower() for k in keys], cursor.fetchone())) -def set_confidential_field(trait_info): +def set_confidential_field(trait_type, trait_info): """Post processing function for 'Publish' trait types. 
It sets the value for the 'confidential' key.""" - if trait_info["type"] == "Publish": + if trait_type == "Publish": return { **trait_info, "confidential": 1 if ( @@ -271,7 +237,7 @@ def set_homologene_id_field_probeset(trait_info, conn): return {**trait_info, "homologeneid": res[0]} return {**trait_info, "homologeneid": None} -def set_homologene_id_field(trait_info, conn): +def set_homologene_id_field(trait_type, trait_info, conn): """ Common postprocessing function for all trait types. @@ -283,84 +249,83 @@ def set_homologene_id_field(trait_info, conn): "Publish": set_to_null, "ProbeSet": lambda ti: set_homologene_id_field_probeset(ti, conn) } - return functions_table[trait_info["type"]](trait_info) + return functions_table[trait_type](trait_info) -def set_geno_riset_fields(name, conn): - """ - Retrieve the RISet, and RISetID values for various Geno trait types. - """ +def load_publish_qtl_info(trait_info, conn): query = ( - "SELECT InbredSet.Name, InbredSet.Id " - "FROM InbredSet, GenoFreeze " - "WHERE GenoFreeze.InbredSetId = InbredSet.Id " - "AND GenoFreeze.Name = %(name)s") + "SELECT PublishXRef.Locus, PublishXRef.LRS, PublishXRef.additive " + "FROM PublishXRef, PublishFreeze " + "WHERE PublishXRef.Id = %(trait_name)s " + "AND PublishXRef.InbredSetId = PublishFreeze.InbredSetId " + "AND PublishFreeze.Id = %(dataset_id)s") with conn.cursor() as cursor: - return cursor.execute(query, {"name": name}) + cursor.execute() + return dict(zip(["locus", "lrs", "additive"], cursor.fetchone())) + return {"locus": "", "lrs": "", "additive": ""} -def set_publish_riset_fields(name, conn): - """ - Retrieve the RISet, and RISetID values for various Publish trait types. 
- """ +def load_probeset_qtl_info(trait_info, conn): query = ( - "SELECT InbredSet.Name, InbredSet.Id " - "FROM InbredSet, PublishFreeze " - "WHERE PublishFreeze.InbredSetId = InbredSet.Id " - "AND PublishFreeze.Name = %(name)s") + "SELECT ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, " + "ProbeSetXRef.mean, ProbeSetXRef.additive " + "FROM ProbeSetXRef, ProbeSet " + "WHERE ProbeSetXRef.ProbeSetId = ProbeSet.Id " + " AND ProbeSet.Name = %(trait_name)s " + "AND ProbeSetXRef.ProbeSetFreezeId = %(dataset_id)s") with conn.cursor() as cursor: - return cursor.execute(query, {"name": name}) + cursor.execute() + return dict(zip( + ["locus", "lrs", "pvalue", "mean", "additive"], cursor.fetchone())) + return {"locus": "", "lrs": "", "pvalue": "", "mean": "", "additive": ""} -def set_probeset_riset_fields(name, conn): - """ - Retrieve the RISet, and RISetID values for various ProbeSet trait types. - """ - query = ( - "SELECT InbredSet.Name, InbredSet.Id " - "FROM InbredSet, ProbeSetFreeze, ProbeFreeze " - "WHERE ProbeFreeze.InbredSetId = InbredSet.Id " - "AND ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId " - "AND ProbeSetFreeze.Name = %(name)s") - with conn.cursor() as cursor: - return cursor.execute(query, {"name": name}) - -def set_riset_fields(trait_info, conn): - """ - Retrieve the RISet, and RISetID values for various trait types. 
- """ - riset_functions_map = { - "Temp": lambda ti, con: (None, None), - "Geno": set_geno_riset_fields, - "Publish": set_publish_riset_fields, - "ProbeSet": set_probeset_riset_fields +def load_qtl_info(qtl, trait_type, trait_info, conn): + if not qtl: + return trait_info + qtl_info_functions = { + "Publish": load_publish_qtl_info, + "ProbeSet": load_probeset_qtl_info } - if not trait_info.get("haveinfo", None): + if trait_inf["name"] not in qtl_info_functions.keys(): return trait_info - riset, riid = riset_functions_map[trait_info["type"]]( - trait_info["name"], conn) - return { - **trait_info, "risetid": riid, - "riset": "BXD" if riset == "BXD300" else riset} + return qtl_info_functions[trait_type](trait_info, conn) def build_trait_name(trait_fullname): name_parts = trait_fullname.split("::") assert len(name_parts) >= 2, "Name format error" return { - "trait_db": name_parts[0], + "db": {"dataset_name": name_parts[0]}, "trait_fullname": trait_fullname, "trait_name": name_parts[1], "cellid": name_parts[2] if len(name_parts) == 3 else "" } +def retrieve_probeset_sequence(trait, conn): + query = ( + "SELECT ProbeSet.BlatSeq " + "FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef " + "WHERE ProbeSet.Id=ProbeSetXRef.ProbeSetId " + "AND ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId " + "AND ProbeSet.Name = %(trait_name)s " + "AND ProbeSetFreeze.Name = %(dataset_name)s") + with conn.cursor() as cursor: + cursor.execute( + query, + { + "trait_name": trait["trait_name"], + "dataset_name": trait["db"]["dataset_name"] + }) + seq = cursor.fetchone() + return {**trait, "sequence": seq[0] if seq else ""} + def retrieve_trait_info( - trait_type: str, trait_full_name: str, trait_dataset_id: int, - trait_dataset_name: str, conn: Any, qtl=None): + trait_type: str, threshold: int, trait_full_name: str, conn: Any, + qtl=None): """Retrieves the trait information. 
https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L397-L456 This function, or the dependent functions, might be incomplete as they are currently.""" - # pylint: disable=[R0913] trait = build_trait_name(trait_full_name) trait_info_function_table = { "Publish": retrieve_publish_trait_info, @@ -370,15 +335,19 @@ def retrieve_trait_info( } common_post_processing_fn = compose( - lambda ti: set_riset_fields(ti, conn), - lambda ti: set_homologene_id_field(ti, conn), - lambda ti: {"type": trait_type, **ti}, - lambda ti: {**ti, **trait}, + lambda ti: load_qtl_info(qtl, trait_type, ti, conn), + lambda ti: set_homologene_id_field(trait_type, ti, conn), + lambda ti: {"trait_type": trait_type, **ti}, + lambda ti: {**trait, **ti}, set_haveinfo_field) trait_post_processing_functions_table = { - "Publish": compose(set_confidential_field, common_post_processing_fn), - "ProbeSet": compose(common_post_processing_fn), + "Publish": compose( + lambda ti: set_confidential_field(trait_type, ti), + common_post_processing_fn), + "ProbeSet": compose( + lambda ti: retrieve_probeset_sequence(ti, conn), + common_post_processing_fn), "Geno": common_post_processing_fn, "Temp": common_post_processing_fn } @@ -387,10 +356,16 @@ def retrieve_trait_info( trait_post_processing_functions_table[trait_type], trait_info_function_table[trait_type]) - return retrieve_info( + trait_dataset = retrieve_trait_dataset(trait_type, trait, threshold, conn) + trait_info = retrieve_info( { "trait_name": trait["trait_name"], - "trait_dataset_id": trait_dataset_id, - "trait_dataset_name":trait_dataset_name + "trait_dataset_id": trait_dataset["dataset_id"], + "trait_dataset_name": trait_dataset["dataset_name"] }, conn) + return { + **trait_info, + "db": {**trait["db"], **trait_dataset}, + "riset": trait_dataset["riset"] + } diff --git a/tests/unit/db/test_datasets.py b/tests/unit/db/test_datasets.py new file mode 100644 index 0000000..34fe7f0 --- /dev/null +++ 
b/tests/unit/db/test_datasets.py @@ -0,0 +1,133 @@ +from unittest import mock, TestCase + +class TestDatasetsDBFunctions(TestCase): + + def test_retrieve_trait_dataset_name(self): + """Test that the function is called correctly.""" + for trait_type, thresh, trait_dataset_name, columns, table in [ + ["ProbeSet", 9, "testName", + "Id, Name, FullName, ShortName, DataScale", "ProbeSetFreeze"], + ["Geno", 3, "genoTraitName", "Id, Name, FullName, ShortName", + "GenoFreeze"], + ["Publish", 6, "publishTraitName", + "Id, Name, FullName, ShortName", "PublishFreeze"], + ["Temp", 4, "tempTraitName", "Id, Name, FullName, ShortName", + "TempFreeze"]]: + db_mock = mock.MagicMock() + with self.subTest(trait_type=trait_type): + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = ( + "testName", "testNameFull", "testNameShort", + "dataScale") + self.assertEqual( + retrieve_trait_dataset_name( + trait_type, thresh, trait_dataset_name, db_mock), + ("testName", "testNameFull", "testNameShort", + "dataScale")) + cursor.execute.assert_called_once_with( + "SELECT %(columns)s " + "FROM %(table)s " + "WHERE public > %(threshold)s AND " + "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)".format( + cols=columns, ttype=trait_type), + {"threshold": thresh, "name": trait_dataset_name, + "table": table, "columns": columns}) + + def test_set_probeset_riset_fields(self): + """ + Test that the `riset` and `riset_id` fields are retrieved appropriately + for the 'ProbeSet' trait type. 
+ """ + for trait_name, expected in [ + ["testProbeSetName", ()]]: + db_mock = mock.MagicMock() + with self.subTest(trait_name=trait_name, expected=expected): + with db_mock.cursor() as cursor: + cursor.execute.return_value = () + self.assertEqual( + set_probeset_riset_fields(trait_name, db_mock), expected) + cursor.execute.assert_called_once_with( + ( + "SELECT InbredSet.Name, InbredSet.Id" + " FROM InbredSet, ProbeSetFreeze, ProbeFreeze" + " WHERE ProbeFreeze.InbredSetId = InbredSet.Id" + " AND ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId" + " AND ProbeSetFreeze.Name = %(name)s"), + {"name": trait_name}) + + def test_set_riset_fields(self): + """ + Test that the riset fields are set up correctly for the different trait + types. + """ + for trait_info, expected in [ + [{}, {}], + [{"haveinfo": 0, "type": "Publish"}, + {"haveinfo": 0, "type": "Publish"}], + [{"haveinfo": 0, "type": "ProbeSet"}, + {"haveinfo": 0, "type": "ProbeSet"}], + [{"haveinfo": 0, "type": "Geno"}, + {"haveinfo": 0, "type": "Geno"}], + [{"haveinfo": 0, "type": "Temp"}, + {"haveinfo": 0, "type": "Temp"}], + [{"haveinfo": 1, "type": "Publish", "name": "test"}, + {"haveinfo": 1, "type": "Publish", "name": "test", + "riset": "riset_name", "risetid": 0}], + [{"haveinfo": 1, "type": "ProbeSet", "name": "test"}, + {"haveinfo": 1, "type": "ProbeSet", "name": "test", + "riset": "riset_name", "risetid": 0}], + [{"haveinfo": 1, "type": "Geno", "name": "test"}, + {"haveinfo": 1, "type": "Geno", "name": "test", + "riset": "riset_name", "risetid": 0}], + [{"haveinfo": 1, "type": "Temp", "name": "test"}, + {"haveinfo": 1, "type": "Temp", "name": "test", "riset": None, + "risetid": None}] + ]: + db_mock = mock.MagicMock() + with self.subTest(trait_info=trait_info, expected=expected): + with db_mock.cursor() as cursor: + cursor.execute.return_value = ("riset_name", 0) + self.assertEqual( + set_riset_fields(trait_info, db_mock), expected) + + def test_set_publish_riset_fields(self): + """ + Test that the 
`riset` and `riset_id` fields are retrieved appropriately + for the 'Publish' trait type. + """ + for trait_name, expected in [ + ["testPublishName", ()]]: + db_mock = mock.MagicMock() + with self.subTest(trait_name=trait_name, expected=expected): + with db_mock.cursor() as cursor: + cursor.execute.return_value = () + self.assertEqual( + set_publish_riset_fields(trait_name, db_mock), expected) + cursor.execute.assert_called_once_with( + ( + "SELECT InbredSet.Name, InbredSet.Id" + " FROM InbredSet, PublishFreeze" + " WHERE PublishFreeze.InbredSetId = InbredSet.Id" + " AND PublishFreeze.Name = %(name)s"), + {"name": trait_name}) + + def test_set_geno_riset_fields(self): + """ + Test that the `riset` and `riset_id` fields are retrieved appropriately + for the 'Geno' trait type. + """ + for trait_name, expected in [ + ["testGenoName", ()]]: + db_mock = mock.MagicMock() + with self.subTest(trait_name=trait_name, expected=expected): + with db_mock.cursor() as cursor: + cursor.execute.return_value = () + self.assertEqual( + set_geno_riset_fields(trait_name, db_mock), expected) + cursor.execute.assert_called_once_with( + ( + "SELECT InbredSet.Name, InbredSet.Id" + " FROM InbredSet, GenoFreeze" + " WHERE GenoFreeze.InbredSetId = InbredSet.Id" + " AND GenoFreeze.Name = %(name)s"), + {"name": trait_name}) diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 39d7a31..7d161bf 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -2,55 +2,19 @@ from unittest import mock, TestCase from gn3.db.traits import ( build_trait_name, - set_riset_fields, set_haveinfo_field, update_sample_data, retrieve_trait_info, - set_geno_riset_fields, set_confidential_field, set_homologene_id_field, retrieve_geno_trait_info, retrieve_temp_trait_info, - set_publish_riset_fields, - set_probeset_riset_fields, - retrieve_trait_dataset_name, retrieve_publish_trait_info, retrieve_probeset_trait_info) class TestTraitsDBFunctions(TestCase): "Test cases for 
traits functions" - def test_retrieve_trait_dataset_name(self): - """Test that the function is called correctly.""" - for trait_type, thresh, trait_dataset_name, columns, table in [ - ["ProbeSet", 9, "testName", - "Id, Name, FullName, ShortName, DataScale", "ProbeSetFreeze"], - ["Geno", 3, "genoTraitName", "Id, Name, FullName, ShortName", - "GenoFreeze"], - ["Publish", 6, "publishTraitName", - "Id, Name, FullName, ShortName", "PublishFreeze"], - ["Temp", 4, "tempTraitName", "Id, Name, FullName, ShortName", - "TempFreeze"]]: - db_mock = mock.MagicMock() - with self.subTest(trait_type=trait_type): - with db_mock.cursor() as cursor: - cursor.fetchone.return_value = ( - "testName", "testNameFull", "testNameShort", - "dataScale") - self.assertEqual( - retrieve_trait_dataset_name( - trait_type, thresh, trait_dataset_name, db_mock), - ("testName", "testNameFull", "testNameShort", - "dataScale")) - cursor.execute.assert_called_once_with( - "SELECT %(columns)s " - "FROM %(table)s " - "WHERE public > %(threshold)s AND " - "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)".format( - cols=columns, ttype=trait_type), - {"threshold": thresh, "name": trait_dataset_name, - "table": table, "columns": columns}) - def test_retrieve_publish_trait_info(self): """Test retrieval of type `Publish` traits.""" db_mock = mock.MagicMock() @@ -159,10 +123,10 @@ class TestTraitsDBFunctions(TestCase): def test_build_trait_name_with_good_fullnames(self): for fullname, expected in [ ["testdb::testname", - {"trait_db": "testdb", "trait_name": "testname", "cellid": "", - "trait_fullname": "testdb::testname"}], + {"db": {"dataset_name": "testdb"}, "trait_name": "testname", + "cellid": "", "trait_fullname": "testdb::testname"}], ["testdb::testname::testcell", - {"trait_db": "testdb", "trait_name": "testname", + {"db": {"dataset_name": "testdb"}, "trait_name": "testname", "cellid": "testcell", "trait_fullname": "testdb::testname::testcell"}]]: with self.subTest(fullname=fullname): @@ 
-176,26 +140,26 @@ class TestTraitsDBFunctions(TestCase): def test_retrieve_trait_info(self): """Test that information on traits is retrieved as appropriate.""" - for trait_type, trait_name, trait_dataset_id, trait_dataset_name, expected in [ - ["Publish", "pubDb::PublishTraitName::pubCell", 1, - "PublishDatasetTraitName", + for trait_type, threshold, trait_fullname, expected in [ + ["Publish", 9, "pubDb::PublishTraitName::pubCell", {"haveinfo": 0, "homologeneid": None, "type": "Publish", - "confidential": 0, "trait_db": "pubDb", + "confidential": 0, "db": {"dataset_name": "pubDb"}, "trait_name": "PublishTraitName", "cellid": "pubCell", "trait_fullname": "pubDb::PublishTraitName::pubCell"}], - ["ProbeSet", "prbDb::ProbeSetTraitName::prbCell", 2, - "ProbeSetDatasetTraitName", + ["ProbeSet", 5, "prbDb::ProbeSetTraitName::prbCell", {"haveinfo": 0, "homologeneid": None, "type": "ProbeSet", "trait_fullname": "prbDb::ProbeSetTraitName::prbCell", - "trait_db": "prbDb", "trait_name": "ProbeSetTraitName", - "cellid": "prbCell"}], - ["Geno", "genDb::GenoTraitName", 3, "GenoDatasetTraitName", + "db": {"dataset_name": "prbDb"}, + "trait_name": "ProbeSetTraitName", "cellid": "prbCell"}], + ["Geno", 12, "genDb::GenoTraitName", {"haveinfo": 0, "homologeneid": None, "type": "Geno", - "trait_fullname": "genDb::GenoTraitName", "trait_db": "genDb", + "trait_fullname": "genDb::GenoTraitName", + "db": {"dataset_name": "genDb"}, "trait_name": "GenoTraitName", "cellid": ""}], - ["Temp", "tmpDb::TempTraitName", 4, "TempDatasetTraitName", + ["Temp", 6, "tmpDb::TempTraitName", {"haveinfo": 0, "homologeneid": None, "type": "Temp", - "trait_fullname": "tmpDb::TempTraitName", "trait_db": "tmpDb", + "trait_fullname": "tmpDb::TempTraitName", + "db": {"dataset_name": "tmpDb"}, "trait_name": "TempTraitName", "cellid": ""}]]: db_mock = mock.MagicMock() with self.subTest(trait_type=trait_type): @@ -203,8 +167,7 @@ class TestTraitsDBFunctions(TestCase): cursor.fetchone.return_value = tuple() 
self.assertEqual( retrieve_trait_info( - trait_type, trait_name, trait_dataset_id, - trait_dataset_name, db_mock), + trait_type, threshold, trait_fullname, db_mock), expected) def test_update_sample_data(self): @@ -246,128 +209,25 @@ class TestTraitsDBFunctions(TestCase): def test_set_homologene_id_field(self): """Test that the `homologene_id` field is set up correctly""" - for trait_info, expected in [ - [{"type": "Publish"}, - {"type": "Publish", "homologeneid": None}], - [{"type": "ProbeSet"}, - {"type": "ProbeSet", "homologeneid": None}], - [{"type": "Geno"}, {"type": "Geno", "homologeneid": None}], - [{"type": "Temp"}, {"type": "Temp", "homologeneid": None}]]: + for trait_type, trait_info, expected in [ + ["Publish", {}, {"homologeneid": None}], + ["ProbeSet", {}, {"homologeneid": None}], + ["Geno", {}, {"homologeneid": None}], + ["Temp", {}, {"homologeneid": None}]]: db_mock = mock.MagicMock() with self.subTest(trait_info=trait_info, expected=expected): with db_mock.cursor() as cursor: cursor.fetchone.return_value = () self.assertEqual( - set_homologene_id_field(trait_info, db_mock), expected) + set_homologene_id_field(trait_type, trait_info, db_mock), expected) def test_set_confidential_field(self): """Test that the `confidential` field is set up correctly""" - for trait_info, expected in [ - [{"type": "Publish"}, {"type": "Publish", "confidential": 0}], - [{"type": "ProbeSet"}, {"type": "ProbeSet"}], - [{"type": "Geno"}, {"type": "Geno"}], - [{"type": "Temp"}, {"type": "Temp"}]]: + for trait_type, trait_info, expected in [ + ["Publish", {}, {"confidential": 0}], + ["ProbeSet", {}, {}], + ["Geno", {}, {}], + ["Temp", {}, {}]]: with self.subTest(trait_info=trait_info, expected=expected): self.assertEqual( - set_confidential_field(trait_info), expected) - - def test_set_geno_riset_fields(self): - """ - Test that the `riset` and `riset_id` fields are retrieved appropriately - for the 'Geno' trait type. 
- """ - for trait_name, expected in [ - ["testGenoName", ()]]: - db_mock = mock.MagicMock() - with self.subTest(trait_name=trait_name, expected=expected): - with db_mock.cursor() as cursor: - cursor.execute.return_value = () - self.assertEqual( - set_geno_riset_fields(trait_name, db_mock), expected) - cursor.execute.assert_called_once_with( - ( - "SELECT InbredSet.Name, InbredSet.Id" - " FROM InbredSet, GenoFreeze" - " WHERE GenoFreeze.InbredSetId = InbredSet.Id" - " AND GenoFreeze.Name = %(name)s"), - {"name": trait_name}) - - - def test_set_publish_riset_fields(self): - """ - Test that the `riset` and `riset_id` fields are retrieved appropriately - for the 'Publish' trait type. - """ - for trait_name, expected in [ - ["testPublishName", ()]]: - db_mock = mock.MagicMock() - with self.subTest(trait_name=trait_name, expected=expected): - with db_mock.cursor() as cursor: - cursor.execute.return_value = () - self.assertEqual( - set_publish_riset_fields(trait_name, db_mock), expected) - cursor.execute.assert_called_once_with( - ( - "SELECT InbredSet.Name, InbredSet.Id" - " FROM InbredSet, PublishFreeze" - " WHERE PublishFreeze.InbredSetId = InbredSet.Id" - " AND PublishFreeze.Name = %(name)s"), - {"name": trait_name}) - - - def test_set_probeset_riset_fields(self): - """ - Test that the `riset` and `riset_id` fields are retrieved appropriately - for the 'ProbeSet' trait type. 
- """ - for trait_name, expected in [ - ["testProbeSetName", ()]]: - db_mock = mock.MagicMock() - with self.subTest(trait_name=trait_name, expected=expected): - with db_mock.cursor() as cursor: - cursor.execute.return_value = () - self.assertEqual( - set_probeset_riset_fields(trait_name, db_mock), expected) - cursor.execute.assert_called_once_with( - ( - "SELECT InbredSet.Name, InbredSet.Id" - " FROM InbredSet, ProbeSetFreeze, ProbeFreeze" - " WHERE ProbeFreeze.InbredSetId = InbredSet.Id" - " AND ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId" - " AND ProbeSetFreeze.Name = %(name)s"), - {"name": trait_name}) - - def test_set_riset_fields(self): - """ - Test that the riset fields are set up correctly for the different trait - types. - """ - for trait_info, expected in [ - [{}, {}], - [{"haveinfo": 0, "type": "Publish"}, - {"haveinfo": 0, "type": "Publish"}], - [{"haveinfo": 0, "type": "ProbeSet"}, - {"haveinfo": 0, "type": "ProbeSet"}], - [{"haveinfo": 0, "type": "Geno"}, - {"haveinfo": 0, "type": "Geno"}], - [{"haveinfo": 0, "type": "Temp"}, - {"haveinfo": 0, "type": "Temp"}], - [{"haveinfo": 1, "type": "Publish", "name": "test"}, - {"haveinfo": 1, "type": "Publish", "name": "test", - "riset": "riset_name", "risetid": 0}], - [{"haveinfo": 1, "type": "ProbeSet", "name": "test"}, - {"haveinfo": 1, "type": "ProbeSet", "name": "test", - "riset": "riset_name", "risetid": 0}], - [{"haveinfo": 1, "type": "Geno", "name": "test"}, - {"haveinfo": 1, "type": "Geno", "name": "test", - "riset": "riset_name", "risetid": 0}], - [{"haveinfo": 1, "type": "Temp", "name": "test"}, - {"haveinfo": 1, "type": "Temp", "name": "test", "riset": None, - "risetid": None}] - ]: - db_mock = mock.MagicMock() - with self.subTest(trait_info=trait_info, expected=expected): - with db_mock.cursor() as cursor: - cursor.execute.return_value = ("riset_name", 0) - self.assertEqual( - set_riset_fields(trait_info, db_mock), expected) + set_confidential_field(trait_type, trait_info), expected) -- cgit 
v1.2.3 From 4bc516beea37b0cc0a54f42d93cf5606f073abbf Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Sun, 8 Aug 2021 12:18:43 +0300 Subject: Update tests Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * Fix tests to take current changes into consideration. --- tests/unit/db/test_datasets.py | 109 ++++++++++++++++++++--------------------- tests/unit/db/test_traits.py | 20 ++------ 2 files changed, 57 insertions(+), 72 deletions(-) (limited to 'tests/unit') diff --git a/tests/unit/db/test_datasets.py b/tests/unit/db/test_datasets.py index 34fe7f0..4f405cb 100644 --- a/tests/unit/db/test_datasets.py +++ b/tests/unit/db/test_datasets.py @@ -1,51 +1,56 @@ from unittest import mock, TestCase +from gn3.db.datasets import ( + retrieve_dataset_name, + retrieve_riset_fields, + retrieve_geno_riset_fields, + retrieve_publish_riset_fields, + retrieve_probeset_riset_fields) class TestDatasetsDBFunctions(TestCase): - def test_retrieve_trait_dataset_name(self): + def test_retrieve_dataset_name(self): """Test that the function is called correctly.""" - for trait_type, thresh, trait_dataset_name, columns, table in [ - ["ProbeSet", 9, "testName", + for trait_type, thresh, trait_name, dataset_name, columns, table in [ + ["ProbeSet", 9, "probesetTraitName", "probesetDatasetName", "Id, Name, FullName, ShortName, DataScale", "ProbeSetFreeze"], - ["Geno", 3, "genoTraitName", "Id, Name, FullName, ShortName", - "GenoFreeze"], - ["Publish", 6, "publishTraitName", + ["Geno", 3, "genoTraitName", "genoDatasetName", + "Id, Name, FullName, ShortName", "GenoFreeze"], + ["Publish", 6, "publishTraitName", "publishDatasetName", "Id, Name, FullName, ShortName", "PublishFreeze"], - ["Temp", 4, "tempTraitName", "Id, Name, FullName, ShortName", - "TempFreeze"]]: + ["Temp", 4, "tempTraitName", "tempTraitName", + "Id, Name, FullName, ShortName", "TempFreeze"]]: db_mock = mock.MagicMock() with self.subTest(trait_type=trait_type): 
with db_mock.cursor() as cursor: - cursor.fetchone.return_value = ( - "testName", "testNameFull", "testNameShort", - "dataScale") + cursor.fetchone.return_value = {} self.assertEqual( - retrieve_trait_dataset_name( - trait_type, thresh, trait_dataset_name, db_mock), - ("testName", "testNameFull", "testNameShort", - "dataScale")) + retrieve_dataset_name( + trait_type, thresh, trait_name, dataset_name, db_mock), + {}) cursor.execute.assert_called_once_with( - "SELECT %(columns)s " - "FROM %(table)s " + "SELECT {cols} " + "FROM {table} " "WHERE public > %(threshold)s AND " - "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)".format( - cols=columns, ttype=trait_type), - {"threshold": thresh, "name": trait_dataset_name, - "table": table, "columns": columns}) + "(Name = %(name)s " + "OR FullName = %(name)s " + "OR ShortName = %(name)s)".format( + table=table, cols=columns, ttype=trait_type), + {"threshold": thresh, "name": dataset_name}) - def test_set_probeset_riset_fields(self): + def test_retrieve_probeset_riset_fields(self): """ Test that the `riset` and `riset_id` fields are retrieved appropriately for the 'ProbeSet' trait type. """ for trait_name, expected in [ - ["testProbeSetName", ()]]: + ["testProbeSetName", {}]]: db_mock = mock.MagicMock() with self.subTest(trait_name=trait_name, expected=expected): with db_mock.cursor() as cursor: cursor.execute.return_value = () self.assertEqual( - set_probeset_riset_fields(trait_name, db_mock), expected) + retrieve_probeset_riset_fields(trait_name, db_mock), + expected) cursor.execute.assert_called_once_with( ( "SELECT InbredSet.Name, InbredSet.Id" @@ -55,54 +60,45 @@ class TestDatasetsDBFunctions(TestCase): " AND ProbeSetFreeze.Name = %(name)s"), {"name": trait_name}) - def test_set_riset_fields(self): + def test_retrieve_riset_fields(self): """ Test that the riset fields are set up correctly for the different trait types. 
""" - for trait_info, expected in [ - [{}, {}], - [{"haveinfo": 0, "type": "Publish"}, - {"haveinfo": 0, "type": "Publish"}], - [{"haveinfo": 0, "type": "ProbeSet"}, - {"haveinfo": 0, "type": "ProbeSet"}], - [{"haveinfo": 0, "type": "Geno"}, - {"haveinfo": 0, "type": "Geno"}], - [{"haveinfo": 0, "type": "Temp"}, - {"haveinfo": 0, "type": "Temp"}], - [{"haveinfo": 1, "type": "Publish", "name": "test"}, - {"haveinfo": 1, "type": "Publish", "name": "test", - "riset": "riset_name", "risetid": 0}], - [{"haveinfo": 1, "type": "ProbeSet", "name": "test"}, - {"haveinfo": 1, "type": "ProbeSet", "name": "test", - "riset": "riset_name", "risetid": 0}], - [{"haveinfo": 1, "type": "Geno", "name": "test"}, - {"haveinfo": 1, "type": "Geno", "name": "test", - "riset": "riset_name", "risetid": 0}], - [{"haveinfo": 1, "type": "Temp", "name": "test"}, - {"haveinfo": 1, "type": "Temp", "name": "test", "riset": None, - "risetid": None}] - ]: + for trait_type, trait_name, dataset_info, expected in [ + ["Publish", "pubTraitName01", {"dataset_name": "pubDBName01"}, + {"dataset_name": "pubDBName01", "riset": ""}], + ["ProbeSet", "prbTraitName01", {"dataset_name": "prbDBName01"}, + {"dataset_name": "prbDBName01", "riset": ""}], + ["Geno", "genoTraitName01", {"dataset_name": "genoDBName01"}, + {"dataset_name": "genoDBName01", "riset": ""}], + ["Temp", "tempTraitName01", {}, {"riset": ""}], + ]: db_mock = mock.MagicMock() - with self.subTest(trait_info=trait_info, expected=expected): + with self.subTest( + trait_type=trait_type, trait_name=trait_name, + dataset_info=dataset_info): with db_mock.cursor() as cursor: cursor.execute.return_value = ("riset_name", 0) self.assertEqual( - set_riset_fields(trait_info, db_mock), expected) + retrieve_riset_fields( + trait_type, trait_name, dataset_info, db_mock), + expected) - def test_set_publish_riset_fields(self): + def test_retrieve_publish_riset_fields(self): """ Test that the `riset` and `riset_id` fields are retrieved appropriately for the 
'Publish' trait type. """ for trait_name, expected in [ - ["testPublishName", ()]]: + ["testPublishName", {}]]: db_mock = mock.MagicMock() with self.subTest(trait_name=trait_name, expected=expected): with db_mock.cursor() as cursor: cursor.execute.return_value = () self.assertEqual( - set_publish_riset_fields(trait_name, db_mock), expected) + retrieve_publish_riset_fields(trait_name, db_mock), + expected) cursor.execute.assert_called_once_with( ( "SELECT InbredSet.Name, InbredSet.Id" @@ -111,19 +107,20 @@ class TestDatasetsDBFunctions(TestCase): " AND PublishFreeze.Name = %(name)s"), {"name": trait_name}) - def test_set_geno_riset_fields(self): + def test_retrieve_geno_riset_fields(self): """ Test that the `riset` and `riset_id` fields are retrieved appropriately for the 'Geno' trait type. """ for trait_name, expected in [ - ["testGenoName", ()]]: + ["testGenoName", {}]]: db_mock = mock.MagicMock() with self.subTest(trait_name=trait_name, expected=expected): with db_mock.cursor() as cursor: cursor.execute.return_value = () self.assertEqual( - set_geno_riset_fields(trait_name, db_mock), expected) + retrieve_geno_riset_fields(trait_name, db_mock), + expected) cursor.execute.assert_called_once_with( ( "SELECT InbredSet.Name, InbredSet.Id" diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 7d161bf..5f52c18 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -142,25 +142,13 @@ class TestTraitsDBFunctions(TestCase): """Test that information on traits is retrieved as appropriate.""" for trait_type, threshold, trait_fullname, expected in [ ["Publish", 9, "pubDb::PublishTraitName::pubCell", - {"haveinfo": 0, "homologeneid": None, "type": "Publish", - "confidential": 0, "db": {"dataset_name": "pubDb"}, - "trait_name": "PublishTraitName", "cellid": "pubCell", - "trait_fullname": "pubDb::PublishTraitName::pubCell"}], + {"haveinfo": 0}], ["ProbeSet", 5, "prbDb::ProbeSetTraitName::prbCell", - {"haveinfo": 0, "homologeneid": 
None, "type": "ProbeSet", - "trait_fullname": "prbDb::ProbeSetTraitName::prbCell", - "db": {"dataset_name": "prbDb"}, - "trait_name": "ProbeSetTraitName", "cellid": "prbCell"}], + {"haveinfo": 0}], ["Geno", 12, "genDb::GenoTraitName", - {"haveinfo": 0, "homologeneid": None, "type": "Geno", - "trait_fullname": "genDb::GenoTraitName", - "db": {"dataset_name": "genDb"}, - "trait_name": "GenoTraitName", "cellid": ""}], + {"haveinfo": 0}], ["Temp", 6, "tmpDb::TempTraitName", - {"haveinfo": 0, "homologeneid": None, "type": "Temp", - "trait_fullname": "tmpDb::TempTraitName", - "db": {"dataset_name": "tmpDb"}, - "trait_name": "TempTraitName", "cellid": ""}]]: + {"haveinfo": 0}]]: db_mock = mock.MagicMock() with self.subTest(trait_type=trait_type): with db_mock.cursor() as cursor: -- cgit v1.2.3 From 667e67bae832ca5083f3319ada4fda67aca41f44 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Mon, 9 Aug 2021 11:44:47 +0300 Subject: Fix linting errors Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * Add module, class and function docstrings * Deactivate some irrelevant pylint errors * Fix indentations and line-lengths --- gn3/db/datasets.py | 64 ++++++++++++++++++++++++++++++++++-------- gn3/db/traits.py | 29 +++++++++++++++++++ tests/unit/db/test_datasets.py | 5 +++- tests/unit/db/test_traits.py | 6 ++++ 4 files changed, 91 insertions(+), 13 deletions(-) (limited to 'tests/unit') diff --git a/gn3/db/datasets.py b/gn3/db/datasets.py index 3ad50f6..53d6811 100644 --- a/gn3/db/datasets.py +++ b/gn3/db/datasets.py @@ -1,7 +1,13 @@ -from typing import Any, Dict, Union +""" +This module contains functions relating to specific trait dataset manipulation +""" +from typing import Any def retrieve_probeset_trait_dataset_name( threshold: int, name: str, connection: Any): + """ + Get the ID, DataScale and various name formats for a `ProbeSet` trait. 
+ """ query = ( "SELECT Id, Name, FullName, ShortName, DataScale " "FROM ProbeSetFreeze " @@ -21,7 +27,11 @@ def retrieve_probeset_trait_dataset_name( "dataset_shortname", "dataset_datascale"], cursor.fetchone)) -def retrieve_publish_trait_dataset_name(threshold: int, name: str, connection: Any): +def retrieve_publish_trait_dataset_name( + threshold: int, name: str, connection: Any): + """ + Get the ID, DataScale and various name formats for a `Publish` trait. + """ query = ( "SELECT Id, Name, FullName, ShortName " "FROM PublishFreeze " @@ -41,7 +51,11 @@ def retrieve_publish_trait_dataset_name(threshold: int, name: str, connection: A "dataset_shortname"], cursor.fetchone)) -def retrieve_geno_trait_dataset_name(threshold: int, name: str, connection: Any): +def retrieve_geno_trait_dataset_name( + threshold: int, name: str, connection: Any): + """ + Get the ID, DataScale and various name formats for a `Geno` trait. + """ query = ( "SELECT Id, Name, FullName, ShortName " "FROM GenoFreeze " @@ -61,7 +75,11 @@ def retrieve_geno_trait_dataset_name(threshold: int, name: str, connection: Any) "dataset_shortname"], cursor.fetchone)) -def retrieve_temp_trait_dataset_name(threshold: int, name: str, connection: Any): +def retrieve_temp_trait_dataset_name( + threshold: int, name: str, connection: Any): + """ + Get the ID, DataScale and various name formats for a `Temp` trait. + """ query = ( "SELECT Id, Name, FullName, ShortName " "FROM TempFreeze " @@ -145,6 +163,9 @@ def retrieve_probeset_riset_fields(name, conn): return {} def retrieve_temp_riset_fields(name, conn): + """ + Retrieve the RISet, and RISetID values for `Temp` trait types. 
+ """ query = ( "SELECT InbredSet.Name, InbredSet.Id " "FROM InbredSet, Temp " @@ -179,6 +200,10 @@ def retrieve_riset_fields(trait_type, trait_name, dataset_info, conn): } def retrieve_temp_trait_dataset(): + """ + Retrieve the dataset that relates to `Temp` traits + """ + # pylint: disable=[C0330] return { "searchfield": ["name", "description"], "disfield": ["name", "description"], @@ -189,28 +214,40 @@ def retrieve_temp_trait_dataset(): } def retrieve_geno_trait_dataset(): + """ + Retrieve the dataset that relates to `Geno` traits + """ + # pylint: disable=[C0330] return { - "searchfield": ["name","chr"], - "disfield": ["name","chr","mb", "source2", "sequence"], + "searchfield": ["name", "chr"], + "disfield": ["name", "chr", "mb", "source2", "sequence"], "type": "Geno" } def retrieve_publish_trait_dataset(): + """ + Retrieve the dataset that relates to `Publish` traits + """ + # pylint: disable=[C0330] return { "searchfield": [ "name", "post_publication_description", "abstract", "title", "authors"], "disfield": [ - "name","pubmed_id", "pre_publication_description", - "post_publication_description", "original_description", + "name", "pubmed_id", "pre_publication_description", + "post_publication_description", "original_description", "pre_publication_abbreviation", "post_publication_abbreviation", "lab_code", "submitter", "owner", "authorized_users", - "authors","title","abstract", "journal","volume","pages","month", - "year","sequence", "units", "comments"], + "authors", "title", "abstract", "journal", "volume", "pages", + "month", "year", "sequence", "units", "comments"], "type": "Publish" } def retrieve_probeset_trait_dataset(): + """ + Retrieve the dataset that relates to `ProbeSet` traits + """ + # pylint: disable=[C0330] return { "searchfield": [ "name", "description", "probe_target_description", "symbol", @@ -228,6 +265,9 @@ def retrieve_probeset_trait_dataset(): } def retrieve_trait_dataset(trait_type, trait, threshold, conn): + """ + Retrieve the dataset 
that relates to a specific trait. + """ dataset_fns = { "Temp": retrieve_temp_trait_dataset, "Geno": retrieve_geno_trait_dataset, @@ -238,8 +278,8 @@ def retrieve_trait_dataset(trait_type, trait, threshold, conn): "dataset_id": None, "dataset_name": trait["db"]["dataset_name"], **retrieve_dataset_name( - trait_type, threshold, trait["trait_name"], trait["db"]["dataset_name"], - conn) + trait_type, threshold, trait["trait_name"], + trait["db"]["dataset_name"], conn) } riset = retrieve_riset_fields( trait_type, trait["trait_name"], dataset_name_info, conn) diff --git a/gn3/db/traits.py b/gn3/db/traits.py index 6c31a4d..fb48fc3 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -43,6 +43,7 @@ def update_sample_data(conn: Any, count: Union[int, str]): """Given the right parameters, update sample-data from the relevant table.""" + # pylint: disable=[R0913, R0914] STRAIN_ID_SQL: str = "UPDATE Strain SET Name = %s WHERE Id = %s" PUBLISH_DATA_SQL: str = ("UPDATE PublishData SET value = %s " "WHERE StrainId = %s AND Id = %s") @@ -252,6 +253,9 @@ def set_homologene_id_field(trait_type, trait_info, conn): return functions_table[trait_type](trait_info) def load_publish_qtl_info(trait_info, conn): + """ + Load extra QTL information for `Publish` traits + """ query = ( "SELECT PublishXRef.Locus, PublishXRef.LRS, PublishXRef.additive " "FROM PublishXRef, PublishFreeze " @@ -264,6 +268,9 @@ def load_publish_qtl_info(trait_info, conn): return {"locus": "", "lrs": "", "additive": ""} def load_probeset_qtl_info(trait_info, conn): + """ + Load extra QTL information for `ProbeSet` traits + """ query = ( "SELECT ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, " "ProbeSetXRef.mean, ProbeSetXRef.additive " @@ -278,6 +285,22 @@ def load_probeset_qtl_info(trait_info, conn): return {"locus": "", "lrs": "", "pvalue": "", "mean": "", "additive": ""} def load_qtl_info(qtl, trait_type, trait_info, conn): + """ + Load extra QTL information for traits + + DESCRIPTION: + Migrated 
from + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L500-L534 + + PARAMETERS: + qtl: boolean + trait_type: string + The type of the trait in consideration + trait_info: map/dictionary + A dictionary of the trait's key-value pairs + conn: + A database connection object + """ if not qtl: return trait_info qtl_info_functions = { @@ -290,6 +313,9 @@ def load_qtl_info(qtl, trait_type, trait_info, conn): return qtl_info_functions[trait_type](trait_info, conn) def build_trait_name(trait_fullname): + """ + Initialises the trait's name, and other values from the search data provided + """ name_parts = trait_fullname.split("::") assert len(name_parts) >= 2, "Name format error" return { @@ -300,6 +326,9 @@ def build_trait_name(trait_fullname): } def retrieve_probeset_sequence(trait, conn): + """ + Retrieve a 'ProbeSet' trait's sequence information + """ query = ( "SELECT ProbeSet.BlatSeq " "FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef " diff --git a/tests/unit/db/test_datasets.py b/tests/unit/db/test_datasets.py index 4f405cb..38de0e2 100644 --- a/tests/unit/db/test_datasets.py +++ b/tests/unit/db/test_datasets.py @@ -1,3 +1,5 @@ +"""Tests for gn3/db/datasets.py""" + from unittest import mock, TestCase from gn3.db.datasets import ( retrieve_dataset_name, @@ -7,6 +9,7 @@ from gn3.db.datasets import ( retrieve_probeset_riset_fields) class TestDatasetsDBFunctions(TestCase): + """Test cases for datasets functions.""" def test_retrieve_dataset_name(self): """Test that the function is called correctly.""" @@ -34,7 +37,7 @@ class TestDatasetsDBFunctions(TestCase): "(Name = %(name)s " "OR FullName = %(name)s " "OR ShortName = %(name)s)".format( - table=table, cols=columns, ttype=trait_type), + table=table, cols=columns), {"threshold": thresh, "name": dataset_name}) def test_retrieve_probeset_riset_fields(self): diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 5f52c18..d9d7bbb 100644 --- 
a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -121,6 +121,9 @@ class TestTraitsDBFunctions(TestCase): trait_source) def test_build_trait_name_with_good_fullnames(self): + """ + Check that the name is built correctly. + """ for fullname, expected in [ ["testdb::testname", {"db": {"dataset_name": "testdb"}, "trait_name": "testname", @@ -133,6 +136,9 @@ class TestTraitsDBFunctions(TestCase): self.assertEqual(build_trait_name(fullname), expected) def test_build_trait_name_with_bad_fullnames(self): + """ + Check that an exception is raised if the full name format is wrong. + """ for fullname in ["", "test", "test:test"]: with self.subTest(fullname=fullname): with self.assertRaises(AssertionError, msg="Name format error"): -- cgit v1.2.3 From 243d76bd5cdb989ee7d3311e44aafb7e8f7da712 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Mon, 9 Aug 2021 14:25:49 +0300 Subject: Set up the trait dataset type correctly * gn3/db/traits.py: set up `trait_dataset_type` * tests/unit/db/test_traits.py: fix tests The types ('Temp', 'Geno', 'Publish', and 'ProbeSet') relate to a trait's dataset, and not to the trait itself. This commit updates the code to take this into consideration. The dataset type is also set up from a trait's full name; therefore, this commit removes the `trait_type` argument from the `retrieve_trait_info` function. 
--- gn3/db/traits.py | 33 ++++++++++++++++++++++++--------- tests/unit/db/test_traits.py | 27 ++++++++++++--------------- 2 files changed, 36 insertions(+), 24 deletions(-) (limited to 'tests/unit') diff --git a/gn3/db/traits.py b/gn3/db/traits.py index a740352..6ea24be 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -326,10 +326,23 @@ def build_trait_name(trait_fullname): """ Initialises the trait's name, and other values from the search data provided """ + def dataset_type(dset_name): + if dset_name.find('Temp') >= 0: + return "Temp" + if dset_name.find('Geno') >= 0: + return "Geno" + if dset_name.find('Publish') >= 0: + return "Publish" + return "ProbeSet" + name_parts = trait_fullname.split("::") assert len(name_parts) >= 2, "Name format error" + dataset_name = name_parts[0] + dataset_type = dataset_type(dataset_name) return { - "db": {"dataset_name": name_parts[0]}, + "db": { + "dataset_name": dataset_name, + "dataset_type": dataset_type}, "trait_fullname": trait_fullname, "trait_name": name_parts[1], "cellid": name_parts[2] if len(name_parts) == 3 else "" @@ -357,7 +370,7 @@ def retrieve_probeset_sequence(trait, conn): return {**trait, "sequence": seq[0] if seq else ""} def retrieve_trait_info( - trait_type: str, threshold: int, trait_full_name: str, conn: Any, + threshold: int, trait_full_name: str, conn: Any, qtl=None): """Retrieves the trait information. 
@@ -366,6 +379,7 @@ def retrieve_trait_info( This function, or the dependent functions, might be incomplete as they are currently.""" trait = build_trait_name(trait_full_name) + trait_dataset_type = trait["db"]["dataset_type"] trait_info_function_table = { "Publish": retrieve_publish_trait_info, "ProbeSet": retrieve_probeset_trait_info, @@ -374,14 +388,14 @@ def retrieve_trait_info( } common_post_processing_fn = compose( - lambda ti: load_qtl_info(qtl, trait_type, ti, conn), - lambda ti: set_homologene_id_field(trait_type, ti, conn), - lambda ti: {"trait_type": trait_type, **ti}, + lambda ti: load_qtl_info(qtl, trait_dataset_type, ti, conn), + lambda ti: set_homologene_id_field(trait_dataset_type, ti, conn), + lambda ti: {"trait_type": trait_dataset_type, **ti}, lambda ti: {**trait, **ti}) trait_post_processing_functions_table = { "Publish": compose( - lambda ti: set_confidential_field(trait_type, ti), + lambda ti: set_confidential_field(trait_dataset_type, ti), common_post_processing_fn), "ProbeSet": compose( lambda ti: retrieve_probeset_sequence(ti, conn), @@ -391,9 +405,10 @@ def retrieve_trait_info( } retrieve_info = compose( - set_haveinfo_field, trait_info_function_table[trait_type]) + set_haveinfo_field, trait_info_function_table[trait_dataset_type]) - trait_dataset = retrieve_trait_dataset(trait_type, trait, threshold, conn) + trait_dataset = retrieve_trait_dataset( + trait_dataset_type, trait, threshold, conn) trait_info = retrieve_info( { "trait_name": trait["trait_name"], @@ -403,7 +418,7 @@ def retrieve_trait_info( conn) if trait_info["haveinfo"]: return { - **trait_post_processing_functions_table[trait_type](trait_info), + **trait_post_processing_functions_table[trait_dataset_type](trait_info), "db": {**trait["db"], **trait_dataset}, "riset": trait_dataset["riset"] } diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index d9d7bbb..ee98893 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -126,11 
+126,12 @@ class TestTraitsDBFunctions(TestCase): """ for fullname, expected in [ ["testdb::testname", - {"db": {"dataset_name": "testdb"}, "trait_name": "testname", - "cellid": "", "trait_fullname": "testdb::testname"}], + {"db": {"dataset_name": "testdb", "dataset_type": "ProbeSet"}, + "trait_name": "testname", "cellid": "", + "trait_fullname": "testdb::testname"}], ["testdb::testname::testcell", - {"db": {"dataset_name": "testdb"}, "trait_name": "testname", - "cellid": "testcell", + {"db": {"dataset_name": "testdb", "dataset_type": "ProbeSet"}, + "trait_name": "testname", "cellid": "testcell", "trait_fullname": "testdb::testname::testcell"}]]: with self.subTest(fullname=fullname): self.assertEqual(build_trait_name(fullname), expected) @@ -146,22 +147,18 @@ class TestTraitsDBFunctions(TestCase): def test_retrieve_trait_info(self): """Test that information on traits is retrieved as appropriate.""" - for trait_type, threshold, trait_fullname, expected in [ - ["Publish", 9, "pubDb::PublishTraitName::pubCell", - {"haveinfo": 0}], - ["ProbeSet", 5, "prbDb::ProbeSetTraitName::prbCell", - {"haveinfo": 0}], - ["Geno", 12, "genDb::GenoTraitName", - {"haveinfo": 0}], - ["Temp", 6, "tmpDb::TempTraitName", - {"haveinfo": 0}]]: + for threshold, trait_fullname, expected in [ + [9, "pubDb::PublishTraitName::pubCell", {"haveinfo": 0}], + [5, "prbDb::ProbeSetTraitName::prbCell", {"haveinfo": 0}], + [12, "genDb::GenoTraitName", {"haveinfo": 0}], + [6, "tmpDb::TempTraitName", {"haveinfo": 0}]]: db_mock = mock.MagicMock() - with self.subTest(trait_type=trait_type): + with self.subTest(trait_fullname=trait_fullname): with db_mock.cursor() as cursor: cursor.fetchone.return_value = tuple() self.assertEqual( retrieve_trait_info( - trait_type, threshold, trait_fullname, db_mock), + threshold, trait_fullname, db_mock), expected) def test_update_sample_data(self): -- cgit v1.2.3