diff options
author | Muriithi Frederick Muriuki | 2021-07-30 08:29:38 +0300 |
---|---|---|
committer | Muriithi Frederick Muriuki | 2021-07-30 08:29:38 +0300 |
commit | c4f362d9a9b83f4fc6fadde0989663dd34fb0b07 (patch) | |
tree | 1bd75a20f0443143d806a8364d6f99b711d3d20e | |
parent | 75ba10b9f7e8c5c7fabbd0f4134a1475cc180ae1 (diff) | |
download | genenetwork3-c4f362d9a9b83f4fc6fadde0989663dd34fb0b07.tar.gz |
Return dict from query functions
Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi
* gn3/db/traits.py: return dicts rather than tuples/list
* tests/unit/db/test_traits.py: Update tests
Return dicts with the key-value pairs set up so as to ease with the data
manipulation down the pipeline.
This is also useful to help with the retrieval of all other extra
information that was left out in the first iteration.
This commit also updates the tests by ensuring they expect dicts rather than
tuples.
-rw-r--r-- | gn3/db/traits.py | 141 | ||||
-rw-r--r-- | tests/unit/db/test_traits.py | 83 |
2 files changed, 140 insertions, 84 deletions
diff --git a/gn3/db/traits.py b/gn3/db/traits.py index ae1939a..9742fa2 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -100,119 +100,128 @@ def retrieve_trait_dataset_name( cursor.execute(query, {"threshold": threshold, "name": name}) return cursor.fetchone() -PUBLISH_TRAIT_INFO_QUERY = ( - "SELECT " - "PublishXRef.Id, Publication.PubMed_ID, " - "Phenotype.Pre_publication_description, " - "Phenotype.Post_publication_description, " - "Phenotype.Original_description, " - "Phenotype.Pre_publication_abbreviation, " - "Phenotype.Post_publication_abbreviation, " - "Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, " - "Phenotype.Authorized_Users, CAST(Publication.Authors AS BINARY), " - "Publication.Title, Publication.Abstract, Publication.Journal, " - "Publication.Volume, Publication.Pages, Publication.Month, " - "Publication.Year, PublishXRef.Sequence, Phenotype.Units, " - "PublishXRef.comments " - "FROM " - "PublishXRef, Publication, Phenotype, PublishFreeze " - "WHERE " - "PublishXRef.Id = %(trait_name)s AND " - "Phenotype.Id = PublishXRef.PhenotypeId AND " - "Publication.Id = PublishXRef.PublicationId AND " - "PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND " - "PublishFreeze.Id =%(trait_dataset_id)s") - def retrieve_publish_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `Publish` traits. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L399-L421""" + keys = ( + "Id", "PubMed_ID", "Pre_publication_description", + "Post_publication_description", "Original_description", + "Pre_publication_abbreviation", "Post_publication_abbreviation", + "Lab_code", "Submitter", "Owner", "Authorized_Users", "Authors", + "Title", "Abstract", "Journal", "Volume", "Pages", "Month", "Year", + "Sequence", "Units", "comments") + columns = ( + "PublishXRef.Id, Publication.PubMed_ID, " + "Phenotype.Pre_publication_description, " + "Phenotype.Post_publication_description, " + "Phenotype.Original_description, " + "Phenotype.Pre_publication_abbreviation, " + "Phenotype.Post_publication_abbreviation, " + "Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, " + "Phenotype.Authorized_Users, CAST(Publication.Authors AS BINARY), " + "Publication.Title, Publication.Abstract, Publication.Journal, " + "Publication.Volume, Publication.Pages, Publication.Month, " + "Publication.Year, PublishXRef.Sequence, Phenotype.Units, " + "PublishXRef.comments") + query = ( + "SELECT " + "{columns} " + "FROM " + "PublishXRef, Publication, Phenotype, PublishFreeze " + "WHERE " + "PublishXRef.Id = %(trait_name)s AND " + "Phenotype.Id = PublishXRef.PhenotypeId AND " + "Publication.Id = PublishXRef.PublicationId AND " + "PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND " + "PublishFreeze.Id =%(trait_dataset_id)s").format( + columns = columns) with conn.cursor() as cursor: cursor.execute( - PUBLISH_TRAIT_INFO_QUERY, + query, { k:v for k, v in trait_data_source.items() if k in ["trait_name", "trait_dataset_id"] }) - return cursor.fetchone() - -PROBESET_TRAIT_INFO_QUERY = ( - "SELECT " - "ProbeSet.name, ProbeSet.symbol, ProbeSet.description, " - "ProbeSet.probe_target_description, ProbeSet.chr, ProbeSet.mb, " - "ProbeSet.alias, ProbeSet.geneid, ProbeSet.genbankid, ProbeSet.unigeneid, " - "ProbeSet.omim, ProbeSet.refseq_transcriptid, ProbeSet.blatseq, " - "ProbeSet.targetseq, ProbeSet.chipid, ProbeSet.comments, " - "ProbeSet.strand_probe, ProbeSet.strand_gene, " - "ProbeSet.probe_set_target_region, ProbeSet.proteinid, " - "ProbeSet.probe_set_specificity, ProbeSet.probe_set_blat_score, " - "ProbeSet.probe_set_blat_mb_start, ProbeSet.probe_set_blat_mb_end, " - "ProbeSet.probe_set_strand, ProbeSet.probe_set_note_by_rw, " - "ProbeSet.flag " - "FROM " - "ProbeSet, ProbeSetFreeze, ProbeSetXRef " - "WHERE " - "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND " - "ProbeSetXRef.ProbeSetId = ProbeSet.Id AND " - "ProbeSetFreeze.Name = %(trait_dataset_name)s AND " - "ProbeSet.Name = %(trait_name)s") + return dict(zip((k.lower() for k in keys), cursor.fetchone())) def retrieve_probeset_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `ProbeSet` traits. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L424-L435""" + keys = ( + "name", "symbol", "description", "probe_target_description", "chr", + "mb", "alias", "geneid", "genbankid", "unigeneid", "omim", + "refseq_transcriptid", "blatseq", "targetseq", "chipid", "comments", + "strand_probe", "strand_gene", "probe_set_target_region", "proteinid", + "probe_set_specificity", "probe_set_blat_score", + "probe_set_blat_mb_start", "probe_set_blat_mb_end", "probe_set_strand", + "probe_set_note_by_rw", "flag") + query = ( + "SELECT " + "{columns} " + "FROM " + "ProbeSet, ProbeSetFreeze, ProbeSetXRef " + "WHERE " + "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND " + "ProbeSetXRef.ProbeSetId = ProbeSet.Id AND " + "ProbeSetFreeze.Name = %(trait_dataset_name)s AND " + "ProbeSet.Name = %(trait_name)s").format( + columns = ", ".join(["ProbeSet.{}".format(x) for x in keys])) with conn.cursor() as cursor: cursor.execute( - PROBESET_TRAIT_INFO_QUERY, + query, { k:v for k, v in trait_data_source.items() if k in ["trait_name", "trait_dataset_name"] }) - return cursor.fetchone() - -GENO_TRAIT_INFO_QUERY = ( - "SELECT " - "Geno.name, Geno.chr, Geno.mb, Geno.source2, Geno.sequence " - "FROM " - "Geno, GenoFreeze, GenoXRef " - "WHERE " - "GenoXRef.GenoFreezeId = GenoFreeze.Id AND GenoXRef.GenoId = Geno.Id AND " - "GenoFreeze.Name = %(trait_dataset_name)s AND Geno.Name = %(trait_name)s") + return dict(zip(keys, cursor.fetchone())) def retrieve_geno_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `Geno` traits. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L438-L449""" + keys = ("name", "chr", "mb", "source2", "sequence") + query = ( + "SELECT " + "{columns} " + "FROM " + "Geno, GenoFreeze, GenoXRef " + "WHERE " + "GenoXRef.GenoFreezeId = GenoFreeze.Id AND GenoXRef.GenoId = Geno.Id AND " + "GenoFreeze.Name = %(trait_dataset_name)s AND " + "Geno.Name = %(trait_name)s").format( + columns = ", ".join(["Geno.{}".format(x) for x in keys])) with conn.cursor() as cursor: cursor.execute( - GENO_TRAIT_INFO_QUERY, + query, { k:v for k, v in trait_data_source.items() if k in ["trait_name", "trait_dataset_name"] }) - return cursor.fetchone() - -TEMP_TRAIT_INFO_QUERY = ( - "SELECT name, description FROM Temp " - "WHERE Name = %(trait_name)s") + return dict(zip(keys, cursor.fetchone())) def retrieve_temp_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `Temp` traits. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L450-452""" + keys = ("name", "description") + query = ( + "SELECT {columns} FROM Temp " + "WHERE Name = %(trait_name)s").format(columns = ", ".join(keys)) with conn.cursor() as cursor: cursor.execute( - TEMP_TRAIT_INFO_QUERY, + query, { k:v for k, v in trait_data_source.items() if k in ["trait_name"] }) - return cursor.fetchone() + return dict(zip(keys, cursor.fetchone())) def retrieve_trait_info( trait_type: str, trait_name: str, trait_dataset_id: int, - trait_dataset_name: str, conn: Any): + trait_dataset_name: str, conn: Any, QTL = None): """Retrieves the trait information. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L397-L456 diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index c8f28b5..393983d 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -1,11 +1,6 @@ """Tests for gn3/db/traits.py""" from unittest import mock, TestCase from gn3.db.traits import ( - GENO_TRAIT_INFO_QUERY, - TEMP_TRAIT_INFO_QUERY, - PUBLISH_TRAIT_INFO_QUERY, - PROBESET_TRAIT_INFO_QUERY) -from gn3.db.traits import ( retrieve_trait_info, retrieve_geno_trait_info, retrieve_temp_trait_info, @@ -14,7 +9,6 @@ from gn3.db.traits import ( retrieve_probeset_trait_info, update_sample_data) - class TestTraitsDBFunctions(TestCase): "Test cases for traits functions" @@ -54,12 +48,32 @@ class TestTraitsDBFunctions(TestCase): trait_source = { "trait_name": "PublishTraitName", "trait_dataset_id": 1} self.assertEqual( - retrieve_publish_trait_info( - trait_source, - db_mock), - tuple()) + retrieve_publish_trait_info(trait_source, db_mock), {}) cursor.execute.assert_called_once_with( - PUBLISH_TRAIT_INFO_QUERY, trait_source) + ("SELECT " + "PublishXRef.Id, Publication.PubMed_ID," + " Phenotype.Pre_publication_description," + " Phenotype.Post_publication_description," + " Phenotype.Original_description," + " Phenotype.Pre_publication_abbreviation," + " Phenotype.Post_publication_abbreviation," + " Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner," + " Phenotype.Authorized_Users," + " CAST(Publication.Authors AS BINARY)," + " Publication.Title, Publication.Abstract," + " Publication.Journal," + " Publication.Volume, Publication.Pages, Publication.Month," + " Publication.Year, PublishXRef.Sequence, Phenotype.Units," + " PublishXRef.comments" + " FROM" + " PublishXRef, Publication, Phenotype, PublishFreeze" + " WHERE" + " PublishXRef.Id = %(trait_name)s " + " AND Phenotype.Id = PublishXRef.PhenotypeId" + " AND Publication.Id = PublishXRef.PublicationId" + " AND PublishXRef.InbredSetId = PublishFreeze.InbredSetId" + " AND PublishFreeze.Id =%(trait_dataset_id)s"), + trait_source) def test_retrieve_probeset_trait_info(self): """Test retrieval of type `Probeset` traits.""" @@ -70,9 +84,31 @@ class TestTraitsDBFunctions(TestCase): "trait_name": "ProbeSetTraitName", "trait_dataset_name": "ProbeSetDatasetTraitName"} self.assertEqual( - retrieve_probeset_trait_info(trait_source, db_mock), tuple()) + retrieve_probeset_trait_info(trait_source, db_mock), {}) cursor.execute.assert_called_once_with( - PROBESET_TRAIT_INFO_QUERY, trait_source) + ( + "SELECT " + "ProbeSet.name, ProbeSet.symbol, ProbeSet.description, " + "ProbeSet.probe_target_description, ProbeSet.chr, " + "ProbeSet.mb, ProbeSet.alias, ProbeSet.geneid, " + "ProbeSet.genbankid, ProbeSet.unigeneid, ProbeSet.omim, " + "ProbeSet.refseq_transcriptid, ProbeSet.blatseq, " + "ProbeSet.targetseq, ProbeSet.chipid, ProbeSet.comments, " + "ProbeSet.strand_probe, ProbeSet.strand_gene, " + "ProbeSet.probe_set_target_region, ProbeSet.proteinid, " + "ProbeSet.probe_set_specificity, " + "ProbeSet.probe_set_blat_score, " + "ProbeSet.probe_set_blat_mb_start, " + "ProbeSet.probe_set_blat_mb_end, " + "ProbeSet.probe_set_strand, ProbeSet.probe_set_note_by_rw, " + "ProbeSet.flag " + "FROM " + "ProbeSet, ProbeSetFreeze, ProbeSetXRef " + "WHERE " + "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id " + "AND ProbeSetXRef.ProbeSetId = ProbeSet.Id " + "AND ProbeSetFreeze.Name = %(trait_dataset_name)s " + "AND ProbeSet.Name = %(trait_name)s"), trait_source) def test_retrieve_geno_trait_info(self): """Test retrieval of type `Geno` traits.""" @@ -83,9 +119,19 @@ class TestTraitsDBFunctions(TestCase): "trait_name": "GenoTraitName", "trait_dataset_name": "GenoDatasetTraitName"} self.assertEqual( - retrieve_geno_trait_info(trait_source, db_mock), tuple()) + retrieve_geno_trait_info(trait_source, db_mock), {}) cursor.execute.assert_called_once_with( - GENO_TRAIT_INFO_QUERY, trait_source) + ( + "SELECT " + "Geno.name, Geno.chr, Geno.mb, Geno.source2, Geno.sequence " + "FROM " + "Geno, GenoFreeze, GenoXRef " + "WHERE " + "GenoXRef.GenoFreezeId = GenoFreeze.Id " + "AND GenoXRef.GenoId = Geno.Id " + "AND GenoFreeze.Name = %(trait_dataset_name)s " + "AND Geno.Name = %(trait_name)s"), + trait_source) def test_retrieve_temp_trait_info(self): """Test retrieval of type `Temp` traits.""" @@ -94,9 +140,10 @@ class TestTraitsDBFunctions(TestCase): cursor.fetchone.return_value = tuple() trait_source = {"trait_name": "TempTraitName"} self.assertEqual( - retrieve_temp_trait_info(trait_source, db_mock), tuple()) + retrieve_temp_trait_info(trait_source, db_mock), {}) cursor.execute.assert_called_once_with( - TEMP_TRAIT_INFO_QUERY, trait_source) + "SELECT name, description FROM Temp WHERE Name = %(trait_name)s", + trait_source) def test_retrieve_trait_info(self): """Test that information on traits is retrieved as appropriate.""" @@ -113,7 +160,7 @@ class TestTraitsDBFunctions(TestCase): retrieve_trait_info( trait_type, trait_name, trait_dataset_id, trait_dataset_name, db_mock), - tuple()) + {}) def test_update_sample_data(self): """Test that the SQL queries when calling update_sample_data are called with |