aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Muriithi Frederick Muriuki 2021-07-30 08:29:38 +0300
committer Muriithi Frederick Muriuki 2021-07-30 08:29:38 +0300
commit c4f362d9a9b83f4fc6fadde0989663dd34fb0b07 (patch)
tree 1bd75a20f0443143d806a8364d6f99b711d3d20e
parent 75ba10b9f7e8c5c7fabbd0f4134a1475cc180ae1 (diff)
download genenetwork3-c4f362d9a9b83f4fc6fadde0989663dd34fb0b07.tar.gz
Return dict from query functions
Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/db/traits.py: return dicts rather than tuples/lists
* tests/unit/db/test_traits.py: Update tests

Return dicts with the key-value pairs set up so as to ease data manipulation down the pipeline. This is also useful to help with the retrieval of all other extra information that was left out in the first iteration.

This commit also updates the tests by ensuring they expect dicts rather than tuples.
-rw-r--r-- gn3/db/traits.py 141
-rw-r--r-- tests/unit/db/test_traits.py 83
2 files changed, 140 insertions, 84 deletions
diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index ae1939a..9742fa2 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -100,119 +100,128 @@ def retrieve_trait_dataset_name(
cursor.execute(query, {"threshold": threshold, "name": name})
return cursor.fetchone()
-PUBLISH_TRAIT_INFO_QUERY = (
- "SELECT "
- "PublishXRef.Id, Publication.PubMed_ID, "
- "Phenotype.Pre_publication_description, "
- "Phenotype.Post_publication_description, "
- "Phenotype.Original_description, "
- "Phenotype.Pre_publication_abbreviation, "
- "Phenotype.Post_publication_abbreviation, "
- "Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, "
- "Phenotype.Authorized_Users, CAST(Publication.Authors AS BINARY), "
- "Publication.Title, Publication.Abstract, Publication.Journal, "
- "Publication.Volume, Publication.Pages, Publication.Month, "
- "Publication.Year, PublishXRef.Sequence, Phenotype.Units, "
- "PublishXRef.comments "
- "FROM "
- "PublishXRef, Publication, Phenotype, PublishFreeze "
- "WHERE "
- "PublishXRef.Id = %(trait_name)s AND "
- "Phenotype.Id = PublishXRef.PhenotypeId AND "
- "Publication.Id = PublishXRef.PublicationId AND "
- "PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND "
- "PublishFreeze.Id =%(trait_dataset_id)s")
-
def retrieve_publish_trait_info(trait_data_source: Dict[str, Any], conn: Any):
"""Retrieve trait information for type `Publish` traits.
https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L399-L421"""
+ keys = (
+ "Id", "PubMed_ID", "Pre_publication_description",
+ "Post_publication_description", "Original_description",
+ "Pre_publication_abbreviation", "Post_publication_abbreviation",
+ "Lab_code", "Submitter", "Owner", "Authorized_Users", "Authors",
+ "Title", "Abstract", "Journal", "Volume", "Pages", "Month", "Year",
+ "Sequence", "Units", "comments")
+ columns = (
+ "PublishXRef.Id, Publication.PubMed_ID, "
+ "Phenotype.Pre_publication_description, "
+ "Phenotype.Post_publication_description, "
+ "Phenotype.Original_description, "
+ "Phenotype.Pre_publication_abbreviation, "
+ "Phenotype.Post_publication_abbreviation, "
+ "Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, "
+ "Phenotype.Authorized_Users, CAST(Publication.Authors AS BINARY), "
+ "Publication.Title, Publication.Abstract, Publication.Journal, "
+ "Publication.Volume, Publication.Pages, Publication.Month, "
+ "Publication.Year, PublishXRef.Sequence, Phenotype.Units, "
+ "PublishXRef.comments")
+ query = (
+ "SELECT "
+ "{columns} "
+ "FROM "
+ "PublishXRef, Publication, Phenotype, PublishFreeze "
+ "WHERE "
+ "PublishXRef.Id = %(trait_name)s AND "
+ "Phenotype.Id = PublishXRef.PhenotypeId AND "
+ "Publication.Id = PublishXRef.PublicationId AND "
+ "PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND "
+ "PublishFreeze.Id =%(trait_dataset_id)s").format(
+ columns = columns)
with conn.cursor() as cursor:
cursor.execute(
- PUBLISH_TRAIT_INFO_QUERY,
+ query,
{
k:v for k, v in trait_data_source.items()
if k in ["trait_name", "trait_dataset_id"]
})
- return cursor.fetchone()
-
-PROBESET_TRAIT_INFO_QUERY = (
- "SELECT "
- "ProbeSet.name, ProbeSet.symbol, ProbeSet.description, "
- "ProbeSet.probe_target_description, ProbeSet.chr, ProbeSet.mb, "
- "ProbeSet.alias, ProbeSet.geneid, ProbeSet.genbankid, ProbeSet.unigeneid, "
- "ProbeSet.omim, ProbeSet.refseq_transcriptid, ProbeSet.blatseq, "
- "ProbeSet.targetseq, ProbeSet.chipid, ProbeSet.comments, "
- "ProbeSet.strand_probe, ProbeSet.strand_gene, "
- "ProbeSet.probe_set_target_region, ProbeSet.proteinid, "
- "ProbeSet.probe_set_specificity, ProbeSet.probe_set_blat_score, "
- "ProbeSet.probe_set_blat_mb_start, ProbeSet.probe_set_blat_mb_end, "
- "ProbeSet.probe_set_strand, ProbeSet.probe_set_note_by_rw, "
- "ProbeSet.flag "
- "FROM "
- "ProbeSet, ProbeSetFreeze, ProbeSetXRef "
- "WHERE "
- "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND "
- "ProbeSetXRef.ProbeSetId = ProbeSet.Id AND "
- "ProbeSetFreeze.Name = %(trait_dataset_name)s AND "
- "ProbeSet.Name = %(trait_name)s")
+ return dict(zip((k.lower() for k in keys), cursor.fetchone()))
def retrieve_probeset_trait_info(trait_data_source: Dict[str, Any], conn: Any):
"""Retrieve trait information for type `ProbeSet` traits.
https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L424-L435"""
+ keys = (
+ "name", "symbol", "description", "probe_target_description", "chr",
+ "mb", "alias", "geneid", "genbankid", "unigeneid", "omim",
+ "refseq_transcriptid", "blatseq", "targetseq", "chipid", "comments",
+ "strand_probe", "strand_gene", "probe_set_target_region", "proteinid",
+ "probe_set_specificity", "probe_set_blat_score",
+ "probe_set_blat_mb_start", "probe_set_blat_mb_end", "probe_set_strand",
+ "probe_set_note_by_rw", "flag")
+ query = (
+ "SELECT "
+ "{columns} "
+ "FROM "
+ "ProbeSet, ProbeSetFreeze, ProbeSetXRef "
+ "WHERE "
+ "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND "
+ "ProbeSetXRef.ProbeSetId = ProbeSet.Id AND "
+ "ProbeSetFreeze.Name = %(trait_dataset_name)s AND "
+ "ProbeSet.Name = %(trait_name)s").format(
+ columns = ", ".join(["ProbeSet.{}".format(x) for x in keys]))
with conn.cursor() as cursor:
cursor.execute(
- PROBESET_TRAIT_INFO_QUERY,
+ query,
{
k:v for k, v in trait_data_source.items()
if k in ["trait_name", "trait_dataset_name"]
})
- return cursor.fetchone()
-
-GENO_TRAIT_INFO_QUERY = (
- "SELECT "
- "Geno.name, Geno.chr, Geno.mb, Geno.source2, Geno.sequence "
- "FROM "
- "Geno, GenoFreeze, GenoXRef "
- "WHERE "
- "GenoXRef.GenoFreezeId = GenoFreeze.Id AND GenoXRef.GenoId = Geno.Id AND "
- "GenoFreeze.Name = %(trait_dataset_name)s AND Geno.Name = %(trait_name)s")
+ return dict(zip(keys, cursor.fetchone()))
def retrieve_geno_trait_info(trait_data_source: Dict[str, Any], conn: Any):
"""Retrieve trait information for type `Geno` traits.
https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L438-L449"""
+ keys = ("name", "chr", "mb", "source2", "sequence")
+ query = (
+ "SELECT "
+ "{columns} "
+ "FROM "
+ "Geno, GenoFreeze, GenoXRef "
+ "WHERE "
+ "GenoXRef.GenoFreezeId = GenoFreeze.Id AND GenoXRef.GenoId = Geno.Id AND "
+ "GenoFreeze.Name = %(trait_dataset_name)s AND "
+ "Geno.Name = %(trait_name)s").format(
+ columns = ", ".join(["Geno.{}".format(x) for x in keys]))
with conn.cursor() as cursor:
cursor.execute(
- GENO_TRAIT_INFO_QUERY,
+ query,
{
k:v for k, v in trait_data_source.items()
if k in ["trait_name", "trait_dataset_name"]
})
- return cursor.fetchone()
-
-TEMP_TRAIT_INFO_QUERY = (
- "SELECT name, description FROM Temp "
- "WHERE Name = %(trait_name)s")
+ return dict(zip(keys, cursor.fetchone()))
def retrieve_temp_trait_info(trait_data_source: Dict[str, Any], conn: Any):
"""Retrieve trait information for type `Temp` traits.
https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L450-452"""
+ keys = ("name", "description")
+ query = (
+ "SELECT {columns} FROM Temp "
+ "WHERE Name = %(trait_name)s").format(columns = ", ".join(keys))
with conn.cursor() as cursor:
cursor.execute(
- TEMP_TRAIT_INFO_QUERY,
+ query,
{
k:v for k, v in trait_data_source.items()
if k in ["trait_name"]
})
- return cursor.fetchone()
+ return dict(zip(keys, cursor.fetchone()))
def retrieve_trait_info(
trait_type: str, trait_name: str, trait_dataset_id: int,
- trait_dataset_name: str, conn: Any):
+ trait_dataset_name: str, conn: Any, QTL = None):
"""Retrieves the trait information.
https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L397-L456
diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py
index c8f28b5..393983d 100644
--- a/tests/unit/db/test_traits.py
+++ b/tests/unit/db/test_traits.py
@@ -1,11 +1,6 @@
"""Tests for gn3/db/traits.py"""
from unittest import mock, TestCase
from gn3.db.traits import (
- GENO_TRAIT_INFO_QUERY,
- TEMP_TRAIT_INFO_QUERY,
- PUBLISH_TRAIT_INFO_QUERY,
- PROBESET_TRAIT_INFO_QUERY)
-from gn3.db.traits import (
retrieve_trait_info,
retrieve_geno_trait_info,
retrieve_temp_trait_info,
@@ -14,7 +9,6 @@ from gn3.db.traits import (
retrieve_probeset_trait_info,
update_sample_data)
-
class TestTraitsDBFunctions(TestCase):
"Test cases for traits functions"
@@ -54,12 +48,32 @@ class TestTraitsDBFunctions(TestCase):
trait_source = {
"trait_name": "PublishTraitName", "trait_dataset_id": 1}
self.assertEqual(
- retrieve_publish_trait_info(
- trait_source,
- db_mock),
- tuple())
+ retrieve_publish_trait_info(trait_source, db_mock), {})
cursor.execute.assert_called_once_with(
- PUBLISH_TRAIT_INFO_QUERY, trait_source)
+ ("SELECT "
+ "PublishXRef.Id, Publication.PubMed_ID,"
+ " Phenotype.Pre_publication_description,"
+ " Phenotype.Post_publication_description,"
+ " Phenotype.Original_description,"
+ " Phenotype.Pre_publication_abbreviation,"
+ " Phenotype.Post_publication_abbreviation,"
+ " Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner,"
+ " Phenotype.Authorized_Users,"
+ " CAST(Publication.Authors AS BINARY),"
+ " Publication.Title, Publication.Abstract,"
+ " Publication.Journal,"
+ " Publication.Volume, Publication.Pages, Publication.Month,"
+ " Publication.Year, PublishXRef.Sequence, Phenotype.Units,"
+ " PublishXRef.comments"
+ " FROM"
+ " PublishXRef, Publication, Phenotype, PublishFreeze"
+ " WHERE"
+ " PublishXRef.Id = %(trait_name)s "
+ " AND Phenotype.Id = PublishXRef.PhenotypeId"
+ " AND Publication.Id = PublishXRef.PublicationId"
+ " AND PublishXRef.InbredSetId = PublishFreeze.InbredSetId"
+ " AND PublishFreeze.Id =%(trait_dataset_id)s"),
+ trait_source)
def test_retrieve_probeset_trait_info(self):
"""Test retrieval of type `Probeset` traits."""
@@ -70,9 +84,31 @@ class TestTraitsDBFunctions(TestCase):
"trait_name": "ProbeSetTraitName",
"trait_dataset_name": "ProbeSetDatasetTraitName"}
self.assertEqual(
- retrieve_probeset_trait_info(trait_source, db_mock), tuple())
+ retrieve_probeset_trait_info(trait_source, db_mock), {})
cursor.execute.assert_called_once_with(
- PROBESET_TRAIT_INFO_QUERY, trait_source)
+ (
+ "SELECT "
+ "ProbeSet.name, ProbeSet.symbol, ProbeSet.description, "
+ "ProbeSet.probe_target_description, ProbeSet.chr, "
+ "ProbeSet.mb, ProbeSet.alias, ProbeSet.geneid, "
+ "ProbeSet.genbankid, ProbeSet.unigeneid, ProbeSet.omim, "
+ "ProbeSet.refseq_transcriptid, ProbeSet.blatseq, "
+ "ProbeSet.targetseq, ProbeSet.chipid, ProbeSet.comments, "
+ "ProbeSet.strand_probe, ProbeSet.strand_gene, "
+ "ProbeSet.probe_set_target_region, ProbeSet.proteinid, "
+ "ProbeSet.probe_set_specificity, "
+ "ProbeSet.probe_set_blat_score, "
+ "ProbeSet.probe_set_blat_mb_start, "
+ "ProbeSet.probe_set_blat_mb_end, "
+ "ProbeSet.probe_set_strand, ProbeSet.probe_set_note_by_rw, "
+ "ProbeSet.flag "
+ "FROM "
+ "ProbeSet, ProbeSetFreeze, ProbeSetXRef "
+ "WHERE "
+ "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id "
+ "AND ProbeSetXRef.ProbeSetId = ProbeSet.Id "
+ "AND ProbeSetFreeze.Name = %(trait_dataset_name)s "
+ "AND ProbeSet.Name = %(trait_name)s"), trait_source)
def test_retrieve_geno_trait_info(self):
"""Test retrieval of type `Geno` traits."""
@@ -83,9 +119,19 @@ class TestTraitsDBFunctions(TestCase):
"trait_name": "GenoTraitName",
"trait_dataset_name": "GenoDatasetTraitName"}
self.assertEqual(
- retrieve_geno_trait_info(trait_source, db_mock), tuple())
+ retrieve_geno_trait_info(trait_source, db_mock), {})
cursor.execute.assert_called_once_with(
- GENO_TRAIT_INFO_QUERY, trait_source)
+ (
+ "SELECT "
+ "Geno.name, Geno.chr, Geno.mb, Geno.source2, Geno.sequence "
+ "FROM "
+ "Geno, GenoFreeze, GenoXRef "
+ "WHERE "
+ "GenoXRef.GenoFreezeId = GenoFreeze.Id "
+ "AND GenoXRef.GenoId = Geno.Id "
+ "AND GenoFreeze.Name = %(trait_dataset_name)s "
+ "AND Geno.Name = %(trait_name)s"),
+ trait_source)
def test_retrieve_temp_trait_info(self):
"""Test retrieval of type `Temp` traits."""
@@ -94,9 +140,10 @@ class TestTraitsDBFunctions(TestCase):
cursor.fetchone.return_value = tuple()
trait_source = {"trait_name": "TempTraitName"}
self.assertEqual(
- retrieve_temp_trait_info(trait_source, db_mock), tuple())
+ retrieve_temp_trait_info(trait_source, db_mock), {})
cursor.execute.assert_called_once_with(
- TEMP_TRAIT_INFO_QUERY, trait_source)
+ "SELECT name, description FROM Temp WHERE Name = %(trait_name)s",
+ trait_source)
def test_retrieve_trait_info(self):
"""Test that information on traits is retrieved as appropriate."""
@@ -113,7 +160,7 @@ class TestTraitsDBFunctions(TestCase):
retrieve_trait_info(
trait_type, trait_name, trait_dataset_id,
trait_dataset_name, db_mock),
- tuple())
+ {})
def test_update_sample_data(self):
"""Test that the SQL queries when calling update_sample_data are called with