From e84861caf897d8f6f371d2797651f7c0a2787cea Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 26 Jul 2021 12:45:35 +0300 Subject: db: traits: Remove unused functions --- gn3/db/traits.py | 91 -------------------------------------------------------- 1 file changed, 91 deletions(-) (limited to 'gn3/db/traits.py') diff --git a/gn3/db/traits.py b/gn3/db/traits.py index 4860a07..a773fb2 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -1,92 +1 @@ -"""This contains all the necessary functions that are required to add traits -to the published database""" -from dataclasses import dataclass -from typing import Any, Dict, Optional - - -@dataclass(frozen=True) -class Riset: - """Class for keeping track of riset. A riset is a group e.g. rat HSNIH-Palmer, -BXD - - """ - name: Optional[str] - r_id: Optional[int] - - -@dataclass(frozen=True) -class WebqtlCaseData: - """Class for keeping track of one case data in one trait""" - value: Optional[float] = None - variance: Optional[float] = None - count: Optional[int] = None # Number of Individuals - - def __str__(self): - _str = "" - if self.value: - _str += f"value={self.value:.3f}" - if self.variance: - _str += f" variance={self.variance:.3f}" - if self.count: - _str += " n_data={self.count}" - return _str - - -def lookup_webqtldataset_name(riset_name: str, conn: Any): - """Given a group name(riset), return it's name e.g. BXDPublish, -HLCPublish.""" with conn.cursor() as cursor: - cursor.execute( - "SELECT PublishFreeze.Name FROM " - "PublishFreeze, InbredSet WHERE " - "PublishFreeze.InbredSetId = InbredSet.Id " - "AND InbredSet.Name = '%s'" % riset_name) - _result, *_ = cursor.fetchone() - return _result - - -def get_riset(data_type: str, name: str, conn: Any): - """Get the groups given the data type and it's PublishFreeze or GenoFreeze -name - - """ - query, _name, _id = None, None, None - if data_type == "Publish": - query = ("SELECT InbredSet.Name, InbredSet.Id FROM InbredSet, " - "PublishFreeze WHERE PublishFreeze.InbredSetId = " - "InbredSet.Id AND PublishFreeze.Name = '%s'" % name) - elif data_type == "Geno": - query = ("SELECT InbredSet.Name, InbredSet.Id FROM InbredSet, " - "GenoFreeze WHERE GenoFreeze.InbredSetId = " - "InbredSet.Id AND GenoFreeze.Name = '%s'" % name) - elif data_type == "ProbeSet": - query = ("SELECT InbredSet.Name, InbredSet.Id FROM " - "InbredSet, ProbeSetFreeze, ProbeFreeze WHERE " - "ProbeFreeze.InbredSetId = InbredSet.Id AND " - "ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND " - "ProbeSetFreeze.Name = '%s'" % name) - if query: - with conn.cursor() as cursor: - _name, _id = cursor.fetchone() - if _name == "BXD300": - _name = "BXD" - return Riset(_name, _id) - - -def insert_publication(pubmed_id: int, publication: Optional[Dict], - conn: Any): - """Creates a new publication record if it's not available""" - sql = ("SELECT Id FROM Publication where " - "PubMed_ID = %d" % pubmed_id) - _id = None - with conn.cursor() as cursor: - cursor.execute(sql) - _id = cursor.fetchone() - if not _id and publication: - # The Publication contains the fields: 'authors', 'title', 'abstract', - # 'journal','volume','pages','month','year' - insert_query = ("INSERT into Publication (%s) Values (%s)" % - (", ".join(publication.keys()), - ", ".join(['%s'] * len(publication)))) - with conn.cursor() as cursor: - cursor.execute(insert_query, tuple(publication.values())) -- cgit v1.2.3 From 9fe0f711a6b9ba8149ad26ffe2ff8cc1f686d90b Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 26 Jul 2021 12:45:47 +0300 Subject: db: traits: Fetch sample_data from a trait in csv form --- gn3/db/traits.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'gn3/db/traits.py') diff --git a/gn3/db/traits.py b/gn3/db/traits.py index a773fb2..ec4fa5a 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -1 +1,27 @@ +"""This class contains functions relating to trait data manipulation""" +from typing import Any + + +def get_trait_csv_sample_data(conn: Any, + trait_name: int, phenotype_id: int): + """Fetch a trait and return it as a csv string""" + sql = ("SELECT Strain.Id, PublishData.Id, Strain.Name, " + "PublishData.value, " + "PublishSE.error, NStrain.count FROM " + "(PublishData, Strain, PublishXRef, PublishFreeze) " + "LEFT JOIN PublishSE ON " + "(PublishSE.DataId = PublishData.Id AND " + "PublishSE.StrainId = PublishData.StrainId) " + "LEFT JOIN NStrain ON (NStrain.DataId = PublishData.Id AND " + "NStrain.StrainId = PublishData.StrainId) WHERE " + "PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND " + "PublishData.Id = PublishXRef.DataId AND " + "PublishXRef.Id = %s AND PublishXRef.PhenotypeId = %s " + "AND PublishData.StrainId = Strain.Id Order BY Strain.Name") + csv_data = ["Strain Id, Publish Data Id,Strain Name,Value,SE,Count"] with conn.cursor() as cursor: + cursor.execute(sql, (trait_name, phenotype_id,)) + for record in cursor.fetchall(): + csv_data.append(",".join([str(val) if val else "x" + for val in record])) + return "\n".join(csv_data) -- cgit v1.2.3 From f5d83ab0e6db9ed5fab2a97695fee698ed484f9a Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Tue, 27 Jul 2021 11:35:23 +0300 Subject: db: traits: Remove publishdata column --- gn3/db/traits.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'gn3/db/traits.py') diff --git a/gn3/db/traits.py b/gn3/db/traits.py index ec4fa5a..4baac67 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -18,10 +18,15 @@ def get_trait_csv_sample_data(conn: Any, "PublishData.Id = PublishXRef.DataId AND " "PublishXRef.Id = %s AND PublishXRef.PhenotypeId = %s " "AND PublishData.StrainId = Strain.Id Order BY Strain.Name") - csv_data = ["Strain Id, Publish Data Id,Strain Name,Value,SE,Count"] + csv_data = ["Strain Id,Strain Name,Value,SE,Count"] + publishdata_id = "" with conn.cursor() as cursor: cursor.execute(sql, (trait_name, phenotype_id,)) for record in cursor.fetchall(): - csv_data.append(",".join([str(val) if val else "x" - for val in record])) - return "\n".join(csv_data) + (strain_id, publishdata_id, + strain_name, value, error, count) = record + csv_data.append( + ",".join([str(val) if val else "x" + for val in (strain_id, strain_name, + value, error, count)])) + return f"# Publish Data Id: {publishdata_id}\n\n" + "\n".join(csv_data) -- cgit v1.2.3 From 02791f15d6b4940ae8be07fe9d4f8487d8291c78 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Wed, 28 Jul 2021 09:42:13 +0300 Subject: Retrieve 'ProbeSet' trait name Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * gn3/db/traits.py: new function (retrieve_probeset_trait_name) * tests/unit/db/test_traits.py: test(s) for new function Add a function to retrieve the name of a 'ProbeSet' trait in a manner similar to genenetwork1's retrieval of the same, as implemented here https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlDataset.py#L140-154 Unlike in genenetwork1, we do not mutate an object, instead, we return the values as retrieved from the database, and the caller will deal with the returned values as appropriate. --- gn3/db/traits.py | 18 ++++++++++++++++++ tests/unit/db/test_traits.py | 22 ++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 tests/unit/db/test_traits.py (limited to 'gn3/db/traits.py') diff --git a/gn3/db/traits.py b/gn3/db/traits.py index 4860a07..37b111e 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -90,3 +90,21 @@ def insert_publication(pubmed_id: int, publication: Optional[Dict], ", ".join(['%s'] * len(publication)))) with conn.cursor() as cursor: cursor.execute(insert_query, tuple(publication.values())) + +def retrieve_probeset_trait_name(threshold, name, connection): + """ + Retrieve the name for a Probeset trait + + This is extracted from the `webqtlDataset.retrieveName` function, + specifically the section dealing with 'ProbeSet' type traits + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlDataset.py#L140-154""" + query = ( + 'SELECT Id, Name, FullName, ShortName, DataScale ' + 'FROM ProbeSetFreeze ' + 'WHERE ' + 'public > %(threshold)s ' + 'AND ' + '(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)') + with connection.cursor() as cursor: + cursor.execute(query, {"threshold": threshold, "name": name}) + return cursor.fetchone() diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py new file mode 100644 index 0000000..6d2ba4d --- /dev/null +++ b/tests/unit/db/test_traits.py @@ -0,0 +1,22 @@ +"""Tests for gn3/db/traits.py""" +from unittest import mock, TestCase +from gn3.db.traits import retrieve_probeset_trait_name + +class TestTraitsDBFunctions(TestCase): + "Test cases for traits functions" + + def test_retrieve_probeset_trait_name(self): + """Test that the function is called correctly.""" + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = ( + "testName", "testNameFull", "testNameShort", "dataScale") + self.assertEqual( + retrieve_probeset_trait_name(9, "testName", db_mock), + ("testName", "testNameFull", "testNameShort", "dataScale")) + cursor.execute.assert_called_once_with( + "SELECT Id, Name, FullName, ShortName, DataScale " + "FROM ProbeSetFreeze " + "WHERE public > %(threshold)s AND " + "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)", + {"threshold": 9, "name": "testName"}) -- cgit v1.2.3 From 8d7f8eec5b5d84937e453c9b02de0bd1b1727265 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Wed, 28 Jul 2021 10:20:18 +0300 Subject: Make name retrieval more general Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * gn3/db/traits.py: make function more general * tests/unit/db/test_traits.py: parametrize the tests Make the name retrieval more general for the different types of traits by changing the column specification and table as appropriate. --- gn3/db/traits.py | 26 +++++++++++++++----------- tests/unit/db/test_traits.py | 40 ++++++++++++++++++++++++++-------------- 2 files changed, 41 insertions(+), 25 deletions(-) (limited to 'gn3/db/traits.py') diff --git a/gn3/db/traits.py b/gn3/db/traits.py index 37b111e..fddb8be 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -91,20 +91,24 @@ def insert_publication(pubmed_id: int, publication: Optional[Dict], with conn.cursor() as cursor: cursor.execute(insert_query, tuple(publication.values())) -def retrieve_probeset_trait_name(threshold, name, connection): +def retrieve_type_trait_name(trait_type, threshold, name, connection): """ - Retrieve the name for a Probeset trait + Retrieve the name of a trait given the trait's name - This is extracted from the `webqtlDataset.retrieveName` function, - specifically the section dealing with 'ProbeSet' type traits - https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlDataset.py#L140-154""" + This is extracted from the `webqtlDataset.retrieveName` function as is + implemented at + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlDataset.py#L140-L169 + """ + columns = "Id, Name, FullName, ShortName{}".format( + ", DataScale" if trait_type == "ProbeSet" else "") query = ( - 'SELECT Id, Name, FullName, ShortName, DataScale ' - 'FROM ProbeSetFreeze ' - 'WHERE ' - 'public > %(threshold)s ' - 'AND ' - '(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)') + "SELECT {columns} " + "FROM {trait_type}Freeze " + "WHERE " + "public > %(threshold)s " + "AND " + "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)").format( + columns=columns, trait_type=trait_type) with connection.cursor() as cursor: cursor.execute(query, {"threshold": threshold, "name": name}) return cursor.fetchone() diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 6d2ba4d..95c5b27 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -1,22 +1,34 @@ """Tests for gn3/db/traits.py""" from unittest import mock, TestCase -from gn3.db.traits import retrieve_probeset_trait_name +from gn3.db.traits import retrieve_type_trait_name class TestTraitsDBFunctions(TestCase): "Test cases for traits functions" def test_retrieve_probeset_trait_name(self): """Test that the function is called correctly.""" - db_mock = mock.MagicMock() - with db_mock.cursor() as cursor: - cursor.fetchone.return_value = ( - "testName", "testNameFull", "testNameShort", "dataScale") - self.assertEqual( - retrieve_probeset_trait_name(9, "testName", db_mock), - ("testName", "testNameFull", "testNameShort", "dataScale")) - cursor.execute.assert_called_once_with( - "SELECT Id, Name, FullName, ShortName, DataScale " - "FROM ProbeSetFreeze " - "WHERE public > %(threshold)s AND " - "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)", - {"threshold": 9, "name": "testName"}) + for trait_type, thresh, trait_name, columns in [ + ["ProbeSet", 9, "testName", + "Id, Name, FullName, ShortName, DataScale"], + ["Geno", 3, "genoTraitName", "Id, Name, FullName, ShortName"], + ["Publish", 6, "publishTraitName", + "Id, Name, FullName, ShortName"], + ["Temp", 4, "tempTraitName", "Id, Name, FullName, ShortName"]]: + db_mock = mock.MagicMock() + with self.subTest(trait_type=trait_type): + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = ( + "testName", "testNameFull", "testNameShort", + "dataScale") + self.assertEqual( + retrieve_type_trait_name( + trait_type, thresh, trait_name, db_mock), + ("testName", "testNameFull", "testNameShort", + "dataScale")) + cursor.execute.assert_called_once_with( + "SELECT {cols} " + "FROM {ttype}Freeze " + "WHERE public > %(threshold)s AND " + "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)".format( + cols=columns, ttype=trait_type), + {"threshold": thresh, "name": trait_name}) -- cgit v1.2.3 From 00579657abf5f9cadda1a9a479cae63ace28820c Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Wed, 28 Jul 2021 12:32:43 +0300 Subject: Retrieve trait information Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * gn3/db/traits.py: add functions to retrieve traits information * tests/unit/db/test_traits.py: add tests for new function Add functions to retrieve traits information as is done in genenetwork1 https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L397-L456 At this point, the data retrieval functions are probably incomplete, as there is more of the `retrieveInfo` function in GN1 that has not been considered as of this commit. --- gn3/db/traits.py | 133 ++++++++++++++++++++++++++++++++++++++++++- tests/unit/db/test_traits.py | 92 ++++++++++++++++++++++++++++-- 2 files changed, 218 insertions(+), 7 deletions(-) (limited to 'gn3/db/traits.py') diff --git a/gn3/db/traits.py b/gn3/db/traits.py index fddb8be..3c62df8 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -91,7 +91,7 @@ def insert_publication(pubmed_id: int, publication: Optional[Dict], with conn.cursor() as cursor: cursor.execute(insert_query, tuple(publication.values())) -def retrieve_type_trait_name(trait_type, threshold, name, connection): +def retrieve_trait_dataset_name(trait_type, threshold, name, connection): """ Retrieve the name of a trait given the trait's name @@ -112,3 +112,134 @@ def retrieve_type_trait_name(trait_type, threshold, name, connection): with connection.cursor() as cursor: cursor.execute(query, {"threshold": threshold, "name": name}) return cursor.fetchone() + +PUBLISH_TRAIT_INFO_QUERY = ( + "SELECT " + "PublishXRef.Id, Publication.PubMed_ID, " + "Phenotype.Pre_publication_description, " + "Phenotype.Post_publication_description, " + "Phenotype.Original_description, " + "Phenotype.Pre_publication_abbreviation, " + "Phenotype.Post_publication_abbreviation, " + "Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, " + "Phenotype.Authorized_Users, CAST(Publication.Authors AS BINARY), " + "Publication.Title, Publication.Abstract, Publication.Journal, " + "Publication.Volume, Publication.Pages, Publication.Month, " + "Publication.Year, PublishXRef.Sequence, Phenotype.Units, " + "PublishXRef.comments " + "FROM " + "PublishXRef, Publication, Phenotype, PublishFreeze " + "WHERE " + "PublishXRef.Id = %(trait_name)s AND " + "Phenotype.Id = PublishXRef.PhenotypeId AND " + "Publication.Id = PublishXRef.PublicationId AND " + "PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND " + "PublishFreeze.Id =%(trait_dataset_id)s") + +def retrieve_publish_trait_info(trait_data_source, conn): + """Retrieve trait information for type `Publish` traits. + + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L399-L421""" + with conn.cursor() as cursor: + cursor.execute( + PUBLISH_TRAIT_INFO_QUERY, + { + k:v for k, v in trait_data_source.items() + if k in ["trait_name", "trait_dataset_id"] + }) + return cursor.fetchone() + +PROBESET_TRAIT_INFO_QUERY = ( + "SELECT " + "ProbeSet.name, ProbeSet.symbol, ProbeSet.description, " + "ProbeSet.probe_target_description, ProbeSet.chr, ProbeSet.mb, " + "ProbeSet.alias, ProbeSet.geneid, ProbeSet.genbankid, ProbeSet.unigeneid, " + "ProbeSet.omim, ProbeSet.refseq_transcriptid, ProbeSet.blatseq, " + "ProbeSet.targetseq, ProbeSet.chipid, ProbeSet.comments, " + "ProbeSet.strand_probe, ProbeSet.strand_gene, " + "ProbeSet.probe_set_target_region, ProbeSet.proteinid, " + "ProbeSet.probe_set_specificity, ProbeSet.probe_set_blat_score, " + "ProbeSet.probe_set_blat_mb_start, ProbeSet.probe_set_blat_mb_end, " + "ProbeSet.probe_set_strand, ProbeSet.probe_set_note_by_rw, " + "ProbeSet.flag " + "FROM " + "ProbeSet, ProbeSetFreeze, ProbeSetXRef " + "WHERE " + "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND " + "ProbeSetXRef.ProbeSetId = ProbeSet.Id AND " + "ProbeSetFreeze.Name = %(trait_dataset_name)s AND " + "ProbeSet.Name = %(trait_name)s") + +def retrieve_probeset_trait_info(trait_data_source, conn): + """Retrieve trait information for type `ProbeSet` traits. + + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L424-L435""" + with conn.cursor() as cursor: + cursor.execute( + PROBESET_TRAIT_INFO_QUERY, + { + k:v for k, v in trait_data_source.items() + if k in ["trait_name", "trait_dataset_name"] + }) + return cursor.fetchone() + +GENO_TRAIT_INFO_QUERY = ( + "SELECT " + "Geno.name, Geno.chr, Geno.mb, Geno.source2, Geno.sequence " + "FROM " + "Geno, GenoFreeze, GenoXRef " + "WHERE " + "GenoXRef.GenoFreezeId = GenoFreeze.Id AND GenoXRef.GenoId = Geno.Id AND " + "GenoFreeze.Name = %(trait_dataset_name)s AND Geno.Name = %(trait_name)s") + +def retrieve_geno_trait_info(trait_data_source, conn): + """Retrieve trait information for type `Geno` traits. + + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L438-L449""" + with conn.cursor() as cursor: + cursor.execute( + GENO_TRAIT_INFO_QUERY, + { + k:v for k, v in trait_data_source.items() + if k in ["trait_name", "trait_dataset_name"] + }) + return cursor.fetchone() + +TEMP_TRAIT_INFO_QUERY = ( + "SELECT name, description FROM Temp " + "WHERE Name = %(trait_name)s") + +def retrieve_temp_trait_info(trait_data_source, conn): + """Retrieve trait information for type `Temp` traits. + + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L450-452""" + with conn.cursor() as cursor: + cursor.execute( + TEMP_TRAIT_INFO_QUERY, + { + k:v for k, v in trait_data_source.items() + if k in ["trait_name"] + }) + return cursor.fetchone() + +def retrieve_trait_info( + trait_type, trait_name, trait_dataset_id, trait_dataset_name, conn): + """Retrieves the trait information. + + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L397-L456 + + This function, or the dependent functions, might be incomplete as they are + currently.""" + trait_info_function_table = { + "Publish": retrieve_publish_trait_info, + "ProbeSet": retrieve_probeset_trait_info, + "Geno": retrieve_geno_trait_info, + "Temp": retrieve_temp_trait_info + } + return trait_info_function_table[trait_type]( + { + "trait_name": trait_name, + "trait_dataset_id": trait_dataset_id, + "trait_dataset_name":trait_dataset_name + }, + conn) diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 95c5b27..e3c5c28 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -1,13 +1,24 @@ """Tests for gn3/db/traits.py""" from unittest import mock, TestCase -from gn3.db.traits import retrieve_type_trait_name +from gn3.db.traits import ( + GENO_TRAIT_INFO_QUERY, + TEMP_TRAIT_INFO_QUERY, + PUBLISH_TRAIT_INFO_QUERY, + PROBESET_TRAIT_INFO_QUERY) +from gn3.db.traits import ( + retrieve_trait_info, + retrieve_geno_trait_info, + retrieve_temp_trait_info, + retrieve_trait_dataset_name, + retrieve_publish_trait_info, + retrieve_probeset_trait_info) class TestTraitsDBFunctions(TestCase): "Test cases for traits functions" - def test_retrieve_probeset_trait_name(self): + def test_retrieve_trait_dataset_name(self): """Test that the function is called correctly.""" - for trait_type, thresh, trait_name, columns in [ + for trait_type, thresh, trait_dataset_name, columns in [ ["ProbeSet", 9, "testName", "Id, Name, FullName, ShortName, DataScale"], ["Geno", 3, "genoTraitName", "Id, Name, FullName, ShortName"], @@ -21,8 +32,8 @@ class TestTraitsDBFunctions(TestCase): "testName", "testNameFull", "testNameShort", "dataScale") self.assertEqual( - retrieve_type_trait_name( - trait_type, thresh, trait_name, db_mock), + retrieve_trait_dataset_name( + trait_type, thresh, trait_dataset_name, db_mock), ("testName", "testNameFull", "testNameShort", "dataScale")) cursor.execute.assert_called_once_with( @@ -31,4 +42,73 @@ class TestTraitsDBFunctions(TestCase): "WHERE public > %(threshold)s AND " "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)".format( cols=columns, ttype=trait_type), - {"threshold": thresh, "name": trait_name}) + {"threshold": thresh, "name": trait_dataset_name}) + + def test_retrieve_publish_trait_info(self): + """Test retrieval of type `Publish` traits.""" + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = tuple() + trait_source = { + "trait_name": "PublishTraitName", "trait_dataset_id": 1} + self.assertEqual( + retrieve_publish_trait_info( + trait_source, + db_mock), + tuple()) + cursor.execute.assert_called_once_with( + PUBLISH_TRAIT_INFO_QUERY, trait_source) + + def test_retrieve_probeset_trait_info(self): + """Test retrieval of type `Probeset` traits.""" + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = tuple() + trait_source = { + "trait_name": "ProbeSetTraitName", + "trait_dataset_name": "ProbeSetDatasetTraitName"} + self.assertEqual( + retrieve_probeset_trait_info(trait_source, db_mock), tuple()) + cursor.execute.assert_called_once_with( + PROBESET_TRAIT_INFO_QUERY, trait_source) + + def test_retrieve_geno_trait_info(self): + """Test retrieval of type `Geno` traits.""" + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = tuple() + trait_source = { + "trait_name": "GenoTraitName", + "trait_dataset_name": "GenoDatasetTraitName"} + self.assertEqual( + retrieve_geno_trait_info(trait_source, db_mock), tuple()) + cursor.execute.assert_called_once_with( + GENO_TRAIT_INFO_QUERY, trait_source) + + def test_retrieve_temp_trait_info(self): + """Test retrieval of type `Temp` traits.""" + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = tuple() + trait_source = {"trait_name": "TempTraitName"} + self.assertEqual( + retrieve_temp_trait_info(trait_source, db_mock), tuple()) + cursor.execute.assert_called_once_with( + TEMP_TRAIT_INFO_QUERY, trait_source) + + def test_retrieve_trait_info(self): + """Test that information on traits is retrieved as appropriate.""" + for trait_type, trait_name, trait_dataset_id, trait_dataset_name, in [ + ["Publish", "PublishTraitName", 1, "PublishDatasetTraitName"], + ["ProbeSet", "ProbeSetTraitName", 2, "ProbeSetDatasetTraitName"], + ["Geno", "GenoTraitName", 3, "GenoDatasetTraitName"], + ["Temp", "TempTraitName", 4, "TempDatasetTraitName"]]: + db_mock = mock.MagicMock() + with self.subTest(trait_type=trait_type): + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = tuple() + self.assertEqual( + retrieve_trait_info( + trait_type, trait_name, trait_dataset_id, + trait_dataset_name, db_mock), + tuple()) -- cgit v1.2.3 From 13680aa9206e2302760180bab3254182f11dde68 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Thu, 29 Jul 2021 12:28:21 +0300 Subject: Add type annotations to the function Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * Add some type annotations to the functions to reduce the chances of bugs creeping into the code. --- gn3/db/traits.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'gn3/db/traits.py') diff --git a/gn3/db/traits.py b/gn3/db/traits.py index 3c62df8..f18e16a 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -91,7 +91,8 @@ def insert_publication(pubmed_id: int, publication: Optional[Dict], with conn.cursor() as cursor: cursor.execute(insert_query, tuple(publication.values())) -def retrieve_trait_dataset_name(trait_type, threshold, name, connection): +def retrieve_trait_dataset_name( + trait_type: str, threshold: int, name: str, connection: Any): """ Retrieve the name of a trait given the trait's name @@ -136,7 +137,7 @@ PUBLISH_TRAIT_INFO_QUERY = ( "PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND " "PublishFreeze.Id =%(trait_dataset_id)s") -def retrieve_publish_trait_info(trait_data_source, conn): +def retrieve_publish_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `Publish` traits. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L399-L421""" @@ -170,7 +171,7 @@ PROBESET_TRAIT_INFO_QUERY = ( "ProbeSetFreeze.Name = %(trait_dataset_name)s AND " "ProbeSet.Name = %(trait_name)s") -def retrieve_probeset_trait_info(trait_data_source, conn): +def retrieve_probeset_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `ProbeSet` traits. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L424-L435""" @@ -192,7 +193,7 @@ GENO_TRAIT_INFO_QUERY = ( "GenoXRef.GenoFreezeId = GenoFreeze.Id AND GenoXRef.GenoId = Geno.Id AND " "GenoFreeze.Name = %(trait_dataset_name)s AND Geno.Name = %(trait_name)s") -def retrieve_geno_trait_info(trait_data_source, conn): +def retrieve_geno_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `Geno` traits. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L438-L449""" @@ -209,7 +210,7 @@ TEMP_TRAIT_INFO_QUERY = ( "SELECT name, description FROM Temp " "WHERE Name = %(trait_name)s") -def retrieve_temp_trait_info(trait_data_source, conn): +def retrieve_temp_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `Temp` traits. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L450-452""" @@ -223,7 +224,8 @@ def retrieve_temp_trait_info(trait_data_source, conn): return cursor.fetchone() def retrieve_trait_info( - trait_type, trait_name, trait_dataset_id, trait_dataset_name, conn): + trait_type: str, trait_name: str, trait_dataset_id: int, + trait_dataset_name: str, conn: Any): """Retrieves the trait information. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L397-L456 -- cgit v1.2.3 From ac14e1167d866b8ab3a43583db8860ce99a3310b Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 29 Jul 2021 16:28:12 +0300 Subject: Add method for updating values from a sample dataset * gn3/db/traits.py (update_sample_data): New function. * tests/unit/db/test_traits.py: New test cases for ^^. --- HACKING.org | 8 ++++++++ gn3/db/traits.py | 47 +++++++++++++++++++++++++++++++++++++++++++- tests/unit/db/test_traits.py | 37 ++++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+), 1 deletion(-) create mode 100644 HACKING.org create mode 100644 tests/unit/db/test_traits.py (limited to 'gn3/db/traits.py') diff --git a/HACKING.org b/HACKING.org new file mode 100644 index 0000000..8c26a29 --- /dev/null +++ b/HACKING.org @@ -0,0 +1,8 @@ +* Introduction + +We are getting away from GitHub CI, and hosting our own CI servers. From GitHub, once a person makes a push, we'll be using [[https://github.com/adnanh/webhook][webhook]] to initiate laminar. + + +First install laminar (use the appropriate binary): + +: wget https://github.com/adnanh/webhook/releases/download/2.8.0/webhook-linux-amd64.tar.gz && tar xf webhook-linux-amd64.tar.gz diff --git a/gn3/db/traits.py b/gn3/db/traits.py index 4baac67..a77e6a1 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -1,5 +1,5 @@ """This class contains functions relating to trait data manipulation""" -from typing import Any +from typing import Any, Union def get_trait_csv_sample_data(conn: Any, @@ -30,3 +30,48 @@ def get_trait_csv_sample_data(conn: Any, for val in (strain_id, strain_name, value, error, count)])) return f"# Publish Data Id: {publishdata_id}\n\n" + "\n".join(csv_data) + + +def update_sample_data(conn: Any, + strain_name: str, + strain_id: int, + publish_data_id: int, + value: Union[int, float, str], + error: Union[int, float, str], + count: Union[int, str]): + """Given the right parameters, update sample-data from the relevant + table.""" + STRAIN_ID_SQL: str = "UPDATE Strain SET Name = %s WHERE Id = %s" + PUBLISH_DATA_SQL: str = ("UPDATE PublishData SET value = %s " + "WHERE StrainId = %s AND Id = %s") + PUBLISH_SE_SQL: str = ("UPDATE PublishSE SET error = %s " + "WHERE StrainId = %s AND DataId = %s") + N_STRAIN_SQL: str = ("UPDATE NStrain SET count = %s " + "WHERE StrainId = %s AND DataId = %s") + + updated_strains: int = 0 + updated_published_data: int = 0 + updated_se_data: int = 0 + updated_n_strains: int = 0 + + with conn.cursor() as cursor: + # Update the Strains table + cursor.execute(STRAIN_ID_SQL, (strain_name, strain_id)) + updated_strains: int = cursor.rowcount + # Update the PublishData table + cursor.execute(PUBLISH_DATA_SQL, + (None if value == "x" else value, + strain_id, publish_data_id)) + updated_published_data: int = cursor.rowcount + # Update the PublishSE table + cursor.execute(PUBLISH_SE_SQL, + (None if error == "x" else error, + strain_id, publish_data_id)) + updated_se_data: int = cursor.rowcount + # Update the NStrain table + cursor.execute(N_STRAIN_SQL, + (None if count == "x" else count, + strain_id, publish_data_id)) + updated_n_strains: int = cursor.rowcount + return (updated_strains, updated_published_data, + updated_se_data, updated_n_strains) diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py new file mode 100644 index 0000000..0e69bbe --- /dev/null +++ b/tests/unit/db/test_traits.py @@ -0,0 +1,37 @@ +"""Tests for db/traits.py""" +from unittest import TestCase +from unittest import mock + +from gn3.db.traits import update_sample_data + + +class TestTraitsSqlMethods(TestCase): + """Test cases for sql operations that affect traits""" + def test_update_sample_data(self): + """Test that the SQL queries when calling update_sample_data are called with + the right calls. + + """ + db_mock = mock.MagicMock() + + STRAIN_ID_SQL: str = "UPDATE Strain SET Name = '%s' WHERE Id = %s" + PUBLISH_DATA_SQL: str = ("UPDATE PublishData SET value = %s " + "WHERE StrainId = %s AND DataId = %s") + PUBLISH_SE_SQL: str = ("UPDATE PublishSE SET error = %s " + "WHERE StrainId = %s AND DataId = %s") + N_STRAIN_SQL: str = ("UPDATE NStrain SET count = '%s' " + "WHERE StrainId = %s AND DataId = %s") + + with db_mock.cursor() as cursor: + type(cursor).rowcount = 1 + self.assertEqual(update_sample_data( + conn=db_mock, strain_name="BXD11", + strain_id=10, publish_data_id=8967049, + value=18.7, error=2.3, count=2), + (1, 1, 1, 1)) + cursor.execute.assert_has_calls( + [mock.call(STRAIN_ID_SQL, ('BXD11', 10)), + mock.call(PUBLISH_DATA_SQL, (18.7, 10, 8967049)), + mock.call(PUBLISH_SE_SQL, (2.3, 10, 8967049)), + mock.call(N_STRAIN_SQL, (2, 10, 8967049))] + ) -- cgit v1.2.3