From 00579657abf5f9cadda1a9a479cae63ace28820c Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Wed, 28 Jul 2021 12:32:43 +0300 Subject: Retrieve trait information Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * gn3/db/traits.py: add functions to retrieve traits information * tests/unit/db/test_traits.py: add tests for new function Add functions to retrieve traits information as is done in genenetwork1 https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L397-L456 At this point, the data retrieval functions are probably incomplete, as there is more of the `retrieveInfo` function in GN1 that has not been considered as of this commit. --- gn3/db/traits.py | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 132 insertions(+), 1 deletion(-) (limited to 'gn3/db') diff --git a/gn3/db/traits.py b/gn3/db/traits.py index fddb8be..3c62df8 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -91,7 +91,7 @@ def insert_publication(pubmed_id: int, publication: Optional[Dict], with conn.cursor() as cursor: cursor.execute(insert_query, tuple(publication.values())) -def retrieve_type_trait_name(trait_type, threshold, name, connection): +def retrieve_trait_dataset_name(trait_type, threshold, name, connection): """ Retrieve the name of a trait given the trait's name @@ -112,3 +112,134 @@ def retrieve_type_trait_name(trait_type, threshold, name, connection): with connection.cursor() as cursor: cursor.execute(query, {"threshold": threshold, "name": name}) return cursor.fetchone() + +PUBLISH_TRAIT_INFO_QUERY = ( + "SELECT " + "PublishXRef.Id, Publication.PubMed_ID, " + "Phenotype.Pre_publication_description, " + "Phenotype.Post_publication_description, " + "Phenotype.Original_description, " + "Phenotype.Pre_publication_abbreviation, " + "Phenotype.Post_publication_abbreviation, " + "Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, " + 
"Phenotype.Authorized_Users, CAST(Publication.Authors AS BINARY), " + "Publication.Title, Publication.Abstract, Publication.Journal, " + "Publication.Volume, Publication.Pages, Publication.Month, " + "Publication.Year, PublishXRef.Sequence, Phenotype.Units, " + "PublishXRef.comments " + "FROM " + "PublishXRef, Publication, Phenotype, PublishFreeze " + "WHERE " + "PublishXRef.Id = %(trait_name)s AND " + "Phenotype.Id = PublishXRef.PhenotypeId AND " + "Publication.Id = PublishXRef.PublicationId AND " + "PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND " + "PublishFreeze.Id =%(trait_dataset_id)s") + +def retrieve_publish_trait_info(trait_data_source, conn): + """Retrieve trait information for type `Publish` traits. + + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L399-L421""" + with conn.cursor() as cursor: + cursor.execute( + PUBLISH_TRAIT_INFO_QUERY, + { + k:v for k, v in trait_data_source.items() + if k in ["trait_name", "trait_dataset_id"] + }) + return cursor.fetchone() + +PROBESET_TRAIT_INFO_QUERY = ( + "SELECT " + "ProbeSet.name, ProbeSet.symbol, ProbeSet.description, " + "ProbeSet.probe_target_description, ProbeSet.chr, ProbeSet.mb, " + "ProbeSet.alias, ProbeSet.geneid, ProbeSet.genbankid, ProbeSet.unigeneid, " + "ProbeSet.omim, ProbeSet.refseq_transcriptid, ProbeSet.blatseq, " + "ProbeSet.targetseq, ProbeSet.chipid, ProbeSet.comments, " + "ProbeSet.strand_probe, ProbeSet.strand_gene, " + "ProbeSet.probe_set_target_region, ProbeSet.proteinid, " + "ProbeSet.probe_set_specificity, ProbeSet.probe_set_blat_score, " + "ProbeSet.probe_set_blat_mb_start, ProbeSet.probe_set_blat_mb_end, " + "ProbeSet.probe_set_strand, ProbeSet.probe_set_note_by_rw, " + "ProbeSet.flag " + "FROM " + "ProbeSet, ProbeSetFreeze, ProbeSetXRef " + "WHERE " + "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND " + "ProbeSetXRef.ProbeSetId = ProbeSet.Id AND " + "ProbeSetFreeze.Name = %(trait_dataset_name)s AND " + "ProbeSet.Name = 
%(trait_name)s") + +def retrieve_probeset_trait_info(trait_data_source, conn): + """Retrieve trait information for type `ProbeSet` traits. + + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L424-L435""" + with conn.cursor() as cursor: + cursor.execute( + PROBESET_TRAIT_INFO_QUERY, + { + k:v for k, v in trait_data_source.items() + if k in ["trait_name", "trait_dataset_name"] + }) + return cursor.fetchone() + +GENO_TRAIT_INFO_QUERY = ( + "SELECT " + "Geno.name, Geno.chr, Geno.mb, Geno.source2, Geno.sequence " + "FROM " + "Geno, GenoFreeze, GenoXRef " + "WHERE " + "GenoXRef.GenoFreezeId = GenoFreeze.Id AND GenoXRef.GenoId = Geno.Id AND " + "GenoFreeze.Name = %(trait_dataset_name)s AND Geno.Name = %(trait_name)s") + +def retrieve_geno_trait_info(trait_data_source, conn): + """Retrieve trait information for type `Geno` traits. + + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L438-L449""" + with conn.cursor() as cursor: + cursor.execute( + GENO_TRAIT_INFO_QUERY, + { + k:v for k, v in trait_data_source.items() + if k in ["trait_name", "trait_dataset_name"] + }) + return cursor.fetchone() + +TEMP_TRAIT_INFO_QUERY = ( + "SELECT name, description FROM Temp " + "WHERE Name = %(trait_name)s") + +def retrieve_temp_trait_info(trait_data_source, conn): + """Retrieve trait information for type `Temp` traits. + + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L450-L452""" + with conn.cursor() as cursor: + cursor.execute( + TEMP_TRAIT_INFO_QUERY, + { + k:v for k, v in trait_data_source.items() + if k in ["trait_name"] + }) + return cursor.fetchone() + +def retrieve_trait_info( + trait_type, trait_name, trait_dataset_id, trait_dataset_name, conn): + """Retrieves the trait information.
+ + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L397-L456 + + This function, or the dependent functions, might be incomplete as they are + currently.""" + trait_info_function_table = { + "Publish": retrieve_publish_trait_info, + "ProbeSet": retrieve_probeset_trait_info, + "Geno": retrieve_geno_trait_info, + "Temp": retrieve_temp_trait_info + } + return trait_info_function_table[trait_type]( + { + "trait_name": trait_name, + "trait_dataset_id": trait_dataset_id, + "trait_dataset_name":trait_dataset_name + }, + conn) -- cgit v1.2.3