diff options
Diffstat (limited to 'gn3/db/traits.py')
-rw-r--r-- | gn3/db/traits.py | 141 |
1 files changed, 75 insertions, 66 deletions
diff --git a/gn3/db/traits.py b/gn3/db/traits.py index ae1939a..9742fa2 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -100,119 +100,128 @@ def retrieve_trait_dataset_name( cursor.execute(query, {"threshold": threshold, "name": name}) return cursor.fetchone() -PUBLISH_TRAIT_INFO_QUERY = ( - "SELECT " - "PublishXRef.Id, Publication.PubMed_ID, " - "Phenotype.Pre_publication_description, " - "Phenotype.Post_publication_description, " - "Phenotype.Original_description, " - "Phenotype.Pre_publication_abbreviation, " - "Phenotype.Post_publication_abbreviation, " - "Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, " - "Phenotype.Authorized_Users, CAST(Publication.Authors AS BINARY), " - "Publication.Title, Publication.Abstract, Publication.Journal, " - "Publication.Volume, Publication.Pages, Publication.Month, " - "Publication.Year, PublishXRef.Sequence, Phenotype.Units, " - "PublishXRef.comments " - "FROM " - "PublishXRef, Publication, Phenotype, PublishFreeze " - "WHERE " - "PublishXRef.Id = %(trait_name)s AND " - "Phenotype.Id = PublishXRef.PhenotypeId AND " - "Publication.Id = PublishXRef.PublicationId AND " - "PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND " - "PublishFreeze.Id =%(trait_dataset_id)s") - def retrieve_publish_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `Publish` traits. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L399-L421""" + keys = ( + "Id", "PubMed_ID", "Pre_publication_description", + "Post_publication_description", "Original_description", + "Pre_publication_abbreviation", "Post_publication_abbreviation", + "Lab_code", "Submitter", "Owner", "Authorized_Users", "Authors", + "Title", "Abstract", "Journal", "Volume", "Pages", "Month", "Year", + "Sequence", "Units", "comments") + columns = ( + "PublishXRef.Id, Publication.PubMed_ID, " + "Phenotype.Pre_publication_description, " + "Phenotype.Post_publication_description, " + "Phenotype.Original_description, " + "Phenotype.Pre_publication_abbreviation, " + "Phenotype.Post_publication_abbreviation, " + "Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, " + "Phenotype.Authorized_Users, CAST(Publication.Authors AS BINARY), " + "Publication.Title, Publication.Abstract, Publication.Journal, " + "Publication.Volume, Publication.Pages, Publication.Month, " + "Publication.Year, PublishXRef.Sequence, Phenotype.Units, " + "PublishXRef.comments") + query = ( + "SELECT " + "{columns} " + "FROM " + "PublishXRef, Publication, Phenotype, PublishFreeze " + "WHERE " + "PublishXRef.Id = %(trait_name)s AND " + "Phenotype.Id = PublishXRef.PhenotypeId AND " + "Publication.Id = PublishXRef.PublicationId AND " + "PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND " + "PublishFreeze.Id =%(trait_dataset_id)s").format( + columns = columns) with conn.cursor() as cursor: cursor.execute( - PUBLISH_TRAIT_INFO_QUERY, + query, { k:v for k, v in trait_data_source.items() if k in ["trait_name", "trait_dataset_id"] }) - return cursor.fetchone() - -PROBESET_TRAIT_INFO_QUERY = ( - "SELECT " - "ProbeSet.name, ProbeSet.symbol, ProbeSet.description, " - "ProbeSet.probe_target_description, ProbeSet.chr, ProbeSet.mb, " - "ProbeSet.alias, ProbeSet.geneid, ProbeSet.genbankid, ProbeSet.unigeneid, " - "ProbeSet.omim, ProbeSet.refseq_transcriptid, ProbeSet.blatseq, " - "ProbeSet.targetseq, ProbeSet.chipid, ProbeSet.comments, " - "ProbeSet.strand_probe, ProbeSet.strand_gene, " - "ProbeSet.probe_set_target_region, ProbeSet.proteinid, " - "ProbeSet.probe_set_specificity, ProbeSet.probe_set_blat_score, " - "ProbeSet.probe_set_blat_mb_start, ProbeSet.probe_set_blat_mb_end, " - "ProbeSet.probe_set_strand, ProbeSet.probe_set_note_by_rw, " - "ProbeSet.flag " - "FROM " - "ProbeSet, ProbeSetFreeze, ProbeSetXRef " - "WHERE " - "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND " - "ProbeSetXRef.ProbeSetId = ProbeSet.Id AND " - "ProbeSetFreeze.Name = %(trait_dataset_name)s AND " - "ProbeSet.Name = %(trait_name)s") + return dict(zip((k.lower() for k in keys), cursor.fetchone())) def retrieve_probeset_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `ProbeSet` traits. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L424-L435""" + keys = ( + "name", "symbol", "description", "probe_target_description", "chr", + "mb", "alias", "geneid", "genbankid", "unigeneid", "omim", + "refseq_transcriptid", "blatseq", "targetseq", "chipid", "comments", + "strand_probe", "strand_gene", "probe_set_target_region", "proteinid", + "probe_set_specificity", "probe_set_blat_score", + "probe_set_blat_mb_start", "probe_set_blat_mb_end", "probe_set_strand", + "probe_set_note_by_rw", "flag") + query = ( + "SELECT " + "{columns} " + "FROM " + "ProbeSet, ProbeSetFreeze, ProbeSetXRef " + "WHERE " + "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND " + "ProbeSetXRef.ProbeSetId = ProbeSet.Id AND " + "ProbeSetFreeze.Name = %(trait_dataset_name)s AND " + "ProbeSet.Name = %(trait_name)s").format( + columns = ", ".join(["ProbeSet.{}".format(x) for x in keys])) with conn.cursor() as cursor: cursor.execute( - PROBESET_TRAIT_INFO_QUERY, + query, { k:v for k, v in trait_data_source.items() if k in ["trait_name", "trait_dataset_name"] }) - return cursor.fetchone() - -GENO_TRAIT_INFO_QUERY = ( - "SELECT " - "Geno.name, Geno.chr, Geno.mb, Geno.source2, Geno.sequence " - "FROM " - "Geno, GenoFreeze, GenoXRef " - "WHERE " - "GenoXRef.GenoFreezeId = GenoFreeze.Id AND GenoXRef.GenoId = Geno.Id AND " - "GenoFreeze.Name = %(trait_dataset_name)s AND Geno.Name = %(trait_name)s") + return dict(zip(keys, cursor.fetchone())) def retrieve_geno_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `Geno` traits. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L438-L449""" + keys = ("name", "chr", "mb", "source2", "sequence") + query = ( + "SELECT " + "{columns} " + "FROM " + "Geno, GenoFreeze, GenoXRef " + "WHERE " + "GenoXRef.GenoFreezeId = GenoFreeze.Id AND GenoXRef.GenoId = Geno.Id AND " + "GenoFreeze.Name = %(trait_dataset_name)s AND " + "Geno.Name = %(trait_name)s").format( + columns = ", ".join(["Geno.{}".format(x) for x in keys])) with conn.cursor() as cursor: cursor.execute( - GENO_TRAIT_INFO_QUERY, + query, { k:v for k, v in trait_data_source.items() if k in ["trait_name", "trait_dataset_name"] }) - return cursor.fetchone() - -TEMP_TRAIT_INFO_QUERY = ( - "SELECT name, description FROM Temp " - "WHERE Name = %(trait_name)s") + return dict(zip(keys, cursor.fetchone())) def retrieve_temp_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `Temp` traits. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L450-452""" + keys = ("name", "description") + query = ( + "SELECT {columns} FROM Temp " + "WHERE Name = %(trait_name)s").format(columns = ", ".join(keys)) with conn.cursor() as cursor: cursor.execute( - TEMP_TRAIT_INFO_QUERY, + query, { k:v for k, v in trait_data_source.items() if k in ["trait_name"] }) - return cursor.fetchone() + return dict(zip(keys, cursor.fetchone())) def retrieve_trait_info( trait_type: str, trait_name: str, trait_dataset_id: int, - trait_dataset_name: str, conn: Any): + trait_dataset_name: str, conn: Any, QTL = None): """Retrieves the trait information. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L397-L456 |