From 0931212bc692177cfc0ebcf016bc869dd4f88fd8 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Tue, 27 Nov 2012 14:44:14 -0600 Subject: Renamed webqtlDataSet.py to data_set.py Renamed the class webqtlDataset to DataSet Finished cisLRS and transLRS search types in d_search.py Fixed parent/f1 issue in show_trait.py --- wqflask/base/data_set.py | 162 +++++++++++++++++++++++++++++++ wqflask/base/webqtlDataset.py | 157 ------------------------------ wqflask/wqflask/do_search.py | 137 +++++++++++++------------- wqflask/wqflask/search_results.py | 34 ++----- wqflask/wqflask/show_trait/show_trait.py | 11 +-- 5 files changed, 247 insertions(+), 254 deletions(-) create mode 100755 wqflask/base/data_set.py delete mode 100755 wqflask/base/webqtlDataset.py (limited to 'wqflask') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py new file mode 100755 index 00000000..992c673e --- /dev/null +++ b/wqflask/base/data_set.py @@ -0,0 +1,162 @@ +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License +# as published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the GNU Affero General Public License for more details. +# +# This program is available from Source Forge: at GeneNetwork Project +# (sourceforge.net/projects/genenetwork/). +# +# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) +# at rwilliams@uthsc.edu and xzhou15@uthsc.edu +# +# +# +# This module is used by GeneNetwork project (www.genenetwork.org) +# +# Created by GeneNetwork Core Team 2010/08/10 +# +# Last updated by GeneNetwork Core Team 2010/10/20 + +from htmlgen import HTMLgen2 as HT + +import webqtlConfig + + + +class DataSet(object): + """ + Dataset class defines a dataset in webqtl, can be either Microarray, + Published phenotype, genotype, or user input dataset(temp) + + """ + + def __init__(self, dbName, cursor=None): + + assert dbName + self.id = 0 + self.name = '' + self.type = '' + self.group = '' + self.cursor = cursor + + #temporary storage + if dbName.find('Temp') >= 0: + self.searchfield = ['name','description'] + self.disfield = ['name','description'] + self.type = 'Temp' + self.id = 1 + self.fullname = 'Temporary Storage' + self.shortname = 'Temp' + elif dbName.find('Publish') >= 0: + pass + elif dbName.find('Geno') >= 0: + self.searchfield = ['name','chr'] + self.disfield = ['name','chr','mb', 'source2', 'sequence'] + self.type = 'Geno' + else: #ProbeSet + self.searchfield = ['name','description','probe_target_description', + 'symbol','alias','genbankid','unigeneid','omim', + 'refseq_transcriptid','probe_set_specificity', 'probe_set_blat_score'] + self.disfield = ['name','symbol','description','probe_target_description', + 'chr','mb','alias','geneid','genbankid', 'unigeneid', 'omim', + 'refseq_transcriptid','blatseq','targetseq','chipid', 'comments', + 'strand_probe','strand_gene','probe_set_target_region', + 'probe_set_specificity', 'probe_set_blat_score','probe_set_blat_mb_start', + 'probe_set_blat_mb_end', 'probe_set_strand', + 'probe_set_note_by_rw', 'flag'] + self.type = 'ProbeSet' + self.name = dbName + if self.cursor and self.id == 0: + self.retrieveName() + + + # Delete this eventually + @property + def riset(): + Weve_Renamed_This_As_Group + + + def get_group(self): + assert self.cursor + if self.type == 'Publish': + query = ''' + SELECT + InbredSet.Name, InbredSet.Id + FROM + InbredSet, PublishFreeze + WHERE + PublishFreeze.InbredSetId = InbredSet.Id AND + PublishFreeze.Name = "%s" + ''' % self.name + elif self.type == 'Geno': + query = ''' + SELECT + InbredSet.Name, InbredSet.Id + FROM + InbredSet, GenoFreeze + WHERE + GenoFreeze.InbredSetId = InbredSet.Id AND + GenoFreeze.Name = "%s" + ''' % self.name + elif self.type == 'ProbeSet': + query = ''' + SELECT + InbredSet.Name, InbredSet.Id + FROM + InbredSet, ProbeSetFreeze, ProbeFreeze + WHERE + ProbeFreeze.InbredSetId = InbredSet.Id AND + ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND + ProbeSetFreeze.Name = "%s" + ''' % self.name + else: + return "" + self.cursor.execute(query) + group, RIID = self.cursor.fetchone() + if group == 'BXD300': + group = "BXD" + self.group = group + self.group_id = RIID + return group + + + def retrieveName(self): + assert self.id == 0 and self.cursor + query = ''' + SELECT + Id, Name, FullName, ShortName + FROM + %sFreeze + WHERE + public > %d AND + (Name = "%s" OR FullName = "%s" OR ShortName = "%s") + '''% (self.type, webqtlConfig.PUBLICTHRESH, self.name, self.name, self.name) + try: + self.cursor.execute(query) + self.id,self.name,self.fullname,self.shortname=self.cursor.fetchone() + except: + raise KeyError, `self.name`+' doesn\'t exist.' + + + def genHTML(self, Class='c0dd'): + return HT.Href(text = HT.Span('%s Database' % self.fullname, Class= "fwb " + Class), + url= webqtlConfig.INFOPAGEHREF % self.name,target="_blank") + +class PhenotypeDataSet(DataSet): + + def __init__(self): + self.searchfield = ['name','post_publication_description','abstract','title','authors'] + self.disfield = ['name','pubmed_id', + 'pre_publication_description', 'post_publication_description', 'original_description', + 'pre_publication_abbreviation', 'post_publication_abbreviation', + 'lab_code', 'submitter', 'owner', 'authorized_users', + 'authors','title','abstract', 'journal','volume','pages','month', + 'year','sequence', 'units', 'comments'] + self.type = 'Publish' \ No newline at end of file diff --git a/wqflask/base/webqtlDataset.py b/wqflask/base/webqtlDataset.py deleted file mode 100755 index 933077fd..00000000 --- a/wqflask/base/webqtlDataset.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU Affero General Public License -# as published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the GNU Affero General Public License for more details. -# -# This program is available from Source Forge: at GeneNetwork Project -# (sourceforge.net/projects/genenetwork/). -# -# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) -# at rwilliams@uthsc.edu and xzhou15@uthsc.edu -# -# -# -# This module is used by GeneNetwork project (www.genenetwork.org) -# -# Created by GeneNetwork Core Team 2010/08/10 -# -# Last updated by GeneNetwork Core Team 2010/10/20 - -from htmlgen import HTMLgen2 as HT - -import webqtlConfig - - - -class webqtlDataset: - """ - Dataset class defines a dataset in webqtl, can be either Microarray, - Published phenotype, genotype, or user input dataset(temp) - - """ - - def __init__(self, dbName, cursor=None): - - assert dbName - self.id = 0 - self.name = '' - self.type = '' - self.group = '' - self.cursor = cursor - - #temporary storage - if dbName.find('Temp') >= 0: - self.searchfield = ['name','description'] - self.disfield = ['name','description'] - self.type = 'Temp' - self.id = 1 - self.fullname = 'Temporary Storage' - self.shortname = 'Temp' - elif dbName.find('Publish') >= 0: - self.searchfield = ['name','post_publication_description','abstract','title','authors'] - self.disfield = ['name','pubmed_id', - 'pre_publication_description', 'post_publication_description', 'original_description', - 'pre_publication_abbreviation', 'post_publication_abbreviation', - 'lab_code', 'submitter', 'owner', 'authorized_users', - 'authors','title','abstract', 'journal','volume','pages','month', - 'year','sequence', 'units', 'comments'] - self.type = 'Publish' - elif dbName.find('Geno') >= 0: - self.searchfield = ['name','chr'] - self.disfield = ['name','chr','mb', 'source2', 'sequence'] - self.type = 'Geno' - else: #ProbeSet - self.searchfield = ['name','description','probe_target_description', - 'symbol','alias','genbankid','unigeneid','omim', - 'refseq_transcriptid','probe_set_specificity', 'probe_set_blat_score'] - self.disfield = ['name','symbol','description','probe_target_description', - 'chr','mb','alias','geneid','genbankid', 'unigeneid', 'omim', - 'refseq_transcriptid','blatseq','targetseq','chipid', 'comments', - 'strand_probe','strand_gene','probe_set_target_region', - 'probe_set_specificity', 'probe_set_blat_score','probe_set_blat_mb_start', - 'probe_set_blat_mb_end', 'probe_set_strand', - 'probe_set_note_by_rw', 'flag'] - self.type = 'ProbeSet' - self.name = dbName - if self.cursor and self.id == 0: - self.retrieveName() - - - # Delete this eventually - @property - def riset(): - Weve_Renamed_This_As_Group - - - def get_group(self): - assert self.cursor - if self.type == 'Publish': - query = ''' - SELECT - InbredSet.Name, InbredSet.Id - FROM - InbredSet, PublishFreeze - WHERE - PublishFreeze.InbredSetId = InbredSet.Id AND - PublishFreeze.Name = "%s" - ''' % self.name - elif self.type == 'Geno': - query = ''' - SELECT - InbredSet.Name, InbredSet.Id - FROM - InbredSet, GenoFreeze - WHERE - GenoFreeze.InbredSetId = InbredSet.Id AND - GenoFreeze.Name = "%s" - ''' % self.name - elif self.type == 'ProbeSet': - query = ''' - SELECT - InbredSet.Name, InbredSet.Id - FROM - InbredSet, ProbeSetFreeze, ProbeFreeze - WHERE - ProbeFreeze.InbredSetId = InbredSet.Id AND - ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND - ProbeSetFreeze.Name = "%s" - ''' % self.name - else: - return "" - self.cursor.execute(query) - group, RIID = self.cursor.fetchone() - if group == 'BXD300': - group = "BXD" - self.group = group - self.group_id = RIID - return group - - - def retrieveName(self): - assert self.id == 0 and self.cursor - query = ''' - SELECT - Id, Name, FullName, ShortName - FROM - %sFreeze - WHERE - public > %d AND - (Name = "%s" OR FullName = "%s" OR ShortName = "%s") - '''% (self.type, webqtlConfig.PUBLICTHRESH, self.name, self.name, self.name) - try: - self.cursor.execute(query) - self.id,self.name,self.fullname,self.shortname=self.cursor.fetchone() - except: - raise KeyError, `self.name`+' doesn\'t exist.' - - - def genHTML(self, Class='c0dd'): - return HT.Href(text = HT.Span('%s Database' % self.fullname, Class= "fwb " + Class), - url= webqtlConfig.INFOPAGEHREF % self.name,target="_blank") diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py index fd03f359..e2bafb3a 100644 --- a/wqflask/wqflask/do_search.py +++ b/wqflask/wqflask/do_search.py @@ -49,65 +49,6 @@ class DoSearch(object): return cls.search_types[search_type] -class ProbeSetSearch(DoSearch): - """A search within an mRNA expression dataset""" - - DoSearch.search_types['ProbeSet'] = "ProbeSetSearch" - - base_query = """SELECT ProbeSet.Name as TNAME, - 0 as thistable, - ProbeSetXRef.Mean as TMEAN, - ProbeSetXRef.LRS as TLRS, - ProbeSetXRef.PVALUE as TPVALUE, - ProbeSet.Chr_num as TCHR_NUM, - ProbeSet.Mb as TMB, - ProbeSet.Symbol as TSYMBOL, - ProbeSet.name_num as TNAME_NUM - FROM ProbeSetXRef, ProbeSet """ - - - def compile_final_query(self, from_clause = '', where_clause = ''): - """Generates the final query string""" - - from_clause = '' - from_clause = self.normalize_spaces(from_clause) - - query = (self.base_query + - """%s - WHERE %s - and ProbeSet.Id = ProbeSetXRef.ProbeSetId - and ProbeSetXRef.ProbeSetFreezeId = %s - """ % (self.escape(from_clause), - where_clause, - self.escape(self.dataset.id))) - - print("query is:", pf(query)) - - return query - - def run(self): - """Generates and runs a simple search of an mRNA expression dataset""" - - print("Running ProbeSetSearch") - query = (self.base_query + - """WHERE (MATCH (ProbeSet.Name, - ProbeSet.description, - ProbeSet.symbol, - alias, - GenbankId, - UniGeneId, - Probe_Target_Description) - AGAINST ('%s' IN BOOLEAN MODE)) - and ProbeSet.Id = ProbeSetXRef.ProbeSetId - and ProbeSetXRef.ProbeSetFreezeId = %s - """ % (self.escape(self.search_term), - self.escape(self.dataset.id))) - - print("final query is:", pf(query)) - - return self.execute(query) - - class PhenotypeSearch(DoSearch): """A search within a phenotype dataset""" @@ -212,6 +153,65 @@ class GenotypeSearch(DoSearch): return self.execute(query) + +class ProbeSetSearch(DoSearch): + """A search within an mRNA expression dataset""" + + DoSearch.search_types['ProbeSet'] = "ProbeSetSearch" + + base_query = """SELECT ProbeSet.Name as TNAME, + 0 as thistable, + ProbeSetXRef.Mean as TMEAN, + ProbeSetXRef.LRS as TLRS, + ProbeSetXRef.PVALUE as TPVALUE, + ProbeSet.Chr_num as TCHR_NUM, + ProbeSet.Mb as TMB, + ProbeSet.Symbol as TSYMBOL, + ProbeSet.name_num as TNAME_NUM + FROM ProbeSetXRef, ProbeSet """ + + + def compile_final_query(self, from_clause, where_clause): + """Generates the final query string""" + + from_clause = self.normalize_spaces(from_clause) + + query = (self.normalize_spaces(self.base_query) + + """%s + WHERE %s + and ProbeSet.Id = ProbeSetXRef.ProbeSetId + and ProbeSetXRef.ProbeSetFreezeId = %s + """ % (self.escape(from_clause), + where_clause, + self.escape(self.dataset.id))) + + print("query is:", pf(query)) + + return query + + def run(self): + """Generates and runs a simple search of an mRNA expression dataset""" + + print("Running ProbeSetSearch") + query = (self.base_query + + """WHERE (MATCH (ProbeSet.Name, + ProbeSet.description, + ProbeSet.symbol, + alias, + GenbankId, + UniGeneId, + Probe_Target_Description) + AGAINST ('%s' IN BOOLEAN MODE)) + and ProbeSet.Id = ProbeSetXRef.ProbeSetId + and ProbeSetXRef.ProbeSetFreezeId = %s + """ % (self.escape(self.search_term), + self.escape(self.dataset.id))) + + print("final query is:", pf(query)) + + return self.execute(query) + + class RifSearch(ProbeSetSearch): """Searches for traits with a Gene RIF entry including the search term.""" @@ -283,7 +283,7 @@ class LrsSearch(ProbeSetSearch): DoSearch.search_types['LRS'] = 'LrsSearch' -class CisLrsSearch(LrsSearch): +class CisLrsSearch(ProbeSetSearch): """Searches for genes on a particular chromosome with a cis-eQTL within the given LRS values A cisLRS search can take 2 forms: @@ -305,9 +305,12 @@ class CisLrsSearch(LrsSearch): DoSearch.search_types['CISLRS'] = "CisLrsSearch" def run(self): + + from_clause = ", Geno " + if len(self.search_term) == 3: lower_limit, upper_limit, min_threshold = [int(value) for value in self.search_term] - + where_clause = """ %sXRef.LRS > %s and %sXRef.LRS < %s and %sXRef.Locus = Geno.name and @@ -327,7 +330,7 @@ class CisLrsSearch(LrsSearch): else: NeedSomeErrorHere - query = self.compile_final_query(where_clause) + query = self.compile_final_query(from_clause, where_clause) return self.execute(query) @@ -353,6 +356,8 @@ class TransLrsSearch(LrsSearch): DoSearch.search_types['TRANSLRS'] = "TransLrsSearch" def run(self): + from_clause = ", Geno " + if len(self.search_term) == 3: lower_limit, upper_limit, min_threshold = [int(value) for value in self.search_term] @@ -374,9 +379,11 @@ class TransLrsSearch(LrsSearch): ) else: - NeedSomeErrorHere + NeedSomeErrorHere - return None + query = self.compile_final_query(from_clause, where_clause) + + return self.execute(query) #itemCmd = item[0] @@ -447,7 +454,7 @@ if __name__ == "__main__": #results = ProbeSetSearch("salt", dataset, cursor, db_conn).run() #results = RifSearch("diabetes", dataset, cursor, db_conn).run() #results = WikiSearch("nicotine", dataset, cursor, db_conn).run() - results = CisLrsSearch(['9','99','10'], dataset, cursor, db_conn).run() + results = TransLrsSearch(['25','99','10'], dataset, cursor, db_conn).run() #results = TransLrsSearch(['9', '999', '10'], dataset, cursor, db_conn).run() #results = PhenotypeSearch("brain", dataset, cursor, db_conn).run() #results = GenotypeSearch("rs13475699", dataset, cursor, db_conn).run() diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index dc3c72fc..05f062fc 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -84,16 +84,7 @@ class SearchResultPage(templatePage): print("self.dataset is:", pf(self.dataset)) self.dataset = webqtlDataset(self.dataset, self.cursor) print("self.dataset is now:", pf(self.dataset)) - #self.dataset = map(lambda x: webqtlDataset(x, self.cursor), self.dataset) - #currently, webqtl won't allow multiple crosses - #for other than multiple publish db search - #so we can use the first dataset as example - #if self.dataset.type=="Publish": - # pass if self.dataset.type in ("Geno", "ProbeSet"): - - #userExist = None - # Can't use paramater substitution for table names apparently db_type = self.dataset.type + "Freeze" print("db_type [%s]: %s" % (type(db_type), db_type)) @@ -124,11 +115,8 @@ class SearchResultPage(templatePage): # access_to_confidential_dataset = 1 # #if not access_to_confidential_dataset: - # #Error, No dataset selected - # heading = "Search Result" - # detail = ["The %s dataset you selected is not open to the public at this time, please go back and SELECT other dataset." % indFullName] - # self.error(heading=heading,detail=detail,error="Confidential dataset") - # return + # Some error + #else: # heading = "Search Result" # detail = ['''The dataset has not been established yet, please @@ -180,7 +168,8 @@ class SearchResultPage(templatePage): 'Max LRS', 'Max LRS Location'] elif self.dataset.type == "Geno": - self.search_fields = ['Name','Chr'] + self.search_fields = ['Name', + 'Chr'] self.header_fields = ['', 'ID', 'Location'] @@ -241,7 +230,6 @@ class SearchResultPage(templatePage): # This is throwing an error when a_search['key'] is None, so I changed above #search_type = string.upper(a_search['key']) #if not search_type: - # # We fall back to the dataset type as the key to get the right object # search_type = self.dataset.type search_ob = do_search.DoSearch.get_search(search_type) @@ -273,7 +261,6 @@ class SearchResultPage(templatePage): def getTraitInfoForGeno(self, trait_list): - for this_trait in trait_list: if not this_trait.haveinfo: this_trait.retrieveInfo() @@ -295,8 +282,7 @@ class SearchResultPage(templatePage): this_trait.location_value = trait_location_value - def getTraitInfoForPublish(self, trait_list, species=''): - + def getTraitInfoForPublish(self, trait_list, species = ''): for this_trait in trait_list: if not this_trait.haveinfo: this_trait.retrieveInfo(QTL=1) @@ -307,18 +293,16 @@ class SearchResultPage(templatePage): description = this_trait.pre_publication_description this_trait.description_display = description - try: - this_trait.pubmed_text = int(this_trait.year) - except: + if not this_trait.year.isdigit(): this_trait.pubmed_text = "N/A" if this_trait.pubmed_id: this_trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % this_trait.pubmed_id #LRS and its location - this_trait.LRS_score_repr = 'N/A' + this_trait.LRS_score_repr = "N/A" this_trait.LRS_score_value = 0 - this_trait.LRS_location_repr = 'N/A' + this_trait.LRS_location_repr = "N/A" this_trait.LRS_location_value = 1000000 if this_trait.lrs: @@ -408,7 +392,7 @@ class SearchResultPage(templatePage): ProbeSet.Name = '%s' """ % (self.db_conn.escape_string(str(this_trait.db.id)), self.db_conn.escape_string(this_trait.name))) - + print("query is:", pf(query)) self.cursor.execute(query) diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index 19e67c43..3dac5933 100755 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -1509,10 +1509,10 @@ class ShowTrait(templatePage): def make_sample_lists(self, fd, variance_data_page, this_trait): - if fd.genotype.type == "riset": - all_samples_ordered = fd.f1list + fd.samplelist - else: + if fd.parlist: all_samples_ordered = fd.parlist + fd.f1list + fd.samplelist + else: + all_samples_ordered = fd.f1list + fd.samplelist this_trait_samples = set(this_trait.data.keys()) @@ -1527,8 +1527,6 @@ class ShowTrait(templatePage): this_trait=this_trait, sample_group_type='primary', header="%s Only" % (fd.RISet)) - - print("primary_samples.attributes:", pf(primary_samples.attributes)) other_sample_names = [] for sample in this_trait.data.keys(): @@ -1538,8 +1536,7 @@ class ShowTrait(templatePage): other_sample_names.append(sample) if other_sample_names: - unappended_par_f1 = fd.f1list + fd.parlist - par_f1_samples = ["_2nd_" + sample for sample in unappended_par_f1] + par_f1_samples = fd.parlist + fd.f1list other_sample_names.sort() #Sort other samples other_sample_names = par_f1_samples + other_sample_names -- cgit v1.2.3