From 94dd9844fb55f4576d3a079e9d5e59ebbf911b8c Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Tue, 27 Nov 2012 17:59:17 -0600 Subject: Created subclass for each main data set type and moved the code for getting trait info that was in search_results.py into its respective class Renamed webqtlDataset to DataSet/create_dataset in webqtlTrait.py, webqtlDatabaseFunction.py, and CorrelationPage.py Got search page running again for mRNA assay data sets with these changes --- web/webqtl/search/SearchResultPage.py | 6 +- wqflask/base/data_set.py | 556 ++++++++++++++++++++----- wqflask/base/webqtlTrait.py | 25 +- wqflask/dbFunction/webqtlDatabaseFunction.py | 23 +- wqflask/wqflask/correlation/CorrelationPage.py | 4 +- wqflask/wqflask/do_search.py | 22 +- wqflask/wqflask/search_results.py | 325 +-------------- 7 files changed, 514 insertions(+), 447 deletions(-) diff --git a/web/webqtl/search/SearchResultPage.py b/web/webqtl/search/SearchResultPage.py index 029a54c4..d62bb449 100755 --- a/web/webqtl/search/SearchResultPage.py +++ b/web/webqtl/search/SearchResultPage.py @@ -14,7 +14,7 @@ from htmlgen import HTMLgen2 as HT from base import webqtlConfig from utility.THCell import THCell from utility.TDCell import TDCell -from base.webqtlDataset import webqtlDataset +from base.data_set import DataSet from base.webqtlTrait import webqtlTrait from base.templatePage import templatePage from utility import webqtlUtil @@ -65,12 +65,12 @@ class SearchResultPage(templatePage): InbredSet where PublishFreeze.Name not like 'BXD300%' and InbredSet.Id = PublishFreeze.InbredSetId""") results = self.cursor.fetchall() - self.database = map(lambda x: webqtlDataset(x[0], self.cursor), results) + self.database = map(lambda x: DataSet(x[0], self.cursor), results) self.databaseCrosses = map(lambda x: x[1], results) self.databaseCrossIds = map(lambda x: x[2], results) self.singleCross = False else: - self.database = map(lambda x: webqtlDataset(x, self.cursor), self.database) + self.database = map(lambda x: DataSet(x, self.cursor), self.database) #currently, webqtl wouldn't allow multiple crosses #for other than multiple publish db search #so we can use the first database as example diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 992c673e..9e3e6d81 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -19,64 +19,64 @@ # # # This module is used by GeneNetwork project (www.genenetwork.org) -# -# Created by GeneNetwork Core Team 2010/08/10 -# -# Last updated by GeneNetwork Core Team 2010/10/20 + +from __future__ import print_function, division from htmlgen import HTMLgen2 as HT import webqtlConfig +from pprint import pformat as pf +# Used by create_database to instantiate objects +DS_NAME_MAP = {} + +def create_dataset(db_conn, dataset_name): + cursor = db_conn.cursor() + cursor.execute(""" + SELECT DBType.Name + FROM DBList, DBType + WHERE DBList.Name = %s and + DBType.Id = DBList.DBTypeId + """, (dataset_name)) + print("dataset_name:", dataset_name) + dataset_type = cursor.fetchone()[0] + print("dataset_type:", pf(dataset_type)) + + dataset_ob = DS_NAME_MAP[dataset_type] + #dataset_class = getattr(data_set, dataset_ob) + + print("DS_NAME_MAP:", pf(DS_NAME_MAP)) + + dataset_class = globals()[dataset_ob] + return dataset_class(dataset_name, db_conn) class DataSet(object): """ - Dataset class defines a dataset in webqtl, can be either Microarray, + DataSet class defines a dataset in webqtl, can be either Microarray, Published phenotype, genotype, or user input dataset(temp) """ - def __init__(self, dbName, cursor=None): - - assert dbName - self.id = 0 - self.name = '' - self.type = '' - self.group = '' - self.cursor = cursor - - #temporary storage - if dbName.find('Temp') >= 0: - self.searchfield = ['name','description'] - self.disfield = ['name','description'] - self.type = 'Temp' - self.id = 1 - self.fullname = 'Temporary Storage' - self.shortname = 'Temp' - elif dbName.find('Publish') >= 0: - pass - elif dbName.find('Geno') >= 0: - self.searchfield = ['name','chr'] - self.disfield = ['name','chr','mb', 'source2', 'sequence'] - self.type = 'Geno' - else: #ProbeSet - self.searchfield = ['name','description','probe_target_description', - 'symbol','alias','genbankid','unigeneid','omim', - 'refseq_transcriptid','probe_set_specificity', 'probe_set_blat_score'] - self.disfield = ['name','symbol','description','probe_target_description', - 'chr','mb','alias','geneid','genbankid', 'unigeneid', 'omim', - 'refseq_transcriptid','blatseq','targetseq','chipid', 'comments', - 'strand_probe','strand_gene','probe_set_target_region', - 'probe_set_specificity', 'probe_set_blat_score','probe_set_blat_mb_start', - 'probe_set_blat_mb_end', 'probe_set_strand', - 'probe_set_note_by_rw', 'flag'] - self.type = 'ProbeSet' - self.name = dbName - if self.cursor and self.id == 0: - self.retrieveName() - - + def __init__(self, name, db_conn): + + assert name + self.name = name + self.db_conn = db_conn + self.cursor = self.db_conn.cursor() + self.id = None + self.type = None + self.group = None + + #if self.cursor and self.id == 0: + self.setup() + + self.check_confidentiality() + + self.retrieve_name() + self.get_group() + + # Delete this eventually @property def riset(): @@ -85,8 +85,93 @@ class DataSet(object): def get_group(self): assert self.cursor - if self.type == 'Publish': - query = ''' + self.cursor.execute(self.query) + self.group, self.group_id = self.cursor.fetchone() + if self.group == 'BXD300': + self.group = "BXD" + #return group + + + def retrieve_name(self): + """ + If the data set name parameter is not found in the 'Name' field of the data set table, + check if it is actually the FullName or ShortName instead. + + This is not meant to retrieve the data set info if no name at all is passed. + + """ + + query_args = tuple(self.db_conn.escape_string(x) for x in ( + (self.type + "Freeze"), + str(webqtlConfig.PUBLICTHRESH), + self.name, + self.name, + self.name)) + print("query_args are:", query_args) + + query = ''' + SELECT + Id, Name, FullName, ShortName + FROM + %s + WHERE + public > %s AND + (Name = "%s" OR FullName = "%s" OR ShortName = "%s") + ''' % (query_args) + + self.cursor.execute(query) + self.id, self.name, self.fullname, self.shortname = self.cursor.fetchone() + + + #def genHTML(self, Class='c0dd'): + # return HT.Href(text = HT.Span('%s Database' % self.fullname, Class= "fwb " + Class), + # url= webqtlConfig.INFOPAGEHREF % self.name,target="_blank") + +class PhenotypeDataSet(DataSet): + DS_NAME_MAP['Publish'] = 'PhenotypeDataSet' + + def setup(self): + # Fields in the database table + self.search_fields = ['Phenotype.Post_publication_description', + 'Phenotype.Pre_publication_description', + 'Phenotype.Pre_publication_abbreviation', + 'Phenotype.Post_publication_abbreviation', + 'Phenotype.Lab_code', + 'Publication.PubMed_ID', + 'Publication.Abstract', + 'Publication.Title', + 'Publication.Authors', + 'PublishXRef.Id'] + + # Figure out what display_fields is + self.display_fields = ['name', + 'pubmed_id', + 'pre_publication_description', + 'post_publication_description', + 'original_description', + 'pre_publication_abbreviation', + 'post_publication_abbreviation', + 'lab_code', + 'submitter', 'owner', + 'authorized_users', + 'authors', 'title', + 'abstract', 'journal', + 'volume', 'pages', + 'month', 'year', + 'sequence', 'units', 'comments'] + + # Fields displayed in the search results table header + self.header_fields = ['', + 'ID', + 'Description', + 'Authors', + 'Year', + 'Max LRS', + 'Max LRS Location'] + + self.type = 'Publish' + + self.query = ''' SELECT InbredSet.Name, InbredSet.Id FROM @@ -94,69 +179,336 @@ class DataSet(object): WHERE PublishFreeze.InbredSetId = InbredSet.Id AND PublishFreeze.Name = "%s" - ''' % self.name - elif self.type == 'Geno': - query = ''' - SELECT - InbredSet.Name, InbredSet.Id - FROM - InbredSet, GenoFreeze - WHERE - GenoFreeze.InbredSetId = InbredSet.Id AND - GenoFreeze.Name = "%s" - ''' % self.name - elif self.type == 'ProbeSet': - query = ''' - SELECT - InbredSet.Name, InbredSet.Id - FROM - InbredSet, ProbeSetFreeze, ProbeFreeze - WHERE - ProbeFreeze.InbredSetId = InbredSet.Id AND - ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND - ProbeSetFreeze.Name = "%s" - ''' % self.name - else: - return "" - self.cursor.execute(query) - group, RIID = self.cursor.fetchone() - if group == 'BXD300': - group = "BXD" - self.group = group - self.group_id = RIID - return group + ''' % self.db_conn.escape_string(self.name) + + def check_confidentiality(self): + # (Urgently?) Need to write this + pass + + def get_trait_info(self, trait_list, species = ''): + for this_trait in trait_list: + if not this_trait.haveinfo: + this_trait.retrieveInfo(QTL=1) + + description = this_trait.post_publication_description + if this_trait.confidential: + if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=self.privilege, userName=self.userName, authorized_users=this_trait.authorized_users): + description = this_trait.pre_publication_description + this_trait.description_display = description + + if not this_trait.year.isdigit(): + this_trait.pubmed_text = "N/A" + + if this_trait.pubmed_id: + this_trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % this_trait.pubmed_id + + #LRS and its location + this_trait.LRS_score_repr = "N/A" + this_trait.LRS_score_value = 0 + this_trait.LRS_location_repr = "N/A" + this_trait.LRS_location_value = 1000000 + + if this_trait.lrs: + self.cursor.execute(""" + select Geno.Chr, Geno.Mb from Geno, Species + where Species.Name = '%s' and + Geno.Name = '%s' and + Geno.SpeciesId = Species.Id + """ % (species, this_trait.locus)) + result = self.cursor.fetchone() + if result: + if result[0] and result[1]: + LRS_Chr = result[0] + LRS_Mb = result[1] - def retrieveName(self): - assert self.id == 0 and self.cursor + #XZ: LRS_location_value is used for sorting + try: + LRS_location_value = int(LRS_Chr)*1000 + float(LRS_Mb) + except: + if LRS_Chr.upper() == 'X': + LRS_location_value = 20*1000 + float(LRS_Mb) + else: + LRS_location_value = ord(str(LRS_chr).upper()[0])*1000 + float(LRS_Mb) + + this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs + this_trait.LRS_score_value = LRS_score_value = this_trait.lrs + this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb) ) + +class GenotypeDataSet(DataSet): + DS_NAME_MAP['Geno'] = 'GenotypeDataSet' + + def setup(self): + # Fields in the database table + self.search_fields = ['Name', + 'Chr'] + + # Find out what display_fields is + self.display_fields = ['name', + 'chr', + 'mb', + 'source2', + 'sequence'] + + # Fields displayed in the search results table header + self.header_fields = ['', + 'ID', + 'Location'] + + # Todo: Obsolete or rename this field + self.type = 'Geno' + query = ''' SELECT - Id, Name, FullName, ShortName + InbredSet.Name, InbredSet.Id FROM - %sFreeze + InbredSet, GenoFreeze WHERE - public > %d AND - (Name = "%s" OR FullName = "%s" OR ShortName = "%s") - '''% (self.type, webqtlConfig.PUBLICTHRESH, self.name, self.name, self.name) - try: + GenoFreeze.InbredSetId = InbredSet.Id AND + GenoFreeze.Name = "%s" + ''' % self.db_conn.escape_string(self.name) + + def check_confidentiality(self): + return geno_mrna_confidentiality(self) + + def get_trait_info(self, trait_list): + for this_trait in trait_list: + if not this_trait.haveinfo: + this_trait.retrieveInfo() + + #XZ: trait_location_value is used for sorting + trait_location_repr = 'N/A' + trait_location_value = 1000000 + + if this_trait.chr and this_trait.mb: + try: + trait_location_value = int(this_trait.chr)*1000 + this_trait.mb + except: + if this_trait.chr.upper() == 'X': + trait_location_value = 20*1000 + this_trait.mb + else: + trait_location_value = ord(str(this_trait.chr).upper()[0])*1000 + this_trait.mb + + this_trait.location_repr = 'Chr%s: %.4f' % (this_trait.chr, float(this_trait.mb) ) + this_trait.location_value = trait_location_value + + +class MrnaAssayDataSet(DataSet): + ''' + An mRNA Assay is a quantitative assessment (assay) associated with an mRNA trait + + This used to be called ProbeSet, but that term only refers specifically to the Affymetrix + platform and is far too specific. + + ''' + DS_NAME_MAP['ProbeSet'] = 'MrnaAssayDataSet' + + def setup(self): + # Fields in the database table + self.search_fields = ['Name', + 'Description', + 'Probe_Target_Description', + 'Symbol', + 'Alias', + 'GenbankId', + 'UniGeneId', + 'RefSeq_TranscriptId'] + + # Find out what display_fields is + self.display_fields = ['name', 'symbol', + 'description', 'probe_target_description', + 'chr', 'mb', + 'alias', 'geneid', + 'genbankid', 'unigeneid', + 'omim', 'refseq_transcriptid', + 'blatseq', 'targetseq', + 'chipid', 'comments', + 'strand_probe', 'strand_gene', + 'probe_set_target_region', + 'probe_set_specificity', + 'probe_set_blat_score', + 'probe_set_blat_mb_start', + 'probe_set_blat_mb_end', + 'probe_set_strand', + 'probe_set_note_by_rw', + 'flag'] + + # Fields displayed in the search results table header + self.header_fields = ['', + 'ID', + 'Symbol', + 'Description', + 'Location', + 'Mean Expr', + 'Max LRS', + 'Max LRS Location'] + + # Todo: Obsolete or rename this field + self.type = 'ProbeSet' + + self.query = ''' + SELECT + InbredSet.Name, InbredSet.Id + FROM + InbredSet, ProbeSetFreeze, ProbeFreeze + WHERE + ProbeFreeze.InbredSetId = InbredSet.Id AND + ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND + ProbeSetFreeze.Name = "%s" + ''' % self.db_conn.escape_string(self.name) + + + def check_confidentiality(self): + return geno_mrna_confidentiality(self) + + def get_trait_info(self, trait_list=None, species=''): + + # Note: setting trait_list to [] is probably not a great idea. + if not trait_list: + trait_list = [] + + for this_trait in trait_list: + + if not this_trait.haveinfo: + this_trait.retrieveInfo(QTL=1) + + if this_trait.symbol: + pass + else: + this_trait.symbol = "N/A" + + #XZ, 12/08/2008: description + #XZ, 06/05/2009: Rob asked to add probe target description + description_string = str(this_trait.description).strip() + target_string = str(this_trait.probe_target_description).strip() + + description_display = '' + + if len(description_string) > 1 and description_string != 'None': + description_display = description_string + else: + description_display = this_trait.symbol + + if len(description_display) > 1 and description_display != 'N/A' and len(target_string) > 1 and target_string != 'None': + description_display = description_display + '; ' + target_string.strip() + + # Save it for the jinja2 tablet + this_trait.description_display = description_display + + #XZ: trait_location_value is used for sorting + trait_location_repr = 'N/A' + trait_location_value = 1000000 + + if this_trait.chr and this_trait.mb: + try: + trait_location_value = int(this_trait.chr)*1000 + this_trait.mb + except: + if this_trait.chr.upper() == 'X': + trait_location_value = 20*1000 + this_trait.mb + else: + trait_location_value = ord(str(this_trait.chr).upper()[0])*1000 + this_trait.mb + + this_trait.location_repr = 'Chr %s: %.4f Mb' % (this_trait.chr, float(this_trait.mb) ) + this_trait.location_value = trait_location_value + #this_trait.trait_location_value = trait_location_value + + #XZ, 01/12/08: This SQL query is much faster. + query = ( +"""select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet + where ProbeSetXRef.ProbeSetFreezeId = %s and + ProbeSet.Id = ProbeSetXRef.ProbeSetId and + ProbeSet.Name = '%s' + """ % (self.db_conn.escape_string(str(this_trait.db.id)), + self.db_conn.escape_string(this_trait.name))) + + print("query is:", pf(query)) + self.cursor.execute(query) - self.id,self.name,self.fullname,self.shortname=self.cursor.fetchone() - except: - raise KeyError, `self.name`+' doesn\'t exist.' + result = self.cursor.fetchone() + if result: + if result[0]: + mean = result[0] + else: + mean=0 + else: + mean = 0 - def genHTML(self, Class='c0dd'): - return HT.Href(text = HT.Span('%s Database' % self.fullname, Class= "fwb " + Class), - url= webqtlConfig.INFOPAGEHREF % self.name,target="_blank") + #XZ, 06/05/2009: It is neccessary to turn on nowrap + this_trait.mean = repr = "%2.3f" % mean -class PhenotypeDataSet(DataSet): + #LRS and its location + this_trait.LRS_score_repr = 'N/A' + this_trait.LRS_score_value = 0 + this_trait.LRS_location_repr = 'N/A' + this_trait.LRS_location_value = 1000000 + + #Max LRS and its Locus location + if this_trait.lrs and this_trait.locus: + self.cursor.execute(""" + select Geno.Chr, Geno.Mb from Geno, Species + where Species.Name = '%s' and + Geno.Name = '%s' and + Geno.SpeciesId = Species.Id + """ % (species, this_trait.locus)) + result = self.cursor.fetchone() + + if result: + if result[0] and result[1]: + LRS_Chr = result[0] + LRS_Mb = result[1] + + #XZ: LRS_location_value is used for sorting + try: + LRS_location_value = int(LRS_Chr)*1000 + float(LRS_Mb) + except: + if LRS_Chr.upper() == 'X': + LRS_location_value = 20*1000 + float(LRS_Mb) + else: + LRS_location_value = ord(str(LRS_chr).upper()[0])*1000 + float(LRS_Mb) + + this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs + this_trait.LRS_score_value = LRS_score_value = this_trait.lrs + this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb) ) + + +class TempDataSet(DataSet): + '''Temporary user-generated data set''' - def __init__(self): - self.searchfield = ['name','post_publication_description','abstract','title','authors'] - self.disfield = ['name','pubmed_id', - 'pre_publication_description', 'post_publication_description', 'original_description', - 'pre_publication_abbreviation', 'post_publication_abbreviation', - 'lab_code', 'submitter', 'owner', 'authorized_users', - 'authors','title','abstract', 'journal','volume','pages','month', - 'year','sequence', 'units', 'comments'] - self.type = 'Publish' \ No newline at end of file + def setup(self): + self.search_fields = ['name', + 'description'] + + self.display_fields = ['name', + 'description'] + + self.header_fields = ['Name', + 'Description'] + + self.type = 'Temp' + + # Need to double check later how these are used + self.id = 1 + self.fullname = 'Temporary Storage' + self.shortname = 'Temp' + + +def geno_mrna_confidentiality(ob): + dataset_table = ob.type + "Freeze" + print("dataset_table [%s]: %s" % (type(dataset_table), dataset_table)) + + query = '''SELECT Id, Name, FullName, confidentiality, + AuthorisedUsers FROM %s WHERE Name = %%s''' % (dataset_table) + + ob.cursor.execute(query, ob.name) + + (dataset_id, + name, + full_name, + confidential, + authorized_users) = ob.cursor.fetchall()[0] + + if confidential: + # Allow confidential data later + NoConfindetialDataForYouTodaySorry + \ No newline at end of file diff --git a/wqflask/base/webqtlTrait.py b/wqflask/base/webqtlTrait.py index 51d36ab2..29087721 100755 --- a/wqflask/base/webqtlTrait.py +++ b/wqflask/base/webqtlTrait.py @@ -6,7 +6,7 @@ from htmlgen import HTMLgen2 as HT import webqtlConfig from webqtlCaseData import webqtlCaseData -from webqtlDataset import webqtlDataset +from data_set import create_dataset from dbFunction import webqtlDatabaseFunction from utility import webqtlUtil @@ -20,9 +20,10 @@ class webqtlTrait: """ - def __init__(self, cursor = None, **kw): + def __init__(self, db_conn, **kw): print("in webqtlTrait") - self.cursor = cursor + self.db_conn = db_conn + self.cursor = self.db_conn.cursor() self.db = None # database object self.name = '' # Trait ID, ProbeSet ID, Published ID, etc. self.cellid = '' @@ -50,7 +51,7 @@ class webqtlTrait: if self.db and isinstance(self.db, basestring): assert self.cursor, "Don't have a cursor" - self.db = webqtlDataset(self.db, self.cursor) + self.db = create_dataset(self.db_conn, self.db) #if self.db == None, not from a database print("self.db is:", self.db, type(self.db)) @@ -396,8 +397,8 @@ class webqtlTrait: #XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name #XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms. elif self.db.type == 'ProbeSet': - disfieldString = string.join(self.db.disfield,',ProbeSet.') - disfieldString = 'ProbeSet.' + disfieldString + display_fields_string = ',ProbeSet.'.join(self.db.display_fields) + display_fields_string = 'ProbeSet.' + display_fields_string query = """ SELECT %s FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef @@ -406,12 +407,12 @@ class webqtlTrait: ProbeSetXRef.ProbeSetId = ProbeSet.Id AND ProbeSetFreeze.Name = '%s' AND ProbeSet.Name = '%s' - """ % (disfieldString, self.db.name, self.name) + """ % (display_fields_string, self.db.name, self.name) #XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name # to avoid the problem of same marker name from different species. elif self.db.type == 'Geno': - disfieldString = string.join(self.db.disfield,',Geno.') - disfieldString = 'Geno.' + disfieldString + display_fields_string = string.join(self.db.display_fields,',Geno.') + display_fields_string = 'Geno.' + display_fields_string query = """ SELECT %s FROM Geno, GenoFreeze, GenoXRef @@ -420,10 +421,10 @@ class webqtlTrait: GenoXRef.GenoId = Geno.Id AND GenoFreeze.Name = '%s' AND Geno.Name = '%s' - """ % (disfieldString, self.db.name, self.name) + """ % (display_fields_string, self.db.name, self.name) else: #Temp type query = 'SELECT %s FROM %s WHERE Name = "%s"' % \ - (string.join(self.db.disfield,','), self.db.type, self.name) + (string.join(self.db.display_fields,','), self.db.type, self.name) self.cursor.execute(query) @@ -432,7 +433,7 @@ class webqtlTrait: self.haveinfo = 1 #XZ: assign SQL query result to trait attributes. - for i, field in enumerate(self.db.disfield): + for i, field in enumerate(self.db.display_fields): setattr(self, field, traitInfo[i]) if self.db.type == 'Publish': diff --git a/wqflask/dbFunction/webqtlDatabaseFunction.py b/wqflask/dbFunction/webqtlDatabaseFunction.py index 7e33da3f..8f923b8a 100755 --- a/wqflask/dbFunction/webqtlDatabaseFunction.py +++ b/wqflask/dbFunction/webqtlDatabaseFunction.py @@ -19,14 +19,7 @@ # # # This module is used by GeneNetwork project (www.genenetwork.org) -# -# Created by GeneNetwork Core Team 2010/08/10 -# -# Last updated by Xiaodong Zhou 2011/Jan/20 -#webqtlDatabaseFunction.py -# -#This file consists of various database related functions; the names are generally self-explanatory. import MySQLdb import string @@ -206,21 +199,21 @@ def getTissueCountByTissueProbeSetFreezeId(cursor=None, TissueProbeSetFreezeId=N ########################################################################### # input: cursor, TissueProbeSetFreezeId (int) -# output: DatasetName(string),DatasetFullName(string) -# function: retrieve DatasetName, DatasetFullName based on TissueProbeSetFreezeId +# output: DataSetName(string),DataSetFullName(string) +# function: retrieve DataSetName, DataSetFullName based on TissueProbeSetFreezeId ########################################################################### -def getDatasetNamesByTissueProbeSetFreezeId(cursor=None, TissueProbeSetFreezeId=None): +def getDataSetNamesByTissueProbeSetFreezeId(cursor=None, TissueProbeSetFreezeId=None): query ="select Name, FullName from TissueProbeSetFreeze where Id=%s" % TissueProbeSetFreezeId try: cursor.execute(query) result = cursor.fetchone() - DatasetName = result[0] - DatasetFullName =result[1] + DataSetName = result[0] + DataSetFullName =result[1] except: - DatasetName =None - DatasetFullName =None + DataSetName =None + DataSetFullName =None - return DatasetName, DatasetFullName + return DataSetName, DataSetFullName ########################################################################### # input: cursor, geneIdLst (list) diff --git a/wqflask/wqflask/correlation/CorrelationPage.py b/wqflask/wqflask/correlation/CorrelationPage.py index e48ea412..8af30d1e 100644 --- a/wqflask/wqflask/correlation/CorrelationPage.py +++ b/wqflask/wqflask/correlation/CorrelationPage.py @@ -47,7 +47,7 @@ from base import webqtlConfig from utility.THCell import THCell from utility.TDCell import TDCell from base.webqtlTrait import webqtlTrait -from base.webqtlDataset import webqtlDataset +from base.data_set import create_dataset from base.templatePage import templatePage from utility import webqtlUtil from dbFunction import webqtlDatabaseFunction @@ -310,7 +310,7 @@ class CorrelationPage(templatePage): #try: #print("target_db_name is:", target_db_name) - self.db = webqtlDataset(self.target_db_name, self.cursor) + self.db = create_dataset(self.db_conn, self.target_db_name) #except: # detail = ["The database you just requested has not been established yet."] # self.error(detail) diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py index e2bafb3a..73a72e00 100644 --- a/wqflask/wqflask/do_search.py +++ b/wqflask/wqflask/do_search.py @@ -147,7 +147,7 @@ class GenotypeSearch(DoSearch): """WHERE %s and Geno.Id = GenoXRef.GenoId and GenoXRef.GenoFreezeId = GenoFreeze.Id and - GenoFreeze.Id = %s"""% ( + GenoFreeze.Id = %s""" % ( self.get_where_clause(), self.escape(self.dataset.id))) @@ -257,7 +257,7 @@ class GoSearch(ProbeSetSearch): statements = ("""%s.symbol=GOgene_product.symbol and GOassociation.gene_product_id=GOgene_product.id and GOterm.id=GOassociation.term_id""" % ( - self.db_conn.escape_string(self.dataset.type))) + self.escape(self.dataset.type))) where_clause = " %s = '%s' and %s " % (field, go_id, statements) @@ -317,14 +317,14 @@ class CisLrsSearch(ProbeSetSearch): Geno.SpeciesId = %s and %s.Chr = Geno.Chr and ABS(%s.Mb-Geno.Mb) < %s """ % ( - self.dataset.type, + self.escape(self.dataset.type), min(lower_limit, upper_limit), - self.dataset.type, + self.escape(self.dataset.type), max(lower_limit, upper_limit), - self.dataset.type, + self.escape(self.dataset.type), self.species_id, - self.dataset.type, - self.dataset.type, + self.escape(self.dataset.type), + self.escape(self.dataset.type), min_threshold ) else: @@ -437,7 +437,7 @@ if __name__ == "__main__": from base import webqtlConfig - from base.webqtlDataset import webqtlDataset + from base.data_set import create_dataset from base.templatePage import templatePage from utility import webqtlUtil from dbFunction import webqtlDatabaseFunction @@ -449,13 +449,13 @@ if __name__ == "__main__": cursor = db_conn.cursor() dataset_name = "HC_M2_0606_P" - dataset = webqtlDataset(dataset_name, cursor) + dataset = create_dataset(db_conn, dataset_name) #results = ProbeSetSearch("salt", dataset, cursor, db_conn).run() #results = RifSearch("diabetes", dataset, cursor, db_conn).run() #results = WikiSearch("nicotine", dataset, cursor, db_conn).run() - results = TransLrsSearch(['25','99','10'], dataset, cursor, db_conn).run() - #results = TransLrsSearch(['9', '999', '10'], dataset, cursor, db_conn).run() + results = CisLrsSearch(['25','99','10'], dataset, cursor, db_conn).run() + #results = TransLrsSearch(['25', '999', '10'], dataset, cursor, db_conn).run() #results = PhenotypeSearch("brain", dataset, cursor, db_conn).run() #results = GenotypeSearch("rs13475699", dataset, cursor, db_conn).run() #results = GoSearch("0045202", dataset, cursor, db_conn).run() diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index 05f062fc..b50e45d5 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -10,7 +10,7 @@ from flask import render_template # # ################################################### -import string +#import string import os import cPickle import re @@ -29,7 +29,7 @@ from htmlgen import HTMLgen2 as HT from base import webqtlConfig from utility.THCell import THCell from utility.TDCell import TDCell -from base.webqtlDataset import webqtlDataset +from base.data_set import create_dataset from base.webqtlTrait import webqtlTrait from base.templatePage import templatePage from wqflask import parser @@ -43,14 +43,13 @@ from utility import formatting class SearchResultPage(templatePage): + #maxReturn = 3000 - maxReturn = 3000 - nkeywords = 0 def __init__(self, fd): print("initing SearchResultPage") - import logging_tree - logging_tree.printout() + #import logging_tree + #logging_tree.printout() self.fd = fd templatePage.__init__(self, fd) assert self.openMysql(), "Couldn't open MySQL" @@ -59,127 +58,40 @@ class SearchResultPage(templatePage): self.dataset = fd['dataset'] # change back to self.dataset - if not self.dataset or self.dataset == 'spacer': - #Error, No dataset selected - heading = "Search Result" - detail = ['''No dataset was selected for this search, please - go back and SELECT at least one dataset.'''] - self.error(heading=heading,detail=detail,error="No dataset Selected") - return + #if not self.dataset or self.dataset == 'spacer': + # #Error, No dataset selected + # heading = "Search Result" + # detail = ['''No dataset was selected for this search, please + # go back and SELECT at least one dataset.'''] + # self.error(heading=heading,detail=detail,error="No dataset Selected") + # return ########################################### # Names and IDs of RISet / F2 set ########################################### + + # All Phenotypes is a special case we'll deal with later if self.dataset == "All Phenotypes": self.cursor.execute(""" select PublishFreeze.Name, InbredSet.Name, InbredSet.Id from PublishFreeze, InbredSet where PublishFreeze.Name not like 'BXD300%' and InbredSet.Id = PublishFreeze.InbredSetId""") results = self.cursor.fetchall() - self.dataset = map(lambda x: webqtlDataset(x[0], self.cursor), results) + self.dataset = map(lambda x: DataSet(x[0], self.cursor), results) self.dataset_groups = map(lambda x: x[1], results) self.dataset_group_ids = map(lambda x: x[2], results) - self.single_group = False else: print("self.dataset is:", pf(self.dataset)) - self.dataset = webqtlDataset(self.dataset, self.cursor) + self.dataset = create_dataset(self.db_conn, self.dataset) print("self.dataset is now:", pf(self.dataset)) - if self.dataset.type in ("Geno", "ProbeSet"): - db_type = self.dataset.type + "Freeze" - print("db_type [%s]: %s" % (type(db_type), db_type)) - - query = '''SELECT Id, Name, FullName, confidentiality, - AuthorisedUsers FROM %s WHERE Name = %%s''' % (db_type) - - self.cursor.execute(query, self.dataset.name) - - (indId, - indName, - indFullName, - confidential, - AuthorisedUsers) = self.cursor.fetchall()[0] - - if confidential: - # Allow confidential data later - NoConfindetialDataForYouTodaySorry - #access_to_confidential_dataset = 0 - # - ##for the dataset that confidentiality is 1 - ##1. 'admin' and 'root' can see all of the dataset - ##2. 'user' can see the dataset that AuthorisedUsers contains his id(stored in the Id field of User table) - #if webqtlConfig.USERDICT[self.privilege] > webqtlConfig.USERDICT['user']: - # access_to_confidential_dataset = 1 - #else: - # AuthorisedUsersList=AuthorisedUsers.split(',') - # if AuthorisedUsersList.__contains__(self.userName): - # access_to_confidential_dataset = 1 - # - #if not access_to_confidential_dataset: - # Some error - - #else: - # heading = "Search Result" - # detail = ['''The dataset has not been established yet, please - # go back and SELECT at least one dataset.'''] - # self.error(heading=heading,detail=detail,error="No dataset Selected") - # return - - self.dataset.get_group() - self.single_group = True - #XZ, August 24,2010: Since self.single_group = True, it's safe to assign one species Id. - self.species_id = webqtlDatabaseFunction.retrieveSpeciesId(self.cursor, - self.dataset.group) - - #self.db_type = self.dataset.type - if self.dataset.type == "Publish": - self.search_fields = ['Phenotype.Post_publication_description', - 'Phenotype.Pre_publication_description', - 'Phenotype.Pre_publication_abbreviation', - 'Phenotype.Post_publication_abbreviation', - 'Phenotype.Lab_code', - 'Publication.PubMed_ID', - 'Publication.Abstract', - 'Publication.Title', - 'Publication.Authors', - 'PublishXRef.Id'] - self.header_fields = ['', - 'ID', - 'Description', - 'Authors', - 'Year', - 'Max LRS', - 'Max LRS Location'] - - elif self.dataset.type == "ProbeSet": - self.search_fields = ['Name', - 'Description', - 'Probe_Target_Description', - 'Symbol', - 'Alias', - 'GenbankId', - 'UniGeneId', - 'RefSeq_TranscriptId'] - self.header_fields = ['', - 'ID', - 'Symbol', - 'Description', - 'Location', - 'Mean Expr', - 'Max LRS', - 'Max LRS Location'] - elif self.dataset.type == "Geno": - self.search_fields = ['Name', - 'Chr'] - self.header_fields = ['', - 'ID', - 'Location'] - + self.search() self.gen_search_result() def gen_search_result(self): - """Get the info displayed in the search result table from the set of results computed in + """ + Get the info displayed in the search result table from the set of results computed in the "search" function """ @@ -191,26 +103,19 @@ class SearchResultPage(templatePage): if not result: continue - seq = 1 group = self.dataset.group - species = webqtlDatabaseFunction.retrieveSpecies(cursor=self.cursor, RISet=group) #### Excel file needs to be generated #### print("foo locals are:", locals()) trait_id = result[0] - this_trait = webqtlTrait(db=self.dataset, name=trait_id, cursor=self.cursor) + this_trait = webqtlTrait(self.db_conn, db=self.dataset, name=trait_id) this_trait.retrieveInfo(QTL=True) print("this_trait is:", pf(this_trait)) self.trait_list.append(this_trait) - - if self.dataset.type == "ProbeSet": - self.getTraitInfoForProbeSet(trait_list=self.trait_list, species=species) - elif self.dataset.type == "Publish": - self.getTraitInfoForPublish(trait_list=self.trait_list, species=species) - elif self.dataset.type == "Geno": - self.getTraitInfoForGeno(trait_list=self.trait_list) + + self.dataset.get_trait_info(self.trait_list, species) def search(self): @@ -222,7 +127,7 @@ class SearchResultPage(templatePage): print("[kodak] item is:", pf(a_search)) search_term = a_search['search_term'] if a_search['key']: - search_type = string.upper(a_search['key']) + search_type = a_search['key'].upper() else: # We fall back to the dataset type as the key to get the right object search_type = self.dataset.type @@ -258,187 +163,3 @@ class SearchResultPage(templatePage): keyword = string.replace(keyword,"?",".") wildcardkeyword[i] = keyword#'[[:<:]]'+ keyword+'[[:>:]]' return wildcardkeyword - - - def getTraitInfoForGeno(self, trait_list): - for this_trait in trait_list: - if not this_trait.haveinfo: - this_trait.retrieveInfo() - - #XZ: trait_location_value is used for sorting - trait_location_repr = 'N/A' - trait_location_value = 1000000 - - if this_trait.chr and this_trait.mb: - try: - trait_location_value = int(this_trait.chr)*1000 + this_trait.mb - except: - if this_trait.chr.upper() == 'X': - trait_location_value = 20*1000 + this_trait.mb - else: - trait_location_value = ord(str(this_trait.chr).upper()[0])*1000 + this_trait.mb - - this_trait.location_repr = 'Chr%s: %.4f' % (this_trait.chr, float(this_trait.mb) ) - this_trait.location_value = trait_location_value - - - def getTraitInfoForPublish(self, trait_list, species = ''): - for this_trait in trait_list: - if not this_trait.haveinfo: - this_trait.retrieveInfo(QTL=1) - - description = this_trait.post_publication_description - if this_trait.confidential: - if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=self.privilege, userName=self.userName, authorized_users=this_trait.authorized_users): - description = this_trait.pre_publication_description - this_trait.description_display = description - - if not this_trait.year.isdigit(): - this_trait.pubmed_text = "N/A" - - if this_trait.pubmed_id: - this_trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % this_trait.pubmed_id - - #LRS and its location - this_trait.LRS_score_repr = "N/A" - this_trait.LRS_score_value = 0 - this_trait.LRS_location_repr = "N/A" - this_trait.LRS_location_value = 1000000 - - if this_trait.lrs: - self.cursor.execute(""" - select Geno.Chr, Geno.Mb from Geno, Species - where Species.Name = '%s' and - Geno.Name = '%s' and - Geno.SpeciesId = Species.Id - """ % (species, this_trait.locus)) - result = self.cursor.fetchone() - - if result: - if result[0] and result[1]: - LRS_Chr = result[0] - LRS_Mb = result[1] - - #XZ: LRS_location_value is used for sorting - try: - LRS_location_value = int(LRS_Chr)*1000 + float(LRS_Mb) - except: - if LRS_Chr.upper() == 'X': - LRS_location_value = 20*1000 + float(LRS_Mb) - else: - LRS_location_value = ord(str(LRS_chr).upper()[0])*1000 + float(LRS_Mb) - - this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs - this_trait.LRS_score_value = LRS_score_value = this_trait.lrs - this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb) ) - - - def getTraitInfoForProbeSet(self, trait_list=None, species=''): - - # Note: setting trait_list to [] is probably not a great idea. - if not trait_list: - trait_list = [] - - for this_trait in trait_list: - - if not this_trait.haveinfo: - this_trait.retrieveInfo(QTL=1) - - if this_trait.symbol: - pass - else: - this_trait.symbol = "N/A" - - #XZ, 12/08/2008: description - #XZ, 06/05/2009: Rob asked to add probe target description - description_string = str(this_trait.description).strip() - target_string = str(this_trait.probe_target_description).strip() - - description_display = '' - - if len(description_string) > 1 and description_string != 'None': - description_display = description_string - else: - description_display = this_trait.symbol - - if len(description_display) > 1 and description_display != 'N/A' and len(target_string) > 1 and target_string != 'None': - description_display = description_display + '; ' + target_string.strip() - - # Save it for the jinja2 tablet - this_trait.description_display = description_display - - #XZ: trait_location_value is used for sorting - trait_location_repr = 'N/A' - trait_location_value = 1000000 - - if this_trait.chr and this_trait.mb: - try: - trait_location_value = int(this_trait.chr)*1000 + this_trait.mb - except: - if this_trait.chr.upper() == 'X': - trait_location_value = 20*1000 + this_trait.mb - else: - trait_location_value = ord(str(this_trait.chr).upper()[0])*1000 + this_trait.mb - - this_trait.location_repr = 'Chr %s: %.4f Mb' % (this_trait.chr, float(this_trait.mb) ) - this_trait.location_value = trait_location_value - #this_trait.trait_location_value = trait_location_value - - #XZ, 01/12/08: This SQL query is much faster. - query = ( -"""select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet - where ProbeSetXRef.ProbeSetFreezeId = %s and - ProbeSet.Id = ProbeSetXRef.ProbeSetId and - ProbeSet.Name = '%s' - """ % (self.db_conn.escape_string(str(this_trait.db.id)), - self.db_conn.escape_string(this_trait.name))) - - print("query is:", pf(query)) - - self.cursor.execute(query) - result = self.cursor.fetchone() - - if result: - if result[0]: - mean = result[0] - else: - mean=0 - else: - mean = 0 - - #XZ, 06/05/2009: It is neccessary to turn on nowrap - this_trait.mean = repr = "%2.3f" % mean - - #LRS and its location - this_trait.LRS_score_repr = 'N/A' - this_trait.LRS_score_value = 0 - this_trait.LRS_location_repr = 'N/A' - this_trait.LRS_location_value = 1000000 - - #Max LRS and its Locus location - if this_trait.lrs and this_trait.locus: - self.cursor.execute(""" - select Geno.Chr, Geno.Mb from Geno, Species - where Species.Name = '%s' and - Geno.Name = '%s' and - Geno.SpeciesId = Species.Id - """ % (species, this_trait.locus)) - result = self.cursor.fetchone() - - if result: - if result[0] and result[1]: - LRS_Chr = result[0] - LRS_Mb = result[1] - - #XZ: LRS_location_value is used for sorting - try: - LRS_location_value = int(LRS_Chr)*1000 + float(LRS_Mb) - except: - if LRS_Chr.upper() == 'X': - LRS_location_value = 20*1000 + float(LRS_Mb) - else: - LRS_location_value = ord(str(LRS_chr).upper()[0])*1000 + float(LRS_Mb) - - this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs - this_trait.LRS_score_value = LRS_score_value = this_trait.lrs - this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb) ) -- cgit v1.2.3