From ba2909f7dd931688f344536c422bb4ac27a3a183 Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 11 Apr 2016 17:15:18 +0000 Subject: Changed the way trait data is retrieved for the global search page (it now gets the LRS location repr and other variables needed to display in table during the retrieve_info function in trait.py instead of retrieve_trait_info in dataset.py) This change increases the speed by a bit (85 seconds to 66 seconds for example) Made the column width for location a bit wider for global search page so it doesn't spill onto a second line --- wqflask/base/trait.py | 78 ++++++++++--- wqflask/wqflask/gsearch.py | 168 +++++++++++++-------------- wqflask/wqflask/templates/gsearch_pheno.html | 2 +- 3 files changed, 147 insertions(+), 101 deletions(-) mode change 100755 => 100644 wqflask/base/trait.py mode change 100755 => 100644 wqflask/wqflask/gsearch.py (limited to 'wqflask') diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py old mode 100755 new mode 100644 index acff38d9..ce3a7608 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -256,10 +256,10 @@ class GeneralTrait(object): PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND PublishFreeze.Id = %s """ % (self.name, self.dataset.id) - - #print("query is:", query) trait_info = g.db.execute(query).fetchone() + + #XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name #XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms. elif self.dataset.type == 'ProbeSet': @@ -294,7 +294,6 @@ class GeneralTrait(object): escape(self.dataset.name), escape(self.name)) trait_info = g.db.execute(query).fetchone() - #print("trait_info is: ", pf(trait_info)) else: #Temp type query = """SELECT %s FROM %s WHERE Name = %s""" trait_info = g.db.execute(query, @@ -305,7 +304,6 @@ class GeneralTrait(object): #XZ: assign SQL query result to trait attributes. for i, field in enumerate(self.dataset.display_fields): - #print(" mike: {} -> {} - {}".format(field, type(trait_info[i]), trait_info[i])) holder = trait_info[i] if isinstance(trait_info[i], basestring): holder = unicode(trait_info[i], "utf8", "ignore") @@ -315,12 +313,37 @@ class GeneralTrait(object): self.confidential = 0 if self.pre_publication_description and not self.pubmed_id: self.confidential = 1 + + description = self.post_publication_description + + #If the dataset is confidential and the user has access to confidential + #phenotype traits, then display the pre-publication description instead + #of the post-publication description + if self.confidential: + self.description_display = "" + + #if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait( + # privilege=self.dataset.privilege, + # userName=self.dataset.userName, + # authorized_users=self.authorized_users): + # + # description = self.pre_publication_description + + if len(description) > 0: + self.description_display = description.strip() + else: + self.description_display = "" + if not self.year.isdigit(): + self.pubmed_text = "N/A" + else: + self.pubmed_text = self.year + + if self.pubmed_id: + self.pubmed_link = webqtlConfig.PUBMEDLINK_URL % self.pubmed_id + self.homologeneid = None - #print("self.geneid is:", self.geneid) - #print(" type:", type(self.geneid)) - #print("self.dataset.group.name is:", self.dataset.group.name) if self.dataset.type == 'ProbeSet' and self.dataset.group and self.geneid: #XZ, 05/26/2010: From time to time, this query get error message because some geneid values in database are not number. #XZ: So I have to test if geneid is number before execute the query. @@ -330,10 +353,7 @@ class GeneralTrait(object): # geneidIsNumber = True #except ValueError: # geneidIsNumber = False - #if geneidIsNumber: - - query = """ SELECT HomologeneId @@ -353,6 +373,11 @@ class GeneralTrait(object): self.homologeneid = result[0] if get_qtl_info: + #LRS and its location + self.LRS_score_repr = "N/A" + self.LRS_score_value = 0 + self.LRS_location_repr = "N/A" + self.LRS_location_value = 1000000 if self.dataset.type == 'ProbeSet' and not self.cellid: query = """ SELECT @@ -365,12 +390,8 @@ class GeneralTrait(object): ProbeSetXRef.ProbeSetFreezeId ={} """.format(self.name, self.dataset.id) trait_qtl = g.db.execute(query).fetchone() - #self.cursor.execute(query) - #trait_qtl = self.cursor.fetchone() if trait_qtl: - #print("trait_qtl:", trait_qtl) self.locus, self.lrs, self.pvalue, self.mean, self.additive= trait_qtl - #print("self.locus:", self.locus) if self.locus: query = """ select Geno.Chr, Geno.Mb from Geno, Species @@ -403,8 +424,37 @@ class GeneralTrait(object): """, (self.name, self.dataset.id)).fetchone() if trait_qtl: self.locus, self.lrs, self.additive = trait_qtl + if self.locus: + query = """ + select Geno.Chr, Geno.Mb from Geno, Species + where Species.Name = '{}' and + Geno.Name = '{}' and + Geno.SpeciesId = Species.Id + """.format(self.dataset.group.species, self.locus) + result = g.db.execute(query).fetchone() + if result: + self.locus_chr = result[0] + self.locus_mb = result[1] + else: + self.locus = self.locus_chr = self.locus_mb = "" + else: + self.locus = self.locus_chr = self.locus_mb = "" else: self.locus = self.lrs = self.additive = "" + + if self.locus_chr != "" and self.locus_mb != "": + #XZ: LRS_location_value is used for sorting + try: + LRS_location_value = int(self.locus_chr)*1000 + float(self.locus_mb) + except: + if self.locus_chr.upper() == 'X': + LRS_location_value = 20*1000 + float(self.locus_mb) + else: + LRS_location_value = ord(str(self.locus_chr).upper()[0])*1000 + float(self.locus_mb) + + self.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (self.locus_chr, float(self.locus_mb)) + self.LRS_score_repr = LRS_score_repr = '%3.1f' % self.lrs + self.LRS_score_value = LRS_score_value = self.lrs else: raise KeyError, `self.name`+' information is not found in the database.' diff --git a/wqflask/wqflask/gsearch.py b/wqflask/wqflask/gsearch.py old mode 100755 new mode 100644 index 179f054e..4cd3874c --- a/wqflask/wqflask/gsearch.py +++ b/wqflask/wqflask/gsearch.py @@ -9,90 +9,86 @@ from utility.benchmark import Bench class GSearch(object): - def __init__(self, kw): - self.type = kw['type'] - self.terms = kw['terms'] - if self.type == "gene": - sql = """ - SELECT - Species.`Name` AS species_name, - InbredSet.`Name` AS inbredset_name, - Tissue.`Name` AS tissue_name, - ProbeSetFreeze.Name AS probesetfreeze_name, - ProbeSet.Name AS probeset_name, - ProbeSet.Symbol AS probeset_symbol, - ProbeSet.`description` AS probeset_description, - ProbeSet.Chr AS chr, - ProbeSet.Mb AS mb, - ProbeSetXRef.Mean AS mean, - ProbeSetXRef.LRS AS lrs, - ProbeSetXRef.`Locus` AS locus, - ProbeSetXRef.`pValue` AS pvalue, - ProbeSetXRef.`additive` AS additive - FROM Species, InbredSet, ProbeSetXRef, ProbeSet, ProbeFreeze, ProbeSetFreeze, Tissue - WHERE InbredSet.`SpeciesId`=Species.`Id` - AND ProbeFreeze.InbredSetId=InbredSet.`Id` - AND ProbeFreeze.`TissueId`=Tissue.`Id` - AND ProbeSetFreeze.ProbeFreezeId=ProbeFreeze.Id - AND ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,alias,GenbankId, UniGeneId, Probe_Target_Description) AGAINST ('%s' IN BOOLEAN MODE) ) - AND ProbeSet.Id = ProbeSetXRef.ProbeSetId - AND ProbeSetXRef.ProbeSetFreezeId=ProbeSetFreeze.Id - AND ProbeSetFreeze.public > 0 - ORDER BY species_name, inbredset_name, tissue_name, probesetfreeze_name, probeset_name - LIMIT 6000 - """ % (self.terms) - with Bench("Running query"): - re = g.db.execute(sql).fetchall() - self.trait_list = [] - with Bench("Creating trait objects"): - for line in re: - dataset = create_dataset(line[3], "ProbeSet", get_samplelist=False) - trait_id = line[4] - #with Bench("Building trait object"): - this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) - self.trait_list.append(this_trait) - species = webqtlDatabaseFunction.retrieve_species(dataset.group.name) - #with Bench("Getting trait info"): - dataset.get_trait_info([this_trait], species) + def __init__(self, kw): + self.type = kw['type'] + self.terms = kw['terms'] + if self.type == "gene": + sql = """ + SELECT + Species.`Name` AS species_name, + InbredSet.`Name` AS inbredset_name, + Tissue.`Name` AS tissue_name, + ProbeSetFreeze.Name AS probesetfreeze_name, + ProbeSet.Name AS probeset_name, + ProbeSet.Symbol AS probeset_symbol, + ProbeSet.`description` AS probeset_description, + ProbeSet.Chr AS chr, + ProbeSet.Mb AS mb, + ProbeSetXRef.Mean AS mean, + ProbeSetXRef.LRS AS lrs, + ProbeSetXRef.`Locus` AS locus, + ProbeSetXRef.`pValue` AS pvalue, + ProbeSetXRef.`additive` AS additive + FROM Species, InbredSet, ProbeSetXRef, ProbeSet, ProbeFreeze, ProbeSetFreeze, Tissue + WHERE InbredSet.`SpeciesId`=Species.`Id` + AND ProbeFreeze.InbredSetId=InbredSet.`Id` + AND ProbeFreeze.`TissueId`=Tissue.`Id` + AND ProbeSetFreeze.ProbeFreezeId=ProbeFreeze.Id + AND ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,alias,GenbankId, UniGeneId, Probe_Target_Description) AGAINST ('%s' IN BOOLEAN MODE) ) + AND ProbeSet.Id = ProbeSetXRef.ProbeSetId + AND ProbeSetXRef.ProbeSetFreezeId=ProbeSetFreeze.Id + AND ProbeSetFreeze.public > 0 + ORDER BY species_name, inbredset_name, tissue_name, probesetfreeze_name, probeset_name + LIMIT 6000 + """ % (self.terms) + with Bench("Running query"): + re = g.db.execute(sql).fetchall() + self.trait_list = [] + with Bench("Creating trait objects"): + for line in re: + dataset = create_dataset(line[3], "ProbeSet", get_samplelist=False) + trait_id = line[4] + #with Bench("Building trait object"): + this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) + self.trait_list.append(this_trait) - elif self.type == "phenotype": - sql = """ - SELECT - Species.`Name`, - InbredSet.`Name`, - PublishFreeze.`Name`, - PublishXRef.`Id`, - Phenotype.`Post_publication_description`, - Publication.`Authors`, - Publication.`Year`, - PublishXRef.`LRS`, - PublishXRef.`Locus`, - PublishXRef.`additive` - FROM Species,InbredSet,PublishFreeze,PublishXRef,Phenotype,Publication - WHERE PublishXRef.`InbredSetId`=InbredSet.`Id` - AND PublishFreeze.`InbredSetId`=InbredSet.`Id` - AND InbredSet.`SpeciesId`=Species.`Id` - AND PublishXRef.`PhenotypeId`=Phenotype.`Id` - AND PublishXRef.`PublicationId`=Publication.`Id` - AND (Phenotype.Post_publication_description REGEXP "[[:<:]]%s[[:>:]]" - OR Phenotype.Pre_publication_description REGEXP "[[:<:]]%s[[:>:]]" - OR Phenotype.Pre_publication_abbreviation REGEXP "[[:<:]]%s[[:>:]]" - OR Phenotype.Post_publication_abbreviation REGEXP "[[:<:]]%s[[:>:]]" - OR Phenotype.Lab_code REGEXP "[[:<:]]%s[[:>:]]" - OR Publication.PubMed_ID REGEXP "[[:<:]]%s[[:>:]]" - OR Publication.Abstract REGEXP "[[:<:]]%s[[:>:]]" - OR Publication.Title REGEXP "[[:<:]]%s[[:>:]]" - OR Publication.Authors REGEXP "[[:<:]]%s[[:>:]]" - OR PublishXRef.Id REGEXP "[[:<:]]%s[[:>:]]") - ORDER BY Species.`Name`, InbredSet.`Name`, PublishXRef.`Id` - LIMIT 6000 - """ % (self.terms, self.terms, self.terms, self.terms, self.terms, self.terms, self.terms, self.terms, self.terms, self.terms) - re = g.db.execute(sql).fetchall() - self.trait_list = [] - for line in re: - dataset = create_dataset(line[2], "Publish") - trait_id = line[3] - this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) - self.trait_list.append(this_trait) - species = webqtlDatabaseFunction.retrieve_species(dataset.group.name) - dataset.get_trait_info([this_trait], species) + elif self.type == "phenotype": + sql = """ + SELECT + Species.`Name`, + InbredSet.`Name`, + PublishFreeze.`Name`, + PublishXRef.`Id`, + Phenotype.`Post_publication_description`, + Publication.`Authors`, + Publication.`Year`, + PublishXRef.`LRS`, + PublishXRef.`Locus`, + PublishXRef.`additive` + FROM Species,InbredSet,PublishFreeze,PublishXRef,Phenotype,Publication + WHERE PublishXRef.`InbredSetId`=InbredSet.`Id` + AND PublishFreeze.`InbredSetId`=InbredSet.`Id` + AND InbredSet.`SpeciesId`=Species.`Id` + AND PublishXRef.`PhenotypeId`=Phenotype.`Id` + AND PublishXRef.`PublicationId`=Publication.`Id` + AND (Phenotype.Post_publication_description REGEXP "[[:<:]]%s[[:>:]]" + OR Phenotype.Pre_publication_description REGEXP "[[:<:]]%s[[:>:]]" + OR Phenotype.Pre_publication_abbreviation REGEXP "[[:<:]]%s[[:>:]]" + OR Phenotype.Post_publication_abbreviation REGEXP "[[:<:]]%s[[:>:]]" + OR Phenotype.Lab_code REGEXP "[[:<:]]%s[[:>:]]" + OR Publication.PubMed_ID REGEXP "[[:<:]]%s[[:>:]]" + OR Publication.Abstract REGEXP "[[:<:]]%s[[:>:]]" + OR Publication.Title REGEXP "[[:<:]]%s[[:>:]]" + OR Publication.Authors REGEXP "[[:<:]]%s[[:>:]]" + OR PublishXRef.Id REGEXP "[[:<:]]%s[[:>:]]") + ORDER BY Species.`Name`, InbredSet.`Name`, PublishXRef.`Id` + LIMIT 6000 + """ % (self.terms, self.terms, self.terms, self.terms, self.terms, self.terms, self.terms, self.terms, self.terms, self.terms) + re = g.db.execute(sql).fetchall() + self.trait_list = [] + with Bench("Creating trait objects"): + for line in re: + dataset = create_dataset(line[2], "Publish") + trait_id = line[3] + this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) + self.trait_list.append(this_trait) diff --git a/wqflask/wqflask/templates/gsearch_pheno.html b/wqflask/wqflask/templates/gsearch_pheno.html index 0bdc4f4e..3c9f6841 100755 --- a/wqflask/wqflask/templates/gsearch_pheno.html +++ b/wqflask/wqflask/templates/gsearch_pheno.html @@ -137,7 +137,7 @@ { "type": "natural" }, { "type": "natural" }, { "type": "natural" }, - { "type": "natural" }, + { "type": "natural", "width": "12%"}, { "type": "natural" } ], "order": [[ 1, "asc" ]], -- cgit v1.2.3