From f0aadf83cf4a26c522887a89d4d09f9b4c01c704 Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 1 Jun 2016 20:03:21 +0000 Subject: Changed global gene search to no longer use server-side processing --- wqflask/wqflask/gsearch.py | 166 ++++++++++++++-------------- wqflask/wqflask/search_results.py | 136 ++++------------------- wqflask/wqflask/templates/gsearch_gene.html | 32 ++++-- wqflask/wqflask/views.py | 4 +- 4 files changed, 123 insertions(+), 215 deletions(-) mode change 100755 => 100644 wqflask/wqflask/search_results.py diff --git a/wqflask/wqflask/gsearch.py b/wqflask/wqflask/gsearch.py index 4f9dc316..4cd3874c 100644 --- a/wqflask/wqflask/gsearch.py +++ b/wqflask/wqflask/gsearch.py @@ -1,94 +1,94 @@ from __future__ import absolute_import, print_function, division from flask import Flask, g -#from base.data_set import create_dataset -#from base.trait import GeneralTrait -#from dbFunction import webqtlDatabaseFunction +from base.data_set import create_dataset +from base.trait import GeneralTrait +from dbFunction import webqtlDatabaseFunction -#from utility.benchmark import Bench +from utility.benchmark import Bench class GSearch(object): def __init__(self, kw): self.type = kw['type'] self.terms = kw['terms'] - # if self.type == "gene": - # sql = """ - # SELECT - # Species.`Name` AS species_name, - # InbredSet.`Name` AS inbredset_name, - # Tissue.`Name` AS tissue_name, - # ProbeSetFreeze.Name AS probesetfreeze_name, - # ProbeSet.Name AS probeset_name, - # ProbeSet.Symbol AS probeset_symbol, - # ProbeSet.`description` AS probeset_description, - # ProbeSet.Chr AS chr, - # ProbeSet.Mb AS mb, - # ProbeSetXRef.Mean AS mean, - # ProbeSetXRef.LRS AS lrs, - # ProbeSetXRef.`Locus` AS locus, - # ProbeSetXRef.`pValue` AS pvalue, - # ProbeSetXRef.`additive` AS additive - # FROM Species, InbredSet, ProbeSetXRef, ProbeSet, ProbeFreeze, ProbeSetFreeze, Tissue - # WHERE InbredSet.`SpeciesId`=Species.`Id` - # AND ProbeFreeze.InbredSetId=InbredSet.`Id` - # AND ProbeFreeze.`TissueId`=Tissue.`Id` - # AND ProbeSetFreeze.ProbeFreezeId=ProbeFreeze.Id - # AND ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,alias,GenbankId, UniGeneId, Probe_Target_Description) AGAINST ('%s' IN BOOLEAN MODE) ) - # AND ProbeSet.Id = ProbeSetXRef.ProbeSetId - # AND ProbeSetXRef.ProbeSetFreezeId=ProbeSetFreeze.Id - # AND ProbeSetFreeze.public > 0 - # ORDER BY species_name, inbredset_name, tissue_name, probesetfreeze_name, probeset_name - # LIMIT 6000 - # """ % (self.terms) - # with Bench("Running query"): - # re = g.db.execute(sql).fetchall() - # self.trait_list = [] - # with Bench("Creating trait objects"): - # for line in re: - # dataset = create_dataset(line[3], "ProbeSet", get_samplelist=False) - # trait_id = line[4] - # with Bench("Building trait object"): - # this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) - # self.trait_list.append(this_trait) + if self.type == "gene": + sql = """ + SELECT + Species.`Name` AS species_name, + InbredSet.`Name` AS inbredset_name, + Tissue.`Name` AS tissue_name, + ProbeSetFreeze.Name AS probesetfreeze_name, + ProbeSet.Name AS probeset_name, + ProbeSet.Symbol AS probeset_symbol, + ProbeSet.`description` AS probeset_description, + ProbeSet.Chr AS chr, + ProbeSet.Mb AS mb, + ProbeSetXRef.Mean AS mean, + ProbeSetXRef.LRS AS lrs, + ProbeSetXRef.`Locus` AS locus, + ProbeSetXRef.`pValue` AS pvalue, + ProbeSetXRef.`additive` AS additive + FROM Species, InbredSet, ProbeSetXRef, ProbeSet, ProbeFreeze, ProbeSetFreeze, Tissue + WHERE InbredSet.`SpeciesId`=Species.`Id` + AND ProbeFreeze.InbredSetId=InbredSet.`Id` + AND ProbeFreeze.`TissueId`=Tissue.`Id` + AND ProbeSetFreeze.ProbeFreezeId=ProbeFreeze.Id + AND ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,alias,GenbankId, UniGeneId, Probe_Target_Description) AGAINST ('%s' IN BOOLEAN MODE) ) + AND ProbeSet.Id = ProbeSetXRef.ProbeSetId + AND ProbeSetXRef.ProbeSetFreezeId=ProbeSetFreeze.Id + AND ProbeSetFreeze.public > 0 + ORDER BY species_name, inbredset_name, tissue_name, probesetfreeze_name, probeset_name + LIMIT 6000 + """ % (self.terms) + with Bench("Running query"): + re = g.db.execute(sql).fetchall() + self.trait_list = [] + with Bench("Creating trait objects"): + for line in re: + dataset = create_dataset(line[3], "ProbeSet", get_samplelist=False) + trait_id = line[4] + #with Bench("Building trait object"): + this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) + self.trait_list.append(this_trait) - # elif self.type == "phenotype": - # sql = """ - # SELECT - # Species.`Name`, - # InbredSet.`Name`, - # PublishFreeze.`Name`, - # PublishXRef.`Id`, - # Phenotype.`Post_publication_description`, - # Publication.`Authors`, - # Publication.`Year`, - # PublishXRef.`LRS`, - # PublishXRef.`Locus`, - # PublishXRef.`additive` - # FROM Species,InbredSet,PublishFreeze,PublishXRef,Phenotype,Publication - # WHERE PublishXRef.`InbredSetId`=InbredSet.`Id` - # AND PublishFreeze.`InbredSetId`=InbredSet.`Id` - # AND InbredSet.`SpeciesId`=Species.`Id` - # AND PublishXRef.`PhenotypeId`=Phenotype.`Id` - # AND PublishXRef.`PublicationId`=Publication.`Id` - # AND (Phenotype.Post_publication_description REGEXP "[[:<:]]%s[[:>:]]" - # OR Phenotype.Pre_publication_description REGEXP "[[:<:]]%s[[:>:]]" - # OR Phenotype.Pre_publication_abbreviation REGEXP "[[:<:]]%s[[:>:]]" - # OR Phenotype.Post_publication_abbreviation REGEXP "[[:<:]]%s[[:>:]]" - # OR Phenotype.Lab_code REGEXP "[[:<:]]%s[[:>:]]" - # OR Publication.PubMed_ID REGEXP "[[:<:]]%s[[:>:]]" - # OR Publication.Abstract REGEXP "[[:<:]]%s[[:>:]]" - # OR Publication.Title REGEXP "[[:<:]]%s[[:>:]]" - # OR Publication.Authors REGEXP "[[:<:]]%s[[:>:]]" - # OR PublishXRef.Id REGEXP "[[:<:]]%s[[:>:]]") - # ORDER BY Species.`Name`, InbredSet.`Name`, PublishXRef.`Id` - # LIMIT 6000 - # """ % (self.terms, self.terms, self.terms, self.terms, self.terms, self.terms, self.terms, self.terms, self.terms, self.terms) - # re = g.db.execute(sql).fetchall() - # self.trait_list = [] - # with Bench("Creating trait objects"): - # for line in re: - # dataset = create_dataset(line[2], "Publish") - # trait_id = line[3] - # this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) - # self.trait_list.append(this_trait) + elif self.type == "phenotype": + sql = """ + SELECT + Species.`Name`, + InbredSet.`Name`, + PublishFreeze.`Name`, + PublishXRef.`Id`, + Phenotype.`Post_publication_description`, + Publication.`Authors`, + Publication.`Year`, + PublishXRef.`LRS`, + PublishXRef.`Locus`, + PublishXRef.`additive` + FROM Species,InbredSet,PublishFreeze,PublishXRef,Phenotype,Publication + WHERE PublishXRef.`InbredSetId`=InbredSet.`Id` + AND PublishFreeze.`InbredSetId`=InbredSet.`Id` + AND InbredSet.`SpeciesId`=Species.`Id` + AND PublishXRef.`PhenotypeId`=Phenotype.`Id` + AND PublishXRef.`PublicationId`=Publication.`Id` + AND (Phenotype.Post_publication_description REGEXP "[[:<:]]%s[[:>:]]" + OR Phenotype.Pre_publication_description REGEXP "[[:<:]]%s[[:>:]]" + OR Phenotype.Pre_publication_abbreviation REGEXP "[[:<:]]%s[[:>:]]" + OR Phenotype.Post_publication_abbreviation REGEXP "[[:<:]]%s[[:>:]]" + OR Phenotype.Lab_code REGEXP "[[:<:]]%s[[:>:]]" + OR Publication.PubMed_ID REGEXP "[[:<:]]%s[[:>:]]" + OR Publication.Abstract REGEXP "[[:<:]]%s[[:>:]]" + OR Publication.Title REGEXP "[[:<:]]%s[[:>:]]" + OR Publication.Authors REGEXP "[[:<:]]%s[[:>:]]" + OR PublishXRef.Id REGEXP "[[:<:]]%s[[:>:]]") + ORDER BY Species.`Name`, InbredSet.`Name`, PublishXRef.`Id` + LIMIT 6000 + """ % (self.terms, self.terms, self.terms, self.terms, self.terms, self.terms, self.terms, self.terms, self.terms, self.terms) + re = g.db.execute(sql).fetchall() + self.trait_list = [] + with Bench("Creating trait objects"): + for line in re: + dataset = create_dataset(line[2], "Publish") + trait_id = line[3] + this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) + self.trait_list.append(this_trait) diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py old mode 100755 new mode 100644 index 9941a4d3..8a0485b3 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -63,40 +63,29 @@ class SearchResultPage(object): # self.dataset_group_ids = map(lambda x: x[2], results) #else: - self.quick = False - self.uc_id = uuid.uuid4() print("uc_id:", self.uc_id) - if 'q' in kw: - self.results = {} - self.quick = True - self.search_terms = kw['q'] - print("self.search_terms is: ", self.search_terms) - self.trait_type = kw['trait_type'] - self.quick_search() + print("kw is:", kw) + if kw['search_terms_or']: + self.and_or = "or" + self.search_terms = kw['search_terms_or'] else: - print("kw is:", kw) - if kw['search_terms_or']: - self.and_or = "or" - self.search_terms = kw['search_terms_or'] - else: - self.and_or = "and" - self.search_terms = kw['search_terms_and'] - self.search_term_exists = True - self.results = [] - if kw['type'] == "Phenotypes": - dataset_type = "Publish" - elif kw['type'] == "Genotypes": - dataset_type = "Geno" - else: - dataset_type = "ProbeSet" - self.dataset = create_dataset(kw['dataset'], dataset_type) - print("KEYWORD:", self.search_terms) - self.search() - if self.search_term_exists: - self.gen_search_result() - + self.and_or = "and" + self.search_terms = kw['search_terms_and'] + self.search_term_exists = True + self.results = [] + if kw['type'] == "Phenotypes": + dataset_type = "Publish" + elif kw['type'] == "Genotypes": + dataset_type = "Geno" + else: + dataset_type = "ProbeSet" + self.dataset = create_dataset(kw['dataset'], dataset_type) + print("KEYWORD:", self.search_terms) + self.search() + if self.search_term_exists: + self.gen_search_result() def gen_search_result(self): @@ -124,56 +113,6 @@ class SearchResultPage(object): self.trait_list.append(this_trait) self.dataset.get_trait_info(self.trait_list, species) - - def quick_search(self): - #search_terms = "" - #for term in self.search_terms.split(): - # search_terms += '+{} '.format(term) - - search_terms = ' '.join('+{}'.format(escape(term)) for term in self.search_terms.split()) - print("search_terms are:", search_terms) - - query = """ SELECT table_name, the_key, result_fields - FROM QuickSearch - WHERE MATCH (terms) - AGAINST ('{}' IN BOOLEAN MODE) """.format(search_terms) - - with Bench("Doing QuickSearch Query: "): - dbresults = g.db.execute(query, no_parameters=True).fetchall() - #print("results: ", pf(results)) - - self.results = collections.defaultdict(list) - - type_dict = {'PublishXRef': 'phenotype', - 'ProbeSetXRef': 'mrna_assay', - 'GenoXRef': 'genotype'} - - self.species_groups = {} - - for dbresult in dbresults: - this_result = {} - this_result['table_name'] = dbresult.table_name - if self.trait_type == type_dict[dbresult.table_name] or self.trait_type == 'all': - this_result['key'] = dbresult.the_key - this_result['result_fields'] = json.loads(dbresult.result_fields) - this_species = this_result['result_fields']['species'] - this_group = this_result['result_fields']['group_name'] - if this_species not in self.species_groups: - self.species_groups[this_species] = {} - if type_dict[dbresult.table_name] not in self.species_groups[this_species]: - self.species_groups[this_species][type_dict[dbresult.table_name]] = [] - if this_group not in self.species_groups[this_species][type_dict[dbresult.table_name]]: - self.species_groups[this_species][type_dict[dbresult.table_name]].append(this_group) - #if type_dict[dbresult.table_name] not in self.species_groups: - # self.species_groups[type_dict[dbresult.table_name]] = {} - #if this_species not in self.species_groups[type_dict[dbresult.table_name]]: - # self.species_groups[type_dict[dbresult.table_name]][this_species] = [] - #if this_group not in self.species_groups[type_dict[dbresult.table_name]][this_species]: - # self.species_groups[type_dict[dbresult.table_name]][this_species].append(this_group) - self.results[type_dict[dbresult.table_name]].append(this_result) - - import redis - Redis = redis.Redis() #def get_group_species_tree(self): # self.species_groups = collections.default_dict(list) @@ -183,43 +122,6 @@ class SearchResultPage(object): # item['result_fields']['group_name']) - #def quick_search(self): - # self.search_terms = parser.parse(self.search_terms) - # - # search_types = ["quick_mrna_assay", "quick_phenotype"] - # - # for search_category in search_types: - # these_results = [] - # search_ob = do_search.DoSearch.get_search(search_category) - # search_class = getattr(do_search, search_ob) - # for a_search in self.search_terms: - # search_term = a_search['search_term'] - # the_search = search_class(search_term) - # these_results.extend(the_search.run()) - # print("in the search results are:", self.results) - # self.results[search_category] = these_results - # - # #for a_search in self.search_terms: - # # search_term = a_search['search_term'] - # # - # # #Do mRNA assay search - # # search_ob = do_search.DoSearch.get_search("quick_mrna_assay") - # # search_class = getattr(do_search, search_ob) - # # the_search = search_class(search_term) - # # - # # self.results.extend(the_search.run()) - # # print("in the search results are:", self.results) - # - # - # #return True - # - # #search_gene - # #search_geno - # #search_pheno - # #search_mrn - # #search_publish - - def search(self): self.search_terms = parser.parse(self.search_terms) print("After parsing:", self.search_terms) diff --git a/wqflask/wqflask/templates/gsearch_gene.html b/wqflask/wqflask/templates/gsearch_gene.html index 92b0b411..37e26817 100755 --- a/wqflask/wqflask/templates/gsearch_gene.html +++ b/wqflask/wqflask/templates/gsearch_gene.html @@ -43,6 +43,25 @@