From d2e64633a40ecab55c9cedd46b0a8f9e93761aad Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Tue, 9 Apr 2013 23:18:05 +0000 Subject: Have quick search working for phenotypes --- wqflask/maintenance/quick_search_table.py | 133 +++++++++++++++++++++++++++--- wqflask/wqflask/do_search.py | 84 +++++++++---------- wqflask/wqflask/search_results.py | 113 ++++++++++++++++--------- wqflask/wqflask/views.py | 5 +- 4 files changed, 245 insertions(+), 90 deletions(-) (limited to 'wqflask') diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py index 48697e58..a6ca6265 100644 --- a/wqflask/maintenance/quick_search_table.py +++ b/wqflask/maintenance/quick_search_table.py @@ -88,7 +88,7 @@ class PublishXRef(Base): "PublishXRef.PhenotypeId = Phenotype.Id and " "PublishXRef.PublicationId = Publication.Id ").params(publishxref_id=publishxref_id, inbredset_id=inbredset_id).all() - + unique = set() for item in results[0]: #print("locals:", locals()) @@ -110,11 +110,11 @@ class PublishXRef(Base): print("\n-- UDE \n") # Can't get it into utf-8, we won't use it continue - + unique.add(token) print("\nUnique terms are: {}\n".format(unique)) return " ".join(unique) - + @staticmethod def get_result_fields(publishxref_id, inbredset_id): results = Session.query( @@ -153,9 +153,9 @@ class PublishXRef(Base): # inbredset_id=inbredset_id).all() for result in results: print("****", result) - + assert len(set(result for result in results)) == 1, "Different results or no results" - + print("results are:", results) result = results[0] result = row2dict(result) @@ -170,10 +170,123 @@ class PublishXRef(Base): result[key] = value.decode('utf-8', errors='ignore') json_results = json.dumps(result, sort_keys=True) - return json_results - + return json_results + +class GenoXRef(Base): + __tablename__ = 'ProbeSetXRef' + GenoFreezeId = sa.Column(sa.Integer, primary_key=True) + GenoId = sa.Column(sa.Integer, primary_key=True) + DataId = sa.Column(sa.Integer) + cM = sa.Column(sa.Float) + Used_for_mapping = sa.Column(sa.Text) + + @classmethod + def run(cls): + conn = Engine.connect() + counter = 0 + for item in page_query(Session.query(cls)): #all() + values = {} + values['table_name'] = cls.__tablename__ + values['the_key'] = json.dumps([item.GenoId, item.GenoFreezeId]) + values['terms'] = cls.get_unique_terms(item.GenoId) + print("terms is:", values['terms']) + values['result_fields'] = cls.get_result_fields(item.GenoId, item.GenoFreezeId) + ins = QuickSearch.insert().values(**values) + conn.execute(ins) + counter += 1 + print("Done:", counter) + @staticmethod + def get_unique_terms(geno_id): + results = Session.query( + "name", + "marker_name" + ).from_statement( + "SELECT Geno.Name as name, " + "Geno.Marker_Name as marker_name " + "FROM Geno " + "WHERE Geno.Id = :geno_id ").params(geno_id=geno_id).all() + + unique = set() + for item in results[0]: + #print("locals:", locals()) + if not item: + continue + for token in item.split(): + if len(token) > 2: + try: + # This hopefully ensures that the token is utf-8 + token = token.encode('utf-8') + print(" ->", token) + except UnicodeDecodeError: + print("\n-- UDE \n") + # Can't get it into utf-8, we won't use it + continue + + unique.add(token) + print("\nUnique terms are: {}\n".format(unique)) + return " ".join(unique) + + + @staticmethod + def get_result_fields(geno_id, dataset_id): + results = Session.query( + "name", + "species", + "group_name", + "dataset", + "dataset_name", + "symbol", + "description", + "chr", "mb", + "lrs", + "genbank_id", + "gene_id", + "chip_id", + "chip_name" + ).from_statement( + "SELECT Geno.Name as name, " + "Geno.Marker_Name as marker_name, " + "InbredSet.Name as group_name, " + "Species.Name as species, " + "GenoFreeze.Name as dataset, " + "GenoFreeze.FullName as dataset_name, " + "Geno.Chr as chr, " + "Geno.Mb as mb, " + "Geno.Source as source " + "FROM Geno, " + "GenoXRef, " + "GenoFreeze, " + "InbredSet, " + "Species " + "WHERE Geno.Id = :geno_id and " + "GenoXRef.GenoId = Geno.Id and " + "GenoFreeze.Id = :dataset_id and " + "GenoXRef.GenoFreezeId = GenoFreeze.Id and " + "InbredSet.Id = GenoFreeze.InbredSetId and " + "InbredSet.SpeciesId = Species.Id ").params(geno_id=geno_id, + dataset_id=dataset_id).all() + for result in results: + print(result) + assert len(set(result for result in results)) == 1, "Different results" + + print("results are:", results) + result = results[0] + result = row2dict(result) + try: + json_results = json.dumps(result, sort_keys=True) + except UnicodeDecodeError: + print("\n\nTrying to massage unicode\n\n") + for key, value in result.iteritems(): + print("\tkey is:", key) + print("\tvalue is:", value) + if isinstance(value, basestring): + result[key] = value.decode('utf-8', errors='ignore') + json_results = json.dumps(result, sort_keys=True) + + return json_results + class ProbeSetXRef(Base): __tablename__ = 'ProbeSetXRef' @@ -255,7 +368,7 @@ class ProbeSetXRef(Base): results = Session.query( "name", "species", - "group", + "group_name", "dataset", "dataset_name", "symbol", @@ -269,7 +382,7 @@ class ProbeSetXRef(Base): ).from_statement( "SELECT ProbeSet.Name as name, " "Species.Name as species, " - "InbredSet.Name as group, " + "InbredSet.Name as group_name, " "ProbeSetFreeze.Name as dataset, " "ProbeSetFreeze.FullName as dataset_name, " "ProbeSet.Symbol as symbol, " @@ -350,8 +463,8 @@ def page_query(q): def main(): - PublishXRef.run() ProbeSetXRef.run() + PublishXRef.run() if __name__ == "__main__": main() \ No newline at end of file diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py index fc65eb49..1b1b56fb 100644 --- a/wqflask/wqflask/do_search.py +++ b/wqflask/wqflask/do_search.py @@ -235,48 +235,48 @@ class PhenotypeSearch(DoSearch): return self.execute(query) -#class QuickPhenotypeSearch(PhenotypeSearch): -# """A search across all phenotype datasets""" -# -# DoSearch.search_types['quick_phenotype'] = "QuickPhenotypeSearch" -# -# base_query = """SELECT Species.Name as Species_Name, -# PublishFreeze.FullName as Dataset_Name, -# PublishFreeze.Name, -# PublishXRef.Id, -# PublishFreeze.createtime as thistable, -# Publication.PubMed_ID as Publication_PubMed_ID, -# Phenotype.Post_publication_description as Phenotype_Name -# FROM Phenotype, -# PublishFreeze, -# Publication, -# PublishXRef, -# InbredSet, -# Species """ -# -# search_fields = ('Phenotype.Post_publication_description', -# 'Phenotype.Pre_publication_description', -# 'Phenotype.Pre_publication_abbreviation', -# 'Phenotype.Post_publication_abbreviation', -# 'Phenotype.Lab_code', -# 'Publication.PubMed_ID', -# 'Publication.Abstract', -# 'Publication.Title', -# 'Publication.Authors') -# -# def compile_final_query(self, where_clause = ''): -# """Generates the final query string""" -# -# query = (self.base_query + -# """WHERE %s -# PublishXRef.PhenotypeId = Phenotype.Id and -# PublishXRef.PublicationId = Publication.Id and -# PublishXRef.InbredSetId = InbredSet.Id and -# InbredSet.SpeciesId = Species.Id""" % where_clause) -# -# print("query is:", pf(query)) -# -# return query +class QuickPhenotypeSearch(PhenotypeSearch): + """A search across all phenotype datasets""" + + DoSearch.search_types['quick_phenotype'] = "QuickPhenotypeSearch" + + base_query = """SELECT Species.Name as Species_Name, + PublishFreeze.FullName as Dataset_Name, + PublishFreeze.Name, + PublishXRef.Id, + PublishFreeze.createtime as thistable, + Publication.PubMed_ID as Publication_PubMed_ID, + Phenotype.Post_publication_description as Phenotype_Name + FROM Phenotype, + PublishFreeze, + Publication, + PublishXRef, + InbredSet, + Species """ + + search_fields = ('Phenotype.Post_publication_description', + 'Phenotype.Pre_publication_description', + 'Phenotype.Pre_publication_abbreviation', + 'Phenotype.Post_publication_abbreviation', + 'Phenotype.Lab_code', + 'Publication.PubMed_ID', + 'Publication.Abstract', + 'Publication.Title', + 'Publication.Authors') + + def compile_final_query(self, where_clause = ''): + """Generates the final query string""" + + query = (self.base_query + + """WHERE %s + PublishXRef.PhenotypeId = Phenotype.Id and + PublishXRef.PublicationId = Publication.Id and + PublishXRef.InbredSetId = InbredSet.Id and + InbredSet.SpeciesId = Species.Id""" % where_clause) + + print("query is:", pf(query)) + + return query def run(self): """Generates and runs a search across all phenotype datasets""" diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index 43c68942..499782ac 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -13,10 +13,14 @@ import time #import pp - Note from Sam: is this used? import math import datetime +import collections from pprint import pformat as pf +import json + from flask import Flask, g +from MySQLdb import escape_string as escape # Instead of importing HT we're going to build a class below until we can eliminate it from htmlgen import HTMLgen2 as HT @@ -58,19 +62,22 @@ class SearchResultPage(): # self.dataset_group_ids = map(lambda x: x[2], results) #else: - self.results = [] + self.quick = False if 'q' in kw: - #self.quick_search = True + self.results = {} + self.quick = True self.search_terms = kw['q'] print("self.search_terms is: ", self.search_terms) self.quick_search() else: + self.results = [] #self.quick_search = False self.search_terms = kw['search_terms'] self.dataset = create_dataset(kw['dataset']) self.search() - self.gen_search_result() + self.gen_search_result() + def gen_search_result(self): @@ -81,7 +88,7 @@ class SearchResultPage(): """ self.trait_list = [] - species = webqtlDatabaseFunction.retrieve_species(self.dataset.group.name) + species = webqtlDatabaseFunction.retrieve_species(self.dataset.group.name) # result_set represents the results for each search term; a search of # "shh grin2b" would have two sets of results, one for each term @@ -101,39 +108,71 @@ class SearchResultPage(): self.dataset.get_trait_info(self.trait_list, species) def quick_search(self): - self.search_terms = parser.parse(self.search_terms) - print("After parsing:", self.search_terms) - - search_types = ["quick_phenotype", "quick_mrna_assay"] - - for search_category in search_types: - search_ob = do_search.DoSearch.get_search(search_category) - search_class = getattr(do_search, search_ob) - for a_search in self.search_terms: - search_term = a_search['search_term'] - the_search = search_class(search_term) - self.results.extend(the_search.run()) - print("in the search results are:", self.results) - - #for a_search in self.search_terms: - # search_term = a_search['search_term'] - # - # #Do mRNA assay search - # search_ob = do_search.DoSearch.get_search("quick_mrna_assay") - # search_class = getattr(do_search, search_ob) - # the_search = search_class(search_term) - # - # self.results.extend(the_search.run()) - # print("in the search results are:", self.results) - - - #return True - - #search_gene - #search_geno - #searhch_pheno - #search_mrn - #searhc_publish + #search_terms = "" + #for term in self.search_terms.split(): + # search_terms += '+{} '.format(term) + + search_terms = ' '.join('+{}'.format(escape(term)) for term in self.search_terms.split()) + print("search_terms are:", search_terms) + + query = """ SELECT table_name, the_key, result_fields + FROM QuickSearch + WHERE MATCH (terms) + AGAINST ('{}' IN BOOLEAN MODE) """.format(search_terms) + dbresults = g.db.execute(query, no_parameters=True).fetchall() + #print("results: ", pf(results)) + + self.results = collections.defaultdict(list) + + type_dict = {'PublishXRef': 'phenotype', + 'ProbesetXRef': 'mrna_assay', + 'GenoXRef': 'genotype'} + + for dbresult in dbresults: + this_result = {} + this_result['table_name'] = dbresult.table_name + this_result['key'] = dbresult.the_key + this_result['result_fields'] = json.loads(dbresult.result_fields) + + self.results[type_dict[dbresult.table_name]].append(this_result) + + print("results: ", pf(self.results['phenotype'])) + + #def quick_search(self): + # self.search_terms = parser.parse(self.search_terms) + # + # search_types = ["quick_mrna_assay", "quick_phenotype"] + # + # for search_category in search_types: + # these_results = [] + # search_ob = do_search.DoSearch.get_search(search_category) + # search_class = getattr(do_search, search_ob) + # for a_search in self.search_terms: + # search_term = a_search['search_term'] + # the_search = search_class(search_term) + # these_results.extend(the_search.run()) + # print("in the search results are:", self.results) + # self.results[search_category] = these_results + # + # #for a_search in self.search_terms: + # # search_term = a_search['search_term'] + # # + # # #Do mRNA assay search + # # search_ob = do_search.DoSearch.get_search("quick_mrna_assay") + # # search_class = getattr(do_search, search_ob) + # # the_search = search_class(search_term) + # # + # # self.results.extend(the_search.run()) + # # print("in the search results are:", self.results) + # + # + # #return True + # + # #search_gene + # #search_geno + # #searhch_pheno + # #search_mrn + # #searhc_publish def search(self): diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index eb7ae8f8..7a504c54 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -83,7 +83,10 @@ def search_page(): #for trait in the_search.trait_list: # print(" -", trait.description_display) - return render_template("search_result_page.html", **the_search.__dict__) + if the_search.quick: + return render_template("quick_search.html", **the_search.__dict__) + else: + return render_template("search_result_page.html", **the_search.__dict__) @app.route("/whats_new") -- cgit v1.2.3