about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- wqflask/maintenance/quick_search_table.py 133
-rw-r--r-- wqflask/wqflask/do_search.py 84
-rw-r--r-- wqflask/wqflask/search_results.py 113
-rw-r--r-- wqflask/wqflask/views.py 5
4 files changed, 245 insertions, 90 deletions
diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py
index 48697e58..a6ca6265 100644
--- a/wqflask/maintenance/quick_search_table.py
+++ b/wqflask/maintenance/quick_search_table.py
@@ -88,7 +88,7 @@ class PublishXRef(Base):
"PublishXRef.PhenotypeId = Phenotype.Id and "
"PublishXRef.PublicationId = Publication.Id ").params(publishxref_id=publishxref_id,
inbredset_id=inbredset_id).all()
-
+
unique = set()
for item in results[0]:
#print("locals:", locals())
@@ -110,11 +110,11 @@ class PublishXRef(Base):
print("\n-- UDE \n")
# Can't get it into utf-8, we won't use it
continue
-
+
unique.add(token)
print("\nUnique terms are: {}\n".format(unique))
return " ".join(unique)
-
+
@staticmethod
def get_result_fields(publishxref_id, inbredset_id):
results = Session.query(
@@ -153,9 +153,9 @@ class PublishXRef(Base):
# inbredset_id=inbredset_id).all()
for result in results:
print("****", result)
-
+
assert len(set(result for result in results)) == 1, "Different results or no results"
-
+
print("results are:", results)
result = results[0]
result = row2dict(result)
@@ -170,10 +170,123 @@ class PublishXRef(Base):
result[key] = value.decode('utf-8', errors='ignore')
json_results = json.dumps(result, sort_keys=True)
- return json_results
-
+ return json_results
+
+class GenoXRef(Base):
+    """Maps the GenoXRef table and copies its rows into QuickSearch.
+
+    ``run`` walks every (GenoId, GenoFreezeId) row and inserts one QuickSearch
+    row holding the table name, a JSON key, the search terms and the JSON
+    encoded result fields.
+    """
+
+    # Was 'ProbeSetXRef' (copy/paste from the class defined right after this
+    # one). The columns mapped here and the SQL in get_result_fields belong to
+    # the GenoXRef table, and two declarative classes on the same Base cannot
+    # both claim the 'ProbeSetXRef' table name.
+    __tablename__ = 'GenoXRef'
+
+    GenoFreezeId = sa.Column(sa.Integer, primary_key=True)
+    GenoId = sa.Column(sa.Integer, primary_key=True)
+    DataId = sa.Column(sa.Integer)
+    cM = sa.Column(sa.Float)
+    Used_for_mapping = sa.Column(sa.Text)
+
+    @classmethod
+    def run(cls):
+        """Insert one QuickSearch row for each row of this table."""
+        conn = Engine.connect()
+        counter = 0
+        for item in page_query(Session.query(cls)): #all()
+            values = {}
+            values['table_name'] = cls.__tablename__
+            values['the_key'] = json.dumps([item.GenoId, item.GenoFreezeId])
+            values['terms'] = cls.get_unique_terms(item.GenoId)
+            print("terms is:", values['terms'])
+            values['result_fields'] = cls.get_result_fields(item.GenoId, item.GenoFreezeId)
+            ins = QuickSearch.insert().values(**values)
+            conn.execute(ins)
+            counter += 1
+            print("Done:", counter)
+
+    @staticmethod
+    def get_unique_terms(geno_id):
+        """Return the distinct search tokens for one Geno row, space separated.
+
+        Tokens come from Geno.Name and Geno.Marker_Name, split on whitespace;
+        tokens of two characters or fewer are dropped, as are tokens that
+        cannot be encoded as utf-8.
+        """
+        results = Session.query(
+            "name",
+            "marker_name"
+        ).from_statement(
+            "SELECT Geno.Name as name, "
+            "Geno.Marker_Name as marker_name "
+            "FROM Geno "
+            "WHERE Geno.Id = :geno_id ").params(geno_id=geno_id).all()
+
+        unique = set()
+        for item in results[0]:
+            #print("locals:", locals())
+            if not item:
+                continue
+            for token in item.split():
+                if len(token) > 2:
+                    try:
+                        # This hopefully ensures that the token is utf-8
+                        token = token.encode('utf-8')
+                        print(" ->", token)
+                    except UnicodeDecodeError:
+                        print("\n-- UDE \n")
+                        # Can't get it into utf-8, we won't use it
+                        continue
+
+                    unique.add(token)
+        print("\nUnique terms are: {}\n".format(unique))
+        return " ".join(unique)
+
+    @staticmethod
+    def get_result_fields(geno_id, dataset_id):
+        """Return the display fields for one marker/dataset pair as a JSON string.
+
+        Raises AssertionError unless the query yields exactly one distinct row.
+        """
+        # The names passed to Session.query must be exactly the aliases the
+        # SELECT below produces. The previous list asked for columns such as
+        # "symbol", "lrs" and "chip_name" that this statement never returns,
+        # and left out "marker_name" and "source".
+        results = Session.query(
+            "name",
+            "marker_name",
+            "group_name",
+            "species",
+            "dataset",
+            "dataset_name",
+            "chr", "mb",
+            "source"
+        ).from_statement(
+            "SELECT Geno.Name as name, "
+            "Geno.Marker_Name as marker_name, "
+            "InbredSet.Name as group_name, "
+            "Species.Name as species, "
+            "GenoFreeze.Name as dataset, "
+            "GenoFreeze.FullName as dataset_name, "
+            "Geno.Chr as chr, "
+            "Geno.Mb as mb, "
+            "Geno.Source as source "
+            "FROM Geno, "
+            "GenoXRef, "
+            "GenoFreeze, "
+            "InbredSet, "
+            "Species "
+            "WHERE Geno.Id = :geno_id and "
+            "GenoXRef.GenoId = Geno.Id and "
+            "GenoFreeze.Id = :dataset_id and "
+            "GenoXRef.GenoFreezeId = GenoFreeze.Id and "
+            "InbredSet.Id = GenoFreeze.InbredSetId and "
+            "InbredSet.SpeciesId = Species.Id ").params(geno_id=geno_id,
+                                                        dataset_id=dataset_id).all()
+        for result in results:
+            print(result)
+        assert len(set(results)) == 1, "Different results"
+
+        print("results are:", results)
+        result = results[0]
+        result = row2dict(result)
+        try:
+            json_results = json.dumps(result, sort_keys=True)
+        except UnicodeDecodeError:
+            # Retry after dropping the bytes that are not valid utf-8.
+            print("\n\nTrying to massage unicode\n\n")
+            for key, value in result.iteritems():
+                print("\tkey is:", key)
+                print("\tvalue is:", value)
+                if isinstance(value, basestring):
+                    result[key] = value.decode('utf-8', errors='ignore')
+            json_results = json.dumps(result, sort_keys=True)
+
+        return json_results
+
class ProbeSetXRef(Base):
__tablename__ = 'ProbeSetXRef'
@@ -255,7 +368,7 @@ class ProbeSetXRef(Base):
results = Session.query(
"name",
"species",
- "group",
+ "group_name",
"dataset",
"dataset_name",
"symbol",
@@ -269,7 +382,7 @@ class ProbeSetXRef(Base):
).from_statement(
"SELECT ProbeSet.Name as name, "
"Species.Name as species, "
- "InbredSet.Name as group, "
+ "InbredSet.Name as group_name, "
"ProbeSetFreeze.Name as dataset, "
"ProbeSetFreeze.FullName as dataset_name, "
"ProbeSet.Symbol as symbol, "
@@ -350,8 +463,8 @@ def page_query(q):
def main():
+ # Calls ProbeSetXRef.run() and then PublishXRef.run().
+ # NOTE(review): GenoXRef.run() is not called here -- confirm whether that is intentional.
- PublishXRef.run()
ProbeSetXRef.run()
+ PublishXRef.run()
if __name__ == "__main__":
main() \ No newline at end of file
diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py
index fc65eb49..1b1b56fb 100644
--- a/wqflask/wqflask/do_search.py
+++ b/wqflask/wqflask/do_search.py
@@ -235,48 +235,48 @@ class PhenotypeSearch(DoSearch):
return self.execute(query)
-#class QuickPhenotypeSearch(PhenotypeSearch):
-# """A search across all phenotype datasets"""
-#
-# DoSearch.search_types['quick_phenotype'] = "QuickPhenotypeSearch"
-#
-# base_query = """SELECT Species.Name as Species_Name,
-# PublishFreeze.FullName as Dataset_Name,
-# PublishFreeze.Name,
-# PublishXRef.Id,
-# PublishFreeze.createtime as thistable,
-# Publication.PubMed_ID as Publication_PubMed_ID,
-# Phenotype.Post_publication_description as Phenotype_Name
-# FROM Phenotype,
-# PublishFreeze,
-# Publication,
-# PublishXRef,
-# InbredSet,
-# Species """
-#
-# search_fields = ('Phenotype.Post_publication_description',
-# 'Phenotype.Pre_publication_description',
-# 'Phenotype.Pre_publication_abbreviation',
-# 'Phenotype.Post_publication_abbreviation',
-# 'Phenotype.Lab_code',
-# 'Publication.PubMed_ID',
-# 'Publication.Abstract',
-# 'Publication.Title',
-# 'Publication.Authors')
-#
-# def compile_final_query(self, where_clause = ''):
-# """Generates the final query string"""
-#
-# query = (self.base_query +
-# """WHERE %s
-# PublishXRef.PhenotypeId = Phenotype.Id and
-# PublishXRef.PublicationId = Publication.Id and
-# PublishXRef.InbredSetId = InbredSet.Id and
-# InbredSet.SpeciesId = Species.Id""" % where_clause)
-#
-# print("query is:", pf(query))
-#
-# return query
+class QuickPhenotypeSearch(PhenotypeSearch):
+ """A search across all phenotype datasets"""
+
+ DoSearch.search_types['quick_phenotype'] = "QuickPhenotypeSearch"
+
+ base_query = """SELECT Species.Name as Species_Name,
+ PublishFreeze.FullName as Dataset_Name,
+ PublishFreeze.Name,
+ PublishXRef.Id,
+ PublishFreeze.createtime as thistable,
+ Publication.PubMed_ID as Publication_PubMed_ID,
+ Phenotype.Post_publication_description as Phenotype_Name
+ FROM Phenotype,
+ PublishFreeze,
+ Publication,
+ PublishXRef,
+ InbredSet,
+ Species """
+
+ search_fields = ('Phenotype.Post_publication_description',
+ 'Phenotype.Pre_publication_description',
+ 'Phenotype.Pre_publication_abbreviation',
+ 'Phenotype.Post_publication_abbreviation',
+ 'Phenotype.Lab_code',
+ 'Publication.PubMed_ID',
+ 'Publication.Abstract',
+ 'Publication.Title',
+ 'Publication.Authors')
+
+ def compile_final_query(self, where_clause = ''):
+ """Generates the final query string
+
+ Appends a WHERE clause to self.base_query: where_clause is substituted
+ for the %s placeholder and is followed directly by the fixed join
+ conditions. The resulting query is printed and returned.
+
+ NOTE(review): nothing is inserted between where_clause and the join
+ conditions, so a non-empty where_clause presumably has to end with
+ "and" -- confirm against callers.
+ """
+
+ query = (self.base_query +
+ """WHERE %s
+ PublishXRef.PhenotypeId = Phenotype.Id and
+ PublishXRef.PublicationId = Publication.Id and
+ PublishXRef.InbredSetId = InbredSet.Id and
+ InbredSet.SpeciesId = Species.Id""" % where_clause)
+
+ print("query is:", pf(query))
+
+ return query
def run(self):
"""Generates and runs a search across all phenotype datasets"""
diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py
index 43c68942..499782ac 100644
--- a/wqflask/wqflask/search_results.py
+++ b/wqflask/wqflask/search_results.py
@@ -13,10 +13,14 @@ import time
#import pp - Note from Sam: is this used?
import math
import datetime
+import collections
from pprint import pformat as pf
+import json
+
from flask import Flask, g
+from MySQLdb import escape_string as escape
# Instead of importing HT we're going to build a class below until we can eliminate it
from htmlgen import HTMLgen2 as HT
@@ -58,19 +62,22 @@ class SearchResultPage():
# self.dataset_group_ids = map(lambda x: x[2], results)
#else:
- self.results = []
+ self.quick = False
if 'q' in kw:
- #self.quick_search = True
+ self.results = {}
+ self.quick = True
self.search_terms = kw['q']
print("self.search_terms is: ", self.search_terms)
self.quick_search()
else:
+ self.results = []
#self.quick_search = False
self.search_terms = kw['search_terms']
self.dataset = create_dataset(kw['dataset'])
self.search()
- self.gen_search_result()
+ self.gen_search_result()
+
def gen_search_result(self):
@@ -81,7 +88,7 @@ class SearchResultPage():
"""
self.trait_list = []
- species = webqtlDatabaseFunction.retrieve_species(self.dataset.group.name)
+ species = webqtlDatabaseFunction.retrieve_species(self.dataset.group.name)
# result_set represents the results for each search term; a search of
# "shh grin2b" would have two sets of results, one for each term
@@ -101,39 +108,71 @@ class SearchResultPage():
self.dataset.get_trait_info(self.trait_list, species)
def quick_search(self):
- self.search_terms = parser.parse(self.search_terms)
- print("After parsing:", self.search_terms)
-
- search_types = ["quick_phenotype", "quick_mrna_assay"]
-
- for search_category in search_types:
- search_ob = do_search.DoSearch.get_search(search_category)
- search_class = getattr(do_search, search_ob)
- for a_search in self.search_terms:
- search_term = a_search['search_term']
- the_search = search_class(search_term)
- self.results.extend(the_search.run())
- print("in the search results are:", self.results)
-
- #for a_search in self.search_terms:
- # search_term = a_search['search_term']
- #
- # #Do mRNA assay search
- # search_ob = do_search.DoSearch.get_search("quick_mrna_assay")
- # search_class = getattr(do_search, search_ob)
- # the_search = search_class(search_term)
- #
- # self.results.extend(the_search.run())
- # print("in the search results are:", self.results)
-
-
- #return True
-
- #search_gene
- #search_geno
- #searhch_pheno
- #search_mrn
- #searhc_publish
+ """Look up self.search_terms in the QuickSearch table and group the hits.
+
+ Each whitespace-separated term is escaped and prefixed with '+', then
+ used in a MATCH (terms) AGAINST (... IN BOOLEAN MODE) query.
+ self.results becomes a defaultdict(list) keyed by 'phenotype',
+ 'mrna_assay' or 'genotype'; every entry holds 'table_name', 'key' and
+ the JSON-decoded 'result_fields' of one QuickSearch row.
+ """
+ #search_terms = ""
+ #for term in self.search_terms.split():
+ # search_terms += '+{} '.format(term)
+
+ search_terms = ' '.join('+{}'.format(escape(term)) for term in self.search_terms.split())
+ print("search_terms are:", search_terms)
+
+ query = """ SELECT table_name, the_key, result_fields
+ FROM QuickSearch
+ WHERE MATCH (terms)
+ AGAINST ('{}' IN BOOLEAN MODE) """.format(search_terms)
+ dbresults = g.db.execute(query, no_parameters=True).fetchall()
+ #print("results: ", pf(results))
+
+ self.results = collections.defaultdict(list)
+
+ # Keys must equal the table_name values stored in QuickSearch, i.e. the
+ # __tablename__ of the maintenance classes. The mRNA table is spelled
+ # 'ProbeSetXRef' (capital S); the old 'ProbesetXRef' key raised KeyError
+ # on the first mRNA hit.
+ type_dict = {'PublishXRef': 'phenotype',
+ 'ProbeSetXRef': 'mrna_assay',
+ 'GenoXRef': 'genotype'}
+
+ for dbresult in dbresults:
+ this_result = {}
+ this_result['table_name'] = dbresult.table_name
+ this_result['key'] = dbresult.the_key
+ this_result['result_fields'] = json.loads(dbresult.result_fields)
+
+ self.results[type_dict[dbresult.table_name]].append(this_result)
+
+ print("results: ", pf(self.results['phenotype']))
+
+ #def quick_search(self):
+ # self.search_terms = parser.parse(self.search_terms)
+ #
+ # search_types = ["quick_mrna_assay", "quick_phenotype"]
+ #
+ # for search_category in search_types:
+ # these_results = []
+ # search_ob = do_search.DoSearch.get_search(search_category)
+ # search_class = getattr(do_search, search_ob)
+ # for a_search in self.search_terms:
+ # search_term = a_search['search_term']
+ # the_search = search_class(search_term)
+ # these_results.extend(the_search.run())
+ # print("in the search results are:", self.results)
+ # self.results[search_category] = these_results
+ #
+ # #for a_search in self.search_terms:
+ # # search_term = a_search['search_term']
+ # #
+ # # #Do mRNA assay search
+ # # search_ob = do_search.DoSearch.get_search("quick_mrna_assay")
+ # # search_class = getattr(do_search, search_ob)
+ # # the_search = search_class(search_term)
+ # #
+ # # self.results.extend(the_search.run())
+ # # print("in the search results are:", self.results)
+ #
+ #
+ # #return True
+ #
+ # #search_gene
+ # #search_geno
+ # #searhch_pheno
+ # #search_mrn
+ # #searhc_publish
def search(self):
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index eb7ae8f8..7a504c54 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -83,7 +83,10 @@ def search_page():
#for trait in the_search.trait_list:
# print(" -", trait.description_display)
- return render_template("search_result_page.html", **the_search.__dict__)
+ if the_search.quick:
+ return render_template("quick_search.html", **the_search.__dict__)
+ else:
+ return render_template("search_result_page.html", **the_search.__dict__)
@app.route("/whats_new")