Have quick search working for phenotypes

author: Zachary Sloan 2013-04-09 23:18:05 +0000
committer: Zachary Sloan 2013-04-09 23:18:05 +0000
commit: d2e64633a40ecab55c9cedd46b0a8f9e93761aad (patch)
tree: 78c4a182bbede6d431ec9fa11c4c682e7c5508e6
parent: c7c306c69254ca49ddeccc495a8a096fcf03974d (diff)
download: genenetwork2-d2e64633a40ecab55c9cedd46b0a8f9e93761aad.tar.gz
4 files changed, 245 insertions, 90 deletions
diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py
index 48697e58..a6ca6265 100644
--- a/wqflask/maintenance/quick_search_table.py
+++ b/wqflask/maintenance/quick_search_table.py
@@ -88,7 +88,7 @@ class PublishXRef(Base):
                 "PublishXRef.PhenotypeId = Phenotype.Id and "
                 "PublishXRef.PublicationId = Publication.Id ").params(publishxref_id=publishxref_id,
                                                             inbredset_id=inbredset_id).all()
-        
+
         unique = set()
         for item in results[0]:
             #print("locals:", locals())
@@ -110,11 +110,11 @@ class PublishXRef(Base):
                         print("\n-- UDE \n")
                         # Can't get it into utf-8, we won't use it
                         continue 
-                    
+
                     unique.add(token)
         print("\nUnique terms are: {}\n".format(unique))
         return " ".join(unique)            
-    
+
     @staticmethod
     def get_result_fields(publishxref_id, inbredset_id):
         results = Session.query(
@@ -153,9 +153,9 @@ class PublishXRef(Base):
                 #                                            inbredset_id=inbredset_id).all()
         for result in results:
             print("****", result)
-        
+
         assert len(set(result for result in results)) == 1, "Different results or no results"
-        
+
         print("results are:", results)
         result = results[0]
         result = row2dict(result)
@@ -170,10 +170,123 @@ class PublishXRef(Base):
                     result[key] = value.decode('utf-8', errors='ignore')
             json_results = json.dumps(result, sort_keys=True)
 
-        return json_results    
-    
+        return json_results
+
+class GenoXRef(Base):
+    """Rows of the GenoXRef table (columns below); run() indexes them into QuickSearch."""
     
+    __tablename__ = 'GenoXRef'
+    GenoFreezeId = sa.Column(sa.Integer, primary_key=True)
+    GenoId = sa.Column(sa.Integer, primary_key=True)
+    DataId = sa.Column(sa.Integer)
+    cM = sa.Column(sa.Float)
+    Used_for_mapping = sa.Column(sa.Text)
+
+    @classmethod
+    def run(cls):
+        """Insert one QuickSearch row for each row yielded by page_query."""
+        conn = Engine.connect()
+        for counter, item in enumerate(page_query(Session.query(cls)), 1):
+            values = {}
+            values['table_name'] = cls.__tablename__
+            values['the_key'] = json.dumps([item.GenoId, item.GenoFreezeId])
+            values['terms'] = cls.get_unique_terms(item.GenoId)
+            print("terms is:", values['terms'])
+            values['result_fields'] = cls.get_result_fields(item.GenoId, item.GenoFreezeId)
+            ins = QuickSearch.insert().values(**values)
+            conn.execute(ins)
+            print("Done:", counter)
     
+    @staticmethod
+    def get_unique_terms(geno_id):
+        """Return the space-joined set of searchable tokens for one marker.
+
+        Tokens come from Geno.Name and Geno.Marker_Name; tokens of two
+        characters or fewer are dropped.
+        """
+        results = Session.query(
+                "name",
+                "marker_name"
+            ).from_statement(
+                "SELECT Geno.Name as name, "
+                "Geno.Marker_Name as marker_name "
+                "FROM Geno "
+                "WHERE Geno.Id = :geno_id ").params(geno_id=geno_id).all()
+
+        unique = set()
+        for item in results[0]:
+            if not item:
+                continue
+            for token in item.split():
+                if len(token) > 2:
+                    try:
+                        # This hopefully ensures that the token is utf-8
+                        token = token.encode('utf-8')
+                        print(" ->", token)
+                    except UnicodeDecodeError:
+                        print("\n-- UDE \n")
+                        # Can't get it into utf-8, we won't use it
+                        continue
+                    unique.add(token)
+        print("\nUnique terms are: {}\n".format(unique))
+        return " ".join(unique)
+
+    @staticmethod
+    def get_result_fields(geno_id, dataset_id):
+        """Return the JSON-encoded display fields for one marker in one dataset.
+
+        Raises AssertionError unless the query yields exactly one distinct row.
+        """
+        # The labels requested here must match the aliases in the SELECT below.
+        results = Session.query(
+                "name",
+                "marker_name",
+                "group_name",
+                "species",
+                "dataset",
+                "dataset_name",
+                "chr", "mb",
+                "source"
+            ).from_statement(
+                "SELECT Geno.Name as name, "
+                "Geno.Marker_Name as marker_name, "
+                "InbredSet.Name as group_name, "
+                "Species.Name as species, "
+                "GenoFreeze.Name as dataset, "
+                "GenoFreeze.FullName as dataset_name, "
+                "Geno.Chr as chr, "
+                "Geno.Mb as mb, "
+                "Geno.Source as source "
+                "FROM Geno, "
+                "GenoXRef, "
+                "GenoFreeze, "
+                "InbredSet, "
+                "Species "
+                "WHERE Geno.Id = :geno_id and "
+                "GenoXRef.GenoId = Geno.Id and "
+                "GenoFreeze.Id = :dataset_id and "
+                "GenoXRef.GenoFreezeId = GenoFreeze.Id and "
+                "InbredSet.Id = GenoFreeze.InbredSetId and "
+                "InbredSet.SpeciesId = Species.Id ").params(geno_id=geno_id,
+                                                                    dataset_id=dataset_id).all()
+        for result in results:
+            print(result)
+        assert len(set(results)) == 1, "Different results"
+        print("results are:", results)
+        result = results[0]
+        result = row2dict(result)
+        try:
+            json_results = json.dumps(result, sort_keys=True)
+        except UnicodeDecodeError:
+            print("\n\nTrying to massage unicode\n\n")
+            for key, value in result.iteritems():
+                print("\tkey is:", key)
+                print("\tvalue is:", value)
+                if isinstance(value, basestring):
+                    result[key] = value.decode('utf-8', errors='ignore')
+            json_results = json.dumps(result, sort_keys=True)
+        return json_results
+
 class ProbeSetXRef(Base):
     __tablename__ = 'ProbeSetXRef'
     
@@ -255,7 +368,7 @@ class ProbeSetXRef(Base):
         results = Session.query(
                 "name",
                 "species",
-                "group",
+                "group_name",
                 "dataset",
                 "dataset_name",
                 "symbol",
@@ -269,7 +382,7 @@ class ProbeSetXRef(Base):
             ).from_statement(
                 "SELECT ProbeSet.Name as name, "
                 "Species.Name as species, "
-                "InbredSet.Name as group, "
+                "InbredSet.Name as group_name, "
                 "ProbeSetFreeze.Name as dataset, "
                 "ProbeSetFreeze.FullName as dataset_name, "
                 "ProbeSet.Symbol as symbol, "
@@ -350,8 +463,8 @@ def page_query(q):
 
 
 def main():
-    PublishXRef.run()
     ProbeSetXRef.run()
+    PublishXRef.run()
 
 if __name__ == "__main__":
     main()
\ No newline at end of file
diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py
index fc65eb49..1b1b56fb 100644
--- a/wqflask/wqflask/do_search.py
+++ b/wqflask/wqflask/do_search.py
@@ -235,48 +235,48 @@ class PhenotypeSearch(DoSearch):
 
         return self.execute(query)
 
-#class QuickPhenotypeSearch(PhenotypeSearch):
-#    """A search across all phenotype datasets"""
-#    
-#    DoSearch.search_types['quick_phenotype'] = "QuickPhenotypeSearch"
-#    
-#    base_query = """SELECT Species.Name as Species_Name,
-#                PublishFreeze.FullName as Dataset_Name,
-#                PublishFreeze.Name,
-#                PublishXRef.Id,
-#                PublishFreeze.createtime as thistable,
-#                Publication.PubMed_ID as Publication_PubMed_ID,
-#                Phenotype.Post_publication_description as Phenotype_Name
-#                FROM Phenotype,
-#                    PublishFreeze,
-#                    Publication,
-#                    PublishXRef,
-#                    InbredSet,
-#                    Species """
-#
-#    search_fields = ('Phenotype.Post_publication_description',
-#                    'Phenotype.Pre_publication_description',
-#                    'Phenotype.Pre_publication_abbreviation',
-#                    'Phenotype.Post_publication_abbreviation',
-#                    'Phenotype.Lab_code',
-#                    'Publication.PubMed_ID',
-#                    'Publication.Abstract',
-#                    'Publication.Title',
-#                    'Publication.Authors')    
-#    
-#    def compile_final_query(self, where_clause = ''):
-#        """Generates the final query string"""
-#
-#        query = (self.base_query +
-#                 """WHERE %s
-#                    PublishXRef.PhenotypeId = Phenotype.Id and
-#                    PublishXRef.PublicationId = Publication.Id and
-#                    PublishXRef.InbredSetId = InbredSet.Id and
-#                    InbredSet.SpeciesId = Species.Id""" % where_clause)
-#
-#        print("query is:", pf(query))
-#
-#        return query
+class QuickPhenotypeSearch(PhenotypeSearch):
+    """A search across all phenotype datasets"""
+    
+    DoSearch.search_types['quick_phenotype'] = "QuickPhenotypeSearch"
+    
+    base_query = """SELECT Species.Name as Species_Name,
+                PublishFreeze.FullName as Dataset_Name,
+                PublishFreeze.Name,
+                PublishXRef.Id,
+                PublishFreeze.createtime as thistable,
+                Publication.PubMed_ID as Publication_PubMed_ID,
+                Phenotype.Post_publication_description as Phenotype_Name
+                FROM Phenotype,
+                    PublishFreeze,
+                    Publication,
+                    PublishXRef,
+                    InbredSet,
+                    Species """
+
+    search_fields = ('Phenotype.Post_publication_description',
+                    'Phenotype.Pre_publication_description',
+                    'Phenotype.Pre_publication_abbreviation',
+                    'Phenotype.Post_publication_abbreviation',
+                    'Phenotype.Lab_code',
+                    'Publication.PubMed_ID',
+                    'Publication.Abstract',
+                    'Publication.Title',
+                    'Publication.Authors')    
+    
+    def compile_final_query(self, where_clause = ''):
+        """Generates the final query string"""
+
+        query = (self.base_query +
+                 """WHERE %s
+                    PublishXRef.PhenotypeId = Phenotype.Id and
+                    PublishXRef.PublicationId = Publication.Id and
+                    PublishXRef.InbredSetId = InbredSet.Id and
+                    InbredSet.SpeciesId = Species.Id""" % where_clause)
+
+        print("query is:", pf(query))
+
+        return query
     
     def run(self):
         """Generates and runs a search across all phenotype datasets"""
diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py
index 43c68942..499782ac 100644
--- a/wqflask/wqflask/search_results.py
+++ b/wqflask/wqflask/search_results.py
@@ -13,10 +13,14 @@ import time
 #import pp - Note from Sam: is this used?
 import math
 import datetime
+import collections
 
 from pprint import pformat as pf
 
+import json
+
 from flask import Flask, g
+from MySQLdb import escape_string as escape
 
 # Instead of importing HT we're going to build a class below until we can eliminate it
 from htmlgen import HTMLgen2 as HT
@@ -58,19 +62,22 @@ class SearchResultPage():
         #    self.dataset_group_ids = map(lambda x: x[2], results)
         #else:
 
-        self.results = []
+        self.quick = False
 
         if 'q' in kw:
-            #self.quick_search = True
+            self.results = {}
+            self.quick = True
             self.search_terms = kw['q']
             print("self.search_terms is: ", self.search_terms)
             self.quick_search()
         else:
+            self.results = []
             #self.quick_search = False
             self.search_terms = kw['search_terms']
             self.dataset = create_dataset(kw['dataset'])
             self.search()
-        self.gen_search_result()
+            self.gen_search_result()
+
 
 
     def gen_search_result(self):
@@ -81,7 +88,7 @@ class SearchResultPage():
         """
         self.trait_list = []
         
-        species = webqtlDatabaseFunction.retrieve_species(self.dataset.group.name)        
+        species = webqtlDatabaseFunction.retrieve_species(self.dataset.group.name)
         
         # result_set represents the results for each search term; a search of 
         # "shh grin2b" would have two sets of results, one for each term
@@ -101,39 +108,71 @@ class SearchResultPage():
         self.dataset.get_trait_info(self.trait_list, species)
 
     def quick_search(self):
-        self.search_terms = parser.parse(self.search_terms)
-        print("After parsing:", self.search_terms)
-
-        search_types = ["quick_phenotype", "quick_mrna_assay"]
-
-        for search_category in search_types:
-            search_ob = do_search.DoSearch.get_search(search_category)
-            search_class = getattr(do_search, search_ob)
-            for a_search in self.search_terms:
-                search_term = a_search['search_term']
-                the_search = search_class(search_term)
-                self.results.extend(the_search.run())
-                print("in the search results are:", self.results)
-
-        #for a_search in self.search_terms:
-        #    search_term = a_search['search_term']
-        #
-        #    #Do mRNA assay search
-        #    search_ob = do_search.DoSearch.get_search("quick_mrna_assay")
-        #    search_class = getattr(do_search, search_ob)
-        #    the_search = search_class(search_term)
-        #    
-        #    self.results.extend(the_search.run())
-        #    print("in the search results are:", self.results)
-
-
-        #return True
-
-        #search_gene
-        #search_geno
-        #searhch_pheno
-        #search_mrn
-        #searhc_publish
+        """Run a full-text search against the QuickSearch table.
+
+        Each whitespace-separated term in self.search_terms is escaped and
+        prefixed with '+' for the BOOLEAN MODE match.  Matching rows are
+        grouped in self.results by trait type.
+        """
+        search_terms = ' '.join('+{}'.format(escape(term)) for term in self.search_terms.split())
+        print("search_terms are:", search_terms)
+        query = """ SELECT table_name, the_key, result_fields
+                    FROM QuickSearch
+                    WHERE MATCH (terms)
+                          AGAINST ('{}' IN BOOLEAN MODE) """.format(search_terms)
+        dbresults = g.db.execute(query, no_parameters=True).fetchall()
+
+        self.results = collections.defaultdict(list)
+
+        # Keys must equal the __tablename__ strings stored in QuickSearch.table_name.
+        type_dict = {'PublishXRef': 'phenotype',
+                     'ProbeSetXRef': 'mrna_assay',
+                     'GenoXRef': 'genotype'}
+
+        for dbresult in dbresults:
+            this_result = {}
+            this_result['table_name'] = dbresult.table_name
+            this_result['key'] = dbresult.the_key
+            this_result['result_fields'] = json.loads(dbresult.result_fields)
+            self.results[type_dict[dbresult.table_name]].append(this_result)
+
+        print("results: ", pf(self.results['phenotype']))
+
+    #def quick_search(self):
+    #    self.search_terms = parser.parse(self.search_terms)
+    #
+    #    search_types = ["quick_mrna_assay", "quick_phenotype"]
+    #
+    #    for search_category in search_types:
+    #        these_results = []
+    #        search_ob = do_search.DoSearch.get_search(search_category)
+    #        search_class = getattr(do_search, search_ob)
+    #        for a_search in self.search_terms:
+    #            search_term = a_search['search_term']
+    #            the_search = search_class(search_term)
+    #            these_results.extend(the_search.run())
+    #            print("in the search results are:", self.results)
+    #        self.results[search_category] = these_results
+    #
+    #    #for a_search in self.search_terms:
+    #    #    search_term = a_search['search_term']
+    #    #
+    #    #    #Do mRNA assay search
+    #    #    search_ob = do_search.DoSearch.get_search("quick_mrna_assay")
+    #    #    search_class = getattr(do_search, search_ob)
+    #    #    the_search = search_class(search_term)
+    #    #    
+    #    #    self.results.extend(the_search.run())
+    #    #    print("in the search results are:", self.results)
+    #
+    #
+    #    #return True
+    #
+    #    #search_gene
+    #    #search_geno
+    #    #searhch_pheno
+    #    #search_mrn
+    #    #searhc_publish
 
 
     def search(self):
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index eb7ae8f8..7a504c54 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -83,7 +83,10 @@ def search_page():
         #for trait in the_search.trait_list:
         #    print(" -", trait.description_display)
 
-        return render_template("search_result_page.html", **the_search.__dict__)
+        if the_search.quick:
+            return render_template("quick_search.html", **the_search.__dict__)
+        else:
+            return render_template("search_result_page.html", **the_search.__dict__)
 
 
 @app.route("/whats_new")
author	Zachary Sloan	2013-04-09 23:18:05 +0000
committer	Zachary Sloan	2013-04-09 23:18:05 +0000
commit	d2e64633a40ecab55c9cedd46b0a8f9e93761aad (patch)
tree	78c4a182bbede6d431ec9fa11c4c682e7c5508e6
parent	c7c306c69254ca49ddeccc495a8a096fcf03974d (diff)
download	genenetwork2-d2e64633a40ecab55c9cedd46b0a8f9e93761aad.tar.gz