about summary refs log tree commit diff
path: root/wqflask/wqflask/do_search.py
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/wqflask/do_search.py')
-rw-r--r-- wqflask/wqflask/do_search.py 89
1 files changed, 73 insertions, 16 deletions
diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py
index 19c6fa74..2641431c 100644
--- a/wqflask/wqflask/do_search.py
+++ b/wqflask/wqflask/do_search.py
@@ -9,6 +9,9 @@ from pprint import pformat as pf
 class DoSearch(object):
     """Parent class containing parameters/functions used for all searches"""
     
+    # Maps a search/dataset type key (e.g. 'ProbeSet', 'RIF') to the *name* of its search class
+    search_types = dict()
+    
     def __init__(self, search_term, dataset, cursor, db_conn):
         self.search_term = search_term
         self.dataset = dataset
@@ -28,14 +31,20 @@ class DoSearch(object):
         return self.db_conn.escape_string(str(stringy))
 
     def normalize_spaces(self, stringy):
-        """Strips out newlines extra spaces and replaces them with just spaces"""
+        """Collapses each run of whitespace (newlines too) into one space; trims the ends"""
         step_one = " ".join(stringy.split())
         return step_one
+        
+    @classmethod
+    def get_search(cls, search_type):
+        return cls.search_types[search_type]  # registered class *name* (str); KeyError if unknown
 
 
 class ProbeSetSearch(DoSearch):
     """A search within an mRNA expression dataset"""
     
+    DoSearch.search_types['ProbeSet'] = "ProbeSetSearch"
+    
     base_query = """SELECT ProbeSet.Name as TNAME,
                 0 as thistable,
                 ProbeSetXRef.Mean as TMEAN,
@@ -47,6 +56,24 @@ class ProbeSetSearch(DoSearch):
                 ProbeSet.name_num as TNAME_NUM
                 FROM ProbeSetXRef, ProbeSet """
 
+    def compile_final_query(self, from_clause, where_clause):
+        """Return base_query + extra FROM tables + WHERE clause for this dataset.
+
+        ``where_clause`` is inserted verbatim; callers must escape it first.
+        """
+        extra_tables = self.escape(self.normalize_spaces(from_clause))
+        dataset_id = self.escape(self.dataset.id)
+        sql = (self.base_query +
+            """%s
+                WHERE %s
+                    and ProbeSet.Id = ProbeSetXRef.ProbeSetId
+                    and ProbeSetXRef.ProbeSetFreezeId = %s
+                            """ % (extra_tables, where_clause, dataset_id))
+
+        print("query is:", pf(sql))
+
+        return sql
+
     def run(self):
         """Generates and runs a simple search of an mRNA expression dataset"""
         
@@ -73,6 +100,8 @@ class ProbeSetSearch(DoSearch):
 class PhenotypeSearch(DoSearch):
     """A search within a phenotype dataset"""
     
+    DoSearch.search_types['Publish'] = "PhenotypeSearch"
+    
     base_query = """SELECT PublishXRef.Id,
                 PublishFreeze.createtime as thistable,
                 Publication.PubMed_ID as Publication_PubMed_ID,
@@ -128,6 +157,8 @@ class PhenotypeSearch(DoSearch):
 
 class GenotypeSearch(DoSearch):
     """A search within a genotype dataset"""
+    
+    DoSearch.search_types['Geno'] = "GenotypeSearch"
 
     base_query = """SELECT Geno.Name,
                 GenoFreeze.createtime as thistable,
@@ -169,9 +200,42 @@ class GenotypeSearch(DoSearch):
 
         return self.execute(query)
 
+class RifSearch(ProbeSetSearch):
+    """Searches for traits with a Gene RIF entry including the search term."""
+
+    DoSearch.search_types['RIF'] = "RifSearch"
+
+    def run(self):
+        """Run a boolean full-text match of the search term on GeneRIF_BASIC."""
+        # The search term is user input and is spliced into SQL, so escape it.
+        where_clause = """( %s.symbol = GeneRIF_BASIC.symbol and
+            MATCH (GeneRIF_BASIC.comment)
+            AGAINST ('+%s' IN BOOLEAN MODE)) """ % (
+                self.escape(self.dataset.type), self.escape(self.search_term))
+        query = self.compile_final_query(", GeneRIF_BASIC ", where_clause)
+        return self.execute(query)
+
+class WikiSearch(ProbeSetSearch):
+    """Searches GeneWiki for traits other people have annotated"""
+
+    DoSearch.search_types['WIKI'] =  "WikiSearch"
+
+    def run(self):
+        """Match the term as a whole word in GeneRIF.comment, or against GeneRIF.initial."""
+        # The search term is user input and is spliced into SQL, so escape it.
+        term = self.escape(self.search_term)
+        where_clause = """%s.symbol = GeneRIF.symbol
+            and GeneRIF.versionId=0 and GeneRIF.display>0
+            and (GeneRIF.comment REGEXP '[[:<:]]%s[[:>:]]' or GeneRIF.initial = '%s')
+                """ % (self.escape(self.dataset.type), term, term)
+        query = self.compile_final_query(", GeneRIF ", where_clause)
+        return self.execute(query)
+
 class GoSearch(ProbeSetSearch):
     """Searches for synapse-associated genes listed in the Gene Ontology."""
 
+    DoSearch.search_types['GO'] =  "GoSearch"
+
     def run(self):
         field = 'GOterm.acc'
         go_id = 'GO:' + ('0000000'+self.search_term)[-7:]
@@ -181,23 +245,13 @@ class GoSearch(ProbeSetSearch):
            GOterm.id=GOassociation.term_id""" % (
             self.db_conn.escape_string(self.dataset.type)))
 
-        clause_item = " %s = '%s' and %s " % (field, go_id, statements)
+        where_clause = " %s = '%s' and %s " % (field, go_id, statements)
 
-        # 
-        gene_ontology_from_table = """ , db_GeneOntology.term as GOterm,
+        from_clause = """ , db_GeneOntology.term as GOterm,
             db_GeneOntology.association as GOassociation,
             db_GeneOntology.gene_product as GOgene_product """
-
-        gene_ontology_from_table = self.normalize_spaces(gene_ontology_from_table)
-
-        query = (self.base_query + 
-            """%s
-                WHERE %s 
-                    and ProbeSet.Id = ProbeSetXRef.ProbeSetId
-                    and ProbeSetXRef.ProbeSetFreezeId = %s  
-                            """ % (self.db_conn.escape_string(gene_ontology_from_table),
-                                    clause_item,
-                                    self.db_conn.escape_string(str(self.dataset.id))))
+            
+        query = self.compile_final_query(from_clause, where_clause)
 
         return self.execute(query)
 
@@ -227,8 +281,11 @@ if __name__ == "__main__":
     dataset_name = "HC_M2_0606_P"
     dataset = webqtlDataset(dataset_name, cursor)
 
-    results = ProbeSetSearch("salt", dataset, cursor, db_conn).run()
+    #results = ProbeSetSearch("salt", dataset, cursor, db_conn).run()
+    #results = RifSearch("diabetes", dataset, cursor, db_conn).run()
+    results = WikiSearch("nicotine", dataset, cursor, db_conn).run()
     #results = PhenotypeSearch("brain", dataset, cursor, db_conn).run()
     #results = GenotypeSearch("rs13475699", dataset, cursor, db_conn).run()
     #results = GoSearch("0045202", dataset, cursor, db_conn).run()
+    
     print("results are:", pf(results))
\ No newline at end of file