about summary refs log tree commit diff
path: root/wqflask/wqflask/do_search.py
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/wqflask/do_search.py')
-rw-r--r-- wqflask/wqflask/do_search.py 140
1 files changed, 97 insertions, 43 deletions
diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py
index cfa73334..19c6fa74 100644
--- a/wqflask/wqflask/do_search.py
+++ b/wqflask/wqflask/do_search.py
@@ -7,31 +7,35 @@ from pprint import pformat as pf
 
 
 class DoSearch(object):
+    """Parent class containing parameters/functions used for all searches"""
+    
     def __init__(self, search_term, dataset, cursor, db_conn):
         self.search_term = search_term
         self.dataset = dataset
         self.db_conn = db_conn
         self.cursor = cursor
-        
+
     def execute(self, query):
+        """Executes query and returns results"""
         query = self.normalize_spaces(query)
-        print("query is:", pf(query))
+        print("in do_search query is:", pf(query))
         self.cursor.execute(query)
         results = self.cursor.fetchall()
         return results
-    
+
     def escape(self, stringy):
         """Shorter name than self.db_conn.escape_string"""
         return self.db_conn.escape_string(str(stringy))
-    
+
     def normalize_spaces(self, stringy):
-        """Strips out newlines  extra spaces and replaces them with just spaces"""
+        """Strips out newlines extra spaces and replaces them with just spaces"""
         step_one = " ".join(stringy.split())
         return step_one
 
-        
-        
+
 class ProbeSetSearch(DoSearch):
+    """A search within an mRNA expression dataset"""
+    
     base_query = """SELECT ProbeSet.Name as TNAME,
                 0 as thistable,
                 ProbeSetXRef.Mean as TMEAN,
@@ -42,9 +46,11 @@ class ProbeSetSearch(DoSearch):
                 ProbeSet.Symbol as TSYMBOL,
                 ProbeSet.name_num as TNAME_NUM
                 FROM ProbeSetXRef, ProbeSet """
-    
+
     def run(self):
+        """Generates and runs a simple search of an mRNA expression dataset"""
         
+        print("Running ProbeSetSearch")
         query = (self.base_query +
                 """WHERE (MATCH (ProbeSet.Name,
                     ProbeSet.description,
@@ -57,18 +63,22 @@ class ProbeSetSearch(DoSearch):
                     and ProbeSet.Id = ProbeSetXRef.ProbeSetId
                     and ProbeSetXRef.ProbeSetFreezeId = %s  
                             """ % (self.escape(self.search_term),
-                            self.escape(dataset.id)))
-        
+                            self.escape(self.dataset.id)))
+
+        print("final query is:", pf(query))
+
         return self.execute(query)
 
 
 class PhenotypeSearch(DoSearch):
+    """A search within a phenotype dataset"""
+    
     base_query = """SELECT PublishXRef.Id,
                 PublishFreeze.createtime as thistable,
                 Publication.PubMed_ID as Publication_PubMed_ID,
                 Phenotype.Post_publication_description as Phenotype_Name
                 FROM Phenotype, PublishFreeze, Publication, PublishXRef """
-                
+
     search_fields = ('Phenotype.Post_publication_description',
                     'Phenotype.Pre_publication_description',
                     'Phenotype.Pre_publication_abbreviation',
@@ -78,65 +88,108 @@ class PhenotypeSearch(DoSearch):
                     'Publication.Abstract',
                     'Publication.Title',
                     'Publication.Authors',
-                    'PublishXRef.Id')           
-                
-    def run(self):
+                    'PublishXRef.Id')
+    
+    def get_where_clause(self):
+        """Generate clause for WHERE portion of query"""
+
         #Todo: Zach will figure out exactly what both these lines mean
         #and comment here
         if "'" not in self.search_term:
             search_term = "[[:<:]]" + self.search_term + "[[:>:]]"
 
+        # This adds a clause to the query that matches the search term
+        # against each field in the search_fields tuple
         where_clause = []
         for field in self.search_fields:
             where_clause.append('''%s REGEXP "%s"''' % (field, search_term))
-            
         where_clause = "(%s)" % ' OR '.join(where_clause)
         
+        return where_clause
+
+    def run(self):
+        """Generates and runs a simple search of a phenotype dataset"""
+
         #Get group information for dataset
         self.dataset.get_group()
-        
-        print("before query where clause is:", where_clause)
-        
+
         query = (self.base_query +
                 """WHERE %s and
                     PublishXRef.InbredSetId = %s and
                     PublishXRef.PhenotypeId = Phenotype.Id and
                     PublishXRef.PublicationId = Publication.Id and
                     PublishFreeze.Id = %s""" % (
-                        where_clause,
+                        self.get_where_clause(),
                         self.escape(self.dataset.group_id),
                         self.escape(self.dataset.id)))
 
+        return self.execute(query)
 
 
+class GenotypeSearch(DoSearch):
+    """A search within a genotype dataset"""
+
+    base_query = """SELECT Geno.Name,
+                GenoFreeze.createtime as thistable,
+                Geno.Name as Geno_Name,
+                Geno.Source2 as Geno_Source2,
+                Geno.chr_num as Geno_chr_num,
+                Geno.Mb as Geno_Mb
+                FROM GenoXRef, GenoFreeze, Geno """
+
+    search_fields = ('Name', 'Chr')
+
+    def get_where_clause(self):
+        """Generate clause for WHERE portion of query"""
+
+        # This adds a clause to the query that matches the search term
+        # against each field in search_fields (above)
+        where_clause = []
+        for field in self.search_fields:
+            where_clause.append('''%s REGEXP "%s"''' % ("%s.%s" % (self.dataset.type, field),
+                                                                self.search_term))
+        where_clause = "(%s)" % ' OR '.join(where_clause)
+
+        return where_clause
+
+    def run(self):
+        """Generates and runs a simple search of a genotype dataset"""
+        #Todo: Zach will figure out exactly what both these lines mean
+        #and comment here
+        if "'" not in self.search_term:
+            search_term = "[[:<:]]" + self.search_term + "[[:>:]]"
+
+        query = (self.base_query +
+                """WHERE %s and
+                    Geno.Id = GenoXRef.GenoId and
+                    GenoXRef.GenoFreezeId = GenoFreeze.Id and
+                    GenoFreeze.Id = %s"""% (
+                        self.get_where_clause(),
+                        self.escape(self.dataset.id)))
+
         return self.execute(query)
 
-       
-class GenotypeSearch(DoSearch):
-    def __init__(self):
-       pass
-    
 class GoSearch(ProbeSetSearch):
-    """searches for synapse-associated genes listed in the Gene Ontology."""
-    
+    """Searches for synapse-associated genes listed in the Gene Ontology."""
+
     def run(self):
         field = 'GOterm.acc'
         go_id = 'GO:' + ('0000000'+self.search_term)[-7:]
-        
+
         statements = ("""%s.symbol=GOgene_product.symbol and
            GOassociation.gene_product_id=GOgene_product.id and
            GOterm.id=GOassociation.term_id""" % (
             self.db_conn.escape_string(self.dataset.type)))
-            
+
         clause_item = " %s = '%s' and %s " % (field, go_id, statements)
-        
+
+        # 
         gene_ontology_from_table = """ , db_GeneOntology.term as GOterm,
             db_GeneOntology.association as GOassociation,
             db_GeneOntology.gene_product as GOgene_product """
-        
+
         gene_ontology_from_table = self.normalize_spaces(gene_ontology_from_table)
-        #gene_ontology_from_table = " ".join(gene_ontology_from_table.splitlines())
-        
+
         query = (self.base_query + 
             """%s
                 WHERE %s 
@@ -145,36 +198,37 @@ class GoSearch(ProbeSetSearch):
                             """ % (self.db_conn.escape_string(gene_ontology_from_table),
                                     clause_item,
                                     self.db_conn.escape_string(str(self.dataset.id))))
-        
+
         return self.execute(query)
-        
 
 
 if __name__ == "__main__":
-    
+    ### Usually this will be used as a library, but call it from the command line for testing
+    ### And it runs the code below
+
     import MySQLdb
     import sys
     sys.path.append("/home/zas1024/gene/wqflask")
     print("Path is:", sys.path)
-    
-    
+
+
     from base import webqtlConfig
     from base.webqtlDataset import webqtlDataset
     from base.templatePage import templatePage
     from utility import webqtlUtil
     from dbFunction import webqtlDatabaseFunction
-    
+
     db_conn = MySQLdb.Connect(db=webqtlConfig.DB_NAME,
                               host=webqtlConfig.MYSQL_SERVER,
                               user=webqtlConfig.DB_USER,
                               passwd=webqtlConfig.DB_PASSWD)
     cursor = db_conn.cursor()
-    
+
     dataset_name = "HC_M2_0606_P"
     dataset = webqtlDataset(dataset_name, cursor)
-    
-    #results = ProbeSetSearch("salt", dataset, cursor, db_conn).run()
+
+    results = ProbeSetSearch("salt", dataset, cursor, db_conn).run()
     #results = PhenotypeSearch("brain", dataset, cursor, db_conn).run()
-    
-    results = GoSearch("0045202", dataset, cursor, db_conn).run()
+    #results = GenotypeSearch("rs13475699", dataset, cursor, db_conn).run()
+    #results = GoSearch("0045202", dataset, cursor, db_conn).run()
     print("results are:", pf(results))
\ No newline at end of file