about summary refs log tree commit diff
diff options
context:
space:
mode:
authorZachary Sloan2012-10-31 18:30:39 -0500
committerZachary Sloan2012-10-31 18:30:39 -0500
commit29de88e179b735a26a2134db4d10d88905afad32 (patch)
tree28e8bd0b28e1436088d0099392c02d747bcd76ab
parent45b6e6a5eea13dbd06045e8307e371994b8c3083 (diff)
downloadgenenetwork2-29de88e179b735a26a2134db4d10d88905afad32.tar.gz
Did work with writing the search page query functions for some ProbeSet and Phenotype searches
-rw-r--r--wqflask/wqflask/do_search.py180
-rw-r--r--wqflask/wqflask/search_results.py52
2 files changed, 217 insertions, 15 deletions
diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py
new file mode 100644
index 00000000..cfa73334
--- /dev/null
+++ b/wqflask/wqflask/do_search.py
@@ -0,0 +1,180 @@
+#!/usr/bin/python
+
+
+from __future__ import print_function, division
+
+from pprint import pformat as pf
+
+
+class DoSearch(object):
+    def __init__(self, search_term, dataset, cursor, db_conn):
+        self.search_term = search_term
+        self.dataset = dataset
+        self.db_conn = db_conn
+        self.cursor = cursor
+        
+    def execute(self, query):
+        query = self.normalize_spaces(query)
+        print("query is:", pf(query))
+        self.cursor.execute(query)
+        results = self.cursor.fetchall()
+        return results
+    
+    def escape(self, stringy):
+        """Shorter name than self.db_conn.escape_string"""
+        return self.db_conn.escape_string(str(stringy))
+    
+    def normalize_spaces(self, stringy):
+        """Strips out newlines  extra spaces and replaces them with just spaces"""
+        step_one = " ".join(stringy.split())
+        return step_one
+
+        
+        
+class ProbeSetSearch(DoSearch):
+    base_query = """SELECT ProbeSet.Name as TNAME,
+                0 as thistable,
+                ProbeSetXRef.Mean as TMEAN,
+                ProbeSetXRef.LRS as TLRS,
+                ProbeSetXRef.PVALUE as TPVALUE,
+                ProbeSet.Chr_num as TCHR_NUM,
+                ProbeSet.Mb as TMB,
+                ProbeSet.Symbol as TSYMBOL,
+                ProbeSet.name_num as TNAME_NUM
+                FROM ProbeSetXRef, ProbeSet """
+    
+    def run(self):
+        
+        query = (self.base_query +
+                """WHERE (MATCH (ProbeSet.Name,
+                    ProbeSet.description,
+                    ProbeSet.symbol,
+                    alias,
+                    GenbankId,
+                    UniGeneId,
+                    Probe_Target_Description)
+                    AGAINST ('%s' IN BOOLEAN MODE)) 
+                    and ProbeSet.Id = ProbeSetXRef.ProbeSetId
+                    and ProbeSetXRef.ProbeSetFreezeId = %s  
+                            """ % (self.escape(self.search_term),
+                            self.escape(dataset.id)))
+        
+        return self.execute(query)
+
+
+class PhenotypeSearch(DoSearch):
+    base_query = """SELECT PublishXRef.Id,
+                PublishFreeze.createtime as thistable,
+                Publication.PubMed_ID as Publication_PubMed_ID,
+                Phenotype.Post_publication_description as Phenotype_Name
+                FROM Phenotype, PublishFreeze, Publication, PublishXRef """
+                
+    search_fields = ('Phenotype.Post_publication_description',
+                    'Phenotype.Pre_publication_description',
+                    'Phenotype.Pre_publication_abbreviation',
+                    'Phenotype.Post_publication_abbreviation',
+                    'Phenotype.Lab_code',
+                    'Publication.PubMed_ID',
+                    'Publication.Abstract',
+                    'Publication.Title',
+                    'Publication.Authors',
+                    'PublishXRef.Id')           
+                
+    def run(self):
+        #Todo: Zach will figure out exactly what both these lines mean
+        #and comment here
+        if "'" not in self.search_term:
+            search_term = "[[:<:]]" + self.search_term + "[[:>:]]"
+
+        where_clause = []
+        for field in self.search_fields:
+            where_clause.append('''%s REGEXP "%s"''' % (field, search_term))
+            
+        where_clause = "(%s)" % ' OR '.join(where_clause)
+        
+        #Get group information for dataset
+        self.dataset.get_group()
+        
+        print("before query where clause is:", where_clause)
+        
+        query = (self.base_query +
+                """WHERE %s and
+                    PublishXRef.InbredSetId = %s and
+                    PublishXRef.PhenotypeId = Phenotype.Id and
+                    PublishXRef.PublicationId = Publication.Id and
+                    PublishFreeze.Id = %s""" % (
+                        where_clause,
+                        self.escape(self.dataset.group_id),
+                        self.escape(self.dataset.id)))
+
+
+
+        return self.execute(query)
+
+       
+class GenotypeSearch(DoSearch):
+    def __init__(self):
+       pass
+    
+class GoSearch(ProbeSetSearch):
+    """searches for synapse-associated genes listed in the Gene Ontology."""
+    
+    def run(self):
+        field = 'GOterm.acc'
+        go_id = 'GO:' + ('0000000'+self.search_term)[-7:]
+        
+        statements = ("""%s.symbol=GOgene_product.symbol and
+           GOassociation.gene_product_id=GOgene_product.id and
+           GOterm.id=GOassociation.term_id""" % (
+            self.db_conn.escape_string(self.dataset.type)))
+            
+        clause_item = " %s = '%s' and %s " % (field, go_id, statements)
+        
+        gene_ontology_from_table = """ , db_GeneOntology.term as GOterm,
+            db_GeneOntology.association as GOassociation,
+            db_GeneOntology.gene_product as GOgene_product """
+        
+        gene_ontology_from_table = self.normalize_spaces(gene_ontology_from_table)
+        #gene_ontology_from_table = " ".join(gene_ontology_from_table.splitlines())
+        
+        query = (self.base_query + 
+            """%s
+                WHERE %s 
+                    and ProbeSet.Id = ProbeSetXRef.ProbeSetId
+                    and ProbeSetXRef.ProbeSetFreezeId = %s  
+                            """ % (self.db_conn.escape_string(gene_ontology_from_table),
+                                    clause_item,
+                                    self.db_conn.escape_string(str(self.dataset.id))))
+        
+        return self.execute(query)
+        
+
+
+if __name__ == "__main__":
+    
+    import MySQLdb
+    import sys
+    sys.path.append("/home/zas1024/gene/wqflask")
+    print("Path is:", sys.path)
+    
+    
+    from base import webqtlConfig
+    from base.webqtlDataset import webqtlDataset
+    from base.templatePage import templatePage
+    from utility import webqtlUtil
+    from dbFunction import webqtlDatabaseFunction
+    
+    db_conn = MySQLdb.Connect(db=webqtlConfig.DB_NAME,
+                              host=webqtlConfig.MYSQL_SERVER,
+                              user=webqtlConfig.DB_USER,
+                              passwd=webqtlConfig.DB_PASSWD)
+    cursor = db_conn.cursor()
+    
+    dataset_name = "HC_M2_0606_P"
+    dataset = webqtlDataset(dataset_name, cursor)
+    
+    #results = ProbeSetSearch("salt", dataset, cursor, db_conn).run()
+    #results = PhenotypeSearch("brain", dataset, cursor, db_conn).run()
+    
+    results = GoSearch("0045202", dataset, cursor, db_conn).run()
+    print("results are:", pf(results))
\ No newline at end of file
diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py
index 29a497fa..02d2bef2 100644
--- a/wqflask/wqflask/search_results.py
+++ b/wqflask/wqflask/search_results.py
@@ -681,15 +681,15 @@ class SearchResultPage(templatePage):
             print("item is:", pf(search_term))
             
             
-            clause_item = (
-""" MATCH (ProbeSet.Name,
-        ProbeSet.description,
-        ProbeSet.symbol,
-        alias,
-        GenbankId,
-        UniGeneId,
-        Probe_Target_Description)
-        AGAINST ('%s' IN BOOLEAN MODE) """ % self.db_conn.escape_string(search_term))
+#            clause_item = (
+#""" MATCH (ProbeSet.Name,
+#        ProbeSet.description,
+#        ProbeSet.symbol,
+#        alias,
+#        GenbankId,
+#        UniGeneId,
+#        Probe_Target_Description)
+#        AGAINST ('%s' IN BOOLEAN MODE) """ % self.db_conn.escape_string(search_term))
             if self.dataset.type == "ProbeSet":
                 
                 query = (
@@ -702,11 +702,17 @@ class SearchResultPage(templatePage):
     ProbeSet.Symbol as TSYMBOL,
     ProbeSet.name_num as TNAME_NUM
     FROM ProbeSetXRef, ProbeSet
-    WHERE %s
+    WHERE (MATCH (ProbeSet.Name,
+        ProbeSet.description,
+        ProbeSet.symbol,
+        alias,
+        GenbankId,
+        UniGeneId,
+        Probe_Target_Description)
+        AGAINST ('%s' IN BOOLEAN MODE)) 
         and ProbeSet.Id = ProbeSetXRef.ProbeSetId
         and ProbeSetXRef.ProbeSetFreezeId = %s  
                 """ % (self.db_conn.escape_string(search_term),
-                       self.db_conn.escape_string(clause_item),
                 self.db_conn.escape_string(str(self.dataset.id))))
                 
             elif self.dataset.type == "Publish":
@@ -719,13 +725,21 @@ class SearchResultPage(templatePage):
     Publication.PubMed_ID as Publication_PubMed_ID,
     Phenotype.Post_publication_description as Phenotype_Name
     FROM %s PublishFreeze, Publication, PublishXRef, Phenotype
-    WHERE PublishXRef.InbredSetId = %s and %s and
+    WHERE (MATCH (ProbeSet.Name,
+        ProbeSet.description,
+        ProbeSet.symbol,
+        alias,
+        GenbankId,
+        UniGeneId,
+        Probe_Target_Description)
+        AGAINST ('%s' IN BOOLEAN MODE)) and
+        PublishXRef.InbredSetId = %s and
         PublishXRef.PhenotypeId = Phenotype.Id and
         PublishXRef.PublicationId = Publication.Id and
         PublishFreeze.Id = %s
                 """ % (include_geno,
+                       self.db_conn.escape_string(search_term),
                        self.db_conn.escape_string(str(self.dataset.group_id)),
-                       self.db_conn.escape_string(clause_item),
                        self.db_conn.escape_string(str(self.dataset.id))))
 
             elif self.dataset.type == "Geno":
@@ -736,10 +750,18 @@ class SearchResultPage(templatePage):
     Geno.chr_num as Geno_chr_num,
     Geno.Mb as Geno_Mb
     FROM GenoXRef, GenoFreeze, Geno
-    WHERE %s and Geno.Id = GenoXRef.GenoId and
+    WHERE (MATCH (ProbeSet.Name,
+        ProbeSet.description,
+        ProbeSet.symbol,
+        alias,
+        GenbankId,
+        UniGeneId,
+        Probe_Target_Description)
+        AGAINST ('%s' IN BOOLEAN MODE)) and
+        and Geno.Id = GenoXRef.GenoId and
         GenoXRef.GenoFreezeId = GenoFreeze.Id and
         GenoFreeze.Id = %d
-                """% (self.db_conn.escape_string(clause_item),
+                """% (self.db_conn.escape_string(search_term),
                       self.db_conn.escape_string(str(self.dataset.id))))