about summary refs log tree commit diff
path: root/wqflask/wqflask/do_search.py
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/wqflask/do_search.py')
-rw-r--r--wqflask/wqflask/do_search.py237
1 files changed, 91 insertions, 146 deletions
diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py
index 2b8efd68..92a754e3 100644
--- a/wqflask/wqflask/do_search.py
+++ b/wqflask/wqflask/do_search.py
@@ -20,7 +20,7 @@ class DoSearch(object):
     def __init__(self, search_term, search_operator, dataset, cursor, db_conn):
         self.search_term = search_term
         # Make sure search_operator is something we expect
-        assert search_operator in ("=", "<", ">", "<=", ">="), "Bad search operator"
+        assert search_operator in (None, "=", "<", ">", "<=", ">="), "Bad search operator"
         self.search_operator = search_operator
         self.dataset = dataset
         self.db_conn = db_conn
@@ -41,6 +41,12 @@ class DoSearch(object):
     def escape(self, stringy):
         """Shorter name than self.db_conn.escape_string"""
         return self.db_conn.escape_string(str(stringy))
+    
+    def mescape(self, *items):
+        """Multiple escape"""
+        escaped = [self.escape(item) for item in items]
+        print("escaped is:", escaped)
+        return tuple(escaped)
 
     def normalize_spaces(self, stringy):
         """Strips out newlines/extra spaces and replaces them with just spaces"""
@@ -91,8 +97,7 @@ class ProbeSetSearch(DoSearch):
         """Generates and runs a simple search of an mRNA expression dataset"""
 
         print("Running ProbeSetSearch")
-        query = (self.base_query +
-                """WHERE (MATCH (ProbeSet.Name,
+        query = self.base_query + """WHERE (MATCH (ProbeSet.Name,
                     ProbeSet.description,
                     ProbeSet.symbol,
                     alias,
@@ -102,8 +107,8 @@ class ProbeSetSearch(DoSearch):
                     AGAINST ('%s' IN BOOLEAN MODE)) 
                     and ProbeSet.Id = ProbeSetXRef.ProbeSetId
                     and ProbeSetXRef.ProbeSetFreezeId = %s  
-                            """ % (self.escape(self.search_term),
-                            self.escape(self.dataset.id)))
+                            """ % (self.escape(self.search_term[0]),
+                            self.escape(self.dataset.id))
 
         print("final query is:", pf(query))
 
@@ -275,7 +280,8 @@ class GoSearch(ProbeSetSearch):
 class LrsSearch(ProbeSetSearch):
     """Searches for genes with a QTL within the given LRS values
 
-    LRS searches can take 2 different forms:
+    LRS searches can take 3 different forms:
+    - LRS > (or <) min/max_LRS
     - LRS=(min_LRS max_LRS)
     - LRS=(min_LRS max_LRS chromosome start_Mb end_Mb)
     where min/max_LRS represent the range of LRS scores and start/end_Mb represent
@@ -289,129 +295,128 @@ class LrsSearch(ProbeSetSearch):
         
         self.search_term = [float(value) for value in self.search_term]
         
-        from_clause = ", Geno"
+        self.from_clause = ", Geno"
         
         if self.search_operator == "=":
-            if len(self.search_term) >= 2:
-                if len(self.search_term) == 2:
-                    lrs_min, lrs_max = self.search_term
-                elif len(self.search_term) == 5:
-                    lrs_min, lrs_max, chr_num, mb_low, mb_high = self.search_term
-                else:
-                    SomeError
-                    
-                sub_clause = """ %sXRef.LRS > %s and
-                                 %sXRef.LRS < %s and """ % (self.escape(self.dataset.type),
-                                                        self.escape(min(lrs_min, lrs_max)),
-                                                        self.escape(self.dataset.type),
-                                                        self.escape(max(lrs_min, lrs_max)))
-                
+            assert isinstance(self.search_term, (list, tuple))
+            self.lrs_min, self.lrs_max = self.search_term[:2]
+
+            self.sub_clause = """ %sXRef.LRS > %s and
+                             %sXRef.LRS < %s and """ % self.mescape(self.dataset.type,
+                                                                min(self.lrs_min, self.lrs_max),
+                                                                self.dataset.type,
+                                                                max(self.lrs_min, self.lrs_max))
+
+            if len(self.search_term) > 2:
+                self.chr_num = self.search_term[2]
+                self.sub_clause += """ Geno.Chr = %s and """ % (self.escape(self.chr_num))
                 if len(self.search_term) == 5:
-                    sub_clause = sub_clause + """ Geno.Mb > %s and
+                    self.mb_low, self.mb_high = self.search_term[3:]
+                    self.sub_clause += """ Geno.Mb > %s and
                                                   Geno.Mb < %s and
-                                                  Geno.Chr = %s and
-                                                  """ % (self.escape(min(mb_low, mb_high)),
-                                                         self.escape(max(mb_low, mb_high)),
-                                                         self.escape(chr_num))
+                                            """ % self.mescape(min(self.mb_low, self.mb_high),
+                                                               max(self.mb_low, self.mb_high))                    
+            print("self.sub_clause is:", pf(self.sub_clause))                                  
         else:
             # Deal with >, <, >=, and <=
-            sub_clause = """ %sXRef.LRS %s %s and """ % (self.escape(self.dataset.type),
-                                                     self.escape(self.search_operator),
-                                                     self.escape(self.search_term[0]))
+            self.sub_clause = """ %sXRef.LRS %s %s and """ % self.mescape(self.dataset.type,
+                                                                        self.search_operator,
+                                                                        self.search_term[0])
 
-        where_clause = sub_clause + """ %sXRef.Locus = Geno.name and
+        self.where_clause = self.sub_clause + """ %sXRef.Locus = Geno.name and
                                         Geno.SpeciesId = %s  and
                                         %s.Chr = Geno.Chr
-                                        """ % (self.escape(self.dataset.type),
-                                               self.escape(self.species_id),
-                                               self.escape(self.dataset.type))
+                                        """ % self.mescape(self.dataset.type,
+                                               self.species_id,
+                                               self.dataset.type)
 
-        print("where_clause is:", pf(where_clause))
+        print("where_clause is:", pf(self.where_clause))
 
-        query = self.compile_final_query(from_clause, where_clause)
+        self.query = self.compile_final_query(self.from_clause, self.where_clause)
 
-        return self.execute(query)
-
-class CisLrsSearch(LrsSearch):
-    """Searches for genes on a particular chromosome with a cis-eQTL within the given LRS values
+        return self.execute(self.query)
 
-    A cisLRS search can take 3 forms:
-    - cisLRS=(min_LRS max_LRS)
-    - cisLRS=(min_LRS max_LRS mb_buffer)
-    - cisLRS>min_LRS
-    where min/max_LRS represent the range of LRS scores and the mb_buffer is the range around
-    a particular QTL where its eQTL would be considered "cis". If there is no third parameter,
-    mb_buffer will default to 5 megabases.
 
-    A QTL is a cis-eQTL if a gene's expression is regulated by a QTL in roughly the same area
-    (where the area is determined by the mb_buffer that the user can choose).
+class CisTransLrsSearch(LrsSearch):
 
-    """
-
-    # This is tentatively a child of LrsSearch; I'll need to check what code, if any, overlaps
-    # between this and the LrsSearch code. In the original code, commands are divided by
-    # the number of inputs they take, so these commands are completely separate
-
-    DoSearch.search_types['CISLRS'] = "CisLrsSearch"
-
-    def run(self):
+    def real_run(self, the_operator):
         #if isinstance(self.search_term, basestring):
         #    self.search_term = [self.search_term]
         print("self.search_term is:", self.search_term)
         self.search_term = [float(value) for value in self.search_term]
-        mb_buffer = 5  # default
-        
-        from_clause = ", Geno "
-        
+        self.mb_buffer = 5  # default
         
+        self.from_clause = ", Geno "
+
         if self.search_operator == "=":
             if len(self.search_term) == 2:
-                lower_limit, upper_limit = self.search_term
+                self.lrs_min, self.lrs_max = self.search_term
                 #[int(value) for value in self.search_term]
             
             elif len(self.search_term) == 3:
-                lower_limit, upper_limit, mb_buffer = self.search_term
+                self.lrs_min, self.lrs_max, self.mb_buffer = self.search_term
                 
             else:
                 SomeError
               
-            sub_clause = """ %sXRef.LRS > %s and
-                %sXRef.LRS < %s  and
-                ABS(%s.Mb-Geno.Mb) < %s and """  % (
-                    self.escape(self.dataset.type),
-                    self.escape(min(lower_limit, upper_limit)),
+            self.sub_clause = """ %sXRef.LRS > %s and
+                %sXRef.LRS < %s  and """  % (
                     self.escape(self.dataset.type),
-                    self.escape(max(lower_limit, upper_limit)),
+                    self.escape(min(self.lrs_min, self.lrs_max)),
                     self.escape(self.dataset.type),
-                    self.escape(mb_buffer)
+                    self.escape(max(self.lrs_min, self.lrs_max))
                 )
-            
         else:
             # Deal with >, <, >=, and <=
-            sub_clause = """ %sXRef.LRS %s %s and
-                ABS(%s.Mb-Geno.Mb) < %s and """  % (
+            self.sub_clause = """ %sXRef.LRS %s %s and """  % (
                     self.escape(self.dataset.type),
                     self.escape(self.search_operator),
-                    self.escape(self.search_term[0]),
-                    self.escape(self.dataset.type),
-                    self.escape(mb_buffer)
+                    self.escape(self.search_term[0])
                 )
-            
-        where_clause = sub_clause + """%sXRef.Locus = Geno.name and
+
+        self.where_clause = self.sub_clause + """
+                ABS(%s.Mb-Geno.Mb) %s %s and
+                %sXRef.Locus = Geno.name and
                 Geno.SpeciesId = %s and
                 %s.Chr = Geno.Chr""" % (
                     self.escape(self.dataset.type),
+                    the_operator,
+                    self.escape(self.mb_buffer),                    
+                    self.escape(self.dataset.type),
                     self.escape(self.species_id),
                     self.escape(self.dataset.type)
                     )
                 
-        print("where_clause is:", pf(where_clause))
+        print("where_clause is:", pf(self.where_clause))
 
-        query = self.compile_final_query(from_clause, where_clause)
+        self.query = self.compile_final_query(self.from_clause, self.where_clause)
 
-        return self.execute(query)
+        return self.execute(self.query)
+
+
+class CisLrsSearch(CisTransLrsSearch):
+    """Searches for genes on a particular chromosome with a cis-eQTL within the given LRS values
+
+    A cisLRS search can take 3 forms:
+    - cisLRS=(min_LRS max_LRS)
+    - cisLRS=(min_LRS max_LRS mb_buffer)
+    - cisLRS>min_LRS
+    where min/max_LRS represent the range of LRS scores and the mb_buffer is the range around
+    a particular QTL where its eQTL would be considered "cis". If there is no third parameter,
+    mb_buffer will default to 5 megabases.
+
+    A QTL is a cis-eQTL if a gene's expression is regulated by a QTL in roughly the same area
+    (where the area is determined by the mb_buffer that the user can choose).
+
+    """
+
+    DoSearch.search_types['CISLRS'] = "CisLrsSearch"
+
+    def run(self):
+        return self.real_run("<")
+    
     
-class TransLrsSearch(LrsSearch):
+class TransLrsSearch(CisTransLrsSearch):
     """Searches for genes on a particular chromosome with a cis-eQTL within the given LRS values
 
     A transLRS search can take 2 forms:
@@ -425,70 +430,11 @@ class TransLrsSearch(LrsSearch):
     (where the area is determined by the mb_buffer that the user can choose). Opposite of cis-eQTL.
 
     """
-
-    # This is tentatively a child of LrsSearch; I'll need to check what code, if any, overlaps
-    # between this and the LrsSearch code. In the original code, commands are divided by
-    # the number of inputs they take, so these commands are completely separate
     
     DoSearch.search_types['TRANSLRS'] = "TransLrsSearch"
 
     def run(self):
-        if len(self.search_term) == 3:
-            lower_limit, upper_limit, min_threshold = [int(value) for value in self.search_term]
-            
-            where_clause = """ %sXRef.LRS > %s and
-                %sXRef.LRS < %s  and
-                %sXRef.Locus = Geno.name and
-                Geno.SpeciesId = %s and
-                (%s.Chr != Geno.Chr or
-                ABS(%s.Mb-Geno.Mb) > %s) """ % (
-                    self.dataset.type,
-                    min(lower_limit, upper_limit),
-                    self.dataset.type,
-                    max(lower_limit, upper_limit),
-                    self.dataset.type,
-                    self.species_id,
-                    self.dataset.type,
-                    self.dataset.type,
-                    min_threshold
-                    )
-
-        else:
-            NeedSomeErrorHere            
-
-        return None
-
-
-#itemCmd = item[0]
-#lowerLimit = float(item[1])
-#upperLimit = float(item[2])
-#
-#if itemCmd.upper() in ("TRANSLRS", "CISLRS"):
-#    if item[3]:
-#        mthresh = float(item[3])
-#        clauseItem = " %sXRef.LRS > %2.7f and %sXRef.LRS < %2.7f " % \
-#            (self.dbType, min(lowerLimit, upperLimit), self.dbType, max(lowerLimit, upperLimit))
-#        if itemCmd.upper() == "CISLRS":
-#            clauseItem += """ and  %sXRef.Locus = Geno.name and Geno.SpeciesId = %s and %s.Chr = Geno.Chr and ABS(%s.Mb-Geno.Mb) < %2.7f """ % (self.dbType, self.speciesId, self.dbType, self.dbType, mthresh)
-#            DescriptionText.append(HT.Span(' with a ', HT.U('cis-QTL'), ' having an LRS between %g and %g using a %g Mb exclusion buffer'  % (min(lowerLimit, upperLimit), max(lowerLimit, upperLimit),  mthresh)))
-#        else:
-#            clauseItem += """ and  %sXRef.Locus = Geno.name and Geno.SpeciesId = %s and (%s.Chr != Geno.Chr or (%s.Chr != Geno.Chr and ABS(%s.Mb-Geno.Mb) > %2.7f)) """ % (self.dbType, self.speciesId, self.dbType, self.dbType, self.dbType, mthresh)
-#            DescriptionText.append(HT.Span(' with a ', HT.U('trans-QTL'), ' having an LRS between %g and %g using a %g Mb exclusion buffer'  % (min(lowerLimit, upperLimit), max(lowerLimit, upperLimit),  mthresh)))
-#        query.append(" (%s) " % clauseItem)
-#        self.orderByDefalut = "LRS"
-#    else:
-#        pass
-#elif itemCmd.upper() in ("RANGE"):
-#    #XZ, 03/05/2009: Xiaodong changed Data to ProbeSetData
-#    clauseItem = " (select Pow(2, max(value) -min(value)) from ProbeSetData where Id = ProbeSetXRef.dataId) > %2.7f and (select Pow(2, max(value) -min(value)) from ProbeSetData where Id = ProbeSetXRef.dataId) < %2.7f " % (min(lowerLimit, upperLimit), max(lowerLimit, upperLimit))
-#    query.append(" (%s) " % clauseItem)
-#    DescriptionText.append(HT.Span(' with a range of expression that varied between %g and %g' % (min(lowerLimit, upperLimit),  max(lowerLimit, upperLimit)), "  (fold difference)"))
-#else:
-#    clauseItem = " %sXRef.%s > %2.7f and %sXRef.%s < %2.7f " % \
-#        (self.dbType, itemCmd, min(lowerLimit, upperLimit), self.dbType, itemCmd, max(lowerLimit, upperLimit))
-#    query.append(" (%s) " % clauseItem)
-#    self.orderByDefalut = itemCmd
-#    DescriptionText.append(HT.Span(' with ', HT.U(itemCmd), ' between %g and %g' % (min(lowerLimit, upperLimit),  max(lowerLimit, upperLimit))))    
+        return self.real_run(">")
 
 
 class MeanSearch(ProbeSetSearch):
@@ -508,7 +454,6 @@ if __name__ == "__main__":
     import MySQLdb
     import sys
 
-
     from base import webqtlConfig
     from base.data_set import create_dataset
     from base.templatePage import templatePage
@@ -540,11 +485,11 @@ if __name__ == "__main__":
                 ProbeSetXRef.ProbeSetFreezeId = 112""")
 
     #print(pf(cursor.fetchall()))
-    #results = ProbeSetSearch("salt", dataset, cursor, db_conn).run()
+    results = ProbeSetSearch("shh", None, dataset, cursor, db_conn).run()
     #results = RifSearch("diabetes", dataset, cursor, db_conn).run()
     #results = WikiSearch("nicotine", dataset, cursor, db_conn).run()
-    results = CisLrsSearch(['99'], '>', dataset, cursor, db_conn).run() # cisLRS > 99
-    #results = LrsSearch('9', '99', '1', '50', '150', '=', dataset, cursor, db_conn).run()
+    #results = CisLrsSearch(['99'], '>', dataset, cursor, db_conn).run() # cisLRS > 99
+    #results = LrsSearch('99', '>', dataset, cursor, db_conn).run()
     #results = TransLrsSearch(['9', '999', '10'], dataset, cursor, db_conn).run()
     #results = PhenotypeSearch("brain", dataset, cursor, db_conn).run()
     #results = GenotypeSearch("rs13475699", dataset, cursor, db_conn).run()