about summary refs log tree commit diff
path: root/wqflask/wqflask/do_search.py
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/wqflask/do_search.py')
-rw-r--r--wqflask/wqflask/do_search.py152
1 files changed, 117 insertions, 35 deletions
diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py
index c7dbc972..05caa100 100644
--- a/wqflask/wqflask/do_search.py
+++ b/wqflask/wqflask/do_search.py
@@ -1,6 +1,8 @@
 from __future__ import print_function, division
 
 import string
+import requests
+import json
 
 from flask import Flask, g
 
@@ -22,17 +24,19 @@ class DoSearch(object):
     # Used to translate search phrases into classes
     search_types = dict()
 
-    def __init__(self, search_term, search_operator=None, dataset=None):
+    def __init__(self, search_term, search_operator=None, dataset=None, search_type=None):
         self.search_term = search_term
         # Make sure search_operator is something we expect
         assert search_operator in (None, "=", "<", ">", "<=", ">="), "Bad search operator"
         self.search_operator = search_operator
         self.dataset = dataset
+        self.search_type = search_type
 
         if self.dataset:
             logger.debug("self.dataset is boo: ", type(self.dataset), pf(self.dataset))
             logger.debug("self.dataset.group is: ", pf(self.dataset.group))
             #Get group information for dataset and the species id
+
             self.species_id = webqtlDatabaseFunction.retrieve_species_id(self.dataset.group.name)
 
     def execute(self, query):
@@ -44,8 +48,8 @@ class DoSearch(object):
 
     def handle_wildcard(self, str):
         keyword = str.strip()
-        keyword.replace("*",".*")
-        keyword.replace("?",".")
+        keyword = keyword.replace("*",".*")
+        keyword = keyword.replace("?",".")
 
         return keyword
 
@@ -69,7 +73,7 @@ class DoSearch(object):
         logger.debug("search_types are:", pf(cls.search_types))
 
         search_type_string = search_type['dataset_type']
-        if 'key' in search_type:
+        if 'key' in search_type and search_type['key'] != None:
             search_type_string += '_' + search_type['key']
 
         logger.debug("search_type_string is:", search_type_string)
@@ -105,18 +109,34 @@ class MrnaAssaySearch(DoSearch):
                      'Max LRS Location',
                      'Additive Effect']
 
+    def get_alias_where_clause(self):
+        search_string = escape(self.search_term[0])
+
+        if self.search_term[0] != "*":
+            match_clause = """((MATCH (ProbeSet.symbol) AGAINST ('%s' IN BOOLEAN MODE))) and """ % (search_string)
+        else:
+            match_clause = ""
+
+        where_clause = (match_clause +
+            """ProbeSet.Id = ProbeSetXRef.ProbeSetId
+               and ProbeSetXRef.ProbeSetFreezeId = %s
+                        """ % (escape(str(self.dataset.id))))
+
+        return where_clause
+
     def get_where_clause(self):
+        search_string = escape(self.search_term[0])
 
         if self.search_term[0] != "*":
-            match_clause = """(MATCH (ProbeSet.Name,
+            match_clause = """((MATCH (ProbeSet.Name,
                         ProbeSet.description,
                         ProbeSet.symbol,
                         alias,
                         GenbankId,
                         UniGeneId,
                         Probe_Target_Description)
-                        AGAINST ('%s' IN BOOLEAN MODE)) and
-                                """ % (escape(self.search_term[0]))
+                        AGAINST ('%s' IN BOOLEAN MODE))) AND
+                                """ % (search_string)
         else:
             match_clause = ""
 
@@ -198,6 +218,7 @@ class PhenotypeSearch(DoSearch):
     header_fields = ['Index',
                      'Record',
                      'Description',
+                     'Mean',
                      'Authors',
                      'Year',
                      'Max LRS',
@@ -209,8 +230,12 @@ class PhenotypeSearch(DoSearch):
 
         #Todo: Zach will figure out exactly what both these lines mean
         #and comment here
-        if "'" not in self.search_term[0]:
-            search_term = "[[:<:]]" + self.handle_wildcard(self.search_term[0]) + "[[:>:]]"
+
+        #if "'" not in self.search_term[0]:
+        search_term = "[[:<:]]" + self.handle_wildcard(self.search_term[0]) + "[[:>:]]"
+        if "_" in self.search_term[0]:
+            if len(self.search_term[0].split("_")[0]) == 3:
+                search_term = "[[:<:]]" + self.handle_wildcard(self.search_term[0].split("_")[1]) + "[[:>:]]"
 
         # This adds a clause to the query that matches the search term
         # against each field in the search_fields tuple
@@ -232,7 +257,8 @@ class PhenotypeSearch(DoSearch):
                         WHERE PublishXRef.InbredSetId = %s
                         and PublishXRef.PhenotypeId = Phenotype.Id
                         and PublishXRef.PublicationId = Publication.Id
-                        and PublishFreeze.Id = %s""" % (
+                        and PublishFreeze.Id = %s
+                        ORDER BY PublishXRef.Id""" % (
                             from_clause,
                             escape(str(self.dataset.group.id)),
                             escape(str(self.dataset.id))))
@@ -243,7 +269,8 @@ class PhenotypeSearch(DoSearch):
                         and PublishXRef.InbredSetId = %s
                         and PublishXRef.PhenotypeId = Phenotype.Id
                         and PublishXRef.PublicationId = Publication.Id
-                        and PublishFreeze.Id = %s""" % (
+                        and PublishFreeze.Id = %s
+                        ORDER BY PublishXRef.Id""" % (
                             from_clause,
                             where_clause,
                             escape(str(self.dataset.group.id)),
@@ -444,15 +471,18 @@ class LrsSearch(DoSearch):
 
     """
 
-    DoSearch.search_types['LRS'] = 'LrsSearch'
+    for search_key in ('LRS', 'LOD'):
+        DoSearch.search_types[search_key] = "LrsSearch"
 
     def get_from_clause(self):
-        #If the user typed, for example "Chr4", the "Chr" substring needs to be removed so that all search elements can be converted to floats
-        if len(self.search_term) > 2 and "Chr" in self.search_term[2]:
-            chr_num = self.search_term[2].replace("Chr", "")
-            self.search_term[2] = chr_num
+        converted_search_term = []
+        for value in self.search_term:
+            try:
+                converted_search_term.append(float(value))
+            except:
+                converted_search_term.append(value)
 
-        self.search_term = [float(value) for value in self.search_term]
+        self.search_term = converted_search_term
 
         if len(self.search_term) > 2:
             from_clause = ", Geno"
@@ -465,6 +495,9 @@ class LrsSearch(DoSearch):
         if self.search_operator == "=":
             assert isinstance(self.search_term, (list, tuple))
             lrs_min, lrs_max = self.search_term[:2]
+            if self.search_type == "LOD":
+                lrs_min = lrs_min*4.61
+                lrs_max = lrs_max*4.61
 
             where_clause = """ %sXRef.LRS > %s and
                              %sXRef.LRS < %s """ % self.mescape(self.dataset.type,
@@ -473,8 +506,12 @@ class LrsSearch(DoSearch):
                                                                 max(lrs_min, lrs_max))
 
             if len(self.search_term) > 2:
+                #If the user typed, for example "Chr4", the "Chr" substring needs to be removed so that all search elements can be converted to floats
                 chr_num = self.search_term[2]
-                where_clause += """ and Geno.Chr = %s """ % (chr_num)
+                if "chr" in self.search_term[2].lower():
+                    chr_num = self.search_term[2].lower().replace("chr", "")
+                    self.search_term[2] = chr_num
+                where_clause += """ and Geno.Chr = '%s' """ % (chr_num)
                 if len(self.search_term) == 5:
                     mb_low, mb_high = self.search_term[3:]
                     where_clause += """ and Geno.Mb > %s and
@@ -489,6 +526,10 @@ class LrsSearch(DoSearch):
         else:
             # Deal with >, <, >=, and <=
             logger.debug("self.search_term is:", self.search_term)
+            lrs_val = self.search_term[0]
+            if self.search_type == "LOD":
+                lrs_val = lrs_val*4.61
+
             where_clause = """ %sXRef.LRS %s %s """ % self.mescape(self.dataset.type,
                                                                         self.search_operator,
                                                                         self.search_term[0])
@@ -505,12 +546,13 @@ class LrsSearch(DoSearch):
 
         return self.execute(self.query)
 
+
 class MrnaLrsSearch(LrsSearch, MrnaAssaySearch):
 
-    DoSearch.search_types['ProbeSet_LRS'] = 'MrnaLrsSearch'
+    for search_key in ('LRS', 'LOD'):
+        DoSearch.search_types['ProbeSet_' + search_key] = "MrnaLrsSearch"
 
     def run(self):
-
         self.from_clause = self.get_from_clause()
         self.where_clause = self.get_where_clause()
 
@@ -520,7 +562,8 @@ class MrnaLrsSearch(LrsSearch, MrnaAssaySearch):
 
 class PhenotypeLrsSearch(LrsSearch, PhenotypeSearch):
 
-    DoSearch.search_types['Publish_LRS'] = 'PhenotypeLrsSearch'
+    for search_key in ('LRS', 'LOD'):
+        DoSearch.search_types['Publish_' + search_key] = "PhenotypeLrsSearch"
 
     def run(self):
 
@@ -532,31 +575,39 @@ class PhenotypeLrsSearch(LrsSearch, PhenotypeSearch):
         return self.execute(self.query)
 
 
-
 class CisTransLrsSearch(DoSearch):
 
     def get_from_clause(self):
         return ", Geno"
 
     def get_where_clause(self, cis_trans):
-        self.search_term = [float(value) for value in self.search_term]
         self.mb_buffer = 5  # default
+        chromosome = None
         if cis_trans == "cis":
             the_operator = "<"
         else:
             the_operator = ">"
 
         if self.search_operator == "=":
+            if len(self.search_term) == 2 or len(self.search_term) == 3:
+                self.search_term = [float(value) for value in self.search_term]
             if len(self.search_term) == 2:
                 lrs_min, lrs_max = self.search_term
                 #[int(value) for value in self.search_term]
-
             elif len(self.search_term) == 3:
                 lrs_min, lrs_max, self.mb_buffer = self.search_term
-
+            elif len(self.search_term) == 4:
+                lrs_min, lrs_max, self.mb_buffer = [float(value) for value in self.search_term[:3]]
+                chromosome = self.search_term[3]
+                if "Chr" in chromosome or "chr" in chromosome:
+                    chromosome = int(chromosome[3:])
             else:
                 SomeError
 
+            if self.search_type == "CISLOD" or self.search_type == "TRANSLOD":
+                lrs_min = lrs_min * 4.61
+                lrs_max = lrs_max * 4.61
+
             sub_clause = """ %sXRef.LRS > %s and
                 %sXRef.LRS < %s  and """  % (
                     escape(self.dataset.type),
@@ -586,18 +637,24 @@ class CisTransLrsSearch(DoSearch):
                         escape(self.dataset.type)
                         )
         else:
+            if chromosome:
+                location_clause = "(%s.Chr = '%s' and %s.Chr = Geno.Chr and ABS(%s.Mb-Geno.Mb) %s %s) or (%s.Chr != Geno.Chr and Geno.Chr = '%s')" % (escape(self.dataset.type),
+                                                                                                                                                  chromosome,
+                                                                                                                                                  escape(self.dataset.type),
+                                                                                                                                                  escape(self.dataset.type),
+                                                                                                                                                  the_operator,
+                                                                                                                                                  escape(str(self.mb_buffer)),
+                                                                                                                                                  escape(self.dataset.type),
+                                                                                                                                                  chromosome)
+            else:
+                location_clause = "(ABS(%s.Mb-Geno.Mb) %s %s and %s.Chr = Geno.Chr) or (%s.Chr != Geno.Chr)" % (escape(self.dataset.type), the_operator, escape(str(self.mb_buffer)), escape(self.dataset.type))
             where_clause = sub_clause + """
                     %sXRef.Locus = Geno.name and
                     Geno.SpeciesId = %s and
-                    ((ABS(%s.Mb-Geno.Mb) %s %s and %s.Chr = Geno.Chr) or
-                    (%s.Chr != Geno.Chr))""" % (
+                    (%s)""" % (
                         escape(self.dataset.type),
                         escape(str(self.species_id)),
-                        escape(self.dataset.type),
-                        the_operator,
-                        escape(str(self.mb_buffer)),
-                        escape(self.dataset.type),
-                        escape(self.dataset.type)
+                        location_clause
                         )
 
         return where_clause
@@ -619,7 +676,8 @@ class CisLrsSearch(CisTransLrsSearch, MrnaAssaySearch):
 
     """
 
-    DoSearch.search_types['ProbeSet_CISLRS'] = 'CisLrsSearch'
+    for search_key in ('LRS', 'LOD'):
+        DoSearch.search_types['ProbeSet_CIS'+search_key] = "CisLrsSearch"
 
     def get_where_clause(self):
         return CisTransLrsSearch.get_where_clause(self, "cis")
@@ -648,7 +706,8 @@ class TransLrsSearch(CisTransLrsSearch, MrnaAssaySearch):
 
     """
 
-    DoSearch.search_types['ProbeSet_TRANSLRS'] = 'TransLrsSearch'
+    for search_key in ('LRS', 'LOD'):
+        DoSearch.search_types['ProbeSet_TRANS'+search_key] = "TransLrsSearch"
 
     def get_where_clause(self):
         return CisTransLrsSearch.get_where_clause(self, "trans")
@@ -742,7 +801,7 @@ class PositionSearch(DoSearch):
         self.chr = str(chr).lower()
         self.get_chr()
 
-        where_clause = """ %s.Chr = %s and
+        where_clause = """ %s.Chr = '%s' and
                                 %s.Mb > %s and
                                 %s.Mb < %s """ % self.mescape(self.dataset.type,
                                                               self.chr,
@@ -851,6 +910,29 @@ def is_number(s):
     except ValueError:
         return False
 
+def get_aliases(symbol, species):
+    if species == "mouse":
+        symbol_string = symbol.capitalize()
+    elif species == "human":
+        symbol_string = symbol.upper()
+    else:
+        return []
+
+    filtered_aliases = []
+    response = requests.get("http://gn2.genenetwork.org/gn3/gene/aliases/" + symbol_string)
+    if response:
+        alias_list = json.loads(response.content)
+
+        seen = set()
+        for item in alias_list:
+            if item in seen:
+                continue
+            else:
+                filtered_aliases.append(item)
+                seen.add(item)
+
+    return filtered_aliases
+
 if __name__ == "__main__":
     ### Usually this will be used as a library, but call it from the command line for testing
     ### And it runs the code below