diff options
Diffstat (limited to 'wqflask/wqflask/do_search.py')
-rw-r--r-- | wqflask/wqflask/do_search.py | 152 |
1 files changed, 117 insertions, 35 deletions
diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py index c7dbc972..05caa100 100644 --- a/wqflask/wqflask/do_search.py +++ b/wqflask/wqflask/do_search.py @@ -1,6 +1,8 @@ from __future__ import print_function, division import string +import requests +import json from flask import Flask, g @@ -22,17 +24,19 @@ class DoSearch(object): # Used to translate search phrases into classes search_types = dict() - def __init__(self, search_term, search_operator=None, dataset=None): + def __init__(self, search_term, search_operator=None, dataset=None, search_type=None): self.search_term = search_term # Make sure search_operator is something we expect assert search_operator in (None, "=", "<", ">", "<=", ">="), "Bad search operator" self.search_operator = search_operator self.dataset = dataset + self.search_type = search_type if self.dataset: logger.debug("self.dataset is boo: ", type(self.dataset), pf(self.dataset)) logger.debug("self.dataset.group is: ", pf(self.dataset.group)) #Get group information for dataset and the species id + self.species_id = webqtlDatabaseFunction.retrieve_species_id(self.dataset.group.name) def execute(self, query): @@ -44,8 +48,8 @@ class DoSearch(object): def handle_wildcard(self, str): keyword = str.strip() - keyword.replace("*",".*") - keyword.replace("?",".") + keyword = keyword.replace("*",".*") + keyword = keyword.replace("?",".") return keyword @@ -69,7 +73,7 @@ class DoSearch(object): logger.debug("search_types are:", pf(cls.search_types)) search_type_string = search_type['dataset_type'] - if 'key' in search_type: + if 'key' in search_type and search_type['key'] != None: search_type_string += '_' + search_type['key'] logger.debug("search_type_string is:", search_type_string) @@ -105,18 +109,34 @@ class MrnaAssaySearch(DoSearch): 'Max LRS Location', 'Additive Effect'] + def get_alias_where_clause(self): + search_string = escape(self.search_term[0]) + + if self.search_term[0] != "*": + match_clause = """((MATCH (ProbeSet.symbol) AGAINST ('%s' IN BOOLEAN MODE))) and """ % (search_string) + else: + match_clause = "" + + where_clause = (match_clause + + """ProbeSet.Id = ProbeSetXRef.ProbeSetId + and ProbeSetXRef.ProbeSetFreezeId = %s + """ % (escape(str(self.dataset.id)))) + + return where_clause + def get_where_clause(self): + search_string = escape(self.search_term[0]) if self.search_term[0] != "*": - match_clause = """(MATCH (ProbeSet.Name, + match_clause = """((MATCH (ProbeSet.Name, ProbeSet.description, ProbeSet.symbol, alias, GenbankId, UniGeneId, Probe_Target_Description) - AGAINST ('%s' IN BOOLEAN MODE)) and - """ % (escape(self.search_term[0])) + AGAINST ('%s' IN BOOLEAN MODE))) AND + """ % (search_string) else: match_clause = "" @@ -198,6 +218,7 @@ class PhenotypeSearch(DoSearch): header_fields = ['Index', 'Record', 'Description', + 'Mean', 'Authors', 'Year', 'Max LRS', @@ -209,8 +230,12 @@ class PhenotypeSearch(DoSearch): #Todo: Zach will figure out exactly what both these lines mean #and comment here - if "'" not in self.search_term[0]: - search_term = "[[:<:]]" + self.handle_wildcard(self.search_term[0]) + "[[:>:]]" + + #if "'" not in self.search_term[0]: + search_term = "[[:<:]]" + self.handle_wildcard(self.search_term[0]) + "[[:>:]]" + if "_" in self.search_term[0]: + if len(self.search_term[0].split("_")[0]) == 3: + search_term = "[[:<:]]" + self.handle_wildcard(self.search_term[0].split("_")[1]) + "[[:>:]]" # This adds a clause to the query that matches the search term # against each field in the search_fields tuple @@ -232,7 +257,8 @@ class PhenotypeSearch(DoSearch): WHERE PublishXRef.InbredSetId = %s and PublishXRef.PhenotypeId = Phenotype.Id and PublishXRef.PublicationId = Publication.Id - and PublishFreeze.Id = %s""" % ( + and PublishFreeze.Id = %s + ORDER BY PublishXRef.Id""" % ( from_clause, escape(str(self.dataset.group.id)), escape(str(self.dataset.id)))) @@ -243,7 +269,8 @@ class PhenotypeSearch(DoSearch): and PublishXRef.InbredSetId = %s and PublishXRef.PhenotypeId = Phenotype.Id and PublishXRef.PublicationId = Publication.Id - and PublishFreeze.Id = %s""" % ( + and PublishFreeze.Id = %s + ORDER BY PublishXRef.Id""" % ( from_clause, where_clause, escape(str(self.dataset.group.id)), @@ -444,15 +471,18 @@ class LrsSearch(DoSearch): """ - DoSearch.search_types['LRS'] = 'LrsSearch' + for search_key in ('LRS', 'LOD'): + DoSearch.search_types[search_key] = "LrsSearch" def get_from_clause(self): - #If the user typed, for example "Chr4", the "Chr" substring needs to be removed so that all search elements can be converted to floats - if len(self.search_term) > 2 and "Chr" in self.search_term[2]: - chr_num = self.search_term[2].replace("Chr", "") - self.search_term[2] = chr_num + converted_search_term = [] + for value in self.search_term: + try: + converted_search_term.append(float(value)) + except: + converted_search_term.append(value) - self.search_term = [float(value) for value in self.search_term] + self.search_term = converted_search_term if len(self.search_term) > 2: from_clause = ", Geno" @@ -465,6 +495,9 @@ class LrsSearch(DoSearch): if self.search_operator == "=": assert isinstance(self.search_term, (list, tuple)) lrs_min, lrs_max = self.search_term[:2] + if self.search_type == "LOD": + lrs_min = lrs_min*4.61 + lrs_max = lrs_max*4.61 where_clause = """ %sXRef.LRS > %s and %sXRef.LRS < %s """ % self.mescape(self.dataset.type, @@ -473,8 +506,12 @@ class LrsSearch(DoSearch): max(lrs_min, lrs_max)) if len(self.search_term) > 2: + #If the user typed, for example "Chr4", the "Chr" substring needs to be removed so that all search elements can be converted to floats chr_num = self.search_term[2] - where_clause += """ and Geno.Chr = %s """ % (chr_num) + if "chr" in self.search_term[2].lower(): + chr_num = self.search_term[2].lower().replace("chr", "") + self.search_term[2] = chr_num + where_clause += """ and Geno.Chr = '%s' """ % (chr_num) if len(self.search_term) == 5: mb_low, mb_high = self.search_term[3:] where_clause += """ and Geno.Mb > %s and @@ -489,6 +526,10 @@ class LrsSearch(DoSearch): else: # Deal with >, <, >=, and <= logger.debug("self.search_term is:", self.search_term) + lrs_val = self.search_term[0] + if self.search_type == "LOD": + lrs_val = lrs_val*4.61 + where_clause = """ %sXRef.LRS %s %s """ % self.mescape(self.dataset.type, self.search_operator, self.search_term[0]) @@ -505,12 +546,13 @@ class LrsSearch(DoSearch): return self.execute(self.query) + class MrnaLrsSearch(LrsSearch, MrnaAssaySearch): - DoSearch.search_types['ProbeSet_LRS'] = 'MrnaLrsSearch' + for search_key in ('LRS', 'LOD'): + DoSearch.search_types['ProbeSet_' + search_key] = "MrnaLrsSearch" def run(self): - self.from_clause = self.get_from_clause() self.where_clause = self.get_where_clause() @@ -520,7 +562,8 @@ class MrnaLrsSearch(LrsSearch, MrnaAssaySearch): class PhenotypeLrsSearch(LrsSearch, PhenotypeSearch): - DoSearch.search_types['Publish_LRS'] = 'PhenotypeLrsSearch' + for search_key in ('LRS', 'LOD'): + DoSearch.search_types['Publish_' + search_key] = "PhenotypeLrsSearch" def run(self): @@ -532,31 +575,39 @@ class PhenotypeLrsSearch(LrsSearch, PhenotypeSearch): return self.execute(self.query) - class CisTransLrsSearch(DoSearch): def get_from_clause(self): return ", Geno" def get_where_clause(self, cis_trans): - self.search_term = [float(value) for value in self.search_term] self.mb_buffer = 5 # default + chromosome = None if cis_trans == "cis": the_operator = "<" else: the_operator = ">" if self.search_operator == "=": + if len(self.search_term) == 2 or len(self.search_term) == 3: + self.search_term = [float(value) for value in self.search_term] if len(self.search_term) == 2: lrs_min, lrs_max = self.search_term #[int(value) for value in self.search_term] - elif len(self.search_term) == 3: lrs_min, lrs_max, self.mb_buffer = self.search_term - + elif len(self.search_term) == 4: + lrs_min, lrs_max, self.mb_buffer = [float(value) for value in self.search_term[:3]] + chromosome = self.search_term[3] + if "Chr" in chromosome or "chr" in chromosome: + chromosome = int(chromosome[3:]) else: SomeError + if self.search_type == "CISLOD" or self.search_type == "TRANSLOD": + lrs_min = lrs_min * 4.61 + lrs_max = lrs_max * 4.61 + sub_clause = """ %sXRef.LRS > %s and %sXRef.LRS < %s and """ % ( escape(self.dataset.type), @@ -586,18 +637,24 @@ class CisTransLrsSearch(DoSearch): escape(self.dataset.type) ) else: + if chromosome: + location_clause = "(%s.Chr = '%s' and %s.Chr = Geno.Chr and ABS(%s.Mb-Geno.Mb) %s %s) or (%s.Chr != Geno.Chr and Geno.Chr = '%s')" % (escape(self.dataset.type), + chromosome, + escape(self.dataset.type), + escape(self.dataset.type), + the_operator, + escape(str(self.mb_buffer)), + escape(self.dataset.type), + chromosome) + else: + location_clause = "(ABS(%s.Mb-Geno.Mb) %s %s and %s.Chr = Geno.Chr) or (%s.Chr != Geno.Chr)" % (escape(self.dataset.type), the_operator, escape(str(self.mb_buffer)), escape(self.dataset.type)) where_clause = sub_clause + """ %sXRef.Locus = Geno.name and Geno.SpeciesId = %s and - ((ABS(%s.Mb-Geno.Mb) %s %s and %s.Chr = Geno.Chr) or - (%s.Chr != Geno.Chr))""" % ( + (%s)""" % ( escape(self.dataset.type), escape(str(self.species_id)), - escape(self.dataset.type), - the_operator, - escape(str(self.mb_buffer)), - escape(self.dataset.type), - escape(self.dataset.type) + location_clause ) return where_clause @@ -619,7 +676,8 @@ class CisLrsSearch(CisTransLrsSearch, MrnaAssaySearch): """ - DoSearch.search_types['ProbeSet_CISLRS'] = 'CisLrsSearch' + for search_key in ('LRS', 'LOD'): + DoSearch.search_types['ProbeSet_CIS'+search_key] = "CisLrsSearch" def get_where_clause(self): return CisTransLrsSearch.get_where_clause(self, "cis") @@ -648,7 +706,8 @@ class TransLrsSearch(CisTransLrsSearch, MrnaAssaySearch): """ - DoSearch.search_types['ProbeSet_TRANSLRS'] = 'TransLrsSearch' + for search_key in ('LRS', 'LOD'): + DoSearch.search_types['ProbeSet_TRANS'+search_key] = "TransLrsSearch" def get_where_clause(self): return CisTransLrsSearch.get_where_clause(self, "trans") @@ -742,7 +801,7 @@ class PositionSearch(DoSearch): self.chr = str(chr).lower() self.get_chr() - where_clause = """ %s.Chr = %s and + where_clause = """ %s.Chr = '%s' and %s.Mb > %s and %s.Mb < %s """ % self.mescape(self.dataset.type, self.chr, @@ -851,6 +910,29 @@ def is_number(s): except ValueError: return False +def get_aliases(symbol, species): + if species == "mouse": + symbol_string = symbol.capitalize() + elif species == "human": + symbol_string = symbol.upper() + else: + return [] + + filtered_aliases = [] + response = requests.get("http://gn2.genenetwork.org/gn3/gene/aliases/" + symbol_string) + if response: + alias_list = json.loads(response.content) + + seen = set() + for item in alias_list: + if item in seen: + continue + else: + filtered_aliases.append(item) + seen.add(item) + + return filtered_aliases + if __name__ == "__main__": ### Usually this will be used as a library, but call it from the command line for testing ### And it runs the code below |