From 9eab3fbce2cd2247dc571f9d1c19946f97fb33ce Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Wed, 28 Nov 2012 17:45:02 -0600 Subject: Got cisLRS search working (in web service) for searches with > or < (ex. LRS>99) search_term is always a list now ([99] in the example LRS>99) --- wqflask/wqflask/do_search.py | 217 ++++++++++++++++++++++---------------- wqflask/wqflask/parser.py | 16 ++- wqflask/wqflask/search_results.py | 2 + 3 files changed, 143 insertions(+), 92 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py index 73a72e00..49da4282 100644 --- a/wqflask/wqflask/do_search.py +++ b/wqflask/wqflask/do_search.py @@ -17,8 +17,11 @@ class DoSearch(object): # Used to translate search phrases into classes search_types = dict() - def __init__(self, search_term, dataset, cursor, db_conn): + def __init__(self, search_term, search_operator, dataset, cursor, db_conn): self.search_term = search_term + # Make sure search_operator is something we expect + assert search_operator in ("=", "<", ">", "<=", ">="), "Bad search operator" + self.search_operator = search_operator self.dataset = dataset self.db_conn = db_conn self.cursor = cursor @@ -49,6 +52,64 @@ class DoSearch(object): return cls.search_types[search_type] +class ProbeSetSearch(DoSearch): + """A search within an mRNA expression dataset""" + + DoSearch.search_types['ProbeSet'] = "ProbeSetSearch" + + base_query = """SELECT ProbeSet.Name as TNAME, + 0 as thistable, + ProbeSetXRef.Mean as TMEAN, + ProbeSetXRef.LRS as TLRS, + ProbeSetXRef.PVALUE as TPVALUE, + ProbeSet.Chr_num as TCHR_NUM, + ProbeSet.Mb as TMB, + ProbeSet.Symbol as TSYMBOL, + ProbeSet.name_num as TNAME_NUM + FROM ProbeSetXRef, ProbeSet """ + + + def compile_final_query(self, from_clause = '', where_clause = ''): + """Generates the final query string""" + + from_clause = self.normalize_spaces(from_clause) + + query = (self.base_query + + """%s + WHERE %s + and ProbeSet.Id = ProbeSetXRef.ProbeSetId + and ProbeSetXRef.ProbeSetFreezeId = %s + """ % (self.escape(from_clause), + where_clause, + self.escape(self.dataset.id))) + + print("query is:", pf(query)) + + return query + + def run(self): + """Generates and runs a simple search of an mRNA expression dataset""" + + print("Running ProbeSetSearch") + query = (self.base_query + + """WHERE (MATCH (ProbeSet.Name, + ProbeSet.description, + ProbeSet.symbol, + alias, + GenbankId, + UniGeneId, + Probe_Target_Description) + AGAINST ('%s' IN BOOLEAN MODE)) + and ProbeSet.Id = ProbeSetXRef.ProbeSetId + and ProbeSetXRef.ProbeSetFreezeId = %s + """ % (self.escape(self.search_term), + self.escape(self.dataset.id))) + + print("final query is:", pf(query)) + + return self.execute(query) + + class PhenotypeSearch(DoSearch): """A search within a phenotype dataset""" @@ -147,71 +208,12 @@ class GenotypeSearch(DoSearch): """WHERE %s and Geno.Id = GenoXRef.GenoId and GenoXRef.GenoFreezeId = GenoFreeze.Id and - GenoFreeze.Id = %s""" % ( + GenoFreeze.Id = %s"""% ( self.get_where_clause(), self.escape(self.dataset.id))) return self.execute(query) - -class ProbeSetSearch(DoSearch): - """A search within an mRNA expression dataset""" - - DoSearch.search_types['ProbeSet'] = "ProbeSetSearch" - - base_query = """SELECT ProbeSet.Name as TNAME, - 0 as thistable, - ProbeSetXRef.Mean as TMEAN, - ProbeSetXRef.LRS as TLRS, - ProbeSetXRef.PVALUE as TPVALUE, - ProbeSet.Chr_num as TCHR_NUM, - ProbeSet.Mb as TMB, - ProbeSet.Symbol as TSYMBOL, - ProbeSet.name_num as TNAME_NUM - FROM ProbeSetXRef, ProbeSet """ - - - def compile_final_query(self, from_clause, where_clause): - """Generates the final query string""" - - from_clause = self.normalize_spaces(from_clause) - - query = (self.normalize_spaces(self.base_query) + - """%s - WHERE %s - and ProbeSet.Id = ProbeSetXRef.ProbeSetId - and ProbeSetXRef.ProbeSetFreezeId = %s - """ % (self.escape(from_clause), - where_clause, - self.escape(self.dataset.id))) - - print("query is:", pf(query)) - - return query - - def run(self): - """Generates and runs a simple search of an mRNA expression dataset""" - - print("Running ProbeSetSearch") - query = (self.base_query + - """WHERE (MATCH (ProbeSet.Name, - ProbeSet.description, - ProbeSet.symbol, - alias, - GenbankId, - UniGeneId, - Probe_Target_Description) - AGAINST ('%s' IN BOOLEAN MODE)) - and ProbeSet.Id = ProbeSetXRef.ProbeSetId - and ProbeSetXRef.ProbeSetFreezeId = %s - """ % (self.escape(self.search_term), - self.escape(self.dataset.id))) - - print("final query is:", pf(query)) - - return self.execute(query) - - class RifSearch(ProbeSetSearch): """Searches for traits with a Gene RIF entry including the search term.""" @@ -257,7 +259,7 @@ class GoSearch(ProbeSetSearch): statements = ("""%s.symbol=GOgene_product.symbol and GOassociation.gene_product_id=GOgene_product.id and GOterm.id=GOassociation.term_id""" % ( - self.escape(self.dataset.type))) + self.db_conn.escape_string(self.dataset.type))) where_clause = " %s = '%s' and %s " % (field, go_id, statements) @@ -283,12 +285,13 @@ class LrsSearch(ProbeSetSearch): DoSearch.search_types['LRS'] = 'LrsSearch' -class CisLrsSearch(ProbeSetSearch): +class CisLrsSearch(LrsSearch): """Searches for genes on a particular chromosome with a cis-eQTL within the given LRS values - A cisLRS search can take 2 forms: + A cisLRS search can take 3 forms: - cisLRS=(min_LRS max_LRS) - cisLRS=(min_LRS max_LRS mb_buffer) + - cisLRS>min_LRS where min/max_LRS represent the range of LRS scores and the mb_buffer is the range around a particular QTL where its eQTL would be considered "cis". If there is no third parameter, mb_buffer will default to 5 megabases. @@ -305,30 +308,56 @@ class CisLrsSearch(ProbeSetSearch): DoSearch.search_types['CISLRS'] = "CisLrsSearch" def run(self): + #if isinstance(self.search_term, basestring): + # self.search_term = [self.search_term] + print("self.search_term is:", self.search_term) + self.search_term = [float(value) for value in self.search_term] + mb_buffer = 5 # default from_clause = ", Geno " - if len(self.search_term) == 3: - lower_limit, upper_limit, min_threshold = [int(value) for value in self.search_term] - - where_clause = """ %sXRef.LRS > %s and + + if self.search_operator == "=": + if len(self.search_term) == 2: + lower_limit, upper_limit = self.search_term + #[int(value) for value in self.search_term] + + elif len(self.search_term) == 3: + lower_limit, upper_limit, mb_buffer = self.search_term + + sub_clause = """ %sXRef.LRS > %s and %sXRef.LRS < %s and - %sXRef.Locus = Geno.name and - Geno.SpeciesId = %s and - %s.Chr = Geno.Chr and - ABS(%s.Mb-Geno.Mb) < %s """ % ( + ABS(%s.Mb-Geno.Mb) < %s """ % ( self.escape(self.dataset.type), - min(lower_limit, upper_limit), + self.escape(min(lower_limit, upper_limit)), self.escape(self.dataset.type), - max(lower_limit, upper_limit), + self.escape(max(lower_limit, upper_limit)), self.escape(self.dataset.type), - self.species_id, + self.escape(mb_buffer) + ) + + + + else: + # Deal with >, <, >=, and <= + sub_clause = """ %sXRef.LRS %s %s and + ABS(%s.Mb-Geno.Mb) < %s and """ % ( self.escape(self.dataset.type), + self.escape(self.search_operator), + self.escape(self.search_term[0]), self.escape(self.dataset.type), - min_threshold + self.escape(mb_buffer) + ) + + where_clause = sub_clause + """%sXRef.Locus = Geno.name and + Geno.SpeciesId = %s and + %s.Chr = Geno.Chr""" % ( + self.escape(self.dataset.type), + self.escape(self.species_id), + self.escape(self.dataset.type) ) - else: - NeedSomeErrorHere + + print("where_clause is:", pf(where_clause)) query = self.compile_final_query(from_clause, where_clause) @@ -356,8 +385,6 @@ class TransLrsSearch(LrsSearch): DoSearch.search_types['TRANSLRS'] = "TransLrsSearch" def run(self): - from_clause = ", Geno " - if len(self.search_term) == 3: lower_limit, upper_limit, min_threshold = [int(value) for value in self.search_term] @@ -379,11 +406,9 @@ class TransLrsSearch(LrsSearch): ) else: - NeedSomeErrorHere - - query = self.compile_final_query(from_clause, where_clause) + NeedSomeErrorHere - return self.execute(query) + return None #itemCmd = item[0] @@ -450,14 +475,30 @@ if __name__ == "__main__": dataset_name = "HC_M2_0606_P" dataset = create_dataset(db_conn, dataset_name) - + + cursor.execute(""" + SELECT ProbeSet.Name as TNAME, 0 as thistable, + ProbeSetXRef.Mean as TMEAN, ProbeSetXRef.LRS as TLRS, + ProbeSetXRef.PVALUE as TPVALUE, ProbeSet.Chr_num as TCHR_NUM, + ProbeSet.Mb as TMB, ProbeSet.Symbol as TSYMBOL, + ProbeSet.name_num as TNAME_NUM + FROM ProbeSetXRef, ProbeSet, Geno + WHERE ProbeSetXRef.LRS > 99.0 and + ABS(ProbeSet.Mb-Geno.Mb) < 5 and + ProbeSetXRef.Locus = Geno.name and + Geno.SpeciesId = 1 and + ProbeSet.Chr = Geno.Chr and + ProbeSet.Id = ProbeSetXRef.ProbeSetId and + ProbeSetXRef.ProbeSetFreezeId = 112""") + + #print(pf(cursor.fetchall())) #results = ProbeSetSearch("salt", dataset, cursor, db_conn).run() #results = RifSearch("diabetes", dataset, cursor, db_conn).run() #results = WikiSearch("nicotine", dataset, cursor, db_conn).run() - results = CisLrsSearch(['25','99','10'], dataset, cursor, db_conn).run() - #results = TransLrsSearch(['25', '999', '10'], dataset, cursor, db_conn).run() + results = CisLrsSearch('99', '>', dataset, cursor, db_conn).run() # cisLRS > 99 + #results = TransLrsSearch(['9', '999', '10'], dataset, cursor, db_conn).run() #results = PhenotypeSearch("brain", dataset, cursor, db_conn).run() #results = GenotypeSearch("rs13475699", dataset, cursor, db_conn).run() #results = GoSearch("0045202", dataset, cursor, db_conn).run() - print("results are:", pf(results)) \ No newline at end of file + #print("results are:", pf(results)) \ No newline at end of file diff --git a/wqflask/wqflask/parser.py b/wqflask/wqflask/parser.py index 676efa1e..efe479e6 100644 --- a/wqflask/wqflask/parser.py +++ b/wqflask/wqflask/parser.py @@ -15,8 +15,6 @@ Both square brackets and parentheses can be used interchangeably. Both can also encapsulate a single value; "cisLRS=[9 999 10)" would be acceptable.] -NEED TO DEAL WITH WILDCARD CHARACTER '*' - """ from __future__ import print_function, division @@ -26,6 +24,10 @@ import re from pprint import pformat as pf def parse(pstring): + """ + + returned item serach_term is always a list, even if only one element + """ pstring = re.split(r"""(?:(\w+\s*=\s*[\(\[][^)]*[\)\]]) | # LRS=(1 2 3), cisLRS=[4 5 6], etc (\w+\s*[=:\>\<][\w\*]+) | # wiki=bar, GO:foobar, etc ([\w\*]+)) # shh, brain, etc """, pstring, @@ -53,16 +55,22 @@ def parse(pstring): value = value[1:-1] # Get rid of the parenthesis values = re.split(r"""\s+|,""", value) value = [value.strip() for value in values if value.strip()] + else: + value = [value] + # : is a synonym for = + if separator == ":": + separator = "=" + term = dict(key=key, separator=separator, search_term=value) else: term = dict(key=None, separator=None, - search_term = item) + search_term=[item]) items.append(term) - print(pf(items) + "\n") + print("* items are:", pf(items) + "\n") return(items) if __name__ == '__main__': diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index 96350f22..fe091f97 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -127,6 +127,7 @@ class SearchResultPage(templatePage): for a_search in self.search_terms: print("[kodak] item is:", pf(a_search)) search_term = a_search['search_term'] + search_operator = a_search['separator'] if a_search['key']: search_type = a_search['key'].upper() else: @@ -141,6 +142,7 @@ class SearchResultPage(templatePage): search_ob = do_search.DoSearch.get_search(search_type) search_class = getattr(do_search, search_ob) self.results.extend(search_class(search_term, + search_operator, self.dataset, self.cursor, self.db_conn).run()) -- cgit v1.2.3