about summary refs log tree commit diff
diff options
context:
space:
mode:
author Zachary Sloan 2012-11-28 17:45:02 -0600
committer Zachary Sloan 2012-11-28 17:45:02 -0600
commit 9eab3fbce2cd2247dc571f9d1c19946f97fb33ce (patch)
tree b15c3166c4447e8c6703903d6e607c58815c4a29
parent 5278e4b66d3261c8ff114bf4cd0e69307d1ffd5f (diff)
download genenetwork2-9eab3fbce2cd2247dc571f9d1c19946f97fb33ce.tar.gz
Got cisLRS search working (in the web service) for searches with > or < (e.g. LRS>99).
search_term is always a list now ([99] in the example LRS>99).
-rw-r--r-- wqflask/wqflask/do_search.py 217
-rw-r--r-- wqflask/wqflask/parser.py 16
-rw-r--r-- wqflask/wqflask/search_results.py 2
3 files changed, 143 insertions, 92 deletions
diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py
index 73a72e00..49da4282 100644
--- a/wqflask/wqflask/do_search.py
+++ b/wqflask/wqflask/do_search.py
@@ -17,8 +17,11 @@ class DoSearch(object):
# Used to translate search phrases into classes
search_types = dict()
- def __init__(self, search_term, dataset, cursor, db_conn):
+ def __init__(self, search_term, search_operator, dataset, cursor, db_conn):
self.search_term = search_term
+ # Make sure search_operator is something we expect
+ assert search_operator in ("=", "<", ">", "<=", ">="), "Bad search operator"
+ self.search_operator = search_operator
self.dataset = dataset
self.db_conn = db_conn
self.cursor = cursor
@@ -49,6 +52,64 @@ class DoSearch(object):
return cls.search_types[search_type]
+class ProbeSetSearch(DoSearch):
+ """A search within an mRNA expression dataset"""
+
+ DoSearch.search_types['ProbeSet'] = "ProbeSetSearch"
+
+ base_query = """SELECT ProbeSet.Name as TNAME,
+ 0 as thistable,
+ ProbeSetXRef.Mean as TMEAN,
+ ProbeSetXRef.LRS as TLRS,
+ ProbeSetXRef.PVALUE as TPVALUE,
+ ProbeSet.Chr_num as TCHR_NUM,
+ ProbeSet.Mb as TMB,
+ ProbeSet.Symbol as TSYMBOL,
+ ProbeSet.name_num as TNAME_NUM
+ FROM ProbeSetXRef, ProbeSet """
+
+
+ def compile_final_query(self, from_clause = '', where_clause = ''):
+ """Generates the final query string"""
+
+ from_clause = self.normalize_spaces(from_clause)
+
+ query = (self.base_query +
+ """%s
+ WHERE %s
+ and ProbeSet.Id = ProbeSetXRef.ProbeSetId
+ and ProbeSetXRef.ProbeSetFreezeId = %s
+ """ % (self.escape(from_clause),
+ where_clause,
+ self.escape(self.dataset.id)))
+
+ print("query is:", pf(query))
+
+ return query
+
+ def run(self):
+ """Generates and runs a simple search of an mRNA expression dataset"""
+
+ print("Running ProbeSetSearch")
+ query = (self.base_query +
+ """WHERE (MATCH (ProbeSet.Name,
+ ProbeSet.description,
+ ProbeSet.symbol,
+ alias,
+ GenbankId,
+ UniGeneId,
+ Probe_Target_Description)
+ AGAINST ('%s' IN BOOLEAN MODE))
+ and ProbeSet.Id = ProbeSetXRef.ProbeSetId
+ and ProbeSetXRef.ProbeSetFreezeId = %s
+ """ % (self.escape(self.search_term),
+ self.escape(self.dataset.id)))
+
+ print("final query is:", pf(query))
+
+ return self.execute(query)
+
+
class PhenotypeSearch(DoSearch):
"""A search within a phenotype dataset"""
@@ -147,71 +208,12 @@ class GenotypeSearch(DoSearch):
"""WHERE %s and
Geno.Id = GenoXRef.GenoId and
GenoXRef.GenoFreezeId = GenoFreeze.Id and
- GenoFreeze.Id = %s""" % (
+ GenoFreeze.Id = %s"""% (
self.get_where_clause(),
self.escape(self.dataset.id)))
return self.execute(query)
-
-class ProbeSetSearch(DoSearch):
- """A search within an mRNA expression dataset"""
-
- DoSearch.search_types['ProbeSet'] = "ProbeSetSearch"
-
- base_query = """SELECT ProbeSet.Name as TNAME,
- 0 as thistable,
- ProbeSetXRef.Mean as TMEAN,
- ProbeSetXRef.LRS as TLRS,
- ProbeSetXRef.PVALUE as TPVALUE,
- ProbeSet.Chr_num as TCHR_NUM,
- ProbeSet.Mb as TMB,
- ProbeSet.Symbol as TSYMBOL,
- ProbeSet.name_num as TNAME_NUM
- FROM ProbeSetXRef, ProbeSet """
-
-
- def compile_final_query(self, from_clause, where_clause):
- """Generates the final query string"""
-
- from_clause = self.normalize_spaces(from_clause)
-
- query = (self.normalize_spaces(self.base_query) +
- """%s
- WHERE %s
- and ProbeSet.Id = ProbeSetXRef.ProbeSetId
- and ProbeSetXRef.ProbeSetFreezeId = %s
- """ % (self.escape(from_clause),
- where_clause,
- self.escape(self.dataset.id)))
-
- print("query is:", pf(query))
-
- return query
-
- def run(self):
- """Generates and runs a simple search of an mRNA expression dataset"""
-
- print("Running ProbeSetSearch")
- query = (self.base_query +
- """WHERE (MATCH (ProbeSet.Name,
- ProbeSet.description,
- ProbeSet.symbol,
- alias,
- GenbankId,
- UniGeneId,
- Probe_Target_Description)
- AGAINST ('%s' IN BOOLEAN MODE))
- and ProbeSet.Id = ProbeSetXRef.ProbeSetId
- and ProbeSetXRef.ProbeSetFreezeId = %s
- """ % (self.escape(self.search_term),
- self.escape(self.dataset.id)))
-
- print("final query is:", pf(query))
-
- return self.execute(query)
-
-
class RifSearch(ProbeSetSearch):
"""Searches for traits with a Gene RIF entry including the search term."""
@@ -257,7 +259,7 @@ class GoSearch(ProbeSetSearch):
statements = ("""%s.symbol=GOgene_product.symbol and
GOassociation.gene_product_id=GOgene_product.id and
GOterm.id=GOassociation.term_id""" % (
- self.escape(self.dataset.type)))
+ self.db_conn.escape_string(self.dataset.type)))
where_clause = " %s = '%s' and %s " % (field, go_id, statements)
@@ -283,12 +285,13 @@ class LrsSearch(ProbeSetSearch):
DoSearch.search_types['LRS'] = 'LrsSearch'
-class CisLrsSearch(ProbeSetSearch):
+class CisLrsSearch(LrsSearch):
"""Searches for genes on a particular chromosome with a cis-eQTL within the given LRS values
- A cisLRS search can take 2 forms:
+ A cisLRS search can take 3 forms:
- cisLRS=(min_LRS max_LRS)
- cisLRS=(min_LRS max_LRS mb_buffer)
+ - cisLRS>min_LRS
where min/max_LRS represent the range of LRS scores and the mb_buffer is the range around
a particular QTL where its eQTL would be considered "cis". If there is no third parameter,
mb_buffer will default to 5 megabases.
@@ -305,30 +308,56 @@ class CisLrsSearch(ProbeSetSearch):
DoSearch.search_types['CISLRS'] = "CisLrsSearch"
def run(self):
+ #if isinstance(self.search_term, basestring):
+ # self.search_term = [self.search_term]
+ print("self.search_term is:", self.search_term)
+ self.search_term = [float(value) for value in self.search_term]
+ mb_buffer = 5 # default
from_clause = ", Geno "
- if len(self.search_term) == 3:
- lower_limit, upper_limit, min_threshold = [int(value) for value in self.search_term]
-
- where_clause = """ %sXRef.LRS > %s and
+
+ if self.search_operator == "=":
+ if len(self.search_term) == 2:
+ lower_limit, upper_limit = self.search_term
+ #[int(value) for value in self.search_term]
+
+ elif len(self.search_term) == 3:
+ lower_limit, upper_limit, mb_buffer = self.search_term
+
+ sub_clause = """ %sXRef.LRS > %s and
%sXRef.LRS < %s and
- %sXRef.Locus = Geno.name and
- Geno.SpeciesId = %s and
- %s.Chr = Geno.Chr and
- ABS(%s.Mb-Geno.Mb) < %s """ % (
+ ABS(%s.Mb-Geno.Mb) < %s """ % (
self.escape(self.dataset.type),
- min(lower_limit, upper_limit),
+ self.escape(min(lower_limit, upper_limit)),
self.escape(self.dataset.type),
- max(lower_limit, upper_limit),
+ self.escape(max(lower_limit, upper_limit)),
self.escape(self.dataset.type),
- self.species_id,
+ self.escape(mb_buffer)
+ )
+
+
+
+ else:
+ # Deal with >, <, >=, and <=
+ sub_clause = """ %sXRef.LRS %s %s and
+ ABS(%s.Mb-Geno.Mb) < %s and """ % (
self.escape(self.dataset.type),
+ self.escape(self.search_operator),
+ self.escape(self.search_term[0]),
self.escape(self.dataset.type),
- min_threshold
+ self.escape(mb_buffer)
+ )
+
+ where_clause = sub_clause + """%sXRef.Locus = Geno.name and
+ Geno.SpeciesId = %s and
+ %s.Chr = Geno.Chr""" % (
+ self.escape(self.dataset.type),
+ self.escape(self.species_id),
+ self.escape(self.dataset.type)
)
- else:
- NeedSomeErrorHere
+
+ print("where_clause is:", pf(where_clause))
query = self.compile_final_query(from_clause, where_clause)
@@ -356,8 +385,6 @@ class TransLrsSearch(LrsSearch):
DoSearch.search_types['TRANSLRS'] = "TransLrsSearch"
def run(self):
- from_clause = ", Geno "
-
if len(self.search_term) == 3:
lower_limit, upper_limit, min_threshold = [int(value) for value in self.search_term]
@@ -379,11 +406,9 @@ class TransLrsSearch(LrsSearch):
)
else:
- NeedSomeErrorHere
-
- query = self.compile_final_query(from_clause, where_clause)
+ NeedSomeErrorHere
- return self.execute(query)
+ return None
#itemCmd = item[0]
@@ -450,14 +475,30 @@ if __name__ == "__main__":
dataset_name = "HC_M2_0606_P"
dataset = create_dataset(db_conn, dataset_name)
-
+
+ cursor.execute("""
+ SELECT ProbeSet.Name as TNAME, 0 as thistable,
+ ProbeSetXRef.Mean as TMEAN, ProbeSetXRef.LRS as TLRS,
+ ProbeSetXRef.PVALUE as TPVALUE, ProbeSet.Chr_num as TCHR_NUM,
+ ProbeSet.Mb as TMB, ProbeSet.Symbol as TSYMBOL,
+ ProbeSet.name_num as TNAME_NUM
+ FROM ProbeSetXRef, ProbeSet, Geno
+ WHERE ProbeSetXRef.LRS > 99.0 and
+ ABS(ProbeSet.Mb-Geno.Mb) < 5 and
+ ProbeSetXRef.Locus = Geno.name and
+ Geno.SpeciesId = 1 and
+ ProbeSet.Chr = Geno.Chr and
+ ProbeSet.Id = ProbeSetXRef.ProbeSetId and
+ ProbeSetXRef.ProbeSetFreezeId = 112""")
+
+ #print(pf(cursor.fetchall()))
#results = ProbeSetSearch("salt", dataset, cursor, db_conn).run()
#results = RifSearch("diabetes", dataset, cursor, db_conn).run()
#results = WikiSearch("nicotine", dataset, cursor, db_conn).run()
- results = CisLrsSearch(['25','99','10'], dataset, cursor, db_conn).run()
- #results = TransLrsSearch(['25', '999', '10'], dataset, cursor, db_conn).run()
+ results = CisLrsSearch('99', '>', dataset, cursor, db_conn).run() # cisLRS > 99
+ #results = TransLrsSearch(['9', '999', '10'], dataset, cursor, db_conn).run()
#results = PhenotypeSearch("brain", dataset, cursor, db_conn).run()
#results = GenotypeSearch("rs13475699", dataset, cursor, db_conn).run()
#results = GoSearch("0045202", dataset, cursor, db_conn).run()
- print("results are:", pf(results))
\ No newline at end of file
+ #print("results are:", pf(results))
\ No newline at end of file
diff --git a/wqflask/wqflask/parser.py b/wqflask/wqflask/parser.py
index 676efa1e..efe479e6 100644
--- a/wqflask/wqflask/parser.py
+++ b/wqflask/wqflask/parser.py
@@ -15,8 +15,6 @@ Both square brackets and parentheses can be used interchangeably. Both can also
encapsulate a single value; "cisLRS=[9 999 10)" would
be acceptable.]
-NEED TO DEAL WITH WILDCARD CHARACTER '*'
-
"""
from __future__ import print_function, division
@@ -26,6 +24,10 @@ import re
from pprint import pformat as pf
def parse(pstring):
+ """
+
+ returned item serach_term is always a list, even if only one element
+ """
pstring = re.split(r"""(?:(\w+\s*=\s*[\(\[][^)]*[\)\]]) | # LRS=(1 2 3), cisLRS=[4 5 6], etc
(\w+\s*[=:\>\<][\w\*]+) | # wiki=bar, GO:foobar, etc
([\w\*]+)) # shh, brain, etc """, pstring,
@@ -53,16 +55,22 @@ def parse(pstring):
value = value[1:-1] # Get rid of the parenthesis
values = re.split(r"""\s+|,""", value)
value = [value.strip() for value in values if value.strip()]
+ else:
+ value = [value]
+ # : is a synonym for =
+ if separator == ":":
+ separator = "="
+
term = dict(key=key,
separator=separator,
search_term=value)
else:
term = dict(key=None,
separator=None,
- search_term = item)
+ search_term=[item])
items.append(term)
- print(pf(items) + "\n")
+ print("* items are:", pf(items) + "\n")
return(items)
if __name__ == '__main__':
diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py
index 96350f22..fe091f97 100644
--- a/wqflask/wqflask/search_results.py
+++ b/wqflask/wqflask/search_results.py
@@ -127,6 +127,7 @@ class SearchResultPage(templatePage):
for a_search in self.search_terms:
print("[kodak] item is:", pf(a_search))
search_term = a_search['search_term']
+ search_operator = a_search['separator']
if a_search['key']:
search_type = a_search['key'].upper()
else:
@@ -141,6 +142,7 @@ class SearchResultPage(templatePage):
search_ob = do_search.DoSearch.get_search(search_type)
search_class = getattr(do_search, search_ob)
self.results.extend(search_class(search_term,
+ search_operator,
self.dataset,
self.cursor,
self.db_conn).run())