diff options
Diffstat (limited to 'wqflask/wqflask/do_search.py')
-rw-r--r-- | wqflask/wqflask/do_search.py | 237 |
1 file changed, 91 insertions, 146 deletions
diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py index 2b8efd68..92a754e3 100644 --- a/wqflask/wqflask/do_search.py +++ b/wqflask/wqflask/do_search.py @@ -20,7 +20,7 @@ class DoSearch(object): def __init__(self, search_term, search_operator, dataset, cursor, db_conn): self.search_term = search_term # Make sure search_operator is something we expect - assert search_operator in ("=", "<", ">", "<=", ">="), "Bad search operator" + assert search_operator in (None, "=", "<", ">", "<=", ">="), "Bad search operator" self.search_operator = search_operator self.dataset = dataset self.db_conn = db_conn @@ -41,6 +41,12 @@ class DoSearch(object): def escape(self, stringy): """Shorter name than self.db_conn.escape_string""" return self.db_conn.escape_string(str(stringy)) + + def mescape(self, *items): + """Multiple escape""" + escaped = [self.escape(item) for item in items] + print("escaped is:", escaped) + return tuple(escaped) def normalize_spaces(self, stringy): """Strips out newlines/extra spaces and replaces them with just spaces""" @@ -91,8 +97,7 @@ class ProbeSetSearch(DoSearch): """Generates and runs a simple search of an mRNA expression dataset""" print("Running ProbeSetSearch") - query = (self.base_query + - """WHERE (MATCH (ProbeSet.Name, + query = self.base_query + """WHERE (MATCH (ProbeSet.Name, ProbeSet.description, ProbeSet.symbol, alias, @@ -102,8 +107,8 @@ class ProbeSetSearch(DoSearch): AGAINST ('%s' IN BOOLEAN MODE)) and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = %s - """ % (self.escape(self.search_term), - self.escape(self.dataset.id))) + """ % (self.escape(self.search_term[0]), + self.escape(self.dataset.id)) print("final query is:", pf(query)) @@ -275,7 +280,8 @@ class GoSearch(ProbeSetSearch): class LrsSearch(ProbeSetSearch): """Searches for genes with a QTL within the given LRS values - LRS searches can take 2 different forms: + LRS searches can take 3 different forms: + - LRS > (or <) min/max_LRS - 
LRS=(min_LRS max_LRS) - LRS=(min_LRS max_LRS chromosome start_Mb end_Mb) where min/max_LRS represent the range of LRS scores and start/end_Mb represent @@ -289,129 +295,128 @@ class LrsSearch(ProbeSetSearch): self.search_term = [float(value) for value in self.search_term] - from_clause = ", Geno" + self.from_clause = ", Geno" if self.search_operator == "=": - if len(self.search_term) >= 2: - if len(self.search_term) == 2: - lrs_min, lrs_max = self.search_term - elif len(self.search_term) == 5: - lrs_min, lrs_max, chr_num, mb_low, mb_high = self.search_term - else: - SomeError - - sub_clause = """ %sXRef.LRS > %s and - %sXRef.LRS < %s and """ % (self.escape(self.dataset.type), - self.escape(min(lrs_min, lrs_max)), - self.escape(self.dataset.type), - self.escape(max(lrs_min, lrs_max))) - + assert isinstance(self.search_term, (list, tuple)) + self.lrs_min, self.lrs_max = self.search_term[:2] + + self.sub_clause = """ %sXRef.LRS > %s and + %sXRef.LRS < %s and """ % self.mescape(self.dataset.type, + min(self.lrs_min, self.lrs_max), + self.dataset.type, + max(self.lrs_min, self.lrs_max)) + + if len(self.search_term) > 2: + self.chr_num = self.search_term[2] + self.sub_clause += """ Geno.Chr = %s and """ % (self.escape(self.chr_num)) if len(self.search_term) == 5: - sub_clause = sub_clause + """ Geno.Mb > %s and + self.mb_low, self.mb_high = self.search_term[3:] + self.sub_clause += """ Geno.Mb > %s and Geno.Mb < %s and - Geno.Chr = %s and - """ % (self.escape(min(mb_low, mb_high)), - self.escape(max(mb_low, mb_high)), - self.escape(chr_num)) + """ % self.mescape(min(self.mb_low, self.mb_high), + max(self.mb_low, self.mb_high)) + print("self.sub_clause is:", pf(self.sub_clause)) else: # Deal with >, <, >=, and <= - sub_clause = """ %sXRef.LRS %s %s and """ % (self.escape(self.dataset.type), - self.escape(self.search_operator), - self.escape(self.search_term[0])) + self.sub_clause = """ %sXRef.LRS %s %s and """ % self.mescape(self.dataset.type, + self.search_operator, + 
self.search_term[0]) - where_clause = sub_clause + """ %sXRef.Locus = Geno.name and + self.where_clause = self.sub_clause + """ %sXRef.Locus = Geno.name and Geno.SpeciesId = %s and %s.Chr = Geno.Chr - """ % (self.escape(self.dataset.type), - self.escape(self.species_id), - self.escape(self.dataset.type)) + """ % self.mescape(self.dataset.type, + self.species_id, + self.dataset.type) - print("where_clause is:", pf(where_clause)) + print("where_clause is:", pf(self.where_clause)) - query = self.compile_final_query(from_clause, where_clause) + self.query = self.compile_final_query(self.from_clause, self.where_clause) - return self.execute(query) - -class CisLrsSearch(LrsSearch): - """Searches for genes on a particular chromosome with a cis-eQTL within the given LRS values + return self.execute(self.query) - A cisLRS search can take 3 forms: - - cisLRS=(min_LRS max_LRS) - - cisLRS=(min_LRS max_LRS mb_buffer) - - cisLRS>min_LRS - where min/max_LRS represent the range of LRS scores and the mb_buffer is the range around - a particular QTL where its eQTL would be considered "cis". If there is no third parameter, - mb_buffer will default to 5 megabases. - A QTL is a cis-eQTL if a gene's expression is regulated by a QTL in roughly the same area - (where the area is determined by the mb_buffer that the user can choose). +class CisTransLrsSearch(LrsSearch): - """ - - # This is tentatively a child of LrsSearch; I'll need to check what code, if any, overlaps - # between this and the LrsSearch code. 
In the original code, commands are divided by - # the number of inputs they take, so these commands are completely separate - - DoSearch.search_types['CISLRS'] = "CisLrsSearch" - - def run(self): + def real_run(self, the_operator): #if isinstance(self.search_term, basestring): # self.search_term = [self.search_term] print("self.search_term is:", self.search_term) self.search_term = [float(value) for value in self.search_term] - mb_buffer = 5 # default - - from_clause = ", Geno " - + self.mb_buffer = 5 # default + self.from_clause = ", Geno " + if self.search_operator == "=": if len(self.search_term) == 2: - lower_limit, upper_limit = self.search_term + self.lrs_min, self.lrs_max = self.search_term #[int(value) for value in self.search_term] elif len(self.search_term) == 3: - lower_limit, upper_limit, mb_buffer = self.search_term + self.lrs_min, self.lrs_max, self.mb_buffer = self.search_term else: SomeError - sub_clause = """ %sXRef.LRS > %s and - %sXRef.LRS < %s and - ABS(%s.Mb-Geno.Mb) < %s and """ % ( - self.escape(self.dataset.type), - self.escape(min(lower_limit, upper_limit)), + self.sub_clause = """ %sXRef.LRS > %s and + %sXRef.LRS < %s and """ % ( self.escape(self.dataset.type), - self.escape(max(lower_limit, upper_limit)), + self.escape(min(self.lrs_min, self.lrs_max)), self.escape(self.dataset.type), - self.escape(mb_buffer) + self.escape(max(self.lrs_min, self.lrs_max)) ) - else: # Deal with >, <, >=, and <= - sub_clause = """ %sXRef.LRS %s %s and - ABS(%s.Mb-Geno.Mb) < %s and """ % ( + self.sub_clause = """ %sXRef.LRS %s %s and """ % ( self.escape(self.dataset.type), self.escape(self.search_operator), - self.escape(self.search_term[0]), - self.escape(self.dataset.type), - self.escape(mb_buffer) + self.escape(self.search_term[0]) ) - - where_clause = sub_clause + """%sXRef.Locus = Geno.name and + + self.where_clause = self.sub_clause + """ + ABS(%s.Mb-Geno.Mb) %s %s and + %sXRef.Locus = Geno.name and Geno.SpeciesId = %s and %s.Chr = Geno.Chr""" % ( 
self.escape(self.dataset.type), + the_operator, + self.escape(self.mb_buffer), + self.escape(self.dataset.type), self.escape(self.species_id), self.escape(self.dataset.type) ) - print("where_clause is:", pf(where_clause)) + print("where_clause is:", pf(self.where_clause)) - query = self.compile_final_query(from_clause, where_clause) + self.query = self.compile_final_query(self.from_clause, self.where_clause) - return self.execute(query) + return self.execute(self.query) + + +class CisLrsSearch(CisTransLrsSearch): + """Searches for genes on a particular chromosome with a cis-eQTL within the given LRS values + + A cisLRS search can take 3 forms: + - cisLRS=(min_LRS max_LRS) + - cisLRS=(min_LRS max_LRS mb_buffer) + - cisLRS>min_LRS + where min/max_LRS represent the range of LRS scores and the mb_buffer is the range around + a particular QTL where its eQTL would be considered "cis". If there is no third parameter, + mb_buffer will default to 5 megabases. + + A QTL is a cis-eQTL if a gene's expression is regulated by a QTL in roughly the same area + (where the area is determined by the mb_buffer that the user can choose). + + """ + + DoSearch.search_types['CISLRS'] = "CisLrsSearch" + + def run(self): + return self.real_run("<") + -class TransLrsSearch(LrsSearch): +class TransLrsSearch(CisTransLrsSearch): """Searches for genes on a particular chromosome with a trans-eQTL within the given LRS values A transLRS search can take 2 forms: @@ -425,70 +430,11 @@ class TransLrsSearch(LrsSearch): (where the area is determined by the mb_buffer that the user can choose). Opposite of cis-eQTL. """ - - # This is tentatively a child of LrsSearch; I'll need to check what code, if any, overlaps - # between this and the LrsSearch code. 
In the original code, commands are divided by - # the number of inputs they take, so these commands are completely separate DoSearch.search_types['TRANSLRS'] = "TransLrsSearch" def run(self): - if len(self.search_term) == 3: - lower_limit, upper_limit, min_threshold = [int(value) for value in self.search_term] - - where_clause = """ %sXRef.LRS > %s and - %sXRef.LRS < %s and - %sXRef.Locus = Geno.name and - Geno.SpeciesId = %s and - (%s.Chr != Geno.Chr or - ABS(%s.Mb-Geno.Mb) > %s) """ % ( - self.dataset.type, - min(lower_limit, upper_limit), - self.dataset.type, - max(lower_limit, upper_limit), - self.dataset.type, - self.species_id, - self.dataset.type, - self.dataset.type, - min_threshold - ) - - else: - NeedSomeErrorHere - - return None - - -#itemCmd = item[0] -#lowerLimit = float(item[1]) -#upperLimit = float(item[2]) -# -#if itemCmd.upper() in ("TRANSLRS", "CISLRS"): -# if item[3]: -# mthresh = float(item[3]) -# clauseItem = " %sXRef.LRS > %2.7f and %sXRef.LRS < %2.7f " % \ -# (self.dbType, min(lowerLimit, upperLimit), self.dbType, max(lowerLimit, upperLimit)) -# if itemCmd.upper() == "CISLRS": -# clauseItem += """ and %sXRef.Locus = Geno.name and Geno.SpeciesId = %s and %s.Chr = Geno.Chr and ABS(%s.Mb-Geno.Mb) < %2.7f """ % (self.dbType, self.speciesId, self.dbType, self.dbType, mthresh) -# DescriptionText.append(HT.Span(' with a ', HT.U('cis-QTL'), ' having an LRS between %g and %g using a %g Mb exclusion buffer' % (min(lowerLimit, upperLimit), max(lowerLimit, upperLimit), mthresh))) -# else: -# clauseItem += """ and %sXRef.Locus = Geno.name and Geno.SpeciesId = %s and (%s.Chr != Geno.Chr or (%s.Chr != Geno.Chr and ABS(%s.Mb-Geno.Mb) > %2.7f)) """ % (self.dbType, self.speciesId, self.dbType, self.dbType, self.dbType, mthresh) -# DescriptionText.append(HT.Span(' with a ', HT.U('trans-QTL'), ' having an LRS between %g and %g using a %g Mb exclusion buffer' % (min(lowerLimit, upperLimit), max(lowerLimit, upperLimit), mthresh))) -# query.append(" (%s) " % 
clauseItem) -# self.orderByDefalut = "LRS" -# else: -# pass -#elif itemCmd.upper() in ("RANGE"): -# #XZ, 03/05/2009: Xiaodong changed Data to ProbeSetData -# clauseItem = " (select Pow(2, max(value) -min(value)) from ProbeSetData where Id = ProbeSetXRef.dataId) > %2.7f and (select Pow(2, max(value) -min(value)) from ProbeSetData where Id = ProbeSetXRef.dataId) < %2.7f " % (min(lowerLimit, upperLimit), max(lowerLimit, upperLimit)) -# query.append(" (%s) " % clauseItem) -# DescriptionText.append(HT.Span(' with a range of expression that varied between %g and %g' % (min(lowerLimit, upperLimit), max(lowerLimit, upperLimit)), " (fold difference)")) -#else: -# clauseItem = " %sXRef.%s > %2.7f and %sXRef.%s < %2.7f " % \ -# (self.dbType, itemCmd, min(lowerLimit, upperLimit), self.dbType, itemCmd, max(lowerLimit, upperLimit)) -# query.append(" (%s) " % clauseItem) -# self.orderByDefalut = itemCmd -# DescriptionText.append(HT.Span(' with ', HT.U(itemCmd), ' between %g and %g' % (min(lowerLimit, upperLimit), max(lowerLimit, upperLimit)))) + return self.real_run(">") class MeanSearch(ProbeSetSearch): @@ -508,7 +454,6 @@ if __name__ == "__main__": import MySQLdb import sys - from base import webqtlConfig from base.data_set import create_dataset from base.templatePage import templatePage @@ -540,11 +485,11 @@ if __name__ == "__main__": ProbeSetXRef.ProbeSetFreezeId = 112""") #print(pf(cursor.fetchall())) - #results = ProbeSetSearch("salt", dataset, cursor, db_conn).run() + results = ProbeSetSearch("shh", None, dataset, cursor, db_conn).run() #results = RifSearch("diabetes", dataset, cursor, db_conn).run() #results = WikiSearch("nicotine", dataset, cursor, db_conn).run() - results = CisLrsSearch(['99'], '>', dataset, cursor, db_conn).run() # cisLRS > 99 - #results = LrsSearch('9', '99', '1', '50', '150', '=', dataset, cursor, db_conn).run() + #results = CisLrsSearch(['99'], '>', dataset, cursor, db_conn).run() # cisLRS > 99 + #results = LrsSearch('99', '>', dataset, cursor, 
db_conn).run() #results = TransLrsSearch(['9', '999', '10'], dataset, cursor, db_conn).run() #results = PhenotypeSearch("brain", dataset, cursor, db_conn).run() #results = GenotypeSearch("rs13475699", dataset, cursor, db_conn).run() |