From 8aa848b0c1bddab3080f7a5abbd7ba199e786262 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Fri, 16 Nov 2012 10:15:14 -0600 Subject: Made small change to parser to have it detect square brackets '[' along with round ones '('; need to ask Sam about rewriting the regular expression stuff Trimmed a lot of code from search_results.py --- wqflask/wqflask/do_search.py | 79 ++++++++++++++++++++++++++++------- wqflask/wqflask/parser.py | 7 ++++ wqflask/wqflask/search_results.py | 86 +++++++++++++-------------------------- 3 files changed, 99 insertions(+), 73 deletions(-) diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py index 4517d9f5..ac6014e7 100644 --- a/wqflask/wqflask/do_search.py +++ b/wqflask/wqflask/do_search.py @@ -19,6 +19,10 @@ class DoSearch(object): self.dataset = dataset self.db_conn = db_conn self.cursor = cursor + + #Get group information for dataset and the species id + self.dataset.get_group() + self.species_id = webqtlDatabaseFunction.retrieveSpeciesId(self.cursor, self.dataset.group) def execute(self, query): """Executes query and returns results""" @@ -47,10 +51,6 @@ class ProbeSetSearch(DoSearch): DoSearch.search_types['ProbeSet'] = "ProbeSetSearch" - #Get group information for dataset and the species id - self.dataset.get_group() - self.species_id = webqtlDatabaseFunction.retrieveSpeciesId(self.cursor, self.dataset.group) - base_query = """SELECT ProbeSet.Name as TNAME, 0 as thistable, ProbeSetXRef.Mean as TMEAN, @@ -62,6 +62,7 @@ class ProbeSetSearch(DoSearch): ProbeSet.name_num as TNAME_NUM FROM ProbeSetXRef, ProbeSet """ + def compile_final_query(self, from_clause, where_clause): """Generates the final query string""" @@ -268,7 +269,7 @@ class LrsSearch(ProbeSetSearch): LRS searches can take 2 different forms: - LRS=(min_LRS max_LRS) - - LRS=(mine_LRS max_LRS chromosome start_Mb end_Mb) + - LRS=(min_LRS max_LRS chromosome start_Mb end_Mb) where min/max_LRS represent the range of LRS scores and start/end_Mb represent 
the range in megabases on the given chromosome @@ -276,9 +277,6 @@ class LrsSearch(ProbeSetSearch): DoSearch.search_types['LRS'] = 'LrsSearch' - self.species_id = webqtlDatabaseFunction.retrieveSpeciesId(self.cursor, - self.dataset.group) - class CisLrsSearch(LrsSearch): """Searches for genes on a particular chromosome with a cis-eQTL within the given LRS values @@ -290,8 +288,10 @@ class CisLrsSearch(LrsSearch): mb_buffer will default to 5 megabases. A QTL is a cis-eQTL if a gene's expression is regulated by a QTL in roughly the same area - (where the area is determined by the mb_buffer that the user can choose. + (where the area is determined by the mb_buffer that the user can choose). + """ + # This is tentatively a child of LrsSearch; I'll need to check what code, if any, overlaps # between this and the LrsSearch code. In the original code, commands are divided by # the number of inputs they take, so these commands are completely separate @@ -300,7 +300,7 @@ class CisLrsSearch(LrsSearch): def run(self): if len(self.search_term) == 3: - lower_limit, upper_limit, min_threshold = int(value) for value in self.search_term + lower_limit, upper_limit, min_threshold = [int(value) for value in self.search_term] where_clause = """ %sXRef.LRS > %s and %sXRef.LRS < %s and @@ -309,16 +309,65 @@ class CisLrsSearch(LrsSearch): %s.Chr = Geno.Chr and ABS(%s.Mb-Geno.Mb) < %s """ % ( self.dataset.type, - min(lower_limit, upper_limit) + min(lower_limit, upper_limit), + self.dataset.type, + max(lower_limit, upper_limit), + self.dataset.type, + self.species_id, self.dataset.type, - max(lower_limit, upper_limit, self.dataset.type, - + min_threshold ) else: - NeedSomeErrorHere - + NeedSomeErrorHere + + return None + +class TransLrsSearch(LrsSearch): + """Searches for genes on a particular chromosome with a trans-eQTL within the given LRS values + + A transLRS search can take 2 forms: + - transLRS=(min_LRS max_LRS) + - transLRS=(min_LRS max_LRS mb_buffer) + where min/max_LRS represent 
the range of LRS scores and the mb_buffer is the range around + a particular QTL where its eQTL would be considered "cis". If there is no third parameter, + mb_buffer will default to 5 megabases. + + A QTL is a trans-eQTL if a gene's expression is regulated by a QTL in a different location/area + (where the area is determined by the mb_buffer that the user can choose). Opposite of cis-eQTL. + + """ + + # This is tentatively a child of LrsSearch; I'll need to check what code, if any, overlaps + # between this and the LrsSearch code. In the original code, commands are divided by + # the number of inputs they take, so these commands are completely separate + + DoSearch.search_types['TRANSLRS'] = "TransLrsSearch" + + def run(self): + if len(self.search_term) == 3: + lower_limit, upper_limit, min_threshold = [int(value) for value in self.search_term] + + where_clause = """ %sXRef.LRS > %s and + %sXRef.LRS < %s and + %sXRef.Locus = Geno.name and + Geno.SpeciesId = %s and + (%s.Chr != Geno.Chr or + ABS(%s.Mb-Geno.Mb) > %s) """ % ( + self.dataset.type, + min(lower_limit, upper_limit), + self.dataset.type, + max(lower_limit, upper_limit), + self.dataset.type, + self.species_id, + self.dataset.type, + self.dataset.type, + min_threshold + ) + + else: + NeedSomeErrorHere return None diff --git a/wqflask/wqflask/parser.py b/wqflask/wqflask/parser.py index e693b2b8..b220f837 100644 --- a/wqflask/wqflask/parser.py +++ b/wqflask/wqflask/parser.py @@ -27,6 +27,13 @@ def parse(pstring): value = value[1:-1] # Get rid of the parenthesis values = re.split(r"""\s+|,""", value) value = [value.strip() for value in values if value.strip()] + # Brackets can also be used to encapsulate values + elif '[' in value: + assert value.startswith("["), "Invalid token" + assert value.endswith("]"), "Invalid token" + value = value[1:-1] # Get rid of the brackets + values = re.split(r"""\s+|,""", value) + value = [value.strip() for value in values if value.strip()] term = dict(key=key, 
seperator=seperator, search_term=value) diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index 6ba4d94a..4aa1f2bc 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -213,56 +213,49 @@ class SearchResultPage(templatePage): #### Excel file stuff stops if self.dataset.type == "ProbeSet": - #for item in result: print("foo locals are:", locals()) probe_set_id = result[0] - print("probe_set_id is:", pf(probe_set_id)) this_trait = webqtlTrait(db=self.dataset, name=probe_set_id, cursor=self.cursor) this_trait.retrieveInfo(QTL=True) print("this_trait is:", pf(this_trait)) self.trait_list.append(this_trait) - elif self.dataset.type == "Publish": - newrow += 1 - tblobj['body'] = self.getTableBodyForPublish(trait_list=self.trait_list, formName=mainfmName, worksheet=worksheet, newrow=newrow, species=species) - elif self.dataset.type == "Geno": - newrow += 1 - tblobj['body'] = self.getTableBodyForGeno(trait_list=self.trait_list, form_name=form_name, worksheet=worksheet, newrow=newrow) + #elif self.dataset.type == "Publish": + # tblobj['body'] = self.getTableBodyForPublish(trait_list=self.trait_list, formName=mainfmName, worksheet=worksheet, species=species) + #elif self.dataset.type == "Geno": + # tblobj['body'] = self.getTableBodyForGeno(trait_list=self.trait_list, form_name=form_name, worksheet=worksheet) #traitForm = HT.Form(cgi= os.path.join(webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE), enctype='multipart/form-data', name=thisFormName, submit=HT.Input(type='hidden')) hddn = {'FormID':'showDatabase','ProbeSetID':'_','database':'_','CellID':'_','group':group} hddn['incparentsf1']='ON' - # for key in hddn.keys(): - # traitForm.append(HT.Input(name=key, value=hddn[key], type='hidden')) - # - # traitForm.append(HT.P(),pageTable) - # - # TD_LR.append(traitForm) - # if len(self.results) > 1 and i < len(self.results) - 1: - # last_result = True - #if last_result: - # TD_LR.contents.pop() - + if 
self.dataset.type == "ProbeSet": - tblobj['body'] = self.getTableBodyForProbeSet(trait_list=self.trait_list, formName=self.form_name, newrow=newrow, species=species) + tblobj['body'] = self.getTableBodyForProbeSet(trait_list=self.trait_list, formName=self.form_name, species=species) elif self.dataset.type == "Publish": - tblobj['body'] = self.getTableBodyForPublish(trait_list=self.trait_list, formName=mainfmName, worksheet=worksheet, newrow=newrow, species=species) + tblobj['body'] = self.getTableBodyForPublish(trait_list=self.trait_list, formName=self.form_name, species=species) elif self.dataset.type == "Geno": - tblobj['body'] = self.getTableBodyForGeno(trait_list=self.trait_list, form_name=form_name, worksheet=worksheet, newrow=newrow) + tblobj['body'] = self.getTableBodyForGeno(trait_list=self.trait_list, form_name=self.form_name) def search(self): print("fd.search_terms:", self.fd['search_terms']) self.search_terms = parser.parse(self.fd['search_terms']) print("After parsing:", self.search_terms) - + self.results = [] for a_search in self.search_terms: print("[kodak] item is:", pf(a_search)) search_term = a_search['search_term'] - search_type = string.upper(a_search['key']) - if not search_type: + if a_search['key']: + search_type = string.upper(a_search['key']) + else: # We fall back to the dataset type as the key to get the right object - search_type = self.dataset.type + search_type = self.dataset.type + + # This is throwing an error when a_search['key'] is None, so I changed above + #search_type = string.upper(a_search['key']) + #if not search_type: + # # We fall back to the dataset type as the key to get the right object + # search_type = self.dataset.type search_ob = do_search.DoSearch.get_search(search_type) search_class = getattr(do_search, search_ob) @@ -289,26 +282,9 @@ class SearchResultPage(templatePage): keyword = string.replace(keyword,"?",".") wildcardkeyword[i] = keyword#'[[:<:]]'+ keyword+'[[:>:]]' return wildcardkeyword + - - def 
getTableHeaderForGeno(self, worksheet=None, newrow=None, headingStyle=None): - - tblobj_header = [] - - className = "fs13 fwb ffl b1 cw cbrb" - - tblobj_header = [[THCell(HT.TD(' ', Class=className), sort=0), - THCell(HT.TD('Record', HT.BR(), 'ID', HT.BR(), Class=className), text='record_id', idx=1), - THCell(HT.TD('Location', HT.BR(), 'Chr and Mb', HT.BR(), Class=className), text='location', idx=2)]] - - for ncol, item in enumerate(['Record ID', 'Location (Chr, Mb)']): - worksheet.write([newrow, ncol], item, headingStyle) - worksheet.set_column([ncol, ncol], 2*len(item)) - - return tblobj_header - - - def getTableBodyForGeno(self, trait_list, formName=None, worksheet=None, newrow=None): + def getTableBodyForGeno(self, trait_list, formName=None): tblobj_body = [] @@ -345,15 +321,13 @@ class SearchResultPage(templatePage): tblobj_body.append(tr) - for ncol, item in enumerate([this_trait.name, trait_location_repr]): - worksheet.write([newrow, ncol], item) - - newrow += 1 + #for ncol, item in enumerate([this_trait.name, trait_location_repr]): + # worksheet.write([newrow, ncol], item) return tblobj_body - - def getTableBodyForPublish(self, trait_list, formName=None, worksheet=None, newrow=None, species=''): + + def getTableBodyForPublish(self, trait_list, formName=None, species=''): tblobj_body = [] @@ -438,15 +412,13 @@ class SearchResultPage(templatePage): tblobj_body.append(tr) - for ncol, item in enumerate([this_trait.name, PhenotypeString, this_trait.authors, this_trait.year, this_trait.pubmed_id, LRS_score_repr, LRS_location_repr]): - worksheet.write([newrow, ncol], item) - - newrow += 1 + #for ncol, item in enumerate([this_trait.name, PhenotypeString, this_trait.authors, this_trait.year, this_trait.pubmed_id, LRS_score_repr, LRS_location_repr]): + # worksheet.write([newrow, ncol], item) return tblobj_body - def getTableBodyForProbeSet(self, trait_list=None, primaryTrait=None, formName=None, worksheet=None, newrow=None, species=''): + def 
getTableBodyForProbeSet(self, trait_list=None, primaryTrait=None, formName=None, species=''): # Note: setting trait_list to [] is probably not a great idea. tblobj_body = [] @@ -590,8 +562,6 @@ class SearchResultPage(templatePage): tblobj_body.append(tr) - newrow += 1 - return tblobj_body -- cgit v1.2.3