diff options
author | Zachary Sloan | 2012-12-04 18:08:09 -0600 |
---|---|---|
committer | Zachary Sloan | 2012-12-04 18:08:09 -0600 |
commit | 01785471d63de156fa9787a0fb38c9df09824183 (patch) | |
tree | 00eaac20327dde5647bf58a87e5beb1ddd1a3360 | |
parent | 0e17939e123ec80c4da3f665004b08347aa9480b (diff) | |
download | genenetwork2-01785471d63de156fa9787a0fb38c9df09824183.tar.gz |
Changed parser to allow quotes (i.e. name="rw williams")
Renamed webqtlTrait to GeneralTrait and began rewriting parts
Changed database code in many places to use simple sqlalchemy
-rwxr-xr-x | wqflask/base/data_set.py | 18 | ||||
-rwxr-xr-x | wqflask/base/webqtlTrait.py | 166 | ||||
-rwxr-xr-x | wqflask/dbFunction/webqtlDatabaseFunction.py | 26 | ||||
-rw-r--r-- | wqflask/wqflask/do_search.py | 78 | ||||
-rw-r--r-- | wqflask/wqflask/parser.py | 4 | ||||
-rw-r--r-- | wqflask/wqflask/search_results.py | 7 |
6 files changed, 138 insertions, 161 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index cd9e810e..7833f5c1 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -68,13 +68,10 @@ class DataSet(object): assert name self.name = name - #self.db_conn = db_conn - #self.cursor = self.db_conn.cursor() self.id = None self.type = None self.group = None - #if self.cursor and self.id == 0: self.setup() self.check_confidentiality() @@ -200,6 +197,7 @@ class PhenotypeDataSet(DataSet): description = this_trait.post_publication_description if this_trait.confidential: + continue # for now if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=self.privilege, userName=self.userName, authorized_users=this_trait.authorized_users): description = this_trait.pre_publication_description this_trait.description_display = description @@ -217,13 +215,13 @@ class PhenotypeDataSet(DataSet): this_trait.LRS_location_value = 1000000 if this_trait.lrs: - self.cursor.execute(""" + result = g.db.execute(""" select Geno.Chr, Geno.Mb from Geno, Species - where Species.Name = '%s' and - Geno.Name = '%s' and + where Species.Name = %s and + Geno.Name = %s and Geno.SpeciesId = Species.Id - """ % (species, this_trait.locus)) - result = self.cursor.fetchone() + """, (species, this_trait.locus)).fetchone() + #result = self.cursor.fetchone() if result: if result[0] and result[1]: @@ -509,13 +507,13 @@ def geno_mrna_confidentiality(ob): query = '''SELECT Id, Name, FullName, confidentiality, AuthorisedUsers FROM %s WHERE Name = %%s''' % (dataset_table) - ob.cursor.execute(query, ob.name) + result = g.db.execute(query, ob.name) (dataset_id, name, full_name, confidential, - authorized_users) = ob.cursor.fetchall()[0] + authorized_users) = result.fetchall()[0] if confidential: # Allow confidential data later diff --git a/wqflask/base/webqtlTrait.py b/wqflask/base/webqtlTrait.py index 9763e441..dec5fa00 100755 --- a/wqflask/base/webqtlTrait.py +++ b/wqflask/base/webqtlTrait.py @@ -14,7 +14,7 @@ from pprint import pformat as pf from flask import Flask, g -class webqtlTrait: +class GeneralTrait: """ Trait class defines a trait in webqtl, can be either Microarray, Published phenotype, genotype, or user input trait @@ -22,9 +22,7 @@ class webqtlTrait: """ def __init__(self, **kw): - print("in webqtlTrait") - #self.db_conn = db_conn - #self.cursor = self.db_conn.cursor() + print("in GeneralTrait") self.dataset = kw.get('dataset', None) # database object self.name = kw.get('name', None) # Trait ID, ProbeSet ID, Published ID, etc. self.cellid = kw.get('cellid', None) @@ -41,45 +39,31 @@ class webqtlTrait: elif len(name2) == 3: self.dataset, self.name, self.cellid = name2 - #print("foo") - #print("kw in webqtlTrait are:", pf(kw)) - #print("printed\n\n") - #for name, value in kw.items(): - # if self.__dict__.has_key(name): - # setattr(self, name, value) - # elif name == 'fullname': - # name2 = value.split("::") - # if len(name2) == 2: - # self.dataset, self.name = name2 - # elif len(name2) == 3: - # self.dataset, self.name, self.cellid = name2 - # else: - # raise KeyError, repr(value) + ' parameter format error.' - # else: - # raise KeyError, repr(name) + ' not a valid parameter for this class.' - - if self.dataset and isinstance(self.dataset, basestring): - #assert self.cursor, "Don't have a cursor" - self.dataset = create_dataset(self.dataset) + #if self.dataset and isinstance(self.dataset, basestring): + self.dataset = create_dataset(self.dataset) + + - #if self.dataset == None, not from a database print("self.dataset is:", self.dataset, type(self.dataset)) - if self.dataset: - if self.dataset.type == "Temp": - self.cursor.execute(''' - SELECT - InbredSet.Name - FROM - InbredSet, Temp - WHERE - Temp.InbredSetId = InbredSet.Id AND - Temp.Name = "%s" - ''', self.name) - self.group = self.cursor.fetchone()[0] - else: - self.group = self.dataset.get_group() + #if self.dataset: + + self.dataset.get_group() + + if self.dataset.type == "Temp": + self.cursor.execute(''' + SELECT + InbredSet.Name + FROM + InbredSet, Temp + WHERE + Temp.InbredSetId = InbredSet.Id AND + Temp.Name = "%s" + ''', self.name) + self.group = self.cursor.fetchone()[0] + else: + self.group = self.dataset.get_group() - print("trinity, self.group is:", self.group) + print("trinity, self.group is:", self.group) # # In ProbeSet, there are maybe several annotations match one sequence @@ -93,24 +77,24 @@ class webqtlTrait: # The variable self.sequence should be changed to self.BlatSeq # It also should be changed in other places where it are used. - if self.dataset: - if self.dataset.type == 'ProbeSet': - print("Doing ProbeSet Query") - query = ''' - SELECT - ProbeSet.BlatSeq - FROM - ProbeSet, ProbeSetFreeze, ProbeSetXRef - WHERE - ProbeSet.Id=ProbeSetXRef.ProbeSetId and - ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId and - ProbeSet.Name = %s and - ProbeSetFreeze.Name = %s - ''', (self.name, self.dataset.name) - print("query is:", query) - self.cursor.execute(*query) - self.sequence = self.cursor.fetchone()[0] - print("self.sequence is:", self.sequence) + #if self.dataset: + if self.dataset.type == 'ProbeSet': + print("Doing ProbeSet Query") + query = ''' + SELECT + ProbeSet.BlatSeq + FROM + ProbeSet, ProbeSetFreeze, ProbeSetXRef + WHERE + ProbeSet.Id=ProbeSetXRef.ProbeSetId and + ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId and + ProbeSet.Name = %s and + ProbeSetFreeze.Name = %s + ''', (self.name, self.dataset.name) + print("query is:", query) + self.sequence = g.db.execute(*query).fetchone()[0] + #self.sequence = self.cursor.fetchone()[0] + print("self.sequence is:", self.sequence) def getName(self): @@ -380,13 +364,10 @@ class webqtlTrait: #def items(self): # return self.__dict__.items() - def retrieveInfo(self, QTL = None): - assert self.dataset + def retrieve_info(self, QTL=False): + assert self.dataset, "Dataset doesn't exist" if self.dataset.type == 'Publish': - #self.dataset.DisField = ['Name','PubMed_ID','Phenotype','Abbreviation','Authors','Title',\ - # 'Abstract', 'Journal','Volume','Pages','Month','Year','Sequence',\ - # 'Units', 'comments'] - query = ''' + traitInfo = g.db.execute(""" SELECT PublishXRef.Id, Publication.PubMed_ID, Phenotype.Pre_publication_description, Phenotype.Post_publication_description, Phenotype.Original_description, @@ -404,43 +385,50 @@ class webqtlTrait: Publication.Id = PublishXRef.PublicationId AND PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND PublishFreeze.Id =%s - ''' % (self.name, self.dataset.id) + """, (self.name, self.dataset.id)).fetchone() #XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name #XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms. elif self.dataset.type == 'ProbeSet': display_fields_string = ',ProbeSet.'.join(self.dataset.display_fields) display_fields_string = 'ProbeSet.' + display_fields_string - query = """ + traitInfo = g.db.execute(""" SELECT %s FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND - ProbeSetFreeze.Name = '%s' AND - ProbeSet.Name = '%s' - """ % (display_fields_string, self.dataset.name, self.name) + ProbeSetFreeze.Name = %s AND + ProbeSet.Name = %s + """, (display_fields_string, self.dataset.name, self.name)).fetchone() #XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name # to avoid the problem of same marker name from different species. elif self.dataset.type == 'Geno': display_fields_string = string.join(self.dataset.display_fields,',Geno.') display_fields_string = 'Geno.' + display_fields_string - query = """ + traitInfo = g.db.execute(""" SELECT %s FROM Geno, GenoFreeze, GenoXRef WHERE GenoXRef.GenoFreezeId = GenoFreeze.Id AND GenoXRef.GenoId = Geno.Id AND - GenoFreeze.Name = '%s' AND - Geno.Name = '%s' - """ % (display_fields_string, self.dataset.name, self.name) + GenoFreeze.Name = %s AND + Geno.Name = %s + """, (display_fields_string, self.dataset.name, self.name)).fetchone() else: #Temp type - traitInfo = g.db.execute("""SELECT %s FROM %s WHERE Name = '%s' + traitInfo = g.db.execute("""SELECT %s FROM %s WHERE Name = %s """, (string.join(self.dataset.display_fields,','), self.dataset.type, self.name)).fetchone() + + query = """SELECT %s FROM %s WHERE Name = %s + """ % (string.join(self.dataset.display_fields,','), + self.dataset.type, self.name) + + print("query is:", pf(query)) + print("traitInfo is: ", pf(traitInfo)) - - self.cursor.execute(query) - traitInfo = self.cursor.fetchone() + + #self.cursor.execute(query) + #traitInfo = self.cursor.fetchone() if traitInfo: self.haveinfo = True @@ -465,7 +453,7 @@ class webqtlTrait: geneidIsNumber = 0 if geneidIsNumber: - query = """ + result = g.db.execute(""" SELECT HomologeneId FROM @@ -475,9 +463,9 @@ class webqtlTrait: InbredSet.Name = '%s' AND InbredSet.SpeciesId = Species.Id AND Species.TaxonomyId = Homologene.TaxonomyId - """ % (self.geneid, self.group) - self.cursor.execute(query) - result = self.cursor.fetchone() + """, (self.geneid, self.group)).fetchone() + #self.cursor.execute(query) + #result = self.cursor.fetchone() else: result = None @@ -486,7 +474,7 @@ class webqtlTrait: if QTL: if self.dataset.type == 'ProbeSet' and not self.cellid: - query = ''' + traitQTL = g.db.execute(""" SELECT ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.mean FROM @@ -495,15 +483,15 @@ class webqtlTrait: ProbeSetXRef.ProbeSetId = ProbeSet.Id AND ProbeSet.Name = "%s" AND ProbeSetXRef.ProbeSetFreezeId =%s - ''' % (self.name, self.dataset.id) - self.cursor.execute(query) - traitQTL = self.cursor.fetchone() + """, (self.name, self.dataset.id)).fetchone() + #self.cursor.execute(query) + #traitQTL = self.cursor.fetchone() if traitQTL: self.locus, self.lrs, self.pvalue, self.mean = traitQTL else: self.locus = self.lrs = self.pvalue = self.mean = "" if self.dataset.type == 'Publish': - query = ''' + traitQTL = g.db.execute(""" SELECT PublishXRef.Locus, PublishXRef.LRS FROM @@ -512,9 +500,9 @@ class webqtlTrait: PublishXRef.Id = %s AND PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND PublishFreeze.Id =%s - ''' % (self.name, self.dataset.id) - self.cursor.execute(query) - traitQTL = self.cursor.fetchone() + """, (self.name, self.dataset.id)).fetchone() + #self.cursor.execute(query) + #traitQTL = self.cursor.fetchone() if traitQTL: self.locus, self.lrs = traitQTL else: diff --git a/wqflask/dbFunction/webqtlDatabaseFunction.py b/wqflask/dbFunction/webqtlDatabaseFunction.py index 1e028ecc..299114b4 100755 --- a/wqflask/dbFunction/webqtlDatabaseFunction.py +++ b/wqflask/dbFunction/webqtlDatabaseFunction.py @@ -21,6 +21,8 @@ # This module is used by GeneNetwork project (www.genenetwork.org) +from flask import Flask, g + import MySQLdb import string from base import webqtlConfig @@ -80,25 +82,15 @@ def getAllSpecies(cursor=None): #function: retrieve specie's name info based on RISet ########################################################################### -def retrieveSpecies(cursor=None, group=None): - try: - cursor.execute("select Species.Name from Species, InbredSet where InbredSet.Name = '%s' and InbredSet.SpeciesId = Species.Id" % group) - return cursor.fetchone()[0] - except: - return None +def retrieve_species(group): + return g.db.execute("""select Species.Name + from Species, InbredSet + where InbredSet.Name = %s and + InbredSet.SpeciesId = Species.Id""", (group)).fetchone()[0] -########################################################################### -#input: cursor, RISet (string) -#output: specie's Id (string), value will be None or else -#function: retrieve specie's Id info based on RISet -########################################################################### +def retrieve_species_id(group): + return g.db.execute("select SpeciesId from InbredSet where Name = %s", (group)).fetchone()[0] -def retrieveSpeciesId(cursor=None, RISet=None): - try: - cursor.execute("select SpeciesId from InbredSet where Name = '%s'" % RISet) - return cursor.fetchone()[0] - except: - return None ########################################################################### # input: cursor diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py index 802cbea5..2094ed14 100644 --- a/wqflask/wqflask/do_search.py +++ b/wqflask/wqflask/do_search.py @@ -20,34 +20,32 @@ class DoSearch(object): # Used to translate search phrases into classes search_types = dict() - def __init__(self, search_term, search_operator, dataset, cursor, db_conn): + def __init__(self, search_term, search_operator, dataset): self.search_term = search_term # Make sure search_operator is something we expect assert search_operator in (None, "=", "<", ">", "<=", ">="), "Bad search operator" self.search_operator = search_operator self.dataset = dataset - self.db_conn = db_conn - self.cursor = cursor #Get group information for dataset and the species id self.dataset.get_group() - self.species_id = webqtlDatabaseFunction.retrieveSpeciesId(self.cursor, self.dataset.group) + self.species_id = webqtlDatabaseFunction.retrieve_species_id(self.dataset.group) def execute(self, query): """Executes query and returns results""" query = self.normalize_spaces(query) print("in do_search query is:", pf(query)) - g.db.execute(query) - results = self.cursor.fetchall() + results = g.db.execute(query).fetchall() + #results = self.cursor.fetchall() return results - def escape(self, stringy): - """Shorter name than self.db_conn.escape_string""" - return escape(str(stringy)) + #def escape(self, stringy): + # """Shorter name than self.db_conn.escape_string""" + # return escape(str(stringy)) def mescape(self, *items): """Multiple escape""" - escaped = [self.escape(item) for item in items] + escaped = [escape(item) for item in items] print("escaped is:", escaped) return tuple(escaped) @@ -96,9 +94,9 @@ class ProbeSetSearch(DoSearch): WHERE %s and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = %s - """ % (self.escape(from_clause), + """ % (escape(from_clause), where_clause, - self.escape(self.dataset.id))) + escape(self.dataset.id))) print("query is:", pf(query)) @@ -118,8 +116,8 @@ class ProbeSetSearch(DoSearch): AGAINST ('%s' IN BOOLEAN MODE)) and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = %s - """ % (self.escape(self.search_term[0]), - self.escape(self.dataset.id)) + """ % (escape(self.search_term[0]), + escape(str(self.dataset.id))) print("final query is:", pf(query)) @@ -182,14 +180,16 @@ class PhenotypeSearch(DoSearch): self.dataset.get_group() query = (self.base_query + - """WHERE %s and + """%s + WHERE %s PublishXRef.InbredSetId = %s and PublishXRef.PhenotypeId = Phenotype.Id and PublishXRef.PublicationId = Publication.Id and PublishFreeze.Id = %s""" % ( - self.get_fields_clause(), - self.escape(self.dataset.group_id), - self.escape(self.dataset.id))) + from_clause, + where_clause, + escape(str(self.dataset.group_id)), + escape(str(self.dataset.id)))) print("query is:", pf(query)) @@ -272,7 +272,7 @@ class GenotypeSearch(DoSearch): Geno.Id = GenoXRef.GenoId and GenoXRef.GenoFreezeId = GenoFreeze.Id and GenoFreeze.Id = %s"""% (where_clause, - self.escape(self.dataset.id))) + escape(self.dataset.id))) print("query is:", pf(query)) @@ -332,7 +332,7 @@ class GoSearch(ProbeSetSearch): statements = ("""%s.symbol=GOgene_product.symbol and GOassociation.gene_product_id=GOgene_product.id and GOterm.id=GOassociation.term_id""" % ( - self.db_conn.escape_string(self.dataset.type))) + escape(self.dataset.type))) where_clause = " %s = '%s' and %s " % (field, go_id, statements) @@ -377,7 +377,7 @@ class LrsSearch(ProbeSetSearch): if len(self.search_term) > 2: self.chr_num = self.search_term[2] - self.sub_clause += """ Geno.Chr = %s and """ % (self.escape(self.chr_num)) + self.sub_clause += """ Geno.Chr = %s and """ % (escape(self.chr_num)) if len(self.search_term) == 5: self.mb_low, self.mb_high = self.search_term[3:] self.sub_clause += """ Geno.Mb > %s and @@ -429,17 +429,17 @@ class CisTransLrsSearch(LrsSearch): self.sub_clause = """ %sXRef.LRS > %s and %sXRef.LRS < %s and """ % ( - self.escape(self.dataset.type), - self.escape(min(self.lrs_min, self.lrs_max)), - self.escape(self.dataset.type), - self.escape(max(self.lrs_min, self.lrs_max)) + escape(self.dataset.type), + escape(min(self.lrs_min, self.lrs_max)), + escape(self.dataset.type), + escape(max(self.lrs_min, self.lrs_max)) ) else: # Deal with >, <, >=, and <= self.sub_clause = """ %sXRef.LRS %s %s and """ % ( - self.escape(self.dataset.type), - self.escape(self.search_operator), - self.escape(self.search_term[0]) + escape(self.dataset.type), + escape(self.search_operator), + escape(self.search_term[0]) ) self.where_clause = self.sub_clause + """ @@ -447,12 +447,12 @@ class CisTransLrsSearch(LrsSearch): %sXRef.Locus = Geno.name and Geno.SpeciesId = %s and %s.Chr = Geno.Chr""" % ( - self.escape(self.dataset.type), + escape(self.dataset.type), the_operator, - self.escape(self.mb_buffer), - self.escape(self.dataset.type), - self.escape(self.species_id), - self.escape(self.dataset.type) + escape(self.mb_buffer), + escape(self.dataset.type), + escape(self.species_id), + escape(self.dataset.type) ) print("where_clause is:", pf(self.where_clause)) @@ -559,7 +559,7 @@ class RangeSearch(ProbeSetSearch): self.where_clause = """ (SELECT Pow(2, max(value) -min(value)) FROM ProbeSetData WHERE ProbeSetData.Id = ProbeSetXRef.dataId) > %s - """ % (self.escape(self.search_term[0])) + """ % (escape(self.search_term[0])) print("where_clause is:", pf(self.where_clause)) @@ -647,16 +647,14 @@ class AuthorSearch(PhenotypeSearch): DoSearch.search_types["NAME"] = "AuthorSearch" def run(self): - - self.search_term = [float(value) for value in self.search_term] - - self.where_clause = """ Publication.Authors LIKE %s and - """ % (self.escape(self.search_term[0])) + + self.where_clause = """ Publication.Authors REGEXP "[[:<:]]%s[[:>:]]" and + """ % (self.search_term[0]) self.query = self.compile_final_query(where_clause = self.where_clause) return self.execute(self.query) - + if __name__ == "__main__": diff --git a/wqflask/wqflask/parser.py b/wqflask/wqflask/parser.py index efe479e6..f991d8c7 100644 --- a/wqflask/wqflask/parser.py +++ b/wqflask/wqflask/parser.py @@ -28,7 +28,7 @@ def parse(pstring): returned item serach_term is always a list, even if only one element """ - pstring = re.split(r"""(?:(\w+\s*=\s*[\(\[][^)]*[\)\]]) | # LRS=(1 2 3), cisLRS=[4 5 6], etc + pstring = re.split(r"""(?:(\w+\s*=\s*[\('"\[][^)'"]*[\)\]'"]) | # LRS=(1 2 3), cisLRS=[4 5 6], etc (\w+\s*[=:\>\<][\w\*]+) | # wiki=bar, GO:foobar, etc ([\w\*]+)) # shh, brain, etc """, pstring, flags=re.VERBOSE) @@ -78,6 +78,8 @@ if __name__ == '__main__': parse("WIKI=ho*") parse("LRS>9") parse("LRS>=18") + parse("NAME='rw williams'") + parse('NAME="rw williams"') parse("foo <= 2") parse("cisLRS<20") parse("foo=[3 2 1)") diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index 52f628f6..efa1c5cc 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -100,7 +100,7 @@ class SearchResultPage(templatePage): self.trait_list = [] group = self.dataset.group - species = webqtlDatabaseFunction.retrieveSpecies(cursor=self.cursor, group=group) + species = webqtlDatabaseFunction.retrieve_species(group=group) # result_set represents the results for each search term; a search of # "shh grin2b" would have two sets of results, one for each term @@ -114,7 +114,7 @@ class SearchResultPage(templatePage): print("foo locals are:", locals()) trait_id = result[0] this_trait = webqtlTrait(dataset=self.dataset, name=trait_id) - this_trait.retrieveInfo(QTL=True) + this_trait.retrieve_info(QTL=True) self.trait_list.append(this_trait) self.dataset.get_trait_info(self.trait_list, species) @@ -147,8 +147,7 @@ class SearchResultPage(templatePage): the_search = search_class(search_term, search_operator, self.dataset, - self.cursor, - self.db_conn) + ) self.results.extend(the_search.run()) print("in the search results are:", self.results) |