diff options
Diffstat (limited to 'wqflask/wqflask/do_search.py')
-rw-r--r-- | wqflask/wqflask/do_search.py | 140 |
1 files changed, 97 insertions, 43 deletions
diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py index cfa73334..19c6fa74 100644 --- a/wqflask/wqflask/do_search.py +++ b/wqflask/wqflask/do_search.py @@ -7,31 +7,35 @@ from pprint import pformat as pf class DoSearch(object): + """Parent class containing parameters/functions used for all searches""" + def __init__(self, search_term, dataset, cursor, db_conn): self.search_term = search_term self.dataset = dataset self.db_conn = db_conn self.cursor = cursor - + def execute(self, query): + """Executes query and returns results""" query = self.normalize_spaces(query) - print("query is:", pf(query)) + print("in do_search query is:", pf(query)) self.cursor.execute(query) results = self.cursor.fetchall() return results - + def escape(self, stringy): """Shorter name than self.db_conn.escape_string""" return self.db_conn.escape_string(str(stringy)) - + def normalize_spaces(self, stringy): - """Strips out newlines extra spaces and replaces them with just spaces""" + """Strips out newlines extra spaces and replaces them with just spaces""" step_one = " ".join(stringy.split()) return step_one - - + class ProbeSetSearch(DoSearch): + """A search within an mRNA expression dataset""" + base_query = """SELECT ProbeSet.Name as TNAME, 0 as thistable, ProbeSetXRef.Mean as TMEAN, @@ -42,9 +46,11 @@ class ProbeSetSearch(DoSearch): ProbeSet.Symbol as TSYMBOL, ProbeSet.name_num as TNAME_NUM FROM ProbeSetXRef, ProbeSet """ - + def run(self): + """Generates and runs a simple search of an mRNA expression dataset""" + print("Running ProbeSetSearch") query = (self.base_query + """WHERE (MATCH (ProbeSet.Name, ProbeSet.description, @@ -57,18 +63,22 @@ class ProbeSetSearch(DoSearch): and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = %s """ % (self.escape(self.search_term), - self.escape(dataset.id))) - + self.escape(self.dataset.id))) + + print("final query is:", pf(query)) + return self.execute(query) class PhenotypeSearch(DoSearch): + """A search within a phenotype dataset""" + base_query = """SELECT PublishXRef.Id, PublishFreeze.createtime as thistable, Publication.PubMed_ID as Publication_PubMed_ID, Phenotype.Post_publication_description as Phenotype_Name FROM Phenotype, PublishFreeze, Publication, PublishXRef """ - + search_fields = ('Phenotype.Post_publication_description', 'Phenotype.Pre_publication_description', 'Phenotype.Pre_publication_abbreviation', @@ -78,65 +88,108 @@ class PhenotypeSearch(DoSearch): 'Publication.Abstract', 'Publication.Title', 'Publication.Authors', - 'PublishXRef.Id') - - def run(self): + 'PublishXRef.Id') + + def get_where_clause(self): + """Generate clause for WHERE portion of query""" + #Todo: Zach will figure out exactly what both these lines mean #and comment here if "'" not in self.search_term: search_term = "[[:<:]]" + self.search_term + "[[:>:]]" + # This adds a clause to the query that matches the search term + # against each field in the search_fields tuple where_clause = [] for field in self.search_fields: where_clause.append('''%s REGEXP "%s"''' % (field, search_term)) - where_clause = "(%s)" % ' OR '.join(where_clause) + return where_clause + + def run(self): + """Generates and runs a simple search of a phenotype dataset""" + #Get group information for dataset self.dataset.get_group() - - print("before query where clause is:", where_clause) - + query = (self.base_query + """WHERE %s and PublishXRef.InbredSetId = %s and PublishXRef.PhenotypeId = Phenotype.Id and PublishXRef.PublicationId = Publication.Id and PublishFreeze.Id = %s""" % ( - where_clause, + self.get_where_clause(), self.escape(self.dataset.group_id), self.escape(self.dataset.id))) + return self.execute(query) +class GenotypeSearch(DoSearch): + """A search within a genotype dataset""" + + base_query = """SELECT Geno.Name, + GenoFreeze.createtime as thistable, + Geno.Name as Geno_Name, + Geno.Source2 as Geno_Source2, + Geno.chr_num as Geno_chr_num, + Geno.Mb as Geno_Mb + FROM GenoXRef, GenoFreeze, Geno """ + + search_fields = ('Name', 'Chr') + + def get_where_clause(self): + """Generate clause for WHERE portion of query""" + + # This adds a clause to the query that matches the search term + # against each field in search_fields (above) + where_clause = [] + for field in self.search_fields: + where_clause.append('''%s REGEXP "%s"''' % ("%s.%s" % (self.dataset.type, field), + self.search_term)) + where_clause = "(%s)" % ' OR '.join(where_clause) + + return where_clause + + def run(self): + """Generates and runs a simple search of a genotype dataset""" + #Todo: Zach will figure out exactly what both these lines mean + #and comment here + if "'" not in self.search_term: + search_term = "[[:<:]]" + self.search_term + "[[:>:]]" + + query = (self.base_query + + """WHERE %s and + Geno.Id = GenoXRef.GenoId and + GenoXRef.GenoFreezeId = GenoFreeze.Id and + GenoFreeze.Id = %s"""% ( + self.get_where_clause(), + self.escape(self.dataset.id))) + return self.execute(query) - -class GenotypeSearch(DoSearch): - def __init__(self): - pass - class GoSearch(ProbeSetSearch): - """searches for synapse-associated genes listed in the Gene Ontology.""" - + """Searches for synapse-associated genes listed in the Gene Ontology.""" + def run(self): field = 'GOterm.acc' go_id = 'GO:' + ('0000000'+self.search_term)[-7:] - + statements = ("""%s.symbol=GOgene_product.symbol and GOassociation.gene_product_id=GOgene_product.id and GOterm.id=GOassociation.term_id""" % ( self.db_conn.escape_string(self.dataset.type))) - + clause_item = " %s = '%s' and %s " % (field, go_id, statements) - + + # gene_ontology_from_table = """ , db_GeneOntology.term as GOterm, db_GeneOntology.association as GOassociation, db_GeneOntology.gene_product as GOgene_product """ - + gene_ontology_from_table = self.normalize_spaces(gene_ontology_from_table) - #gene_ontology_from_table = " ".join(gene_ontology_from_table.splitlines()) - + query = (self.base_query + """%s WHERE %s @@ -145,36 +198,37 @@ class GoSearch(ProbeSetSearch): """ % (self.db_conn.escape_string(gene_ontology_from_table), clause_item, self.db_conn.escape_string(str(self.dataset.id)))) - + return self.execute(query) - if __name__ == "__main__": - + ### Usually this will be used as a library, but call it from the command line for testing + ### And it runs the code below + import MySQLdb import sys sys.path.append("/home/zas1024/gene/wqflask") print("Path is:", sys.path) - - + + from base import webqtlConfig from base.webqtlDataset import webqtlDataset from base.templatePage import templatePage from utility import webqtlUtil from dbFunction import webqtlDatabaseFunction - + db_conn = MySQLdb.Connect(db=webqtlConfig.DB_NAME, host=webqtlConfig.MYSQL_SERVER, user=webqtlConfig.DB_USER, passwd=webqtlConfig.DB_PASSWD) cursor = db_conn.cursor() - + dataset_name = "HC_M2_0606_P" dataset = webqtlDataset(dataset_name, cursor) - - #results = ProbeSetSearch("salt", dataset, cursor, db_conn).run() + + results = ProbeSetSearch("salt", dataset, cursor, db_conn).run() #results = PhenotypeSearch("brain", dataset, cursor, db_conn).run() - - results = GoSearch("0045202", dataset, cursor, db_conn).run() + #results = GenotypeSearch("rs13475699", dataset, cursor, db_conn).run() + #results = GoSearch("0045202", dataset, cursor, db_conn).run() print("results are:", pf(results))
\ No newline at end of file |