diff options
-rw-r--r-- | wqflask/wqflask/do_search.py | 180 | ||||
-rw-r--r-- | wqflask/wqflask/search_results.py | 52 |
2 files changed, 217 insertions, 15 deletions
diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py new file mode 100644 index 00000000..cfa73334 --- /dev/null +++ b/wqflask/wqflask/do_search.py @@ -0,0 +1,180 @@ +#!/usr/bin/python + + +from __future__ import print_function, division + +from pprint import pformat as pf + + +class DoSearch(object): + def __init__(self, search_term, dataset, cursor, db_conn): + self.search_term = search_term + self.dataset = dataset + self.db_conn = db_conn + self.cursor = cursor + + def execute(self, query): + query = self.normalize_spaces(query) + print("query is:", pf(query)) + self.cursor.execute(query) + results = self.cursor.fetchall() + return results + + def escape(self, stringy): + """Shorter name than self.db_conn.escape_string""" + return self.db_conn.escape_string(str(stringy)) + + def normalize_spaces(self, stringy): + """Strips out newlines extra spaces and replaces them with just spaces""" + step_one = " ".join(stringy.split()) + return step_one + + + +class ProbeSetSearch(DoSearch): + base_query = """SELECT ProbeSet.Name as TNAME, + 0 as thistable, + ProbeSetXRef.Mean as TMEAN, + ProbeSetXRef.LRS as TLRS, + ProbeSetXRef.PVALUE as TPVALUE, + ProbeSet.Chr_num as TCHR_NUM, + ProbeSet.Mb as TMB, + ProbeSet.Symbol as TSYMBOL, + ProbeSet.name_num as TNAME_NUM + FROM ProbeSetXRef, ProbeSet """ + + def run(self): + + query = (self.base_query + + """WHERE (MATCH (ProbeSet.Name, + ProbeSet.description, + ProbeSet.symbol, + alias, + GenbankId, + UniGeneId, + Probe_Target_Description) + AGAINST ('%s' IN BOOLEAN MODE)) + and ProbeSet.Id = ProbeSetXRef.ProbeSetId + and ProbeSetXRef.ProbeSetFreezeId = %s + """ % (self.escape(self.search_term), + self.escape(dataset.id))) + + return self.execute(query) + + +class PhenotypeSearch(DoSearch): + base_query = """SELECT PublishXRef.Id, + PublishFreeze.createtime as thistable, + Publication.PubMed_ID as Publication_PubMed_ID, + Phenotype.Post_publication_description as Phenotype_Name + FROM Phenotype, PublishFreeze, Publication, PublishXRef """ + + search_fields = ('Phenotype.Post_publication_description', + 'Phenotype.Pre_publication_description', + 'Phenotype.Pre_publication_abbreviation', + 'Phenotype.Post_publication_abbreviation', + 'Phenotype.Lab_code', + 'Publication.PubMed_ID', + 'Publication.Abstract', + 'Publication.Title', + 'Publication.Authors', + 'PublishXRef.Id') + + def run(self): + #Todo: Zach will figure out exactly what both these lines mean + #and comment here + if "'" not in self.search_term: + search_term = "[[:<:]]" + self.search_term + "[[:>:]]" + + where_clause = [] + for field in self.search_fields: + where_clause.append('''%s REGEXP "%s"''' % (field, search_term)) + + where_clause = "(%s)" % ' OR '.join(where_clause) + + #Get group information for dataset + self.dataset.get_group() + + print("before query where clause is:", where_clause) + + query = (self.base_query + + """WHERE %s and + PublishXRef.InbredSetId = %s and + PublishXRef.PhenotypeId = Phenotype.Id and + PublishXRef.PublicationId = Publication.Id and + PublishFreeze.Id = %s""" % ( + where_clause, + self.escape(self.dataset.group_id), + self.escape(self.dataset.id))) + + + + return self.execute(query) + + +class GenotypeSearch(DoSearch): + def __init__(self): + pass + +class GoSearch(ProbeSetSearch): + """searches for synapse-associated genes listed in the Gene Ontology.""" + + def run(self): + field = 'GOterm.acc' + go_id = 'GO:' + ('0000000'+self.search_term)[-7:] + + statements = ("""%s.symbol=GOgene_product.symbol and + GOassociation.gene_product_id=GOgene_product.id and + GOterm.id=GOassociation.term_id""" % ( + self.db_conn.escape_string(self.dataset.type))) + + clause_item = " %s = '%s' and %s " % (field, go_id, statements) + + gene_ontology_from_table = """ , db_GeneOntology.term as GOterm, + db_GeneOntology.association as GOassociation, + db_GeneOntology.gene_product as GOgene_product """ + + gene_ontology_from_table = self.normalize_spaces(gene_ontology_from_table) + #gene_ontology_from_table = " ".join(gene_ontology_from_table.splitlines()) + + query = (self.base_query + + """%s + WHERE %s + and ProbeSet.Id = ProbeSetXRef.ProbeSetId + and ProbeSetXRef.ProbeSetFreezeId = %s + """ % (self.db_conn.escape_string(gene_ontology_from_table), + clause_item, + self.db_conn.escape_string(str(self.dataset.id)))) + + return self.execute(query) + + + +if __name__ == "__main__": + + import MySQLdb + import sys + sys.path.append("/home/zas1024/gene/wqflask") + print("Path is:", sys.path) + + + from base import webqtlConfig + from base.webqtlDataset import webqtlDataset + from base.templatePage import templatePage + from utility import webqtlUtil + from dbFunction import webqtlDatabaseFunction + + db_conn = MySQLdb.Connect(db=webqtlConfig.DB_NAME, + host=webqtlConfig.MYSQL_SERVER, + user=webqtlConfig.DB_USER, + passwd=webqtlConfig.DB_PASSWD) + cursor = db_conn.cursor() + + dataset_name = "HC_M2_0606_P" + dataset = webqtlDataset(dataset_name, cursor) + + #results = ProbeSetSearch("salt", dataset, cursor, db_conn).run() + #results = PhenotypeSearch("brain", dataset, cursor, db_conn).run() + + results = GoSearch("0045202", dataset, cursor, db_conn).run() + print("results are:", pf(results))
\ No newline at end of file diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index 29a497fa..02d2bef2 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -681,15 +681,15 @@ class SearchResultPage(templatePage): print("item is:", pf(search_term)) - clause_item = ( -""" MATCH (ProbeSet.Name, - ProbeSet.description, - ProbeSet.symbol, - alias, - GenbankId, - UniGeneId, - Probe_Target_Description) - AGAINST ('%s' IN BOOLEAN MODE) """ % self.db_conn.escape_string(search_term)) +# clause_item = ( +#""" MATCH (ProbeSet.Name, +# ProbeSet.description, +# ProbeSet.symbol, +# alias, +# GenbankId, +# UniGeneId, +# Probe_Target_Description) +# AGAINST ('%s' IN BOOLEAN MODE) """ % self.db_conn.escape_string(search_term)) if self.dataset.type == "ProbeSet": query = ( @@ -702,11 +702,17 @@ class SearchResultPage(templatePage): ProbeSet.Symbol as TSYMBOL, ProbeSet.name_num as TNAME_NUM FROM ProbeSetXRef, ProbeSet - WHERE %s + WHERE (MATCH (ProbeSet.Name, + ProbeSet.description, + ProbeSet.symbol, + alias, + GenbankId, + UniGeneId, + Probe_Target_Description) + AGAINST ('%s' IN BOOLEAN MODE)) and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = %s """ % (self.db_conn.escape_string(search_term), - self.db_conn.escape_string(clause_item), self.db_conn.escape_string(str(self.dataset.id)))) elif self.dataset.type == "Publish": @@ -719,13 +725,21 @@ class SearchResultPage(templatePage): Publication.PubMed_ID as Publication_PubMed_ID, Phenotype.Post_publication_description as Phenotype_Name FROM %s PublishFreeze, Publication, PublishXRef, Phenotype - WHERE PublishXRef.InbredSetId = %s and %s and + WHERE (MATCH (ProbeSet.Name, + ProbeSet.description, + ProbeSet.symbol, + alias, + GenbankId, + UniGeneId, + Probe_Target_Description) + AGAINST ('%s' IN BOOLEAN MODE)) and + PublishXRef.InbredSetId = %s and PublishXRef.PhenotypeId = Phenotype.Id and PublishXRef.PublicationId = Publication.Id and PublishFreeze.Id = %s """ % (include_geno, + self.db_conn.escape_string(search_term), self.db_conn.escape_string(str(self.dataset.group_id)), - self.db_conn.escape_string(clause_item), self.db_conn.escape_string(str(self.dataset.id)))) elif self.dataset.type == "Geno": @@ -736,10 +750,18 @@ class SearchResultPage(templatePage): Geno.chr_num as Geno_chr_num, Geno.Mb as Geno_Mb FROM GenoXRef, GenoFreeze, Geno - WHERE %s and Geno.Id = GenoXRef.GenoId and + WHERE (MATCH (ProbeSet.Name, + ProbeSet.description, + ProbeSet.symbol, + alias, + GenbankId, + UniGeneId, + Probe_Target_Description) + AGAINST ('%s' IN BOOLEAN MODE)) and + and Geno.Id = GenoXRef.GenoId and GenoXRef.GenoFreezeId = GenoFreeze.Id and GenoFreeze.Id = %d - """% (self.db_conn.escape_string(clause_item), + """% (self.db_conn.escape_string(search_term), self.db_conn.escape_string(str(self.dataset.id)))) |