diff options
Diffstat (limited to 'wqflask/wqflask/do_search.py')
-rw-r--r-- | wqflask/wqflask/do_search.py | 89 |
1 files changed, 73 insertions, 16 deletions
diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py index 19c6fa74..2641431c 100644 --- a/wqflask/wqflask/do_search.py +++ b/wqflask/wqflask/do_search.py @@ -9,6 +9,9 @@ from pprint import pformat as pf class DoSearch(object): """Parent class containing parameters/functions used for all searches""" + # Used to translate search phrases into classes + search_types = dict() + def __init__(self, search_term, dataset, cursor, db_conn): self.search_term = search_term self.dataset = dataset @@ -28,14 +31,20 @@ class DoSearch(object): return self.db_conn.escape_string(str(stringy)) def normalize_spaces(self, stringy): - """Strips out newlines extra spaces and replaces them with just spaces""" + """Strips out newlines/extra spaces and replaces them with just spaces""" step_one = " ".join(stringy.split()) return step_one + + @classmethod + def get_search(cls, search_type): + return cls.search_types[search_type] class ProbeSetSearch(DoSearch): """A search within an mRNA expression dataset""" + DoSearch.search_types['ProbeSet'] = "ProbeSetSearch" + base_query = """SELECT ProbeSet.Name as TNAME, 0 as thistable, ProbeSetXRef.Mean as TMEAN, @@ -47,6 +56,24 @@ class ProbeSetSearch(DoSearch): ProbeSet.name_num as TNAME_NUM FROM ProbeSetXRef, ProbeSet """ + def compile_final_query(self, from_clause, where_clause): + """Generates the final query string""" + + from_clause = self.normalize_spaces(from_clause) + + query = (self.base_query + + """%s + WHERE %s + and ProbeSet.Id = ProbeSetXRef.ProbeSetId + and ProbeSetXRef.ProbeSetFreezeId = %s + """ % (self.escape(from_clause), + where_clause, + self.escape(self.dataset.id))) + + print("query is:", pf(query)) + + return query + def run(self): """Generates and runs a simple search of an mRNA expression dataset""" @@ -73,6 +100,8 @@ class ProbeSetSearch(DoSearch): class PhenotypeSearch(DoSearch): """A search within a phenotype dataset""" + DoSearch.search_types['Publish'] = "PhenotypeSearch" + base_query = """SELECT PublishXRef.Id, PublishFreeze.createtime as thistable, Publication.PubMed_ID as Publication_PubMed_ID, @@ -128,6 +157,8 @@ class PhenotypeSearch(DoSearch): class GenotypeSearch(DoSearch): """A search within a genotype dataset""" + + DoSearch.search_types['Geno'] = "GenotypeSearch" base_query = """SELECT Geno.Name, GenoFreeze.createtime as thistable, @@ -169,9 +200,42 @@ class GenotypeSearch(DoSearch): return self.execute(query) +class RifSearch(ProbeSetSearch): + """Searches for traits with a Gene RIF entry including the search term.""" + + DoSearch.search_types['RIF'] = "RifSearch" + + def run(self): + where_clause = """( %s.symbol = GeneRIF_BASIC.symbol and + MATCH (GeneRIF_BASIC.comment) + AGAINST ('+%s' IN BOOLEAN MODE)) """ % (self.dataset.type, self.search_term) + + from_clause = ", GeneRIF_BASIC " + query = self.compile_final_query(from_clause, where_clause) + + return self.execute(query) + +class WikiSearch(ProbeSetSearch): + """Searches GeneWiki for traits other people have annotated""" + + DoSearch.search_types['WIKI'] = "WikiSearch" + + def run(self): + where_clause = """%s.symbol = GeneRIF.symbol + and GeneRIF.versionId=0 and GeneRIF.display>0 + and (GeneRIF.comment REGEXP '%s' or GeneRIF.initial = '%s') + """ % (self.dataset.type, "[[:<:]]"+self.search_term+"[[:>:]]", self.search_term) + + from_clause = ", GeneRIF " + query = self.compile_final_query(from_clause, where_clause) + + return self.execute(query) + class GoSearch(ProbeSetSearch): """Searches for synapse-associated genes listed in the Gene Ontology.""" + DoSearch.search_types['GO'] = "GoSearch" + def run(self): field = 'GOterm.acc' go_id = 'GO:' + ('0000000'+self.search_term)[-7:] @@ -181,23 +245,13 @@ class GoSearch(ProbeSetSearch): GOterm.id=GOassociation.term_id""" % ( self.db_conn.escape_string(self.dataset.type))) - clause_item = " %s = '%s' and %s " % (field, go_id, statements) + where_clause = " %s = '%s' and %s " % (field, go_id, statements) - # - gene_ontology_from_table = """ , db_GeneOntology.term as GOterm, + from_clause = """ , db_GeneOntology.term as GOterm, db_GeneOntology.association as GOassociation, db_GeneOntology.gene_product as GOgene_product """ - - gene_ontology_from_table = self.normalize_spaces(gene_ontology_from_table) - - query = (self.base_query + - """%s - WHERE %s - and ProbeSet.Id = ProbeSetXRef.ProbeSetId - and ProbeSetXRef.ProbeSetFreezeId = %s - """ % (self.db_conn.escape_string(gene_ontology_from_table), - clause_item, - self.db_conn.escape_string(str(self.dataset.id)))) + + query = self.compile_final_query(from_clause, where_clause) return self.execute(query) @@ -227,8 +281,11 @@ if __name__ == "__main__": dataset_name = "HC_M2_0606_P" dataset = webqtlDataset(dataset_name, cursor) - results = ProbeSetSearch("salt", dataset, cursor, db_conn).run() + #results = ProbeSetSearch("salt", dataset, cursor, db_conn).run() + #results = RifSearch("diabetes", dataset, cursor, db_conn).run() + results = WikiSearch("nicotine", dataset, cursor, db_conn).run() #results = PhenotypeSearch("brain", dataset, cursor, db_conn).run() #results = GenotypeSearch("rs13475699", dataset, cursor, db_conn).run() #results = GoSearch("0045202", dataset, cursor, db_conn).run() + print("results are:", pf(results))
\ No newline at end of file |