aboutsummaryrefslogtreecommitdiff
path: root/wqflask/wqflask/do_search.py
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/wqflask/do_search.py')
-rw-r--r--wqflask/wqflask/do_search.py180
1 files changed, 180 insertions, 0 deletions
diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py
new file mode 100644
index 00000000..cfa73334
--- /dev/null
+++ b/wqflask/wqflask/do_search.py
@@ -0,0 +1,180 @@
+#!/usr/bin/python
+
+
+from __future__ import print_function, division
+
+from pprint import pformat as pf
+
+
class DoSearch(object):
    """Common plumbing for the search classes.

    Holds the search term, the dataset being searched and the database
    handles, and offers small helpers for escaping values and running a
    query string.
    """

    def __init__(self, search_term, dataset, cursor, db_conn):
        self.search_term, self.dataset = search_term, dataset
        self.cursor, self.db_conn = cursor, db_conn

    def execute(self, query):
        """Run *query* and return every row the cursor fetches.

        The query is whitespace-normalized first and printed before it
        is executed.
        """
        compact_sql = self.normalize_spaces(query)
        print("query is:", pf(compact_sql))
        self.cursor.execute(compact_sql)
        return self.cursor.fetchall()

    def escape(self, stringy):
        """Shorter name than self.db_conn.escape_string"""
        as_text = str(stringy)
        return self.db_conn.escape_string(as_text)

    def normalize_spaces(self, stringy):
        """Collapse each run of whitespace (newlines included) to one space.

        Leading and trailing whitespace is removed as well.
        """
        words = stringy.split()
        return " ".join(words)
+
+
+
class ProbeSetSearch(DoSearch):
    """Full-text search over ProbeSet records restricted to one dataset."""

    # SELECT/FROM prefix shared by every ProbeSet-based search; callers
    # append their own extra tables and WHERE clause to it.
    base_query = """SELECT ProbeSet.Name as TNAME,
                0 as thistable,
                ProbeSetXRef.Mean as TMEAN,
                ProbeSetXRef.LRS as TLRS,
                ProbeSetXRef.PVALUE as TPVALUE,
                ProbeSet.Chr_num as TCHR_NUM,
                ProbeSet.Mb as TMB,
                ProbeSet.Symbol as TSYMBOL,
                ProbeSet.name_num as TNAME_NUM
                FROM ProbeSetXRef, ProbeSet """

    def run(self):
        """Execute a boolean-mode MATCH ... AGAINST search and return the rows.

        The search term and the dataset id are both passed through
        self.escape() before being interpolated into the SQL text.
        """
        query = (self.base_query +
                 """WHERE (MATCH (ProbeSet.Name,
                    ProbeSet.description,
                    ProbeSet.symbol,
                    alias,
                    GenbankId,
                    UniGeneId,
                    Probe_Target_Description)
                    AGAINST ('%s' IN BOOLEAN MODE))
                    and ProbeSet.Id = ProbeSetXRef.ProbeSetId
                    and ProbeSetXRef.ProbeSetFreezeId = %s
                            """ % (self.escape(self.search_term),
                                   # Use the instance's dataset; the bare name
                                   # `dataset` only resolved when a module-level
                                   # global of that name happened to exist.
                                   self.escape(self.dataset.id)))

        return self.execute(query)
+
+
class PhenotypeSearch(DoSearch):
    """Regular-expression search over phenotype and publication text fields."""

    base_query = """SELECT PublishXRef.Id,
                PublishFreeze.createtime as thistable,
                Publication.PubMed_ID as Publication_PubMed_ID,
                Phenotype.Post_publication_description as Phenotype_Name
                FROM Phenotype, PublishFreeze, Publication, PublishXRef """

    # Every column the search term is matched against (OR-ed together).
    search_fields = ('Phenotype.Post_publication_description',
                     'Phenotype.Pre_publication_description',
                     'Phenotype.Pre_publication_abbreviation',
                     'Phenotype.Post_publication_abbreviation',
                     'Phenotype.Lab_code',
                     'Publication.PubMed_ID',
                     'Publication.Abstract',
                     'Publication.Title',
                     'Publication.Authors',
                     'PublishXRef.Id')

    def run(self):
        """Build the REGEXP query for the search term, run it, return the rows."""
        # A term without a single quote is wrapped in MySQL's word-boundary
        # markers so it only matches whole words.
        # NOTE(review): the original left the pattern undefined when the term
        # contained a quote (UnboundLocalError); using the term as typed in
        # that case is an assumption -- confirm the intended behaviour.
        pattern = self.search_term
        if "'" not in pattern:
            pattern = "[[:<:]]" + pattern + "[[:>:]]"

        # The pattern ends up inside a quoted SQL literal, so escape it the
        # same way the other search classes escape their terms.
        pattern = self.escape(pattern)

        where_clause = "(%s)" % ' OR '.join(
            '''%s REGEXP "%s"''' % (field, pattern)
            for field in self.search_fields)

        #Get group information for dataset
        self.dataset.get_group()

        print("before query where clause is:", where_clause)

        query = (self.base_query +
                 """WHERE %s and
                    PublishXRef.InbredSetId = %s and
                    PublishXRef.PhenotypeId = Phenotype.Id and
                    PublishXRef.PublicationId = Publication.Id and
                    PublishFreeze.Id = %s""" % (
                        where_clause,
                        self.escape(self.dataset.group_id),
                        self.escape(self.dataset.id)))

        return self.execute(query)
+
+
class GenotypeSearch(DoSearch):
    """Placeholder for genotype searches; no run() method is defined yet."""

    def __init__(self, search_term=None, dataset=None, cursor=None,
                 db_conn=None):
        # Accept the same arguments as the other search classes (all optional,
        # so the old no-argument call still works) and let the base class
        # store them; otherwise the inherited execute()/escape() helpers fail
        # with AttributeError because cursor/db_conn were never set.
        super(GenotypeSearch, self).__init__(search_term, dataset, cursor,
                                             db_conn)
+
class GoSearch(ProbeSetSearch):
    """Search for records whose gene symbol is annotated with a GO term.

    The search term is the numeric part of a Gene Ontology accession
    (for example "0045202"). It is left-padded with zeros to seven
    characters; if it is longer, only its last seven characters are used.
    """

    def run(self):
        """Join against the db_GeneOntology tables, run the query, return rows."""
        field = 'GOterm.acc'
        # Same result as ('0000000' + term)[-7:]: pad short terms with zeros
        # and keep only the last seven characters.
        go_id = 'GO:' + self.search_term.rjust(7, '0')[-7:]

        # The dataset type is used as the table name whose symbol column is
        # matched against the GO gene products.
        statements = ("""%s.symbol=GOgene_product.symbol and
            GOassociation.gene_product_id=GOgene_product.id and
            GOterm.id=GOassociation.term_id""" % (
                self.escape(self.dataset.type)))

        # go_id is derived from the user's search term and sits inside a
        # quoted SQL literal, so it has to be escaped.
        clause_item = " %s = '%s' and %s " % (field,
                                              self.escape(go_id),
                                              statements)

        # Constant FROM fragment; it contains no user input, so it is not
        # passed through the escaping helper.
        gene_ontology_from_table = self.normalize_spaces(
            """ , db_GeneOntology.term as GOterm,
            db_GeneOntology.association as GOassociation,
            db_GeneOntology.gene_product as GOgene_product """)

        query = (self.base_query +
                 """%s
                    WHERE %s
                    and ProbeSet.Id = ProbeSetXRef.ProbeSetId
                    and ProbeSetXRef.ProbeSetFreezeId = %s
                            """ % (gene_ontology_from_table,
                                   clause_item,
                                   self.escape(self.dataset.id)))

        return self.execute(query)
+
+
+
if __name__ == "__main__":
    # Ad-hoc manual check: connect to the configured database, run one
    # search against a fixed dataset and print the rows.

    import MySQLdb
    import sys
    # NOTE(review): developer-specific path; the project imports below are
    # resolved relative to it.
    sys.path.append("/home/zas1024/gene/wqflask")
    print("Path is:", sys.path)

    from base import webqtlConfig
    from base.webqtlDataset import webqtlDataset
    # The next three imports are not referenced below; they are kept in case
    # importing them has side effects -- TODO confirm and remove if not.
    from base.templatePage import templatePage
    from utility import webqtlUtil
    from dbFunction import webqtlDatabaseFunction

    db_conn = MySQLdb.Connect(db=webqtlConfig.DB_NAME,
                              host=webqtlConfig.MYSQL_SERVER,
                              user=webqtlConfig.DB_USER,
                              passwd=webqtlConfig.DB_PASSWD)
    try:
        cursor = db_conn.cursor()

        dataset_name = "HC_M2_0606_P"
        dataset = webqtlDataset(dataset_name, cursor)

        # Alternative searches to try by hand:
        #results = ProbeSetSearch("salt", dataset, cursor, db_conn).run()
        #results = PhenotypeSearch("brain", dataset, cursor, db_conn).run()

        results = GoSearch("0045202", dataset, cursor, db_conn).run()
        print("results are:", pf(results))
    finally:
        # Release the connection even if the dataset lookup or search raises.
        db_conn.close()