about | summary | refs | log | tree | commit | diff
path: root/wqflask/wqflask/do_search.py
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/wqflask/do_search.py')
-rw-r--r-- wqflask/wqflask/do_search.py 399
1 files changed, 227 insertions, 172 deletions
diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py
index b0ca5ced..6b8dfa41 100644
--- a/wqflask/wqflask/do_search.py
+++ b/wqflask/wqflask/do_search.py
@@ -1,16 +1,13 @@
-from __future__ import print_function, division
-
import string
import requests
import json
from flask import Flask, g
-from MySQLdb import escape_string as escape
+from utility.db_tools import escape
from pprint import pformat as pf
import sys
-# sys.path.append("..") Never in a running webserver
from db import webqtlDatabaseFunction
from utility.tools import GN2_BASE_URL
@@ -19,7 +16,8 @@ import logging
from utility.logger import getLogger
logger = getLogger(__name__)
-class DoSearch(object):
+
+class DoSearch:
"""Parent class containing parameters/functions used for all searches"""
# Used to translate search phrases into classes
@@ -28,17 +26,16 @@ class DoSearch(object):
def __init__(self, search_term, search_operator=None, dataset=None, search_type=None):
self.search_term = search_term
# Make sure search_operator is something we expect
- assert search_operator in (None, "=", "<", ">", "<=", ">="), "Bad search operator"
+ assert search_operator in (
+ None, "=", "<", ">", "<=", ">="), "Bad search operator"
self.search_operator = search_operator
self.dataset = dataset
self.search_type = search_type
if self.dataset:
- logger.debug("self.dataset is boo: ", type(self.dataset), pf(self.dataset))
- logger.debug("self.dataset.group is: ", pf(self.dataset.group))
- #Get group information for dataset and the species id
-
- self.species_id = webqtlDatabaseFunction.retrieve_species_id(self.dataset.group.name)
+ # Get group information for dataset and the species id
+ self.species_id = webqtlDatabaseFunction.retrieve_species_id(
+ self.dataset.group.name)
def execute(self, query):
"""Executes query and returns results"""
@@ -49,15 +46,11 @@ class DoSearch(object):
def handle_wildcard(self, str):
keyword = str.strip()
- keyword = keyword.replace("*",".*")
- keyword = keyword.replace("?",".")
+ keyword = keyword.replace("*", ".*")
+ keyword = keyword.replace("?", ".")
return keyword
- #def escape(self, stringy):
- # """Shorter name than self.db_conn.escape_string"""
- # return escape(str(stringy))
-
def mescape(self, *items):
"""Multiple escape"""
escaped = [escape(str(item)) for item in items]
@@ -71,8 +64,6 @@ class DoSearch(object):
@classmethod
def get_search(cls, search_type):
- logger.debug("search_types are:", pf(cls.search_types))
-
search_type_string = search_type['dataset_type']
if 'key' in search_type and search_type['key'] != None:
search_type_string += '_' + search_type['key']
@@ -84,21 +75,37 @@ class DoSearch(object):
else:
return None
+
class MrnaAssaySearch(DoSearch):
"""A search within an expression dataset, including mRNA, protein, SNP, but not phenotype or metabolites"""
DoSearch.search_types['ProbeSet'] = "MrnaAssaySearch"
- base_query = """SELECT distinct ProbeSet.Name as TNAME,
- 0 as thistable,
- ProbeSetXRef.Mean as TMEAN,
- ProbeSetXRef.LRS as TLRS,
- ProbeSetXRef.PVALUE as TPVALUE,
- ProbeSet.Chr_num as TCHR_NUM,
- ProbeSet.Mb as TMB,
- ProbeSet.Symbol as TSYMBOL,
- ProbeSet.name_num as TNAME_NUM
- FROM ProbeSetXRef, ProbeSet """
+ base_query = """
+ SELECT
+ ProbeSetFreeze.`Name`,
+ ProbeSetFreeze.`FullName`,
+ ProbeSet.`Name`,
+ ProbeSet.`Symbol`,
+ CAST(ProbeSet.`description` AS BINARY),
+ CAST(ProbeSet.`Probe_Target_Description` AS BINARY),
+ ProbeSet.`Chr`,
+ ProbeSet.`Mb`,
+ ProbeSetXRef.`Mean`,
+ ProbeSetXRef.`LRS`,
+ ProbeSetXRef.`Locus`,
+ ProbeSetXRef.`pValue`,
+ ProbeSetXRef.`additive`,
+ Geno.`Chr` as geno_chr,
+ Geno.`Mb` as geno_mb
+ FROM Species
+ INNER JOIN InbredSet ON InbredSet.`SpeciesId`= Species.`Id`
+ INNER JOIN ProbeFreeze ON ProbeFreeze.`InbredSetId` = InbredSet.`Id`
+ INNER JOIN Tissue ON ProbeFreeze.`TissueId` = Tissue.`Id`
+ INNER JOIN ProbeSetFreeze ON ProbeSetFreeze.`ProbeFreezeId` = ProbeFreeze.`Id`
+ INNER JOIN ProbeSetXRef ON ProbeSetXRef.`ProbeSetFreezeId` = ProbeSetFreeze.`Id`
+ INNER JOIN ProbeSet ON ProbeSet.`Id` = ProbeSetXRef.`ProbeSetId`
+ LEFT JOIN Geno ON ProbeSetXRef.`Locus` = Geno.`Name` AND Geno.`SpeciesId` = Species.`Id` """
header_fields = ['Index',
'Record',
@@ -114,12 +121,13 @@ class MrnaAssaySearch(DoSearch):
search_string = escape(self.search_term[0])
if self.search_term[0] != "*":
- match_clause = """((MATCH (ProbeSet.symbol) AGAINST ('%s' IN BOOLEAN MODE))) and """ % (search_string)
+ match_clause = """((MATCH (ProbeSet.symbol) AGAINST ('%s' IN BOOLEAN MODE))) and """ % (
+ search_string)
else:
match_clause = ""
- where_clause = (match_clause +
- """ProbeSet.Id = ProbeSetXRef.ProbeSetId
+ where_clause = (match_clause
+ + """ProbeSet.Id = ProbeSetXRef.ProbeSetId
and ProbeSetXRef.ProbeSetFreezeId = %s
""" % (escape(str(self.dataset.id))))
@@ -141,30 +149,30 @@ class MrnaAssaySearch(DoSearch):
else:
match_clause = ""
- where_clause = (match_clause +
- """ProbeSet.Id = ProbeSetXRef.ProbeSetId
+ where_clause = (match_clause
+ + """ProbeSet.Id = ProbeSetXRef.ProbeSetId
and ProbeSetXRef.ProbeSetFreezeId = %s
""" % (escape(str(self.dataset.id))))
return where_clause
- def compile_final_query(self, from_clause = '', where_clause = ''):
+ def compile_final_query(self, from_clause='', where_clause=''):
"""Generates the final query string"""
from_clause = self.normalize_spaces(from_clause)
query = (self.base_query +
- """%s
+ """%s
WHERE %s
and ProbeSet.Id = ProbeSetXRef.ProbeSetId
and ProbeSetXRef.ProbeSetFreezeId = %s
ORDER BY ProbeSet.symbol ASC
""" % (escape(from_clause),
- where_clause,
- escape(str(self.dataset.id))))
+ where_clause,
+ escape(str(self.dataset.id))))
return query
- def run_combined(self, from_clause = '', where_clause = ''):
+ def run_combined(self, from_clause='', where_clause=''):
"""Generates and runs a combined search of an mRNA expression dataset"""
logger.debug("Running ProbeSetSearch")
@@ -173,14 +181,14 @@ class MrnaAssaySearch(DoSearch):
from_clause = self.normalize_spaces(from_clause)
query = (self.base_query +
- """%s
+ """%s
WHERE %s
and ProbeSet.Id = ProbeSetXRef.ProbeSetId
and ProbeSetXRef.ProbeSetFreezeId = %s
ORDER BY ProbeSet.symbol ASC
""" % (escape(from_clause),
- where_clause,
- escape(str(self.dataset.id))))
+ where_clause,
+ escape(str(self.dataset.id))))
return self.execute(query)
@@ -200,21 +208,36 @@ class PhenotypeSearch(DoSearch):
DoSearch.search_types['Publish'] = "PhenotypeSearch"
base_query = """SELECT PublishXRef.Id,
- PublishFreeze.createtime as thistable,
- Publication.PubMed_ID as Publication_PubMed_ID,
- Phenotype.Post_publication_description as Phenotype_Name
- FROM Phenotype, PublishFreeze, Publication, PublishXRef """
+ CAST(Phenotype.`Pre_publication_description` AS BINARY),
+ CAST(Phenotype.`Post_publication_description` AS BINARY),
+ Publication.`Authors`,
+ Publication.`Year`,
+ Publication.`PubMed_ID`,
+ PublishXRef.`mean`,
+ PublishXRef.`LRS`,
+ PublishXRef.`additive`,
+ PublishXRef.`Locus`,
+ InbredSet.`InbredSetCode`,
+ Geno.`Chr`,
+ Geno.`Mb`
+ FROM Species
+ INNER JOIN InbredSet ON InbredSet.`SpeciesId` = Species.`Id`
+ INNER JOIN PublishXRef ON PublishXRef.`InbredSetId` = InbredSet.`Id`
+ INNER JOIN PublishFreeze ON PublishFreeze.`InbredSetId` = InbredSet.`Id`
+ INNER JOIN Publication ON Publication.`Id` = PublishXRef.`PublicationId`
+ INNER JOIN Phenotype ON Phenotype.`Id` = PublishXRef.`PhenotypeId`
+ LEFT JOIN Geno ON PublishXRef.Locus = Geno.Name AND Geno.SpeciesId = Species.Id """
search_fields = ('Phenotype.Post_publication_description',
- 'Phenotype.Pre_publication_description',
- 'Phenotype.Pre_publication_abbreviation',
- 'Phenotype.Post_publication_abbreviation',
- 'Phenotype.Lab_code',
- 'Publication.PubMed_ID',
- 'Publication.Abstract',
- 'Publication.Title',
- 'Publication.Authors',
- 'PublishXRef.Id')
+ 'Phenotype.Pre_publication_description',
+ 'Phenotype.Pre_publication_abbreviation',
+ 'Phenotype.Post_publication_abbreviation',
+ 'Phenotype.Lab_code',
+ 'Publication.PubMed_ID',
+ 'Publication.Abstract',
+ 'Publication.Title',
+ 'Publication.Authors',
+ 'PublishXRef.Id')
header_fields = ['Index',
'Record',
@@ -229,53 +252,56 @@ class PhenotypeSearch(DoSearch):
def get_where_clause(self):
"""Generate clause for WHERE portion of query"""
- #Todo: Zach will figure out exactly what both these lines mean
- #and comment here
+ # Todo: Zach will figure out exactly what both these lines mean
+ # and comment here
- #if "'" not in self.search_term[0]:
- search_term = "[[:<:]]" + self.handle_wildcard(self.search_term[0]) + "[[:>:]]"
+ # if "'" not in self.search_term[0]:
+ search_term = "[[:<:]]" + \
+ self.handle_wildcard(self.search_term[0]) + "[[:>:]]"
if "_" in self.search_term[0]:
if len(self.search_term[0].split("_")[0]) == 3:
- search_term = "[[:<:]]" + self.handle_wildcard(self.search_term[0].split("_")[1]) + "[[:>:]]"
+ search_term = "[[:<:]]" + self.handle_wildcard(
+ self.search_term[0].split("_")[1]) + "[[:>:]]"
# This adds a clause to the query that matches the search term
# against each field in the search_fields tuple
where_clause_list = []
for field in self.search_fields:
- where_clause_list.append('''%s REGEXP "%s"''' % (field, search_term))
+ where_clause_list.append('''%s REGEXP "%s"''' %
+ (field, search_term))
where_clause = "(%s) " % ' OR '.join(where_clause_list)
return where_clause
- def compile_final_query(self, from_clause = '', where_clause = ''):
+ def compile_final_query(self, from_clause='', where_clause=''):
"""Generates the final query string"""
from_clause = self.normalize_spaces(from_clause)
if self.search_term[0] == "*":
query = (self.base_query +
- """%s
+ """%s
WHERE PublishXRef.InbredSetId = %s
and PublishXRef.PhenotypeId = Phenotype.Id
and PublishXRef.PublicationId = Publication.Id
and PublishFreeze.Id = %s
ORDER BY PublishXRef.Id""" % (
- from_clause,
- escape(str(self.dataset.group.id)),
- escape(str(self.dataset.id))))
+ from_clause,
+ escape(str(self.dataset.group.id)),
+ escape(str(self.dataset.id))))
else:
query = (self.base_query +
- """%s
+ """%s
WHERE %s
and PublishXRef.InbredSetId = %s
and PublishXRef.PhenotypeId = Phenotype.Id
and PublishXRef.PublicationId = Publication.Id
and PublishFreeze.Id = %s
ORDER BY PublishXRef.Id""" % (
- from_clause,
- where_clause,
- escape(str(self.dataset.group.id)),
- escape(str(self.dataset.id))))
+ from_clause,
+ where_clause,
+ escape(str(self.dataset.group.id)),
+ escape(str(self.dataset.id))))
return query
@@ -287,26 +313,27 @@ class PhenotypeSearch(DoSearch):
from_clause = self.normalize_spaces(from_clause)
query = (self.base_query +
- """%s
+ """%s
WHERE %s
PublishXRef.InbredSetId = %s and
PublishXRef.PhenotypeId = Phenotype.Id and
PublishXRef.PublicationId = Publication.Id and
PublishFreeze.Id = %s""" % (
- from_clause,
- where_clause,
- escape(str(self.dataset.group.id)),
- escape(str(self.dataset.id))))
+ from_clause,
+ where_clause,
+ escape(str(self.dataset.group.id)),
+ escape(str(self.dataset.id))))
return self.execute(query)
def run(self):
"""Generates and runs a simple search of a phenotype dataset"""
- query = self.compile_final_query(where_clause = self.get_where_clause())
+ query = self.compile_final_query(where_clause=self.get_where_clause())
return self.execute(query)
+
class GenotypeSearch(DoSearch):
"""A search within a genotype dataset"""
@@ -339,57 +366,56 @@ class GenotypeSearch(DoSearch):
for field in self.search_fields:
where_clause.append('''%s REGEXP "%s"''' % ("%s.%s" % self.mescape(self.dataset.type,
field),
- self.search_term))
+ self.search_term))
logger.debug("hello ;where_clause is:", pf(where_clause))
where_clause = "(%s) " % ' OR '.join(where_clause)
return where_clause
- def compile_final_query(self, from_clause = '', where_clause = ''):
+ def compile_final_query(self, from_clause='', where_clause=''):
"""Generates the final query string"""
from_clause = self.normalize_spaces(from_clause)
-
if self.search_term[0] == "*":
- query = (self.base_query +
- """WHERE Geno.Id = GenoXRef.GenoId
+ query = (self.base_query
+ + """WHERE Geno.Id = GenoXRef.GenoId
and GenoXRef.GenoFreezeId = GenoFreeze.Id
- and GenoFreeze.Id = %s"""% (escape(str(self.dataset.id))))
+ and GenoFreeze.Id = %s""" % (escape(str(self.dataset.id))))
else:
query = (self.base_query +
- """WHERE %s
+ """WHERE %s
and Geno.Id = GenoXRef.GenoId
and GenoXRef.GenoFreezeId = GenoFreeze.Id
- and GenoFreeze.Id = %s"""% (where_clause,
- escape(str(self.dataset.id))))
+ and GenoFreeze.Id = %s""" % (where_clause,
+ escape(str(self.dataset.id))))
return query
def run(self):
"""Generates and runs a simple search of a genotype dataset"""
- #Todo: Zach will figure out exactly what both these lines mean
- #and comment here
+ # Todo: Zach will figure out exactly what both these lines mean
+ # and comment here
if self.search_term[0] == "*":
self.query = self.compile_final_query()
else:
- self.query = self.compile_final_query(where_clause = self.get_where_clause())
+ self.query = self.compile_final_query(
+ where_clause=self.get_where_clause())
return self.execute(self.query)
+
class RifSearch(MrnaAssaySearch):
"""Searches for traits with a Gene RIF entry including the search term."""
DoSearch.search_types['ProbeSet_RIF'] = "RifSearch"
def get_from_clause(self):
- return ", GeneRIF_BASIC "
+ return f" INNER JOIN GeneRIF_BASIC ON GeneRIF_BASIC.`symbol` = { self.dataset.type }.`symbol` "
def get_where_clause(self):
- where_clause = """( %s.symbol = GeneRIF_BASIC.symbol and
- MATCH (GeneRIF_BASIC.comment)
- AGAINST ('+%s' IN BOOLEAN MODE)) """ % (self.dataset.type, self.search_term[0])
+ where_clause = f"(MATCH (GeneRIF_BASIC.comment) AGAINST ('+{ self.search_term[0] }' IN BOOLEAN MODE)) "
return where_clause
@@ -401,10 +427,11 @@ class RifSearch(MrnaAssaySearch):
return self.execute(query)
+
class WikiSearch(MrnaAssaySearch):
"""Searches GeneWiki for traits other people have annotated"""
- DoSearch.search_types['ProbeSet_WIKI'] = "WikiSearch"
+ DoSearch.search_types['ProbeSet_WIKI'] = "WikiSearch"
def get_from_clause(self):
return ", GeneRIF "
@@ -414,7 +441,7 @@ class WikiSearch(MrnaAssaySearch):
and GeneRIF.versionId=0 and GeneRIF.display>0
and (GeneRIF.comment REGEXP '%s' or GeneRIF.initial = '%s')
""" % (self.dataset.type,
- "[[:<:]]"+str(self.search_term[0])+"[[:>:]]",
+ "[[:<:]]" + str(self.search_term[0]) + "[[:>:]]",
str(self.search_term[0]))
return where_clause
@@ -426,10 +453,11 @@ class WikiSearch(MrnaAssaySearch):
return self.execute(query)
+
class GoSearch(MrnaAssaySearch):
"""Searches for synapse-associated genes listed in the Gene Ontology."""
- DoSearch.search_types['ProbeSet_GO'] = "GoSearch"
+ DoSearch.search_types['ProbeSet_GO'] = "GoSearch"
def get_from_clause(self):
from_clause = """, db_GeneOntology.term as GOterm,
@@ -440,7 +468,7 @@ class GoSearch(MrnaAssaySearch):
def get_where_clause(self):
field = 'GOterm.acc'
- go_id = 'GO:' + ('0000000'+self.search_term[0])[-7:]
+ go_id = 'GO:' + ('0000000' + self.search_term[0])[-7:]
statements = ("""%s.symbol=GOgene_product.symbol and
GOassociation.gene_product_id=GOgene_product.id and
@@ -459,7 +487,9 @@ class GoSearch(MrnaAssaySearch):
return self.execute(query)
-#ZS: Not sure what the best way to deal with LRS searches is
+# ZS: Not sure what the best way to deal with LRS searches is
+
+
class LrsSearch(DoSearch):
"""Searches for genes with a QTL within the given LRS values
@@ -497,17 +527,18 @@ class LrsSearch(DoSearch):
assert isinstance(self.search_term, (list, tuple))
lrs_min, lrs_max = self.search_term[:2]
if self.search_type == "LOD":
- lrs_min = lrs_min*4.61
- lrs_max = lrs_max*4.61
+ lrs_min = lrs_min * 4.61
+ lrs_max = lrs_max * 4.61
where_clause = """ %sXRef.LRS > %s and
%sXRef.LRS < %s """ % self.mescape(self.dataset.type,
- min(lrs_min, lrs_max),
+ min(lrs_min,
+ lrs_max),
self.dataset.type,
max(lrs_min, lrs_max))
if len(self.search_term) > 2:
- #If the user typed, for example "Chr4", the "Chr" substring needs to be removed so that all search elements can be converted to floats
+ # If the user typed, for example "Chr4", the "Chr" substring needs to be removed so that all search elements can be converted to floats
chr_num = self.search_term[2]
if "chr" in self.search_term[2].lower():
chr_num = self.search_term[2].lower().replace("chr", "")
@@ -523,27 +554,27 @@ class LrsSearch(DoSearch):
where_clause += """ and %sXRef.Locus = Geno.name and
Geno.SpeciesId = %s
""" % self.mescape(self.dataset.type,
- self.species_id)
+ self.species_id)
else:
# Deal with >, <, >=, and <=
logger.debug("self.search_term is:", self.search_term)
lrs_val = self.search_term[0]
if self.search_type == "LOD":
- lrs_val = lrs_val*4.61
+ lrs_val = lrs_val * 4.61
where_clause = """ %sXRef.LRS %s %s """ % self.mescape(self.dataset.type,
- self.search_operator,
- self.search_term[0])
+ self.search_operator,
+ self.search_term[0])
return where_clause
-
def run(self):
self.from_clause = self.get_from_clause()
self.where_clause = self.get_where_clause()
- self.query = self.compile_final_query(self.from_clause, self.where_clause)
+ self.query = self.compile_final_query(
+ self.from_clause, self.where_clause)
return self.execute(self.query)
@@ -557,10 +588,12 @@ class MrnaLrsSearch(LrsSearch, MrnaAssaySearch):
self.from_clause = self.get_from_clause()
self.where_clause = self.get_where_clause()
- self.query = self.compile_final_query(from_clause = self.from_clause, where_clause = self.where_clause)
+ self.query = self.compile_final_query(
+ from_clause=self.from_clause, where_clause=self.where_clause)
return self.execute(self.query)
+
class PhenotypeLrsSearch(LrsSearch, PhenotypeSearch):
for search_key in ('LRS', 'LOD'):
@@ -571,7 +604,8 @@ class PhenotypeLrsSearch(LrsSearch, PhenotypeSearch):
self.from_clause = self.get_from_clause()
self.where_clause = self.get_where_clause()
- self.query = self.compile_final_query(from_clause = self.from_clause, where_clause = self.where_clause)
+ self.query = self.compile_final_query(
+ from_clause=self.from_clause, where_clause=self.where_clause)
return self.execute(self.query)
@@ -598,7 +632,8 @@ class CisTransLrsSearch(DoSearch):
elif len(self.search_term) == 3:
lrs_min, lrs_max, self.mb_buffer = self.search_term
elif len(self.search_term) == 4:
- lrs_min, lrs_max, self.mb_buffer = [float(value) for value in self.search_term[:3]]
+ lrs_min, lrs_max, self.mb_buffer = [
+ float(value) for value in self.search_term[:3]]
chromosome = self.search_term[3]
if "Chr" in chromosome or "chr" in chromosome:
chromosome = int(chromosome[3:])
@@ -610,19 +645,19 @@ class CisTransLrsSearch(DoSearch):
lrs_max = lrs_max * 4.61
sub_clause = """ %sXRef.LRS > %s and
- %sXRef.LRS < %s and """ % (
- escape(self.dataset.type),
- escape(str(min(lrs_min, lrs_max))),
- escape(self.dataset.type),
- escape(str(max(lrs_min, lrs_max)))
- )
+ %sXRef.LRS < %s and """ % (
+ escape(self.dataset.type),
+ escape(str(min(lrs_min, lrs_max))),
+ escape(self.dataset.type),
+ escape(str(max(lrs_min, lrs_max)))
+ )
else:
# Deal with >, <, >=, and <=
- sub_clause = """ %sXRef.LRS %s %s and """ % (
- escape(self.dataset.type),
- escape(self.search_operator),
- escape(self.search_term[0])
- )
+ sub_clause = """ %sXRef.LRS %s %s and """ % (
+ escape(self.dataset.type),
+ escape(self.search_operator),
+ escape(self.search_term[0])
+ )
if cis_trans == "cis":
where_clause = sub_clause + """
@@ -630,36 +665,42 @@ class CisTransLrsSearch(DoSearch):
%sXRef.Locus = Geno.name and
Geno.SpeciesId = %s and
%s.Chr = Geno.Chr""" % (
- escape(self.dataset.type),
- the_operator,
- escape(str(self.mb_buffer)),
- escape(self.dataset.type),
- escape(str(self.species_id)),
- escape(self.dataset.type)
- )
+ escape(self.dataset.type),
+ the_operator,
+ escape(str(self.mb_buffer)),
+ escape(self.dataset.type),
+ escape(str(self.species_id)),
+ escape(self.dataset.type)
+ )
else:
if chromosome:
location_clause = "(%s.Chr = '%s' and %s.Chr = Geno.Chr and ABS(%s.Mb-Geno.Mb) %s %s) or (%s.Chr != Geno.Chr and Geno.Chr = '%s')" % (escape(self.dataset.type),
- chromosome,
- escape(self.dataset.type),
- escape(self.dataset.type),
- the_operator,
- escape(str(self.mb_buffer)),
- escape(self.dataset.type),
- chromosome)
+ chromosome,
+ escape(
+ self.dataset.type),
+ escape(
+ self.dataset.type),
+ the_operator,
+ escape(
+ str(self.mb_buffer)),
+ escape(
+ self.dataset.type),
+ chromosome)
else:
- location_clause = "(ABS(%s.Mb-Geno.Mb) %s %s and %s.Chr = Geno.Chr) or (%s.Chr != Geno.Chr)" % (escape(self.dataset.type), the_operator, escape(str(self.mb_buffer)), escape(self.dataset.type))
+ location_clause = "(ABS(%s.Mb-Geno.Mb) %s %s and %s.Chr = Geno.Chr) or (%s.Chr != Geno.Chr)" % (escape(
+ self.dataset.type), the_operator, escape(str(self.mb_buffer)), escape(self.dataset.type), escape(self.dataset.type))
where_clause = sub_clause + """
%sXRef.Locus = Geno.name and
Geno.SpeciesId = %s and
(%s)""" % (
- escape(self.dataset.type),
- escape(str(self.species_id)),
- location_clause
- )
+ escape(self.dataset.type),
+ escape(str(self.species_id)),
+ location_clause
+ )
return where_clause
+
class CisLrsSearch(CisTransLrsSearch, MrnaAssaySearch):
"""
Searches for genes on a particular chromosome with a cis-eQTL within the given LRS values
@@ -678,7 +719,7 @@ class CisLrsSearch(CisTransLrsSearch, MrnaAssaySearch):
"""
for search_key in ('LRS', 'LOD'):
- DoSearch.search_types['ProbeSet_CIS'+search_key] = "CisLrsSearch"
+ DoSearch.search_types['ProbeSet_CIS' + search_key] = "CisLrsSearch"
def get_where_clause(self):
return CisTransLrsSearch.get_where_clause(self, "cis")
@@ -687,10 +728,12 @@ class CisLrsSearch(CisTransLrsSearch, MrnaAssaySearch):
self.from_clause = self.get_from_clause()
self.where_clause = self.get_where_clause()
- self.query = self.compile_final_query(self.from_clause, self.where_clause)
+ self.query = self.compile_final_query(
+ self.from_clause, self.where_clause)
return self.execute(self.query)
+
class TransLrsSearch(CisTransLrsSearch, MrnaAssaySearch):
"""Searches for genes on a particular chromosome with a cis-eQTL within the given LRS values
@@ -708,7 +751,7 @@ class TransLrsSearch(CisTransLrsSearch, MrnaAssaySearch):
"""
for search_key in ('LRS', 'LOD'):
- DoSearch.search_types['ProbeSet_TRANS'+search_key] = "TransLrsSearch"
+ DoSearch.search_types['ProbeSet_TRANS' + search_key] = "TransLrsSearch"
def get_where_clause(self):
return CisTransLrsSearch.get_where_clause(self, "trans")
@@ -717,7 +760,8 @@ class TransLrsSearch(CisTransLrsSearch, MrnaAssaySearch):
self.from_clause = self.get_from_clause()
self.where_clause = self.get_where_clause()
- self.query = self.compile_final_query(self.from_clause, self.where_clause)
+ self.query = self.compile_final_query(
+ self.from_clause, self.where_clause)
return self.execute(self.query)
@@ -736,14 +780,15 @@ class MeanSearch(MrnaAssaySearch):
where_clause = """ %sXRef.mean > %s and
%sXRef.mean < %s """ % self.mescape(self.dataset.type,
- min(self.mean_min, self.mean_max),
- self.dataset.type,
- max(self.mean_min, self.mean_max))
+ min(self.mean_min,
+ self.mean_max),
+ self.dataset.type,
+ max(self.mean_min, self.mean_max))
else:
# Deal with >, <, >=, and <=
where_clause = """ %sXRef.mean %s %s """ % self.mescape(self.dataset.type,
- self.search_operator,
- self.search_term[0])
+ self.search_operator,
+ self.search_term[0])
return where_clause
@@ -751,10 +796,11 @@ class MeanSearch(MrnaAssaySearch):
self.where_clause = self.get_where_clause()
logger.debug("where_clause is:", pf(self.where_clause))
- self.query = self.compile_final_query(where_clause = self.where_clause)
+ self.query = self.compile_final_query(where_clause=self.where_clause)
return self.execute(self.query)
+
class RangeSearch(MrnaAssaySearch):
"""Searches for genes with a range of expression varying between two values"""
@@ -786,10 +832,11 @@ class RangeSearch(MrnaAssaySearch):
def run(self):
self.where_clause = self.get_where_clause()
- self.query = self.compile_final_query(where_clause = self.where_clause)
+ self.query = self.compile_final_query(where_clause=self.where_clause)
return self.execute(self.query)
+
class PositionSearch(DoSearch):
"""Searches for genes/markers located within a specified range on a specified chromosome"""
@@ -797,7 +844,8 @@ class PositionSearch(DoSearch):
DoSearch.search_types[search_key] = "PositionSearch"
def get_where_clause(self):
- self.search_term = [float(value) if is_number(value) else value for value in self.search_term]
+ self.search_term = [float(value) if is_number(
+ value) else value for value in self.search_term]
chr, self.mb_min, self.mb_max = self.search_term[:3]
self.chr = str(chr).lower()
self.get_chr()
@@ -807,11 +855,11 @@ class PositionSearch(DoSearch):
%s.Mb < %s """ % self.mescape(self.dataset.type,
self.chr,
self.dataset.type,
- min(self.mb_min, self.mb_max),
+ min(self.mb_min,
+ self.mb_max),
self.dataset.type,
max(self.mb_min, self.mb_max))
-
return where_clause
def get_chr(self):
@@ -826,36 +874,39 @@ class PositionSearch(DoSearch):
def run(self):
self.get_where_clause()
- self.query = self.compile_final_query(where_clause = self.where_clause)
+ self.query = self.compile_final_query(where_clause=self.where_clause)
return self.execute(self.query)
+
class MrnaPositionSearch(PositionSearch, MrnaAssaySearch):
"""Searches for genes located within a specified range on a specified chromosome"""
for search_key in ('POSITION', 'POS', 'MB'):
- DoSearch.search_types['ProbeSet_'+search_key] = "MrnaPositionSearch"
+ DoSearch.search_types['ProbeSet_' + search_key] = "MrnaPositionSearch"
def run(self):
self.where_clause = self.get_where_clause()
- self.query = self.compile_final_query(where_clause = self.where_clause)
+ self.query = self.compile_final_query(where_clause=self.where_clause)
return self.execute(self.query)
+
class GenotypePositionSearch(PositionSearch, GenotypeSearch):
"""Searches for genes located within a specified range on a specified chromosome"""
for search_key in ('POSITION', 'POS', 'MB'):
- DoSearch.search_types['Geno_'+search_key] = "GenotypePositionSearch"
+ DoSearch.search_types['Geno_' + search_key] = "GenotypePositionSearch"
def run(self):
self.where_clause = self.get_where_clause()
- self.query = self.compile_final_query(where_clause = self.where_clause)
+ self.query = self.compile_final_query(where_clause=self.where_clause)
return self.execute(self.query)
+
class PvalueSearch(MrnaAssaySearch):
"""Searches for traits with a permutationed p-value between low and high"""
@@ -870,25 +921,26 @@ class PvalueSearch(MrnaAssaySearch):
self.pvalue_min, self.pvalue_max = self.search_term[:2]
self.where_clause = """ %sXRef.pValue > %s and %sXRef.pValue < %s
""" % self.mescape(
- self.dataset.type,
- min(self.pvalue_min, self.pvalue_max),
- self.dataset.type,
- max(self.pvalue_min, self.pvalue_max))
+ self.dataset.type,
+ min(self.pvalue_min, self.pvalue_max),
+ self.dataset.type,
+ max(self.pvalue_min, self.pvalue_max))
else:
# Deal with >, <, >=, and <=
self.where_clause = """ %sXRef.pValue %s %s
""" % self.mescape(
- self.dataset.type,
- self.search_operator,
- self.search_term[0])
+ self.dataset.type,
+ self.search_operator,
+ self.search_term[0])
logger.debug("where_clause is:", pf(self.where_clause))
- self.query = self.compile_final_query(where_clause = self.where_clause)
+ self.query = self.compile_final_query(where_clause=self.where_clause)
logger.sql(self.query)
return self.execute(self.query)
+
class AuthorSearch(PhenotypeSearch):
"""Searches for phenotype traits with specified author(s)"""
@@ -899,7 +951,7 @@ class AuthorSearch(PhenotypeSearch):
self.where_clause = """ Publication.Authors REGEXP "[[:<:]]%s[[:>:]]" and
""" % (self.search_term[0])
- self.query = self.compile_final_query(where_clause = self.where_clause)
+ self.query = self.compile_final_query(where_clause=self.where_clause)
return self.execute(self.query)
@@ -911,6 +963,7 @@ def is_number(s):
except ValueError:
return False
+
def get_aliases(symbol, species):
if species == "mouse":
symbol_string = symbol.capitalize()
@@ -920,7 +973,8 @@ def get_aliases(symbol, species):
return []
filtered_aliases = []
- response = requests.get(GN2_BASE_URL + "/gn3/gene/aliases/" + symbol_string)
+ response = requests.get(
+ GN2_BASE_URL + "/gn3/gene/aliases/" + symbol_string)
if response:
alias_list = json.loads(response.content)
@@ -934,9 +988,10 @@ def get_aliases(symbol, species):
return filtered_aliases
+
if __name__ == "__main__":
- ### Usually this will be used as a library, but call it from the command line for testing
- ### And it runs the code below
+ # Usually this will be used as a library, but call it from the command line for testing
+ # And it runs the code below
import MySQLdb
import sys