aboutsummaryrefslogtreecommitdiff
path: root/gn2/wqflask/do_search.py
diff options
context:
space:
mode:
Diffstat (limited to 'gn2/wqflask/do_search.py')
-rw-r--r--gn2/wqflask/do_search.py965
1 files changed, 965 insertions, 0 deletions
diff --git a/gn2/wqflask/do_search.py b/gn2/wqflask/do_search.py
new file mode 100644
index 00000000..3c81783d
--- /dev/null
+++ b/gn2/wqflask/do_search.py
@@ -0,0 +1,965 @@
+import json
+import re
+import requests
+import string
+
+from gn2.wqflask.database import database_connection
+
+import sys
+
+from gn2.db import webqtlDatabaseFunction
+from gn2.utility.tools import get_setting, GN2_BASE_URL
+
+
+class DoSearch:
+ """Parent class containing parameters/functions used for all searches"""
+
+ # Used to translate search phrases into classes
+ search_types = dict()
+
+ def __init__(self, search_term, search_operator=None, dataset=None, search_type=None):
+ self.search_term = search_term
+ # Make sure search_operator is something we expect
+ assert search_operator in (
+ None, "=", "<", ">", "<=", ">="), "Bad search operator"
+ self.search_operator = search_operator
+ self.dataset = dataset
+ self.search_type = search_type
+
+ if self.dataset:
+ # Get group information for dataset and the species id
+ self.species_id = webqtlDatabaseFunction.retrieve_species_id(
+ self.dataset.group.name)
+
+ def execute(self, query):
+ """Executes query and returns results"""
+ query = self.normalize_spaces(query)
+ with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
+ cursor.execute(query)
+ return cursor.fetchall()
+
+ def handle_wildcard(self, str):
+ keyword = str.strip()
+ keyword = keyword.replace("*", ".*")
+ keyword = keyword.replace("?", ".")
+
+ return keyword
+
+ def sescape(self, item):
+ """Single escape"""
+ from gn2.utility.tools import get_setting
+ with database_connection(get_setting("SQL_URI")) as conn:
+ escaped = conn.escape_string(str(item)).decode()
+ return escaped
+
+ def mescape(self, *items):
+ """Multiple escape"""
+ from gn2.utility.tools import get_setting
+ escaped = []
+ with database_connection(get_setting("SQL_URI")) as conn:
+ escaped = [conn.escape_string(str(item)).decode() for item in items]
+ return tuple(escaped)
+
+ def normalize_spaces(self, stringy):
+ """Strips out newlines/extra spaces and replaces them with just spaces"""
+ step_one = " ".join(stringy.split())
+ return step_one
+
+ @classmethod
+ def get_search(cls, search_type):
+ search_type_string = search_type['dataset_type']
+ if 'key' in search_type and search_type['key'] != None:
+ search_type_string += '_' + search_type['key']
+
+ if search_type_string in cls.search_types:
+ return cls.search_types[search_type_string]
+ else:
+ return None
+
+
+class MrnaAssaySearch(DoSearch):
+ """A search within an expression dataset, including mRNA, protein, SNP, but not phenotype or metabolites"""
+
+ DoSearch.search_types['ProbeSet'] = "MrnaAssaySearch"
+
+ base_query = """
+ SELECT DISTINCT
+ ProbeSetFreeze.`Name`,
+ ProbeSetFreeze.`FullName`,
+ ProbeSet.`Name`,
+ ProbeSet.`Symbol`,
+ CAST(ProbeSet.`description` AS BINARY),
+ CAST(ProbeSet.`Probe_Target_Description` AS BINARY),
+ ProbeSet.`Chr`,
+ ProbeSet.`Mb`,
+ ProbeSetXRef.`Mean`,
+ ProbeSetXRef.`LRS`,
+ ProbeSetXRef.`Locus`,
+ ProbeSetXRef.`pValue`,
+ ProbeSetXRef.`additive`,
+ Geno.`Chr` as geno_chr,
+ Geno.`Mb` as geno_mb
+ FROM Species
+ INNER JOIN InbredSet ON InbredSet.`SpeciesId`= Species.`Id`
+ INNER JOIN ProbeFreeze ON ProbeFreeze.`InbredSetId` = InbredSet.`Id`
+ INNER JOIN Tissue ON ProbeFreeze.`TissueId` = Tissue.`Id`
+ INNER JOIN ProbeSetFreeze ON ProbeSetFreeze.`ProbeFreezeId` = ProbeFreeze.`Id`
+ INNER JOIN ProbeSetXRef ON ProbeSetXRef.`ProbeSetFreezeId` = ProbeSetFreeze.`Id`
+ INNER JOIN ProbeSet ON ProbeSet.`Id` = ProbeSetXRef.`ProbeSetId`
+ LEFT JOIN Geno ON ProbeSetXRef.`Locus` = Geno.`Name` AND Geno.`SpeciesId` = Species.`Id` """
+
+ header_fields = ['Index',
+ 'Record',
+ 'Symbol',
+ 'Description',
+ 'Location',
+ 'Mean',
+ 'Max LRS',
+ 'Max LRS Location',
+ 'Additive Effect']
+
+ def get_alias_where_clause(self):
+ search_string = self.sescape(self.search_term[0])
+
+ if self.search_term[0] != "*":
+ match_clause = """((MATCH (ProbeSet.symbol) AGAINST ('%s' IN BOOLEAN MODE))) and """ % (
+ search_string)
+ else:
+ match_clause = ""
+
+ where_clause = (match_clause
+ + """ProbeSet.Id = ProbeSetXRef.ProbeSetId
+ and ProbeSetXRef.ProbeSetFreezeId = %s
+ """ % (self.sescape(str(self.dataset.id))))
+
+ return where_clause
+
+ def get_where_clause(self):
+ search_string = self.sescape(self.search_term[0])
+
+ if self.search_term[0] != "*":
+ if re.search("\w{1,2}\-\w+|\w+\-\w{1,2}", self.search_term[0]):
+ search_string = f'"{search_string}*"'
+
+ match_clause = f"""((MATCH (ProbeSet.Name,
+ ProbeSet.description,
+ ProbeSet.symbol,
+ alias,
+ GenbankId,
+ UniGeneId,
+ Probe_Target_Description)
+ AGAINST ('{search_string}' IN BOOLEAN MODE))) AND """
+ else:
+ match_clause = ""
+
+ where_clause = (match_clause
+ + """ProbeSet.Id = ProbeSetXRef.ProbeSetId
+ and ProbeSetXRef.ProbeSetFreezeId = %s
+ """ % (self.sescape(str(self.dataset.id))))
+
+ return where_clause
+
+ def compile_final_query(self, from_clause='', where_clause=''):
+ """Generates the final query string"""
+
+ from_clause = self.normalize_spaces(from_clause)
+
+ query = (self.base_query +
+ """%s
+ WHERE %s
+ and ProbeSet.Id = ProbeSetXRef.ProbeSetId
+ and ProbeSetXRef.ProbeSetFreezeId = %s
+ ORDER BY ProbeSet.symbol ASC
+ """ % (self.sescape(from_clause),
+ where_clause,
+ self.sescape(str(self.dataset.id))))
+ return query
+
+ def run_combined(self, from_clause='', where_clause=''):
+ """Generates and runs a combined search of an mRNA expression dataset"""
+ #query = self.base_query + from_clause + " WHERE " + where_clause
+
+ from_clause = self.normalize_spaces(from_clause)
+
+ query = (self.base_query +
+ """%s
+ WHERE %s
+ and ProbeSet.Id = ProbeSetXRef.ProbeSetId
+ and ProbeSetXRef.ProbeSetFreezeId = %s
+ ORDER BY ProbeSet.symbol ASC
+ """ % (self.sescape(from_clause),
+ where_clause,
+ self.sescape(str(self.dataset.id))))
+
+ return self.execute(query)
+
+ def run(self):
+ """Generates and runs a simple search of an mRNA expression dataset"""
+ where_clause = self.get_where_clause()
+ query = self.base_query + "WHERE " + where_clause + "ORDER BY ProbeSet.symbol ASC"
+ return self.execute(query)
+
+
+class PhenotypeSearch(DoSearch):
+ """A search within a phenotype dataset"""
+
+ DoSearch.search_types['Publish'] = "PhenotypeSearch"
+
+ base_query = """SELECT PublishXRef.Id,
+ CAST(Phenotype.`Pre_publication_description` AS BINARY),
+ CAST(Phenotype.`Post_publication_description` AS BINARY),
+ Publication.`Authors`,
+ Publication.`Year`,
+ Publication.`PubMed_ID`,
+ PublishXRef.`mean`,
+ PublishXRef.`LRS`,
+ PublishXRef.`additive`,
+ PublishXRef.`Locus`,
+ InbredSet.`InbredSetCode`,
+ Geno.`Chr`,
+ Geno.`Mb`
+ FROM Species
+ INNER JOIN InbredSet ON InbredSet.`SpeciesId` = Species.`Id`
+ INNER JOIN PublishXRef ON PublishXRef.`InbredSetId` = InbredSet.`Id`
+ INNER JOIN PublishFreeze ON PublishFreeze.`InbredSetId` = InbredSet.`Id`
+ INNER JOIN Publication ON Publication.`Id` = PublishXRef.`PublicationId`
+ INNER JOIN Phenotype ON Phenotype.`Id` = PublishXRef.`PhenotypeId`
+ LEFT JOIN Geno ON PublishXRef.Locus = Geno.Name AND Geno.SpeciesId = Species.Id """
+
+ search_fields = ('Phenotype.Post_publication_description',
+ 'Phenotype.Pre_publication_description',
+ 'Phenotype.Pre_publication_abbreviation',
+ 'Phenotype.Post_publication_abbreviation',
+ 'Phenotype.Lab_code',
+ 'Publication.PubMed_ID',
+ 'Publication.Abstract',
+ 'Publication.Title',
+ 'Publication.Authors',
+ 'PublishXRef.Id')
+
+ header_fields = ['Index',
+ 'Record',
+ 'Description',
+ 'Mean',
+ 'Authors',
+ 'Year',
+ 'Max LRS',
+ 'Max LRS Location',
+ 'Additive Effect']
+
+ def get_where_clause(self):
+ """Generate clause for WHERE portion of query"""
+
+ # Todo: Zach will figure out exactly what both these lines mean
+ # and comment here
+
+ # if "'" not in self.search_term[0]:
+ search_term = "%" + \
+ self.handle_wildcard(self.search_term[0]) + "%"
+ if "_" in self.search_term[0]:
+ if len(self.search_term[0].split("_")[0]) == 3:
+ search_term = "%" + self.handle_wildcard(
+ self.search_term[0].split("_")[1]) + "%"
+
+ # This adds a clause to the query that matches the search term
+ # against each field in the search_fields tuple
+ where_clause_list = []
+ for field in self.search_fields:
+ where_clause_list.append('''%s LIKE "%s"''' %
+ (field, search_term))
+ where_clause = "(%s) " % ' OR '.join(where_clause_list)
+
+ return where_clause
+
+ def compile_final_query(self, from_clause='', where_clause=''):
+ """Generates the final query string"""
+
+ from_clause = self.normalize_spaces(from_clause)
+
+ if self.search_term[0] == "*":
+ query = (self.base_query +
+ """%s
+ WHERE PublishXRef.InbredSetId = %s
+ and PublishXRef.PhenotypeId = Phenotype.Id
+ and PublishXRef.PublicationId = Publication.Id
+ and PublishFreeze.Id = %s
+ ORDER BY PublishXRef.Id""" % (
+ from_clause,
+ self.sescape(str(self.dataset.group.id)),
+ self.sescape(str(self.dataset.id))))
+ else:
+ query = (self.base_query +
+ """%s
+ WHERE %s
+ and PublishXRef.InbredSetId = %s
+ and PublishXRef.PhenotypeId = Phenotype.Id
+ and PublishXRef.PublicationId = Publication.Id
+ and PublishFreeze.Id = %s
+ ORDER BY PublishXRef.Id""" % (
+ from_clause,
+ where_clause,
+ self.sescape(str(self.dataset.group.id)),
+ self.sescape(str(self.dataset.id))))
+
+ return query
+
+ def run_combined(self, from_clause, where_clause):
+ """Generates and runs a combined search of an phenotype dataset"""
+ from_clause = self.normalize_spaces(from_clause)
+
+ query = (self.base_query +
+ """%s
+ WHERE %s
+ PublishXRef.InbredSetId = %s and
+ PublishXRef.PhenotypeId = Phenotype.Id and
+ PublishXRef.PublicationId = Publication.Id and
+ PublishFreeze.Id = %s""" % (
+ from_clause,
+ where_clause,
+ self.sescape(str(self.dataset.group.id)),
+ self.sescape(str(self.dataset.id))))
+
+ return self.execute(query)
+
+ def run(self):
+ """Generates and runs a simple search of a phenotype dataset"""
+
+ query = self.compile_final_query(where_clause=self.get_where_clause())
+
+ return self.execute(query)
+
+
+class GenotypeSearch(DoSearch):
+ """A search within a genotype dataset"""
+
+ DoSearch.search_types['Geno'] = "GenotypeSearch"
+
+ base_query = """SELECT Geno.Name,
+ GenoFreeze.createtime as thistable,
+ Geno.Name as Geno_Name,
+ Geno.Source2 as Geno_Source2,
+ Geno.Chr as Geno_Chr,
+ Geno.Mb as Geno_Mb
+ FROM GenoXRef, GenoFreeze, Geno """
+
+ search_fields = ('Name', 'Chr')
+
+ header_fields = ['Index',
+ 'Record',
+ 'Location']
+
+ def get_where_clause(self):
+ """Generate clause for part of the WHERE portion of query"""
+
+ # This adds a clause to the query that matches the search term
+ # against each field in search_fields (above)
+ where_clause = []
+
+ if "'" not in self.search_term[0]:
+ self.search_term = "%" + self.search_term[0] + "%"
+
+ for field in self.search_fields:
+ where_clause.append('''%s LIKE "%s"''' % ("%s.%s" % self.mescape(self.dataset.type,
+ field),
+ self.search_term))
+ where_clause = "(%s) " % ' OR '.join(where_clause)
+
+ return where_clause
+
+ def compile_final_query(self, from_clause='', where_clause=''):
+ """Generates the final query string"""
+
+ from_clause = self.normalize_spaces(from_clause)
+
+ if self.search_term[0] == "*":
+ query = (self.base_query
+ + """WHERE Geno.Id = GenoXRef.GenoId
+ and GenoXRef.GenoFreezeId = GenoFreeze.Id
+ and GenoFreeze.Id = %s""" % (self.sescape(str(self.dataset.id))))
+ else:
+ query = (self.base_query +
+ """WHERE %s
+ and Geno.Id = GenoXRef.GenoId
+ and GenoXRef.GenoFreezeId = GenoFreeze.Id
+ and GenoFreeze.Id = %s""" % (where_clause,
+ self.sescape(str(self.dataset.id))))
+
+ return query
+
+ def run(self):
+ """Generates and runs a simple search of a genotype dataset"""
+ # Todo: Zach will figure out exactly what both these lines mean
+ # and comment here
+
+ if self.search_term[0] == "*":
+ self.query = self.compile_final_query()
+ else:
+ self.query = self.compile_final_query(
+ where_clause=self.get_where_clause())
+
+ return self.execute(self.query)
+
+
+class RifSearch(MrnaAssaySearch):
+ """Searches for traits with a Gene RIF entry including the search term."""
+
+ DoSearch.search_types['ProbeSet_RIF'] = "RifSearch"
+
+ def get_from_clause(self):
+ return f" INNER JOIN GeneRIF_BASIC ON GeneRIF_BASIC.`symbol` = { self.dataset.type }.`symbol` "
+
+ def get_where_clause(self):
+ where_clause = f"(MATCH (GeneRIF_BASIC.comment) AGAINST ('+{ self.search_term[0] }' IN BOOLEAN MODE)) "
+
+ return where_clause
+
+ def run(self):
+ from_clause = self.get_from_clause()
+ where_clause = self.get_where_clause()
+
+ query = self.compile_final_query(from_clause, where_clause)
+
+ return self.execute(query)
+
+
+class WikiSearch(MrnaAssaySearch):
+ """Searches GeneWiki for traits other people have annotated"""
+
+ DoSearch.search_types['ProbeSet_WIKI'] = "WikiSearch"
+
+ def get_from_clause(self):
+ return ", GeneRIF "
+
+ def get_where_clause(self):
+ where_clause = """%s.symbol = GeneRIF.symbol
+ and GeneRIF.versionId=0 and GeneRIF.display>0
+ and (GeneRIF.comment LIKE '%s' or GeneRIF.initial = '%s')
+ """ % (self.dataset.type,
+ "%" + str(self.search_term[0]) + "%",
+ str(self.search_term[0]))
+ return where_clause
+
+ def run(self):
+ from_clause = self.get_from_clause()
+ where_clause = self.get_where_clause()
+
+ query = self.compile_final_query(from_clause, where_clause)
+
+ return self.execute(query)
+
+
+class GoSearch(MrnaAssaySearch):
+ """Searches for synapse-associated genes listed in the Gene Ontology."""
+
+ DoSearch.search_types['ProbeSet_GO'] = "GoSearch"
+
+ def get_from_clause(self):
+ from_clause = """, db_GeneOntology.term as GOterm,
+ db_GeneOntology.association as GOassociation,
+ db_GeneOntology.gene_product as GOgene_product """
+
+ return from_clause
+
+ def get_where_clause(self):
+ field = 'GOterm.acc'
+ go_id = 'GO:' + ('0000000' + self.search_term[0])[-7:]
+
+ statements = ("""%s.symbol=GOgene_product.symbol and
+ GOassociation.gene_product_id=GOgene_product.id and
+ GOterm.id=GOassociation.term_id""" % (
+ self.sescape(self.dataset.type)))
+
+ where_clause = " %s = '%s' and %s " % (field, go_id, statements)
+
+ return where_clause
+
+ def run(self):
+ from_clause = self.get_from_clause()
+ where_clause = self.get_where_clause()
+
+ query = self.compile_final_query(from_clause, where_clause)
+
+ return self.execute(query)
+
+# ZS: Not sure what the best way to deal with LRS searches is
+
+
+class LrsSearch(DoSearch):
+ """Searches for genes with a QTL within the given LRS values
+
+ LRS searches can take 3 different forms:
+ - LRS > (or <) min/max_LRS
+ - LRS=(min_LRS max_LRS)
+ - LRS=(min_LRS max_LRS chromosome start_Mb end_Mb)
+ where min/max_LRS represent the range of LRS scores and start/end_Mb represent
+ the range in megabases on the given chromosome
+
+ """
+
+ for search_key in ('LRS', 'LOD'):
+ DoSearch.search_types[search_key] = "LrsSearch"
+
+ def get_from_clause(self):
+ converted_search_term = []
+ for value in self.search_term:
+ try:
+ converted_search_term.append(float(value))
+ except:
+ converted_search_term.append(value)
+
+ self.search_term = converted_search_term
+
+ from_clause = ""
+
+ return from_clause
+
+ def get_where_clause(self):
+ if self.search_operator == "=":
+ assert isinstance(self.search_term, (list, tuple))
+ lrs_min, lrs_max = self.search_term[:2]
+ if self.search_type == "LOD":
+ lrs_min = lrs_min * 4.61
+ lrs_max = lrs_max * 4.61
+
+ where_clause = """ %sXRef.LRS > %s and
+ %sXRef.LRS < %s """ % self.mescape(self.dataset.type,
+ min(lrs_min,
+ lrs_max),
+ self.dataset.type,
+ max(lrs_min, lrs_max))
+
+ if len(self.search_term) > 2:
+ try:
+ chr_num = int(float(self.search_term[2]))
+ except:
+ chr_num = self.search_term[2].lower().replace('chr', '')
+ self.search_term[2] = chr_num
+
+ where_clause += """ and Geno.Chr = '%s' """ % (chr_num)
+ if len(self.search_term) == 5:
+ mb_low, mb_high = self.search_term[3:]
+ where_clause += """ and Geno.Mb > %s and
+ Geno.Mb < %s
+ """ % self.mescape(min(mb_low, mb_high),
+ max(mb_low, mb_high))
+
+ where_clause += """ and %sXRef.Locus = Geno.name and
+ Geno.SpeciesId = %s
+ """ % self.mescape(self.dataset.type,
+ self.species_id)
+ else:
+ # Deal with >, <, >=, and <=
+ lrs_val = self.search_term[0]
+ if self.search_type == "LOD":
+ lrs_val = lrs_val * 4.61
+
+ where_clause = """ %sXRef.LRS %s %s """ % self.mescape(self.dataset.type,
+ self.search_operator,
+ self.search_term[0])
+
+ return where_clause
+
+ def run(self):
+
+ self.from_clause = self.get_from_clause()
+ self.where_clause = self.get_where_clause()
+
+ self.query = self.compile_final_query(
+ self.from_clause, self.where_clause)
+
+ return self.execute(self.query)
+
+
+class MrnaLrsSearch(LrsSearch, MrnaAssaySearch):
+
+ for search_key in ('LRS', 'LOD'):
+ DoSearch.search_types['ProbeSet_' + search_key] = "MrnaLrsSearch"
+
+ def run(self):
+ self.from_clause = self.get_from_clause()
+ self.where_clause = self.get_where_clause()
+
+ self.query = self.compile_final_query(
+ from_clause=self.from_clause, where_clause=self.where_clause)
+
+ return self.execute(self.query)
+
+
+class PhenotypeLrsSearch(LrsSearch, PhenotypeSearch):
+
+ for search_key in ('LRS', 'LOD'):
+ DoSearch.search_types['Publish_' + search_key] = "PhenotypeLrsSearch"
+
+ def run(self):
+
+ self.from_clause = self.get_from_clause()
+ self.where_clause = self.get_where_clause()
+
+ self.query = self.compile_final_query(
+ from_clause=self.from_clause, where_clause=self.where_clause)
+
+ return self.execute(self.query)
+
+
+class CisTransLrsSearch(DoSearch):
+
+ def get_where_clause(self, cis_trans):
+ self.mb_buffer = 5 # default
+ chromosome = None
+ if cis_trans == "cis":
+ the_operator = "<"
+ else:
+ the_operator = ">"
+
+ if self.search_operator == "=":
+ if len(self.search_term) == 2 or len(self.search_term) == 3:
+ self.search_term = [float(value) for value in self.search_term]
+ if len(self.search_term) == 2:
+ lrs_min, lrs_max = self.search_term
+ #[int(value) for value in self.search_term]
+ elif len(self.search_term) == 3:
+ lrs_min, lrs_max, self.mb_buffer = self.search_term
+ elif len(self.search_term) == 4:
+ lrs_min, lrs_max, self.mb_buffer = [
+ float(value) for value in self.search_term[:3]]
+ chromosome = self.search_term[3]
+ chr_str = re.match("(^c|^C)[a-z]*", chromosome)
+ if chr_str:
+ chromosome = int(chromosome.replace(chr_str.group(0), ''))
+ else:
+ SomeError
+
+ if self.search_type == "CISLOD" or self.search_type == "TRANSLOD":
+ lrs_min = lrs_min * 4.61
+ lrs_max = lrs_max * 4.61
+
+ sub_clause = """ %sXRef.LRS > %s and
+ %sXRef.LRS < %s and """ % (
+ self.sescape(self.dataset.type),
+ self.sescape(str(min(lrs_min, lrs_max))),
+ self.sescape(self.dataset.type),
+ self.sescape(str(max(lrs_min, lrs_max)))
+ )
+ else:
+ # Deal with >, <, >=, and <=
+ sub_clause = """ %sXRef.LRS %s %s and """ % (
+ self.sescape(self.dataset.type),
+ self.sescape(self.search_operator),
+ self.sescape(self.search_term[0])
+ )
+
+ if cis_trans == "cis":
+ where_clause = sub_clause + """
+ ABS(%s.Mb-Geno.Mb) %s %s and
+ %sXRef.Locus = Geno.name and
+ Geno.SpeciesId = %s and
+ %s.Chr = Geno.Chr""" % (
+ self.sescape(self.dataset.type),
+ the_operator,
+ self.sescape(str(self.mb_buffer)),
+ self.sescape(self.dataset.type),
+ self.sescape(str(self.species_id)),
+ self.sescape(self.dataset.type)
+ )
+ else:
+ if chromosome:
+ location_clause = """
+ (%s.Chr = '%s' and %s.Chr = Geno.Chr and ABS(%s.Mb-Geno.Mb) %s %s)
+ or (%s.Chr != Geno.Chr and Geno.Chr = '%s')""" % (
+ self.sescape(self.dataset.type),
+ chromosome,
+ self.sescape(
+ self.dataset.type),
+ self.sescape(
+ self.dataset.type),
+ the_operator,
+ self.sescape(
+ str(self.mb_buffer)),
+ self.sescape(
+ self.dataset.type),
+ chromosome)
+ else:
+ location_clause = "(ABS(%s.Mb-Geno.Mb) %s %s and %s.Chr = Geno.Chr) or (%s.Chr != Geno.Chr)" % (self.sescape(
+ self.dataset.type), the_operator, self.sescape(str(self.mb_buffer)), self.sescape(self.dataset.type), self.sescape(self.dataset.type))
+ where_clause = sub_clause + """
+ %sXRef.Locus = Geno.name and
+ Geno.SpeciesId = %s and
+ (%s)""" % (
+ self.sescape(self.dataset.type),
+ self.sescape(str(self.species_id)),
+ location_clause
+ )
+
+ return where_clause
+
+
+class CisLrsSearch(CisTransLrsSearch, MrnaAssaySearch):
+ """
+ Searches for genes on a particular chromosome with a cis-eQTL within the given LRS values
+
+ A cisLRS search can take 3 forms:
+ - cisLRS=(min_LRS max_LRS)
+ - cisLRS=(min_LRS max_LRS mb_buffer)
+ - cisLRS>min_LRS
+ where min/max_LRS represent the range of LRS scores and the mb_buffer is the range around
+ a particular QTL where its eQTL would be considered "cis". If there is no third parameter,
+ mb_buffer will default to 5 megabases.
+
+ A QTL is a cis-eQTL if a gene's expression is regulated by a QTL in roughly the same area
+ (where the area is determined by the mb_buffer that the user can choose).
+
+ """
+
+ for search_key in ('LRS', 'LOD'):
+ DoSearch.search_types['ProbeSet_CIS' + search_key] = "CisLrsSearch"
+
+ def get_where_clause(self):
+ return CisTransLrsSearch.get_where_clause(self, "cis")
+
+ def run(self):
+ self.from_clause = self.get_from_clause()
+ self.where_clause = self.get_where_clause()
+
+ self.query = self.compile_final_query(
+ self.from_clause, self.where_clause)
+
+ return self.execute(self.query)
+
+
+class TransLrsSearch(CisTransLrsSearch, MrnaAssaySearch):
+ """Searches for genes on a particular chromosome with a cis-eQTL within the given LRS values
+
+ A transLRS search can take 3 forms:
+ - transLRS=(min_LRS max_LRS)
+ - transLRS=(min_LRS max_LRS mb_buffer)
+ - transLRS>min_LRS
+ where min/max_LRS represent the range of LRS scores and the mb_buffer is the range around
+ a particular QTL where its eQTL would be considered "cis". If there is no third parameter,
+ mb_buffer will default to 5 megabases.
+
+ A QTL is a trans-eQTL if a gene's expression is regulated by a QTL in a different location/area
+ (where the area is determined by the mb_buffer that the user can choose). Opposite of cis-eQTL.
+
+ """
+
+ for search_key in ('LRS', 'LOD'):
+ DoSearch.search_types['ProbeSet_TRANS' + search_key] = "TransLrsSearch"
+
+ def get_where_clause(self):
+ return CisTransLrsSearch.get_where_clause(self, "trans")
+
+ def run(self):
+ self.from_clause = self.get_from_clause()
+ self.where_clause = self.get_where_clause()
+
+ self.query = self.compile_final_query(
+ self.from_clause, self.where_clause)
+
+ return self.execute(self.query)
+
+
+class MeanSearch(MrnaAssaySearch):
+ """Searches for genes expressed within an interval (log2 units) determined by the user"""
+
+ DoSearch.search_types['ProbeSet_MEAN'] = "MeanSearch"
+
+ def get_where_clause(self):
+ self.search_term = [float(value) for value in self.search_term]
+
+ if self.search_operator == "=":
+ assert isinstance(self.search_term, (list, tuple))
+ self.mean_min, self.mean_max = self.search_term[:2]
+
+ where_clause = """ %sXRef.mean > %s and
+ %sXRef.mean < %s """ % self.mescape(self.dataset.type,
+ min(self.mean_min,
+ self.mean_max),
+ self.dataset.type,
+ max(self.mean_min, self.mean_max))
+ else:
+ # Deal with >, <, >=, and <=
+ where_clause = """ %sXRef.mean %s %s """ % self.mescape(self.dataset.type,
+ self.search_operator,
+ self.search_term[0])
+
+ return where_clause
+
+ def run(self):
+ self.where_clause = self.get_where_clause()
+
+ self.query = self.compile_final_query(where_clause=self.where_clause)
+
+ return self.execute(self.query)
+
+
+class RangeSearch(MrnaAssaySearch):
+ """Searches for genes with a range of expression varying between two values"""
+
+ DoSearch.search_types['ProbeSet_RANGE'] = "RangeSearch"
+
+ def get_where_clause(self):
+ if self.search_operator == "=":
+ assert isinstance(self.search_term, (list, tuple))
+ self.range_min, self.range_max = self.search_term[:2]
+ where_clause = """ (SELECT Pow(2, max(value) -min(value))
+ FROM ProbeSetData
+ WHERE ProbeSetData.Id = ProbeSetXRef.dataId) > %s AND
+ (SELECT Pow(2, max(value) -min(value))
+ FROM ProbeSetData
+ WHERE ProbeSetData.Id = ProbeSetXRef.dataId) < %s
+ """ % self.mescape(min(self.range_min, self.range_max),
+ max(self.range_min, self.range_max))
+ else:
+ # Deal with >, <, >=, and <=
+ where_clause = """ (SELECT Pow(2, max(value) -min(value))
+ FROM ProbeSetData
+ WHERE ProbeSetData.Id = ProbeSetXRef.dataId) > %s
+ """ % (self.sescape(self.search_term[0]))
+ return where_clause
+
+ def run(self):
+ self.where_clause = self.get_where_clause()
+
+ self.query = self.compile_final_query(where_clause=self.where_clause)
+
+ return self.execute(self.query)
+
+
+class PositionSearch(DoSearch):
+ """Searches for genes/markers located within a specified range on a specified chromosome"""
+
+ for search_key in ('POSITION', 'POS', 'MB'):
+ DoSearch.search_types[search_key] = "PositionSearch"
+
+ def get_where_clause(self):
+ self.search_term = [float(value) if is_number(
+ value) else value for value in self.search_term]
+ chr, self.mb_min, self.mb_max = self.search_term[:3]
+ self.chr = str(chr).lower()
+ self.get_chr()
+
+ where_clause = """ %s.Chr = '%s' and
+ %s.Mb > %s and
+ %s.Mb < %s """ % self.mescape(self.dataset.type,
+ self.chr,
+ self.dataset.type,
+ min(self.mb_min,
+ self.mb_max),
+ self.dataset.type,
+ max(self.mb_min, self.mb_max))
+
+ return where_clause
+
+ def get_chr(self):
+ try:
+ self.chr = int(float(self.chr))
+ except:
+ self.chr = self.chr.lower().replace('chr', '')
+
+ def run(self):
+
+ self.get_where_clause()
+ self.query = self.compile_final_query(where_clause=self.where_clause)
+
+ return self.execute(self.query)
+
+
+class MrnaPositionSearch(PositionSearch, MrnaAssaySearch):
+ """Searches for genes located within a specified range on a specified chromosome"""
+
+ for search_key in ('POSITION', 'POS', 'MB'):
+ DoSearch.search_types['ProbeSet_' + search_key] = "MrnaPositionSearch"
+
+ def run(self):
+
+ self.where_clause = self.get_where_clause()
+ self.query = self.compile_final_query(where_clause=self.where_clause)
+
+ return self.execute(self.query)
+
+
+class GenotypePositionSearch(PositionSearch, GenotypeSearch):
+ """Searches for genes located within a specified range on a specified chromosome"""
+
+ for search_key in ('POSITION', 'POS', 'MB'):
+ DoSearch.search_types['Geno_' + search_key] = "GenotypePositionSearch"
+
+ def run(self):
+
+ self.where_clause = self.get_where_clause()
+ self.query = self.compile_final_query(where_clause=self.where_clause)
+
+ return self.execute(self.query)
+
+
+class PvalueSearch(MrnaAssaySearch):
+ """Searches for traits with a permutationed p-value between low and high"""
+
+ DoSearch.search_types['ProbeSet_PVALUE'] = "PvalueSearch"
+
+ def run(self):
+
+ self.search_term = [float(value) for value in self.search_term]
+
+ if self.search_operator == "=":
+ assert isinstance(self.search_term, (list, tuple))
+ self.pvalue_min, self.pvalue_max = self.search_term[:2]
+ self.where_clause = """ %sXRef.pValue > %s and %sXRef.pValue < %s
+ """ % self.mescape(
+ self.dataset.type,
+ min(self.pvalue_min, self.pvalue_max),
+ self.dataset.type,
+ max(self.pvalue_min, self.pvalue_max))
+ else:
+ # Deal with >, <, >=, and <=
+ self.where_clause = """ %sXRef.pValue %s %s
+ """ % self.mescape(
+ self.dataset.type,
+ self.search_operator,
+ self.search_term[0])
+
+ self.query = self.compile_final_query(where_clause=self.where_clause)
+ return self.execute(self.query)
+
+
+class AuthorSearch(PhenotypeSearch):
+ """Searches for phenotype traits with specified author(s)"""
+
+ DoSearch.search_types["Publish_NAME"] = "AuthorSearch"
+
+ def run(self):
+ search_term = "%" + self.search_term[0] + "%"
+ self.where_clause = """ Publication.Authors LIKE "%s" and
+ """ % (search_term)
+
+ self.query = self.compile_final_query(where_clause=self.where_clause)
+
+ return self.execute(self.query)
+
+
+def is_number(s):
+ try:
+ float(s)
+ return True
+ except ValueError:
+ return False
+
+
+if __name__ == "__main__":
+ # Usually this will be used as a library, but call it from the command line for testing
+ # And it runs the code below
+ import sys
+
+ from gn2.base import webqtlConfig
+ from gn2.base.data_set import create_dataset
+ from gn2.utility import webqtlUtil
+ from gn2.db import webqtlDatabaseFunction
+
+ from gn2.wqflask.database import database_connection
+
+ with database_connection(get_setting("SQL_URI")) as db_conn:
+ with db_conn.cursor() as cursor:
+ dataset_name = "HC_M2_0606_P"
+ dataset = create_dataset(db_conn, dataset_name)
+
+ results = PvalueSearch(['0.005'], '<', dataset, cursor, db_conn).run()