diff options
Diffstat (limited to 'wqflask/wqflask/do_search.py')
-rw-r--r-- | wqflask/wqflask/do_search.py | 399 |
1 files changed, 227 insertions, 172 deletions
diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py index b0ca5ced..6b8dfa41 100644 --- a/wqflask/wqflask/do_search.py +++ b/wqflask/wqflask/do_search.py @@ -1,16 +1,13 @@ -from __future__ import print_function, division - import string import requests import json from flask import Flask, g -from MySQLdb import escape_string as escape +from utility.db_tools import escape from pprint import pformat as pf import sys -# sys.path.append("..") Never in a running webserver from db import webqtlDatabaseFunction from utility.tools import GN2_BASE_URL @@ -19,7 +16,8 @@ import logging from utility.logger import getLogger logger = getLogger(__name__) -class DoSearch(object): + +class DoSearch: """Parent class containing parameters/functions used for all searches""" # Used to translate search phrases into classes @@ -28,17 +26,16 @@ class DoSearch(object): def __init__(self, search_term, search_operator=None, dataset=None, search_type=None): self.search_term = search_term # Make sure search_operator is something we expect - assert search_operator in (None, "=", "<", ">", "<=", ">="), "Bad search operator" + assert search_operator in ( + None, "=", "<", ">", "<=", ">="), "Bad search operator" self.search_operator = search_operator self.dataset = dataset self.search_type = search_type if self.dataset: - logger.debug("self.dataset is boo: ", type(self.dataset), pf(self.dataset)) - logger.debug("self.dataset.group is: ", pf(self.dataset.group)) - #Get group information for dataset and the species id - - self.species_id = webqtlDatabaseFunction.retrieve_species_id(self.dataset.group.name) + # Get group information for dataset and the species id + self.species_id = webqtlDatabaseFunction.retrieve_species_id( + self.dataset.group.name) def execute(self, query): """Executes query and returns results""" @@ -49,15 +46,11 @@ class DoSearch(object): def handle_wildcard(self, str): keyword = str.strip() - keyword = keyword.replace("*",".*") - keyword = keyword.replace("?",".") + keyword = keyword.replace("*", ".*") + keyword = keyword.replace("?", ".") return keyword - #def escape(self, stringy): - # """Shorter name than self.db_conn.escape_string""" - # return escape(str(stringy)) - def mescape(self, *items): """Multiple escape""" escaped = [escape(str(item)) for item in items] @@ -71,8 +64,6 @@ class DoSearch(object): @classmethod def get_search(cls, search_type): - logger.debug("search_types are:", pf(cls.search_types)) - search_type_string = search_type['dataset_type'] if 'key' in search_type and search_type['key'] != None: search_type_string += '_' + search_type['key'] @@ -84,21 +75,37 @@ class DoSearch(object): else: return None + class MrnaAssaySearch(DoSearch): """A search within an expression dataset, including mRNA, protein, SNP, but not phenotype or metabolites""" DoSearch.search_types['ProbeSet'] = "MrnaAssaySearch" - base_query = """SELECT distinct ProbeSet.Name as TNAME, - 0 as thistable, - ProbeSetXRef.Mean as TMEAN, - ProbeSetXRef.LRS as TLRS, - ProbeSetXRef.PVALUE as TPVALUE, - ProbeSet.Chr_num as TCHR_NUM, - ProbeSet.Mb as TMB, - ProbeSet.Symbol as TSYMBOL, - ProbeSet.name_num as TNAME_NUM - FROM ProbeSetXRef, ProbeSet """ + base_query = """ + SELECT + ProbeSetFreeze.`Name`, + ProbeSetFreeze.`FullName`, + ProbeSet.`Name`, + ProbeSet.`Symbol`, + CAST(ProbeSet.`description` AS BINARY), + CAST(ProbeSet.`Probe_Target_Description` AS BINARY), + ProbeSet.`Chr`, + ProbeSet.`Mb`, + ProbeSetXRef.`Mean`, + ProbeSetXRef.`LRS`, + ProbeSetXRef.`Locus`, + ProbeSetXRef.`pValue`, + ProbeSetXRef.`additive`, + Geno.`Chr` as geno_chr, + Geno.`Mb` as geno_mb + FROM Species + INNER JOIN InbredSet ON InbredSet.`SpeciesId`= Species.`Id` + INNER JOIN ProbeFreeze ON ProbeFreeze.`InbredSetId` = InbredSet.`Id` + INNER JOIN Tissue ON ProbeFreeze.`TissueId` = Tissue.`Id` + INNER JOIN ProbeSetFreeze ON ProbeSetFreeze.`ProbeFreezeId` = ProbeFreeze.`Id` + INNER JOIN ProbeSetXRef ON ProbeSetXRef.`ProbeSetFreezeId` = ProbeSetFreeze.`Id` + INNER JOIN ProbeSet ON ProbeSet.`Id` = ProbeSetXRef.`ProbeSetId` + LEFT JOIN Geno ON ProbeSetXRef.`Locus` = Geno.`Name` AND Geno.`SpeciesId` = Species.`Id` """ header_fields = ['Index', 'Record', @@ -114,12 +121,13 @@ class MrnaAssaySearch(DoSearch): search_string = escape(self.search_term[0]) if self.search_term[0] != "*": - match_clause = """((MATCH (ProbeSet.symbol) AGAINST ('%s' IN BOOLEAN MODE))) and """ % (search_string) + match_clause = """((MATCH (ProbeSet.symbol) AGAINST ('%s' IN BOOLEAN MODE))) and """ % ( + search_string) else: match_clause = "" - where_clause = (match_clause + - """ProbeSet.Id = ProbeSetXRef.ProbeSetId + where_clause = (match_clause + + """ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = %s """ % (escape(str(self.dataset.id)))) @@ -141,30 +149,30 @@ class MrnaAssaySearch(DoSearch): else: match_clause = "" - where_clause = (match_clause + - """ProbeSet.Id = ProbeSetXRef.ProbeSetId + where_clause = (match_clause + + """ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = %s """ % (escape(str(self.dataset.id)))) return where_clause - def compile_final_query(self, from_clause = '', where_clause = ''): + def compile_final_query(self, from_clause='', where_clause=''): """Generates the final query string""" from_clause = self.normalize_spaces(from_clause) query = (self.base_query + - """%s + """%s WHERE %s and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = %s ORDER BY ProbeSet.symbol ASC """ % (escape(from_clause), - where_clause, - escape(str(self.dataset.id)))) + where_clause, + escape(str(self.dataset.id)))) return query - def run_combined(self, from_clause = '', where_clause = ''): + def run_combined(self, from_clause='', where_clause=''): """Generates and runs a combined search of an mRNA expression dataset""" logger.debug("Running ProbeSetSearch") @@ -173,14 +181,14 @@ class MrnaAssaySearch(DoSearch): from_clause = self.normalize_spaces(from_clause) query = (self.base_query + - """%s + """%s WHERE %s and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = %s ORDER BY ProbeSet.symbol ASC """ % (escape(from_clause), - where_clause, - escape(str(self.dataset.id)))) + where_clause, + escape(str(self.dataset.id)))) return self.execute(query) @@ -200,21 +208,36 @@ class PhenotypeSearch(DoSearch): DoSearch.search_types['Publish'] = "PhenotypeSearch" base_query = """SELECT PublishXRef.Id, - PublishFreeze.createtime as thistable, - Publication.PubMed_ID as Publication_PubMed_ID, - Phenotype.Post_publication_description as Phenotype_Name - FROM Phenotype, PublishFreeze, Publication, PublishXRef """ + CAST(Phenotype.`Pre_publication_description` AS BINARY), + CAST(Phenotype.`Post_publication_description` AS BINARY), + Publication.`Authors`, + Publication.`Year`, + Publication.`PubMed_ID`, + PublishXRef.`mean`, + PublishXRef.`LRS`, + PublishXRef.`additive`, + PublishXRef.`Locus`, + InbredSet.`InbredSetCode`, + Geno.`Chr`, + Geno.`Mb` + FROM Species + INNER JOIN InbredSet ON InbredSet.`SpeciesId` = Species.`Id` + INNER JOIN PublishXRef ON PublishXRef.`InbredSetId` = InbredSet.`Id` + INNER JOIN PublishFreeze ON PublishFreeze.`InbredSetId` = InbredSet.`Id` + INNER JOIN Publication ON Publication.`Id` = PublishXRef.`PublicationId` + INNER JOIN Phenotype ON Phenotype.`Id` = PublishXRef.`PhenotypeId` + LEFT JOIN Geno ON PublishXRef.Locus = Geno.Name AND Geno.SpeciesId = Species.Id """ search_fields = ('Phenotype.Post_publication_description', - 'Phenotype.Pre_publication_description', - 'Phenotype.Pre_publication_abbreviation', - 'Phenotype.Post_publication_abbreviation', - 'Phenotype.Lab_code', - 'Publication.PubMed_ID', - 'Publication.Abstract', - 'Publication.Title', - 'Publication.Authors', - 'PublishXRef.Id') + 'Phenotype.Pre_publication_description', + 'Phenotype.Pre_publication_abbreviation', + 'Phenotype.Post_publication_abbreviation', + 'Phenotype.Lab_code', + 'Publication.PubMed_ID', + 'Publication.Abstract', + 'Publication.Title', + 'Publication.Authors', + 'PublishXRef.Id') header_fields = ['Index', 'Record', @@ -229,53 +252,56 @@ class PhenotypeSearch(DoSearch): def get_where_clause(self): """Generate clause for WHERE portion of query""" - #Todo: Zach will figure out exactly what both these lines mean - #and comment here + # Todo: Zach will figure out exactly what both these lines mean + # and comment here - #if "'" not in self.search_term[0]: - search_term = "[[:<:]]" + self.handle_wildcard(self.search_term[0]) + "[[:>:]]" + # if "'" not in self.search_term[0]: + search_term = "[[:<:]]" + \ + self.handle_wildcard(self.search_term[0]) + "[[:>:]]" if "_" in self.search_term[0]: if len(self.search_term[0].split("_")[0]) == 3: - search_term = "[[:<:]]" + self.handle_wildcard(self.search_term[0].split("_")[1]) + "[[:>:]]" + search_term = "[[:<:]]" + self.handle_wildcard( + self.search_term[0].split("_")[1]) + "[[:>:]]" # This adds a clause to the query that matches the search term # against each field in the search_fields tuple where_clause_list = [] for field in self.search_fields: - where_clause_list.append('''%s REGEXP "%s"''' % (field, search_term)) + where_clause_list.append('''%s REGEXP "%s"''' % + (field, search_term)) where_clause = "(%s) " % ' OR '.join(where_clause_list) return where_clause - def compile_final_query(self, from_clause = '', where_clause = ''): + def compile_final_query(self, from_clause='', where_clause=''): """Generates the final query string""" from_clause = self.normalize_spaces(from_clause) if self.search_term[0] == "*": query = (self.base_query + - """%s + """%s WHERE PublishXRef.InbredSetId = %s and PublishXRef.PhenotypeId = Phenotype.Id and PublishXRef.PublicationId = Publication.Id and PublishFreeze.Id = %s ORDER BY PublishXRef.Id""" % ( - from_clause, - escape(str(self.dataset.group.id)), - escape(str(self.dataset.id)))) + from_clause, + escape(str(self.dataset.group.id)), + escape(str(self.dataset.id)))) else: query = (self.base_query + - """%s + """%s WHERE %s and PublishXRef.InbredSetId = %s and PublishXRef.PhenotypeId = Phenotype.Id and PublishXRef.PublicationId = Publication.Id and PublishFreeze.Id = %s ORDER BY PublishXRef.Id""" % ( - from_clause, - where_clause, - escape(str(self.dataset.group.id)), - escape(str(self.dataset.id)))) + from_clause, + where_clause, + escape(str(self.dataset.group.id)), + escape(str(self.dataset.id)))) return query @@ -287,26 +313,27 @@ class PhenotypeSearch(DoSearch): from_clause = self.normalize_spaces(from_clause) query = (self.base_query + - """%s + """%s WHERE %s PublishXRef.InbredSetId = %s and PublishXRef.PhenotypeId = Phenotype.Id and PublishXRef.PublicationId = Publication.Id and PublishFreeze.Id = %s""" % ( - from_clause, - where_clause, - escape(str(self.dataset.group.id)), - escape(str(self.dataset.id)))) + from_clause, + where_clause, + escape(str(self.dataset.group.id)), + escape(str(self.dataset.id)))) return self.execute(query) def run(self): """Generates and runs a simple search of a phenotype dataset""" - query = self.compile_final_query(where_clause = self.get_where_clause()) + query = self.compile_final_query(where_clause=self.get_where_clause()) return self.execute(query) + class GenotypeSearch(DoSearch): """A search within a genotype dataset""" @@ -339,57 +366,56 @@ class GenotypeSearch(DoSearch): for field in self.search_fields: where_clause.append('''%s REGEXP "%s"''' % ("%s.%s" % self.mescape(self.dataset.type, field), - self.search_term)) + self.search_term)) logger.debug("hello ;where_clause is:", pf(where_clause)) where_clause = "(%s) " % ' OR '.join(where_clause) return where_clause - def compile_final_query(self, from_clause = '', where_clause = ''): + def compile_final_query(self, from_clause='', where_clause=''): """Generates the final query string""" from_clause = self.normalize_spaces(from_clause) - if self.search_term[0] == "*": - query = (self.base_query + - """WHERE Geno.Id = GenoXRef.GenoId + query = (self.base_query + + """WHERE Geno.Id = GenoXRef.GenoId and GenoXRef.GenoFreezeId = GenoFreeze.Id - and GenoFreeze.Id = %s"""% (escape(str(self.dataset.id)))) + and GenoFreeze.Id = %s""" % (escape(str(self.dataset.id)))) else: query = (self.base_query + - """WHERE %s + """WHERE %s and Geno.Id = GenoXRef.GenoId and GenoXRef.GenoFreezeId = GenoFreeze.Id - and GenoFreeze.Id = %s"""% (where_clause, - escape(str(self.dataset.id)))) + and GenoFreeze.Id = %s""" % (where_clause, + escape(str(self.dataset.id)))) return query def run(self): """Generates and runs a simple search of a genotype dataset""" - #Todo: Zach will figure out exactly what both these lines mean - #and comment here + # Todo: Zach will figure out exactly what both these lines mean + # and comment here if self.search_term[0] == "*": self.query = self.compile_final_query() else: - self.query = self.compile_final_query(where_clause = self.get_where_clause()) + self.query = self.compile_final_query( + where_clause=self.get_where_clause()) return self.execute(self.query) + class RifSearch(MrnaAssaySearch): """Searches for traits with a Gene RIF entry including the search term.""" DoSearch.search_types['ProbeSet_RIF'] = "RifSearch" def get_from_clause(self): - return ", GeneRIF_BASIC " + return f" INNER JOIN GeneRIF_BASIC ON GeneRIF_BASIC.`symbol` = { self.dataset.type }.`symbol` " def get_where_clause(self): - where_clause = """( %s.symbol = GeneRIF_BASIC.symbol and - MATCH (GeneRIF_BASIC.comment) - AGAINST ('+%s' IN BOOLEAN MODE)) """ % (self.dataset.type, self.search_term[0]) + where_clause = f"(MATCH (GeneRIF_BASIC.comment) AGAINST ('+{ self.search_term[0] }' IN BOOLEAN MODE)) " return where_clause @@ -401,10 +427,11 @@ class RifSearch(MrnaAssaySearch): return self.execute(query) + class WikiSearch(MrnaAssaySearch): """Searches GeneWiki for traits other people have annotated""" - DoSearch.search_types['ProbeSet_WIKI'] = "WikiSearch" + DoSearch.search_types['ProbeSet_WIKI'] = "WikiSearch" def get_from_clause(self): return ", GeneRIF " @@ -414,7 +441,7 @@ class WikiSearch(MrnaAssaySearch): and GeneRIF.versionId=0 and GeneRIF.display>0 and (GeneRIF.comment REGEXP '%s' or GeneRIF.initial = '%s') """ % (self.dataset.type, - "[[:<:]]"+str(self.search_term[0])+"[[:>:]]", + "[[:<:]]" + str(self.search_term[0]) + "[[:>:]]", str(self.search_term[0])) return where_clause @@ -426,10 +453,11 @@ class WikiSearch(MrnaAssaySearch): return self.execute(query) + class GoSearch(MrnaAssaySearch): """Searches for synapse-associated genes listed in the Gene Ontology.""" - DoSearch.search_types['ProbeSet_GO'] = "GoSearch" + DoSearch.search_types['ProbeSet_GO'] = "GoSearch" def get_from_clause(self): from_clause = """, db_GeneOntology.term as GOterm, @@ -440,7 +468,7 @@ class GoSearch(MrnaAssaySearch): def get_where_clause(self): field = 'GOterm.acc' - go_id = 'GO:' + ('0000000'+self.search_term[0])[-7:] + go_id = 'GO:' + ('0000000' + self.search_term[0])[-7:] statements = ("""%s.symbol=GOgene_product.symbol and GOassociation.gene_product_id=GOgene_product.id and @@ -459,7 +487,9 @@ class GoSearch(MrnaAssaySearch): return self.execute(query) -#ZS: Not sure what the best way to deal with LRS searches is +# ZS: Not sure what the best way to deal with LRS searches is + + class LrsSearch(DoSearch): """Searches for genes with a QTL within the given LRS values @@ -497,17 +527,18 @@ class LrsSearch(DoSearch): assert isinstance(self.search_term, (list, tuple)) lrs_min, lrs_max = self.search_term[:2] if self.search_type == "LOD": - lrs_min = lrs_min*4.61 - lrs_max = lrs_max*4.61 + lrs_min = lrs_min * 4.61 + lrs_max = lrs_max * 4.61 where_clause = """ %sXRef.LRS > %s and %sXRef.LRS < %s """ % self.mescape(self.dataset.type, - min(lrs_min, lrs_max), + min(lrs_min, + lrs_max), self.dataset.type, max(lrs_min, lrs_max)) if len(self.search_term) > 2: - #If the user typed, for example "Chr4", the "Chr" substring needs to be removed so that all search elements can be converted to floats + # If the user typed, for example "Chr4", the "Chr" substring needs to be removed so that all search elements can be converted to floats chr_num = self.search_term[2] if "chr" in self.search_term[2].lower(): chr_num = self.search_term[2].lower().replace("chr", "") @@ -523,27 +554,27 @@ class LrsSearch(DoSearch): where_clause += """ and %sXRef.Locus = Geno.name and Geno.SpeciesId = %s """ % self.mescape(self.dataset.type, - self.species_id) + self.species_id) else: # Deal with >, <, >=, and <= logger.debug("self.search_term is:", self.search_term) lrs_val = self.search_term[0] if self.search_type == "LOD": - lrs_val = lrs_val*4.61 + lrs_val = lrs_val * 4.61 where_clause = """ %sXRef.LRS %s %s """ % self.mescape(self.dataset.type, - self.search_operator, - self.search_term[0]) + self.search_operator, + self.search_term[0]) return where_clause - def run(self): self.from_clause = self.get_from_clause() self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(self.from_clause, self.where_clause) + self.query = self.compile_final_query( + self.from_clause, self.where_clause) return self.execute(self.query) @@ -557,10 +588,12 @@ class MrnaLrsSearch(LrsSearch, MrnaAssaySearch): self.from_clause = self.get_from_clause() self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(from_clause = self.from_clause, where_clause = self.where_clause) + self.query = self.compile_final_query( + from_clause=self.from_clause, where_clause=self.where_clause) return self.execute(self.query) + class PhenotypeLrsSearch(LrsSearch, PhenotypeSearch): for search_key in ('LRS', 'LOD'): @@ -571,7 +604,8 @@ class PhenotypeLrsSearch(LrsSearch, PhenotypeSearch): self.from_clause = self.get_from_clause() self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(from_clause = self.from_clause, where_clause = self.where_clause) + self.query = self.compile_final_query( + from_clause=self.from_clause, where_clause=self.where_clause) return self.execute(self.query) @@ -598,7 +632,8 @@ class CisTransLrsSearch(DoSearch): elif len(self.search_term) == 3: lrs_min, lrs_max, self.mb_buffer = self.search_term elif len(self.search_term) == 4: - lrs_min, lrs_max, self.mb_buffer = [float(value) for value in self.search_term[:3]] + lrs_min, lrs_max, self.mb_buffer = [ + float(value) for value in self.search_term[:3]] chromosome = self.search_term[3] if "Chr" in chromosome or "chr" in chromosome: chromosome = int(chromosome[3:]) @@ -610,19 +645,19 @@ class CisTransLrsSearch(DoSearch): lrs_max = lrs_max * 4.61 sub_clause = """ %sXRef.LRS > %s and - %sXRef.LRS < %s and """ % ( - escape(self.dataset.type), - escape(str(min(lrs_min, lrs_max))), - escape(self.dataset.type), - escape(str(max(lrs_min, lrs_max))) - ) + %sXRef.LRS < %s and """ % ( + escape(self.dataset.type), + escape(str(min(lrs_min, lrs_max))), + escape(self.dataset.type), + escape(str(max(lrs_min, lrs_max))) + ) else: # Deal with >, <, >=, and <= - sub_clause = """ %sXRef.LRS %s %s and """ % ( - escape(self.dataset.type), - escape(self.search_operator), - escape(self.search_term[0]) - ) + sub_clause = """ %sXRef.LRS %s %s and """ % ( + escape(self.dataset.type), + escape(self.search_operator), + escape(self.search_term[0]) + ) if cis_trans == "cis": where_clause = sub_clause + """ @@ -630,36 +665,42 @@ class CisTransLrsSearch(DoSearch): %sXRef.Locus = Geno.name and Geno.SpeciesId = %s and %s.Chr = Geno.Chr""" % ( - escape(self.dataset.type), - the_operator, - escape(str(self.mb_buffer)), - escape(self.dataset.type), - escape(str(self.species_id)), - escape(self.dataset.type) - ) + escape(self.dataset.type), + the_operator, + escape(str(self.mb_buffer)), + escape(self.dataset.type), + escape(str(self.species_id)), + escape(self.dataset.type) + ) else: if chromosome: location_clause = "(%s.Chr = '%s' and %s.Chr = Geno.Chr and ABS(%s.Mb-Geno.Mb) %s %s) or (%s.Chr != Geno.Chr and Geno.Chr = '%s')" % (escape(self.dataset.type), - chromosome, - escape(self.dataset.type), - escape(self.dataset.type), - the_operator, - escape(str(self.mb_buffer)), - escape(self.dataset.type), - chromosome) + chromosome, + escape( + self.dataset.type), + escape( + self.dataset.type), + the_operator, + escape( + str(self.mb_buffer)), + escape( + self.dataset.type), + chromosome) else: - location_clause = "(ABS(%s.Mb-Geno.Mb) %s %s and %s.Chr = Geno.Chr) or (%s.Chr != Geno.Chr)" % (escape(self.dataset.type), the_operator, escape(str(self.mb_buffer)), escape(self.dataset.type)) + location_clause = "(ABS(%s.Mb-Geno.Mb) %s %s and %s.Chr = Geno.Chr) or (%s.Chr != Geno.Chr)" % (escape( + self.dataset.type), the_operator, escape(str(self.mb_buffer)), escape(self.dataset.type), escape(self.dataset.type)) where_clause = sub_clause + """ %sXRef.Locus = Geno.name and Geno.SpeciesId = %s and (%s)""" % ( - escape(self.dataset.type), - escape(str(self.species_id)), - location_clause - ) + escape(self.dataset.type), + escape(str(self.species_id)), + location_clause + ) return where_clause + class CisLrsSearch(CisTransLrsSearch, MrnaAssaySearch): """ Searches for genes on a particular chromosome with a cis-eQTL within the given LRS values @@ -678,7 +719,7 @@ class CisLrsSearch(CisTransLrsSearch, MrnaAssaySearch): """ for search_key in ('LRS', 'LOD'): - DoSearch.search_types['ProbeSet_CIS'+search_key] = "CisLrsSearch" + DoSearch.search_types['ProbeSet_CIS' + search_key] = "CisLrsSearch" def get_where_clause(self): return CisTransLrsSearch.get_where_clause(self, "cis") @@ -687,10 +728,12 @@ class CisLrsSearch(CisTransLrsSearch, MrnaAssaySearch): self.from_clause = self.get_from_clause() self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(self.from_clause, self.where_clause) + self.query = self.compile_final_query( + self.from_clause, self.where_clause) return self.execute(self.query) + class TransLrsSearch(CisTransLrsSearch, MrnaAssaySearch): """Searches for genes on a particular chromosome with a cis-eQTL within the given LRS values @@ -708,7 +751,7 @@ class TransLrsSearch(CisTransLrsSearch, MrnaAssaySearch): """ for search_key in ('LRS', 'LOD'): - DoSearch.search_types['ProbeSet_TRANS'+search_key] = "TransLrsSearch" + DoSearch.search_types['ProbeSet_TRANS' + search_key] = "TransLrsSearch" def get_where_clause(self): return CisTransLrsSearch.get_where_clause(self, "trans") @@ -717,7 +760,8 @@ class TransLrsSearch(CisTransLrsSearch, MrnaAssaySearch): self.from_clause = self.get_from_clause() self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(self.from_clause, self.where_clause) + self.query = self.compile_final_query( + self.from_clause, self.where_clause) return self.execute(self.query) @@ -736,14 +780,15 @@ class MeanSearch(MrnaAssaySearch): where_clause = """ %sXRef.mean > %s and %sXRef.mean < %s """ % self.mescape(self.dataset.type, - min(self.mean_min, self.mean_max), - self.dataset.type, - max(self.mean_min, self.mean_max)) + min(self.mean_min, + self.mean_max), + self.dataset.type, + max(self.mean_min, self.mean_max)) else: # Deal with >, <, >=, and <= where_clause = """ %sXRef.mean %s %s """ % self.mescape(self.dataset.type, - self.search_operator, - self.search_term[0]) + self.search_operator, + self.search_term[0]) return where_clause @@ -751,10 +796,11 @@ class MeanSearch(MrnaAssaySearch): self.where_clause = self.get_where_clause() logger.debug("where_clause is:", pf(self.where_clause)) - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) return self.execute(self.query) + class RangeSearch(MrnaAssaySearch): """Searches for genes with a range of expression varying between two values""" @@ -786,10 +832,11 @@ class RangeSearch(MrnaAssaySearch): def run(self): self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) return self.execute(self.query) + class PositionSearch(DoSearch): """Searches for genes/markers located within a specified range on a specified chromosome""" @@ -797,7 +844,8 @@ class PositionSearch(DoSearch): DoSearch.search_types[search_key] = "PositionSearch" def get_where_clause(self): - self.search_term = [float(value) if is_number(value) else value for value in self.search_term] + self.search_term = [float(value) if is_number( + value) else value for value in self.search_term] chr, self.mb_min, self.mb_max = self.search_term[:3] self.chr = str(chr).lower() self.get_chr() @@ -807,11 +855,11 @@ class PositionSearch(DoSearch): %s.Mb < %s """ % self.mescape(self.dataset.type, self.chr, self.dataset.type, - min(self.mb_min, self.mb_max), + min(self.mb_min, + self.mb_max), self.dataset.type, max(self.mb_min, self.mb_max)) - return where_clause def get_chr(self): @@ -826,36 +874,39 @@ class PositionSearch(DoSearch): def run(self): self.get_where_clause() - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) return self.execute(self.query) + class MrnaPositionSearch(PositionSearch, MrnaAssaySearch): """Searches for genes located within a specified range on a specified chromosome""" for search_key in ('POSITION', 'POS', 'MB'): - DoSearch.search_types['ProbeSet_'+search_key] = "MrnaPositionSearch" + DoSearch.search_types['ProbeSet_' + search_key] = "MrnaPositionSearch" def run(self): self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) return self.execute(self.query) + class GenotypePositionSearch(PositionSearch, GenotypeSearch): """Searches for genes located within a specified range on a specified chromosome""" for search_key in ('POSITION', 'POS', 'MB'): - DoSearch.search_types['Geno_'+search_key] = "GenotypePositionSearch" + DoSearch.search_types['Geno_' + search_key] = "GenotypePositionSearch" def run(self): self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) return self.execute(self.query) + class PvalueSearch(MrnaAssaySearch): """Searches for traits with a permutationed p-value between low and high""" @@ -870,25 +921,26 @@ class PvalueSearch(MrnaAssaySearch): self.pvalue_min, self.pvalue_max = self.search_term[:2] self.where_clause = """ %sXRef.pValue > %s and %sXRef.pValue < %s """ % self.mescape( - self.dataset.type, - min(self.pvalue_min, self.pvalue_max), - self.dataset.type, - max(self.pvalue_min, self.pvalue_max)) + self.dataset.type, + min(self.pvalue_min, self.pvalue_max), + self.dataset.type, + max(self.pvalue_min, self.pvalue_max)) else: # Deal with >, <, >=, and <= self.where_clause = """ %sXRef.pValue %s %s """ % self.mescape( - self.dataset.type, - self.search_operator, - self.search_term[0]) + self.dataset.type, + self.search_operator, + self.search_term[0]) logger.debug("where_clause is:", pf(self.where_clause)) - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) logger.sql(self.query) return self.execute(self.query) + class AuthorSearch(PhenotypeSearch): """Searches for phenotype traits with specified author(s)""" @@ -899,7 +951,7 @@ class AuthorSearch(PhenotypeSearch): self.where_clause = """ Publication.Authors REGEXP "[[:<:]]%s[[:>:]]" and """ % (self.search_term[0]) - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) return self.execute(self.query) @@ -911,6 +963,7 @@ def is_number(s): except ValueError: return False + def get_aliases(symbol, species): if species == "mouse": symbol_string = symbol.capitalize() @@ -920,7 +973,8 @@ def get_aliases(symbol, species): return [] filtered_aliases = [] - response = requests.get(GN2_BASE_URL + "/gn3/gene/aliases/" + symbol_string) + response = requests.get( + GN2_BASE_URL + "/gn3/gene/aliases/" + symbol_string) if response: alias_list = json.loads(response.content) @@ -934,9 +988,10 @@ def get_aliases(symbol, species): return filtered_aliases + if __name__ == "__main__": - ### Usually this will be used as a library, but call it from the command line for testing - ### And it runs the code below + # Usually this will be used as a library, but call it from the command line for testing + # And it runs the code below import MySQLdb import sys |