diff options
Diffstat (limited to 'wqflask/base/mrna_assay_tissue_data.py')
-rw-r--r-- | wqflask/base/mrna_assay_tissue_data.py | 45 |
1 files changed, 24 insertions, 21 deletions
diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py index 6fec5dcd..8f8e2b0a 100644 --- a/wqflask/base/mrna_assay_tissue_data.py +++ b/wqflask/base/mrna_assay_tissue_data.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, print_function, division - import collections from flask import g @@ -7,14 +5,15 @@ from flask import g from utility import db_tools from utility import Bunch -from MySQLdb import escape_string as escape +from utility.db_tools import escape +from gn3.db_utils import database_connector -from pprint import pformat as pf from utility.logger import getLogger -logger = getLogger(__name__ ) +logger = getLogger(__name__) + -class MrnaAssayTissueData(object): +class MrnaAssayTissueData: def __init__(self, gene_symbols=None): self.gene_symbols = gene_symbols @@ -23,7 +22,7 @@ class MrnaAssayTissueData(object): self.data = collections.defaultdict(Bunch) - query = '''select t.Symbol, t.GeneId, t.DataId, t.Chr, t.Mb, t.description, t.Probe_Target_Description + query = '''select t.Symbol, t.GeneId, t.DataId, t.Chr, t.Mb, t.description, t.Probe_Target_Description from ( select Symbol, max(Mean) as maxmean from TissueProbeSetXRef @@ -34,29 +33,31 @@ class MrnaAssayTissueData(object): # Due to the limit size of TissueProbeSetFreezeId table in DB, # performance of inner join is acceptable.MrnaAssayTissueData(gene_symbols=symbol_list) if len(gene_symbols) == 0: - query += '''Symbol!='' and Symbol Is Not Null group by Symbol) + query += '''Symbol!='' and Symbol Is Not Null group by Symbol) as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol and t.Mean = x.maxmean; ''' else: in_clause = db_tools.create_in_clause(gene_symbols) - #ZS: This was in the query, not sure why: http://docs.python.org/2/library/string.html?highlight=lower#string.lower + # ZS: This was in the query, not sure why: http://docs.python.org/2/library/string.html?highlight=lower#string.lower query += ''' Symbol in {} group by Symbol) as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol and t.Mean = x.maxmean; '''.format(in_clause) - results = g.db.execute(query).fetchall() - lower_symbols = [] + # lower_symbols = [] + lower_symbols = {} for gene_symbol in gene_symbols: + # lower_symbols[gene_symbol.lower()] = True if gene_symbol != None: - lower_symbols.append(gene_symbol.lower()) - + lower_symbols[gene_symbol.lower()] = True + results = list(g.db.execute(query).fetchall()) for result in results: symbol = result[0] - if symbol.lower() in lower_symbols: + if symbol is not None and lower_symbols.get(symbol.lower()): + symbol = symbol.lower() self.data[symbol].gene_id = result.GeneId @@ -67,16 +68,16 @@ class MrnaAssayTissueData(object): self.data[symbol].probe_target_description = result.Probe_Target_Description ########################################################################### - #Input: cursor, symbolList (list), dataIdDict(Dict) - #output: symbolValuepairDict (dictionary):one dictionary of Symbol and Value Pair, + # Input: cursor, symbolList (list), dataIdDict(Dict) + # output: symbolValuepairDict (dictionary):one dictionary of Symbol and Value Pair, # key is symbol, value is one list of expression values of one probeSet; - #function: get one dictionary whose key is gene symbol and value is tissue expression data (list type). - #Attention! All keys are lower case! + # function: get one dictionary whose key is gene symbol and value is tissue expression data (list type). + # Attention! All keys are lower case! ########################################################################### def get_symbol_values_pairs(self): id_list = [self.data[symbol].data_id for symbol in self.data] - + symbol_values_dict = {} if len(id_list) > 0: @@ -85,11 +86,13 @@ class MrnaAssayTissueData(object): WHERE TissueProbeSetData.Id IN {} and TissueProbeSetXRef.DataId = TissueProbeSetData.Id""".format(db_tools.create_in_clause(id_list)) + results = g.db.execute(query).fetchall() for result in results: if result.Symbol.lower() not in symbol_values_dict: symbol_values_dict[result.Symbol.lower()] = [result.value] else: - symbol_values_dict[result.Symbol.lower()].append(result.value) + symbol_values_dict[result.Symbol.lower()].append( + result.value) - return symbol_values_dict
\ No newline at end of file + return symbol_values_dict |