diff options
Diffstat (limited to 'wqflask/base/mrna_assay_tissue_data.py')
-rw-r--r-- | wqflask/base/mrna_assay_tissue_data.py | 134 |
1 files changed, 134 insertions, 0 deletions
diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py new file mode 100644 index 00000000..8ae71858 --- /dev/null +++ b/wqflask/base/mrna_assay_tissue_data.py @@ -0,0 +1,134 @@ +from __future__ import absolute_import, print_function, division + +import collections + +from flask import g + +from utility import dbtools +from uitility import Bunch + +from MySQLdb import escape_string as escape + +class MrnaAssayTissueData(object): + + def __init__(self, gene_symbols=None): + self.gene_symbols = gene_symbols + self.have_data = False + if self.gene_symbols == None: + self.gene_symbols = [] + + self.data = collections.defaultdict(Bunch) + + #self.gene_id_dict ={} + #self.data_id_dict = {} + #self.chr_dict = {} + #self.mb_dict = {} + #self.desc_dict = {} + #self.probe_target_desc_dict = {} + + query = '''select t.Symbol, t.GeneId, t.DataId,t.Chr, t.Mb, t.description, t.Probe_Target_Description + from ( + select Symbol, max(Mean) as maxmean + from TissueProbeSetXRef + where TissueProbeSetFreezeId=1 and ''' + + # Note that inner join is necessary in this query to get distinct record in one symbol group + # with highest mean value + # Due to the limit size of TissueProbeSetFreezeId table in DB, + # performance of inner join is acceptable. + if len(gene_symbols) == 0: + query += '''Symbol!='' and Symbol Is Not Null group by Symbol) + as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol + and t.Mean = x.maxmean; + ''' + else: + in_clause = dbtools.create_in_clause(gene_symbols) + + query += ''' Symbol in {} group by Symbol) + as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol + and t.Mean = x.maxmean; + '''.format(in_clause) + + results = g.db.execute(query).fetchall() + for result in results: + symbol = item[0] + gene_symbols.append(symbol) + symbol = symbol.lower() + + self.data[symbol].gene_id = result.GeneId + self.data[symbol].data_id = result.DataId + self.data[symbol].chr = result.Chr + self.data[symbol].mb = result.Mb + self.data[symbol].description = result.description + self.data[symbol].probe_target_description = result.Probe_Target_Description + + + ########################################################################### + #Input: cursor, symbolList (list), dataIdDict(Dict) + #output: symbolValuepairDict (dictionary):one dictionary of Symbol and Value Pair, + # key is symbol, value is one list of expression values of one probeSet; + #function: get one dictionary whose key is gene symbol and value is tissue expression data (list type). + #Attention! All keys are lower case! + ########################################################################### + def get_symbol_value_pairs(self): + + id_list = [self.tissue_data[symbol.lower()].data_id for item in self.tissue_data] + + symbol_value_pairs = {} + value_list=[] + + query = """SELECT value, id + FROM TissueProbeSetData + WHERE Id IN {}""".format(create_in_clause(id_list)) + + try : + results = g.db.execute(query).fetchall() + for result in results: + value_list.append(result.value) + symbol_value_pairs[symbol] = value_list + except: + symbol_value_pairs[symbol] = None + + #for symbol in symbol_list: + # if tissue_data.has_key(symbol): + # data_id = tissue_data[symbol].data_id + # + # query = """select value, id + # from TissueProbeSetData + # where Id={}""".format(escape(data_id)) + # try : + # results = g.db.execute(query).fetchall() + # for item in results: + # item = item[0] + # value_list.append(item) + # symbol_value_pairs[symbol] = value_list + # value_list=[] + # except: + # symbol_value_pairs[symbol] = None + + return symbol_value_pairs + + ######################################################################################################## + #input: cursor, symbolList (list), dataIdDict(Dict): key is symbol + #output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair. + # key is symbol, value is one list of expression values of one probeSet. + #function: wrapper function for getSymbolValuePairDict function + # build gene symbol list if necessary, cut it into small lists if necessary, + # then call getSymbolValuePairDict function and merge the results. + ######################################################################################################## + + def get_trait_symbol_and_tissue_values(symbol_list=None): + tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list) + + #symbolList, + #geneIdDict, + #dataIdDict, + #ChrDict, + #MbDict, + #descDict, + #pTargetDescDict = getTissueProbeSetXRefInfo( + # GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId) + + if len(tissue_data.gene_symbols): + return get_symbol_value_pairs(tissue_data) + |