diff options
Diffstat (limited to 'gn3/base/mrna_assay_tissue_data.py')
-rw-r--r-- | gn3/base/mrna_assay_tissue_data.py | 94 |
1 files changed, 94 insertions, 0 deletions
diff --git a/gn3/base/mrna_assay_tissue_data.py b/gn3/base/mrna_assay_tissue_data.py new file mode 100644 index 0000000..0f51ade --- /dev/null +++ b/gn3/base/mrna_assay_tissue_data.py @@ -0,0 +1,94 @@ + +# pylint: disable-all +import collections + +from flask import g + +from gn3.utility.db_tools import create_in_clause +from gn3.utility.db_tools import escape +from gn3.utility.bunch import Bunch + + +# from utility.logger import getLogger +# logger = getLogger(__name__ ) + +class MrnaAssayTissueData(object): + + def __init__(self, gene_symbols=None): + self.gene_symbols = gene_symbols + if self.gene_symbols == None: + self.gene_symbols = [] + + self.data = collections.defaultdict(Bunch) + + query = '''select t.Symbol, t.GeneId, t.DataId, t.Chr, t.Mb, t.description, t.Probe_Target_Description + from ( + select Symbol, max(Mean) as maxmean + from TissueProbeSetXRef + where TissueProbeSetFreezeId=1 and ''' + + # Note that inner join is necessary in this query to get distinct record in one symbol group + # with highest mean value + # Due to the limit size of TissueProbeSetFreezeId table in DB, + # performance of inner join is acceptable.MrnaAssayTissueData(gene_symbols=symbol_list) + if len(gene_symbols) == 0: + query += '''Symbol!='' and Symbol Is Not Null group by Symbol) + as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol + and t.Mean = x.maxmean; + ''' + else: + in_clause = create_in_clause(gene_symbols) + + # ZS: This was in the query, not sure why: http://docs.python.org/2/library/string.html?highlight=lower#string.lower + query += ''' Symbol in {} group by Symbol) + as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol + and t.Mean = x.maxmean; + '''.format(in_clause) + + results = g.db.execute(query).fetchall() + + lower_symbols = [] + for gene_symbol in gene_symbols: + if gene_symbol != None: + lower_symbols.append(gene_symbol.lower()) + + for result in results: + symbol = result[0] + if symbol.lower() in lower_symbols: + symbol = symbol.lower() + + self.data[symbol].gene_id = result.GeneId + self.data[symbol].data_id = result.DataId + self.data[symbol].chr = result.Chr + self.data[symbol].mb = result.Mb + self.data[symbol].description = result.description + self.data[symbol].probe_target_description = result.Probe_Target_Description + + ########################################################################### + # Input: cursor, symbolList (list), dataIdDict(Dict) + # output: symbolValuepairDict (dictionary):one dictionary of Symbol and Value Pair, + # key is symbol, value is one list of expression values of one probeSet; + # function: get one dictionary whose key is gene symbol and value is tissue expression data (list type). + # Attention! All keys are lower case! + ########################################################################### + + def get_symbol_values_pairs(self): + id_list = [self.data[symbol].data_id for symbol in self.data] + + symbol_values_dict = {} + + if len(id_list) > 0: + query = """SELECT TissueProbeSetXRef.Symbol, TissueProbeSetData.value + FROM TissueProbeSetXRef, TissueProbeSetData + WHERE TissueProbeSetData.Id IN {} and + TissueProbeSetXRef.DataId = TissueProbeSetData.Id""".format(create_in_clause(id_list)) + + results = g.db.execute(query).fetchall() + for result in results: + if result.Symbol.lower() not in symbol_values_dict: + symbol_values_dict[result.Symbol.lower()] = [result.value] + else: + symbol_values_dict[result.Symbol.lower()].append( + result.value) + + return symbol_values_dict |