1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
|
from __future__ import absolute_import, print_function, division
import collections
from flask import g
from utility import db_tools
from utility import Bunch
from MySQLdb import escape_string as escape
class MrnaAssayTissueData(object):
    """Tissue probe-set information for a set of gene symbols.

    On construction a single query is run against ``TissueProbeSetXRef``
    (restricted to ``TissueProbeSetFreezeId=1``).  For every symbol it keeps
    the row(s) whose ``Mean`` equals the highest ``Mean`` of that symbol and
    stores the selected columns in ``self.data``.

    Attributes:
        gene_symbols: the list passed by the caller (or a new empty list).
            Symbols returned by the query that are not yet in the list are
            appended to it in place.
        have_data: initialised to ``False``; nothing in this class changes it.
        data: ``defaultdict(Bunch)`` keyed by the LOWER-CASED symbol; each
            entry carries ``gene_id``, ``data_id``, ``chr``, ``mb``,
            ``description`` and ``probe_target_description``.
    """

    def __init__(self, gene_symbols=None):
        """Load the cross-reference rows for ``gene_symbols``.

        Args:
            gene_symbols: list of gene symbols to look up.  ``None`` or an
                empty list selects every row with a non-empty symbol.
        """
        if gene_symbols is None:
            gene_symbols = []
        self.gene_symbols = gene_symbols
        self.have_data = False
        self.data = collections.defaultdict(Bunch)

        query = '''select t.Symbol, t.GeneId, t.DataId,t.Chr, t.Mb, t.description, t.Probe_Target_Description
                   from (
                   select Symbol, max(Mean) as maxmean
                   from TissueProbeSetXRef
                   where TissueProbeSetFreezeId=1 and '''

        # Note that inner join is necessary in this query to get distinct record in one symbol group
        # with highest mean value
        # Due to the limit size of TissueProbeSetFreezeId table in DB,
        # performance of inner join is acceptable.
        if not self.gene_symbols:
            query += '''Symbol!='' and Symbol Is Not Null group by Symbol)
                as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol
                and t.Mean = x.maxmean;
                    '''
        else:
            in_clause = db_tools.create_in_clause(self.gene_symbols)
            query += ''' Symbol in {} group by Symbol)
                as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol
                and t.Mean = x.maxmean;
                    '''.format(in_clause)

        results = g.db.execute(query).fetchall()

        # Track what is already listed so requested symbols are not duplicated
        # when the database echoes them back.
        known_symbols = set(self.gene_symbols)
        for result in results:
            symbol = result[0]
            if symbol not in known_symbols:
                known_symbols.add(symbol)
                self.gene_symbols.append(symbol)

            # Attention! All keys of self.data are lower case.
            entry = self.data[symbol.lower()]
            entry.gene_id = result.GeneId
            entry.data_id = result.DataId
            entry.chr = result.Chr
            entry.mb = result.Mb
            entry.description = result.description
            entry.probe_target_description = result.Probe_Target_Description

    def get_symbol_value_pairs(self):
        """Return the tissue expression values of every symbol in ``self.data``.

        One query fetches the ``TissueProbeSetData`` rows whose ``Id`` is the
        ``data_id`` of a stored symbol; the values are then grouped per symbol.

        Returns:
            dict: key is the (lower-case) gene symbol, value is the list of
            expression values found for that symbol's ``data_id`` (an empty
            list when no row matches).  If the query itself fails, every
            symbol maps to ``None``.  An empty dict is returned when there
            are no symbols.
        """
        # Map each data id back to the symbol(s) that use it so the rows of
        # the batch query can be attributed to the right symbol.
        symbols_by_data_id = collections.defaultdict(list)
        for symbol, info in self.data.items():
            symbols_by_data_id[info.data_id].append(symbol)

        if not symbols_by_data_id:
            # Nothing to look up; also avoids building an empty IN clause.
            return {}

        id_list = list(symbols_by_data_id)
        query = """SELECT value, id
                   FROM TissueProbeSetData
                   WHERE Id IN {}""".format(db_tools.create_in_clause(id_list))

        try:
            results = g.db.execute(query).fetchall()
        except Exception:
            # Best effort, as before: a failed lookup yields None per symbol
            # instead of propagating the database error.
            return dict.fromkeys(self.data)

        symbol_value_pairs = dict((symbol, []) for symbol in self.data)
        for value, data_id in results:
            for symbol in symbols_by_data_id.get(data_id, ()):
                symbol_value_pairs[symbol].append(value)

        return symbol_value_pairs
########################################################################################################
#Input: symbol_list (list of gene symbols, optional)
#Output: dictionary of symbol/value pairs:
#        key is the symbol, value is the list of expression values of one probe set.
#Function: wrapper function for get_symbol_value_pairs;
#          builds the gene symbol list if necessary, then calls get_symbol_value_pairs
#          and returns its result. (Cutting a long symbol list into smaller lists and
#          merging the partial results, as earlier versions of this comment described,
#          is not done by the code below.)
########################################################################################################
def get_trait_symbol_and_tissue_values(symbol_list=None):
    """Look up tissue expression values for the given gene symbols.

    Builds a ``MrnaAssayTissueData`` for ``symbol_list`` and, when it ends up
    with at least one gene symbol, returns the result of its
    ``get_symbol_value_pairs`` method.

    Args:
        symbol_list: list of gene symbols, forwarded as ``gene_symbols`` to
            ``MrnaAssayTissueData``; defaults to ``None``.

    Returns:
        The value returned by ``MrnaAssayTissueData.get_symbol_value_pairs``,
        or ``None`` when no gene symbols are available.
    """
    tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list)

    if not tissue_data.gene_symbols:
        return None

    # get_symbol_value_pairs is a method of the tissue-data object, not a
    # module-level function, so it has to be called on the instance.
    return tissue_data.get_symbol_value_pairs()
|