1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
|
import collections
from utility import Bunch
class MrnaAssayTissueData:
def __init__(self, conn, gene_symbols=None):
self.gene_symbols = gene_symbols
self.conn = conn
if self.gene_symbols is None:
self.gene_symbols = []
self.data = collections.defaultdict(Bunch)
results = ()
# Note that inner join is necessary in this query to get
# distinct record in one symbol group with highest mean value
# Due to the limit size of TissueProbeSetFreezeId table in DB,
# performance of inner join is
# acceptable.MrnaAssayTissueData(gene_symbols=symbol_list)
with conn.cursor() as cursor:
if len(self.gene_symbols) == 0:
cursor.execute(
"SELECT t.Symbol, t.GeneId, t.DataId, "
"t.Chr, t.Mb, t.description, "
"t.Probe_Target_Description FROM (SELECT Symbol, "
"max(Mean) AS maxmean "
"FROM TissueProbeSetXRef WHERE "
"TissueProbeSetFreezeId=1 AND "
"Symbol != '' AND Symbol IS NOT "
"Null GROUP BY Symbol) "
"AS x INNER JOIN "
"TissueProbeSetXRef AS t ON "
"t.Symbol = x.Symbol "
"AND t.Mean = x.maxmean")
else:
cursor.execute(
"SELECT t.Symbol, t.GeneId, t.DataId, "
"t.Chr, t.Mb, t.description, "
"t.Probe_Target_Description FROM (SELECT Symbol, "
"max(Mean) AS maxmean "
"FROM TissueProbeSetXRef WHERE "
"TissueProbeSetFreezeId=1 AND "
"Symbol IN "
f"({', '.join(['%s'] * len(self.gene_symbols))}) "
"GROUP BY Symbol) AS x INNER JOIN "
"TissueProbeSetXRef AS t ON t.Symbol = x.Symbol "
"AND t.Mean = x.maxmean",
tuple(self.gene_symbols))
results = list(cursor.fetchall())
lower_symbols = {}
for gene_symbol in self.gene_symbols:
if gene_symbol is not None:
lower_symbols[gene_symbol.lower()] = True
for result in results:
(symbol, gene_id, data_id, _chr, _mb,
descr, probeset_target_descr) = result
if symbol is not None and lower_symbols.get(symbol.lower()):
symbol = symbol.lower()
self.data[symbol].gene_id = gene_id
self.data[symbol].data_id = data_id
self.data[symbol].chr = _chr
self.data[symbol].mb = _mb
self.data[symbol].description = descr
(self.data[symbol]
.probe_target_description) = probeset_target_descr
def get_symbol_values_pairs(self):
"""Get one dictionary whose key is gene symbol and value is
tissue expression data (list type). All keys are lower case.
The output is a symbolValuepairDict (dictionary): one
dictionary of Symbol and Value Pair; key is symbol, value is
one list of expression values of one probeSet;
"""
id_list = [self.data[symbol].data_id for symbol in self.data]
symbol_values_dict = {}
if len(id_list) > 0:
results = []
with self.conn.cursor() as cursor:
cursor.execute(
"SELECT TissueProbeSetXRef.Symbol, "
"TissueProbeSetData.value FROM "
"TissueProbeSetXRef, TissueProbeSetData "
"WHERE TissueProbeSetData.Id IN ("
f"{', '.join(['%s' * len(id_list)])}) "
"AND TissueProbeSetXRef.DataId = TissueProbeSetData.Id",
tuple(id_list))
results = cursor.fetchall()
for result in results:
(symbol, value) = result
if symbol.lower() not in symbol_values_dict:
symbol_values_dict[symbol.lower()] = [value]
else:
symbol_values_dict[symbol.lower()].append(
value)
return symbol_values_dict
|