1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
import collections
from flask import g
from utility import db_tools
from utility import Bunch
from utility.db_tools import escape
from gn3.db_utils import database_connector
class MrnaAssayTissueData:
def __init__(self, gene_symbols=None):
self.gene_symbols = gene_symbols
if self.gene_symbols == None:
self.gene_symbols = []
self.data = collections.defaultdict(Bunch)
query = '''select t.Symbol, t.GeneId, t.DataId, t.Chr, t.Mb, t.description, t.Probe_Target_Description
from (
select Symbol, max(Mean) as maxmean
from TissueProbeSetXRef
where TissueProbeSetFreezeId=1 and '''
# Note that inner join is necessary in this query to get distinct record in one symbol group
# with highest mean value
# Due to the limit size of TissueProbeSetFreezeId table in DB,
# performance of inner join is acceptable.MrnaAssayTissueData(gene_symbols=symbol_list)
if len(gene_symbols) == 0:
query += '''Symbol!='' and Symbol Is Not Null group by Symbol)
as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol
and t.Mean = x.maxmean;
'''
else:
in_clause = db_tools.create_in_clause(gene_symbols)
# ZS: This was in the query, not sure why: http://docs.python.org/2/library/string.html?highlight=lower#string.lower
query += ''' Symbol in {} group by Symbol)
as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol
and t.Mean = x.maxmean;
'''.format(in_clause)
# lower_symbols = []
lower_symbols = {}
for gene_symbol in gene_symbols:
# lower_symbols[gene_symbol.lower()] = True
if gene_symbol != None:
lower_symbols[gene_symbol.lower()] = True
results = list(g.db.execute(query).fetchall())
for result in results:
symbol = result[0]
if symbol is not None and lower_symbols.get(symbol.lower()):
symbol = symbol.lower()
self.data[symbol].gene_id = result.GeneId
self.data[symbol].data_id = result.DataId
self.data[symbol].chr = result.Chr
self.data[symbol].mb = result.Mb
self.data[symbol].description = result.description
self.data[symbol].probe_target_description = result.Probe_Target_Description
###########################################################################
# Input: cursor, symbolList (list), dataIdDict(Dict)
# output: symbolValuepairDict (dictionary):one dictionary of Symbol and Value Pair,
# key is symbol, value is one list of expression values of one probeSet;
# function: get one dictionary whose key is gene symbol and value is tissue expression data (list type).
# Attention! All keys are lower case!
###########################################################################
def get_symbol_values_pairs(self):
id_list = [self.data[symbol].data_id for symbol in self.data]
symbol_values_dict = {}
if len(id_list) > 0:
query = """SELECT TissueProbeSetXRef.Symbol, TissueProbeSetData.value
FROM TissueProbeSetXRef, TissueProbeSetData
WHERE TissueProbeSetData.Id IN {} and
TissueProbeSetXRef.DataId = TissueProbeSetData.Id""".format(db_tools.create_in_clause(id_list))
results = g.db.execute(query).fetchall()
for result in results:
if result.Symbol.lower() not in symbol_values_dict:
symbol_values_dict[result.Symbol.lower()] = [result.value]
else:
symbol_values_dict[result.Symbol.lower()].append(
result.value)
return symbol_values_dict
|