aboutsummaryrefslogtreecommitdiff
path: root/wqflask/base/mrna_assay_tissue_data.py
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/base/mrna_assay_tissue_data.py')
-rw-r--r--wqflask/base/mrna_assay_tissue_data.py148
1 files changed, 80 insertions, 68 deletions
diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py
index a08f3f21..be5df657 100644
--- a/wqflask/base/mrna_assay_tissue_data.py
+++ b/wqflask/base/mrna_assay_tissue_data.py
@@ -9,6 +9,8 @@ from utility import Bunch
from MySQLdb import escape_string as escape
+from pprint import pformat as pf
+
class MrnaAssayTissueData(object):
def __init__(self, gene_symbols=None):
@@ -35,14 +37,15 @@ class MrnaAssayTissueData(object):
# Note that inner join is necessary in this query to get distinct record in one symbol group
# with highest mean value
# Due to the limit size of TissueProbeSetFreezeId table in DB,
- # performance of inner join is acceptable.
+ # performance of inner join is acceptable.MrnaAssayTissueData(gene_symbols=symbol_list)
+ #print("len(gene_symbols): ", len(gene_symbols))
if len(gene_symbols) == 0:
query += '''Symbol!='' and Symbol Is Not Null group by Symbol)
as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol
and t.Mean = x.maxmean;
'''
else:
- in_clause = dbtools.create_in_clause(gene_symbols)
+ in_clause = db_tools.create_in_clause(gene_symbols)
query += ''' Symbol in {} group by Symbol)
as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol
@@ -51,17 +54,19 @@ class MrnaAssayTissueData(object):
results = g.db.execute(query).fetchall()
for result in results:
- symbol = item[0]
- gene_symbols.append(symbol)
- symbol = symbol.lower()
-
- self.data[symbol].gene_id = result.GeneId
- self.data[symbol].data_id = result.DataId
- self.data[symbol].chr = result.Chr
- self.data[symbol].mb = result.Mb
- self.data[symbol].description = result.description
- self.data[symbol].probe_target_description = result.Probe_Target_Description
+ symbol = result[0]
+ if symbol in gene_symbols:
+ #gene_symbols.append(symbol)
+ symbol = symbol.lower()
+
+ self.data[symbol].gene_id = result.GeneId
+ self.data[symbol].data_id = result.DataId
+ self.data[symbol].chr = result.Chr
+ self.data[symbol].mb = result.Mb
+ self.data[symbol].description = result.description
+ self.data[symbol].probe_target_description = result.Probe_Target_Description
+ #print("self.data: ", pf(self.data))
###########################################################################
#Input: cursor, symbolList (list), dataIdDict(Dict)
@@ -70,65 +75,72 @@ class MrnaAssayTissueData(object):
#function: get one dictionary whose key is gene symbol and value is tissue expression data (list type).
#Attention! All keys are lower case!
###########################################################################
- def get_symbol_value_pairs(self):
-
- id_list = [self.tissue_data[symbol.lower()].data_id for item in self.tissue_data]
-
- symbol_value_pairs = {}
- value_list=[]
- query = """SELECT value, id
- FROM TissueProbeSetData
- WHERE Id IN {}""".format(create_in_clause(id_list))
-
- try :
- results = g.db.execute(query).fetchall()
- for result in results:
- value_list.append(result.value)
- symbol_value_pairs[symbol] = value_list
- except:
- symbol_value_pairs[symbol] = None
-
- #for symbol in symbol_list:
- # if tissue_data.has_key(symbol):
- # data_id = tissue_data[symbol].data_id
- #
- # query = """select value, id
- # from TissueProbeSetData
- # where Id={}""".format(escape(data_id))
- # try :
- # results = g.db.execute(query).fetchall()
- # for item in results:
- # item = item[0]
- # value_list.append(item)
- # symbol_value_pairs[symbol] = value_list
- # value_list=[]
- # except:
- # symbol_value_pairs[symbol] = None
+ def get_symbol_values_pairs(self):
+ id_list = [self.data[symbol].data_id for symbol in self.data]
+
+ symbol_values_dict = {}
+
+ query = """SELECT TissueProbeSetXRef.Symbol, TissueProbeSetData.value
+ FROM TissueProbeSetXRef, TissueProbeSetData
+ WHERE TissueProbeSetData.Id IN {} and
+ TissueProbeSetXRef.DataId = TissueProbeSetData.Id""".format(db_tools.create_in_clause(id_list))
+
+ results = g.db.execute(query).fetchall()
+ for result in results:
+ if result.Symbol.lower() not in symbol_values_dict:
+ symbol_values_dict[result.Symbol.lower()] = [result.value]
+ else:
+ symbol_values_dict[result.Symbol.lower()].append(result.value)
+
+ #for symbol in self.data:
+ # data_id = self.data[symbol].data_id
+ # symbol_values_dict[symbol] = self.get_tissue_values(data_id)
+
- return symbol_value_pairs
+ return symbol_values_dict
- ########################################################################################################
- #input: cursor, symbolList (list), dataIdDict(Dict): key is symbol
- #output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair.
- # key is symbol, value is one list of expression values of one probeSet.
- #function: wrapper function for getSymbolValuePairDict function
- # build gene symbol list if necessary, cut it into small lists if necessary,
- # then call getSymbolValuePairDict function and merge the results.
- ########################################################################################################
- def get_trait_symbol_and_tissue_values(symbol_list=None):
- tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list)
+ #def get_tissue_values(self, data_id):
+ # """Gets the tissue values for a particular gene"""
+ #
+ # tissue_values=[]
+ #
+ # query = """SELECT value, id
+ # FROM TissueProbeSetData
+ # WHERE Id IN {}""".format(db_tools.create_in_clause(data_id))
+ #
+ # #try :
+ # results = g.db.execute(query).fetchall()
+ # for result in results:
+ # tissue_values.append(result.value)
+ # #symbol_values_dict[symbol] = value_list
+ # #except:
+ # # symbol_values_pairs[symbol] = None
+ #
+ # return tissue_values
- #symbolList,
- #geneIdDict,
- #dataIdDict,
- #ChrDict,
- #MbDict,
- #descDict,
- #pTargetDescDict = getTissueProbeSetXRefInfo(
- # GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId)
-
- if len(tissue_data.gene_symbols):
- return get_symbol_value_pairs(tissue_data)
+########################################################################################################
+#input: cursor, symbolList (list), dataIdDict(Dict): key is symbol
+#output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair.
+# key is symbol, value is one list of expression values of one probeSet.
+#function: wrapper function for getSymbolValuePairDict function
+# build gene symbol list if necessary, cut it into small lists if necessary,
+# then call getSymbolValuePairDict function and merge the results.
+########################################################################################################
+
+#def get_trait_symbol_and_tissue_values(symbol_list=None):
+# tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list)
+#
+# #symbolList,
+# #geneIdDict,
+# #dataIdDict,
+# #ChrDict,
+# #MbDict,
+# #descDict,
+# #pTargetDescDict = getTissueProbeSetXRefInfo(
+# # GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId)
+#
+# if len(tissue_data.gene_symbols):
+# return get_symbol_values_pairs(tissue_data)