aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--wqflask/base/mrna_assay_tissue_data.py134
-rwxr-xr-xwqflask/base/trait.py5
-rw-r--r--wqflask/utility/db_tools.py15
-rw-r--r--wqflask/wqflask/correlation/correlation_functions.py (renamed from wqflask/wqflask/correlation/correlation_function.py)158
-rw-r--r--wqflask/wqflask/correlation/show_corr_results.py85
-rw-r--r--wqflask/wqflask/templates/correlation_page.html12
6 files changed, 300 insertions, 109 deletions
diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py
new file mode 100644
index 00000000..8ae71858
--- /dev/null
+++ b/wqflask/base/mrna_assay_tissue_data.py
@@ -0,0 +1,134 @@
+from __future__ import absolute_import, print_function, division
+
+import collections
+
+from flask import g
+from MySQLdb import escape_string as escape
+
+from utility import Bunch
+from utility import db_tools
+
+class MrnaAssayTissueData(object):
+    """Tissue mRNA assay records, keyed by lower-cased gene symbol.
+
+    The constructor reads ``TissueProbeSetXRef`` (``TissueProbeSetFreezeId=1``)
+    and keeps, per symbol, a row whose ``Mean`` equals that symbol's maximum
+    mean.
+
+    Attributes:
+        gene_symbols: the symbols exactly as returned by the database.
+        data: ``defaultdict(Bunch)``; every entry carries ``gene_id``,
+            ``data_id``, ``chr``, ``mb``, ``description`` and
+            ``probe_target_description``.
+        have_data: True when the query returned at least one row.
+    """
+
+    def __init__(self, gene_symbols=None):
+        """Load the cross-reference rows for ``gene_symbols``.
+
+        gene_symbols: optional list of gene symbols to look up.  ``None`` or
+            an empty list selects every row with a non-empty symbol.  The
+            list passed in is never modified.
+        """
+        # Copy so the caller's list is neither aliased nor mutated.
+        requested = list(gene_symbols) if gene_symbols else []
+
+        self.gene_symbols = []
+        self.have_data = False
+        self.data = collections.defaultdict(Bunch)
+
+        if requested:
+            symbol_filter = "Symbol in {}".format(
+                db_tools.create_in_clause(requested))
+        else:
+            symbol_filter = "Symbol!='' and Symbol Is Not Null"
+
+        # Note that the inner join is necessary in this query to get the
+        # distinct record with the highest mean value in each symbol group.
+        # Due to the limited size of the TissueProbeSetFreezeId table in the
+        # DB, the performance of the inner join is acceptable.
+        query = '''select t.Symbol, t.GeneId, t.DataId,t.Chr, t.Mb, t.description, t.Probe_Target_Description
+                   from (
+                       select Symbol, max(Mean) as maxmean
+                       from TissueProbeSetXRef
+                       where TissueProbeSetFreezeId=1 and {} group by Symbol)
+                   as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol
+                   and t.Mean = x.maxmean;
+                '''.format(symbol_filter)
+
+        for result in g.db.execute(query).fetchall():
+            symbol = result[0]
+            self.gene_symbols.append(symbol)
+
+            # All keys of self.data are lower case.
+            record = self.data[symbol.lower()]
+            record.gene_id = result.GeneId
+            record.data_id = result.DataId
+            record.chr = result.Chr
+            record.mb = result.Mb
+            record.description = result.description
+            record.probe_target_description = result.Probe_Target_Description
+
+        self.have_data = bool(self.data)
+
+    def get_symbol_value_pairs(self):
+        """Return the tissue expression values of every loaded symbol.
+
+        Returns a dict keyed by lower-cased gene symbol.  Each value is the
+        list of ``TissueProbeSetData.value`` entries stored under that
+        symbol's ``data_id`` (empty when there are none).  When the query
+        fails, every symbol maps to ``None`` instead.
+        """
+        # The result rows only carry the data id, so remember which symbol
+        # each id belongs to.  Ids are compared as strings so that int/long
+        # differences between the two queries cannot matter.
+        symbol_by_id = dict((str(record.data_id), symbol)
+                            for symbol, record in self.data.items())
+        symbol_value_pairs = dict((symbol, []) for symbol in self.data)
+
+        if not symbol_by_id:
+            # "IN ( )" is not valid SQL; nothing to look up anyway.
+            return symbol_value_pairs
+
+        query = """SELECT value, id
+                   FROM TissueProbeSetData
+                   WHERE Id IN {}""".format(
+            db_tools.create_in_clause(list(symbol_by_id)))
+
+        try:
+            results = g.db.execute(query).fetchall()
+        except Exception:
+            # Best effort: a failed lookup marks the values as unavailable
+            # rather than aborting the whole request.
+            return dict.fromkeys(self.data)
+
+        for result in results:
+            symbol_value_pairs[symbol_by_id[str(result.id)]].append(result.value)
+
+        return symbol_value_pairs
+
+    @staticmethod
+    def get_trait_symbol_and_tissue_values(symbol_list=None):
+        """Wrapper: build the tissue data for ``symbol_list`` and fetch its values.
+
+        symbol_list: optional list of gene symbols; ``None`` or an empty list
+            selects every symbol.
+
+        Returns the dict produced by ``get_symbol_value_pairs``, or an empty
+        dict when the database knows none of the symbols.
+        """
+        tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list)
+
+        if not tissue_data.gene_symbols:
+            return {}
+        return tissue_data.get_symbol_value_pairs()
+
+
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index c893c887..6a64eeaf 100755
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -286,7 +286,6 @@ class GeneralTrait(object):
escape(self.dataset.name),
escape(self.name))
trait_info = g.db.execute(query).fetchone()
- #print("trait_info is: ", pf(trait_info))
#XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name
# to avoid the problem of same marker name from different species.
elif self.dataset.type == 'Geno':
@@ -359,7 +358,6 @@ class GeneralTrait(object):
InbredSet.SpeciesId = Species.Id AND
Species.TaxonomyId = Homologene.TaxonomyId
""" % (escape(str(self.geneid)), escape(self.dataset.group.name))
- print("-> query is:", query)
result = g.db.execute(query).fetchone()
#else:
# result = None
@@ -391,7 +389,6 @@ class GeneralTrait(object):
Geno.Name = '{}' and
Geno.SpeciesId = Species.Id
""".format(self.dataset.group.species, self.locus)
- print("query is:", query)
result = g.db.execute(query).fetchone()
self.locus_chr = result[0]
self.locus_mb = result[1]
@@ -603,4 +600,4 @@ class GeneralTrait(object):
else:
ZValue = 0.5*log((1.0+self.correlation)/(1.0-self.correlation))
ZValue = ZValue*sqrt(self.overlap-3)
- self.p_value = 2.0*(1.0 - reaper.normp(abs(ZValue))) \ No newline at end of file
+ self.p_value = 2.0*(1.0 - reaper.normp(abs(ZValue)))
diff --git a/wqflask/utility/db_tools.py b/wqflask/utility/db_tools.py
new file mode 100644
index 00000000..4034f39c
--- /dev/null
+++ b/wqflask/utility/db_tools.py
@@ -0,0 +1,15 @@
+from __future__ import absolute_import, print_function, division
+
+from MySQLdb import escape_string as escape
+
+def create_in_clause(items):
+    """Build the parenthesised value list of a MySQL ``IN`` clause.
+
+    Every item is escaped through ``mescape`` and wrapped in single quotes;
+    the quoted values are joined with ``', '`` inside ``( ... )``.
+    """
+    quoted = ["'{}'".format(value) for value in mescape(*items)]
+    return '( {} )'.format(', '.join(quoted))
+
+def mescape(*items):
+    """Return a list with each positional argument stringified and escaped."""
+    escaped = []
+    for item in items:
+        escaped.append(escape(str(item)))
+    return escaped
diff --git a/wqflask/wqflask/correlation/correlation_function.py b/wqflask/wqflask/correlation/correlation_functions.py
index 7d4b58a9..56f66810 100644
--- a/wqflask/wqflask/correlation/correlation_function.py
+++ b/wqflask/wqflask/correlation/correlation_functions.py
@@ -24,6 +24,7 @@
#
# Last updated by NL 2011/03/23
+from __future__ import absolute_import, print_function, division
import math
#import rpy2.robjects
@@ -31,10 +32,11 @@ import pp
import string
from utility import webqtlUtil
+from base.mrna_assay_tissue_data import MrnaAssayTissueData
from base.trait import GeneralTrait
from dbFunction import webqtlDatabaseFunction
-
+from flask import Flask, g
#XZ: The input 'controls' is String. It contains the full name of control traits.
#XZ: The input variable 'strainlst' is List. It contains the strain names of primary trait.
@@ -676,7 +678,7 @@ def batchCalTissueCorr(primaryTraitValue=[], SymbolValueDict={}, method='pearson
# getGeneSymbolTissueValueDict to build dict to get CorrPvArray
#Note: If there are multiple probesets for one gene, select the one with highest mean.
###########################################################################
-def getTissueProbeSetXRefInfo(cursor=None,GeneNameLst=[],TissueProbeSetFreezeId=0):
+def getTissueProbeSetXRefInfo(GeneNameLst=[],TissueProbeSetFreezeId=0):
Symbols =""
symbolList =[]
geneIdDict ={}
@@ -720,7 +722,6 @@ def getTissueProbeSetXRefInfo(cursor=None,GeneNameLst=[],TissueProbeSetFreezeId=
'''% (TissueProbeSetFreezeId,Symbols)
try:
-
cursor.execute(query)
results =cursor.fetchall()
resultCount = len(results)
@@ -755,28 +756,43 @@ def getTissueProbeSetXRefInfo(cursor=None,GeneNameLst=[],TissueProbeSetFreezeId=
#function: get one dictionary whose key is gene symbol and value is tissue expression data (list type).
#Attention! All keys are lower case!
###########################################################################
-def getSymbolValuePairDict(cursor=None,symbolList=None,dataIdDict={}):
- symbolList = map(string.lower, symbolList)
- symbolValuepairDict={}
- valueList=[]
-
- for key in symbolList:
- if dataIdDict.has_key(key):
- DataId = dataIdDict[key]
-
- valueQuery = "select value from TissueProbeSetData where Id=%s" % DataId
- try :
- cursor.execute(valueQuery)
- valueResults = cursor.fetchall()
- for item in valueResults:
- item =item[0]
- valueList.append(item)
- symbolValuepairDict[key] = valueList
- valueList=[]
- except:
- symbolValuepairDict[key] = None
-
- return symbolValuepairDict
+def get_symbol_value_pairs(tissue_data):
+    """Map every gene symbol in ``tissue_data`` to its tissue expression values.
+
+    tissue_data: a ``MrnaAssayTissueData``; its ``data`` mapping supplies the
+        lower-cased symbols and the ``data_id`` of each one.
+
+    Returns a dict keyed by lower-cased symbol.  Each value is the list of
+    ``TissueProbeSetData.value`` entries stored under that symbol's data id
+    (empty when there are none).  When the query fails, every symbol maps to
+    ``None`` instead.
+    """
+    # Imported here because this module has no file-level import of the helper.
+    from utility.db_tools import create_in_clause
+
+    # The result rows only carry the data id, so remember which symbol each
+    # id belongs to.  Ids are compared as strings so that int/long
+    # differences between the two queries cannot matter.
+    symbol_by_id = dict((str(record.data_id), symbol)
+                        for symbol, record in tissue_data.data.items())
+    symbol_value_pairs = dict((symbol, []) for symbol in tissue_data.data)
+
+    if not symbol_by_id:
+        # "IN ( )" is not valid SQL; nothing to look up anyway.
+        return symbol_value_pairs
+
+    query = """SELECT value, id
+               FROM TissueProbeSetData
+               WHERE Id IN {}""".format(create_in_clause(list(symbol_by_id)))
+
+    try:
+        results = g.db.execute(query).fetchall()
+    except Exception:
+        # Best effort: a failed lookup marks the values as unavailable
+        # rather than aborting the whole request.
+        return dict.fromkeys(tissue_data.data)
+
+    for result in results:
+        symbol_value_pairs[symbol_by_id[str(result.id)]].append(result.value)
+
+    return symbol_value_pairs
########################################################################################################
@@ -788,36 +804,51 @@ def getSymbolValuePairDict(cursor=None,symbolList=None,dataIdDict={}):
# then call getSymbolValuePairDict function and merge the results.
########################################################################################################
-def getGeneSymbolTissueValueDict(cursor=None,symbolList=None,dataIdDict={}):
- limitNum=1000
- count = len(symbolList)
-
- SymbolValuePairDict = {}
-
- if count !=0 and count <=limitNum:
- SymbolValuePairDict = getSymbolValuePairDict(cursor=cursor,symbolList=symbolList,dataIdDict=dataIdDict)
-
- elif count >limitNum:
- SymbolValuePairDict={}
- n = count/limitNum
- start =0
- stop =0
-
- for i in range(n):
- stop =limitNum*(i+1)
- gList1 = symbolList[start:stop]
- PairDict1 = getSymbolValuePairDict(cursor=cursor,symbolList=gList1,dataIdDict=dataIdDict)
- start =limitNum*(i+1)
-
- SymbolValuePairDict.update(PairDict1)
-
- if stop < count:
- stop = count
- gList2 = symbolList[start:stop]
- PairDict2 = getSymbolValuePairDict(cursor=cursor,symbolList=gList2,dataIdDict=dataIdDict)
- SymbolValuePairDict.update(PairDict2)
-
- return SymbolValuePairDict
+def get_trait_symbol_and_tissue_values(symbol_list=None):
+    """Return the tissue expression values for the given gene symbols.
+
+    symbol_list: optional list of gene symbols, handed to
+        ``MrnaAssayTissueData`` as ``gene_symbols``.
+
+    Returns the dict built by ``get_symbol_value_pairs``, or an empty dict
+    when the tissue data holds no gene symbols, so callers can always use
+    ``in`` on the result.
+    """
+    tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list)
+
+    if not tissue_data.gene_symbols:
+        return {}
+    return get_symbol_value_pairs(tissue_data)
+
########################################################################################################
#input: cursor, GeneNameLst (list), TissueProbeSetFreezeId(int)
@@ -827,12 +858,17 @@ def getGeneSymbolTissueValueDict(cursor=None,symbolList=None,dataIdDict={}):
# for CorrelationPage.py
########################################################################################################
-def getGeneSymbolTissueValueDictForTrait(cursor=None,GeneNameLst=[],TissueProbeSetFreezeId=0):
- SymbolValuePairDict={}
- symbolList,geneIdDict,dataIdDict,ChrDict,MbDict,descDict,pTargetDescDict = getTissueProbeSetXRefInfo(cursor=cursor,GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId)
- if symbolList:
- SymbolValuePairDict = getGeneSymbolTissueValueDict(cursor=cursor,symbolList=symbolList,dataIdDict=dataIdDict)
- return SymbolValuePairDict
+#def get_trait_symbol_and_tissue_values(cursor=None,GeneNameLst=[],TissueProbeSetFreezeId=0):
+# SymbolValuePairDict={}
+#
+# symbolList,geneIdDict,dataIdDict,ChrDict,MbDict,descDict,pTargetDescDict = getTissueProbeSetXRefInfo(
+# cursor=cursor,GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId)
+#
+# if symbolList:
+# SymbolValuePairDict = get_gene_symbol_and_tissue_values(symbolList=symbolList,
+# dataIdDict=dataIdDict)
+#
+# return SymbolValuePairDict
########################################################################################################
#Input: cursor(cursor): MySQL connnection cursor;
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index 1615fe21..b17e1db1 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -49,13 +49,15 @@ from base.templatePage import templatePage
from utility import webqtlUtil, helper_functions, corr_result_helpers
from dbFunction import webqtlDatabaseFunction
import utility.webqtlUtil #this is for parallel computing only.
-from wqflask.correlation import correlation_function
+from wqflask.correlation import correlation_functions
from utility.benchmark import Bench
from MySQLdb import escape_string as escape
from pprint import pformat as pf
+from flask import Flask, g
+
METHOD_SAMPLE_PEARSON = "1"
METHOD_SAMPLE_RANK = "2"
METHOD_LIT = "3"
@@ -159,6 +161,11 @@ class CorrelationResults(object):
trait_object.sample_r = self.correlation_data[trait][0]
trait_object.sample_p = self.correlation_data[trait][1]
trait_object.num_overlap = self.correlation_data[trait][2]
+
+ # Set some sane defaults
+ trait_object.tissue_corr = None
+ trait_object.tissue_pvalue = None
+
self.correlation_results.append(trait_object)
@@ -916,61 +923,63 @@ class CorrelationResults(object):
symbol_corr_dict = {}
symbol_pvalue_dict = {}
- primary_trait_symbol_value_dict = correlation_function.make_gene_tissue_value_dict(
+ primary_trait_symbol_value_dict = correlation_functions.make_gene_tissue_value_dict(
GeneNameLst=[self.this_trait.symbol],
TissueProbeSetFreezeId=tissue_dataset_id)
primary_trait_value = primary_trait_symbol_value_dict.values()[0]
- symbol_value_dict = correlation_function.make_gene_tissue_value_dict(
+ symbol_value_dict = correlation_functions.make_gene_tissue_value_dict(
gene_name_list=[],
tissue_dataset_id=tissue_dataset_id)
- symbol_corr_dict, symbol_pvalue_dict = correlation_function.batch_cal_tissue_corr(
+ symbol_corr_dict, symbol_pvalue_dict = correlation_functions.batch_cal_tissue_corr(
primaryTraitValue,
SymbolValueDict,
method=self.corr_method)
#else:
- # symbol_corr_dict, symbol_pvalue_dict = correlation_function.batch_cal_tissue_corr(
+ # symbol_corr_dict, symbol_pvalue_dict = correlation_functions.batch_cal_tissue_corr(
# primaryTraitValue,
# SymbolValueDict)
-
return (symbolCorrDict, symbolPvalueDict)
- #XZ, 10/13/2010
- def getTissueCorrelationByList(self, primaryTraitSymbol=None, traitList=None, TissueProbeSetFreezeId=None, method=None):
-
- primaryTraitSymbolValueDict = correlationFunction.getGeneSymbolTissueValueDictForTrait(cursor=self.cursor, GeneNameLst=[primaryTraitSymbol], TissueProbeSetFreezeId=TISSUE_MOUSE_DB)
-
- if primaryTraitSymbol.lower() in primaryTraitSymbolValueDict:
- primaryTraitValue = primaryTraitSymbolValueDict[primaryTraitSymbol.lower()]
-
- geneSymbolList = []
-
- for thisTrait in traitList:
- if hasattr(thisTrait, 'symbol'):
- geneSymbolList.append(thisTrait.symbol)
-
- SymbolValueDict = correlationFunction.getGeneSymbolTissueValueDictForTrait(cursor=self.cursor, GeneNameLst=geneSymbolList, TissueProbeSetFreezeId=TISSUE_MOUSE_DB)
- for thisTrait in traitList:
- if hasattr(thisTrait, 'symbol') and thisTrait.symbol and thisTrait.symbol.lower() in SymbolValueDict:
- oneTraitValue = SymbolValueDict[thisTrait.symbol.lower()]
- if method in ["2","5"]:
- result = correlationFunction.calZeroOrderCorrForTiss( primaryTraitValue, oneTraitValue, method='spearman' )
- else:
- result = correlationFunction.calZeroOrderCorrForTiss( primaryTraitValue, oneTraitValue)
- thisTrait.tissueCorr = result[0]
- thisTrait.tissuePValue = result[2]
- else:
- thisTrait.tissueCorr = None
- thisTrait.tissuePValue = None
- else:
- for thisTrait in traitList:
- thisTrait.tissueCorr = None
- thisTrait.tissuePValue = None
- return traitList
+    def do_tissue_correlation_by_list(self, tissue_dataset_id):
+        """Set ``tissue_corr``/``tissue_pvalue`` on the correlated traits.
+
+        The tissue expression values of ``self.this_trait`` are correlated
+        with those of every trait in ``self.correlation_results`` that has a
+        gene symbol with tissue data.  Traits without such data are left
+        untouched.
+
+        tissue_dataset_id: currently unused by this method; kept for
+            interface compatibility.
+        """
+        primary_symbol = self.this_trait.symbol
+        if not primary_symbol:
+            return
+
+        # "or {}" guards against a lookup that returns None instead of a dict.
+        trait_symbol_and_values = correlation_functions.get_trait_symbol_and_tissue_values(
+            symbol_list=[primary_symbol]) or {}
+
+        primary_key = primary_symbol.lower()
+        if primary_key not in trait_symbol_and_values:
+            return
+        primary_trait_value = trait_symbol_and_values[primary_key]
+
+        gene_symbol_list = [trait.symbol for trait in self.correlation_results if trait.symbol]
+        if not gene_symbol_list:
+            # An empty list would make the lookup fetch every symbol.
+            return
+
+        symbol_value_dict = correlation_functions.get_trait_symbol_and_tissue_values(
+            symbol_list=gene_symbol_list) or {}
+
+        for trait in self.correlation_results:
+            if trait.symbol and trait.symbol.lower() in symbol_value_dict:
+                this_trait_value = symbol_value_dict[trait.symbol.lower()]
+
+                # NOTE(review): the removed implementation passed
+                # method='spearman' for methods "2" and "5"; confirm that
+                # calZeroOrderCorrForTiss accepts self.corr_method directly.
+                result = correlation_functions.calZeroOrderCorrForTiss(primary_trait_value,
+                                                                      this_trait_value,
+                                                                      self.corr_method)
+
+                trait.tissue_corr = result[0]
+                trait.tissue_pvalue = result[2]
diff --git a/wqflask/wqflask/templates/correlation_page.html b/wqflask/wqflask/templates/correlation_page.html
index efbf689c..53b12545 100644
--- a/wqflask/wqflask/templates/correlation_page.html
+++ b/wqflask/wqflask/templates/correlation_page.html
@@ -41,13 +41,13 @@
<td>{{ trait.symbol }}</td>
<td>{{ trait.alias }}</td>
<td>{{ trait.description }}</td>
- <td>Chr{{ trait.chr }}: {{ trait.mb }}</td>
- <td>{{ trait.mean }}</td>
- <td>{{ trait.lrs }}</td>
- <td>Chr{{ trait.locus_chr }}: {{ trait.locus_mb }}</td>
- <td>{{ trait.sample_r }}</td>
+ <td>Chr{{ trait.chr }}:{{'%0.6f'|format(trait.mb)}}</td>
+ <td>{{'%0.3f'|format(trait.mean)}}</td>
+ <td>{{'%0.3f'|format(trait.lrs)}}</td>
+ <td>Chr{{ trait.locus_chr }}:{{'%0.6f'|format(trait.locus_mb)}}</td>
+ <td>{{'%0.3f'|format(trait.sample_r)}}</td>
<td>{{ trait.num_overlap }}</td>
- <td>{{ trait.sample_p }}</td>
+ <td>{{'%0.3e'|format(trait.sample_p)}}</td>
</tr>
{% endfor %}
</tbody>