about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- wqflask/wqflask/correlation/show_corr_results.py 125
1 files changed, 74 insertions, 51 deletions
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index 11eca936..4a9aea73 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -52,6 +52,8 @@ import utility.webqtlUtil #this is for parallel computing only.
from wqflask.correlation import correlationFunction
from utility.benchmark import Bench
+from MySQLdb import escape_string as escape
+
from pprint import pformat as pf
METHOD_SAMPLE_PEARSON = "1"
@@ -101,13 +103,14 @@ class CorrelationResults(object):
self.sample_data = {}
self.corr_method = start_vars['corr_sample_method']
+ self.return_number = 500
#The two if statements below append samples to the sample list based upon whether the user
#rselected Primary Samples Only, Other Samples Only, or All Samples
primary_samples = (self.dataset.group.parlist +
- self.dataset.group.f1list +
- self.dataset.group.samplelist)
+ self.dataset.group.f1list +
+ self.dataset.group.samplelist)
#If either BXD/whatever Only or All Samples, append all of that group's samplelist
if corr_samples_group != 'samples_other':
@@ -153,7 +156,7 @@ class CorrelationResults(object):
#self.correlation_data_slice = collections.OrderedDict()
- for trait_counter, trait in enumerate(self.correlation_data.keys()[:300]):
+ for trait_counter, trait in enumerate(self.correlation_data.keys()[:self.return_number]):
trait_object = GeneralTrait(dataset=self.dataset, name=trait, get_qtl_info=True)
trait_object.sample_r = self.correlation_data[trait][0]
trait_object.sample_p = self.correlation_data[trait][1]
@@ -203,13 +206,6 @@ class CorrelationResults(object):
# mb = trait_object.mb
# )
-
-
- #trait_list = self.getTissueCorrelationByList( primary_trait_symbol = self.this_trait.symbol,
- # corr_results = self.correlation_results,
- # TissueProbeSetFreezeId = 1,
- # method=1)
-
#XZ, 09/18/2008: get all information about the user selected database.
#target_db_name = fd.corr_dataset
#self.target_db_name = start_vars['corr_dataset']
@@ -531,9 +527,13 @@ class CorrelationResults(object):
#XZ, 09/23/2008: In tissue correlation tables, there is no record of GeneId1 == GeneId2
#XZ, 09/24/2008: Note that the correlation value can be negative.
- def getTempTissueCorrTable(self, primaryTraitSymbol="", TissueProbeSetFreezeId=0, method="", returnNumber=0):
+ def get_temp_tissue_corr_table(self,
+ tissue_probesetfreeze_id=0,
+ method="",
+ return_number=0):
+
- def cmpTissCorrAbsoluteValue(A, B):
+ def cmp_tisscorr_absolute_value(A, B):
try:
if abs(A[1]) < abs(B[1]): return 1
elif abs(A[1]) == abs(B[1]):
@@ -542,26 +542,28 @@ class CorrelationResults(object):
except:
return 0
- symbolCorrDict, symbolPvalueDict = self.calculateCorrOfAllTissueTrait(primaryTraitSymbol=primaryTraitSymbol, TissueProbeSetFreezeId=TISSUE_MOUSE_DB, method=method)
+ symbol_corr_dict, symbol_pvalue_dict = self.calculate_corr_all_tissue_trait(
+ tissue_probesetfreeze_id=TISSUE_MOUSE_DB,
+ method=method)
- symbolCorrList = symbolCorrDict.items()
+ symbol_corr_list = symbol_corr_dict.items()
- symbolCorrList.sort(cmpTissCorrAbsoluteValue)
- symbolCorrList = symbolCorrList[0 : 2*returnNumber]
+ symbol_corr_list.sort(cmp_tisscorr_absolute_value)
+ symbol_corr_list = symbol_corr_list[0 : 2*return_number]
- tmpTableName = webqtlUtil.genRandStr(prefix="TOPTISSUE")
+ tmp_table_name = webqtlUtil.genRandStr(prefix="TOPTISSUE")
- q1 = 'CREATE TEMPORARY TABLE %s (Symbol varchar(100) PRIMARY KEY, Correlation float, PValue float)' % tmpTableName
+ q1 = 'CREATE TEMPORARY TABLE %s (Symbol varchar(100) PRIMARY KEY, Correlation float, PValue float)' % tmp_table_name
self.cursor.execute(q1)
- for one_pair in symbolCorrList:
+ for one_pair in symbol_corr_list:
one_symbol = one_pair[0]
one_corr = one_pair[1]
- one_p_value = symbolPvalueDict[one_symbol]
+ one_p_value = symbol_pvalue_dict[one_symbol]
self.cursor.execute( "INSERT INTO %s (Symbol, Correlation, PValue) VALUES ('%s',%f,%f)" % (tmpTableName, one_symbol, float(one_corr), float(one_p_value)) )
- return tmpTableName
+ return tmp_table_name
#XZ, 01/09/2009: This function was created by David Crowell. Xiaodong cleaned up and modified it.
@@ -591,37 +593,47 @@ class CorrelationResults(object):
return litCorrDict
+ def fetch_tissue_correlations(self, method="", return_number=0):
+ """Return tissue correlations for the current dataset, keyed by trait name.
+
+ Builds a temporary table of symbol correlations with
+ get_temp_tissue_corr_table (passing method and return_number through),
+ joins it to the ProbeSet rows of self.dataset.name, and drops the
+ temporary table before returning.
+
+ Returns a dict mapping ProbeSet.Name to (correlation, p-value); rows
+ with a NULL symbol or no matching correlation are excluded by the query.
+
+ """
- #XZ, 01/09/2009: Xiaodong created this function.
- def fetchTissueCorrelations(self, db, primaryTraitSymbol="", TissueProbeSetFreezeId=0, method="", returnNumber = 0):
- """Uses getTempTissueCorrTable to generate table of tissue correlations. This function then gathers that data and
- pairs it with the TraitID string. Takes as its arguments a formdata instance, and a database instance.
- Returns a dictionary of 'TraitID':(tissueCorr, tissuePValue) for the requested correlation"""
-
-
- tempTable = self.getTempTissueCorrTable(primaryTraitSymbol=primaryTraitSymbol, TissueProbeSetFreezeId=TISSUE_MOUSE_DB, method=method, returnNumber=returnNumber)
+ # Temp table name; NOTE(review): it is created via self.cursor but read via g.db -- confirm both share one MySQL session
+ temp_table = self.get_temp_tissue_corr_table(tissue_probesetfreeze_id=TISSUE_MOUSE_DB,
+ method=method, return_number=return_number)
- query = "SELECT ProbeSet.Name, %s.Correlation, %s.PValue" % (tempTable, tempTable)
- query += ' FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze)'
- query += ' LEFT JOIN %s ON %s.Symbol=ProbeSet.Symbol ' % (tempTable,tempTable)
- query += "WHERE ProbeSetFreeze.Name = '%s' and ProbeSetFreeze.Id=ProbeSetXRef.ProbeSetFreezeId and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSet.Symbol IS NOT NULL AND %s.Correlation IS NOT NULL" % (db.name, tempTable)
+ query = """SELECT ProbeSet.Name, {table}.Correlation, {table}.PValue
+ FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze)
+ LEFT JOIN {table} ON {table}.Symbol=ProbeSet.Symbol
+ WHERE ProbeSetFreeze.Name = '{dataset_name}'
+ and ProbeSetFreeze.Id=ProbeSetXRef.ProbeSetFreezeId
+ and ProbeSet.Id = ProbeSetXRef.ProbeSetId
+ and ProbeSet.Symbol IS NOT NULL
+ and {table}.Correlation IS NOT NULL""".format(
+ table=escape(temp_table),
+ dataset_name=escape(self.dataset.name))
- self.cursor.execute(query)
- results = self.cursor.fetchall()
+ results = g.db.execute(query).fetchall()
- tissueCorrDict = {}
+ tissue_corr_dict = {}
for entry in results:
- traitName, tissueCorr, tissuePValue = entry
- tissueCorrDict[traitName] = (tissueCorr, tissuePValue)
-
- self.cursor.execute('DROP TEMPORARY TABLE %s' % tempTable)
+ trait_name, tissue_corr, tissue_pvalue = entry
+ tissue_corr_dict[trait_name] = (tissue_corr, tissue_pvalue)
- return tissueCorrDict
+ g.db.execute('DROP TEMPORARY TABLE {}'.format(escape(temp_table)))
+ return tissue_corr_dict
- #XZ, 01/13/2008
def getLiteratureCorrelationByList(self, input_trait_mouse_geneid=None, species=None, traitList=None):
tmpTableName = webqtlUtil.genRandStr(prefix="LITERATURE")
@@ -672,7 +684,7 @@ class CorrelationResults(object):
use_tissue_corr = False
if self.method in TISSUE_METHODS:
- tissue_corrs = self.fetchTissueCorrelations(db=self.db, primaryTraitSymbol=self.trait_symbol, TissueProbeSetFreezeId=TISSUE_MOUSE_DB, method=self.method, returnNumber = self.returnNumber)
+ tissue_corrs = self.fetch_tissue_correlations(method=self.method, return_number = self.return_number)
use_tissue_corr = True
DatabaseFileName = self.getFileName( target_db_name=self.target_db_name )
@@ -897,20 +909,31 @@ class CorrelationResults(object):
return trait_list
"""
- def calculateCorrOfAllTissueTrait(self, primaryTraitSymbol=None, TissueProbeSetFreezeId=None, method=None):
+ def calculate_corr_all_tissue_trait(self, tissue_probesetfreeze_id=None, method=None):
- symbolCorrDict = {}
- symbolPvalueDict = {}
+ """Return (symbol_corr_dict, symbol_pvalue_dict) for self.trait_symbol against the tissue values of TISSUE_MOUSE_DB.
+ tissue_probesetfreeze_id is accepted but not read; methods "2" and "5" request 'spearman', others use the callee default."""
- primaryTraitSymbolValueDict = correlationFunction.getGeneSymbolTissueValueDictForTrait(cursor=self.cursor, GeneNameLst=[primaryTraitSymbol], TissueProbeSetFreezeId=TISSUE_MOUSE_DB)
- primaryTraitValue = primaryTraitSymbolValueDict.values()[0]
+ primary_trait_symbol_value_dict = correlationFunction.getGeneSymbolTissueValueDictForTrait(
+ cursor=self.cursor,
+ GeneNameLst=[self.trait_symbol],
+ TissueProbeSetFreezeId=TISSUE_MOUSE_DB)
+ primary_trait_value = primary_trait_symbol_value_dict.values()[0]
- SymbolValueDict = correlationFunction.getGeneSymbolTissueValueDictForTrait(cursor=self.cursor, GeneNameLst=[], TissueProbeSetFreezeId=TISSUE_MOUSE_DB)
+ symbol_value_dict = correlationFunction.getGeneSymbolTissueValueDictForTrait(
+ cursor=self.cursor,
+ GeneNameLst=[],
+ TissueProbeSetFreezeId=TISSUE_MOUSE_DB)
if method in ["2","5"]:
- symbolCorrDict, symbolPvalueDict = correlationFunction.batchCalTissueCorr(primaryTraitValue,SymbolValueDict,method='spearman')
+ symbol_corr_dict, symbol_pvalue_dict = correlationFunction.batchCalTissueCorr(
+ primary_trait_value,
+ symbol_value_dict,
+ method='spearman')
else:
- symbolCorrDict, symbolPvalueDict = correlationFunction.batchCalTissueCorr(primaryTraitValue,SymbolValueDict)
+ symbol_corr_dict, symbol_pvalue_dict = correlationFunction.batchCalTissueCorr(
+ primary_trait_value,
+ symbol_value_dict)
- return (symbolCorrDict, symbolPvalueDict)
+ return (symbol_corr_dict, symbol_pvalue_dict)