From 183f9a0ba19b6fcdf1475285af1bb1fcd45a9442 Mon Sep 17 00:00:00 2001
From: Lei Yan
Date: Fri, 20 Sep 2013 17:20:52 -0500
Subject: Tissue correlation results work for sample r/rho correlation page and
are written to the template
---
wqflask/base/mrna_assay_tissue_data.py | 152 ++++++++++++++++++---------------
1 file changed, 82 insertions(+), 70 deletions(-)
(limited to 'wqflask/base')
diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py
index 8ae71858..7eb07028 100644
--- a/wqflask/base/mrna_assay_tissue_data.py
+++ b/wqflask/base/mrna_assay_tissue_data.py
@@ -4,11 +4,13 @@ import collections
from flask import g
-from utility import dbtools
-from uitility import Bunch
+from utility import db_tools
+from utility import Bunch
from MySQLdb import escape_string as escape
+from pprint import pformat as pf
+
class MrnaAssayTissueData(object):
def __init__(self, gene_symbols=None):
@@ -35,14 +37,15 @@ class MrnaAssayTissueData(object):
# Note that inner join is necessary in this query to get distinct record in one symbol group
# with highest mean value
# Due to the limit size of TissueProbeSetFreezeId table in DB,
- # performance of inner join is acceptable.
+ # performance of inner join is acceptable.MrnaAssayTissueData(gene_symbols=symbol_list)
+ print("len(gene_symbols): ", len(gene_symbols))
if len(gene_symbols) == 0:
query += '''Symbol!='' and Symbol Is Not Null group by Symbol)
as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol
and t.Mean = x.maxmean;
'''
else:
- in_clause = dbtools.create_in_clause(gene_symbols)
+ in_clause = db_tools.create_in_clause(gene_symbols)
query += ''' Symbol in {} group by Symbol)
as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol
@@ -51,17 +54,19 @@ class MrnaAssayTissueData(object):
results = g.db.execute(query).fetchall()
for result in results:
- symbol = item[0]
- gene_symbols.append(symbol)
- symbol = symbol.lower()
-
- self.data[symbol].gene_id = result.GeneId
- self.data[symbol].data_id = result.DataId
- self.data[symbol].chr = result.Chr
- self.data[symbol].mb = result.Mb
- self.data[symbol].description = result.description
- self.data[symbol].probe_target_description = result.Probe_Target_Description
+ symbol = result[0]
+ if symbol in gene_symbols:
+ #gene_symbols.append(symbol)
+ symbol = symbol.lower()
+
+ self.data[symbol].gene_id = result.GeneId
+ self.data[symbol].data_id = result.DataId
+ self.data[symbol].chr = result.Chr
+ self.data[symbol].mb = result.Mb
+ self.data[symbol].description = result.description
+ self.data[symbol].probe_target_description = result.Probe_Target_Description
+ #print("self.data: ", pf(self.data))
###########################################################################
#Input: cursor, symbolList (list), dataIdDict(Dict)
@@ -70,65 +75,72 @@ class MrnaAssayTissueData(object):
#function: get one dictionary whose key is gene symbol and value is tissue expression data (list type).
#Attention! All keys are lower case!
###########################################################################
- def get_symbol_value_pairs(self):
-
- id_list = [self.tissue_data[symbol.lower()].data_id for item in self.tissue_data]
-
- symbol_value_pairs = {}
- value_list=[]
- query = """SELECT value, id
- FROM TissueProbeSetData
- WHERE Id IN {}""".format(create_in_clause(id_list))
-
- try :
- results = g.db.execute(query).fetchall()
- for result in results:
- value_list.append(result.value)
- symbol_value_pairs[symbol] = value_list
- except:
- symbol_value_pairs[symbol] = None
-
- #for symbol in symbol_list:
- # if tissue_data.has_key(symbol):
- # data_id = tissue_data[symbol].data_id
- #
- # query = """select value, id
- # from TissueProbeSetData
- # where Id={}""".format(escape(data_id))
- # try :
- # results = g.db.execute(query).fetchall()
- # for item in results:
- # item = item[0]
- # value_list.append(item)
- # symbol_value_pairs[symbol] = value_list
- # value_list=[]
- # except:
- # symbol_value_pairs[symbol] = None
+ def get_symbol_values_pairs(self):
+ id_list = [self.data[symbol].data_id for symbol in self.data]
+
+ symbol_values_dict = {}
+
+ query = """SELECT TissueProbeSetXRef.Symbol, TissueProbeSetData.value
+ FROM TissueProbeSetXRef, TissueProbeSetData
+ WHERE TissueProbeSetData.Id IN {} and
+ TissueProbeSetXRef.DataId = TissueProbeSetData.Id""".format(db_tools.create_in_clause(id_list))
+
+ results = g.db.execute(query).fetchall()
+ for result in results:
+ if result.Symbol.lower() not in symbol_values_dict:
+ symbol_values_dict[result.Symbol.lower()] = [result.value]
+ else:
+ symbol_values_dict[result.Symbol.lower()].append(result.value)
+
+ #for symbol in self.data:
+ # data_id = self.data[symbol].data_id
+ # symbol_values_dict[symbol] = self.get_tissue_values(data_id)
+
- return symbol_value_pairs
+ return symbol_values_dict
- ########################################################################################################
- #input: cursor, symbolList (list), dataIdDict(Dict): key is symbol
- #output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair.
- # key is symbol, value is one list of expression values of one probeSet.
- #function: wrapper function for getSymbolValuePairDict function
- # build gene symbol list if necessary, cut it into small lists if necessary,
- # then call getSymbolValuePairDict function and merge the results.
- ########################################################################################################
- def get_trait_symbol_and_tissue_values(symbol_list=None):
- tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list)
+ #def get_tissue_values(self, data_id):
+ # """Gets the tissue values for a particular gene"""
+ #
+ # tissue_values=[]
+ #
+ # query = """SELECT value, id
+ # FROM TissueProbeSetData
+ # WHERE Id IN {}""".format(db_tools.create_in_clause(data_id))
+ #
+ # #try :
+ # results = g.db.execute(query).fetchall()
+ # for result in results:
+ # tissue_values.append(result.value)
+ # #symbol_values_dict[symbol] = value_list
+ # #except:
+ # # symbol_values_pairs[symbol] = None
+ #
+ # return tissue_values
- #symbolList,
- #geneIdDict,
- #dataIdDict,
- #ChrDict,
- #MbDict,
- #descDict,
- #pTargetDescDict = getTissueProbeSetXRefInfo(
- # GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId)
-
- if len(tissue_data.gene_symbols):
- return get_symbol_value_pairs(tissue_data)
+########################################################################################################
+#input: cursor, symbolList (list), dataIdDict(Dict): key is symbol
+#output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair.
+# key is symbol, value is one list of expression values of one probeSet.
+#function: wrapper function for getSymbolValuePairDict function
+# build gene symbol list if necessary, cut it into small lists if necessary,
+# then call getSymbolValuePairDict function and merge the results.
+########################################################################################################
+
+#def get_trait_symbol_and_tissue_values(symbol_list=None):
+# tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list)
+#
+# #symbolList,
+# #geneIdDict,
+# #dataIdDict,
+# #ChrDict,
+# #MbDict,
+# #descDict,
+# #pTargetDescDict = getTissueProbeSetXRefInfo(
+# # GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId)
+#
+# if len(tissue_data.gene_symbols):
+# return get_symbol_values_pairs(tissue_data)
--
cgit v1.2.3
From 081f4f222a261c0d84bfb266aa4a32d6d62cab85 Mon Sep 17 00:00:00 2001
From: root
Date: Tue, 24 Sep 2013 18:08:23 -0500
Subject: Did some work towards doing the tissue correlation for all traits in
a dataset (in order to sort by tissue correlation instead of sample
correlation).
---
wqflask/base/data_set.py | 14 +-
wqflask/maintenance/quick_search_table.py | 6 +-
.../wqflask/correlation/correlation_functions.py | 2 -
wqflask/wqflask/correlation/show_corr_results.py | 545 +++++++++++----------
.../show_trait_calculate_correlations.html | 2 +-
5 files changed, 309 insertions(+), 260 deletions(-)
(limited to 'wqflask/base')
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 96e04df0..5d21c901 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -1055,7 +1055,6 @@ class MrnaAssayDataSet(DataSet):
""" % (escape(self.name), escape(self.dataset.name))
results = g.db.execute(query).fetchone()
return results[0]
-
def retrieve_sample_data(self, trait):
query = """
@@ -1077,6 +1076,19 @@ class MrnaAssayDataSet(DataSet):
results = g.db.execute(query).fetchall()
return results
+ def retrieve_gene_symbols(self):
+ query = """
+ select ProbeSet.Name, ProbeSet.Symbol
+ from ProbeSet,ProbeSetXRef
+ where ProbeSetXRef.ProbeSetFreezeId = %s and
+ ProbeSetXRef.ProbeSetId=ProbeSet.Id;
+ """ % (self.id)
+ results = g.db.execute(query).fetchall()
+ symbol_dict = {}
+ for item in results:
+ symbol_dict[item[0]] = item[1]
+ return symbol_dict
+
class TempDataSet(DataSet):
'''Temporary user-generated data set'''
diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py
index 9cd792ef..eef61857 100644
--- a/wqflask/maintenance/quick_search_table.py
+++ b/wqflask/maintenance/quick_search_table.py
@@ -11,10 +11,10 @@ each trait, its dataset, and several columns determined by its trait type (pheno
"""
-from __future__ import print_function, division, absolute_import
+from __future__ import absolute_import, division, print_function
-import sys
-sys.path.append("../../..")
+#import sys
+#sys.path.append("../../..")
import simplejson as json
diff --git a/wqflask/wqflask/correlation/correlation_functions.py b/wqflask/wqflask/correlation/correlation_functions.py
index 84d47bb5..da5c3197 100644
--- a/wqflask/wqflask/correlation/correlation_functions.py
+++ b/wqflask/wqflask/correlation/correlation_functions.py
@@ -805,8 +805,6 @@ def get_symbol_value_pairs(tissue_data):
########################################################################################################
def get_trait_symbol_and_tissue_values(symbol_list=None):
- SymbolValuePairDict={}
-
tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list)
if len(tissue_data.gene_symbols):
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index b9d009af..c6bc5b2a 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -95,8 +95,7 @@ class CorrelationResults(object):
#self.this_trait = GeneralTrait(dataset=self.dataset.name,
# name=start_vars['trait_id'],
- # cellid=None)
- #print("start_vars: ", pf(start_vars))
+ # cellid=None)
with Bench("Doing correlations"):
helper_functions.get_species_dataset_trait(self, start_vars)
self.dataset.group.read_genotype_file()
@@ -104,6 +103,7 @@ class CorrelationResults(object):
corr_samples_group = start_vars['corr_samples_group']
self.sample_data = {}
+ self.corr_type = start_vars['corr_type']
self.corr_method = start_vars['corr_sample_method']
self.return_number = 50
@@ -127,36 +127,61 @@ class CorrelationResults(object):
self.target_dataset = data_set.create_dataset(start_vars['corr_dataset'])
self.target_dataset.get_trait_data()
+ self.correlation_results = []
self.correlation_data = {}
- for trait, values in self.target_dataset.trait_data.iteritems():
- this_trait_vals = []
- target_vals = []
- for index, sample in enumerate(self.target_dataset.samplelist):
- if sample in self.sample_data:
- sample_value = self.sample_data[sample]
- target_sample_value = values[index]
- this_trait_vals.append(sample_value)
- target_vals.append(target_sample_value)
-
- this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(
- this_trait_vals, target_vals)
-
- if self.corr_method == 'pearson':
- sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, target_vals)
- else:
- sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals)
-
- self.correlation_data[trait] = [sample_r, sample_p, num_overlap]
-
- self.correlation_data = collections.OrderedDict(sorted(self.correlation_data.items(),
- key=lambda t: -abs(t[1][0])))
- self.correlation_results = []
-
- #self.correlation_data_slice = collections.OrderedDict()
+ if self.corr_type == "tissue":
+ trait_symbol_dict = self.dataset.retrieve_gene_symbols()
+ trait_symbols = trait_symbol_dict.values
+
+ tissue_corr_data = self.do_tissue_corr_for_all_traits(gene_symbol_list=trait_symbols)
+
+ for trait in tissue_corr_data.keys()[:self.return_number]:
+ this_trait_vals = []
+ target_vals = []
+ for index, sample in enumerate(self.target_dataset.samplelist):
+ if sample in self.sample_data:
+ sample_value = self.sample_data[sample]
+ target_sample_value = self.target_dataset.trait_data[trait][index]
+ this_trait_vals.append(sample_value)
+ target_vals.append(target_sample_value)
+
+ this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(
+ this_trait_vals, target_vals)
+
+ if self.corr_method == 'pearson':
+ sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, target_vals)
+ else:
+ sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals)
+
+ self.correlation_data[trait] = [sample_r, sample_p, num_overlap]
+
+ elif self.corr_type == "sample":
+ for trait, values in self.target_dataset.trait_data.iteritems():
+ this_trait_vals = []
+ target_vals = []
+ for index, sample in enumerate(self.target_dataset.samplelist):
+ if sample in self.sample_data:
+ sample_value = self.sample_data[sample]
+ target_sample_value = values[index]
+ this_trait_vals.append(sample_value)
+ target_vals.append(target_sample_value)
+
+ this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(
+ this_trait_vals, target_vals)
+
+ if self.corr_method == 'pearson':
+ sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, target_vals)
+ else:
+ sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals)
+
+ self.correlation_data[trait] = [sample_r, sample_p, num_overlap]
+
+ self.correlation_data = collections.OrderedDict(sorted(self.correlation_data.items(),
+ key=lambda t: -abs(t[1][0])))
- for trait_counter, trait in enumerate(self.correlation_data.keys()[:self.return_number]):
+ for _trait_counter, trait in enumerate(self.correlation_data.keys()[:self.return_number]):
trait_object = GeneralTrait(dataset=self.dataset, name=trait, get_qtl_info=True)
print("gene symbol: ", trait_object.symbol)
@@ -168,63 +193,21 @@ class CorrelationResults(object):
#Get symbol for trait and call function that gets each tissue value from the database (tables TissueProbeSetXRef,
#TissueProbeSetData, etc) and calculates the correlation (cal_zero_order_corr_for_tissue in correlation_functions)
-
-
- # Set some sane defaults
- trait_object.tissue_corr = 0
- trait_object.tissue_pvalue = 0
-
+ if self.corr_method != "tissue":
+ # Set some sane defaults
+ trait_object.tissue_corr = 0
+ trait_object.tissue_pvalue = 0
+ else:
+ trait_object.tissue_corr = tissue_corr_data[trait][1]
+ trait_object.tissue_pvalue = tissue_corr_data[trait][2]
+
self.correlation_results.append(trait_object)
- self.do_tissue_correlation_by_list()
+ if self.corr_method != "tissue":
+ self.do_tissue_correlation_for_trait_list()
print("self.correlation_results: ", pf(self.correlation_results))
-
-
- #self.correlation_data_slice[trait] = self.correlation_data[trait]
- #self.correlation_data_slice[trait].append(trait_object)
- #if self.dataset.type == 'ProbeSet':
- # trait_info = collections.OrderedDict(
- # correlation = float(self.correlation_data[trait][0]),
- # p_value = float(self.correlation_data[trait][1]),
- # symbol = trait_object.symbol,
- # alias = trait_object.alias,
- # description = trait_object.description,
- # chromosome = trait_object.chr,
- # mb = trait_object.mb
- # )
- # if trait_object.mean:
- #def do_tissue_correlation_by_list(self, tissue_dataset_id):t_object.alias, # trait_info[mean] = trait_object.mean
- # if hasattr(trait_object, 'mean'):
- # trait_info[mean] = trait_object.mean
- # if hasattr(trait_object, 'lrs'):
- # trait_info[lrs] = trait_object.lrs
- # if hasattr(trait_object, 'locus_chr'):
- # trait_info[locus_chr] = trait_object.locus_chr
- # if hasattr(trait_object, 'locus_mb'):
- # trait_info[locus_mb] = trait_object.locus_mb
- #elif self.dataset.type == 'Geno':
- # trait_info = collections.OrderedDict(
- # correlation = float(self.correlation_data[trait][0]),
- # p_value = float(self.correlation_data[trait][1]),
- # symbol = trait_object.symbol,
- # alias = trai
- #def do_tissue_correlation_by_list(self, tissue_dataset_id):t_object.alias,
- # description = trait_object.description,
- # chromosome = trait_object.chr,
- # mb = trait_object.mb
- # )
- #else: # 'Publish'
- # trait_info = collections.OrderedDict(
- # correlation = float(self.correlation_data[trait][0]),
- # p_value = float(self.correlation_data[trait][1]),
- # symbol = trait_object.symbol,
- # alias = trait_object.alias,
- # description = trait_object.description,
- # chromosome = trait_object.chr,
- # mb = trait_object.mb
- # )
#XZ, 09/18/2008: get all information about the user selected database.
#target_db_name = fd.corr_dataset
@@ -278,6 +261,210 @@ class CorrelationResults(object):
############################################################################################################################################
+ def do_tissue_correlation_for_trait_list(self, tissue_dataset_id=1):
+ """Given a list of correlation results (self.correlation_results), gets the tissue correlation value for each"""
+
+ #Gets tissue expression values for the primary trait
+ primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
+ symbol_list = [self.this_trait.symbol])
+
+ print("primary_trait_tissue_vals: ", pf(primary_trait_tissue_vals_dict))
+
+ if self.this_trait.symbol.lower() in primary_trait_tissue_vals_dict:
+ primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower()]
+
+ #gene_symbol_list = []
+ #
+ #for trait in self.correlation_results:
+ # if hasattr(trait, 'symbol'):
+ # gene_symbol_list.append(trait.symbol)
+
+ gene_symbol_list = [trait.symbol for trait in self.correlation_results if trait.symbol]
+
+ corr_result_tissue_vals_dict= correlation_functions.get_trait_symbol_and_tissue_values(
+ symbol_list=gene_symbol_list)
+
+ print("corr_result_tissue_vals: ", pf(corr_result_tissue_vals_dict))
+
+ for trait in self.correlation_results:
+ if trait.symbol and trait.symbol.lower() in corr_result_tissue_vals_dict:
+ this_trait_tissue_values = corr_result_tissue_vals_dict[trait.symbol.lower()]
+
+ result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values,
+ this_trait_tissue_values,
+ self.corr_method)
+
+ trait.tissue_corr = result[0]
+ trait.tissue_pvalue = result[2]
+
+ # else:
+ # trait.tissue_corr = None
+ # trait.tissue_pvalue = None
+ #else:
+ # for trait in self.correlation_results:
+ # trait.tissue_corr = None
+ # trait.tissue_pvalue = None
+
+ #return self.correlation_results
+
+
+ def do_tissue_corr_for_all_traits(self, trait_symbols, tissue_dataset_id=1):
+ #Gets tissue expression values for the primary trait
+ primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
+ symbol_list = [self.this_trait.symbol])
+
+ correlation_data = {}
+ if self.this_trait.symbol.lower() in primary_trait_tissue_vals_dict:
+ primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower()]
+
+ corr_result_tissue_vals_dict= correlation_functions.get_trait_symbol_and_tissue_values(
+ symbol_list=trait_symbols.values)
+
+ print("corr_result_tissue_vals: ", pf(corr_result_tissue_vals_dict))
+
+ for trait, symbol in trait_symbols.iteritems():
+ if symbol.lower() in corr_result_tissue_vals_dict:
+ this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower()]
+
+ result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values,
+ this_trait_tissue_values,
+ self.corr_method)
+
+ correlation_results[trait] = [symbol, result[0], result[2]]
+
+ correlation_data = collections.OrderedDict(sorted(self.correlation_data.items(),
+ key=lambda t: -abs(t[1][1])))
+
+ return correlation_data
+
+
+
+ def do_tissue_corr_for_all_traits_2(self):
+ """Comments Possibly Out of Date!!!!!
+
+ Uses get_temp_tissue_corr_table to generate table of tissue correlations
+
+ This function then gathers that data and pairs it with the TraitID string.
+ Takes as its arguments a formdata instance, and a dataset instance.
+ Returns a dictionary of 'TraitID':(tissueCorr, tissuePValue)
+ for the requested correlation
+
+ Used when the user selects the tissue correlation method; i.e. not for the
+ column that is appended to all probeset trait correlation tables
+
+ """
+
+ # table name string
+ temp_table = self.get_temp_tissue_corr_table(tissue_probesetfreeze_id=TISSUE_MOUSE_DB,
+ method=method)
+
+ query = """SELECT ProbeSet.Name, {}.Correlation, {}.PValue
+ FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze)
+ LEFT JOIN {} ON {}.Symbol=ProbeSet.Symbol
+ WHERE ProbeSetFreeze.Name = '{}'
+ and ProbeSetFreeze.Id=ProbeSetXRef.ProbeSetFreezeId
+ and ProbeSet.Id = ProbeSetXRef.ProbeSetId
+ and ProbeSet.Symbol IS NOT NULL
+ and {}.Correlation IS NOT NULL""".format(dataset.mescape(
+ temp_table, temp_table, temp_table, temp_table,
+ self.dataset.name, temp_table))
+
+ results = g.db.execute(query).fetchall()
+
+ tissue_corr_dict = {}
+
+ for entry in results:
+ trait_name, tissue_corr, tissue_pvalue = entry
+ tissue_corr_dict[trait_name] = (tissue_corr, tissue_pvalue)
+ #symbolList,
+ #geneIdDict,
+ #dataIdDict,
+ #ChrDict,
+ #MbDict,
+ #descDict,
+ #pTargetDescDict = getTissueProbeSetXRefInfo(
+ # GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId)
+
+ g.db.execute('DROP TEMPORARY TABLE {}'.format(escape(temp_table)))
+
+ return tissue_corr_dict
+
+
+ #XZ, 09/23/2008: In tissue correlation tables, there is no record of GeneId1 == GeneId2
+ #XZ, 09/24/2008: Note that the correlation value can be negative.
+ def get_temp_tissue_corr_table(self,
+ tissue_probesetfreeze_id=0,
+ method="",
+ return_number=0):
+
+
+ def cmp_tisscorr_absolute_value(A, B):
+ try:
+ if abs(A[1]) < abs(B[1]): return 1
+ elif abs(A[1]) == abs(B[1]):
+ return 0
+ else: return -1
+ except:
+ return 0
+
+ symbol_corr_dict, symbol_pvalue_dict = self.calculate_corr_for_all_tissues(
+ tissue_dataset_id=TISSUE_MOUSE_DB)
+
+ symbol_corr_list = symbol_corr_dict.items()
+
+ symbol_corr_list.sort(cmp_tisscorr_absolute_value)
+ symbol_corr_list = symbol_corr_list[0 : 2*return_number]
+
+ tmp_table_name = webqtlUtil.genRandStr(prefix="TOPTISSUE")
+
+ q1 = 'CREATE TEMPORARY TABLE %s (Symbol varchar(100) PRIMARY KEY, Correlation float, PValue float)' % tmp_table_name
+ self.cursor.execute(q1)
+
+ for one_pair in symbol_corr_list:
+ one_symbol = one_pair[0]
+ one_corr = one_pair[1]
+ one_p_value = symbol_pvalue_dict[one_symbol]
+
+ self.cursor.execute( "INSERT INTO %s (Symbol, Correlation, PValue) VALUES ('%s',%f,%f)" % (tmpTableName, one_symbol, float(one_corr), float(one_p_value)) )
+
+ return tmp_table_name
+
+
+ def calculate_corr_for_all_tissues(self, tissue_dataset_id=None):
+
+ symbol_corr_dict = {}
+ symbol_pvalue_dict = {}
+
+ primary_trait_symbol_value_dict = correlation_functions.make_gene_tissue_value_dict(
+ GeneNameLst=[self.this_trait.symbol],
+ TissueProbeSetFreezeId=tissue_dataset_id)
+ primary_trait_value = primary_trait_symbol_value_dict.values()[0]
+
+ symbol_value_dict = correlation_functions.make_gene_tissue_value_dict(
+ gene_name_list=[],
+ tissue_dataset_id=tissue_dataset_id)
+
+ symbol_corr_dict, symbol_pvalue_dict = correlation_functions.batch_cal_tissue_corr(
+ primaryTraitValue,
+ SymbolValueDict,
+ method=self.corr_method)
+ #else:
+ # symbol_corr_dict, symbol_pvalue_dict = correlation_functions.batch_cal_tissue_corr(
+ # primaryTraitValue,
+ # SymbolValueDict)
+
+ return (symbolCorrDict, symbolPvalueDict)
+
+ ##XZ, 12/16/2008: the input geneid is of mouse type
+ #def checkSymbolForTissueCorr(self, tissueProbeSetFreezeId=0, symbol=""):
+ # q = "SELECT 1 FROM TissueProbeSetXRef WHERE TissueProbeSetFreezeId=%s and Symbol='%s' LIMIT 1" % (tissueProbeSetFreezeId,symbol)
+ # self.cursor.execute(q)
+ # try:
+ # x = self.cursor.fetchone()
+ # if x: return True
+ # else: raise
+ # except: return False
+
def get_all_dataset_data(self):
@@ -353,6 +540,8 @@ class CorrelationResults(object):
return mouse_geneid
+
+
##XZ, 12/16/2008: the input geneid is of mouse type
#def checkForLitInfo(self,geneId):
# q = 'SELECT 1 FROM LCorrRamin3 WHERE GeneId1=%s LIMIT 1' % geneId
@@ -364,16 +553,6 @@ class CorrelationResults(object):
# except: return False
- ##XZ, 12/16/2008: the input geneid is of mouse type
- #def checkSymbolForTissueCorr(self, tissueProbeSetFreezeId=0, symbol=""):
- # q = "SELECT 1 FROM TissueProbeSetXRef WHERE TissueProbeSetFreezeId=%s and Symbol='%s' LIMIT 1" % (tissueProbeSetFreezeId,symbol)
- # self.cursor.execute(q)
- # try:
- # x = self.cursor.fetchone()
- # if x: return True
- # else: raise
- # except: return False
-
def fetchAllDatabaseData(self, species, GeneId, GeneSymbol, strains, db, method, returnNumber, tissueProbeSetFreezeId):
@@ -545,46 +724,6 @@ class CorrelationResults(object):
- #XZ, 09/23/2008: In tissue correlation tables, there is no record of GeneId1 == GeneId2
- #XZ, 09/24/2008: Note that the correlation value can be negative.
- def get_temp_tissue_corr_table(self,
- tissue_probesetfreeze_id=0,
- method="",
- return_number=0):
-
-
- def cmp_tisscorr_absolute_value(A, B):
- try:
- if abs(A[1]) < abs(B[1]): return 1
- elif abs(A[1]) == abs(B[1]):
- return 0
- else: return -1
- except:
- return 0
-
- symbol_corr_dict, symbol_pvalue_dict = self.calculate_corr_for_all_tissues(
- tissue_dataset_id=TISSUE_MOUSE_DB)
-
- symbol_corr_list = symbol_corr_dict.items()
-
- symbol_corr_list.sort(cmp_tisscorr_absolute_value)
- symbol_corr_list = symbol_corr_list[0 : 2*return_number]
-
- tmp_table_name = webqtlUtil.genRandStr(prefix="TOPTISSUE")
-
- q1 = 'CREATE TEMPORARY TABLE %s (Symbol varchar(100) PRIMARY KEY, Correlation float, PValue float)' % tmp_table_name
- self.cursor.execute(q1)
-
- for one_pair in symbol_corr_list:
- one_symbol = one_pair[0]
- one_corr = one_pair[1]
- one_p_value = symbol_pvalue_dict[one_symbol]
-
- self.cursor.execute( "INSERT INTO %s (Symbol, Correlation, PValue) VALUES ('%s',%f,%f)" % (tmpTableName, one_symbol, float(one_corr), float(one_p_value)) )
-
- return tmp_table_name
-
-
#XZ, 01/09/2009: This function was created by David Crowell. Xiaodong cleaned up and modified it.
def fetchLitCorrelations(self, species, GeneId, db, returnNumber): ### Used to generate Lit Correlations when calculations are done from text file. dcrowell August 2008
"""Uses getTempLiteratureTable to generate table of literatire correlations. This function then gathers that data and
@@ -612,57 +751,6 @@ class CorrelationResults(object):
return litCorrDict
- def fetch_tissue_correlations(self):
- """Comments Possibly Out of Date!!!!!
-
-
- Uses getTempTissueCorrTable to generate table of tissue correlations
-
- This function then gathers that data and pairs it with the TraitID string.
- Takes as its arguments a formdata instance, and a database instance.
- Returns a dictionary of 'TraitID':(tissueCorr, tissuePValue)
- for the requested correlation
-
- Used when the user selects the tissue correlation method; i.e. not for the
- column that is appended to all probeset trait correlation tables
-
- """
-
- # table name string
- temp_table = self.get_temp_tissue_corr_table(tissue_probesetfreeze_id=TISSUE_MOUSE_DB,
- method=method)
-
- query = """SELECT ProbeSet.Name, {}.Correlation, {}.PValue
- FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze)
- LEFT JOIN {} ON {}.Symbol=ProbeSet.Symbol
- WHERE ProbeSetFreeze.Name = '{}'
- and ProbeSetFreeze.Id=ProbeSetXRef.ProbeSetFreezeId
- and ProbeSet.Id = ProbeSetXRef.ProbeSetId
- and ProbeSet.Symbol IS NOT NULL
- and {}.Correlation IS NOT NULL""".format(dataset.mescape(
- temp_table, temp_table, temp_table, temp_table,
- self.dataset.name, temp_table))
-
- results = g.db.execute(query).fetchall()
-
- tissue_corr_dict = {}
-
- for entry in results:
- trait_name, tissue_corr, tissue_pvalue = entry
- tissue_corr_dict[trait_name] = (tissue_corr, tissue_pvalue)
- #symbolList,
- #geneIdDict,
- #dataIdDict,
- #ChrDict,
- #MbDict,
- #descDict,
- #pTargetDescDict = getTissueProbeSetXRefInfo(
- # GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId)
-
- g.db.execute('DROP TEMPORARY TABLE {}'.format(escape(temp_table)))
-
- return tissue_corr_dict
-
def getLiteratureCorrelationByList(self, input_trait_mouse_geneid=None, species=None, traitList=None):
@@ -819,7 +907,30 @@ class CorrelationResults(object):
allcorrelations.append( one_traitinfo )
_log.info("Appending the results")
+ def calculate_corr_for_all_tissues(self, tissue_dataset_id=None):
+
+ symbol_corr_dict = {}
+ symbol_pvalue_dict = {}
+ primary_trait_symbol_value_dict = correlation_functions.make_gene_tissue_value_dict(
+ GeneNameLst=[self.this_trait.symbol],
+ TissueProbeSetFreezeId=tissue_dataset_id)
+ primary_trait_value = primary_trait_symbol_value_dict.values()[0]
+
+ symbol_value_dict = correlation_functions.make_gene_tissue_value_dict(
+ gene_name_list=[],
+ tissue_dataset_id=tissue_dataset_id)
+
+ symbol_corr_dict, symbol_pvalue_dict = correlation_functions.batch_cal_tissue_corr(
+ primaryTraitValue,
+ SymbolValueDict,
+ method=self.corr_method)
+ #else:
+ # symbol_corr_dict, symbol_pvalue_dict = correlation_functions.batch_cal_tissue_corr(
+ # primaryTraitValue,
+ # SymbolValueDict)
+
+ return (symbolCorrDict, symbolPvalueDict)
datasetFile.close()
totalTraits = len(allcorrelations)
_log.info("Done correlating using the fast method")
@@ -939,78 +1050,6 @@ class CorrelationResults(object):
return trait_list
"""
- def calculate_corr_for_all_tissues(self, tissue_dataset_id=None):
- symbol_corr_dict = {}
- symbol_pvalue_dict = {}
- primary_trait_symbol_value_dict = correlation_functions.make_gene_tissue_value_dict(
- GeneNameLst=[self.this_trait.symbol],
- TissueProbeSetFreezeId=tissue_dataset_id)
- primary_trait_value = primary_trait_symbol_value_dict.values()[0]
-
- symbol_value_dict = correlation_functions.make_gene_tissue_value_dict(
- gene_name_list=[],
- tissue_dataset_id=tissue_dataset_id)
-
- symbol_corr_dict, symbol_pvalue_dict = correlation_functions.batch_cal_tissue_corr(
- primaryTraitValue,
- SymbolValueDict,
- method=self.corr_method)
- #else:
- # symbol_corr_dict, symbol_pvalue_dict = correlation_functions.batch_cal_tissue_corr(
- # primaryTraitValue,
- # SymbolValueDict)
-
- return (symbolCorrDict, symbolPvalueDict)
-
-
- def do_tissue_correlation_by_list(self, tissue_dataset_id=1):
- """Given a list of correlation results (self.correlation_results), gets the tissue correlation value for each"""
-
- #Gets tissue expression values for the primary trait
- primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
- symbol_list = [self.this_trait.symbol])
-
- print("primary_trait_tissue_vals: ", pf(primary_trait_tissue_vals_dict))
-
- if self.this_trait.symbol.lower() in primary_trait_tissue_vals_dict:
- primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower()]
-
- #gene_symbol_list = []
- #
- #for trait in self.correlation_results:
- # if hasattr(trait, 'symbol'):
- # gene_symbol_list.append(trait.symbol)
-
- gene_symbol_list = [trait.symbol for trait in self.correlation_results if trait.symbol]
-
- corr_result_tissue_vals_dict= correlation_functions.get_trait_symbol_and_tissue_values(
- symbol_list=gene_symbol_list)
-
- print("corr_result_tissue_vals: ", pf(corr_result_tissue_vals_dict))
-
- for trait in self.correlation_results:
- if trait.symbol and trait.symbol.lower() in corr_result_tissue_vals_dict:
- this_trait_tissue_values = corr_result_tissue_vals_dict[trait.symbol.lower()]
-
- result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values,
- this_trait_tissue_values,
- self.corr_method)
-
- trait.tissue_corr = result[0]
- trait.tissue_pvalue = result[2]
-
- #print("trait.tissue_corr / pvalue: ", str(trait.tissue_corr) + " :: " + str(trait.tissue_pvalue))
-
-
- # else:
- # trait.tissue_corr = None
- # trait.tissue_pvalue = None
- #else:
- # for trait in self.correlation_results:
- # trait.tissue_corr = None
- # trait.tissue_pvalue = None
-
- #return self.correlation_results
diff --git a/wqflask/wqflask/templates/show_trait_calculate_correlations.html b/wqflask/wqflask/templates/show_trait_calculate_correlations.html
index 12a064c0..73502392 100644
--- a/wqflask/wqflask/templates/show_trait_calculate_correlations.html
+++ b/wqflask/wqflask/templates/show_trait_calculate_correlations.html
@@ -5,7 +5,7 @@
-