From 615b861dfd05c04df2e1a753dd135b07c1d88a94 Mon Sep 17 00:00:00 2001
From: Lei Yan
Date: Wed, 19 Jun 2013 23:04:21 +0000
Subject: Moved the normalize_values function to separate file
corr_result_helpers.py
Added docstring test to normalize_values
Number of overlapping samples column now displays correctly in the
correlation results page
---
wqflask/base/trait.py | 7 -
wqflask/utility/corr_result_helpers.py | 30 ++++
wqflask/wqflask/correlation/show_corr_results.py | 183 ++---------------------
3 files changed, 44 insertions(+), 176 deletions(-)
create mode 100644 wqflask/utility/corr_result_helpers.py
(limited to 'wqflask')
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index 801d32c2..3429d9c1 100755
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -286,7 +286,6 @@ class GeneralTrait(object):
escape(self.dataset.name),
escape(self.name))
trait_info = g.db.execute(query).fetchone()
- #print("trait_info is: ", pf(trait_info))
#XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name
# to avoid the problem of same marker name from different species.
elif self.dataset.type == 'Geno':
@@ -319,7 +318,6 @@ class GeneralTrait(object):
#XZ: assign SQL query result to trait attributes.
for i, field in enumerate(self.dataset.display_fields):
- print(" mike: {} -> {} - {}".format(field, type(trait_info[i]), trait_info[i]))
setattr(self, field, trait_info[i])
if self.dataset.type == 'Publish':
@@ -329,9 +327,6 @@ class GeneralTrait(object):
self.homologeneid = None
- print("self.geneid is:", self.geneid)
- print(" type:", type(self.geneid))
- print("self.dataset.group.name is:", self.dataset.group.name)
if self.dataset.type == 'ProbeSet' and self.dataset.group and self.geneid:
#XZ, 05/26/2010: From time to time, this query get error message because some geneid values in database are not number.
#XZ: So I have to test if geneid is number before execute the query.
@@ -356,7 +351,6 @@ class GeneralTrait(object):
InbredSet.SpeciesId = Species.Id AND
Species.TaxonomyId = Homologene.TaxonomyId
""" % (escape(str(self.geneid)), escape(self.dataset.group.name))
- print("-> query is:", query)
result = g.db.execute(query).fetchone()
#else:
# result = None
@@ -388,7 +382,6 @@ class GeneralTrait(object):
Geno.Name = '{}' and
Geno.SpeciesId = Species.Id
""".format(self.dataset.group.species, self.locus)
- print("query is:", query)
result = g.db.execute(query).fetchone()
self.locus_chr = result[0]
self.locus_mb = result[1]
diff --git a/wqflask/utility/corr_result_helpers.py b/wqflask/utility/corr_result_helpers.py
new file mode 100644
index 00000000..edf32449
--- /dev/null
+++ b/wqflask/utility/corr_result_helpers.py
@@ -0,0 +1,30 @@
+def normalize_values(a_values, b_values):
+ """
+ Trim two lists of values to contain only the values they both share
+
+ Given two lists of sample values, trim each list so that it contains
+ only the samples that contain a value in both lists. Also returns
+ the number of such samples.
+
+ >>> normalize_values([2.3, None, None, 3.2, 4.1, 5], [3.4, 7.2, 1.3, None, 6.2, 4.1])
+ ([2.3, 4.1, 5], [3.4, 6.2, 4.1], 3)
+
+ """
+
+ min_length = min(len(a_values), len(b_values))
+ a_new = []
+ b_new = []
+ for counter in range(min_length):
+ if a_values[counter] and b_values[counter]:
+ a_new.append(a_values[counter])
+ b_new.append(b_values[counter])
+
+ num_overlap = len(a_new)
+ assert num_overlap == len(b_new), "Lengths should be the same"
+
+ return a_new, b_new, num_overlap
+
+
+if __name__ == '__main__':
+ import doctest
+ doctest.testmod()
\ No newline at end of file
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index 3b1ac87d..1410ae0c 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -20,6 +20,9 @@
from __future__ import absolute_import, print_function, division
+import sys
+sys.path.append(".")
+
import gc
import string
import cPickle
@@ -43,7 +46,7 @@ from utility.TDCell import TDCell
from base.trait import GeneralTrait
from base import data_set
from base.templatePage import templatePage
-from utility import webqtlUtil, helper_functions
+from utility import webqtlUtil, helper_functions, corr_result_helpers
from dbFunction import webqtlDatabaseFunction
import utility.webqtlUtil #this is for parallel computing only.
from wqflask.correlation import correlationFunction
@@ -122,22 +125,24 @@ class CorrelationResults(object):
self.correlation_data = {}
for trait, values in self.target_dataset.trait_data.iteritems():
- this_trait_values = []
- target_values = []
+ this_trait_vals = []
+ target_vals = []
for index, sample in enumerate(self.target_dataset.samplelist):
if sample in self.sample_data:
sample_value = self.sample_data[sample]
target_sample_value = values[index]
- this_trait_values.append(sample_value)
- target_values.append(target_sample_value)
+ this_trait_vals.append(sample_value)
+ target_vals.append(target_sample_value)
+
+ this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(
+ this_trait_vals, target_vals)
- this_trait_values, target_values, num_overlap = normalize_values(this_trait_values,
- target_values)
+ print("num_overlap:", num_overlap)
if self.corr_method == 'pearson':
- sample_r, sample_p = scipy.stats.pearsonr(this_trait_values, target_values)
+ sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, target_vals)
else:
- sample_r, sample_p = scipy.stats.spearmanr(this_trait_values, target_values)
+ sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals)
self.correlation_data[trait] = [sample_r, sample_p, num_overlap]
@@ -940,163 +945,3 @@ class CorrelationResults(object):
return traitList
-
- def createExcelFileWithTitleAndFooter(self, workbook=None, identification=None, db=None, returnNumber=None):
-
- worksheet = workbook.add_worksheet()
-
- titleStyle = workbook.add_format(align = 'left', bold = 0, size=14, border = 1, border_color="gray")
-
- ##Write title Info
- # Modified by Hongqiang Li
- worksheet.write([1, 0], "Citations: Please see %s/reference.html" % webqtlConfig.PORTADDR, titleStyle)
- worksheet.write([1, 0], "Citations: Please see %s/reference.html" % webqtlConfig.PORTADDR, titleStyle)
- worksheet.write([2, 0], "Trait : %s" % identification, titleStyle)
- worksheet.write([3, 0], "Database : %s" % db.fullname, titleStyle)
- worksheet.write([4, 0], "Date : %s" % time.strftime("%B %d, %Y", time.gmtime()), titleStyle)
- worksheet.write([5, 0], "Time : %s GMT" % time.strftime("%H:%M ", time.gmtime()), titleStyle)
- worksheet.write([6, 0], "Status of data ownership: Possibly unpublished data; please see %s/statusandContact.html for details on sources, ownership, and usage of these data." % webqtlConfig.PORTADDR, titleStyle)
- #Write footer info
- worksheet.write([9 + returnNumber, 0], "Funding for The GeneNetwork: NIAAA (U01AA13499, U24AA13513), NIDA, NIMH, and NIAAA (P20-DA21131), NCI MMHCC (U01CA105417), and NCRR (U01NR 105417)", titleStyle)
- worksheet.write([10 + returnNumber, 0], "PLEASE RETAIN DATA SOURCE INFORMATION WHENEVER POSSIBLE", titleStyle)
-
- return worksheet
-
-
- def getTableHeaderForGeno(self, method=None, worksheet=None, newrow=None, headingStyle=None):
-
- tblobj_header = []
-
- if method in ["1","3","4"]:
- tblobj_header = [[THCell(HT.TD(' ', Class="fs13 fwb ffl b1 cw cbrb"), sort=0),
- THCell(HT.TD('Record', HT.BR(), 'ID', HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text='Record ID', idx=1),
- THCell(HT.TD('Location', HT.BR(), 'Chr and Mb', HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text='Location (Chr and Mb)', idx=2),
- THCell(HT.TD(HT.Href(
- text = HT.Span('Sample',HT.BR(), 'r', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"),
- target = '_blank',
- url = "/correlationAnnotation.html#genetic_r"),
- Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample r", idx=3),
- THCell(HT.TD('N',HT.BR(),'Cases',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="N Cases", idx=4),
- THCell(HT.TD(HT.Href(
- text = HT.Span('Sample',HT.BR(), 'p(r)', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"),
- target = '_blank',
- url = "/correlationAnnotation.html#genetic_p_r"),
- Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample p(r)", idx=5)]]
-
- for ncol, item in enumerate(['Record ID', 'Location (Chr, Mb)', 'Sample r', 'N Cases', 'Sample p(r)']):
- worksheet.write([newrow, ncol], item, headingStyle)
- worksheet.set_column([ncol, ncol], 2*len(item))
- else:
- tblobj_header = [[THCell(HT.TD(' ', Class="fs13 fwb ffl b1 cw cbrb"), sort=0),
- THCell(HT.TD('Record', HT.BR(), 'ID', HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text='Record ID', idx=1),
- THCell(HT.TD('Location', HT.BR(), 'Chr and Mb', HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text='Location (Chr and Mb)', idx=2),
- THCell(HT.TD(HT.Href(
- text = HT.Span('Sample',HT.BR(), 'rho', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"),
- target = '_blank',
- url = "/correlationAnnotation.html#genetic_rho"),
- Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample rho", idx=3),
- THCell(HT.TD('N',HT.BR(),'Cases',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="N Cases", idx=4),
- THCell(HT.TD(HT.Href(
- text = HT.Span('Sample',HT.BR(), 'p(rho)', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"),
- target = '_blank',
- url = "/correlationAnnotation.html#genetic_p_rho"),
- Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample p(rho)", idx=5)]]
-
- for ncol, item in enumerate(['Record ID', 'Location (Chr, Mb)', 'Sample rho', 'N Cases', 'Sample p(rho)']):
- worksheet.write([newrow, ncol], item, headingStyle)
- worksheet.set_column([ncol, ncol], 2*len(item))
-
-
- return tblobj_header, worksheet
-
-
- def getTableBodyForGeno(self, traitList, formName=None, worksheet=None, newrow=None, corrScript=None):
-
- tblobj_body = []
-
- for thisTrait in traitList:
- tr = []
-
- trId = str(thisTrait)
-
- corrScript.append('corrArray["%s"] = {corr:%1.4f};' % (trId, thisTrait.corr))
-
- tr.append(TDCell(HT.TD(HT.Input(type="checkbox", Class="checkbox", name="searchResult",value=trId, onClick="highlight(this)"), nowrap="on", Class="fs12 fwn ffl b1 c222"), text=trId))
-
- tr.append(TDCell(HT.TD(HT.Href(text=thisTrait.name,url="javascript:showTrait('%s', '%s')" % (formName, thisTrait.name), Class="fs12 fwn ffl"),align="left", Class="fs12 fwn ffl b1 c222"), text=thisTrait.name, val=thisTrait.name.upper()))
-
- #XZ: trait_location_value is used for sorting
- trait_location_repr = '--'
- trait_location_value = 1000000
-
- if thisTrait.chr and thisTrait.mb:
- try:
- trait_location_value = int(thisTrait.chr)*1000 + thisTrait.mb
- except:
- if thisTrait.chr.upper() == 'X':
- trait_location_value = 20*1000 + thisTrait.mb
- else:
- trait_location_value = ord(str(thisTrait.chr).upper()[0])*1000 + thisTrait.mb
-
- trait_location_repr = 'Chr%s: %.6f' % (thisTrait.chr, float(thisTrait.mb) )
-
- tr.append(TDCell(HT.TD(trait_location_repr, Class="fs12 fwn b1 c222", nowrap="on"), trait_location_repr, trait_location_value))
-
-
- repr='%3.3f' % thisTrait.corr
- tr.append(TDCell(HT.TD(HT.Href(text=repr, url="javascript:showCorrPlot('%s', '%s')" % (formName, thisTrait.name), Class="fs12 fwn ffl"), Class="fs12 fwn ffl b1 c222", nowrap='ON', align='right'),repr,abs(thisTrait.corr)))
-
- repr = '%d' % thisTrait.nOverlap
- tr.append(TDCell(HT.TD(repr, Class="fs12 fwn ffl b1 c222",align='right'),repr,thisTrait.nOverlap))
-
- repr = webqtlUtil.SciFloat(thisTrait.corrPValue)
- tr.append(TDCell(HT.TD(repr,nowrap='ON', Class="fs12 fwn ffl b1 c222", align='right'),repr,thisTrait.corrPValue))
-
- tblobj_body.append(tr)
-
- for ncol, item in enumerate([thisTrait.name, trait_location_repr, thisTrait.corr, thisTrait.nOverlap, thisTrait.corrPValue]):
- worksheet.write([newrow, ncol], item)
- newrow += 1
-
- return tblobj_body, worksheet, corrScript
-
-def normalize_values(values_1, values_2):
- N = min(len(values_1), len(values_2))
- X = []
- Y = []
- for i in range(N):
- if values_1[i]!= None and values_2[i]!= None:
- X.append(values_1[i])
- Y.append(values_2[i])
- num_overlap = len(X)
-
- return (X, Y, num_overlap)
-
-
-def cal_correlation(values_1, values_2):
- N = min(len(values_1), len(values_2))
- X = []
- Y = []
- for i in range(N):
- if values_1[i]!= None and values_2[i]!= None:
- X.append(values_1[i])
- Y.append(values_2[i])
- NN = len(X)
- if NN <6:
- return (0.0,NN)
- sx = reduce(lambda x,y:x+y,X,0.0)
- sy = reduce(lambda x,y:x+y,Y,0.0)
- x_mean = sx/NN
- y_mean = sy/NN
- xyd = 0.0
- sxd = 0.0
- syd = 0.0
- for i in range(NN):
- xyd += (X[i] - x_mean)*(Y[i] - y_mean)
- sxd += (X[i] - x_mean)*(X[i] - x_mean)
- syd += (Y[i] - y_mean)*(Y[i] - y_mean)
- try:
- corr = xyd/(sqrt(sxd)*sqrt(syd))
- except:
- corr = 0
- return (corr, NN)
--
cgit v1.2.3
From d95eff3f0410ec5cddd82398f6db0fffa5d9be0b Mon Sep 17 00:00:00 2001
From: Lei Yan
Date: Thu, 27 Jun 2013 21:49:58 +0000
Subject: Fixed N Cases column on correlation results page
---
wqflask/wqflask/correlation/show_corr_results.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'wqflask')
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index 1410ae0c..03059d68 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -157,7 +157,7 @@ class CorrelationResults(object):
trait_object = GeneralTrait(dataset=self.dataset, name=trait, get_qtl_info=True)
trait_object.sample_r = self.correlation_data[trait][0]
trait_object.sample_p = self.correlation_data[trait][1]
- trait_object_num_overlap = self.correlation_data[trait][2]
+ trait_object.num_overlap = self.correlation_data[trait][2]
self.correlation_results.append(trait_object)
#self.correlation_data_slice[trait] = self.correlation_data[trait]
--
cgit v1.2.3
From ad4a6f4d12df9a00f2a9b925f1147a26c6ee0227 Mon Sep 17 00:00:00 2001
From: Lei Yan
Date: Fri, 5 Jul 2013 16:25:13 +0000
Subject: Change Correlation table value format
---
wqflask/wqflask/templates/correlation_page.html | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
(limited to 'wqflask')
diff --git a/wqflask/wqflask/templates/correlation_page.html b/wqflask/wqflask/templates/correlation_page.html
index efbf689c..53b12545 100644
--- a/wqflask/wqflask/templates/correlation_page.html
+++ b/wqflask/wqflask/templates/correlation_page.html
@@ -41,13 +41,13 @@
{{ trait.symbol }} |
{{ trait.alias }} |
{{ trait.description }} |
- Chr{{ trait.chr }}: {{ trait.mb }} |
- {{ trait.mean }} |
- {{ trait.lrs }} |
- Chr{{ trait.locus_chr }}: {{ trait.locus_mb }} |
- {{ trait.sample_r }} |
+ Chr{{ trait.chr }}:{{'%0.6f'|format(trait.mb)}} |
+ {{'%0.3f'|format(trait.mean)}} |
+ {{'%0.3f'|format(trait.lrs)}} |
+ Chr{{ trait.locus_chr }}:{{'%0.6f'|format(trait.locus_mb)}} |
+ {{'%0.3f'|format(trait.sample_r)}} |
{{ trait.num_overlap }} |
- {{ trait.sample_p }} |
+ {{'%0.3e'|format(trait.sample_p)}} |
{% endfor %}
--
cgit v1.2.3
From 0feeb303cb1874cffd6e20f2758dcd578247bd54 Mon Sep 17 00:00:00 2001
From: Lei Yan
Date: Thu, 11 Jul 2013 22:00:38 +0000
Subject: Began changing the style of the code related to the tissue
correlation column of the correlation page
---
wqflask/wqflask/correlation/show_corr_results.py | 125 ++++++++++++++---------
1 file changed, 74 insertions(+), 51 deletions(-)
(limited to 'wqflask')
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index 11eca936..4a9aea73 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -52,6 +52,8 @@ import utility.webqtlUtil #this is for parallel computing only.
from wqflask.correlation import correlationFunction
from utility.benchmark import Bench
+from MySQLdb import escape_string as escape
+
from pprint import pformat as pf
METHOD_SAMPLE_PEARSON = "1"
@@ -101,13 +103,14 @@ class CorrelationResults(object):
self.sample_data = {}
self.corr_method = start_vars['corr_sample_method']
+ self.return_number = 500
#The two if statements below append samples to the sample list based upon whether the user
#rselected Primary Samples Only, Other Samples Only, or All Samples
primary_samples = (self.dataset.group.parlist +
- self.dataset.group.f1list +
- self.dataset.group.samplelist)
+ self.dataset.group.f1list +
+ self.dataset.group.samplelist)
#If either BXD/whatever Only or All Samples, append all of that group's samplelist
if corr_samples_group != 'samples_other':
@@ -153,7 +156,7 @@ class CorrelationResults(object):
#self.correlation_data_slice = collections.OrderedDict()
- for trait_counter, trait in enumerate(self.correlation_data.keys()[:300]):
+ for trait_counter, trait in enumerate(self.correlation_data.keys()[:self.return_number]):
trait_object = GeneralTrait(dataset=self.dataset, name=trait, get_qtl_info=True)
trait_object.sample_r = self.correlation_data[trait][0]
trait_object.sample_p = self.correlation_data[trait][1]
@@ -203,13 +206,6 @@ class CorrelationResults(object):
# mb = trait_object.mb
# )
-
-
- #trait_list = self.getTissueCorrelationByList( primary_trait_symbol = self.this_trait.symbol,
- # corr_results = self.correlation_results,
- # TissueProbeSetFreezeId = 1,
- # method=1)
-
#XZ, 09/18/2008: get all information about the user selected database.
#target_db_name = fd.corr_dataset
#self.target_db_name = start_vars['corr_dataset']
@@ -531,9 +527,13 @@ class CorrelationResults(object):
#XZ, 09/23/2008: In tissue correlation tables, there is no record of GeneId1 == GeneId2
#XZ, 09/24/2008: Note that the correlation value can be negative.
- def getTempTissueCorrTable(self, primaryTraitSymbol="", TissueProbeSetFreezeId=0, method="", returnNumber=0):
+ def get_temp_tissue_corr_table(self,
+ tissue_probesetfreeze_id=0,
+ method="",
+ return_number=0):
+
- def cmpTissCorrAbsoluteValue(A, B):
+ def cmp_tisscorr_absolute_value(A, B):
try:
if abs(A[1]) < abs(B[1]): return 1
elif abs(A[1]) == abs(B[1]):
@@ -542,26 +542,28 @@ class CorrelationResults(object):
except:
return 0
- symbolCorrDict, symbolPvalueDict = self.calculateCorrOfAllTissueTrait(primaryTraitSymbol=primaryTraitSymbol, TissueProbeSetFreezeId=TISSUE_MOUSE_DB, method=method)
+ symbol_corr_dict, symbol_pvalue_dict = self.calculate_corr_all_tissue_trait(
+ tissue_probesetfreeze_id=TISSUE_MOUSE_DB,
+ method=method)
- symbolCorrList = symbolCorrDict.items()
+ symbol_corr_list = symbol_corr_dict.items()
- symbolCorrList.sort(cmpTissCorrAbsoluteValue)
- symbolCorrList = symbolCorrList[0 : 2*returnNumber]
+ symbol_corr_list.sort(cmp_tisscorr_absolute_value)
+ symbol_corr_list = symbol_corr_list[0 : 2*return_number]
- tmpTableName = webqtlUtil.genRandStr(prefix="TOPTISSUE")
+ tmp_table_name = webqtlUtil.genRandStr(prefix="TOPTISSUE")
- q1 = 'CREATE TEMPORARY TABLE %s (Symbol varchar(100) PRIMARY KEY, Correlation float, PValue float)' % tmpTableName
+ q1 = 'CREATE TEMPORARY TABLE %s (Symbol varchar(100) PRIMARY KEY, Correlation float, PValue float)' % tmp_table_name
self.cursor.execute(q1)
- for one_pair in symbolCorrList:
+ for one_pair in symbol_corr_list:
one_symbol = one_pair[0]
one_corr = one_pair[1]
- one_p_value = symbolPvalueDict[one_symbol]
+ one_p_value = symbol_pvalue_dict[one_symbol]
self.cursor.execute( "INSERT INTO %s (Symbol, Correlation, PValue) VALUES ('%s',%f,%f)" % (tmpTableName, one_symbol, float(one_corr), float(one_p_value)) )
- return tmpTableName
+ return tmp_table_name
#XZ, 01/09/2009: This function was created by David Crowell. Xiaodong cleaned up and modified it.
@@ -591,37 +593,47 @@ class CorrelationResults(object):
return litCorrDict
+ def fetch_tissue_correlations(self, method=""):
+ """Comments Possibly Out of Date!!!!!
+
+
+ Uses getTempTissueCorrTable to generate table of tissue correlations.
+
+ This function then gathers that data and pairs it with the TraitID string.
+ Takes as its arguments a formdata instance, and a database instance.
+ Returns a dictionary of 'TraitID':(tissueCorr, tissuePValue)
+ for the requested correlation
+
+ """
- #XZ, 01/09/2009: Xiaodong created this function.
- def fetchTissueCorrelations(self, db, primaryTraitSymbol="", TissueProbeSetFreezeId=0, method="", returnNumber = 0):
- """Uses getTempTissueCorrTable to generate table of tissue correlations. This function then gathers that data and
- pairs it with the TraitID string. Takes as its arguments a formdata instance, and a database instance.
- Returns a dictionary of 'TraitID':(tissueCorr, tissuePValue) for the requested correlation"""
-
-
- tempTable = self.getTempTissueCorrTable(primaryTraitSymbol=primaryTraitSymbol, TissueProbeSetFreezeId=TISSUE_MOUSE_DB, method=method, returnNumber=returnNumber)
+ # table name string
+ temp_table = self.get_temp_tissue_corr_table(tissue_probesetfreeze_id=TISSUE_MOUSE_DB,
+ method=method)
- query = "SELECT ProbeSet.Name, %s.Correlation, %s.PValue" % (tempTable, tempTable)
- query += ' FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze)'
- query += ' LEFT JOIN %s ON %s.Symbol=ProbeSet.Symbol ' % (tempTable,tempTable)
- query += "WHERE ProbeSetFreeze.Name = '%s' and ProbeSetFreeze.Id=ProbeSetXRef.ProbeSetFreezeId and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSet.Symbol IS NOT NULL AND %s.Correlation IS NOT NULL" % (db.name, tempTable)
+ query = """SELECT ProbeSet.Name, {}.Correlation, {}.PValue
+ FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze)
+ LEFT JOIN {} ON {}.Symbol=ProbeSet.Symbol
+ WHERE ProbeSetFreeze.Name = '{}'
+ and ProbeSetFreeze.Id=ProbeSetXRef.ProbeSetFreezeId
+ and ProbeSet.Id = ProbeSetXRef.ProbeSetId
+ and ProbeSet.Symbol IS NOT NULL
+ and {}.Correlation IS NOT NULL""".format(dataset.mescape(
+ temp_table, temp_table, temp_table, temp_table,
+ self.dataset.name, temp_table))
- self.cursor.execute(query)
- results = self.cursor.fetchall()
+ results = g.db.execute(query).fetchall()
- tissueCorrDict = {}
+ tissue_corr_dict = {}
for entry in results:
- traitName, tissueCorr, tissuePValue = entry
- tissueCorrDict[traitName] = (tissueCorr, tissuePValue)
-
- self.cursor.execute('DROP TEMPORARY TABLE %s' % tempTable)
+ trait_name, tissue_corr, tissue_pvalue = entry
+ tissue_corr_dict[trait_name] = (tissue_corr, tissue_pvalue)
- return tissueCorrDict
+ g.db.execute('DROP TEMPORARY TABLE {}'.format(escape(temp_table))
+ return tissue_corr_dict
- #XZ, 01/13/2008
def getLiteratureCorrelationByList(self, input_trait_mouse_geneid=None, species=None, traitList=None):
tmpTableName = webqtlUtil.genRandStr(prefix="LITERATURE")
@@ -672,7 +684,7 @@ class CorrelationResults(object):
use_tissue_corr = False
if self.method in TISSUE_METHODS:
- tissue_corrs = self.fetchTissueCorrelations(db=self.db, primaryTraitSymbol=self.trait_symbol, TissueProbeSetFreezeId=TISSUE_MOUSE_DB, method=self.method, returnNumber = self.returnNumber)
+ tissue_corrs = self.fetch_tissue_correlations(method=self.method, return_number = self.return_number)
use_tissue_corr = True
DatabaseFileName = self.getFileName( target_db_name=self.target_db_name )
@@ -897,20 +909,31 @@ class CorrelationResults(object):
return trait_list
"""
- def calculateCorrOfAllTissueTrait(self, primaryTraitSymbol=None, TissueProbeSetFreezeId=None, method=None):
+ def calculate_corr_all_tissue_trait(self, tissue_probesetfreeze_id=None, method=None):
- symbolCorrDict = {}
- symbolPvalueDict = {}
+ symbol_corr_dict = {}
+ symbol_pvalue_dict = {}
- primaryTraitSymbolValueDict = correlationFunction.getGeneSymbolTissueValueDictForTrait(cursor=self.cursor, GeneNameLst=[primaryTraitSymbol], TissueProbeSetFreezeId=TISSUE_MOUSE_DB)
- primaryTraitValue = primaryTraitSymbolValueDict.values()[0]
+ primary_trait_symbol_value_dict = correlation_function.
+ get_genesymbol_tissue_value_dict_trait(cursor=self.cursor,
+ GeneNameLst=[primaryTraitSymbol],
+ TissueProbeSetFreezeId=TISSUE_MOUSE_DB)
+ primary_trait_value = primary_trait_symbol_value_dict.values()[0]
- SymbolValueDict = correlationFunction.getGeneSymbolTissueValueDictForTrait(cursor=self.cursor, GeneNameLst=[], TissueProbeSetFreezeId=TISSUE_MOUSE_DB)
+ symbol_value_dict = correlation_function.get_genesymbol_tissue_value_dict_trait(
+ cursor=self.cursor,
+ gene_name_list=[],
+ tissue_probeSetfreeze_id=TISSUE_MOUSE_DB)
if method in ["2","5"]:
- symbolCorrDict, symbolPvalueDict = correlationFunction.batchCalTissueCorr(primaryTraitValue,SymbolValueDict,method='spearman')
+ symbol_corr_dict, symbol_pvalue_dict = correlation_function.batch_cal_tissue_corr(
+ primaryTraitValue,
+ SymbolValueDict,
+ method='spearman')
else:
- symbolCorrDict, symbolPvalueDict = correlationFunction.batchCalTissueCorr(primaryTraitValue,SymbolValueDict)
+ symbol_corr_dict, symbol_pvalue_dict = correlation_function.batch_cal_tissue_corr(
+ primaryTraitValue,
+ SymbolValueDict)
return (symbolCorrDict, symbolPvalueDict)
--
cgit v1.2.3
From 155e2997613c0750de30b734686f8977524956f9 Mon Sep 17 00:00:00 2001
From: Lei Yan
Date: Fri, 12 Jul 2013 22:58:39 +0000
Subject: Rewrote code related to getting the tissue correlation column to
display
Created new files for mrna assay tissue data and commonly used
db query related functions
---
wqflask/base/mrna_assay_tissue_data.py | 134 +++
wqflask/utility/db_tools.py | 15 +
wqflask/wqflask/correlation/correlationFunction.py | 923 --------------------
.../wqflask/correlation/correlation_functions.py | 959 +++++++++++++++++++++
wqflask/wqflask/correlation/show_corr_results.py | 76 +-
5 files changed, 1151 insertions(+), 956 deletions(-)
create mode 100644 wqflask/base/mrna_assay_tissue_data.py
create mode 100644 wqflask/utility/db_tools.py
delete mode 100644 wqflask/wqflask/correlation/correlationFunction.py
create mode 100644 wqflask/wqflask/correlation/correlation_functions.py
(limited to 'wqflask')
diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py
new file mode 100644
index 00000000..8ae71858
--- /dev/null
+++ b/wqflask/base/mrna_assay_tissue_data.py
@@ -0,0 +1,134 @@
+from __future__ import absolute_import, print_function, division
+
+import collections
+
+from flask import g
+
+from utility import dbtools
+from uitility import Bunch
+
+from MySQLdb import escape_string as escape
+
+class MrnaAssayTissueData(object):
+
+ def __init__(self, gene_symbols=None):
+ self.gene_symbols = gene_symbols
+ self.have_data = False
+ if self.gene_symbols == None:
+ self.gene_symbols = []
+
+ self.data = collections.defaultdict(Bunch)
+
+ #self.gene_id_dict ={}
+ #self.data_id_dict = {}
+ #self.chr_dict = {}
+ #self.mb_dict = {}
+ #self.desc_dict = {}
+ #self.probe_target_desc_dict = {}
+
+ query = '''select t.Symbol, t.GeneId, t.DataId,t.Chr, t.Mb, t.description, t.Probe_Target_Description
+ from (
+ select Symbol, max(Mean) as maxmean
+ from TissueProbeSetXRef
+ where TissueProbeSetFreezeId=1 and '''
+
+ # Note that inner join is necessary in this query to get distinct record in one symbol group
+ # with highest mean value
+ # Due to the limit size of TissueProbeSetFreezeId table in DB,
+ # performance of inner join is acceptable.
+ if len(gene_symbols) == 0:
+ query += '''Symbol!='' and Symbol Is Not Null group by Symbol)
+ as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol
+ and t.Mean = x.maxmean;
+ '''
+ else:
+ in_clause = dbtools.create_in_clause(gene_symbols)
+
+ query += ''' Symbol in {} group by Symbol)
+ as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol
+ and t.Mean = x.maxmean;
+ '''.format(in_clause)
+
+ results = g.db.execute(query).fetchall()
+ for result in results:
+ symbol = item[0]
+ gene_symbols.append(symbol)
+ symbol = symbol.lower()
+
+ self.data[symbol].gene_id = result.GeneId
+ self.data[symbol].data_id = result.DataId
+ self.data[symbol].chr = result.Chr
+ self.data[symbol].mb = result.Mb
+ self.data[symbol].description = result.description
+ self.data[symbol].probe_target_description = result.Probe_Target_Description
+
+
+ ###########################################################################
+ #Input: cursor, symbolList (list), dataIdDict(Dict)
+ #output: symbolValuepairDict (dictionary):one dictionary of Symbol and Value Pair,
+ # key is symbol, value is one list of expression values of one probeSet;
+ #function: get one dictionary whose key is gene symbol and value is tissue expression data (list type).
+ #Attention! All keys are lower case!
+ ###########################################################################
+ def get_symbol_value_pairs(self):
+
+ id_list = [self.tissue_data[symbol.lower()].data_id for item in self.tissue_data]
+
+ symbol_value_pairs = {}
+ value_list=[]
+
+ query = """SELECT value, id
+ FROM TissueProbeSetData
+ WHERE Id IN {}""".format(create_in_clause(id_list))
+
+ try :
+ results = g.db.execute(query).fetchall()
+ for result in results:
+ value_list.append(result.value)
+ symbol_value_pairs[symbol] = value_list
+ except:
+ symbol_value_pairs[symbol] = None
+
+ #for symbol in symbol_list:
+ # if tissue_data.has_key(symbol):
+ # data_id = tissue_data[symbol].data_id
+ #
+ # query = """select value, id
+ # from TissueProbeSetData
+ # where Id={}""".format(escape(data_id))
+ # try :
+ # results = g.db.execute(query).fetchall()
+ # for item in results:
+ # item = item[0]
+ # value_list.append(item)
+ # symbol_value_pairs[symbol] = value_list
+ # value_list=[]
+ # except:
+ # symbol_value_pairs[symbol] = None
+
+ return symbol_value_pairs
+
+ ########################################################################################################
+ #input: cursor, symbolList (list), dataIdDict(Dict): key is symbol
+ #output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair.
+ # key is symbol, value is one list of expression values of one probeSet.
+ #function: wrapper function for getSymbolValuePairDict function
+ # build gene symbol list if necessary, cut it into small lists if necessary,
+ # then call getSymbolValuePairDict function and merge the results.
+ ########################################################################################################
+
+ def get_trait_symbol_and_tissue_values(symbol_list=None):
+ tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list)
+
+ #symbolList,
+ #geneIdDict,
+ #dataIdDict,
+ #ChrDict,
+ #MbDict,
+ #descDict,
+ #pTargetDescDict = getTissueProbeSetXRefInfo(
+ # GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId)
+
+ if len(tissue_data.gene_symbols):
+ return get_symbol_value_pairs(tissue_data)
+
diff --git a/wqflask/utility/db_tools.py b/wqflask/utility/db_tools.py
new file mode 100644
index 00000000..4034f39c
--- /dev/null
+++ b/wqflask/utility/db_tools.py
@@ -0,0 +1,15 @@
+from __future__ import absolute_import, print_function, division
+
+from MySQLdb import escape_string as escape
+
+def create_in_clause(items):
+ """Create an in clause for mysql"""
+ in_clause = ', '.join("'{}'".format(x) for x in mescape(*items))
+ in_clause = '( {} )'.format(in_clause)
+ return in_clause
+
+def mescape(*items):
+ """Multiple escape"""
+ escaped = [escape(str(item)) for item in items]
+ #print("escaped is:", escaped)
+ return escaped
diff --git a/wqflask/wqflask/correlation/correlationFunction.py b/wqflask/wqflask/correlation/correlationFunction.py
deleted file mode 100644
index 7d4b58a9..00000000
--- a/wqflask/wqflask/correlation/correlationFunction.py
+++ /dev/null
@@ -1,923 +0,0 @@
-# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU Affero General Public License
-# as published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the GNU Affero General Public License for more details.
-#
-# This program is available from Source Forge: at GeneNetwork Project
-# (sourceforge.net/projects/genenetwork/).
-#
-# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
-# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
-#
-#
-#
-# This module is used by GeneNetwork project (www.genenetwork.org)
-#
-# Created by GeneNetwork Core Team 2010/08/10
-#
-# Last updated by NL 2011/03/23
-
-
-import math
-#import rpy2.robjects
-import pp
-import string
-
-from utility import webqtlUtil
-from base.trait import GeneralTrait
-from dbFunction import webqtlDatabaseFunction
-
-
-
-#XZ: The input 'controls' is String. It contains the full name of control traits.
-#XZ: The input variable 'strainlst' is List. It contains the strain names of primary trait.
-#XZ: The returned tcstrains is the list of list [[],[]...]. So are tcvals and tcvars. The last returned parameter is list of numbers.
-#XZ, 03/29/2010: For each returned control trait, there is no None value in it.
-def controlStrains(controls, strainlst):
-
- controls = controls.split(',')
-
- cvals = {}
- for oneTraitName in controls:
- oneTrait = webqtlTrait(fullname=oneTraitName, cursor=webqtlDatabaseFunction.getCursor() )
- oneTrait.retrieveData()
- cvals[oneTraitName] = oneTrait.data
-
- tcstrains = []
- tcvals = []
- tcvars = []
-
- for oneTraitName in controls:
- strains = []
- vals = []
- vars = []
-
- for _strain in strainlst:
- if cvals[oneTraitName].has_key(_strain):
- _val = cvals[oneTraitName][_strain].val
- if _val != None:
- strains.append(_strain)
- vals.append(_val)
- vars.append(None)
-
- tcstrains.append(strains)
- tcvals.append(vals)
- tcvars.append(vars)
-
- return tcstrains, tcvals, tcvars, [len(x) for x in tcstrains]
-
-
-
-#XZ, 03/29/2010: After execution of functon "controlStrains" and "fixStrains", primary trait and control traits have the same strains and in the same order. There is no 'None' value in them.
-def fixStrains(_strains,_controlstrains,_vals,_controlvals,_vars,_controlvars):
- """Corrects strains, vals, and vars so that all contrain only those strains common
- to the reference trait and all control traits."""
-
- def dictify(strains,vals,vars):
- subdict = {}
- for i in xrange(len(strains)):
- subdict[strains[i]] = (vals[i],vars[i])
- return subdict
-
- #XZ: The 'dicts' is a list of dictionary. The first element is the dictionary of reference trait. The rest elements are for control traits.
- dicts = []
- dicts.append(dictify(_strains,_vals,_vars))
-
- nCstrains = len(_controlstrains)
- for i in xrange(nCstrains):
- dicts.append(dictify(_controlstrains[i],_controlvals[i],_controlvars[i]))
-
- _newstrains = []
- _vals = []
- _vars = []
- _controlvals = [[] for x in xrange(nCstrains)]
- _controlvars = [[] for x in xrange(nCstrains)]
-
- for strain in _strains:
- inall = True
- for d in dicts:
- if strain not in d:
- inall = False
- break
- if inall:
- _newstrains.append(strain)
- _vals.append(dicts[0][strain][0])
- _vars.append(dicts[0][strain][1])
- for i in xrange(nCstrains):
- _controlvals[i].append(dicts[i+1][strain][0])
- _controlvars[i].append(dicts[i+1][strain][1])
-
- return _newstrains, _vals, _controlvals, _vars, _controlvars
-
-
-#XZ, 6/15/2010: If there is no identical control traits, the returned list is empty.
-#else, the returned list has two elements of control trait name.
-def findIdenticalControlTraits ( controlVals, controlNames ):
- nameOfIdenticalTraits = []
-
- controlTraitNumber = len(controlVals)
-
- if controlTraitNumber > 1:
-
- #XZ: reset the precision of values and convert to string type
- for oneTraitVal in controlVals:
- for oneStrainVal in oneTraitVal:
- oneStrainVal = '%.3f' % oneStrainVal
-
- for i, oneTraitVal in enumerate( controlVals ):
- for j in range(i+1, controlTraitNumber):
- if oneTraitVal == controlVals[j]:
- nameOfIdenticalTraits.append(controlNames[i])
- nameOfIdenticalTraits.append(controlNames[j])
-
- return nameOfIdenticalTraits
-
-#XZ, 6/15/2010: If there is no identical control traits, the returned list is empty.
-#else, the returned list has two elements of control trait name.
-#primaryVal is of list type. It contains value of primary trait.
-#primaryName is of string type.
-#controlVals is of list type. Each element is list too. Each element contain value of one control trait.
-#controlNames is of list type.
-def findIdenticalTraits (primaryVal, primaryName, controlVals, controlNames ):
- nameOfIdenticalTraits = []
-
- #XZ: reset the precision of values and convert to string type
- for oneStrainVal in primaryVal:
- oneStrainVal = '%.3f' % oneStrainVal
-
- for oneTraitVal in controlVals:
- for oneStrainVal in oneTraitVal:
- oneStrainVal = '%.3f' % oneStrainVal
-
- controlTraitNumber = len(controlVals)
-
- if controlTraitNumber > 1:
- for i, oneTraitVal in enumerate( controlVals ):
- for j in range(i+1, controlTraitNumber):
- if oneTraitVal == controlVals[j]:
- nameOfIdenticalTraits.append(controlNames[i])
- nameOfIdenticalTraits.append(controlNames[j])
- break
-
- if len(nameOfIdenticalTraits) == 0:
- for i, oneTraitVal in enumerate( controlVals ):
- if primaryVal == oneTraitVal:
- nameOfIdenticalTraits.append(primaryName)
- nameOfIdenticalTraits.append(controlNames[i])
- break
-
- return nameOfIdenticalTraits
-
-
-
-#XZ, 03/29/2010: The strains in primaryVal, controlVals, targetVals must be of the same number and in same order.
-#XZ: No value in primaryVal and controlVals could be None.
-
-def determinePartialsByR (primaryVal, controlVals, targetVals, targetNames, method='p'):
-
- def compute_partial ( primaryVal, controlVals, targetVals, targetNames, method ):
-
- rpy2.robjects.r("""
-pcor.test <- function(x,y,z,use="mat",method="p",na.rm=T){
- # The partial correlation coefficient between x and y given z
- #
- # pcor.test is free and comes with ABSOLUTELY NO WARRANTY.
- #
- # x and y should be vectors
- #
- # z can be either a vector or a matrix
- #
- # use: There are two methods to calculate the partial correlation coefficient.
- # One is by using variance-covariance matrix ("mat") and the other is by using recursive formula ("rec").
- # Default is "mat".
- #
- # method: There are three ways to calculate the correlation coefficient,
- # which are Pearson's ("p"), Spearman's ("s"), and Kendall's ("k") methods.
- # The last two methods which are Spearman's and Kendall's coefficient are based on the non-parametric analysis.
- # Default is "p".
- #
- # na.rm: If na.rm is T, then all the missing samples are deleted from the whole dataset, which is (x,y,z).
- # If not, the missing samples will be removed just when the correlation coefficient is calculated.
- # However, the number of samples for the p-value is the number of samples after removing
- # all the missing samples from the whole dataset.
- # Default is "T".
-
- x <- c(x)
- y <- c(y)
- z <- as.data.frame(z)
-
- if(use == "mat"){
- p.use <- "Var-Cov matrix"
- pcor = pcor.mat(x,y,z,method=method,na.rm=na.rm)
- }else if(use == "rec"){
- p.use <- "Recursive formula"
- pcor = pcor.rec(x,y,z,method=method,na.rm=na.rm)
- }else{
- stop("use should be either rec or mat!\n")
- }
-
- # print the method
- if(gregexpr("p",method)[[1]][1] == 1){
- p.method <- "Pearson"
- }else if(gregexpr("s",method)[[1]][1] == 1){
- p.method <- "Spearman"
- }else if(gregexpr("k",method)[[1]][1] == 1){
- p.method <- "Kendall"
- }else{
- stop("method should be pearson or spearman or kendall!\n")
- }
-
- # sample number
- n <- dim(na.omit(data.frame(x,y,z)))[1]
-
- # given variables' number
- gn <- dim(z)[2]
-
- # p-value
- if(p.method == "Kendall"){
- statistic <- pcor/sqrt(2*(2*(n-gn)+5)/(9*(n-gn)*(n-1-gn)))
- p.value <- 2*pnorm(-abs(statistic))
-
- }else{
- statistic <- pcor*sqrt((n-2-gn)/(1-pcor^2))
- p.value <- 2*pnorm(-abs(statistic))
- }
-
- data.frame(estimate=pcor,p.value=p.value,statistic=statistic,n=n,gn=gn,Method=p.method,Use=p.use)
-}
-
-# By using var-cov matrix
-pcor.mat <- function(x,y,z,method="p",na.rm=T){
-
- x <- c(x)
- y <- c(y)
- z <- as.data.frame(z)
-
- if(dim(z)[2] == 0){
- stop("There should be given data\n")
- }
-
- data <- data.frame(x,y,z)
-
- if(na.rm == T){
- data = na.omit(data)
- }
-
- xdata <- na.omit(data.frame(data[,c(1,2)]))
- Sxx <- cov(xdata,xdata,m=method)
-
- xzdata <- na.omit(data)
- xdata <- data.frame(xzdata[,c(1,2)])
- zdata <- data.frame(xzdata[,-c(1,2)])
- Sxz <- cov(xdata,zdata,m=method)
-
- zdata <- na.omit(data.frame(data[,-c(1,2)]))
- Szz <- cov(zdata,zdata,m=method)
-
- # is Szz positive definite?
- zz.ev <- eigen(Szz)$values
- if(min(zz.ev)[1]<0){
- stop("\'Szz\' is not positive definite!\n")
- }
-
- # partial correlation
- Sxx.z <- Sxx - Sxz %*% solve(Szz) %*% t(Sxz)
-
- rxx.z <- cov2cor(Sxx.z)[1,2]
-
- rxx.z
-}
-
-# By using recursive formula
-pcor.rec <- function(x,y,z,method="p",na.rm=T){
- #
-
- x <- c(x)
- y <- c(y)
- z <- as.data.frame(z)
-
- if(dim(z)[2] == 0){
- stop("There should be given data\n")
- }
-
- data <- data.frame(x,y,z)
-
- if(na.rm == T){
- data = na.omit(data)
- }
-
- # recursive formula
- if(dim(z)[2] == 1){
- tdata <- na.omit(data.frame(data[,1],data[,2]))
- rxy <- cor(tdata[,1],tdata[,2],m=method)
-
- tdata <- na.omit(data.frame(data[,1],data[,-c(1,2)]))
- rxz <- cor(tdata[,1],tdata[,2],m=method)
-
- tdata <- na.omit(data.frame(data[,2],data[,-c(1,2)]))
- ryz <- cor(tdata[,1],tdata[,2],m=method)
-
- rxy.z <- (rxy - rxz*ryz)/( sqrt(1-rxz^2)*sqrt(1-ryz^2) )
-
- return(rxy.z)
- }else{
- x <- c(data[,1])
- y <- c(data[,2])
- z0 <- c(data[,3])
- zc <- as.data.frame(data[,-c(1,2,3)])
-
- rxy.zc <- pcor.rec(x,y,zc,method=method,na.rm=na.rm)
- rxz0.zc <- pcor.rec(x,z0,zc,method=method,na.rm=na.rm)
- ryz0.zc <- pcor.rec(y,z0,zc,method=method,na.rm=na.rm)
-
- rxy.z <- (rxy.zc - rxz0.zc*ryz0.zc)/( sqrt(1-rxz0.zc^2)*sqrt(1-ryz0.zc^2) )
- return(rxy.z)
- }
-}
-""")
-
- R_pcorr_function = rpy2.robjects.r['pcor.test']
- R_corr_test = rpy2.robjects.r['cor.test']
-
- primary = rpy2.robjects.FloatVector(range(len(primaryVal)))
- for i in range(len(primaryVal)):
- primary[i] = primaryVal[i]
-
- control = rpy2.robjects.r.matrix(rpy2.robjects.FloatVector( range(len(controlVals)*len(controlVals[0])) ), ncol=len(controlVals))
- for i in range(len(controlVals)):
- for j in range(len(controlVals[0])):
- control[i*len(controlVals[0]) + j] = controlVals[i][j]
-
- allcorrelations = []
-
- for targetIndex, oneTargetVals in enumerate(targetVals):
-
- this_primary = None
- this_control = None
- this_target = None
-
- if None in oneTargetVals:
-
- goodIndex = []
- for i in range(len(oneTargetVals)):
- if oneTargetVals[i] != None:
- goodIndex.append(i)
-
- this_primary = rpy2.robjects.FloatVector(range(len(goodIndex)))
- for i in range(len(goodIndex)):
- this_primary[i] = primaryVal[goodIndex[i]]
-
- this_control = rpy2.robjects.r.matrix(rpy2.robjects.FloatVector( range(len(controlVals)*len(goodIndex)) ), ncol=len(controlVals))
- for i in range(len(controlVals)):
- for j in range(len(goodIndex)):
- this_control[i*len(goodIndex) + j] = controlVals[i][goodIndex[j]]
-
- this_target = rpy2.robjects.FloatVector(range(len(goodIndex)))
- for i in range(len(goodIndex)):
- this_target[i] = oneTargetVals[goodIndex[i]]
-
- else:
- this_primary = primary
- this_control = control
- this_target = rpy2.robjects.FloatVector(range(len(oneTargetVals)))
- for i in range(len(oneTargetVals)):
- this_target[i] = oneTargetVals[i]
-
- one_name = targetNames[targetIndex]
- one_N = len(this_primary)
-
- #calculate partial correlation
- one_pc_coefficient = 'NA'
- one_pc_p = 1
-
- try:
- if method == 's':
- result = R_pcorr_function(this_primary, this_target, this_control, method='s')
- else:
- result = R_pcorr_function(this_primary, this_target, this_control)
-
- #XZ: In very few cases, the returned coefficient is nan.
- #XZ: One way to detect nan is to compare the number to itself. NaN is always != NaN
- if result[0][0] == result[0][0]:
- one_pc_coefficient = result[0][0]
- #XZ: when the coefficient value is 1 (primary trait and target trait are the same),
- #XZ: occationally, the returned p value is nan instead of 0.
- if result[1][0] == result[1][0]:
- one_pc_p = result[1][0]
- elif abs(one_pc_coefficient - 1) < 0.0000001:
- one_pc_p = 0
- except:
- pass
-
- #calculate zero order correlation
- one_corr_coefficient = 0
- one_corr_p = 1
-
- try:
- if method == 's':
- R_result = R_corr_test(this_primary, this_target, method='spearman')
- else:
- R_result = R_corr_test(this_primary, this_target)
-
- one_corr_coefficient = R_result[3][0]
- one_corr_p = R_result[2][0]
- except:
- pass
-
- traitinfo = [ one_name, one_N, one_pc_coefficient, one_pc_p, one_corr_coefficient, one_corr_p ]
-
- allcorrelations.append(traitinfo)
-
- return allcorrelations
- #End of function compute_partial
-
-
- allcorrelations = []
-
- target_trait_number = len(targetVals)
-
- if target_trait_number < 1000:
- allcorrelations = compute_partial ( primaryVal, controlVals, targetVals, targetNames, method )
- else:
- step = 1000
- job_number = math.ceil( float(target_trait_number)/step )
-
- job_targetVals_lists = []
- job_targetNames_lists = []
-
- for job_index in range( int(job_number) ):
- starti = job_index*step
- endi = min((job_index+1)*step, target_trait_number)
-
- one_job_targetVals_list = []
- one_job_targetNames_list = []
-
- for i in range( starti, endi ):
- one_job_targetVals_list.append( targetVals[i] )
- one_job_targetNames_list.append( targetNames[i] )
-
- job_targetVals_lists.append( one_job_targetVals_list )
- job_targetNames_lists.append( one_job_targetNames_list )
-
- ppservers = ()
- # Creates jobserver with automatically detected number of workers
- job_server = pp.Server(ppservers=ppservers)
-
- jobs = []
- results = []
-
- for i, one_job_targetVals_list in enumerate( job_targetVals_lists ):
- one_job_targetNames_list = job_targetNames_lists[i]
- #pay attention to modules from outside
- jobs.append( job_server.submit(func=compute_partial, args=( primaryVal, controlVals, one_job_targetVals_list, one_job_targetNames_list, method), depfuncs=(), modules=("rpy2.robjects",)) )
-
- for one_job in jobs:
- one_result = one_job()
- results.append( one_result )
-
- for one_result in results:
- for one_traitinfo in one_result:
- allcorrelations.append( one_traitinfo )
-
- return allcorrelations
-
-
-
-#XZ, April 30, 2010: The input primaryTrait and targetTrait are instance of webqtlTrait
-#XZ: The primaryTrait and targetTrait should have executed retrieveData function
-def calZeroOrderCorr (primaryTrait, targetTrait, method='pearson'):
-
- #primaryTrait.retrieveData()
-
- #there is no None value in primary_val
- primary_strain, primary_val, primary_var = primaryTrait.exportInformative()
-
- #targetTrait.retrieveData()
-
- #there might be None value in target_val
- target_val = targetTrait.exportData(primary_strain, type="val")
-
- R_primary = rpy2.robjects.FloatVector(range(len(primary_val)))
- for i in range(len(primary_val)):
- R_primary[i] = primary_val[i]
-
- N = len(target_val)
-
- if None in target_val:
- goodIndex = []
- for i in range(len(target_val)):
- if target_val[i] != None:
- goodIndex.append(i)
-
- N = len(goodIndex)
-
- R_primary = rpy2.robjects.FloatVector(range(len(goodIndex)))
- for i in range(len(goodIndex)):
- R_primary[i] = primary_val[goodIndex[i]]
-
- R_target = rpy2.robjects.FloatVector(range(len(goodIndex)))
- for i in range(len(goodIndex)):
- R_target[i] = target_val[goodIndex[i]]
-
- else:
- R_target = rpy2.robjects.FloatVector(range(len(target_val)))
- for i in range(len(target_val)):
- R_target[i] = target_val[i]
-
- R_corr_test = rpy2.robjects.r['cor.test']
-
- if method == 'spearman':
- R_result = R_corr_test(R_primary, R_target, method='spearman')
- else:
- R_result = R_corr_test(R_primary, R_target)
-
- corr_result = []
- corr_result.append( R_result[3][0] )
- corr_result.append( N )
- corr_result.append( R_result[2][0] )
-
- return corr_result
-
-#####################################################################################
-#Input: primaryValue(list): one list of expression values of one probeSet,
-# targetValue(list): one list of expression values of one probeSet,
-# method(string): indicate correlation method ('pearson' or 'spearman')
-#Output: corr_result(list): first item is Correlation Value, second item is tissue number,
-# third item is PValue
-#Function: get correlation value,Tissue quantity ,p value result by using R;
-#Note : This function is special case since both primaryValue and targetValue are from
-#the same dataset. So the length of these two parameters is the same. They are pairs.
-#Also, in the datatable TissueProbeSetData, all Tissue values are loaded based on
-#the same tissue order
-#####################################################################################
-
-def calZeroOrderCorrForTiss (primaryValue=[], targetValue=[], method='pearson'):
-
- R_primary = rpy2.robjects.FloatVector(range(len(primaryValue)))
- N = len(primaryValue)
- for i in range(len(primaryValue)):
- R_primary[i] = primaryValue[i]
-
- R_target = rpy2.robjects.FloatVector(range(len(targetValue)))
- for i in range(len(targetValue)):
- R_target[i]=targetValue[i]
-
- R_corr_test = rpy2.robjects.r['cor.test']
- if method =='spearman':
- R_result = R_corr_test(R_primary, R_target, method='spearman')
- else:
- R_result = R_corr_test(R_primary, R_target)
-
- corr_result =[]
- corr_result.append( R_result[3][0])
- corr_result.append( N )
- corr_result.append( R_result[2][0])
-
- return corr_result
-
-
-
-
-def batchCalTissueCorr(primaryTraitValue=[], SymbolValueDict={}, method='pearson'):
-
- def cal_tissue_corr(primaryTraitValue, oneSymbolValueDict, method ):
-
- oneSymbolCorrDict = {}
- oneSymbolPvalueDict = {}
-
- R_corr_test = rpy2.robjects.r['cor.test']
-
- R_primary = rpy2.robjects.FloatVector(range(len(primaryTraitValue)))
-
- for i in range(len(primaryTraitValue)):
- R_primary[i] = primaryTraitValue[i]
-
- for (oneTraitSymbol, oneTraitValue) in oneSymbolValueDict.iteritems():
- R_target = rpy2.robjects.FloatVector(range(len(oneTraitValue)))
- for i in range(len(oneTraitValue)):
- R_target[i] = oneTraitValue[i]
-
- if method =='spearman':
- R_result = R_corr_test(R_primary, R_target, method='spearman')
- else:
- R_result = R_corr_test(R_primary, R_target)
-
- oneSymbolCorrDict[oneTraitSymbol] = R_result[3][0]
- oneSymbolPvalueDict[oneTraitSymbol] = R_result[2][0]
-
- return(oneSymbolCorrDict, oneSymbolPvalueDict)
-
-
-
- symbolCorrDict = {}
- symbolPvalueDict = {}
-
- items_number = len(SymbolValueDict)
-
- if items_number <= 1000:
- symbolCorrDict, symbolPvalueDict = cal_tissue_corr(primaryTraitValue, SymbolValueDict, method)
- else:
- items_list = SymbolValueDict.items()
-
- step = 1000
- job_number = math.ceil( float(items_number)/step )
-
- job_oneSymbolValueDict_list = []
-
- for job_index in range( int(job_number) ):
- starti = job_index*step
- endi = min((job_index+1)*step, items_number)
-
- oneSymbolValueDict = {}
-
- for i in range( starti, endi ):
- one_item = items_list[i]
- one_symbol = one_item[0]
- one_value = one_item[1]
- oneSymbolValueDict[one_symbol] = one_value
-
- job_oneSymbolValueDict_list.append( oneSymbolValueDict )
-
-
- ppservers = ()
- # Creates jobserver with automatically detected number of workers
- job_server = pp.Server(ppservers=ppservers)
-
- jobs = []
- results = []
-
- for i, oneSymbolValueDict in enumerate( job_oneSymbolValueDict_list ):
-
- #pay attention to modules from outside
- jobs.append( job_server.submit(func=cal_tissue_corr, args=(primaryTraitValue, oneSymbolValueDict, method), depfuncs=(), modules=("rpy2.robjects",)) )
-
- for one_job in jobs:
- one_result = one_job()
- results.append( one_result )
-
- for one_result in results:
- oneSymbolCorrDict, oneSymbolPvalueDict = one_result
- symbolCorrDict.update( oneSymbolCorrDict )
- symbolPvalueDict.update( oneSymbolPvalueDict )
-
- return (symbolCorrDict, symbolPvalueDict)
-
-###########################################################################
-#Input: cursor, GeneNameLst (list), TissueProbeSetFreezeId
-#output: geneIdDict,dataIdDict,ChrDict,MbDict,descDict,pTargetDescDict (Dict)
-#function: get multi dicts for short and long label functions, and for getSymbolValuePairDict and
-# getGeneSymbolTissueValueDict to build dict to get CorrPvArray
-#Note: If there are multiple probesets for one gene, select the one with highest mean.
-###########################################################################
-def getTissueProbeSetXRefInfo(cursor=None,GeneNameLst=[],TissueProbeSetFreezeId=0):
- Symbols =""
- symbolList =[]
- geneIdDict ={}
- dataIdDict = {}
- ChrDict = {}
- MbDict = {}
- descDict = {}
- pTargetDescDict = {}
-
- count = len(GeneNameLst)
-
- # Added by NL 01/06/2011
- # Note that:inner join is necessary in this query to get distinct record in one symbol group with highest mean value
- # Duo to the limit size of TissueProbeSetFreezeId table in DB, performance of inner join is acceptable.
- if count==0:
- query='''
- select t.Symbol,t.GeneId, t.DataId,t.Chr, t.Mb,t.description,t.Probe_Target_Description
- from (
- select Symbol, max(Mean) as maxmean
- from TissueProbeSetXRef
- where TissueProbeSetFreezeId=%s and Symbol!='' and Symbol Is Not Null group by Symbol)
- as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol and t.Mean = x.maxmean;
- '''%TissueProbeSetFreezeId
-
- else:
- for i, item in enumerate(GeneNameLst):
-
- if i == count-1:
- Symbols += "'%s'" %item
- else:
- Symbols += "'%s'," %item
-
- Symbols = "("+ Symbols+")"
- query='''
- select t.Symbol,t.GeneId, t.DataId,t.Chr, t.Mb,t.description,t.Probe_Target_Description
- from (
- select Symbol, max(Mean) as maxmean
- from TissueProbeSetXRef
- where TissueProbeSetFreezeId=%s and Symbol in %s group by Symbol)
- as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol and t.Mean = x.maxmean;
- '''% (TissueProbeSetFreezeId,Symbols)
-
- try:
-
- cursor.execute(query)
- results =cursor.fetchall()
- resultCount = len(results)
- # Key in all dicts is the lower-cased symbol
- for i, item in enumerate(results):
- symbol = item[0]
- symbolList.append(symbol)
-
- key =symbol.lower()
- geneIdDict[key]=item[1]
- dataIdDict[key]=item[2]
- ChrDict[key]=item[3]
- MbDict[key]=item[4]
- descDict[key]=item[5]
- pTargetDescDict[key]=item[6]
-
- except:
- symbolList = None
- geneIdDict=None
- dataIdDict=None
- ChrDict=None
- MbDict=None
- descDict=None
- pTargetDescDict=None
-
- return symbolList,geneIdDict,dataIdDict,ChrDict,MbDict,descDict,pTargetDescDict
-
-###########################################################################
-#Input: cursor, symbolList (list), dataIdDict(Dict)
-#output: symbolValuepairDict (dictionary):one dictionary of Symbol and Value Pair,
-# key is symbol, value is one list of expression values of one probeSet;
-#function: get one dictionary whose key is gene symbol and value is tissue expression data (list type).
-#Attention! All keys are lower case!
-###########################################################################
-def getSymbolValuePairDict(cursor=None,symbolList=None,dataIdDict={}):
- symbolList = map(string.lower, symbolList)
- symbolValuepairDict={}
- valueList=[]
-
- for key in symbolList:
- if dataIdDict.has_key(key):
- DataId = dataIdDict[key]
-
- valueQuery = "select value from TissueProbeSetData where Id=%s" % DataId
- try :
- cursor.execute(valueQuery)
- valueResults = cursor.fetchall()
- for item in valueResults:
- item =item[0]
- valueList.append(item)
- symbolValuepairDict[key] = valueList
- valueList=[]
- except:
- symbolValuepairDict[key] = None
-
- return symbolValuepairDict
-
-
-########################################################################################################
-#input: cursor, symbolList (list), dataIdDict(Dict): key is symbol
-#output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair.
-# key is symbol, value is one list of expression values of one probeSet.
-#function: wrapper function for getSymbolValuePairDict function
-# build gene symbol list if necessary, cut it into small lists if necessary,
-# then call getSymbolValuePairDict function and merge the results.
-########################################################################################################
-
-def getGeneSymbolTissueValueDict(cursor=None,symbolList=None,dataIdDict={}):
- limitNum=1000
- count = len(symbolList)
-
- SymbolValuePairDict = {}
-
- if count !=0 and count <=limitNum:
- SymbolValuePairDict = getSymbolValuePairDict(cursor=cursor,symbolList=symbolList,dataIdDict=dataIdDict)
-
- elif count >limitNum:
- SymbolValuePairDict={}
- n = count/limitNum
- start =0
- stop =0
-
- for i in range(n):
- stop =limitNum*(i+1)
- gList1 = symbolList[start:stop]
- PairDict1 = getSymbolValuePairDict(cursor=cursor,symbolList=gList1,dataIdDict=dataIdDict)
- start =limitNum*(i+1)
-
- SymbolValuePairDict.update(PairDict1)
-
- if stop < count:
- stop = count
- gList2 = symbolList[start:stop]
- PairDict2 = getSymbolValuePairDict(cursor=cursor,symbolList=gList2,dataIdDict=dataIdDict)
- SymbolValuePairDict.update(PairDict2)
-
- return SymbolValuePairDict
-
-########################################################################################################
-#input: cursor, GeneNameLst (list), TissueProbeSetFreezeId(int)
-#output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair.
-# key is symbol, value is one list of expression values of one probeSet.
-#function: wrapper function of getGeneSymbolTissueValueDict function
-# for CorrelationPage.py
-########################################################################################################
-
-def getGeneSymbolTissueValueDictForTrait(cursor=None,GeneNameLst=[],TissueProbeSetFreezeId=0):
- SymbolValuePairDict={}
- symbolList,geneIdDict,dataIdDict,ChrDict,MbDict,descDict,pTargetDescDict = getTissueProbeSetXRefInfo(cursor=cursor,GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId)
- if symbolList:
- SymbolValuePairDict = getGeneSymbolTissueValueDict(cursor=cursor,symbolList=symbolList,dataIdDict=dataIdDict)
- return SymbolValuePairDict
-
-########################################################################################################
-#Input: cursor(cursor): MySQL connnection cursor;
-# priGeneSymbolList(list): one list of gene symbol;
-# symbolValuepairDict(dictionary): one dictionary of Symbol and Value Pair,
-# key is symbol, value is one list of expression values of one probeSet;
-#Output: corrArray(array): array of Correlation Value,
-# pvArray(array): array of PValue;
-#Function: build corrArray, pvArray for display by calling calculation function:calZeroOrderCorrForTiss
-########################################################################################################
-
-def getCorrPvArray(cursor=None,priGeneSymbolList=[],symbolValuepairDict={}):
- # setting initial value for corrArray, pvArray equal to 0
- Num = len(priGeneSymbolList)
-
- corrArray = [([0] * (Num))[:] for i in range(Num)]
- pvArray = [([0] * (Num))[:] for i in range(Num)]
- i = 0
- for pkey in priGeneSymbolList:
- j = 0
- pkey = pkey.strip().lower()# key in symbolValuepairDict is low case
- if symbolValuepairDict.has_key(pkey):
- priValue = symbolValuepairDict[pkey]
- for tkey in priGeneSymbolList:
- tkey = tkey.strip().lower()# key in symbolValuepairDict is low case
- if priValue and symbolValuepairDict.has_key(tkey):
- tarValue = symbolValuepairDict[tkey]
-
- if tarValue:
- if i>j:
- # corrArray stores Pearson Correlation values
- # pvArray stores Pearson P-Values
- pcorr_result =calZeroOrderCorrForTiss(primaryValue=priValue,targetValue=tarValue)
- corrArray[i][j] =pcorr_result[0]
- pvArray[i][j] =pcorr_result[2]
- elif i 1:
+
+ #XZ: reset the precision of values and convert to string type
+ for oneTraitVal in controlVals:
+ for oneStrainVal in oneTraitVal:
+ oneStrainVal = '%.3f' % oneStrainVal
+
+ for i, oneTraitVal in enumerate( controlVals ):
+ for j in range(i+1, controlTraitNumber):
+ if oneTraitVal == controlVals[j]:
+ nameOfIdenticalTraits.append(controlNames[i])
+ nameOfIdenticalTraits.append(controlNames[j])
+
+ return nameOfIdenticalTraits
+
+#XZ, 6/15/2010: If there is no identical control traits, the returned list is empty.
+#else, the returned list has two elements of control trait name.
+#primaryVal is of list type. It contains value of primary trait.
+#primaryName is of string type.
+#controlVals is of list type. Each element is list too. Each element contain value of one control trait.
+#controlNames is of list type.
+def findIdenticalTraits (primaryVal, primaryName, controlVals, controlNames ):
+ nameOfIdenticalTraits = []
+
+ #XZ: reset the precision of values and convert to string type
+ for oneStrainVal in primaryVal:
+ oneStrainVal = '%.3f' % oneStrainVal
+
+ for oneTraitVal in controlVals:
+ for oneStrainVal in oneTraitVal:
+ oneStrainVal = '%.3f' % oneStrainVal
+
+ controlTraitNumber = len(controlVals)
+
+ if controlTraitNumber > 1:
+ for i, oneTraitVal in enumerate( controlVals ):
+ for j in range(i+1, controlTraitNumber):
+ if oneTraitVal == controlVals[j]:
+ nameOfIdenticalTraits.append(controlNames[i])
+ nameOfIdenticalTraits.append(controlNames[j])
+ break
+
+ if len(nameOfIdenticalTraits) == 0:
+ for i, oneTraitVal in enumerate( controlVals ):
+ if primaryVal == oneTraitVal:
+ nameOfIdenticalTraits.append(primaryName)
+ nameOfIdenticalTraits.append(controlNames[i])
+ break
+
+ return nameOfIdenticalTraits
+
+
+
+#XZ, 03/29/2010: The strains in primaryVal, controlVals, targetVals must be of the same number and in same order.
+#XZ: No value in primaryVal and controlVals could be None.
+
+def determinePartialsByR (primaryVal, controlVals, targetVals, targetNames, method='p'):
+
+ def compute_partial ( primaryVal, controlVals, targetVals, targetNames, method ):
+
+ rpy2.robjects.r("""
+pcor.test <- function(x,y,z,use="mat",method="p",na.rm=T){
+ # The partial correlation coefficient between x and y given z
+ #
+ # pcor.test is free and comes with ABSOLUTELY NO WARRANTY.
+ #
+ # x and y should be vectors
+ #
+ # z can be either a vector or a matrix
+ #
+ # use: There are two methods to calculate the partial correlation coefficient.
+ # One is by using variance-covariance matrix ("mat") and the other is by using recursive formula ("rec").
+ # Default is "mat".
+ #
+ # method: There are three ways to calculate the correlation coefficient,
+ # which are Pearson's ("p"), Spearman's ("s"), and Kendall's ("k") methods.
+ # The last two methods which are Spearman's and Kendall's coefficient are based on the non-parametric analysis.
+ # Default is "p".
+ #
+ # na.rm: If na.rm is T, then all the missing samples are deleted from the whole dataset, which is (x,y,z).
+ # If not, the missing samples will be removed just when the correlation coefficient is calculated.
+ # However, the number of samples for the p-value is the number of samples after removing
+ # all the missing samples from the whole dataset.
+ # Default is "T".
+
+ x <- c(x)
+ y <- c(y)
+ z <- as.data.frame(z)
+
+ if(use == "mat"){
+ p.use <- "Var-Cov matrix"
+ pcor = pcor.mat(x,y,z,method=method,na.rm=na.rm)
+ }else if(use == "rec"){
+ p.use <- "Recursive formula"
+ pcor = pcor.rec(x,y,z,method=method,na.rm=na.rm)
+ }else{
+ stop("use should be either rec or mat!\n")
+ }
+
+ # print the method
+ if(gregexpr("p",method)[[1]][1] == 1){
+ p.method <- "Pearson"
+ }else if(gregexpr("s",method)[[1]][1] == 1){
+ p.method <- "Spearman"
+ }else if(gregexpr("k",method)[[1]][1] == 1){
+ p.method <- "Kendall"
+ }else{
+ stop("method should be pearson or spearman or kendall!\n")
+ }
+
+ # sample number
+ n <- dim(na.omit(data.frame(x,y,z)))[1]
+
+ # given variables' number
+ gn <- dim(z)[2]
+
+ # p-value
+ if(p.method == "Kendall"){
+ statistic <- pcor/sqrt(2*(2*(n-gn)+5)/(9*(n-gn)*(n-1-gn)))
+ p.value <- 2*pnorm(-abs(statistic))
+
+ }else{
+ statistic <- pcor*sqrt((n-2-gn)/(1-pcor^2))
+ p.value <- 2*pnorm(-abs(statistic))
+ }
+
+ data.frame(estimate=pcor,p.value=p.value,statistic=statistic,n=n,gn=gn,Method=p.method,Use=p.use)
+}
+
+# By using var-cov matrix
+pcor.mat <- function(x,y,z,method="p",na.rm=T){
+
+ x <- c(x)
+ y <- c(y)
+ z <- as.data.frame(z)
+
+ if(dim(z)[2] == 0){
+ stop("There should be given data\n")
+ }
+
+ data <- data.frame(x,y,z)
+
+ if(na.rm == T){
+ data = na.omit(data)
+ }
+
+ xdata <- na.omit(data.frame(data[,c(1,2)]))
+ Sxx <- cov(xdata,xdata,m=method)
+
+ xzdata <- na.omit(data)
+ xdata <- data.frame(xzdata[,c(1,2)])
+ zdata <- data.frame(xzdata[,-c(1,2)])
+ Sxz <- cov(xdata,zdata,m=method)
+
+ zdata <- na.omit(data.frame(data[,-c(1,2)]))
+ Szz <- cov(zdata,zdata,m=method)
+
+ # is Szz positive definite?
+ zz.ev <- eigen(Szz)$values
+ if(min(zz.ev)[1]<0){
+ stop("\'Szz\' is not positive definite!\n")
+ }
+
+ # partial correlation
+ Sxx.z <- Sxx - Sxz %*% solve(Szz) %*% t(Sxz)
+
+ rxx.z <- cov2cor(Sxx.z)[1,2]
+
+ rxx.z
+}
+
+# By using recursive formula
+pcor.rec <- function(x,y,z,method="p",na.rm=T){
+ #
+
+ x <- c(x)
+ y <- c(y)
+ z <- as.data.frame(z)
+
+ if(dim(z)[2] == 0){
+ stop("There should be given data\n")
+ }
+
+ data <- data.frame(x,y,z)
+
+ if(na.rm == T){
+ data = na.omit(data)
+ }
+
+ # recursive formula
+ if(dim(z)[2] == 1){
+ tdata <- na.omit(data.frame(data[,1],data[,2]))
+ rxy <- cor(tdata[,1],tdata[,2],m=method)
+
+ tdata <- na.omit(data.frame(data[,1],data[,-c(1,2)]))
+ rxz <- cor(tdata[,1],tdata[,2],m=method)
+
+ tdata <- na.omit(data.frame(data[,2],data[,-c(1,2)]))
+ ryz <- cor(tdata[,1],tdata[,2],m=method)
+
+ rxy.z <- (rxy - rxz*ryz)/( sqrt(1-rxz^2)*sqrt(1-ryz^2) )
+
+ return(rxy.z)
+ }else{
+ x <- c(data[,1])
+ y <- c(data[,2])
+ z0 <- c(data[,3])
+ zc <- as.data.frame(data[,-c(1,2,3)])
+
+ rxy.zc <- pcor.rec(x,y,zc,method=method,na.rm=na.rm)
+ rxz0.zc <- pcor.rec(x,z0,zc,method=method,na.rm=na.rm)
+ ryz0.zc <- pcor.rec(y,z0,zc,method=method,na.rm=na.rm)
+
+ rxy.z <- (rxy.zc - rxz0.zc*ryz0.zc)/( sqrt(1-rxz0.zc^2)*sqrt(1-ryz0.zc^2) )
+ return(rxy.z)
+ }
+}
+""")
+
+ R_pcorr_function = rpy2.robjects.r['pcor.test']
+ R_corr_test = rpy2.robjects.r['cor.test']
+
+ primary = rpy2.robjects.FloatVector(range(len(primaryVal)))
+ for i in range(len(primaryVal)):
+ primary[i] = primaryVal[i]
+
+ control = rpy2.robjects.r.matrix(rpy2.robjects.FloatVector( range(len(controlVals)*len(controlVals[0])) ), ncol=len(controlVals))
+ for i in range(len(controlVals)):
+ for j in range(len(controlVals[0])):
+ control[i*len(controlVals[0]) + j] = controlVals[i][j]
+
+ allcorrelations = []
+
+ for targetIndex, oneTargetVals in enumerate(targetVals):
+
+ this_primary = None
+ this_control = None
+ this_target = None
+
+ if None in oneTargetVals:
+
+ goodIndex = []
+ for i in range(len(oneTargetVals)):
+ if oneTargetVals[i] != None:
+ goodIndex.append(i)
+
+ this_primary = rpy2.robjects.FloatVector(range(len(goodIndex)))
+ for i in range(len(goodIndex)):
+ this_primary[i] = primaryVal[goodIndex[i]]
+
+ this_control = rpy2.robjects.r.matrix(rpy2.robjects.FloatVector( range(len(controlVals)*len(goodIndex)) ), ncol=len(controlVals))
+ for i in range(len(controlVals)):
+ for j in range(len(goodIndex)):
+ this_control[i*len(goodIndex) + j] = controlVals[i][goodIndex[j]]
+
+ this_target = rpy2.robjects.FloatVector(range(len(goodIndex)))
+ for i in range(len(goodIndex)):
+ this_target[i] = oneTargetVals[goodIndex[i]]
+
+ else:
+ this_primary = primary
+ this_control = control
+ this_target = rpy2.robjects.FloatVector(range(len(oneTargetVals)))
+ for i in range(len(oneTargetVals)):
+ this_target[i] = oneTargetVals[i]
+
+ one_name = targetNames[targetIndex]
+ one_N = len(this_primary)
+
+ #calculate partial correlation
+ one_pc_coefficient = 'NA'
+ one_pc_p = 1
+
+ try:
+ if method == 's':
+ result = R_pcorr_function(this_primary, this_target, this_control, method='s')
+ else:
+ result = R_pcorr_function(this_primary, this_target, this_control)
+
+ #XZ: In very few cases, the returned coefficient is nan.
+ #XZ: One way to detect nan is to compare the number to itself. NaN is always != NaN
+ if result[0][0] == result[0][0]:
+ one_pc_coefficient = result[0][0]
+ #XZ: when the coefficient value is 1 (primary trait and target trait are the same),
+ #XZ: occationally, the returned p value is nan instead of 0.
+ if result[1][0] == result[1][0]:
+ one_pc_p = result[1][0]
+ elif abs(one_pc_coefficient - 1) < 0.0000001:
+ one_pc_p = 0
+ except:
+ pass
+
+ #calculate zero order correlation
+ one_corr_coefficient = 0
+ one_corr_p = 1
+
+ try:
+ if method == 's':
+ R_result = R_corr_test(this_primary, this_target, method='spearman')
+ else:
+ R_result = R_corr_test(this_primary, this_target)
+
+ one_corr_coefficient = R_result[3][0]
+ one_corr_p = R_result[2][0]
+ except:
+ pass
+
+ traitinfo = [ one_name, one_N, one_pc_coefficient, one_pc_p, one_corr_coefficient, one_corr_p ]
+
+ allcorrelations.append(traitinfo)
+
+ return allcorrelations
+ #End of function compute_partial
+
+
+ allcorrelations = []
+
+ target_trait_number = len(targetVals)
+
+ if target_trait_number < 1000:
+ allcorrelations = compute_partial ( primaryVal, controlVals, targetVals, targetNames, method )
+ else:
+ step = 1000
+ job_number = math.ceil( float(target_trait_number)/step )
+
+ job_targetVals_lists = []
+ job_targetNames_lists = []
+
+ for job_index in range( int(job_number) ):
+ starti = job_index*step
+ endi = min((job_index+1)*step, target_trait_number)
+
+ one_job_targetVals_list = []
+ one_job_targetNames_list = []
+
+ for i in range( starti, endi ):
+ one_job_targetVals_list.append( targetVals[i] )
+ one_job_targetNames_list.append( targetNames[i] )
+
+ job_targetVals_lists.append( one_job_targetVals_list )
+ job_targetNames_lists.append( one_job_targetNames_list )
+
+ ppservers = ()
+ # Creates jobserver with automatically detected number of workers
+ job_server = pp.Server(ppservers=ppservers)
+
+ jobs = []
+ results = []
+
+ for i, one_job_targetVals_list in enumerate( job_targetVals_lists ):
+ one_job_targetNames_list = job_targetNames_lists[i]
+ #pay attention to modules from outside
+ jobs.append( job_server.submit(func=compute_partial, args=( primaryVal, controlVals, one_job_targetVals_list, one_job_targetNames_list, method), depfuncs=(), modules=("rpy2.robjects",)) )
+
+ for one_job in jobs:
+ one_result = one_job()
+ results.append( one_result )
+
+ for one_result in results:
+ for one_traitinfo in one_result:
+ allcorrelations.append( one_traitinfo )
+
+ return allcorrelations
+
+
+
+#XZ, April 30, 2010: The input primaryTrait and targetTrait are instance of webqtlTrait
+#XZ: The primaryTrait and targetTrait should have executed retrieveData function
+def calZeroOrderCorr (primaryTrait, targetTrait, method='pearson'):
+
+ #primaryTrait.retrieveData()
+
+ #there is no None value in primary_val
+ primary_strain, primary_val, primary_var = primaryTrait.exportInformative()
+
+ #targetTrait.retrieveData()
+
+ #there might be None value in target_val
+ target_val = targetTrait.exportData(primary_strain, type="val")
+
+ R_primary = rpy2.robjects.FloatVector(range(len(primary_val)))
+ for i in range(len(primary_val)):
+ R_primary[i] = primary_val[i]
+
+ N = len(target_val)
+
+ if None in target_val:
+ goodIndex = []
+ for i in range(len(target_val)):
+ if target_val[i] != None:
+ goodIndex.append(i)
+
+ N = len(goodIndex)
+
+ R_primary = rpy2.robjects.FloatVector(range(len(goodIndex)))
+ for i in range(len(goodIndex)):
+ R_primary[i] = primary_val[goodIndex[i]]
+
+ R_target = rpy2.robjects.FloatVector(range(len(goodIndex)))
+ for i in range(len(goodIndex)):
+ R_target[i] = target_val[goodIndex[i]]
+
+ else:
+ R_target = rpy2.robjects.FloatVector(range(len(target_val)))
+ for i in range(len(target_val)):
+ R_target[i] = target_val[i]
+
+ R_corr_test = rpy2.robjects.r['cor.test']
+
+ if method == 'spearman':
+ R_result = R_corr_test(R_primary, R_target, method='spearman')
+ else:
+ R_result = R_corr_test(R_primary, R_target)
+
+ corr_result = []
+ corr_result.append( R_result[3][0] )
+ corr_result.append( N )
+ corr_result.append( R_result[2][0] )
+
+ return corr_result
+
+#####################################################################################
+#Input: primaryValue(list): one list of expression values of one probeSet,
+# targetValue(list): one list of expression values of one probeSet,
+# method(string): indicate correlation method ('pearson' or 'spearman')
+#Output: corr_result(list): first item is Correlation Value, second item is tissue number,
+# third item is PValue
+#Function: get correlation value,Tissue quantity ,p value result by using R;
+#Note : This function is special case since both primaryValue and targetValue are from
+#the same dataset. So the length of these two parameters is the same. They are pairs.
+#Also, in the datatable TissueProbeSetData, all Tissue values are loaded based on
+#the same tissue order
+#####################################################################################
+
+def calZeroOrderCorrForTiss (primaryValue=[], targetValue=[], method='pearson'):
+
+ R_primary = rpy2.robjects.FloatVector(range(len(primaryValue)))
+ N = len(primaryValue)
+ for i in range(len(primaryValue)):
+ R_primary[i] = primaryValue[i]
+
+ R_target = rpy2.robjects.FloatVector(range(len(targetValue)))
+ for i in range(len(targetValue)):
+ R_target[i]=targetValue[i]
+
+ R_corr_test = rpy2.robjects.r['cor.test']
+ if method =='spearman':
+ R_result = R_corr_test(R_primary, R_target, method='spearman')
+ else:
+ R_result = R_corr_test(R_primary, R_target)
+
+ corr_result =[]
+ corr_result.append( R_result[3][0])
+ corr_result.append( N )
+ corr_result.append( R_result[2][0])
+
+ return corr_result
+
+
+
+
+def batchCalTissueCorr(primaryTraitValue=[], SymbolValueDict={}, method='pearson'):
+
+ def cal_tissue_corr(primaryTraitValue, oneSymbolValueDict, method ):
+
+ oneSymbolCorrDict = {}
+ oneSymbolPvalueDict = {}
+
+ R_corr_test = rpy2.robjects.r['cor.test']
+
+ R_primary = rpy2.robjects.FloatVector(range(len(primaryTraitValue)))
+
+ for i in range(len(primaryTraitValue)):
+ R_primary[i] = primaryTraitValue[i]
+
+ for (oneTraitSymbol, oneTraitValue) in oneSymbolValueDict.iteritems():
+ R_target = rpy2.robjects.FloatVector(range(len(oneTraitValue)))
+ for i in range(len(oneTraitValue)):
+ R_target[i] = oneTraitValue[i]
+
+ if method =='spearman':
+ R_result = R_corr_test(R_primary, R_target, method='spearman')
+ else:
+ R_result = R_corr_test(R_primary, R_target)
+
+ oneSymbolCorrDict[oneTraitSymbol] = R_result[3][0]
+ oneSymbolPvalueDict[oneTraitSymbol] = R_result[2][0]
+
+ return(oneSymbolCorrDict, oneSymbolPvalueDict)
+
+
+
+ symbolCorrDict = {}
+ symbolPvalueDict = {}
+
+ items_number = len(SymbolValueDict)
+
+ if items_number <= 1000:
+ symbolCorrDict, symbolPvalueDict = cal_tissue_corr(primaryTraitValue, SymbolValueDict, method)
+ else:
+ items_list = SymbolValueDict.items()
+
+ step = 1000
+ job_number = math.ceil( float(items_number)/step )
+
+ job_oneSymbolValueDict_list = []
+
+ for job_index in range( int(job_number) ):
+ starti = job_index*step
+ endi = min((job_index+1)*step, items_number)
+
+ oneSymbolValueDict = {}
+
+ for i in range( starti, endi ):
+ one_item = items_list[i]
+ one_symbol = one_item[0]
+ one_value = one_item[1]
+ oneSymbolValueDict[one_symbol] = one_value
+
+ job_oneSymbolValueDict_list.append( oneSymbolValueDict )
+
+
+ ppservers = ()
+ # Creates jobserver with automatically detected number of workers
+ job_server = pp.Server(ppservers=ppservers)
+
+ jobs = []
+ results = []
+
+ for i, oneSymbolValueDict in enumerate( job_oneSymbolValueDict_list ):
+
+ #pay attention to modules from outside
+ jobs.append( job_server.submit(func=cal_tissue_corr, args=(primaryTraitValue, oneSymbolValueDict, method), depfuncs=(), modules=("rpy2.robjects",)) )
+
+ for one_job in jobs:
+ one_result = one_job()
+ results.append( one_result )
+
+ for one_result in results:
+ oneSymbolCorrDict, oneSymbolPvalueDict = one_result
+ symbolCorrDict.update( oneSymbolCorrDict )
+ symbolPvalueDict.update( oneSymbolPvalueDict )
+
+ return (symbolCorrDict, symbolPvalueDict)
+
+###########################################################################
+#Input: cursor, GeneNameLst (list), TissueProbeSetFreezeId
+#output: geneIdDict,dataIdDict,ChrDict,MbDict,descDict,pTargetDescDict (Dict)
+#function: get multi dicts for short and long label functions, and for getSymbolValuePairDict and
+# getGeneSymbolTissueValueDict to build dict to get CorrPvArray
+#Note: If there are multiple probesets for one gene, select the one with highest mean.
+###########################################################################
+def getTissueProbeSetXRefInfo(GeneNameLst=[],TissueProbeSetFreezeId=0):
+ Symbols =""
+ symbolList =[]
+ geneIdDict ={}
+ dataIdDict = {}
+ ChrDict = {}
+ MbDict = {}
+ descDict = {}
+ pTargetDescDict = {}
+
+ count = len(GeneNameLst)
+
+ # Added by NL 01/06/2011
+ # Note that:inner join is necessary in this query to get distinct record in one symbol group with highest mean value
+ # Duo to the limit size of TissueProbeSetFreezeId table in DB, performance of inner join is acceptable.
+ if count==0:
+ query='''
+ select t.Symbol,t.GeneId, t.DataId,t.Chr, t.Mb,t.description,t.Probe_Target_Description
+ from (
+ select Symbol, max(Mean) as maxmean
+ from TissueProbeSetXRef
+ where TissueProbeSetFreezeId=%s and Symbol!='' and Symbol Is Not Null group by Symbol)
+ as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol and t.Mean = x.maxmean;
+ '''%TissueProbeSetFreezeId
+
+ else:
+ for i, item in enumerate(GeneNameLst):
+
+ if i == count-1:
+ Symbols += "'%s'" %item
+ else:
+ Symbols += "'%s'," %item
+
+ Symbols = "("+ Symbols+")"
+ query='''
+ select t.Symbol,t.GeneId, t.DataId,t.Chr, t.Mb,t.description,t.Probe_Target_Description
+ from (
+ select Symbol, max(Mean) as maxmean
+ from TissueProbeSetXRef
+ where TissueProbeSetFreezeId=%s and Symbol in %s group by Symbol)
+ as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol and t.Mean = x.maxmean;
+ '''% (TissueProbeSetFreezeId,Symbols)
+
+ try:
+ cursor.execute(query)
+ results =cursor.fetchall()
+ resultCount = len(results)
+ # Key in all dicts is the lower-cased symbol
+ for i, item in enumerate(results):
+ symbol = item[0]
+ symbolList.append(symbol)
+
+ key =symbol.lower()
+ geneIdDict[key]=item[1]
+ dataIdDict[key]=item[2]
+ ChrDict[key]=item[3]
+ MbDict[key]=item[4]
+ descDict[key]=item[5]
+ pTargetDescDict[key]=item[6]
+
+ except:
+ symbolList = None
+ geneIdDict=None
+ dataIdDict=None
+ ChrDict=None
+ MbDict=None
+ descDict=None
+ pTargetDescDict=None
+
+ return symbolList,geneIdDict,dataIdDict,ChrDict,MbDict,descDict,pTargetDescDict
+
+###########################################################################
+#Input: cursor, symbolList (list), dataIdDict(Dict)
+#output: symbolValuepairDict (dictionary):one dictionary of Symbol and Value Pair,
+# key is symbol, value is one list of expression values of one probeSet;
+#function: get one dictionary whose key is gene symbol and value is tissue expression data (list type).
+#Attention! All keys are lower case!
+###########################################################################
+def get_symbol_value_pairs(tissue_data):
+
+ id_list = [tissue_data[symbol.lower()].data_id for item in tissue_data]
+
+ symbol_value_pairs = {}
+ value_list=[]
+
+ query = """SELECT value, id
+ FROM TissueProbeSetData
+ WHERE Id IN {}""".format(create_in_clause(id_list))
+
+ try :
+ results = g.db.execute(query).fetchall()
+ for result in results:
+ value_list.append(result.value)
+ symbol_value_pairs[symbol] = value_list
+ except:
+ symbol_value_pairs[symbol] = None
+
+ #for symbol in symbol_list:
+ # if tissue_data.has_key(symbol):
+ # data_id = tissue_data[symbol].data_id
+ #
+ # query = """select value, id
+ # from TissueProbeSetData
+ # where Id={}""".format(escape(data_id))
+ # try :
+ # results = g.db.execute(query).fetchall()
+ # for item in results:
+ # item = item[0]
+ # value_list.append(item)
+ # symbol_value_pairs[symbol] = value_list
+ # value_list=[]
+ # except:
+ # symbol_value_pairs[symbol] = None
+
+ return symbol_value_pairs
+
+
+########################################################################################################
+#input: cursor, symbolList (list), dataIdDict(Dict): key is symbol
+#output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair.
+# key is symbol, value is one list of expression values of one probeSet.
+#function: wrapper function for getSymbolValuePairDict function
+# build gene symbol list if necessary, cut it into small lists if necessary,
+# then call getSymbolValuePairDict function and merge the results.
+########################################################################################################
+
+def get_trait_symbol_and_tissue_values(symbol_list=None):
+ SymbolValuePairDict={}
+
+ tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list)
+
+ #symbolList,
+ #geneIdDict,
+ #dataIdDict,
+ #ChrDict,
+ #MbDict,
+ #descDict,
+ #pTargetDescDict = getTissueProbeSetXRefInfo(
+ # GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId)
+
+ if len(tissue_data.gene_symbols):
+ return get_symbol_value_pairs(tissue_data)
+
+ #limit_num=1000
+ #count = len(symbol_list)
+ #
+ #symbol_value_pairs = {}
+ #
+ #if count !=0 and count <= limit_num:
+ # symbol_value_pairs = getSymbolValuePairDict(cursor=cursor,symbolList=symbol_list,dataIdDict=dataIdDict)
+ #
+ #elif count > limit_num:
+ # n = count/limit_num
+ # start = 0
+ # stop = 0
+ #
+ # for i in range(n):
+ # stop =limit_num*(i+1)
+ # gList1 = symbolList[start:stop]
+ # PairDict1 = getSymbolValuePairDict(cursor=cursor,symbolList=gList1,dataIdDict=dataIdDict)
+ # start =limit_num*(i+1)
+ #
+ # SymbolValuePairDict.update(PairDict1)
+ #
+ # if stop < count:
+ # stop = count
+ # gList2 = symbolList[start:stop]
+ # PairDict2 = getSymbolValuePairDict(cursor=cursor,symbolList=gList2,dataIdDict=dataIdDict)
+ # SymbolValuePairDict.update(PairDict2)
+ #
+ #return SymbolValuePairDict
+
+########################################################################################################
+#input: cursor, GeneNameLst (list), TissueProbeSetFreezeId(int)
+#output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair.
+# key is symbol, value is one list of expression values of one probeSet.
+#function: wrapper function of getGeneSymbolTissueValueDict function
+# for CorrelationPage.py
+########################################################################################################
+
+#def get_trait_symbol_and_tissue_values(cursor=None,GeneNameLst=[],TissueProbeSetFreezeId=0):
+# SymbolValuePairDict={}
+#
+# symbolList,geneIdDict,dataIdDict,ChrDict,MbDict,descDict,pTargetDescDict = getTissueProbeSetXRefInfo(
+# cursor=cursor,GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId)
+#
+# if symbolList:
+# SymbolValuePairDict = get_gene_symbol_and_tissue_values(symbolList=symbolList,
+# dataIdDict=dataIdDict)
+#
+# return SymbolValuePairDict
+
+########################################################################################################
+#Input: cursor(cursor): MySQL connnection cursor;
+# priGeneSymbolList(list): one list of gene symbol;
+# symbolValuepairDict(dictionary): one dictionary of Symbol and Value Pair,
+# key is symbol, value is one list of expression values of one probeSet;
+#Output: corrArray(array): array of Correlation Value,
+# pvArray(array): array of PValue;
+#Function: build corrArray, pvArray for display by calling calculation function:calZeroOrderCorrForTiss
+########################################################################################################
+
+def getCorrPvArray(cursor=None,priGeneSymbolList=[],symbolValuepairDict={}):
+ # setting initial value for corrArray, pvArray equal to 0
+ Num = len(priGeneSymbolList)
+
+ corrArray = [([0] * (Num))[:] for i in range(Num)]
+ pvArray = [([0] * (Num))[:] for i in range(Num)]
+ i = 0
+ for pkey in priGeneSymbolList:
+ j = 0
+ pkey = pkey.strip().lower()# key in symbolValuepairDict is low case
+ if symbolValuepairDict.has_key(pkey):
+ priValue = symbolValuepairDict[pkey]
+ for tkey in priGeneSymbolList:
+ tkey = tkey.strip().lower()# key in symbolValuepairDict is low case
+ if priValue and symbolValuepairDict.has_key(tkey):
+ tarValue = symbolValuepairDict[tkey]
+
+ if tarValue:
+ if i>j:
+ # corrArray stores Pearson Correlation values
+ # pvArray stores Pearson P-Values
+ pcorr_result =calZeroOrderCorrForTiss(primaryValue=priValue,targetValue=tarValue)
+ corrArray[i][j] =pcorr_result[0]
+ pvArray[i][j] =pcorr_result[2]
+ elif i