author      zsloan    2018-04-11 20:39:24 +0000
committer   zsloan    2018-04-11 20:39:24 +0000
commit      9750e63d64849d7fa9e1e681f56b73cae96905df (patch)
tree        daa743957497d92b8a3573b00405593d3454cdd0
parent      d8cec0ef94b7683f42946ce182a937484ad1034a (diff)
download    genenetwork2-9750e63d64849d7fa9e1e681f56b73cae96905df.tar.gz
Added tissue correlation p value to correlation page results, since it was missing before
Removed a bunch of unused code from all correlation-related files and the CTL analysis code
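
Note on the first line of the commit message: in the code retained by this patch (do_tissue_correlation_for_trait_list in show_corr_results.py), the tissue p-value is element 2 of the sequence returned by correlation_functions.cal_zero_order_corr_for_tiss and is stored on each result trait so the correlation page template can display it. The following is a minimal sketch of that flow, not part of the patch; attach_tissue_corr and its parameters are illustrative, and corr_func stands in for cal_zero_order_corr_for_tiss:

    def attach_tissue_corr(primary_tissue_vals, results, tissue_vals_by_symbol, corr_func):
        """Attach tissue correlation and its p-value to each correlation result.

        corr_func stands in for correlation_functions.cal_zero_order_corr_for_tiss,
        which (per show_corr_results.py) returns a sequence whose element 0 is the
        correlation and element 2 is the p-value.
        """
        for trait in results:
            # Sane defaults, mirroring show_corr_results.py
            trait.tissue_corr = 0
            trait.tissue_pvalue = 0
            symbol = getattr(trait, "symbol", None)
            if symbol and symbol.lower() in tissue_vals_by_symbol:
                result = corr_func(primary_tissue_vals, tissue_vals_by_symbol[symbol.lower()])
                trait.tissue_corr = result[0]
                trait.tissue_pvalue = result[2]  # the p-value now shown in the results table
        return results
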
-rw-r--r--   wqflask/base/mrna_assay_tissue_data.py                    65
-rw-r--r--   wqflask/wqflask/collect.py                                  8
-rw-r--r--   wqflask/wqflask/correlation/corr_scatter_plot.py           16
-rw-r--r--   wqflask/wqflask/correlation/correlation_functions.py      786
-rw-r--r--   wqflask/wqflask/correlation/show_corr_results.py          844
-rw-r--r--   wqflask/wqflask/correlation_matrix/show_corr_matrix.py      1
-rw-r--r--   wqflask/wqflask/ctl/ctl_analysis.py                        63
-rw-r--r--   wqflask/wqflask/templates/correlation_page.html             9
8 files changed, 33 insertions, 1759 deletions
diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py
index 53f7c16a..6fec5dcd 100644
--- a/wqflask/base/mrna_assay_tissue_data.py
+++ b/wqflask/base/mrna_assay_tissue_data.py
@@ -21,17 +21,8 @@ class MrnaAssayTissueData(object):
         if self.gene_symbols == None:
             self.gene_symbols = []
 
-        #print("self.gene_symbols:", self.gene_symbols)
-
         self.data = collections.defaultdict(Bunch)
 
-        #self.gene_id_dict ={}
-        #self.data_id_dict = {}
-        #self.chr_dict = {}
-        #self.mb_dict = {}
-        #self.desc_dict = {}
-        #self.probe_target_desc_dict = {}
-
         query =  '''select t.Symbol, t.GeneId, t.DataId, t.Chr, t.Mb, t.description, t.Probe_Target_Description
                         from (
                         select Symbol, max(Mean) as maxmean
@@ -51,7 +42,6 @@ class MrnaAssayTissueData(object):
             in_clause = db_tools.create_in_clause(gene_symbols)
 
             #ZS: This was in the query, not sure why: http://docs.python.org/2/library/string.html?highlight=lower#string.lower
-
             query += ''' Symbol in {} group by Symbol)
                 as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol
                 and t.Mean = x.maxmean;
@@ -66,9 +56,7 @@ class MrnaAssayTissueData(object):
 
         for result in results:
             symbol = result[0]
-            #if symbol.lower() in [gene_symbol.lower() for gene_symbol in gene_symbols]:
             if symbol.lower() in lower_symbols:
-                #gene_symbols.append(symbol)
                 symbol = symbol.lower()
 
                 self.data[symbol].gene_id = result.GeneId
@@ -78,8 +66,6 @@ class MrnaAssayTissueData(object):
                 self.data[symbol].description = result.description
                 self.data[symbol].probe_target_description = result.Probe_Target_Description
 
-        print("self.data: ", pf(self.data))
-
     ###########################################################################
     #Input: cursor, symbolList (list), dataIdDict(Dict)
     #output: symbolValuepairDict (dictionary):one dictionary of Symbol and Value Pair,
@@ -106,53 +92,4 @@ class MrnaAssayTissueData(object):
                 else:
                     symbol_values_dict[result.Symbol.lower()].append(result.value)
 
-        #for symbol in self.data:
-        #    data_id = self.data[symbol].data_id
-        #    symbol_values_dict[symbol] = self.get_tissue_values(data_id)
-
-
-        return symbol_values_dict
-
-
-    #def get_tissue_values(self, data_id):
-    #    """Gets the tissue values for a particular gene"""
-    #
-    #    tissue_values=[]
-    #
-    #    query = """SELECT value, id
-    #               FROM TissueProbeSetData
-    #               WHERE Id IN {}""".format(db_tools.create_in_clause(data_id))
-    #
-    #    #try :
-    #    results = g.db.execute(query).fetchall()
-    #    for result in results:
-    #        tissue_values.append(result.value)
-    #    #symbol_values_dict[symbol] = value_list
-    #    #except:
-    #    #    symbol_values_pairs[symbol] = None
-    #
-    #    return tissue_values
-
-########################################################################################################
-#input: cursor, symbolList (list), dataIdDict(Dict): key is symbol
-#output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair.
-#        key is symbol, value is one list of expression values of one probeSet.
-#function: wrapper function for getSymbolValuePairDict function
-#          build gene symbol list if necessary, cut it into small lists if necessary,
-#          then call getSymbolValuePairDict function and merge the results.
-########################################################################################################
-
-#def get_trait_symbol_and_tissue_values(symbol_list=None):
-#    tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list)
-#
-#    #symbolList,
-#    #geneIdDict,
-#    #dataIdDict,
-#    #ChrDict,
-#    #MbDict,
-#    #descDict,
-#    #pTargetDescDict = getTissueProbeSetXRefInfo(
-#    #                    GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId)
-#
-#    if len(tissue_data.gene_symbols):
-#        return get_symbol_values_pairs(tissue_data)
+        return symbol_values_dict
\ No newline at end of file
diff --git a/wqflask/wqflask/collect.py b/wqflask/wqflask/collect.py
index ffc698de..04035e96 100644
--- a/wqflask/wqflask/collect.py
+++ b/wqflask/wqflask/collect.py
@@ -40,13 +40,6 @@ import logging
 from utility.logger import getLogger
 logger = getLogger(__name__)
 
-def get_collection():
-    if g.user_session.logged_in:
-        return UserCollection()
-    else:
-        return AnonCollection()
-    #else:
-    #    CauseError
 
 class AnonCollection(object):
     """User is not logged in"""
@@ -282,7 +275,6 @@ def create_new(collection_name):
         db_session.commit()
         return redirect(url_for('view_collection', uc_id=uc.id))
     else:
-        current_collections = user_manager.AnonUser().get_collections()
         ac = AnonCollection(collection_name)
         ac.changed_timestamp = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p')
         ac.add_traits(params)
diff --git a/wqflask/wqflask/correlation/corr_scatter_plot.py b/wqflask/wqflask/correlation/corr_scatter_plot.py
index a08cd759..94711c67 100644
--- a/wqflask/wqflask/correlation/corr_scatter_plot.py
+++ b/wqflask/wqflask/correlation/corr_scatter_plot.py
@@ -19,37 +19,31 @@ class CorrScatterPlot(object):
             width = int(params['width'])
         except:
             width = 800
-        self.width = width
 
         try:
             height = int(params['height'])
         except:
             height = 600
-        self.height = height
 
         try:
             circle_color = params['circle_color']
         except:
             circle_color = '#3D85C6'
-        self.circle_color = circle_color
 
         try:
             circle_radius = int(params['circle_radius'])
         except:
             circle_radius = 5
-        self.circle_radius = circle_radius
 
         try:
             line_color = params['line_color']
         except:
             line_color = '#FF0000'
-        self.line_color = line_color
 
         try:
             line_width = int(params['line_width'])
         except:
             line_width = 1
-        self.line_width = line_width
 
         samples_1, samples_2, num_overlap = corr_result_helpers.normalize_values_with_samples(self.trait_1.data, self.trait_2.data)
 
@@ -66,14 +60,14 @@ class CorrScatterPlot(object):
 
         x = np.array(vals_1)
         y = np.array(vals_2)
-        slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
+        slope, intercept, r_value, p_value, _std_err = stats.linregress(x, y)
         
         rx = stats.rankdata(x)
         ry = stats.rankdata(y)        
         self.rdata = []
         self.rdata.append(rx.tolist())
         self.rdata.append(ry.tolist())        
-        srslope, srintercept, srr_value, srp_value, srstd_err = stats.linregress(rx, ry)
+        srslope, srintercept, srr_value, srp_value, _srstd_err = stats.linregress(rx, ry)
 
         self.js_data = dict(
             data = self.data,
@@ -86,17 +80,17 @@ class CorrScatterPlot(object):
             num_overlap = num_overlap,
             vals_1 = vals_1,
             vals_2 = vals_2,
-            
+
             slope = slope,
             intercept = intercept,
             r_value = r_value,
             p_value = p_value,
-            
+
             srslope = srslope,
             srintercept = srintercept,
             srr_value = srr_value,
             srp_value = srp_value,
-            
+
             width = width,
             height = height,
             circle_color = circle_color,
diff --git a/wqflask/wqflask/correlation/correlation_functions.py b/wqflask/wqflask/correlation/correlation_functions.py
index 1ee9b558..06dec795 100644
--- a/wqflask/wqflask/correlation/correlation_functions.py
+++ b/wqflask/wqflask/correlation/correlation_functions.py
@@ -28,468 +28,12 @@ from __future__ import absolute_import, print_function, division
 
 import math
 import rpy2.robjects
-import pp
 import string
 
-from utility import webqtlUtil
 from base.mrna_assay_tissue_data import MrnaAssayTissueData
-from base.trait import GeneralTrait
-from db import webqtlDatabaseFunction
 
 from flask import Flask, g
 
-#XZ: The input 'controls' is String. It contains the full name of control traits.
-#XZ: The input variable 'strainlst' is List. It contains the strain names of primary trait.
-#XZ: The returned tcstrains is the list of list [[],[]...]. So are tcvals and tcvars. The last returned parameter is list of numbers.
-#XZ, 03/29/2010: For each returned control trait, there is no None value in it.
-def controlStrains(controls, strainlst):
-
-    controls = controls.split(',')
-
-    cvals = {}
-    for oneTraitName in controls:
-        oneTrait = webqtlTrait(fullname=oneTraitName, cursor=webqtlDatabaseFunction.getCursor() )
-        oneTrait.retrieveData()
-        cvals[oneTraitName] = oneTrait.data
-
-    tcstrains = []
-    tcvals = []
-    tcvars = []
-
-    for oneTraitName in controls:
-        strains = []
-        vals = []
-        vars = []
-
-        for _strain in strainlst:
-            if cvals[oneTraitName].has_key(_strain):
-                _val = cvals[oneTraitName][_strain].val
-                if _val != None:
-                    strains.append(_strain)
-                    vals.append(_val)
-                    vars.append(None)
-
-        tcstrains.append(strains)
-        tcvals.append(vals)
-        tcvars.append(vars)
-
-    return tcstrains, tcvals, tcvars, [len(x) for x in tcstrains]
-
-
-
-#XZ, 03/29/2010: After execution of functon "controlStrains" and "fixStrains", primary trait and control traits have the same strains and in the same order. There is no 'None' value in them.
-def fixStrains(_strains,_controlstrains,_vals,_controlvals,_vars,_controlvars):
-    """Corrects strains, vals, and vars so that all contrain only those strains common
-    to the reference trait and all control traits."""
-
-    def dictify(strains,vals,vars):
-        subdict = {}
-        for i in xrange(len(strains)):
-            subdict[strains[i]] = (vals[i],vars[i])
-        return subdict
-
-    #XZ: The 'dicts' is a list of dictionary. The first element is the dictionary of reference trait. The rest elements are for control traits.
-    dicts = []
-    dicts.append(dictify(_strains,_vals,_vars))
-
-    nCstrains = len(_controlstrains)
-    for i in xrange(nCstrains):
-        dicts.append(dictify(_controlstrains[i],_controlvals[i],_controlvars[i]))
-
-    _newstrains = []
-    _vals = []
-    _vars = []
-    _controlvals = [[] for x in xrange(nCstrains)]
-    _controlvars = [[] for x in xrange(nCstrains)]
-
-    for strain in _strains:
-        inall = True
-        for d in dicts:
-            if strain not in d:
-                inall = False
-                break
-        if inall:
-            _newstrains.append(strain)
-            _vals.append(dicts[0][strain][0])
-            _vars.append(dicts[0][strain][1])
-            for i in xrange(nCstrains):
-                _controlvals[i].append(dicts[i+1][strain][0])
-                _controlvars[i].append(dicts[i+1][strain][1])
-
-    return _newstrains,  _vals, _controlvals, _vars, _controlvars
-
-
-#XZ, 6/15/2010: If there is no identical control traits, the returned list is empty.
-#else, the returned list has two elements of control trait name.
-def findIdenticalControlTraits ( controlVals, controlNames ):
-    nameOfIdenticalTraits = []
-
-    controlTraitNumber = len(controlVals)
-
-    if controlTraitNumber > 1:
-
-        #XZ: reset the precision of values and convert to string type
-        for oneTraitVal in controlVals:
-            for oneStrainVal in oneTraitVal:
-                oneStrainVal = '%.3f' % oneStrainVal
-
-        for i, oneTraitVal in enumerate( controlVals ):
-            for j in range(i+1, controlTraitNumber):
-                if oneTraitVal == controlVals[j]:
-                    nameOfIdenticalTraits.append(controlNames[i])
-                    nameOfIdenticalTraits.append(controlNames[j])
-
-    return nameOfIdenticalTraits
-
-#XZ, 6/15/2010: If there is no identical control traits, the returned list is empty.
-#else, the returned list has two elements of control trait name.
-#primaryVal is of list type. It contains value of primary trait.
-#primaryName is of string type.
-#controlVals is of list type. Each element is list too. Each element contain value of one control trait.
-#controlNames is of list type.
-def findIdenticalTraits (primaryVal, primaryName, controlVals, controlNames ):
-    nameOfIdenticalTraits = []
-
-    #XZ: reset the precision of values and convert to string type
-    for oneStrainVal in primaryVal:
-        oneStrainVal = '%.3f' % oneStrainVal
-
-    for oneTraitVal in controlVals:
-        for oneStrainVal in oneTraitVal:
-            oneStrainVal = '%.3f' % oneStrainVal
-
-    controlTraitNumber = len(controlVals)
-
-    if controlTraitNumber > 1:
-        for i, oneTraitVal in enumerate( controlVals ):
-            for j in range(i+1, controlTraitNumber):
-                if oneTraitVal == controlVals[j]:
-                    nameOfIdenticalTraits.append(controlNames[i])
-                    nameOfIdenticalTraits.append(controlNames[j])
-                    break
-
-    if len(nameOfIdenticalTraits) == 0:
-        for i, oneTraitVal in enumerate( controlVals ):
-            if primaryVal == oneTraitVal:
-                nameOfIdenticalTraits.append(primaryName)
-                nameOfIdenticalTraits.append(controlNames[i])
-                break
-
-    return nameOfIdenticalTraits
-
-
-
-#XZ, 03/29/2010: The strains in primaryVal, controlVals, targetVals must be of the same number and in same order.
-#XZ: No value in primaryVal and controlVals could be None.
-
-def determinePartialsByR (primaryVal, controlVals, targetVals, targetNames, method='p'):
-
-    def compute_partial ( primaryVal, controlVals, targetVals, targetNames, method ):
-
-        rpy2.robjects.r("""
-pcor.test <- function(x,y,z,use="mat",method="p",na.rm=T){
-        # The partial correlation coefficient between x and y given z
-        #
-        # pcor.test is free and comes with ABSOLUTELY NO WARRANTY.
-        #
-        # x and y should be vectors
-        #
-        # z can be either a vector or a matrix
-        #
-        # use: There are two methods to calculate the partial correlation coefficient.
-        #        One is by using variance-covariance matrix ("mat") and the other is by using recursive formula ("rec").
-        #        Default is "mat".
-        #
-        # method: There are three ways to calculate the correlation coefficient,
-        #           which are Pearson's ("p"), Spearman's ("s"), and Kendall's ("k") methods.
-        #           The last two methods which are Spearman's and Kendall's coefficient are based on the non-parametric analysis.
-        #           Default is "p".
-        #
-        # na.rm: If na.rm is T, then all the missing samples are deleted from the whole dataset, which is (x,y,z).
-        #        If not, the missing samples will be removed just when the correlation coefficient is calculated.
-        #          However, the number of samples for the p-value is the number of samples after removing
-        #          all the missing samples from the whole dataset.
-        #          Default is "T".
-
-        x <- c(x)
-        y <- c(y)
-        z <- as.data.frame(z)
-
-        if(use == "mat"){
-                p.use <- "Var-Cov matrix"
-                pcor = pcor.mat(x,y,z,method=method,na.rm=na.rm)
-        }else if(use == "rec"){
-                p.use <- "Recursive formula"
-                pcor = pcor.rec(x,y,z,method=method,na.rm=na.rm)
-        }else{
-                stop("use should be either rec or mat!\n")
-        }
-
-        # print the method
-        if(gregexpr("p",method)[[1]][1] == 1){
-                p.method <- "Pearson"
-        }else if(gregexpr("s",method)[[1]][1] == 1){
-                p.method <- "Spearman"
-        }else if(gregexpr("k",method)[[1]][1] == 1){
-                p.method <- "Kendall"
-        }else{
-                stop("method should be pearson or spearman or kendall!\n")
-        }
-
-        # sample number
-        n <- dim(na.omit(data.frame(x,y,z)))[1]
-
-        # given variables' number
-        gn <- dim(z)[2]
-
-        # p-value
-        if(p.method == "Kendall"){
-                statistic <- pcor/sqrt(2*(2*(n-gn)+5)/(9*(n-gn)*(n-1-gn)))
-                p.value <- 2*pnorm(-abs(statistic))
-
-        }else{
-                statistic <- pcor*sqrt((n-2-gn)/(1-pcor^2))
-                p.value <- 2*pnorm(-abs(statistic))
-        }
-
-        data.frame(estimate=pcor,p.value=p.value,statistic=statistic,n=n,gn=gn,Method=p.method,Use=p.use)
-}
-
-# By using var-cov matrix
-pcor.mat <- function(x,y,z,method="p",na.rm=T){
-
-        x <- c(x)
-        y <- c(y)
-        z <- as.data.frame(z)
-
-        if(dim(z)[2] == 0){
-                stop("There should be given data\n")
-        }
-
-        data <- data.frame(x,y,z)
-
-        if(na.rm == T){
-                data = na.omit(data)
-        }
-
-        xdata <- na.omit(data.frame(data[,c(1,2)]))
-        Sxx <- cov(xdata,xdata,m=method)
-
-        xzdata <- na.omit(data)
-        xdata <- data.frame(xzdata[,c(1,2)])
-        zdata <- data.frame(xzdata[,-c(1,2)])
-        Sxz <- cov(xdata,zdata,m=method)
-
-        zdata <- na.omit(data.frame(data[,-c(1,2)]))
-        Szz <- cov(zdata,zdata,m=method)
-
-        # is Szz positive definite?
-        zz.ev <- eigen(Szz)$values
-        if(min(zz.ev)[1]<0){
-                stop("\'Szz\' is not positive definite!\n")
-        }
-
-        # partial correlation
-        Sxx.z <- Sxx - Sxz %*% solve(Szz) %*% t(Sxz)
-
-        rxx.z <- cov2cor(Sxx.z)[1,2]
-
-        rxx.z
-}
-
-# By using recursive formula
-pcor.rec <- function(x,y,z,method="p",na.rm=T){
-        #
-
-        x <- c(x)
-        y <- c(y)
-        z <- as.data.frame(z)
-
-        if(dim(z)[2] == 0){
-                stop("There should be given data\n")
-        }
-
-        data <- data.frame(x,y,z)
-
-        if(na.rm == T){
-                data = na.omit(data)
-        }
-
-        # recursive formula
-        if(dim(z)[2] == 1){
-                tdata <- na.omit(data.frame(data[,1],data[,2]))
-                rxy <- cor(tdata[,1],tdata[,2],m=method)
-
-                tdata <- na.omit(data.frame(data[,1],data[,-c(1,2)]))
-                rxz <- cor(tdata[,1],tdata[,2],m=method)
-
-                tdata <- na.omit(data.frame(data[,2],data[,-c(1,2)]))
-                ryz <- cor(tdata[,1],tdata[,2],m=method)
-
-                rxy.z <- (rxy - rxz*ryz)/( sqrt(1-rxz^2)*sqrt(1-ryz^2) )
-
-                return(rxy.z)
-        }else{
-                x <- c(data[,1])
-                y <- c(data[,2])
-                z0 <- c(data[,3])
-                zc <- as.data.frame(data[,-c(1,2,3)])
-
-                rxy.zc <- pcor.rec(x,y,zc,method=method,na.rm=na.rm)
-                rxz0.zc <- pcor.rec(x,z0,zc,method=method,na.rm=na.rm)
-                ryz0.zc <- pcor.rec(y,z0,zc,method=method,na.rm=na.rm)
-
-                rxy.z <- (rxy.zc - rxz0.zc*ryz0.zc)/( sqrt(1-rxz0.zc^2)*sqrt(1-ryz0.zc^2) )
-                return(rxy.z)
-        }
-}
-""")
-
-        R_pcorr_function = rpy2.robjects.r['pcor.test']
-        R_corr_test = rpy2.robjects.r['cor.test']
-
-        primary = rpy2.robjects.FloatVector(range(len(primaryVal)))
-        for i in range(len(primaryVal)):
-            primary[i] = primaryVal[i]
-
-        control = rpy2.robjects.r.matrix(rpy2.robjects.FloatVector( range(len(controlVals)*len(controlVals[0])) ), ncol=len(controlVals))
-        for i in range(len(controlVals)):
-            for j in range(len(controlVals[0])):
-                control[i*len(controlVals[0]) + j] = controlVals[i][j]
-
-        allcorrelations = []
-
-        for targetIndex, oneTargetVals in enumerate(targetVals):
-
-            this_primary = None
-            this_control = None
-            this_target = None
-
-            if None in oneTargetVals:
-
-                goodIndex = []
-                for i in range(len(oneTargetVals)):
-                    if oneTargetVals[i] != None:
-                        goodIndex.append(i)
-
-                this_primary = rpy2.robjects.FloatVector(range(len(goodIndex)))
-                for i in range(len(goodIndex)):
-                    this_primary[i] = primaryVal[goodIndex[i]]
-
-                this_control = rpy2.robjects.r.matrix(rpy2.robjects.FloatVector( range(len(controlVals)*len(goodIndex)) ), ncol=len(controlVals))
-                for i in range(len(controlVals)):
-                    for j in range(len(goodIndex)):
-                        this_control[i*len(goodIndex) + j] = controlVals[i][goodIndex[j]]
-
-                this_target = rpy2.robjects.FloatVector(range(len(goodIndex)))
-                for i in range(len(goodIndex)):
-                    this_target[i] = oneTargetVals[goodIndex[i]]
-
-            else:
-                this_primary = primary
-                this_control = control
-                this_target = rpy2.robjects.FloatVector(range(len(oneTargetVals)))
-                for i in range(len(oneTargetVals)):
-                    this_target[i] = oneTargetVals[i]
-
-            one_name = targetNames[targetIndex]
-            one_N = len(this_primary)
-
-            #calculate partial correlation
-            one_pc_coefficient = 'NA'
-            one_pc_p = 1
-
-            try:
-                if method == 's':
-                    result = R_pcorr_function(this_primary, this_target, this_control, method='s')
-                else:
-                    result = R_pcorr_function(this_primary, this_target, this_control)
-
-                #XZ: In very few cases, the returned coefficient is nan.
-                #XZ: One way to detect nan is to compare the number to itself. NaN is always != NaN
-                if result[0][0] == result[0][0]:
-                    one_pc_coefficient = result[0][0]
-                    #XZ: when the coefficient value is 1 (primary trait and target trait are the same),
-                    #XZ: occationally, the returned p value is nan instead of 0.
-                    if result[1][0] == result[1][0]:
-                        one_pc_p = result[1][0]
-                    elif abs(one_pc_coefficient - 1) < 0.0000001:
-                        one_pc_p = 0
-            except:
-                pass
-
-            #calculate zero order correlation
-            one_corr_coefficient = 0
-            one_corr_p = 1
-
-            try:
-                if method == 's':
-                    R_result = R_corr_test(this_primary, this_target, method='spearman')
-                else:
-                    R_result = R_corr_test(this_primary, this_target)
-
-                one_corr_coefficient = R_result[3][0]
-                one_corr_p = R_result[2][0]
-            except:
-                pass
-
-            traitinfo = [ one_name, one_N, one_pc_coefficient, one_pc_p, one_corr_coefficient, one_corr_p ]
-
-            allcorrelations.append(traitinfo)
-
-        return allcorrelations
-    #End of function compute_partial
-
-
-    allcorrelations = []
-
-    target_trait_number = len(targetVals)
-
-    if target_trait_number < 1000:
-        allcorrelations = compute_partial ( primaryVal, controlVals, targetVals, targetNames, method )
-    else:
-        step = 1000
-        job_number = math.ceil( float(target_trait_number)/step )
-
-        job_targetVals_lists = []
-        job_targetNames_lists = []
-
-        for job_index in range( int(job_number) ):
-            starti = job_index*step
-            endi = min((job_index+1)*step, target_trait_number)
-
-            one_job_targetVals_list = []
-            one_job_targetNames_list = []
-
-            for i in range( starti, endi ):
-                one_job_targetVals_list.append( targetVals[i] )
-                one_job_targetNames_list.append( targetNames[i] )
-
-            job_targetVals_lists.append( one_job_targetVals_list )
-            job_targetNames_lists.append( one_job_targetNames_list )
-
-        ppservers = ()
-        # Creates jobserver with automatically detected number of workers
-        job_server = pp.Server(ppservers=ppservers)
-
-        jobs = []
-        results = []
-
-        for i, one_job_targetVals_list in enumerate( job_targetVals_lists ):
-            one_job_targetNames_list = job_targetNames_lists[i]
-            #pay attention to modules from outside
-            jobs.append( job_server.submit(func=compute_partial, args=( primaryVal, controlVals, one_job_targetVals_list, one_job_targetNames_list, method), depfuncs=(), modules=("rpy2.robjects",)) )
-
-        for one_job in jobs:
-            one_result = one_job()
-            results.append( one_result )
-
-        for one_result in results:
-            for one_traitinfo in one_result:
-                allcorrelations.append( one_traitinfo )
-
-    return allcorrelations
-
 
 #####################################################################################
 #Input: primaryValue(list): one list of expression values of one probeSet,
@@ -529,170 +73,6 @@ def cal_zero_order_corr_for_tiss (primaryValue=[], targetValue=[], method='pears
     return corr_result
 
 
-
-
-def batchCalTissueCorr(primaryTraitValue=[], SymbolValueDict={}, method='pearson'):
-
-    def cal_tissue_corr(primaryTraitValue, oneSymbolValueDict, method ):
-
-        oneSymbolCorrDict = {}
-        oneSymbolPvalueDict = {}
-
-        R_corr_test = rpy2.robjects.r['cor.test']
-
-        R_primary = rpy2.robjects.FloatVector(range(len(primaryTraitValue)))
-
-        for i in range(len(primaryTraitValue)):
-            R_primary[i] = primaryTraitValue[i]
-
-        for (oneTraitSymbol, oneTraitValue) in oneSymbolValueDict.iteritems():
-            R_target = rpy2.robjects.FloatVector(range(len(oneTraitValue)))
-            for i in range(len(oneTraitValue)):
-                R_target[i] = oneTraitValue[i]
-
-            if method =='spearman':
-                R_result = R_corr_test(R_primary, R_target, method='spearman')
-            else:
-                R_result = R_corr_test(R_primary, R_target)
-
-            oneSymbolCorrDict[oneTraitSymbol] = R_result[3][0]
-            oneSymbolPvalueDict[oneTraitSymbol] = R_result[2][0]
-
-        return(oneSymbolCorrDict, oneSymbolPvalueDict)
-
-
-
-    symbolCorrDict = {}
-    symbolPvalueDict = {}
-
-    items_number = len(SymbolValueDict)
-
-    if items_number <= 1000:
-        symbolCorrDict, symbolPvalueDict = cal_tissue_corr(primaryTraitValue, SymbolValueDict, method)
-    else:
-        items_list = SymbolValueDict.items()
-
-        step = 1000
-        job_number = math.ceil( float(items_number)/step )
-
-        job_oneSymbolValueDict_list = []
-
-        for job_index in range( int(job_number) ):
-            starti = job_index*step
-            endi = min((job_index+1)*step, items_number)
-
-            oneSymbolValueDict = {}
-
-            for i in range( starti, endi ):
-                one_item = items_list[i]
-                one_symbol = one_item[0]
-                one_value = one_item[1]
-                oneSymbolValueDict[one_symbol] = one_value
-
-            job_oneSymbolValueDict_list.append( oneSymbolValueDict )
-
-
-        ppservers = ()
-        # Creates jobserver with automatically detected number of workers
-        job_server = pp.Server(ppservers=ppservers)
-
-        jobs = []
-        results = []
-
-        for i, oneSymbolValueDict in enumerate( job_oneSymbolValueDict_list ):
-
-            #pay attention to modules from outside
-            jobs.append( job_server.submit(func=cal_tissue_corr, args=(primaryTraitValue, oneSymbolValueDict, method), depfuncs=(), modules=("rpy2.robjects",)) )
-
-        for one_job in jobs:
-            one_result = one_job()
-            results.append( one_result )
-
-        for one_result in results:
-            oneSymbolCorrDict, oneSymbolPvalueDict = one_result
-            symbolCorrDict.update( oneSymbolCorrDict )
-            symbolPvalueDict.update( oneSymbolPvalueDict )
-
-    return (symbolCorrDict, symbolPvalueDict)
-
-###########################################################################
-#Input: cursor, GeneNameLst (list), TissueProbeSetFreezeId
-#output: geneIdDict,dataIdDict,ChrDict,MbDict,descDict,pTargetDescDict (Dict)
-#function: get multi dicts for short and long label functions, and for getSymbolValuePairDict and
-# getGeneSymbolTissueValueDict to build dict to get CorrPvArray
-#Note: If there are multiple probesets for one gene, select the one with highest mean.
-###########################################################################
-def getTissueProbeSetXRefInfo(GeneNameLst=[],TissueProbeSetFreezeId=0):
-    Symbols =""
-    symbolList =[]
-    geneIdDict ={}
-    dataIdDict = {}
-    ChrDict = {}
-    MbDict = {}
-    descDict = {}
-    pTargetDescDict = {}
-
-    count = len(GeneNameLst)
-
-    # Added by NL 01/06/2011
-    # Note that:inner join is necessary in this query to get distinct record in one symbol group with highest mean value
-    # Duo to the limit size of TissueProbeSetFreezeId table in DB, performance of inner join is acceptable.
-    if count==0:
-        query='''
-                        select t.Symbol,t.GeneId, t.DataId,t.Chr, t.Mb,t.description,t.Probe_Target_Description
-                        from (
-                                select Symbol, max(Mean) as maxmean
-                                from TissueProbeSetXRef
-                                where TissueProbeSetFreezeId=%s and Symbol!='' and Symbol Is Not Null group by Symbol)
-                        as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol and t.Mean = x.maxmean;
-                '''%TissueProbeSetFreezeId
-
-    else:
-        for i, item in enumerate(GeneNameLst):
-
-            if i == count-1:
-                Symbols += "'%s'" %item
-            else:
-                Symbols += "'%s'," %item
-
-        Symbols = "("+ Symbols+")"
-        query='''
-                        select t.Symbol,t.GeneId, t.DataId,t.Chr, t.Mb,t.description,t.Probe_Target_Description
-                        from (
-                                select Symbol, max(Mean) as maxmean
-                                from TissueProbeSetXRef
-                                where TissueProbeSetFreezeId=%s and Symbol in %s group by Symbol)
-                        as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol and t.Mean = x.maxmean;
-                '''% (TissueProbeSetFreezeId,Symbols)
-
-    try:
-        cursor.execute(query)
-        results =cursor.fetchall()
-        resultCount = len(results)
-        # Key in all dicts is the lower-cased symbol
-        for i, item in enumerate(results):
-            symbol = item[0]
-            symbolList.append(symbol)
-
-            key =symbol.lower()
-            geneIdDict[key]=item[1]
-            dataIdDict[key]=item[2]
-            ChrDict[key]=item[3]
-            MbDict[key]=item[4]
-            descDict[key]=item[5]
-            pTargetDescDict[key]=item[6]
-
-    except:
-        symbolList = None
-        geneIdDict=None
-        dataIdDict=None
-        ChrDict=None
-        MbDict=None
-        descDict=None
-        pTargetDescDict=None
-
-    return symbolList,geneIdDict,dataIdDict,ChrDict,MbDict,descDict,pTargetDescDict
-
 ###########################################################################
 #Input: cursor, symbolList (list), dataIdDict(Dict)
 #output: symbolValuepairDict (dictionary):one dictionary of Symbol and Value Pair,
@@ -701,7 +81,6 @@ def getTissueProbeSetXRefInfo(GeneNameLst=[],TissueProbeSetFreezeId=0):
 #Attention! All keys are lower case!
 ###########################################################################
 def get_symbol_value_pairs(tissue_data):
-
     id_list = [tissue_data[symbol.lower()].data_id for item in tissue_data]
 
     symbol_value_pairs = {}
@@ -719,23 +98,6 @@ def get_symbol_value_pairs(tissue_data):
     except:
         symbol_value_pairs[symbol] = None
 
-    #for symbol in symbol_list:
-    #    if tissue_data.has_key(symbol):
-    #        data_id = tissue_data[symbol].data_id
-    #
-    #        query = """select value, id
-    #                   from TissueProbeSetData
-    #                   where Id={}""".format(escape(data_id))
-    #        try :
-    #            results = g.db.execute(query).fetchall()
-    #            for item in results:
-    #                item = item[0]
-    #                value_list.append(item)
-    #            symbol_value_pairs[symbol] = value_list
-    #            value_list=[]
-    #        except:
-    #            symbol_value_pairs[symbol] = None
-
     return symbol_value_pairs
 
 
@@ -752,150 +114,4 @@ def get_trait_symbol_and_tissue_values(symbol_list=None):
     tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list)
 
     if len(tissue_data.gene_symbols):
-        return tissue_data.get_symbol_values_pairs()
-
-    #symbolList,
-    #geneIdDict,
-    #dataIdDict,
-    #ChrDict,
-    #MbDict,
-    #descDict,
-    #pTargetDescDict = getTissueProbeSetXRefInfo(
-    #                    GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId)
-
-    #limit_num=1000
-    #count = len(symbol_list)
-    #
-    #symbol_value_pairs = {}
-    #
-    #if count !=0 and count <= limit_num:
-    #    symbol_value_pairs = getSymbolValuePairDict(cursor=cursor,symbolList=symbol_list,dataIdDict=dataIdDict)
-    #
-    #elif count > limit_num:
-    #    n = count/limit_num
-    #    start = 0
-    #    stop = 0
-    #
-    #    for i in range(n):
-    #        stop =limit_num*(i+1)
-    #        gList1 = symbolList[start:stop]
-    #        PairDict1 = getSymbolValuePairDict(cursor=cursor,symbolList=gList1,dataIdDict=dataIdDict)
-    #        start =limit_num*(i+1)
-    #
-    #        SymbolValuePairDict.update(PairDict1)
-    #
-    #    if stop < count:
-    #        stop = count
-    #        gList2 = symbolList[start:stop]
-    #        PairDict2 = getSymbolValuePairDict(cursor=cursor,symbolList=gList2,dataIdDict=dataIdDict)
-    #        SymbolValuePairDict.update(PairDict2)
-    #
-    #return SymbolValuePairDict
-
-########################################################################################################
-#input: cursor, GeneNameLst (list), TissueProbeSetFreezeId(int)
-#output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair.
-#        key is symbol, value is one list of expression values of one probeSet.
-#function: wrapper function of getGeneSymbolTissueValueDict function
-#          for CorrelationPage.py
-########################################################################################################
-
-#def get_trait_symbol_and_tissue_values(cursor=None,GeneNameLst=[],TissueProbeSetFreezeId=0):
-#    SymbolValuePairDict={}
-#
-#    symbolList,geneIdDict,dataIdDict,ChrDict,MbDict,descDict,pTargetDescDict = getTissueProbeSetXRefInfo(
-#        cursor=cursor,GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId)
-#
-#    if symbolList:
-#        SymbolValuePairDict = get_gene_symbol_and_tissue_values(symbolList=symbolList,
-#                                                                dataIdDict=dataIdDict)
-#
-#    return SymbolValuePairDict
-
-########################################################################################################
-#Input: cursor(cursor): MySQL connnection cursor;
-#       priGeneSymbolList(list): one list of gene symbol;
-#       symbolValuepairDict(dictionary): one dictionary of Symbol and Value Pair,
-#               key is symbol, value is one list of expression values of one probeSet;
-#Output: corrArray(array): array of Correlation Value,
-#        pvArray(array): array of PValue;
-#Function: build corrArray, pvArray for display by calling  calculation function:calZeroOrderCorrForTiss
-########################################################################################################
-
-def getCorrPvArray(cursor=None,priGeneSymbolList=[],symbolValuepairDict={}):
-    # setting initial value for corrArray, pvArray equal to 0
-    Num = len(priGeneSymbolList)
-
-    corrArray = [([0] * (Num))[:] for i in range(Num)]
-    pvArray = [([0] * (Num))[:] for i in range(Num)]
-    i = 0
-    for pkey in priGeneSymbolList:
-        j = 0
-        pkey = pkey.strip().lower()# key in symbolValuepairDict is low case
-        if symbolValuepairDict.has_key(pkey):
-            priValue = symbolValuepairDict[pkey]
-            for tkey in priGeneSymbolList:
-                tkey = tkey.strip().lower()# key in symbolValuepairDict is low case
-                if priValue and symbolValuepairDict.has_key(tkey):
-                    tarValue = symbolValuepairDict[tkey]
-
-                    if tarValue:
-                        if i>j:
-                            # corrArray stores Pearson Correlation values
-                            # pvArray stores Pearson P-Values
-                            pcorr_result =calZeroOrderCorrForTiss(primaryValue=priValue,targetValue=tarValue)
-                            corrArray[i][j] =pcorr_result[0]
-                            pvArray[i][j] =pcorr_result[2]
-                        elif i<j:
-                            # corrArray stores Spearman Correlation values
-                            # pvArray stores Spearman P-Values
-                            scorr_result =calZeroOrderCorrForTiss(primaryValue=priValue,targetValue=tarValue,method='spearman')
-                            corrArray[i][j] =scorr_result[0]
-                            pvArray[i][j] =scorr_result[2]
-                        else:
-                            # on the diagonal line, correlation value is 1, P-Values is 0
-                            corrArray[i][j] =1
-                            pvArray[i][j] =0
-                        j+=1
-                    else:
-                        corrArray[i][j] = None
-                        pvArray[i][j] = None
-                        j+=1
-                else:
-                    corrArray[i][j] = None
-                    pvArray[i][j] = None
-                    j+=1
-        else:
-            corrArray[i][j] = None
-            pvArray[i][j] = None
-
-        i+=1
-
-    return corrArray, pvArray
-
-########################################################################################################
-#Input: cursor(cursor): MySQL connnection cursor;
-#       primaryTraitSymbol(string): one gene symbol;
-#               TissueProbeSetFreezeId (int): Id of related TissueProbeSetFreeze
-#       method: '0' default value, Pearson Correlation; '1', Spearman Correlation
-#Output: symbolCorrDict(Dict): Dict of Correlation Value, key is symbol
-#        symbolPvalueDict(Dict): Dict of PValue,key is symbol ;
-#Function: build symbolCorrDict, symbolPvalueDict for display by calling  calculation function:calZeroOrderCorrForTiss
-########################################################################################################
-def calculateCorrOfAllTissueTrait(cursor=None, primaryTraitSymbol=None, TissueProbeSetFreezeId=None,method='0'):
-
-    symbolCorrDict = {}
-    symbolPvalueDict = {}
-
-    primaryTraitSymbolValueDict = getGeneSymbolTissueValueDictForTrait(cursor=cursor, GeneNameLst=[primaryTraitSymbol], TissueProbeSetFreezeId=TissueProbeSetFreezeId)
-    primaryTraitValue = primaryTraitSymbolValueDict.values()[0]
-
-    SymbolValueDict = getGeneSymbolTissueValueDictForTrait(cursor=cursor, GeneNameLst=[], TissueProbeSetFreezeId=TissueProbeSetFreezeId)
-
-    if method =='1':
-        symbolCorrDict, symbolPvalueDict = batchCalTissueCorr(primaryTraitValue,SymbolValueDict,method='spearman')
-    else:
-        symbolCorrDict, symbolPvalueDict = batchCalTissueCorr(primaryTraitValue,SymbolValueDict)
-
-
-    return (symbolCorrDict, symbolPvalueDict)
+        return tissue_data.get_symbol_values_pairs()
\ No newline at end of file
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index 85a8c0ef..9f3f7982 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -21,9 +21,7 @@
 from __future__ import absolute_import, print_function, division
 
 import sys
-# sys.path.append(".") Never in a running webserver
 
-import gc
 import string
 import cPickle
 import os
@@ -64,8 +62,6 @@ from flask import Flask, g
 import utility.logger
 logger = utility.logger.getLogger(__name__ )
 
-METHOD_SAMPLE_PEARSON = "1"
-METHOD_SAMPLE_RANK = "2"
 METHOD_LIT = "3"
 METHOD_TISSUE_PEARSON = "4"
 METHOD_TISSUE_RANK = "5"
@@ -74,26 +70,7 @@ TISSUE_METHODS = [METHOD_TISSUE_PEARSON, METHOD_TISSUE_RANK]
 
 TISSUE_MOUSE_DB = 1
 
-def print_mem(stage=""):
-    mem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
-    #print("{}: {}".format(stage, mem/1024))
-
-class AuthException(Exception):
-    pass
-
 class CorrelationResults(object):
-
-    corr_min_informative = 4
-
-    #PAGE_HEADING = "Correlation Table"
-    #CORRELATION_METHODS = {"1" : "Genetic Correlation (Pearson's r)",
-    #                       "2" : "Genetic Correlation (Spearman's rho)",
-    #                       "3" : "SGO Literature Correlation",
-    #                       "4" : "Tissue Correlation (Pearson's r)",
-    #                       "5" : "Tissue Correlation (Spearman's rho)"}
-    #
-    #RANK_ORDERS = {"1": 0, "2": 1, "3": 0, "4": 0, "5": 1}
-
     def __init__(self, start_vars):
         # get trait list from db (database name)
         # calculate correlation with Base vector and targets
@@ -167,16 +144,12 @@ class CorrelationResults(object):
                 self.process_samples(start_vars, self.this_trait.data.keys(), primary_samples)
 
             self.target_dataset = data_set.create_dataset(start_vars['corr_dataset'])
-            # print("self.sample_data.keys: %s" % self.sample_data.keys)
             self.target_dataset.get_trait_data(self.sample_data.keys())
 
             self.correlation_results = []
 
             self.correlation_data = {}
 
-            db_filename = self.getFileName(target_db_name = self.target_dataset.name)
-            cache_available = db_filename in os.listdir(webqtlConfig.GENERATED_TEXT_DIR)
-
             if self.corr_type == "tissue":
                 self.trait_symbol_dict = self.dataset.retrieve_genes("Symbol")
 
@@ -196,24 +169,6 @@ class CorrelationResults(object):
                     self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait])
 
             elif self.corr_type == "sample":
-              #ZS: Commented out since parallel correlation has issues with gunicorn
-                # if self.dataset.type == "ProbeSet" and cache_available:
-                    # dataset_file = open(webqtlConfig.GENERATED_TEXT_DIR+db_filename,'r')
-
-                    ##XZ, 01/08/2009: read the first line
-                    # line = dataset_file.readline()
-                    # dataset_strains = webqtlUtil.readLineCSV(line)[1:]
-
-                    # self.this_trait_vals = []
-                    # for item in dataset_strains:
-                        # if item in self.sample_data:
-                            # self.this_trait_vals.append(self.sample_data[item])
-                        # else:
-                            # self.this_trait_vals.append("None")
-                    # num_overlap = len(self.this_trait_vals)
-                    # logger.debug("DOING PARALLEL")
-                    # self.do_parallel_correlation(db_filename, num_overlap)
-                # else:
                 for trait, values in self.target_dataset.trait_data.iteritems():
                     self.get_sample_r_and_p_values(trait, values)
 
@@ -256,9 +211,6 @@ class CorrelationResults(object):
                         trait_object.sample_p,
                         trait_object.num_overlap) = self.correlation_data[trait]
 
-                        #Get symbol for trait and call function that gets each tissue value from the database (tables TissueProbeSetXRef,
-                        #TissueProbeSetData, etc) and calculates the correlation (cal_zero_order_corr_for_tissue in correlation_functions)
-
                         # Set some sane defaults
                         trait_object.tissue_corr = 0
                         trait_object.tissue_pvalue = 0
@@ -274,9 +226,6 @@ class CorrelationResults(object):
                         trait_object.sample_p,
                         trait_object.num_overlap) = self.correlation_data[trait]
 
-                        #Get symbol for trait and call function that gets each tissue value from the database (tables TissueProbeSetXRef,
-                        #TissueProbeSetData, etc) and calculates the correlation (cal_zero_order_corr_for_tissue in correlation_functions)
-
                         # Set some sane defaults
                         trait_object.tissue_corr = 0
                         trait_object.tissue_pvalue = 0
@@ -323,13 +272,6 @@ class CorrelationResults(object):
 
         if self.this_trait.symbol.lower() in primary_trait_tissue_vals_dict:
             primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower()]
-
-            #gene_symbol_list = []
-            #
-            #for trait in self.correlation_results:
-            #    if hasattr(trait, 'symbol'):
-            #        gene_symbol_list.append(trait.symbol)
-
             gene_symbol_list = [trait.symbol for trait in self.correlation_results if trait.symbol]
 
             corr_result_tissue_vals_dict= correlation_functions.get_trait_symbol_and_tissue_values(
@@ -346,17 +288,6 @@ class CorrelationResults(object):
                     trait.tissue_corr = result[0]
                     trait.tissue_pvalue = result[2]
 
-        #        else:
-        #            trait.tissue_corr = None
-        #            trait.tissue_pvalue = None
-        #else:
-        #    for trait in self.correlation_results:
-        #        trait.tissue_corr = None
-        #        trait.tissue_pvalue = None
-
-        #return self.correlation_results
-
-
     def do_tissue_correlation_for_all_traits(self, tissue_dataset_id=1):
         #Gets tissue expression values for the primary trait
         primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
@@ -377,7 +308,6 @@ class CorrelationResults(object):
             for trait, symbol in self.trait_symbol_dict.iteritems():
                 if symbol and symbol.lower() in corr_result_tissue_vals_dict:
                     this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower()]
-                    #print("this_trait_tissue_values: ", pf(this_trait_tissue_values))
 
                     result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values,
                                                                           this_trait_tissue_values,
@@ -390,7 +320,6 @@ class CorrelationResults(object):
 
             return tissue_corr_data
 
-
     def do_lit_correlation_for_trait_list(self):
 
         input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), self.this_trait.geneid)
@@ -529,219 +458,6 @@ class CorrelationResults(object):
         if num_overlap > 5:
             self.correlation_data[trait] = [sample_r, sample_p, num_overlap]
 
-
-        """
-        correlations = []
-
-        #XZ: Use the fast method only for probeset dataset, and this dataset must have been created.
-        #XZ: Otherwise, use original method
-        #print("Entering correlation")
-
-        #db_filename = self.getFileName(target_db_name=self.target_db_name)
-        #
-        #cache_available = db_filename in os.listdir(webqtlConfig.GENERATED_TEXT_DIR)
-
-         # If the cache file exists, do a cached correlation for probeset data
-        if self.dataset.type == "ProbeSet":
-#           if self.method in [METHOD_SAMPLE_PEARSON, METHOD_SAMPLE_RANK] and cache_available:
-#               traits = do_parallel_correlation()
-#
-#           else:
-
-            traits = self.get_traits(self.vals)
-
-            for trait in traits:
-                trait.calculate_correlation(vals, self.method)
-
-        self.record_count = len(traits) #ZS: This isn't a good way to get this value, so I need to change it later
-
-        traits = sortTraitCorrelations(traits, self.method)
-
-        # Strip to the top N correlations
-        traits = traits[:min(self.returnNumber, len(traits))]
-
-        addLiteratureCorr = False
-        addTissueCorr = False
-
-        trait_list = []
-        for trait in traits:
-            db_trait = webqtlTrait(db=self.db, name=trait.name, cursor=self.cursor)
-            db_trait.retrieveInfo( QTL='Yes' )
-
-            db_trait.Name = trait.name
-            db_trait.corr = trait.correlation
-            db_trait.nOverlap = trait.overlap
-            db_trait.corrPValue = trait.p_value
-
-            # NL, 07/19/2010
-            # js function changed, add a new parameter rankOrder for js function 'showTissueCorrPlot'
-            db_trait.RANK_ORDER = self.RANK_ORDERS[self.method]
-
-            #XZ, 26/09/2008: Method is 4 or 5. Have fetched tissue corr, but no literature correlation yet.
-            if self.method in TISSUE_METHODS:
-                db_trait.tissueCorr = trait.tissue_corr
-                db_trait.tissuePValue = trait.p_tissue
-                addTissueCorr = True
-
-
-            #XZ, 26/09/2008: Method is 3,  Have fetched literature corr, but no tissue corr yet.
-            elif self.method == METHOD_LIT:
-                db_trait.LCorr = trait.lit_corr
-                db_trait.mouse_geneid = self.translateToMouseGeneID(self.species, db_trait.geneid)
-                addLiteratureCorr = True
-
-            #XZ, 26/09/2008: Method is 1 or 2. Have NOT fetched literature corr and tissue corr yet.
-            # Phenotype data will not have geneid, and neither will some probes
-            # we need to handle this because we will get an attribute error
-            else:
-                if self.input_trait_mouse_gene_id and self.db.type=="ProbeSet":
-                    addLiteratureCorr = True
-                if self.trait_symbol and self.db.type=="ProbeSet":
-                    addTissueCorr = True
-
-            trait_list.append(db_trait)
-
-        if addLiteratureCorr:
-            trait_list = self.getLiteratureCorrelationByList(self.input_trait_mouse_gene_id,
-                                                    self.species, trait_list)
-        if addTissueCorr:
-            trait_list = self.getTissueCorrelationByList(
-                        primaryTraitSymbol = self.trait_symbol,
-                        traitList = trait_list,
-                        TissueProbeSetFreezeId = TISSUE_MOUSE_DB,
-                        method=self.method)
-
-        return trait_list
-        """
-
-
-    def do_tissue_corr_for_all_traits_2(self):
-        """Comments Possibly Out of Date!!!!!
-
-        Uses get_temp_tissue_corr_table to generate table of tissue correlations
-
-        This function then gathers that data and pairs it with the TraitID string.
-        Takes as its arguments a formdata instance, and a dataset instance.
-        Returns a dictionary of 'TraitID':(tissueCorr, tissuePValue)
-        for the requested correlation
-
-        Used when the user selects the tissue correlation method; i.e. not for the
-        column that is appended to all probeset trait correlation tables
-
-        """
-
-        # table name string
-        temp_table = self.get_temp_tissue_corr_table(tissue_probesetfreeze_id=TISSUE_MOUSE_DB,
-                                                    method=method)
-
-        query = """SELECT ProbeSet.Name, {}.Correlation, {}.PValue
-                FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze)
-                LEFT JOIN {} ON {}.Symbol=ProbeSet.Symbol
-                WHERE ProbeSetFreeze.Name = '{}'
-                and ProbeSetFreeze.Id=ProbeSetXRef.ProbeSetFreezeId
-                and ProbeSet.Id = ProbeSetXRef.ProbeSetId
-                and ProbeSet.Symbol IS NOT NULL
-                and {}.Correlation IS NOT NULL""".format(dataset.mescape(
-                    temp_table, temp_table, temp_table, temp_table,
-                    self.dataset.name, temp_table))
-
-        results = g.db.execute(query).fetchall()
-
-        tissue_corr_dict = {}
-
-        for entry in results:
-            trait_name, tissue_corr, tissue_pvalue = entry
-            tissue_corr_dict[trait_name] = (tissue_corr, tissue_pvalue)
-    #symbolList,
-    #geneIdDict,
-    #dataIdDict,
-    #ChrDict,
-    #MbDict,
-    #descDict,
-    #pTargetDescDict = getTissueProbeSetXRefInfo(
-    #                    GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId)
-
-        g.db.execute('DROP TEMPORARY TABLE {}'.format(escape(temp_table)))
-
-        return tissue_corr_dict
-
-
-    #XZ, 09/23/2008: In tissue correlation tables, there is no record of GeneId1 == GeneId2
-    #XZ, 09/24/2008: Note that the correlation value can be negative.
-    def get_temp_tissue_corr_table(self,
-                                   tissue_probesetfreeze_id=0,
-                                   method="",
-                                   return_number=0):
-
-
-        def cmp_tisscorr_absolute_value(A, B):
-            try:
-                if abs(A[1]) < abs(B[1]): return 1
-                elif abs(A[1]) == abs(B[1]):
-                    return 0
-                else: return -1
-            except:
-                return 0
-
-        symbol_corr_dict, symbol_pvalue_dict = self.calculate_corr_for_all_tissues(
-                                                                tissue_dataset_id=TISSUE_MOUSE_DB)
-
-        symbol_corr_list = symbol_corr_dict.items()
-
-        symbol_corr_list.sort(cmp_tisscorr_absolute_value)
-        symbol_corr_list = symbol_corr_list[0 : 2*return_number]
-
-        tmp_table_name = webqtlUtil.genRandStr(prefix="TOPTISSUE")
-
-        q1 = 'CREATE TEMPORARY TABLE %s (Symbol varchar(100) PRIMARY KEY, Correlation float, PValue float)' % tmp_table_name
-        self.cursor.execute(q1)
-
-        for one_pair in symbol_corr_list:
-            one_symbol = one_pair[0]
-            one_corr = one_pair[1]
-            one_p_value = symbol_pvalue_dict[one_symbol]
-
-            self.cursor.execute( "INSERT INTO %s (Symbol, Correlation, PValue) VALUES ('%s',%f,%f)" % (tmpTableName, one_symbol, float(one_corr), float(one_p_value)) )
-
-        return tmp_table_name
-
-
-    def calculate_corr_for_all_tissues(self, tissue_dataset_id=None):
-
-        symbol_corr_dict = {}
-        symbol_pvalue_dict = {}
-
-        primary_trait_symbol_value_dict = correlation_functions.make_gene_tissue_value_dict(
-                                                    GeneNameLst=[self.this_trait.symbol],
-                                                    TissueProbeSetFreezeId=tissue_dataset_id)
-        primary_trait_value = primary_trait_symbol_value_dict.values()[0]
-
-        symbol_value_dict = correlation_functions.make_gene_tissue_value_dict(
-                                        gene_name_list=[],
-                                        tissue_dataset_id=tissue_dataset_id)
-
-        symbol_corr_dict, symbol_pvalue_dict = correlation_functions.batch_cal_tissue_corr(
-                primaryTraitValue,
-                SymbolValueDict,
-                method=self.corr_method)
-        #else:
-        #    symbol_corr_dict, symbol_pvalue_dict = correlation_functions.batch_cal_tissue_corr(
-        #        primaryTraitValue,
-        #        SymbolValueDict)
-
-        return (symbolCorrDict, symbolPvalueDict)
-
-    ##XZ, 12/16/2008: the input geneid is of mouse type
-    #def checkSymbolForTissueCorr(self, tissueProbeSetFreezeId=0, symbol=""):
-    #    q = "SELECT 1 FROM  TissueProbeSetXRef WHERE TissueProbeSetFreezeId=%s and Symbol='%s' LIMIT 1" % (tissueProbeSetFreezeId,symbol)
-    #    self.cursor.execute(q)
-    #    try:
-    #        x = self.cursor.fetchone()
-    #        if x: return True
-    #        else: raise
-    #    except: return False
-
-
     def process_samples(self, start_vars, sample_names, excluded_samples=None):
         if not excluded_samples:
             excluded_samples = ()
@@ -754,566 +470,6 @@ class CorrelationResults(object):
                     if not value.strip().lower() == 'x':
                         self.sample_data[str(sample)] = float(value)
 
-    ##XZ, 12/16/2008: the input geneid is of mouse type
-    #def checkForLitInfo(self,geneId):
-    #    q = 'SELECT 1 FROM LCorrRamin3 WHERE GeneId1=%s LIMIT 1' % geneId
-    #    self.cursor.execute(q)
-    #    try:
-    #        x = self.cursor.fetchone()
-    #        if x: return True
-    #        else: raise
-    #    except: return False
-
-
-
-    def fetchAllDatabaseData(self, species, GeneId, GeneSymbol, strains, db, method, returnNumber, tissueProbeSetFreezeId):
-
-        StrainIds = []
-        for item in strains:
-            self.cursor.execute('''SELECT Strain.Id FROM Strain, Species WHERE Strain.Name="%s" and Strain.SpeciesId=Species.Id and Species.name = "%s" ''' % (item, species))
-            Id = self.cursor.fetchone()[0]
-            StrainIds.append('%d' % Id)
-
-        # break it into smaller chunks so we don't overload the MySql server
-        nnn = len(StrainIds) / 25
-        if len(StrainIds) % 25:
-            nnn += 1
-        oridata = []
-
-        #XZ, 09/24/2008: build one temporary table that only contains the records associated with the input GeneId
-        tempTable = None
-        if GeneId and db.type == "ProbeSet":
-            if method == "3":
-                tempTable = self.getTempLiteratureTable(species=species, input_species_geneid=GeneId, returnNumber=returnNumber)
-
-            if method == "4" or method == "5":
-                tempTable = self.getTempTissueCorrTable(primaryTraitSymbol=GeneSymbol, TissueProbeSetFreezeId=TISSUE_MOUSE_DB, method=method, returnNumber=returnNumber)
-
-        for step in range(nnn):
-            temp = []
-            StrainIdstep = StrainIds[step*25:min(len(StrainIds), (step+1)*25)]
-            for item in StrainIdstep: temp.append('T%s.value' % item)
-
-            if db.type == "Publish":
-                query = "SELECT PublishXRef.Id, "
-                dataStartPos = 1
-                query += string.join(temp,', ')
-                query += ' FROM (PublishXRef, PublishFreeze)'
-                #XZ, 03/04/2009: Xiaodong changed Data to PublishData
-                for item in StrainIdstep:
-                    query += 'left join PublishData as T%s on T%s.Id = PublishXRef.DataId and T%s.StrainId=%s\n' %(item,item,item,item)
-                query += "WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId and PublishFreeze.Name = '%s'" % (db.name, )
-            #XZ, 09/20/2008: extract literature correlation value together with gene expression values.
-            #XZ, 09/20/2008: notice the difference between the code in next block.
-            #elif tempTable:
-            #    # we can get a little performance out of selecting our LitCorr here
-            #    # but also we need to do this because we are unconcerned with probes that have no geneId associated with them
-            #    # as we would not have litCorr data.
-            #
-            #    if method == "3":
-            #        query = "SELECT %s.Name, %s.value," %  (db.type,tempTable)
-            #        dataStartPos = 2
-            #    if method == "4" or method == "5":
-            #        query = "SELECT %s.Name, %s.Correlation, %s.PValue," %  (db.type,tempTable, tempTable)
-            #        dataStartPos = 3
-            #
-            #    query += string.join(temp,', ')
-            #    query += ' FROM (%s, %sXRef, %sFreeze)' % (db.type, db.type, db.type)
-            #    if method == "3":
-            #        query += ' LEFT JOIN %s ON %s.GeneId2=ProbeSet.GeneId ' % (tempTable,tempTable)
-            #    if method == "4" or method == "5":
-            #        query += ' LEFT JOIN %s ON %s.Symbol=ProbeSet.Symbol ' % (tempTable,tempTable)
-            #    #XZ, 03/04/2009: Xiaodong changed Data to %sData and changed parameters from %(item,item, db.type,item,item) to %(db.type, item,item, db.type,item,item)
-            #    for item in StrainIdstep:
-            #        query += 'left join %sData as T%s on T%s.Id = %sXRef.DataId and T%s.StrainId=%s\n' %(db.type, item,item, db.type,item,item)
-            #
-            #    if method == "3":
-            #        query += "WHERE ProbeSet.GeneId IS NOT NULL AND %s.value IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s'  and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable,db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type)
-            #    if method == "4" or method == "5":
-            #        query += "WHERE ProbeSet.Symbol IS NOT NULL AND %s.Correlation IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s'  and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable,db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type)
-            else:
-                query = "SELECT %s.Name," %  db.type
-                dataStartPos = 1
-                query += string.join(temp,', ')
-                query += ' FROM (%s, %sXRef, %sFreeze)' % (db.type, db.type, db.type)
-                #XZ, 03/04/2009: Xiaodong changed Data to %sData and changed parameters from %(item,item, db.type,item,item) to %(db.type, item,item, db.type,item,item)
-                for item in StrainIdstep:
-                    query += 'left join %sData as T%s on T%s.Id = %sXRef.DataId and T%s.StrainId=%s\n' %(db.type, item,item, db.type,item,item)
-                query += "WHERE %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s'  and %s.Id = %sXRef.%sId order by %s.Id" % (db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type)
-
-            self.cursor.execute(query)
-            results = self.cursor.fetchall()
-            oridata.append(results)
-
-        datasize = len(oridata[0])
-        traits = []
-        # put all of the separate data together into a huge list of lists
-        for j in range(datasize):
-            traitdata = list(oridata[0][j])
-            for i in range(1,nnn):
-                traitdata += list(oridata[i][j][dataStartPos:])
-
-            trait = Trait(traitdata[0], traitdata[dataStartPos:])
-
-            if method == METHOD_LIT:
-                trait.lit_corr = traitdata[1]
-
-            if method in TISSUE_METHODS:
-                trait.tissue_corr = traitdata[1]
-                trait.p_tissue = traitdata[2]
-
-            traits.append(trait)
-
-        if tempTable:
-            self.cursor.execute( 'DROP TEMPORARY TABLE %s' % tempTable )
-
-        return traits
-
-
-
-
-    # XZ, 09/20/2008: This function creates TEMPORARY TABLE tmpTableName_2 and return its name.
-    # XZ, 09/20/2008: It stores top literature correlation values associated with the input geneId.
-    # XZ, 09/20/2008: Attention: In each row, the input geneId is always in column GeneId1.
-    #XZ, 12/16/2008: the input geneid can be of mouse, rat or human type
-    def getTempLiteratureTable(self, species, input_species_geneid, returnNumber):
-        # according to mysql the TEMPORARY TABLE name should not have to be unique because
-        # it is only available to the current connection. This program will be invoked via command line, but if it
-        # were to be invoked over mod_python this could cuase problems.  mod_python will keep the connection alive
-        # in its executing threads ( i think) so there is a potential for the table not being dropped between users.
-        #XZ, 01/29/2009: To prevent the potential risk, I generate random table names and drop the tables after use them.
-
-
-        # the 'input_species_geneid' could be rat or human geneid, need to translate it to mouse geneid
-        translated_mouse_geneid = self.translateToMouseGeneID (species, input_species_geneid)
-
-        tmpTableName_1 = webqtlUtil.genRandStr(prefix="LITERATURE")
-
-        q1 = 'CREATE TEMPORARY TABLE %s (GeneId1 int(12) unsigned, GeneId2 int(12) unsigned PRIMARY KEY, value double)' % tmpTableName_1
-        q2 = 'INSERT INTO %s (GeneId1, GeneId2, value) SELECT GeneId1,GeneId2,value FROM LCorrRamin3 WHERE GeneId1=%s' % (tmpTableName_1, translated_mouse_geneid)
-        q3 = 'INSERT INTO %s (GeneId1, GeneId2, value) SELECT GeneId2,GeneId1,value FROM LCorrRamin3 WHERE GeneId2=%s AND GeneId1!=%s' % (tmpTableName_1, translated_mouse_geneid,translated_mouse_geneid)
-        for x in [q1,q2,q3]: self.cursor.execute(x)
-
-        #XZ, 09/23/2008: Just use the top records insteard of using all records
-        tmpTableName_2 = webqtlUtil.genRandStr(prefix="TOPLITERATURE")
-
-        q1 = 'CREATE TEMPORARY TABLE %s (GeneId1 int(12) unsigned, GeneId2 int(12) unsigned PRIMARY KEY, value double)' % tmpTableName_2
-        self.cursor.execute(q1)
-        q2 = 'SELECT GeneId1, GeneId2, value FROM %s ORDER BY value DESC' % tmpTableName_1
-        self.cursor.execute(q2)
-        result = self.cursor.fetchall()
-
-        counter = 0 #this is to count how many records being inserted into table
-        for one_row in result:
-            mouse_geneid1, mouse_geneid2, lit_corr_alue = one_row
-
-            #mouse_geneid1 has been tested before, now should test if mouse_geneid2 has corresponding geneid in other species
-            translated_species_geneid = 0
-            if species == 'mouse':
-                translated_species_geneid = mouse_geneid2
-            elif species == 'rat':
-                self.cursor.execute( "SELECT rat FROM GeneIDXRef WHERE mouse=%d" % int(mouse_geneid2) )
-                record = self.cursor.fetchone()
-                if record:
-                    translated_species_geneid = record[0]
-            elif species == 'human':
-                self.cursor.execute( "SELECT human FROM GeneIDXRef WHERE mouse=%d" % int(mouse_geneid2) )
-                record = self.cursor.fetchone()
-                if record:
-                    translated_species_geneid = record[0]
-
-            if translated_species_geneid:
-                self.cursor.execute( 'INSERT INTO %s (GeneId1, GeneId2, value) VALUES (%d,%d,%f)' % (tmpTableName_2, int(input_species_geneid),int(translated_species_geneid), float(lit_corr_alue)) )
-                counter = counter + 1
-
-            #pay attention to the number
-            if (counter > 2*returnNumber):
-                break
-
-        self.cursor.execute('DROP TEMPORARY TABLE %s' % tmpTableName_1)
-
-        return tmpTableName_2
-
-
-
-    #XZ, 01/09/2009: This function was created by David Crowell. Xiaodong cleaned up and modified it.
-    def fetchLitCorrelations(self, species, GeneId, db, returnNumber): ### Used to generate Lit Correlations when calculations are done from text file.  dcrowell August 2008
-        """Uses getTempLiteratureTable to generate table of literatire correlations.  This function then gathers that data and
-        pairs it with the TraitID string.  Takes as its arguments a formdata instance, and a database instance.
-        Returns a dictionary of 'TraitID':'LitCorr' for the requested correlation"""
-
-        tempTable = self.getTempLiteratureTable(species=species, input_species_geneid=GeneId, returnNumber=returnNumber)
-
-        query = "SELECT %s.Name, %s.value" %  (db.type,tempTable)
-        query += ' FROM (%s, %sXRef, %sFreeze)' % (db.type, db.type, db.type)
-        query += ' LEFT JOIN %s ON %s.GeneId2=ProbeSet.GeneId ' % (tempTable,tempTable)
-        query += "WHERE ProbeSet.GeneId IS NOT NULL AND %s.value IS NOT NULL AND %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Name = '%s'  and %s.Id = %sXRef.%sId order by %s.Id" % (tempTable, db.type, db.type, db.type, db.type, db.name, db.type, db.type, db.type, db.type)
-
-        self.cursor.execute(query)
-        results = self.cursor.fetchall()
-
-        litCorrDict = {}
-
-        for entry in results:
-            traitName,litcorr = entry
-            litCorrDict[traitName] = litcorr
-
-        self.cursor.execute('DROP TEMPORARY TABLE %s' % tempTable)
-
-        return litCorrDict
-
-
-    def get_traits(self, vals):
-
-        #Todo: Redo cached stuff using memcached
-        if False:
-            lit_corrs = {}
-            tissue_corrs = {}
-            use_lit = False
-            if self.method == METHOD_LIT:
-                lit_corrs = self.fetchLitCorrelations(species=self.species, GeneId=self.gene_id, db=self.db, returnNumber=self.returnNumber)
-                use_lit = True
-
-            use_tissue_corr = False
-            if self.method in TISSUE_METHODS:
-                tissue_corrs = self.fetch_tissue_correlations(method=self.method, return_number = self.return_number)
-                use_tissue_corr = True
-
-            DatabaseFileName = self.getFileName( target_db_name=self.target_db_name )
-            datasetFile = open(webqtlConfig.CACHEDIR+DatabaseFileName,'r')
-
-            #XZ, 01/08/2009: read the first line
-            line = datasetFile.readline()
-            cached_sample_names = webqtlUtil.readLineCSV(line)[1:]
-
-            #XZ, 01/08/2009: This step is critical. It is necessary for this new method.
-            #XZ: The original function fetchAllDatabaseData uses all strains stored in variable _strains to
-            #XZ: retrieve the values of each strain from database in real time.
-            #XZ: The new method uses all strains stored in variable dataset_strains to create a new variable
-            #XZ: _newvals. _newvals has the same length as dataset_strains. The items in _newvals is in
-            #XZ: the same order of items in dataset_strains. The value of each item in _newvals is either
-            #XZ: the value of correspinding strain in _vals or 'None'.
-            new_vals = []
-            for name in cached_sample_names:
-                if name in self.sample_names:
-                    new_vals.append(float(vals[self.sample_names.index(name)]))
-                else:
-                    new_vals.append('None')
-
-            nnCorr = len(new_vals)
-
-            #XZ, 01/14/2009: If literature corr or tissue corr is selected,
-            #XZ: there is no need to use parallel computing.
-
-            traits = []
-            data_start = 1
-            for line in datasetFile:
-                raw_trait = webqtlUtil.readLineCSV(line)
-                trait = Trait.from_csv(raw_trait, data_start)
-                trait.lit_corr = lit_corrs.get(trait.name)
-                trait.tissue_corr, trait.p_tissue = tissue_corrs.get(trait.name, (None, None))
-                traits.append(trait)
-
-            return traits, new_vals
-
-        else:
-            traits = self.fetchAllDatabaseData(species=self.dataset.species,
-                                               GeneId=self.gene_id,
-                                               GeneSymbol=self.trait.symbol,
-                                               strains=self.sample_names,
-                                               db=self.db,
-                                               method=self.method,
-                                               returnNumber=self.returnNumber,
-                                               tissueProbeSetFreezeId= self.tissue_probeset_freeze_id)
-            totalTraits = len(traits) #XZ, 09/18/2008: total trait number
-
-        return traits
-
-    def calculate_corr_for_all_tissues(self, tissue_dataset_id=None):
-
-        symbol_corr_dict = {}
-        symbol_pvalue_dict = {}
-
-        primary_trait_symbol_value_dict = correlation_functions.make_gene_tissue_value_dict(
-                                                    GeneNameLst=[self.this_trait.symbol],
-                                                    TissueProbeSetFreezeId=tissue_dataset_id)
-        primary_trait_value = primary_trait_symbol_value_dict.values()[0]
-
-        symbol_value_dict = correlation_functions.make_gene_tissue_value_dict(
-                                        gene_name_list=[],
-                                        tissue_dataset_id=tissue_dataset_id)
-
-        symbol_corr_dict, symbol_pvalue_dict = correlation_functions.batch_cal_tissue_corr(
-                primaryTraitValue,
-                SymbolValueDict,
-                method=self.corr_method)
-        #else:
-        #    symbol_corr_dict, symbol_pvalue_dict = correlation_functions.batch_cal_tissue_corr(
-        #        primaryTraitValue,
-        #        SymbolValueDict)
-
-        return (symbolCorrDict, symbolPvalueDict)
-
-    def getFileName(self, target_db_name):  ### dcrowell  August 2008
-        """Returns the name of the reference database file with which correlations are calculated.
-        Takes argument cursor which is a cursor object of any instance of a subclass of templatePage
-        Used by correlationPage"""
-
-        dataset_id = str(self.target_dataset.id)
-        dataset_fullname = self.target_dataset.fullname.replace(' ','_')
-        dataset_fullname = dataset_fullname.replace('/','_')
-
-        FileName = 'ProbeSetFreezeId_' + dataset_id + '_FullName_' + dataset_fullname + '.txt'
-
-        return FileName
-
-    def do_parallel_correlation(self, db_filename, num_overlap):
-
-        #XZ, 01/14/2009: This method is for parallel computing only.
-        #XZ: It is supposed to be called when "Genetic Correlation, Pearson's r" (method 1)
-        #XZ: or "Genetic Correlation, Spearman's rho" (method 2) is selected
-        def compute_corr(input_nnCorr, input_trait, input_list, corr_method):
-
-            import math
-            import reaper
-
-            def cmpOrder2(A,B):
-                try:
-                    if A[-1] < B[-1]:
-                        return -1
-                    elif A[-1] == B[-1]:
-                        return 0
-                    else:
-                        return 1
-                except:
-                    return 0
-
-            def calCorrelation(dbdata,userdata,N):
-                X = []
-                Y = []
-                for i in range(N):
-                    if (dbdata[i] != None and userdata[i] != None) and (dbdata[i] != "None" and userdata[i] != "None"):
-                        X.append(float(dbdata[i]))
-                        Y.append(float(userdata[i]))
-                NN = len(X)
-                if NN <6:
-                    return (0.0,NN)
-                sx = reduce(lambda x,y:x+y,X,0.0)
-                sy = reduce(lambda x,y:x+y,Y,0.0)
-                meanx = sx/NN
-                meany = sy/NN
-                xyd = 0.0
-                sxd = 0.0
-                syd = 0.0
-                for i in range(NN):
-                    xyd += (X[i] - meanx)*(Y[i]-meany)
-                    sxd += (X[i] - meanx)*(X[i] - meanx)
-                    syd += (Y[i] - meany)*(Y[i] - meany)
-                try:
-                    corr = xyd/(math.sqrt(sxd)*math.sqrt(syd))
-                except:
-                    corr = 0
-                return (corr,NN)
-
-            def calCorrelationRank(xVals,yVals,N):
-                """
-                Calculated Spearman Ranked Correlation. The algorithm works
-                by setting all tied ranks to the average of those ranks (for
-                example, if ranks 5-10 all have the same value, each will be set
-                to rank 7.5).
-                """
-
-                XX = []
-                YY = []
-                j = 0
-
-                for i in range(len(xVals)):
-                    if (xVals[i]!= None and yVals[i]!= None) and (xVals[i] != "None" and yVals[i] != "None"):
-                        XX.append((j,float(xVals[i])))
-                        YY.append((j,float(yVals[i])))
-                        j = j+1
-
-                NN = len(XX)
-                if NN <6:
-                    return (0.0,NN)
-                XX.sort(cmpOrder2)
-                YY.sort(cmpOrder2)
-                X = [0]*NN
-                Y = [0]*NN
-
-                j = 1
-                rank = 0.0
-                t = 0.0
-                sx = 0.0
-
-                while j < NN:
-
-                    if XX[j][1] != XX[j-1][1]:
-                        X[XX[j-1][0]] = j
-                        j = j+1
-
-                    else:
-                        jt = j+1
-                        ji = j
-                        for jt in range(j+1, NN):
-                            if (XX[jt][1] != XX[j-1][1]):
-                                break
-                        rank = 0.5*(j+jt)
-                        for ji in range(j-1, jt):
-                            X[XX[ji][0]] = rank
-                        t = jt-j
-                        sx = sx + (t*t*t-t)
-                        if (jt == NN-1):
-                            if (XX[jt][1] == XX[j-1][1]):
-                                X[XX[NN-1][0]] = rank
-                        j = jt+1
-
-                if j == NN:
-                    if X[XX[NN-1][0]] == 0:
-                        X[XX[NN-1][0]] = NN
-
-                j = 1
-                rank = 0.0
-                t = 0.0
-                sy = 0.0
-
-                while j < NN:
-
-                    if YY[j][1] != YY[j-1][1]:
-                        Y[YY[j-1][0]] = j
-                        j = j+1
-                    else:
-                        jt = j+1
-                        ji = j
-                        for jt in range(j+1, NN):
-                            if (YY[jt][1] != YY[j-1][1]):
-                                break
-                        rank = 0.5*(j+jt)
-                        for ji in range(j-1, jt):
-                            Y[YY[ji][0]] = rank
-                        t = jt - j
-                        sy = sy + (t*t*t-t)
-                        if (jt == NN-1):
-                            if (YY[jt][1] == YY[j-1][1]):
-                                Y[YY[NN-1][0]] = rank
-                        j = jt+1
-
-                if j == NN:
-                    if Y[YY[NN-1][0]] == 0:
-                        Y[YY[NN-1][0]] = NN
-
-                D = 0.0
-
-                for i in range(NN):
-                    D += (X[i]-Y[i])*(X[i]-Y[i])
-
-                fac = (1.0 -sx/(NN*NN*NN-NN))*(1.0-sy/(NN*NN*NN-NN))
-
-                return ((1-(6.0/(NN*NN*NN-NN))*(D+(sx+sy)/12.0))/math.sqrt(fac),NN)
-
-            # allcorrelations = []
-
-            correlation_data = {}
-            for i, line in enumerate(input_list):
-                if i == 0:
-                    continue
-                tokens = line.split('","')
-                tokens[-1] = tokens[-1][:-2] #remove the last "
-                tokens[0] = tokens[0][1:] #remove the first "
-
-                traitdataName = tokens[0]
-                database_trait = tokens[1:]
-
-                #print("database_trait:", database_trait)
-
-                #ZS: 2015 could add biweight correlation, see http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3465711/
-                # if corr_method == 'pearson':
-                    # sample_r, sample_p = scipy.stats.pearsonr(input_trait, database_trait)
-                # else:
-                    # sample_r, sample_p = scipy.stats.spearmanr(input_trait, database_trait)
-
-                if corr_method == "pearson": #XZ: Pearson's r
-                    sample_r, nOverlap = calCorrelation(input_trait, database_trait, input_nnCorr)
-                else: #XZ: Spearman's rho
-                    sample_r, nOverlap = calCorrelationRank(input_trait, database_trait, input_nnCorr)
-
-                #XZ: calculate corrPValue
-                if nOverlap < 3:
-                    sample_p = 1.0
-                else:
-                    if abs(sample_r) >= 1.0:
-                        sample_p = 0.0
-                    else:
-                        z_value = 0.5*math.log((1.0+sample_r)/(1.0-sample_r))
-                        z_value = z_value*math.sqrt(nOverlap-3)
-                        sample_p = 2.0*(1.0 - reaper.normp(abs(z_value)))
-
-                correlation_data[traitdataName] = [sample_r, sample_p, nOverlap]
-
-                # traitinfo = [traitdataName, sample_r, nOverlap]
-                # allcorrelations.append(traitinfo)
-
-            return correlation_data
-            # return allcorrelations
-
-
-        datasetFile = open(webqtlConfig.GENERATED_TEXT_DIR+db_filename,'r')
-
-        print("Invoking parallel computing")
-        input_line_list = datasetFile.readlines()
-        print("Read lines from the file")
-        all_line_number = len(input_line_list)
-
-        step = 1000
-        job_number = math.ceil( float(all_line_number)/step )
-
-        print("JOB NUMBER", job_number)
-
-        job_input_lists = []
-
-        print("Configuring jobs")
-
-        for job_index in range( int(job_number) ):
-            starti = job_index*step
-            endi = min((job_index+1)*step, all_line_number)
-
-            one_job_input_list = []
-
-            for i in range( starti, endi ):
-                one_job_input_list.append( input_line_list[i] )
-
-            job_input_lists.append( one_job_input_list )
-
-        print("Creating pp servers")
-
-        ppservers = ()
-        # Creates jobserver with automatically detected number of workers
-        job_server = pp.Server(ppservers=ppservers)
-
-        print("Done creating servers")
-
-        jobs = []
-        results = []
-
-        print("Starting parallel computation, submitting jobs")
-        for one_job_input_list in job_input_lists: #pay attention to modules from outside
-            jobs.append( job_server.submit(func=compute_corr, args=(num_overlap, self.this_trait_vals, one_job_input_list, self.corr_method), depfuncs=(), modules=("webqtlUtil",)) )
-        print("Done submitting jobs")
-
-        for one_job in jobs:
-            one_result = one_job()
-            self.correlation_data.update(one_result)
-            # one_result = one_job()
-            # results.append( one_result )
-
-        #print("CORRELATION DATA:", self.correlation_data)
-
-        # print("Acquiring results")
-
-        # for one_result in results:
-            # for one_traitinfo in one_result:
-                # allcorrelations.append( one_traitinfo )
-
 def generate_corr_json(corr_results, this_trait, dataset, target_dataset):
     results_list = []
     for i, trait in enumerate(corr_results):
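
For context on the large deletion above: the removed do_parallel_correlation() helper computed each sample correlation by hand and then converted it to a two-sided p-value with the Fisher z-transform (returning p = 1.0 when fewer than 3 samples overlap and p = 0.0 when |r| >= 1). A minimal standalone sketch of that p-value step, using scipy.stats for the normal tail probability instead of the reaper.normp call in the removed code:

    import math
    from scipy import stats

    def sample_corr_pvalue(r, n_overlap):
        """Two-sided p-value for a sample correlation r over n_overlap samples,
        mirroring the Fisher z-transform logic in the removed helper."""
        if n_overlap < 3:
            return 1.0
        if abs(r) >= 1.0:
            return 0.0
        z = 0.5 * math.log((1.0 + r) / (1.0 - r)) * math.sqrt(n_overlap - 3)
        return 2.0 * stats.norm.sf(abs(z))  # sf(x) = 1 - CDF(x)
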
diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
index 077386a3..4bb4d65d 100644
--- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
+++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
@@ -23,7 +23,6 @@ from __future__ import absolute_import, print_function, division
 import sys
 # sys.path.append(".")   Never do this in a webserver!
 
-import gc
 import string
 import cPickle
 import os
diff --git a/wqflask/wqflask/ctl/ctl_analysis.py b/wqflask/wqflask/ctl/ctl_analysis.py
index 9515d23a..6fda02fd 100644
--- a/wqflask/wqflask/ctl/ctl_analysis.py
+++ b/wqflask/wqflask/ctl/ctl_analysis.py
@@ -2,7 +2,6 @@
 # Author / Maintainer: Danny Arends <Danny.Arends@gmail.com>
 import sys
 from numpy import *
-import scipy as sp                            # SciPy
 import rpy2.robjects as ro                    # R Objects
 import rpy2.rinterface as ri
 
@@ -24,60 +23,38 @@ from utility import helper_functions
 from utility.tools import locate
 
 from rpy2.robjects.packages import importr
-utils = importr("utils")
+
+import utility.logger

+logger = utility.logger.getLogger(__name__)
 
 ## Get pointers to some common R functions
 r_library       = ro.r["library"]             # Map the library function
 r_options       = ro.r["options"]             # Map the options function
-r_read_csv      = ro.r["read.csv"]            # Map the read.csv function
-r_dim           = ro.r["dim"]                 # Map the dim function
-r_c             = ro.r["c"]                   # Map the c function
 r_t             = ro.r["t"]                   # Map the t function
-r_cat           = ro.r["cat"]                 # Map the cat function
-r_paste         = ro.r["paste"]               # Map the paste function
-r_unlist        = ro.r["unlist"]              # Map the unlist function
-r_head          = ro.r["head"]                # Map the unlist function
-r_unique        = ro.r["unique"]              # Map the unique function
-r_length        = ro.r["length"]              # Map the length function
 r_unlist        = ro.r["unlist"]              # Map the unlist function
 r_list          = ro.r.list                   # Map the list function
-r_matrix        = ro.r.matrix                 # Map the matrix function
-r_seq           = ro.r["seq"]                 # Map the seq function
-r_table         = ro.r["table"]               # Map the table function
-r_names         = ro.r["names"]               # Map the names function
-r_sink          = ro.r["sink"]                # Map the sink function
-r_is_NA         = ro.r["is.na"]               # Map the is.na function
-r_file          = ro.r["file"]                # Map the file function
 r_png           = ro.r["png"]                 # Map the png function for plotting
 r_dev_off       = ro.r["dev.off"]             # Map the dev.off function
-r_save_image    = ro.r["save.image"]          # Map the save.image function
-r_class         = ro.r["class"]               # Map the class function
-r_save          = ro.r["save"]                # Map the save function
 r_write_table   = ro.r["write.table"]         # Map the write.table function
-r_read_table   = ro.r["read.table"]         # Map the read.table function
-r_as_data_frame = ro.r["as.data.frame"]         # Map the write.table function
 r_data_frame    = ro.r["data.frame"]         # Map the data.frame function
 r_as_numeric    = ro.r["as.numeric"]         # Map the as.numeric function
 
 class CTL(object):
     def __init__(self):
-        print("Initialization of CTL")
+        logger.info("Initialization of CTL")
         #log = r_file("/tmp/genenetwork_ctl.log", open = "wt")
-        #r_sink(log)                                  # Uncomment the r_sink() commands to log output from stdout/stderr to a file
+        #r_sink(log)                                                       # Uncomment the r_sink() commands to log output from stdout/stderr to a file
         #r_sink(log, type = "message")
-        r_library("ctl")                                                  # Load CTL - Should only be done once, since it is quite expensive
+        r_library("ctl")                                                   # Load CTL - Should only be done once, since it is quite expensive
         r_options(stringsAsFactors = False)
-        print("Initialization of CTL done, package loaded in R session")
+        logger.info("Initialization of CTL done, package loaded in R session")
         self.r_CTLscan            = ro.r["CTLscan"]                        # Map the CTLscan function
         self.r_CTLsignificant     = ro.r["CTLsignificant"]                 # Map the CTLsignificant function
         self.r_lineplot           = ro.r["ctl.lineplot"]                   # Map the ctl.lineplot function
-        self.r_CTLsignificant     = ro.r["CTLsignificant"]                 # Map the CTLsignificant function
-        self.r_CTLnetwork         = ro.r["CTLnetwork"]                     # Map the CTLnetwork function
-        self.r_CTLprofiles        = ro.r["CTLprofiles"]                    # Map the CTLprofiles function
         self.r_plotCTLobject      = ro.r["plot.CTLobject"]                 # Map the plot.CTLobject function
         self.nodes_list = []
         self.edges_list = []
-        print("Obtained pointers to CTL functions")
+        logger.info("Obtained pointers to CTL functions")
 
     def addNode(self, gt):
         node_dict = { 'data' : {'id' : str(gt.name) + ":" + str(gt.dataset.name),
@@ -100,20 +77,20 @@ class CTL(object):
         self.edges_list.append(edge_dict)
 
     def run_analysis(self, requestform):
-        print("Starting CTL analysis on dataset")
+        logger.info("Starting CTL analysis on dataset")
         self.trait_db_list = [trait.strip() for trait in requestform['trait_list'].split(',')]
         self.trait_db_list = [x for x in self.trait_db_list if x]
 
-        print("strategy:", requestform.get("strategy"))
+        logger.debug("strategy:", requestform.get("strategy"))
         strategy = requestform.get("strategy")
 
-        print("nperm:", requestform.get("nperm"))
+        logger.debug("nperm:", requestform.get("nperm"))
         nperm = int(requestform.get("nperm"))
 
-        print("parametric:", requestform.get("parametric"))
+        logger.debug("parametric:", requestform.get("parametric"))
         parametric = bool(requestform.get("parametric"))
 
-        print("significance:", requestform.get("significance"))
+        logger.debug("significance:", requestform.get("significance"))
         significance = float(requestform.get("significance"))
 
         # Get the name of the .geno file belonging to the first phenotype
@@ -123,7 +100,7 @@ class CTL(object):
         genofilelocation = locate(dataset.group.name + ".geno", "genotype")
         parser = genofile_parser.ConvertGenoFile(genofilelocation)
         parser.process_csv()
-        print(dataset.group)
+        logger.debug("dataset group: ", dataset.group)
         # Create a genotype matrix
         individuals = parser.individuals
         markers = []
@@ -133,14 +110,14 @@ class CTL(object):
           markers.append(marker["genotypes"])
 
         genotypes = list(itertools.chain(*markers))
-        print(len(genotypes) / len(individuals), "==", len(parser.markers))
+        logger.debug(len(genotypes) / len(individuals), "==", len(parser.markers))
 
         rGeno = r_t(ro.r.matrix(r_unlist(genotypes), nrow=len(markernames), ncol=len(individuals), dimnames = r_list(markernames, individuals), byrow=True))
 
         # Create a phenotype matrix
         traits = []
         for trait in self.trait_db_list:
-          print("retrieving data for", trait)
+          logger.debug("retrieving data for", trait)
           if trait != "":
             ts = trait.split(':')
             gt = TRAIT.GeneralTrait(name = ts[0], dataset_name = ts[1])
@@ -153,7 +130,7 @@ class CTL(object):
 
         rPheno = r_t(ro.r.matrix(r_as_numeric(r_unlist(traits)), nrow=len(self.trait_db_list), ncol=len(individuals), dimnames = r_list(self.trait_db_list, individuals), byrow=True))
 
-        print(rPheno)
+        logger.debug(rPheno)
 
         # Use a data frame to store the objects
         rPheno = r_data_frame(rPheno, check_names = False)
@@ -196,10 +173,9 @@ class CTL(object):
         sys.stdout.flush()
 
         # Create the interactive graph for cytoscape visualization (Nodes and Edges)
-        print(type(significant))
         if not type(significant) == ri.RNULLType:
           for x in range(len(significant[0])):
-            print(significant[0][x], significant[1][x], significant[2][x])            # Debug to console
+            logger.debug(significant[0][x], significant[1][x], significant[2][x])     # Debug to console
             tsS = significant[0][x].split(':')                                        # Source
             tsT = significant[2][x].split(':')                                        # Target
             gtS = TRAIT.GeneralTrait(name = tsS[0], dataset_name = tsS[1])            # Retrieve Source info from the DB
@@ -214,7 +190,6 @@ class CTL(object):
         self.elements = json.dumps(self.nodes_list + self.edges_list)
 
     def loadImage(self, path, name):
-        print("pre-loading imgage results:", self.results[path])
         imgfile = open(self.results[path], 'rb')
         imgdata = imgfile.read()
         imgB64 = imgdata.encode("base64")
@@ -229,7 +204,7 @@ class CTL(object):
           n = n + 1
 
     def process_results(self, results):
-        print("Processing CTL output")
+        logger.info("Processing CTL output")
         template_vars = {}
         template_vars["results"] = self.results
         template_vars["elements"] = self.elements
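
The print()-to-logger conversion in ctl_analysis.py passes multiple positional arguments to logger.debug() and logger.info(), print-style. These calls rely on GeneNetwork's utility.logger wrapper joining its arguments itself; the stdlib logging API would instead treat the extra arguments as %-format args for the first string. A rough, hypothetical sketch of the kind of wrapper these calls assume (not the actual utility.logger implementation):

    import logging

    class PrintStyleLogger(object):
        """Illustrative only: joins positional arguments like print() before
        passing a single message string to a stdlib logging.Logger."""
        def __init__(self, name):
            self._logger = logging.getLogger(name)

        def _fmt(self, messages):
            return " ".join(str(m) for m in messages)

        def info(self, *messages):
            self._logger.info(self._fmt(messages))

        def debug(self, *messages):
            self._logger.debug(self._fmt(messages))

    def getLogger(name):
        return PrintStyleLogger(name)
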
diff --git a/wqflask/wqflask/templates/correlation_page.html b/wqflask/wqflask/templates/correlation_page.html
index fb4e19a1..05136ad8 100644
--- a/wqflask/wqflask/templates/correlation_page.html
+++ b/wqflask/wqflask/templates/correlation_page.html
@@ -94,12 +94,14 @@
                         <th>Sample p(r)</th>
                         <th>Lit r</th>
                         <th>Tissue r</th>
+                        <th>Tissue p(r)</th>
                         {% else %}
                         <th>Sample rho</th>
                         <th>N</th>
                         <th>Sample p(rho)</th>
                         <th>Lit r</th>
                         <th>Tissue rho</th>
+                        <th>Tissue p(rho)</th>
                         {% endif %}
                     {% elif target_dataset.type == "Publish" %}
                         {% if corr_method == 'pearson' %}
@@ -156,8 +158,10 @@
                         {% endif %}
                         {% if trait.tissue_corr == "" or trait.tissue_corr == 0.000 %}
                         <td align="right">--</td>
+                        <td align="right">--</td>
                         {% else %}
                         <td align="right">{{'%0.3f'|format(trait.tissue_corr)}}</td>
+                        <td align="right">{{'%0.3e'|format(trait.tissue_pvalue)}}</td>
                         {% endif %}
                       {% elif target_dataset.type == "Publish" %}
                         <td>{{ trait.description_display }}</td>
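
The two new table cells render the tissue p-value through Jinja2's format filter, which applies Python %-style formatting, so '%0.3e' produces scientific notation with three decimals (this assumes trait.tissue_pvalue is always numeric whenever tissue_corr is non-empty). A quick check of the two formats used in this column pair:

    >>> '%0.3e' % 0.000123   # the new Tissue p(r) cell
    '1.230e-04'
    >>> '%0.3f' % 0.45678    # the existing Tissue r cell
    '0.457'
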
@@ -319,7 +323,7 @@
                        title: 'correlation_results',
                        fieldBoundary: '"',
                        exportOptions: {
-                           columns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
+                           columns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
                        }
                     }
                 ],
@@ -342,7 +346,8 @@
                     { "type": "natural" },
                     { "type": "scientific" },
                     { "type": "natural" },
-                    { "type": "natural" }
+                    { "type": "natural" },
+                    { "type": "scientific" }
                 ],
                 "createdRow": function ( row, data, index ) {
                     $('td', row).eq(4).attr('title', $('td', row).eq(4).text());