aboutsummaryrefslogtreecommitdiff
path: root/wqflask
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask')
-rw-r--r--wqflask/utility/webqtlUtil.py700
-rw-r--r--wqflask/wqflask/correlation/correlation_functions.py56
-rw-r--r--wqflask/wqflask/correlation/show_corr_results.py30
3 files changed, 1 insertions, 785 deletions
diff --git a/wqflask/utility/webqtlUtil.py b/wqflask/utility/webqtlUtil.py
index 94dd7cbf..83fa90b7 100644
--- a/wqflask/utility/webqtlUtil.py
+++ b/wqflask/utility/webqtlUtil.py
@@ -65,41 +65,6 @@ ParInfo ={
# Accessory Functions
#########################################
-def exportData(hddn, tdata, NP = None):
- for key in tdata.keys():
- _val, _var, _N = tdata[key].val, tdata[key].var, tdata[key].N
- if _val != None:
- hddn[key] = _val
- if _var != None:
- hddn['V'+key] = _var
- if NP and _N != None:
- hddn['N'+key] = _N
-
-def genShortStrainName(RISet='', input_strainName=''):
- #aliasStrainDict = {'C57BL/6J':'B6','DBA/2J':'D2'}
- strainName = input_strainName
- if RISet != 'AXBXA':
- if RISet == 'BXD300':
- this_RISet = 'BXD'
- elif RISet == 'BDF2-2005':
- this_RISet = 'CASE05_'
- else:
- this_RISet = RISet
- strainName = string.replace(strainName,this_RISet,'')
- strainName = string.replace(strainName,'CASE','')
- try:
- strainName = "%02d" % int(strainName)
- except:
- pass
- else:
- strainName = string.replace(strainName,'AXB','A')
- strainName = string.replace(strainName,'BXA','B')
- try:
- strainName = strainName[0] + "%02d" % int(strainName[1:])
- except:
- pass
- return strainName
-
def genRandStr(prefix = "", length=8, chars=string.letters+string.digits):
from random import choice
_str = prefix[:]
@@ -107,63 +72,6 @@ def genRandStr(prefix = "", length=8, chars=string.letters+string.digits):
_str += choice(chars)
return _str
-def StringAsFloat(str):
- 'Converts string to float but catches any exception and returns None'
- try:
- return float(str)
- except:
- return None
-
-def IntAsFloat(str):
- 'Converts string to Int but catches any exception and returns None'
- try:
- return int(str)
- except:
- return None
-
-def FloatAsFloat(flt):
- 'Converts float to string but catches any exception and returns None'
- try:
- return float("%2.3f" % flt)
- except:
- return None
-
-def RemoveZero(flt):
- 'Converts string to float but catches any exception and returns None'
- try:
- if abs(flt) < 1e-6:
- return None
- else:
- return flt
- except:
- return None
-
-
-def SciFloat(d):
- 'Converts string to float but catches any exception and returns None'
-
- try:
- if abs(d) <= 1.0e-4:
- return "%1.2e" % d
- else:
- return "%1.5f" % d
- except:
- return None
-
-###To be removed
-def FloatList2String(lst):
- 'Converts float list to string but catches any exception and returns None'
- tt=''
- try:
- for item in lst:
- if item == None:
- tt += 'X '
- else:
- tt += '%f ' % item
- return tt
- except:
- return ""
-
def ListNotNull(lst):
'''Obsolete - Use built in function any (or all or whatever)
@@ -176,427 +84,6 @@ def ListNotNull(lst):
return 1
return None
-###To be removed
-def FileDataProcess(str):
- 'Remove the description text from the input file if theres any'
- i=0
- while i<len(str):
- if str[i]<'\x7f' and str[i]>'\x20':
- break
- else:
- i+=1
- str=str[i:]
- str=string.join(string.split(str,'\000'),'')
- i=string.find(str,"*****")
- if i>-1:
- return str[i+5:]
- else:
- return str
-
-def rank(a,lst,offset=0):
- """Calculate the integer rank of a number in an array, can be used to calculate p-value"""
- n = len(lst)
- if n == 2:
- if a <lst[0]:
- return offset
- elif a > lst[1]:
- return offset + 2
- else:
- return offset +1
- elif n == 1:
- if a <lst[0]:
- return offset
- else:
- return offset +1
- elif n== 0:
- return offset
- else:
- mid = n/2
- if a < lst[mid]:
- return rank(a,lst[:mid-1],offset)
- else:
- return rank(a,lst[mid:],offset+mid)
-
-def cmpScanResult(A,B):
- try:
- if A.LRS > B.LRS:
- return 1
- elif A.LRS == B.LRS:
- return 0
- else:
- return -1
- except:
- return 0
-
-
-def cmpScanResult2(A,B):
- try:
- if A.LRS < B.LRS:
- return 1
- elif A.LRS == B.LRS:
- return 0
- else:
- return -1
- except:
- return 0
-
-def cmpOrder(A,B):
- try:
- if A[1] < B[1]:
- return -1
- elif A[1] == B[1]:
- return 0
- else:
- return 1
- except:
- return 0
-
-def cmpOrder2(A,B):
- try:
- if A[-1] < B[-1]:
- return -1
- elif A[-1] == B[-1]:
- return 0
- else:
- return 1
- except:
- return 0
-
-
-
-
-def calRank(xVals, yVals, N): ### Zach Sloan, February 4 2010
- """
- Returns a ranked set of X and Y values. These are used when generating
- a Spearman scatterplot. Bear in mind that this sets values equal to each
- other as the same rank.
- """
- XX = []
- YY = []
- X = [0]*len(xVals)
- Y = [0]*len(yVals)
- j = 0
-
- for i in range(len(xVals)):
-
- if xVals[i] != None and yVals[i] != None:
- XX.append((j, xVals[i]))
- YY.append((j, yVals[i]))
- j = j + 1
-
- NN = len(XX)
-
- XX.sort(cmpOrder2)
- YY.sort(cmpOrder2)
-
- j = 1
- rank = 0.0
-
- while j < NN:
-
- if XX[j][1] != XX[j-1][1]:
- X[XX[j-1][0]] = j
- j = j+1
-
- else:
- jt = j+1
- ji = j
- for jt in range(j+1, NN):
- if (XX[jt][1] != XX[j-1][1]):
- break
- rank = 0.5*(j+jt)
- for ji in range(j-1, jt):
- X[XX[ji][0]] = rank
- if (jt == NN-1):
- if (XX[jt][1] == XX[j-1][1]):
- X[XX[NN-1][0]] = rank
- j = jt+1
-
- if j == NN:
- if X[XX[NN-1][0]] == 0:
- X[XX[NN-1][0]] = NN
-
- j = 1
- rank = 0.0
-
- while j < NN:
-
- if YY[j][1] != YY[j-1][1]:
- Y[YY[j-1][0]] = j
- j = j+1
- else:
- jt = j+1
- ji = j
- for jt in range(j+1, NN):
- if (YY[jt][1] != YY[j-1][1]):
- break
- rank = 0.5*(j+jt)
- for ji in range(j-1, jt):
- Y[YY[ji][0]] = rank
- if (jt == NN-1):
- if (YY[jt][1] == YY[j-1][1]):
- Y[YY[NN-1][0]] = rank
- j = jt+1
-
- if j == NN:
- if Y[YY[NN-1][0]] == 0:
- Y[YY[NN-1][0]] = NN
-
- return (X,Y)
-
-def calCorrelationRank(xVals,yVals,N):
- """
- Calculated Spearman Ranked Correlation. The algorithm works
- by setting all tied ranks to the average of those ranks (for
- example, if ranks 5-10 all have the same value, each will be set
- to rank 7.5).
- """
-
- XX = []
- YY = []
- j = 0
-
- for i in range(len(xVals)):
- if (xVals[i]!= None and yVals[i]!= None) and (xVals[i] != "None" and yVals[i] != "None"):
- XX.append((j,xVals[i]))
- YY.append((j,yVals[i]))
- j = j+1
-
- NN = len(XX)
- if NN <6:
- return (0.0,NN)
- XX.sort(cmpOrder2)
- YY.sort(cmpOrder2)
- X = [0]*NN
- Y = [0]*NN
-
- j = 1
- rank = 0.0
- t = 0.0
- sx = 0.0
-
- while j < NN:
-
- if XX[j][1] != XX[j-1][1]:
- X[XX[j-1][0]] = j
- j = j+1
-
- else:
- jt = j+1
- ji = j
- for jt in range(j+1, NN):
- if (XX[jt][1] != XX[j-1][1]):
- break
- rank = 0.5*(j+jt)
- for ji in range(j-1, jt):
- X[XX[ji][0]] = rank
- t = jt-j
- sx = sx + (t*t*t-t)
- if (jt == NN-1):
- if (XX[jt][1] == XX[j-1][1]):
- X[XX[NN-1][0]] = rank
- j = jt+1
-
- if j == NN:
- if X[XX[NN-1][0]] == 0:
- X[XX[NN-1][0]] = NN
-
- j = 1
- rank = 0.0
- t = 0.0
- sy = 0.0
-
- while j < NN:
-
- if YY[j][1] != YY[j-1][1]:
- Y[YY[j-1][0]] = j
- j = j+1
- else:
- jt = j+1
- ji = j
- for jt in range(j+1, NN):
- if (YY[jt][1] != YY[j-1][1]):
- break
- rank = 0.5*(j+jt)
- for ji in range(j-1, jt):
- Y[YY[ji][0]] = rank
- t = jt - j
- sy = sy + (t*t*t-t)
- if (jt == NN-1):
- if (YY[jt][1] == YY[j-1][1]):
- Y[YY[NN-1][0]] = rank
- j = jt+1
-
- if j == NN:
- if Y[YY[NN-1][0]] == 0:
- Y[YY[NN-1][0]] = NN
-
- D = 0.0
-
- for i in range(NN):
- D += (X[i]-Y[i])*(X[i]-Y[i])
-
- fac = (1.0 -sx/(NN*NN*NN-NN))*(1.0-sy/(NN*NN*NN-NN))
-
- return ((1-(6.0/(NN*NN*NN-NN))*(D+(sx+sy)/12.0))/math.sqrt(fac),NN)
-
-
-def calCorrelationRankText(dbdata,userdata,N): ### dcrowell = David Crowell, July 2008
- """Calculates correlation ranks with data formatted from the text file.
- dbdata, userdata are lists of strings. N is an int. Returns a float.
- Used by correlationPage"""
- XX = []
- YY = []
- j = 0
- for i in range(N):
- if (dbdata[i]!= None and userdata[i]!=None) and (dbdata[i]!= 'None' and userdata[i]!='None'):
- XX.append((j,float(dbdata[i])))
- YY.append((j,float(userdata[i])))
- j += 1
- NN = len(XX)
- if NN <6:
- return (0.0,NN)
- XX.sort(cmpOrder2)
- YY.sort(cmpOrder2)
- X = [0]*NN
- Y = [0]*NN
-
- j = 1
- rank = 0.0
- t = 0.0
- sx = 0.0
-
- while j < NN:
-
- if XX[j][1] != XX[j-1][1]:
- X[XX[j-1][0]] = j
- j = j+1
-
- else:
- jt = j+1
- ji = j
- for jt in range(j+1, NN):
- if (XX[jt][1] != XX[j-1][1]):
- break
- rank = 0.5*(j+jt)
- for ji in range(j-1, jt):
- X[XX[ji][0]] = rank
- t = jt-j
- sx = sx + (t*t*t-t)
- if (jt == NN-1):
- if (XX[jt][1] == XX[j-1][1]):
- X[XX[NN-1][0]] = rank
- j = jt+1
-
- if j == NN:
- if X[XX[NN-1][0]] == 0:
- X[XX[NN-1][0]] = NN
-
- j = 1
- rank = 0.0
- t = 0.0
- sy = 0.0
-
- while j < NN:
-
- if YY[j][1] != YY[j-1][1]:
- Y[YY[j-1][0]] = j
- j = j+1
- else:
- jt = j+1
- ji = j
- for jt in range(j+1, NN):
- if (YY[jt][1] != YY[j-1][1]):
- break
- rank = 0.5*(j+jt)
- for ji in range(j-1, jt):
- Y[YY[ji][0]] = rank
- t = jt - j
- sy = sy + (t*t*t-t)
- if (jt == NN-1):
- if (YY[jt][1] == YY[j-1][1]):
- Y[YY[NN-1][0]] = rank
- j = jt+1
-
- if j == NN:
- if Y[YY[NN-1][0]] == 0:
- Y[YY[NN-1][0]] = NN
-
- D = 0.0
-
- for i in range(NN):
- D += (X[i]-Y[i])*(X[i]-Y[i])
-
- fac = (1.0 -sx/(NN*NN*NN-NN))*(1.0-sy/(NN*NN*NN-NN))
-
- return ((1-(6.0/(NN*NN*NN-NN))*(D+(sx+sy)/12.0))/math.sqrt(fac),NN)
-
-
-
-def calCorrelation(dbdata,userdata,N):
- X = []
- Y = []
- for i in range(N):
- if dbdata[i]!= None and userdata[i]!= None:
- X.append(dbdata[i])
- Y.append(userdata[i])
- NN = len(X)
- if NN <6:
- return (0.0,NN)
- sx = reduce(lambda x,y:x+y,X,0.0)
- sy = reduce(lambda x,y:x+y,Y,0.0)
- meanx = sx/NN
- meany = sy/NN
- xyd = 0.0
- sxd = 0.0
- syd = 0.0
- for i in range(NN):
- xyd += (X[i] - meanx)*(Y[i]-meany)
- sxd += (X[i] - meanx)*(X[i] - meanx)
- syd += (Y[i] - meany)*(Y[i] - meany)
- try:
- corr = xyd/(sqrt(sxd)*sqrt(syd))
- except:
- corr = 0
- return (corr,NN)
-
-def calCorrelationText(dbdata,userdata,N): ### dcrowell July 2008
- """Calculates correlation coefficients with values formatted from text files. dbdata, userdata are lists of strings. N is an int. Returns a float
- Used by correlationPage"""
- X = []
- Y = []
- for i in range(N):
- #if (dbdata[i]!= None and userdata[i]!= None) and (dbdata[i]!= 'None' and userdata[i]!= 'None'):
- # X.append(float(dbdata[i]))
- # Y.append(float(userdata[i]))
- if dbdata[i] == None or dbdata[i] == 'None' or userdata[i] == None or userdata[i] == 'None':
- continue
- else:
- X.append(float(dbdata[i]))
- Y.append(float(userdata[i]))
- NN = len(X)
- if NN <6:
- return (0.0,NN)
- sx = sum(X)
- sy = sum(Y)
- meanx = sx/float(NN)
- meany = sy/float(NN)
- xyd = 0.0
- sxd = 0.0
- syd = 0.0
- for i in range(NN):
- x1 = X[i]-meanx
- y1 = Y[i]-meany
- xyd += x1*y1
- sxd += x1**2
- syd += y1**2
- try:
- corr = xyd/(sqrt(sxd)*sqrt(syd))
- except:
- corr = 0
- return (corr,NN)
-
-
def readLineCSV(line): ### dcrowell July 2008
"""Parses a CSV string of text and returns a list containing each element as a string.
Used by correlationPage"""
@@ -605,45 +92,6 @@ def readLineCSV(line): ### dcrowell July 2008
returnList[0]=returnList[0][1:]
return returnList
-
-def cmpCorr(A,B):
- try:
- if abs(A[1]) < abs(B[1]):
- return 1
- elif abs(A[1]) == abs(B[1]):
- return 0
- else:
- return -1
- except:
- return 0
-
-def cmpLitCorr(A,B):
- try:
- if abs(A[3]) < abs(B[3]): return 1
- elif abs(A[3]) == abs(B[3]):
- if abs(A[1]) < abs(B[1]): return 1
- elif abs(A[1]) == abs(B[1]): return 0
- else: return -1
- else: return -1
- except:
- return 0
-
-def cmpPValue(A,B):
- try:
- if A.corrPValue < B.corrPValue:
- return -1
- elif A.corrPValue == B.corrPValue:
- if abs(A.corr) > abs(B.corr):
- return -1
- elif abs(A.corr) < abs(B.corr):
- return 1
- else:
- return 0
- else:
- return 1
- except:
- return 0
-
def cmpEigenValue(A,B):
try:
if A[0] > B[0]:
@@ -655,80 +103,6 @@ def cmpEigenValue(A,B):
except:
return 0
-
-def cmpLRSFull(A,B):
- try:
- if A[0] < B[0]:
- return -1
- elif A[0] == B[0]:
- return 0
- else:
- return 1
- except:
- return 0
-
-def cmpLRSInteract(A,B):
- try:
- if A[1] < B[1]:
- return -1
- elif A[1] == B[1]:
- return 0
- else:
- return 1
- except:
- return 0
-
-
-def cmpPos(A,B):
- try:
- try:
- AChr = int(A.chr)
- except:
- AChr = 20
- try:
- BChr = int(B.chr)
- except:
- BChr = 20
- if AChr > BChr:
- return 1
- elif AChr == BChr:
- if A.mb > B.mb:
- return 1
- if A.mb == B.mb:
- return 0
- else:
- return -1
- else:
- return -1
- except:
- return 0
-
-def cmpGenoPos(A,B):
- try:
- A1 = A.chr
- B1 = B.chr
- try:
- A1 = int(A1)
- except:
- A1 = 25
- try:
- B1 = int(B1)
- except:
- B1 = 25
- if A1 > B1:
- return 1
- elif A1 == B1:
- if A.mb > B.mb:
- return 1
- if A.mb == B.mb:
- return 0
- else:
- return -1
- else:
- return -1
- except:
- return 0
-
def hasAccessToConfidentialPhenotypeTrait(privilege, userName, authorized_users):
access_to_confidential_phenotype_trait = 0
if webqtlConfig.USERDICT[privilege] > webqtlConfig.USERDICT['user']:
@@ -737,76 +111,4 @@ def hasAccessToConfidentialPhenotypeTrait(privilege, userName, authorized_users)
AuthorizedUsersList=map(string.strip, string.split(authorized_users, ','))
if AuthorizedUsersList.__contains__(userName):
access_to_confidential_phenotype_trait = 1
- return access_to_confidential_phenotype_trait
-
-
-class VisualizeException(Exception):
- def __init__(self, message):
- self.message = message
- def __str__(self):
- return self.message
-
-# safeConvert : (string -> A) -> A -> A
-# to convert a string to type A, using the supplied default value
-# if the given conversion function doesn't work
-def safeConvert(f, value, default):
- try:
- return f(value)
- except:
- return default
-
-# safeFloat : string -> float -> float
-# to convert a string to a float safely
-def safeFloat(value, default):
- return safeConvert(float, value, default)
-
-# safeInt: string -> int -> int
-# to convert a string to an int safely
-def safeInt(value, default):
- return safeConvert(int, value, default)
-
-# safeString : string -> (arrayof string) -> string -> string
-# if a string is not in a list of strings to pick a default value
-# for that string
-def safeString(value, validChoices, default):
- if value in validChoices:
- return value
- else:
- return default
-
-# yesNoToInt: string -> int
-# map "yes" -> 1 and "no" -> 0
-def yesNoToInt(value):
- if value == "yes":
- return 1
- elif value == "no":
- return 0
- else:
- return None
-
-# IntToYesNo: int -> string
-# map 1 -> "yes" and 0 -> "no"
-def intToYesNo(value):
- if value == 1:
- return "yes"
- elif value == 0:
- return "no"
- else:
- return None
-
-def formatField(name):
- name = name.replace("_", " ")
- name = name.title()
- #name = name.replace("Mb Mm6", "Mb");
- return name.replace("Id", "ID")
-
-def natsort_key(string):
- r = []
- for c in string:
- try:
- c = int(c)
- try: r[-1] = r[-1] * 10 + c
- except: r.append(c)
- except:
- r.append(c)
- return r \ No newline at end of file
+ return access_to_confidential_phenotype_trait \ No newline at end of file
diff --git a/wqflask/wqflask/correlation/correlation_functions.py b/wqflask/wqflask/correlation/correlation_functions.py
index 80a0818c..1ee9b558 100644
--- a/wqflask/wqflask/correlation/correlation_functions.py
+++ b/wqflask/wqflask/correlation/correlation_functions.py
@@ -491,62 +491,6 @@ pcor.rec <- function(x,y,z,method="p",na.rm=T){
return allcorrelations
-
-#XZ, April 30, 2010: The input primaryTrait and targetTrait are instance of webqtlTrait
-#XZ: The primaryTrait and targetTrait should have executed retrieveData function
-def calZeroOrderCorr(primaryTrait, targetTrait, method='pearson'):
-
- #primaryTrait.retrieveData()
-
- #there is no None value in primary_val
- primary_strain, primary_val, primary_var = primaryTrait.exportInformative()
-
- #targetTrait.retrieveData()
-
- #there might be None value in target_val
- target_val = targetTrait.exportData(primary_strain, type="val")
-
- R_primary = rpy2.robjects.FloatVector(range(len(primary_val)))
- for i in range(len(primary_val)):
- R_primary[i] = primary_val[i]
-
- N = len(target_val)
-
- if None in target_val:
- goodIndex = []
- for i in range(len(target_val)):
- if target_val[i] != None:
- goodIndex.append(i)
-
- N = len(goodIndex)
-
- R_primary = rpy2.robjects.FloatVector(range(len(goodIndex)))
- for i in range(len(goodIndex)):
- R_primary[i] = primary_val[goodIndex[i]]
-
- R_target = rpy2.robjects.FloatVector(range(len(goodIndex)))
- for i in range(len(goodIndex)):
- R_target[i] = target_val[goodIndex[i]]
-
- else:
- R_target = rpy2.robjects.FloatVector(range(len(target_val)))
- for i in range(len(target_val)):
- R_target[i] = target_val[i]
-
- R_corr_test = rpy2.robjects.r['cor.test']
-
- if method == 'spearman':
- R_result = R_corr_test(R_primary, R_target, method='spearman')
- else:
- R_result = R_corr_test(R_primary, R_target)
-
- corr_result = []
- corr_result.append( R_result[3][0] )
- corr_result.append( N )
- corr_result.append( R_result[2][0] )
-
- return corr_result
-
#####################################################################################
#Input: primaryValue(list): one list of expression values of one probeSet,
# targetValue(list): one list of expression values of one probeSet,
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index abf9fc89..85a8c0ef 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -555,21 +555,6 @@ class CorrelationResults(object):
self.record_count = len(traits) #ZS: This isn't a good way to get this value, so I need to change it later
- #XZ, 3/31/2010: Theoretically, we should create one function 'comTissueCorr'
- #to compare each trait by their tissue corr p values.
- #But because the tissue corr p values are generated by permutation test,
- #the top ones always have p value 0. So comparing p values actually does nothing.
- #In addition, for the tissue data in our database, the N is always the same.
- #So it's safe to compare with tissue corr statistic value.
- #That's the same as literature corr.
- #if self.method in [METHOD_LIT, METHOD_TISSUE_PEARSON, METHOD_TISSUE_RANK] and self.gene_id:
- # traits.sort(webqtlUtil.cmpLitCorr)
- #else:
- #if self.method in TISSUE_METHODS:
- # sort(traits, key=lambda A: math.fabs(A.tissue_corr))
- #elif self.method == METHOD_LIT:
- # traits.sort(traits, key=lambda A: math.fabs(A.lit_corr))
- #else:
traits = sortTraitCorrelations(traits, self.method)
# Strip to the top N correlations
@@ -1069,21 +1054,6 @@ class CorrelationResults(object):
return (symbolCorrDict, symbolPvalueDict)
-
- def correlate(self):
- self.correlation_data = collections.defaultdict(list)
- for trait, values in self.target_dataset.trait_data.iteritems():
- values_1 = []
- values_2 = []
- for index,sample in enumerate(self.target_dataset.samplelist):
- target_value = values[index]
- if sample in self.sample_data.keys():
- this_value = self.sample_data[sample]
- values_1.append(this_value)
- values_2.append(target_value)
- correlation = calCorrelation(values_1, values_2)
- self.correlation_data[trait] = correlation
-
def getFileName(self, target_db_name): ### dcrowell August 2008
"""Returns the name of the reference database file with which correlations are calculated.
Takes argument cursor which is a cursor object of any instance of a subclass of templatePage