diff options
-rw-r--r-- | wqflask/utility/webqtlUtil.py | 700 | ||||
-rw-r--r-- | wqflask/wqflask/correlation/correlation_functions.py | 56 | ||||
-rw-r--r-- | wqflask/wqflask/correlation/show_corr_results.py | 30 |
3 files changed, 1 insertions, 785 deletions
diff --git a/wqflask/utility/webqtlUtil.py b/wqflask/utility/webqtlUtil.py index 94dd7cbf..83fa90b7 100644 --- a/wqflask/utility/webqtlUtil.py +++ b/wqflask/utility/webqtlUtil.py @@ -65,41 +65,6 @@ ParInfo ={ # Accessory Functions ######################################### -def exportData(hddn, tdata, NP = None): - for key in tdata.keys(): - _val, _var, _N = tdata[key].val, tdata[key].var, tdata[key].N - if _val != None: - hddn[key] = _val - if _var != None: - hddn['V'+key] = _var - if NP and _N != None: - hddn['N'+key] = _N - -def genShortStrainName(RISet='', input_strainName=''): - #aliasStrainDict = {'C57BL/6J':'B6','DBA/2J':'D2'} - strainName = input_strainName - if RISet != 'AXBXA': - if RISet == 'BXD300': - this_RISet = 'BXD' - elif RISet == 'BDF2-2005': - this_RISet = 'CASE05_' - else: - this_RISet = RISet - strainName = string.replace(strainName,this_RISet,'') - strainName = string.replace(strainName,'CASE','') - try: - strainName = "%02d" % int(strainName) - except: - pass - else: - strainName = string.replace(strainName,'AXB','A') - strainName = string.replace(strainName,'BXA','B') - try: - strainName = strainName[0] + "%02d" % int(strainName[1:]) - except: - pass - return strainName - def genRandStr(prefix = "", length=8, chars=string.letters+string.digits): from random import choice _str = prefix[:] @@ -107,63 +72,6 @@ def genRandStr(prefix = "", length=8, chars=string.letters+string.digits): _str += choice(chars) return _str -def StringAsFloat(str): - 'Converts string to float but catches any exception and returns None' - try: - return float(str) - except: - return None - -def IntAsFloat(str): - 'Converts string to Int but catches any exception and returns None' - try: - return int(str) - except: - return None - -def FloatAsFloat(flt): - 'Converts float to string but catches any exception and returns None' - try: - return float("%2.3f" % flt) - except: - return None - -def RemoveZero(flt): - 'Converts string to float but catches any exception and returns None' - try: - if abs(flt) < 1e-6: - return None - else: - return flt - except: - return None - - -def SciFloat(d): - 'Converts string to float but catches any exception and returns None' - - try: - if abs(d) <= 1.0e-4: - return "%1.2e" % d - else: - return "%1.5f" % d - except: - return None - -###To be removed -def FloatList2String(lst): - 'Converts float list to string but catches any exception and returns None' - tt='' - try: - for item in lst: - if item == None: - tt += 'X ' - else: - tt += '%f ' % item - return tt - except: - return "" - def ListNotNull(lst): '''Obsolete - Use built in function any (or all or whatever) @@ -176,427 +84,6 @@ def ListNotNull(lst): return 1 return None -###To be removed -def FileDataProcess(str): - 'Remove the description text from the input file if theres any' - i=0 - while i<len(str): - if str[i]<'\x7f' and str[i]>'\x20': - break - else: - i+=1 - str=str[i:] - str=string.join(string.split(str,'\000'),'') - i=string.find(str,"*****") - if i>-1: - return str[i+5:] - else: - return str - -def rank(a,lst,offset=0): - """Calculate the integer rank of a number in an array, can be used to calculate p-value""" - n = len(lst) - if n == 2: - if a <lst[0]: - return offset - elif a > lst[1]: - return offset + 2 - else: - return offset +1 - elif n == 1: - if a <lst[0]: - return offset - else: - return offset +1 - elif n== 0: - return offset - else: - mid = n/2 - if a < lst[mid]: - return rank(a,lst[:mid-1],offset) - else: - return rank(a,lst[mid:],offset+mid) - -def cmpScanResult(A,B): - try: - if A.LRS > B.LRS: - return 1 - elif A.LRS == B.LRS: - return 0 - else: - return -1 - except: - return 0 - - -def cmpScanResult2(A,B): - try: - if A.LRS < B.LRS: - return 1 - elif A.LRS == B.LRS: - return 0 - else: - return -1 - except: - return 0 - -def cmpOrder(A,B): - try: - if A[1] < B[1]: - return -1 - elif A[1] == B[1]: - return 0 - else: - return 1 - except: - return 0 - -def cmpOrder2(A,B): - try: - if A[-1] < B[-1]: - return -1 - elif A[-1] == B[-1]: - return 0 - else: - return 1 - except: - return 0 - - - - -def calRank(xVals, yVals, N): ### Zach Sloan, February 4 2010 - """ - Returns a ranked set of X and Y values. These are used when generating - a Spearman scatterplot. Bear in mind that this sets values equal to each - other as the same rank. - """ - XX = [] - YY = [] - X = [0]*len(xVals) - Y = [0]*len(yVals) - j = 0 - - for i in range(len(xVals)): - - if xVals[i] != None and yVals[i] != None: - XX.append((j, xVals[i])) - YY.append((j, yVals[i])) - j = j + 1 - - NN = len(XX) - - XX.sort(cmpOrder2) - YY.sort(cmpOrder2) - - j = 1 - rank = 0.0 - - while j < NN: - - if XX[j][1] != XX[j-1][1]: - X[XX[j-1][0]] = j - j = j+1 - - else: - jt = j+1 - ji = j - for jt in range(j+1, NN): - if (XX[jt][1] != XX[j-1][1]): - break - rank = 0.5*(j+jt) - for ji in range(j-1, jt): - X[XX[ji][0]] = rank - if (jt == NN-1): - if (XX[jt][1] == XX[j-1][1]): - X[XX[NN-1][0]] = rank - j = jt+1 - - if j == NN: - if X[XX[NN-1][0]] == 0: - X[XX[NN-1][0]] = NN - - j = 1 - rank = 0.0 - - while j < NN: - - if YY[j][1] != YY[j-1][1]: - Y[YY[j-1][0]] = j - j = j+1 - else: - jt = j+1 - ji = j - for jt in range(j+1, NN): - if (YY[jt][1] != YY[j-1][1]): - break - rank = 0.5*(j+jt) - for ji in range(j-1, jt): - Y[YY[ji][0]] = rank - if (jt == NN-1): - if (YY[jt][1] == YY[j-1][1]): - Y[YY[NN-1][0]] = rank - j = jt+1 - - if j == NN: - if Y[YY[NN-1][0]] == 0: - Y[YY[NN-1][0]] = NN - - return (X,Y) - -def calCorrelationRank(xVals,yVals,N): - """ - Calculated Spearman Ranked Correlation. The algorithm works - by setting all tied ranks to the average of those ranks (for - example, if ranks 5-10 all have the same value, each will be set - to rank 7.5). - """ - - XX = [] - YY = [] - j = 0 - - for i in range(len(xVals)): - if (xVals[i]!= None and yVals[i]!= None) and (xVals[i] != "None" and yVals[i] != "None"): - XX.append((j,xVals[i])) - YY.append((j,yVals[i])) - j = j+1 - - NN = len(XX) - if NN <6: - return (0.0,NN) - XX.sort(cmpOrder2) - YY.sort(cmpOrder2) - X = [0]*NN - Y = [0]*NN - - j = 1 - rank = 0.0 - t = 0.0 - sx = 0.0 - - while j < NN: - - if XX[j][1] != XX[j-1][1]: - X[XX[j-1][0]] = j - j = j+1 - - else: - jt = j+1 - ji = j - for jt in range(j+1, NN): - if (XX[jt][1] != XX[j-1][1]): - break - rank = 0.5*(j+jt) - for ji in range(j-1, jt): - X[XX[ji][0]] = rank - t = jt-j - sx = sx + (t*t*t-t) - if (jt == NN-1): - if (XX[jt][1] == XX[j-1][1]): - X[XX[NN-1][0]] = rank - j = jt+1 - - if j == NN: - if X[XX[NN-1][0]] == 0: - X[XX[NN-1][0]] = NN - - j = 1 - rank = 0.0 - t = 0.0 - sy = 0.0 - - while j < NN: - - if YY[j][1] != YY[j-1][1]: - Y[YY[j-1][0]] = j - j = j+1 - else: - jt = j+1 - ji = j - for jt in range(j+1, NN): - if (YY[jt][1] != YY[j-1][1]): - break - rank = 0.5*(j+jt) - for ji in range(j-1, jt): - Y[YY[ji][0]] = rank - t = jt - j - sy = sy + (t*t*t-t) - if (jt == NN-1): - if (YY[jt][1] == YY[j-1][1]): - Y[YY[NN-1][0]] = rank - j = jt+1 - - if j == NN: - if Y[YY[NN-1][0]] == 0: - Y[YY[NN-1][0]] = NN - - D = 0.0 - - for i in range(NN): - D += (X[i]-Y[i])*(X[i]-Y[i]) - - fac = (1.0 -sx/(NN*NN*NN-NN))*(1.0-sy/(NN*NN*NN-NN)) - - return ((1-(6.0/(NN*NN*NN-NN))*(D+(sx+sy)/12.0))/math.sqrt(fac),NN) - - -def calCorrelationRankText(dbdata,userdata,N): ### dcrowell = David Crowell, July 2008 - """Calculates correlation ranks with data formatted from the text file. - dbdata, userdata are lists of strings. N is an int. Returns a float. - Used by correlationPage""" - XX = [] - YY = [] - j = 0 - for i in range(N): - if (dbdata[i]!= None and userdata[i]!=None) and (dbdata[i]!= 'None' and userdata[i]!='None'): - XX.append((j,float(dbdata[i]))) - YY.append((j,float(userdata[i]))) - j += 1 - NN = len(XX) - if NN <6: - return (0.0,NN) - XX.sort(cmpOrder2) - YY.sort(cmpOrder2) - X = [0]*NN - Y = [0]*NN - - j = 1 - rank = 0.0 - t = 0.0 - sx = 0.0 - - while j < NN: - - if XX[j][1] != XX[j-1][1]: - X[XX[j-1][0]] = j - j = j+1 - - else: - jt = j+1 - ji = j - for jt in range(j+1, NN): - if (XX[jt][1] != XX[j-1][1]): - break - rank = 0.5*(j+jt) - for ji in range(j-1, jt): - X[XX[ji][0]] = rank - t = jt-j - sx = sx + (t*t*t-t) - if (jt == NN-1): - if (XX[jt][1] == XX[j-1][1]): - X[XX[NN-1][0]] = rank - j = jt+1 - - if j == NN: - if X[XX[NN-1][0]] == 0: - X[XX[NN-1][0]] = NN - - j = 1 - rank = 0.0 - t = 0.0 - sy = 0.0 - - while j < NN: - - if YY[j][1] != YY[j-1][1]: - Y[YY[j-1][0]] = j - j = j+1 - else: - jt = j+1 - ji = j - for jt in range(j+1, NN): - if (YY[jt][1] != YY[j-1][1]): - break - rank = 0.5*(j+jt) - for ji in range(j-1, jt): - Y[YY[ji][0]] = rank - t = jt - j - sy = sy + (t*t*t-t) - if (jt == NN-1): - if (YY[jt][1] == YY[j-1][1]): - Y[YY[NN-1][0]] = rank - j = jt+1 - - if j == NN: - if Y[YY[NN-1][0]] == 0: - Y[YY[NN-1][0]] = NN - - D = 0.0 - - for i in range(NN): - D += (X[i]-Y[i])*(X[i]-Y[i]) - - fac = (1.0 -sx/(NN*NN*NN-NN))*(1.0-sy/(NN*NN*NN-NN)) - - return ((1-(6.0/(NN*NN*NN-NN))*(D+(sx+sy)/12.0))/math.sqrt(fac),NN) - - - -def calCorrelation(dbdata,userdata,N): - X = [] - Y = [] - for i in range(N): - if dbdata[i]!= None and userdata[i]!= None: - X.append(dbdata[i]) - Y.append(userdata[i]) - NN = len(X) - if NN <6: - return (0.0,NN) - sx = reduce(lambda x,y:x+y,X,0.0) - sy = reduce(lambda x,y:x+y,Y,0.0) - meanx = sx/NN - meany = sy/NN - xyd = 0.0 - sxd = 0.0 - syd = 0.0 - for i in range(NN): - xyd += (X[i] - meanx)*(Y[i]-meany) - sxd += (X[i] - meanx)*(X[i] - meanx) - syd += (Y[i] - meany)*(Y[i] - meany) - try: - corr = xyd/(sqrt(sxd)*sqrt(syd)) - except: - corr = 0 - return (corr,NN) - -def calCorrelationText(dbdata,userdata,N): ### dcrowell July 2008 - """Calculates correlation coefficients with values formatted from text files. dbdata, userdata are lists of strings. N is an int. Returns a float - Used by correlationPage""" - X = [] - Y = [] - for i in range(N): - #if (dbdata[i]!= None and userdata[i]!= None) and (dbdata[i]!= 'None' and userdata[i]!= 'None'): - # X.append(float(dbdata[i])) - # Y.append(float(userdata[i])) - if dbdata[i] == None or dbdata[i] == 'None' or userdata[i] == None or userdata[i] == 'None': - continue - else: - X.append(float(dbdata[i])) - Y.append(float(userdata[i])) - NN = len(X) - if NN <6: - return (0.0,NN) - sx = sum(X) - sy = sum(Y) - meanx = sx/float(NN) - meany = sy/float(NN) - xyd = 0.0 - sxd = 0.0 - syd = 0.0 - for i in range(NN): - x1 = X[i]-meanx - y1 = Y[i]-meany - xyd += x1*y1 - sxd += x1**2 - syd += y1**2 - try: - corr = xyd/(sqrt(sxd)*sqrt(syd)) - except: - corr = 0 - return (corr,NN) - - def readLineCSV(line): ### dcrowell July 2008 """Parses a CSV string of text and returns a list containing each element as a string. Used by correlationPage""" @@ -605,45 +92,6 @@ def readLineCSV(line): ### dcrowell July 2008 returnList[0]=returnList[0][1:] return returnList - -def cmpCorr(A,B): - try: - if abs(A[1]) < abs(B[1]): - return 1 - elif abs(A[1]) == abs(B[1]): - return 0 - else: - return -1 - except: - return 0 - -def cmpLitCorr(A,B): - try: - if abs(A[3]) < abs(B[3]): return 1 - elif abs(A[3]) == abs(B[3]): - if abs(A[1]) < abs(B[1]): return 1 - elif abs(A[1]) == abs(B[1]): return 0 - else: return -1 - else: return -1 - except: - return 0 - -def cmpPValue(A,B): - try: - if A.corrPValue < B.corrPValue: - return -1 - elif A.corrPValue == B.corrPValue: - if abs(A.corr) > abs(B.corr): - return -1 - elif abs(A.corr) < abs(B.corr): - return 1 - else: - return 0 - else: - return 1 - except: - return 0 - def cmpEigenValue(A,B): try: if A[0] > B[0]: @@ -655,80 +103,6 @@ def cmpEigenValue(A,B): except: return 0 - -def cmpLRSFull(A,B): - try: - if A[0] < B[0]: - return -1 - elif A[0] == B[0]: - return 0 - else: - return 1 - except: - return 0 - -def cmpLRSInteract(A,B): - try: - if A[1] < B[1]: - return -1 - elif A[1] == B[1]: - return 0 - else: - return 1 - except: - return 0 - - -def cmpPos(A,B): - try: - try: - AChr = int(A.chr) - except: - AChr = 20 - try: - BChr = int(B.chr) - except: - BChr = 20 - if AChr > BChr: - return 1 - elif AChr == BChr: - if A.mb > B.mb: - return 1 - if A.mb == B.mb: - return 0 - else: - return -1 - else: - return -1 - except: - return 0 - -def cmpGenoPos(A,B): - try: - A1 = A.chr - B1 = B.chr - try: - A1 = int(A1) - except: - A1 = 25 - try: - B1 = int(B1) - except: - B1 = 25 - if A1 > B1: - return 1 - elif A1 == B1: - if A.mb > B.mb: - return 1 - if A.mb == B.mb: - return 0 - else: - return -1 - else: - return -1 - except: - return 0 - def hasAccessToConfidentialPhenotypeTrait(privilege, userName, authorized_users): access_to_confidential_phenotype_trait = 0 if webqtlConfig.USERDICT[privilege] > webqtlConfig.USERDICT['user']: @@ -737,76 +111,4 @@ def hasAccessToConfidentialPhenotypeTrait(privilege, userName, authorized_users) AuthorizedUsersList=map(string.strip, string.split(authorized_users, ',')) if AuthorizedUsersList.__contains__(userName): access_to_confidential_phenotype_trait = 1 - return access_to_confidential_phenotype_trait - - -class VisualizeException(Exception): - def __init__(self, message): - self.message = message - def __str__(self): - return self.message - -# safeConvert : (string -> A) -> A -> A -# to convert a string to type A, using the supplied default value -# if the given conversion function doesn't work -def safeConvert(f, value, default): - try: - return f(value) - except: - return default - -# safeFloat : string -> float -> float -# to convert a string to a float safely -def safeFloat(value, default): - return safeConvert(float, value, default) - -# safeInt: string -> int -> int -# to convert a string to an int safely -def safeInt(value, default): - return safeConvert(int, value, default) - -# safeString : string -> (arrayof string) -> string -> string -# if a string is not in a list of strings to pick a default value -# for that string -def safeString(value, validChoices, default): - if value in validChoices: - return value - else: - return default - -# yesNoToInt: string -> int -# map "yes" -> 1 and "no" -> 0 -def yesNoToInt(value): - if value == "yes": - return 1 - elif value == "no": - return 0 - else: - return None - -# IntToYesNo: int -> string -# map 1 -> "yes" and 0 -> "no" -def intToYesNo(value): - if value == 1: - return "yes" - elif value == 0: - return "no" - else: - return None - -def formatField(name): - name = name.replace("_", " ") - name = name.title() - #name = name.replace("Mb Mm6", "Mb"); - return name.replace("Id", "ID") - -def natsort_key(string): - r = [] - for c in string: - try: - c = int(c) - try: r[-1] = r[-1] * 10 + c - except: r.append(c) - except: - r.append(c) - return r
\ No newline at end of file + return access_to_confidential_phenotype_trait
\ No newline at end of file diff --git a/wqflask/wqflask/correlation/correlation_functions.py b/wqflask/wqflask/correlation/correlation_functions.py index 80a0818c..1ee9b558 100644 --- a/wqflask/wqflask/correlation/correlation_functions.py +++ b/wqflask/wqflask/correlation/correlation_functions.py @@ -491,62 +491,6 @@ pcor.rec <- function(x,y,z,method="p",na.rm=T){ return allcorrelations - -#XZ, April 30, 2010: The input primaryTrait and targetTrait are instance of webqtlTrait -#XZ: The primaryTrait and targetTrait should have executed retrieveData function -def calZeroOrderCorr(primaryTrait, targetTrait, method='pearson'): - - #primaryTrait.retrieveData() - - #there is no None value in primary_val - primary_strain, primary_val, primary_var = primaryTrait.exportInformative() - - #targetTrait.retrieveData() - - #there might be None value in target_val - target_val = targetTrait.exportData(primary_strain, type="val") - - R_primary = rpy2.robjects.FloatVector(range(len(primary_val))) - for i in range(len(primary_val)): - R_primary[i] = primary_val[i] - - N = len(target_val) - - if None in target_val: - goodIndex = [] - for i in range(len(target_val)): - if target_val[i] != None: - goodIndex.append(i) - - N = len(goodIndex) - - R_primary = rpy2.robjects.FloatVector(range(len(goodIndex))) - for i in range(len(goodIndex)): - R_primary[i] = primary_val[goodIndex[i]] - - R_target = rpy2.robjects.FloatVector(range(len(goodIndex))) - for i in range(len(goodIndex)): - R_target[i] = target_val[goodIndex[i]] - - else: - R_target = rpy2.robjects.FloatVector(range(len(target_val))) - for i in range(len(target_val)): - R_target[i] = target_val[i] - - R_corr_test = rpy2.robjects.r['cor.test'] - - if method == 'spearman': - R_result = R_corr_test(R_primary, R_target, method='spearman') - else: - R_result = R_corr_test(R_primary, R_target) - - corr_result = [] - corr_result.append( R_result[3][0] ) - corr_result.append( N ) - corr_result.append( R_result[2][0] ) - - return corr_result - ##################################################################################### #Input: primaryValue(list): one list of expression values of one probeSet, # targetValue(list): one list of expression values of one probeSet, diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index abf9fc89..85a8c0ef 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -555,21 +555,6 @@ class CorrelationResults(object): self.record_count = len(traits) #ZS: This isn't a good way to get this value, so I need to change it later - #XZ, 3/31/2010: Theoretically, we should create one function 'comTissueCorr' - #to compare each trait by their tissue corr p values. - #But because the tissue corr p values are generated by permutation test, - #the top ones always have p value 0. So comparing p values actually does nothing. - #In addition, for the tissue data in our database, the N is always the same. - #So it's safe to compare with tissue corr statistic value. - #That's the same as literature corr. - #if self.method in [METHOD_LIT, METHOD_TISSUE_PEARSON, METHOD_TISSUE_RANK] and self.gene_id: - # traits.sort(webqtlUtil.cmpLitCorr) - #else: - #if self.method in TISSUE_METHODS: - # sort(traits, key=lambda A: math.fabs(A.tissue_corr)) - #elif self.method == METHOD_LIT: - # traits.sort(traits, key=lambda A: math.fabs(A.lit_corr)) - #else: traits = sortTraitCorrelations(traits, self.method) # Strip to the top N correlations @@ -1069,21 +1054,6 @@ class CorrelationResults(object): return (symbolCorrDict, symbolPvalueDict) - - def correlate(self): - self.correlation_data = collections.defaultdict(list) - for trait, values in self.target_dataset.trait_data.iteritems(): - values_1 = [] - values_2 = [] - for index,sample in enumerate(self.target_dataset.samplelist): - target_value = values[index] - if sample in self.sample_data.keys(): - this_value = self.sample_data[sample] - values_1.append(this_value) - values_2.append(target_value) - correlation = calCorrelation(values_1, values_2) - self.correlation_data[trait] = correlation - def getFileName(self, target_db_name): ### dcrowell August 2008 """Returns the name of the reference database file with which correlations are calculated. Takes argument cursor which is a cursor object of any instance of a subclass of templatePage |