From d8cec0ef94b7683f42946ce182a937484ad1034a Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 10 Apr 2018 21:30:33 +0000 Subject: Most code in webqtlUtil.py has been removed. There is so little left that the remaining code should be migrated to the files that use them at some point. Some of this code was in the correlation code but not being used, so it was removed from there as well. --- wqflask/utility/webqtlUtil.py | 700 +-------------------- .../wqflask/correlation/correlation_functions.py | 56 -- wqflask/wqflask/correlation/show_corr_results.py | 30 - 3 files changed, 1 insertion(+), 785 deletions(-) (limited to 'wqflask') diff --git a/wqflask/utility/webqtlUtil.py b/wqflask/utility/webqtlUtil.py index 94dd7cbf..83fa90b7 100644 --- a/wqflask/utility/webqtlUtil.py +++ b/wqflask/utility/webqtlUtil.py @@ -65,41 +65,6 @@ ParInfo ={ # Accessory Functions ######################################### -def exportData(hddn, tdata, NP = None): - for key in tdata.keys(): - _val, _var, _N = tdata[key].val, tdata[key].var, tdata[key].N - if _val != None: - hddn[key] = _val - if _var != None: - hddn['V'+key] = _var - if NP and _N != None: - hddn['N'+key] = _N - -def genShortStrainName(RISet='', input_strainName=''): - #aliasStrainDict = {'C57BL/6J':'B6','DBA/2J':'D2'} - strainName = input_strainName - if RISet != 'AXBXA': - if RISet == 'BXD300': - this_RISet = 'BXD' - elif RISet == 'BDF2-2005': - this_RISet = 'CASE05_' - else: - this_RISet = RISet - strainName = string.replace(strainName,this_RISet,'') - strainName = string.replace(strainName,'CASE','') - try: - strainName = "%02d" % int(strainName) - except: - pass - else: - strainName = string.replace(strainName,'AXB','A') - strainName = string.replace(strainName,'BXA','B') - try: - strainName = strainName[0] + "%02d" % int(strainName[1:]) - except: - pass - return strainName - def genRandStr(prefix = "", length=8, chars=string.letters+string.digits): from random import choice _str = prefix[:] @@ -107,63 +72,6 @@ def 
genRandStr(prefix = "", length=8, chars=string.letters+string.digits): _str += choice(chars) return _str -def StringAsFloat(str): - 'Converts string to float but catches any exception and returns None' - try: - return float(str) - except: - return None - -def IntAsFloat(str): - 'Converts string to Int but catches any exception and returns None' - try: - return int(str) - except: - return None - -def FloatAsFloat(flt): - 'Converts float to string but catches any exception and returns None' - try: - return float("%2.3f" % flt) - except: - return None - -def RemoveZero(flt): - 'Converts string to float but catches any exception and returns None' - try: - if abs(flt) < 1e-6: - return None - else: - return flt - except: - return None - - -def SciFloat(d): - 'Converts string to float but catches any exception and returns None' - - try: - if abs(d) <= 1.0e-4: - return "%1.2e" % d - else: - return "%1.5f" % d - except: - return None - -###To be removed -def FloatList2String(lst): - 'Converts float list to string but catches any exception and returns None' - tt='' - try: - for item in lst: - if item == None: - tt += 'X ' - else: - tt += '%f ' % item - return tt - except: - return "" - def ListNotNull(lst): '''Obsolete - Use built in function any (or all or whatever) @@ -176,427 +84,6 @@ def ListNotNull(lst): return 1 return None -###To be removed -def FileDataProcess(str): - 'Remove the description text from the input file if theres any' - i=0 - while i'\x20': - break - else: - i+=1 - str=str[i:] - str=string.join(string.split(str,'\000'),'') - i=string.find(str,"*****") - if i>-1: - return str[i+5:] - else: - return str - -def rank(a,lst,offset=0): - """Calculate the integer rank of a number in an array, can be used to calculate p-value""" - n = len(lst) - if n == 2: - if a lst[1]: - return offset + 2 - else: - return offset +1 - elif n == 1: - if a B.LRS: - return 1 - elif A.LRS == B.LRS: - return 0 - else: - return -1 - except: - return 0 - - -def cmpScanResult2(A,B): - 
try: - if A.LRS < B.LRS: - return 1 - elif A.LRS == B.LRS: - return 0 - else: - return -1 - except: - return 0 - -def cmpOrder(A,B): - try: - if A[1] < B[1]: - return -1 - elif A[1] == B[1]: - return 0 - else: - return 1 - except: - return 0 - -def cmpOrder2(A,B): - try: - if A[-1] < B[-1]: - return -1 - elif A[-1] == B[-1]: - return 0 - else: - return 1 - except: - return 0 - - - - -def calRank(xVals, yVals, N): ### Zach Sloan, February 4 2010 - """ - Returns a ranked set of X and Y values. These are used when generating - a Spearman scatterplot. Bear in mind that this sets values equal to each - other as the same rank. - """ - XX = [] - YY = [] - X = [0]*len(xVals) - Y = [0]*len(yVals) - j = 0 - - for i in range(len(xVals)): - - if xVals[i] != None and yVals[i] != None: - XX.append((j, xVals[i])) - YY.append((j, yVals[i])) - j = j + 1 - - NN = len(XX) - - XX.sort(cmpOrder2) - YY.sort(cmpOrder2) - - j = 1 - rank = 0.0 - - while j < NN: - - if XX[j][1] != XX[j-1][1]: - X[XX[j-1][0]] = j - j = j+1 - - else: - jt = j+1 - ji = j - for jt in range(j+1, NN): - if (XX[jt][1] != XX[j-1][1]): - break - rank = 0.5*(j+jt) - for ji in range(j-1, jt): - X[XX[ji][0]] = rank - if (jt == NN-1): - if (XX[jt][1] == XX[j-1][1]): - X[XX[NN-1][0]] = rank - j = jt+1 - - if j == NN: - if X[XX[NN-1][0]] == 0: - X[XX[NN-1][0]] = NN - - j = 1 - rank = 0.0 - - while j < NN: - - if YY[j][1] != YY[j-1][1]: - Y[YY[j-1][0]] = j - j = j+1 - else: - jt = j+1 - ji = j - for jt in range(j+1, NN): - if (YY[jt][1] != YY[j-1][1]): - break - rank = 0.5*(j+jt) - for ji in range(j-1, jt): - Y[YY[ji][0]] = rank - if (jt == NN-1): - if (YY[jt][1] == YY[j-1][1]): - Y[YY[NN-1][0]] = rank - j = jt+1 - - if j == NN: - if Y[YY[NN-1][0]] == 0: - Y[YY[NN-1][0]] = NN - - return (X,Y) - -def calCorrelationRank(xVals,yVals,N): - """ - Calculated Spearman Ranked Correlation. 
The algorithm works - by setting all tied ranks to the average of those ranks (for - example, if ranks 5-10 all have the same value, each will be set - to rank 7.5). - """ - - XX = [] - YY = [] - j = 0 - - for i in range(len(xVals)): - if (xVals[i]!= None and yVals[i]!= None) and (xVals[i] != "None" and yVals[i] != "None"): - XX.append((j,xVals[i])) - YY.append((j,yVals[i])) - j = j+1 - - NN = len(XX) - if NN <6: - return (0.0,NN) - XX.sort(cmpOrder2) - YY.sort(cmpOrder2) - X = [0]*NN - Y = [0]*NN - - j = 1 - rank = 0.0 - t = 0.0 - sx = 0.0 - - while j < NN: - - if XX[j][1] != XX[j-1][1]: - X[XX[j-1][0]] = j - j = j+1 - - else: - jt = j+1 - ji = j - for jt in range(j+1, NN): - if (XX[jt][1] != XX[j-1][1]): - break - rank = 0.5*(j+jt) - for ji in range(j-1, jt): - X[XX[ji][0]] = rank - t = jt-j - sx = sx + (t*t*t-t) - if (jt == NN-1): - if (XX[jt][1] == XX[j-1][1]): - X[XX[NN-1][0]] = rank - j = jt+1 - - if j == NN: - if X[XX[NN-1][0]] == 0: - X[XX[NN-1][0]] = NN - - j = 1 - rank = 0.0 - t = 0.0 - sy = 0.0 - - while j < NN: - - if YY[j][1] != YY[j-1][1]: - Y[YY[j-1][0]] = j - j = j+1 - else: - jt = j+1 - ji = j - for jt in range(j+1, NN): - if (YY[jt][1] != YY[j-1][1]): - break - rank = 0.5*(j+jt) - for ji in range(j-1, jt): - Y[YY[ji][0]] = rank - t = jt - j - sy = sy + (t*t*t-t) - if (jt == NN-1): - if (YY[jt][1] == YY[j-1][1]): - Y[YY[NN-1][0]] = rank - j = jt+1 - - if j == NN: - if Y[YY[NN-1][0]] == 0: - Y[YY[NN-1][0]] = NN - - D = 0.0 - - for i in range(NN): - D += (X[i]-Y[i])*(X[i]-Y[i]) - - fac = (1.0 -sx/(NN*NN*NN-NN))*(1.0-sy/(NN*NN*NN-NN)) - - return ((1-(6.0/(NN*NN*NN-NN))*(D+(sx+sy)/12.0))/math.sqrt(fac),NN) - - -def calCorrelationRankText(dbdata,userdata,N): ### dcrowell = David Crowell, July 2008 - """Calculates correlation ranks with data formatted from the text file. - dbdata, userdata are lists of strings. N is an int. Returns a float. 
- Used by correlationPage""" - XX = [] - YY = [] - j = 0 - for i in range(N): - if (dbdata[i]!= None and userdata[i]!=None) and (dbdata[i]!= 'None' and userdata[i]!='None'): - XX.append((j,float(dbdata[i]))) - YY.append((j,float(userdata[i]))) - j += 1 - NN = len(XX) - if NN <6: - return (0.0,NN) - XX.sort(cmpOrder2) - YY.sort(cmpOrder2) - X = [0]*NN - Y = [0]*NN - - j = 1 - rank = 0.0 - t = 0.0 - sx = 0.0 - - while j < NN: - - if XX[j][1] != XX[j-1][1]: - X[XX[j-1][0]] = j - j = j+1 - - else: - jt = j+1 - ji = j - for jt in range(j+1, NN): - if (XX[jt][1] != XX[j-1][1]): - break - rank = 0.5*(j+jt) - for ji in range(j-1, jt): - X[XX[ji][0]] = rank - t = jt-j - sx = sx + (t*t*t-t) - if (jt == NN-1): - if (XX[jt][1] == XX[j-1][1]): - X[XX[NN-1][0]] = rank - j = jt+1 - - if j == NN: - if X[XX[NN-1][0]] == 0: - X[XX[NN-1][0]] = NN - - j = 1 - rank = 0.0 - t = 0.0 - sy = 0.0 - - while j < NN: - - if YY[j][1] != YY[j-1][1]: - Y[YY[j-1][0]] = j - j = j+1 - else: - jt = j+1 - ji = j - for jt in range(j+1, NN): - if (YY[jt][1] != YY[j-1][1]): - break - rank = 0.5*(j+jt) - for ji in range(j-1, jt): - Y[YY[ji][0]] = rank - t = jt - j - sy = sy + (t*t*t-t) - if (jt == NN-1): - if (YY[jt][1] == YY[j-1][1]): - Y[YY[NN-1][0]] = rank - j = jt+1 - - if j == NN: - if Y[YY[NN-1][0]] == 0: - Y[YY[NN-1][0]] = NN - - D = 0.0 - - for i in range(NN): - D += (X[i]-Y[i])*(X[i]-Y[i]) - - fac = (1.0 -sx/(NN*NN*NN-NN))*(1.0-sy/(NN*NN*NN-NN)) - - return ((1-(6.0/(NN*NN*NN-NN))*(D+(sx+sy)/12.0))/math.sqrt(fac),NN) - - - -def calCorrelation(dbdata,userdata,N): - X = [] - Y = [] - for i in range(N): - if dbdata[i]!= None and userdata[i]!= None: - X.append(dbdata[i]) - Y.append(userdata[i]) - NN = len(X) - if NN <6: - return (0.0,NN) - sx = reduce(lambda x,y:x+y,X,0.0) - sy = reduce(lambda x,y:x+y,Y,0.0) - meanx = sx/NN - meany = sy/NN - xyd = 0.0 - sxd = 0.0 - syd = 0.0 - for i in range(NN): - xyd += (X[i] - meanx)*(Y[i]-meany) - sxd += (X[i] - meanx)*(X[i] - meanx) - syd += (Y[i] - meany)*(Y[i] 
- meany) - try: - corr = xyd/(sqrt(sxd)*sqrt(syd)) - except: - corr = 0 - return (corr,NN) - -def calCorrelationText(dbdata,userdata,N): ### dcrowell July 2008 - """Calculates correlation coefficients with values formatted from text files. dbdata, userdata are lists of strings. N is an int. Returns a float - Used by correlationPage""" - X = [] - Y = [] - for i in range(N): - #if (dbdata[i]!= None and userdata[i]!= None) and (dbdata[i]!= 'None' and userdata[i]!= 'None'): - # X.append(float(dbdata[i])) - # Y.append(float(userdata[i])) - if dbdata[i] == None or dbdata[i] == 'None' or userdata[i] == None or userdata[i] == 'None': - continue - else: - X.append(float(dbdata[i])) - Y.append(float(userdata[i])) - NN = len(X) - if NN <6: - return (0.0,NN) - sx = sum(X) - sy = sum(Y) - meanx = sx/float(NN) - meany = sy/float(NN) - xyd = 0.0 - sxd = 0.0 - syd = 0.0 - for i in range(NN): - x1 = X[i]-meanx - y1 = Y[i]-meany - xyd += x1*y1 - sxd += x1**2 - syd += y1**2 - try: - corr = xyd/(sqrt(sxd)*sqrt(syd)) - except: - corr = 0 - return (corr,NN) - - def readLineCSV(line): ### dcrowell July 2008 """Parses a CSV string of text and returns a list containing each element as a string. 
Used by correlationPage""" @@ -605,45 +92,6 @@ def readLineCSV(line): ### dcrowell July 2008 returnList[0]=returnList[0][1:] return returnList - -def cmpCorr(A,B): - try: - if abs(A[1]) < abs(B[1]): - return 1 - elif abs(A[1]) == abs(B[1]): - return 0 - else: - return -1 - except: - return 0 - -def cmpLitCorr(A,B): - try: - if abs(A[3]) < abs(B[3]): return 1 - elif abs(A[3]) == abs(B[3]): - if abs(A[1]) < abs(B[1]): return 1 - elif abs(A[1]) == abs(B[1]): return 0 - else: return -1 - else: return -1 - except: - return 0 - -def cmpPValue(A,B): - try: - if A.corrPValue < B.corrPValue: - return -1 - elif A.corrPValue == B.corrPValue: - if abs(A.corr) > abs(B.corr): - return -1 - elif abs(A.corr) < abs(B.corr): - return 1 - else: - return 0 - else: - return 1 - except: - return 0 - def cmpEigenValue(A,B): try: if A[0] > B[0]: @@ -655,80 +103,6 @@ def cmpEigenValue(A,B): except: return 0 - -def cmpLRSFull(A,B): - try: - if A[0] < B[0]: - return -1 - elif A[0] == B[0]: - return 0 - else: - return 1 - except: - return 0 - -def cmpLRSInteract(A,B): - try: - if A[1] < B[1]: - return -1 - elif A[1] == B[1]: - return 0 - else: - return 1 - except: - return 0 - - -def cmpPos(A,B): - try: - try: - AChr = int(A.chr) - except: - AChr = 20 - try: - BChr = int(B.chr) - except: - BChr = 20 - if AChr > BChr: - return 1 - elif AChr == BChr: - if A.mb > B.mb: - return 1 - if A.mb == B.mb: - return 0 - else: - return -1 - else: - return -1 - except: - return 0 - -def cmpGenoPos(A,B): - try: - A1 = A.chr - B1 = B.chr - try: - A1 = int(A1) - except: - A1 = 25 - try: - B1 = int(B1) - except: - B1 = 25 - if A1 > B1: - return 1 - elif A1 == B1: - if A.mb > B.mb: - return 1 - if A.mb == B.mb: - return 0 - else: - return -1 - else: - return -1 - except: - return 0 - def hasAccessToConfidentialPhenotypeTrait(privilege, userName, authorized_users): access_to_confidential_phenotype_trait = 0 if webqtlConfig.USERDICT[privilege] > webqtlConfig.USERDICT['user']: @@ -737,76 +111,4 @@ def 
hasAccessToConfidentialPhenotypeTrait(privilege, userName, authorized_users) AuthorizedUsersList=map(string.strip, string.split(authorized_users, ',')) if AuthorizedUsersList.__contains__(userName): access_to_confidential_phenotype_trait = 1 - return access_to_confidential_phenotype_trait - - -class VisualizeException(Exception): - def __init__(self, message): - self.message = message - def __str__(self): - return self.message - -# safeConvert : (string -> A) -> A -> A -# to convert a string to type A, using the supplied default value -# if the given conversion function doesn't work -def safeConvert(f, value, default): - try: - return f(value) - except: - return default - -# safeFloat : string -> float -> float -# to convert a string to a float safely -def safeFloat(value, default): - return safeConvert(float, value, default) - -# safeInt: string -> int -> int -# to convert a string to an int safely -def safeInt(value, default): - return safeConvert(int, value, default) - -# safeString : string -> (arrayof string) -> string -> string -# if a string is not in a list of strings to pick a default value -# for that string -def safeString(value, validChoices, default): - if value in validChoices: - return value - else: - return default - -# yesNoToInt: string -> int -# map "yes" -> 1 and "no" -> 0 -def yesNoToInt(value): - if value == "yes": - return 1 - elif value == "no": - return 0 - else: - return None - -# IntToYesNo: int -> string -# map 1 -> "yes" and 0 -> "no" -def intToYesNo(value): - if value == 1: - return "yes" - elif value == 0: - return "no" - else: - return None - -def formatField(name): - name = name.replace("_", " ") - name = name.title() - #name = name.replace("Mb Mm6", "Mb"); - return name.replace("Id", "ID") - -def natsort_key(string): - r = [] - for c in string: - try: - c = int(c) - try: r[-1] = r[-1] * 10 + c - except: r.append(c) - except: - r.append(c) - return r \ No newline at end of file + return access_to_confidential_phenotype_trait \ No 
newline at end of file diff --git a/wqflask/wqflask/correlation/correlation_functions.py b/wqflask/wqflask/correlation/correlation_functions.py index 80a0818c..1ee9b558 100644 --- a/wqflask/wqflask/correlation/correlation_functions.py +++ b/wqflask/wqflask/correlation/correlation_functions.py @@ -491,62 +491,6 @@ pcor.rec <- function(x,y,z,method="p",na.rm=T){ return allcorrelations - -#XZ, April 30, 2010: The input primaryTrait and targetTrait are instance of webqtlTrait -#XZ: The primaryTrait and targetTrait should have executed retrieveData function -def calZeroOrderCorr(primaryTrait, targetTrait, method='pearson'): - - #primaryTrait.retrieveData() - - #there is no None value in primary_val - primary_strain, primary_val, primary_var = primaryTrait.exportInformative() - - #targetTrait.retrieveData() - - #there might be None value in target_val - target_val = targetTrait.exportData(primary_strain, type="val") - - R_primary = rpy2.robjects.FloatVector(range(len(primary_val))) - for i in range(len(primary_val)): - R_primary[i] = primary_val[i] - - N = len(target_val) - - if None in target_val: - goodIndex = [] - for i in range(len(target_val)): - if target_val[i] != None: - goodIndex.append(i) - - N = len(goodIndex) - - R_primary = rpy2.robjects.FloatVector(range(len(goodIndex))) - for i in range(len(goodIndex)): - R_primary[i] = primary_val[goodIndex[i]] - - R_target = rpy2.robjects.FloatVector(range(len(goodIndex))) - for i in range(len(goodIndex)): - R_target[i] = target_val[goodIndex[i]] - - else: - R_target = rpy2.robjects.FloatVector(range(len(target_val))) - for i in range(len(target_val)): - R_target[i] = target_val[i] - - R_corr_test = rpy2.robjects.r['cor.test'] - - if method == 'spearman': - R_result = R_corr_test(R_primary, R_target, method='spearman') - else: - R_result = R_corr_test(R_primary, R_target) - - corr_result = [] - corr_result.append( R_result[3][0] ) - corr_result.append( N ) - corr_result.append( R_result[2][0] ) - - return corr_result - 
##################################################################################### #Input: primaryValue(list): one list of expression values of one probeSet, # targetValue(list): one list of expression values of one probeSet, diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index abf9fc89..85a8c0ef 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -555,21 +555,6 @@ class CorrelationResults(object): self.record_count = len(traits) #ZS: This isn't a good way to get this value, so I need to change it later - #XZ, 3/31/2010: Theoretically, we should create one function 'comTissueCorr' - #to compare each trait by their tissue corr p values. - #But because the tissue corr p values are generated by permutation test, - #the top ones always have p value 0. So comparing p values actually does nothing. - #In addition, for the tissue data in our database, the N is always the same. - #So it's safe to compare with tissue corr statistic value. - #That's the same as literature corr. 
- #if self.method in [METHOD_LIT, METHOD_TISSUE_PEARSON, METHOD_TISSUE_RANK] and self.gene_id: - # traits.sort(webqtlUtil.cmpLitCorr) - #else: - #if self.method in TISSUE_METHODS: - # sort(traits, key=lambda A: math.fabs(A.tissue_corr)) - #elif self.method == METHOD_LIT: - # traits.sort(traits, key=lambda A: math.fabs(A.lit_corr)) - #else: traits = sortTraitCorrelations(traits, self.method) # Strip to the top N correlations @@ -1069,21 +1054,6 @@ class CorrelationResults(object): return (symbolCorrDict, symbolPvalueDict) - - def correlate(self): - self.correlation_data = collections.defaultdict(list) - for trait, values in self.target_dataset.trait_data.iteritems(): - values_1 = [] - values_2 = [] - for index,sample in enumerate(self.target_dataset.samplelist): - target_value = values[index] - if sample in self.sample_data.keys(): - this_value = self.sample_data[sample] - values_1.append(this_value) - values_2.append(target_value) - correlation = calCorrelation(values_1, values_2) - self.correlation_data[trait] = correlation - def getFileName(self, target_db_name): ### dcrowell August 2008 """Returns the name of the reference database file with which correlations are calculated. Takes argument cursor which is a cursor object of any instance of a subclass of templatePage -- cgit v1.2.3