# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License # as published by the Free Software Foundation, either version 3 of the # License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # See the GNU Affero General Public License for more details. # # This program is available from Source Forge: at GeneNetwork Project # (sourceforge.net/projects/genenetwork/). # # Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) # at rwilliams@uthsc.edu and xzhou15@uthsc.edu # # # # This module is used by GeneNetwork project (www.genenetwork.org) # # Created by GeneNetwork Core Team 2010/08/10 # # Last updated by GeneNetwork Core Team 2010/10/20 import string import time import re import math from math import * from htmlgen import HTMLgen2 as HT from base import webqtlConfig # NL, 07/27/2010. moved from webqtlForm.py #Dict of Parents and F1 information, In the order of [F1, Mat, Pat] ParInfo ={ 'BXH':['BHF1', 'HBF1', 'C57BL/6J', 'C3H/HeJ'], 'AKXD':['AKF1', 'KAF1', 'AKR/J', 'DBA/2J'], 'BXD':['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'], 'C57BL-6JxC57BL-6NJF2':['', '', 'C57BL/6J', 'C57BL/6NJ'], 'BXD300':['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'], 'B6BTBRF2':['B6BTBRF1', 'BTBRB6F1', 'C57BL/6J', 'BTBRT<+>tf/J'], 'BHHBF2':['B6HF2','HB6F2','C57BL/6J','C3H/HeJ'], 'BHF2':['B6HF2','HB6F2','C57BL/6J','C3H/HeJ'], 'B6D2F2':['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'], 'BDF2-1999':['B6D2F2', 'D2B6F2', 'C57BL/6J', 'DBA/2J'], 'BDF2-2005':['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'], 'CTB6F2':['CTB6F2','B6CTF2','C57BL/6J','Castaneous'], 'CXB':['CBF1', 'BCF1', 'C57BL/6ByJ', 'BALB/cByJ'], 'AXBXA':['ABF1', 'BAF1', 'C57BL/6J', 'A/J'], 'AXB':['ABF1', 'BAF1', 'C57BL/6J', 'A/J'], 'BXA':['BAF1', 'ABF1', 'C57BL/6J', 'A/J'], 'LXS':['LSF1', 'SLF1', 'ISS', 'ILS'], 'HXBBXH':['HSRBNF1', 'BNHSRF1', 'BN', 'HSR'], 'BayXSha':['BayXShaF1', 'ShaXBayF1', 'Bay-0','Shahdara'], 'ColXBur':['ColXBurF1', 'BurXColF1', 'Col-0','Bur-0'], 'ColXCvi':['ColXCviF1', 'CviXColF1', 'Col-0','Cvi'], 'SXM':['SMF1', 'MSF1', 'Steptoe','Morex'] } ######################################### # Accessory Functions ######################################### def exportData(hddn, tdata, NP = None): for key in tdata.keys(): _val, _var, _N = tdata[key].val, tdata[key].var, tdata[key].N if _val != None: hddn[key] = _val if _var != None: hddn['V'+key] = _var if NP and _N != None: hddn['N'+key] = _N def genShortStrainName(RISet='', input_strainName=''): #aliasStrainDict = {'C57BL/6J':'B6','DBA/2J':'D2'} strainName = input_strainName if RISet != 'AXBXA': if RISet == 'BXD300': this_RISet = 'BXD' elif RISet == 'BDF2-2005': this_RISet = 'CASE05_' else: this_RISet = RISet strainName = string.replace(strainName,this_RISet,'') strainName = string.replace(strainName,'CASE','') try: strainName = "%02d" % int(strainName) except: pass else: strainName = string.replace(strainName,'AXB','A') strainName = string.replace(strainName,'BXA','B') try: strainName = strainName[0] + "%02d" % int(strainName[1:]) except: pass return strainName def genRandStr(prefix = "", length=8, chars=string.letters+string.digits): from random import choice _str = prefix[:] for i in range(length): _str += choice(chars) return _str def StringAsFloat(str): 'Converts string to float but catches any exception and returns None' try: return float(str) except: return None def IntAsFloat(str): 'Converts string to Int but catches any exception and returns None' try: return int(str) except: return None def FloatAsFloat(flt): 'Converts float to string but catches any exception and returns None' try: return float("%2.3f" % flt) except: return None def RemoveZero(flt): 'Converts string to float but catches any exception and returns None' try: if abs(flt) < 1e-6: return None else: return flt except: return None def SciFloat(d): 'Converts string to float but catches any exception and returns None' try: if abs(d) <= 1.0e-4: return "%1.2e" % d else: return "%1.5f" % d except: return None ###To be removed def FloatList2String(lst): 'Converts float list to string but catches any exception and returns None' tt='' try: for item in lst: if item == None: tt += 'X ' else: tt += '%f ' % item return tt except: return "" def ListNotNull(lst): '''Obsolete - Use built in function any (or all or whatever) Determine if the elements in a list are all null ''' for item in lst: if item is not None: return 1 return None ###To be removed def FileDataProcess(str): 'Remove the description text from the input file if theres any' i=0 while i'\x20': break else: i+=1 str=str[i:] str=string.join(string.split(str,'\000'),'') i=string.find(str,"*****") if i>-1: return str[i+5:] else: return str def rank(a,lst,offset=0): """Calculate the integer rank of a number in an array, can be used to calculate p-value""" n = len(lst) if n == 2: if a lst[1]: return offset + 2 else: return offset +1 elif n == 1: if a B.LRS: return 1 elif A.LRS == B.LRS: return 0 else: return -1 except: return 0 def cmpScanResult2(A,B): try: if A.LRS < B.LRS: return 1 elif A.LRS == B.LRS: return 0 else: return -1 except: return 0 def cmpOrder(A,B): try: if A[1] < B[1]: return -1 elif A[1] == B[1]: return 0 else: return 1 except: return 0 def cmpOrder2(A,B): try: if A[-1] < B[-1]: return -1 elif A[-1] == B[-1]: return 0 else: return 1 except: return 0 def calRank(xVals, yVals, N): ### Zach Sloan, February 4 2010 """ Returns a ranked set of X and Y values. These are used when generating a Spearman scatterplot. Bear in mind that this sets values equal to each other as the same rank. """ XX = [] YY = [] X = [0]*len(xVals) Y = [0]*len(yVals) j = 0 for i in range(len(xVals)): if xVals[i] != None and yVals[i] != None: XX.append((j, xVals[i])) YY.append((j, yVals[i])) j = j + 1 NN = len(XX) XX.sort(cmpOrder2) YY.sort(cmpOrder2) j = 1 rank = 0.0 while j < NN: if XX[j][1] != XX[j-1][1]: X[XX[j-1][0]] = j j = j+1 else: jt = j+1 ji = j for jt in range(j+1, NN): if (XX[jt][1] != XX[j-1][1]): break rank = 0.5*(j+jt) for ji in range(j-1, jt): X[XX[ji][0]] = rank if (jt == NN-1): if (XX[jt][1] == XX[j-1][1]): X[XX[NN-1][0]] = rank j = jt+1 if j == NN: if X[XX[NN-1][0]] == 0: X[XX[NN-1][0]] = NN j = 1 rank = 0.0 while j < NN: if YY[j][1] != YY[j-1][1]: Y[YY[j-1][0]] = j j = j+1 else: jt = j+1 ji = j for jt in range(j+1, NN): if (YY[jt][1] != YY[j-1][1]): break rank = 0.5*(j+jt) for ji in range(j-1, jt): Y[YY[ji][0]] = rank if (jt == NN-1): if (YY[jt][1] == YY[j-1][1]): Y[YY[NN-1][0]] = rank j = jt+1 if j == NN: if Y[YY[NN-1][0]] == 0: Y[YY[NN-1][0]] = NN return (X,Y) def calCorrelationRank(xVals,yVals,N): """ Calculated Spearman Ranked Correlation. The algorithm works by setting all tied ranks to the average of those ranks (for example, if ranks 5-10 all have the same value, each will be set to rank 7.5). """ XX = [] YY = [] j = 0 for i in range(len(xVals)): if (xVals[i]!= None and yVals[i]!= None) and (xVals[i] != "None" and yVals[i] != "None"): XX.append((j,xVals[i])) YY.append((j,yVals[i])) j = j+1 NN = len(XX) if NN <6: return (0.0,NN) XX.sort(cmpOrder2) YY.sort(cmpOrder2) X = [0]*NN Y = [0]*NN j = 1 rank = 0.0 t = 0.0 sx = 0.0 while j < NN: if XX[j][1] != XX[j-1][1]: X[XX[j-1][0]] = j j = j+1 else: jt = j+1 ji = j for jt in range(j+1, NN): if (XX[jt][1] != XX[j-1][1]): break rank = 0.5*(j+jt) for ji in range(j-1, jt): X[XX[ji][0]] = rank t = jt-j sx = sx + (t*t*t-t) if (jt == NN-1): if (XX[jt][1] == XX[j-1][1]): X[XX[NN-1][0]] = rank j = jt+1 if j == NN: if X[XX[NN-1][0]] == 0: X[XX[NN-1][0]] = NN j = 1 rank = 0.0 t = 0.0 sy = 0.0 while j < NN: if YY[j][1] != YY[j-1][1]: Y[YY[j-1][0]] = j j = j+1 else: jt = j+1 ji = j for jt in range(j+1, NN): if (YY[jt][1] != YY[j-1][1]): break rank = 0.5*(j+jt) for ji in range(j-1, jt): Y[YY[ji][0]] = rank t = jt - j sy = sy + (t*t*t-t) if (jt == NN-1): if (YY[jt][1] == YY[j-1][1]): Y[YY[NN-1][0]] = rank j = jt+1 if j == NN: if Y[YY[NN-1][0]] == 0: Y[YY[NN-1][0]] = NN D = 0.0 for i in range(NN): D += (X[i]-Y[i])*(X[i]-Y[i]) fac = (1.0 -sx/(NN*NN*NN-NN))*(1.0-sy/(NN*NN*NN-NN)) return ((1-(6.0/(NN*NN*NN-NN))*(D+(sx+sy)/12.0))/math.sqrt(fac),NN) def calCorrelationRankText(dbdata,userdata,N): ### dcrowell = David Crowell, July 2008 """Calculates correlation ranks with data formatted from the text file. dbdata, userdata are lists of strings. N is an int. Returns a float. Used by correlationPage""" XX = [] YY = [] j = 0 for i in range(N): if (dbdata[i]!= None and userdata[i]!=None) and (dbdata[i]!= 'None' and userdata[i]!='None'): XX.append((j,float(dbdata[i]))) YY.append((j,float(userdata[i]))) j += 1 NN = len(XX) if NN <6: return (0.0,NN) XX.sort(cmpOrder2) YY.sort(cmpOrder2) X = [0]*NN Y = [0]*NN j = 1 rank = 0.0 t = 0.0 sx = 0.0 while j < NN: if XX[j][1] != XX[j-1][1]: X[XX[j-1][0]] = j j = j+1 else: jt = j+1 ji = j for jt in range(j+1, NN): if (XX[jt][1] != XX[j-1][1]): break rank = 0.5*(j+jt) for ji in range(j-1, jt): X[XX[ji][0]] = rank t = jt-j sx = sx + (t*t*t-t) if (jt == NN-1): if (XX[jt][1] == XX[j-1][1]): X[XX[NN-1][0]] = rank j = jt+1 if j == NN: if X[XX[NN-1][0]] == 0: X[XX[NN-1][0]] = NN j = 1 rank = 0.0 t = 0.0 sy = 0.0 while j < NN: if YY[j][1] != YY[j-1][1]: Y[YY[j-1][0]] = j j = j+1 else: jt = j+1 ji = j for jt in range(j+1, NN): if (YY[jt][1] != YY[j-1][1]): break rank = 0.5*(j+jt) for ji in range(j-1, jt): Y[YY[ji][0]] = rank t = jt - j sy = sy + (t*t*t-t) if (jt == NN-1): if (YY[jt][1] == YY[j-1][1]): Y[YY[NN-1][0]] = rank j = jt+1 if j == NN: if Y[YY[NN-1][0]] == 0: Y[YY[NN-1][0]] = NN D = 0.0 for i in range(NN): D += (X[i]-Y[i])*(X[i]-Y[i]) fac = (1.0 -sx/(NN*NN*NN-NN))*(1.0-sy/(NN*NN*NN-NN)) return ((1-(6.0/(NN*NN*NN-NN))*(D+(sx+sy)/12.0))/math.sqrt(fac),NN) def calCorrelation(dbdata,userdata,N): X = [] Y = [] for i in range(N): if dbdata[i]!= None and userdata[i]!= None: X.append(dbdata[i]) Y.append(userdata[i]) NN = len(X) if NN <6: return (0.0,NN) sx = reduce(lambda x,y:x+y,X,0.0) sy = reduce(lambda x,y:x+y,Y,0.0) meanx = sx/NN meany = sy/NN xyd = 0.0 sxd = 0.0 syd = 0.0 for i in range(NN): xyd += (X[i] - meanx)*(Y[i]-meany) sxd += (X[i] - meanx)*(X[i] - meanx) syd += (Y[i] - meany)*(Y[i] - meany) try: corr = xyd/(sqrt(sxd)*sqrt(syd)) except: corr = 0 return (corr,NN) def calCorrelationText(dbdata,userdata,N): ### dcrowell July 2008 """Calculates correlation coefficients with values formatted from text files. dbdata, userdata are lists of strings. N is an int. Returns a float Used by correlationPage""" X = [] Y = [] for i in range(N): #if (dbdata[i]!= None and userdata[i]!= None) and (dbdata[i]!= 'None' and userdata[i]!= 'None'): # X.append(float(dbdata[i])) # Y.append(float(userdata[i])) if dbdata[i] == None or dbdata[i] == 'None' or userdata[i] == None or userdata[i] == 'None': continue else: X.append(float(dbdata[i])) Y.append(float(userdata[i])) NN = len(X) if NN <6: return (0.0,NN) sx = sum(X) sy = sum(Y) meanx = sx/float(NN) meany = sy/float(NN) xyd = 0.0 sxd = 0.0 syd = 0.0 for i in range(NN): x1 = X[i]-meanx y1 = Y[i]-meany xyd += x1*y1 sxd += x1**2 syd += y1**2 try: corr = xyd/(sqrt(sxd)*sqrt(syd)) except: corr = 0 return (corr,NN) def readLineCSV(line): ### dcrowell July 2008 """Parses a CSV string of text and returns a list containing each element as a string. Used by correlationPage""" returnList = line.split('","') returnList[-1]=returnList[-1][:-2] returnList[0]=returnList[0][1:] return returnList def cmpCorr(A,B): try: if abs(A[1]) < abs(B[1]): return 1 elif abs(A[1]) == abs(B[1]): return 0 else: return -1 except: return 0 def cmpLitCorr(A,B): try: if abs(A[3]) < abs(B[3]): return 1 elif abs(A[3]) == abs(B[3]): if abs(A[1]) < abs(B[1]): return 1 elif abs(A[1]) == abs(B[1]): return 0 else: return -1 else: return -1 except: return 0 def cmpPValue(A,B): try: if A.corrPValue < B.corrPValue: return -1 elif A.corrPValue == B.corrPValue: if abs(A.corr) > abs(B.corr): return -1 elif abs(A.corr) < abs(B.corr): return 1 else: return 0 else: return 1 except: return 0 def cmpEigenValue(A,B): try: if A[0] > B[0]: return -1 elif A[0] == B[0]: return 0 else: return 1 except: return 0 def cmpLRSFull(A,B): try: if A[0] < B[0]: return -1 elif A[0] == B[0]: return 0 else: return 1 except: return 0 def cmpLRSInteract(A,B): try: if A[1] < B[1]: return -1 elif A[1] == B[1]: return 0 else: return 1 except: return 0 def cmpPos(A,B): try: try: AChr = int(A.chr) except: AChr = 20 try: BChr = int(B.chr) except: BChr = 20 if AChr > BChr: return 1 elif AChr == BChr: if A.mb > B.mb: return 1 if A.mb == B.mb: return 0 else: return -1 else: return -1 except: return 0 def cmpGenoPos(A,B): try: A1 = A.chr B1 = B.chr try: A1 = int(A1) except: A1 = 25 try: B1 = int(B1) except: B1 = 25 if A1 > B1: return 1 elif A1 == B1: if A.mb > B.mb: return 1 if A.mb == B.mb: return 0 else: return -1 else: return -1 except: return 0 def hasAccessToConfidentialPhenotypeTrait(privilege, userName, authorized_users): access_to_confidential_phenotype_trait = 0 if webqtlConfig.USERDICT[privilege] > webqtlConfig.USERDICT['user']: access_to_confidential_phenotype_trait = 1 else: AuthorizedUsersList=map(string.strip, string.split(authorized_users, ',')) if AuthorizedUsersList.__contains__(userName): access_to_confidential_phenotype_trait = 1 return access_to_confidential_phenotype_trait class VisualizeException(Exception): def __init__(self, message): self.message = message def __str__(self): return self.message # safeConvert : (string -> A) -> A -> A # to convert a string to type A, using the supplied default value # if the given conversion function doesn't work def safeConvert(f, value, default): try: return f(value) except: return default # safeFloat : string -> float -> float # to convert a string to a float safely def safeFloat(value, default): return safeConvert(float, value, default) # safeInt: string -> int -> int # to convert a string to an int safely def safeInt(value, default): return safeConvert(int, value, default) # safeString : string -> (arrayof string) -> string -> string # if a string is not in a list of strings to pick a default value # for that string def safeString(value, validChoices, default): if value in validChoices: return value else: return default # yesNoToInt: string -> int # map "yes" -> 1 and "no" -> 0 def yesNoToInt(value): if value == "yes": return 1 elif value == "no": return 0 else: return None # IntToYesNo: int -> string # map 1 -> "yes" and 0 -> "no" def intToYesNo(value): if value == 1: return "yes" elif value == 0: return "no" else: return None def formatField(name): name = name.replace("_", " ") name = name.title() #name = name.replace("Mb Mm6", "Mb"); return name.replace("Id", "ID") def natsort_key(string): r = [] for c in string: try: c = int(c) try: r[-1] = r[-1] * 10 + c except: r.append(c) except: r.append(c) return r