diff options
author | root | 2012-05-08 18:39:56 -0500 |
---|---|---|
committer | root | 2012-05-08 18:39:56 -0500 |
commit | ea46f42ee640928b92947bfb204c41a482d80937 (patch) | |
tree | 9b27a4eb852d12539b543c3efee9d2a47ef470f3 /web/webqtl/textUI/cmdCorrelation.py | |
parent | 056b5253fc3857b0444382aa39944f6344dc1ceb (diff) | |
download | genenetwork2-ea46f42ee640928b92947bfb204c41a482d80937.tar.gz |
Add all the source codes into the github.
Diffstat (limited to 'web/webqtl/textUI/cmdCorrelation.py')
-rwxr-xr-x | web/webqtl/textUI/cmdCorrelation.py | 325 |
1 files changed, 325 insertions, 0 deletions
diff --git a/web/webqtl/textUI/cmdCorrelation.py b/web/webqtl/textUI/cmdCorrelation.py new file mode 100755 index 00000000..04595fc5 --- /dev/null +++ b/web/webqtl/textUI/cmdCorrelation.py @@ -0,0 +1,325 @@ +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License +# as published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the GNU Affero General Public License for more details. +# +# This program is available from Source Forge: at GeneNetwork Project +# (sourceforge.net/projects/genenetwork/). +# +# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) +# at rwilliams@uthsc.edu and xzhou15@uthsc.edu +# +# +# +# This module is used by GeneNetwork project (www.genenetwork.org) +# +# Created by GeneNetwork Core Team 2010/08/10 +# +# Last updated by GeneNetwork Core Team 2010/10/20 + +import os +import string +from math import * +import time + +import reaper + +from base import webqtlConfig +from utility import webqtlUtil +from cmdClass import cmdClass + + +######################################### +# Correlation Class +######################################### +class cmdCorrelation(cmdClass): + + calFunction = 'webqtlUtil.calCorrelation' + + def __init__(self,fd=None): + + cmdClass.__init__(self,fd) + + if not webqtlConfig.TEXTUI: + self.contents.append("Please send your request to http://robot.genenetwork.org") + return + + + self.example = '###Example : <a href="%s%s?cmd=%s&probeset=100001_at&probe=136415&db=bra03-03Mas5&searchdb=BXDPublish&return=500&sort=pvalue">%s%s?cmd=%s&probeset=100001_at&probe=136415&db=bra03-03Mas5&searchdb=BXDPublish&return=500&sort=pvalue</a>' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, self.cmdID, webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, self.cmdID) + + if self.accessError: + return + + self.searchDB = self.data.getvalue('searchdb') + if not self.searchDB or self.error: + self.contents.append("###Error: source trait doesn't exist or no target database was given") + self.contents.append(self.example) + self.contents.append(self.accessCode) + return + + try: + self.returnNumber = int(self.data.getvalue('return')) + except: + self.returnNumber = None + + self.sort = self.data.getvalue('sort') + + prefix, dbId = self.getDBId(self.database) + if not prefix or not dbId or (self.probe and string.lower(self.probe) in ("all","mm","pm")): + self.contents.append("###Error: source trait doesn't exist or SELECT more than one trait.") + self.contents.append(self.example) + self.contents.append(self.accessCode) + return + RISet = self.getRISet(prefix, dbId) + prefix2, dbId2 = self.getDBId(self.searchDB) + if not prefix2 or not dbId2: + self.contents.append("###Error: target database doesn't exist.") + self.contents.append(self.example) + self.contents.append(self.accessCode) + return + RISet2 = self.getRISet(prefix2, dbId2) + if RISet2 != RISet: + self.contents.append("###Error: target database has different Mouse InbredSet.") + self.contents.append(self.example) + self.contents.append(self.accessCode) + return + + traitdata, heads = self.getTraitData(prefix, dbId, self.probeset, self.probe) + if not traitdata: + self.contents.append("###Error: source trait doesn't exist.") + self.contents.append(self.example) + self.contents.append(self.accessCode) + return + + StrainNames = [] + sourceTrait = [] + StrainIds = [] + + #XZ, Jan 27, 2011: Only the strains that are of the same inbredset are used to calculate correlation. + for item in traitdata: + one_strain_name = item[0] + one_strain_value = item[1] + + self.cursor.execute('SELECT Strain.Id from Strain,StrainXRef, InbredSet WHERE Strain.Name="%s" and Strain.Id = StrainXRef.StrainId and StrainXRef.InbredSetId = InbredSet.Id and InbredSet.Name = "%s"' % (one_strain_name, RISet2)) + Results = self.cursor.fetchall() + if Results: + StrainIds.append('%d' % Results[0][0]) + StrainNames.append( one_strain_name ) + sourceTrait.append( one_strain_value ) + + correlationArray = [] + + useFastMethod = False + if prefix2 == "ProbeSet": + DatabaseFileName = self.getFileName( target_db_id=dbId2 ) + DirectoryList = os.listdir(webqtlConfig.TEXTDIR) ### List of existing text files. Used to check if a text file already exists + if DatabaseFileName in DirectoryList: + useFastMethod = True + + if useFastMethod: + datasetFile = open(webqtlConfig.TEXTDIR+DatabaseFileName,'r') + + #XZ, 01/08/2009: read the first line + line = datasetFile.readline() + dataset_strains = webqtlUtil.readLineCSV(line)[1:] + + #XZ, 01/08/2009: This step is critical. It is necessary for this new method. + _newvals = [] + for item in dataset_strains: + if item in StrainNames: + _newvals.append(sourceTrait[StrainNames.index(item)]) + else: + _newvals.append('None') + + nnCorr = len(_newvals) + + + for line in datasetFile: + traitdata=webqtlUtil.readLineCSV(line) + traitdataName = traitdata[0] + traitvals = traitdata[1:] + + corr,nOverlap = webqtlUtil.calCorrelationText(traitvals,_newvals,nnCorr) + traitinfo = [traitdataName,corr,nOverlap] + correlationArray.append( traitinfo ) + + #calculate correlation with slow method + else: + correlationArray = self.calCorrelation(sourceTrait, self.readDB(StrainIds, prefix2, dbId2) ) + + correlationArray.sort(self.cmpCorr) #XZ: Do not forget the sort step + + if not self.returnNumber: + correlationArray = correlationArray[:100] + else: + if self.returnNumber < len(correlationArray): + correlationArray = correlationArray[:self.returnNumber] + NN = len(correlationArray) + for i in range(NN): + nOverlap = correlationArray[i][-1] + corr = correlationArray[i][-2] + if nOverlap < 3: + corrPValue = 1.0 + else: + if abs(corr) >= 1.0: + corrPValue = 0.0 + else: + ZValue = 0.5*log((1.0+corr)/(1.0-corr)) + ZValue = ZValue*sqrt(nOverlap-3) + corrPValue = 2.0*(1.0 - reaper.normp(abs(ZValue))) + correlationArray[i].append(corrPValue) + if self.sort == 'pvalue': + correlationArray.sort(self.cmpPValue) + + if prefix2 == 'Publish': + self.contents.append("RecordID\tCorrelation\t#Strains\tp-value") + elif prefix2 == 'Geno': + self.contents.append("Locus\tCorrelation\t#Strains\tp-value") + else: + pass + + if prefix2 == 'Publish' or prefix2 == 'Geno': + for item in correlationArray: + self.contents.append("%s\t%2.6f\t%d\t%2.6f" % tuple(item)) + else: + id = self.data.getvalue('id') + if id == 'yes': + self.contents.append("ProbesetID\tCorrelation\t#Strains\tp-value\tGeneID") + for item in correlationArray: + query = """SELECT GeneID from %s WHERE Name = '%s'""" % (prefix2,item[0]) + self.cursor.execute(query) + results = self.cursor.fetchall() + if not results: + item = item + [None] + else: + item = item + list(results[0]) + self.contents.append("%s\t%2.6f\t%d\t%2.6f\t%s" % tuple(item)) + elif id == 'only': + self.contents.append("GenID") + for item in correlationArray: + query = """SELECT GeneID from %s WHERE Name = '%s'""" % (prefix2,item[0]) + self.cursor.execute(query) + results = self.cursor.fetchall() + if not results: + self.contents.append('None') + else: + self.contents.append(results[0][0]) + else: + self.contents.append("ProbesetID\tCorrelation\t#Strains\tp-value") + for item in correlationArray: + self.contents.append("%s\t%2.6f\t%d\t%2.6f" % tuple(item)) + + + + + def getFileName(self, target_db_id): + + query = 'SELECT Id, FullName FROM ProbeSetFreeze WHERE Id = %s' % target_db_id + self.cursor.execute(query) + result = self.cursor.fetchone() + Id = result[0] + FullName = result[1] + FullName = FullName.replace(' ','_') + FullName = FullName.replace('/','_') + + FileName = 'ProbeSetFreezeId_' + str(Id) + '_FullName_' + FullName + '.txt' + + return FileName + + + + def calCorrelation(self,source,target): + allcorrelations = [] + NN = len(source) + + if len(source) != len(target[0]) - 1: + return allcorrelations + else: + for traitData in target: + corr,nOverlap = eval("%s(traitData[1:],source,NN)" % self.calFunction) + traitinfo = [traitData[0],corr,nOverlap] + allcorrelations.append(traitinfo) + + return allcorrelations + + def cmpCorr(self,A,B): + try: + if abs(A[1]) < abs(B[1]): + return 1 + elif abs(A[1]) == abs(B[1]): + return 0 + else: + return -1 + except: + return 0 + + def cmpPValue(self,A,B): + try: + if A[-1] > B[-1]: + return 1 + elif A[-1] == B[-1]: + return 0 + else: + return -1 + except: + return 0 + + + def readDB(self, StrainIds=[], prefix2='', dbId2=''): + + #retrieve data from target database + nnn = len(StrainIds) / 25 + if len(StrainIds) % 25: + nnn += 1 + oridata = [] + for step in range(nnn): + temp = [] + StrainIdstep = StrainIds[step*25:min(len(StrainIds), (step+1)*25)] + for item in StrainIdstep: + temp.append('T%s.value' % item) + #XZ, 03/05/2009: test http://www.genenetwork.org/webqtl/WebQTL.py?cmd=cor&probeset=100001_at&probe=136415&db=bra08-03MAS5&searchdb=BXDPublish&return=500&sort=pvalue + if prefix2 == "Publish": + query = "SELECT PublishXRef.Id, " + dataStartPos = 1 + query += string.join(temp,', ') + query += ' from (PublishXRef, PublishFreeze)\n' + #XZ, 03/05/2009: Xiaodong changed Data to PublishData + for item in StrainIdstep: + query += 'left join PublishData as T%s on T%s.Id = PublishXRef.DataId and T%s.StrainId=%s\n' %(item,item,item,item) + query += "WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId and PublishFreeze.Id = %d" % (dbId2, ) + #XZ, 03/05/2009: test http://www.genenetwork.org/webqtl/WebQTL.py?cmd=cor&probeset=100001_at&probe=136415&db=bra08-03MAS5&searchdb=HC_M2_1005_M&return=500&sort=pvalue + #XZ, 03/05/2009: test http://www.genenetwork.org/webqtl/WebQTL.py?cmd=cor&probeset=100001_at&probe=136415&db=bra08-03MAS5&searchdb=BXDGeno&return=500&sort=pvalue + else: + query = "SELECT %s.Name," % prefix2 + query += string.join(temp,', ') + query += ' from (%s, %sXRef, %sFreeze) \n' % (prefix2,prefix2,prefix2) + #XZ, 03/05/2009: Xiaodong changed Data to %sData + for item in StrainIdstep: + query += 'left join %sData as T%s on T%s.Id = %sXRef.DataId and T%s.StrainId=%s\n' %(prefix2,item,item,prefix2,item,item) + query += "WHERE %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Id = %d and %s.Id = %sXRef.%sId" % (prefix2, prefix2, prefix2, prefix2, dbId2, prefix2, prefix2, prefix2) + self.cursor.execute(query) + results = self.cursor.fetchall() + if not results: + self.contents.append("###Error: target database doesn't exist.") + self.contents.append(self.example) + self.contents.append(self.accessCode) + return + oridata.append(results) + + datasize = len(oridata[0]) + targetTrait = [] + for j in range(datasize): + traitdata = list(oridata[0][j]) + for i in range(1,nnn): + traitdata += list(oridata[i][j][1:]) + targetTrait.append(traitdata) + + return targetTrait + |