aboutsummaryrefslogtreecommitdiff
path: root/web/webqtl/textUI/cmdCorrelation.py
diff options
context:
space:
mode:
Diffstat (limited to 'web/webqtl/textUI/cmdCorrelation.py')
-rwxr-xr-xweb/webqtl/textUI/cmdCorrelation.py325
1 files changed, 0 insertions, 325 deletions
diff --git a/web/webqtl/textUI/cmdCorrelation.py b/web/webqtl/textUI/cmdCorrelation.py
deleted file mode 100755
index 04595fc5..00000000
--- a/web/webqtl/textUI/cmdCorrelation.py
+++ /dev/null
@@ -1,325 +0,0 @@
-# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU Affero General Public License
-# as published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the GNU Affero General Public License for more details.
-#
-# This program is available from Source Forge: at GeneNetwork Project
-# (sourceforge.net/projects/genenetwork/).
-#
-# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
-# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
-#
-#
-#
-# This module is used by GeneNetwork project (www.genenetwork.org)
-#
-# Created by GeneNetwork Core Team 2010/08/10
-#
-# Last updated by GeneNetwork Core Team 2010/10/20
-
-import os
-import string
-from math import *
-import time
-
-import reaper
-
-from base import webqtlConfig
-from utility import webqtlUtil
-from cmdClass import cmdClass
-
-
-#########################################
-# Correlation Class
-#########################################
-class cmdCorrelation(cmdClass):
-
- calFunction = 'webqtlUtil.calCorrelation'
-
- def __init__(self,fd=None):
-
- cmdClass.__init__(self,fd)
-
- if not webqtlConfig.TEXTUI:
- self.contents.append("Please send your request to http://robot.genenetwork.org")
- return
-
-
- self.example = '###Example : <a href="%s%s?cmd=%s&probeset=100001_at&probe=136415&db=bra03-03Mas5&searchdb=BXDPublish&return=500&sort=pvalue">%s%s?cmd=%s&probeset=100001_at&probe=136415&db=bra03-03Mas5&searchdb=BXDPublish&return=500&sort=pvalue</a>' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, self.cmdID, webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, self.cmdID)
-
- if self.accessError:
- return
-
- self.searchDB = self.data.getvalue('searchdb')
- if not self.searchDB or self.error:
- self.contents.append("###Error: source trait doesn't exist or no target database was given")
- self.contents.append(self.example)
- self.contents.append(self.accessCode)
- return
-
- try:
- self.returnNumber = int(self.data.getvalue('return'))
- except:
- self.returnNumber = None
-
- self.sort = self.data.getvalue('sort')
-
- prefix, dbId = self.getDBId(self.database)
- if not prefix or not dbId or (self.probe and string.lower(self.probe) in ("all","mm","pm")):
- self.contents.append("###Error: source trait doesn't exist or SELECT more than one trait.")
- self.contents.append(self.example)
- self.contents.append(self.accessCode)
- return
- RISet = self.getRISet(prefix, dbId)
- prefix2, dbId2 = self.getDBId(self.searchDB)
- if not prefix2 or not dbId2:
- self.contents.append("###Error: target database doesn't exist.")
- self.contents.append(self.example)
- self.contents.append(self.accessCode)
- return
- RISet2 = self.getRISet(prefix2, dbId2)
- if RISet2 != RISet:
- self.contents.append("###Error: target database has different Mouse InbredSet.")
- self.contents.append(self.example)
- self.contents.append(self.accessCode)
- return
-
- traitdata, heads = self.getTraitData(prefix, dbId, self.probeset, self.probe)
- if not traitdata:
- self.contents.append("###Error: source trait doesn't exist.")
- self.contents.append(self.example)
- self.contents.append(self.accessCode)
- return
-
- StrainNames = []
- sourceTrait = []
- StrainIds = []
-
- #XZ, Jan 27, 2011: Only the strains that are of the same inbredset are used to calculate correlation.
- for item in traitdata:
- one_strain_name = item[0]
- one_strain_value = item[1]
-
- self.cursor.execute('SELECT Strain.Id from Strain,StrainXRef, InbredSet WHERE Strain.Name="%s" and Strain.Id = StrainXRef.StrainId and StrainXRef.InbredSetId = InbredSet.Id and InbredSet.Name = "%s"' % (one_strain_name, RISet2))
- Results = self.cursor.fetchall()
- if Results:
- StrainIds.append('%d' % Results[0][0])
- StrainNames.append( one_strain_name )
- sourceTrait.append( one_strain_value )
-
- correlationArray = []
-
- useFastMethod = False
- if prefix2 == "ProbeSet":
- DatabaseFileName = self.getFileName( target_db_id=dbId2 )
- DirectoryList = os.listdir(webqtlConfig.TEXTDIR) ### List of existing text files. Used to check if a text file already exists
- if DatabaseFileName in DirectoryList:
- useFastMethod = True
-
- if useFastMethod:
- datasetFile = open(webqtlConfig.TEXTDIR+DatabaseFileName,'r')
-
- #XZ, 01/08/2009: read the first line
- line = datasetFile.readline()
- dataset_strains = webqtlUtil.readLineCSV(line)[1:]
-
- #XZ, 01/08/2009: This step is critical. It is necessary for this new method.
- _newvals = []
- for item in dataset_strains:
- if item in StrainNames:
- _newvals.append(sourceTrait[StrainNames.index(item)])
- else:
- _newvals.append('None')
-
- nnCorr = len(_newvals)
-
-
- for line in datasetFile:
- traitdata=webqtlUtil.readLineCSV(line)
- traitdataName = traitdata[0]
- traitvals = traitdata[1:]
-
- corr,nOverlap = webqtlUtil.calCorrelationText(traitvals,_newvals,nnCorr)
- traitinfo = [traitdataName,corr,nOverlap]
- correlationArray.append( traitinfo )
-
- #calculate correlation with slow method
- else:
- correlationArray = self.calCorrelation(sourceTrait, self.readDB(StrainIds, prefix2, dbId2) )
-
- correlationArray.sort(self.cmpCorr) #XZ: Do not forget the sort step
-
- if not self.returnNumber:
- correlationArray = correlationArray[:100]
- else:
- if self.returnNumber < len(correlationArray):
- correlationArray = correlationArray[:self.returnNumber]
- NN = len(correlationArray)
- for i in range(NN):
- nOverlap = correlationArray[i][-1]
- corr = correlationArray[i][-2]
- if nOverlap < 3:
- corrPValue = 1.0
- else:
- if abs(corr) >= 1.0:
- corrPValue = 0.0
- else:
- ZValue = 0.5*log((1.0+corr)/(1.0-corr))
- ZValue = ZValue*sqrt(nOverlap-3)
- corrPValue = 2.0*(1.0 - reaper.normp(abs(ZValue)))
- correlationArray[i].append(corrPValue)
- if self.sort == 'pvalue':
- correlationArray.sort(self.cmpPValue)
-
- if prefix2 == 'Publish':
- self.contents.append("RecordID\tCorrelation\t#Strains\tp-value")
- elif prefix2 == 'Geno':
- self.contents.append("Locus\tCorrelation\t#Strains\tp-value")
- else:
- pass
-
- if prefix2 == 'Publish' or prefix2 == 'Geno':
- for item in correlationArray:
- self.contents.append("%s\t%2.6f\t%d\t%2.6f" % tuple(item))
- else:
- id = self.data.getvalue('id')
- if id == 'yes':
- self.contents.append("ProbesetID\tCorrelation\t#Strains\tp-value\tGeneID")
- for item in correlationArray:
- query = """SELECT GeneID from %s WHERE Name = '%s'""" % (prefix2,item[0])
- self.cursor.execute(query)
- results = self.cursor.fetchall()
- if not results:
- item = item + [None]
- else:
- item = item + list(results[0])
- self.contents.append("%s\t%2.6f\t%d\t%2.6f\t%s" % tuple(item))
- elif id == 'only':
- self.contents.append("GenID")
- for item in correlationArray:
- query = """SELECT GeneID from %s WHERE Name = '%s'""" % (prefix2,item[0])
- self.cursor.execute(query)
- results = self.cursor.fetchall()
- if not results:
- self.contents.append('None')
- else:
- self.contents.append(results[0][0])
- else:
- self.contents.append("ProbesetID\tCorrelation\t#Strains\tp-value")
- for item in correlationArray:
- self.contents.append("%s\t%2.6f\t%d\t%2.6f" % tuple(item))
-
-
-
-
- def getFileName(self, target_db_id):
-
- query = 'SELECT Id, FullName FROM ProbeSetFreeze WHERE Id = %s' % target_db_id
- self.cursor.execute(query)
- result = self.cursor.fetchone()
- Id = result[0]
- FullName = result[1]
- FullName = FullName.replace(' ','_')
- FullName = FullName.replace('/','_')
-
- FileName = 'ProbeSetFreezeId_' + str(Id) + '_FullName_' + FullName + '.txt'
-
- return FileName
-
-
-
- def calCorrelation(self,source,target):
- allcorrelations = []
- NN = len(source)
-
- if len(source) != len(target[0]) - 1:
- return allcorrelations
- else:
- for traitData in target:
- corr,nOverlap = eval("%s(traitData[1:],source,NN)" % self.calFunction)
- traitinfo = [traitData[0],corr,nOverlap]
- allcorrelations.append(traitinfo)
-
- return allcorrelations
-
- def cmpCorr(self,A,B):
- try:
- if abs(A[1]) < abs(B[1]):
- return 1
- elif abs(A[1]) == abs(B[1]):
- return 0
- else:
- return -1
- except:
- return 0
-
- def cmpPValue(self,A,B):
- try:
- if A[-1] > B[-1]:
- return 1
- elif A[-1] == B[-1]:
- return 0
- else:
- return -1
- except:
- return 0
-
-
- def readDB(self, StrainIds=[], prefix2='', dbId2=''):
-
- #retrieve data from target database
- nnn = len(StrainIds) / 25
- if len(StrainIds) % 25:
- nnn += 1
- oridata = []
- for step in range(nnn):
- temp = []
- StrainIdstep = StrainIds[step*25:min(len(StrainIds), (step+1)*25)]
- for item in StrainIdstep:
- temp.append('T%s.value' % item)
- #XZ, 03/05/2009: test http://www.genenetwork.org/webqtl/WebQTL.py?cmd=cor&probeset=100001_at&probe=136415&db=bra08-03MAS5&searchdb=BXDPublish&return=500&sort=pvalue
- if prefix2 == "Publish":
- query = "SELECT PublishXRef.Id, "
- dataStartPos = 1
- query += string.join(temp,', ')
- query += ' from (PublishXRef, PublishFreeze)\n'
- #XZ, 03/05/2009: Xiaodong changed Data to PublishData
- for item in StrainIdstep:
- query += 'left join PublishData as T%s on T%s.Id = PublishXRef.DataId and T%s.StrainId=%s\n' %(item,item,item,item)
- query += "WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId and PublishFreeze.Id = %d" % (dbId2, )
- #XZ, 03/05/2009: test http://www.genenetwork.org/webqtl/WebQTL.py?cmd=cor&probeset=100001_at&probe=136415&db=bra08-03MAS5&searchdb=HC_M2_1005_M&return=500&sort=pvalue
- #XZ, 03/05/2009: test http://www.genenetwork.org/webqtl/WebQTL.py?cmd=cor&probeset=100001_at&probe=136415&db=bra08-03MAS5&searchdb=BXDGeno&return=500&sort=pvalue
- else:
- query = "SELECT %s.Name," % prefix2
- query += string.join(temp,', ')
- query += ' from (%s, %sXRef, %sFreeze) \n' % (prefix2,prefix2,prefix2)
- #XZ, 03/05/2009: Xiaodong changed Data to %sData
- for item in StrainIdstep:
- query += 'left join %sData as T%s on T%s.Id = %sXRef.DataId and T%s.StrainId=%s\n' %(prefix2,item,item,prefix2,item,item)
- query += "WHERE %sXRef.%sFreezeId = %sFreeze.Id and %sFreeze.Id = %d and %s.Id = %sXRef.%sId" % (prefix2, prefix2, prefix2, prefix2, dbId2, prefix2, prefix2, prefix2)
- self.cursor.execute(query)
- results = self.cursor.fetchall()
- if not results:
- self.contents.append("###Error: target database doesn't exist.")
- self.contents.append(self.example)
- self.contents.append(self.accessCode)
- return
- oridata.append(results)
-
- datasize = len(oridata[0])
- targetTrait = []
- for j in range(datasize):
- traitdata = list(oridata[0][j])
- for i in range(1,nnn):
- traitdata += list(oridata[i][j][1:])
- targetTrait.append(traitdata)
-
- return targetTrait
-