aboutsummaryrefslogtreecommitdiff
path: root/web/webqtl/compareCorrelates
diff options
context:
space:
mode:
Diffstat (limited to 'web/webqtl/compareCorrelates')
-rwxr-xr-xweb/webqtl/compareCorrelates/MultipleCorrelationPage.py108
-rwxr-xr-xweb/webqtl/compareCorrelates/__init__.py0
-rwxr-xr-xweb/webqtl/compareCorrelates/correlation.py359
-rwxr-xr-xweb/webqtl/compareCorrelates/htmlModule.py279
-rwxr-xr-xweb/webqtl/compareCorrelates/multitrait.py1121
-rwxr-xr-xweb/webqtl/compareCorrelates/trait.py1074
6 files changed, 2941 insertions, 0 deletions
diff --git a/web/webqtl/compareCorrelates/MultipleCorrelationPage.py b/web/webqtl/compareCorrelates/MultipleCorrelationPage.py
new file mode 100755
index 00000000..6a464ab6
--- /dev/null
+++ b/web/webqtl/compareCorrelates/MultipleCorrelationPage.py
@@ -0,0 +1,108 @@
+# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU Affero General Public License
+# as published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the GNU Affero General Public License for more details.
+#
+# This program is available from Source Forge: at GeneNetwork Project
+# (sourceforge.net/projects/genenetwork/).
+#
+# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
+# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
+#
+#
+#
+# This module is used by GeneNetwork project (www.genenetwork.org)
+#
+# Created by GeneNetwork Core Team 2010/08/10
+#
+# Last updated by GeneNetwork Core Team 2010/10/20
+
+from base.templatePage import templatePage
+from utility import webqtlUtil
+from base.webqtlTrait import webqtlTrait
+from base import webqtlConfig
+import multitrait
+
+# XZ, 09/09/2008: After adding several traits to collection, click "Compare Correlates" button,
+# XZ, 09/09/2008: This class will generate what you see.
+# XZ, 09/09/2008: This class just collect the input, then pass them to multitrait.py
+#########################################
+# Multiple Correlation Page
+#########################################
+class MultipleCorrelationPage(templatePage):
+
+ def __init__(self,fd):
+
+ templatePage.__init__(self, fd)
+
+ if not self.openMysql():
+ return
+ if not fd.genotype:
+ fd.readData()
+
+ self.searchResult = fd.formdata.getvalue('searchResult')
+ if not self.searchResult:
+ heading = 'Compare Correlates'
+ detail = ['You need to select at least two traits in order to generate correlation matrix.']
+ self.error(heading=heading,detail=detail)
+ print 'Content-type: text/html\n'
+ self.write()
+ return
+ if type("1") == type(self.searchResult):
+ self.searchResult = [self.searchResult]
+
+ if self.searchResult:
+ if len(self.searchResult) > 100:
+ heading = 'Compare Correlates'
+ detail = ['In order to display Compare Correlates properly, Do not select more than %d traits for Compare Correlates.' % 100]
+ self.error(heading=heading,detail=detail)
+ print 'Content-type: text/html\n'
+ self.write()
+ return
+ else:
+ pass
+
+ traitList = []
+ for item in self.searchResult:
+ thisTrait = webqtlTrait(fullname=item, cursor=self.cursor)
+ thisTrait.retrieveInfo()
+ traitList.append(thisTrait)
+ else:
+ heading = 'Compare Correlates'
+ detail = [HT.Font('Error : ',color='red'),HT.Font('Error occurs while retrieving data from database.',color='black')]
+ self.error(heading=heading,detail=detail)
+ print 'Content-type: text/html\n'
+ self.write()
+ return
+
+
+ ##########
+ filename= webqtlUtil.genRandStr("mult_")
+ fp = open(webqtlConfig.IMGDIR+filename, 'wb')
+ fp.write('%s\n' % fd.RISet)
+ for thisTrait in traitList:
+ fp.write("%s,%s,%s\n" % (thisTrait.db.type,thisTrait.db.id,thisTrait.name))
+ fp.close()
+ fd.formdata["filename"] = filename
+
+ params = {"filename":filename, "targetDatabase":"",
+ "threshold":0.5, "subsetSize":10,
+ "correlation":"pearson", "subsetCount":10,
+ "firstRun":"1"}
+ results = []
+ txtOutputFileName = ""
+
+ self.dict['body'] = multitrait.TraitCorrelationPage(fd, params, self.cursor, traitList, results,
+ fd.RISet,txtOutputFileName).dict['body']
+ self.dict['title'] = 'Compare Correlates'
+
+
+
+
diff --git a/web/webqtl/compareCorrelates/__init__.py b/web/webqtl/compareCorrelates/__init__.py
new file mode 100755
index 00000000..e69de29b
--- /dev/null
+++ b/web/webqtl/compareCorrelates/__init__.py
diff --git a/web/webqtl/compareCorrelates/correlation.py b/web/webqtl/compareCorrelates/correlation.py
new file mode 100755
index 00000000..f2ea55b3
--- /dev/null
+++ b/web/webqtl/compareCorrelates/correlation.py
@@ -0,0 +1,359 @@
+# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU Affero General Public License
+# as published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the GNU Affero General Public License for more details.
+#
+# This program is available from Source Forge: at GeneNetwork Project
+# (sourceforge.net/projects/genenetwork/).
+#
+# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
+# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
+#
+#
+#
+# This module is used by GeneNetwork project (www.genenetwork.org)
+#
+# Created by GeneNetwork Core Team 2010/08/10
+#
+# Last updated by GeneNetwork Core Team 2010/10/20
+
+# correlation.py
+# functions for computing correlations for traits
+#
+# Originally, this code was designed to compute Pearson product-moment
+# coefficents. The basic function calcPearson scans the strain data
+# for the two traits and drops data for a strain unless both traits have it.
+# If there are less than six strains left, we conclude that there's
+# insufficent data and drop the correlation.
+#
+# In addition, this code can compute Spearman rank-order coefficents using
+# the calcSpearman function.
+
+#Xiaodong changed the dependancy structure
+import numarray
+import numarray.ma as MA
+import time
+
+import trait
+
+# strainDataUnion : StrainData -> StrainData -> array, array
+def strainDataUnion(s1, s2):
+ # build lists of values that both have
+ # and make sure that both sets of values are in the same order
+ s1p = []
+ s2p = []
+ sortedKeys = s1.keys()
+ sortedKeys.sort()
+ for s in sortedKeys:
+ if s2.has_key(s):
+ s1p.append(s1[s])
+ s2p.append(s2[s])
+
+ return (numarray.array(s1p, numarray.Float64),
+ numarray.array(s2p, numarray.Float64))
+
+# calcCorrelationHelper : array -> array -> float
+def calcCorrelationHelper(s1p, s2p):
+ # if the traits share less than six strains, then we don't
+ # bother with the correlations
+ if len(s1p) < 6:
+ return 0.0
+
+ # subtract by x-bar and y-bar elementwise
+ #oldS1P = s1p.copy()
+ #oldS2P = s2p.copy()
+
+ s1p = (s1p - numarray.average(s1p)).astype(numarray.Float64)
+ s2p = (s2p - numarray.average(s2p)).astype(numarray.Float64)
+
+ # square for the variances
+ s1p_2 = numarray.sum(s1p**2)
+ s2p_2 = numarray.sum(s2p**2)
+
+ try:
+ corr = (numarray.sum(s1p*s2p)/
+ numarray.sqrt(s1p_2 * s2p_2))
+ except ZeroDivisionError:
+ corr = 0.0
+
+ return corr
+
+# calcSpearman : Trait -> Trait -> float
+def calcSpearman(trait1, trait2):
+ s1p, s2p = strainDataUnion(trait1.strainData,
+ trait2.strainData)
+ s1p = rankArray(s1p)
+ s2p = rankArray(s2p)
+ return calcCorrelationHelper(s1p, s2p)
+
+# calcPearson : Trait -> Trait -> float
+def calcPearson(trait1, trait2):
+ # build lists of values that both have
+ # and make sure that both sets of values are in the same order
+ s1p, s2p = strainDataUnion(trait1.strainData,
+ trait2.strainData)
+
+ return calcCorrelationHelper(s1p, s2p)
+
+# buildPearsonCorrelationMatrix: (listof n traits) -> int s -> n x s matrix, n x s matrix
+#def buildPearsonCorrelationMatrix(traits, sc):
+# dim = (len(traits), sc)
+# matrix = numarray.zeros(dim, MA.Float64)
+# testMatrix = numarray.zeros(dim, MA.Float64)
+
+# for i in range(len(traits)):
+# sd = traits[i].strainData
+# for key in sd.keys():
+# matrix[i,int(key) - 1] = sd[key]
+# testMatrix[i,int(key) - 1] = 1
+
+def buildPearsonCorrelationMatrix(traits, commonStrains):
+ dim = (len(traits), len(commonStrains))
+ matrix = numarray.zeros(dim, MA.Float64)
+ testMatrix = numarray.zeros(dim, MA.Float64)
+
+ for i in range(len(traits)):
+ sd = traits[i].strainData
+ keys = sd.keys()
+ for j in range(0, len(commonStrains)):
+ if keys.__contains__(commonStrains[j]):
+ matrix[i,j] = sd[commonStrains[j]]
+ testMatrix[i,j] = 1
+
+ return matrix, testMatrix
+
+# buildSpearmanCorrelationMatrix: (listof n traits) -> int s -> n x s matrix, n x s matrix
+def buildSpearmanCorrelationMatrix(traits, sc):
+ dim = (len(traits), sc)
+ matrix = numarray.zeros(dim, MA.Float64)
+ testMatrix = numarray.zeros(dim, MA.Float64)
+
+ def customCmp(a, b):
+ return cmp(a[1], b[1])
+
+ for i in range(len(traits)):
+ # copy strain data to a temporary list and turn it into
+ # (strain, expression) pairs
+ sd = traits[i].strainData
+ tempList = []
+ for key in sd.keys():
+ tempList.append((key, sd[key]))
+
+ # sort the temporary list by expression
+ tempList.sort(customCmp)
+
+ for j in range(len(tempList)):
+ # k is the strain id minus 1
+ # 1-based strain id -> 0-based column index
+ k = int(tempList[j][0]) - 1
+
+ # j is the rank of the particular strain
+ matrix[i,k] = j
+
+ testMatrix[i,k] = 1
+
+ return matrix, testMatrix
+
+def findLargestStrain(traits, sc):
+ strainMaxes = []
+ for i in range(len(traits)):
+ keys = traits[i].strainData.keys()
+ strainMaxes.append(max(keys))
+
+ return max(strainMaxes)
+
+def findCommonStrains(traits1, traits2):
+ commonStrains = []
+ strains1 = []
+ strains2 = []
+
+ for trait in traits1:
+ keys = trait.strainData.keys()
+ for key in keys:
+ if not strains1.__contains__(key):
+ strains1.append(key)
+
+ for trait in traits2:
+ keys = trait.strainData.keys()
+ for key in keys:
+ if not strains2.__contains__(key):
+ strains2.append(key)
+
+ for strain in strains1:
+ if strains2.__contains__(strain):
+ commonStrains.append(strain)
+
+ return commonStrains
+
+def calcPearsonMatrix(traits1, traits2, sc, strainThreshold=6,
+ verbose = 0):
+ return calcMatrixHelper(buildPearsonCorrelationMatrix,
+ traits1, traits2, sc, strainThreshold,
+ verbose)
+
+def calcProbeSetPearsonMatrix(cursor, freezeId, traits2, strainThreshold=6,
+ verbose = 0):
+
+ cursor.execute('select ProbeSetId from ProbeSetXRef where ProbeSetFreezeId = %s order by ProbeSetId' % freezeId)
+ ProbeSetIds = cursor.fetchall()
+
+ results = []
+ i=0
+ while i<len(ProbeSetIds):
+ ProbeSetId1 = ProbeSetIds[i][0]
+ if (i+4999) < len(ProbeSetIds):
+ ProbeSetId2 = ProbeSetIds[i+4999][0]
+ else:
+ ProbeSetId2 = ProbeSetIds[len(ProbeSetIds)-1][0]
+
+ traits1 = trait.queryPopulatedProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2) # XZ,09/10/2008: add module name 'trait.'
+ SubMatrix = calcMatrixHelper(buildPearsonCorrelationMatrix,
+ traits1, traits2, 1000, strainThreshold,
+ verbose)
+ results.append(SubMatrix)
+ i += 5000
+
+ returnValue = numarray.zeros((len(ProbeSetIds), len(traits2)), MA.Float64)
+ row = 0
+ col = 0
+ for SubMatrix in results:
+ for i in range(0, len(SubMatrix)):
+ for j in range(0, len(traits2)):
+ returnValue[row,col] = SubMatrix[i,j]
+ col += 1
+ col = 0
+ row +=1
+
+ return returnValue
+
+
+
+# note: this code DOES NOT WORK, especially in cases where
+# there are missing observations (e.g. when comparing traits
+# from different probesetfreezes)
+def calcSpearmanMatrix(traits1, traits2, sc, strainThreshold=6,
+ verbose=0):
+ return calcMatrixHelper(buildSpearmanCorrelationMatrix,
+ traits1, traits2, sc, strainThreshold,
+ verbose)
+
+def calcMatrixHelper(builder, traits1, traits2, sc, strainThreshold,
+ verbose):
+
+ # intelligently figure out strain count
+ step0 = time.time()
+ #localSC = max(findLargestStrain(traits1, sc),
+ # findLargestStrain(traits2, sc))
+
+ commonStrains = findCommonStrains(traits1, traits2)
+
+ buildStart = time.time()
+ matrix1, test1 = builder(traits1, commonStrains)
+ matrix2, test2 = builder(traits2, commonStrains)
+ buildTime = time.time() - buildStart
+
+ step1 = time.time()
+
+ ns = numarray.innerproduct(test1, test2)
+
+ # mask all ns less than strainThreshold so the correlation values
+ # end up masked
+ # ns is now a MaskedArray and so all ops involving ns will be
+ # MaskedArrays
+ ns = MA.masked_less(ns, strainThreshold, copy=0)
+
+ # divide-by-zero errors are automatically masked
+ #ns = -1.0/ns
+
+ step2 = time.time()
+
+ # see comment above to find out where this ridiculously cool
+ # matrix algebra comes from
+ xs = numarray.innerproduct(matrix1, test2)
+ ys = numarray.innerproduct(test1, matrix2)
+ xys = numarray.innerproduct(matrix1, matrix2)
+
+ # use in-place operations to try to speed things up
+ numarray.power(matrix1, 2, matrix1)
+ numarray.power(matrix2, 2, matrix2)
+
+ x2s = numarray.innerproduct(matrix1, test2)
+ y2s = numarray.innerproduct(test1, matrix2)
+
+ step3 = time.time()
+
+ # parens below are very important
+ # the instant we touch ns, arrays become masked and
+ # computation is much, much slower
+ top = ns*xys - (xs*ys)
+ bottom1 = ns*x2s - (xs*xs)
+ bottom2 = ns*y2s - (ys*ys)
+ bottom = MA.sqrt(bottom1*bottom2)
+
+ # mask catches floating point divide-by-zero problems here
+ corrs = top / bottom
+
+ step4 = time.time()
+
+ # we define undefined correlations as zero even though there
+ # is a mathematical distinction
+ returnValue = MA.filled(corrs, 0.0)
+
+ step5 = time.time()
+
+ #print ("calcMatrixHelper: %.2f s, %.2f s, %.2f s, %.2f s, %.2f s, %.2f s, total: %.2f s"
+ # %(buildTime,
+ # buildStart - step0,
+ # step2 - step1,
+ # step3 - step2,
+ # step4 - step3,
+ # step5 - step4,
+ # step5 - step0))
+
+ if verbose:
+ print "Matrix 1:", matrix1
+ print "Matrix 2:", matrix2
+ print "Ns:", ns
+ print "Xs", xs
+ print "Ys", ys
+ print "XYs:", xys
+ print "Top:", top
+ print "Bottom 1:", bottom1
+ print "Bottom 2:", bottom2
+ print "Bottom:", bottom
+ print "Corrs:", corrsa
+
+
+ return returnValue
+
+
+
+# rankArray: listof float -> listof float
+# to generate a companion list to alof with
+# the actual value of each element replaced by the
+# value's rank
+def rankArray(floatArray):
+ # first we save the original index of each element
+ tmpAlof = []
+ returnArray = numarray.zeros(len(floatArray), numarray.Float64)
+ i = 0
+ for i in range(len(floatArray)):
+ tmpAlof.append((i,floatArray[i]))
+
+ # now we sort by the data value
+ def customCmp(a,b): return cmp(a[1],b[1])
+ tmpAlof.sort(customCmp)
+
+ # finally we use the new rank data to populate the
+ # return array
+ for i in range(len(floatArray)):
+ returnArray[tmpAlof[i][0]] = i+1
+
+ return returnArray
diff --git a/web/webqtl/compareCorrelates/htmlModule.py b/web/webqtl/compareCorrelates/htmlModule.py
new file mode 100755
index 00000000..ebba3b86
--- /dev/null
+++ b/web/webqtl/compareCorrelates/htmlModule.py
@@ -0,0 +1,279 @@
+# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU Affero General Public License
+# as published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the GNU Affero General Public License for more details.
+#
+# This program is available from Source Forge: at GeneNetwork Project
+# (sourceforge.net/projects/genenetwork/).
+#
+# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
+# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
+#
+#
+#
+# This module is used by GeneNetwork project (www.genenetwork.org)
+#
+# Created by GeneNetwork Core Team 2010/08/10
+#
+# Last updated by GeneNetwork Core Team 2010/10/20
+
+import sys
+import string
+import os
+import MySQLdb
+import cgi
+
+from htmlgen import HTMLgen2 as HT
+
+from base import webqtlConfig
+
+
+# XZ 08/14/2008: When I tried to replace 'from webqtlConfig import *' with 'import webqtlConfig'
+# XZ 08/14/2008: I found some problems. I discussed with Hongqiang and the below is conclusion.
+# XZ 08/14/2008: The program uses webqtlConfig.DB_NAME, webqtlConfig.MYSQL_SERVER and so on
+# XZ 08/14/2008: without 'import webqtlConfig'. This program will not work.
+# XZ 08/14/2008: CONFIG_htmlpath doesn't exist in webqtlConfig.py
+# XZ 08/14/2008: Hongqian said this was done by Fan Zhang, and this program was not tested.
+# XZ 08/14/2008: So nobody realize these bugs.
+
+# XZ, 09/09/2008: This function is not called any where.
+# XZ, 09/09/2008: Actually, I don't think this function works.
+def genHeaderFooter(i=1,title='',basehref='',js1='',js2='',layer='',body=''):
+ """
+ generate footer and header HTML code
+ default is header
+ i = 0 is footer+header
+ i = 1 is header
+ i = 2 is footer
+ """
+ try:
+ temp_file = CONFIG_htmlpath + 'beta-template.html'
+ fp = open(temp_file, 'rb')
+ template = fp.read()
+ fp.close()
+ template = template % (title,basehref,js1,js2,layer,body, "")
+ header,footer = string.split(template,'<!-- split from Here -->')
+ if i == 0:
+ return header + footer
+ elif i == 1:
+ return header
+ elif i == 2:
+ return footer
+ else:
+ return ""
+ except:
+ if i == 0:
+ return "header + footer"
+ elif i == 1:
+ return "header"
+ elif i == 2:
+ return "footer"
+ else:
+ return ""
+
+# XZ, 09/09/2008: This function is only used in multitrait.py where it is called with value assigned to db.
+# XZ, 09/09/2008: So the try-except block is not executed.
+# XZ, 09/09/2008: This explains why no error was generated even without 'import webqtlConfig'
+def genDatabaseMenu(db = None, public =1, RISetgp = 'BXD', selectname = 'database', selected = ""):
+ """
+ generate database Menu
+ public = 0 : search3.html databases Menu
+ public = 1 : search.html databases Menu
+ """
+ if not db:
+ try:
+ # import MySQLdb
+ # con = MySQLdb.Connect(db='db_webqtl')
+ # Modified by Fan Zhang
+ con = MySQLdb.Connect(db=webqtlConfig.DB_NAME,host=webqtlConfig.MYSQL_SERVER, user=webqtlConfig.DB_USER,passwd=webqtlConfig.DB_PASSWD)
+ db = con.cursor()
+ except:
+ return "Connect MySQL Server Error"
+ else:
+ pass
+
+ databaseMenu = HT.Select(name=selectname)
+ nmenu = 0
+
+ # here's a hack: bxd and bxd300 can be correlated against each other
+ # if either of those are the group, we put in special SQL that pulls both
+ if RISetgp in ("BXD", "BXD300"):
+ ibsNameQry = '(InbredSet.Name = "BXD" OR InbredSet.Name = "BXD300")'
+ else:
+ ibsNameQry = 'InbredSet.Name = "%s"' % RISetgp
+
+ #Publish Database
+ db.execute('''
+ SelecT
+ PublishFreeze.FullName,
+ PublishFreeze.Name
+ from
+ PublishFreeze,
+ InbredSet
+ where
+ PublishFreeze.InbredSetId = InbredSet.Id and
+ %s
+ ''' % ibsNameQry)
+ for item in db.fetchall():
+ databaseMenu.append(item)
+ nmenu += 1
+
+ #Genome Database
+ db.execute('''
+ SelecT
+ GenoFreeze.FullName,
+ GenoFreeze.Name
+ from
+ GenoFreeze,InbredSet
+ where
+ GenoFreeze.InbredSetId = InbredSet.Id and
+ %s
+ ''' % ibsNameQry)
+ for item in db.fetchall():
+ databaseMenu.append(item)
+ nmenu += 1
+
+ #Microarray Database
+ db.execute('SelecT Id, Name from Tissue')
+ for item in db.fetchall():
+ TId, TName = item
+ databaseMenuSub = HT.Optgroup(label = '%s ------' % TName)
+ db.execute('''
+ SelecT
+ ProbeSetFreeze.FullName,
+ ProbeSetFreeze.Name
+ from
+ ProbeSetFreeze,
+ ProbeFreeze,
+ InbredSet
+ where
+ ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and
+ ProbeFreeze.TissueId = %d and
+ ProbeSetFreeze.public > %d and
+ ProbeFreeze.InbredSetId = InbredSet.Id and
+ %s
+ order by
+ ProbeSetFreeze.CreateTime desc,
+ ProbeSetFreeze.AvgId
+ ''' % (TId,public,ibsNameQry))
+ for item2 in db.fetchall():
+ databaseMenuSub.append(item2)
+ nmenu += 1
+ databaseMenu.append(databaseMenuSub)
+
+ if nmenu:
+ if selected:
+ databaseMenu.selected.append(selected)
+ return str(databaseMenu)
+ else:
+ return ''
+
+
+# XZ, 09/09/2008: This function is not called any where.
+# XZ, 09/09/2008: Actually, I don't think this function works.
+# XZ, 09/09/2008: There is no 'DataForm' file now. It should be webqtlForm.py
+def genRISample():
+ import glob
+ import reaper
+ import random
+ import math
+ import webqtlUtil
+ risets = filter(lambda X:X.find('F2')<0, map(os.path.basename, glob.glob(os.path.join(CONFIG_genodir, "*.geno"))))
+ risets = map(lambda X:X.split('.')[0], risets)
+ risets.remove("BayXSha")
+ risets.sort()
+ body = HT.Blockquote()
+ NPerRow = 6
+ for item in risets:
+ values = []
+ if item == 'AXBXA': item2='AXB/BXA'
+ elif item == 'HXBBXH': item2='HXB/BXH'
+ else: item2=item
+ body.append(HT.Paragraph(item2, Class='subtitle'))
+ tbl = HT.TableLite(Class="collap")
+ dataset = reaper.Dataset()
+ dataset.read(os.path.join(CONFIG_genodir, "%s.geno"%item))
+ prgy = webqtlUtil.ParInfo[item] + list(dataset.prgy)
+
+ mean = random.random()*100
+ variance = random.random()*500
+ variables = []
+ while len(variables) < len(prgy):
+ S = 2
+ while (S>=1):
+ U1= random.random()
+ U2= random.random()
+ V1= 2*U1-1.0
+ V2= 2*U2-1.0
+ S=V1*V1+V2*V2
+ X= math.sqrt(-2 * math.log(S) / S) * V1
+ Y= math.sqrt(-2 * math.log(S) / S) * V2
+ variables.append(mean + math.sqrt(variance) * X)
+ variables.append(mean + math.sqrt(variance) * Y)
+
+ tempTR = HT.TR()
+ for i, strain in enumerate(prgy):
+ if i and i%NPerRow==0:
+ tbl.append(tempTR)
+ tempTR = HT.TR()
+ if random.random() < 0.2:
+ variable = 'X'
+ else:
+ variable = "%2.3f" % variables[i]
+
+ tempTR.append(HT.TD(strain, Class="strains", width=80))
+ tempTR.append(HT.TD(variable, Class="values", width=60))
+ values.append(variable)
+
+ for j in range(NPerRow-i%NPerRow-1):
+ tempTR.append(HT.TD())
+ tbl.append(tempTR)
+ body.append(tbl)
+ body.append(HT.Paragraph("Copy the following line to paste into the GeneNetwork entry box:"))
+ body.append(HT.Code(string.join(values, " ")))
+ body.append(HT.HR(width="90%"))
+ return body
+
+if __name__ == "__main__":
+ if os.environ.has_key('SCRIPT_FILENAME'):
+ script_filename = os.environ['SCRIPT_FILENAME']
+ else:
+ script_filename = ''
+ #Used as cgi script
+ if script_filename and script_filename[-2:] == 'py':
+ print 'Content-type: text/html\n'
+ formdata = cgi.FieldStorage()
+ sys.stderr = sys.stdout
+ try:
+ getID = string.lower(formdata.getvalue('get'))
+ except:
+ getID = ''
+ #Used as command
+ else:
+ if len(sys.argv) >= 2:
+ getID = string.lower(sys.argv[1])
+ else:
+ getID = ''
+
+ if getID == 'headerfooter':
+ print genHeaderFooter(0)
+ elif getID == 'header':
+ print genHeaderFooter(1)
+ elif getID == 'footer':
+ print genHeaderFooter(2)
+ elif getID == 'databasemenu':
+ print genDatabaseMenu(public=0)
+ elif getID == 'datasample':
+ print genRISample()
+ else:
+ print genHeaderFooter(0)
+else:
+ pass
+
diff --git a/web/webqtl/compareCorrelates/multitrait.py b/web/webqtl/compareCorrelates/multitrait.py
new file mode 100755
index 00000000..047620af
--- /dev/null
+++ b/web/webqtl/compareCorrelates/multitrait.py
@@ -0,0 +1,1121 @@
+# multitrait.py
+# a tool to analyze the correlations between several different traits and the traits
+# in a given dataset
+#
+# Parameters:
+# correlation -- either "pearson" or "spearman" depending on which ones we want to use
+#
+# filename -- an input file containing the traits to analyze
+#
+# progress -- if set, this parameter outputs a static progress page
+# and uses a META redirect to trigger the real computation
+#
+# targetDatabaseType:
+# one of "ProbeSet", "Publish", "Genotype" depending on the type of database
+# we will use for the analysis
+#
+# targetDatabaseId:
+# the id (*Freeze.Id in the database) of the particular database we will analyze
+#
+# threshold -- a float between 0 and 1 to determine which coefficents we wil l consider
+#
+# firstRun -- either 0 or 1
+# whether to automatically pick reasonable defaults for the other three parameters
+#
+# outputType -- either "html" or "text"
+#
+# Author: Stephen Pitts
+# June 15, 2004
+
+#Xiaodong changed the dependancy structure
+
+import copy
+import sys
+import cgi
+import os
+import os.path
+import math
+import time
+import numarray
+import tempfile
+import string
+import cgitb #all tracebacks come out as HTMLified CGI,useful when we have a random crash in the middle
+
+from base import templatePage
+from base.webqtlTrait import webqtlTrait
+from utility import webqtlUtil
+from base import webqtlConfig
+import trait
+import correlation
+import htmlModule
+
+cgitb.enable()
+
+
+# where this program's data files are
+RootDir = webqtlConfig.IMGDIR # XZ, 09/10/2008: add module name 'webqtlConfig.'
+RootDirURL = "/image/" # XZ, 09/10/2008: This parameter is not used in this module
+
+tempfile.tempdir = RootDir
+tempfile.template = "multitrait"
+
+# MultitraitException: used if something goes wrong
+# maybe in the future we should make exceptions more granular
+class MultitraitException(Exception):
+ def __init__(self, message):
+ self.message = message
+
+ def __repr__(self):
+ return "MultitraitException: %s" % self.message
+
+# buildParamDict: Cursor -> ParamDict
+# to process and validate CGI arguments
+# see the comment at the top of this file for valid cgi
+# parameters
+def buildParamDict(cursor, fd):
+ params = {}
+ fs = fd.formdata #cgi.FieldStorage()
+ params["progress"] = fs.getfirst("progress", "0")
+ params["filename"] = fs.getfirst("filename", "")
+ if params["filename"] == "":
+ raise MultitraitException("Required parameter filename missing.")
+
+ params["targetDatabase"] = fs.getfirst("targetDatabase", "U74Av2RMA_Raw_ProbeSet_March04")
+ params["firstRun"] = webqtlUtil.safeInt(fs.getfirst("firstRun", "0"),0)
+ params["threshold"] = webqtlUtil.safeFloat(fs.getfirst("threshold", "0.5"), 0.5)
+ params["subsetSize"] = webqtlUtil.safeInt(fs.getfirst("subsetSize", "10"), 10)
+
+ if params["subsetSize"] < -1:
+ params["subsetSize"] = -1
+
+ params["correlation"] = fs.getfirst("correlation", "pearson")
+ params["subsetCount"] = webqtlUtil.safeInt(fs.getfirst("subsetCount", 10), 10)
+
+ if params["subsetCount"] < -1:
+ params["subsetCount"] = -1
+
+ #params["outputType"] = fs.getfirst("outputType", "html")
+
+ #if params["outputType"] not in ("html", "text"):
+ # params["outputType"] = "html"
+
+ if params["correlation"] not in ("pearson", "spearman"):
+ params["correlation"] = "pearson"
+
+ params["correlationName"] = params["correlation"].capitalize()
+
+ # one of two cases:
+ # 1) We have just come from a submit, so there are a bunch of display*
+ # but no displaySets. Thus, the code down there converts the display*
+ # to displaySets so the GET request doesn't get too long
+ # 2) We have just been redirected from a progress page which already has
+ # a converted displaySets for us.
+
+ displaySets = webqtlUtil.safeInt(fs.getfirst("displaySets","0"), 0)
+
+ if displaySets == 0:
+ for key in fs.keys():
+ if key[:7] == "display":
+ #print "Hit display key %s<br>" % key
+ try:
+ whichSet = int(key[7:])
+
+ # prevent malicious attacks
+ whichSet = min(whichSet, 512)
+ displaySets += pow(2, whichSet)
+
+ except ValueError: pass
+
+ params["displaySets"] = displaySets
+ #print "In the beginning, display sets was %s: %s<br>" % (displaySets,
+ # str(binaryDecompose(displaySets)))
+
+ # if we are just gonna display a progress page, then there's no
+ # reason to look up detailed database information
+ #if params["progress"] == "1":
+ # return params
+
+ a,b = trait.dbNameToTypeId(cursor, params["targetDatabase"]) # XZ, 09/10/2008: add module name
+ params["targetDatabaseType"] = a
+ params["targetDatabaseId"] = b
+ params["targetDatabaseName"] = params["targetDatabase"]
+
+ return params
+
+# readInputFile: DB cursor -> string -> string, (arrayof Trait)
+def readInputFile(cursor, filename):
+ """
+ To read an input file with n lines in the following format
+ <databasetype>,<databaseid>,<traitname>
+ and retrieve and populate traits with appropriate data
+ from the database
+
+ Also, for our purposes. we store the database type and
+ database id in fields attached to the trait instances. We use
+ this information to generate Javascript popups with trait
+ information.
+
+ In addition, we read the strain of mice that the traits are
+ from so we can only show those databases to correlate against.
+ """
+ handle = open(filename)
+ line = handle.readline()
+ inbredSetName = line.strip()
+ line = handle.readline()
+ traits = []
+
+# XZ, 09/10/2008: In this while loop block, I changed the original variable name 'trait' to 'oneTrait'
+ while line != "":
+ line = line.strip()
+ dbType, dbId, tName = line.split(",")
+
+ if dbType == "ProbeSet":
+ oneTrait = trait.queryProbeSetTraitByName(cursor, tName) # XZ, 09/10/2008: add module name
+ oneTrait.populateDataId(cursor, dbId)
+ oneTrait.dbName = trait.dbTypeIdToName(cursor, dbType, dbId) # XZ, 09/10/2008: add module name
+ elif dbType == "Geno":
+ speciesId = trait.getSpeciesIdByDbTypeId(cursor, dbType, dbId)
+ oneTrait = trait.queryGenotypeTraitByName(cursor, speciesId, tName) # XZ, 09/10/2008: add module name
+ oneTrait.populateDataId(cursor, dbId)
+ oneTrait.dbName = trait.dbTypeIdToName(cursor, dbType, dbId) # XZ, 09/10/2008: add module name
+ elif dbType == "Publish":
+ oneTrait = trait.queryPublishTraitByName(cursor, dbId, tName) # XZ, 09/10/2008: add module name
+ oneTrait.populateDataId(cursor, dbId)
+ oneTrait.dbName = trait.dbTypeIdToName(cursor, dbType, dbId) # XZ, 09/10/2008: add module name
+ elif dbType == "Temp":
+ oneTrait = trait.queryTempTraitByName(cursor, tName) # XZ, 09/10/2008: add module name
+ oneTrait.populateDataId(cursor, dbId)
+ oneTrait.dbName = "Temp"
+
+ oneTrait.populateStrainData(cursor)
+ traits.append(oneTrait)
+
+ line = handle.readline()
+
+ return inbredSetName, traits
+
+# loadDatabase: Cursor -> ParamDict -> arrayof Trait
+def loadDatabase(cursor, p):
+ """
+ To load a set of traits as specified by the
+ targetDatabaseId
+ and targetDatabaseType parameters
+
+ Cursor should be a fastCursor from the webqtl library (i.e.
+ a MySQLdb SSCursor).
+
+ Calling populateStrainData 20,000 or so times on a ProbeSet
+ is really inefficent, so I wrote an optimized queryPopulatedProbeSetTraits
+ in the trait module that uses a join to get all of the rows in
+ bulk, store the resultset on the server, and do all sorts of nice buffering.
+ It's about two or three times faster.
+ """
+ if p["targetDatabaseType"] == "ProbeSet": # XZ, 09/10/2008: add module name
+ dbTraits = trait.queryPopulatedProbeSetTraits(cursor,
+ p["targetDatabaseId"])
+ elif p["targetDatabaseType"] == "Publish": # XZ, 09/10/2008: add module name
+ dbTraits = trait.queryPublishTraits(cursor,
+ p["targetDatabaseId"])
+ psd = trait.PublishTrait.populateStrainData
+ elif p["targetDatabaseType"] == "Geno": # XZ, 09/10/2008: add module name
+ dbTraits = trait.queryGenotypeTraits(cursor,
+ p["targetDatabaseId"])
+ psd = trait.GenotypeTrait.populateStrainData
+ else:
+ print "Unknown target database type %s" % p["targetDatabaseType"]
+
+ if p["targetDatabaseType"] != "ProbeSet":
+ map(psd, dbTraits, [cursor]*len(dbTraits))
+
+ return dbTraits
+
+def runProbeSetCorrelations(cursor, p, traits):
+ """
+ To run the correlations between the traits and the database.
+ This function computes a correlation coefficent between each
+ trait and every entry in the database, and partitions the database
+ into a disjoint array of arrays which it returns.
+
+ The length of the return array is 2^n, where n is the length of
+ the trait array. Which constitutent element a of the return array
+ a given trait ends up in is determined by the following formula
+ i = i_02^0 + ... + i_(n-1)2^(n-1)
+ where i_0 is 1 if corr(a,trait 0) >= threshold and 0 otherwise
+
+ Since most of the several thousand database traits will end up
+ with i=0, we don't return them, so the first element of the
+ return array will be empty.
+
+ A particular element of subarray j of the return array contains
+ a 2-tuple (trait,kvalues). The variable trait is obviously the
+ particular database trait that matches the user traits l_1, ..., l_m
+ to which subarray j corresponds. kvalues is a list of the correlation
+ values linking trait to l_1, ..., l_m, so the length of kvalues is
+ the number of 1s in the binary representation of j (there must be
+ a better way to describe this length).
+
+ The return array is an array of 2-tuples. The first element of
+ each tuple is the index of the particular subarray, and the second
+ element is the subarray itself. The array is sorted in descending
+ order by the number of 1's in the binary representation of the
+ index so the first few subarrays are the ones that correspond to
+ the largest sets. Each subarray is then sorted by the average of
+ the magnitude of the individual correlation values.
+ """
+
+ kMin = p["threshold"]
+ traitArrays = {}
+
+ # TODO: Add Spearman support
+ freezeId = p["targetDatabaseId"]
+ if p["correlation"] == "pearson":
+ correlations = correlation.calcProbeSetPearsonMatrix(cursor, freezeId, traits) #XZ, 09/10/2008: add module name
+ else:
+ correlations = correlation.calcProbeSetSpearmanMatrix(freezeId, traits) #XZ, 09/10/2008: add module name
+
+ # now we test all of the correlations in bulk
+ test = numarray.absolute(correlations)
+ test = numarray.greater_equal(test, kMin)
+ test = test.astype(numarray.Int8)
+ #print test
+
+ db = trait.queryProbeSetTraits(cursor, freezeId) #XZ, 09/10/2008: add module name
+ for i in range(len(db)):
+ cIndex = 0
+ prods = []
+ for j in range(len(traits)):
+ if test[i,j] == 1:
+ cIndex += pow(2, j)
+ prods.append(correlations[i,j])
+ if cIndex != 0:
+ if not traitArrays.has_key(cIndex):
+ traitArrays[cIndex] = []
+
+ traitArrays[cIndex].append((db[i], prods))
+
+
+ # sort each inner list of traitArrays
+ # so the matched traits appear in descending order by the
+ # average magnitude of the correlation
+ def customCmp(traitPair, traitPair2):
+ magAvg1 = numarray.average(map(abs, traitPair[1]))
+ magAvg2 = numarray.average(map(abs, traitPair2[1]))
+
+ # invert the sign to get descending order
+ return -cmp(magAvg1, magAvg2)
+
+ for traitArray in traitArrays.values():
+ traitArray.sort(customCmp)
+
+ # sort the outer list of traitArrays
+ traitArrays2 = []
+ i = 0
+ for key in traitArrays.keys():
+ a = traitArrays[key]
+ if len(a) > 0:
+ traitArrays2.append((key,a,len(binaryDecompose(key)),
+ len(a)))
+
+ # we sort by the number of 1's in the binary output
+ # and then by the size of the list, both in descending order
+ def customCmp2(aL,bL):
+ a = -cmp(aL[2], bL[2])
+ if a == 0:
+ return -cmp(aL[3], bL[3])
+ else:
+ return a
+
+ traitArrays2.sort(customCmp2)
+
+ return traitArrays2
+
+def runCorrelations(p, strainCount, traits, db):
+ """
+ To run the correlations between the traits and the database.
+ This function computes a correlation coefficent between each
+ trait and every entry in the database, and partitions the database
+ into a disjoint array of arrays which it returns.
+
+ The length of the return array is 2^n, where n is the length of
+ the trait array. Which constitutent element a of the return array
+ a given trait ends up in is determined by the following formula
+ i = i_02^0 + ... + i_(n-1)2^(n-1)
+ where i_0 is 1 if corr(a,trait 0) >= threshold and 0 otherwise
+
+ Since most of the several thousand database traits will end up
+ with i=0, we don't return them, so the first element of the
+ return array will be empty.
+
+ A particular element of subarray j of the return array contains
+ a 2-tuple (trait,kvalues). The variable trait is obviously the
+ particular database trait that matches the user traits l_1, ..., l_m
+ to which subarray j corresponds. kvalues is a list of the correlation
+ values linking trait to l_1, ..., l_m, so the length of kvalues is
+ the number of 1s in the binary representation of j (there must be
+ a better way to describe this length).
+
+ The return array is an array of 2-tuples. The first element of
+ each tuple is the index of the particular subarray, and the second
+ element is the subarray itself. The array is sorted in descending
+ order by the number of 1's in the binary representation of the
+ index so the first few subarrays are the ones that correspond to
+ the largest sets. Each subarray is then sorted by the average of
+ the magnitude of the individual correlation values.
+ """
+ kMin = p["threshold"]
+ traitArrays = {}
+
+ # TODO: Add Spearman support
+ if p["correlation"] == "pearson":
+ correlations = correlation.calcPearsonMatrix(db, traits, strainCount) #XZ, 09/10/2008: add module name
+ else:
+ correlations = correlation.calcSpearmanMatrix(db, traits, strainCount) #XZ, 09/10/2008: add module name
+
+ # now we test all of the correlations in bulk
+ test = numarray.absolute(correlations)
+ test = numarray.greater_equal(test, kMin)
+ test = test.astype(numarray.Int8)
+ #print test
+
+
+ for i in range(len(db)):
+ cIndex = 0
+ prods = []
+ for j in range(len(traits)):
+ if test[i,j] == 1:
+ cIndex += pow(2, j)
+ prods.append(correlations[i,j])
+ if cIndex != 0:
+ if not traitArrays.has_key(cIndex):
+ traitArrays[cIndex] = []
+
+ traitArrays[cIndex].append((db[i], prods))
+
+ # sort each inner list of traitArrays
+ # so the matched traits appear in descending order by the
+ # average magnitude of the correlation
+ def customCmp(traitPair, traitPair2):
+ magAvg1 = numarray.average(map(abs, traitPair[1]))
+ magAvg2 = numarray.average(map(abs, traitPair2[1]))
+
+ # invert the sign to get descending order
+ return -cmp(magAvg1, magAvg2)
+
+ for traitArray in traitArrays.values():
+ traitArray.sort(customCmp)
+
+ # sort the outer list of traitArrays
+ traitArrays2 = []
+ i = 0
+ for key in traitArrays.keys():
+ a = traitArrays[key]
+ if len(a) > 0:
+ traitArrays2.append((key,a,len(binaryDecompose(key)),
+ len(a)))
+
+ # we sort by the number of 1's in the binary output
+ # and then by the size of the list, both in descending order
+ def customCmp2(aL,bL):
+ a = -cmp(aL[2], bL[2])
+ if a == 0:
+ return -cmp(aL[3], bL[3])
+ else:
+ return a
+
+ traitArrays2.sort(customCmp2)
+
+ return traitArrays2
+
+
+# XZ, 09/09/2008: In multiple trait correlation result page,
+# XZ, 09/09/2008: click "Download a text version of the above results in CSV format"
+
+# TraitCorrelationText: a class to display trait correlations
+# as textual output
+class TraitCorrelationText:
+ # build a text shell to describe the given trait correlations
+ # this method sets self.output; use str(self) to actually
+ # get the text page
+ #
+ # traits is a list of traits and traitArray is a
+ # list of 3-tuples: index, traits', garbage
+ # where index is a binary-encoded description of which subset of
+ # traits the list traits' matches
+ #
+ # traits' is a list of 3-tuples as well: trait, correlations, garbage
+ # where trait is a particular trait and correlations is a list of float
+ # correlations (matching traits above)
+ def __init__(self, p, traits, traitArray):
+ output = "Correlation Comparison\n"
+ output += "from WebQTL and the University of Tennessee Health Science Center\n"
+ output += "initiated at " + time.asctime(time.gmtime()) + " UTC\n\n"
+
+ output += self.showOptionPanel(p)
+ output += self.showSelectedTraits(traits)
+ output += self.showSummaryCorrelationResults(p, traits, traitArray)
+ output += self.showDetailedCorrelationResults(p, traits, traitArray)
+
+ self.output = output
+
+ # showOptionPanel: ParamDict -> string
+ # to display the options used to run this correlation
+ def showOptionPanel(self, params):
+ output = "Correlation Comparison Options:\n"
+ output += "Target database,%s\n" % params["targetDatabase"]
+ output += "Correlation type,%s\n" % params["correlationName"]
+ output += "Threshold,%f\n" % params["threshold"]
+ #output += "Subsets to Show,%d\n" % params["subsetCount"]
+ #output += "Traits to Show Per Subset,%d\n\n" % params["subsetSize"]
+ return output
+
+ # showSelectedTraits: (listof Trait) -> string
+ # to display the traits compared with the database
+ # note: we can't use tabular output because the traits could be of
+ # different types and produce different fields
+ def showSelectedTraits(self, traits):
+ output = "Selected Traits:\n"
+ for trait in traits:
+ output += '"' + trait.longName() + '"' + "\n"
+ output += "\n"
+ return output
+
+ # showSummaryCorrelationResults: ParamDict -> (listof Trait) ->
+ # TraitArray -> string
+ # see comment for __init__ for a description of TraitArray
+ #
+ # to show a summary (sets and sizes) of the correlation results
+ # as well as an X to indicate whether they will be included
+ # in the detailed output
+ def showSummaryCorrelationResults(self, p, traits, traitArray):
+ output = "Correlation Comparison Summary:\n"
+
+ #if p["subsetCount"] != -1:
+ # ourSubsetCount = min(p["subsetCount"], len(traitArray))
+ #else:
+
+ ourSubsetCount = len(traitArray)
+
+ displayDecomposition = binaryDecompose(p["displaySets"])
+ for j in range(ourSubsetCount):
+ i = traitArray[j][0]
+ traitSubarray = traitArray[j][1]
+ if len(traitSubarray) == 0:
+ continue
+
+ targetTraits = decomposeIndex(traits, i)
+ traitDesc = string.join(map(trait.Trait.shortName, targetTraits), # XZ, 09/10/2008: add module name
+ ", ")
+ if j in displayDecomposition:
+ checked = "X"
+ else:
+ checked = ""
+
+ output += '"%s","%s","%d"\n' % (checked, traitDesc, len(traitSubarray))
+
+ output += "\n"
+ return output
+
+ # showDetailedCorrelationResults: ParamDict -> (listof Trait) ->
+ # TraitArray -> string
+ #
+ # to show a detailed list of the correlation results; that is,
+ # to completely enumerate each subset of traitArray using the
+ # filtering parameters in p
+ def showDetailedCorrelationResults(self, p, traits, traitArray):
+ output = "Correlation Comparison Details:\n"
+ displayDecomposition = binaryDecompose(p["displaySets"])
+ displayDecomposition.sort()
+
+ def formatCorr(c):
+ return "%.4f" % c
+
+ for j in displayDecomposition:
+ i = traitArray[j][0]
+ traitSubarray = traitArray[j][1]
+
+ if len(traitSubarray) == 0:
+ continue
+
+ targetTraits = decomposeIndex(traits, i)
+ extraColumnHeaders = map(trait.Trait.shortName, targetTraits) # XZ, 09/10/2008: add module name
+ traitDesc = string.join(extraColumnHeaders, ", ")
+
+ #if(p["subsetSize"] != -1 and len(traitSubarray) > p["subsetSize"]):
+ # traitDesc += ",(showing top %s of %s)" % (p["subsetSize"],
+ # len(traitSubarray))
+ # traitSubarray = traitSubarray[0:p["subsetSize"]]
+
+ output += "%s\n" % traitDesc
+ output += traitSubarray[0][0].csvHeader([], extraColumnHeaders)
+ output += "\n"
+ for oneTrait, corr in traitSubarray:#XZ, 09/10/2008: change original variable name 'trait' to 'oneTrait'
+ corr = map(formatCorr, corr)
+ output += oneTrait.csvRow([], corr) + "\n"
+
+ output += "\n"
+
+ return output
+
+ # __str__ : string
+ # to return self.output as the string representation of this page
+ # self.output is built in __init__
+ def __str__(self):
+ return self.output
+
+# TraitCorrelationPage: a class to display trait correlations
+# for now this is just one HTML file, so we don't even write it
+# to a temporary file somewhere
+class TraitCorrelationPage(templatePage.templatePage):
+ """
+ Using the templatePage class, we build an HTML shell for
+ the core data here: the trait correlation lists.
+
+ The way templatePage works, we build the page in pieces in
+ the __init__ method and later on use the inherited write
+ method to render the page.
+ """
+ def __init__(self, fd, p, cursor, traits, traitArray, inbredSetName, txtFilename):
+
+ templatePage.templatePage.__init__(self, fd)
+
+ self.dict["title"] = "Correlation Comparison"
+ self.dict["basehref"] = ""
+ # NL: deleted js1 content part, since it has not been used in this project
+ self.dict["js1"] = ""
+ self.dict["js2"] = ""
+
+ body = "<td><h1>Correlation Comparison</h1>"
+ body += "<p>Run at %s UTC</p>" % time.asctime(time.gmtime())
+ body += """
+ <p>The correlation comparison tool identifies intersecting sets of traits that are
+correlated with your selections at a specified threshold. A correlation comparison
+involves the following steps:</p>
+<ol>
+<li><p>
+<b>Correlate:</b>
+Choose a <i>Target Database</i>, a <i>Correlation Type</i>, and a <i>Correlation
+Threshold</i>. For your initial correlation, leave <i>Number of Subsets to Show</i> and
+<i>Traits to Show per Subset</i> at their default values of 10. Using the Correlation
+Options panel, you can adjust the <i>Correlation Threshold</i>, <i>Number of Subsets to
+Show</i>, and <i>Traits to Show per Subset</i>.
+</p></li>
+
+<li><p>
+<b>Add to Collection:</b>
+You can use the check boxes in the <i>Correlation
+Comparison Details</i> panel and the buttons at the bottom of the page to add these
+results to your selections page for further analysis in WebQTL.
+</p></li>
+
+<li><p>
+<b>Filter:</b>
+Using the <i>Correlation Comparison Summary</i> panel, choose which
+subsets you would like to display for export. Note that if you change the
+parameters in the <i>Correlation Options</i> panel, you will need to re-apply your filter.
+</p></li>
+
+<li><p>
+<b>Export:</b>
+Once you are satisfied with your report, use the export link at
+the bottom of the page to save the report as a comma-separated (CSV) text file
+which you can then import into Excel or another tool. Note: the exported report
+will list all subsets in the summary view and only those traits in the subsets
+you have selected in the Filter step.
+</p></li>
+</ol>
+"""
+
+# body += """
+# <p>The correlation
+# comparison tool identifies the intersecting sets of traits that are
+# correlated with your selections. A correlation comparison involves
+# the following steps:</p>
+# <ol>
+# <li><p><b>Correlate:</b> Choose a <i>Target Database</i>, a <i>Correlation Type</i>, and a <i>Correlation Threshold</i>.
+# For the initial correlation, leave <i>Subsets to Show</i> and <i>Traits to Show per Subset</i>
+# at their default values of 10.</p></li>
+# <li><p><b>Refine Correlation:</b> Using the <i>Correlation Options</i> panel,
+# adjust the <i>Correlation Threshold</i>, <i>Subsets to Show</i>, and <i>Traits to
+# Show per Subset</i> until you have a reasonable number of traits.</p></li>
+# <li><p><b>Filter:</b> Using the <i>Correlation Comparison Summary</i> panel, choose which subsets you would
+# like to see. Note that if you change the parameters in the <i>Correlation Options</i> panel, you will
+# loose the filter you have selected.</p></li>
+# <li><p><b>Export:</b> Once you are satisfied with your report, use the export
+# link at the bottom of the page to save the report as a comma-separated (CSV) text file which
+# you can then import into Excel or another tool. Note: the exported report
+# will show all subsets in the summary view and all traits in each subset you have
+# selected in the Filter step.
+# <li><p><b>Shopping Cart:</b> In addition, you can use the
+# check boxes in the <i>Correlation Comparison Details</i> panel and the
+# buttons at the bottom of the page to add the traits you have found to the shopping cart.</p>
+# </li>
+# </ol>
+# """
+
+ body += self.showOptionPanel(p, cursor, inbredSetName)
+ body += self.showSelectedTraits(traits, p, inbredSetName)
+
+ if p["firstRun"] == 0:
+ body += self.showCorrelationResults(p, inbredSetName, traits, traitArray)
+
+ exportParams = copy.copy(p)
+ exportParams["outputType"] = "text"
+
+ body += ('''
+ <h2>Export these results</h2>
+ <p>
+ <a href="/image/%s">Download a text version of the above results in CSV format</a>. This text version differs from
+ the version you see on this page in two ways. First, the summary view shows all subsets. Second, the details
+ view shows all traits in the subsets that you have selected.
+ </p>
+ '''
+ % txtFilename)
+
+
+
+ body += "</td>"
+ self.dict["body"] = body
+
+
+ # showOptionPanel: ParamDict -> Cursor -> String -> String
+ # to build an option panel for the multitrait correlation
+ # we expect the database list to be a list of 2-tuples
+ # the first element of each tuple is the short name
+ # and the second element of the tuple is the long name
+ def showOptionPanel(self, params, cursor, inbredSetName):
+ output = '''
+ <h2>Correlation Options</h2>
+ <FORM METHOD="POST" ACTION="%s%s" ENCTYPE="multipart/form-data">
+ <INPUT TYPE="hidden" NAME="FormID" VALUE="compCorr2">
+ <input type="hidden" name="filename" value="%s">
+ <input type="hidden" name="firstRun" value="0">
+ <input type="hidden" name="progress" value="1">
+ <table>
+ <tr>
+ <td>Target Database:</td><td>
+ ''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, params["filename"])
+
+ output += htmlModule.genDatabaseMenu(db = cursor,
+ public=0,
+ RISetgp = inbredSetName,
+ selectname="targetDatabase",
+ selected=params["targetDatabase"])
+ output += "</td></tr>"
+
+ corrSelected = ["",""]
+
+ if params["correlation"] == "pearson":
+ corrSelected[0] = "SELECTED"
+ else:
+ corrSelected[1] = "SELECTED"
+
+ output += ('''
+ <tr>
+ <td>Correlation Method:</td>
+ <td><select name="correlation">
+ <option value="pearson" %s>Pearson</option>
+ <!--<option value="spearman" %s>Spearman</option>-->
+ </select></td></tr>
+ ''' % (corrSelected[0], corrSelected[1]))
+ output += ('<tr><td>Correlation Threshold:</td><td><input name="threshold" value="%s" /></td></tr>'
+ % params["threshold"])
+ output += ('<tr><td>Subsets to Show (-1 to show all subsets):</td><td><input name="subsetCount" value="%s" /></td></tr>'
+ % params["subsetCount"])
+ output += ('<tr><td>Traits to Show per Subset (-1 to show all traits):</td><td><input name="subsetSize" value="%s" /></td></tr>'
+ % params["subsetSize"])
+
+ # a cosmetic change to hopefully make this form a bit easier to use
+# if params["firstRun"] == 1:
+# applyName = "Correlate"
+# else:
+# applyName = "Refine Correlation"
+
+ output += '''
+ <tr>
+ <td colspan="2"><input class="button" type="submit" value="Correlate" /></td>
+ </tr>
+ </table>
+ </form>
+ '''
+
+ return output
+
+ # showSelectedTraits: listof Trait -> string
+ # to show a list of the selected traits
+ def showSelectedTraits(self, traits, p, inbredSetName):
+ output = '''
+ <form action="%s%s" method="post" name="showDatabase">
+ <INPUT TYPE="hidden" NAME="FormID" VALUE="showDatabase">
+
+ <input type="hidden" name="incparentsf1" value="ON">
+ <input type="hidden" name="ShowStrains" value="ON">
+ <input type="hidden" name="ShowLine" value="ON">
+ <input type="hidden" name="database" value="">
+ <input type="hidden" name="ProbeSetID" value="">
+ <input type="hidden" name="RISet" value="%s">
+ <input type="hidden" name="CellID" value="">
+ <input type="hidden" name="database2" value="">
+ <input type="hidden" name="rankOrder" value="">
+ <input type="hidden" name="ProbeSetID2" value="">
+ <input type="hidden" name="CellID2" value="">
+ ''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, inbredSetName)
+
+ output += "<h2>Selected Traits</h2>"
+ output += '<table cellpadding="2" cellspacing="0"><tr bgcolor="FFFFFF"><th>Database</th><th>Trait</th></tr>'
+ flip = 1
+ colors = ["FFFFFF", "cccccc"]
+
+ for trait in traits:
+ # we take advantage of the secret dbName attribute that
+ # loadDatabase fills in
+ descriptionString = trait.genHTML()
+ if trait.db.type == 'Publish' and trait.confidential:
+ descriptionString = trait.genHTML(privilege=self.privilege, userName=self.userName, authorized_users=trait.authorized_users)
+ output += '''
+ <tr bgcolor="%s"><td><a href="/dbdoc/%s.html">%s</a></td>
+ <td><a href="javascript:showDatabase2('%s', '%s', '')">%s</a></td>
+ </tr>
+ ''' % (colors[flip], trait.db.name, trait.db.name, trait.db.name, trait.name, descriptionString)
+ flip = not flip
+
+ output += "</table></form>"
+ return output
+
+
+ # showSummaryCorrelationResults
+ # show just the number of traits in each subarray
+ def showSummaryCorrelationResults(self, p, traits, traitArray):
+ output = '''
+ <form action="%s%s" method="post">
+ <INPUT TYPE="hidden" NAME="FormID" VALUE="compCorr2">
+ <input type="hidden" name="filename" value="%s">
+ <input type="hidden" name="firstRun" value="0">
+ <input type="hidden" name="progress" value="1">
+ <input type="hidden" name="correlation" value="%s">
+ <input type="hidden" name="threshold" value="%s">
+ <input type="hidden" name="rankOrder" value="">
+ <input type="hidden" name="subsetCount" value="%s">
+ <input type="hidden" name="subsetSize" value="%s">
+ <input type="hidden" name="targetDatabase" value="%s">
+ ''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, p["filename"], p["correlation"], p["threshold"],
+ p["subsetCount"], p["subsetSize"], p["targetDatabase"])
+
+ output += '''
+ <table cellpadding="2" cellspacing="0">
+ <tr>
+ <th>Trait Subsets</th>
+ <th colspan="2">Intersecting Set Size</th>
+ </tr>
+ '''
+ # figure out a scale for the summary graph
+ # for now we set max = 300 pixels wide
+ if p["subsetCount"] != -1:
+ ourSubsetCount = min(p["subsetCount"], len(traitArray))
+ else:
+ ourSubsetCount = len(traitArray)
+
+ screenWidth = 600
+ lengths = []
+ for j in range(ourSubsetCount):
+ lengths.append(len(traitArray[j][1]))
+ maxLength = max(lengths)
+
+ displayDecomposition = binaryDecompose(p["displaySets"])
+ flip = 0
+ colors = ["FFFFFF", "cccccc"]
+
+ for j in range(ourSubsetCount):
+ i = traitArray[j][0]
+ traitSubarray = traitArray[j][1]
+
+ if len(traitSubarray) == 0:
+ continue
+
+ targetTraits = decomposeIndex(traits, i)
+ traitDesc = string.join(map(webqtlTrait.displayName, targetTraits),
+ ", ")
+
+ if j in displayDecomposition:
+ checked = "CHECKED"
+ else:
+ checked = ""
+
+ barWidth = (len(traitSubarray) * screenWidth) / maxLength
+ output += ('''<tr bgcolor="%s">
+ <td><input type="checkbox" name="display%d" value="1" %s>%s</input></td>
+ <td>%s</td>
+ <td><img src="/images/blue.png" width="%d" height="25"></td></tr>'''
+ % (colors[flip], j, checked, traitDesc, len(traitSubarray), barWidth))
+ flip = not flip
+
+ output += '''
+ <tr>
+ <td colspan="3">
+ <input class="button" type="submit" value="Filter" /></td>
+ </tr>
+ </table></form>
+ '''
+ return output
+
+ # showDetailedCorrelationResults
+ # actually show the traits in each subarray
+ def showDetailedCorrelationResults(self, p, inbredSetName, traits,
+ traitArray):
+ output = "<h2>Correlation Comparison Details</h2>"
+
+ # the hidden form below powers all of the JavaScript links,
+ # the shopping cart links, and the correlation plot links
+
+ output += '''
+ <form action="%s%s" method="post">
+ <input type="hidden" name="database" value="%s">
+ <input type="hidden" name="FormID" value="showDatabase">
+ <input type="hidden" name="traitfile" value="">
+ <input type="hidden" name="incparentsf1" value="ON">
+ <input type="hidden" name="ShowStrains" value="ON">
+ <input type="hidden" name="ProbeSetID" value="">
+ <input type="hidden" name="ShowLine" value="ON">
+ <input type="hidden" name="RISet" value="%s">
+ <input type="hidden" name="CellID" value="">
+ <input type="hidden" name="database2" value="">
+ <input type="hidden" name="rankOrder" value="">
+ <input type="hidden" name="ProbeSetID2" value="">
+ <input type="hidden" name="CellID2" value="">
+ ''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, p["targetDatabase"], inbredSetName)
+
+
+ displayDecomposition = binaryDecompose(p["displaySets"])
+
+ # necessary to ensure that subset order is the same in the
+ # summary and the detailed view
+ displayDecomposition.sort()
+
+ # here's a trick: the first trait we show must have the widest row because it correlates
+ # with the largest set of input traits
+ firstSubset = traitArray[displayDecomposition[0]]
+ firstTrait = firstSubset[1][0][0]
+ extraColumnCount = firstSubset[2]
+ totalColumnCount = 1 + len(firstTrait.row()) + extraColumnCount
+
+ output += "<table cellpadding=2 cellspacing=0>\n"
+ for j in displayDecomposition:
+ i = traitArray[j][0]
+ traitSubarray = traitArray[j][1]
+
+ # we don't display trait combinations for which there are
+ # no correlations
+ if len(traitSubarray) == 0:
+ continue
+
+ # generate a description of the traits that this particular array
+ # matches highly
+ targetTraits = decomposeIndex(traits, i)
+ extraColumnHeaders = map(webqtlTrait.displayName, targetTraits)
+ traitDesc = string.join(extraColumnHeaders, ", ")
+
+ # massage extraColumnHeaders so that they can be wrapped
+ for i in range(len(extraColumnHeaders)):
+ ech = extraColumnHeaders[i]
+ ech = ech.replace("-", " ")
+ ech = ech.replace("_", " ")
+ extraColumnHeaders[i] = ech
+
+ # pad extraColumnHeaders if we have less columns than the max
+ paddingNeeded = extraColumnCount - len(extraColumnHeaders)
+ if paddingNeeded > 0:
+ extraColumnHeaders.extend(paddingNeeded * ["&nbsp;"])
+
+ # we limit the output to the top ones
+ if(p["subsetSize"] != -1 and len(traitSubarray) > p["subsetSize"]):
+ traitDesc += " (showing top %s of %s)" % (p["subsetSize"], len(traitSubarray))
+ traitSubarray = traitSubarray[0:p["subsetSize"]]
+
+ # combine that description with actual database traits themselves
+ # and the correlation values
+ output += '<tr><td colspan="%d"><h3>%s</h3></td></tr>' % (totalColumnCount, traitDesc)
+ #output += '<h3>%s</h3>\n<table cellpadding=2 cellspacing=0>\n'% traitDesc
+
+ # we assume that every trait in traitSubarray is the same type
+ # of trait
+ flip = 0
+ colors = ["FFFFFF", "cccccc"]
+
+ output += traitSubarray[0][0].tableRowHeader(["&nbsp;"], extraColumnHeaders, colors[0])
+
+ for traitPair in traitSubarray:
+ corr = []
+ traitPair[0].dbName = p['targetDatabase']
+ trait = traitPair[0]
+
+ for i in range(len(traitPair[1])):
+ corrValue = traitPair[1][i]
+ corrPlotLink = ('''
+ <a href="javascript:showCorrelationPlot2(db='%s',ProbeSetID='%s',CellID='',db2='%s',ProbeSetID2='%s',CellID2='',rank='%s')">%.2f</a>
+ ''' % (p["targetDatabaseName"], trait.name, targetTraits[i].db.name, targetTraits[i].name, "0", corrValue))
+ corr.append(corrPlotLink)
+
+ corr.extend(paddingNeeded * ["&nbsp;"])
+
+ checkbox = ('<INPUT TYPE="checkbox" NAME="searchResult" VALUE="%s:%s" />'
+ % (p["targetDatabaseName"], trait.name))
+ flip = not flip
+ output += traitPair[0].tableRow([checkbox], corr, colors[flip])
+
+ #output += "</table>"
+ i += 1
+ output += '<tr><td colspan="%d">&nbsp;</td></tr>' % totalColumnCount
+
+ output += "</table>"
+
+ # print form buttons if there were checkboxes above
+ output += '''
+ <div align="left">
+ <INPUT TYPE="button" NAME="addselect" CLASS="button" VALUE="Add to Collection"
+ onClick="addRmvSelection('%s',this.form, 'addToSelection');">
+ <INPUT TYPE="button" NAME="selectall" CLASS="button" VALUE="Select All" onClick="checkAll(this.form);">
+ <INPUT TYPE="reset" CLASS="button" VALUE="Select None">
+ </div>
+ </form>
+ ''' % inbredSetName
+
+ return output
+
+ # showCorrelationResults: ParamDict -> listof Trait -> tupleof (int,arrayof trait) -> String
+ # to build an output display for the multitrait correlation results
+ def showCorrelationResults(self, p, inbredSetName, traits, traitArray):
+ output = '''
+ <h2>Correlation Comparison Summary</h2>
+ <p>
+ %s correlations were computed for each of the selected traits with each trait in
+ the <a href="/dbdoc/%s.html">%s</a> database.
+ Subsets of database traits for which correlations were higher than %s
+ or lower than -%s are shown below based on which traits
+ they correlated highly with. The top %s subsets, ranked by the number of input traits that
+ they correspond with, are shown, and at most %s traits in each subset are shown. </p>
+ ''' % (p["correlationName"],
+ p["targetDatabase"], p["targetDatabaseName"],
+ p["threshold"], p["threshold"], p["subsetCount"],
+ p["subsetSize"])
+
+
+ totalTraits = 0
+ for j in range(len(traitArray)):
+ totalTraits += len(traitArray[j][1])
+
+ if totalTraits == 0:
+ output += """
+ <p>
+ No shared corrrelates were found with your given traits at this
+ threshold. You may wish to lower the correlation threshold or choose different traits.
+ </p>
+ """
+ else:
+ output += self.showSummaryCorrelationResults(p, traits, traitArray)
+ output += self.showDetailedCorrelationResults(p, inbredSetName,
+ traits, traitArray)
+
+ return output
+
+# decomposeIndex: (listof Trait) -> Int ->
+# (listof Trait)
+# to use i to partition T into a sublist
+# each bit in i controls the inclusion or exclusion of a trait
+def decomposeIndex(traits, i):
+ targetTraits = []
+
+ for j in range(len(traits)):
+ # look, mom, a bitwise and!
+ # expression below tests whether the jth bit is
+ # set in i
+ # see runCorrelation for how we decompose the
+ # array index
+ if (i & pow(2,j)) == pow(2,j):
+ targetTraits.append(traits[j])
+
+ return targetTraits
+
+# binaryDecompose: int -> (listof int)
+# to decompose a number into its constituent powers of 2
+# returns a list of the exponents a_1...a_n such that the input m
+# is m = 2^a_1 + ... + 2^a_n
+def binaryDecompose(n):
+ if n == 0:
+ return []
+
+ # we start with the highest power of 2 <= this number
+ # and work our way down, subtracting powers of 2
+ start = long(math.floor(math.log(n)/math.log(2)))
+
+ exponents = []
+ while start >= 0:
+ if n >= long(math.pow(2, start)):
+ n -= math.pow(2,start)
+ exponents.append(start)
+ start -= 1
+ return exponents
+
+# powerOf : int -> int -> boolean
+# to determine whether m is a power of n;
+# more precisely, whether there exists z in Z s.t.
+# n^z = m
+def powerOf(m, n):
+ trialZ = math.floor(math.log(m)/math.log(n))
+ return pow(n,trialZ) == m
+
+
+class compCorrPage(templatePage.templatePage):
+ def __init__(self,fd):
+ templatePage.templatePage.__init__(self, fd)
+
+ if not self.openMysql():
+ return
+
+ cursor = self.cursor
+ params = buildParamDict(cursor, fd)
+
+ # get the input data
+ inbredSetName, traits = readInputFile(cursor, RootDir + params["filename"])
+
+ # and what we are comparing the data to
+ dbTraits = []
+ if params["targetDatabaseType"] != "ProbeSet":
+ dbTraits = loadDatabase(cursor, params)
+
+
+ # run the comparison itself
+ strainCount = trait.queryStrainCount(cursor) # XZ, 09/10/2008: add module name
+ if params["targetDatabaseType"] == "ProbeSet":
+ results = runProbeSetCorrelations(cursor, params, traits)
+ else:
+ results = runCorrelations(params, strainCount, traits, dbTraits)
+
+ # try to be smart about what to output:
+ # we want to limit the number of traits shown, at least initially
+ # and since traitArray is already sorted with most interesting
+ # subsets first, we simply pick up the first 500 or so traits
+ # that we find
+ if params["displaySets"] == 0:
+ selectedTraits = 0
+ for j in range(len(results)):
+ #print "Scanning subarray %d" % j
+ if selectedTraits <= 200:
+ params["displaySets"] += pow(2, j)
+ selectedTraits += len(results[j][1])
+
+ traitList = []
+ for oneTrait in traits: # XZ, 09/10/2008: change the original variable name 'trait' to 'oneTrait'
+ traitName = oneTrait.dbName+'::'+oneTrait.name # XZ, 09/10/2008: change the original variable name 'trait' to 'oneTrait'
+ aTrait = webqtlTrait(cursor=self.cursor, fullname=traitName)
+ traitList.append(aTrait)
+
+ # and generate some output
+ txtOutputFilename = tempfile.mktemp()
+ txtOutputHandle = open(txtOutputFilename, "w")
+ txtOutput = TraitCorrelationText(params, traits, results)
+ txtOutputHandle.write(str(txtOutput))
+ txtOutputHandle.close()
+ txtOutputFilename = os.path.split(txtOutputFilename)[1]
+
+ self.dict['body'] = TraitCorrelationPage(fd, params, cursor, traitList,
+ results, inbredSetName,
+ txtOutputFilename).dict['body']
diff --git a/web/webqtl/compareCorrelates/trait.py b/web/webqtl/compareCorrelates/trait.py
new file mode 100755
index 00000000..ff1f8119
--- /dev/null
+++ b/web/webqtl/compareCorrelates/trait.py
@@ -0,0 +1,1074 @@
+#Trait.py
+#
+#--Individual functions are already annotated, more or less.
+#
+#Classes:
+#RawPoint
+#Trait
+#ProbeSetTrait
+#GenotypeTrait
+#PublishTrait
+#TempTrait
+#-KA
+
+# trait.py: a data structure to represent a trait
+import time
+import string
+
+CONFIG_pubMedLinkURL = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=%s&dopt=Abstract"
+
+# RawPoint: to store information about the relationship between two particular
+# traits
+# RawPoint represents directly the input file
+class RawPoint:
+ def __init__(self, i, j):
+ self.i = i
+ self.j = j
+
+ def __eq__(self, other):
+ return (self.i == other.i and
+ self.j == other.j and
+ self.spearman == other.spearman and
+ self.pearson == other.pearson)
+
+ def __str__(self):
+ return "(%s,%s,%s,%s)" % (self.i, self.j, self.spearman, self.pearson)
+
+def tdEscapeList(cols, align="left"):
+ """
+ A helper function used by tableRow
+ in Trait that will convert a list of strings into a set of
+ table cells enclosed by <td>%s</td> tags
+ """
+ html = ""
+ for col in cols:
+ html += '<td style="text-align: %s">%s</td>' % (align, col)
+ return html
+
+def thEscapeList(cols):
+ """
+ A helper function used by tableRowHeader
+ in Trait that will convert a list of strings into a set of
+ table cells enclosed by <td>%s</td> tags
+ """
+ html = ""
+ for col in cols:
+ html += "<th>%s</th>" % col
+ return html
+
+def commaEscapeList(cols):
+ """
+ A helper function used by csvHeader and csvRow.
+ Really it's just a wrapper for string.join
+ """
+ return '"' + string.join(cols, '","') + '"'
+
+
+class Trait:
+ """
+ A trait represents an attribute of an object. In the WebQTL database, traits are stored
+ as ProbeSets; that is, the average values of a set of probes are stored.
+ """
+ def __init__(self, id="", name="", description="", symbol="", href=""):
+ self.id = id
+ self.name = name
+ self.dbName = ""
+ self.symbol = symbol
+ self.href = href
+ self.strainData = {}
+
+ def populateDataId(self, cursor, freezeId):
+ """
+ Retrieve the dataId for trait data corresponding to the given database
+ The way to do this depends on the particular type of trait, so we leave implementation
+ to subclasses.
+ """
+ raise NotImplementedError
+
+ def populateStrainData(self, cursor):
+ """
+ Load this trait full of train data corresponding to the data id
+ The data id can either come from populateDataId
+ or can be set manually by the user of this class.
+ Xiaodong added: The way to do this depends on the particular type of trait,
+ so we leave implementation to subclasses.
+
+ """
+ raise NotImplementedError
+
+ def shortName(self):
+ """
+ To return a short name for this trait; this name should be
+ appropriate for a row or column title
+ """
+ return self.name
+
+ def nameNoDB(self):
+ """
+ To return only the short name without the database attached
+ """
+ strArray = self.shortName().split('::')
+
+ return strArray[1]
+
+ def datasetName(self):
+ """
+ To return only the name of the dataset
+ """
+ strArray = self.shortName().split('::')
+
+ return strArray[0].strip()
+
+ def longName(self):
+ """
+ To return a long name for this trait; this name should be
+ appropriate for a key to a table
+ """
+ return self.shortName()
+
+ def __str__(self):
+ return self.shortName()
+
+ def tableRowHelper(self, beforeCols, afterCols, color, thisRow):
+ """
+ tableRowHelper: (arrayof String) -. String
+ To generate a table row to represent this object, appending
+ the additional information in beforeCols and afterCols
+ to the beginning and the end
+ """
+ thisRow[0] = '<a href="%s">%s</a>' % (self.traitInfoLink(),
+ self.name)
+ html = '<tr bgcolor="%s">' % color
+ html += tdEscapeList(beforeCols + thisRow)
+ html += tdEscapeList(afterCols, align="right")
+ html += "</tr>"
+
+ return html
+
+
+ def header(self):
+ """
+ header: (listof String)
+ To generate a list of strings describing each piece of data
+ returned by row
+ """
+ raise NotImplementedError
+
+ def row(self):
+ """
+ row: (listof String)
+ To generate a list of strings describing this object. The
+ elements of this list should be described by header()
+ """
+ raise NotImplementedError
+
+ def tableRowHeader(self, beforeCols, afterCols, color):
+ """
+ tableRowHeader: (arrayof String) -> (arrayof String) -> String
+ To generate a table row header to represent this object,
+ appending the additional information in beforeCols and
+ afterCols to the beginning and end
+ """
+ html = '<tr bgcolor="%s">' % color
+ html += thEscapeList(beforeCols + self.header() +
+ afterCols)
+ html += "</tr>"
+ return html
+
+ def csvHeader(self, beforeCols, afterCols):
+ return commaEscapeList(beforeCols + self.header() + afterCols)
+
+ def csvRow(self, beforeCols, afterCols):
+ return commaEscapeList(beforeCols + self.row() + afterCols)
+
+
+ def traitInfoLink(self):
+ """
+ To build a trait info link to show information about this
+ trait. We assume that the database attribute is properly set
+ on the hidden form on the page where this link will go.
+ """
+ return "javascript:showDatabase2('%s','%s','')" % (self.dbName, self.name)
+
+# ProbeSetTrait: a trait with data from a probeset
+class ProbeSetTrait(Trait):
+ def __init__(self, id="", name="", description="", symbol="", href="",
+ chromosome="", MB="", GeneId=""):
+ Trait.__init__(self, id=id, name=name, href=href)
+ self.description = description
+ self.symbol = symbol
+ self.chromosome = chromosome
+ self.MB = MB
+ self.GeneId = GeneId
+
+ def populateDataId(self, cursor, freezeId):
+ """
+ Look up the data id for this trait given which
+ freeze it came from.
+ """
+ cursor.execute('''
+ SELECT
+ ProbeSetXRef.DataId
+ FROM
+ ProbeSetXRef
+ WHERE
+ ProbeSetId = %s AND
+ ProbeSetFreezeId = %s
+ ''' % (self.id, freezeId))
+
+ # we hope that there's only one record here
+ row = cursor.fetchone()
+ self.dataId = row[0]
+
+ #XZ, 03/03/2009: Xiaodong implemented this fuction
+ def populateStrainData(self, cursor):
+ cursor.execute('''
+ SELECT
+ ProbeSetData.StrainId,
+ ProbeSetData.value
+ FROM
+ ProbeSetData
+ WHERE
+ ProbeSetData.Id = %s''' % self.dataId)
+ for row in cursor.fetchall():
+ self.strainData[int(row[0])] = float(row[1])
+
+
+ def shortName(self):
+ """
+ An improved string method that uses the gene symbol where
+ we have it
+ """
+ if self.symbol != "":
+ return self.symbol
+ else:
+ return Trait.shortName(self)
+
+ def longName(self):
+ """
+ We use several bits of genetic information to give
+ useful information about this trait and where it is
+ """
+ if self.chromosome != "":
+ chrPart = " (%s on Chr %s @ %s Mb)" % (self.symbol,
+ self.chromosome,
+ self.MB)
+ else:
+ chrPart = ""
+
+ return "%s%s: %s" % (self.name, chrPart, self.description)
+
+ def header(self):
+ return ["Name", "Symbol", "Description",
+ "Chr", "Position (Mb)"]
+
+ def row(self):
+ if type(self.MB) is float:
+ MB = "%.2f" % self.MB
+ else:
+ MB = ""
+
+ return [self.name, self.symbol, self.description,
+ self.chromosome, MB]
+
+ def tableRow(self, beforeCols, afterCols, color):
+ """
+ tableRow: (arrayof String) -> (arrayof String) -> String
+ To generate a table row to represent this object, appending
+ the additional information in beforeCols and afterCols to the
+ beginning and end
+ """
+ thisRow = self.row()
+
+ # trim description
+ if len(thisRow[2]) > 20:
+ thisRow[2] = thisRow[2][:20] + "..."
+
+ # add NCBI info link
+ thisRow[1] = self.ncbiInfoLink()
+
+ return self.tableRowHelper(beforeCols, afterCols, color,
+ thisRow)
+
+
+ def ncbiInfoLink(self):
+ """
+ ncbiInfoLink :: String
+ To generate an NCBI info link for this trait. If we have a GeneId,
+ then we can go straight to the gene. If not, then we generate a search
+ link based on the gene symbol. If we have none of them, then we don't
+ generate a link at all.
+ """
+ if self.GeneId != "":
+ cmd = "cmd=Retrieve&dopt=Graphics&list_uids=%s" % self.GeneId
+ elif self.symbol != "":
+ cmd = "cmd=Search&term=%s" % self.symbol
+ else:
+ return ""
+
+ return '''
+ <a target="_new"
+ href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&%s">
+ %s</a> ''' % (cmd, self.symbol)
+
+
+# GenotypeTrait: a trait with data from the genotype
+class GenotypeTrait(Trait):
+ def __init__(self, id="", name="", href="", chromosome="", MB=""):
+ Trait.__init__(self, id=id, name=name, href=href)
+ self.chromosome = chromosome
+ self.MB = MB
+
+ def populateDataId(self, cursor, freezeId):
+ """
+ Look up the data id for this trait from the
+ genotype.
+ """
+ cursor.execute('''
+ SELECT
+ GenoXRef.DataId
+ FROM
+ GenoXRef
+ WHERE
+ GenoId = %s AND
+ GenoFreezeId = %s
+ ''' % (self.id, freezeId))
+
+ # we hope that there's only one record here
+ row = cursor.fetchone()
+ self.dataId = row[0]
+
+ #XZ, 03/03/2009: Xiaodong implemented this fuction
+ def populateStrainData(self, cursor):
+ cursor.execute('''
+ SELECT
+ GenoData.StrainId,
+ GenoData.value
+ FROM
+ GenoData
+ WHERE
+ GenoData.Id = %s''' % self.dataId)
+ for row in cursor.fetchall():
+ self.strainData[int(row[0])] = float(row[1])
+
+ def header(self):
+ return ["Locus", "Chr", "Position (Mb)"]
+
+ def row(self):
+ return [self.name, self.chromosome, "%.3f" % self.MB]
+
+ def tableRow(self, beforeCols, afterCols, color):
+ return self.tableRowHelper(beforeCols, afterCols, color, self.row())
+
+# PublishTrait: a trait with data from publications
+class PublishTrait(Trait):
+ def __init__(self, id="", name="", href="", authors="", title="",
+ phenotype="", year=""):
+ Trait.__init__(self, id=id, name=name, href=href)
+ self.authors = authors
+ self.title = title
+ self.phenotype = phenotype
+ self.year = year
+
+ def populateDataId(self, cursor, freezeId):
+ """
+ Look up the data id for this trait from the
+ published set. For the moment, we assume that there's
+ only one publish freeze.
+ """
+ cursor.execute('''
+ SELECT
+ PublishXRef.DataId
+ FROM
+ PublishXRef, PublishFreeze
+ WHERE
+ PublishFreeze.Id = %s AND
+ PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND
+ PublishXRef.Id = %s
+ ''' % (freezeId, self.id))
+
+ # we hope that there's only one record here
+ row = cursor.fetchone()
+ self.dataId = row[0]
+
+ #XZ, 03/03/2009: Xiaodong implemented this fuction
+ def populateStrainData(self, cursor):
+ cursor.execute('''
+ SELECT
+ PublishData.StrainId,
+ PublishData.value
+ FROM
+ PublishData
+ WHERE
+ PublishData.Id = %s''' % self.dataId)
+ for row in cursor.fetchall():
+ self.strainData[int(row[0])] = float(row[1])
+
+
+ def longName(self):
+ """
+ A more intelligent string function that uses
+ information about the publication from which this trait came
+ """
+ return "%s: %s by %s" % (self.name, self.title, self.authors)
+
+ def header(self):
+ return ["Record", "Phenotype", "Authors", "Year", "URL"]
+
+ def row(self):
+ return [self.name,
+ self.phenotype,
+ self.authors,
+ str(self.year),
+ ""]
+
+ def tableRow(self, beforeCols, afterCols, color):
+ """
+ tableRow: (arrayof String) -> (arrayof String) -> String
+ To generate a table row to represent this object, appending
+ the additional information in beforeCols and afterCols to the
+ beginning and end
+ """
+ thisRow = self.row()
+
+ # for multiple authors, use "et. al" after first two
+ authors = thisRow[2].split(",")
+ if len(authors) > 2:
+ thisRow[2] = string.join(authors[:2], ",") + ", et al"
+
+ # clip phenotype to 20 chars
+ if len(thisRow[1]) > 20:
+ thisRow[1] = thisRow[1][:20] + "..."
+
+ # add Pub Med URL
+ thisRow[4] = '<a href="%s" target="_new">Pub Med</a>' % (CONFIG_pubMedLinkURL % self.href)
+
+ return self.tableRowHelper(beforeCols, afterCols, color,
+ thisRow)
+
+
+# TempTrait: a trait with data generate by user and stored in temp table
+class TempTrait(Trait):
+ def __init__(self, id="", name="", href="", description=""):
+ Trait.__init__(self, id=id, name=name, href=href)
+ self.description = description
+
+ def populateDataId(self, cursor, freeezeId):
+ """
+ Look up the data id for this trait from the Temp table, freezeId isn't used,
+ it just for fixing the inherit
+ """
+ cursor.execute('''
+ SELECT
+ DataId
+ FROM
+ Temp
+ WHERE
+ Id=%s
+ ''' % (self.id))
+
+ # we hope that there's only one record here
+ row = cursor.fetchone()
+ self.dataId = row[0]
+
+ #XZ, 03/03/2009: Xiaodong implemented this fuction
+ def populateStrainData(self, cursor):
+ cursor.execute('''
+ SELECT
+ TempData.StrainId,
+ TempData.value
+ FROM
+ TempData
+ WHERE
+ TempData.Id = %s''' % self.dataId)
+ for row in cursor.fetchall():
+ self.strainData[int(row[0])] = float(row[1])
+
+
+ def row(self):
+ return [self.id,
+ self.name,
+ self.description,
+ ""]
+
+
+ def longName(self):
+ """
+ For temp trait, the description always contents whole useful information
+ """
+ return self.description
+
+
+# queryGenotypeTraitByName : Cursor -> string -> GenotypeTrait
+def queryGenotypeTraitByName(cursor, speciesId, name):
+ qry = '''
+ SELECT
+ Geno.Id,
+ Geno.Name,
+ Geno.Chr,
+ Geno.Mb
+ FROM
+ Geno
+ WHERE
+ Geno.SpeciesId = %s and Geno.Name = "%s" ''' % (speciesId, name)
+
+ cursor.execute(qry)
+ row = cursor.fetchone()
+ return GenotypeTrait(id=row[0], name=row[1],
+ chromosome=row[2], MB=row[3])
+
+# queryPublishTraitByName : Cursor -> string -> PublishTrait
+def queryPublishTraitByName(cursor, freezeId, name):
+ qry = '''
+ SELECT
+ PublishXRef.Id,
+ Phenotype.Id,
+ Publication.Authors,
+ Publication.Title,
+ Publication.Year,
+ Publication.PubMed_ID
+ FROM
+ Publication, PublishXRef, Phenotype, PublishFreeze
+ WHERE
+ PublishFreeze.Id = %s AND
+ PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND
+ PublishXRef.Id = %s AND
+ PublishXRef.PublicationId = Publication.Id AND
+ PublishXRef.PhenotypeId = Phenotype.Id
+ ''' % (freezeId, name)
+
+ cursor.execute(qry)
+ if cursor.rowcount == 0:
+ return None
+ else:
+ row = cursor.fetchone()
+
+ return PublishTrait(id=row[0], name='%s'%row[0],
+ authors=row[2], title=row[3],
+ year=row[4], href=row[5])
+
+
+def queryTempTraitByName(cursor, name):
+ name=name.strip()
+ qry = '''
+ SELECT
+ Temp.Id,
+ Temp.Name,
+ Temp.description
+ FROM
+ Temp
+ WHERE
+ Temp.Name= "%s"
+ ''' % (name)
+
+ cursor.execute(qry)
+ if cursor.rowcount == 0:
+ return None
+ else:
+ row = cursor.fetchone()
+ return TempTrait(id=row[0], name=row[1], description=row[2], href='')
+
+# queryPopulatedProbeSetTraits: Cursor -> Integer -> dictof Trait
+# to retrieve an entire probeset fully populated with data
+# this query can take 15+ sec the old way (22,000 traits * 35 strains = half
+# a million records)
+# so we ask for the data in bulk
+#
+# cursor should be SSCursor for MySQL so rows are stored on the server side
+# and tuples are used
+# we explicitly close the cursor here as well
+#XZ, 03/04/2009: It seems to me that this function is never be executed.
+#XZ: Although it can be called from multitrait.loadDatabase,
+#XZ: but the loadDatabase function will not be called
+#XZ: if the targetDatabaseType is probeset.
+#XZ: The probeset traits of target database are retrieved by execute
+#XZ: queryPopulatedProbeSetTraits2 from correlation.calcProbeSetPearsonMatrix
+def queryPopulatedProbeSetTraits(cursor, freezeId):
+ step1 = time.time()
+ traits = queryProbeSetTraits(cursor, freezeId)
+ traitDict = {}
+ for trait in traits:
+ traitDict[trait.id] = trait
+
+ step2 = time.time()
+ print
+ #XZ, 03/04/2009: Xiaodong changed Data to ProbeSetData
+ cursor.execute('''
+ SELECT
+ ProbeSetXRef.ProbeSetId,
+ ProbeSetData.StrainId,
+ ProbeSetData.value
+ FROM
+ ProbeSetXRef
+ Left Join ProbeSetData ON
+ ProbeSetXRef.DataId = ProbeSetData.Id
+ WHERE
+ ProbeSetXRef.ProbeSetFreezeId = %s
+ ''' % freezeId)
+
+ step3 = time.time()
+ totalrows = 0
+ somerows = cursor.fetchmany(1000)
+ while len(somerows) > 0:
+ totalrows += len(somerows)
+ for row in somerows:
+ # this line of code can execute more than one million times
+ traitDict[row[0]].strainData[int(row[1])] = row[2]
+ somerows = cursor.fetchmany(1000)
+
+ #cursor.close()
+ step4 = time.time()
+
+ time1 = step2 - step1
+ time2 = step3 - step2
+ time3 = step4 - step3
+ time4 = step4 - step1
+ #print "%f %f %f %f %d rows" % (round(time1, 2),
+ # round(time2, 2),
+ # round(time3, 2),
+ # round(time4, 2),
+ # totalrows)
+ #print "Fetched %d traits" % len(traits)
+ return traits
+
+
+# queryPopulatedProbeSetTraits2: Cursor -> Integer -> dictof Trait
+# to retrieve probeset fully populated whose ProbeSetId in a range
+# a special ProbeSetId with data
+# this query can take 15+ sec the old way (22,000 traits * 35 strains = half
+# a million records)
+# so we ask for the data in bulk
+#
+# cursor should be SSCursor for MySQL so rows are stored on the server side
+# and tuples are used
+# we explicitly close the cursor here as well
+def queryPopulatedProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2):
+ step1 = time.time()
+ traits = queryProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2)
+ traitDict = {}
+ for trait in traits:
+ traitDict[trait.id] = trait
+
+ step2 = time.time()
+ print
+ #XZ, 03/04/2009: Xiaodong changed Data to ProbeSetData
+ cursor.execute('''
+ SELECT
+ ProbeSetXRef.ProbeSetId,
+ ProbeSetData.StrainId,
+ ProbeSetData.value
+ FROM
+ ProbeSetXRef
+ Left Join ProbeSetData ON
+ ProbeSetXRef.DataId = ProbeSetData.Id
+ WHERE
+ ProbeSetXRef.ProbeSetFreezeId = %s AND
+ ProbeSetXRef.ProbeSetId >= %s AND
+ ProbeSetXRef.ProbeSetId <= %s
+ ''' % (freezeId, ProbeSetId1, ProbeSetId2))
+
+ step3 = time.time()
+ totalrows = 0
+ somerows = cursor.fetchmany(1000)
+ while len(somerows) > 0:
+ totalrows += len(somerows)
+ for row in somerows:
+ # this line of code can execute more than one million times
+ traitDict[row[0]].strainData[int(row[1])] = row[2]
+ somerows = cursor.fetchmany(1000)
+
+ #cursor.close()
+ step4 = time.time()
+
+ time1 = step2 - step1
+ time2 = step3 - step2
+ time3 = step4 - step3
+ time4 = step4 - step1
+ #print "%f %f %f %f %d rows" % (round(time1, 2),
+ # round(time2, 2),
+ # round(time3, 2),
+ # round(time4, 2),
+ # totalrows)
+ #print "Fetched %d traits" % len(traits)
+ return traits
+
+
+# def noneFilter : string or none -> string
+# to replace a possible None by an empty string
+def noneFilter(x):
+ if x is None:
+ return ""
+ else:
+ return x
+
+# queryProbeSetTraits: Cursor -> Integer -> dictof Trait
+def queryProbeSetTraits(cursor, freezeId):
+ """
+ To locate all of the traits in a particular probeset
+ """
+ qry = '''
+ SELECT
+ ProbeSet.Id,
+ ProbeSet.Name,
+ ProbeSet.description,
+ ProbeSet.symbol,
+ ProbeSet.Chr,
+ ProbeSet.Mb,
+ ProbeSet.GeneId,
+ ProbeSetXRef.DataId
+ FROM
+ ProbeSet,
+ ProbeSetXRef
+ WHERE
+ ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+ ProbeSetXRef.ProbeSetFreezeId = %s
+ ORDER BY ProbeSet.Id
+ ''' % freezeId
+
+ cursor.execute(qry)
+ rows = cursor.fetchall()
+ traits = []
+
+ for row in rows:
+ row = map(noneFilter, row)
+ trait = ProbeSetTrait(id=row[0], name=row[1],
+ description=row[2],
+ chromosome=row[4],
+ MB=row[5],
+ symbol=row[3],
+ GeneId=row[6])
+ trait.dataId = row[7]
+ traits.append(trait)
+
+ return traits
+
+
+# queryProbeSetTraits2: Cursor -> Integer -> dictof Trait
+def queryProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2):
+ """
+ To locate all of the traits in a particular probeset
+ """
+ qry = '''
+ SELECT
+ ProbeSet.Id,
+ ProbeSet.Name,
+ ProbeSet.description,
+ ProbeSet.symbol,
+ ProbeSet.Chr,
+ ProbeSet.Mb,
+ ProbeSet.GeneId,
+ ProbeSetXRef.DataId
+ FROM
+ ProbeSet,
+ ProbeSetXRef
+ WHERE
+ ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+ ProbeSetXRef.ProbeSetFreezeId = %s AND
+ ProbeSet.Id >= %s AND
+ ProbeSet.Id <= %s
+ ORDER BY ProbeSet.Id
+ ''' % (freezeId, ProbeSetId1, ProbeSetId2)
+
+ cursor.execute(qry)
+ rows = cursor.fetchall()
+ traits = []
+
+ for row in rows:
+ row = map(noneFilter, row)
+ trait = ProbeSetTrait(id=row[0], name=row[1],
+ description=row[2],
+ chromosome=row[4],
+ MB=row[5],
+ symbol=row[3],
+ GeneId=row[6])
+ trait.dataId = row[7]
+ traits.append(trait)
+
+ return traits
+
+
+# queryPublishTraits : Cursor -> arrayof Trait
+def queryPublishTraits(cursor, freezeId):
+ """
+ To locate all published traits
+ """
+ qry = '''
+ SELECT
+ Publication.Id,
+ Publication.Name,
+ Publication.PhenoType,
+ Publication.Authors,
+ Publication.Title,
+ Publication.Year,
+ Publication.PubMed_ID,
+ PublishXRef.DataId
+ FROM
+ Publication,
+ PublishXRef
+ WHERE
+ PublishXRef.PublishFreezeId = %s AND
+ PublishXRef.PublishId = Publication.Id
+ ''' % freezeId
+
+ qry = '''
+ SELECT
+ Publication.Id,
+ PublishXRef.Id,
+ Phenotype.Pre_publication_description,
+ Phenotype.Post_publication_description,
+ Publication.Authors,
+ Publication.Title,
+ Publication.Year,
+ Publication.PubMed_ID,
+ PublishXRef.DataId
+ FROM
+ Publication, PublishXRef, Phenotype, PublishFreeze
+ WHERE
+ PublishFreeze.Id = %s AND
+ PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND
+ PublishXRef.PublicationId = Publication.Id AND
+ PublishXRef.PhenotypeId = Phenotype.Id
+ ''' % freezeId
+ cursor.execute(qry)
+ rows = cursor.fetchall()
+ traits = []
+ for row in rows:
+ PhenotypeString = row[3]
+ if not row[7] and row[2]:
+ PhenotypeString = row[2]
+ trait = PublishTrait(id=row[0], name= '%s' %row[1],
+ phenotype=PhenotypeString,
+ authors=row[4],
+ title=row[5],
+ year=row[6],
+ href=row[7])
+ trait.dataId = row[8]
+ traits.append(trait)
+
+ return traits
+
+# queryGenotypeTraits : Cursor -> arrayof Trait
+def queryGenotypeTraits(cursor, freezeId):
+ """
+ To locate all traits in the genotype
+ """
+ qry = '''
+ SELECT
+ Geno.Id,
+ Geno.Name,
+ Geno.Chr,
+ GenoXRef.DataId,
+ Geno.Mb
+ FROM
+ Geno,
+ GenoXRef
+ WHERE
+ GenoXRef.GenoId = Geno.Id
+ AND GenoXRef.GenoFreezeId = %s
+ ''' % freezeId
+ cursor.execute(qry)
+ rows = cursor.fetchall()
+ traits = []
+
+ for row in rows:
+ trait = GenotypeTrait(id=row[0], name=row[1],
+ chromosome=row[2], MB=row[4])
+ trait.dataId = row[3]
+ traits.append(trait)
+
+ return traits
+
+# queryProbeSetTraitByName : Cursor -> string -> Trait
+# to find a particular trait given its name
+def queryProbeSetTraitByName(cursor, name):
+ qry = '''
+ SELECT
+ ProbeSet.Id,
+ ProbeSet.Name,
+ ProbeSet.description,
+ ProbeSet.symbol,
+ ProbeSet.Chr,
+ ProbeSet.Mb,
+ ProbeSet.GeneId
+ FROM
+ ProbeSet
+ WHERE
+ ProbeSet.Name = "%s"
+ ''' % name
+ #print qry
+ cursor.execute(qry)
+ row = cursor.fetchone()
+
+ # convert a MySQL NULL value to an empty string
+ # for gene symbol
+ if row[3] is None:
+ sym = ""
+ else:
+ sym = row[3]
+
+ return ProbeSetTrait(id=row[0], name=row[1], description=row[2],
+ symbol=sym, chromosome=row[4], MB=row[5],
+ GeneId=row[6])
+
+
+# queryTraits : Cursor -> string -> string -> arrayof Traits
+# to find all of the traits whose descriptions match a certain string in a
+# particular database
+def queryTraits(cursor, dbId, queryString):
+ # we do this in two steps:
+ # first we get the data id for the matching traits
+ qry = '''
+ SELECT
+ ProbeSet.Id,
+ ProbeSet.Name,
+ ProbeSet.description,
+ ProbeSetXRef.DataId
+ FROM
+ ProbeSet,
+ ProbeSetXRef
+ WHERE
+ ProbeSetXRef.ProbeSetFreezeId = %s AND
+ ProbeSet.Id = ProbeSetXRef.ProbeSetId AND
+ ProbeSet.description LIKE "%%%s%%"
+ ''' % (dbId, queryString)
+ # print qry
+ cursor.execute(qry)
+
+ if cursor.rowcount == 0:
+ print "No traits found"
+ return []
+ else:
+ print "%s traits found" % (cursor.rowcount)
+
+ # maybe fetchall is bad; we will see
+ traits = []
+ for row in cursor.fetchall():
+ myTrait = Trait(row[0], row[1], row[2])
+ myTrait.dataId = row[3]
+ traits.append(myTrait)
+
+ # second we pull all of the strain data for each trait
+ print "Retrieving individual trait data..."
+ for trait in traits:
+ trait.populateStrainData(cursor, trait.dataId)
+ print "%s (%s) -- %s" % (trait.name, trait.id, trait.description)
+
+ print "done"
+ return traits
+
+# queryProbeSetFreezes : Cursor -> arrayof String,String tuples
+# to return the short and long name for each ProbeSetFreeze
+# this function is designed specifically for building
+# a database selector
+def queryProbeSetFreezes(cursor):
+ cursor.execute("""
+ SELECT
+ ProbeSetFreeze.Name,
+ ProbeSetFreeze.FullName
+ FROM
+ ProbeSetFreeze
+ ORDER BY
+ ProbeSetFreeze.Name
+ """)
+
+ # for now, fetchall returns the data as a list of tuples
+ # which is what we want
+ return list(cursor.fetchall())
+
+# queryProbeSetFreezeIdName: Cursor -> String -> String, String
+# this function returns the
+# id and the long name of a probesetfreeze given its name
+# once again, it's designed specifically for building
+# the database selector
+def queryProbeSetFreezeIdName(cursor, name):
+ qry = ('''
+ SELECT
+ ProbeSetFreeze.Id,
+ ProbeSetFreeze.FullName
+ FROM
+ ProbeSetFreeze
+ WHERE
+ ProbeSetFreeze.Name = "%s"
+ ''' % name)
+ #print qry
+ cursor.execute(qry)
+
+ row = cursor.fetchone()
+ return row
+
+# queryProbeSetFreezeName: Cursor -> String -> String
+# to return the name of a probe set freeze given its id
+def queryProbeSetFreezeName(cursor, id):
+ cursor.execute('''
+ SELECT
+ ProbeSetFreeze.FullName
+ FROM
+ ProbeSetFreeze
+ WHERE
+ ProbeSetFreeze.Id = %s
+ ''' % id)
+
+ row = cursor.fetchone()
+ return row[0]
+
+# dbNameToTypeId : Cursor -> String -> (String, String)
+# to convert a database name to a (type, id) pair
+def dbNameToTypeId(cursor, name):
+ types = ["ProbeSet", "Geno", "Publish"]
+ for type_ in types:
+ count = cursor.execute('''
+ SELECT
+ %sFreeze.Id
+ FROM
+ %sFreeze
+ WHERE
+ Name = "%s"
+ ''' % (type_, type_, name))
+
+ if count != 0:
+ id = cursor.fetchone()[0]
+ return type_, id
+
+ return None, None
+
+# dbTypeIdToName : Cursor -> String -> String -> String
+# to convert a database (type,id) pair into a name
+def dbTypeIdToName(cursor, dbType, dbId):
+ cursor.execute('''
+ SELECT
+ %sFreeze.Name
+ FROM
+ %sFreeze
+ WHERE
+ Id = %s
+ ''' % (dbType, dbType, dbId))
+
+ row = cursor.fetchone()
+ return row[0]
+
+#XZ, July 21, 2010: I add this function
+def getSpeciesIdByDbTypeId (cursor, dbType, dbId):
+ cursor.execute('''
+ SELECT
+ SpeciesId
+ FROM
+ InbredSet, %sFreeze
+ WHERE
+ %sFreeze.Id = %s
+ and InbredSetId = InbredSet.Id
+ ''' % (dbType, dbType, dbId))
+
+ row = cursor.fetchone()
+ return row[0]
+
+
+# queryStrainCount : Cursor -> int
+# return the number of strains in the database
+def queryStrainCount(cursor):
+ cursor.execute('''
+ SELECT
+ Max(Strain.Id)
+ FROM
+ Strain
+ ''')
+ return (cursor.fetchone())[0]