about summary refs log tree commit diff
path: root/web/webqtl/compareCorrelates
diff options
context:
space:
mode:
authorroot2012-05-08 18:39:56 -0500
committerroot2012-05-08 18:39:56 -0500
commitea46f42ee640928b92947bfb204c41a482d80937 (patch)
tree9b27a4eb852d12539b543c3efee9d2a47ef470f3 /web/webqtl/compareCorrelates
parent056b5253fc3857b0444382aa39944f6344dc1ceb (diff)
downloadgenenetwork2-ea46f42ee640928b92947bfb204c41a482d80937.tar.gz
Add all the source codes into the github.
Diffstat (limited to 'web/webqtl/compareCorrelates')
-rwxr-xr-xweb/webqtl/compareCorrelates/MultipleCorrelationPage.py108
-rwxr-xr-xweb/webqtl/compareCorrelates/__init__.py0
-rwxr-xr-xweb/webqtl/compareCorrelates/correlation.py359
-rwxr-xr-xweb/webqtl/compareCorrelates/htmlModule.py279
-rwxr-xr-xweb/webqtl/compareCorrelates/multitrait.py1121
-rwxr-xr-xweb/webqtl/compareCorrelates/trait.py1074
6 files changed, 2941 insertions, 0 deletions
diff --git a/web/webqtl/compareCorrelates/MultipleCorrelationPage.py b/web/webqtl/compareCorrelates/MultipleCorrelationPage.py
new file mode 100755
index 00000000..6a464ab6
--- /dev/null
+++ b/web/webqtl/compareCorrelates/MultipleCorrelationPage.py
@@ -0,0 +1,108 @@
+# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU Affero General Public License
+# as published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the GNU Affero General Public License for more details.
+#
+# This program is available from Source Forge: at GeneNetwork Project
+# (sourceforge.net/projects/genenetwork/).
+#
+# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
+# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
+#
+#
+#
+# This module is used by GeneNetwork project (www.genenetwork.org)
+#
+# Created by GeneNetwork Core Team 2010/08/10
+#
+# Last updated by GeneNetwork Core Team 2010/10/20
+
+from base.templatePage import templatePage
+from utility import webqtlUtil
+from base.webqtlTrait import webqtlTrait
+from base import webqtlConfig
+import multitrait
+
+# XZ, 09/09/2008: After adding several traits to collection, click "Compare Correlates" button,
+# XZ, 09/09/2008: This class will generate what you see.
+# XZ, 09/09/2008: This class just collect the input, then pass them to multitrait.py
+#########################################
+#     Multiple Correlation Page
+#########################################
+class MultipleCorrelationPage(templatePage):
+
+        def __init__(self,fd):
+
+                templatePage.__init__(self, fd)
+
+                if not self.openMysql():
+                        return
+                if not fd.genotype:
+                        fd.readData()
+
+                self.searchResult = fd.formdata.getvalue('searchResult')
+                if not self.searchResult:
+                        heading = 'Compare Correlates'
+                        detail = ['You need to select at least two traits in order to generate correlation matrix.']
+                        self.error(heading=heading,detail=detail)
+                        print 'Content-type: text/html\n'
+                        self.write()
+                        return
+                if type("1") == type(self.searchResult):
+                        self.searchResult = [self.searchResult]
+
+                if self.searchResult:
+                        if len(self.searchResult) > 100:
+                                heading = 'Compare Correlates'
+                                detail = ['In order to display Compare Correlates properly, Do not select more than %d traits for Compare Correlates.' % 100]
+                                self.error(heading=heading,detail=detail)
+                                print 'Content-type: text/html\n'
+                                self.write()
+                                return
+                        else:
+                                pass
+
+                        traitList = []
+                        for item in self.searchResult:
+                                thisTrait = webqtlTrait(fullname=item, cursor=self.cursor)
+                                thisTrait.retrieveInfo()
+                                traitList.append(thisTrait)
+                else:
+                        heading = 'Compare Correlates'
+                        detail = [HT.Font('Error : ',color='red'),HT.Font('Error occurs while retrieving data from database.',color='black')]
+                        self.error(heading=heading,detail=detail)
+                        print 'Content-type: text/html\n'
+                        self.write()
+                        return
+
+
+                ##########
+                filename= webqtlUtil.genRandStr("mult_")
+                fp = open(webqtlConfig.IMGDIR+filename, 'wb')
+                fp.write('%s\n' % fd.RISet)
+                for thisTrait in traitList:
+                        fp.write("%s,%s,%s\n" % (thisTrait.db.type,thisTrait.db.id,thisTrait.name))
+                fp.close()
+                fd.formdata["filename"] = filename
+
+                params = {"filename":filename, "targetDatabase":"",
+                        "threshold":0.5, "subsetSize":10,
+                        "correlation":"pearson", "subsetCount":10,
+                        "firstRun":"1"}
+                results = []
+                txtOutputFileName = ""
+
+                self.dict['body'] = multitrait.TraitCorrelationPage(fd, params, self.cursor, traitList, results, 
+                                                        fd.RISet,txtOutputFileName).dict['body']
+                self.dict['title'] = 'Compare Correlates'
+            
+
+
+
diff --git a/web/webqtl/compareCorrelates/__init__.py b/web/webqtl/compareCorrelates/__init__.py
new file mode 100755
index 00000000..e69de29b
--- /dev/null
+++ b/web/webqtl/compareCorrelates/__init__.py
diff --git a/web/webqtl/compareCorrelates/correlation.py b/web/webqtl/compareCorrelates/correlation.py
new file mode 100755
index 00000000..f2ea55b3
--- /dev/null
+++ b/web/webqtl/compareCorrelates/correlation.py
@@ -0,0 +1,359 @@
+# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU Affero General Public License
+# as published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the GNU Affero General Public License for more details.
+#
+# This program is available from Source Forge: at GeneNetwork Project
+# (sourceforge.net/projects/genenetwork/).
+#
+# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
+# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
+#
+#
+#
+# This module is used by GeneNetwork project (www.genenetwork.org)
+#
+# Created by GeneNetwork Core Team 2010/08/10
+#
+# Last updated by GeneNetwork Core Team 2010/10/20
+
+# correlation.py
+# functions for computing correlations for traits
+#
+# Originally, this code was designed to compute Pearson product-moment
+# coefficents. The basic function calcPearson scans the strain data
+# for the two traits and drops data for a strain unless both traits have it.
+# If there are less than six strains left, we conclude that there's
+# insufficent data and drop the correlation.
+#
+# In addition, this code can compute Spearman rank-order coefficents using
+# the calcSpearman function.
+
+#Xiaodong changed the dependancy structure
+import numarray
+import numarray.ma as MA
+import time
+
+import trait
+
+# strainDataUnion : StrainData -> StrainData -> array, array
+def strainDataUnion(s1, s2):
+    # build lists of values that both have
+    # and make sure that both sets of values are in the same order
+    s1p = []
+    s2p = []
+    sortedKeys = s1.keys()
+    sortedKeys.sort()
+    for s in sortedKeys:
+        if s2.has_key(s):
+            s1p.append(s1[s])
+            s2p.append(s2[s])
+
+    return (numarray.array(s1p, numarray.Float64),
+            numarray.array(s2p, numarray.Float64))
+
+# calcCorrelationHelper : array -> array -> float
+def calcCorrelationHelper(s1p, s2p):
+    # if the traits share less than six strains, then we don't
+    # bother with the correlations
+    if len(s1p) < 6:
+        return 0.0
+    
+    # subtract by x-bar and y-bar elementwise
+    #oldS1P = s1p.copy()
+    #oldS2P = s2p.copy()
+    
+    s1p = (s1p - numarray.average(s1p)).astype(numarray.Float64)
+    s2p = (s2p - numarray.average(s2p)).astype(numarray.Float64)
+
+    # square for the variances 
+    s1p_2 = numarray.sum(s1p**2)
+    s2p_2 = numarray.sum(s2p**2)
+
+    try: 
+        corr = (numarray.sum(s1p*s2p)/
+                numarray.sqrt(s1p_2 * s2p_2))
+    except ZeroDivisionError:
+        corr = 0.0
+
+    return corr
+    
+# calcSpearman : Trait -> Trait -> float
+def calcSpearman(trait1, trait2):
+    s1p, s2p = strainDataUnion(trait1.strainData,
+                               trait2.strainData)
+    s1p = rankArray(s1p)
+    s2p = rankArray(s2p)
+    return calcCorrelationHelper(s1p, s2p)
+
+# calcPearson : Trait -> Trait -> float
+def calcPearson(trait1, trait2):
+    # build lists of values that both have
+    # and make sure that both sets of values are in the same order
+    s1p, s2p = strainDataUnion(trait1.strainData,
+                               trait2.strainData)
+
+    return calcCorrelationHelper(s1p, s2p)
+
+# buildPearsonCorrelationMatrix: (listof n traits) -> int s -> n x s matrix, n x s matrix
+#def buildPearsonCorrelationMatrix(traits, sc):
+#    dim = (len(traits), sc)
+#    matrix = numarray.zeros(dim, MA.Float64)
+#    testMatrix = numarray.zeros(dim, MA.Float64)
+
+#    for i in range(len(traits)):
+#        sd = traits[i].strainData
+#        for key in sd.keys():
+#            matrix[i,int(key) - 1] = sd[key]
+#            testMatrix[i,int(key) - 1] = 1
+
+def buildPearsonCorrelationMatrix(traits, commonStrains):
+    dim = (len(traits), len(commonStrains))
+    matrix = numarray.zeros(dim, MA.Float64)
+    testMatrix = numarray.zeros(dim, MA.Float64)
+
+    for i in range(len(traits)):
+        sd = traits[i].strainData
+        keys = sd.keys()
+        for j in range(0, len(commonStrains)):
+            if keys.__contains__(commonStrains[j]):
+                matrix[i,j] = sd[commonStrains[j]]
+                testMatrix[i,j] = 1
+
+    return matrix, testMatrix
+
+# buildSpearmanCorrelationMatrix: (listof n traits) -> int s -> n x s matrix, n x s matrix
+def buildSpearmanCorrelationMatrix(traits, sc):
+    dim = (len(traits), sc)
+    matrix = numarray.zeros(dim, MA.Float64)
+    testMatrix = numarray.zeros(dim, MA.Float64)
+
+    def customCmp(a, b):
+        return cmp(a[1], b[1])
+    
+    for i in range(len(traits)):
+        # copy strain data to a temporary list and turn it into
+        # (strain, expression) pairs
+        sd = traits[i].strainData
+        tempList = []
+        for key in sd.keys():
+            tempList.append((key, sd[key]))
+
+        # sort the temporary list by expression
+        tempList.sort(customCmp)
+        
+        for j in range(len(tempList)):
+            # k is the strain id minus 1
+            # 1-based strain id -> 0-based column index
+            k = int(tempList[j][0]) - 1
+
+            # j is the rank of the particular strain
+            matrix[i,k] = j
+
+            testMatrix[i,k] = 1
+
+    return matrix, testMatrix
+            
+def findLargestStrain(traits, sc):
+    strainMaxes = []
+    for i in range(len(traits)):
+        keys = traits[i].strainData.keys()
+        strainMaxes.append(max(keys))
+
+    return max(strainMaxes)
+
+def findCommonStrains(traits1, traits2):
+    commonStrains = []
+    strains1 = []
+    strains2 = []
+
+    for trait in traits1:
+        keys = trait.strainData.keys()
+        for key in keys:
+            if not strains1.__contains__(key):
+                strains1.append(key)
+
+    for trait in traits2:
+        keys = trait.strainData.keys()
+        for key in keys:
+            if not strains2.__contains__(key):
+                strains2.append(key)
+ 
+    for strain in strains1:
+        if strains2.__contains__(strain):
+           commonStrains.append(strain)
+
+    return commonStrains
+
+def calcPearsonMatrix(traits1, traits2, sc, strainThreshold=6,
+                      verbose = 0):
+    return calcMatrixHelper(buildPearsonCorrelationMatrix,
+                            traits1, traits2, sc, strainThreshold,
+                            verbose)
+
+def calcProbeSetPearsonMatrix(cursor, freezeId, traits2, strainThreshold=6,
+                      verbose = 0):
+
+    cursor.execute('select ProbeSetId from ProbeSetXRef where ProbeSetFreezeId = %s order by ProbeSetId' % freezeId)
+    ProbeSetIds = cursor.fetchall()
+
+    results = []
+    i=0
+    while i<len(ProbeSetIds):
+        ProbeSetId1 = ProbeSetIds[i][0]
+        if (i+4999) < len(ProbeSetIds):
+            ProbeSetId2 = ProbeSetIds[i+4999][0]
+        else:
+            ProbeSetId2 = ProbeSetIds[len(ProbeSetIds)-1][0]
+
+        traits1 = trait.queryPopulatedProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2) # XZ,09/10/2008: add module name 'trait.'
+        SubMatrix = calcMatrixHelper(buildPearsonCorrelationMatrix,
+                                     traits1, traits2, 1000, strainThreshold,
+                                     verbose)
+        results.append(SubMatrix)
+        i += 5000
+
+    returnValue = numarray.zeros((len(ProbeSetIds), len(traits2)), MA.Float64)
+    row = 0
+    col = 0
+    for SubMatrix in results:
+        for i in range(0, len(SubMatrix)):
+            for j in range(0, len(traits2)):
+                returnValue[row,col] = SubMatrix[i,j]
+                col += 1
+            col = 0
+            row +=1
+
+    return returnValue
+
+    
+
+# note: this code DOES NOT WORK, especially in cases where
+# there are missing observations (e.g. when comparing traits
+# from different probesetfreezes)
+def calcSpearmanMatrix(traits1, traits2, sc, strainThreshold=6,
+                       verbose=0):
+    return calcMatrixHelper(buildSpearmanCorrelationMatrix,
+                            traits1, traits2, sc, strainThreshold,
+                            verbose)
+    
+def calcMatrixHelper(builder, traits1, traits2, sc, strainThreshold,
+                     verbose):
+
+    # intelligently figure out strain count
+    step0 = time.time()
+    #localSC = max(findLargestStrain(traits1, sc),
+    #              findLargestStrain(traits2, sc))
+
+    commonStrains = findCommonStrains(traits1, traits2)
+
+    buildStart = time.time()
+    matrix1, test1 = builder(traits1, commonStrains)
+    matrix2, test2 = builder(traits2, commonStrains)
+    buildTime = time.time() - buildStart
+
+    step1 = time.time()
+
+    ns = numarray.innerproduct(test1, test2)
+
+    # mask all ns less than strainThreshold so the correlation values
+    # end up masked
+    # ns is now a MaskedArray and so all ops involving ns will be
+    # MaskedArrays
+    ns = MA.masked_less(ns, strainThreshold, copy=0)
+        
+    # divide-by-zero errors are automatically masked
+    #ns = -1.0/ns
+
+    step2 = time.time()
+    
+    # see comment above to find out where this ridiculously cool
+    # matrix algebra comes from
+    xs = numarray.innerproduct(matrix1, test2)
+    ys = numarray.innerproduct(test1, matrix2)
+    xys = numarray.innerproduct(matrix1, matrix2)
+
+    # use in-place operations to try to speed things up
+    numarray.power(matrix1, 2, matrix1)
+    numarray.power(matrix2, 2, matrix2)
+
+    x2s = numarray.innerproduct(matrix1, test2)
+    y2s = numarray.innerproduct(test1, matrix2)
+
+    step3 = time.time()
+
+    # parens below are very important
+    # the instant we touch ns, arrays become masked and
+    # computation is much, much slower
+    top = ns*xys - (xs*ys)
+    bottom1 = ns*x2s - (xs*xs)
+    bottom2 = ns*y2s - (ys*ys)
+    bottom = MA.sqrt(bottom1*bottom2)
+
+    # mask catches floating point divide-by-zero problems here
+    corrs = top / bottom
+
+    step4 = time.time()
+
+    # we define undefined correlations as zero even though there
+    # is a mathematical distinction
+    returnValue = MA.filled(corrs, 0.0)
+
+    step5 = time.time()
+    
+    #print ("calcMatrixHelper: %.2f s, %.2f s, %.2f s, %.2f s, %.2f s, %.2f s, total: %.2f s"
+    #       %(buildTime,
+    #         buildStart - step0,
+    #         step2 - step1,
+    #         step3 - step2,
+    #         step4 - step3,
+    #         step5 - step4,
+    #         step5 - step0))
+
+    if verbose:
+        print "Matrix 1:", matrix1
+        print "Matrix 2:", matrix2
+        print "Ns:", ns
+        print "Xs", xs
+        print "Ys", ys
+        print "XYs:", xys
+        print "Top:", top
+        print "Bottom 1:", bottom1
+        print "Bottom 2:", bottom2
+        print "Bottom:", bottom
+        print "Corrs:", corrsa
+
+        
+    return returnValue
+    
+    
+
+# rankArray: listof float -> listof float
+# to generate a companion list to alof with
+# the actual value of each element replaced by the
+# value's rank
+def rankArray(floatArray):
+    # first we save the original index of each element
+    tmpAlof = []
+    returnArray = numarray.zeros(len(floatArray), numarray.Float64)
+    i = 0
+    for i in range(len(floatArray)):
+        tmpAlof.append((i,floatArray[i]))
+
+    # now we sort by the data value
+    def customCmp(a,b): return cmp(a[1],b[1])
+    tmpAlof.sort(customCmp)
+
+    # finally we use the new rank data to populate the
+    # return array
+    for i in range(len(floatArray)):
+        returnArray[tmpAlof[i][0]] = i+1
+
+    return returnArray
diff --git a/web/webqtl/compareCorrelates/htmlModule.py b/web/webqtl/compareCorrelates/htmlModule.py
new file mode 100755
index 00000000..ebba3b86
--- /dev/null
+++ b/web/webqtl/compareCorrelates/htmlModule.py
@@ -0,0 +1,279 @@
+# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU Affero General Public License
+# as published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the GNU Affero General Public License for more details.
+#
+# This program is available from Source Forge: at GeneNetwork Project
+# (sourceforge.net/projects/genenetwork/).
+#
+# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
+# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
+#
+#
+#
+# This module is used by GeneNetwork project (www.genenetwork.org)
+#
+# Created by GeneNetwork Core Team 2010/08/10
+#
+# Last updated by GeneNetwork Core Team 2010/10/20
+
+import sys
+import string
+import os
+import MySQLdb
+import cgi
+
+from htmlgen import HTMLgen2 as HT
+
+from base import webqtlConfig
+
+
+# XZ 08/14/2008: When I tried to replace 'from webqtlConfig import *' with 'import webqtlConfig'
+# XZ 08/14/2008: I found some problems. I discussed with Hongqiang and the below is conclusion.
+# XZ 08/14/2008: The program uses webqtlConfig.DB_NAME, webqtlConfig.MYSQL_SERVER and so on
+# XZ 08/14/2008: without 'import webqtlConfig'. This program will not work.
+# XZ 08/14/2008: CONFIG_htmlpath doesn't exist in webqtlConfig.py
+# XZ 08/14/2008: Hongqian said this was done by Fan Zhang, and this program was not tested.
+# XZ 08/14/2008: So nobody realize these bugs.
+
+# XZ, 09/09/2008: This function is not called any where. 
+# XZ, 09/09/2008: Actually, I don't think this function works.
+def genHeaderFooter(i=1,title='',basehref='',js1='',js2='',layer='',body=''):
+	"""
+	generate footer and header HTML code
+	default is header
+	i = 0 is footer+header
+	i = 1 is header
+	i = 2 is footer	
+	"""
+	try:
+                temp_file = CONFIG_htmlpath + 'beta-template.html'
+		fp = open(temp_file, 'rb')
+		template = fp.read()
+		fp.close()
+		template = template % (title,basehref,js1,js2,layer,body, "")
+		header,footer = string.split(template,'<!-- split from Here -->')
+		if i == 0:
+			return header + footer
+		elif i == 1:
+			return header
+		elif i == 2:
+			return footer
+		else:
+			return ""
+	except:
+		if i == 0:
+			return "header + footer"
+		elif i == 1:
+			return "header"
+		elif i == 2:
+			return "footer"
+		else:
+			return ""
+
+# XZ, 09/09/2008: This function is only used in multitrait.py where it is called with value assigned to db.
+# XZ, 09/09/2008: So the try-except block is not executed.
+# XZ, 09/09/2008: This explains why no error was generated even without 'import webqtlConfig'
+def genDatabaseMenu(db = None, public =1, RISetgp = 'BXD', selectname = 'database', selected = ""):
+	"""
+	generate database Menu
+	public = 0 : search3.html databases Menu
+	public = 1 : search.html databases Menu
+	"""
+	if not db:
+		try:
+			# import MySQLdb
+			# con = MySQLdb.Connect(db='db_webqtl')
+			# Modified by Fan Zhang
+			con = MySQLdb.Connect(db=webqtlConfig.DB_NAME,host=webqtlConfig.MYSQL_SERVER, user=webqtlConfig.DB_USER,passwd=webqtlConfig.DB_PASSWD)
+			db = con.cursor()
+		except:
+			return "Connect MySQL Server Error"
+	else:
+		pass
+	
+	databaseMenu = HT.Select(name=selectname)
+	nmenu = 0
+
+	# here's a hack: bxd and bxd300 can be correlated against each other
+	# if either of those are the group, we put in special SQL that pulls both
+	if RISetgp in ("BXD", "BXD300"):
+		ibsNameQry = '(InbredSet.Name = "BXD" OR InbredSet.Name = "BXD300")'
+	else:
+		ibsNameQry = 'InbredSet.Name = "%s"' % RISetgp
+	
+	#Publish Database
+	db.execute('''
+		   SelecT
+		     PublishFreeze.FullName,
+		     PublishFreeze.Name
+		   from
+		     PublishFreeze,
+		     InbredSet
+		   where
+		     PublishFreeze.InbredSetId = InbredSet.Id and
+		     %s
+		   ''' % ibsNameQry)
+	for item in db.fetchall():
+		databaseMenu.append(item)
+		nmenu += 1
+	
+	#Genome Database
+	db.execute('''
+		   SelecT
+		     GenoFreeze.FullName,
+		     GenoFreeze.Name
+		   from
+		     GenoFreeze,InbredSet
+		   where
+		     GenoFreeze.InbredSetId = InbredSet.Id and
+		     %s
+		   ''' % ibsNameQry)
+	for item in db.fetchall():
+		databaseMenu.append(item)
+		nmenu += 1
+	
+	#Microarray Database
+	db.execute('SelecT Id, Name from Tissue')
+	for item in db.fetchall():
+		TId, TName = item
+		databaseMenuSub = HT.Optgroup(label = '%s ------' % TName)
+		db.execute('''
+			   SelecT
+			     ProbeSetFreeze.FullName,
+			     ProbeSetFreeze.Name
+			   from
+			     ProbeSetFreeze,
+			     ProbeFreeze,
+			     InbredSet
+			   where
+			     ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and
+			     ProbeFreeze.TissueId = %d and
+			     ProbeSetFreeze.public > %d and
+			     ProbeFreeze.InbredSetId = InbredSet.Id and
+			     %s
+			   order by
+			     ProbeSetFreeze.CreateTime desc,
+			     ProbeSetFreeze.AvgId
+			   '''  % (TId,public,ibsNameQry))
+		for item2 in db.fetchall():
+			databaseMenuSub.append(item2)
+			nmenu += 1
+		databaseMenu.append(databaseMenuSub)
+	
+	if nmenu:
+		if selected:
+			databaseMenu.selected.append(selected)
+		return str(databaseMenu)
+	else:
+		return ''
+
+
+# XZ, 09/09/2008: This function is not called any where. 
+# XZ, 09/09/2008: Actually, I don't think this function works.
+# XZ, 09/09/2008: There is no 'DataForm' file now. It should be webqtlForm.py
+def genRISample():
+	import glob
+	import reaper
+	import random
+	import math
+	import webqtlUtil
+	risets = filter(lambda X:X.find('F2')<0, map(os.path.basename, glob.glob(os.path.join(CONFIG_genodir, "*.geno"))))
+	risets = map(lambda X:X.split('.')[0], risets)
+	risets.remove("BayXSha")
+	risets.sort()
+	body = HT.Blockquote()
+	NPerRow = 6
+	for item in risets:
+		values = []
+		if item == 'AXBXA': item2='AXB/BXA'
+		elif item == 'HXBBXH': item2='HXB/BXH'
+		else: item2=item
+		body.append(HT.Paragraph(item2, Class='subtitle'))
+		tbl = HT.TableLite(Class="collap")
+		dataset = reaper.Dataset()
+		dataset.read(os.path.join(CONFIG_genodir, "%s.geno"%item))
+		prgy = webqtlUtil.ParInfo[item] + list(dataset.prgy)
+		
+		mean = random.random()*100
+		variance = random.random()*500
+		variables = []
+		while len(variables) < len(prgy):
+			S = 2
+			while (S>=1):
+				U1= random.random()
+				U2= random.random()
+				V1= 2*U1-1.0
+				V2= 2*U2-1.0
+				S=V1*V1+V2*V2
+			X= math.sqrt(-2 *  math.log(S) / S) * V1
+			Y= math.sqrt(-2 *  math.log(S) / S) * V2	
+			variables.append(mean + math.sqrt(variance) * X)
+			variables.append(mean + math.sqrt(variance) * Y) 
+		
+		tempTR = HT.TR()
+		for i, strain in enumerate(prgy):
+			if i and i%NPerRow==0:
+				tbl.append(tempTR)
+				tempTR = HT.TR()
+			if random.random() < 0.2:
+				variable = 'X'
+			else:
+				variable = "%2.3f" % variables[i]
+			
+			tempTR.append(HT.TD(strain, Class="strains", width=80))
+			tempTR.append(HT.TD(variable, Class="values", width=60))
+			values.append(variable)
+		
+		for j in range(NPerRow-i%NPerRow-1):
+			tempTR.append(HT.TD())
+		tbl.append(tempTR)	
+		body.append(tbl)
+		body.append(HT.Paragraph("Copy the following line to paste into the GeneNetwork entry box:"))
+		body.append(HT.Code(string.join(values, " ")))
+		body.append(HT.HR(width="90%"))
+	return body
+		
+if __name__ == "__main__":
+	if os.environ.has_key('SCRIPT_FILENAME'):
+		script_filename = os.environ['SCRIPT_FILENAME']
+	else:
+		script_filename = ''
+	#Used as cgi script
+	if script_filename and script_filename[-2:] == 'py':
+		print 'Content-type: text/html\n'
+		formdata = cgi.FieldStorage()
+		sys.stderr = sys.stdout
+		try:
+			getID = string.lower(formdata.getvalue('get'))
+		except:
+			getID = ''
+	#Used as command
+	else:
+		if len(sys.argv) >= 2:
+			getID = string.lower(sys.argv[1])
+		else:
+			getID = ''
+	
+	if getID == 'headerfooter':
+		print genHeaderFooter(0)
+	elif getID == 'header':
+		print genHeaderFooter(1)
+	elif getID == 'footer':
+		print genHeaderFooter(2)
+	elif getID == 'databasemenu':
+		print genDatabaseMenu(public=0)
+	elif getID == 'datasample':
+		print genRISample()
+	else:
+		print genHeaderFooter(0)
+else:
+	pass
+
diff --git a/web/webqtl/compareCorrelates/multitrait.py b/web/webqtl/compareCorrelates/multitrait.py
new file mode 100755
index 00000000..047620af
--- /dev/null
+++ b/web/webqtl/compareCorrelates/multitrait.py
@@ -0,0 +1,1121 @@
+# multitrait.py
+# a tool to analyze the correlations between several different traits and the traits
+# in a given dataset
+#
+# Parameters:
+# correlation -- either "pearson" or "spearman" depending on which ones we want to use 
+#
+# filename -- an input file containing the traits to analyze
+#
+# progress -- if set, this parameter outputs a static progress page
+# and uses a META redirect to trigger the real computation
+#
+# targetDatabaseType:
+# one of "ProbeSet", "Publish", "Genotype" depending on the type of database
+# we will use for the analysis
+#
+# targetDatabaseId:
+# the id (*Freeze.Id in the database) of the particular database we will analyze
+#
+# threshold -- a float between 0 and 1 to determine which coefficents we wil l consider
+#
+# firstRun -- either 0 or 1
+# whether to automatically pick reasonable defaults for the other three parameters
+#
+# outputType -- either "html" or "text"
+# 
+# Author: Stephen Pitts
+# June 15, 2004
+
+#Xiaodong changed the dependancy structure
+
+import copy
+import sys
+import cgi
+import os
+import os.path
+import math
+import time
+import numarray
+import tempfile
+import string
+import cgitb #all tracebacks come out as HTMLified CGI,useful when we have a random crash in the middle
+
+from base import templatePage
+from base.webqtlTrait import webqtlTrait
+from utility import webqtlUtil
+from base import webqtlConfig
+import trait
+import correlation
+import htmlModule
+
+cgitb.enable()
+
+
+# where this program's data files are
+RootDir = webqtlConfig.IMGDIR # XZ, 09/10/2008: add module name 'webqtlConfig.'
+RootDirURL = "/image/" # XZ, 09/10/2008: This parameter is not used in this module
+
+tempfile.tempdir = RootDir
+tempfile.template = "multitrait"
+
+# MultitraitException: used if something goes wrong
+# maybe in the future we should make exceptions more granular
+class MultitraitException(Exception):
+    def __init__(self, message):
+        self.message = message
+
+    def __repr__(self):
+        return "MultitraitException: %s" % self.message
+
+# buildParamDict: Cursor -> ParamDict
+# to process and validate CGI arguments
+# see the comment at the top of this file for valid cgi
+# parameters
+def buildParamDict(cursor, fd):
+    params = {}
+    fs = fd.formdata #cgi.FieldStorage()
+    params["progress"] = fs.getfirst("progress", "0")
+    params["filename"] = fs.getfirst("filename", "")
+    if params["filename"] == "":
+        raise MultitraitException("Required parameter filename missing.")
+
+    params["targetDatabase"] = fs.getfirst("targetDatabase", "U74Av2RMA_Raw_ProbeSet_March04")
+    params["firstRun"] = webqtlUtil.safeInt(fs.getfirst("firstRun", "0"),0)
+    params["threshold"] = webqtlUtil.safeFloat(fs.getfirst("threshold", "0.5"), 0.5)
+    params["subsetSize"] = webqtlUtil.safeInt(fs.getfirst("subsetSize", "10"), 10)
+    
+    if params["subsetSize"] < -1:
+        params["subsetSize"] = -1
+        
+    params["correlation"] = fs.getfirst("correlation", "pearson")
+    params["subsetCount"] = webqtlUtil.safeInt(fs.getfirst("subsetCount", 10), 10)
+    
+    if params["subsetCount"] < -1:
+        params["subsetCount"] = -1
+        
+    #params["outputType"] = fs.getfirst("outputType", "html")
+    
+    #if params["outputType"] not in ("html", "text"):
+    #    params["outputType"] = "html"
+    
+    if params["correlation"] not in ("pearson", "spearman"):
+        params["correlation"] = "pearson"
+
+    params["correlationName"] = params["correlation"].capitalize()
+
+    # one of two cases:
+    # 1) We have just come from a submit, so there are a bunch of display*
+    #    but no displaySets. Thus, the code down there converts the display*
+    #    to displaySets so the GET request doesn't get too long
+    # 2) We have just been redirected from a progress page which already has
+    #    a converted displaySets for us.
+
+    displaySets = webqtlUtil.safeInt(fs.getfirst("displaySets","0"), 0)
+
+    if displaySets == 0:
+        for key in fs.keys():
+            if key[:7] == "display":
+                #print "Hit display key %s<br>" % key
+                try:
+                    whichSet = int(key[7:])
+                    
+                    # prevent malicious attacks
+                    whichSet = min(whichSet, 512)
+                    displaySets += pow(2, whichSet)
+                
+                except ValueError: pass
+
+    params["displaySets"] = displaySets
+    #print "In the beginning, display sets was %s: %s<br>" % (displaySets,
+    #                                                     str(binaryDecompose(displaySets)))
+
+    # if we are just gonna display a progress page, then there's no
+    # reason to look up detailed database information
+    #if params["progress"] == "1":
+    #    return params
+    
+    a,b = trait.dbNameToTypeId(cursor, params["targetDatabase"]) # XZ, 09/10/2008: add module name
+    params["targetDatabaseType"] = a
+    params["targetDatabaseId"] = b
+    params["targetDatabaseName"] = params["targetDatabase"]
+
+    return params
+
+# readInputFile: DB cursor -> string -> string, (arrayof Trait)
+def readInputFile(cursor, filename):
+    """
+    To read an input file with n lines in the following format
+    <databasetype>,<databaseid>,<traitname>
+    and retrieve and populate traits with appropriate data
+    from the database
+
+    Also, for our purposes. we store the database type and
+    database id in fields attached to the trait instances. We use
+    this information to generate Javascript popups with trait
+    information.
+
+    In addition, we read the strain of mice that the traits are
+    from so we can only show those databases to correlate against.
+    """
+    handle = open(filename)
+    line = handle.readline()
+    inbredSetName = line.strip()
+    line = handle.readline()
+    traits = []
+
+# XZ, 09/10/2008: In this while loop block, I changed the original variable name 'trait' to 'oneTrait'
+    while line != "":
+        line = line.strip()
+        dbType, dbId, tName = line.split(",")
+
+        if dbType == "ProbeSet":
+            oneTrait = trait.queryProbeSetTraitByName(cursor, tName) # XZ, 09/10/2008: add module name
+            oneTrait.populateDataId(cursor, dbId)
+            oneTrait.dbName = trait.dbTypeIdToName(cursor, dbType, dbId) # XZ, 09/10/2008: add module name
+        elif dbType == "Geno":
+            speciesId = trait.getSpeciesIdByDbTypeId(cursor, dbType, dbId)
+            oneTrait = trait.queryGenotypeTraitByName(cursor, speciesId, tName) # XZ, 09/10/2008: add module name
+            oneTrait.populateDataId(cursor, dbId)
+            oneTrait.dbName = trait.dbTypeIdToName(cursor, dbType, dbId) # XZ, 09/10/2008: add module name
+        elif dbType == "Publish":
+            oneTrait = trait.queryPublishTraitByName(cursor, dbId, tName) # XZ, 09/10/2008: add module name
+            oneTrait.populateDataId(cursor, dbId)
+            oneTrait.dbName = trait.dbTypeIdToName(cursor, dbType, dbId) # XZ, 09/10/2008: add module name
+	elif dbType == "Temp":
+	    oneTrait = trait.queryTempTraitByName(cursor, tName) # XZ, 09/10/2008: add module name
+            oneTrait.populateDataId(cursor, dbId)
+            oneTrait.dbName = "Temp"
+
+        oneTrait.populateStrainData(cursor)
+        traits.append(oneTrait)
+
+        line = handle.readline()
+
+    return inbredSetName, traits
+
+# loadDatabase: Cursor -> ParamDict -> arrayof Trait
+def loadDatabase(cursor, p):
+    """
+    To load a set of traits as specified by the
+    targetDatabaseId
+    and targetDatabaseType parameters
+
+    Cursor should be a fastCursor from the webqtl library (i.e.
+    a MySQLdb SSCursor). 
+    
+    Calling populateStrainData 20,000 or so times on a ProbeSet
+    is really inefficent, so I wrote an optimized queryPopulatedProbeSetTraits
+    in the trait module that uses a join to get all of the rows in
+    bulk, store the resultset on the server, and do all sorts of nice buffering.
+    It's about two or three times faster.
+    """
+    if p["targetDatabaseType"] == "ProbeSet": # XZ, 09/10/2008: add module name 
+        dbTraits = trait.queryPopulatedProbeSetTraits(cursor,
+                                       p["targetDatabaseId"])
+    elif p["targetDatabaseType"] == "Publish": # XZ, 09/10/2008: add module name 
+        dbTraits = trait.queryPublishTraits(cursor,
+                                      p["targetDatabaseId"])
+        psd = trait.PublishTrait.populateStrainData
+    elif p["targetDatabaseType"] == "Geno": # XZ, 09/10/2008: add module name 
+        dbTraits = trait.queryGenotypeTraits(cursor,
+                                       p["targetDatabaseId"])
+        psd = trait.GenotypeTrait.populateStrainData
+    else:
+        print "Unknown target database type %s" % p["targetDatabaseType"]
+
+    if p["targetDatabaseType"] != "ProbeSet":
+        map(psd, dbTraits, [cursor]*len(dbTraits))
+        
+    return dbTraits
+
+def runProbeSetCorrelations(cursor, p, traits):
+    """
+    To run the correlations between the traits and the database.
+    This function computes a correlation coefficent between each
+    trait and every entry in the database, and partitions the database
+    into a disjoint array of arrays which it returns.
+
+    The length of the return array is 2^n, where n is the length of
+    the trait array. Which constitutent element a of the return array
+    a given trait ends up in is determined by the following formula
+    i = i_02^0 + ... + i_(n-1)2^(n-1)
+    where i_0 is 1 if corr(a,trait 0) >= threshold and 0 otherwise
+
+    Since most of the several thousand database traits will end up
+    with i=0, we don't return them, so the first element of the
+    return array will be empty.
+
+    A particular element of subarray j of the return array contains
+    a 2-tuple (trait,kvalues). The variable trait is obviously the
+    particular database trait that matches the user traits l_1, ..., l_m
+    to which subarray j corresponds. kvalues is a list of the correlation
+    values linking trait to l_1, ..., l_m, so the length of kvalues is
+    the number of 1s in the binary representation of j (there must be
+    a better way to describe this length).
+
+    The return array is an array of 2-tuples. The first element of
+    each tuple is the index of the particular subarray, and the second
+    element is the subarray itself. The array is sorted in descending
+    order by the number of 1's in the binary representation of the
+    index so the first few subarrays are the ones that correspond to
+    the largest sets. Each subarray is then sorted by the average of
+    the magnitude of the individual correlation values.
+    """
+
+    kMin = p["threshold"]
+    traitArrays = {}
+
+    # TODO: Add Spearman support
+    freezeId = p["targetDatabaseId"]
+    if p["correlation"] == "pearson":
+        correlations = correlation.calcProbeSetPearsonMatrix(cursor, freezeId, traits) #XZ, 09/10/2008: add module name
+    else:
+        correlations = correlation.calcProbeSetSpearmanMatrix(freezeId, traits) #XZ, 09/10/2008: add module name
+
+    # now we test all of the correlations in bulk
+    test = numarray.absolute(correlations)
+    test = numarray.greater_equal(test, kMin)
+    test = test.astype(numarray.Int8)
+    #print test
+
+    db = trait.queryProbeSetTraits(cursor, freezeId) #XZ, 09/10/2008: add module name
+    for i in range(len(db)):
+        cIndex = 0
+        prods = []
+        for j in range(len(traits)):
+            if test[i,j] == 1:
+                cIndex += pow(2, j)
+                prods.append(correlations[i,j])
+        if cIndex != 0:
+            if not traitArrays.has_key(cIndex):
+                traitArrays[cIndex] = []
+
+            traitArrays[cIndex].append((db[i], prods))
+
+
+    # sort each inner list of traitArrays
+    # so the matched traits appear in descending order by the
+    # average magnitude of the correlation
+    def customCmp(traitPair, traitPair2):
+        magAvg1 = numarray.average(map(abs, traitPair[1]))
+        magAvg2 = numarray.average(map(abs, traitPair2[1]))
+
+        # invert the sign to get descending order
+        return -cmp(magAvg1, magAvg2)
+
+    for traitArray in traitArrays.values():
+        traitArray.sort(customCmp)
+
+    # sort the outer list of traitArrays
+    traitArrays2 = []
+    i = 0
+    for key in traitArrays.keys():
+        a = traitArrays[key]
+        if len(a) > 0:
+            traitArrays2.append((key,a,len(binaryDecompose(key)),
+                                 len(a)))
+
+    # we sort by the number of 1's in the binary output
+    # and then by the size of the list, both in descending order
+    def customCmp2(aL,bL):
+        a = -cmp(aL[2], bL[2])
+        if a == 0:
+            return -cmp(aL[3], bL[3])
+        else:
+            return a
+
+    traitArrays2.sort(customCmp2)
+
+    return traitArrays2
+
+def runCorrelations(p, strainCount, traits, db):
+    """
+    To run the correlations between the traits and the database.
+    This function computes a correlation coefficent between each
+    trait and every entry in the database, and partitions the database
+    into a disjoint array of arrays which it returns.
+
+    The length of the return array is 2^n, where n is the length of
+    the trait array. Which constitutent element a of the return array
+    a given trait ends up in is determined by the following formula
+    i = i_02^0 + ... + i_(n-1)2^(n-1)
+    where i_0 is 1 if corr(a,trait 0) >= threshold and 0 otherwise
+
+    Since most of the several thousand database traits will end up
+    with i=0, we don't return them, so the first element of the
+    return array will be empty.
+
+    A particular element of subarray j of the return array contains
+    a 2-tuple (trait,kvalues). The variable trait is obviously the
+    particular database trait that matches the user traits l_1, ..., l_m
+    to which subarray j corresponds. kvalues is a list of the correlation
+    values linking trait to l_1, ..., l_m, so the length of kvalues is
+    the number of 1s in the binary representation of j (there must be
+    a better way to describe this length).
+
+    The return array is an array of 2-tuples. The first element of
+    each tuple is the index of the particular subarray, and the second
+    element is the subarray itself. The array is sorted in descending
+    order by the number of 1's in the binary representation of the
+    index so the first few subarrays are the ones that correspond to
+    the largest sets. Each subarray is then sorted by the average of
+    the magnitude of the individual correlation values.
+    """
+    kMin = p["threshold"]
+    traitArrays = {}
+
+    # TODO: Add Spearman support
+    if p["correlation"] == "pearson":
+        correlations = correlation.calcPearsonMatrix(db, traits, strainCount) #XZ, 09/10/2008: add module name
+    else:
+        correlations = correlation.calcSpearmanMatrix(db, traits, strainCount) #XZ, 09/10/2008: add module name
+
+    # now we test all of the correlations in bulk
+    test = numarray.absolute(correlations) 
+    test = numarray.greater_equal(test, kMin)
+    test = test.astype(numarray.Int8)
+    #print test
+    
+
+    for i in range(len(db)):
+        cIndex = 0
+        prods = []
+        for j in range(len(traits)):
+            if test[i,j] == 1:
+                cIndex += pow(2, j)
+                prods.append(correlations[i,j])
+        if cIndex != 0:
+            if not traitArrays.has_key(cIndex):
+                traitArrays[cIndex] = []
+
+            traitArrays[cIndex].append((db[i], prods))
+                
+    # sort each inner list of traitArrays
+    # so the matched traits appear in descending order by the
+    # average magnitude of the correlation
+    def customCmp(traitPair, traitPair2):
+        magAvg1 = numarray.average(map(abs, traitPair[1]))
+        magAvg2 = numarray.average(map(abs, traitPair2[1]))
+
+        # invert the sign to get descending order
+        return -cmp(magAvg1, magAvg2)
+    
+    for traitArray in traitArrays.values():
+        traitArray.sort(customCmp)
+
+    # sort the outer list of traitArrays
+    traitArrays2 = []
+    i = 0
+    for key in traitArrays.keys():
+        a = traitArrays[key]
+        if len(a) > 0:
+            traitArrays2.append((key,a,len(binaryDecompose(key)),
+                                 len(a)))
+
+    # we sort by the number of 1's in the binary output
+    # and then by the size of the list, both in descending order
+    def customCmp2(aL,bL):
+        a = -cmp(aL[2], bL[2])
+        if a == 0:
+            return -cmp(aL[3], bL[3])
+        else:
+            return a
+
+    traitArrays2.sort(customCmp2)
+
+    return traitArrays2
+
+
+# XZ, 09/09/2008: In multiple trait correlation result page,
+# XZ, 09/09/2008: click "Download a text version of the above results in CSV format"
+
+# TraitCorrelationText: a class to display trait correlations
+# as textual output
+class TraitCorrelationText:
+    # build a text shell to describe the given trait correlations
+    # this method sets self.output; use str(self) to actually
+    # get the text page
+    #
+    # traits is a list of traits and traitArray is a
+    # list of 3-tuples: index, traits', garbage
+    # where index is a binary-encoded description of which subset of
+    # traits the list traits' matches
+    #
+    # traits' is a list of 3-tuples as well: trait, correlations, garbage
+    # where trait is a particular trait and correlations is a list of float
+    # correlations (matching traits above)
+    def __init__(self, p, traits, traitArray):
+        output = "Correlation Comparison\n"
+        output += "from WebQTL and the University of Tennessee Health Science Center\n"
+        output += "initiated at " + time.asctime(time.gmtime()) + " UTC\n\n"
+        
+        output += self.showOptionPanel(p)
+        output += self.showSelectedTraits(traits)
+        output += self.showSummaryCorrelationResults(p, traits, traitArray)
+        output += self.showDetailedCorrelationResults(p, traits, traitArray)
+
+        self.output = output
+
+    # showOptionPanel: ParamDict -> string
+    # to display the options used to run this correlation
+    def showOptionPanel(self, params):
+        output = "Correlation Comparison Options:\n"
+        output += "Target database,%s\n" % params["targetDatabase"]
+        output += "Correlation type,%s\n" % params["correlationName"]
+        output += "Threshold,%f\n" % params["threshold"]
+        #output += "Subsets to Show,%d\n" % params["subsetCount"]
+        #output += "Traits to Show Per Subset,%d\n\n" % params["subsetSize"]
+        return output
+
+    # showSelectedTraits: (listof Trait) -> string
+    # to display the traits compared with the database
+    # note: we can't use tabular output because the traits could be of
+    # different types and produce different fields
+    def showSelectedTraits(self, traits):
+        output = "Selected Traits:\n"
+        for trait in traits:
+            output += '"' + trait.longName() + '"' + "\n"
+        output += "\n"
+        return output
+
+    # showSummaryCorrelationResults: ParamDict -> (listof Trait) ->
+    #    TraitArray -> string
+    # see comment for __init__ for a description of TraitArray
+    #
+    # to show a summary (sets and sizes) of the correlation results
+    # as well as an X to indicate whether they will be included
+    # in the detailed output
+    def showSummaryCorrelationResults(self, p, traits, traitArray):
+        output = "Correlation Comparison Summary:\n"
+
+        #if p["subsetCount"] != -1:
+        #    ourSubsetCount = min(p["subsetCount"], len(traitArray))
+        #else:
+
+        ourSubsetCount = len(traitArray)
+            
+        displayDecomposition = binaryDecompose(p["displaySets"])
+        for j in range(ourSubsetCount):
+            i = traitArray[j][0]
+            traitSubarray = traitArray[j][1]
+            if len(traitSubarray) == 0:
+                continue
+            
+            targetTraits = decomposeIndex(traits, i)
+            traitDesc = string.join(map(trait.Trait.shortName, targetTraits), # XZ, 09/10/2008: add module name
+                                    ", ")
+            if j in displayDecomposition:
+                checked = "X"
+            else:
+                checked = ""
+
+            output += '"%s","%s","%d"\n' % (checked, traitDesc, len(traitSubarray))
+
+        output += "\n"
+        return output
+
+    # showDetailedCorrelationResults: ParamDict -> (listof Trait) ->
+    #   TraitArray -> string
+    #
+    # to show a detailed list of the correlation results; that is,
+    # to completely enumerate each subset of traitArray using the
+    # filtering parameters in p
+    def showDetailedCorrelationResults(self, p, traits, traitArray):
+        output = "Correlation Comparison Details:\n"
+        displayDecomposition = binaryDecompose(p["displaySets"])
+        displayDecomposition.sort()
+
+        def formatCorr(c):
+            return "%.4f" % c
+        
+        for j in displayDecomposition:
+            i = traitArray[j][0]
+            traitSubarray = traitArray[j][1]
+
+            if len(traitSubarray) == 0:
+                continue
+
+            targetTraits = decomposeIndex(traits, i)
+            extraColumnHeaders = map(trait.Trait.shortName, targetTraits) # XZ, 09/10/2008: add module name
+            traitDesc = string.join(extraColumnHeaders, ", ")
+
+            #if(p["subsetSize"] != -1 and len(traitSubarray) > p["subsetSize"]):
+            #    traitDesc += ",(showing top %s of %s)" % (p["subsetSize"],
+            #                                             len(traitSubarray))
+            #    traitSubarray = traitSubarray[0:p["subsetSize"]]
+
+            output += "%s\n" % traitDesc
+            output += traitSubarray[0][0].csvHeader([], extraColumnHeaders)
+            output += "\n"
+            for oneTrait, corr in traitSubarray:#XZ, 09/10/2008: change original variable name 'trait' to 'oneTrait'
+                corr = map(formatCorr, corr)
+                output += oneTrait.csvRow([], corr) + "\n"
+
+            output += "\n"
+        
+        return output
+
+    # __str__ : string
+    # to return self.output as the string representation of this page
+    # self.output is built in __init__
+    def __str__(self):
+        return self.output
+
+# TraitCorrelationPage: a class to display trait correlations
+# for now this is just one HTML file, so we don't even write it
+# to a temporary file somewhere
+class TraitCorrelationPage(templatePage.templatePage):
+    """
+    Using the templatePage class, we build an HTML shell for
+    the core data here: the trait correlation lists.
+
+    The way templatePage works, we build the page in pieces in
+    the __init__ method and later on use the inherited write
+    method to render the page.
+    """
+    def __init__(self, fd, p, cursor, traits, traitArray, inbredSetName, txtFilename):
+
+        templatePage.templatePage.__init__(self, fd)
+         
+        self.dict["title"] = "Correlation Comparison"
+        self.dict["basehref"] = ""
+		# NL: deleted js1 content part, since it has not been used in this project
+        self.dict["js1"] = ""
+        self.dict["js2"] = ""
+
+        body = "<td><h1>Correlation Comparison</h1>"
+        body += "<p>Run at %s UTC</p>" % time.asctime(time.gmtime())
+        body += """
+        <p>The correlation comparison tool identifies intersecting sets of traits that are
+correlated with your selections at a specified threshold. A correlation comparison 
+involves the following steps:</p>
+<ol>
+<li><p>
+<b>Correlate:</b> 
+Choose a <i>Target Database</i>, a <i>Correlation Type</i>, and a <i>Correlation
+Threshold</i>. For your initial correlation, leave <i>Number of Subsets to Show</i> and 
+<i>Traits to Show per Subset</i> at their default values of 10. Using the Correlation 
+Options panel, you can adjust the <i>Correlation Threshold</i>, <i>Number of Subsets to
+Show</i>, and <i>Traits to Show per Subset</i>.
+</p></li>
+
+<li><p>
+<b>Add to Collection:</b>
+You can use the check boxes in the <i>Correlation 
+Comparison Details</i> panel and the buttons at the bottom of the page to add these
+results to your selections page for further analysis in WebQTL.
+</p></li>
+
+<li><p> 
+<b>Filter:</b>
+Using the <i>Correlation Comparison Summary</i> panel, choose which
+subsets you would like to display for export. Note that if you change the
+parameters in the <i>Correlation Options</i> panel, you will need to re-apply your filter.
+</p></li>
+
+<li><p>
+<b>Export:</b>
+Once you are satisfied with your report, use the export link at
+the bottom of the page to save the report as a comma-separated (CSV) text file
+which you can then import into Excel or another tool. Note: the exported report
+will list all subsets in the summary view and only those traits in the subsets
+you have selected in the Filter step.
+</p></li>
+</ol>
+"""
+        
+#        body += """
+#        <p>The correlation
+#        comparison tool identifies the intersecting sets of traits that are
+#        correlated with your selections. A correlation comparison involves
+#        the following steps:</p>
+#        <ol>
+#        <li><p><b>Correlate:</b> Choose a <i>Target Database</i>, a <i>Correlation Type</i>, and a <i>Correlation Threshold</i>.
+#        For the initial correlation, leave <i>Subsets to Show</i> and <i>Traits to Show per Subset</i>
+#        at their default values of 10.</p></li>
+#        <li><p><b>Refine Correlation:</b> Using the <i>Correlation Options</i> panel,
+#        adjust the <i>Correlation Threshold</i>, <i>Subsets to Show</i>, and <i>Traits to
+#        Show per Subset</i> until you have a reasonable number of traits.</p></li>
+#        <li><p><b>Filter:</b> Using the <i>Correlation Comparison Summary</i> panel, choose which subsets you would
+#        like to see. Note that if you change the parameters in the <i>Correlation Options</i> panel, you will
+#        loose the filter you have selected.</p></li>
+#        <li><p><b>Export:</b> Once you are satisfied with your report, use the export
+#        link at the bottom of the page to save the report as a comma-separated (CSV) text file which
+#        you can then import into Excel or another tool. Note: the exported report
+#        will show all subsets in the summary view and all traits in each subset you have
+#        selected in the Filter step.
+#        <li><p><b>Shopping Cart:</b> In addition, you can use the
+#        check boxes in the <i>Correlation Comparison Details</i> panel and the
+#        buttons at the bottom of the page to add the traits you have found to the shopping cart.</p>
+#        </li>
+#        </ol>
+#        """
+
+        body += self.showOptionPanel(p, cursor, inbredSetName)        
+        body += self.showSelectedTraits(traits, p, inbredSetName)
+
+        if p["firstRun"] == 0:
+            body += self.showCorrelationResults(p, inbredSetName, traits, traitArray)
+
+            exportParams = copy.copy(p)
+            exportParams["outputType"] = "text"
+            
+            body += ('''
+            <h2>Export these results</h2>
+            <p>
+            <a href="/image/%s">Download a text version of the above results in CSV format</a>. This text version differs from
+            the version you see on this page in two ways. First, the summary view shows all subsets. Second, the details
+            view shows all traits in the subsets that you have selected.
+            </p>
+            '''
+                     % txtFilename)
+
+
+        
+        body += "</td>"
+        self.dict["body"] = body
+
+
+    # showOptionPanel: ParamDict -> Cursor -> String ->  String
+    # to build an option panel for the multitrait correlation
+    # we expect the database list to be a list of 2-tuples
+    # the first element of each tuple is the short name
+    # and the second element of the tuple is the long name
+    def showOptionPanel(self, params, cursor, inbredSetName):
+        output = '''
+        <h2>Correlation Options</h2>
+	<FORM METHOD="POST" ACTION="%s%s" ENCTYPE="multipart/form-data">
+	<INPUT TYPE="hidden" NAME="FormID" VALUE="compCorr2">
+        <input type="hidden" name="filename" value="%s">
+        <input type="hidden" name="firstRun" value="0">
+        <input type="hidden" name="progress" value="1">
+        <table>
+        <tr>
+        <td>Target Database:</td><td>
+        ''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, params["filename"])
+
+        output += htmlModule.genDatabaseMenu(db = cursor,
+                                  public=0,
+                                  RISetgp = inbredSetName,
+                                  selectname="targetDatabase",
+                                  selected=params["targetDatabase"])
+        output += "</td></tr>"
+
+        corrSelected = ["",""]
+
+        if params["correlation"] == "pearson":
+            corrSelected[0] = "SELECTED"
+        else:
+            corrSelected[1] = "SELECTED"
+        
+        output += ('''
+                   <tr>
+                   <td>Correlation Method:</td>
+                   <td><select name="correlation">
+                       <option value="pearson" %s>Pearson</option>
+                       <!--<option value="spearman" %s>Spearman</option>-->
+                       </select></td></tr>
+                       ''' % (corrSelected[0], corrSelected[1]))
+        output += ('<tr><td>Correlation Threshold:</td><td><input name="threshold" value="%s" /></td></tr>'
+                   % params["threshold"])
+        output += ('<tr><td>Subsets to Show (-1 to show all subsets):</td><td><input name="subsetCount" value="%s" /></td></tr>'
+                   % params["subsetCount"])
+        output += ('<tr><td>Traits to Show per Subset (-1 to show all traits):</td><td><input name="subsetSize" value="%s" /></td></tr>'
+                   % params["subsetSize"])
+
+        # a cosmetic change to hopefully make this form a bit easier to use
+#        if params["firstRun"] == 1:
+#            applyName = "Correlate"
+#        else:
+#            applyName = "Refine Correlation"
+            
+        output += '''
+        <tr>
+        <td colspan="2"><input class="button" type="submit" value="Correlate" /></td>
+        </tr>
+        </table>
+        </form>
+        ''' 
+
+        return output
+
+    # showSelectedTraits: listof Trait -> string
+    # to show a list of the selected traits
+    def showSelectedTraits(self, traits, p, inbredSetName):
+        output = '''
+		<form action="%s%s" method="post" name="showDatabase">
+		<INPUT TYPE="hidden" NAME="FormID" VALUE="showDatabase">
+
+		<input type="hidden" name="incparentsf1" value="ON">
+		<input type="hidden" name="ShowStrains" value="ON">
+		<input type="hidden" name="ShowLine" value="ON">
+		<input type="hidden" name="database" value="">
+		<input type="hidden" name="ProbeSetID" value="">
+		<input type="hidden" name="RISet" value="%s">
+		<input type="hidden" name="CellID" value="">
+		<input type="hidden" name="database2" value="">
+		<input type="hidden" name="rankOrder" value="">
+		<input type="hidden" name="ProbeSetID2" value="">
+		<input type="hidden" name="CellID2" value="">
+		''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, inbredSetName)
+
+        output += "<h2>Selected Traits</h2>"        
+        output += '<table cellpadding="2" cellspacing="0"><tr bgcolor="FFFFFF"><th>Database</th><th>Trait</th></tr>'
+        flip = 1
+        colors = ["FFFFFF", "cccccc"]
+        
+        for trait in traits:
+            # we take advantage of the secret dbName attribute that
+            # loadDatabase fills in
+            descriptionString = trait.genHTML()
+            if trait.db.type == 'Publish' and trait.confidential:
+                descriptionString = trait.genHTML(privilege=self.privilege, userName=self.userName, authorized_users=trait.authorized_users)
+            output += '''
+            <tr bgcolor="%s"><td><a href="/dbdoc/%s.html">%s</a></td>
+                <td><a href="javascript:showDatabase2('%s', '%s', '')">%s</a></td>
+                </tr>
+            ''' % (colors[flip], trait.db.name, trait.db.name, trait.db.name, trait.name, descriptionString)
+            flip = not flip
+
+        output += "</table></form>"
+        return output
+
+
+    # showSummaryCorrelationResults
+    # show just the number of traits in each subarray
+    def showSummaryCorrelationResults(self, p, traits, traitArray):
+        output = '''
+        <form action="%s%s" method="post">
+	<INPUT TYPE="hidden" NAME="FormID" VALUE="compCorr2">
+        <input type="hidden" name="filename" value="%s">
+        <input type="hidden" name="firstRun" value="0">
+        <input type="hidden" name="progress" value="1">
+        <input type="hidden" name="correlation" value="%s">
+        <input type="hidden" name="threshold" value="%s">
+        <input type="hidden" name="rankOrder" value="">
+        <input type="hidden" name="subsetCount" value="%s">
+        <input type="hidden" name="subsetSize" value="%s">
+        <input type="hidden" name="targetDatabase" value="%s">
+        ''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, p["filename"], p["correlation"], p["threshold"],
+               p["subsetCount"], p["subsetSize"], p["targetDatabase"])
+        
+        output += '''
+        <table cellpadding="2" cellspacing="0">
+        <tr>
+        <th>Trait Subsets</th>
+        <th colspan="2">Intersecting Set Size</th>
+        </tr>
+        '''
+        # figure out a scale for the summary graph
+        # for now we set max = 300 pixels wide
+        if p["subsetCount"] != -1:
+            ourSubsetCount = min(p["subsetCount"], len(traitArray))
+        else:
+            ourSubsetCount = len(traitArray)
+            
+        screenWidth = 600
+        lengths = []
+        for j in range(ourSubsetCount):
+            lengths.append(len(traitArray[j][1]))
+        maxLength = max(lengths)
+        
+        displayDecomposition = binaryDecompose(p["displaySets"])
+        flip = 0
+        colors = ["FFFFFF", "cccccc"]
+        
+        for j in range(ourSubsetCount):
+            i = traitArray[j][0]
+            traitSubarray = traitArray[j][1]
+            
+            if len(traitSubarray) == 0:
+                continue
+
+            targetTraits = decomposeIndex(traits, i)
+            traitDesc = string.join(map(webqtlTrait.displayName, targetTraits),
+                                    ", ")
+            
+            if j in displayDecomposition:
+                checked = "CHECKED"
+            else:
+                checked = ""
+
+            barWidth = (len(traitSubarray) * screenWidth) / maxLength
+            output += ('''<tr bgcolor="%s">
+                              <td><input type="checkbox" name="display%d" value="1" %s>%s</input></td>
+                              <td>%s</td>
+                              <td><img src="/images/blue.png" width="%d" height="25"></td></tr>'''
+                       % (colors[flip], j, checked, traitDesc, len(traitSubarray), barWidth))
+            flip = not flip
+            
+        output += '''
+        <tr>
+        <td colspan="3">
+        <input class="button" type="submit" value="Filter" /></td>
+        </tr>
+        </table></form>
+        '''
+        return output
+    
+    # showDetailedCorrelationResults
+    # actually show the traits in each subarray
+    def showDetailedCorrelationResults(self, p, inbredSetName, traits,
+                                       traitArray):
+        output = "<h2>Correlation Comparison Details</h2>"
+
+        # the hidden form below powers all of the JavaScript links,
+        # the shopping cart links, and the correlation plot links
+
+        output += '''
+        <form action="%s%s" method="post">
+        <input type="hidden" name="database" value="%s">
+        <input type="hidden" name="FormID" value="showDatabase">
+        <input type="hidden" name="traitfile" value="">
+	<input type="hidden" name="incparentsf1" value="ON">
+	<input type="hidden" name="ShowStrains" value="ON">
+        <input type="hidden" name="ProbeSetID" value="">
+	<input type="hidden" name="ShowLine" value="ON">
+        <input type="hidden" name="RISet" value="%s">
+        <input type="hidden" name="CellID" value="">
+        <input type="hidden" name="database2" value="">
+        <input type="hidden" name="rankOrder" value="">
+        <input type="hidden" name="ProbeSetID2" value="">
+        <input type="hidden" name="CellID2" value="">
+        ''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, p["targetDatabase"], inbredSetName)
+
+
+        displayDecomposition = binaryDecompose(p["displaySets"])
+
+        # necessary to ensure that subset order is the same in the
+        # summary and the detailed view
+        displayDecomposition.sort()
+
+        # here's a trick: the first trait we show must have the widest row because it correlates
+        # with the largest set of input traits
+        firstSubset = traitArray[displayDecomposition[0]]
+        firstTrait = firstSubset[1][0][0]
+        extraColumnCount = firstSubset[2]
+        totalColumnCount = 1 + len(firstTrait.row()) + extraColumnCount
+
+        output += "<table cellpadding=2 cellspacing=0>\n"
+        for j in displayDecomposition:
+            i = traitArray[j][0]
+            traitSubarray = traitArray[j][1]
+
+            # we don't display trait combinations for which there are
+            # no correlations
+            if len(traitSubarray) == 0:
+                continue
+            
+            # generate a description of the traits that this particular array
+            # matches highly
+            targetTraits = decomposeIndex(traits, i)
+            extraColumnHeaders = map(webqtlTrait.displayName, targetTraits)
+            traitDesc = string.join(extraColumnHeaders, ", ")
+
+            # massage extraColumnHeaders so that they can be wrapped
+            for i in range(len(extraColumnHeaders)):
+                ech = extraColumnHeaders[i]
+                ech = ech.replace("-", " ")
+                ech = ech.replace("_", " ")
+                extraColumnHeaders[i] = ech
+
+            # pad extraColumnHeaders if we have less columns than the max
+            paddingNeeded = extraColumnCount - len(extraColumnHeaders)
+            if paddingNeeded > 0:
+                extraColumnHeaders.extend(paddingNeeded * ["&nbsp;"])
+                    
+            # we limit the output to the top ones
+            if(p["subsetSize"] != -1 and len(traitSubarray) > p["subsetSize"]):
+                traitDesc += " (showing top %s of %s)" % (p["subsetSize"], len(traitSubarray))
+                traitSubarray = traitSubarray[0:p["subsetSize"]]
+                
+            # combine that description with actual database traits themselves
+            # and the correlation values
+            output += '<tr><td colspan="%d"><h3>%s</h3></td></tr>' % (totalColumnCount, traitDesc)
+            #output += '<h3>%s</h3>\n<table cellpadding=2 cellspacing=0>\n'% traitDesc
+
+            # we assume that every trait in traitSubarray is the same type
+            # of trait
+            flip = 0
+            colors = ["FFFFFF", "cccccc"]
+            
+            output += traitSubarray[0][0].tableRowHeader(["&nbsp;"], extraColumnHeaders, colors[0])
+
+            for traitPair in traitSubarray:
+                corr = []
+		traitPair[0].dbName = p['targetDatabase']
+                trait = traitPair[0]
+
+                for i in range(len(traitPair[1])):
+                    corrValue = traitPair[1][i]
+                    corrPlotLink = ('''
+                    <a href="javascript:showCorrelationPlot2(db='%s',ProbeSetID='%s',CellID='',db2='%s',ProbeSetID2='%s',CellID2='',rank='%s')">%.2f</a>
+                    ''' % (p["targetDatabaseName"], trait.name,  targetTraits[i].db.name, targetTraits[i].name, "0", corrValue))
+                    corr.append(corrPlotLink)
+    
+                corr.extend(paddingNeeded * ["&nbsp;"])
+                    
+                checkbox = ('<INPUT TYPE="checkbox" NAME="searchResult" VALUE="%s:%s" />'
+                            % (p["targetDatabaseName"], trait.name))
+                flip = not flip
+                output += traitPair[0].tableRow([checkbox], corr, colors[flip])
+            
+            #output += "</table>"
+            i += 1
+            output += '<tr><td colspan="%d">&nbsp;</td></tr>' % totalColumnCount
+
+        output += "</table>"
+
+        # print form buttons if there were checkboxes above
+        output += '''
+        <div align="left">
+        <INPUT TYPE="button" NAME="addselect" CLASS="button" VALUE="Add to Collection"
+        onClick="addRmvSelection('%s',this.form, 'addToSelection');">
+        <INPUT TYPE="button" NAME="selectall" CLASS="button" VALUE="Select All" onClick="checkAll(this.form);">
+        <INPUT TYPE="reset" CLASS="button" VALUE="Select None">
+        </div>
+        </form>
+        ''' % inbredSetName
+
+        return output
+    
+    # showCorrelationResults: ParamDict -> listof Trait -> tupleof (int,arrayof trait) -> String
+    # to build an output display for the multitrait correlation results
+    def showCorrelationResults(self, p, inbredSetName, traits, traitArray):
+        output = '''
+        <h2>Correlation Comparison Summary</h2>
+        <p>
+        %s correlations were computed for each of the selected traits with each trait in
+        the <a href="/dbdoc/%s.html">%s</a> database.
+        Subsets of database traits for which correlations were higher than %s
+        or lower than -%s are shown below based on which traits
+        they correlated highly with. The top %s subsets, ranked by the number of input traits that
+        they correspond with, are shown, and at most %s traits in each subset are shown. </p>
+        ''' % (p["correlationName"],
+               p["targetDatabase"], p["targetDatabaseName"],
+               p["threshold"], p["threshold"], p["subsetCount"],
+               p["subsetSize"])
+
+
+        totalTraits = 0
+        for j in range(len(traitArray)):
+            totalTraits += len(traitArray[j][1])
+                        
+        if totalTraits == 0:
+            output += """
+            <p>
+            No shared corrrelates were found with your given traits at this
+            threshold. You may wish to lower the correlation threshold or choose different traits.
+            </p>
+            """
+        else:
+            output += self.showSummaryCorrelationResults(p, traits, traitArray)
+            output += self.showDetailedCorrelationResults(p, inbredSetName,
+                                                          traits, traitArray)
+
+        return output
+
+# decomposeIndex: (listof Trait) -> Int ->
+#   (listof Trait)
+# to use i to partition T into a sublist
+# each bit in i controls the inclusion or exclusion of a trait
+def decomposeIndex(traits, i):
+    targetTraits = []
+    
+    for j in range(len(traits)):
+        # look, mom, a bitwise and!
+        # expression below tests whether the jth bit is
+        # set in i
+        # see runCorrelation for how we decompose the
+        # array index
+        if (i & pow(2,j)) == pow(2,j):
+            targetTraits.append(traits[j])
+
+    return targetTraits
+    
+# binaryDecompose: int -> (listof int)
+# to decompose a number into its constituent powers of 2
+# returns a list of the exponents a_1...a_n such that the input m
+# is m = 2^a_1 + ... + 2^a_n
+def binaryDecompose(n):
+    if n == 0:
+        return []
+
+    # we start with the highest power of 2 <= this number
+    # and work our way down, subtracting powers of 2
+    start = long(math.floor(math.log(n)/math.log(2)))
+    
+    exponents = []
+    while start >= 0:
+        if n >= long(math.pow(2, start)):
+            n -= math.pow(2,start)
+            exponents.append(start)
+        start -= 1
+    return exponents
+
+# powerOf : int -> int -> boolean
+# to determine whether m is a power of n;
+# more precisely, whether there exists z in Z s.t.
+# n^z = m
+def powerOf(m, n):
+    trialZ = math.floor(math.log(m)/math.log(n))
+    return pow(n,trialZ) == m
+
+
+class compCorrPage(templatePage.templatePage):
+	def __init__(self,fd):
+		templatePage.templatePage.__init__(self, fd)
+
+                if not self.openMysql():
+                        return
+
+		cursor = self.cursor
+		params = buildParamDict(cursor, fd)
+
+		# get the input data
+		inbredSetName, traits = readInputFile(cursor, RootDir + params["filename"])
+        
+		# and what we are comparing the data to
+		dbTraits = []
+		if params["targetDatabaseType"] != "ProbeSet":
+			dbTraits = loadDatabase(cursor, params)
+
+        
+		# run the comparison itself
+		strainCount = trait.queryStrainCount(cursor) # XZ, 09/10/2008: add module name 
+		if params["targetDatabaseType"] == "ProbeSet":
+			results = runProbeSetCorrelations(cursor, params, traits)
+		else:
+			results = runCorrelations(params, strainCount, traits, dbTraits)
+
+		# try to be smart about what to output:
+		# we want to limit the number of traits shown, at least initially
+		# and since traitArray is already sorted with most interesting
+		# subsets first, we simply pick up the first 500 or so traits
+		# that we find
+		if params["displaySets"] == 0:
+			selectedTraits = 0
+			for j in range(len(results)):
+				#print "Scanning subarray %d" % j
+				if selectedTraits <= 200:
+					params["displaySets"] += pow(2, j)
+					selectedTraits += len(results[j][1])
+
+		traitList = []
+		for oneTrait in traits:  # XZ, 09/10/2008: change the original variable name 'trait' to 'oneTrait'
+			traitName = oneTrait.dbName+'::'+oneTrait.name  # XZ, 09/10/2008: change the original variable name 'trait' to 'oneTrait'
+			aTrait =  webqtlTrait(cursor=self.cursor, fullname=traitName)
+			traitList.append(aTrait)
+
+		# and generate some output
+		txtOutputFilename = tempfile.mktemp() 
+		txtOutputHandle = open(txtOutputFilename, "w")
+		txtOutput = TraitCorrelationText(params, traits, results)
+		txtOutputHandle.write(str(txtOutput))
+		txtOutputHandle.close()
+		txtOutputFilename = os.path.split(txtOutputFilename)[1]
+
+		self.dict['body'] = TraitCorrelationPage(fd, params, cursor, traitList,
+					results, inbredSetName,
+					txtOutputFilename).dict['body']
diff --git a/web/webqtl/compareCorrelates/trait.py b/web/webqtl/compareCorrelates/trait.py
new file mode 100755
index 00000000..ff1f8119
--- /dev/null
+++ b/web/webqtl/compareCorrelates/trait.py
@@ -0,0 +1,1074 @@
+#Trait.py
+#
+#--Individual functions are already annotated, more or less.
+#
+#Classes:
+#RawPoint
+#Trait
+#ProbeSetTrait
+#GenotypeTrait
+#PublishTrait
+#TempTrait
+#-KA
+
+# trait.py: a data structure to represent a trait
+import time
+import string
+
+CONFIG_pubMedLinkURL = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=%s&dopt=Abstract"
+
+# RawPoint: to store information about the relationship between two particular
+# traits
+# RawPoint represents directly the input file
+class RawPoint:
+    def __init__(self, i, j):
+        self.i = i
+        self.j = j
+        
+    def __eq__(self, other):
+        return (self.i == other.i and
+                self.j == other.j and
+                self.spearman == other.spearman and
+                self.pearson == other.pearson)
+    
+    def __str__(self):
+        return "(%s,%s,%s,%s)" % (self.i, self.j, self.spearman, self.pearson)
+
+def tdEscapeList(cols, align="left"):
+    """
+    A helper function used by tableRow
+    in Trait that will convert a list of strings into a set of
+    table cells enclosed by <td>%s</td> tags
+    """
+    html = ""
+    for col in cols:
+        html += '<td style="text-align: %s">%s</td>' % (align, col)
+    return html
+
+def thEscapeList(cols):
+    """
+    A helper function used by tableRowHeader
+    in Trait that will convert a list of strings into a set of
+    table cells enclosed by <td>%s</td> tags
+    """
+    html = ""
+    for col in cols:
+        html += "<th>%s</th>" % col
+    return html
+
+def commaEscapeList(cols):
+    """
+    A helper function used by csvHeader and csvRow.
+    Really it's just a wrapper for string.join
+    """
+    return '"' + string.join(cols, '","') + '"'
+
+
+class Trait:
+    """
+    A trait represents an attribute of an object. In the WebQTL database, traits are stored
+    as ProbeSets; that is, the average values of a set of probes are stored.
+    """
+    def __init__(self, id="", name="", description="", symbol="", href=""):
+        self.id  = id
+        self.name = name
+	self.dbName = ""
+        self.symbol = symbol
+        self.href = href
+        self.strainData = {}
+
+    def populateDataId(self, cursor, freezeId):
+        """
+        Retrieve the dataId for trait data corresponding to the given database
+        The way to do this depends on the particular type of trait, so we leave implementation
+        to subclasses.
+        """
+        raise NotImplementedError
+    
+    def populateStrainData(self, cursor):
+        """
+        Load this trait full of train data corresponding to the data id
+        The data id can either come from populateDataId
+        or can be set manually by the user of this class.
+        Xiaodong added: The way to do this depends on the particular type of trait,
+        so we leave implementation to subclasses.
+
+        """
+        raise NotImplementedError
+
+    def shortName(self):
+        """
+        To return a short name for this trait; this name should be
+        appropriate for a row or column title
+        """
+        return self.name
+
+    def nameNoDB(self):
+        """
+        To return only the short name without the database attached
+        """
+        strArray = self.shortName().split('::')
+        
+        return strArray[1]
+    
+    def datasetName(self):
+        """
+        To return only the name of the dataset
+        """
+        strArray = self.shortName().split('::')
+        
+        return strArray[0].strip()
+
+    def longName(self):
+        """
+        To return a long name for this trait; this name should be
+        appropriate for a key to a table
+        """
+        return self.shortName()
+
+    def __str__(self):
+        return self.shortName()
+
+    def tableRowHelper(self, beforeCols, afterCols, color, thisRow):
+        """
+        tableRowHelper: (arrayof String) -. String
+        To generate a table row to represent this object, appending
+        the additional information in beforeCols and afterCols
+        to the beginning and the end
+        """
+        thisRow[0] = '<a href="%s">%s</a>' % (self.traitInfoLink(),
+                                              self.name)
+        html = '<tr bgcolor="%s">' % color
+        html += tdEscapeList(beforeCols + thisRow)
+        html += tdEscapeList(afterCols, align="right")
+        html += "</tr>"
+        
+        return html
+
+
+    def header(self):
+        """
+        header: (listof String)
+        To generate a list of strings describing each piece of data
+        returned by row
+        """
+        raise NotImplementedError
+
+    def row(self):
+        """
+        row: (listof String)
+        To generate a list of strings describing this object. The
+        elements of this list should be described by header()
+        """
+        raise NotImplementedError
+    
+    def tableRowHeader(self, beforeCols, afterCols, color):
+        """
+        tableRowHeader: (arrayof String) -> (arrayof String) -> String
+        To generate a table row header to represent this object,
+        appending the additional information in beforeCols and
+        afterCols to the beginning and end
+        """
+        html = '<tr bgcolor="%s">' % color
+        html += thEscapeList(beforeCols + self.header() +
+                             afterCols)
+        html += "</tr>"
+        return html
+
+    def csvHeader(self, beforeCols, afterCols):
+        return commaEscapeList(beforeCols + self.header() + afterCols)
+    
+    def csvRow(self, beforeCols, afterCols):
+        return commaEscapeList(beforeCols + self.row() + afterCols)
+    
+        
+    def traitInfoLink(self):
+        """
+        To build a trait info link to show information about this
+        trait. We assume that the database attribute is properly set
+        on the hidden form on the page where this link will go.
+        """
+        return "javascript:showDatabase2('%s','%s','')" % (self.dbName, self.name)
+
+# ProbeSetTrait: a trait with data from a probeset
+class ProbeSetTrait(Trait):
+    def __init__(self, id="", name="", description="", symbol="", href="",
+                 chromosome="", MB="", GeneId=""):
+        Trait.__init__(self, id=id, name=name, href=href)
+        self.description = description
+        self.symbol = symbol
+        self.chromosome = chromosome
+        self.MB = MB
+        self.GeneId = GeneId
+        
+    def populateDataId(self, cursor, freezeId):
+        """
+        Look up the data id for this trait given which
+        freeze it came from.
+        """
+        cursor.execute('''
+        SELECT
+          ProbeSetXRef.DataId
+        FROM
+          ProbeSetXRef
+        WHERE
+          ProbeSetId = %s AND
+          ProbeSetFreezeId = %s
+        ''' % (self.id, freezeId))
+
+        # we hope that there's only one record here
+        row = cursor.fetchone()
+        self.dataId = row[0]
+
+    #XZ, 03/03/2009: Xiaodong implemented this fuction
+    def populateStrainData(self, cursor):
+        cursor.execute('''
+        SELECT
+          ProbeSetData.StrainId,
+          ProbeSetData.value
+        FROM
+          ProbeSetData
+        WHERE
+          ProbeSetData.Id = %s''' % self.dataId)
+        for row in cursor.fetchall():
+            self.strainData[int(row[0])] = float(row[1])
+
+
+    def shortName(self):
+        """
+        An improved string method that uses the gene symbol where
+        we have it
+        """
+        if self.symbol != "":
+            return self.symbol
+        else:
+            return Trait.shortName(self)
+
+    def longName(self):
+        """
+        We use several bits of genetic information to give
+        useful information about this trait and where it is
+        """
+        if self.chromosome != "":
+            chrPart = " (%s on Chr %s @ %s Mb)" % (self.symbol,
+                                                     self.chromosome,
+                                                     self.MB)
+        else:
+            chrPart = ""
+
+        return "%s%s: %s" % (self.name, chrPart, self.description)
+
+    def header(self):
+        return ["Name", "Symbol", "Description",
+                "Chr", "Position (Mb)"]
+
+    def row(self):
+        if type(self.MB) is float:
+            MB = "%.2f" % self.MB
+        else:
+            MB = ""
+            
+        return [self.name, self.symbol, self.description,
+                self.chromosome, MB]
+    
+    def tableRow(self, beforeCols, afterCols, color):
+        """
+        tableRow: (arrayof String) -> (arrayof String) -> String
+        To generate a table row to represent this object, appending
+        the additional information in beforeCols and afterCols to the
+        beginning and end
+        """
+        thisRow = self.row()
+
+        # trim description
+        if len(thisRow[2]) > 20:
+            thisRow[2] = thisRow[2][:20] + "..."
+
+        # add NCBI info link 
+        thisRow[1] = self.ncbiInfoLink()
+
+        return self.tableRowHelper(beforeCols, afterCols, color,
+                                   thisRow)
+
+
+    def ncbiInfoLink(self):
+        """
+        ncbiInfoLink :: String
+        To generate an NCBI info link for this trait. If we have a GeneId,
+        then we can go straight to the gene. If not, then we generate a search
+        link based on the gene symbol. If we have none of them, then we don't
+        generate a link at all.
+        """
+        if self.GeneId != "":
+            cmd = "cmd=Retrieve&dopt=Graphics&list_uids=%s" % self.GeneId
+        elif self.symbol != "":
+            cmd = "cmd=Search&term=%s" % self.symbol
+        else:
+            return ""
+
+        return '''
+        <a target="_new"
+           href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&%s">
+           %s</a> ''' % (cmd, self.symbol)
+
+
+# GenotypeTrait: a trait with data from the genotype
+class GenotypeTrait(Trait):
+    def __init__(self, id="", name="", href="", chromosome="", MB=""):
+        Trait.__init__(self, id=id, name=name, href=href)
+        self.chromosome = chromosome
+        self.MB = MB
+
+    def populateDataId(self, cursor, freezeId):
+        """
+        Look up the data id for this trait from the
+        genotype.
+        """
+        cursor.execute('''
+        SELECT
+          GenoXRef.DataId
+        FROM
+          GenoXRef
+        WHERE
+          GenoId = %s AND
+          GenoFreezeId = %s
+        ''' % (self.id, freezeId))
+
+        # we hope that there's only one record here
+        row = cursor.fetchone()
+        self.dataId = row[0]
+
+    #XZ, 03/03/2009: Xiaodong implemented this fuction
+    def populateStrainData(self, cursor):
+        cursor.execute('''
+        SELECT
+          GenoData.StrainId,
+          GenoData.value
+        FROM
+          GenoData
+        WHERE
+          GenoData.Id = %s''' % self.dataId)
+        for row in cursor.fetchall():
+            self.strainData[int(row[0])] = float(row[1])
+
+    def header(self):
+        return ["Locus", "Chr", "Position (Mb)"]
+
+    def row(self):
+        return [self.name, self.chromosome, "%.3f" % self.MB]
+
+    def tableRow(self, beforeCols, afterCols, color):
+        return self.tableRowHelper(beforeCols, afterCols, color, self.row())
+
+# PublishTrait: a trait with data from publications
+class PublishTrait(Trait):
+    def __init__(self, id="", name="", href="", authors="", title="",
+                 phenotype="", year=""):
+        Trait.__init__(self, id=id, name=name, href=href)
+        self.authors = authors
+        self.title = title
+        self.phenotype = phenotype
+        self.year = year
+
+    def populateDataId(self, cursor, freezeId):
+        """
+        Look up the data id for this trait from the
+        published set. For the moment, we assume that there's
+        only one publish freeze.
+        """
+        cursor.execute('''
+        SELECT
+          PublishXRef.DataId
+        FROM
+          PublishXRef, PublishFreeze
+        WHERE
+          PublishFreeze.Id = %s AND 
+          PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND
+          PublishXRef.Id = %s 
+        ''' % (freezeId, self.id))
+
+        # we hope that there's only one record here
+        row = cursor.fetchone()
+        self.dataId = row[0]
+
+    #XZ, 03/03/2009: Xiaodong implemented this fuction
+    def populateStrainData(self, cursor):
+        cursor.execute('''
+        SELECT
+          PublishData.StrainId,
+          PublishData.value
+        FROM
+          PublishData
+        WHERE
+          PublishData.Id = %s''' % self.dataId)
+        for row in cursor.fetchall():
+            self.strainData[int(row[0])] = float(row[1])
+
+
+    def longName(self):
+        """
+        A more intelligent string function that uses
+        information about the publication from which this trait came
+        """
+        return "%s: %s by %s" % (self.name, self.title, self.authors)
+
+    def header(self):
+        return ["Record", "Phenotype", "Authors", "Year", "URL"]
+
+    def row(self):
+        return [self.name,
+                self.phenotype,
+                self.authors,
+                str(self.year),
+                ""]
+    
+    def tableRow(self, beforeCols, afterCols, color):
+        """
+        tableRow: (arrayof String) -> (arrayof String) ->  String
+        To generate a table row to represent this object, appending
+        the additional information in beforeCols and afterCols to the
+        beginning and end
+        """
+        thisRow = self.row()
+
+        # for multiple authors, use "et. al" after first two
+        authors = thisRow[2].split(",")
+        if len(authors) > 2:
+            thisRow[2] = string.join(authors[:2], ",") + ", et al"
+
+        # clip phenotype to 20 chars
+        if len(thisRow[1]) > 20:
+            thisRow[1] = thisRow[1][:20] + "..."
+
+        # add Pub Med URL
+        thisRow[4] = '<a href="%s" target="_new">Pub Med</a>' % (CONFIG_pubMedLinkURL % self.href)
+
+        return self.tableRowHelper(beforeCols, afterCols, color,
+                                   thisRow)
+
+
+# TempTrait: a trait with data generate by user and stored in temp table
+class TempTrait(Trait):
+    def __init__(self, id="", name="", href="", description=""):
+        Trait.__init__(self, id=id, name=name, href=href)
+        self.description = description
+
+    def populateDataId(self, cursor, freeezeId):
+        """
+        Look up the data id for this trait from the Temp table, freezeId isn't used, 
+        it just for fixing the inherit
+        """
+        cursor.execute('''
+        SELECT
+          DataId
+        FROM
+          Temp
+        WHERE
+          Id=%s
+        ''' % (self.id))
+
+        # we hope that there's only one record here
+        row = cursor.fetchone()
+        self.dataId = row[0]
+
+    #XZ, 03/03/2009: Xiaodong implemented this fuction
+    def populateStrainData(self, cursor):
+        cursor.execute('''
+        SELECT
+          TempData.StrainId,
+          TempData.value
+        FROM
+          TempData
+        WHERE
+          TempData.Id = %s''' % self.dataId)
+        for row in cursor.fetchall():
+            self.strainData[int(row[0])] = float(row[1])
+
+
+    def row(self):
+        return [self.id,
+                self.name,
+                self.description,
+                ""]
+    
+
+    def longName(self):
+        """
+        For temp trait, the description always contents whole useful information
+        """
+        return self.description
+
+
+# queryGenotypeTraitByName : Cursor -> string -> GenotypeTrait
+def queryGenotypeTraitByName(cursor, speciesId, name):
+    qry = '''
+    SELECT
+      Geno.Id,
+      Geno.Name,
+      Geno.Chr,
+      Geno.Mb
+    FROM
+      Geno
+    WHERE
+      Geno.SpeciesId = %s and Geno.Name = "%s" ''' % (speciesId, name)
+
+    cursor.execute(qry)
+    row = cursor.fetchone()
+    return GenotypeTrait(id=row[0], name=row[1],
+                         chromosome=row[2], MB=row[3])
+
+# queryPublishTraitByName : Cursor -> string -> PublishTrait
+def queryPublishTraitByName(cursor, freezeId, name):
+    qry = '''
+    SELECT
+      PublishXRef.Id,
+      Phenotype.Id,
+      Publication.Authors,
+      Publication.Title,
+      Publication.Year,
+      Publication.PubMed_ID
+    FROM
+      Publication, PublishXRef, Phenotype, PublishFreeze
+    WHERE
+      PublishFreeze.Id = %s AND 
+      PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND
+      PublishXRef.Id = %s AND 
+      PublishXRef.PublicationId = Publication.Id AND
+      PublishXRef.PhenotypeId = Phenotype.Id 
+      ''' % (freezeId, name)
+
+    cursor.execute(qry)
+    if cursor.rowcount == 0:
+        return None
+    else:
+        row = cursor.fetchone()
+        
+        return PublishTrait(id=row[0], name='%s'%row[0],
+                            authors=row[2], title=row[3],
+                            year=row[4], href=row[5])
+
+
+def queryTempTraitByName(cursor, name):
+    name=name.strip()
+    qry = '''
+    SELECT
+      Temp.Id,
+      Temp.Name,
+      Temp.description
+    FROM
+      Temp
+    WHERE
+      Temp.Name= "%s"
+      ''' % (name)
+
+    cursor.execute(qry)
+    if cursor.rowcount == 0:
+        return None
+    else:
+        row = cursor.fetchone()
+        return TempTrait(id=row[0], name=row[1], description=row[2], href='')
+
+# queryPopulatedProbeSetTraits: Cursor -> Integer -> dictof Trait
+# to retrieve an entire probeset fully populated with data
+# this query can take 15+ sec the old way (22,000 traits * 35 strains = half
+# a million records)
+# so we ask for the data in bulk
+#
+# cursor should be SSCursor for MySQL so rows are stored on the server side
+# and tuples are used
+# we explicitly close the cursor here as well
+#XZ, 03/04/2009: It seems to me that this function is never be executed.
+#XZ: Although it can be called from multitrait.loadDatabase,
+#XZ: but the loadDatabase function will not be called
+#XZ: if the targetDatabaseType is probeset.
+#XZ: The probeset traits of target database are retrieved by execute
+#XZ: queryPopulatedProbeSetTraits2 from correlation.calcProbeSetPearsonMatrix
+def queryPopulatedProbeSetTraits(cursor, freezeId):
+    step1 = time.time()
+    traits = queryProbeSetTraits(cursor, freezeId)
+    traitDict = {}
+    for trait in traits:
+        traitDict[trait.id] = trait
+        
+    step2 = time.time()
+    print 
+    #XZ, 03/04/2009: Xiaodong changed Data to ProbeSetData
+    cursor.execute('''
+    SELECT
+      ProbeSetXRef.ProbeSetId,
+      ProbeSetData.StrainId,
+      ProbeSetData.value
+    FROM
+      ProbeSetXRef
+    Left Join ProbeSetData ON
+      ProbeSetXRef.DataId = ProbeSetData.Id
+    WHERE
+      ProbeSetXRef.ProbeSetFreezeId = %s
+    ''' % freezeId)
+
+    step3 = time.time()
+    totalrows = 0
+    somerows = cursor.fetchmany(1000)
+    while len(somerows) > 0:
+        totalrows += len(somerows)
+        for row in somerows:
+            # this line of code can execute more than one million times
+            traitDict[row[0]].strainData[int(row[1])] = row[2]
+        somerows = cursor.fetchmany(1000)
+
+    #cursor.close()
+    step4 = time.time()
+    
+    time1 = step2 - step1
+    time2 = step3 - step2
+    time3 = step4 - step3
+    time4 = step4 - step1
+    #print "%f %f %f %f %d rows" % (round(time1, 2),
+    #                               round(time2, 2),
+    #                               round(time3, 2),
+    #                               round(time4, 2),
+    #                               totalrows)
+    #print "Fetched %d traits" % len(traits)
+    return traits
+
+
+# queryPopulatedProbeSetTraits2: Cursor -> Integer -> dictof Trait
+# to retrieve probeset fully populated whose ProbeSetId in a range
+# a special ProbeSetId with data
+# this query can take 15+ sec the old way (22,000 traits * 35 strains = half
+# a million records)
+# so we ask for the data in bulk
+#
+# cursor should be SSCursor for MySQL so rows are stored on the server side
+# and tuples are used
+# we explicitly close the cursor here as well
+def queryPopulatedProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2):
+    step1 = time.time()
+    traits = queryProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2)
+    traitDict = {}
+    for trait in traits:
+        traitDict[trait.id] = trait
+
+    step2 = time.time()
+    print
+    #XZ, 03/04/2009: Xiaodong changed Data to ProbeSetData
+    cursor.execute('''
+    SELECT
+      ProbeSetXRef.ProbeSetId,
+      ProbeSetData.StrainId,
+      ProbeSetData.value
+    FROM
+      ProbeSetXRef
+    Left Join ProbeSetData ON
+      ProbeSetXRef.DataId = ProbeSetData.Id
+    WHERE
+      ProbeSetXRef.ProbeSetFreezeId = %s AND
+      ProbeSetXRef.ProbeSetId >= %s AND
+      ProbeSetXRef.ProbeSetId <= %s
+      ''' % (freezeId, ProbeSetId1, ProbeSetId2))
+
+    step3 = time.time()
+    totalrows = 0
+    somerows = cursor.fetchmany(1000)
+    while len(somerows) > 0:
+        totalrows += len(somerows)
+        for row in somerows:
+            # this line of code can execute more than one million times
+            traitDict[row[0]].strainData[int(row[1])] = row[2]
+        somerows = cursor.fetchmany(1000)
+
+    #cursor.close()
+    step4 = time.time()
+
+    time1 = step2 - step1
+    time2 = step3 - step2
+    time3 = step4 - step3
+    time4 = step4 - step1
+    #print "%f %f %f %f %d rows" % (round(time1, 2),
+    #                               round(time2, 2),
+    #                               round(time3, 2),
+    #                               round(time4, 2),
+    #                               totalrows)
+    #print "Fetched %d traits" % len(traits)
+    return traits
+
+
+# def noneFilter : string or none -> string
+# to replace a possible None by an empty string
+def noneFilter(x):
+    if x is None:
+        return ""
+    else:
+        return x
+
+# queryProbeSetTraits: Cursor -> Integer -> dictof Trait
+def queryProbeSetTraits(cursor, freezeId):
+    """
+    To locate all of the traits in a particular probeset
+    """
+    qry = '''
+    SELECT
+      ProbeSet.Id,
+      ProbeSet.Name,
+      ProbeSet.description,
+      ProbeSet.symbol,
+      ProbeSet.Chr,
+      ProbeSet.Mb,
+      ProbeSet.GeneId,
+      ProbeSetXRef.DataId
+    FROM
+      ProbeSet,
+      ProbeSetXRef
+    WHERE
+      ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+      ProbeSetXRef.ProbeSetFreezeId = %s
+    ORDER BY ProbeSet.Id
+    ''' % freezeId
+
+    cursor.execute(qry)
+    rows = cursor.fetchall()
+    traits = []
+
+    for row in rows:
+        row = map(noneFilter, row)
+        trait = ProbeSetTrait(id=row[0], name=row[1],
+                              description=row[2],
+                              chromosome=row[4],
+                              MB=row[5],
+                              symbol=row[3],
+                              GeneId=row[6])
+        trait.dataId = row[7]
+        traits.append(trait)
+
+    return traits
+
+
+# queryProbeSetTraits2: Cursor -> Integer -> dictof Trait
+def queryProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2):
+    """
+    To locate all of the traits in a particular probeset
+    """
+    qry = '''
+    SELECT
+      ProbeSet.Id,
+      ProbeSet.Name,
+      ProbeSet.description,
+      ProbeSet.symbol,
+      ProbeSet.Chr,
+      ProbeSet.Mb,
+      ProbeSet.GeneId,
+      ProbeSetXRef.DataId
+    FROM
+      ProbeSet,
+      ProbeSetXRef
+    WHERE
+      ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+      ProbeSetXRef.ProbeSetFreezeId = %s AND
+      ProbeSet.Id >= %s AND
+      ProbeSet.Id <= %s
+    ORDER BY ProbeSet.Id
+    ''' % (freezeId, ProbeSetId1, ProbeSetId2)
+
+    cursor.execute(qry)
+    rows = cursor.fetchall()
+    traits = []
+
+    for row in rows:
+        row = map(noneFilter, row)
+        trait = ProbeSetTrait(id=row[0], name=row[1],
+                              description=row[2],
+                              chromosome=row[4],
+                              MB=row[5],
+                              symbol=row[3],
+                              GeneId=row[6])
+        trait.dataId = row[7]
+        traits.append(trait)
+
+    return traits
+
+
+# queryPublishTraits : Cursor -> arrayof Trait
+def queryPublishTraits(cursor, freezeId):
+    """
+    To locate all published traits
+    """
+    qry = '''
+    SELECT
+      Publication.Id,
+      Publication.Name,
+      Publication.PhenoType,
+      Publication.Authors,
+      Publication.Title,
+      Publication.Year,
+      Publication.PubMed_ID,
+      PublishXRef.DataId
+    FROM
+      Publication,
+      PublishXRef
+    WHERE
+      PublishXRef.PublishFreezeId = %s AND
+      PublishXRef.PublishId = Publication.Id
+    ''' % freezeId
+    
+    qry = '''
+    SELECT
+      Publication.Id,
+      PublishXRef.Id,
+      Phenotype.Pre_publication_description,
+      Phenotype.Post_publication_description,
+      Publication.Authors,
+      Publication.Title,
+      Publication.Year,
+      Publication.PubMed_ID,
+      PublishXRef.DataId
+    FROM
+      Publication, PublishXRef, Phenotype, PublishFreeze
+    WHERE
+      PublishFreeze.Id = %s AND 
+      PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND
+      PublishXRef.PublicationId = Publication.Id AND
+      PublishXRef.PhenotypeId = Phenotype.Id 
+      ''' % freezeId
+    cursor.execute(qry)
+    rows = cursor.fetchall()
+    traits = []
+    for row in rows:
+        PhenotypeString = row[3]
+        if not row[7] and row[2]:
+            PhenotypeString = row[2]
+        trait = PublishTrait(id=row[0], name= '%s' %row[1],
+                             phenotype=PhenotypeString,
+                             authors=row[4],
+                             title=row[5],
+                             year=row[6],
+                             href=row[7])
+        trait.dataId = row[8]
+        traits.append(trait)
+        
+    return traits
+
+# queryGenotypeTraits : Cursor -> arrayof Trait
+def queryGenotypeTraits(cursor, freezeId):
+    """
+    To locate all traits in the genotype
+    """
+    qry =    '''
+    SELECT
+      Geno.Id,
+      Geno.Name,
+      Geno.Chr,
+      GenoXRef.DataId,
+      Geno.Mb
+    FROM
+      Geno,
+      GenoXRef
+    WHERE
+      GenoXRef.GenoId = Geno.Id
+      AND GenoXRef.GenoFreezeId = %s
+    ''' % freezeId
+    cursor.execute(qry)
+    rows = cursor.fetchall()
+    traits = []
+    
+    for row in rows:
+        trait = GenotypeTrait(id=row[0], name=row[1],
+                              chromosome=row[2], MB=row[4])
+        trait.dataId = row[3]
+        traits.append(trait)
+        
+    return traits
+
+# queryProbeSetTraitByName : Cursor -> string -> Trait
+# to find a particular trait given its name 
+def queryProbeSetTraitByName(cursor, name):
+    qry = '''
+    SELECT
+      ProbeSet.Id,
+      ProbeSet.Name,
+      ProbeSet.description,
+      ProbeSet.symbol,
+      ProbeSet.Chr,
+      ProbeSet.Mb,
+      ProbeSet.GeneId
+    FROM
+      ProbeSet
+    WHERE
+      ProbeSet.Name = "%s"
+    ''' % name
+    #print qry
+    cursor.execute(qry)
+    row = cursor.fetchone()
+
+    # convert a MySQL NULL value to an empty string
+    # for gene symbol
+    if row[3] is None:
+        sym = ""
+    else:
+        sym = row[3]
+        
+    return ProbeSetTrait(id=row[0], name=row[1], description=row[2],
+                         symbol=sym, chromosome=row[4], MB=row[5],
+                         GeneId=row[6])
+    
+                     
+# queryTraits : Cursor -> string -> string -> arrayof Traits
+# to find all of the traits whose descriptions match a certain string in a
+# particular database
+def queryTraits(cursor, dbId, queryString):
+    # we do this in two steps:
+    # first we get the data id for the matching traits
+    qry = '''
+    SELECT
+      ProbeSet.Id,
+      ProbeSet.Name,
+      ProbeSet.description,
+      ProbeSetXRef.DataId
+    FROM
+      ProbeSet,
+      ProbeSetXRef
+    WHERE
+      ProbeSetXRef.ProbeSetFreezeId = %s AND
+      ProbeSet.Id = ProbeSetXRef.ProbeSetId AND
+      ProbeSet.description LIKE "%%%s%%"
+    ''' % (dbId, queryString)
+    #    print qry
+    cursor.execute(qry)
+
+    if cursor.rowcount == 0:
+        print "No traits found"
+        return []
+    else:
+        print "%s traits found" % (cursor.rowcount)
+
+    # maybe fetchall is bad; we will see
+    traits = []
+    for row in cursor.fetchall():
+        myTrait = Trait(row[0], row[1], row[2])
+        myTrait.dataId = row[3]
+        traits.append(myTrait)
+
+    # second we pull all of the strain data for each trait
+    print "Retrieving individual trait data..."
+    for trait in traits:
+        trait.populateStrainData(cursor, trait.dataId)
+        print "%s (%s) -- %s" % (trait.name, trait.id, trait.description)
+
+    print "done"
+    return traits
+
+# queryProbeSetFreezes : Cursor -> arrayof String,String tuples
+# to return the short and long name for each ProbeSetFreeze
+# this function is designed specifically for building
+# a database selector
+def queryProbeSetFreezes(cursor):
+    cursor.execute("""
+    SELECT
+      ProbeSetFreeze.Name,
+      ProbeSetFreeze.FullName
+    FROM
+      ProbeSetFreeze
+    ORDER BY
+      ProbeSetFreeze.Name
+    """)
+
+    # for now, fetchall returns the data as a list of tuples
+    # which is what we want
+    return list(cursor.fetchall())
+
+# queryProbeSetFreezeIdName: Cursor -> String -> String, String
+# this function returns the
+# id and the long name of a probesetfreeze given its name
+# once again, it's designed specifically for building
+# the database selector
+def queryProbeSetFreezeIdName(cursor, name):
+    qry = ('''
+    SELECT
+      ProbeSetFreeze.Id,
+      ProbeSetFreeze.FullName
+    FROM
+      ProbeSetFreeze
+    WHERE
+      ProbeSetFreeze.Name = "%s" 
+    ''' % name)
+    #print qry
+    cursor.execute(qry)
+
+    row = cursor.fetchone()
+    return row
+
+# queryProbeSetFreezeName: Cursor -> String -> String
+# to return the name of a probe set freeze given its id
+def queryProbeSetFreezeName(cursor, id):
+    cursor.execute('''
+    SELECT
+      ProbeSetFreeze.FullName
+    FROM
+      ProbeSetFreeze
+    WHERE
+      ProbeSetFreeze.Id = %s
+    ''' % id)
+
+    row = cursor.fetchone()
+    return row[0]
+
+# dbNameToTypeId : Cursor -> String -> (String, String)
+# to convert a database name to a (type, id) pair
+def dbNameToTypeId(cursor, name):
+    types = ["ProbeSet", "Geno", "Publish"]
+    for type_ in types:
+        count = cursor.execute('''
+        SELECT
+          %sFreeze.Id
+        FROM
+          %sFreeze
+        WHERE
+          Name = "%s"
+        ''' % (type_, type_,  name))
+
+        if count != 0:
+            id = cursor.fetchone()[0]
+            return type_, id
+
+    return None, None
+
+# dbTypeIdToName : Cursor -> String -> String -> String
+# to convert a database (type,id) pair into a name
+def dbTypeIdToName(cursor, dbType, dbId):
+    cursor.execute('''
+    SELECT
+      %sFreeze.Name
+    FROM
+      %sFreeze
+    WHERE
+    Id = %s
+    ''' % (dbType, dbType, dbId))
+    
+    row = cursor.fetchone()
+    return row[0]
+
+#XZ, July 21, 2010: I add this function
+def getSpeciesIdByDbTypeId (cursor, dbType, dbId):
+    cursor.execute('''
+    SELECT
+      SpeciesId
+    FROM
+      InbredSet, %sFreeze
+    WHERE
+    %sFreeze.Id = %s
+    and InbredSetId = InbredSet.Id
+    ''' % (dbType, dbType, dbId))
+
+    row = cursor.fetchone()
+    return row[0]
+
+
+# queryStrainCount : Cursor -> int
+# return the number of strains in the database
+def queryStrainCount(cursor):
+    cursor.execute('''
+    SELECT
+      Max(Strain.Id)
+    FROM
+      Strain
+    ''')
+    return (cursor.fetchone())[0]