about summary refs log tree commit diff
path: root/web/webqtl/compareCorrelates
diff options
context:
space:
mode:
author zsloan 2015-03-27 20:28:51 +0000
committer zsloan 2015-03-27 20:28:51 +0000
commit d0911a04958a04042da02a334ccc528dae79cc17 (patch)
tree 3c48e2e937c1dbeaf00a5697c87ed251afa5c8f1 /web/webqtl/compareCorrelates
parent a840ad18e1fe3db98a359a159e9b9b72367a2839 (diff)
download genenetwork2-d0911a04958a04042da02a334ccc528dae79cc17.tar.gz
Removed everything from 'web' directory except genofiles and renamed the directory to 'genotype_files'
Diffstat (limited to 'web/webqtl/compareCorrelates')
-rwxr-xr-x web/webqtl/compareCorrelates/MultipleCorrelationPage.py 108
-rwxr-xr-x web/webqtl/compareCorrelates/__init__.py 0
-rwxr-xr-x web/webqtl/compareCorrelates/correlation.py 359
-rwxr-xr-x web/webqtl/compareCorrelates/htmlModule.py 279
-rwxr-xr-x web/webqtl/compareCorrelates/multitrait.py 1121
-rwxr-xr-x web/webqtl/compareCorrelates/trait.py 1074
6 files changed, 0 insertions, 2941 deletions
diff --git a/web/webqtl/compareCorrelates/MultipleCorrelationPage.py b/web/webqtl/compareCorrelates/MultipleCorrelationPage.py
deleted file mode 100755
index 6a464ab6..00000000
--- a/web/webqtl/compareCorrelates/MultipleCorrelationPage.py
+++ /dev/null
@@ -1,108 +0,0 @@
-# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU Affero General Public License
-# as published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the GNU Affero General Public License for more details.
-#
-# This program is available from Source Forge: at GeneNetwork Project
-# (sourceforge.net/projects/genenetwork/).
-#
-# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
-# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
-#
-#
-#
-# This module is used by GeneNetwork project (www.genenetwork.org)
-#
-# Created by GeneNetwork Core Team 2010/08/10
-#
-# Last updated by GeneNetwork Core Team 2010/10/20
-
-from base.templatePage import templatePage
-from utility import webqtlUtil
-from base.webqtlTrait import webqtlTrait
-from base import webqtlConfig
-import multitrait
-
-# XZ, 09/09/2008: After adding several traits to a collection, click the "Compare Correlates" button;
-# XZ, 09/09/2008: this class generates the page you then see.
-# XZ, 09/09/2008: This class just collects the input and passes it to multitrait.py.
-#########################################
-#     Multiple Correlation Page
-#########################################
-class MultipleCorrelationPage(templatePage):
-        """Landing page for the "Compare Correlates" tool.
-
-        Reads the selected traits from the submitted form, records them in a
-        scratch file under webqtlConfig.IMGDIR and delegates the rendering of
-        the page body to multitrait.TraitCorrelationPage.
-        """
-
-        def __init__(self,fd):
-                """Build the page from the form-data object ``fd``.
-
-                When no trait, or too many traits, were selected an error page
-                is written out and the constructor returns early without
-                setting the page body.
-                """
-                templatePage.__init__(self, fd)
-
-                if not self.openMysql():
-                        return
-                if not fd.genotype:
-                        fd.readData()
-
-                heading = 'Compare Correlates'
-                # upper bound on the number of traits accepted in one request
-                maxTraits = 100
-
-                self.searchResult = fd.formdata.getvalue('searchResult')
-                if not self.searchResult:
-                        detail = ['You need to select at least two traits in order to generate correlation matrix.']
-                        self.error(heading=heading,detail=detail)
-                        print 'Content-type: text/html\n'
-                        self.write()
-                        return
-                # a single selection arrives as a bare string rather than a list
-                if isinstance(self.searchResult, str):
-                        self.searchResult = [self.searchResult]
-
-                if len(self.searchResult) > maxTraits:
-                        detail = ['In order to display Compare Correlates properly, Do not select more than %d traits for Compare Correlates.' % maxTraits]
-                        self.error(heading=heading,detail=detail)
-                        print 'Content-type: text/html\n'
-                        self.write()
-                        return
-
-                # searchResult is known to be non-empty from here on
-                traitList = []
-                for item in self.searchResult:
-                        thisTrait = webqtlTrait(fullname=item, cursor=self.cursor)
-                        thisTrait.retrieveInfo()
-                        traitList.append(thisTrait)
-
-                # Persist the selection: first line is the RI set, then one
-                # "type,id,name" line per trait.
-                filename = webqtlUtil.genRandStr("mult_")
-                fp = open(webqtlConfig.IMGDIR+filename, 'wb')
-                try:
-                        fp.write('%s\n' % fd.RISet)
-                        for thisTrait in traitList:
-                                fp.write("%s,%s,%s\n" % (thisTrait.db.type,thisTrait.db.id,thisTrait.name))
-                finally:
-                        # release the handle even if a write fails
-                        fp.close()
-                fd.formdata["filename"] = filename
-
-                # defaults for the first, parameterless run of the analysis
-                params = {"filename":filename, "targetDatabase":"",
-                        "threshold":0.5, "subsetSize":10,
-                        "correlation":"pearson", "subsetCount":10,
-                        "firstRun":"1"}
-                results = []
-                txtOutputFileName = ""
-
-                self.dict['body'] = multitrait.TraitCorrelationPage(fd, params, self.cursor, traitList, results,
-                                                        fd.RISet,txtOutputFileName).dict['body']
-                self.dict['title'] = 'Compare Correlates'
-            
-
-
-
diff --git a/web/webqtl/compareCorrelates/__init__.py b/web/webqtl/compareCorrelates/__init__.py
deleted file mode 100755
index e69de29b..00000000
--- a/web/webqtl/compareCorrelates/__init__.py
+++ /dev/null
diff --git a/web/webqtl/compareCorrelates/correlation.py b/web/webqtl/compareCorrelates/correlation.py
deleted file mode 100755
index f2ea55b3..00000000
--- a/web/webqtl/compareCorrelates/correlation.py
+++ /dev/null
@@ -1,359 +0,0 @@
-# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU Affero General Public License
-# as published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the GNU Affero General Public License for more details.
-#
-# This program is available from Source Forge: at GeneNetwork Project
-# (sourceforge.net/projects/genenetwork/).
-#
-# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
-# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
-#
-#
-#
-# This module is used by GeneNetwork project (www.genenetwork.org)
-#
-# Created by GeneNetwork Core Team 2010/08/10
-#
-# Last updated by GeneNetwork Core Team 2010/10/20
-
-# correlation.py
-# functions for computing correlations for traits
-#
-# Originally, this code was designed to compute Pearson product-moment
-# coefficients. The basic function calcPearson scans the strain data
-# for the two traits and drops data for a strain unless both traits have it.
-# If there are fewer than six strains left, we conclude that there's
-# insufficient data and drop the correlation.
-#
-# In addition, this code can compute Spearman rank-order coefficients using
-# the calcSpearman function.
-
-#Xiaodong changed the dependency structure
-import numarray
-import numarray.ma as MA
-import time
-
-import trait
-
-# strainDataUnion : StrainData -> StrainData -> array, array
-def strainDataUnion(s1, s2):
-    """Pair up the values of the strains that s1 and s2 have in common.
-
-    Despite the name, only strains present in BOTH mappings are kept.
-    Returns two Float64 arrays of equal length, both ordered by ascending
-    strain key, so that position i refers to the same strain in each.
-    """
-    strains = s1.keys()
-    strains.sort()
-    shared = [s for s in strains if s2.has_key(s)]
-
-    first = numarray.array([s1[s] for s in shared], numarray.Float64)
-    second = numarray.array([s2[s] for s in shared], numarray.Float64)
-    return (first, second)
-
-# calcCorrelationHelper : array -> array -> float
-def calcCorrelationHelper(s1p, s2p):
-    """Return the product-moment correlation of two aligned value arrays.
-
-    Returns 0.0 when fewer than six values are supplied, or when the
-    division raises ZeroDivisionError.
-    """
-    # too few shared strains: do not bother with the correlation
-    if len(s1p) < 6:
-        return 0.0
-
-    # deviations from the respective means
-    dev1 = (s1p - numarray.average(s1p)).astype(numarray.Float64)
-    dev2 = (s2p - numarray.average(s2p)).astype(numarray.Float64)
-
-    # sums of squares (variance terms) and the cross-product term
-    sumSq1 = numarray.sum(dev1**2)
-    sumSq2 = numarray.sum(dev2**2)
-    crossSum = numarray.sum(dev1*dev2)
-
-    try:
-        return crossSum / numarray.sqrt(sumSq1 * sumSq2)
-    except ZeroDivisionError:
-        return 0.0
-    
-# calcSpearman : Trait -> Trait -> float
-def calcSpearman(trait1, trait2):
-    """Correlate the ranks of the strain values shared by two traits."""
-    values1, values2 = strainDataUnion(trait1.strainData,
-                                       trait2.strainData)
-    # replace each value by its rank, then correlate the ranks
-    return calcCorrelationHelper(rankArray(values1), rankArray(values2))
-
-# calcPearson : Trait -> Trait -> float
-def calcPearson(trait1, trait2):
-    """Correlate the raw strain values shared by two traits."""
-    # keep only the strains both traits have, in a common order
-    shared = strainDataUnion(trait1.strainData, trait2.strainData)
-    return calcCorrelationHelper(shared[0], shared[1])
-
-# buildPearsonCorrelationMatrix: (listof n traits) -> (listof s strains) -> n x s matrix, n x s matrix
-def buildPearsonCorrelationMatrix(traits, commonStrains):
-    """Lay out trait values on a traits x strains grid.
-
-    traits -- sequence of objects exposing a dict-like ``strainData``
-    commonStrains -- sequence of strain keys; defines the column order
-
-    Returns (matrix, testMatrix). matrix[i,j] holds the value of trait i
-    for strain commonStrains[j], or 0 when the trait has no value for that
-    strain. testMatrix[i,j] is 1 where a value was present and 0 elsewhere.
-    """
-    dim = (len(traits), len(commonStrains))
-    matrix = numarray.zeros(dim, MA.Float64)
-    testMatrix = numarray.zeros(dim, MA.Float64)
-
-    for i in range(len(traits)):
-        sd = traits[i].strainData
-        for j in range(len(commonStrains)):
-            strain = commonStrains[j]
-            # direct mapping lookup instead of scanning a list of keys
-            if sd.has_key(strain):
-                matrix[i,j] = sd[strain]
-                testMatrix[i,j] = 1
-
-    return matrix, testMatrix
-
-# buildSpearmanCorrelationMatrix: (listof n traits) -> int s -> n x s matrix, n x s matrix
-def buildSpearmanCorrelationMatrix(traits, sc):
-    """Lay out per-trait value ranks on a traits x sc grid.
-
-    Returns (matrix, testMatrix): matrix[i,k] is the 0-based position of
-    strain k+1 when trait i's strains are sorted by value, and
-    testMatrix[i,k] is 1 wherever a rank was written.
-
-    NOTE(review): sc is used directly as the column count, and strain keys
-    are converted with int(); calcMatrixHelper in this file calls its
-    builder with a list of strain keys as the second argument, which does
-    not look compatible with either assumption -- confirm before use.
-    """
-    dim = (len(traits), sc)
-    matrix = numarray.zeros(dim, MA.Float64)
-    testMatrix = numarray.zeros(dim, MA.Float64)
-
-    # order (strain, value) pairs by value
-    def customCmp(a, b):
-        return cmp(a[1], b[1])
-    
-    for i in range(len(traits)):
-        # copy strain data to a temporary list and turn it into
-        # (strain, expression) pairs
-        sd = traits[i].strainData
-        tempList = []
-        for key in sd.keys():
-            tempList.append((key, sd[key]))
-
-        # sort the temporary list by expression
-        tempList.sort(customCmp)
-        
-        for j in range(len(tempList)):
-            # k is the strain id minus 1
-            # 1-based strain id -> 0-based column index
-            k = int(tempList[j][0]) - 1
-
-            # j is the rank of the particular strain
-            # (0-based; equal values are not given a shared rank)
-            matrix[i,k] = j
-
-            testMatrix[i,k] = 1
-
-    return matrix, testMatrix
-            
-def findLargestStrain(traits, sc):
-    """Return the largest strain key found in any trait's strainData.
-
-    The ``sc`` argument is accepted but not used.
-    """
-    # largest key per trait first, then the largest of those
-    perTraitMax = [max(t.strainData.keys()) for t in traits]
-    return max(perTraitMax)
-
-def findCommonStrains(traits1, traits2):
-    """List the strains that occur in both groups of traits.
-
-    A strain qualifies when at least one trait in traits1 and at least one
-    trait in traits2 has a value for it. The result contains each strain
-    once, in the order in which it is first met while walking traits1.
-    """
-    # strains seen anywhere in the second group (dict used as a set)
-    inSecond = {}
-    for trait in traits2:
-        for key in trait.strainData.keys():
-            inSecond[key] = 1
-
-    commonStrains = []
-    seen = {}
-    for trait in traits1:
-        for key in trait.strainData.keys():
-            if key not in seen:
-                seen[key] = 1
-                # constant-time lookup replaces the repeated list scans
-                if key in inSecond:
-                    commonStrains.append(key)
-
-    return commonStrains
-
-def calcPearsonMatrix(traits1, traits2, sc, strainThreshold=6,
-                      verbose = 0):
-    """Correlate every trait in traits1 with every trait in traits2.
-
-    Thin wrapper that runs calcMatrixHelper with the Pearson matrix builder.
-    """
-    builder = buildPearsonCorrelationMatrix
-    return calcMatrixHelper(builder, traits1, traits2, sc,
-                            strainThreshold, verbose)
-
-def calcProbeSetPearsonMatrix(cursor, freezeId, traits2, strainThreshold=6,
-                      verbose = 0):
-    """Correlate every probe set of one ProbeSetFreeze against traits2.
-
-    cursor -- database cursor used for all queries
-    freezeId -- ProbeSetFreezeId whose probe sets are loaded
-    traits2 -- traits to correlate against
-    strainThreshold, verbose -- passed through to calcMatrixHelper
-
-    The probe sets are processed in ProbeSetId order, in batches, and the
-    result is a (number of probe sets) x len(traits2) Float64 array.
-    """
-    # probe sets loaded and correlated per batch
-    chunkSize = 5000
-
-    # let the driver quote freezeId instead of splicing it into the SQL text
-    cursor.execute('select ProbeSetId from ProbeSetXRef where ProbeSetFreezeId = %s order by ProbeSetId', (freezeId,))
-    ProbeSetIds = cursor.fetchall()
-    total = len(ProbeSetIds)
-
-    returnValue = numarray.zeros((total, len(traits2)), MA.Float64)
-    row = 0
-    for start in range(0, total, chunkSize):
-        # the last batch may be shorter than chunkSize
-        last = min(start + chunkSize, total) - 1
-        ProbeSetId1 = ProbeSetIds[start][0]
-        ProbeSetId2 = ProbeSetIds[last][0]
-
-        traits1 = trait.queryPopulatedProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2) # XZ,09/10/2008: add module name 'trait.'
-        SubMatrix = calcMatrixHelper(buildPearsonCorrelationMatrix,
-                                     traits1, traits2, 1000, strainThreshold,
-                                     verbose)
-
-        # copy the batch straight into the result rather than keeping
-        # every sub-matrix alive until the end
-        for i in range(len(SubMatrix)):
-            for j in range(len(traits2)):
-                returnValue[row,j] = SubMatrix[i,j]
-            row += 1
-
-    return returnValue
-
-    
-
-# note: this code DOES NOT WORK, especially in cases where
-# there are missing observations (e.g. when comparing traits
-# from different probesetfreezes)
-def calcSpearmanMatrix(traits1, traits2, sc, strainThreshold=6,
-                       verbose=0):
-    """Run calcMatrixHelper with the Spearman (rank) matrix builder."""
-    builder = buildSpearmanCorrelationMatrix
-    return calcMatrixHelper(builder, traits1, traits2, sc,
-                            strainThreshold, verbose)
-    
-def calcMatrixHelper(builder, traits1, traits2, sc, strainThreshold,
-                     verbose):
-    """Correlate every trait in traits1 with every trait in traits2.
-
-    builder -- function(traits, commonStrains) returning a pair
-               (values, presence) of traits x strains matrices
-    sc -- not used; kept so existing callers keep working
-    strainThreshold -- pairs sharing fewer strains than this yield 0.0
-    verbose -- when true, print the intermediate matrices
-
-    Returns a len(traits1) x len(traits2) array of coefficients in which
-    undefined correlations are reported as 0.0.
-
-    For each pair the coefficient is computed from sums over the strains
-    both traits have data for:
-
-        r = (n*Sxy - Sx*Sy) / sqrt((n*Sxx - Sx*Sx) * (n*Syy - Sy*Sy))
-
-    Every sum is obtained for all pairs at once as an inner product of the
-    value matrices with the 0/1 presence matrices.
-    """
-    commonStrains = findCommonStrains(traits1, traits2)
-
-    matrix1, test1 = builder(traits1, commonStrains)
-    matrix2, test2 = builder(traits2, commonStrains)
-
-    # ns[i,j]: number of strains for which both traits have data
-    ns = numarray.innerproduct(test1, test2)
-
-    # mask all ns less than strainThreshold so the correlation values
-    # end up masked
-    # ns is now a MaskedArray and so all ops involving ns will be
-    # MaskedArrays
-    ns = MA.masked_less(ns, strainThreshold, copy=0)
-
-    # per-pair sums of x, y and x*y
-    xs = numarray.innerproduct(matrix1, test2)
-    ys = numarray.innerproduct(test1, matrix2)
-    xys = numarray.innerproduct(matrix1, matrix2)
-
-    # use in-place operations to try to speed things up
-    # (matrix1 and matrix2 hold squared values from here on)
-    numarray.power(matrix1, 2, matrix1)
-    numarray.power(matrix2, 2, matrix2)
-
-    # per-pair sums of x*x and y*y
-    x2s = numarray.innerproduct(matrix1, test2)
-    y2s = numarray.innerproduct(test1, matrix2)
-
-    # parens below are very important
-    # the instant we touch ns, arrays become masked and
-    # computation is much, much slower
-    top = ns*xys - (xs*ys)
-    bottom1 = ns*x2s - (xs*xs)
-    bottom2 = ns*y2s - (ys*ys)
-    bottom = MA.sqrt(bottom1*bottom2)
-
-    # mask catches floating point divide-by-zero problems here
-    corrs = top / bottom
-
-    # we define undefined correlations as zero even though there
-    # is a mathematical distinction
-    returnValue = MA.filled(corrs, 0.0)
-
-    if verbose:
-        # the two matrices are printed after the in-place squaring above
-        print "Matrix 1:", matrix1
-        print "Matrix 2:", matrix2
-        print "Ns:", ns
-        print "Xs", xs
-        print "Ys", ys
-        print "XYs:", xys
-        print "Top:", top
-        print "Bottom 1:", bottom1
-        print "Bottom 2:", bottom2
-        print "Bottom:", bottom
-        print "Corrs:", corrs
-
-    return returnValue
-    
-    
-
-# rankArray: listof float -> listof float
-# builds a companion array in which every element of the input
-# is replaced by that element's rank
-def rankArray(floatArray):
-    """Return a Float64 array holding the 1-based rank of each input value.
-
-    The smallest value receives rank 1. Values are ordered with cmp() and
-    each position gets its own rank; no shared rank is computed for ties.
-    """
-    count = len(floatArray)
-    ranks = numarray.zeros(count, numarray.Float64)
-
-    # remember where each value came from, then order the pairs by value
-    indexed = [(pos, floatArray[pos]) for pos in range(count)]
-    indexed.sort(lambda a, b: cmp(a[1], b[1]))
-
-    # the position in the sorted list, counted from one, is the rank
-    rank = 0
-    for pos, value in indexed:
-        rank += 1
-        ranks[pos] = rank
-
-    return ranks
diff --git a/web/webqtl/compareCorrelates/htmlModule.py b/web/webqtl/compareCorrelates/htmlModule.py
deleted file mode 100755
index ebba3b86..00000000
--- a/web/webqtl/compareCorrelates/htmlModule.py
+++ /dev/null
@@ -1,279 +0,0 @@
-# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU Affero General Public License
-# as published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the GNU Affero General Public License for more details.
-#
-# This program is available from Source Forge: at GeneNetwork Project
-# (sourceforge.net/projects/genenetwork/).
-#
-# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
-# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
-#
-#
-#
-# This module is used by GeneNetwork project (www.genenetwork.org)
-#
-# Created by GeneNetwork Core Team 2010/08/10
-#
-# Last updated by GeneNetwork Core Team 2010/10/20
-
-import sys
-import string
-import os
-import MySQLdb
-import cgi
-
-from htmlgen import HTMLgen2 as HT
-
-from base import webqtlConfig
-
-
-# XZ 08/14/2008: When I tried to replace 'from webqtlConfig import *' with 'import webqtlConfig',
-# XZ 08/14/2008: I found some problems. I discussed them with Hongqiang, and the conclusions are below.
-# XZ 08/14/2008: The program uses webqtlConfig.DB_NAME, webqtlConfig.MYSQL_SERVER and so on;
-# XZ 08/14/2008: without 'import webqtlConfig', this program will not work.
-# XZ 08/14/2008: CONFIG_htmlpath doesn't exist in webqtlConfig.py.
-# XZ 08/14/2008: Hongqiang said this was done by Fan Zhang, and this program was not tested,
-# XZ 08/14/2008: so nobody noticed these bugs.
-
-# XZ, 09/09/2008: This function is not called anywhere.
-# XZ, 09/09/2008: Actually, I don't think this function works.
-def genHeaderFooter(i=1,title='',basehref='',js1='',js2='',layer='',body=''):
-	"""
-	generate footer and header HTML code
-	default is header
-	i = 0 is footer+header
-	i = 1 is header
-	i = 2 is footer
-	any other value of i gives an empty string
-
-	The remaining arguments are substituted into the template file.
-	If the template cannot be loaded or filled in, a placeholder string
-	naming the requested part is returned instead.
-	"""
-	try:
-		# NOTE(review): CONFIG_htmlpath is not defined or imported in this
-		# module, so this lookup presumably always fails and the
-		# placeholder branch below is what actually runs -- confirm.
-		temp_file = CONFIG_htmlpath + 'beta-template.html'
-		fp = open(temp_file, 'rb')
-		try:
-			template = fp.read()
-		finally:
-			# close the handle even when the read fails
-			fp.close()
-		template = template % (title,basehref,js1,js2,layer,body, "")
-		header,footer = string.split(template,'<!-- split from Here -->')
-	except Exception:
-		# best effort: fall back to placeholder text
-		parts = {0: "header + footer", 1: "header", 2: "footer"}
-	else:
-		parts = {0: header + footer, 1: header, 2: footer}
-	return parts.get(i, "")
-
-# XZ, 09/09/2008: This function is only used in multitrait.py where it is called with value assigned to db.
-# XZ, 09/09/2008: So the try-except block is not executed.
-# XZ, 09/09/2008: This explains why no error was generated even without 'import webqtlConfig'
-def genDatabaseMenu(db = None, public =1, RISetgp = 'BXD', selectname = 'database', selected = ""):
-	"""
-	generate database Menu
-	public = 0 : search3.html databases Menu
-	public = 1 : search.html databases Menu
-
-	db         -- database cursor; when omitted a new connection is opened
-	              with the settings from webqtlConfig
-	RISetgp    -- InbredSet name whose databases are listed
-	selectname -- name attribute of the generated select element
-	selected   -- entry to mark as selected, if any
-
-	Returns the select element rendered as a string, '' when no database
-	was found, or the text "Connect MySQL Server Error" when no connection
-	could be opened.
-	"""
-	if not db:
-		try:
-			# Modified by Fan Zhang
-			con = MySQLdb.Connect(db=webqtlConfig.DB_NAME,host=webqtlConfig.MYSQL_SERVER, user=webqtlConfig.DB_USER,passwd=webqtlConfig.DB_PASSWD)
-			db = con.cursor()
-		except Exception:
-			return "Connect MySQL Server Error"
-
-	databaseMenu = HT.Select(name=selectname)
-	nmenu = 0
-
-	# here's a hack: bxd and bxd300 can be correlated against each other
-	# if either of those are the group, we put in special SQL that pulls both
-	#
-	# The group name is handed to the driver as a query parameter, so it is
-	# quoted by the driver and never spliced into the SQL text. The %s left
-	# in ibsNameQry is the driver's placeholder.
-	if RISetgp in ("BXD", "BXD300"):
-		ibsNameQry = '(InbredSet.Name = %s OR InbredSet.Name = %s)'
-		ibsNameArgs = ("BXD", "BXD300")
-	else:
-		ibsNameQry = 'InbredSet.Name = %s'
-		ibsNameArgs = (RISetgp,)
-
-	#Publish Database
-	db.execute('''
-		   SelecT
-		     PublishFreeze.FullName,
-		     PublishFreeze.Name
-		   from
-		     PublishFreeze,
-		     InbredSet
-		   where
-		     PublishFreeze.InbredSetId = InbredSet.Id and
-		     %s
-		   ''' % ibsNameQry, ibsNameArgs)
-	for item in db.fetchall():
-		databaseMenu.append(item)
-		nmenu += 1
-
-	#Genome Database
-	db.execute('''
-		   SelecT
-		     GenoFreeze.FullName,
-		     GenoFreeze.Name
-		   from
-		     GenoFreeze,InbredSet
-		   where
-		     GenoFreeze.InbredSetId = InbredSet.Id and
-		     %s
-		   ''' % ibsNameQry, ibsNameArgs)
-	for item in db.fetchall():
-		databaseMenu.append(item)
-		nmenu += 1
-
-	#Microarray Database
-	# one option group per tissue; fetchall() has already read the whole
-	# tissue list, so the cursor can be reused inside the loop
-	db.execute('SelecT Id, Name from Tissue')
-	for item in db.fetchall():
-		TId, TName = item
-		databaseMenuSub = HT.Optgroup(label = '%s ------' % TName)
-		db.execute('''
-			   SelecT
-			     ProbeSetFreeze.FullName,
-			     ProbeSetFreeze.Name
-			   from
-			     ProbeSetFreeze,
-			     ProbeFreeze,
-			     InbredSet
-			   where
-			     ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and
-			     ProbeFreeze.TissueId = %d and
-			     ProbeSetFreeze.public > %d and
-			     ProbeFreeze.InbredSetId = InbredSet.Id and
-			     %s
-			   order by
-			     ProbeSetFreeze.CreateTime desc,
-			     ProbeSetFreeze.AvgId
-			   '''  % (TId,public,ibsNameQry), ibsNameArgs)
-		for item2 in db.fetchall():
-			databaseMenuSub.append(item2)
-			nmenu += 1
-		databaseMenu.append(databaseMenuSub)
-
-	if nmenu:
-		if selected:
-			databaseMenu.selected.append(selected)
-		return str(databaseMenu)
-	else:
-		return ''
-
-
-# XZ, 09/09/2008: This function is not called any where. 
-# XZ, 09/09/2008: Actually, I don't think this function works.
-# XZ, 09/09/2008: There is no 'DataForm' file now. It should be webqtlForm.py
-def genRISample():
-	"""
-	Build an HTML block with one table of random sample values per
-	*.geno file found in the genotype directory (files whose name contains
-	'F2' and the "BayXSha" set are left out). Below each table the same
-	values are repeated on one line for pasting into an entry box.
-
-	NOTE(review): CONFIG_genodir is not defined or imported in this
-	module, so the first use presumably raises NameError -- confirm.
-	"""
-	import glob
-	import reaper
-	import random
-	import math
-	import webqtlUtil
-	# set names are the .geno file names without directory and extension
-	risets = filter(lambda X:X.find('F2')<0, map(os.path.basename, glob.glob(os.path.join(CONFIG_genodir, "*.geno"))))
-	risets = map(lambda X:X.split('.')[0], risets)
-	# list.remove raises ValueError when "BayXSha" is not present
-	risets.remove("BayXSha")
-	risets.sort()
-	body = HT.Blockquote()
-	# number of strain/value pairs per table row
-	NPerRow = 6
-	for item in risets:
-		values = []
-		# display names for the two combined sets
-		if item == 'AXBXA': item2='AXB/BXA'
-		elif item == 'HXBBXH': item2='HXB/BXH'
-		else: item2=item
-		body.append(HT.Paragraph(item2, Class='subtitle'))
-		tbl = HT.TableLite(Class="collap")
-		dataset = reaper.Dataset()
-		dataset.read(os.path.join(CONFIG_genodir, "%s.geno"%item))
-		# strain list: the entries from webqtlUtil.ParInfo, then dataset.prgy
-		prgy = webqtlUtil.ParInfo[item] + list(dataset.prgy)
-		
-		# random mean and variance for this set's sample values
-		mean = random.random()*100
-		variance = random.random()*500
-		variables = []
-		# draw normally distributed values two at a time: pick a point in the
-		# square [-1,1) x [-1,1), retry until it lies inside the unit circle
-		# (S < 1), then transform both coordinates
-		while len(variables) < len(prgy):
-			S = 2
-			while (S>=1):
-				U1= random.random()
-				U2= random.random()
-				V1= 2*U1-1.0
-				V2= 2*U2-1.0
-				S=V1*V1+V2*V2
-			X= math.sqrt(-2 *  math.log(S) / S) * V1
-			Y= math.sqrt(-2 *  math.log(S) / S) * V2	
-			variables.append(mean + math.sqrt(variance) * X)
-			variables.append(mean + math.sqrt(variance) * Y) 
-		
-		tempTR = HT.TR()
-		for i, strain in enumerate(prgy):
-			# start a new table row after every NPerRow strains
-			if i and i%NPerRow==0:
-				tbl.append(tempTR)
-				tempTR = HT.TR()
-			# about one value in five is replaced by 'X'
-			if random.random() < 0.2:
-				variable = 'X'
-			else:
-				variable = "%2.3f" % variables[i]
-			
-			tempTR.append(HT.TD(strain, Class="strains", width=80))
-			tempTR.append(HT.TD(variable, Class="values", width=60))
-			values.append(variable)
-		
-		# add empty cells to the last row; i is the index of the last strain
-		# and is unbound here if prgy was empty
-		for j in range(NPerRow-i%NPerRow-1):
-			tempTR.append(HT.TD())
-		tbl.append(tempTR)	
-		body.append(tbl)
-		body.append(HT.Paragraph("Copy the following line to paste into the GeneNetwork entry box:"))
-		body.append(HT.Code(string.join(values, " ")))
-		body.append(HT.HR(width="90%"))
-	return body
-		
-if __name__ == "__main__":
-	# Pick the requested fragment either from the CGI query (parameter
-	# 'get') or from the first command-line argument, then print it.
-	script_filename = os.environ.get('SCRIPT_FILENAME', '')
-	getID = ''
-	if script_filename and script_filename[-2:] == 'py':
-		#Used as cgi script
-		print 'Content-type: text/html\n'
-		formdata = cgi.FieldStorage()
-		sys.stderr = sys.stdout
-		try:
-			getID = string.lower(formdata.getvalue('get'))
-		except:
-			getID = ''
-	elif len(sys.argv) >= 2:
-		#Used as command
-		getID = string.lower(sys.argv[1])
-
-	if getID == 'header':
-		output = genHeaderFooter(1)
-	elif getID == 'footer':
-		output = genHeaderFooter(2)
-	elif getID == 'databasemenu':
-		output = genDatabaseMenu(public=0)
-	elif getID == 'datasample':
-		output = genRISample()
-	else:
-		# 'headerfooter' and every unrecognised value
-		output = genHeaderFooter(0)
-	print output
-
diff --git a/web/webqtl/compareCorrelates/multitrait.py b/web/webqtl/compareCorrelates/multitrait.py
deleted file mode 100755
index 047620af..00000000
--- a/web/webqtl/compareCorrelates/multitrait.py
+++ /dev/null
@@ -1,1121 +0,0 @@
-# multitrait.py
-# a tool to analyze the correlations between several different traits and the traits
-# in a given dataset
-#
-# Parameters:
-# correlation -- either "pearson" or "spearman" depending on which ones we want to use 
-#
-# filename -- an input file containing the traits to analyze
-#
-# progress -- if set, this parameter outputs a static progress page
-# and uses a META redirect to trigger the real computation
-#
-# targetDatabaseType:
-# one of "ProbeSet", "Publish", "Genotype" depending on the type of database
-# we will use for the analysis
-#
-# targetDatabaseId:
-# the id (*Freeze.Id in the database) of the particular database we will analyze
-#
-# threshold -- a float between 0 and 1 to determine which coefficients we will consider
-#
-# firstRun -- either 0 or 1
-# whether to automatically pick reasonable defaults for the other three parameters
-#
-# outputType -- either "html" or "text"
-# 
-# Author: Stephen Pitts
-# June 15, 2004
-
-#Xiaodong changed the dependency structure
-
-import copy
-import sys
-import cgi
-import os
-import os.path
-import math
-import time
-import numarray
-import tempfile
-import string
-import cgitb #all tracebacks come out as HTMLified CGI,useful when we have a random crash in the middle
-
-from base import templatePage
-from base.webqtlTrait import webqtlTrait
-from utility import webqtlUtil
-from base import webqtlConfig
-import trait
-import correlation
-import htmlModule
-
-cgitb.enable()
-
-
-# where this program's data files are
-RootDir = webqtlConfig.IMGDIR # XZ, 09/10/2008: add module name 'webqtlConfig.'
-RootDirURL = "/image/" # XZ, 09/10/2008: This parameter is not used in this module
-
-tempfile.tempdir = RootDir
-tempfile.template = "multitrait"
-
-# MultitraitException: used if something goes wrong
-# maybe in the future we should make exceptions more granular
-class MultitraitException(Exception):
-    def __init__(self, message):
-        self.message = message
-
-    def __repr__(self):
-        return "MultitraitException: %s" % self.message
-
-# buildParamDict: Cursor -> ParamDict
-# to process and validate CGI arguments
-# see the comment at the top of this file for valid cgi
-# parameters
-def buildParamDict(cursor, fd):
-    params = {}
-    fs = fd.formdata #cgi.FieldStorage()
-    params["progress"] = fs.getfirst("progress", "0")
-    params["filename"] = fs.getfirst("filename", "")
-    if params["filename"] == "":
-        raise MultitraitException("Required parameter filename missing.")
-
-    params["targetDatabase"] = fs.getfirst("targetDatabase", "U74Av2RMA_Raw_ProbeSet_March04")
-    params["firstRun"] = webqtlUtil.safeInt(fs.getfirst("firstRun", "0"),0)
-    params["threshold"] = webqtlUtil.safeFloat(fs.getfirst("threshold", "0.5"), 0.5)
-    params["subsetSize"] = webqtlUtil.safeInt(fs.getfirst("subsetSize", "10"), 10)
-    
-    if params["subsetSize"] < -1:
-        params["subsetSize"] = -1
-        
-    params["correlation"] = fs.getfirst("correlation", "pearson")
-    params["subsetCount"] = webqtlUtil.safeInt(fs.getfirst("subsetCount", 10), 10)
-    
-    if params["subsetCount"] < -1:
-        params["subsetCount"] = -1
-        
-    #params["outputType"] = fs.getfirst("outputType", "html")
-    
-    #if params["outputType"] not in ("html", "text"):
-    #    params["outputType"] = "html"
-    
-    if params["correlation"] not in ("pearson", "spearman"):
-        params["correlation"] = "pearson"
-
-    params["correlationName"] = params["correlation"].capitalize()
-
-    # one of two cases:
-    # 1) We have just come from a submit, so there are a bunch of display*
-    #    but no displaySets. Thus, the code down there converts the display*
-    #    to displaySets so the GET request doesn't get too long
-    # 2) We have just been redirected from a progress page which already has
-    #    a converted displaySets for us.
-
-    displaySets = webqtlUtil.safeInt(fs.getfirst("displaySets","0"), 0)
-
-    if displaySets == 0:
-        for key in fs.keys():
-            if key[:7] == "display":
-                #print "Hit display key %s<br>" % key
-                try:
-                    whichSet = int(key[7:])
-                    
-                    # prevent malicious attacks
-                    whichSet = min(whichSet, 512)
-                    displaySets += pow(2, whichSet)
-                
-                except ValueError: pass
-
-    params["displaySets"] = displaySets
-    #print "In the beginning, display sets was %s: %s<br>" % (displaySets,
-    #                                                     str(binaryDecompose(displaySets)))
-
-    # if we are just gonna display a progress page, then there's no
-    # reason to look up detailed database information
-    #if params["progress"] == "1":
-    #    return params
-    
-    a,b = trait.dbNameToTypeId(cursor, params["targetDatabase"]) # XZ, 09/10/2008: add module name
-    params["targetDatabaseType"] = a
-    params["targetDatabaseId"] = b
-    params["targetDatabaseName"] = params["targetDatabase"]
-
-    return params
-
-# readInputFile: DB cursor -> string -> string, (arrayof Trait)
-def readInputFile(cursor, filename):
-    """
-    To read an input file with n lines in the following format
-    <databasetype>,<databaseid>,<traitname>
-    and retrieve and populate traits with appropriate data
-    from the database
-
-    Also, for our purposes. we store the database type and
-    database id in fields attached to the trait instances. We use
-    this information to generate Javascript popups with trait
-    information.
-
-    In addition, we read the strain of mice that the traits are
-    from so we can only show those databases to correlate against.
-    """
-    handle = open(filename)
-    line = handle.readline()
-    inbredSetName = line.strip()
-    line = handle.readline()
-    traits = []
-
-# XZ, 09/10/2008: In this while loop block, I changed the original variable name 'trait' to 'oneTrait'
-    while line != "":
-        line = line.strip()
-        dbType, dbId, tName = line.split(",")
-
-        if dbType == "ProbeSet":
-            oneTrait = trait.queryProbeSetTraitByName(cursor, tName) # XZ, 09/10/2008: add module name
-            oneTrait.populateDataId(cursor, dbId)
-            oneTrait.dbName = trait.dbTypeIdToName(cursor, dbType, dbId) # XZ, 09/10/2008: add module name
-        elif dbType == "Geno":
-            speciesId = trait.getSpeciesIdByDbTypeId(cursor, dbType, dbId)
-            oneTrait = trait.queryGenotypeTraitByName(cursor, speciesId, tName) # XZ, 09/10/2008: add module name
-            oneTrait.populateDataId(cursor, dbId)
-            oneTrait.dbName = trait.dbTypeIdToName(cursor, dbType, dbId) # XZ, 09/10/2008: add module name
-        elif dbType == "Publish":
-            oneTrait = trait.queryPublishTraitByName(cursor, dbId, tName) # XZ, 09/10/2008: add module name
-            oneTrait.populateDataId(cursor, dbId)
-            oneTrait.dbName = trait.dbTypeIdToName(cursor, dbType, dbId) # XZ, 09/10/2008: add module name
-	elif dbType == "Temp":
-	    oneTrait = trait.queryTempTraitByName(cursor, tName) # XZ, 09/10/2008: add module name
-            oneTrait.populateDataId(cursor, dbId)
-            oneTrait.dbName = "Temp"
-
-        oneTrait.populateStrainData(cursor)
-        traits.append(oneTrait)
-
-        line = handle.readline()
-
-    return inbredSetName, traits
-
-# loadDatabase: Cursor -> ParamDict -> arrayof Trait
-def loadDatabase(cursor, p):
-    """
-    To load a set of traits as specified by the
-    targetDatabaseId
-    and targetDatabaseType parameters
-
-    Cursor should be a fastCursor from the webqtl library (i.e.
-    a MySQLdb SSCursor). 
-    
-    Calling populateStrainData 20,000 or so times on a ProbeSet
-    is really inefficient, so I wrote an optimized queryPopulatedProbeSetTraits
-    in the trait module that uses a join to get all of the rows in
-    bulk, store the resultset on the server, and do all sorts of nice buffering.
-    It's about two or three times faster.
-    """
-    if p["targetDatabaseType"] == "ProbeSet": # XZ, 09/10/2008: add module name 
-        dbTraits = trait.queryPopulatedProbeSetTraits(cursor,
-                                       p["targetDatabaseId"])
-    elif p["targetDatabaseType"] == "Publish": # XZ, 09/10/2008: add module name 
-        dbTraits = trait.queryPublishTraits(cursor,
-                                      p["targetDatabaseId"])
-        psd = trait.PublishTrait.populateStrainData
-    elif p["targetDatabaseType"] == "Geno": # XZ, 09/10/2008: add module name 
-        dbTraits = trait.queryGenotypeTraits(cursor,
-                                       p["targetDatabaseId"])
-        psd = trait.GenotypeTrait.populateStrainData
-    else:
-        print "Unknown target database type %s" % p["targetDatabaseType"]
-
-    if p["targetDatabaseType"] != "ProbeSet":
-        map(psd, dbTraits, [cursor]*len(dbTraits))
-        
-    return dbTraits
-
-def runProbeSetCorrelations(cursor, p, traits):
-    """
-    To run the correlations between the traits and the database.
-    This function computes a correlation coefficient between each
-    trait and every entry in the database, and partitions the database
-    into a disjoint array of arrays which it returns.
-
-    The length of the return array is 2^n, where n is the length of
-    the trait array. Which constituent element of the return array
-    a given database trait a ends up in is determined by the formula
-    i = i_0*2^0 + ... + i_(n-1)*2^(n-1)
-    where i_k is 1 if |corr(a, trait k)| >= threshold and 0 otherwise
-
-    Since most of the several thousand database traits will end up
-    with i=0, we don't return them, so the first element of the
-    return array will be empty.
-
-    A particular element of subarray j of the return array contains
-    a 2-tuple (trait,kvalues). The variable trait is obviously the
-    particular database trait that matches the user traits l_1, ..., l_m
-    to which subarray j corresponds. kvalues is a list of the correlation
-    values linking trait to l_1, ..., l_m, so the length of kvalues is
-    the number of 1s in the binary representation of j (there must be
-    a better way to describe this length).
-
-    The return array is an array of 4-tuples: the index of the
-    particular subarray, the subarray itself, the number of 1's in the
-    binary representation of the index, and the subarray's length. The
-    array is sorted in descending order by the number of 1's and then
-    by length, so the first few subarrays are the ones that correspond
-    to the largest sets. Each subarray is then sorted in descending
-    order by the average magnitude of its correlation values.
-    """
-
-    kMin = p["threshold"]
-    traitArrays = {}
-
-    # TODO: Add Spearman support
-    freezeId = p["targetDatabaseId"]
-    if p["correlation"] == "pearson":
-        correlations = correlation.calcProbeSetPearsonMatrix(cursor, freezeId, traits) #XZ, 09/10/2008: add module name
-    else:
-        correlations = correlation.calcProbeSetSpearmanMatrix(freezeId, traits) #XZ, 09/10/2008: add module name
-
-    # now we test all of the correlations in bulk
-    test = numarray.absolute(correlations)
-    test = numarray.greater_equal(test, kMin)
-    test = test.astype(numarray.Int8)
-    #print test
-
-    db = trait.queryProbeSetTraits(cursor, freezeId) #XZ, 09/10/2008: add module name
-    for i in range(len(db)):
-        cIndex = 0
-        prods = []
-        for j in range(len(traits)):
-            if test[i,j] == 1:
-                cIndex += pow(2, j)
-                prods.append(correlations[i,j])
-        if cIndex != 0:
-            if not traitArrays.has_key(cIndex):
-                traitArrays[cIndex] = []
-
-            traitArrays[cIndex].append((db[i], prods))
-
-
-    # sort each inner list of traitArrays
-    # so the matched traits appear in descending order by the
-    # average magnitude of the correlation
-    def customCmp(traitPair, traitPair2):
-        magAvg1 = numarray.average(map(abs, traitPair[1]))
-        magAvg2 = numarray.average(map(abs, traitPair2[1]))
-
-        # invert the sign to get descending order
-        return -cmp(magAvg1, magAvg2)
-
-    for traitArray in traitArrays.values():
-        traitArray.sort(customCmp)
-
-    # sort the outer list of traitArrays
-    traitArrays2 = []
-    i = 0
-    for key in traitArrays.keys():
-        a = traitArrays[key]
-        if len(a) > 0:
-            traitArrays2.append((key,a,len(binaryDecompose(key)),
-                                 len(a)))
-
-    # we sort by the number of 1's in the binary output
-    # and then by the size of the list, both in descending order
-    def customCmp2(aL,bL):
-        a = -cmp(aL[2], bL[2])
-        if a == 0:
-            return -cmp(aL[3], bL[3])
-        else:
-            return a
-
-    traitArrays2.sort(customCmp2)
-
-    return traitArrays2
-
-def runCorrelations(p, strainCount, traits, db):
-    """
-    To run the correlations between the traits and the database.
-    This function computes a correlation coefficient between each
-    trait and every entry in the database, and partitions the database
-    into a disjoint array of arrays which it returns.
-
-    The length of the return array is 2^n, where n is the length of
-    the trait array. Which constituent element of the return array
-    a given database trait a ends up in is determined by the formula
-    i = i_0*2^0 + ... + i_(n-1)*2^(n-1)
-    where i_k is 1 if |corr(a, trait k)| >= threshold and 0 otherwise
-
-    Since most of the several thousand database traits will end up
-    with i=0, we don't return them, so the first element of the
-    return array will be empty.
-
-    A particular element of subarray j of the return array contains
-    a 2-tuple (trait,kvalues). The variable trait is obviously the
-    particular database trait that matches the user traits l_1, ..., l_m
-    to which subarray j corresponds. kvalues is a list of the correlation
-    values linking trait to l_1, ..., l_m, so the length of kvalues is
-    the number of 1s in the binary representation of j (there must be
-    a better way to describe this length).
-
-    The return array is an array of 4-tuples: the index of the
-    particular subarray, the subarray itself, the number of 1's in the
-    binary representation of the index, and the subarray's length. The
-    array is sorted in descending order by the number of 1's and then
-    by length, so the first few subarrays are the ones that correspond
-    to the largest sets. Each subarray is then sorted in descending
-    order by the average magnitude of its correlation values.
-    """
-    kMin = p["threshold"]
-    traitArrays = {}
-
-    # TODO: Add Spearman support
-    if p["correlation"] == "pearson":
-        correlations = correlation.calcPearsonMatrix(db, traits, strainCount) #XZ, 09/10/2008: add module name
-    else:
-        correlations = correlation.calcSpearmanMatrix(db, traits, strainCount) #XZ, 09/10/2008: add module name
-
-    # now we test all of the correlations in bulk
-    test = numarray.absolute(correlations) 
-    test = numarray.greater_equal(test, kMin)
-    test = test.astype(numarray.Int8)
-    #print test
-    
-
-    for i in range(len(db)):
-        cIndex = 0
-        prods = []
-        for j in range(len(traits)):
-            if test[i,j] == 1:
-                cIndex += pow(2, j)
-                prods.append(correlations[i,j])
-        if cIndex != 0:
-            if not traitArrays.has_key(cIndex):
-                traitArrays[cIndex] = []
-
-            traitArrays[cIndex].append((db[i], prods))
-                
-    # sort each inner list of traitArrays
-    # so the matched traits appear in descending order by the
-    # average magnitude of the correlation
-    def customCmp(traitPair, traitPair2):
-        magAvg1 = numarray.average(map(abs, traitPair[1]))
-        magAvg2 = numarray.average(map(abs, traitPair2[1]))
-
-        # invert the sign to get descending order
-        return -cmp(magAvg1, magAvg2)
-    
-    for traitArray in traitArrays.values():
-        traitArray.sort(customCmp)
-
-    # sort the outer list of traitArrays
-    traitArrays2 = []
-    i = 0
-    for key in traitArrays.keys():
-        a = traitArrays[key]
-        if len(a) > 0:
-            traitArrays2.append((key,a,len(binaryDecompose(key)),
-                                 len(a)))
-
-    # we sort by the number of 1's in the binary output
-    # and then by the size of the list, both in descending order
-    def customCmp2(aL,bL):
-        a = -cmp(aL[2], bL[2])
-        if a == 0:
-            return -cmp(aL[3], bL[3])
-        else:
-            return a
-
-    traitArrays2.sort(customCmp2)
-
-    return traitArrays2
-
-
-# XZ, 09/09/2008: In multiple trait correlation result page,
-# XZ, 09/09/2008: click "Download a text version of the above results in CSV format"
-
-# TraitCorrelationText: a class to display trait correlations
-# as textual output
-class TraitCorrelationText:
-    # build a text shell to describe the given trait correlations
-    # this method sets self.output; use str(self) to actually
-    # get the text page
-    #
-    # traits is a list of traits and traitArray is a
-    # list of 3-tuples: index, traits', garbage
-    # where index is a binary-encoded description of which subset of
-    # traits the list traits' matches
-    #
-    # traits' is a list of 3-tuples as well: trait, correlations, garbage
-    # where trait is a particular trait and correlations is a list of float
-    # correlations (matching traits above)
-    def __init__(self, p, traits, traitArray):
-        output = "Correlation Comparison\n"
-        output += "from WebQTL and the University of Tennessee Health Science Center\n"
-        output += "initiated at " + time.asctime(time.gmtime()) + " UTC\n\n"
-        
-        output += self.showOptionPanel(p)
-        output += self.showSelectedTraits(traits)
-        output += self.showSummaryCorrelationResults(p, traits, traitArray)
-        output += self.showDetailedCorrelationResults(p, traits, traitArray)
-
-        self.output = output
-
-    # showOptionPanel: ParamDict -> string
-    # to display the options used to run this correlation
-    def showOptionPanel(self, params):
-        output = "Correlation Comparison Options:\n"
-        output += "Target database,%s\n" % params["targetDatabase"]
-        output += "Correlation type,%s\n" % params["correlationName"]
-        output += "Threshold,%f\n" % params["threshold"]
-        #output += "Subsets to Show,%d\n" % params["subsetCount"]
-        #output += "Traits to Show Per Subset,%d\n\n" % params["subsetSize"]
-        return output
-
-    # showSelectedTraits: (listof Trait) -> string
-    # to display the traits compared with the database
-    # note: we can't use tabular output because the traits could be of
-    # different types and produce different fields
-    def showSelectedTraits(self, traits):
-        output = "Selected Traits:\n"
-        for trait in traits:
-            output += '"' + trait.longName() + '"' + "\n"
-        output += "\n"
-        return output
-
-    # showSummaryCorrelationResults: ParamDict -> (listof Trait) ->
-    #    TraitArray -> string
-    # see comment for __init__ for a description of TraitArray
-    #
-    # to show a summary (sets and sizes) of the correlation results
-    # as well as an X to indicate whether they will be included
-    # in the detailed output
-    def showSummaryCorrelationResults(self, p, traits, traitArray):
-        output = "Correlation Comparison Summary:\n"
-
-        #if p["subsetCount"] != -1:
-        #    ourSubsetCount = min(p["subsetCount"], len(traitArray))
-        #else:
-
-        ourSubsetCount = len(traitArray)
-            
-        displayDecomposition = binaryDecompose(p["displaySets"])
-        for j in range(ourSubsetCount):
-            i = traitArray[j][0]
-            traitSubarray = traitArray[j][1]
-            if len(traitSubarray) == 0:
-                continue
-            
-            targetTraits = decomposeIndex(traits, i)
-            traitDesc = string.join(map(trait.Trait.shortName, targetTraits), # XZ, 09/10/2008: add module name
-                                    ", ")
-            if j in displayDecomposition:
-                checked = "X"
-            else:
-                checked = ""
-
-            output += '"%s","%s","%d"\n' % (checked, traitDesc, len(traitSubarray))
-
-        output += "\n"
-        return output
-
-    # showDetailedCorrelationResults: ParamDict -> (listof Trait) ->
-    #   TraitArray -> string
-    #
-    # to show a detailed list of the correlation results; that is,
-    # to completely enumerate each subset of traitArray using the
-    # filtering parameters in p
-    def showDetailedCorrelationResults(self, p, traits, traitArray):
-        output = "Correlation Comparison Details:\n"
-        displayDecomposition = binaryDecompose(p["displaySets"])
-        displayDecomposition.sort()
-
-        def formatCorr(c):
-            return "%.4f" % c
-        
-        for j in displayDecomposition:
-            i = traitArray[j][0]
-            traitSubarray = traitArray[j][1]
-
-            if len(traitSubarray) == 0:
-                continue
-
-            targetTraits = decomposeIndex(traits, i)
-            extraColumnHeaders = map(trait.Trait.shortName, targetTraits) # XZ, 09/10/2008: add module name
-            traitDesc = string.join(extraColumnHeaders, ", ")
-
-            #if(p["subsetSize"] != -1 and len(traitSubarray) > p["subsetSize"]):
-            #    traitDesc += ",(showing top %s of %s)" % (p["subsetSize"],
-            #                                             len(traitSubarray))
-            #    traitSubarray = traitSubarray[0:p["subsetSize"]]
-
-            output += "%s\n" % traitDesc
-            output += traitSubarray[0][0].csvHeader([], extraColumnHeaders)
-            output += "\n"
-            for oneTrait, corr in traitSubarray:#XZ, 09/10/2008: change original variable name 'trait' to 'oneTrait'
-                corr = map(formatCorr, corr)
-                output += oneTrait.csvRow([], corr) + "\n"
-
-            output += "\n"
-        
-        return output
-
-    # __str__ : string
-    # to return self.output as the string representation of this page
-    # self.output is built in __init__
-    def __str__(self):
-        return self.output
-
-# TraitCorrelationPage: a class to display trait correlations
-# for now this is just one HTML file, so we don't even write it
-# to a temporary file somewhere
-class TraitCorrelationPage(templatePage.templatePage):
-    """
-    Using the templatePage class, we build an HTML shell for
-    the core data here: the trait correlation lists.
-
-    The way templatePage works, we build the page in pieces in
-    the __init__ method and later on use the inherited write
-    method to render the page.
-    """
-    def __init__(self, fd, p, cursor, traits, traitArray, inbredSetName, txtFilename):
-
-        templatePage.templatePage.__init__(self, fd)
-         
-        self.dict["title"] = "Correlation Comparison"
-        self.dict["basehref"] = ""
-		# NL: deleted js1 content part, since it has not been used in this project
-        self.dict["js1"] = ""
-        self.dict["js2"] = ""
-
-        body = "<td><h1>Correlation Comparison</h1>"
-        body += "<p>Run at %s UTC</p>" % time.asctime(time.gmtime())
-        body += """
-        <p>The correlation comparison tool identifies intersecting sets of traits that are
-correlated with your selections at a specified threshold. A correlation comparison 
-involves the following steps:</p>
-<ol>
-<li><p>
-<b>Correlate:</b> 
-Choose a <i>Target Database</i>, a <i>Correlation Type</i>, and a <i>Correlation
-Threshold</i>. For your initial correlation, leave <i>Number of Subsets to Show</i> and 
-<i>Traits to Show per Subset</i> at their default values of 10. Using the Correlation 
-Options panel, you can adjust the <i>Correlation Threshold</i>, <i>Number of Subsets to
-Show</i>, and <i>Traits to Show per Subset</i>.
-</p></li>
-
-<li><p>
-<b>Add to Collection:</b>
-You can use the check boxes in the <i>Correlation 
-Comparison Details</i> panel and the buttons at the bottom of the page to add these
-results to your selections page for further analysis in WebQTL.
-</p></li>
-
-<li><p> 
-<b>Filter:</b>
-Using the <i>Correlation Comparison Summary</i> panel, choose which
-subsets you would like to display for export. Note that if you change the
-parameters in the <i>Correlation Options</i> panel, you will need to re-apply your filter.
-</p></li>
-
-<li><p>
-<b>Export:</b>
-Once you are satisfied with your report, use the export link at
-the bottom of the page to save the report as a comma-separated (CSV) text file
-which you can then import into Excel or another tool. Note: the exported report
-will list all subsets in the summary view and only those traits in the subsets
-you have selected in the Filter step.
-</p></li>
-</ol>
-"""
-        
-#        body += """
-#        <p>The correlation
-#        comparison tool identifies the intersecting sets of traits that are
-#        correlated with your selections. A correlation comparison involves
-#        the following steps:</p>
-#        <ol>
-#        <li><p><b>Correlate:</b> Choose a <i>Target Database</i>, a <i>Correlation Type</i>, and a <i>Correlation Threshold</i>.
-#        For the initial correlation, leave <i>Subsets to Show</i> and <i>Traits to Show per Subset</i>
-#        at their default values of 10.</p></li>
-#        <li><p><b>Refine Correlation:</b> Using the <i>Correlation Options</i> panel,
-#        adjust the <i>Correlation Threshold</i>, <i>Subsets to Show</i>, and <i>Traits to
-#        Show per Subset</i> until you have a reasonable number of traits.</p></li>
-#        <li><p><b>Filter:</b> Using the <i>Correlation Comparison Summary</i> panel, choose which subsets you would
-#        like to see. Note that if you change the parameters in the <i>Correlation Options</i> panel, you will
-#        lose the filter you have selected.</p></li>
-#        <li><p><b>Export:</b> Once you are satisfied with your report, use the export
-#        link at the bottom of the page to save the report as a comma-separated (CSV) text file which
-#        you can then import into Excel or another tool. Note: the exported report
-#        will show all subsets in the summary view and all traits in each subset you have
-#        selected in the Filter step.
-#        <li><p><b>Shopping Cart:</b> In addition, you can use the
-#        check boxes in the <i>Correlation Comparison Details</i> panel and the
-#        buttons at the bottom of the page to add the traits you have found to the shopping cart.</p>
-#        </li>
-#        </ol>
-#        """
-
-        body += self.showOptionPanel(p, cursor, inbredSetName)        
-        body += self.showSelectedTraits(traits, p, inbredSetName)
-
-        if p["firstRun"] == 0:
-            body += self.showCorrelationResults(p, inbredSetName, traits, traitArray)
-
-            exportParams = copy.copy(p)
-            exportParams["outputType"] = "text"
-            
-            body += ('''
-            <h2>Export these results</h2>
-            <p>
-            <a href="/image/%s">Download a text version of the above results in CSV format</a>. This text version differs from
-            the version you see on this page in two ways. First, the summary view shows all subsets. Second, the details
-            view shows all traits in the subsets that you have selected.
-            </p>
-            '''
-                     % txtFilename)
-
-
-        
-        body += "</td>"
-        self.dict["body"] = body
-
-
-    # showOptionPanel: ParamDict -> Cursor -> String ->  String
-    # to build an option panel for the multitrait correlation
-    # we expect the database list to be a list of 2-tuples
-    # the first element of each tuple is the short name
-    # and the second element of the tuple is the long name
-    def showOptionPanel(self, params, cursor, inbredSetName):
-        output = '''
-        <h2>Correlation Options</h2>
-	<FORM METHOD="POST" ACTION="%s%s" ENCTYPE="multipart/form-data">
-	<INPUT TYPE="hidden" NAME="FormID" VALUE="compCorr2">
-        <input type="hidden" name="filename" value="%s">
-        <input type="hidden" name="firstRun" value="0">
-        <input type="hidden" name="progress" value="1">
-        <table>
-        <tr>
-        <td>Target Database:</td><td>
-        ''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, params["filename"])
-
-        output += htmlModule.genDatabaseMenu(db = cursor,
-                                  public=0,
-                                  RISetgp = inbredSetName,
-                                  selectname="targetDatabase",
-                                  selected=params["targetDatabase"])
-        output += "</td></tr>"
-
-        corrSelected = ["",""]
-
-        if params["correlation"] == "pearson":
-            corrSelected[0] = "SELECTED"
-        else:
-            corrSelected[1] = "SELECTED"
-        
-        output += ('''
-                   <tr>
-                   <td>Correlation Method:</td>
-                   <td><select name="correlation">
-                       <option value="pearson" %s>Pearson</option>
-                       <!--<option value="spearman" %s>Spearman</option>-->
-                       </select></td></tr>
-                       ''' % (corrSelected[0], corrSelected[1]))
-        output += ('<tr><td>Correlation Threshold:</td><td><input name="threshold" value="%s" /></td></tr>'
-                   % params["threshold"])
-        output += ('<tr><td>Subsets to Show (-1 to show all subsets):</td><td><input name="subsetCount" value="%s" /></td></tr>'
-                   % params["subsetCount"])
-        output += ('<tr><td>Traits to Show per Subset (-1 to show all traits):</td><td><input name="subsetSize" value="%s" /></td></tr>'
-                   % params["subsetSize"])
-
-        # a cosmetic change to hopefully make this form a bit easier to use
-#        if params["firstRun"] == 1:
-#            applyName = "Correlate"
-#        else:
-#            applyName = "Refine Correlation"
-            
-        output += '''
-        <tr>
-        <td colspan="2"><input class="button" type="submit" value="Correlate" /></td>
-        </tr>
-        </table>
-        </form>
-        ''' 
-
-        return output
-
-    # showSelectedTraits: listof Trait -> string
-    # to show a list of the selected traits
-    def showSelectedTraits(self, traits, p, inbredSetName):
-        output = '''
-		<form action="%s%s" method="post" name="showDatabase">
-		<INPUT TYPE="hidden" NAME="FormID" VALUE="showDatabase">
-
-		<input type="hidden" name="incparentsf1" value="ON">
-		<input type="hidden" name="ShowStrains" value="ON">
-		<input type="hidden" name="ShowLine" value="ON">
-		<input type="hidden" name="database" value="">
-		<input type="hidden" name="ProbeSetID" value="">
-		<input type="hidden" name="RISet" value="%s">
-		<input type="hidden" name="CellID" value="">
-		<input type="hidden" name="database2" value="">
-		<input type="hidden" name="rankOrder" value="">
-		<input type="hidden" name="ProbeSetID2" value="">
-		<input type="hidden" name="CellID2" value="">
-		''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, inbredSetName)
-
-        output += "<h2>Selected Traits</h2>"        
-        output += '<table cellpadding="2" cellspacing="0"><tr bgcolor="FFFFFF"><th>Database</th><th>Trait</th></tr>'
-        flip = 1
-        colors = ["FFFFFF", "cccccc"]
-        
-        for trait in traits:
-            # we take advantage of the secret dbName attribute that
-            # loadDatabase fills in
-            descriptionString = trait.genHTML()
-            if trait.db.type == 'Publish' and trait.confidential:
-                descriptionString = trait.genHTML(privilege=self.privilege, userName=self.userName, authorized_users=trait.authorized_users)
-            output += '''
-            <tr bgcolor="%s"><td><a href="/dbdoc/%s.html">%s</a></td>
-                <td><a href="javascript:showDatabase2('%s', '%s', '')">%s</a></td>
-                </tr>
-            ''' % (colors[flip], trait.db.name, trait.db.name, trait.db.name, trait.name, descriptionString)
-            flip = not flip
-
-        output += "</table></form>"
-        return output
-
-
-    # showSummaryCorrelationResults
-    # show just the number of traits in each subarray
-    def showSummaryCorrelationResults(self, p, traits, traitArray):
-        output = '''
-        <form action="%s%s" method="post">
-	<INPUT TYPE="hidden" NAME="FormID" VALUE="compCorr2">
-        <input type="hidden" name="filename" value="%s">
-        <input type="hidden" name="firstRun" value="0">
-        <input type="hidden" name="progress" value="1">
-        <input type="hidden" name="correlation" value="%s">
-        <input type="hidden" name="threshold" value="%s">
-        <input type="hidden" name="rankOrder" value="">
-        <input type="hidden" name="subsetCount" value="%s">
-        <input type="hidden" name="subsetSize" value="%s">
-        <input type="hidden" name="targetDatabase" value="%s">
-        ''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, p["filename"], p["correlation"], p["threshold"],
-               p["subsetCount"], p["subsetSize"], p["targetDatabase"])
-        
-        output += '''
-        <table cellpadding="2" cellspacing="0">
-        <tr>
-        <th>Trait Subsets</th>
-        <th colspan="2">Intersecting Set Size</th>
-        </tr>
-        '''
-        # figure out a scale for the summary graph
-        # for now we set max = 300 pixels wide
-        if p["subsetCount"] != -1:
-            ourSubsetCount = min(p["subsetCount"], len(traitArray))
-        else:
-            ourSubsetCount = len(traitArray)
-            
-        screenWidth = 600
-        lengths = []
-        for j in range(ourSubsetCount):
-            lengths.append(len(traitArray[j][1]))
-        maxLength = max(lengths)
-        
-        displayDecomposition = binaryDecompose(p["displaySets"])
-        flip = 0
-        colors = ["FFFFFF", "cccccc"]
-        
-        for j in range(ourSubsetCount):
-            i = traitArray[j][0]
-            traitSubarray = traitArray[j][1]
-            
-            if len(traitSubarray) == 0:
-                continue
-
-            targetTraits = decomposeIndex(traits, i)
-            traitDesc = string.join(map(webqtlTrait.displayName, targetTraits),
-                                    ", ")
-            
-            if j in displayDecomposition:
-                checked = "CHECKED"
-            else:
-                checked = ""
-
-            barWidth = (len(traitSubarray) * screenWidth) / maxLength
-            output += ('''<tr bgcolor="%s">
-                              <td><input type="checkbox" name="display%d" value="1" %s>%s</input></td>
-                              <td>%s</td>
-                              <td><img src="/images/blue.png" width="%d" height="25"></td></tr>'''
-                       % (colors[flip], j, checked, traitDesc, len(traitSubarray), barWidth))
-            flip = not flip
-            
-        output += '''
-        <tr>
-        <td colspan="3">
-        <input class="button" type="submit" value="Filter" /></td>
-        </tr>
-        </table></form>
-        '''
-        return output
-    
-    # showDetailedCorrelationResults
-    # actually show the traits in each subarray
-    def showDetailedCorrelationResults(self, p, inbredSetName, traits,
-                                       traitArray):
-        output = "<h2>Correlation Comparison Details</h2>"
-
-        # the hidden form below powers all of the JavaScript links,
-        # the shopping cart links, and the correlation plot links
-
-        output += '''
-        <form action="%s%s" method="post">
-        <input type="hidden" name="database" value="%s">
-        <input type="hidden" name="FormID" value="showDatabase">
-        <input type="hidden" name="traitfile" value="">
-	<input type="hidden" name="incparentsf1" value="ON">
-	<input type="hidden" name="ShowStrains" value="ON">
-        <input type="hidden" name="ProbeSetID" value="">
-	<input type="hidden" name="ShowLine" value="ON">
-        <input type="hidden" name="RISet" value="%s">
-        <input type="hidden" name="CellID" value="">
-        <input type="hidden" name="database2" value="">
-        <input type="hidden" name="rankOrder" value="">
-        <input type="hidden" name="ProbeSetID2" value="">
-        <input type="hidden" name="CellID2" value="">
-        ''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, p["targetDatabase"], inbredSetName)
-
-
-        displayDecomposition = binaryDecompose(p["displaySets"])
-
-        # necessary to ensure that subset order is the same in the
-        # summary and the detailed view
-        displayDecomposition.sort()
-
-        # here's a trick: the first trait we show must have the widest row because it correlates
-        # with the largest set of input traits
-        firstSubset = traitArray[displayDecomposition[0]]
-        firstTrait = firstSubset[1][0][0]
-        extraColumnCount = firstSubset[2]
-        totalColumnCount = 1 + len(firstTrait.row()) + extraColumnCount
-
-        output += "<table cellpadding=2 cellspacing=0>\n"
-        for j in displayDecomposition:
-            i = traitArray[j][0]
-            traitSubarray = traitArray[j][1]
-
-            # we don't display trait combinations for which there are
-            # no correlations
-            if len(traitSubarray) == 0:
-                continue
-            
-            # generate a description of the traits that this particular array
-            # matches highly
-            targetTraits = decomposeIndex(traits, i)
-            extraColumnHeaders = map(webqtlTrait.displayName, targetTraits)
-            traitDesc = string.join(extraColumnHeaders, ", ")
-
-            # massage extraColumnHeaders so that they can be wrapped
-            for i in range(len(extraColumnHeaders)):
-                ech = extraColumnHeaders[i]
-                ech = ech.replace("-", " ")
-                ech = ech.replace("_", " ")
-                extraColumnHeaders[i] = ech
-
-            # pad extraColumnHeaders if we have less columns than the max
-            paddingNeeded = extraColumnCount - len(extraColumnHeaders)
-            if paddingNeeded > 0:
-                extraColumnHeaders.extend(paddingNeeded * ["&nbsp;"])
-                    
-            # we limit the output to the top ones
-            if(p["subsetSize"] != -1 and len(traitSubarray) > p["subsetSize"]):
-                traitDesc += " (showing top %s of %s)" % (p["subsetSize"], len(traitSubarray))
-                traitSubarray = traitSubarray[0:p["subsetSize"]]
-                
-            # combine that description with actual database traits themselves
-            # and the correlation values
-            output += '<tr><td colspan="%d"><h3>%s</h3></td></tr>' % (totalColumnCount, traitDesc)
-            #output += '<h3>%s</h3>\n<table cellpadding=2 cellspacing=0>\n'% traitDesc
-
-            # we assume that every trait in traitSubarray is the same type
-            # of trait
-            flip = 0
-            colors = ["FFFFFF", "cccccc"]
-            
-            output += traitSubarray[0][0].tableRowHeader(["&nbsp;"], extraColumnHeaders, colors[0])
-
-            for traitPair in traitSubarray:
-                corr = []
-		traitPair[0].dbName = p['targetDatabase']
-                trait = traitPair[0]
-
-                for i in range(len(traitPair[1])):
-                    corrValue = traitPair[1][i]
-                    corrPlotLink = ('''
-                    <a href="javascript:showCorrelationPlot2(db='%s',ProbeSetID='%s',CellID='',db2='%s',ProbeSetID2='%s',CellID2='',rank='%s')">%.2f</a>
-                    ''' % (p["targetDatabaseName"], trait.name,  targetTraits[i].db.name, targetTraits[i].name, "0", corrValue))
-                    corr.append(corrPlotLink)
-    
-                corr.extend(paddingNeeded * ["&nbsp;"])
-                    
-                checkbox = ('<INPUT TYPE="checkbox" NAME="searchResult" VALUE="%s:%s" />'
-                            % (p["targetDatabaseName"], trait.name))
-                flip = not flip
-                output += traitPair[0].tableRow([checkbox], corr, colors[flip])
-            
-            #output += "</table>"
-            i += 1
-            output += '<tr><td colspan="%d">&nbsp;</td></tr>' % totalColumnCount
-
-        output += "</table>"
-
-        # print form buttons if there were checkboxes above
-        output += '''
-        <div align="left">
-        <INPUT TYPE="button" NAME="addselect" CLASS="button" VALUE="Add to Collection"
-        onClick="addRmvSelection('%s',this.form, 'addToSelection');">
-        <INPUT TYPE="button" NAME="selectall" CLASS="button" VALUE="Select All" onClick="checkAll(this.form);">
-        <INPUT TYPE="reset" CLASS="button" VALUE="Select None">
-        </div>
-        </form>
-        ''' % inbredSetName
-
-        return output
-    
-    # showCorrelationResults: ParamDict -> listof Trait -> tupleof (int,arrayof trait) -> String
-    # to build an output display for the multitrait correlation results
-    def showCorrelationResults(self, p, inbredSetName, traits, traitArray):
-        output = '''
-        <h2>Correlation Comparison Summary</h2>
-        <p>
-        %s correlations were computed for each of the selected traits with each trait in
-        the <a href="/dbdoc/%s.html">%s</a> database.
-        Subsets of database traits for which correlations were higher than %s
-        or lower than -%s are shown below based on which traits
-        they correlated highly with. The top %s subsets, ranked by the number of input traits that
-        they correspond with, are shown, and at most %s traits in each subset are shown. </p>
-        ''' % (p["correlationName"],
-               p["targetDatabase"], p["targetDatabaseName"],
-               p["threshold"], p["threshold"], p["subsetCount"],
-               p["subsetSize"])
-
-
-        totalTraits = 0
-        for j in range(len(traitArray)):
-            totalTraits += len(traitArray[j][1])
-                        
-        if totalTraits == 0:
-            output += """
-            <p>
-            No shared corrrelates were found with your given traits at this
-            threshold. You may wish to lower the correlation threshold or choose different traits.
-            </p>
-            """
-        else:
-            output += self.showSummaryCorrelationResults(p, traits, traitArray)
-            output += self.showDetailedCorrelationResults(p, inbredSetName,
-                                                          traits, traitArray)
-
-        return output
-
-# decomposeIndex: (listof Trait) -> Int ->
-#   (listof Trait)
-# to use i to partition T into a sublist
-# each bit in i controls the inclusion or exclusion of a trait
-def decomposeIndex(traits, i):
-    targetTraits = []
-    
-    for j in range(len(traits)):
-        # look, mom, a bitwise and!
-        # expression below tests whether the jth bit is
-        # set in i
-        # see runCorrelation for how we decompose the
-        # array index
-        if (i & pow(2,j)) == pow(2,j):
-            targetTraits.append(traits[j])
-
-    return targetTraits
-    
-# binaryDecompose: int -> (listof int)
-# to decompose a number into its constituent powers of 2
-# returns a list of the exponents a_1...a_n such that the input m
-# is m = 2^a_1 + ... + 2^a_n
-def binaryDecompose(n):
-    if n == 0:
-        return []
-
-    # we start with the highest power of 2 <= this number
-    # and work our way down, subtracting powers of 2
-    start = long(math.floor(math.log(n)/math.log(2)))
-    
-    exponents = []
-    while start >= 0:
-        if n >= long(math.pow(2, start)):
-            n -= math.pow(2,start)
-            exponents.append(start)
-        start -= 1
-    return exponents
-
-# powerOf : int -> int -> boolean
-# to determine whether m is a power of n;
-# more precisely, whether there exists z in Z s.t.
-# n^z = m
-def powerOf(m, n):
-    trialZ = math.floor(math.log(m)/math.log(n))
-    return pow(n,trialZ) == m
-
-
-class compCorrPage(templatePage.templatePage):
-	def __init__(self,fd):
-		templatePage.templatePage.__init__(self, fd)
-
-                if not self.openMysql():
-                        return
-
-		cursor = self.cursor
-		params = buildParamDict(cursor, fd)
-
-		# get the input data
-		inbredSetName, traits = readInputFile(cursor, RootDir + params["filename"])
-        
-		# and what we are comparing the data to
-		dbTraits = []
-		if params["targetDatabaseType"] != "ProbeSet":
-			dbTraits = loadDatabase(cursor, params)
-
-        
-		# run the comparison itself
-		strainCount = trait.queryStrainCount(cursor) # XZ, 09/10/2008: add module name 
-		if params["targetDatabaseType"] == "ProbeSet":
-			results = runProbeSetCorrelations(cursor, params, traits)
-		else:
-			results = runCorrelations(params, strainCount, traits, dbTraits)
-
-		# try to be smart about what to output:
-		# we want to limit the number of traits shown, at least initially
-		# and since traitArray is already sorted with most interesting
-		# subsets first, we simply pick up the first 500 or so traits
-		# that we find
-		if params["displaySets"] == 0:
-			selectedTraits = 0
-			for j in range(len(results)):
-				#print "Scanning subarray %d" % j
-				if selectedTraits <= 200:
-					params["displaySets"] += pow(2, j)
-					selectedTraits += len(results[j][1])
-
-		traitList = []
-		for oneTrait in traits:  # XZ, 09/10/2008: change the original variable name 'trait' to 'oneTrait'
-			traitName = oneTrait.dbName+'::'+oneTrait.name  # XZ, 09/10/2008: change the original variable name 'trait' to 'oneTrait'
-			aTrait =  webqtlTrait(cursor=self.cursor, fullname=traitName)
-			traitList.append(aTrait)
-
-		# and generate some output
-		txtOutputFilename = tempfile.mktemp() 
-		txtOutputHandle = open(txtOutputFilename, "w")
-		txtOutput = TraitCorrelationText(params, traits, results)
-		txtOutputHandle.write(str(txtOutput))
-		txtOutputHandle.close()
-		txtOutputFilename = os.path.split(txtOutputFilename)[1]
-
-		self.dict['body'] = TraitCorrelationPage(fd, params, cursor, traitList,
-					results, inbredSetName,
-					txtOutputFilename).dict['body']
diff --git a/web/webqtl/compareCorrelates/trait.py b/web/webqtl/compareCorrelates/trait.py
deleted file mode 100755
index ff1f8119..00000000
--- a/web/webqtl/compareCorrelates/trait.py
+++ /dev/null
@@ -1,1074 +0,0 @@
-#Trait.py
-#
-#--Individual functions are already annotated, more or less.
-#
-#Classes:
-#RawPoint
-#Trait
-#ProbeSetTrait
-#GenotypeTrait
-#PublishTrait
-#TempTrait
-#-KA
-
-# trait.py: a data structure to represent a trait
-import time
-import string
-
-CONFIG_pubMedLinkURL = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=%s&dopt=Abstract"
-
-# RawPoint: to store information about the relationship between two particular
-# traits
-# RawPoint represents directly the input file
-class RawPoint:
-    def __init__(self, i, j):
-        self.i = i
-        self.j = j
-        
-    def __eq__(self, other):
-        return (self.i == other.i and
-                self.j == other.j and
-                self.spearman == other.spearman and
-                self.pearson == other.pearson)
-    
-    def __str__(self):
-        return "(%s,%s,%s,%s)" % (self.i, self.j, self.spearman, self.pearson)
-
-def tdEscapeList(cols, align="left"):
-    """
-    A helper function used by tableRow
-    in Trait that will convert a list of strings into a set of
-    table cells enclosed by <td>%s</td> tags
-    """
-    html = ""
-    for col in cols:
-        html += '<td style="text-align: %s">%s</td>' % (align, col)
-    return html
-
-def thEscapeList(cols):
-    """
-    A helper function used by tableRowHeader
-    in Trait that will convert a list of strings into a set of
-    table cells enclosed by <td>%s</td> tags
-    """
-    html = ""
-    for col in cols:
-        html += "<th>%s</th>" % col
-    return html
-
-def commaEscapeList(cols):
-    """
-    A helper function used by csvHeader and csvRow.
-    Really it's just a wrapper for string.join
-    """
-    return '"' + string.join(cols, '","') + '"'
-
-
-class Trait:
-    """
-    A trait represents an attribute of an object. In the WebQTL database, traits are stored
-    as ProbeSets; that is, the average values of a set of probes are stored.
-    """
-    def __init__(self, id="", name="", description="", symbol="", href=""):
-        self.id  = id
-        self.name = name
-	self.dbName = ""
-        self.symbol = symbol
-        self.href = href
-        self.strainData = {}
-
-    def populateDataId(self, cursor, freezeId):
-        """
-        Retrieve the dataId for trait data corresponding to the given database
-        The way to do this depends on the particular type of trait, so we leave implementation
-        to subclasses.
-        """
-        raise NotImplementedError
-    
-    def populateStrainData(self, cursor):
-        """
-        Load this trait full of train data corresponding to the data id
-        The data id can either come from populateDataId
-        or can be set manually by the user of this class.
-        Xiaodong added: The way to do this depends on the particular type of trait,
-        so we leave implementation to subclasses.
-
-        """
-        raise NotImplementedError
-
-    def shortName(self):
-        """
-        To return a short name for this trait; this name should be
-        appropriate for a row or column title
-        """
-        return self.name
-
-    def nameNoDB(self):
-        """
-        To return only the short name without the database attached
-        """
-        strArray = self.shortName().split('::')
-        
-        return strArray[1]
-    
-    def datasetName(self):
-        """
-        To return only the name of the dataset
-        """
-        strArray = self.shortName().split('::')
-        
-        return strArray[0].strip()
-
-    def longName(self):
-        """
-        To return a long name for this trait; this name should be
-        appropriate for a key to a table
-        """
-        return self.shortName()
-
-    def __str__(self):
-        return self.shortName()
-
-    def tableRowHelper(self, beforeCols, afterCols, color, thisRow):
-        """
-        tableRowHelper: (arrayof String) -. String
-        To generate a table row to represent this object, appending
-        the additional information in beforeCols and afterCols
-        to the beginning and the end
-        """
-        thisRow[0] = '<a href="%s">%s</a>' % (self.traitInfoLink(),
-                                              self.name)
-        html = '<tr bgcolor="%s">' % color
-        html += tdEscapeList(beforeCols + thisRow)
-        html += tdEscapeList(afterCols, align="right")
-        html += "</tr>"
-        
-        return html
-
-
-    def header(self):
-        """
-        header: (listof String)
-        To generate a list of strings describing each piece of data
-        returned by row
-        """
-        raise NotImplementedError
-
-    def row(self):
-        """
-        row: (listof String)
-        To generate a list of strings describing this object. The
-        elements of this list should be described by header()
-        """
-        raise NotImplementedError
-    
-    def tableRowHeader(self, beforeCols, afterCols, color):
-        """
-        tableRowHeader: (arrayof String) -> (arrayof String) -> String
-        To generate a table row header to represent this object,
-        appending the additional information in beforeCols and
-        afterCols to the beginning and end
-        """
-        html = '<tr bgcolor="%s">' % color
-        html += thEscapeList(beforeCols + self.header() +
-                             afterCols)
-        html += "</tr>"
-        return html
-
-    def csvHeader(self, beforeCols, afterCols):
-        return commaEscapeList(beforeCols + self.header() + afterCols)
-    
-    def csvRow(self, beforeCols, afterCols):
-        return commaEscapeList(beforeCols + self.row() + afterCols)
-    
-        
-    def traitInfoLink(self):
-        """
-        To build a trait info link to show information about this
-        trait. We assume that the database attribute is properly set
-        on the hidden form on the page where this link will go.
-        """
-        return "javascript:showDatabase2('%s','%s','')" % (self.dbName, self.name)
-
-# ProbeSetTrait: a trait with data from a probeset
-class ProbeSetTrait(Trait):
-    def __init__(self, id="", name="", description="", symbol="", href="",
-                 chromosome="", MB="", GeneId=""):
-        Trait.__init__(self, id=id, name=name, href=href)
-        self.description = description
-        self.symbol = symbol
-        self.chromosome = chromosome
-        self.MB = MB
-        self.GeneId = GeneId
-        
-    def populateDataId(self, cursor, freezeId):
-        """
-        Look up the data id for this trait given which
-        freeze it came from.
-        """
-        cursor.execute('''
-        SELECT
-          ProbeSetXRef.DataId
-        FROM
-          ProbeSetXRef
-        WHERE
-          ProbeSetId = %s AND
-          ProbeSetFreezeId = %s
-        ''' % (self.id, freezeId))
-
-        # we hope that there's only one record here
-        row = cursor.fetchone()
-        self.dataId = row[0]
-
-    #XZ, 03/03/2009: Xiaodong implemented this fuction
-    def populateStrainData(self, cursor):
-        cursor.execute('''
-        SELECT
-          ProbeSetData.StrainId,
-          ProbeSetData.value
-        FROM
-          ProbeSetData
-        WHERE
-          ProbeSetData.Id = %s''' % self.dataId)
-        for row in cursor.fetchall():
-            self.strainData[int(row[0])] = float(row[1])
-
-
-    def shortName(self):
-        """
-        An improved string method that uses the gene symbol where
-        we have it
-        """
-        if self.symbol != "":
-            return self.symbol
-        else:
-            return Trait.shortName(self)
-
-    def longName(self):
-        """
-        We use several bits of genetic information to give
-        useful information about this trait and where it is
-        """
-        if self.chromosome != "":
-            chrPart = " (%s on Chr %s @ %s Mb)" % (self.symbol,
-                                                     self.chromosome,
-                                                     self.MB)
-        else:
-            chrPart = ""
-
-        return "%s%s: %s" % (self.name, chrPart, self.description)
-
-    def header(self):
-        return ["Name", "Symbol", "Description",
-                "Chr", "Position (Mb)"]
-
-    def row(self):
-        if type(self.MB) is float:
-            MB = "%.2f" % self.MB
-        else:
-            MB = ""
-            
-        return [self.name, self.symbol, self.description,
-                self.chromosome, MB]
-    
-    def tableRow(self, beforeCols, afterCols, color):
-        """
-        tableRow: (arrayof String) -> (arrayof String) -> String
-        To generate a table row to represent this object, appending
-        the additional information in beforeCols and afterCols to the
-        beginning and end
-        """
-        thisRow = self.row()
-
-        # trim description
-        if len(thisRow[2]) > 20:
-            thisRow[2] = thisRow[2][:20] + "..."
-
-        # add NCBI info link 
-        thisRow[1] = self.ncbiInfoLink()
-
-        return self.tableRowHelper(beforeCols, afterCols, color,
-                                   thisRow)
-
-
-    def ncbiInfoLink(self):
-        """
-        ncbiInfoLink :: String
-        To generate an NCBI info link for this trait. If we have a GeneId,
-        then we can go straight to the gene. If not, then we generate a search
-        link based on the gene symbol. If we have none of them, then we don't
-        generate a link at all.
-        """
-        if self.GeneId != "":
-            cmd = "cmd=Retrieve&dopt=Graphics&list_uids=%s" % self.GeneId
-        elif self.symbol != "":
-            cmd = "cmd=Search&term=%s" % self.symbol
-        else:
-            return ""
-
-        return '''
-        <a target="_new"
-           href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&%s">
-           %s</a> ''' % (cmd, self.symbol)
-
-
-# GenotypeTrait: a trait with data from the genotype
-class GenotypeTrait(Trait):
-    def __init__(self, id="", name="", href="", chromosome="", MB=""):
-        Trait.__init__(self, id=id, name=name, href=href)
-        self.chromosome = chromosome
-        self.MB = MB
-
-    def populateDataId(self, cursor, freezeId):
-        """
-        Look up the data id for this trait from the
-        genotype.
-        """
-        cursor.execute('''
-        SELECT
-          GenoXRef.DataId
-        FROM
-          GenoXRef
-        WHERE
-          GenoId = %s AND
-          GenoFreezeId = %s
-        ''' % (self.id, freezeId))
-
-        # we hope that there's only one record here
-        row = cursor.fetchone()
-        self.dataId = row[0]
-
-    #XZ, 03/03/2009: Xiaodong implemented this fuction
-    def populateStrainData(self, cursor):
-        cursor.execute('''
-        SELECT
-          GenoData.StrainId,
-          GenoData.value
-        FROM
-          GenoData
-        WHERE
-          GenoData.Id = %s''' % self.dataId)
-        for row in cursor.fetchall():
-            self.strainData[int(row[0])] = float(row[1])
-
-    def header(self):
-        return ["Locus", "Chr", "Position (Mb)"]
-
-    def row(self):
-        return [self.name, self.chromosome, "%.3f" % self.MB]
-
-    def tableRow(self, beforeCols, afterCols, color):
-        return self.tableRowHelper(beforeCols, afterCols, color, self.row())
-
-# PublishTrait: a trait with data from publications
-class PublishTrait(Trait):
-    def __init__(self, id="", name="", href="", authors="", title="",
-                 phenotype="", year=""):
-        Trait.__init__(self, id=id, name=name, href=href)
-        self.authors = authors
-        self.title = title
-        self.phenotype = phenotype
-        self.year = year
-
-    def populateDataId(self, cursor, freezeId):
-        """
-        Look up the data id for this trait from the
-        published set. For the moment, we assume that there's
-        only one publish freeze.
-        """
-        cursor.execute('''
-        SELECT
-          PublishXRef.DataId
-        FROM
-          PublishXRef, PublishFreeze
-        WHERE
-          PublishFreeze.Id = %s AND 
-          PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND
-          PublishXRef.Id = %s 
-        ''' % (freezeId, self.id))
-
-        # we hope that there's only one record here
-        row = cursor.fetchone()
-        self.dataId = row[0]
-
-    #XZ, 03/03/2009: Xiaodong implemented this fuction
-    def populateStrainData(self, cursor):
-        cursor.execute('''
-        SELECT
-          PublishData.StrainId,
-          PublishData.value
-        FROM
-          PublishData
-        WHERE
-          PublishData.Id = %s''' % self.dataId)
-        for row in cursor.fetchall():
-            self.strainData[int(row[0])] = float(row[1])
-
-
-    def longName(self):
-        """
-        A more intelligent string function that uses
-        information about the publication from which this trait came
-        """
-        return "%s: %s by %s" % (self.name, self.title, self.authors)
-
-    def header(self):
-        return ["Record", "Phenotype", "Authors", "Year", "URL"]
-
-    def row(self):
-        return [self.name,
-                self.phenotype,
-                self.authors,
-                str(self.year),
-                ""]
-    
-    def tableRow(self, beforeCols, afterCols, color):
-        """
-        tableRow: (arrayof String) -> (arrayof String) ->  String
-        To generate a table row to represent this object, appending
-        the additional information in beforeCols and afterCols to the
-        beginning and end
-        """
-        thisRow = self.row()
-
-        # for multiple authors, use "et. al" after first two
-        authors = thisRow[2].split(",")
-        if len(authors) > 2:
-            thisRow[2] = string.join(authors[:2], ",") + ", et al"
-
-        # clip phenotype to 20 chars
-        if len(thisRow[1]) > 20:
-            thisRow[1] = thisRow[1][:20] + "..."
-
-        # add Pub Med URL
-        thisRow[4] = '<a href="%s" target="_new">Pub Med</a>' % (CONFIG_pubMedLinkURL % self.href)
-
-        return self.tableRowHelper(beforeCols, afterCols, color,
-                                   thisRow)
-
-
-# TempTrait: a trait with data generate by user and stored in temp table
-class TempTrait(Trait):
-    def __init__(self, id="", name="", href="", description=""):
-        Trait.__init__(self, id=id, name=name, href=href)
-        self.description = description
-
-    def populateDataId(self, cursor, freeezeId):
-        """
-        Look up the data id for this trait from the Temp table, freezeId isn't used, 
-        it just for fixing the inherit
-        """
-        cursor.execute('''
-        SELECT
-          DataId
-        FROM
-          Temp
-        WHERE
-          Id=%s
-        ''' % (self.id))
-
-        # we hope that there's only one record here
-        row = cursor.fetchone()
-        self.dataId = row[0]
-
-    #XZ, 03/03/2009: Xiaodong implemented this fuction
-    def populateStrainData(self, cursor):
-        cursor.execute('''
-        SELECT
-          TempData.StrainId,
-          TempData.value
-        FROM
-          TempData
-        WHERE
-          TempData.Id = %s''' % self.dataId)
-        for row in cursor.fetchall():
-            self.strainData[int(row[0])] = float(row[1])
-
-
-    def row(self):
-        return [self.id,
-                self.name,
-                self.description,
-                ""]
-    
-
-    def longName(self):
-        """
-        For temp trait, the description always contents whole useful information
-        """
-        return self.description
-
-
-# queryGenotypeTraitByName : Cursor -> string -> GenotypeTrait
-def queryGenotypeTraitByName(cursor, speciesId, name):
-    qry = '''
-    SELECT
-      Geno.Id,
-      Geno.Name,
-      Geno.Chr,
-      Geno.Mb
-    FROM
-      Geno
-    WHERE
-      Geno.SpeciesId = %s and Geno.Name = "%s" ''' % (speciesId, name)
-
-    cursor.execute(qry)
-    row = cursor.fetchone()
-    return GenotypeTrait(id=row[0], name=row[1],
-                         chromosome=row[2], MB=row[3])
-
-# queryPublishTraitByName : Cursor -> string -> PublishTrait
-def queryPublishTraitByName(cursor, freezeId, name):
-    qry = '''
-    SELECT
-      PublishXRef.Id,
-      Phenotype.Id,
-      Publication.Authors,
-      Publication.Title,
-      Publication.Year,
-      Publication.PubMed_ID
-    FROM
-      Publication, PublishXRef, Phenotype, PublishFreeze
-    WHERE
-      PublishFreeze.Id = %s AND 
-      PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND
-      PublishXRef.Id = %s AND 
-      PublishXRef.PublicationId = Publication.Id AND
-      PublishXRef.PhenotypeId = Phenotype.Id 
-      ''' % (freezeId, name)
-
-    cursor.execute(qry)
-    if cursor.rowcount == 0:
-        return None
-    else:
-        row = cursor.fetchone()
-        
-        return PublishTrait(id=row[0], name='%s'%row[0],
-                            authors=row[2], title=row[3],
-                            year=row[4], href=row[5])
-
-
-def queryTempTraitByName(cursor, name):
-    name=name.strip()
-    qry = '''
-    SELECT
-      Temp.Id,
-      Temp.Name,
-      Temp.description
-    FROM
-      Temp
-    WHERE
-      Temp.Name= "%s"
-      ''' % (name)
-
-    cursor.execute(qry)
-    if cursor.rowcount == 0:
-        return None
-    else:
-        row = cursor.fetchone()
-        return TempTrait(id=row[0], name=row[1], description=row[2], href='')
-
-# queryPopulatedProbeSetTraits: Cursor -> Integer -> dictof Trait
-# to retrieve an entire probeset fully populated with data
-# this query can take 15+ sec the old way (22,000 traits * 35 strains = half
-# a million records)
-# so we ask for the data in bulk
-#
-# cursor should be SSCursor for MySQL so rows are stored on the server side
-# and tuples are used
-# note: the cursor is left open when this function returns
-#XZ, 03/04/2009: It seems to me that this function is never executed.
-#XZ: Although it can be called from multitrait.loadDatabase,
-#XZ: but the loadDatabase function will not be called
-#XZ: if the targetDatabaseType is probeset.
-#XZ: The probeset traits of target database are retrieved by execute
-#XZ: queryPopulatedProbeSetTraits2 from correlation.calcProbeSetPearsonMatrix
-def queryPopulatedProbeSetTraits(cursor, freezeId):
-    step1 = time.time()
-    traits = queryProbeSetTraits(cursor, freezeId)
-    traitDict = {}
-    for trait in traits:
-        traitDict[trait.id] = trait
-        
-    step2 = time.time()
-    print 
-    #XZ, 03/04/2009: Xiaodong changed Data to ProbeSetData
-    cursor.execute('''
-    SELECT
-      ProbeSetXRef.ProbeSetId,
-      ProbeSetData.StrainId,
-      ProbeSetData.value
-    FROM
-      ProbeSetXRef
-    Left Join ProbeSetData ON
-      ProbeSetXRef.DataId = ProbeSetData.Id
-    WHERE
-      ProbeSetXRef.ProbeSetFreezeId = %s
-    ''' % freezeId)
-
-    step3 = time.time()
-    totalrows = 0
-    somerows = cursor.fetchmany(1000)
-    while len(somerows) > 0:
-        totalrows += len(somerows)
-        for row in somerows:
-            # this line of code can execute more than one million times
-            traitDict[row[0]].strainData[int(row[1])] = row[2]
-        somerows = cursor.fetchmany(1000)
-
-    #cursor.close()
-    step4 = time.time()
-    
-    time1 = step2 - step1
-    time2 = step3 - step2
-    time3 = step4 - step3
-    time4 = step4 - step1
-    #print "%f %f %f %f %d rows" % (round(time1, 2),
-    #                               round(time2, 2),
-    #                               round(time3, 2),
-    #                               round(time4, 2),
-    #                               totalrows)
-    #print "Fetched %d traits" % len(traits)
-    return traits
-
-
-# queryPopulatedProbeSetTraits2: Cursor -> Integer -> dictof Trait
-# to retrieve the probeset traits, fully populated with data, whose
-# ProbeSetId lies between ProbeSetId1 and ProbeSetId2 (both inclusive)
-# this query can take 15+ sec the old way (22,000 traits * 35 strains = half
-# a million records)
-# so we ask for the data in bulk
-#
-# cursor should be SSCursor for MySQL so rows are stored on the server side
-# and tuples are used
-# note: the cursor is left open when this function returns
-def queryPopulatedProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2):
-    step1 = time.time()
-    traits = queryProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2)
-    traitDict = {}
-    for trait in traits:
-        traitDict[trait.id] = trait
-
-    step2 = time.time()
-    print
-    #XZ, 03/04/2009: Xiaodong changed Data to ProbeSetData
-    cursor.execute('''
-    SELECT
-      ProbeSetXRef.ProbeSetId,
-      ProbeSetData.StrainId,
-      ProbeSetData.value
-    FROM
-      ProbeSetXRef
-    Left Join ProbeSetData ON
-      ProbeSetXRef.DataId = ProbeSetData.Id
-    WHERE
-      ProbeSetXRef.ProbeSetFreezeId = %s AND
-      ProbeSetXRef.ProbeSetId >= %s AND
-      ProbeSetXRef.ProbeSetId <= %s
-      ''' % (freezeId, ProbeSetId1, ProbeSetId2))
-
-    step3 = time.time()
-    totalrows = 0
-    somerows = cursor.fetchmany(1000)
-    while len(somerows) > 0:
-        totalrows += len(somerows)
-        for row in somerows:
-            # this line of code can execute more than one million times
-            traitDict[row[0]].strainData[int(row[1])] = row[2]
-        somerows = cursor.fetchmany(1000)
-
-    #cursor.close()
-    step4 = time.time()
-
-    time1 = step2 - step1
-    time2 = step3 - step2
-    time3 = step4 - step3
-    time4 = step4 - step1
-    #print "%f %f %f %f %d rows" % (round(time1, 2),
-    #                               round(time2, 2),
-    #                               round(time3, 2),
-    #                               round(time4, 2),
-    #                               totalrows)
-    #print "Fetched %d traits" % len(traits)
-    return traits
-
-
-# def noneFilter : string or none -> string
-# to replace a possible None by an empty string
-def noneFilter(x):
-    if x is None:
-        return ""
-    else:
-        return x
-
-# queryProbeSetTraits: Cursor -> Integer -> dictof Trait
-def queryProbeSetTraits(cursor, freezeId):
-    """
-    To locate all of the traits in a particular probeset
-    """
-    qry = '''
-    SELECT
-      ProbeSet.Id,
-      ProbeSet.Name,
-      ProbeSet.description,
-      ProbeSet.symbol,
-      ProbeSet.Chr,
-      ProbeSet.Mb,
-      ProbeSet.GeneId,
-      ProbeSetXRef.DataId
-    FROM
-      ProbeSet,
-      ProbeSetXRef
-    WHERE
-      ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
-      ProbeSetXRef.ProbeSetFreezeId = %s
-    ORDER BY ProbeSet.Id
-    ''' % freezeId
-
-    cursor.execute(qry)
-    rows = cursor.fetchall()
-    traits = []
-
-    for row in rows:
-        row = map(noneFilter, row)
-        trait = ProbeSetTrait(id=row[0], name=row[1],
-                              description=row[2],
-                              chromosome=row[4],
-                              MB=row[5],
-                              symbol=row[3],
-                              GeneId=row[6])
-        trait.dataId = row[7]
-        traits.append(trait)
-
-    return traits
-
-
-# queryProbeSetTraits2: Cursor -> Integer -> dictof Trait
-def queryProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2):
-    """
-    To locate all of the traits in a particular probeset
-    """
-    qry = '''
-    SELECT
-      ProbeSet.Id,
-      ProbeSet.Name,
-      ProbeSet.description,
-      ProbeSet.symbol,
-      ProbeSet.Chr,
-      ProbeSet.Mb,
-      ProbeSet.GeneId,
-      ProbeSetXRef.DataId
-    FROM
-      ProbeSet,
-      ProbeSetXRef
-    WHERE
-      ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
-      ProbeSetXRef.ProbeSetFreezeId = %s AND
-      ProbeSet.Id >= %s AND
-      ProbeSet.Id <= %s
-    ORDER BY ProbeSet.Id
-    ''' % (freezeId, ProbeSetId1, ProbeSetId2)
-
-    cursor.execute(qry)
-    rows = cursor.fetchall()
-    traits = []
-
-    for row in rows:
-        row = map(noneFilter, row)
-        trait = ProbeSetTrait(id=row[0], name=row[1],
-                              description=row[2],
-                              chromosome=row[4],
-                              MB=row[5],
-                              symbol=row[3],
-                              GeneId=row[6])
-        trait.dataId = row[7]
-        traits.append(trait)
-
-    return traits
-
-
-# queryPublishTraits : Cursor -> arrayof Trait
-def queryPublishTraits(cursor, freezeId):
-    """
-    To locate all published traits
-    """
-    qry = '''
-    SELECT
-      Publication.Id,
-      Publication.Name,
-      Publication.PhenoType,
-      Publication.Authors,
-      Publication.Title,
-      Publication.Year,
-      Publication.PubMed_ID,
-      PublishXRef.DataId
-    FROM
-      Publication,
-      PublishXRef
-    WHERE
-      PublishXRef.PublishFreezeId = %s AND
-      PublishXRef.PublishId = Publication.Id
-    ''' % freezeId
-    
-    qry = '''
-    SELECT
-      Publication.Id,
-      PublishXRef.Id,
-      Phenotype.Pre_publication_description,
-      Phenotype.Post_publication_description,
-      Publication.Authors,
-      Publication.Title,
-      Publication.Year,
-      Publication.PubMed_ID,
-      PublishXRef.DataId
-    FROM
-      Publication, PublishXRef, Phenotype, PublishFreeze
-    WHERE
-      PublishFreeze.Id = %s AND 
-      PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND
-      PublishXRef.PublicationId = Publication.Id AND
-      PublishXRef.PhenotypeId = Phenotype.Id 
-      ''' % freezeId
-    cursor.execute(qry)
-    rows = cursor.fetchall()
-    traits = []
-    for row in rows:
-        PhenotypeString = row[3]
-        if not row[7] and row[2]:
-            PhenotypeString = row[2]
-        trait = PublishTrait(id=row[0], name= '%s' %row[1],
-                             phenotype=PhenotypeString,
-                             authors=row[4],
-                             title=row[5],
-                             year=row[6],
-                             href=row[7])
-        trait.dataId = row[8]
-        traits.append(trait)
-        
-    return traits
-
-# queryGenotypeTraits : Cursor -> arrayof Trait
-def queryGenotypeTraits(cursor, freezeId):
-    """
-    To locate all traits in the genotype
-    """
-    qry =    '''
-    SELECT
-      Geno.Id,
-      Geno.Name,
-      Geno.Chr,
-      GenoXRef.DataId,
-      Geno.Mb
-    FROM
-      Geno,
-      GenoXRef
-    WHERE
-      GenoXRef.GenoId = Geno.Id
-      AND GenoXRef.GenoFreezeId = %s
-    ''' % freezeId
-    cursor.execute(qry)
-    rows = cursor.fetchall()
-    traits = []
-    
-    for row in rows:
-        trait = GenotypeTrait(id=row[0], name=row[1],
-                              chromosome=row[2], MB=row[4])
-        trait.dataId = row[3]
-        traits.append(trait)
-        
-    return traits
-
-# queryProbeSetTraitByName : Cursor -> string -> Trait
-# to find a particular trait given its name 
-def queryProbeSetTraitByName(cursor, name):
-    qry = '''
-    SELECT
-      ProbeSet.Id,
-      ProbeSet.Name,
-      ProbeSet.description,
-      ProbeSet.symbol,
-      ProbeSet.Chr,
-      ProbeSet.Mb,
-      ProbeSet.GeneId
-    FROM
-      ProbeSet
-    WHERE
-      ProbeSet.Name = "%s"
-    ''' % name
-    #print qry
-    cursor.execute(qry)
-    row = cursor.fetchone()
-
-    # convert a MySQL NULL value to an empty string
-    # for gene symbol
-    if row[3] is None:
-        sym = ""
-    else:
-        sym = row[3]
-        
-    return ProbeSetTrait(id=row[0], name=row[1], description=row[2],
-                         symbol=sym, chromosome=row[4], MB=row[5],
-                         GeneId=row[6])
-    
-                     
-# queryTraits : Cursor -> string -> string -> arrayof Traits
-# to find all of the traits whose descriptions match a certain string in a
-# particular database
-def queryTraits(cursor, dbId, queryString):
-    # we do this in two steps:
-    # first we get the data id for the matching traits
-    qry = '''
-    SELECT
-      ProbeSet.Id,
-      ProbeSet.Name,
-      ProbeSet.description,
-      ProbeSetXRef.DataId
-    FROM
-      ProbeSet,
-      ProbeSetXRef
-    WHERE
-      ProbeSetXRef.ProbeSetFreezeId = %s AND
-      ProbeSet.Id = ProbeSetXRef.ProbeSetId AND
-      ProbeSet.description LIKE "%%%s%%"
-    ''' % (dbId, queryString)
-    #    print qry
-    cursor.execute(qry)
-
-    if cursor.rowcount == 0:
-        print "No traits found"
-        return []
-    else:
-        print "%s traits found" % (cursor.rowcount)
-
-    # maybe fetchall is bad; we will see
-    traits = []
-    for row in cursor.fetchall():
-        myTrait = Trait(row[0], row[1], row[2])
-        myTrait.dataId = row[3]
-        traits.append(myTrait)
-
-    # second we pull all of the strain data for each trait
-    print "Retrieving individual trait data..."
-    for trait in traits:
-        trait.populateStrainData(cursor, trait.dataId)
-        print "%s (%s) -- %s" % (trait.name, trait.id, trait.description)
-
-    print "done"
-    return traits
-
-# queryProbeSetFreezes : Cursor -> arrayof String,String tuples
-# to return the short and long name for each ProbeSetFreeze
-# this function is designed specifically for building
-# a database selector
-def queryProbeSetFreezes(cursor):
-    cursor.execute("""
-    SELECT
-      ProbeSetFreeze.Name,
-      ProbeSetFreeze.FullName
-    FROM
-      ProbeSetFreeze
-    ORDER BY
-      ProbeSetFreeze.Name
-    """)
-
-    # for now, fetchall returns the data as a list of tuples
-    # which is what we want
-    return list(cursor.fetchall())
-
-# queryProbeSetFreezeIdName: Cursor -> String -> String, String
-# this function returns the
-# id and the long name of a probesetfreeze given its name
-# once again, it's designed specifically for building
-# the database selector
-def queryProbeSetFreezeIdName(cursor, name):
-    qry = ('''
-    SELECT
-      ProbeSetFreeze.Id,
-      ProbeSetFreeze.FullName
-    FROM
-      ProbeSetFreeze
-    WHERE
-      ProbeSetFreeze.Name = "%s" 
-    ''' % name)
-    #print qry
-    cursor.execute(qry)
-
-    row = cursor.fetchone()
-    return row
-
-# queryProbeSetFreezeName: Cursor -> String -> String
-# to return the name of a probe set freeze given its id
-def queryProbeSetFreezeName(cursor, id):
-    cursor.execute('''
-    SELECT
-      ProbeSetFreeze.FullName
-    FROM
-      ProbeSetFreeze
-    WHERE
-      ProbeSetFreeze.Id = %s
-    ''' % id)
-
-    row = cursor.fetchone()
-    return row[0]
-
-# dbNameToTypeId : Cursor -> String -> (String, String)
-# to convert a database name to a (type, id) pair
-def dbNameToTypeId(cursor, name):
-    types = ["ProbeSet", "Geno", "Publish"]
-    for type_ in types:
-        count = cursor.execute('''
-        SELECT
-          %sFreeze.Id
-        FROM
-          %sFreeze
-        WHERE
-          Name = "%s"
-        ''' % (type_, type_,  name))
-
-        if count != 0:
-            id = cursor.fetchone()[0]
-            return type_, id
-
-    return None, None
-
-# dbTypeIdToName : Cursor -> String -> String -> String
-# to convert a database (type,id) pair into a name
-def dbTypeIdToName(cursor, dbType, dbId):
-    cursor.execute('''
-    SELECT
-      %sFreeze.Name
-    FROM
-      %sFreeze
-    WHERE
-    Id = %s
-    ''' % (dbType, dbType, dbId))
-    
-    row = cursor.fetchone()
-    return row[0]
-
-#XZ, July 21, 2010: I add this function
-def getSpeciesIdByDbTypeId (cursor, dbType, dbId):
-    cursor.execute('''
-    SELECT
-      SpeciesId
-    FROM
-      InbredSet, %sFreeze
-    WHERE
-    %sFreeze.Id = %s
-    and InbredSetId = InbredSet.Id
-    ''' % (dbType, dbType, dbId))
-
-    row = cursor.fetchone()
-    return row[0]
-
-
-# queryStrainCount : Cursor -> int
-# return the largest Strain.Id in the database (Max(Strain.Id), not a row count)
-def queryStrainCount(cursor):
-    cursor.execute('''
-    SELECT
-      Max(Strain.Id)
-    FROM
-      Strain
-    ''')
-    return (cursor.fetchone())[0]