aboutsummaryrefslogtreecommitdiff
path: root/web/webqtl/compareCorrelates
diff options
context:
space:
mode:
authorzsloan2015-03-27 20:28:51 +0000
committerzsloan2015-03-27 20:28:51 +0000
commitd0911a04958a04042da02a334ccc528dae79cc17 (patch)
tree3c48e2e937c1dbeaf00a5697c87ed251afa5c8f1 /web/webqtl/compareCorrelates
parenta840ad18e1fe3db98a359a159e9b9b72367a2839 (diff)
downloadgenenetwork2-d0911a04958a04042da02a334ccc528dae79cc17.tar.gz
Removed everything from 'web' directory except genofiles and renamed the directory to 'genotype_files'
Diffstat (limited to 'web/webqtl/compareCorrelates')
-rwxr-xr-xweb/webqtl/compareCorrelates/MultipleCorrelationPage.py108
-rwxr-xr-xweb/webqtl/compareCorrelates/__init__.py0
-rwxr-xr-xweb/webqtl/compareCorrelates/correlation.py359
-rwxr-xr-xweb/webqtl/compareCorrelates/htmlModule.py279
-rwxr-xr-xweb/webqtl/compareCorrelates/multitrait.py1121
-rwxr-xr-xweb/webqtl/compareCorrelates/trait.py1074
6 files changed, 0 insertions, 2941 deletions
diff --git a/web/webqtl/compareCorrelates/MultipleCorrelationPage.py b/web/webqtl/compareCorrelates/MultipleCorrelationPage.py
deleted file mode 100755
index 6a464ab6..00000000
--- a/web/webqtl/compareCorrelates/MultipleCorrelationPage.py
+++ /dev/null
@@ -1,108 +0,0 @@
-# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU Affero General Public License
-# as published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the GNU Affero General Public License for more details.
-#
-# This program is available from Source Forge: at GeneNetwork Project
-# (sourceforge.net/projects/genenetwork/).
-#
-# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
-# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
-#
-#
-#
-# This module is used by GeneNetwork project (www.genenetwork.org)
-#
-# Created by GeneNetwork Core Team 2010/08/10
-#
-# Last updated by GeneNetwork Core Team 2010/10/20
-
-from base.templatePage import templatePage
-from utility import webqtlUtil
-from base.webqtlTrait import webqtlTrait
-from base import webqtlConfig
-import multitrait
-
-# XZ, 09/09/2008: After adding several traits to collection, click "Compare Correlates" button,
-# XZ, 09/09/2008: This class will generate what you see.
-# XZ, 09/09/2008: This class just collect the input, then pass them to multitrait.py
-#########################################
-# Multiple Correlation Page
-#########################################
-class MultipleCorrelationPage(templatePage):
-
- def __init__(self,fd):
-
- templatePage.__init__(self, fd)
-
- if not self.openMysql():
- return
- if not fd.genotype:
- fd.readData()
-
- self.searchResult = fd.formdata.getvalue('searchResult')
- if not self.searchResult:
- heading = 'Compare Correlates'
- detail = ['You need to select at least two traits in order to generate correlation matrix.']
- self.error(heading=heading,detail=detail)
- print 'Content-type: text/html\n'
- self.write()
- return
- if type("1") == type(self.searchResult):
- self.searchResult = [self.searchResult]
-
- if self.searchResult:
- if len(self.searchResult) > 100:
- heading = 'Compare Correlates'
- detail = ['In order to display Compare Correlates properly, Do not select more than %d traits for Compare Correlates.' % 100]
- self.error(heading=heading,detail=detail)
- print 'Content-type: text/html\n'
- self.write()
- return
- else:
- pass
-
- traitList = []
- for item in self.searchResult:
- thisTrait = webqtlTrait(fullname=item, cursor=self.cursor)
- thisTrait.retrieveInfo()
- traitList.append(thisTrait)
- else:
- heading = 'Compare Correlates'
- detail = [HT.Font('Error : ',color='red'),HT.Font('Error occurs while retrieving data from database.',color='black')]
- self.error(heading=heading,detail=detail)
- print 'Content-type: text/html\n'
- self.write()
- return
-
-
- ##########
- filename= webqtlUtil.genRandStr("mult_")
- fp = open(webqtlConfig.IMGDIR+filename, 'wb')
- fp.write('%s\n' % fd.RISet)
- for thisTrait in traitList:
- fp.write("%s,%s,%s\n" % (thisTrait.db.type,thisTrait.db.id,thisTrait.name))
- fp.close()
- fd.formdata["filename"] = filename
-
- params = {"filename":filename, "targetDatabase":"",
- "threshold":0.5, "subsetSize":10,
- "correlation":"pearson", "subsetCount":10,
- "firstRun":"1"}
- results = []
- txtOutputFileName = ""
-
- self.dict['body'] = multitrait.TraitCorrelationPage(fd, params, self.cursor, traitList, results,
- fd.RISet,txtOutputFileName).dict['body']
- self.dict['title'] = 'Compare Correlates'
-
-
-
-
diff --git a/web/webqtl/compareCorrelates/__init__.py b/web/webqtl/compareCorrelates/__init__.py
deleted file mode 100755
index e69de29b..00000000
--- a/web/webqtl/compareCorrelates/__init__.py
+++ /dev/null
diff --git a/web/webqtl/compareCorrelates/correlation.py b/web/webqtl/compareCorrelates/correlation.py
deleted file mode 100755
index f2ea55b3..00000000
--- a/web/webqtl/compareCorrelates/correlation.py
+++ /dev/null
@@ -1,359 +0,0 @@
-# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU Affero General Public License
-# as published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the GNU Affero General Public License for more details.
-#
-# This program is available from Source Forge: at GeneNetwork Project
-# (sourceforge.net/projects/genenetwork/).
-#
-# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
-# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
-#
-#
-#
-# This module is used by GeneNetwork project (www.genenetwork.org)
-#
-# Created by GeneNetwork Core Team 2010/08/10
-#
-# Last updated by GeneNetwork Core Team 2010/10/20
-
-# correlation.py
-# functions for computing correlations for traits
-#
-# Originally, this code was designed to compute Pearson product-moment
-# coefficents. The basic function calcPearson scans the strain data
-# for the two traits and drops data for a strain unless both traits have it.
-# If there are less than six strains left, we conclude that there's
-# insufficent data and drop the correlation.
-#
-# In addition, this code can compute Spearman rank-order coefficents using
-# the calcSpearman function.
-
-#Xiaodong changed the dependancy structure
-import numarray
-import numarray.ma as MA
-import time
-
-import trait
-
-# strainDataUnion : StrainData -> StrainData -> array, array
-def strainDataUnion(s1, s2):
- # build lists of values that both have
- # and make sure that both sets of values are in the same order
- s1p = []
- s2p = []
- sortedKeys = s1.keys()
- sortedKeys.sort()
- for s in sortedKeys:
- if s2.has_key(s):
- s1p.append(s1[s])
- s2p.append(s2[s])
-
- return (numarray.array(s1p, numarray.Float64),
- numarray.array(s2p, numarray.Float64))
-
-# calcCorrelationHelper : array -> array -> float
-def calcCorrelationHelper(s1p, s2p):
- # if the traits share less than six strains, then we don't
- # bother with the correlations
- if len(s1p) < 6:
- return 0.0
-
- # subtract by x-bar and y-bar elementwise
- #oldS1P = s1p.copy()
- #oldS2P = s2p.copy()
-
- s1p = (s1p - numarray.average(s1p)).astype(numarray.Float64)
- s2p = (s2p - numarray.average(s2p)).astype(numarray.Float64)
-
- # square for the variances
- s1p_2 = numarray.sum(s1p**2)
- s2p_2 = numarray.sum(s2p**2)
-
- try:
- corr = (numarray.sum(s1p*s2p)/
- numarray.sqrt(s1p_2 * s2p_2))
- except ZeroDivisionError:
- corr = 0.0
-
- return corr
-
-# calcSpearman : Trait -> Trait -> float
-def calcSpearman(trait1, trait2):
- s1p, s2p = strainDataUnion(trait1.strainData,
- trait2.strainData)
- s1p = rankArray(s1p)
- s2p = rankArray(s2p)
- return calcCorrelationHelper(s1p, s2p)
-
-# calcPearson : Trait -> Trait -> float
-def calcPearson(trait1, trait2):
- # build lists of values that both have
- # and make sure that both sets of values are in the same order
- s1p, s2p = strainDataUnion(trait1.strainData,
- trait2.strainData)
-
- return calcCorrelationHelper(s1p, s2p)
-
-# buildPearsonCorrelationMatrix: (listof n traits) -> int s -> n x s matrix, n x s matrix
-#def buildPearsonCorrelationMatrix(traits, sc):
-# dim = (len(traits), sc)
-# matrix = numarray.zeros(dim, MA.Float64)
-# testMatrix = numarray.zeros(dim, MA.Float64)
-
-# for i in range(len(traits)):
-# sd = traits[i].strainData
-# for key in sd.keys():
-# matrix[i,int(key) - 1] = sd[key]
-# testMatrix[i,int(key) - 1] = 1
-
-def buildPearsonCorrelationMatrix(traits, commonStrains):
- dim = (len(traits), len(commonStrains))
- matrix = numarray.zeros(dim, MA.Float64)
- testMatrix = numarray.zeros(dim, MA.Float64)
-
- for i in range(len(traits)):
- sd = traits[i].strainData
- keys = sd.keys()
- for j in range(0, len(commonStrains)):
- if keys.__contains__(commonStrains[j]):
- matrix[i,j] = sd[commonStrains[j]]
- testMatrix[i,j] = 1
-
- return matrix, testMatrix
-
-# buildSpearmanCorrelationMatrix: (listof n traits) -> int s -> n x s matrix, n x s matrix
-def buildSpearmanCorrelationMatrix(traits, sc):
- dim = (len(traits), sc)
- matrix = numarray.zeros(dim, MA.Float64)
- testMatrix = numarray.zeros(dim, MA.Float64)
-
- def customCmp(a, b):
- return cmp(a[1], b[1])
-
- for i in range(len(traits)):
- # copy strain data to a temporary list and turn it into
- # (strain, expression) pairs
- sd = traits[i].strainData
- tempList = []
- for key in sd.keys():
- tempList.append((key, sd[key]))
-
- # sort the temporary list by expression
- tempList.sort(customCmp)
-
- for j in range(len(tempList)):
- # k is the strain id minus 1
- # 1-based strain id -> 0-based column index
- k = int(tempList[j][0]) - 1
-
- # j is the rank of the particular strain
- matrix[i,k] = j
-
- testMatrix[i,k] = 1
-
- return matrix, testMatrix
-
-def findLargestStrain(traits, sc):
- strainMaxes = []
- for i in range(len(traits)):
- keys = traits[i].strainData.keys()
- strainMaxes.append(max(keys))
-
- return max(strainMaxes)
-
-def findCommonStrains(traits1, traits2):
- commonStrains = []
- strains1 = []
- strains2 = []
-
- for trait in traits1:
- keys = trait.strainData.keys()
- for key in keys:
- if not strains1.__contains__(key):
- strains1.append(key)
-
- for trait in traits2:
- keys = trait.strainData.keys()
- for key in keys:
- if not strains2.__contains__(key):
- strains2.append(key)
-
- for strain in strains1:
- if strains2.__contains__(strain):
- commonStrains.append(strain)
-
- return commonStrains
-
-def calcPearsonMatrix(traits1, traits2, sc, strainThreshold=6,
- verbose = 0):
- return calcMatrixHelper(buildPearsonCorrelationMatrix,
- traits1, traits2, sc, strainThreshold,
- verbose)
-
-def calcProbeSetPearsonMatrix(cursor, freezeId, traits2, strainThreshold=6,
- verbose = 0):
-
- cursor.execute('select ProbeSetId from ProbeSetXRef where ProbeSetFreezeId = %s order by ProbeSetId' % freezeId)
- ProbeSetIds = cursor.fetchall()
-
- results = []
- i=0
- while i<len(ProbeSetIds):
- ProbeSetId1 = ProbeSetIds[i][0]
- if (i+4999) < len(ProbeSetIds):
- ProbeSetId2 = ProbeSetIds[i+4999][0]
- else:
- ProbeSetId2 = ProbeSetIds[len(ProbeSetIds)-1][0]
-
- traits1 = trait.queryPopulatedProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2) # XZ,09/10/2008: add module name 'trait.'
- SubMatrix = calcMatrixHelper(buildPearsonCorrelationMatrix,
- traits1, traits2, 1000, strainThreshold,
- verbose)
- results.append(SubMatrix)
- i += 5000
-
- returnValue = numarray.zeros((len(ProbeSetIds), len(traits2)), MA.Float64)
- row = 0
- col = 0
- for SubMatrix in results:
- for i in range(0, len(SubMatrix)):
- for j in range(0, len(traits2)):
- returnValue[row,col] = SubMatrix[i,j]
- col += 1
- col = 0
- row +=1
-
- return returnValue
-
-
-
-# note: this code DOES NOT WORK, especially in cases where
-# there are missing observations (e.g. when comparing traits
-# from different probesetfreezes)
-def calcSpearmanMatrix(traits1, traits2, sc, strainThreshold=6,
- verbose=0):
- return calcMatrixHelper(buildSpearmanCorrelationMatrix,
- traits1, traits2, sc, strainThreshold,
- verbose)
-
-def calcMatrixHelper(builder, traits1, traits2, sc, strainThreshold,
- verbose):
-
- # intelligently figure out strain count
- step0 = time.time()
- #localSC = max(findLargestStrain(traits1, sc),
- # findLargestStrain(traits2, sc))
-
- commonStrains = findCommonStrains(traits1, traits2)
-
- buildStart = time.time()
- matrix1, test1 = builder(traits1, commonStrains)
- matrix2, test2 = builder(traits2, commonStrains)
- buildTime = time.time() - buildStart
-
- step1 = time.time()
-
- ns = numarray.innerproduct(test1, test2)
-
- # mask all ns less than strainThreshold so the correlation values
- # end up masked
- # ns is now a MaskedArray and so all ops involving ns will be
- # MaskedArrays
- ns = MA.masked_less(ns, strainThreshold, copy=0)
-
- # divide-by-zero errors are automatically masked
- #ns = -1.0/ns
-
- step2 = time.time()
-
- # see comment above to find out where this ridiculously cool
- # matrix algebra comes from
- xs = numarray.innerproduct(matrix1, test2)
- ys = numarray.innerproduct(test1, matrix2)
- xys = numarray.innerproduct(matrix1, matrix2)
-
- # use in-place operations to try to speed things up
- numarray.power(matrix1, 2, matrix1)
- numarray.power(matrix2, 2, matrix2)
-
- x2s = numarray.innerproduct(matrix1, test2)
- y2s = numarray.innerproduct(test1, matrix2)
-
- step3 = time.time()
-
- # parens below are very important
- # the instant we touch ns, arrays become masked and
- # computation is much, much slower
- top = ns*xys - (xs*ys)
- bottom1 = ns*x2s - (xs*xs)
- bottom2 = ns*y2s - (ys*ys)
- bottom = MA.sqrt(bottom1*bottom2)
-
- # mask catches floating point divide-by-zero problems here
- corrs = top / bottom
-
- step4 = time.time()
-
- # we define undefined correlations as zero even though there
- # is a mathematical distinction
- returnValue = MA.filled(corrs, 0.0)
-
- step5 = time.time()
-
- #print ("calcMatrixHelper: %.2f s, %.2f s, %.2f s, %.2f s, %.2f s, %.2f s, total: %.2f s"
- # %(buildTime,
- # buildStart - step0,
- # step2 - step1,
- # step3 - step2,
- # step4 - step3,
- # step5 - step4,
- # step5 - step0))
-
- if verbose:
- print "Matrix 1:", matrix1
- print "Matrix 2:", matrix2
- print "Ns:", ns
- print "Xs", xs
- print "Ys", ys
- print "XYs:", xys
- print "Top:", top
- print "Bottom 1:", bottom1
- print "Bottom 2:", bottom2
- print "Bottom:", bottom
- print "Corrs:", corrsa
-
-
- return returnValue
-
-
-
-# rankArray: listof float -> listof float
-# to generate a companion list to alof with
-# the actual value of each element replaced by the
-# value's rank
-def rankArray(floatArray):
- # first we save the original index of each element
- tmpAlof = []
- returnArray = numarray.zeros(len(floatArray), numarray.Float64)
- i = 0
- for i in range(len(floatArray)):
- tmpAlof.append((i,floatArray[i]))
-
- # now we sort by the data value
- def customCmp(a,b): return cmp(a[1],b[1])
- tmpAlof.sort(customCmp)
-
- # finally we use the new rank data to populate the
- # return array
- for i in range(len(floatArray)):
- returnArray[tmpAlof[i][0]] = i+1
-
- return returnArray
diff --git a/web/webqtl/compareCorrelates/htmlModule.py b/web/webqtl/compareCorrelates/htmlModule.py
deleted file mode 100755
index ebba3b86..00000000
--- a/web/webqtl/compareCorrelates/htmlModule.py
+++ /dev/null
@@ -1,279 +0,0 @@
-# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU Affero General Public License
-# as published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the GNU Affero General Public License for more details.
-#
-# This program is available from Source Forge: at GeneNetwork Project
-# (sourceforge.net/projects/genenetwork/).
-#
-# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
-# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
-#
-#
-#
-# This module is used by GeneNetwork project (www.genenetwork.org)
-#
-# Created by GeneNetwork Core Team 2010/08/10
-#
-# Last updated by GeneNetwork Core Team 2010/10/20
-
-import sys
-import string
-import os
-import MySQLdb
-import cgi
-
-from htmlgen import HTMLgen2 as HT
-
-from base import webqtlConfig
-
-
-# XZ 08/14/2008: When I tried to replace 'from webqtlConfig import *' with 'import webqtlConfig'
-# XZ 08/14/2008: I found some problems. I discussed with Hongqiang and the below is conclusion.
-# XZ 08/14/2008: The program uses webqtlConfig.DB_NAME, webqtlConfig.MYSQL_SERVER and so on
-# XZ 08/14/2008: without 'import webqtlConfig'. This program will not work.
-# XZ 08/14/2008: CONFIG_htmlpath doesn't exist in webqtlConfig.py
-# XZ 08/14/2008: Hongqian said this was done by Fan Zhang, and this program was not tested.
-# XZ 08/14/2008: So nobody realize these bugs.
-
-# XZ, 09/09/2008: This function is not called any where.
-# XZ, 09/09/2008: Actually, I don't think this function works.
-def genHeaderFooter(i=1,title='',basehref='',js1='',js2='',layer='',body=''):
- """
- generate footer and header HTML code
- default is header
- i = 0 is footer+header
- i = 1 is header
- i = 2 is footer
- """
- try:
- temp_file = CONFIG_htmlpath + 'beta-template.html'
- fp = open(temp_file, 'rb')
- template = fp.read()
- fp.close()
- template = template % (title,basehref,js1,js2,layer,body, "")
- header,footer = string.split(template,'<!-- split from Here -->')
- if i == 0:
- return header + footer
- elif i == 1:
- return header
- elif i == 2:
- return footer
- else:
- return ""
- except:
- if i == 0:
- return "header + footer"
- elif i == 1:
- return "header"
- elif i == 2:
- return "footer"
- else:
- return ""
-
-# XZ, 09/09/2008: This function is only used in multitrait.py where it is called with value assigned to db.
-# XZ, 09/09/2008: So the try-except block is not executed.
-# XZ, 09/09/2008: This explains why no error was generated even without 'import webqtlConfig'
-def genDatabaseMenu(db = None, public =1, RISetgp = 'BXD', selectname = 'database', selected = ""):
- """
- generate database Menu
- public = 0 : search3.html databases Menu
- public = 1 : search.html databases Menu
- """
- if not db:
- try:
- # import MySQLdb
- # con = MySQLdb.Connect(db='db_webqtl')
- # Modified by Fan Zhang
- con = MySQLdb.Connect(db=webqtlConfig.DB_NAME,host=webqtlConfig.MYSQL_SERVER, user=webqtlConfig.DB_USER,passwd=webqtlConfig.DB_PASSWD)
- db = con.cursor()
- except:
- return "Connect MySQL Server Error"
- else:
- pass
-
- databaseMenu = HT.Select(name=selectname)
- nmenu = 0
-
- # here's a hack: bxd and bxd300 can be correlated against each other
- # if either of those are the group, we put in special SQL that pulls both
- if RISetgp in ("BXD", "BXD300"):
- ibsNameQry = '(InbredSet.Name = "BXD" OR InbredSet.Name = "BXD300")'
- else:
- ibsNameQry = 'InbredSet.Name = "%s"' % RISetgp
-
- #Publish Database
- db.execute('''
- SelecT
- PublishFreeze.FullName,
- PublishFreeze.Name
- from
- PublishFreeze,
- InbredSet
- where
- PublishFreeze.InbredSetId = InbredSet.Id and
- %s
- ''' % ibsNameQry)
- for item in db.fetchall():
- databaseMenu.append(item)
- nmenu += 1
-
- #Genome Database
- db.execute('''
- SelecT
- GenoFreeze.FullName,
- GenoFreeze.Name
- from
- GenoFreeze,InbredSet
- where
- GenoFreeze.InbredSetId = InbredSet.Id and
- %s
- ''' % ibsNameQry)
- for item in db.fetchall():
- databaseMenu.append(item)
- nmenu += 1
-
- #Microarray Database
- db.execute('SelecT Id, Name from Tissue')
- for item in db.fetchall():
- TId, TName = item
- databaseMenuSub = HT.Optgroup(label = '%s ------' % TName)
- db.execute('''
- SelecT
- ProbeSetFreeze.FullName,
- ProbeSetFreeze.Name
- from
- ProbeSetFreeze,
- ProbeFreeze,
- InbredSet
- where
- ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and
- ProbeFreeze.TissueId = %d and
- ProbeSetFreeze.public > %d and
- ProbeFreeze.InbredSetId = InbredSet.Id and
- %s
- order by
- ProbeSetFreeze.CreateTime desc,
- ProbeSetFreeze.AvgId
- ''' % (TId,public,ibsNameQry))
- for item2 in db.fetchall():
- databaseMenuSub.append(item2)
- nmenu += 1
- databaseMenu.append(databaseMenuSub)
-
- if nmenu:
- if selected:
- databaseMenu.selected.append(selected)
- return str(databaseMenu)
- else:
- return ''
-
-
-# XZ, 09/09/2008: This function is not called any where.
-# XZ, 09/09/2008: Actually, I don't think this function works.
-# XZ, 09/09/2008: There is no 'DataForm' file now. It should be webqtlForm.py
-def genRISample():
- import glob
- import reaper
- import random
- import math
- import webqtlUtil
- risets = filter(lambda X:X.find('F2')<0, map(os.path.basename, glob.glob(os.path.join(CONFIG_genodir, "*.geno"))))
- risets = map(lambda X:X.split('.')[0], risets)
- risets.remove("BayXSha")
- risets.sort()
- body = HT.Blockquote()
- NPerRow = 6
- for item in risets:
- values = []
- if item == 'AXBXA': item2='AXB/BXA'
- elif item == 'HXBBXH': item2='HXB/BXH'
- else: item2=item
- body.append(HT.Paragraph(item2, Class='subtitle'))
- tbl = HT.TableLite(Class="collap")
- dataset = reaper.Dataset()
- dataset.read(os.path.join(CONFIG_genodir, "%s.geno"%item))
- prgy = webqtlUtil.ParInfo[item] + list(dataset.prgy)
-
- mean = random.random()*100
- variance = random.random()*500
- variables = []
- while len(variables) < len(prgy):
- S = 2
- while (S>=1):
- U1= random.random()
- U2= random.random()
- V1= 2*U1-1.0
- V2= 2*U2-1.0
- S=V1*V1+V2*V2
- X= math.sqrt(-2 * math.log(S) / S) * V1
- Y= math.sqrt(-2 * math.log(S) / S) * V2
- variables.append(mean + math.sqrt(variance) * X)
- variables.append(mean + math.sqrt(variance) * Y)
-
- tempTR = HT.TR()
- for i, strain in enumerate(prgy):
- if i and i%NPerRow==0:
- tbl.append(tempTR)
- tempTR = HT.TR()
- if random.random() < 0.2:
- variable = 'X'
- else:
- variable = "%2.3f" % variables[i]
-
- tempTR.append(HT.TD(strain, Class="strains", width=80))
- tempTR.append(HT.TD(variable, Class="values", width=60))
- values.append(variable)
-
- for j in range(NPerRow-i%NPerRow-1):
- tempTR.append(HT.TD())
- tbl.append(tempTR)
- body.append(tbl)
- body.append(HT.Paragraph("Copy the following line to paste into the GeneNetwork entry box:"))
- body.append(HT.Code(string.join(values, " ")))
- body.append(HT.HR(width="90%"))
- return body
-
-if __name__ == "__main__":
- if os.environ.has_key('SCRIPT_FILENAME'):
- script_filename = os.environ['SCRIPT_FILENAME']
- else:
- script_filename = ''
- #Used as cgi script
- if script_filename and script_filename[-2:] == 'py':
- print 'Content-type: text/html\n'
- formdata = cgi.FieldStorage()
- sys.stderr = sys.stdout
- try:
- getID = string.lower(formdata.getvalue('get'))
- except:
- getID = ''
- #Used as command
- else:
- if len(sys.argv) >= 2:
- getID = string.lower(sys.argv[1])
- else:
- getID = ''
-
- if getID == 'headerfooter':
- print genHeaderFooter(0)
- elif getID == 'header':
- print genHeaderFooter(1)
- elif getID == 'footer':
- print genHeaderFooter(2)
- elif getID == 'databasemenu':
- print genDatabaseMenu(public=0)
- elif getID == 'datasample':
- print genRISample()
- else:
- print genHeaderFooter(0)
-else:
- pass
-
diff --git a/web/webqtl/compareCorrelates/multitrait.py b/web/webqtl/compareCorrelates/multitrait.py
deleted file mode 100755
index 047620af..00000000
--- a/web/webqtl/compareCorrelates/multitrait.py
+++ /dev/null
@@ -1,1121 +0,0 @@
-# multitrait.py
-# a tool to analyze the correlations between several different traits and the traits
-# in a given dataset
-#
-# Parameters:
-# correlation -- either "pearson" or "spearman" depending on which ones we want to use
-#
-# filename -- an input file containing the traits to analyze
-#
-# progress -- if set, this parameter outputs a static progress page
-# and uses a META redirect to trigger the real computation
-#
-# targetDatabaseType:
-# one of "ProbeSet", "Publish", "Genotype" depending on the type of database
-# we will use for the analysis
-#
-# targetDatabaseId:
-# the id (*Freeze.Id in the database) of the particular database we will analyze
-#
-# threshold -- a float between 0 and 1 to determine which coefficents we wil l consider
-#
-# firstRun -- either 0 or 1
-# whether to automatically pick reasonable defaults for the other three parameters
-#
-# outputType -- either "html" or "text"
-#
-# Author: Stephen Pitts
-# June 15, 2004
-
-#Xiaodong changed the dependancy structure
-
-import copy
-import sys
-import cgi
-import os
-import os.path
-import math
-import time
-import numarray
-import tempfile
-import string
-import cgitb #all tracebacks come out as HTMLified CGI,useful when we have a random crash in the middle
-
-from base import templatePage
-from base.webqtlTrait import webqtlTrait
-from utility import webqtlUtil
-from base import webqtlConfig
-import trait
-import correlation
-import htmlModule
-
-cgitb.enable()
-
-
-# where this program's data files are
-RootDir = webqtlConfig.IMGDIR # XZ, 09/10/2008: add module name 'webqtlConfig.'
-RootDirURL = "/image/" # XZ, 09/10/2008: This parameter is not used in this module
-
-tempfile.tempdir = RootDir
-tempfile.template = "multitrait"
-
-# MultitraitException: used if something goes wrong
-# maybe in the future we should make exceptions more granular
-class MultitraitException(Exception):
- def __init__(self, message):
- self.message = message
-
- def __repr__(self):
- return "MultitraitException: %s" % self.message
-
-# buildParamDict: Cursor -> ParamDict
-# to process and validate CGI arguments
-# see the comment at the top of this file for valid cgi
-# parameters
-def buildParamDict(cursor, fd):
- params = {}
- fs = fd.formdata #cgi.FieldStorage()
- params["progress"] = fs.getfirst("progress", "0")
- params["filename"] = fs.getfirst("filename", "")
- if params["filename"] == "":
- raise MultitraitException("Required parameter filename missing.")
-
- params["targetDatabase"] = fs.getfirst("targetDatabase", "U74Av2RMA_Raw_ProbeSet_March04")
- params["firstRun"] = webqtlUtil.safeInt(fs.getfirst("firstRun", "0"),0)
- params["threshold"] = webqtlUtil.safeFloat(fs.getfirst("threshold", "0.5"), 0.5)
- params["subsetSize"] = webqtlUtil.safeInt(fs.getfirst("subsetSize", "10"), 10)
-
- if params["subsetSize"] < -1:
- params["subsetSize"] = -1
-
- params["correlation"] = fs.getfirst("correlation", "pearson")
- params["subsetCount"] = webqtlUtil.safeInt(fs.getfirst("subsetCount", 10), 10)
-
- if params["subsetCount"] < -1:
- params["subsetCount"] = -1
-
- #params["outputType"] = fs.getfirst("outputType", "html")
-
- #if params["outputType"] not in ("html", "text"):
- # params["outputType"] = "html"
-
- if params["correlation"] not in ("pearson", "spearman"):
- params["correlation"] = "pearson"
-
- params["correlationName"] = params["correlation"].capitalize()
-
- # one of two cases:
- # 1) We have just come from a submit, so there are a bunch of display*
- # but no displaySets. Thus, the code down there converts the display*
- # to displaySets so the GET request doesn't get too long
- # 2) We have just been redirected from a progress page which already has
- # a converted displaySets for us.
-
- displaySets = webqtlUtil.safeInt(fs.getfirst("displaySets","0"), 0)
-
- if displaySets == 0:
- for key in fs.keys():
- if key[:7] == "display":
- #print "Hit display key %s<br>" % key
- try:
- whichSet = int(key[7:])
-
- # prevent malicious attacks
- whichSet = min(whichSet, 512)
- displaySets += pow(2, whichSet)
-
- except ValueError: pass
-
- params["displaySets"] = displaySets
- #print "In the beginning, display sets was %s: %s<br>" % (displaySets,
- # str(binaryDecompose(displaySets)))
-
- # if we are just gonna display a progress page, then there's no
- # reason to look up detailed database information
- #if params["progress"] == "1":
- # return params
-
- a,b = trait.dbNameToTypeId(cursor, params["targetDatabase"]) # XZ, 09/10/2008: add module name
- params["targetDatabaseType"] = a
- params["targetDatabaseId"] = b
- params["targetDatabaseName"] = params["targetDatabase"]
-
- return params
-
-# readInputFile: DB cursor -> string -> string, (arrayof Trait)
-def readInputFile(cursor, filename):
- """
- To read an input file with n lines in the following format
- <databasetype>,<databaseid>,<traitname>
- and retrieve and populate traits with appropriate data
- from the database
-
- Also, for our purposes. we store the database type and
- database id in fields attached to the trait instances. We use
- this information to generate Javascript popups with trait
- information.
-
- In addition, we read the strain of mice that the traits are
- from so we can only show those databases to correlate against.
- """
- handle = open(filename)
- line = handle.readline()
- inbredSetName = line.strip()
- line = handle.readline()
- traits = []
-
-# XZ, 09/10/2008: In this while loop block, I changed the original variable name 'trait' to 'oneTrait'
- while line != "":
- line = line.strip()
- dbType, dbId, tName = line.split(",")
-
- if dbType == "ProbeSet":
- oneTrait = trait.queryProbeSetTraitByName(cursor, tName) # XZ, 09/10/2008: add module name
- oneTrait.populateDataId(cursor, dbId)
- oneTrait.dbName = trait.dbTypeIdToName(cursor, dbType, dbId) # XZ, 09/10/2008: add module name
- elif dbType == "Geno":
- speciesId = trait.getSpeciesIdByDbTypeId(cursor, dbType, dbId)
- oneTrait = trait.queryGenotypeTraitByName(cursor, speciesId, tName) # XZ, 09/10/2008: add module name
- oneTrait.populateDataId(cursor, dbId)
- oneTrait.dbName = trait.dbTypeIdToName(cursor, dbType, dbId) # XZ, 09/10/2008: add module name
- elif dbType == "Publish":
- oneTrait = trait.queryPublishTraitByName(cursor, dbId, tName) # XZ, 09/10/2008: add module name
- oneTrait.populateDataId(cursor, dbId)
- oneTrait.dbName = trait.dbTypeIdToName(cursor, dbType, dbId) # XZ, 09/10/2008: add module name
- elif dbType == "Temp":
- oneTrait = trait.queryTempTraitByName(cursor, tName) # XZ, 09/10/2008: add module name
- oneTrait.populateDataId(cursor, dbId)
- oneTrait.dbName = "Temp"
-
- oneTrait.populateStrainData(cursor)
- traits.append(oneTrait)
-
- line = handle.readline()
-
- return inbredSetName, traits
-
-# loadDatabase: Cursor -> ParamDict -> arrayof Trait
-def loadDatabase(cursor, p):
- """
- To load a set of traits as specified by the
- targetDatabaseId
- and targetDatabaseType parameters
-
- Cursor should be a fastCursor from the webqtl library (i.e.
- a MySQLdb SSCursor).
-
- Calling populateStrainData 20,000 or so times on a ProbeSet
- is really inefficent, so I wrote an optimized queryPopulatedProbeSetTraits
- in the trait module that uses a join to get all of the rows in
- bulk, store the resultset on the server, and do all sorts of nice buffering.
- It's about two or three times faster.
- """
- if p["targetDatabaseType"] == "ProbeSet": # XZ, 09/10/2008: add module name
- dbTraits = trait.queryPopulatedProbeSetTraits(cursor,
- p["targetDatabaseId"])
- elif p["targetDatabaseType"] == "Publish": # XZ, 09/10/2008: add module name
- dbTraits = trait.queryPublishTraits(cursor,
- p["targetDatabaseId"])
- psd = trait.PublishTrait.populateStrainData
- elif p["targetDatabaseType"] == "Geno": # XZ, 09/10/2008: add module name
- dbTraits = trait.queryGenotypeTraits(cursor,
- p["targetDatabaseId"])
- psd = trait.GenotypeTrait.populateStrainData
- else:
- print "Unknown target database type %s" % p["targetDatabaseType"]
-
- if p["targetDatabaseType"] != "ProbeSet":
- map(psd, dbTraits, [cursor]*len(dbTraits))
-
- return dbTraits
-
-def runProbeSetCorrelations(cursor, p, traits):
- """
- To run the correlations between the traits and the database.
- This function computes a correlation coefficent between each
- trait and every entry in the database, and partitions the database
- into a disjoint array of arrays which it returns.
-
- The length of the return array is 2^n, where n is the length of
- the trait array. Which constitutent element a of the return array
- a given trait ends up in is determined by the following formula
- i = i_02^0 + ... + i_(n-1)2^(n-1)
- where i_0 is 1 if corr(a,trait 0) >= threshold and 0 otherwise
-
- Since most of the several thousand database traits will end up
- with i=0, we don't return them, so the first element of the
- return array will be empty.
-
- A particular element of subarray j of the return array contains
- a 2-tuple (trait,kvalues). The variable trait is obviously the
- particular database trait that matches the user traits l_1, ..., l_m
- to which subarray j corresponds. kvalues is a list of the correlation
- values linking trait to l_1, ..., l_m, so the length of kvalues is
- the number of 1s in the binary representation of j (there must be
- a better way to describe this length).
-
- The return array is an array of 2-tuples. The first element of
- each tuple is the index of the particular subarray, and the second
- element is the subarray itself. The array is sorted in descending
- order by the number of 1's in the binary representation of the
- index so the first few subarrays are the ones that correspond to
- the largest sets. Each subarray is then sorted by the average of
- the magnitude of the individual correlation values.
- """
-
- kMin = p["threshold"]
- traitArrays = {}
-
- # TODO: Add Spearman support
- freezeId = p["targetDatabaseId"]
- if p["correlation"] == "pearson":
- correlations = correlation.calcProbeSetPearsonMatrix(cursor, freezeId, traits) #XZ, 09/10/2008: add module name
- else:
- correlations = correlation.calcProbeSetSpearmanMatrix(freezeId, traits) #XZ, 09/10/2008: add module name
-
- # now we test all of the correlations in bulk
- test = numarray.absolute(correlations)
- test = numarray.greater_equal(test, kMin)
- test = test.astype(numarray.Int8)
- #print test
-
- db = trait.queryProbeSetTraits(cursor, freezeId) #XZ, 09/10/2008: add module name
- for i in range(len(db)):
- cIndex = 0
- prods = []
- for j in range(len(traits)):
- if test[i,j] == 1:
- cIndex += pow(2, j)
- prods.append(correlations[i,j])
- if cIndex != 0:
- if not traitArrays.has_key(cIndex):
- traitArrays[cIndex] = []
-
- traitArrays[cIndex].append((db[i], prods))
-
-
- # sort each inner list of traitArrays
- # so the matched traits appear in descending order by the
- # average magnitude of the correlation
- def customCmp(traitPair, traitPair2):
- magAvg1 = numarray.average(map(abs, traitPair[1]))
- magAvg2 = numarray.average(map(abs, traitPair2[1]))
-
- # invert the sign to get descending order
- return -cmp(magAvg1, magAvg2)
-
- for traitArray in traitArrays.values():
- traitArray.sort(customCmp)
-
- # sort the outer list of traitArrays
- traitArrays2 = []
- i = 0
- for key in traitArrays.keys():
- a = traitArrays[key]
- if len(a) > 0:
- traitArrays2.append((key,a,len(binaryDecompose(key)),
- len(a)))
-
- # we sort by the number of 1's in the binary output
- # and then by the size of the list, both in descending order
- def customCmp2(aL,bL):
- a = -cmp(aL[2], bL[2])
- if a == 0:
- return -cmp(aL[3], bL[3])
- else:
- return a
-
- traitArrays2.sort(customCmp2)
-
- return traitArrays2
-
-def runCorrelations(p, strainCount, traits, db):
- """
- To run the correlations between the traits and the database.
- This function computes a correlation coefficent between each
- trait and every entry in the database, and partitions the database
- into a disjoint array of arrays which it returns.
-
- The length of the return array is 2^n, where n is the length of
- the trait array. Which constitutent element a of the return array
- a given trait ends up in is determined by the following formula
- i = i_02^0 + ... + i_(n-1)2^(n-1)
- where i_0 is 1 if corr(a,trait 0) >= threshold and 0 otherwise
-
- Since most of the several thousand database traits will end up
- with i=0, we don't return them, so the first element of the
- return array will be empty.
-
- A particular element of subarray j of the return array contains
- a 2-tuple (trait,kvalues). The variable trait is obviously the
- particular database trait that matches the user traits l_1, ..., l_m
- to which subarray j corresponds. kvalues is a list of the correlation
- values linking trait to l_1, ..., l_m, so the length of kvalues is
- the number of 1s in the binary representation of j (there must be
- a better way to describe this length).
-
- The return array is an array of 2-tuples. The first element of
- each tuple is the index of the particular subarray, and the second
- element is the subarray itself. The array is sorted in descending
- order by the number of 1's in the binary representation of the
- index so the first few subarrays are the ones that correspond to
- the largest sets. Each subarray is then sorted by the average of
- the magnitude of the individual correlation values.
- """
- kMin = p["threshold"]
- traitArrays = {}
-
- # TODO: Add Spearman support
- if p["correlation"] == "pearson":
- correlations = correlation.calcPearsonMatrix(db, traits, strainCount) #XZ, 09/10/2008: add module name
- else:
- correlations = correlation.calcSpearmanMatrix(db, traits, strainCount) #XZ, 09/10/2008: add module name
-
- # now we test all of the correlations in bulk
- test = numarray.absolute(correlations)
- test = numarray.greater_equal(test, kMin)
- test = test.astype(numarray.Int8)
- #print test
-
-
- for i in range(len(db)):
- cIndex = 0
- prods = []
- for j in range(len(traits)):
- if test[i,j] == 1:
- cIndex += pow(2, j)
- prods.append(correlations[i,j])
- if cIndex != 0:
- if not traitArrays.has_key(cIndex):
- traitArrays[cIndex] = []
-
- traitArrays[cIndex].append((db[i], prods))
-
- # sort each inner list of traitArrays
- # so the matched traits appear in descending order by the
- # average magnitude of the correlation
- def customCmp(traitPair, traitPair2):
- magAvg1 = numarray.average(map(abs, traitPair[1]))
- magAvg2 = numarray.average(map(abs, traitPair2[1]))
-
- # invert the sign to get descending order
- return -cmp(magAvg1, magAvg2)
-
- for traitArray in traitArrays.values():
- traitArray.sort(customCmp)
-
- # sort the outer list of traitArrays
- traitArrays2 = []
- i = 0
- for key in traitArrays.keys():
- a = traitArrays[key]
- if len(a) > 0:
- traitArrays2.append((key,a,len(binaryDecompose(key)),
- len(a)))
-
- # we sort by the number of 1's in the binary output
- # and then by the size of the list, both in descending order
- def customCmp2(aL,bL):
- a = -cmp(aL[2], bL[2])
- if a == 0:
- return -cmp(aL[3], bL[3])
- else:
- return a
-
- traitArrays2.sort(customCmp2)
-
- return traitArrays2
-
-
-# XZ, 09/09/2008: In multiple trait correlation result page,
-# XZ, 09/09/2008: click "Download a text version of the above results in CSV format"
-
-# TraitCorrelationText: a class to display trait correlations
-# as textual output
-class TraitCorrelationText:
- # build a text shell to describe the given trait correlations
- # this method sets self.output; use str(self) to actually
- # get the text page
- #
- # traits is a list of traits and traitArray is a
- # list of 3-tuples: index, traits', garbage
- # where index is a binary-encoded description of which subset of
- # traits the list traits' matches
- #
- # traits' is a list of 3-tuples as well: trait, correlations, garbage
- # where trait is a particular trait and correlations is a list of float
- # correlations (matching traits above)
- def __init__(self, p, traits, traitArray):
- output = "Correlation Comparison\n"
- output += "from WebQTL and the University of Tennessee Health Science Center\n"
- output += "initiated at " + time.asctime(time.gmtime()) + " UTC\n\n"
-
- output += self.showOptionPanel(p)
- output += self.showSelectedTraits(traits)
- output += self.showSummaryCorrelationResults(p, traits, traitArray)
- output += self.showDetailedCorrelationResults(p, traits, traitArray)
-
- self.output = output
-
- # showOptionPanel: ParamDict -> string
- # to display the options used to run this correlation
- def showOptionPanel(self, params):
- output = "Correlation Comparison Options:\n"
- output += "Target database,%s\n" % params["targetDatabase"]
- output += "Correlation type,%s\n" % params["correlationName"]
- output += "Threshold,%f\n" % params["threshold"]
- #output += "Subsets to Show,%d\n" % params["subsetCount"]
- #output += "Traits to Show Per Subset,%d\n\n" % params["subsetSize"]
- return output
-
- # showSelectedTraits: (listof Trait) -> string
- # to display the traits compared with the database
- # note: we can't use tabular output because the traits could be of
- # different types and produce different fields
- def showSelectedTraits(self, traits):
- output = "Selected Traits:\n"
- for trait in traits:
- output += '"' + trait.longName() + '"' + "\n"
- output += "\n"
- return output
-
- # showSummaryCorrelationResults: ParamDict -> (listof Trait) ->
- # TraitArray -> string
- # see comment for __init__ for a description of TraitArray
- #
- # to show a summary (sets and sizes) of the correlation results
- # as well as an X to indicate whether they will be included
- # in the detailed output
- def showSummaryCorrelationResults(self, p, traits, traitArray):
- output = "Correlation Comparison Summary:\n"
-
- #if p["subsetCount"] != -1:
- # ourSubsetCount = min(p["subsetCount"], len(traitArray))
- #else:
-
- ourSubsetCount = len(traitArray)
-
- displayDecomposition = binaryDecompose(p["displaySets"])
- for j in range(ourSubsetCount):
- i = traitArray[j][0]
- traitSubarray = traitArray[j][1]
- if len(traitSubarray) == 0:
- continue
-
- targetTraits = decomposeIndex(traits, i)
- traitDesc = string.join(map(trait.Trait.shortName, targetTraits), # XZ, 09/10/2008: add module name
- ", ")
- if j in displayDecomposition:
- checked = "X"
- else:
- checked = ""
-
- output += '"%s","%s","%d"\n' % (checked, traitDesc, len(traitSubarray))
-
- output += "\n"
- return output
-
- # showDetailedCorrelationResults: ParamDict -> (listof Trait) ->
- # TraitArray -> string
- #
- # to show a detailed list of the correlation results; that is,
- # to completely enumerate each subset of traitArray using the
- # filtering parameters in p
- def showDetailedCorrelationResults(self, p, traits, traitArray):
- output = "Correlation Comparison Details:\n"
- displayDecomposition = binaryDecompose(p["displaySets"])
- displayDecomposition.sort()
-
- def formatCorr(c):
- return "%.4f" % c
-
- for j in displayDecomposition:
- i = traitArray[j][0]
- traitSubarray = traitArray[j][1]
-
- if len(traitSubarray) == 0:
- continue
-
- targetTraits = decomposeIndex(traits, i)
- extraColumnHeaders = map(trait.Trait.shortName, targetTraits) # XZ, 09/10/2008: add module name
- traitDesc = string.join(extraColumnHeaders, ", ")
-
- #if(p["subsetSize"] != -1 and len(traitSubarray) > p["subsetSize"]):
- # traitDesc += ",(showing top %s of %s)" % (p["subsetSize"],
- # len(traitSubarray))
- # traitSubarray = traitSubarray[0:p["subsetSize"]]
-
- output += "%s\n" % traitDesc
- output += traitSubarray[0][0].csvHeader([], extraColumnHeaders)
- output += "\n"
- for oneTrait, corr in traitSubarray:#XZ, 09/10/2008: change original variable name 'trait' to 'oneTrait'
- corr = map(formatCorr, corr)
- output += oneTrait.csvRow([], corr) + "\n"
-
- output += "\n"
-
- return output
-
- # __str__ : string
- # to return self.output as the string representation of this page
- # self.output is built in __init__
- def __str__(self):
- return self.output
-
-# TraitCorrelationPage: a class to display trait correlations
-# for now this is just one HTML file, so we don't even write it
-# to a temporary file somewhere
-class TraitCorrelationPage(templatePage.templatePage):
- """
- Using the templatePage class, we build an HTML shell for
- the core data here: the trait correlation lists.
-
- The way templatePage works, we build the page in pieces in
- the __init__ method and later on use the inherited write
- method to render the page.
- """
- def __init__(self, fd, p, cursor, traits, traitArray, inbredSetName, txtFilename):
-
- templatePage.templatePage.__init__(self, fd)
-
- self.dict["title"] = "Correlation Comparison"
- self.dict["basehref"] = ""
- # NL: deleted js1 content part, since it has not been used in this project
- self.dict["js1"] = ""
- self.dict["js2"] = ""
-
- body = "<td><h1>Correlation Comparison</h1>"
- body += "<p>Run at %s UTC</p>" % time.asctime(time.gmtime())
- body += """
- <p>The correlation comparison tool identifies intersecting sets of traits that are
-correlated with your selections at a specified threshold. A correlation comparison
-involves the following steps:</p>
-<ol>
-<li><p>
-<b>Correlate:</b>
-Choose a <i>Target Database</i>, a <i>Correlation Type</i>, and a <i>Correlation
-Threshold</i>. For your initial correlation, leave <i>Number of Subsets to Show</i> and
-<i>Traits to Show per Subset</i> at their default values of 10. Using the Correlation
-Options panel, you can adjust the <i>Correlation Threshold</i>, <i>Number of Subsets to
-Show</i>, and <i>Traits to Show per Subset</i>.
-</p></li>
-
-<li><p>
-<b>Add to Collection:</b>
-You can use the check boxes in the <i>Correlation
-Comparison Details</i> panel and the buttons at the bottom of the page to add these
-results to your selections page for further analysis in WebQTL.
-</p></li>
-
-<li><p>
-<b>Filter:</b>
-Using the <i>Correlation Comparison Summary</i> panel, choose which
-subsets you would like to display for export. Note that if you change the
-parameters in the <i>Correlation Options</i> panel, you will need to re-apply your filter.
-</p></li>
-
-<li><p>
-<b>Export:</b>
-Once you are satisfied with your report, use the export link at
-the bottom of the page to save the report as a comma-separated (CSV) text file
-which you can then import into Excel or another tool. Note: the exported report
-will list all subsets in the summary view and only those traits in the subsets
-you have selected in the Filter step.
-</p></li>
-</ol>
-"""
-
-# body += """
-# <p>The correlation
-# comparison tool identifies the intersecting sets of traits that are
-# correlated with your selections. A correlation comparison involves
-# the following steps:</p>
-# <ol>
-# <li><p><b>Correlate:</b> Choose a <i>Target Database</i>, a <i>Correlation Type</i>, and a <i>Correlation Threshold</i>.
-# For the initial correlation, leave <i>Subsets to Show</i> and <i>Traits to Show per Subset</i>
-# at their default values of 10.</p></li>
-# <li><p><b>Refine Correlation:</b> Using the <i>Correlation Options</i> panel,
-# adjust the <i>Correlation Threshold</i>, <i>Subsets to Show</i>, and <i>Traits to
-# Show per Subset</i> until you have a reasonable number of traits.</p></li>
-# <li><p><b>Filter:</b> Using the <i>Correlation Comparison Summary</i> panel, choose which subsets you would
-# like to see. Note that if you change the parameters in the <i>Correlation Options</i> panel, you will
-# loose the filter you have selected.</p></li>
-# <li><p><b>Export:</b> Once you are satisfied with your report, use the export
-# link at the bottom of the page to save the report as a comma-separated (CSV) text file which
-# you can then import into Excel or another tool. Note: the exported report
-# will show all subsets in the summary view and all traits in each subset you have
-# selected in the Filter step.
-# <li><p><b>Shopping Cart:</b> In addition, you can use the
-# check boxes in the <i>Correlation Comparison Details</i> panel and the
-# buttons at the bottom of the page to add the traits you have found to the shopping cart.</p>
-# </li>
-# </ol>
-# """
-
- body += self.showOptionPanel(p, cursor, inbredSetName)
- body += self.showSelectedTraits(traits, p, inbredSetName)
-
- if p["firstRun"] == 0:
- body += self.showCorrelationResults(p, inbredSetName, traits, traitArray)
-
- exportParams = copy.copy(p)
- exportParams["outputType"] = "text"
-
- body += ('''
- <h2>Export these results</h2>
- <p>
- <a href="/image/%s">Download a text version of the above results in CSV format</a>. This text version differs from
- the version you see on this page in two ways. First, the summary view shows all subsets. Second, the details
- view shows all traits in the subsets that you have selected.
- </p>
- '''
- % txtFilename)
-
-
-
- body += "</td>"
- self.dict["body"] = body
-
-
- # showOptionPanel: ParamDict -> Cursor -> String -> String
- # to build an option panel for the multitrait correlation
- # we expect the database list to be a list of 2-tuples
- # the first element of each tuple is the short name
- # and the second element of the tuple is the long name
- def showOptionPanel(self, params, cursor, inbredSetName):
- output = '''
- <h2>Correlation Options</h2>
- <FORM METHOD="POST" ACTION="%s%s" ENCTYPE="multipart/form-data">
- <INPUT TYPE="hidden" NAME="FormID" VALUE="compCorr2">
- <input type="hidden" name="filename" value="%s">
- <input type="hidden" name="firstRun" value="0">
- <input type="hidden" name="progress" value="1">
- <table>
- <tr>
- <td>Target Database:</td><td>
- ''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, params["filename"])
-
- output += htmlModule.genDatabaseMenu(db = cursor,
- public=0,
- RISetgp = inbredSetName,
- selectname="targetDatabase",
- selected=params["targetDatabase"])
- output += "</td></tr>"
-
- corrSelected = ["",""]
-
- if params["correlation"] == "pearson":
- corrSelected[0] = "SELECTED"
- else:
- corrSelected[1] = "SELECTED"
-
- output += ('''
- <tr>
- <td>Correlation Method:</td>
- <td><select name="correlation">
- <option value="pearson" %s>Pearson</option>
- <!--<option value="spearman" %s>Spearman</option>-->
- </select></td></tr>
- ''' % (corrSelected[0], corrSelected[1]))
- output += ('<tr><td>Correlation Threshold:</td><td><input name="threshold" value="%s" /></td></tr>'
- % params["threshold"])
- output += ('<tr><td>Subsets to Show (-1 to show all subsets):</td><td><input name="subsetCount" value="%s" /></td></tr>'
- % params["subsetCount"])
- output += ('<tr><td>Traits to Show per Subset (-1 to show all traits):</td><td><input name="subsetSize" value="%s" /></td></tr>'
- % params["subsetSize"])
-
- # a cosmetic change to hopefully make this form a bit easier to use
-# if params["firstRun"] == 1:
-# applyName = "Correlate"
-# else:
-# applyName = "Refine Correlation"
-
- output += '''
- <tr>
- <td colspan="2"><input class="button" type="submit" value="Correlate" /></td>
- </tr>
- </table>
- </form>
- '''
-
- return output
-
- # showSelectedTraits: listof Trait -> string
- # to show a list of the selected traits
- def showSelectedTraits(self, traits, p, inbredSetName):
- output = '''
- <form action="%s%s" method="post" name="showDatabase">
- <INPUT TYPE="hidden" NAME="FormID" VALUE="showDatabase">
-
- <input type="hidden" name="incparentsf1" value="ON">
- <input type="hidden" name="ShowStrains" value="ON">
- <input type="hidden" name="ShowLine" value="ON">
- <input type="hidden" name="database" value="">
- <input type="hidden" name="ProbeSetID" value="">
- <input type="hidden" name="RISet" value="%s">
- <input type="hidden" name="CellID" value="">
- <input type="hidden" name="database2" value="">
- <input type="hidden" name="rankOrder" value="">
- <input type="hidden" name="ProbeSetID2" value="">
- <input type="hidden" name="CellID2" value="">
- ''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, inbredSetName)
-
- output += "<h2>Selected Traits</h2>"
- output += '<table cellpadding="2" cellspacing="0"><tr bgcolor="FFFFFF"><th>Database</th><th>Trait</th></tr>'
- flip = 1
- colors = ["FFFFFF", "cccccc"]
-
- for trait in traits:
- # we take advantage of the secret dbName attribute that
- # loadDatabase fills in
- descriptionString = trait.genHTML()
- if trait.db.type == 'Publish' and trait.confidential:
- descriptionString = trait.genHTML(privilege=self.privilege, userName=self.userName, authorized_users=trait.authorized_users)
- output += '''
- <tr bgcolor="%s"><td><a href="/dbdoc/%s.html">%s</a></td>
- <td><a href="javascript:showDatabase2('%s', '%s', '')">%s</a></td>
- </tr>
- ''' % (colors[flip], trait.db.name, trait.db.name, trait.db.name, trait.name, descriptionString)
- flip = not flip
-
- output += "</table></form>"
- return output
-
-
- # showSummaryCorrelationResults
- # show just the number of traits in each subarray
- def showSummaryCorrelationResults(self, p, traits, traitArray):
- output = '''
- <form action="%s%s" method="post">
- <INPUT TYPE="hidden" NAME="FormID" VALUE="compCorr2">
- <input type="hidden" name="filename" value="%s">
- <input type="hidden" name="firstRun" value="0">
- <input type="hidden" name="progress" value="1">
- <input type="hidden" name="correlation" value="%s">
- <input type="hidden" name="threshold" value="%s">
- <input type="hidden" name="rankOrder" value="">
- <input type="hidden" name="subsetCount" value="%s">
- <input type="hidden" name="subsetSize" value="%s">
- <input type="hidden" name="targetDatabase" value="%s">
- ''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, p["filename"], p["correlation"], p["threshold"],
- p["subsetCount"], p["subsetSize"], p["targetDatabase"])
-
- output += '''
- <table cellpadding="2" cellspacing="0">
- <tr>
- <th>Trait Subsets</th>
- <th colspan="2">Intersecting Set Size</th>
- </tr>
- '''
- # figure out a scale for the summary graph
- # for now we set max = 300 pixels wide
- if p["subsetCount"] != -1:
- ourSubsetCount = min(p["subsetCount"], len(traitArray))
- else:
- ourSubsetCount = len(traitArray)
-
- screenWidth = 600
- lengths = []
- for j in range(ourSubsetCount):
- lengths.append(len(traitArray[j][1]))
- maxLength = max(lengths)
-
- displayDecomposition = binaryDecompose(p["displaySets"])
- flip = 0
- colors = ["FFFFFF", "cccccc"]
-
- for j in range(ourSubsetCount):
- i = traitArray[j][0]
- traitSubarray = traitArray[j][1]
-
- if len(traitSubarray) == 0:
- continue
-
- targetTraits = decomposeIndex(traits, i)
- traitDesc = string.join(map(webqtlTrait.displayName, targetTraits),
- ", ")
-
- if j in displayDecomposition:
- checked = "CHECKED"
- else:
- checked = ""
-
- barWidth = (len(traitSubarray) * screenWidth) / maxLength
- output += ('''<tr bgcolor="%s">
- <td><input type="checkbox" name="display%d" value="1" %s>%s</input></td>
- <td>%s</td>
- <td><img src="/images/blue.png" width="%d" height="25"></td></tr>'''
- % (colors[flip], j, checked, traitDesc, len(traitSubarray), barWidth))
- flip = not flip
-
- output += '''
- <tr>
- <td colspan="3">
- <input class="button" type="submit" value="Filter" /></td>
- </tr>
- </table></form>
- '''
- return output
-
- # showDetailedCorrelationResults
- # actually show the traits in each subarray
- def showDetailedCorrelationResults(self, p, inbredSetName, traits,
- traitArray):
- output = "<h2>Correlation Comparison Details</h2>"
-
- # the hidden form below powers all of the JavaScript links,
- # the shopping cart links, and the correlation plot links
-
- output += '''
- <form action="%s%s" method="post">
- <input type="hidden" name="database" value="%s">
- <input type="hidden" name="FormID" value="showDatabase">
- <input type="hidden" name="traitfile" value="">
- <input type="hidden" name="incparentsf1" value="ON">
- <input type="hidden" name="ShowStrains" value="ON">
- <input type="hidden" name="ProbeSetID" value="">
- <input type="hidden" name="ShowLine" value="ON">
- <input type="hidden" name="RISet" value="%s">
- <input type="hidden" name="CellID" value="">
- <input type="hidden" name="database2" value="">
- <input type="hidden" name="rankOrder" value="">
- <input type="hidden" name="ProbeSetID2" value="">
- <input type="hidden" name="CellID2" value="">
- ''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, p["targetDatabase"], inbredSetName)
-
-
- displayDecomposition = binaryDecompose(p["displaySets"])
-
- # necessary to ensure that subset order is the same in the
- # summary and the detailed view
- displayDecomposition.sort()
-
- # here's a trick: the first trait we show must have the widest row because it correlates
- # with the largest set of input traits
- firstSubset = traitArray[displayDecomposition[0]]
- firstTrait = firstSubset[1][0][0]
- extraColumnCount = firstSubset[2]
- totalColumnCount = 1 + len(firstTrait.row()) + extraColumnCount
-
- output += "<table cellpadding=2 cellspacing=0>\n"
- for j in displayDecomposition:
- i = traitArray[j][0]
- traitSubarray = traitArray[j][1]
-
- # we don't display trait combinations for which there are
- # no correlations
- if len(traitSubarray) == 0:
- continue
-
- # generate a description of the traits that this particular array
- # matches highly
- targetTraits = decomposeIndex(traits, i)
- extraColumnHeaders = map(webqtlTrait.displayName, targetTraits)
- traitDesc = string.join(extraColumnHeaders, ", ")
-
- # massage extraColumnHeaders so that they can be wrapped
- for i in range(len(extraColumnHeaders)):
- ech = extraColumnHeaders[i]
- ech = ech.replace("-", " ")
- ech = ech.replace("_", " ")
- extraColumnHeaders[i] = ech
-
- # pad extraColumnHeaders if we have less columns than the max
- paddingNeeded = extraColumnCount - len(extraColumnHeaders)
- if paddingNeeded > 0:
- extraColumnHeaders.extend(paddingNeeded * ["&nbsp;"])
-
- # we limit the output to the top ones
- if(p["subsetSize"] != -1 and len(traitSubarray) > p["subsetSize"]):
- traitDesc += " (showing top %s of %s)" % (p["subsetSize"], len(traitSubarray))
- traitSubarray = traitSubarray[0:p["subsetSize"]]
-
- # combine that description with actual database traits themselves
- # and the correlation values
- output += '<tr><td colspan="%d"><h3>%s</h3></td></tr>' % (totalColumnCount, traitDesc)
- #output += '<h3>%s</h3>\n<table cellpadding=2 cellspacing=0>\n'% traitDesc
-
- # we assume that every trait in traitSubarray is the same type
- # of trait
- flip = 0
- colors = ["FFFFFF", "cccccc"]
-
- output += traitSubarray[0][0].tableRowHeader(["&nbsp;"], extraColumnHeaders, colors[0])
-
- for traitPair in traitSubarray:
- corr = []
- traitPair[0].dbName = p['targetDatabase']
- trait = traitPair[0]
-
- for i in range(len(traitPair[1])):
- corrValue = traitPair[1][i]
- corrPlotLink = ('''
- <a href="javascript:showCorrelationPlot2(db='%s',ProbeSetID='%s',CellID='',db2='%s',ProbeSetID2='%s',CellID2='',rank='%s')">%.2f</a>
- ''' % (p["targetDatabaseName"], trait.name, targetTraits[i].db.name, targetTraits[i].name, "0", corrValue))
- corr.append(corrPlotLink)
-
- corr.extend(paddingNeeded * ["&nbsp;"])
-
- checkbox = ('<INPUT TYPE="checkbox" NAME="searchResult" VALUE="%s:%s" />'
- % (p["targetDatabaseName"], trait.name))
- flip = not flip
- output += traitPair[0].tableRow([checkbox], corr, colors[flip])
-
- #output += "</table>"
- i += 1
- output += '<tr><td colspan="%d">&nbsp;</td></tr>' % totalColumnCount
-
- output += "</table>"
-
- # print form buttons if there were checkboxes above
- output += '''
- <div align="left">
- <INPUT TYPE="button" NAME="addselect" CLASS="button" VALUE="Add to Collection"
- onClick="addRmvSelection('%s',this.form, 'addToSelection');">
- <INPUT TYPE="button" NAME="selectall" CLASS="button" VALUE="Select All" onClick="checkAll(this.form);">
- <INPUT TYPE="reset" CLASS="button" VALUE="Select None">
- </div>
- </form>
- ''' % inbredSetName
-
- return output
-
- # showCorrelationResults: ParamDict -> listof Trait -> tupleof (int,arrayof trait) -> String
- # to build an output display for the multitrait correlation results
- def showCorrelationResults(self, p, inbredSetName, traits, traitArray):
- output = '''
- <h2>Correlation Comparison Summary</h2>
- <p>
- %s correlations were computed for each of the selected traits with each trait in
- the <a href="/dbdoc/%s.html">%s</a> database.
- Subsets of database traits for which correlations were higher than %s
- or lower than -%s are shown below based on which traits
- they correlated highly with. The top %s subsets, ranked by the number of input traits that
- they correspond with, are shown, and at most %s traits in each subset are shown. </p>
- ''' % (p["correlationName"],
- p["targetDatabase"], p["targetDatabaseName"],
- p["threshold"], p["threshold"], p["subsetCount"],
- p["subsetSize"])
-
-
- totalTraits = 0
- for j in range(len(traitArray)):
- totalTraits += len(traitArray[j][1])
-
- if totalTraits == 0:
- output += """
- <p>
- No shared corrrelates were found with your given traits at this
- threshold. You may wish to lower the correlation threshold or choose different traits.
- </p>
- """
- else:
- output += self.showSummaryCorrelationResults(p, traits, traitArray)
- output += self.showDetailedCorrelationResults(p, inbredSetName,
- traits, traitArray)
-
- return output
-
-# decomposeIndex: (listof Trait) -> Int ->
-# (listof Trait)
-# to use i to partition T into a sublist
-# each bit in i controls the inclusion or exclusion of a trait
-def decomposeIndex(traits, i):
- targetTraits = []
-
- for j in range(len(traits)):
- # look, mom, a bitwise and!
- # expression below tests whether the jth bit is
- # set in i
- # see runCorrelation for how we decompose the
- # array index
- if (i & pow(2,j)) == pow(2,j):
- targetTraits.append(traits[j])
-
- return targetTraits
-
-# binaryDecompose: int -> (listof int)
-# to decompose a number into its constituent powers of 2
-# returns a list of the exponents a_1...a_n such that the input m
-# is m = 2^a_1 + ... + 2^a_n
-def binaryDecompose(n):
- if n == 0:
- return []
-
- # we start with the highest power of 2 <= this number
- # and work our way down, subtracting powers of 2
- start = long(math.floor(math.log(n)/math.log(2)))
-
- exponents = []
- while start >= 0:
- if n >= long(math.pow(2, start)):
- n -= math.pow(2,start)
- exponents.append(start)
- start -= 1
- return exponents
-
-# powerOf : int -> int -> boolean
-# to determine whether m is a power of n;
-# more precisely, whether there exists z in Z s.t.
-# n^z = m
-def powerOf(m, n):
- trialZ = math.floor(math.log(m)/math.log(n))
- return pow(n,trialZ) == m
-
-
-class compCorrPage(templatePage.templatePage):
- def __init__(self,fd):
- templatePage.templatePage.__init__(self, fd)
-
- if not self.openMysql():
- return
-
- cursor = self.cursor
- params = buildParamDict(cursor, fd)
-
- # get the input data
- inbredSetName, traits = readInputFile(cursor, RootDir + params["filename"])
-
- # and what we are comparing the data to
- dbTraits = []
- if params["targetDatabaseType"] != "ProbeSet":
- dbTraits = loadDatabase(cursor, params)
-
-
- # run the comparison itself
- strainCount = trait.queryStrainCount(cursor) # XZ, 09/10/2008: add module name
- if params["targetDatabaseType"] == "ProbeSet":
- results = runProbeSetCorrelations(cursor, params, traits)
- else:
- results = runCorrelations(params, strainCount, traits, dbTraits)
-
- # try to be smart about what to output:
- # we want to limit the number of traits shown, at least initially
- # and since traitArray is already sorted with most interesting
- # subsets first, we simply pick up the first 500 or so traits
- # that we find
- if params["displaySets"] == 0:
- selectedTraits = 0
- for j in range(len(results)):
- #print "Scanning subarray %d" % j
- if selectedTraits <= 200:
- params["displaySets"] += pow(2, j)
- selectedTraits += len(results[j][1])
-
- traitList = []
- for oneTrait in traits: # XZ, 09/10/2008: change the original variable name 'trait' to 'oneTrait'
- traitName = oneTrait.dbName+'::'+oneTrait.name # XZ, 09/10/2008: change the original variable name 'trait' to 'oneTrait'
- aTrait = webqtlTrait(cursor=self.cursor, fullname=traitName)
- traitList.append(aTrait)
-
- # and generate some output
- txtOutputFilename = tempfile.mktemp()
- txtOutputHandle = open(txtOutputFilename, "w")
- txtOutput = TraitCorrelationText(params, traits, results)
- txtOutputHandle.write(str(txtOutput))
- txtOutputHandle.close()
- txtOutputFilename = os.path.split(txtOutputFilename)[1]
-
- self.dict['body'] = TraitCorrelationPage(fd, params, cursor, traitList,
- results, inbredSetName,
- txtOutputFilename).dict['body']
diff --git a/web/webqtl/compareCorrelates/trait.py b/web/webqtl/compareCorrelates/trait.py
deleted file mode 100755
index ff1f8119..00000000
--- a/web/webqtl/compareCorrelates/trait.py
+++ /dev/null
@@ -1,1074 +0,0 @@
-#Trait.py
-#
-#--Individual functions are already annotated, more or less.
-#
-#Classes:
-#RawPoint
-#Trait
-#ProbeSetTrait
-#GenotypeTrait
-#PublishTrait
-#TempTrait
-#-KA
-
-# trait.py: a data structure to represent a trait
-import time
-import string
-
-CONFIG_pubMedLinkURL = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=%s&dopt=Abstract"
-
-# RawPoint: to store information about the relationship between two particular
-# traits
-# RawPoint represents directly the input file
-class RawPoint:
- def __init__(self, i, j):
- self.i = i
- self.j = j
-
- def __eq__(self, other):
- return (self.i == other.i and
- self.j == other.j and
- self.spearman == other.spearman and
- self.pearson == other.pearson)
-
- def __str__(self):
- return "(%s,%s,%s,%s)" % (self.i, self.j, self.spearman, self.pearson)
-
-def tdEscapeList(cols, align="left"):
- """
- A helper function used by tableRow
- in Trait that will convert a list of strings into a set of
- table cells enclosed by <td>%s</td> tags
- """
- html = ""
- for col in cols:
- html += '<td style="text-align: %s">%s</td>' % (align, col)
- return html
-
-def thEscapeList(cols):
- """
- A helper function used by tableRowHeader
- in Trait that will convert a list of strings into a set of
- table cells enclosed by <td>%s</td> tags
- """
- html = ""
- for col in cols:
- html += "<th>%s</th>" % col
- return html
-
-def commaEscapeList(cols):
- """
- A helper function used by csvHeader and csvRow.
- Really it's just a wrapper for string.join
- """
- return '"' + string.join(cols, '","') + '"'
-
-
-class Trait:
- """
- A trait represents an attribute of an object. In the WebQTL database, traits are stored
- as ProbeSets; that is, the average values of a set of probes are stored.
- """
- def __init__(self, id="", name="", description="", symbol="", href=""):
- self.id = id
- self.name = name
- self.dbName = ""
- self.symbol = symbol
- self.href = href
- self.strainData = {}
-
- def populateDataId(self, cursor, freezeId):
- """
- Retrieve the dataId for trait data corresponding to the given database
- The way to do this depends on the particular type of trait, so we leave implementation
- to subclasses.
- """
- raise NotImplementedError
-
- def populateStrainData(self, cursor):
- """
- Load this trait full of train data corresponding to the data id
- The data id can either come from populateDataId
- or can be set manually by the user of this class.
- Xiaodong added: The way to do this depends on the particular type of trait,
- so we leave implementation to subclasses.
-
- """
- raise NotImplementedError
-
- def shortName(self):
- """
- To return a short name for this trait; this name should be
- appropriate for a row or column title
- """
- return self.name
-
- def nameNoDB(self):
- """
- To return only the short name without the database attached
- """
- strArray = self.shortName().split('::')
-
- return strArray[1]
-
- def datasetName(self):
- """
- To return only the name of the dataset
- """
- strArray = self.shortName().split('::')
-
- return strArray[0].strip()
-
- def longName(self):
- """
- To return a long name for this trait; this name should be
- appropriate for a key to a table
- """
- return self.shortName()
-
- def __str__(self):
- return self.shortName()
-
- def tableRowHelper(self, beforeCols, afterCols, color, thisRow):
- """
- tableRowHelper: (arrayof String) -. String
- To generate a table row to represent this object, appending
- the additional information in beforeCols and afterCols
- to the beginning and the end
- """
- thisRow[0] = '<a href="%s">%s</a>' % (self.traitInfoLink(),
- self.name)
- html = '<tr bgcolor="%s">' % color
- html += tdEscapeList(beforeCols + thisRow)
- html += tdEscapeList(afterCols, align="right")
- html += "</tr>"
-
- return html
-
-
- def header(self):
- """
- header: (listof String)
- To generate a list of strings describing each piece of data
- returned by row
- """
- raise NotImplementedError
-
- def row(self):
- """
- row: (listof String)
- To generate a list of strings describing this object. The
- elements of this list should be described by header()
- """
- raise NotImplementedError
-
- def tableRowHeader(self, beforeCols, afterCols, color):
- """
- tableRowHeader: (arrayof String) -> (arrayof String) -> String
- To generate a table row header to represent this object,
- appending the additional information in beforeCols and
- afterCols to the beginning and end
- """
- html = '<tr bgcolor="%s">' % color
- html += thEscapeList(beforeCols + self.header() +
- afterCols)
- html += "</tr>"
- return html
-
- def csvHeader(self, beforeCols, afterCols):
- return commaEscapeList(beforeCols + self.header() + afterCols)
-
- def csvRow(self, beforeCols, afterCols):
- return commaEscapeList(beforeCols + self.row() + afterCols)
-
-
- def traitInfoLink(self):
- """
- To build a trait info link to show information about this
- trait. We assume that the database attribute is properly set
- on the hidden form on the page where this link will go.
- """
- return "javascript:showDatabase2('%s','%s','')" % (self.dbName, self.name)
-
-# ProbeSetTrait: a trait with data from a probeset
-class ProbeSetTrait(Trait):
- def __init__(self, id="", name="", description="", symbol="", href="",
- chromosome="", MB="", GeneId=""):
- Trait.__init__(self, id=id, name=name, href=href)
- self.description = description
- self.symbol = symbol
- self.chromosome = chromosome
- self.MB = MB
- self.GeneId = GeneId
-
- def populateDataId(self, cursor, freezeId):
- """
- Look up the data id for this trait given which
- freeze it came from.
- """
- cursor.execute('''
- SELECT
- ProbeSetXRef.DataId
- FROM
- ProbeSetXRef
- WHERE
- ProbeSetId = %s AND
- ProbeSetFreezeId = %s
- ''' % (self.id, freezeId))
-
- # we hope that there's only one record here
- row = cursor.fetchone()
- self.dataId = row[0]
-
- #XZ, 03/03/2009: Xiaodong implemented this fuction
- def populateStrainData(self, cursor):
- cursor.execute('''
- SELECT
- ProbeSetData.StrainId,
- ProbeSetData.value
- FROM
- ProbeSetData
- WHERE
- ProbeSetData.Id = %s''' % self.dataId)
- for row in cursor.fetchall():
- self.strainData[int(row[0])] = float(row[1])
-
-
- def shortName(self):
- """
- An improved string method that uses the gene symbol where
- we have it
- """
- if self.symbol != "":
- return self.symbol
- else:
- return Trait.shortName(self)
-
- def longName(self):
- """
- We use several bits of genetic information to give
- useful information about this trait and where it is
- """
- if self.chromosome != "":
- chrPart = " (%s on Chr %s @ %s Mb)" % (self.symbol,
- self.chromosome,
- self.MB)
- else:
- chrPart = ""
-
- return "%s%s: %s" % (self.name, chrPart, self.description)
-
- def header(self):
- return ["Name", "Symbol", "Description",
- "Chr", "Position (Mb)"]
-
- def row(self):
- if type(self.MB) is float:
- MB = "%.2f" % self.MB
- else:
- MB = ""
-
- return [self.name, self.symbol, self.description,
- self.chromosome, MB]
-
- def tableRow(self, beforeCols, afterCols, color):
- """
- tableRow: (arrayof String) -> (arrayof String) -> String
- To generate a table row to represent this object, appending
- the additional information in beforeCols and afterCols to the
- beginning and end
- """
- thisRow = self.row()
-
- # trim description
- if len(thisRow[2]) > 20:
- thisRow[2] = thisRow[2][:20] + "..."
-
- # add NCBI info link
- thisRow[1] = self.ncbiInfoLink()
-
- return self.tableRowHelper(beforeCols, afterCols, color,
- thisRow)
-
-
- def ncbiInfoLink(self):
- """
- ncbiInfoLink :: String
- To generate an NCBI info link for this trait. If we have a GeneId,
- then we can go straight to the gene. If not, then we generate a search
- link based on the gene symbol. If we have none of them, then we don't
- generate a link at all.
- """
- if self.GeneId != "":
- cmd = "cmd=Retrieve&dopt=Graphics&list_uids=%s" % self.GeneId
- elif self.symbol != "":
- cmd = "cmd=Search&term=%s" % self.symbol
- else:
- return ""
-
- return '''
- <a target="_new"
- href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&%s">
- %s</a> ''' % (cmd, self.symbol)
-
-
-# GenotypeTrait: a trait with data from the genotype
-class GenotypeTrait(Trait):
- def __init__(self, id="", name="", href="", chromosome="", MB=""):
- Trait.__init__(self, id=id, name=name, href=href)
- self.chromosome = chromosome
- self.MB = MB
-
- def populateDataId(self, cursor, freezeId):
- """
- Look up the data id for this trait from the
- genotype.
- """
- cursor.execute('''
- SELECT
- GenoXRef.DataId
- FROM
- GenoXRef
- WHERE
- GenoId = %s AND
- GenoFreezeId = %s
- ''' % (self.id, freezeId))
-
- # we hope that there's only one record here
- row = cursor.fetchone()
- self.dataId = row[0]
-
- #XZ, 03/03/2009: Xiaodong implemented this fuction
- def populateStrainData(self, cursor):
- cursor.execute('''
- SELECT
- GenoData.StrainId,
- GenoData.value
- FROM
- GenoData
- WHERE
- GenoData.Id = %s''' % self.dataId)
- for row in cursor.fetchall():
- self.strainData[int(row[0])] = float(row[1])
-
- def header(self):
- return ["Locus", "Chr", "Position (Mb)"]
-
- def row(self):
- return [self.name, self.chromosome, "%.3f" % self.MB]
-
- def tableRow(self, beforeCols, afterCols, color):
- return self.tableRowHelper(beforeCols, afterCols, color, self.row())
-
-# PublishTrait: a trait with data from publications
-class PublishTrait(Trait):
- def __init__(self, id="", name="", href="", authors="", title="",
- phenotype="", year=""):
- Trait.__init__(self, id=id, name=name, href=href)
- self.authors = authors
- self.title = title
- self.phenotype = phenotype
- self.year = year
-
- def populateDataId(self, cursor, freezeId):
- """
- Look up the data id for this trait from the
- published set. For the moment, we assume that there's
- only one publish freeze.
- """
- cursor.execute('''
- SELECT
- PublishXRef.DataId
- FROM
- PublishXRef, PublishFreeze
- WHERE
- PublishFreeze.Id = %s AND
- PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND
- PublishXRef.Id = %s
- ''' % (freezeId, self.id))
-
- # we hope that there's only one record here
- row = cursor.fetchone()
- self.dataId = row[0]
-
- #XZ, 03/03/2009: Xiaodong implemented this fuction
- def populateStrainData(self, cursor):
- cursor.execute('''
- SELECT
- PublishData.StrainId,
- PublishData.value
- FROM
- PublishData
- WHERE
- PublishData.Id = %s''' % self.dataId)
- for row in cursor.fetchall():
- self.strainData[int(row[0])] = float(row[1])
-
-
- def longName(self):
- """
- A more intelligent string function that uses
- information about the publication from which this trait came
- """
- return "%s: %s by %s" % (self.name, self.title, self.authors)
-
- def header(self):
- return ["Record", "Phenotype", "Authors", "Year", "URL"]
-
- def row(self):
- return [self.name,
- self.phenotype,
- self.authors,
- str(self.year),
- ""]
-
- def tableRow(self, beforeCols, afterCols, color):
- """
- tableRow: (arrayof String) -> (arrayof String) -> String
- To generate a table row to represent this object, appending
- the additional information in beforeCols and afterCols to the
- beginning and end
- """
- thisRow = self.row()
-
- # for multiple authors, use "et. al" after first two
- authors = thisRow[2].split(",")
- if len(authors) > 2:
- thisRow[2] = string.join(authors[:2], ",") + ", et al"
-
- # clip phenotype to 20 chars
- if len(thisRow[1]) > 20:
- thisRow[1] = thisRow[1][:20] + "..."
-
- # add Pub Med URL
- thisRow[4] = '<a href="%s" target="_new">Pub Med</a>' % (CONFIG_pubMedLinkURL % self.href)
-
- return self.tableRowHelper(beforeCols, afterCols, color,
- thisRow)
-
-
-# TempTrait: a trait with data generate by user and stored in temp table
-class TempTrait(Trait):
- def __init__(self, id="", name="", href="", description=""):
- Trait.__init__(self, id=id, name=name, href=href)
- self.description = description
-
- def populateDataId(self, cursor, freeezeId):
- """
- Look up the data id for this trait from the Temp table, freezeId isn't used,
- it just for fixing the inherit
- """
- cursor.execute('''
- SELECT
- DataId
- FROM
- Temp
- WHERE
- Id=%s
- ''' % (self.id))
-
- # we hope that there's only one record here
- row = cursor.fetchone()
- self.dataId = row[0]
-
- #XZ, 03/03/2009: Xiaodong implemented this fuction
- def populateStrainData(self, cursor):
- cursor.execute('''
- SELECT
- TempData.StrainId,
- TempData.value
- FROM
- TempData
- WHERE
- TempData.Id = %s''' % self.dataId)
- for row in cursor.fetchall():
- self.strainData[int(row[0])] = float(row[1])
-
-
- def row(self):
- return [self.id,
- self.name,
- self.description,
- ""]
-
-
- def longName(self):
- """
- For temp trait, the description always contents whole useful information
- """
- return self.description
-
-
-# queryGenotypeTraitByName : Cursor -> string -> GenotypeTrait
-def queryGenotypeTraitByName(cursor, speciesId, name):
- qry = '''
- SELECT
- Geno.Id,
- Geno.Name,
- Geno.Chr,
- Geno.Mb
- FROM
- Geno
- WHERE
- Geno.SpeciesId = %s and Geno.Name = "%s" ''' % (speciesId, name)
-
- cursor.execute(qry)
- row = cursor.fetchone()
- return GenotypeTrait(id=row[0], name=row[1],
- chromosome=row[2], MB=row[3])
-
-# queryPublishTraitByName : Cursor -> string -> PublishTrait
-def queryPublishTraitByName(cursor, freezeId, name):
- qry = '''
- SELECT
- PublishXRef.Id,
- Phenotype.Id,
- Publication.Authors,
- Publication.Title,
- Publication.Year,
- Publication.PubMed_ID
- FROM
- Publication, PublishXRef, Phenotype, PublishFreeze
- WHERE
- PublishFreeze.Id = %s AND
- PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND
- PublishXRef.Id = %s AND
- PublishXRef.PublicationId = Publication.Id AND
- PublishXRef.PhenotypeId = Phenotype.Id
- ''' % (freezeId, name)
-
- cursor.execute(qry)
- if cursor.rowcount == 0:
- return None
- else:
- row = cursor.fetchone()
-
- return PublishTrait(id=row[0], name='%s'%row[0],
- authors=row[2], title=row[3],
- year=row[4], href=row[5])
-
-
-def queryTempTraitByName(cursor, name):
- name=name.strip()
- qry = '''
- SELECT
- Temp.Id,
- Temp.Name,
- Temp.description
- FROM
- Temp
- WHERE
- Temp.Name= "%s"
- ''' % (name)
-
- cursor.execute(qry)
- if cursor.rowcount == 0:
- return None
- else:
- row = cursor.fetchone()
- return TempTrait(id=row[0], name=row[1], description=row[2], href='')
-
-# queryPopulatedProbeSetTraits: Cursor -> Integer -> dictof Trait
-# to retrieve an entire probeset fully populated with data
-# this query can take 15+ sec the old way (22,000 traits * 35 strains = half
-# a million records)
-# so we ask for the data in bulk
-#
-# cursor should be SSCursor for MySQL so rows are stored on the server side
-# and tuples are used
-# we explicitly close the cursor here as well
-#XZ, 03/04/2009: It seems to me that this function is never be executed.
-#XZ: Although it can be called from multitrait.loadDatabase,
-#XZ: but the loadDatabase function will not be called
-#XZ: if the targetDatabaseType is probeset.
-#XZ: The probeset traits of target database are retrieved by execute
-#XZ: queryPopulatedProbeSetTraits2 from correlation.calcProbeSetPearsonMatrix
-def queryPopulatedProbeSetTraits(cursor, freezeId):
- step1 = time.time()
- traits = queryProbeSetTraits(cursor, freezeId)
- traitDict = {}
- for trait in traits:
- traitDict[trait.id] = trait
-
- step2 = time.time()
- print
- #XZ, 03/04/2009: Xiaodong changed Data to ProbeSetData
- cursor.execute('''
- SELECT
- ProbeSetXRef.ProbeSetId,
- ProbeSetData.StrainId,
- ProbeSetData.value
- FROM
- ProbeSetXRef
- Left Join ProbeSetData ON
- ProbeSetXRef.DataId = ProbeSetData.Id
- WHERE
- ProbeSetXRef.ProbeSetFreezeId = %s
- ''' % freezeId)
-
- step3 = time.time()
- totalrows = 0
- somerows = cursor.fetchmany(1000)
- while len(somerows) > 0:
- totalrows += len(somerows)
- for row in somerows:
- # this line of code can execute more than one million times
- traitDict[row[0]].strainData[int(row[1])] = row[2]
- somerows = cursor.fetchmany(1000)
-
- #cursor.close()
- step4 = time.time()
-
- time1 = step2 - step1
- time2 = step3 - step2
- time3 = step4 - step3
- time4 = step4 - step1
- #print "%f %f %f %f %d rows" % (round(time1, 2),
- # round(time2, 2),
- # round(time3, 2),
- # round(time4, 2),
- # totalrows)
- #print "Fetched %d traits" % len(traits)
- return traits
-
-
-# queryPopulatedProbeSetTraits2: Cursor -> Integer -> dictof Trait
-# to retrieve probeset fully populated whose ProbeSetId in a range
-# a special ProbeSetId with data
-# this query can take 15+ sec the old way (22,000 traits * 35 strains = half
-# a million records)
-# so we ask for the data in bulk
-#
-# cursor should be SSCursor for MySQL so rows are stored on the server side
-# and tuples are used
-# we explicitly close the cursor here as well
-def queryPopulatedProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2):
- step1 = time.time()
- traits = queryProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2)
- traitDict = {}
- for trait in traits:
- traitDict[trait.id] = trait
-
- step2 = time.time()
- print
- #XZ, 03/04/2009: Xiaodong changed Data to ProbeSetData
- cursor.execute('''
- SELECT
- ProbeSetXRef.ProbeSetId,
- ProbeSetData.StrainId,
- ProbeSetData.value
- FROM
- ProbeSetXRef
- Left Join ProbeSetData ON
- ProbeSetXRef.DataId = ProbeSetData.Id
- WHERE
- ProbeSetXRef.ProbeSetFreezeId = %s AND
- ProbeSetXRef.ProbeSetId >= %s AND
- ProbeSetXRef.ProbeSetId <= %s
- ''' % (freezeId, ProbeSetId1, ProbeSetId2))
-
- step3 = time.time()
- totalrows = 0
- somerows = cursor.fetchmany(1000)
- while len(somerows) > 0:
- totalrows += len(somerows)
- for row in somerows:
- # this line of code can execute more than one million times
- traitDict[row[0]].strainData[int(row[1])] = row[2]
- somerows = cursor.fetchmany(1000)
-
- #cursor.close()
- step4 = time.time()
-
- time1 = step2 - step1
- time2 = step3 - step2
- time3 = step4 - step3
- time4 = step4 - step1
- #print "%f %f %f %f %d rows" % (round(time1, 2),
- # round(time2, 2),
- # round(time3, 2),
- # round(time4, 2),
- # totalrows)
- #print "Fetched %d traits" % len(traits)
- return traits
-
-
-# def noneFilter : string or none -> string
-# to replace a possible None by an empty string
-def noneFilter(x):
- if x is None:
- return ""
- else:
- return x
-
-# queryProbeSetTraits: Cursor -> Integer -> dictof Trait
-def queryProbeSetTraits(cursor, freezeId):
- """
- To locate all of the traits in a particular probeset
- """
- qry = '''
- SELECT
- ProbeSet.Id,
- ProbeSet.Name,
- ProbeSet.description,
- ProbeSet.symbol,
- ProbeSet.Chr,
- ProbeSet.Mb,
- ProbeSet.GeneId,
- ProbeSetXRef.DataId
- FROM
- ProbeSet,
- ProbeSetXRef
- WHERE
- ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
- ProbeSetXRef.ProbeSetFreezeId = %s
- ORDER BY ProbeSet.Id
- ''' % freezeId
-
- cursor.execute(qry)
- rows = cursor.fetchall()
- traits = []
-
- for row in rows:
- row = map(noneFilter, row)
- trait = ProbeSetTrait(id=row[0], name=row[1],
- description=row[2],
- chromosome=row[4],
- MB=row[5],
- symbol=row[3],
- GeneId=row[6])
- trait.dataId = row[7]
- traits.append(trait)
-
- return traits
-
-
-# queryProbeSetTraits2: Cursor -> Integer -> dictof Trait
-def queryProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2):
- """
- To locate all of the traits in a particular probeset
- """
- qry = '''
- SELECT
- ProbeSet.Id,
- ProbeSet.Name,
- ProbeSet.description,
- ProbeSet.symbol,
- ProbeSet.Chr,
- ProbeSet.Mb,
- ProbeSet.GeneId,
- ProbeSetXRef.DataId
- FROM
- ProbeSet,
- ProbeSetXRef
- WHERE
- ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
- ProbeSetXRef.ProbeSetFreezeId = %s AND
- ProbeSet.Id >= %s AND
- ProbeSet.Id <= %s
- ORDER BY ProbeSet.Id
- ''' % (freezeId, ProbeSetId1, ProbeSetId2)
-
- cursor.execute(qry)
- rows = cursor.fetchall()
- traits = []
-
- for row in rows:
- row = map(noneFilter, row)
- trait = ProbeSetTrait(id=row[0], name=row[1],
- description=row[2],
- chromosome=row[4],
- MB=row[5],
- symbol=row[3],
- GeneId=row[6])
- trait.dataId = row[7]
- traits.append(trait)
-
- return traits
-
-
-# queryPublishTraits : Cursor -> arrayof Trait
-def queryPublishTraits(cursor, freezeId):
- """
- To locate all published traits
- """
- qry = '''
- SELECT
- Publication.Id,
- Publication.Name,
- Publication.PhenoType,
- Publication.Authors,
- Publication.Title,
- Publication.Year,
- Publication.PubMed_ID,
- PublishXRef.DataId
- FROM
- Publication,
- PublishXRef
- WHERE
- PublishXRef.PublishFreezeId = %s AND
- PublishXRef.PublishId = Publication.Id
- ''' % freezeId
-
- qry = '''
- SELECT
- Publication.Id,
- PublishXRef.Id,
- Phenotype.Pre_publication_description,
- Phenotype.Post_publication_description,
- Publication.Authors,
- Publication.Title,
- Publication.Year,
- Publication.PubMed_ID,
- PublishXRef.DataId
- FROM
- Publication, PublishXRef, Phenotype, PublishFreeze
- WHERE
- PublishFreeze.Id = %s AND
- PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND
- PublishXRef.PublicationId = Publication.Id AND
- PublishXRef.PhenotypeId = Phenotype.Id
- ''' % freezeId
- cursor.execute(qry)
- rows = cursor.fetchall()
- traits = []
- for row in rows:
- PhenotypeString = row[3]
- if not row[7] and row[2]:
- PhenotypeString = row[2]
- trait = PublishTrait(id=row[0], name= '%s' %row[1],
- phenotype=PhenotypeString,
- authors=row[4],
- title=row[5],
- year=row[6],
- href=row[7])
- trait.dataId = row[8]
- traits.append(trait)
-
- return traits
-
-# queryGenotypeTraits : Cursor -> arrayof Trait
-def queryGenotypeTraits(cursor, freezeId):
- """
- To locate all traits in the genotype
- """
- qry = '''
- SELECT
- Geno.Id,
- Geno.Name,
- Geno.Chr,
- GenoXRef.DataId,
- Geno.Mb
- FROM
- Geno,
- GenoXRef
- WHERE
- GenoXRef.GenoId = Geno.Id
- AND GenoXRef.GenoFreezeId = %s
- ''' % freezeId
- cursor.execute(qry)
- rows = cursor.fetchall()
- traits = []
-
- for row in rows:
- trait = GenotypeTrait(id=row[0], name=row[1],
- chromosome=row[2], MB=row[4])
- trait.dataId = row[3]
- traits.append(trait)
-
- return traits
-
-# queryProbeSetTraitByName : Cursor -> string -> Trait
-# to find a particular trait given its name
-def queryProbeSetTraitByName(cursor, name):
- qry = '''
- SELECT
- ProbeSet.Id,
- ProbeSet.Name,
- ProbeSet.description,
- ProbeSet.symbol,
- ProbeSet.Chr,
- ProbeSet.Mb,
- ProbeSet.GeneId
- FROM
- ProbeSet
- WHERE
- ProbeSet.Name = "%s"
- ''' % name
- #print qry
- cursor.execute(qry)
- row = cursor.fetchone()
-
- # convert a MySQL NULL value to an empty string
- # for gene symbol
- if row[3] is None:
- sym = ""
- else:
- sym = row[3]
-
- return ProbeSetTrait(id=row[0], name=row[1], description=row[2],
- symbol=sym, chromosome=row[4], MB=row[5],
- GeneId=row[6])
-
-
-# queryTraits : Cursor -> string -> string -> arrayof Traits
-# to find all of the traits whose descriptions match a certain string in a
-# particular database
-def queryTraits(cursor, dbId, queryString):
- # we do this in two steps:
- # first we get the data id for the matching traits
- qry = '''
- SELECT
- ProbeSet.Id,
- ProbeSet.Name,
- ProbeSet.description,
- ProbeSetXRef.DataId
- FROM
- ProbeSet,
- ProbeSetXRef
- WHERE
- ProbeSetXRef.ProbeSetFreezeId = %s AND
- ProbeSet.Id = ProbeSetXRef.ProbeSetId AND
- ProbeSet.description LIKE "%%%s%%"
- ''' % (dbId, queryString)
- # print qry
- cursor.execute(qry)
-
- if cursor.rowcount == 0:
- print "No traits found"
- return []
- else:
- print "%s traits found" % (cursor.rowcount)
-
- # maybe fetchall is bad; we will see
- traits = []
- for row in cursor.fetchall():
- myTrait = Trait(row[0], row[1], row[2])
- myTrait.dataId = row[3]
- traits.append(myTrait)
-
- # second we pull all of the strain data for each trait
- print "Retrieving individual trait data..."
- for trait in traits:
- trait.populateStrainData(cursor, trait.dataId)
- print "%s (%s) -- %s" % (trait.name, trait.id, trait.description)
-
- print "done"
- return traits
-
-# queryProbeSetFreezes : Cursor -> arrayof String,String tuples
-# to return the short and long name for each ProbeSetFreeze
-# this function is designed specifically for building
-# a database selector
-def queryProbeSetFreezes(cursor):
- cursor.execute("""
- SELECT
- ProbeSetFreeze.Name,
- ProbeSetFreeze.FullName
- FROM
- ProbeSetFreeze
- ORDER BY
- ProbeSetFreeze.Name
- """)
-
- # for now, fetchall returns the data as a list of tuples
- # which is what we want
- return list(cursor.fetchall())
-
-# queryProbeSetFreezeIdName: Cursor -> String -> String, String
-# this function returns the
-# id and the long name of a probesetfreeze given its name
-# once again, it's designed specifically for building
-# the database selector
-def queryProbeSetFreezeIdName(cursor, name):
- qry = ('''
- SELECT
- ProbeSetFreeze.Id,
- ProbeSetFreeze.FullName
- FROM
- ProbeSetFreeze
- WHERE
- ProbeSetFreeze.Name = "%s"
- ''' % name)
- #print qry
- cursor.execute(qry)
-
- row = cursor.fetchone()
- return row
-
-# queryProbeSetFreezeName: Cursor -> String -> String
-# to return the name of a probe set freeze given its id
-def queryProbeSetFreezeName(cursor, id):
- cursor.execute('''
- SELECT
- ProbeSetFreeze.FullName
- FROM
- ProbeSetFreeze
- WHERE
- ProbeSetFreeze.Id = %s
- ''' % id)
-
- row = cursor.fetchone()
- return row[0]
-
-# dbNameToTypeId : Cursor -> String -> (String, String)
-# to convert a database name to a (type, id) pair
-def dbNameToTypeId(cursor, name):
- types = ["ProbeSet", "Geno", "Publish"]
- for type_ in types:
- count = cursor.execute('''
- SELECT
- %sFreeze.Id
- FROM
- %sFreeze
- WHERE
- Name = "%s"
- ''' % (type_, type_, name))
-
- if count != 0:
- id = cursor.fetchone()[0]
- return type_, id
-
- return None, None
-
-# dbTypeIdToName : Cursor -> String -> String -> String
-# to convert a database (type,id) pair into a name
-def dbTypeIdToName(cursor, dbType, dbId):
- cursor.execute('''
- SELECT
- %sFreeze.Name
- FROM
- %sFreeze
- WHERE
- Id = %s
- ''' % (dbType, dbType, dbId))
-
- row = cursor.fetchone()
- return row[0]
-
-#XZ, July 21, 2010: I add this function
-def getSpeciesIdByDbTypeId (cursor, dbType, dbId):
- cursor.execute('''
- SELECT
- SpeciesId
- FROM
- InbredSet, %sFreeze
- WHERE
- %sFreeze.Id = %s
- and InbredSetId = InbredSet.Id
- ''' % (dbType, dbType, dbId))
-
- row = cursor.fetchone()
- return row[0]
-
-
-# queryStrainCount : Cursor -> int
-# return the number of strains in the database
-def queryStrainCount(cursor):
- cursor.execute('''
- SELECT
- Max(Strain.Id)
- FROM
- Strain
- ''')
- return (cursor.fetchone())[0]