diff options
Diffstat (limited to 'web/webqtl/compareCorrelates')
-rwxr-xr-x | web/webqtl/compareCorrelates/MultipleCorrelationPage.py | 108 | ||||
-rwxr-xr-x | web/webqtl/compareCorrelates/__init__.py | 0 | ||||
-rwxr-xr-x | web/webqtl/compareCorrelates/correlation.py | 359 | ||||
-rwxr-xr-x | web/webqtl/compareCorrelates/htmlModule.py | 279 | ||||
-rwxr-xr-x | web/webqtl/compareCorrelates/multitrait.py | 1121 | ||||
-rwxr-xr-x | web/webqtl/compareCorrelates/trait.py | 1074 |
6 files changed, 0 insertions, 2941 deletions
diff --git a/web/webqtl/compareCorrelates/MultipleCorrelationPage.py b/web/webqtl/compareCorrelates/MultipleCorrelationPage.py deleted file mode 100755 index 6a464ab6..00000000 --- a/web/webqtl/compareCorrelates/MultipleCorrelationPage.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU Affero General Public License -# as published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the GNU Affero General Public License for more details. -# -# This program is available from Source Forge: at GeneNetwork Project -# (sourceforge.net/projects/genenetwork/). -# -# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) -# at rwilliams@uthsc.edu and xzhou15@uthsc.edu -# -# -# -# This module is used by GeneNetwork project (www.genenetwork.org) -# -# Created by GeneNetwork Core Team 2010/08/10 -# -# Last updated by GeneNetwork Core Team 2010/10/20 - -from base.templatePage import templatePage -from utility import webqtlUtil -from base.webqtlTrait import webqtlTrait -from base import webqtlConfig -import multitrait - -# XZ, 09/09/2008: After adding several traits to collection, click "Compare Correlates" button, -# XZ, 09/09/2008: This class will generate what you see. -# XZ, 09/09/2008: This class just collect the input, then pass them to multitrait.py -######################################### -# Multiple Correlation Page -######################################### -class MultipleCorrelationPage(templatePage): - - def __init__(self,fd): - - templatePage.__init__(self, fd) - - if not self.openMysql(): - return - if not fd.genotype: - fd.readData() - - self.searchResult = fd.formdata.getvalue('searchResult') - if not self.searchResult: - heading = 'Compare Correlates' - detail = ['You need to select at least two traits in order to generate correlation matrix.'] - self.error(heading=heading,detail=detail) - print 'Content-type: text/html\n' - self.write() - return - if type("1") == type(self.searchResult): - self.searchResult = [self.searchResult] - - if self.searchResult: - if len(self.searchResult) > 100: - heading = 'Compare Correlates' - detail = ['In order to display Compare Correlates properly, Do not select more than %d traits for Compare Correlates.' % 100] - self.error(heading=heading,detail=detail) - print 'Content-type: text/html\n' - self.write() - return - else: - pass - - traitList = [] - for item in self.searchResult: - thisTrait = webqtlTrait(fullname=item, cursor=self.cursor) - thisTrait.retrieveInfo() - traitList.append(thisTrait) - else: - heading = 'Compare Correlates' - detail = [HT.Font('Error : ',color='red'),HT.Font('Error occurs while retrieving data from database.',color='black')] - self.error(heading=heading,detail=detail) - print 'Content-type: text/html\n' - self.write() - return - - - ########## - filename= webqtlUtil.genRandStr("mult_") - fp = open(webqtlConfig.IMGDIR+filename, 'wb') - fp.write('%s\n' % fd.RISet) - for thisTrait in traitList: - fp.write("%s,%s,%s\n" % (thisTrait.db.type,thisTrait.db.id,thisTrait.name)) - fp.close() - fd.formdata["filename"] = filename - - params = {"filename":filename, "targetDatabase":"", - "threshold":0.5, "subsetSize":10, - "correlation":"pearson", "subsetCount":10, - "firstRun":"1"} - results = [] - txtOutputFileName = "" - - self.dict['body'] = multitrait.TraitCorrelationPage(fd, params, self.cursor, traitList, results, - fd.RISet,txtOutputFileName).dict['body'] - self.dict['title'] = 'Compare Correlates' - - - - diff --git a/web/webqtl/compareCorrelates/__init__.py b/web/webqtl/compareCorrelates/__init__.py deleted file mode 100755 index e69de29b..00000000 --- a/web/webqtl/compareCorrelates/__init__.py +++ /dev/null diff --git a/web/webqtl/compareCorrelates/correlation.py b/web/webqtl/compareCorrelates/correlation.py deleted file mode 100755 index f2ea55b3..00000000 --- a/web/webqtl/compareCorrelates/correlation.py +++ /dev/null @@ -1,359 +0,0 @@ -# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU Affero General Public License -# as published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the GNU Affero General Public License for more details. -# -# This program is available from Source Forge: at GeneNetwork Project -# (sourceforge.net/projects/genenetwork/). -# -# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) -# at rwilliams@uthsc.edu and xzhou15@uthsc.edu -# -# -# -# This module is used by GeneNetwork project (www.genenetwork.org) -# -# Created by GeneNetwork Core Team 2010/08/10 -# -# Last updated by GeneNetwork Core Team 2010/10/20 - -# correlation.py -# functions for computing correlations for traits -# -# Originally, this code was designed to compute Pearson product-moment -# coefficents. The basic function calcPearson scans the strain data -# for the two traits and drops data for a strain unless both traits have it. -# If there are less than six strains left, we conclude that there's -# insufficent data and drop the correlation. -# -# In addition, this code can compute Spearman rank-order coefficents using -# the calcSpearman function. - -#Xiaodong changed the dependancy structure -import numarray -import numarray.ma as MA -import time - -import trait - -# strainDataUnion : StrainData -> StrainData -> array, array -def strainDataUnion(s1, s2): - # build lists of values that both have - # and make sure that both sets of values are in the same order - s1p = [] - s2p = [] - sortedKeys = s1.keys() - sortedKeys.sort() - for s in sortedKeys: - if s2.has_key(s): - s1p.append(s1[s]) - s2p.append(s2[s]) - - return (numarray.array(s1p, numarray.Float64), - numarray.array(s2p, numarray.Float64)) - -# calcCorrelationHelper : array -> array -> float -def calcCorrelationHelper(s1p, s2p): - # if the traits share less than six strains, then we don't - # bother with the correlations - if len(s1p) < 6: - return 0.0 - - # subtract by x-bar and y-bar elementwise - #oldS1P = s1p.copy() - #oldS2P = s2p.copy() - - s1p = (s1p - numarray.average(s1p)).astype(numarray.Float64) - s2p = (s2p - numarray.average(s2p)).astype(numarray.Float64) - - # square for the variances - s1p_2 = numarray.sum(s1p**2) - s2p_2 = numarray.sum(s2p**2) - - try: - corr = (numarray.sum(s1p*s2p)/ - numarray.sqrt(s1p_2 * s2p_2)) - except ZeroDivisionError: - corr = 0.0 - - return corr - -# calcSpearman : Trait -> Trait -> float -def calcSpearman(trait1, trait2): - s1p, s2p = strainDataUnion(trait1.strainData, - trait2.strainData) - s1p = rankArray(s1p) - s2p = rankArray(s2p) - return calcCorrelationHelper(s1p, s2p) - -# calcPearson : Trait -> Trait -> float -def calcPearson(trait1, trait2): - # build lists of values that both have - # and make sure that both sets of values are in the same order - s1p, s2p = strainDataUnion(trait1.strainData, - trait2.strainData) - - return calcCorrelationHelper(s1p, s2p) - -# buildPearsonCorrelationMatrix: (listof n traits) -> int s -> n x s matrix, n x s matrix -#def buildPearsonCorrelationMatrix(traits, sc): -# dim = (len(traits), sc) -# matrix = numarray.zeros(dim, MA.Float64) -# testMatrix = numarray.zeros(dim, MA.Float64) - -# for i in range(len(traits)): -# sd = traits[i].strainData -# for key in sd.keys(): -# matrix[i,int(key) - 1] = sd[key] -# testMatrix[i,int(key) - 1] = 1 - -def buildPearsonCorrelationMatrix(traits, commonStrains): - dim = (len(traits), len(commonStrains)) - matrix = numarray.zeros(dim, MA.Float64) - testMatrix = numarray.zeros(dim, MA.Float64) - - for i in range(len(traits)): - sd = traits[i].strainData - keys = sd.keys() - for j in range(0, len(commonStrains)): - if keys.__contains__(commonStrains[j]): - matrix[i,j] = sd[commonStrains[j]] - testMatrix[i,j] = 1 - - return matrix, testMatrix - -# buildSpearmanCorrelationMatrix: (listof n traits) -> int s -> n x s matrix, n x s matrix -def buildSpearmanCorrelationMatrix(traits, sc): - dim = (len(traits), sc) - matrix = numarray.zeros(dim, MA.Float64) - testMatrix = numarray.zeros(dim, MA.Float64) - - def customCmp(a, b): - return cmp(a[1], b[1]) - - for i in range(len(traits)): - # copy strain data to a temporary list and turn it into - # (strain, expression) pairs - sd = traits[i].strainData - tempList = [] - for key in sd.keys(): - tempList.append((key, sd[key])) - - # sort the temporary list by expression - tempList.sort(customCmp) - - for j in range(len(tempList)): - # k is the strain id minus 1 - # 1-based strain id -> 0-based column index - k = int(tempList[j][0]) - 1 - - # j is the rank of the particular strain - matrix[i,k] = j - - testMatrix[i,k] = 1 - - return matrix, testMatrix - -def findLargestStrain(traits, sc): - strainMaxes = [] - for i in range(len(traits)): - keys = traits[i].strainData.keys() - strainMaxes.append(max(keys)) - - return max(strainMaxes) - -def findCommonStrains(traits1, traits2): - commonStrains = [] - strains1 = [] - strains2 = [] - - for trait in traits1: - keys = trait.strainData.keys() - for key in keys: - if not strains1.__contains__(key): - strains1.append(key) - - for trait in traits2: - keys = trait.strainData.keys() - for key in keys: - if not strains2.__contains__(key): - strains2.append(key) - - for strain in strains1: - if strains2.__contains__(strain): - commonStrains.append(strain) - - return commonStrains - -def calcPearsonMatrix(traits1, traits2, sc, strainThreshold=6, - verbose = 0): - return calcMatrixHelper(buildPearsonCorrelationMatrix, - traits1, traits2, sc, strainThreshold, - verbose) - -def calcProbeSetPearsonMatrix(cursor, freezeId, traits2, strainThreshold=6, - verbose = 0): - - cursor.execute('select ProbeSetId from ProbeSetXRef where ProbeSetFreezeId = %s order by ProbeSetId' % freezeId) - ProbeSetIds = cursor.fetchall() - - results = [] - i=0 - while i<len(ProbeSetIds): - ProbeSetId1 = ProbeSetIds[i][0] - if (i+4999) < len(ProbeSetIds): - ProbeSetId2 = ProbeSetIds[i+4999][0] - else: - ProbeSetId2 = ProbeSetIds[len(ProbeSetIds)-1][0] - - traits1 = trait.queryPopulatedProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2) # XZ,09/10/2008: add module name 'trait.' - SubMatrix = calcMatrixHelper(buildPearsonCorrelationMatrix, - traits1, traits2, 1000, strainThreshold, - verbose) - results.append(SubMatrix) - i += 5000 - - returnValue = numarray.zeros((len(ProbeSetIds), len(traits2)), MA.Float64) - row = 0 - col = 0 - for SubMatrix in results: - for i in range(0, len(SubMatrix)): - for j in range(0, len(traits2)): - returnValue[row,col] = SubMatrix[i,j] - col += 1 - col = 0 - row +=1 - - return returnValue - - - -# note: this code DOES NOT WORK, especially in cases where -# there are missing observations (e.g. when comparing traits -# from different probesetfreezes) -def calcSpearmanMatrix(traits1, traits2, sc, strainThreshold=6, - verbose=0): - return calcMatrixHelper(buildSpearmanCorrelationMatrix, - traits1, traits2, sc, strainThreshold, - verbose) - -def calcMatrixHelper(builder, traits1, traits2, sc, strainThreshold, - verbose): - - # intelligently figure out strain count - step0 = time.time() - #localSC = max(findLargestStrain(traits1, sc), - # findLargestStrain(traits2, sc)) - - commonStrains = findCommonStrains(traits1, traits2) - - buildStart = time.time() - matrix1, test1 = builder(traits1, commonStrains) - matrix2, test2 = builder(traits2, commonStrains) - buildTime = time.time() - buildStart - - step1 = time.time() - - ns = numarray.innerproduct(test1, test2) - - # mask all ns less than strainThreshold so the correlation values - # end up masked - # ns is now a MaskedArray and so all ops involving ns will be - # MaskedArrays - ns = MA.masked_less(ns, strainThreshold, copy=0) - - # divide-by-zero errors are automatically masked - #ns = -1.0/ns - - step2 = time.time() - - # see comment above to find out where this ridiculously cool - # matrix algebra comes from - xs = numarray.innerproduct(matrix1, test2) - ys = numarray.innerproduct(test1, matrix2) - xys = numarray.innerproduct(matrix1, matrix2) - - # use in-place operations to try to speed things up - numarray.power(matrix1, 2, matrix1) - numarray.power(matrix2, 2, matrix2) - - x2s = numarray.innerproduct(matrix1, test2) - y2s = numarray.innerproduct(test1, matrix2) - - step3 = time.time() - - # parens below are very important - # the instant we touch ns, arrays become masked and - # computation is much, much slower - top = ns*xys - (xs*ys) - bottom1 = ns*x2s - (xs*xs) - bottom2 = ns*y2s - (ys*ys) - bottom = MA.sqrt(bottom1*bottom2) - - # mask catches floating point divide-by-zero problems here - corrs = top / bottom - - step4 = time.time() - - # we define undefined correlations as zero even though there - # is a mathematical distinction - returnValue = MA.filled(corrs, 0.0) - - step5 = time.time() - - #print ("calcMatrixHelper: %.2f s, %.2f s, %.2f s, %.2f s, %.2f s, %.2f s, total: %.2f s" - # %(buildTime, - # buildStart - step0, - # step2 - step1, - # step3 - step2, - # step4 - step3, - # step5 - step4, - # step5 - step0)) - - if verbose: - print "Matrix 1:", matrix1 - print "Matrix 2:", matrix2 - print "Ns:", ns - print "Xs", xs - print "Ys", ys - print "XYs:", xys - print "Top:", top - print "Bottom 1:", bottom1 - print "Bottom 2:", bottom2 - print "Bottom:", bottom - print "Corrs:", corrsa - - - return returnValue - - - -# rankArray: listof float -> listof float -# to generate a companion list to alof with -# the actual value of each element replaced by the -# value's rank -def rankArray(floatArray): - # first we save the original index of each element - tmpAlof = [] - returnArray = numarray.zeros(len(floatArray), numarray.Float64) - i = 0 - for i in range(len(floatArray)): - tmpAlof.append((i,floatArray[i])) - - # now we sort by the data value - def customCmp(a,b): return cmp(a[1],b[1]) - tmpAlof.sort(customCmp) - - # finally we use the new rank data to populate the - # return array - for i in range(len(floatArray)): - returnArray[tmpAlof[i][0]] = i+1 - - return returnArray diff --git a/web/webqtl/compareCorrelates/htmlModule.py b/web/webqtl/compareCorrelates/htmlModule.py deleted file mode 100755 index ebba3b86..00000000 --- a/web/webqtl/compareCorrelates/htmlModule.py +++ /dev/null @@ -1,279 +0,0 @@ -# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU Affero General Public License -# as published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the GNU Affero General Public License for more details. -# -# This program is available from Source Forge: at GeneNetwork Project -# (sourceforge.net/projects/genenetwork/). -# -# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) -# at rwilliams@uthsc.edu and xzhou15@uthsc.edu -# -# -# -# This module is used by GeneNetwork project (www.genenetwork.org) -# -# Created by GeneNetwork Core Team 2010/08/10 -# -# Last updated by GeneNetwork Core Team 2010/10/20 - -import sys -import string -import os -import MySQLdb -import cgi - -from htmlgen import HTMLgen2 as HT - -from base import webqtlConfig - - -# XZ 08/14/2008: When I tried to replace 'from webqtlConfig import *' with 'import webqtlConfig' -# XZ 08/14/2008: I found some problems. I discussed with Hongqiang and the below is conclusion. -# XZ 08/14/2008: The program uses webqtlConfig.DB_NAME, webqtlConfig.MYSQL_SERVER and so on -# XZ 08/14/2008: without 'import webqtlConfig'. This program will not work. -# XZ 08/14/2008: CONFIG_htmlpath doesn't exist in webqtlConfig.py -# XZ 08/14/2008: Hongqian said this was done by Fan Zhang, and this program was not tested. -# XZ 08/14/2008: So nobody realize these bugs. - -# XZ, 09/09/2008: This function is not called any where. -# XZ, 09/09/2008: Actually, I don't think this function works. -def genHeaderFooter(i=1,title='',basehref='',js1='',js2='',layer='',body=''): - """ - generate footer and header HTML code - default is header - i = 0 is footer+header - i = 1 is header - i = 2 is footer - """ - try: - temp_file = CONFIG_htmlpath + 'beta-template.html' - fp = open(temp_file, 'rb') - template = fp.read() - fp.close() - template = template % (title,basehref,js1,js2,layer,body, "") - header,footer = string.split(template,'<!-- split from Here -->') - if i == 0: - return header + footer - elif i == 1: - return header - elif i == 2: - return footer - else: - return "" - except: - if i == 0: - return "header + footer" - elif i == 1: - return "header" - elif i == 2: - return "footer" - else: - return "" - -# XZ, 09/09/2008: This function is only used in multitrait.py where it is called with value assigned to db. -# XZ, 09/09/2008: So the try-except block is not executed. -# XZ, 09/09/2008: This explains why no error was generated even without 'import webqtlConfig' -def genDatabaseMenu(db = None, public =1, RISetgp = 'BXD', selectname = 'database', selected = ""): - """ - generate database Menu - public = 0 : search3.html databases Menu - public = 1 : search.html databases Menu - """ - if not db: - try: - # import MySQLdb - # con = MySQLdb.Connect(db='db_webqtl') - # Modified by Fan Zhang - con = MySQLdb.Connect(db=webqtlConfig.DB_NAME,host=webqtlConfig.MYSQL_SERVER, user=webqtlConfig.DB_USER,passwd=webqtlConfig.DB_PASSWD) - db = con.cursor() - except: - return "Connect MySQL Server Error" - else: - pass - - databaseMenu = HT.Select(name=selectname) - nmenu = 0 - - # here's a hack: bxd and bxd300 can be correlated against each other - # if either of those are the group, we put in special SQL that pulls both - if RISetgp in ("BXD", "BXD300"): - ibsNameQry = '(InbredSet.Name = "BXD" OR InbredSet.Name = "BXD300")' - else: - ibsNameQry = 'InbredSet.Name = "%s"' % RISetgp - - #Publish Database - db.execute(''' - SelecT - PublishFreeze.FullName, - PublishFreeze.Name - from - PublishFreeze, - InbredSet - where - PublishFreeze.InbredSetId = InbredSet.Id and - %s - ''' % ibsNameQry) - for item in db.fetchall(): - databaseMenu.append(item) - nmenu += 1 - - #Genome Database - db.execute(''' - SelecT - GenoFreeze.FullName, - GenoFreeze.Name - from - GenoFreeze,InbredSet - where - GenoFreeze.InbredSetId = InbredSet.Id and - %s - ''' % ibsNameQry) - for item in db.fetchall(): - databaseMenu.append(item) - nmenu += 1 - - #Microarray Database - db.execute('SelecT Id, Name from Tissue') - for item in db.fetchall(): - TId, TName = item - databaseMenuSub = HT.Optgroup(label = '%s ------' % TName) - db.execute(''' - SelecT - ProbeSetFreeze.FullName, - ProbeSetFreeze.Name - from - ProbeSetFreeze, - ProbeFreeze, - InbredSet - where - ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and - ProbeFreeze.TissueId = %d and - ProbeSetFreeze.public > %d and - ProbeFreeze.InbredSetId = InbredSet.Id and - %s - order by - ProbeSetFreeze.CreateTime desc, - ProbeSetFreeze.AvgId - ''' % (TId,public,ibsNameQry)) - for item2 in db.fetchall(): - databaseMenuSub.append(item2) - nmenu += 1 - databaseMenu.append(databaseMenuSub) - - if nmenu: - if selected: - databaseMenu.selected.append(selected) - return str(databaseMenu) - else: - return '' - - -# XZ, 09/09/2008: This function is not called any where. -# XZ, 09/09/2008: Actually, I don't think this function works. -# XZ, 09/09/2008: There is no 'DataForm' file now. It should be webqtlForm.py -def genRISample(): - import glob - import reaper - import random - import math - import webqtlUtil - risets = filter(lambda X:X.find('F2')<0, map(os.path.basename, glob.glob(os.path.join(CONFIG_genodir, "*.geno")))) - risets = map(lambda X:X.split('.')[0], risets) - risets.remove("BayXSha") - risets.sort() - body = HT.Blockquote() - NPerRow = 6 - for item in risets: - values = [] - if item == 'AXBXA': item2='AXB/BXA' - elif item == 'HXBBXH': item2='HXB/BXH' - else: item2=item - body.append(HT.Paragraph(item2, Class='subtitle')) - tbl = HT.TableLite(Class="collap") - dataset = reaper.Dataset() - dataset.read(os.path.join(CONFIG_genodir, "%s.geno"%item)) - prgy = webqtlUtil.ParInfo[item] + list(dataset.prgy) - - mean = random.random()*100 - variance = random.random()*500 - variables = [] - while len(variables) < len(prgy): - S = 2 - while (S>=1): - U1= random.random() - U2= random.random() - V1= 2*U1-1.0 - V2= 2*U2-1.0 - S=V1*V1+V2*V2 - X= math.sqrt(-2 * math.log(S) / S) * V1 - Y= math.sqrt(-2 * math.log(S) / S) * V2 - variables.append(mean + math.sqrt(variance) * X) - variables.append(mean + math.sqrt(variance) * Y) - - tempTR = HT.TR() - for i, strain in enumerate(prgy): - if i and i%NPerRow==0: - tbl.append(tempTR) - tempTR = HT.TR() - if random.random() < 0.2: - variable = 'X' - else: - variable = "%2.3f" % variables[i] - - tempTR.append(HT.TD(strain, Class="strains", width=80)) - tempTR.append(HT.TD(variable, Class="values", width=60)) - values.append(variable) - - for j in range(NPerRow-i%NPerRow-1): - tempTR.append(HT.TD()) - tbl.append(tempTR) - body.append(tbl) - body.append(HT.Paragraph("Copy the following line to paste into the GeneNetwork entry box:")) - body.append(HT.Code(string.join(values, " "))) - body.append(HT.HR(width="90%")) - return body - -if __name__ == "__main__": - if os.environ.has_key('SCRIPT_FILENAME'): - script_filename = os.environ['SCRIPT_FILENAME'] - else: - script_filename = '' - #Used as cgi script - if script_filename and script_filename[-2:] == 'py': - print 'Content-type: text/html\n' - formdata = cgi.FieldStorage() - sys.stderr = sys.stdout - try: - getID = string.lower(formdata.getvalue('get')) - except: - getID = '' - #Used as command - else: - if len(sys.argv) >= 2: - getID = string.lower(sys.argv[1]) - else: - getID = '' - - if getID == 'headerfooter': - print genHeaderFooter(0) - elif getID == 'header': - print genHeaderFooter(1) - elif getID == 'footer': - print genHeaderFooter(2) - elif getID == 'databasemenu': - print genDatabaseMenu(public=0) - elif getID == 'datasample': - print genRISample() - else: - print genHeaderFooter(0) -else: - pass - diff --git a/web/webqtl/compareCorrelates/multitrait.py b/web/webqtl/compareCorrelates/multitrait.py deleted file mode 100755 index 047620af..00000000 --- a/web/webqtl/compareCorrelates/multitrait.py +++ /dev/null @@ -1,1121 +0,0 @@ -# multitrait.py -# a tool to analyze the correlations between several different traits and the traits -# in a given dataset -# -# Parameters: -# correlation -- either "pearson" or "spearman" depending on which ones we want to use -# -# filename -- an input file containing the traits to analyze -# -# progress -- if set, this parameter outputs a static progress page -# and uses a META redirect to trigger the real computation -# -# targetDatabaseType: -# one of "ProbeSet", "Publish", "Genotype" depending on the type of database -# we will use for the analysis -# -# targetDatabaseId: -# the id (*Freeze.Id in the database) of the particular database we will analyze -# -# threshold -- a float between 0 and 1 to determine which coefficents we wil l consider -# -# firstRun -- either 0 or 1 -# whether to automatically pick reasonable defaults for the other three parameters -# -# outputType -- either "html" or "text" -# -# Author: Stephen Pitts -# June 15, 2004 - -#Xiaodong changed the dependancy structure - -import copy -import sys -import cgi -import os -import os.path -import math -import time -import numarray -import tempfile -import string -import cgitb #all tracebacks come out as HTMLified CGI,useful when we have a random crash in the middle - -from base import templatePage -from base.webqtlTrait import webqtlTrait -from utility import webqtlUtil -from base import webqtlConfig -import trait -import correlation -import htmlModule - -cgitb.enable() - - -# where this program's data files are -RootDir = webqtlConfig.IMGDIR # XZ, 09/10/2008: add module name 'webqtlConfig.' -RootDirURL = "/image/" # XZ, 09/10/2008: This parameter is not used in this module - -tempfile.tempdir = RootDir -tempfile.template = "multitrait" - -# MultitraitException: used if something goes wrong -# maybe in the future we should make exceptions more granular -class MultitraitException(Exception): - def __init__(self, message): - self.message = message - - def __repr__(self): - return "MultitraitException: %s" % self.message - -# buildParamDict: Cursor -> ParamDict -# to process and validate CGI arguments -# see the comment at the top of this file for valid cgi -# parameters -def buildParamDict(cursor, fd): - params = {} - fs = fd.formdata #cgi.FieldStorage() - params["progress"] = fs.getfirst("progress", "0") - params["filename"] = fs.getfirst("filename", "") - if params["filename"] == "": - raise MultitraitException("Required parameter filename missing.") - - params["targetDatabase"] = fs.getfirst("targetDatabase", "U74Av2RMA_Raw_ProbeSet_March04") - params["firstRun"] = webqtlUtil.safeInt(fs.getfirst("firstRun", "0"),0) - params["threshold"] = webqtlUtil.safeFloat(fs.getfirst("threshold", "0.5"), 0.5) - params["subsetSize"] = webqtlUtil.safeInt(fs.getfirst("subsetSize", "10"), 10) - - if params["subsetSize"] < -1: - params["subsetSize"] = -1 - - params["correlation"] = fs.getfirst("correlation", "pearson") - params["subsetCount"] = webqtlUtil.safeInt(fs.getfirst("subsetCount", 10), 10) - - if params["subsetCount"] < -1: - params["subsetCount"] = -1 - - #params["outputType"] = fs.getfirst("outputType", "html") - - #if params["outputType"] not in ("html", "text"): - # params["outputType"] = "html" - - if params["correlation"] not in ("pearson", "spearman"): - params["correlation"] = "pearson" - - params["correlationName"] = params["correlation"].capitalize() - - # one of two cases: - # 1) We have just come from a submit, so there are a bunch of display* - # but no displaySets. Thus, the code down there converts the display* - # to displaySets so the GET request doesn't get too long - # 2) We have just been redirected from a progress page which already has - # a converted displaySets for us. - - displaySets = webqtlUtil.safeInt(fs.getfirst("displaySets","0"), 0) - - if displaySets == 0: - for key in fs.keys(): - if key[:7] == "display": - #print "Hit display key %s<br>" % key - try: - whichSet = int(key[7:]) - - # prevent malicious attacks - whichSet = min(whichSet, 512) - displaySets += pow(2, whichSet) - - except ValueError: pass - - params["displaySets"] = displaySets - #print "In the beginning, display sets was %s: %s<br>" % (displaySets, - # str(binaryDecompose(displaySets))) - - # if we are just gonna display a progress page, then there's no - # reason to look up detailed database information - #if params["progress"] == "1": - # return params - - a,b = trait.dbNameToTypeId(cursor, params["targetDatabase"]) # XZ, 09/10/2008: add module name - params["targetDatabaseType"] = a - params["targetDatabaseId"] = b - params["targetDatabaseName"] = params["targetDatabase"] - - return params - -# readInputFile: DB cursor -> string -> string, (arrayof Trait) -def readInputFile(cursor, filename): - """ - To read an input file with n lines in the following format - <databasetype>,<databaseid>,<traitname> - and retrieve and populate traits with appropriate data - from the database - - Also, for our purposes. we store the database type and - database id in fields attached to the trait instances. We use - this information to generate Javascript popups with trait - information. - - In addition, we read the strain of mice that the traits are - from so we can only show those databases to correlate against. - """ - handle = open(filename) - line = handle.readline() - inbredSetName = line.strip() - line = handle.readline() - traits = [] - -# XZ, 09/10/2008: In this while loop block, I changed the original variable name 'trait' to 'oneTrait' - while line != "": - line = line.strip() - dbType, dbId, tName = line.split(",") - - if dbType == "ProbeSet": - oneTrait = trait.queryProbeSetTraitByName(cursor, tName) # XZ, 09/10/2008: add module name - oneTrait.populateDataId(cursor, dbId) - oneTrait.dbName = trait.dbTypeIdToName(cursor, dbType, dbId) # XZ, 09/10/2008: add module name - elif dbType == "Geno": - speciesId = trait.getSpeciesIdByDbTypeId(cursor, dbType, dbId) - oneTrait = trait.queryGenotypeTraitByName(cursor, speciesId, tName) # XZ, 09/10/2008: add module name - oneTrait.populateDataId(cursor, dbId) - oneTrait.dbName = trait.dbTypeIdToName(cursor, dbType, dbId) # XZ, 09/10/2008: add module name - elif dbType == "Publish": - oneTrait = trait.queryPublishTraitByName(cursor, dbId, tName) # XZ, 09/10/2008: add module name - oneTrait.populateDataId(cursor, dbId) - oneTrait.dbName = trait.dbTypeIdToName(cursor, dbType, dbId) # XZ, 09/10/2008: add module name - elif dbType == "Temp": - oneTrait = trait.queryTempTraitByName(cursor, tName) # XZ, 09/10/2008: add module name - oneTrait.populateDataId(cursor, dbId) - oneTrait.dbName = "Temp" - - oneTrait.populateStrainData(cursor) - traits.append(oneTrait) - - line = handle.readline() - - return inbredSetName, traits - -# loadDatabase: Cursor -> ParamDict -> arrayof Trait -def loadDatabase(cursor, p): - """ - To load a set of traits as specified by the - targetDatabaseId - and targetDatabaseType parameters - - Cursor should be a fastCursor from the webqtl library (i.e. - a MySQLdb SSCursor). - - Calling populateStrainData 20,000 or so times on a ProbeSet - is really inefficent, so I wrote an optimized queryPopulatedProbeSetTraits - in the trait module that uses a join to get all of the rows in - bulk, store the resultset on the server, and do all sorts of nice buffering. - It's about two or three times faster. - """ - if p["targetDatabaseType"] == "ProbeSet": # XZ, 09/10/2008: add module name - dbTraits = trait.queryPopulatedProbeSetTraits(cursor, - p["targetDatabaseId"]) - elif p["targetDatabaseType"] == "Publish": # XZ, 09/10/2008: add module name - dbTraits = trait.queryPublishTraits(cursor, - p["targetDatabaseId"]) - psd = trait.PublishTrait.populateStrainData - elif p["targetDatabaseType"] == "Geno": # XZ, 09/10/2008: add module name - dbTraits = trait.queryGenotypeTraits(cursor, - p["targetDatabaseId"]) - psd = trait.GenotypeTrait.populateStrainData - else: - print "Unknown target database type %s" % p["targetDatabaseType"] - - if p["targetDatabaseType"] != "ProbeSet": - map(psd, dbTraits, [cursor]*len(dbTraits)) - - return dbTraits - -def runProbeSetCorrelations(cursor, p, traits): - """ - To run the correlations between the traits and the database. - This function computes a correlation coefficent between each - trait and every entry in the database, and partitions the database - into a disjoint array of arrays which it returns. - - The length of the return array is 2^n, where n is the length of - the trait array. Which constitutent element a of the return array - a given trait ends up in is determined by the following formula - i = i_02^0 + ... + i_(n-1)2^(n-1) - where i_0 is 1 if corr(a,trait 0) >= threshold and 0 otherwise - - Since most of the several thousand database traits will end up - with i=0, we don't return them, so the first element of the - return array will be empty. - - A particular element of subarray j of the return array contains - a 2-tuple (trait,kvalues). The variable trait is obviously the - particular database trait that matches the user traits l_1, ..., l_m - to which subarray j corresponds. kvalues is a list of the correlation - values linking trait to l_1, ..., l_m, so the length of kvalues is - the number of 1s in the binary representation of j (there must be - a better way to describe this length). - - The return array is an array of 2-tuples. The first element of - each tuple is the index of the particular subarray, and the second - element is the subarray itself. The array is sorted in descending - order by the number of 1's in the binary representation of the - index so the first few subarrays are the ones that correspond to - the largest sets. Each subarray is then sorted by the average of - the magnitude of the individual correlation values. - """ - - kMin = p["threshold"] - traitArrays = {} - - # TODO: Add Spearman support - freezeId = p["targetDatabaseId"] - if p["correlation"] == "pearson": - correlations = correlation.calcProbeSetPearsonMatrix(cursor, freezeId, traits) #XZ, 09/10/2008: add module name - else: - correlations = correlation.calcProbeSetSpearmanMatrix(freezeId, traits) #XZ, 09/10/2008: add module name - - # now we test all of the correlations in bulk - test = numarray.absolute(correlations) - test = numarray.greater_equal(test, kMin) - test = test.astype(numarray.Int8) - #print test - - db = trait.queryProbeSetTraits(cursor, freezeId) #XZ, 09/10/2008: add module name - for i in range(len(db)): - cIndex = 0 - prods = [] - for j in range(len(traits)): - if test[i,j] == 1: - cIndex += pow(2, j) - prods.append(correlations[i,j]) - if cIndex != 0: - if not traitArrays.has_key(cIndex): - traitArrays[cIndex] = [] - - traitArrays[cIndex].append((db[i], prods)) - - - # sort each inner list of traitArrays - # so the matched traits appear in descending order by the - # average magnitude of the correlation - def customCmp(traitPair, traitPair2): - magAvg1 = numarray.average(map(abs, traitPair[1])) - magAvg2 = numarray.average(map(abs, traitPair2[1])) - - # invert the sign to get descending order - return -cmp(magAvg1, magAvg2) - - for traitArray in traitArrays.values(): - traitArray.sort(customCmp) - - # sort the outer list of traitArrays - traitArrays2 = [] - i = 0 - for key in traitArrays.keys(): - a = traitArrays[key] - if len(a) > 0: - traitArrays2.append((key,a,len(binaryDecompose(key)), - len(a))) - - # we sort by the number of 1's in the binary output - # and then by the size of the list, both in descending order - def customCmp2(aL,bL): - a = -cmp(aL[2], bL[2]) - if a == 0: - return -cmp(aL[3], bL[3]) - else: - return a - - traitArrays2.sort(customCmp2) - - return traitArrays2 - -def runCorrelations(p, strainCount, traits, db): - """ - To run the correlations between the traits and the database. - This function computes a correlation coefficent between each - trait and every entry in the database, and partitions the database - into a disjoint array of arrays which it returns. - - The length of the return array is 2^n, where n is the length of - the trait array. Which constitutent element a of the return array - a given trait ends up in is determined by the following formula - i = i_02^0 + ... + i_(n-1)2^(n-1) - where i_0 is 1 if corr(a,trait 0) >= threshold and 0 otherwise - - Since most of the several thousand database traits will end up - with i=0, we don't return them, so the first element of the - return array will be empty. - - A particular element of subarray j of the return array contains - a 2-tuple (trait,kvalues). The variable trait is obviously the - particular database trait that matches the user traits l_1, ..., l_m - to which subarray j corresponds. kvalues is a list of the correlation - values linking trait to l_1, ..., l_m, so the length of kvalues is - the number of 1s in the binary representation of j (there must be - a better way to describe this length). - - The return array is an array of 2-tuples. The first element of - each tuple is the index of the particular subarray, and the second - element is the subarray itself. The array is sorted in descending - order by the number of 1's in the binary representation of the - index so the first few subarrays are the ones that correspond to - the largest sets. Each subarray is then sorted by the average of - the magnitude of the individual correlation values. - """ - kMin = p["threshold"] - traitArrays = {} - - # TODO: Add Spearman support - if p["correlation"] == "pearson": - correlations = correlation.calcPearsonMatrix(db, traits, strainCount) #XZ, 09/10/2008: add module name - else: - correlations = correlation.calcSpearmanMatrix(db, traits, strainCount) #XZ, 09/10/2008: add module name - - # now we test all of the correlations in bulk - test = numarray.absolute(correlations) - test = numarray.greater_equal(test, kMin) - test = test.astype(numarray.Int8) - #print test - - - for i in range(len(db)): - cIndex = 0 - prods = [] - for j in range(len(traits)): - if test[i,j] == 1: - cIndex += pow(2, j) - prods.append(correlations[i,j]) - if cIndex != 0: - if not traitArrays.has_key(cIndex): - traitArrays[cIndex] = [] - - traitArrays[cIndex].append((db[i], prods)) - - # sort each inner list of traitArrays - # so the matched traits appear in descending order by the - # average magnitude of the correlation - def customCmp(traitPair, traitPair2): - magAvg1 = numarray.average(map(abs, traitPair[1])) - magAvg2 = numarray.average(map(abs, traitPair2[1])) - - # invert the sign to get descending order - return -cmp(magAvg1, magAvg2) - - for traitArray in traitArrays.values(): - traitArray.sort(customCmp) - - # sort the outer list of traitArrays - traitArrays2 = [] - i = 0 - for key in traitArrays.keys(): - a = traitArrays[key] - if len(a) > 0: - traitArrays2.append((key,a,len(binaryDecompose(key)), - len(a))) - - # we sort by the number of 1's in the binary output - # and then by the size of the list, both in descending order - def customCmp2(aL,bL): - a = -cmp(aL[2], bL[2]) - if a == 0: - return -cmp(aL[3], bL[3]) - else: - return a - - traitArrays2.sort(customCmp2) - - return traitArrays2 - - -# XZ, 09/09/2008: In multiple trait correlation result page, -# XZ, 09/09/2008: click "Download a text version of the above results in CSV format" - -# TraitCorrelationText: a class to display trait correlations -# as textual output -class TraitCorrelationText: - # build a text shell to describe the given trait correlations - # this method sets self.output; use str(self) to actually - # get the text page - # - # traits is a list of traits and traitArray is a - # list of 3-tuples: index, traits', garbage - # where index is a binary-encoded description of which subset of - # traits the list traits' matches - # - # traits' is a list of 3-tuples as well: trait, correlations, garbage - # where trait is a particular trait and correlations is a list of float - # correlations (matching traits above) - def __init__(self, p, traits, traitArray): - output = "Correlation Comparison\n" - output += "from WebQTL and the University of Tennessee Health Science Center\n" - output += "initiated at " + time.asctime(time.gmtime()) + " UTC\n\n" - - output += self.showOptionPanel(p) - output += self.showSelectedTraits(traits) - output += self.showSummaryCorrelationResults(p, traits, traitArray) - output += self.showDetailedCorrelationResults(p, traits, traitArray) - - self.output = output - - # showOptionPanel: ParamDict -> string - # to display the options used to run this correlation - def showOptionPanel(self, params): - output = "Correlation Comparison Options:\n" - output += "Target database,%s\n" % params["targetDatabase"] - output += "Correlation type,%s\n" % params["correlationName"] - output += "Threshold,%f\n" % params["threshold"] - #output += "Subsets to Show,%d\n" % params["subsetCount"] - #output += "Traits to Show Per Subset,%d\n\n" % params["subsetSize"] - return output - - # showSelectedTraits: (listof Trait) -> string - # to display the traits compared with the database - # note: we can't use tabular output because the traits could be of - # different types and produce different fields - def showSelectedTraits(self, traits): - output = "Selected Traits:\n" - for trait in traits: - output += '"' + trait.longName() + '"' + "\n" - output += "\n" - return output - - # showSummaryCorrelationResults: ParamDict -> (listof Trait) -> - # TraitArray -> string - # see comment for __init__ for a description of TraitArray - # - # to show a summary (sets and sizes) of the correlation results - # as well as an X to indicate whether they will be included - # in the detailed output - def showSummaryCorrelationResults(self, p, traits, traitArray): - output = "Correlation Comparison Summary:\n" - - #if p["subsetCount"] != -1: - # ourSubsetCount = min(p["subsetCount"], len(traitArray)) - #else: - - ourSubsetCount = len(traitArray) - - displayDecomposition = binaryDecompose(p["displaySets"]) - for j in range(ourSubsetCount): - i = traitArray[j][0] - traitSubarray = traitArray[j][1] - if len(traitSubarray) == 0: - continue - - targetTraits = decomposeIndex(traits, i) - traitDesc = string.join(map(trait.Trait.shortName, targetTraits), # XZ, 09/10/2008: add module name - ", ") - if j in displayDecomposition: - checked = "X" - else: - checked = "" - - output += '"%s","%s","%d"\n' % (checked, traitDesc, len(traitSubarray)) - - output += "\n" - return output - - # showDetailedCorrelationResults: ParamDict -> (listof Trait) -> - # TraitArray -> string - # - # to show a detailed list of the correlation results; that is, - # to completely enumerate each subset of traitArray using the - # filtering parameters in p - def showDetailedCorrelationResults(self, p, traits, traitArray): - output = "Correlation Comparison Details:\n" - displayDecomposition = binaryDecompose(p["displaySets"]) - displayDecomposition.sort() - - def formatCorr(c): - return "%.4f" % c - - for j in displayDecomposition: - i = traitArray[j][0] - traitSubarray = traitArray[j][1] - - if len(traitSubarray) == 0: - continue - - targetTraits = decomposeIndex(traits, i) - extraColumnHeaders = map(trait.Trait.shortName, targetTraits) # XZ, 09/10/2008: add module name - traitDesc = string.join(extraColumnHeaders, ", ") - - #if(p["subsetSize"] != -1 and len(traitSubarray) > p["subsetSize"]): - # traitDesc += ",(showing top %s of %s)" % (p["subsetSize"], - # len(traitSubarray)) - # traitSubarray = traitSubarray[0:p["subsetSize"]] - - output += "%s\n" % traitDesc - output += traitSubarray[0][0].csvHeader([], extraColumnHeaders) - output += "\n" - for oneTrait, corr in traitSubarray:#XZ, 09/10/2008: change original variable name 'trait' to 'oneTrait' - corr = map(formatCorr, corr) - output += oneTrait.csvRow([], corr) + "\n" - - output += "\n" - - return output - - # __str__ : string - # to return self.output as the string representation of this page - # self.output is built in __init__ - def __str__(self): - return self.output - -# TraitCorrelationPage: a class to display trait correlations -# for now this is just one HTML file, so we don't even write it -# to a temporary file somewhere -class TraitCorrelationPage(templatePage.templatePage): - """ - Using the templatePage class, we build an HTML shell for - the core data here: the trait correlation lists. - - The way templatePage works, we build the page in pieces in - the __init__ method and later on use the inherited write - method to render the page. - """ - def __init__(self, fd, p, cursor, traits, traitArray, inbredSetName, txtFilename): - - templatePage.templatePage.__init__(self, fd) - - self.dict["title"] = "Correlation Comparison" - self.dict["basehref"] = "" - # NL: deleted js1 content part, since it has not been used in this project - self.dict["js1"] = "" - self.dict["js2"] = "" - - body = "<td><h1>Correlation Comparison</h1>" - body += "<p>Run at %s UTC</p>" % time.asctime(time.gmtime()) - body += """ - <p>The correlation comparison tool identifies intersecting sets of traits that are -correlated with your selections at a specified threshold. A correlation comparison -involves the following steps:</p> -<ol> -<li><p> -<b>Correlate:</b> -Choose a <i>Target Database</i>, a <i>Correlation Type</i>, and a <i>Correlation -Threshold</i>. For your initial correlation, leave <i>Number of Subsets to Show</i> and -<i>Traits to Show per Subset</i> at their default values of 10. Using the Correlation -Options panel, you can adjust the <i>Correlation Threshold</i>, <i>Number of Subsets to -Show</i>, and <i>Traits to Show per Subset</i>. -</p></li> - -<li><p> -<b>Add to Collection:</b> -You can use the check boxes in the <i>Correlation -Comparison Details</i> panel and the buttons at the bottom of the page to add these -results to your selections page for further analysis in WebQTL. -</p></li> - -<li><p> -<b>Filter:</b> -Using the <i>Correlation Comparison Summary</i> panel, choose which -subsets you would like to display for export. Note that if you change the -parameters in the <i>Correlation Options</i> panel, you will need to re-apply your filter. -</p></li> - -<li><p> -<b>Export:</b> -Once you are satisfied with your report, use the export link at -the bottom of the page to save the report as a comma-separated (CSV) text file -which you can then import into Excel or another tool. Note: the exported report -will list all subsets in the summary view and only those traits in the subsets -you have selected in the Filter step. -</p></li> -</ol> -""" - -# body += """ -# <p>The correlation -# comparison tool identifies the intersecting sets of traits that are -# correlated with your selections. A correlation comparison involves -# the following steps:</p> -# <ol> -# <li><p><b>Correlate:</b> Choose a <i>Target Database</i>, a <i>Correlation Type</i>, and a <i>Correlation Threshold</i>. -# For the initial correlation, leave <i>Subsets to Show</i> and <i>Traits to Show per Subset</i> -# at their default values of 10.</p></li> -# <li><p><b>Refine Correlation:</b> Using the <i>Correlation Options</i> panel, -# adjust the <i>Correlation Threshold</i>, <i>Subsets to Show</i>, and <i>Traits to -# Show per Subset</i> until you have a reasonable number of traits.</p></li> -# <li><p><b>Filter:</b> Using the <i>Correlation Comparison Summary</i> panel, choose which subsets you would -# like to see. Note that if you change the parameters in the <i>Correlation Options</i> panel, you will -# loose the filter you have selected.</p></li> -# <li><p><b>Export:</b> Once you are satisfied with your report, use the export -# link at the bottom of the page to save the report as a comma-separated (CSV) text file which -# you can then import into Excel or another tool. Note: the exported report -# will show all subsets in the summary view and all traits in each subset you have -# selected in the Filter step. -# <li><p><b>Shopping Cart:</b> In addition, you can use the -# check boxes in the <i>Correlation Comparison Details</i> panel and the -# buttons at the bottom of the page to add the traits you have found to the shopping cart.</p> -# </li> -# </ol> -# """ - - body += self.showOptionPanel(p, cursor, inbredSetName) - body += self.showSelectedTraits(traits, p, inbredSetName) - - if p["firstRun"] == 0: - body += self.showCorrelationResults(p, inbredSetName, traits, traitArray) - - exportParams = copy.copy(p) - exportParams["outputType"] = "text" - - body += (''' - <h2>Export these results</h2> - <p> - <a href="/image/%s">Download a text version of the above results in CSV format</a>. This text version differs from - the version you see on this page in two ways. First, the summary view shows all subsets. Second, the details - view shows all traits in the subsets that you have selected. - </p> - ''' - % txtFilename) - - - - body += "</td>" - self.dict["body"] = body - - - # showOptionPanel: ParamDict -> Cursor -> String -> String - # to build an option panel for the multitrait correlation - # we expect the database list to be a list of 2-tuples - # the first element of each tuple is the short name - # and the second element of the tuple is the long name - def showOptionPanel(self, params, cursor, inbredSetName): - output = ''' - <h2>Correlation Options</h2> - <FORM METHOD="POST" ACTION="%s%s" ENCTYPE="multipart/form-data"> - <INPUT TYPE="hidden" NAME="FormID" VALUE="compCorr2"> - <input type="hidden" name="filename" value="%s"> - <input type="hidden" name="firstRun" value="0"> - <input type="hidden" name="progress" value="1"> - <table> - <tr> - <td>Target Database:</td><td> - ''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, params["filename"]) - - output += htmlModule.genDatabaseMenu(db = cursor, - public=0, - RISetgp = inbredSetName, - selectname="targetDatabase", - selected=params["targetDatabase"]) - output += "</td></tr>" - - corrSelected = ["",""] - - if params["correlation"] == "pearson": - corrSelected[0] = "SELECTED" - else: - corrSelected[1] = "SELECTED" - - output += (''' - <tr> - <td>Correlation Method:</td> - <td><select name="correlation"> - <option value="pearson" %s>Pearson</option> - <!--<option value="spearman" %s>Spearman</option>--> - </select></td></tr> - ''' % (corrSelected[0], corrSelected[1])) - output += ('<tr><td>Correlation Threshold:</td><td><input name="threshold" value="%s" /></td></tr>' - % params["threshold"]) - output += ('<tr><td>Subsets to Show (-1 to show all subsets):</td><td><input name="subsetCount" value="%s" /></td></tr>' - % params["subsetCount"]) - output += ('<tr><td>Traits to Show per Subset (-1 to show all traits):</td><td><input name="subsetSize" value="%s" /></td></tr>' - % params["subsetSize"]) - - # a cosmetic change to hopefully make this form a bit easier to use -# if params["firstRun"] == 1: -# applyName = "Correlate" -# else: -# applyName = "Refine Correlation" - - output += ''' - <tr> - <td colspan="2"><input class="button" type="submit" value="Correlate" /></td> - </tr> - </table> - </form> - ''' - - return output - - # showSelectedTraits: listof Trait -> string - # to show a list of the selected traits - def showSelectedTraits(self, traits, p, inbredSetName): - output = ''' - <form action="%s%s" method="post" name="showDatabase"> - <INPUT TYPE="hidden" NAME="FormID" VALUE="showDatabase"> - - <input type="hidden" name="incparentsf1" value="ON"> - <input type="hidden" name="ShowStrains" value="ON"> - <input type="hidden" name="ShowLine" value="ON"> - <input type="hidden" name="database" value=""> - <input type="hidden" name="ProbeSetID" value=""> - <input type="hidden" name="RISet" value="%s"> - <input type="hidden" name="CellID" value=""> - <input type="hidden" name="database2" value=""> - <input type="hidden" name="rankOrder" value=""> - <input type="hidden" name="ProbeSetID2" value=""> - <input type="hidden" name="CellID2" value=""> - ''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, inbredSetName) - - output += "<h2>Selected Traits</h2>" - output += '<table cellpadding="2" cellspacing="0"><tr bgcolor="FFFFFF"><th>Database</th><th>Trait</th></tr>' - flip = 1 - colors = ["FFFFFF", "cccccc"] - - for trait in traits: - # we take advantage of the secret dbName attribute that - # loadDatabase fills in - descriptionString = trait.genHTML() - if trait.db.type == 'Publish' and trait.confidential: - descriptionString = trait.genHTML(privilege=self.privilege, userName=self.userName, authorized_users=trait.authorized_users) - output += ''' - <tr bgcolor="%s"><td><a href="/dbdoc/%s.html">%s</a></td> - <td><a href="javascript:showDatabase2('%s', '%s', '')">%s</a></td> - </tr> - ''' % (colors[flip], trait.db.name, trait.db.name, trait.db.name, trait.name, descriptionString) - flip = not flip - - output += "</table></form>" - return output - - - # showSummaryCorrelationResults - # show just the number of traits in each subarray - def showSummaryCorrelationResults(self, p, traits, traitArray): - output = ''' - <form action="%s%s" method="post"> - <INPUT TYPE="hidden" NAME="FormID" VALUE="compCorr2"> - <input type="hidden" name="filename" value="%s"> - <input type="hidden" name="firstRun" value="0"> - <input type="hidden" name="progress" value="1"> - <input type="hidden" name="correlation" value="%s"> - <input type="hidden" name="threshold" value="%s"> - <input type="hidden" name="rankOrder" value=""> - <input type="hidden" name="subsetCount" value="%s"> - <input type="hidden" name="subsetSize" value="%s"> - <input type="hidden" name="targetDatabase" value="%s"> - ''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, p["filename"], p["correlation"], p["threshold"], - p["subsetCount"], p["subsetSize"], p["targetDatabase"]) - - output += ''' - <table cellpadding="2" cellspacing="0"> - <tr> - <th>Trait Subsets</th> - <th colspan="2">Intersecting Set Size</th> - </tr> - ''' - # figure out a scale for the summary graph - # for now we set max = 300 pixels wide - if p["subsetCount"] != -1: - ourSubsetCount = min(p["subsetCount"], len(traitArray)) - else: - ourSubsetCount = len(traitArray) - - screenWidth = 600 - lengths = [] - for j in range(ourSubsetCount): - lengths.append(len(traitArray[j][1])) - maxLength = max(lengths) - - displayDecomposition = binaryDecompose(p["displaySets"]) - flip = 0 - colors = ["FFFFFF", "cccccc"] - - for j in range(ourSubsetCount): - i = traitArray[j][0] - traitSubarray = traitArray[j][1] - - if len(traitSubarray) == 0: - continue - - targetTraits = decomposeIndex(traits, i) - traitDesc = string.join(map(webqtlTrait.displayName, targetTraits), - ", ") - - if j in displayDecomposition: - checked = "CHECKED" - else: - checked = "" - - barWidth = (len(traitSubarray) * screenWidth) / maxLength - output += ('''<tr bgcolor="%s"> - <td><input type="checkbox" name="display%d" value="1" %s>%s</input></td> - <td>%s</td> - <td><img src="/images/blue.png" width="%d" height="25"></td></tr>''' - % (colors[flip], j, checked, traitDesc, len(traitSubarray), barWidth)) - flip = not flip - - output += ''' - <tr> - <td colspan="3"> - <input class="button" type="submit" value="Filter" /></td> - </tr> - </table></form> - ''' - return output - - # showDetailedCorrelationResults - # actually show the traits in each subarray - def showDetailedCorrelationResults(self, p, inbredSetName, traits, - traitArray): - output = "<h2>Correlation Comparison Details</h2>" - - # the hidden form below powers all of the JavaScript links, - # the shopping cart links, and the correlation plot links - - output += ''' - <form action="%s%s" method="post"> - <input type="hidden" name="database" value="%s"> - <input type="hidden" name="FormID" value="showDatabase"> - <input type="hidden" name="traitfile" value=""> - <input type="hidden" name="incparentsf1" value="ON"> - <input type="hidden" name="ShowStrains" value="ON"> - <input type="hidden" name="ProbeSetID" value=""> - <input type="hidden" name="ShowLine" value="ON"> - <input type="hidden" name="RISet" value="%s"> - <input type="hidden" name="CellID" value=""> - <input type="hidden" name="database2" value=""> - <input type="hidden" name="rankOrder" value=""> - <input type="hidden" name="ProbeSetID2" value=""> - <input type="hidden" name="CellID2" value=""> - ''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, p["targetDatabase"], inbredSetName) - - - displayDecomposition = binaryDecompose(p["displaySets"]) - - # necessary to ensure that subset order is the same in the - # summary and the detailed view - displayDecomposition.sort() - - # here's a trick: the first trait we show must have the widest row because it correlates - # with the largest set of input traits - firstSubset = traitArray[displayDecomposition[0]] - firstTrait = firstSubset[1][0][0] - extraColumnCount = firstSubset[2] - totalColumnCount = 1 + len(firstTrait.row()) + extraColumnCount - - output += "<table cellpadding=2 cellspacing=0>\n" - for j in displayDecomposition: - i = traitArray[j][0] - traitSubarray = traitArray[j][1] - - # we don't display trait combinations for which there are - # no correlations - if len(traitSubarray) == 0: - continue - - # generate a description of the traits that this particular array - # matches highly - targetTraits = decomposeIndex(traits, i) - extraColumnHeaders = map(webqtlTrait.displayName, targetTraits) - traitDesc = string.join(extraColumnHeaders, ", ") - - # massage extraColumnHeaders so that they can be wrapped - for i in range(len(extraColumnHeaders)): - ech = extraColumnHeaders[i] - ech = ech.replace("-", " ") - ech = ech.replace("_", " ") - extraColumnHeaders[i] = ech - - # pad extraColumnHeaders if we have less columns than the max - paddingNeeded = extraColumnCount - len(extraColumnHeaders) - if paddingNeeded > 0: - extraColumnHeaders.extend(paddingNeeded * [" "]) - - # we limit the output to the top ones - if(p["subsetSize"] != -1 and len(traitSubarray) > p["subsetSize"]): - traitDesc += " (showing top %s of %s)" % (p["subsetSize"], len(traitSubarray)) - traitSubarray = traitSubarray[0:p["subsetSize"]] - - # combine that description with actual database traits themselves - # and the correlation values - output += '<tr><td colspan="%d"><h3>%s</h3></td></tr>' % (totalColumnCount, traitDesc) - #output += '<h3>%s</h3>\n<table cellpadding=2 cellspacing=0>\n'% traitDesc - - # we assume that every trait in traitSubarray is the same type - # of trait - flip = 0 - colors = ["FFFFFF", "cccccc"] - - output += traitSubarray[0][0].tableRowHeader([" "], extraColumnHeaders, colors[0]) - - for traitPair in traitSubarray: - corr = [] - traitPair[0].dbName = p['targetDatabase'] - trait = traitPair[0] - - for i in range(len(traitPair[1])): - corrValue = traitPair[1][i] - corrPlotLink = (''' - <a href="javascript:showCorrelationPlot2(db='%s',ProbeSetID='%s',CellID='',db2='%s',ProbeSetID2='%s',CellID2='',rank='%s')">%.2f</a> - ''' % (p["targetDatabaseName"], trait.name, targetTraits[i].db.name, targetTraits[i].name, "0", corrValue)) - corr.append(corrPlotLink) - - corr.extend(paddingNeeded * [" "]) - - checkbox = ('<INPUT TYPE="checkbox" NAME="searchResult" VALUE="%s:%s" />' - % (p["targetDatabaseName"], trait.name)) - flip = not flip - output += traitPair[0].tableRow([checkbox], corr, colors[flip]) - - #output += "</table>" - i += 1 - output += '<tr><td colspan="%d"> </td></tr>' % totalColumnCount - - output += "</table>" - - # print form buttons if there were checkboxes above - output += ''' - <div align="left"> - <INPUT TYPE="button" NAME="addselect" CLASS="button" VALUE="Add to Collection" - onClick="addRmvSelection('%s',this.form, 'addToSelection');"> - <INPUT TYPE="button" NAME="selectall" CLASS="button" VALUE="Select All" onClick="checkAll(this.form);"> - <INPUT TYPE="reset" CLASS="button" VALUE="Select None"> - </div> - </form> - ''' % inbredSetName - - return output - - # showCorrelationResults: ParamDict -> listof Trait -> tupleof (int,arrayof trait) -> String - # to build an output display for the multitrait correlation results - def showCorrelationResults(self, p, inbredSetName, traits, traitArray): - output = ''' - <h2>Correlation Comparison Summary</h2> - <p> - %s correlations were computed for each of the selected traits with each trait in - the <a href="/dbdoc/%s.html">%s</a> database. - Subsets of database traits for which correlations were higher than %s - or lower than -%s are shown below based on which traits - they correlated highly with. The top %s subsets, ranked by the number of input traits that - they correspond with, are shown, and at most %s traits in each subset are shown. </p> - ''' % (p["correlationName"], - p["targetDatabase"], p["targetDatabaseName"], - p["threshold"], p["threshold"], p["subsetCount"], - p["subsetSize"]) - - - totalTraits = 0 - for j in range(len(traitArray)): - totalTraits += len(traitArray[j][1]) - - if totalTraits == 0: - output += """ - <p> - No shared corrrelates were found with your given traits at this - threshold. You may wish to lower the correlation threshold or choose different traits. - </p> - """ - else: - output += self.showSummaryCorrelationResults(p, traits, traitArray) - output += self.showDetailedCorrelationResults(p, inbredSetName, - traits, traitArray) - - return output - -# decomposeIndex: (listof Trait) -> Int -> -# (listof Trait) -# to use i to partition T into a sublist -# each bit in i controls the inclusion or exclusion of a trait -def decomposeIndex(traits, i): - targetTraits = [] - - for j in range(len(traits)): - # look, mom, a bitwise and! - # expression below tests whether the jth bit is - # set in i - # see runCorrelation for how we decompose the - # array index - if (i & pow(2,j)) == pow(2,j): - targetTraits.append(traits[j]) - - return targetTraits - -# binaryDecompose: int -> (listof int) -# to decompose a number into its constituent powers of 2 -# returns a list of the exponents a_1...a_n such that the input m -# is m = 2^a_1 + ... + 2^a_n -def binaryDecompose(n): - if n == 0: - return [] - - # we start with the highest power of 2 <= this number - # and work our way down, subtracting powers of 2 - start = long(math.floor(math.log(n)/math.log(2))) - - exponents = [] - while start >= 0: - if n >= long(math.pow(2, start)): - n -= math.pow(2,start) - exponents.append(start) - start -= 1 - return exponents - -# powerOf : int -> int -> boolean -# to determine whether m is a power of n; -# more precisely, whether there exists z in Z s.t. -# n^z = m -def powerOf(m, n): - trialZ = math.floor(math.log(m)/math.log(n)) - return pow(n,trialZ) == m - - -class compCorrPage(templatePage.templatePage): - def __init__(self,fd): - templatePage.templatePage.__init__(self, fd) - - if not self.openMysql(): - return - - cursor = self.cursor - params = buildParamDict(cursor, fd) - - # get the input data - inbredSetName, traits = readInputFile(cursor, RootDir + params["filename"]) - - # and what we are comparing the data to - dbTraits = [] - if params["targetDatabaseType"] != "ProbeSet": - dbTraits = loadDatabase(cursor, params) - - - # run the comparison itself - strainCount = trait.queryStrainCount(cursor) # XZ, 09/10/2008: add module name - if params["targetDatabaseType"] == "ProbeSet": - results = runProbeSetCorrelations(cursor, params, traits) - else: - results = runCorrelations(params, strainCount, traits, dbTraits) - - # try to be smart about what to output: - # we want to limit the number of traits shown, at least initially - # and since traitArray is already sorted with most interesting - # subsets first, we simply pick up the first 500 or so traits - # that we find - if params["displaySets"] == 0: - selectedTraits = 0 - for j in range(len(results)): - #print "Scanning subarray %d" % j - if selectedTraits <= 200: - params["displaySets"] += pow(2, j) - selectedTraits += len(results[j][1]) - - traitList = [] - for oneTrait in traits: # XZ, 09/10/2008: change the original variable name 'trait' to 'oneTrait' - traitName = oneTrait.dbName+'::'+oneTrait.name # XZ, 09/10/2008: change the original variable name 'trait' to 'oneTrait' - aTrait = webqtlTrait(cursor=self.cursor, fullname=traitName) - traitList.append(aTrait) - - # and generate some output - txtOutputFilename = tempfile.mktemp() - txtOutputHandle = open(txtOutputFilename, "w") - txtOutput = TraitCorrelationText(params, traits, results) - txtOutputHandle.write(str(txtOutput)) - txtOutputHandle.close() - txtOutputFilename = os.path.split(txtOutputFilename)[1] - - self.dict['body'] = TraitCorrelationPage(fd, params, cursor, traitList, - results, inbredSetName, - txtOutputFilename).dict['body'] diff --git a/web/webqtl/compareCorrelates/trait.py b/web/webqtl/compareCorrelates/trait.py deleted file mode 100755 index ff1f8119..00000000 --- a/web/webqtl/compareCorrelates/trait.py +++ /dev/null @@ -1,1074 +0,0 @@ -#Trait.py -# -#--Individual functions are already annotated, more or less. -# -#Classes: -#RawPoint -#Trait -#ProbeSetTrait -#GenotypeTrait -#PublishTrait -#TempTrait -#-KA - -# trait.py: a data structure to represent a trait -import time -import string - -CONFIG_pubMedLinkURL = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=%s&dopt=Abstract" - -# RawPoint: to store information about the relationship between two particular -# traits -# RawPoint represents directly the input file -class RawPoint: - def __init__(self, i, j): - self.i = i - self.j = j - - def __eq__(self, other): - return (self.i == other.i and - self.j == other.j and - self.spearman == other.spearman and - self.pearson == other.pearson) - - def __str__(self): - return "(%s,%s,%s,%s)" % (self.i, self.j, self.spearman, self.pearson) - -def tdEscapeList(cols, align="left"): - """ - A helper function used by tableRow - in Trait that will convert a list of strings into a set of - table cells enclosed by <td>%s</td> tags - """ - html = "" - for col in cols: - html += '<td style="text-align: %s">%s</td>' % (align, col) - return html - -def thEscapeList(cols): - """ - A helper function used by tableRowHeader - in Trait that will convert a list of strings into a set of - table cells enclosed by <td>%s</td> tags - """ - html = "" - for col in cols: - html += "<th>%s</th>" % col - return html - -def commaEscapeList(cols): - """ - A helper function used by csvHeader and csvRow. - Really it's just a wrapper for string.join - """ - return '"' + string.join(cols, '","') + '"' - - -class Trait: - """ - A trait represents an attribute of an object. In the WebQTL database, traits are stored - as ProbeSets; that is, the average values of a set of probes are stored. - """ - def __init__(self, id="", name="", description="", symbol="", href=""): - self.id = id - self.name = name - self.dbName = "" - self.symbol = symbol - self.href = href - self.strainData = {} - - def populateDataId(self, cursor, freezeId): - """ - Retrieve the dataId for trait data corresponding to the given database - The way to do this depends on the particular type of trait, so we leave implementation - to subclasses. - """ - raise NotImplementedError - - def populateStrainData(self, cursor): - """ - Load this trait full of train data corresponding to the data id - The data id can either come from populateDataId - or can be set manually by the user of this class. - Xiaodong added: The way to do this depends on the particular type of trait, - so we leave implementation to subclasses. - - """ - raise NotImplementedError - - def shortName(self): - """ - To return a short name for this trait; this name should be - appropriate for a row or column title - """ - return self.name - - def nameNoDB(self): - """ - To return only the short name without the database attached - """ - strArray = self.shortName().split('::') - - return strArray[1] - - def datasetName(self): - """ - To return only the name of the dataset - """ - strArray = self.shortName().split('::') - - return strArray[0].strip() - - def longName(self): - """ - To return a long name for this trait; this name should be - appropriate for a key to a table - """ - return self.shortName() - - def __str__(self): - return self.shortName() - - def tableRowHelper(self, beforeCols, afterCols, color, thisRow): - """ - tableRowHelper: (arrayof String) -. String - To generate a table row to represent this object, appending - the additional information in beforeCols and afterCols - to the beginning and the end - """ - thisRow[0] = '<a href="%s">%s</a>' % (self.traitInfoLink(), - self.name) - html = '<tr bgcolor="%s">' % color - html += tdEscapeList(beforeCols + thisRow) - html += tdEscapeList(afterCols, align="right") - html += "</tr>" - - return html - - - def header(self): - """ - header: (listof String) - To generate a list of strings describing each piece of data - returned by row - """ - raise NotImplementedError - - def row(self): - """ - row: (listof String) - To generate a list of strings describing this object. The - elements of this list should be described by header() - """ - raise NotImplementedError - - def tableRowHeader(self, beforeCols, afterCols, color): - """ - tableRowHeader: (arrayof String) -> (arrayof String) -> String - To generate a table row header to represent this object, - appending the additional information in beforeCols and - afterCols to the beginning and end - """ - html = '<tr bgcolor="%s">' % color - html += thEscapeList(beforeCols + self.header() + - afterCols) - html += "</tr>" - return html - - def csvHeader(self, beforeCols, afterCols): - return commaEscapeList(beforeCols + self.header() + afterCols) - - def csvRow(self, beforeCols, afterCols): - return commaEscapeList(beforeCols + self.row() + afterCols) - - - def traitInfoLink(self): - """ - To build a trait info link to show information about this - trait. We assume that the database attribute is properly set - on the hidden form on the page where this link will go. - """ - return "javascript:showDatabase2('%s','%s','')" % (self.dbName, self.name) - -# ProbeSetTrait: a trait with data from a probeset -class ProbeSetTrait(Trait): - def __init__(self, id="", name="", description="", symbol="", href="", - chromosome="", MB="", GeneId=""): - Trait.__init__(self, id=id, name=name, href=href) - self.description = description - self.symbol = symbol - self.chromosome = chromosome - self.MB = MB - self.GeneId = GeneId - - def populateDataId(self, cursor, freezeId): - """ - Look up the data id for this trait given which - freeze it came from. - """ - cursor.execute(''' - SELECT - ProbeSetXRef.DataId - FROM - ProbeSetXRef - WHERE - ProbeSetId = %s AND - ProbeSetFreezeId = %s - ''' % (self.id, freezeId)) - - # we hope that there's only one record here - row = cursor.fetchone() - self.dataId = row[0] - - #XZ, 03/03/2009: Xiaodong implemented this fuction - def populateStrainData(self, cursor): - cursor.execute(''' - SELECT - ProbeSetData.StrainId, - ProbeSetData.value - FROM - ProbeSetData - WHERE - ProbeSetData.Id = %s''' % self.dataId) - for row in cursor.fetchall(): - self.strainData[int(row[0])] = float(row[1]) - - - def shortName(self): - """ - An improved string method that uses the gene symbol where - we have it - """ - if self.symbol != "": - return self.symbol - else: - return Trait.shortName(self) - - def longName(self): - """ - We use several bits of genetic information to give - useful information about this trait and where it is - """ - if self.chromosome != "": - chrPart = " (%s on Chr %s @ %s Mb)" % (self.symbol, - self.chromosome, - self.MB) - else: - chrPart = "" - - return "%s%s: %s" % (self.name, chrPart, self.description) - - def header(self): - return ["Name", "Symbol", "Description", - "Chr", "Position (Mb)"] - - def row(self): - if type(self.MB) is float: - MB = "%.2f" % self.MB - else: - MB = "" - - return [self.name, self.symbol, self.description, - self.chromosome, MB] - - def tableRow(self, beforeCols, afterCols, color): - """ - tableRow: (arrayof String) -> (arrayof String) -> String - To generate a table row to represent this object, appending - the additional information in beforeCols and afterCols to the - beginning and end - """ - thisRow = self.row() - - # trim description - if len(thisRow[2]) > 20: - thisRow[2] = thisRow[2][:20] + "..." - - # add NCBI info link - thisRow[1] = self.ncbiInfoLink() - - return self.tableRowHelper(beforeCols, afterCols, color, - thisRow) - - - def ncbiInfoLink(self): - """ - ncbiInfoLink :: String - To generate an NCBI info link for this trait. If we have a GeneId, - then we can go straight to the gene. If not, then we generate a search - link based on the gene symbol. If we have none of them, then we don't - generate a link at all. - """ - if self.GeneId != "": - cmd = "cmd=Retrieve&dopt=Graphics&list_uids=%s" % self.GeneId - elif self.symbol != "": - cmd = "cmd=Search&term=%s" % self.symbol - else: - return "" - - return ''' - <a target="_new" - href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&%s"> - %s</a> ''' % (cmd, self.symbol) - - -# GenotypeTrait: a trait with data from the genotype -class GenotypeTrait(Trait): - def __init__(self, id="", name="", href="", chromosome="", MB=""): - Trait.__init__(self, id=id, name=name, href=href) - self.chromosome = chromosome - self.MB = MB - - def populateDataId(self, cursor, freezeId): - """ - Look up the data id for this trait from the - genotype. - """ - cursor.execute(''' - SELECT - GenoXRef.DataId - FROM - GenoXRef - WHERE - GenoId = %s AND - GenoFreezeId = %s - ''' % (self.id, freezeId)) - - # we hope that there's only one record here - row = cursor.fetchone() - self.dataId = row[0] - - #XZ, 03/03/2009: Xiaodong implemented this fuction - def populateStrainData(self, cursor): - cursor.execute(''' - SELECT - GenoData.StrainId, - GenoData.value - FROM - GenoData - WHERE - GenoData.Id = %s''' % self.dataId) - for row in cursor.fetchall(): - self.strainData[int(row[0])] = float(row[1]) - - def header(self): - return ["Locus", "Chr", "Position (Mb)"] - - def row(self): - return [self.name, self.chromosome, "%.3f" % self.MB] - - def tableRow(self, beforeCols, afterCols, color): - return self.tableRowHelper(beforeCols, afterCols, color, self.row()) - -# PublishTrait: a trait with data from publications -class PublishTrait(Trait): - def __init__(self, id="", name="", href="", authors="", title="", - phenotype="", year=""): - Trait.__init__(self, id=id, name=name, href=href) - self.authors = authors - self.title = title - self.phenotype = phenotype - self.year = year - - def populateDataId(self, cursor, freezeId): - """ - Look up the data id for this trait from the - published set. For the moment, we assume that there's - only one publish freeze. - """ - cursor.execute(''' - SELECT - PublishXRef.DataId - FROM - PublishXRef, PublishFreeze - WHERE - PublishFreeze.Id = %s AND - PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND - PublishXRef.Id = %s - ''' % (freezeId, self.id)) - - # we hope that there's only one record here - row = cursor.fetchone() - self.dataId = row[0] - - #XZ, 03/03/2009: Xiaodong implemented this fuction - def populateStrainData(self, cursor): - cursor.execute(''' - SELECT - PublishData.StrainId, - PublishData.value - FROM - PublishData - WHERE - PublishData.Id = %s''' % self.dataId) - for row in cursor.fetchall(): - self.strainData[int(row[0])] = float(row[1]) - - - def longName(self): - """ - A more intelligent string function that uses - information about the publication from which this trait came - """ - return "%s: %s by %s" % (self.name, self.title, self.authors) - - def header(self): - return ["Record", "Phenotype", "Authors", "Year", "URL"] - - def row(self): - return [self.name, - self.phenotype, - self.authors, - str(self.year), - ""] - - def tableRow(self, beforeCols, afterCols, color): - """ - tableRow: (arrayof String) -> (arrayof String) -> String - To generate a table row to represent this object, appending - the additional information in beforeCols and afterCols to the - beginning and end - """ - thisRow = self.row() - - # for multiple authors, use "et. al" after first two - authors = thisRow[2].split(",") - if len(authors) > 2: - thisRow[2] = string.join(authors[:2], ",") + ", et al" - - # clip phenotype to 20 chars - if len(thisRow[1]) > 20: - thisRow[1] = thisRow[1][:20] + "..." - - # add Pub Med URL - thisRow[4] = '<a href="%s" target="_new">Pub Med</a>' % (CONFIG_pubMedLinkURL % self.href) - - return self.tableRowHelper(beforeCols, afterCols, color, - thisRow) - - -# TempTrait: a trait with data generate by user and stored in temp table -class TempTrait(Trait): - def __init__(self, id="", name="", href="", description=""): - Trait.__init__(self, id=id, name=name, href=href) - self.description = description - - def populateDataId(self, cursor, freeezeId): - """ - Look up the data id for this trait from the Temp table, freezeId isn't used, - it just for fixing the inherit - """ - cursor.execute(''' - SELECT - DataId - FROM - Temp - WHERE - Id=%s - ''' % (self.id)) - - # we hope that there's only one record here - row = cursor.fetchone() - self.dataId = row[0] - - #XZ, 03/03/2009: Xiaodong implemented this fuction - def populateStrainData(self, cursor): - cursor.execute(''' - SELECT - TempData.StrainId, - TempData.value - FROM - TempData - WHERE - TempData.Id = %s''' % self.dataId) - for row in cursor.fetchall(): - self.strainData[int(row[0])] = float(row[1]) - - - def row(self): - return [self.id, - self.name, - self.description, - ""] - - - def longName(self): - """ - For temp trait, the description always contents whole useful information - """ - return self.description - - -# queryGenotypeTraitByName : Cursor -> string -> GenotypeTrait -def queryGenotypeTraitByName(cursor, speciesId, name): - qry = ''' - SELECT - Geno.Id, - Geno.Name, - Geno.Chr, - Geno.Mb - FROM - Geno - WHERE - Geno.SpeciesId = %s and Geno.Name = "%s" ''' % (speciesId, name) - - cursor.execute(qry) - row = cursor.fetchone() - return GenotypeTrait(id=row[0], name=row[1], - chromosome=row[2], MB=row[3]) - -# queryPublishTraitByName : Cursor -> string -> PublishTrait -def queryPublishTraitByName(cursor, freezeId, name): - qry = ''' - SELECT - PublishXRef.Id, - Phenotype.Id, - Publication.Authors, - Publication.Title, - Publication.Year, - Publication.PubMed_ID - FROM - Publication, PublishXRef, Phenotype, PublishFreeze - WHERE - PublishFreeze.Id = %s AND - PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND - PublishXRef.Id = %s AND - PublishXRef.PublicationId = Publication.Id AND - PublishXRef.PhenotypeId = Phenotype.Id - ''' % (freezeId, name) - - cursor.execute(qry) - if cursor.rowcount == 0: - return None - else: - row = cursor.fetchone() - - return PublishTrait(id=row[0], name='%s'%row[0], - authors=row[2], title=row[3], - year=row[4], href=row[5]) - - -def queryTempTraitByName(cursor, name): - name=name.strip() - qry = ''' - SELECT - Temp.Id, - Temp.Name, - Temp.description - FROM - Temp - WHERE - Temp.Name= "%s" - ''' % (name) - - cursor.execute(qry) - if cursor.rowcount == 0: - return None - else: - row = cursor.fetchone() - return TempTrait(id=row[0], name=row[1], description=row[2], href='') - -# queryPopulatedProbeSetTraits: Cursor -> Integer -> dictof Trait -# to retrieve an entire probeset fully populated with data -# this query can take 15+ sec the old way (22,000 traits * 35 strains = half -# a million records) -# so we ask for the data in bulk -# -# cursor should be SSCursor for MySQL so rows are stored on the server side -# and tuples are used -# we explicitly close the cursor here as well -#XZ, 03/04/2009: It seems to me that this function is never be executed. -#XZ: Although it can be called from multitrait.loadDatabase, -#XZ: but the loadDatabase function will not be called -#XZ: if the targetDatabaseType is probeset. -#XZ: The probeset traits of target database are retrieved by execute -#XZ: queryPopulatedProbeSetTraits2 from correlation.calcProbeSetPearsonMatrix -def queryPopulatedProbeSetTraits(cursor, freezeId): - step1 = time.time() - traits = queryProbeSetTraits(cursor, freezeId) - traitDict = {} - for trait in traits: - traitDict[trait.id] = trait - - step2 = time.time() - print - #XZ, 03/04/2009: Xiaodong changed Data to ProbeSetData - cursor.execute(''' - SELECT - ProbeSetXRef.ProbeSetId, - ProbeSetData.StrainId, - ProbeSetData.value - FROM - ProbeSetXRef - Left Join ProbeSetData ON - ProbeSetXRef.DataId = ProbeSetData.Id - WHERE - ProbeSetXRef.ProbeSetFreezeId = %s - ''' % freezeId) - - step3 = time.time() - totalrows = 0 - somerows = cursor.fetchmany(1000) - while len(somerows) > 0: - totalrows += len(somerows) - for row in somerows: - # this line of code can execute more than one million times - traitDict[row[0]].strainData[int(row[1])] = row[2] - somerows = cursor.fetchmany(1000) - - #cursor.close() - step4 = time.time() - - time1 = step2 - step1 - time2 = step3 - step2 - time3 = step4 - step3 - time4 = step4 - step1 - #print "%f %f %f %f %d rows" % (round(time1, 2), - # round(time2, 2), - # round(time3, 2), - # round(time4, 2), - # totalrows) - #print "Fetched %d traits" % len(traits) - return traits - - -# queryPopulatedProbeSetTraits2: Cursor -> Integer -> dictof Trait -# to retrieve probeset fully populated whose ProbeSetId in a range -# a special ProbeSetId with data -# this query can take 15+ sec the old way (22,000 traits * 35 strains = half -# a million records) -# so we ask for the data in bulk -# -# cursor should be SSCursor for MySQL so rows are stored on the server side -# and tuples are used -# we explicitly close the cursor here as well -def queryPopulatedProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2): - step1 = time.time() - traits = queryProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2) - traitDict = {} - for trait in traits: - traitDict[trait.id] = trait - - step2 = time.time() - print - #XZ, 03/04/2009: Xiaodong changed Data to ProbeSetData - cursor.execute(''' - SELECT - ProbeSetXRef.ProbeSetId, - ProbeSetData.StrainId, - ProbeSetData.value - FROM - ProbeSetXRef - Left Join ProbeSetData ON - ProbeSetXRef.DataId = ProbeSetData.Id - WHERE - ProbeSetXRef.ProbeSetFreezeId = %s AND - ProbeSetXRef.ProbeSetId >= %s AND - ProbeSetXRef.ProbeSetId <= %s - ''' % (freezeId, ProbeSetId1, ProbeSetId2)) - - step3 = time.time() - totalrows = 0 - somerows = cursor.fetchmany(1000) - while len(somerows) > 0: - totalrows += len(somerows) - for row in somerows: - # this line of code can execute more than one million times - traitDict[row[0]].strainData[int(row[1])] = row[2] - somerows = cursor.fetchmany(1000) - - #cursor.close() - step4 = time.time() - - time1 = step2 - step1 - time2 = step3 - step2 - time3 = step4 - step3 - time4 = step4 - step1 - #print "%f %f %f %f %d rows" % (round(time1, 2), - # round(time2, 2), - # round(time3, 2), - # round(time4, 2), - # totalrows) - #print "Fetched %d traits" % len(traits) - return traits - - -# def noneFilter : string or none -> string -# to replace a possible None by an empty string -def noneFilter(x): - if x is None: - return "" - else: - return x - -# queryProbeSetTraits: Cursor -> Integer -> dictof Trait -def queryProbeSetTraits(cursor, freezeId): - """ - To locate all of the traits in a particular probeset - """ - qry = ''' - SELECT - ProbeSet.Id, - ProbeSet.Name, - ProbeSet.description, - ProbeSet.symbol, - ProbeSet.Chr, - ProbeSet.Mb, - ProbeSet.GeneId, - ProbeSetXRef.DataId - FROM - ProbeSet, - ProbeSetXRef - WHERE - ProbeSetXRef.ProbeSetId = ProbeSet.Id AND - ProbeSetXRef.ProbeSetFreezeId = %s - ORDER BY ProbeSet.Id - ''' % freezeId - - cursor.execute(qry) - rows = cursor.fetchall() - traits = [] - - for row in rows: - row = map(noneFilter, row) - trait = ProbeSetTrait(id=row[0], name=row[1], - description=row[2], - chromosome=row[4], - MB=row[5], - symbol=row[3], - GeneId=row[6]) - trait.dataId = row[7] - traits.append(trait) - - return traits - - -# queryProbeSetTraits2: Cursor -> Integer -> dictof Trait -def queryProbeSetTraits2(cursor, freezeId, ProbeSetId1, ProbeSetId2): - """ - To locate all of the traits in a particular probeset - """ - qry = ''' - SELECT - ProbeSet.Id, - ProbeSet.Name, - ProbeSet.description, - ProbeSet.symbol, - ProbeSet.Chr, - ProbeSet.Mb, - ProbeSet.GeneId, - ProbeSetXRef.DataId - FROM - ProbeSet, - ProbeSetXRef - WHERE - ProbeSetXRef.ProbeSetId = ProbeSet.Id AND - ProbeSetXRef.ProbeSetFreezeId = %s AND - ProbeSet.Id >= %s AND - ProbeSet.Id <= %s - ORDER BY ProbeSet.Id - ''' % (freezeId, ProbeSetId1, ProbeSetId2) - - cursor.execute(qry) - rows = cursor.fetchall() - traits = [] - - for row in rows: - row = map(noneFilter, row) - trait = ProbeSetTrait(id=row[0], name=row[1], - description=row[2], - chromosome=row[4], - MB=row[5], - symbol=row[3], - GeneId=row[6]) - trait.dataId = row[7] - traits.append(trait) - - return traits - - -# queryPublishTraits : Cursor -> arrayof Trait -def queryPublishTraits(cursor, freezeId): - """ - To locate all published traits - """ - qry = ''' - SELECT - Publication.Id, - Publication.Name, - Publication.PhenoType, - Publication.Authors, - Publication.Title, - Publication.Year, - Publication.PubMed_ID, - PublishXRef.DataId - FROM - Publication, - PublishXRef - WHERE - PublishXRef.PublishFreezeId = %s AND - PublishXRef.PublishId = Publication.Id - ''' % freezeId - - qry = ''' - SELECT - Publication.Id, - PublishXRef.Id, - Phenotype.Pre_publication_description, - Phenotype.Post_publication_description, - Publication.Authors, - Publication.Title, - Publication.Year, - Publication.PubMed_ID, - PublishXRef.DataId - FROM - Publication, PublishXRef, Phenotype, PublishFreeze - WHERE - PublishFreeze.Id = %s AND - PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND - PublishXRef.PublicationId = Publication.Id AND - PublishXRef.PhenotypeId = Phenotype.Id - ''' % freezeId - cursor.execute(qry) - rows = cursor.fetchall() - traits = [] - for row in rows: - PhenotypeString = row[3] - if not row[7] and row[2]: - PhenotypeString = row[2] - trait = PublishTrait(id=row[0], name= '%s' %row[1], - phenotype=PhenotypeString, - authors=row[4], - title=row[5], - year=row[6], - href=row[7]) - trait.dataId = row[8] - traits.append(trait) - - return traits - -# queryGenotypeTraits : Cursor -> arrayof Trait -def queryGenotypeTraits(cursor, freezeId): - """ - To locate all traits in the genotype - """ - qry = ''' - SELECT - Geno.Id, - Geno.Name, - Geno.Chr, - GenoXRef.DataId, - Geno.Mb - FROM - Geno, - GenoXRef - WHERE - GenoXRef.GenoId = Geno.Id - AND GenoXRef.GenoFreezeId = %s - ''' % freezeId - cursor.execute(qry) - rows = cursor.fetchall() - traits = [] - - for row in rows: - trait = GenotypeTrait(id=row[0], name=row[1], - chromosome=row[2], MB=row[4]) - trait.dataId = row[3] - traits.append(trait) - - return traits - -# queryProbeSetTraitByName : Cursor -> string -> Trait -# to find a particular trait given its name -def queryProbeSetTraitByName(cursor, name): - qry = ''' - SELECT - ProbeSet.Id, - ProbeSet.Name, - ProbeSet.description, - ProbeSet.symbol, - ProbeSet.Chr, - ProbeSet.Mb, - ProbeSet.GeneId - FROM - ProbeSet - WHERE - ProbeSet.Name = "%s" - ''' % name - #print qry - cursor.execute(qry) - row = cursor.fetchone() - - # convert a MySQL NULL value to an empty string - # for gene symbol - if row[3] is None: - sym = "" - else: - sym = row[3] - - return ProbeSetTrait(id=row[0], name=row[1], description=row[2], - symbol=sym, chromosome=row[4], MB=row[5], - GeneId=row[6]) - - -# queryTraits : Cursor -> string -> string -> arrayof Traits -# to find all of the traits whose descriptions match a certain string in a -# particular database -def queryTraits(cursor, dbId, queryString): - # we do this in two steps: - # first we get the data id for the matching traits - qry = ''' - SELECT - ProbeSet.Id, - ProbeSet.Name, - ProbeSet.description, - ProbeSetXRef.DataId - FROM - ProbeSet, - ProbeSetXRef - WHERE - ProbeSetXRef.ProbeSetFreezeId = %s AND - ProbeSet.Id = ProbeSetXRef.ProbeSetId AND - ProbeSet.description LIKE "%%%s%%" - ''' % (dbId, queryString) - # print qry - cursor.execute(qry) - - if cursor.rowcount == 0: - print "No traits found" - return [] - else: - print "%s traits found" % (cursor.rowcount) - - # maybe fetchall is bad; we will see - traits = [] - for row in cursor.fetchall(): - myTrait = Trait(row[0], row[1], row[2]) - myTrait.dataId = row[3] - traits.append(myTrait) - - # second we pull all of the strain data for each trait - print "Retrieving individual trait data..." - for trait in traits: - trait.populateStrainData(cursor, trait.dataId) - print "%s (%s) -- %s" % (trait.name, trait.id, trait.description) - - print "done" - return traits - -# queryProbeSetFreezes : Cursor -> arrayof String,String tuples -# to return the short and long name for each ProbeSetFreeze -# this function is designed specifically for building -# a database selector -def queryProbeSetFreezes(cursor): - cursor.execute(""" - SELECT - ProbeSetFreeze.Name, - ProbeSetFreeze.FullName - FROM - ProbeSetFreeze - ORDER BY - ProbeSetFreeze.Name - """) - - # for now, fetchall returns the data as a list of tuples - # which is what we want - return list(cursor.fetchall()) - -# queryProbeSetFreezeIdName: Cursor -> String -> String, String -# this function returns the -# id and the long name of a probesetfreeze given its name -# once again, it's designed specifically for building -# the database selector -def queryProbeSetFreezeIdName(cursor, name): - qry = (''' - SELECT - ProbeSetFreeze.Id, - ProbeSetFreeze.FullName - FROM - ProbeSetFreeze - WHERE - ProbeSetFreeze.Name = "%s" - ''' % name) - #print qry - cursor.execute(qry) - - row = cursor.fetchone() - return row - -# queryProbeSetFreezeName: Cursor -> String -> String -# to return the name of a probe set freeze given its id -def queryProbeSetFreezeName(cursor, id): - cursor.execute(''' - SELECT - ProbeSetFreeze.FullName - FROM - ProbeSetFreeze - WHERE - ProbeSetFreeze.Id = %s - ''' % id) - - row = cursor.fetchone() - return row[0] - -# dbNameToTypeId : Cursor -> String -> (String, String) -# to convert a database name to a (type, id) pair -def dbNameToTypeId(cursor, name): - types = ["ProbeSet", "Geno", "Publish"] - for type_ in types: - count = cursor.execute(''' - SELECT - %sFreeze.Id - FROM - %sFreeze - WHERE - Name = "%s" - ''' % (type_, type_, name)) - - if count != 0: - id = cursor.fetchone()[0] - return type_, id - - return None, None - -# dbTypeIdToName : Cursor -> String -> String -> String -# to convert a database (type,id) pair into a name -def dbTypeIdToName(cursor, dbType, dbId): - cursor.execute(''' - SELECT - %sFreeze.Name - FROM - %sFreeze - WHERE - Id = %s - ''' % (dbType, dbType, dbId)) - - row = cursor.fetchone() - return row[0] - -#XZ, July 21, 2010: I add this function -def getSpeciesIdByDbTypeId (cursor, dbType, dbId): - cursor.execute(''' - SELECT - SpeciesId - FROM - InbredSet, %sFreeze - WHERE - %sFreeze.Id = %s - and InbredSetId = InbredSet.Id - ''' % (dbType, dbType, dbId)) - - row = cursor.fetchone() - return row[0] - - -# queryStrainCount : Cursor -> int -# return the number of strains in the database -def queryStrainCount(cursor): - cursor.execute(''' - SELECT - Max(Strain.Id) - FROM - Strain - ''') - return (cursor.fetchone())[0] |