From ea46f42ee640928b92947bfb204c41a482d80937 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 8 May 2012 18:39:56 -0500 Subject: Add all the source codes into the github. --- .../correlationMatrix/CorrelationMatrixPage.py | 595 ++++++++++++++++++ .../correlationMatrix/TissueAbbreviationPage.py | 79 +++ .../correlationMatrix/TissueCorrelationPage.py | 673 +++++++++++++++++++++ web/webqtl/correlationMatrix/__init__.py | 0 .../correlationMatrix/tissueCorrelationMatrix.py | 132 ++++ 5 files changed, 1479 insertions(+) create mode 100755 web/webqtl/correlationMatrix/CorrelationMatrixPage.py create mode 100755 web/webqtl/correlationMatrix/TissueAbbreviationPage.py create mode 100755 web/webqtl/correlationMatrix/TissueCorrelationPage.py create mode 100755 web/webqtl/correlationMatrix/__init__.py create mode 100755 web/webqtl/correlationMatrix/tissueCorrelationMatrix.py (limited to 'web/webqtl/correlationMatrix') diff --git a/web/webqtl/correlationMatrix/CorrelationMatrixPage.py b/web/webqtl/correlationMatrix/CorrelationMatrixPage.py new file mode 100755 index 00000000..a01111f5 --- /dev/null +++ b/web/webqtl/correlationMatrix/CorrelationMatrixPage.py @@ -0,0 +1,595 @@ +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License +# as published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the GNU Affero General Public License for more details. +# +# This program is available from Source Forge: at GeneNetwork Project +# (sourceforge.net/projects/genenetwork/). +# +# Contact Drs. Robert W. 
Williams and Xiaodong Zhou (2010) +# at rwilliams@uthsc.edu and xzhou15@uthsc.edu +# +# +# +# This module is used by GeneNetwork project (www.genenetwork.org) +# +# Created by GeneNetwork Core Team 2010/08/10 +# +# Last updated by NL 2011/02/14 + +import os +import string +from htmlgen import HTMLgen2 as HT +import sys +import time +import numarray +import numarray.linear_algebra as la +import piddle as pid +import math + +from base.templatePage import templatePage +from base import webqtlConfig +from base.webqtlTrait import webqtlTrait +from utility import webqtlUtil +from utility import Plot + + + +# XZ, 09/09/2008: After adding several traits to collection, click "Correlation Matrix" button, +# XZ, 09/09/2008: This class will generate what you see. +######################################### +# Correlation Matrix Page +######################################### + +class CorrelationMatrixPage(templatePage): + + def __init__(self,fd,InputData=None): + + templatePage.__init__(self, fd) + + self.dict['title'] = 'Correlation Matrix' + + if not self.openMysql(): + return + + if not fd.genotype: + fd.readGenotype() + fd.strainlist = fd.f1list + fd.strainlist + + #self.searchResult = fd.formdata.getvalue('searchResult') + self.oldSearchResult = fd.formdata.getvalue('oldSearchResult') + + if self.oldSearchResult: + try: + self.searchResult = fd.formdata.getvalue('oldSearchResult') + except: + self.searchResult = fd.formdata.getvalue('searchResult') + + else: + self.searchResult = fd.formdata.getvalue('searchResult') + + if not self.searchResult: + heading = 'Correlation Matrix' + detail = ['You need to select at least two traits in order to generate correlation matrix.'] + self.error(heading=heading,detail=detail) + return + if type("1") == type(self.searchResult): + self.searchResult = [self.searchResult] + + if self.searchResult: + #testvals,names,dbInfos = self.getAllSearchResult(fd,self.searchResult) + if len(self.searchResult) > webqtlConfig.MAXCORR: + heading = 
'Correlation Matrix' + detail = ['In order to display Correlation Matrix properly, Do not select more than %d traits for Correlation Matrix.' % webqtlConfig.MAXCORR] + self.error(heading=heading,detail=detail) + return + + #XZ, 7/22/2009: this block is not necessary + #elif len(self.searchResult) > 40: + # noPCA = 1 + #else: + # noPCA = 0 + + traitList = [] + traitDataList = [] + for item in self.searchResult: + thisTrait = webqtlTrait(fullname=item, cursor=self.cursor) + thisTrait.retrieveInfo() + thisTrait.retrieveData(fd.strainlist) + traitList.append(thisTrait) + traitDataList.append(thisTrait.exportData(fd.strainlist)) + + else: + heading = 'Correlation Matrix' + detail = [HT.Font('Error : ',color='red'),HT.Font('Error occurs while retrieving data FROM database.',color='black')] + self.error(heading=heading,detail=detail) + return + + NNN = len(traitList) + + if NNN == 0: + heading = "Correlation Matrix" + detail = ['No trait was selected for %s data set. No matrix generated.' % self.data.RISet] + self.error(heading=heading,detail=detail) + return + elif NNN < 2: + heading = 'Correlation Matrix' + detail = ['You need to select at least two traits in order to generate correlation matrix.'] + self.error(heading=heading,detail=detail) + return + else: + + + + corArray = [([0] * (NNN+1))[:] for i in range(NNN+1)] + pearsonArray = [([0] * (NNN))[:] for i in range(NNN)] + spearmanArray = [([0] * (NNN))[:] for i in range(NNN)] + corArray[0][0] = 'Correlation' + TD_LR = HT.TD(colspan=2,width="100%",bgColor='#eeeeee') + form = HT.Form( cgi= os.path.join(webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE), enctype='multipart/form-data', name='showDatabase', submit=HT.Input(type='hidden')) + hddn = {'FormID':'showDatabase', 'ProbeSetID':'_','database':'_', + 'CellID':'_','ProbeSetID2':'_','database2':'_','CellID2':'_', + 'newNames':fd.formdata.getvalue("newNames", "_"), + 'RISet':fd.RISet,'ShowStrains':'ON','ShowLine':'ON', 'rankOrder':'_', + 
"allstrainlist":string.join(fd.strainlist, " "), 'traitList':string.join(self.searchResult, "\t")} + if fd.incparentsf1: + hddn['incparentsf1']='ON' + + for key in hddn.keys(): + form.append(HT.Input(name=key, value=hddn[key], type='hidden')) + + for item in self.searchResult: + form.append(HT.Input(name='oldSearchResult', value=str(item), type='hidden')) + + traiturls = [] + traiturls2 = [] + shortNames = [] + verboseNames = [] + verboseNames2 = [] + verboseNames3 = [] + abbreviation = '' + + #dbInfo.ProbeSetID = ProbeSetID + #dbInfo.CellID = CellID + for i, thisTrait in enumerate(traitList): + _url = "javascript:showDatabase2('%s','%s','%s');" % (thisTrait.db.name, thisTrait.name, thisTrait.cellid) + #_text = 'Trait%d: ' % (i+1)+str(thisTrait) + _text = 'Trait %d: ' % (i+1)+thisTrait.displayName() + + if thisTrait.db.type == 'Geno': + _shortName = 'Genotype' + abbreviation = 'Genotype' + _verboseName = 'Locus %s' % (thisTrait.name) + _verboseName2 = 'Chr %s @ %s Mb' % (thisTrait.chr, '%2.3f' % thisTrait.mb) + _verboseName3 = '' + elif thisTrait.db.type == 'Publish': + if thisTrait.post_publication_abbreviation: + AbbreviationString = thisTrait.post_publication_abbreviation + else: + AbbreviationString = '' + if thisTrait.confidential: + if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=self.privilege, userName=self.userName, authorized_users=thisTrait.authorized_users): + if thisTrait.pre_publication_abbreviation: + AbbreviationString = thisTrait.pre_publication_abbreviation + else: + AbbreviationString = '' + _shortName = 'Phenotype: %s' % (AbbreviationString) + _verboseName2 = '' + _verboseName3 = '' + if thisTrait.pubmed_id: + _verboseName = 'PubMed %d: ' % thisTrait.pubmed_id + else: + _verboseName = 'Unpublished ' + _verboseName += 'RecordID/%s' % (thisTrait.name) + PhenotypeString = thisTrait.post_publication_description + if thisTrait.confidential: + if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=self.privilege, 
userName=self.userName, authorized_users=thisTrait.authorized_users): + PhenotypeString = thisTrait.pre_publication_description + _verboseName2 = 'Phenotype: %s' % (PhenotypeString) + if thisTrait.authors: + a1 = string.split(thisTrait.authors,',')[0] + while a1[0] == '"' or a1[0] == "'" : + a1 = a1[1:] + _verboseName += ' by ' + _verboseName += HT.Italic('%s, and colleagues' % (a1)) + elif thisTrait.db.type == 'Temp': + abbreviation = '' + _shortName = thisTrait.name + if thisTrait.description: + _verboseName = thisTrait.description + else: + _verboseName = 'Temp' + _verboseName2 = '' + _verboseName3 = '' + else: + abbreviation = thisTrait.symbol + _shortName = 'Symbol: %s ' % thisTrait.symbol + _verboseName = thisTrait.symbol + _verboseName2 = '' + _verboseName3 = '' + if thisTrait.chr and thisTrait.mb: + _verboseName += ' on Chr %s @ %s Mb' % (thisTrait.chr,thisTrait.mb) + if thisTrait.description: + _verboseName2 = '%s' % (thisTrait.description) + if thisTrait.probe_target_description: + _verboseName3 = '%s' % (thisTrait.probe_target_description) + + cururl = HT.Href(text=_text, url=_url,Class='fs12') + cururl2 = HT.Href(text='Trait%d' % (i+1),url=_url,Class='fs12') + traiturls.append(cururl) + traiturls2.append(cururl2) + shortName = HT.Div(id="shortName_" + str(i), style="display:none") + shortName.append(_shortName) + shortNames.append(shortName) + verboseName = HT.Div(id="verboseName_" + str(i), style="display:none") + verboseName.append(_verboseName) + verboseNames.append(verboseName) + verboseName2 = HT.Div(id="verboseName2_" + str(i), style="display:none") + verboseName2.append(_verboseName2) + verboseNames2.append(verboseName2) + verboseName3 = HT.Div(id="verboseName3_" + str(i), style="display:none") + verboseName3.append(_verboseName3) + verboseNames3.append(verboseName3) + + + + corArray[i+1][0] = 'Trait%d: ' % (i+1)+str(thisTrait) + '/' + str(thisTrait) + ': ' + abbreviation + '/' + str(thisTrait) + ': ' + str(_verboseName) + ' : ' + 
str(_verboseName2) + ' : ' + str(_verboseName3) + corArray[0][i+1] = 'Trait%d: ' % (i+1)+str(thisTrait) + + corMatrixHeading = HT.Paragraph('Correlation Matrix', Class="title") + + tbl = HT.TableLite(Class="collap", border=0, cellspacing=1, + cellpadding=5, width='100%') + row1 = HT.TR(HT.TD(Class="fs14 fwb ffl b1 cw cbrb"), + HT.TD('Spearman Rank Correlation (rho)', Class="fs14 fwb ffl b1 cw cbrb", colspan= NNN+1,align="center") + ) + row2 = HT.TR( + HT.TD("P e a r s o n     r", rowspan= NNN+1,Class="fs14 fwb ffl b1 cw cbrb", width=10,align="center"), + HT.TD(Class="b1", width=300)) + for i in range(NNN): + row2.append(HT.TD(traiturls2[i], Class="b1", align="center")) + tbl.append(row1,row2) + + nOverlapTrait =9999 + nnCorr = len(fd.strainlist) + for i, thisTrait in enumerate(traitList): + newrow = HT.TR() + newrow.append(HT.TD(traiturls[i], shortNames[i], verboseNames[i], verboseNames2[i], + verboseNames3[i], Class="b1")) + names1 = [thisTrait.db.name, thisTrait.name, thisTrait.cellid] + for j, thisTrait2 in enumerate(traitList): + names2 = [thisTrait2.db.name, thisTrait2.name, thisTrait2.cellid] + if j < i: + corr,nOverlap = webqtlUtil.calCorrelation(traitDataList[i],traitDataList[j],nnCorr) + + rank = fd.formdata.getvalue("rankOrder", "0") + + if nOverlap < nOverlapTrait: + nOverlapTrait = nOverlap + if corr > 0.7: + fontcolor="red" + elif corr > 0.5: + fontcolor="#FF6600" + elif corr < -0.7: + fontcolor="blue" + elif corr < -0.5: + fontcolor="#009900" + else: + fontcolor ="#000000" + + pearsonArray[i][j] = corr + pearsonArray[j][i] = corr + if corr!= 0.0: + corArray[i+1][j+1] = '%2.3f/%d' % (corr,nOverlap) + thisurl = HT.Href(text=HT.Font('%2.3f'% corr,HT.BR(),'%d' % nOverlap ,color=fontcolor, Class="fs11 fwn"),url = "javascript:showCorrelationPlot2(db='%s',ProbeSetID='%s',CellID='%s',db2='%s',ProbeSetID2='%s',CellID2='%s',rank='%s')" % (names1[0], names1[1], names1[2], names2[0], names2[1], names2[2], rank)) + else: + corArray[i+1][j+1] = '---/%d' % nOverlap 
+ thisurl = HT.Font('---',HT.BR(), '%d' % nOverlap) + + newrow.append(HT.TD(thisurl,Class="b1",NOWRAP="ON",align="middle")) + elif j == i: + corr,nOverlap = webqtlUtil.calCorrelation(traitDataList[i],traitDataList[j],nnCorr) + pearsonArray[i][j] = 1.0 + spearmanArray[i][j] = 1.0 + corArray[i+1][j+1] = '%2.3f/%d' % (corr,nOverlap) + nOverlap = webqtlUtil.calCorrelation(traitDataList[i],traitDataList[j],nnCorr)[1] + newrow.append(HT.TD(HT.Href(text=HT.Font(HT.Italic("n"),HT.BR(),str(nOverlap),Class="fs11 fwn b1",align="center", color="000000"), url="javascript:showDatabase2('%s','%s','%s')" % (thisTrait.db.name, thisTrait.name, thisTrait.cellid)), bgColor='#cccccc', align="center", Class="b1", NOWRAP="ON")) + else: + corr,nOverlap = webqtlUtil.calCorrelationRank(traitDataList[i],traitDataList[j],nnCorr) + + rank = fd.formdata.getvalue("rankOrder", "1") + + if corr > 0.7: + fontcolor="red" + elif corr > 0.5: + fontcolor="#FF6600" + elif corr < -0.7: + fontcolor="blue" + elif corr < -0.5: + fontcolor="#009900" + else: + fontcolor ="#000000" + spearmanArray[i][j] = corr + spearmanArray[j][i] = corr + if corr!= 0.0: + corArray[i+1][j+1] = '%2.3f/%d' % (corr,nOverlap) + thisurl = HT.Href(text=HT.Font('%2.3f'% corr,HT.BR(),'%d' % nOverlap ,color=fontcolor, Class="fs11 fwn"),url = "javascript:showCorrelationPlot2(db='%s',ProbeSetID='%s',CellID='%s',db2='%s',ProbeSetID2='%s',CellID2='%s',rank='%s')" % (names1[0], names1[1], names1[2], names2[0], names2[1], names2[2], rank)) + else: + corArray[i+1][j+1] = '---/%d' % nOverlap + thisurl = HT.Span('---',HT.BR(), '%d' % nOverlap, Class="fs11 fwn") + newrow.append(HT.TD(thisurl,Class="b1", NOWRAP="ON",align="middle")) + tbl.append(newrow) + + info = HT.Blockquote('Lower left cells list Pearson product-moment correlations; upper right cells list Spearman rank order correlations. Each cell also contains the n of cases. 
Values higher than 0.7 are displayed in ',HT.Font('red', color='red'),'; those between 0.5 and 0.7 in ',HT.Font('orange', color='#FF6600'),'; Values lower than -0.7 are in ',HT.Font('blue', color='blue'),'; between -0.5 and -0.7 in ',HT.Font('green', color='#009900'),'. Select any cell to generate a scatter plot. Select trait labels for more information.', Class="fs13 fwn") + + exportbutton = HT.Input(type='button', name='export', value='Export', onClick="exportText(allCorrelations);",Class="button") + shortButton = HT.Input(type='button' ,name='dispShort',value=' Short Labels ', onClick="displayShortName();",Class="button") + verboseButton = HT.Input(type='button' ,name='dispVerbose',value=' Long Labels ', onClick="displayVerboseName();", Class="button") + form.append(HT.Blockquote(tbl,HT.P(),shortButton,verboseButton,exportbutton)) + TD_LR.append(corMatrixHeading,info,form,HT.P()) + + #if noPCA: + # TD_LR.append(HT.Blockquote('No PCA is computed if more than 32 traits are selected.')) + + #print corArray + exportScript = """ + + + """ + exportScript = exportScript % str(corArray) + self.dict['js1'] = exportScript+'
' + self.dict['body'] = str(TD_LR) + + #don't calculate PCA while number exceed 32 + #if noPCA: + # return + + #XZ, 7/22/2009: deal with PCA stuff + #Only for Array Data + + if NNN > 2: + + traitname = map(lambda X:str(X.name), traitList) + + #generate eigenvalues + + # import sys + sys.argv=[" "] + # import numarray + # import numarray.linear_algebra as la + #spearmanEigen = eigenvectors(array(spearmanArray)) + pearsonEigen = la.eigenvectors(numarray.array(pearsonArray)) + #spearmanEigenValue,spearmanEigenVectors = self.sortEigenVectors(spearmanEigen) + pearsonEigenValue,pearsonEigenVectors = self.sortEigenVectors(pearsonEigen) + + + """ + for i in range(len(pearsonEigenValue)): + if type(pearsonEigenValue[i]).__name__ == 'complex': + pearsonEigenValue[i] = pearsonEigenValue[i].real + for i in range(len(pearsonEigenVectors)): + for j in range(len(pearsonEigenVectors[i])): + if type(pearsonEigenVectors[i][j]).__name__ == 'complex': + pearsonEigenVectors[i][j] = pearsonEigenVectors[i][j].real + if type(pearsonEigenVectors[i][j]).__name__ == 'complex': + pearsonEigenVectors[i][j] = pearsonEigenVectors[i][j].real + """ + + if type(pearsonEigenValue[0]).__name__ == 'complex': + pass + else: + traitHeading = HT.Paragraph('PCA Traits',align='left', Class="title") + + tbl2 = self.calcPCATraits(traitDataList=traitDataList, nnCorr=nnCorr, NNN=NNN, pearsonEigenValue=pearsonEigenValue, + pearsonEigenVectors=pearsonEigenVectors, form=form, fd=fd) + #Buttons on search page + #mintmap = HT.Input(type='button' ,name='mintmap',value='Multiple Mapping', onClick="databaseFunc(this.form,'showIntMap');",Class="button") + addselect = HT.Input(type='button' ,name='addselect',value='Add to Collection', onClick="addRmvSelection('%s', this.form, 'addToSelection');" % fd.RISet,Class="button") + selectall = HT.Input(type='button' ,name='selectall',value='Select All', onClick="checkAll(this.form);",Class="button") + reset = HT.Input(type='reset',name='',value='Select None',Class="button") + 
updateNames = HT.Input(type='button', name='updateNames',value='Update Trait Names', onClick="editPCAName(this.form);", Class="button") + chrMenu = HT.Input(type='hidden',name='chromosomes',value='all') + + """ + #need to be refined + if fd.genotype.Mbmap: + scaleMenu = HT.Select(name='scale') + scaleMenu.append(tuple(["Genetic Map",'morgan'])) + scaleMenu.append(tuple(["Physical Map",'physic'])) + else: + scaleMenu = "" + """ + + tbl2.append(HT.TR(HT.TD(HT.P(),chrMenu,updateNames,selectall,reset,addselect,colspan=3))) + form.append(HT.P(),traitHeading,HT.Blockquote(tbl2)) + + plotHeading1 = HT.Paragraph('Scree Plot', Class="title") + TD_LR.append(plotHeading1) + img1 = self.screePlot(NNN=NNN, pearsonEigenValue=pearsonEigenValue) + + TD_LR.append(HT.Blockquote(img1)) + + plotHeading2 = HT.Paragraph('Factor Loadings Plot', Class="title") + TD_LR.append(plotHeading2) + img2 = self.factorLoadingsPlot(pearsonEigenVectors=pearsonEigenVectors, traitList=traitList) + + TD_LR.append(HT.Blockquote(img2)) + + self.dict['body'] = str(TD_LR) + + def screePlot(self, NNN=0, pearsonEigenValue=None): + + c1 = pid.PILCanvas(size=(700,500)) + Plot.plotXY(canvas=c1, dataX=range(1,NNN+1), dataY=pearsonEigenValue, rank=0, labelColor=pid.blue,plotColor=pid.red, symbolColor=pid.blue, XLabel='Factor Number', connectdot=1,YLabel='Percent of Total Variance %', title='Pearson\'s R Scree Plot') + filename= webqtlUtil.genRandStr("Scree_") + c1.save(webqtlConfig.IMGDIR+filename, format='gif') + img=HT.Image('/image/'+filename+'.gif',border=0) + + return img + + def factorLoadingsPlot(self, pearsonEigenVectors=None, traitList=None): + + traitname = map(lambda X:str(X.name), traitList) + c2 = pid.PILCanvas(size=(700,500)) + Plot.plotXY(c2, pearsonEigenVectors[0],pearsonEigenVectors[1], 0, dataLabel = traitname, labelColor=pid.blue, plotColor=pid.red, symbolColor=pid.blue,XLabel='Factor (1)', connectdot=1, YLabel='Factor (2)', title='Factor Loadings Plot (Pearson)', loadingPlot=1) + filename= 
webqtlUtil.genRandStr("FacL_") + c2.save(webqtlConfig.IMGDIR+filename, format='gif') + img = HT.Image('/image/'+filename+'.gif',border=0) + + return img + + def calcPCATraits(self, traitDataList=None, nnCorr=0, NNN=0, pearsonEigenValue=None, pearsonEigenVectors=None, form=None, fd=None): + """ + This function currently returns the html to be displayed instead of the traits themselves. Need to fix later. + """ + + detailInfo = string.split(self.searchResult[0],':') + + self.sameProbeSet = 'yes' + for item in self.searchResult[1:]: + detailInfo2 = string.split(item,':') + if detailInfo[0] != detailInfo2[0] or detailInfo[1] != detailInfo2[1]: + self.sameProbeSet = None + break + + for item in traitDataList: + if len(item) != nnCorr: + return + infoStrains = [] + infoStrainsPos = [] + dataArray = [[] for i in range(NNN)] + + for i in range(len(traitDataList[0])): + currentStrain = 1 + for j in range(NNN): + if not traitDataList[j][i]: + currentStrain = 0 + break + if currentStrain == 1: + infoStrains.append(fd.strainlist[i]) + infoStrainsPos.append(i) + for j in range(NNN): + dataArray[j].append(traitDataList[j][i]) + + + self.cursor.execute('delete Temp, TempData FROM Temp, TempData WHERE Temp.DataId = TempData.Id and UNIX_TIMESTAMP()-UNIX_TIMESTAMP(CreateTime)>%d;' % webqtlConfig.MAXLIFE) + + StrainIds = [] + for item in infoStrains: + self.cursor.execute('SELECT Strain.Id FROM Strain,StrainXRef, InbredSet WHERE Strain.Name="%s" and Strain.Id = StrainXRef.StrainId and StrainXRef.InbredSetId = InbredSet.Id and InbredSet.Name = "%s"' % (item, fd.RISet)) + StrainIds.append('%d' % self.cursor.fetchone()[0]) + + """ + #minimal 12 overlapping strains + if len(dataArray[0]) < 12: + form.append(HT.P(),traitHeading,HT.Blockquote(HT.Paragraph('The number of overlapping strains is less than 12, no PCA scores computed.',align='left'))) + self.dict['body'] = str(TD_LR) + return + """ + dataArray = self.zScore(dataArray) + dataArray = numarray.array(dataArray) + dataArray2 = 
numarray.dot(pearsonEigenVectors,dataArray) + + tbl2 = HT.TableLite(cellSpacing=2,cellPadding=0,border=0, width="100%") + + ct0 = time.localtime(time.time()) + ct = time.strftime("%B/%d %H:%M:%S",ct0) + if self.sameProbeSet: + newDescription = 'PCA Traits generated at %s from %s' % (ct,detailInfo[1]) + else: + newDescription = 'PCA Traits generated at %s from traits selected' % ct + + + j = 1 + self.cursor.execute('SELECT Id FROM InbredSet WHERE Name = "%s"' % fd.RISet) + InbredSetId = self.cursor.fetchall()[0][0] + user_ip = fd.remote_ip + if fd.formdata.getvalue("newNames"): + newNames = fd.formdata.getvalue("newNames").split(",") + else: + newNames = 0 + + for item in dataArray2: + if pearsonEigenValue[j-1] < 100.0/NNN: + break + + if (newNames == 0): + description = '%s : PC%02d' % (newDescription, j) + else: + description = '%s : %s' % (newDescription, newNames[j-1]) + + self.cursor.execute('SELECT max(id) FROM TempData') + try: + DataId = self.cursor.fetchall()[0][0] + 1 + except: + DataId = 1 + newProbeSetID = webqtlUtil.genRandStr("PCA_Tmp_") + self.cursor.execute('insert into Temp(Name,description, createtime,DataId,InbredSetId,IP) values(%s,%s,Now(),%s,%s,%s)' ,(newProbeSetID, description, DataId,InbredSetId,user_ip)) + + k = 0 + for StrainId in StrainIds: + self.cursor.execute('insert into TempData(Id, StrainId, value) values(%s,%s,%s)' % (DataId, StrainId, item[k]*(-1.0))) + k += 1 + setDescription = HT.Div(id="pcaTrait%s" % j) + descriptionLink = HT.Href(text=description, url="javascript:showDatabase2('Temp','%s','')" % newProbeSetID, Class="fwn") + descriptionEdit = HT.Input(type='text', value='', name='editName%s' % j) + + #onBlur='editPDAName(this.form, %s);' % j + + setDescription.append(descriptionLink) + setDescription.append(descriptionEdit) + + traitName = "%s:%s" % ('Temp',newProbeSetID) + tbl2.append(HT.TR(HT.TD("%d."%j,align="right",valign="top"),HT.TD(HT.Input(type="checkbox", Class="checkbox", 
name="searchResult",value=traitName),valign="top",width=50),HT.TD(setDescription))) + j += 1 + + return tbl2 + + def zScore(self,dataArray): + NN = len(dataArray[0]) + if NN < 10: + return dataArray + else: + i = 0 + for data in dataArray: + N = len(data) + S = reduce(lambda x,y: x+y, data, 0.) + SS = reduce(lambda x,y: x+y*y, data, 0.) + mean = S/N + var = SS - S*S/N + stdev = math.sqrt(var/(N-1)) + data2 = map(lambda x:(x-mean)/stdev,data) + dataArray[i] = data2 + i += 1 + return dataArray + + def sortEigenVectors(self,vector): + try: + eigenValues = vector[0].tolist() + eigenVectors = vector[1].tolist() + combines = [] + i = 0 + for item in eigenValues: + combines.append([eigenValues[i],eigenVectors[i]]) + i += 1 + combines.sort(webqtlUtil.cmpEigenValue) + A = [] + B = [] + for item in combines: + A.append(item[0]) + B.append(item[1]) + sum = reduce(lambda x,y: x+y, A, 0.0) + A = map(lambda x:x*100.0/sum, A) + return [A,B] + except: + return [] + diff --git a/web/webqtl/correlationMatrix/TissueAbbreviationPage.py b/web/webqtl/correlationMatrix/TissueAbbreviationPage.py new file mode 100755 index 00000000..ad8f0ac7 --- /dev/null +++ b/web/webqtl/correlationMatrix/TissueAbbreviationPage.py @@ -0,0 +1,79 @@ +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License +# as published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the GNU Affero General Public License for more details. +# +# This program is available from Source Forge: at GeneNetwork Project +# (sourceforge.net/projects/genenetwork/). +# +# Contact Drs. Robert W. 
Williams and Xiaodong Zhou (2010) +# at rwilliams@uthsc.edu and xzhou15@uthsc.edu +# +# +# +# This module is used by GeneNetwork project (www.genenetwork.org) +# +# Created by GeneNetwork Core Team 2011/12/7 +# +# Last updated by GeneNetwork Core Team 2011/12/7 + + +from base.templatePage import templatePage +from htmlgen import HTMLgen2 as HT + +import string +import os + + +class TissueAbbreviationPage (templatePage): + + def __init__(self,fd): + templatePage.__init__(self, fd) + + shortName=fd.formdata.getfirst("shortTissueName", ',') + fullName=fd.formdata.getfirst("fullTissueName", ',') + shortNameList=[] + fullNameList=[] + + if shortName: + shortNameList=shortName.split(',') + + if fullName: + fullNameList=fullName.split(',') + + tissueAbbrDict={} + for i, item in enumerate(shortNameList): + tissueAbbrDict[item]=fullNameList[i] + + if tissueAbbrDict: + + # Creates the table for the fullname and shortname of Tissue + tissueAbbrTable = HT.TableLite(border=1, cellspacing=5, cellpadding=3, Class="collap") + shortNameList = tissueAbbrDict.keys() + shortNameList.sort() + abbrHeaderStyle="fs14 fwb ffl" + abbrStyle="fs14 fwn ffl" + + tissueAbbrTable.append(HT.TR(HT.TD('Abbr  ', Class=abbrHeaderStyle, NOWRAP = 1),HT.TD('Full Name  ', Class=abbrHeaderStyle, NOWRAP = 1))) + for item in shortNameList: + thisTR = HT.TR(HT.TD(item, Class=abbrStyle, NOWRAP = 1)) + thisTR.append(HT.TD(tissueAbbrDict[item], Class=abbrStyle, NOWRAP = 1)) + + tissueAbbrTable.append(thisTR) + + self.dict['body'] = HT.TD(HT.Paragraph("Tissue Abbreviation", Class="title"), HT.Blockquote(tissueAbbrTable)) + self.dict['title'] = "Tissue Abbreviation" + else: + heading = "Tissue abbreviation" + detail = ["Cannot found Tissue Abbreviation. 
Please try again later."] + self.error(heading=heading,detail=detail) + return + + diff --git a/web/webqtl/correlationMatrix/TissueCorrelationPage.py b/web/webqtl/correlationMatrix/TissueCorrelationPage.py new file mode 100755 index 00000000..7cb86d8c --- /dev/null +++ b/web/webqtl/correlationMatrix/TissueCorrelationPage.py @@ -0,0 +1,673 @@ +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License +# as published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the GNU Affero General Public License for more details. +# +# This program is available from Source Forge: at GeneNetwork Project +# (sourceforge.net/projects/genenetwork/). +# +# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) +# at rwilliams@uthsc.edu and xzhou15@uthsc.edu +# +# +# +# This module is used by GeneNetwork project (www.genenetwork.org) +# user can search correlation value and P-Value by inputting one pair gene symbols or multiple gene symbols. 
+ +# Created by GeneNetwork Core Team 2010/07/07 +# Last updated by NL, 2011/03/25 + +from htmlgen import HTMLgen2 as HT +import os +import sys +import time +import string +import pyXLWriter as xl +import cPickle + +from base.templatePage import templatePage +from base import webqtlConfig +from base.webqtlTrait import webqtlTrait +from correlationMatrix.tissueCorrelationMatrix import tissueCorrelationMatrix +from utility import webqtlUtil +from utility.THCell import THCell +from utility.TDCell import TDCell + + +######################################### +# Tissue Correlation Page +######################################### + +class TissueCorrelationPage(templatePage): + + def __init__(self, fd): + + templatePage.__init__(self, fd) + + if not self.openMysql(): + return + + #read input fields + self.action = fd.formdata.getvalue("action", "").strip() + self.geneSymbols = fd.formdata.getvalue("geneSymbols","").strip() + self.tissueProbeSetFeezeId = fd.formdata.getvalue("tissueProbeSetFeezeId", "").strip() + self.recordReturnNum = fd.formdata.getvalue("recordReturnNum", "0").strip() + self.calculateMethod = fd.formdata.getvalue("calculateMethod", "0").strip() + + TissueCorrMatrixObject = tissueCorrelationMatrix(tissueProbeSetFreezeId=self.tissueProbeSetFeezeId) + + if not self.geneSymbols: + # default page + + Heading = HT.Paragraph("Tissue Correlation", Class="title") + Intro = HT.Blockquote("This function computes correlations between transcript expression across different organs and tissues.") + Intro.append(HT.BR(),"Select a data set from the pull-down menu and then compute correlations.") + + formName='searchTissueCorrelation' + form = HT.Form(cgi= os.path.join(webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE), target='_blank',enctype='multipart/form-data', name= formName, submit=HT.Input(type='hidden')) + form.append(HT.Input(type="hidden", name="FormID", value="")) + form.append(HT.Input(type="hidden", name="action", value="disp")) + + # added by NL 10/12/2010, 
retreive dataSet info from TissueProbeSetFreeze to get all TissueProbeSetFreezeId, datasetName and FullName + tissProbeSetFreezeIds,dataSetNames,dataSetfullNames = TissueCorrMatrixObject.getTissueDataSet() + + dataSetList=[] + for i in range(len(tissProbeSetFreezeIds)): + dataSetList.append((dataSetfullNames[i], tissProbeSetFreezeIds[i])) + dataSetMenu = HT.Select(dataSetList,name="tissueProbeSetFeezeId") + + InfoFile =HT.Input(type="button", Class="button", value=" Info ", onClick="tissueDatasetInfo(this.form.tissueProbeSetFeezeId,%s);"%(dataSetNames)) + form.append(HT.Strong("     "),dataSetMenu,InfoFile,HT.BR()); + + form.append(HT.BR(),HT.Strong("     Please enter only one gene symbol/ENTREZ gene Id per line."),HT.BR(),HT.Strong("     "),HT.Textarea(name="geneSymbols", rows=10, cols=50, text=""),HT.BR(),HT.BR()) + # calculate method radio button + calculateMethodMenu =HT.Input(type="radio", name="calculateMethod", value="0", checked="checked") + calculateMethodMenu1 =HT.Input(type="radio", name="calculateMethod", value="1") + # record Return method dropdown menu + recordReturnMenu = HT.Select(name="recordReturnNum") + recordReturnMenu.append(('Top 100','0')) + recordReturnMenu.append(('Top 200','1')) + recordReturnMenu.append(('Top 500','2')) + recordReturnMenu.append(('Top 1000','3')) + recordReturnMenu.append(('Top 2000','4')) + recordReturnMenu.append(('All','5')) + + # working for input symbol has only one; + form.append(HT.Strong("     "),HT.Span("Return:", Class="ffl fwb fs12"),HT.Strong("     "),recordReturnMenu,HT.BR()); + form.append(HT.BR(),HT.Strong("     "),'Pearson',calculateMethodMenu," "*3,'Spearman Rank',calculateMethodMenu1,HT.BR(),HT.BR()); + form.append(HT.Strong("   "),HT.Input(type="button", value=" Compute ", Class="button",onClick="selectFormIdForTissueCorr('%s');"%formName)) + form.append(HT.Strong("    "),HT.Input(type="button", Class="button", value=" Make Default ", onClick = "makeTissueCorrDefault(this.form);")) + + TD_LR = 
HT.TD(height=200,width="100%",bgcolor='#eeeeee',align="left") + TD_LR.append(Heading,Intro,form) + self.content_type = 'text/html' + self.dict['js1'] = '
' + # get tissueProbesetFreezeId from cookie + self.dict['js2'] = 'onload ="getTissueCorrDefault(\'searchTissueCorrelation\');"' + self.dict['body'] = str(TD_LR) + self.dict['title'] = "Tissue Correlation" + elif self.action == 'disp': + TissueCount =TissueCorrMatrixObject.getTissueCountofCurrentDataset() + + # add by NL for first Note part in the tissue correlation page. 2010-12-23 + note ="" + dataSetName="" + datasetFullName="" + dataSetName, datasetFullName= TissueCorrMatrixObject.getFullnameofCurrentDataset() + + noteURL = "../dbdoc/"+ dataSetName+".html" + noteText = " was used to compute expression correlation across %s samples of tissues and organs. ["%TissueCount + # dataset download + datasetURL = "../dbdoc/"+ dataSetName+".xls" + datasetDownload =HT.Href(text="Download experiment data",url=datasetURL,Class='fs13',target="_blank") + note = HT.Blockquote(HT.Href(text=datasetFullName,url=noteURL,Class='fs13',target="_blank"),noteText, datasetDownload,"]",HT.BR()) + + geneSymbolLst = [] # gene Symbol list + geneSymbolLst = TissueCorrMatrixObject.getGeneSymbolLst(self.geneSymbols) + + symbolCount = len(geneSymbolLst) + # The input symbol limit is 100. + heading = "Tissue Correlation" + if symbolCount > 100: + detail = ['The Gene symbols you have input are more than 100. Please limit them to 100.'] + self.error(heading=heading,detail=detail) + return + elif symbolCount==0: + detail = ['No Gene Symbol was input. No Tissue Correlation matrix generated.' ] + self.error(heading=heading,detail=detail) + return + else: + # search result page + # The input symbols should be no less than 1. 
+ self.content_type = 'text/html' + if symbolCount == 1: + self.displaySingleSymbolResultPage(primaryGeneSymbol=geneSymbolLst[0],datasetFullName=datasetFullName,tProbeSetFreezeId=self.tissueProbeSetFeezeId, TissueCorrMatrixObject =TissueCorrMatrixObject,recordReturnNum=self.recordReturnNum,method=self.calculateMethod, note=note,TissueCount =TissueCount) + else: + self.displayMultiSymbolsResultPage(geneSymbolLst=geneSymbolLst, symbolCount=symbolCount, tProbeSetFreezeId=self.tissueProbeSetFeezeId,TissueCorrMatrixObject =TissueCorrMatrixObject,note=note,TissueCount =TissueCount) + + else: + heading = "Tissue Correlation" + detail = ['There\'s something wrong with input gene symbol(s), or the value of parameter [action] is not right.' ] + self.error(heading=heading,detail=detail) + return +############################# +# functions +############################# + + # result page when input symbol has only one + def displaySingleSymbolResultPage(self,primaryGeneSymbol=None, datasetFullName=None,tProbeSetFreezeId=None, TissueCorrMatrixObject =None,recordReturnNum=None,method=None,note=None,TissueCount =None): + formName = webqtlUtil.genRandStr("fm_") + form = HT.Form(cgi= os.path.join(webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE), enctype='multipart/form-data',name= formName, submit=HT.Input(type='hidden')) + # the following hidden elements are required parameter in Class(PlotCorrelationPage). So we need to define them here. + form.append(HT.Input(type="hidden", name="action", value="disp")) + form.append(HT.Input(type="hidden", name="FormID", value="dispSingleTissueCorrelation")) + form.append(HT.Input(type="hidden", name="X_geneSymbol", value="")) + form.append(HT.Input(type="hidden", name="Y_geneSymbol", value="")) + form.append(HT.Input(type="hidden", name="ProbeSetID", value="")) + # RISet is not using in Tissue correlation, but is a required parameter in Class(PlotCorrelationPage). So we set dummy value(BXD). 
+ form.append(HT.Input(type="hidden", name="RISet", value="BXD")) + form.append(HT.Input(type="hidden", name="ShowLine", value="1")) + form.append(HT.Input(type="hidden", name="TissueProbeSetFreezeId", value=tProbeSetFreezeId)) + form.append(HT.Input(type="hidden", name="rankOrder", value=0)) + + traitList =[] + try: + symbolCorrDict, symbolPvalueDict = TissueCorrMatrixObject.calculateCorrOfAllTissueTrait(primaryTraitSymbol=primaryGeneSymbol,method=method) + except: + heading = "Tissue Correlation" + detail = ['Please use the official NCBI gene symbol.' ] + self.error(heading=heading,detail=detail) + return + + symbolList0,geneIdDict,dataIdDict,ChrDict,MbDict,descDict,pTargetDescDict=TissueCorrMatrixObject.getTissueProbeSetXRefInfo(GeneNameLst=[]) + # In case, upper case and lower case issue of symbol, mappedByTargetList function will update input geneSymbolLst based on database search result + tempPrimaryGeneSymbol =self.mappedByTargetList(primaryList=symbolList0,targetList=[primaryGeneSymbol]) + primaryGeneSymbol =tempPrimaryGeneSymbol[0] + + returnNum = self.getReturnNum(recordReturnNum) + symbolListSorted=[] + symbolList=[] + # get key(list) of symbolCorrDict(dict) based on sorting symbolCorrDict(dict) by its' value in desc order + symbolListSorted=sorted(symbolCorrDict, key=symbolCorrDict.get, reverse=True) + symbolList = self.mappedByTargetList(primaryList=symbolList0,targetList=symbolListSorted) + + if returnNum==None: + returnNum =len(symbolList0) + IntroReturnNum ="All %d "%returnNum + else: + IntroReturnNum ="The Top %d" %returnNum + + symbolList = symbolList[:returnNum] + + pageTable = HT.TableLite(cellSpacing=0,cellPadding=0,width="100%", border=0, align="Left") + + ############## + # Excel file # + ############## + filename= webqtlUtil.genRandStr("Corr_") + xlsUrl = HT.Input(type='button', value = 'Download Table', onClick= "location.href='/tmp/%s.xls'" % filename, Class='button') + # Create a new Excel workbook + workbook = xl.Writer('%s.xls' % 
(webqtlConfig.TMPDIR+filename)) + headingStyle = workbook.add_format(align = 'center', bold = 1, border = 1, size=13, fg_color = 0x1E, color="white") + #There are 6 lines of header in this file. + worksheet = self.createExcelFileWithTitleAndFooter(workbook=workbook, datasetName=datasetFullName, returnNumber=returnNum) + newrow = 6 + pageTable.append(HT.TR(HT.TD(xlsUrl,height=40))) + + # get header part of result table and export excel file + tblobj = {} + tblobj['header'], worksheet = self.getTableHeader( method=method, worksheet=worksheet, newrow=newrow, headingStyle=headingStyle) + newrow += 1 + + # get body part of result table and export excel file + tblobj['body'], worksheet = self.getTableBody(symbolCorrDict=symbolCorrDict, symbolPvalueDict=symbolPvalueDict,symbolList=symbolList,geneIdDict=geneIdDict,ChrDict=ChrDict,MbDict=MbDict,descDict=descDict,pTargetDescDict=pTargetDescDict,primarySymbol=primaryGeneSymbol,TissueCount=TissueCount, formName=formName, worksheet=worksheet, newrow=newrow,method=method) + workbook.close() + # creat object for result table for sort function + objfile = open('%s.obj' % (webqtlConfig.TMPDIR+filename), 'wb') + cPickle.dump(tblobj, objfile) + objfile.close() + + sortby = ("tissuecorr", "down") + div = HT.Div(webqtlUtil.genTableObj(tblobj=tblobj, file=filename, sortby=sortby, tableID = "sortable", addIndex = "1"), Id="sortable") + + if method =="0": + IntroMethod="Pearson\'s r " + else: + IntroMethod="Spearman\'s rho " + Intro = HT.Blockquote('%s correlations ranked by the %s are displayed.' % (IntroReturnNum,IntroMethod), + ' You can resort this list using the small arrowheads in the top row.') + Intro.append(HT.BR(),' Click the correlation values to generate scatter plots. 
Select the symbol to open NCBI Entrez.') + + pageTable.append(HT.TR(HT.TD(div))) + form.append(HT.P(), HT.P(),pageTable) + corrHeading = HT.Paragraph('Tissue Correlation Table', Class="title") + TD_LR = HT.TD(height=200,width="100%",bgcolor='#eeeeee',align="left") + TD_LR.append(corrHeading,note,Intro, form, HT.P()) + + self.dict['body'] = str(TD_LR) + self.dict['js1'] = '
' + self.dict['title'] = 'Tissue Correlation Result' + + return + + # result page when input symbols are more than 1 + def displayMultiSymbolsResultPage(self, geneSymbolLst=None, symbolCount=None, tProbeSetFreezeId=None,TissueCorrMatrixObject=None,note=None,TissueCount =None): + + formName = webqtlUtil.genRandStr("fm_") + form = HT.Form(cgi= os.path.join(webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE), enctype='multipart/form-data',name= formName, submit=HT.Input(type='hidden')) + # the following hidden elements are required parameter in Class(PlotCorrelationPage). So we need to define them here. + form.append(HT.Input(type="hidden", name="action", value="disp")) + form.append(HT.Input(type="hidden", name="FormID", value="dispMultiTissueCorrelation")) + form.append(HT.Input(type="hidden", name="X_geneSymbol", value="")) + form.append(HT.Input(type="hidden", name="Y_geneSymbol", value="")) + form.append(HT.Input(type="hidden", name="ProbeSetID", value="")) + # RISet is not using in Tissue correlation, but is a required parameter in Class(PlotCorrelationPage). So we set dummy value(BXD). 
+ form.append(HT.Input(type="hidden", name="RISet", value="BXD")) + form.append(HT.Input(type="hidden", name="ShowLine", value="1")) + form.append(HT.Input(type="hidden", name="TissueProbeSetFreezeId", value=tProbeSetFreezeId)) + form.append(HT.Input(type="hidden", name="rankOrder", value=0)) + + # updated by NL, 2011-01-06, build multi list for later use to descrease access to db again + symbolList,geneIdDict,dataIdDict,ChrDict,MbDict,descDict,pTargetDescDict = TissueCorrMatrixObject.getTissueProbeSetXRefInfo(GeneNameLst=geneSymbolLst) + # In case, upper case and lower case issue of symbol, mappedByTargetList function will update input geneSymbolLst based on database search result + geneSymbolLst =self.mappedByTargetList(primaryList=symbolList,targetList=geneSymbolLst) + + # Added by NL, 2011-01-06, get all shortNames, verboseNames, verboseNames2, verboseNames3, exportArray + # for Short Label, Long Label, Export functions + geneIdLst,shortNames, verboseNames, verboseNames2, verboseNames3, exportArray = self.getAllLabelsInfo(geneSymbolList =geneSymbolLst, geneIdDict=geneIdDict,ChrDict=ChrDict, MbDict=MbDict, descDict=descDict, pTargetDescDict=pTargetDescDict) + + heading = "Tissue Correlation Matrix" + + #get correlation value and p value based on Gene Symbols list, and return the values in corrArray and pvArray seperately + corrArray,pvArray = TissueCorrMatrixObject.getTissueCorrPvArray(geneNameLst=geneSymbolLst,dataIdDict=dataIdDict) + + # in the matrix table, top right corner displays Spearman Rank Correlation's Values and P-Values for each pair of geneSymbols; + # left bottom displays Pearson Correlation values and P-Vlues for each pair of geneSymbols. 
+ tissueCorrMatrixHeading = HT.Paragraph(heading,Class="title") + tcmTable = HT.TableLite(Class="collap", border=0, cellspacing=1, cellpadding=5, width='100%') + row1 = HT.TR(HT.TD(Class="fs14 fwb ffl b1 cw cbrb"),HT.TD('Spearman Rank Correlation (rho)' , Class="fs14 fwb ffl b1 cw cbrb", colspan= symbolCount+2,align="center")) + col1 = HT.TR(HT.TD("P e a r s o n     r", rowspan= symbolCount+1,Class="fs14 fwb ffl b1 cw cbrb", width=10,align="center"),HT.TD("Gene Symbol",Class="fs13 fwb cb b1", width=300)) + for i in range(symbolCount): + GeneSymbol=geneSymbolLst[i].strip() + geneId = geneIdLst[i] + + if geneId!=0: + _url="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids=%s" % geneId + curURL = HT.Href(text=GeneSymbol,url=_url,Class='fs13',target="_blank") + else: + curURL = GeneSymbol + col1.append(HT.TD(curURL,Class="b1", align="center")) + + tcmTable.append(row1,col1) + # to decide to whether to show note for "*" or not + flag = 0 + for i in range(symbolCount): + GeneSymbol=geneSymbolLst[i].strip() + geneId = geneIdLst[i] + + newrow = HT.TR() + newrow.append(HT.Input(name="Symbol", value=GeneSymbol, type='hidden')) + + if geneId!=0: + _url="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids=%s" %geneId + geneIdURL = HT.Href(text="%s "%GeneSymbol,url=_url,Class="b1",target="_blank") + else: + # flag =1 will show note for "*" + flag = 1 + geneIdURL =HT.Italic("%s"%GeneSymbol,HT.Font('*', color='red')) + newrow.append(HT.TD(geneIdURL,shortNames[i],verboseNames[i],verboseNames2[i],verboseNames3[i], Class="b1", align="left",NOWRAP="ON")) + + for j in range(symbolCount): + GeneSymbol2=geneSymbolLst[j].strip() + corr = corrArray[i][j] + pValue = pvArray[i][j] + Color='' + + if j==i: + newrow.append(HT.TD(HT.Font(HT.Italic("n"),HT.BR(),str(TissueCount),Class="fs11 fwn b1",align="center", color="000000"), bgColor='#cccccc', align="center", Class="b1", NOWRAP="ON")) + exportArray[i+1][j+1] = 
'%d/%d' % (TissueCount,TissueCount) + else: + if corr: + corr = float(corr) + tCorr = "%2.3f" % corr + pValue = float(pValue) + tPV = "%2.3f" % pValue + + # updated by NL, based on Rob's requirement: delete p value, 2010-02-14 + # set color for cells by correlationValue + if corr > 0.7: + fontcolor="red" + elif corr > 0.5: + fontcolor="#FF6600" + elif corr < -0.7: + fontcolor="blue" + elif corr < -0.5: + fontcolor="#009900" + else: + fontcolor ="#000000" + + # set label for cells + # if rank is equal to 0, pearson correlation plot will be the first one; + # if rank is equal to 1, spearman ran correlation plot will be the first one. + if j>i: + exportArray[i+1][j+1] =tCorr+"/"+tPV + rank =1 + elif j + var allCorrelations = %s; + + """ + exportScript = exportScript % str(exportArray) + self.dict['js1'] = exportScript+'
' + + TD_LR = HT.TD(colspan=2,width="100%",bgcolor="#eeeeee") + TD_LR.append(tissueCorrMatrixHeading,note,Intro,form,HT.P()) + self.dict['body'] = str(TD_LR) + self.dict['title'] = 'Tissue Correlation Result' + return + + # Added by NL, 2011-01-06, get all shortNames, verboseNames, verboseNames2, verboseNames3, exportArray + # for Short Label, Long Label, Export functions + def getAllLabelsInfo(self, geneSymbolList=None,geneIdDict=None,ChrDict=None,MbDict=None,descDict=None,pTargetDescDict=None): + + symbolCount= len(geneSymbolList) + geneIdLst =[] + exportArray = [([0] * (symbolCount+1))[:] for i in range(symbolCount+1)] + exportArray[0][0] = 'Tissue Correlation' + shortNames = [] + verboseNames = [] + verboseNames2 = [] + verboseNames3 = [] + + # added by NL, 2010-12-21, build DIV and array for short label, long label and export functions + for i, geneSymbolItem in enumerate(geneSymbolList): + geneSymbol =geneSymbolItem.lower() + _shortName =HT.Italic("%s" %geneSymbolItem) + _verboseName ='' + _verboseName2 = '' + _verboseName3 = '' + if geneIdDict.has_key(geneSymbol): + geneIdLst.append(geneIdDict[geneSymbol]) + else: + geneIdLst.append(0) + if ChrDict.has_key(geneSymbol) and MbDict.has_key(geneSymbol): + _verboseName = ' on Chr %s @ %s Mb' % (ChrDict[geneSymbol],MbDict[geneSymbol]) + if descDict.has_key(geneSymbol): + _verboseName2 = '%s' % (descDict[geneSymbol]) + if pTargetDescDict.has_key(geneSymbol): + _verboseName3 = '%s' % (pTargetDescDict[geneSymbol]) + + shortName = HT.Div(id="shortName_" + str(i), style="display:none") + shortName.append('Symbol: ') + shortName.append(_shortName) + shortNames.append(shortName) + + verboseName = HT.Div(id="verboseName_" + str(i), style="display:none") + verboseName.append(_shortName) + verboseName.append(_verboseName) + verboseNames.append(verboseName) + verboseName2 = HT.Div(id="verboseName2_" + str(i), style="display:none") + verboseName2.append(_verboseName2) + verboseNames2.append(verboseName2) + verboseName3 = 
HT.Div(id="verboseName3_" + str(i), style="display:none") + verboseName3.append(_verboseName3) + verboseNames3.append(verboseName3) + + # exportTissueText in webqtl.js is using '/' as delimilator; add '/', otherwise the last letter in geneSymbol will missing + exportArray[i+1][0] =geneSymbolItem+ '/' + geneSymbolItem + '/' +geneSymbolItem + ':' + str(_verboseName) + ' : ' + str(_verboseName2) + ' : ' + str(_verboseName3) + exportArray[0][i+1] =geneSymbolItem+ '/' + + return geneIdLst,shortNames, verboseNames, verboseNames2, verboseNames3, exportArray + + +######################################################################## +# functions for display and download when input symbol has only one # +######################################################################## + + # build header and footer parts for export excel file + def createExcelFileWithTitleAndFooter(self, workbook=None, datasetName=None,returnNumber=None): + + worksheet = workbook.add_worksheet() + titleStyle = workbook.add_format(align = 'left', bold = 0, size=14, border = 1, border_color="gray") + + ##Write title Info + worksheet.write([1, 0], "Citations: Please see %s/reference.html" % webqtlConfig.PORTADDR, titleStyle) + worksheet.write([2, 0], "Dataset : %s" % datasetName, titleStyle) + worksheet.write([3, 0], "Date : %s" % time.strftime("%B %d, %Y", time.gmtime()), titleStyle) + worksheet.write([4, 0], "Time : %s GMT" % time.strftime("%H:%M ", time.gmtime()), titleStyle) + worksheet.write([5, 0], "Status of data ownership: Possibly unpublished data; please see %s/statusandContact.html for details on sources, ownership, and usage of these data." 
% webqtlConfig.PORTADDR, titleStyle) + #Write footer info + worksheet.write([8 + returnNumber, 0], "Funding for The GeneNetwork: NIAAA (U01AA13499, U24AA13513), NIDA, NIMH, and NIAAA (P20-DA21131), NCI MMHCC (U01CA105417), and NCRR (U01NR 105417)", titleStyle) + worksheet.write([9 + returnNumber, 0], "PLEASE RETAIN DATA SOURCE INFORMATION WHENEVER POSSIBLE", titleStyle) + + return worksheet + + # build header of table when input symbol has only one + def getTableHeader(self, method='0', worksheet=None, newrow=None, headingStyle=None): + + tblobj_header = [] + exportList=[] + header=[] + header = [THCell(HT.TD(' ', Class="fs13 fwb ffl b1 cw cbrb",nowrap='ON'), sort=0), + THCell(HT.TD('Symbol',HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="symbol", idx=1), + THCell(HT.TD('Description',HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="desc", idx=2), + THCell(HT.TD('Location',HT.BR(),'Chr and Mb ', Class="fs13 fwb ffl b1 cw cbrb"), text="location", idx=3), + THCell(HT.TD('N Cases',HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb",nowrap='ON'), text="nstr", idx=4)] + if method =="0":# Pearson Correlation + header.append( THCell(HT.TD(HT.Href( + text = HT.Span(' r ', HT.Sup(' ?', style="color:#f00"),HT.BR(),HT.BR(), Class="fs13 fwb ffl cw"), + target = '_blank', + url = "/correlationAnnotation.html#tissue_r"), + Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="tissuecorr", idx=5)) + header.append( THCell(HT.TD(HT.Href( + text = HT.Span(' p(r) ', HT.Sup(' ?', style="color:#f00"),HT.BR(),HT.BR(), Class="fs13 fwb ffl cw"), + target = '_blank', + url = "/correlationAnnotation.html#tissue_p_r"), + Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="tissuepvalue", idx=6)) + + exportList =[ 'Gene ID', 'Symbol', 'Description', 'Location', 'N Cases', ' r ', ' p(r) '] + + else:# Spearman Correlation + header.append( THCell(HT.TD(HT.Href( + text = HT.Span(' rho ', HT.Sup(' ?', style="color:#f00"),HT.BR(),HT.BR(), Class="fs13 fwb ffl cw"), + target = '_blank', + 
url = "/correlationAnnotation.html#tissue_rho"), + Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="tissuecorr", idx=5)) + header.append( THCell(HT.TD(HT.Href( + text = HT.Span('p(rho)', HT.Sup(' ?', style="color:#f00"),HT.BR(), HT.BR(),Class="fs13 fwb ffl cw"), + target = '_blank', + url = "/correlationAnnotation.html#tissue_p_rho"), + Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="tissuepvalue", idx=6)) + exportList = ['Gene ID', 'Symbol', 'Description', 'Location', 'N Cases','rho', ' p(rho) '] + + # build header of excel for download function + for ncol, item in enumerate(exportList): + worksheet.write([newrow, ncol], item, headingStyle) + worksheet.set_column([ncol, ncol], 2*len(item)) + + tblobj_header.append(header) + + return tblobj_header, worksheet + + # build body of table when input symbol has only one + def getTableBody(self, symbolCorrDict={}, symbolPvalueDict={},symbolList=[],geneIdDict={},ChrDict={},MbDict={},descDict={},pTargetDescDict={},primarySymbol=None, TissueCount=None,formName=None, worksheet=None, newrow=None,method="0"): + + tblobj_body = [] + + for symbolItem in symbolList: + symbol =symbolItem.lower() + if symbol: + pass + else: + symbol ="N/A" + + if geneIdDict.has_key(symbol) and geneIdDict[symbol]: + geneId = geneIdDict[symbol] + ncbiUrl = HT.Href(text="NCBI",target='_blank',url=webqtlConfig.NCBI_LOCUSID % geneIdDict[symbol], Class="fs10 fwn") + else: + geneId ="N/A" + symbolItem =symbolItem.replace('"','') # some symbol is saved in ["symbol"]format + ncbiUrl = HT.Href(text="NCBI",target='_blank',url="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?CMD=search&DB=gene&term=%s" % symbol, Class="fs10 fwn") + + _Species="mouse" + similarTraitUrl = "%s?cmd=sch&gene=%s&alias=1&species=%s" % (os.path.join(webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE), symbolItem, _Species) + gnUrl = HT.Href(text="GN",target='_blank',url=similarTraitUrl, Class="fs10 fwn") + + tr = [] + # updated by NL, 04/25/2011: add checkbox and highlight function + 
# first column of table + # updated by NL. 12-7-2011 + tr.append(TDCell(HT.TD(HT.Input(type="checkbox", Class="checkbox", name="tissueResult",value=symbol, onClick="highlight(this)"), align='right',Class="fs12 fwn b1 c222 fsI",nowrap='ON'),symbol,symbol)) + # updated by NL, 04/26/2011: add GN and NCBI links + #gene symbol (symbol column) + tr.append(TDCell(HT.TD(HT.Italic(symbolItem), HT.BR(),gnUrl,"  |  ", ncbiUrl, Class="fs12 fwn b1 c222"),symbolItem, symbolItem)) + + #description and probe target description(description column) + description_string='' + if descDict.has_key(symbol): + description_string = str(descDict[symbol]).strip() + if pTargetDescDict.has_key(symbol): + target_string = str(pTargetDescDict[symbol]).strip() + + description_display = '' + if len(description_string) > 1 and description_string != 'None': + description_display = description_string + else: + description_display = symbolItem + + if len(description_display) > 1 and description_display != 'N/A' and len(target_string) > 1 and target_string != 'None': + description_display = description_display + '; ' + target_string.strip() + + tr.append(TDCell(HT.TD(description_display, Class="fs12 fwn b1 c222"), description_display, description_display)) + + #trait_location_value is used for sorting (location column) + trait_location_repr = 'N/A' + trait_location_value = 1000000 + + if ChrDict.has_key(symbol) and MbDict.has_key(symbol): + + if ChrDict[symbol] and MbDict[symbol]: + mb = float(MbDict[symbol]) + try: + trait_location_value = int(ChrDict[symbol])*1000 + mb + except: + if ChrDict[symbol].upper() == 'X': + trait_location_value = 20*1000 + mb + else: + trait_location_value = ord(str(ChrDict[symbol]).upper()[0])*1000 + mb + + trait_location_repr = 'Chr%s: %.6f' % (ChrDict[symbol], mb ) + else: + trait_location_repr="N/A" + trait_location_value ="N/A" + + tr.append(TDCell(HT.TD(trait_location_repr, Class="fs12 fwn b1 c222", nowrap="on"), trait_location_repr, trait_location_value)) + + # number 
of overlaped cases (N Case column) + tr.append(TDCell(HT.TD(TissueCount, Class="fs12 fwn ffl b1 c222", align='right'),TissueCount,TissueCount)) + + #tissue correlation (Tissue r column) + TCorr = 0.0 + TCorrStr = "N/A" + if symbolCorrDict.has_key(symbol): + TCorr = symbolCorrDict[symbol] + TCorrStr = "%2.3f" % TCorr + symbol2 =symbolItem.replace('"','') # some symbol is saved in "symbol" format + # add a new parameter rankOrder for js function 'showTissueCorrPlot' + rankOrder = int(method) + TCorrPlotURL = "javascript:showTissueCorrPlot('%s','%s','%s',%d)" %(formName, primarySymbol, symbol2,rankOrder) + tr.append(TDCell(HT.TD(HT.Href(text=TCorrStr, url=TCorrPlotURL, Class="fs12 fwn ff1"), Class="fs12 fwn ff1 b1 c222", align='right'), TCorrStr, abs(TCorr))) + else: + tr.append(TDCell(HT.TD(TCorrStr, Class="fs12 fwn b1 c222", align='right'), TCorrStr, abs(TCorr))) + + #p value of tissue correlation (Tissue p(r) column) + TPValue = 1.0 + TPValueStr = "N/A" + if symbolPvalueDict.has_key(symbol): + TPValue = symbolPvalueDict[symbol] + #TPValueStr = "%2.3f" % TPValue + TPValueStr=webqtlUtil.SciFloat(TPValue) + tr.append(TDCell(HT.TD(TPValueStr, Class="fs12 fwn b1 c222", align='right'), TPValueStr, TPValue)) + + tblobj_body.append(tr) + # build body(records) of excel for download function + for ncol, item in enumerate([geneId, symbolItem, description_display, trait_location_repr,TissueCount, TCorr, TPValue]): + worksheet.write([newrow, ncol], item) + + newrow += 1 + + return tblobj_body, worksheet + + + # get return number of records when input symbol has only one + def getReturnNum(self,recordReturnNum="0"): + if recordReturnNum=="0": + returnNum=100 + elif recordReturnNum=="1": + returnNum=200 + elif recordReturnNum=="2": + returnNum=500 + elif recordReturnNum=="3": + returnNum=1000 + elif recordReturnNum=="4": + returnNum=2000 + elif recordReturnNum=="5": + returnNum= None + + return returnNum + + # map list based on the order of target List + # if item.lower() exist 
in both lists, then compare the difference of item's original value of two lists + # if not equal, then replace the item in targetList by using the item in primaryList(list from database) + + def mappedByTargetList(self,primaryList=[],targetList=[]): + + tempPrimaryList =[x.lower() for x in primaryList] + testTargetList =[y.lower() for y in targetList] + + for i, item in enumerate(tempPrimaryList): + if item in testTargetList: + index = testTargetList.index(item) + if primaryList[i]!=targetList[index]: + targetList[index]= primaryList[i] + + return targetList diff --git a/web/webqtl/correlationMatrix/__init__.py b/web/webqtl/correlationMatrix/__init__.py new file mode 100755 index 00000000..e69de29b diff --git a/web/webqtl/correlationMatrix/tissueCorrelationMatrix.py b/web/webqtl/correlationMatrix/tissueCorrelationMatrix.py new file mode 100755 index 00000000..23dc14eb --- /dev/null +++ b/web/webqtl/correlationMatrix/tissueCorrelationMatrix.py @@ -0,0 +1,132 @@ +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License +# as published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the GNU Affero General Public License for more details. +# +# This program is available from Source Forge: at GeneNetwork Project +# (sourceforge.net/projects/genenetwork/). +# +# Contact Drs. Robert W. 
# Williams and Xiaodong Zhou (2010)
# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
#
# This module is used by GeneNetwork project (www.genenetwork.org)
#
# Created by GeneNetwork Core Team 2010/11/10
#
# Last updated by Ning Liu, 2011/01/26


# tissueCorrelationMatrix: function part for TissueCorrelationPage.py
from htmlgen import HTMLgen2 as HT
from correlation import correlationFunction
from dbFunction import webqtlDatabaseFunction
import sys

#########################################
#      Tissue Correlation Page
#########################################

class tissueCorrelationMatrix:
    """Data-access helper used by TissueCorrelationPage.

    Holds one TissueProbeSetFreezeId together with a database cursor and
    forwards every request to ``webqtlDatabaseFunction`` or
    ``correlationFunction``.  The methods add no computation of their own
    except ``getGeneSymbolLst``, which parses the user's text input.
    """

    def __init__(self, tissueProbeSetFreezeId=None):
        """Remember the dataset id and obtain a cursor.

        tissueProbeSetFreezeId -- id of the tissue dataset that the other
        methods operate on; stored unchanged.
        """
        # initialize parameters
        self.tProbeSetFreezeId = tissueProbeSetFreezeId
        self.cursor = webqtlDatabaseFunction.getCursor()

    def getTissueDataSet(self):
        """Retrieve dataset info from database table TissueProbeSetFreeze.

        Returns the three values produced by
        ``webqtlDatabaseFunction.getTissueDataSet``: all
        TissueProbeSetFreezeIds, Names and FullNames (lists, per the
        original comment).
        """
        tissProbeSetFreezeIds, Names, fullNames = \
            webqtlDatabaseFunction.getTissueDataSet(cursor=self.cursor)
        return tissProbeSetFreezeIds, Names, fullNames

    def getFullnameofCurrentDataset(self):
        """Retrieve (DatasetName, DatasetFullName) for the current dataset id."""
        DatasetName, DatasetFullName = \
            webqtlDatabaseFunction.getDatasetNamesByTissueProbeSetFreezeId(
                cursor=self.cursor,
                TissueProbeSetFreezeId=self.tProbeSetFreezeId)
        return DatasetName, DatasetFullName

    def getTissueCountofCurrentDataset(self):
        """Retrieve how many tissues are used in the current dataset.

        Returns the value of
        ``webqtlDatabaseFunction.getTissueCountByTissueProbeSetFreezeId``
        (an int, per the original comment).
        """
        TissueCount = \
            webqtlDatabaseFunction.getTissueCountByTissueProbeSetFreezeId(
                cursor=self.cursor,
                TissueProbeSetFreezeId=self.tProbeSetFreezeId)
        return TissueCount

    def getTissueCorrPvArray(self, geneNameLst=None, dataIdDict=None):
        """Retrieve corrArray and pvArray for display.

        geneNameLst -- gene symbols to correlate with each other.
        dataIdDict  -- data-id lookup as returned by
                       ``getTissueProbeSetXRefInfo``.

        Both arguments are passed straight through to
        ``correlationFunction``; the result of
        ``correlationFunction.getCorrPvArray`` is returned as
        (corrArray, pvArray).
        """
        # symbolValuepairDict: key is symbol, value is one list of
        # expression values of one probeSet (per the original comment).
        symbolValuepairDict = correlationFunction.getGeneSymbolTissueValueDict(
            cursor=self.cursor,
            symbolList=geneNameLst,
            dataIdDict=dataIdDict)
        corrArray, pvArray = correlationFunction.getCorrPvArray(
            cursor=self.cursor,
            priGeneSymbolList=geneNameLst,
            symbolValuepairDict=symbolValuepairDict)
        return corrArray, pvArray

    def getTissueProbeSetXRefInfo(self, GeneNameLst=None):
        """Retrieve cross-reference info for the given gene names.

        GeneNameLst -- list of gene names; ``None`` (the default) is
        treated as an empty list, which is what the previous ``[]``
        default passed on.

        Returns the seven values produced by
        ``correlationFunction.getTissueProbeSetXRefInfo``:
        symbolList, geneIdDict, dataIdDict, ChrDict, MbDict, descDict,
        pTargetDescDict.  Callers use them for the short/long label
        functions and to build the inputs of ``getTissueCorrPvArray``.
        """
        # A fresh list per call: a literal [] default would be one shared
        # object that the callee could mutate across calls.
        if GeneNameLst is None:
            GeneNameLst = []
        (symbolList, geneIdDict, dataIdDict, ChrDict, MbDict, descDict,
         pTargetDescDict) = correlationFunction.getTissueProbeSetXRefInfo(
            cursor=self.cursor,
            GeneNameLst=GeneNameLst,
            TissueProbeSetFreezeId=self.tProbeSetFreezeId)
        return (symbolList, geneIdDict, dataIdDict, ChrDict, MbDict,
                descDict, pTargetDescDict)

    def getCorrPvArrayForGeneSymbolPair(self, geneNameLst=None):
        """Retrieve corrArray and pvArray for exactly one gene symbol pair.

        Returns (None, None) unless ``geneNameLst`` holds exactly two
        entries; this now includes the default ``None``, which previously
        raised TypeError in ``len()``.
        """
        corrArray = None
        pvArray = None

        if geneNameLst is not None and len(geneNameLst) == 2:
            # Only dataIdDict is needed from the cross-reference lookup.
            (symbolList, geneIdDict, dataIdDict, ChrDict, MbDict, descDict,
             pTargetDescDict) = correlationFunction.getTissueProbeSetXRefInfo(
                cursor=self.cursor,
                GeneNameLst=geneNameLst,
                TissueProbeSetFreezeId=self.tProbeSetFreezeId)
            symbolValuepairDict = \
                correlationFunction.getGeneSymbolTissueValueDict(
                    cursor=self.cursor,
                    symbolList=geneNameLst,
                    dataIdDict=dataIdDict)
            corrArray, pvArray = correlationFunction.getCorrPvArray(
                cursor=self.cursor,
                priGeneSymbolList=geneNameLst,
                symbolValuepairDict=symbolValuepairDict)

        return corrArray, pvArray

    def calculateCorrOfAllTissueTrait(self, primaryTraitSymbol=None, method='0'):
        """Retrieve symbolCorrDict and symbolPvalueDict for one primary symbol.

        primaryTraitSymbol -- symbol every other trait is correlated with.
        method -- passed through unchanged; the calling page uses '0' for
                  Pearson and any other value for Spearman rank.

        Returns the two dicts produced by
        ``correlationFunction.calculateCorrOfAllTissueTrait`` (keyed by
        symbol, per the original comment).
        """
        symbolCorrDict, symbolPvalueDict = \
            correlationFunction.calculateCorrOfAllTissueTrait(
                cursor=self.cursor,
                primaryTraitSymbol=primaryTraitSymbol,
                TissueProbeSetFreezeId=self.tProbeSetFreezeId,
                method=method)

        return symbolCorrDict, symbolPvalueDict

    def getGeneSymbolLst(self, geneSymbols=None):
        """Translate GeneIds to gene symbols and keep the original order.

        geneSymbols -- raw text with one gene symbol or numeric gene id
        per line.  ``None`` or an empty string yields an empty list
        (previously ``None`` raised AttributeError).

        Blank lines and repeated entries are dropped.  All-digit entries
        are treated as gene ids and replaced in place by the symbol that
        ``webqtlDatabaseFunction.getGeneIdSymbolPairByGeneId`` reports for
        them; ids with no (or an empty) symbol are left as entered.
        """
        geneSymbolLst = []
        geneIdLst = []
        if not geneSymbols:
            return geneSymbolLst

        # splitlines() handles '\r\n', '\r' and '\n'; the former
        # split('\r') left LF-only input as one single token.
        for token in geneSymbols.splitlines():
            token = token.strip()
            # Ning: To keep the original order of input symbols and GeneIds
            if token and token not in geneSymbolLst:
                geneSymbolLst.append(token)
                # if input includes geneId(s), then put it/them into geneIdLst
                if token.isdigit():
                    geneIdLst.append(token)

        # Ning: Replace GeneId with symbol if applicable
        if geneIdLst:
            geneIdSymbolPair = webqtlDatabaseFunction.getGeneIdSymbolPairByGeneId(
                cursor=self.cursor, geneIdLst=geneIdLst)
            for geneId in geneIdLst:
                # An id absent from the lookup result is kept as typed
                # instead of aborting the whole request with KeyError.
                try:
                    symbol = geneIdSymbolPair[geneId]
                except KeyError:
                    symbol = None
                if symbol:
                    geneSymbolLst[geneSymbolLst.index(geneId)] = symbol

        return geneSymbolLst


# -- cgit v1.2.3