aboutsummaryrefslogtreecommitdiff
path: root/web/webqtl/compareCorrelates/multitrait.py
diff options
context:
space:
mode:
authorzsloan2015-03-27 20:28:51 +0000
committerzsloan2015-03-27 20:28:51 +0000
commitd0911a04958a04042da02a334ccc528dae79cc17 (patch)
tree3c48e2e937c1dbeaf00a5697c87ed251afa5c8f1 /web/webqtl/compareCorrelates/multitrait.py
parenta840ad18e1fe3db98a359a159e9b9b72367a2839 (diff)
downloadgenenetwork2-d0911a04958a04042da02a334ccc528dae79cc17.tar.gz
Removed everything from 'web' directory except genofiles and renamed the directory to 'genotype_files'
Diffstat (limited to 'web/webqtl/compareCorrelates/multitrait.py')
-rwxr-xr-xweb/webqtl/compareCorrelates/multitrait.py1121
1 files changed, 0 insertions, 1121 deletions
diff --git a/web/webqtl/compareCorrelates/multitrait.py b/web/webqtl/compareCorrelates/multitrait.py
deleted file mode 100755
index 047620af..00000000
--- a/web/webqtl/compareCorrelates/multitrait.py
+++ /dev/null
@@ -1,1121 +0,0 @@
-# multitrait.py
-# a tool to analyze the correlations between several different traits and the traits
-# in a given dataset
-#
-# Parameters:
-# correlation -- either "pearson" or "spearman" depending on which ones we want to use
-#
-# filename -- an input file containing the traits to analyze
-#
-# progress -- if set, this parameter outputs a static progress page
-# and uses a META redirect to trigger the real computation
-#
-# targetDatabaseType:
-# one of "ProbeSet", "Publish", "Genotype" depending on the type of database
-# we will use for the analysis
-#
-# targetDatabaseId:
-# the id (*Freeze.Id in the database) of the particular database we will analyze
-#
-# threshold -- a float between 0 and 1 to determine which coefficents we wil l consider
-#
-# firstRun -- either 0 or 1
-# whether to automatically pick reasonable defaults for the other three parameters
-#
-# outputType -- either "html" or "text"
-#
-# Author: Stephen Pitts
-# June 15, 2004
-
-#Xiaodong changed the dependancy structure
-
-import copy
-import sys
-import cgi
-import os
-import os.path
-import math
-import time
-import numarray
-import tempfile
-import string
-import cgitb #all tracebacks come out as HTMLified CGI,useful when we have a random crash in the middle
-
-from base import templatePage
-from base.webqtlTrait import webqtlTrait
-from utility import webqtlUtil
-from base import webqtlConfig
-import trait
-import correlation
-import htmlModule
-
-cgitb.enable()
-
-
-# where this program's data files are
-RootDir = webqtlConfig.IMGDIR # XZ, 09/10/2008: add module name 'webqtlConfig.'
-RootDirURL = "/image/" # XZ, 09/10/2008: This parameter is not used in this module
-
-tempfile.tempdir = RootDir
-tempfile.template = "multitrait"
-
-# MultitraitException: used if something goes wrong
-# maybe in the future we should make exceptions more granular
-class MultitraitException(Exception):
- def __init__(self, message):
- self.message = message
-
- def __repr__(self):
- return "MultitraitException: %s" % self.message
-
-# buildParamDict: Cursor -> ParamDict
-# to process and validate CGI arguments
-# see the comment at the top of this file for valid cgi
-# parameters
-def buildParamDict(cursor, fd):
- params = {}
- fs = fd.formdata #cgi.FieldStorage()
- params["progress"] = fs.getfirst("progress", "0")
- params["filename"] = fs.getfirst("filename", "")
- if params["filename"] == "":
- raise MultitraitException("Required parameter filename missing.")
-
- params["targetDatabase"] = fs.getfirst("targetDatabase", "U74Av2RMA_Raw_ProbeSet_March04")
- params["firstRun"] = webqtlUtil.safeInt(fs.getfirst("firstRun", "0"),0)
- params["threshold"] = webqtlUtil.safeFloat(fs.getfirst("threshold", "0.5"), 0.5)
- params["subsetSize"] = webqtlUtil.safeInt(fs.getfirst("subsetSize", "10"), 10)
-
- if params["subsetSize"] < -1:
- params["subsetSize"] = -1
-
- params["correlation"] = fs.getfirst("correlation", "pearson")
- params["subsetCount"] = webqtlUtil.safeInt(fs.getfirst("subsetCount", 10), 10)
-
- if params["subsetCount"] < -1:
- params["subsetCount"] = -1
-
- #params["outputType"] = fs.getfirst("outputType", "html")
-
- #if params["outputType"] not in ("html", "text"):
- # params["outputType"] = "html"
-
- if params["correlation"] not in ("pearson", "spearman"):
- params["correlation"] = "pearson"
-
- params["correlationName"] = params["correlation"].capitalize()
-
- # one of two cases:
- # 1) We have just come from a submit, so there are a bunch of display*
- # but no displaySets. Thus, the code down there converts the display*
- # to displaySets so the GET request doesn't get too long
- # 2) We have just been redirected from a progress page which already has
- # a converted displaySets for us.
-
- displaySets = webqtlUtil.safeInt(fs.getfirst("displaySets","0"), 0)
-
- if displaySets == 0:
- for key in fs.keys():
- if key[:7] == "display":
- #print "Hit display key %s<br>" % key
- try:
- whichSet = int(key[7:])
-
- # prevent malicious attacks
- whichSet = min(whichSet, 512)
- displaySets += pow(2, whichSet)
-
- except ValueError: pass
-
- params["displaySets"] = displaySets
- #print "In the beginning, display sets was %s: %s<br>" % (displaySets,
- # str(binaryDecompose(displaySets)))
-
- # if we are just gonna display a progress page, then there's no
- # reason to look up detailed database information
- #if params["progress"] == "1":
- # return params
-
- a,b = trait.dbNameToTypeId(cursor, params["targetDatabase"]) # XZ, 09/10/2008: add module name
- params["targetDatabaseType"] = a
- params["targetDatabaseId"] = b
- params["targetDatabaseName"] = params["targetDatabase"]
-
- return params
-
-# readInputFile: DB cursor -> string -> string, (arrayof Trait)
-def readInputFile(cursor, filename):
- """
- To read an input file with n lines in the following format
- <databasetype>,<databaseid>,<traitname>
- and retrieve and populate traits with appropriate data
- from the database
-
- Also, for our purposes. we store the database type and
- database id in fields attached to the trait instances. We use
- this information to generate Javascript popups with trait
- information.
-
- In addition, we read the strain of mice that the traits are
- from so we can only show those databases to correlate against.
- """
- handle = open(filename)
- line = handle.readline()
- inbredSetName = line.strip()
- line = handle.readline()
- traits = []
-
-# XZ, 09/10/2008: In this while loop block, I changed the original variable name 'trait' to 'oneTrait'
- while line != "":
- line = line.strip()
- dbType, dbId, tName = line.split(",")
-
- if dbType == "ProbeSet":
- oneTrait = trait.queryProbeSetTraitByName(cursor, tName) # XZ, 09/10/2008: add module name
- oneTrait.populateDataId(cursor, dbId)
- oneTrait.dbName = trait.dbTypeIdToName(cursor, dbType, dbId) # XZ, 09/10/2008: add module name
- elif dbType == "Geno":
- speciesId = trait.getSpeciesIdByDbTypeId(cursor, dbType, dbId)
- oneTrait = trait.queryGenotypeTraitByName(cursor, speciesId, tName) # XZ, 09/10/2008: add module name
- oneTrait.populateDataId(cursor, dbId)
- oneTrait.dbName = trait.dbTypeIdToName(cursor, dbType, dbId) # XZ, 09/10/2008: add module name
- elif dbType == "Publish":
- oneTrait = trait.queryPublishTraitByName(cursor, dbId, tName) # XZ, 09/10/2008: add module name
- oneTrait.populateDataId(cursor, dbId)
- oneTrait.dbName = trait.dbTypeIdToName(cursor, dbType, dbId) # XZ, 09/10/2008: add module name
- elif dbType == "Temp":
- oneTrait = trait.queryTempTraitByName(cursor, tName) # XZ, 09/10/2008: add module name
- oneTrait.populateDataId(cursor, dbId)
- oneTrait.dbName = "Temp"
-
- oneTrait.populateStrainData(cursor)
- traits.append(oneTrait)
-
- line = handle.readline()
-
- return inbredSetName, traits
-
-# loadDatabase: Cursor -> ParamDict -> arrayof Trait
-def loadDatabase(cursor, p):
- """
- To load a set of traits as specified by the
- targetDatabaseId
- and targetDatabaseType parameters
-
- Cursor should be a fastCursor from the webqtl library (i.e.
- a MySQLdb SSCursor).
-
- Calling populateStrainData 20,000 or so times on a ProbeSet
- is really inefficent, so I wrote an optimized queryPopulatedProbeSetTraits
- in the trait module that uses a join to get all of the rows in
- bulk, store the resultset on the server, and do all sorts of nice buffering.
- It's about two or three times faster.
- """
- if p["targetDatabaseType"] == "ProbeSet": # XZ, 09/10/2008: add module name
- dbTraits = trait.queryPopulatedProbeSetTraits(cursor,
- p["targetDatabaseId"])
- elif p["targetDatabaseType"] == "Publish": # XZ, 09/10/2008: add module name
- dbTraits = trait.queryPublishTraits(cursor,
- p["targetDatabaseId"])
- psd = trait.PublishTrait.populateStrainData
- elif p["targetDatabaseType"] == "Geno": # XZ, 09/10/2008: add module name
- dbTraits = trait.queryGenotypeTraits(cursor,
- p["targetDatabaseId"])
- psd = trait.GenotypeTrait.populateStrainData
- else:
- print "Unknown target database type %s" % p["targetDatabaseType"]
-
- if p["targetDatabaseType"] != "ProbeSet":
- map(psd, dbTraits, [cursor]*len(dbTraits))
-
- return dbTraits
-
-def runProbeSetCorrelations(cursor, p, traits):
- """
- To run the correlations between the traits and the database.
- This function computes a correlation coefficent between each
- trait and every entry in the database, and partitions the database
- into a disjoint array of arrays which it returns.
-
- The length of the return array is 2^n, where n is the length of
- the trait array. Which constitutent element a of the return array
- a given trait ends up in is determined by the following formula
- i = i_02^0 + ... + i_(n-1)2^(n-1)
- where i_0 is 1 if corr(a,trait 0) >= threshold and 0 otherwise
-
- Since most of the several thousand database traits will end up
- with i=0, we don't return them, so the first element of the
- return array will be empty.
-
- A particular element of subarray j of the return array contains
- a 2-tuple (trait,kvalues). The variable trait is obviously the
- particular database trait that matches the user traits l_1, ..., l_m
- to which subarray j corresponds. kvalues is a list of the correlation
- values linking trait to l_1, ..., l_m, so the length of kvalues is
- the number of 1s in the binary representation of j (there must be
- a better way to describe this length).
-
- The return array is an array of 2-tuples. The first element of
- each tuple is the index of the particular subarray, and the second
- element is the subarray itself. The array is sorted in descending
- order by the number of 1's in the binary representation of the
- index so the first few subarrays are the ones that correspond to
- the largest sets. Each subarray is then sorted by the average of
- the magnitude of the individual correlation values.
- """
-
- kMin = p["threshold"]
- traitArrays = {}
-
- # TODO: Add Spearman support
- freezeId = p["targetDatabaseId"]
- if p["correlation"] == "pearson":
- correlations = correlation.calcProbeSetPearsonMatrix(cursor, freezeId, traits) #XZ, 09/10/2008: add module name
- else:
- correlations = correlation.calcProbeSetSpearmanMatrix(freezeId, traits) #XZ, 09/10/2008: add module name
-
- # now we test all of the correlations in bulk
- test = numarray.absolute(correlations)
- test = numarray.greater_equal(test, kMin)
- test = test.astype(numarray.Int8)
- #print test
-
- db = trait.queryProbeSetTraits(cursor, freezeId) #XZ, 09/10/2008: add module name
- for i in range(len(db)):
- cIndex = 0
- prods = []
- for j in range(len(traits)):
- if test[i,j] == 1:
- cIndex += pow(2, j)
- prods.append(correlations[i,j])
- if cIndex != 0:
- if not traitArrays.has_key(cIndex):
- traitArrays[cIndex] = []
-
- traitArrays[cIndex].append((db[i], prods))
-
-
- # sort each inner list of traitArrays
- # so the matched traits appear in descending order by the
- # average magnitude of the correlation
- def customCmp(traitPair, traitPair2):
- magAvg1 = numarray.average(map(abs, traitPair[1]))
- magAvg2 = numarray.average(map(abs, traitPair2[1]))
-
- # invert the sign to get descending order
- return -cmp(magAvg1, magAvg2)
-
- for traitArray in traitArrays.values():
- traitArray.sort(customCmp)
-
- # sort the outer list of traitArrays
- traitArrays2 = []
- i = 0
- for key in traitArrays.keys():
- a = traitArrays[key]
- if len(a) > 0:
- traitArrays2.append((key,a,len(binaryDecompose(key)),
- len(a)))
-
- # we sort by the number of 1's in the binary output
- # and then by the size of the list, both in descending order
- def customCmp2(aL,bL):
- a = -cmp(aL[2], bL[2])
- if a == 0:
- return -cmp(aL[3], bL[3])
- else:
- return a
-
- traitArrays2.sort(customCmp2)
-
- return traitArrays2
-
-def runCorrelations(p, strainCount, traits, db):
- """
- To run the correlations between the traits and the database.
- This function computes a correlation coefficent between each
- trait and every entry in the database, and partitions the database
- into a disjoint array of arrays which it returns.
-
- The length of the return array is 2^n, where n is the length of
- the trait array. Which constitutent element a of the return array
- a given trait ends up in is determined by the following formula
- i = i_02^0 + ... + i_(n-1)2^(n-1)
- where i_0 is 1 if corr(a,trait 0) >= threshold and 0 otherwise
-
- Since most of the several thousand database traits will end up
- with i=0, we don't return them, so the first element of the
- return array will be empty.
-
- A particular element of subarray j of the return array contains
- a 2-tuple (trait,kvalues). The variable trait is obviously the
- particular database trait that matches the user traits l_1, ..., l_m
- to which subarray j corresponds. kvalues is a list of the correlation
- values linking trait to l_1, ..., l_m, so the length of kvalues is
- the number of 1s in the binary representation of j (there must be
- a better way to describe this length).
-
- The return array is an array of 2-tuples. The first element of
- each tuple is the index of the particular subarray, and the second
- element is the subarray itself. The array is sorted in descending
- order by the number of 1's in the binary representation of the
- index so the first few subarrays are the ones that correspond to
- the largest sets. Each subarray is then sorted by the average of
- the magnitude of the individual correlation values.
- """
- kMin = p["threshold"]
- traitArrays = {}
-
- # TODO: Add Spearman support
- if p["correlation"] == "pearson":
- correlations = correlation.calcPearsonMatrix(db, traits, strainCount) #XZ, 09/10/2008: add module name
- else:
- correlations = correlation.calcSpearmanMatrix(db, traits, strainCount) #XZ, 09/10/2008: add module name
-
- # now we test all of the correlations in bulk
- test = numarray.absolute(correlations)
- test = numarray.greater_equal(test, kMin)
- test = test.astype(numarray.Int8)
- #print test
-
-
- for i in range(len(db)):
- cIndex = 0
- prods = []
- for j in range(len(traits)):
- if test[i,j] == 1:
- cIndex += pow(2, j)
- prods.append(correlations[i,j])
- if cIndex != 0:
- if not traitArrays.has_key(cIndex):
- traitArrays[cIndex] = []
-
- traitArrays[cIndex].append((db[i], prods))
-
- # sort each inner list of traitArrays
- # so the matched traits appear in descending order by the
- # average magnitude of the correlation
- def customCmp(traitPair, traitPair2):
- magAvg1 = numarray.average(map(abs, traitPair[1]))
- magAvg2 = numarray.average(map(abs, traitPair2[1]))
-
- # invert the sign to get descending order
- return -cmp(magAvg1, magAvg2)
-
- for traitArray in traitArrays.values():
- traitArray.sort(customCmp)
-
- # sort the outer list of traitArrays
- traitArrays2 = []
- i = 0
- for key in traitArrays.keys():
- a = traitArrays[key]
- if len(a) > 0:
- traitArrays2.append((key,a,len(binaryDecompose(key)),
- len(a)))
-
- # we sort by the number of 1's in the binary output
- # and then by the size of the list, both in descending order
- def customCmp2(aL,bL):
- a = -cmp(aL[2], bL[2])
- if a == 0:
- return -cmp(aL[3], bL[3])
- else:
- return a
-
- traitArrays2.sort(customCmp2)
-
- return traitArrays2
-
-
-# XZ, 09/09/2008: In multiple trait correlation result page,
-# XZ, 09/09/2008: click "Download a text version of the above results in CSV format"
-
-# TraitCorrelationText: a class to display trait correlations
-# as textual output
-class TraitCorrelationText:
- # build a text shell to describe the given trait correlations
- # this method sets self.output; use str(self) to actually
- # get the text page
- #
- # traits is a list of traits and traitArray is a
- # list of 3-tuples: index, traits', garbage
- # where index is a binary-encoded description of which subset of
- # traits the list traits' matches
- #
- # traits' is a list of 3-tuples as well: trait, correlations, garbage
- # where trait is a particular trait and correlations is a list of float
- # correlations (matching traits above)
- def __init__(self, p, traits, traitArray):
- output = "Correlation Comparison\n"
- output += "from WebQTL and the University of Tennessee Health Science Center\n"
- output += "initiated at " + time.asctime(time.gmtime()) + " UTC\n\n"
-
- output += self.showOptionPanel(p)
- output += self.showSelectedTraits(traits)
- output += self.showSummaryCorrelationResults(p, traits, traitArray)
- output += self.showDetailedCorrelationResults(p, traits, traitArray)
-
- self.output = output
-
- # showOptionPanel: ParamDict -> string
- # to display the options used to run this correlation
- def showOptionPanel(self, params):
- output = "Correlation Comparison Options:\n"
- output += "Target database,%s\n" % params["targetDatabase"]
- output += "Correlation type,%s\n" % params["correlationName"]
- output += "Threshold,%f\n" % params["threshold"]
- #output += "Subsets to Show,%d\n" % params["subsetCount"]
- #output += "Traits to Show Per Subset,%d\n\n" % params["subsetSize"]
- return output
-
- # showSelectedTraits: (listof Trait) -> string
- # to display the traits compared with the database
- # note: we can't use tabular output because the traits could be of
- # different types and produce different fields
- def showSelectedTraits(self, traits):
- output = "Selected Traits:\n"
- for trait in traits:
- output += '"' + trait.longName() + '"' + "\n"
- output += "\n"
- return output
-
- # showSummaryCorrelationResults: ParamDict -> (listof Trait) ->
- # TraitArray -> string
- # see comment for __init__ for a description of TraitArray
- #
- # to show a summary (sets and sizes) of the correlation results
- # as well as an X to indicate whether they will be included
- # in the detailed output
- def showSummaryCorrelationResults(self, p, traits, traitArray):
- output = "Correlation Comparison Summary:\n"
-
- #if p["subsetCount"] != -1:
- # ourSubsetCount = min(p["subsetCount"], len(traitArray))
- #else:
-
- ourSubsetCount = len(traitArray)
-
- displayDecomposition = binaryDecompose(p["displaySets"])
- for j in range(ourSubsetCount):
- i = traitArray[j][0]
- traitSubarray = traitArray[j][1]
- if len(traitSubarray) == 0:
- continue
-
- targetTraits = decomposeIndex(traits, i)
- traitDesc = string.join(map(trait.Trait.shortName, targetTraits), # XZ, 09/10/2008: add module name
- ", ")
- if j in displayDecomposition:
- checked = "X"
- else:
- checked = ""
-
- output += '"%s","%s","%d"\n' % (checked, traitDesc, len(traitSubarray))
-
- output += "\n"
- return output
-
- # showDetailedCorrelationResults: ParamDict -> (listof Trait) ->
- # TraitArray -> string
- #
- # to show a detailed list of the correlation results; that is,
- # to completely enumerate each subset of traitArray using the
- # filtering parameters in p
- def showDetailedCorrelationResults(self, p, traits, traitArray):
- output = "Correlation Comparison Details:\n"
- displayDecomposition = binaryDecompose(p["displaySets"])
- displayDecomposition.sort()
-
- def formatCorr(c):
- return "%.4f" % c
-
- for j in displayDecomposition:
- i = traitArray[j][0]
- traitSubarray = traitArray[j][1]
-
- if len(traitSubarray) == 0:
- continue
-
- targetTraits = decomposeIndex(traits, i)
- extraColumnHeaders = map(trait.Trait.shortName, targetTraits) # XZ, 09/10/2008: add module name
- traitDesc = string.join(extraColumnHeaders, ", ")
-
- #if(p["subsetSize"] != -1 and len(traitSubarray) > p["subsetSize"]):
- # traitDesc += ",(showing top %s of %s)" % (p["subsetSize"],
- # len(traitSubarray))
- # traitSubarray = traitSubarray[0:p["subsetSize"]]
-
- output += "%s\n" % traitDesc
- output += traitSubarray[0][0].csvHeader([], extraColumnHeaders)
- output += "\n"
- for oneTrait, corr in traitSubarray:#XZ, 09/10/2008: change original variable name 'trait' to 'oneTrait'
- corr = map(formatCorr, corr)
- output += oneTrait.csvRow([], corr) + "\n"
-
- output += "\n"
-
- return output
-
- # __str__ : string
- # to return self.output as the string representation of this page
- # self.output is built in __init__
- def __str__(self):
- return self.output
-
-# TraitCorrelationPage: a class to display trait correlations
-# for now this is just one HTML file, so we don't even write it
-# to a temporary file somewhere
-class TraitCorrelationPage(templatePage.templatePage):
- """
- Using the templatePage class, we build an HTML shell for
- the core data here: the trait correlation lists.
-
- The way templatePage works, we build the page in pieces in
- the __init__ method and later on use the inherited write
- method to render the page.
- """
- def __init__(self, fd, p, cursor, traits, traitArray, inbredSetName, txtFilename):
-
- templatePage.templatePage.__init__(self, fd)
-
- self.dict["title"] = "Correlation Comparison"
- self.dict["basehref"] = ""
- # NL: deleted js1 content part, since it has not been used in this project
- self.dict["js1"] = ""
- self.dict["js2"] = ""
-
- body = "<td><h1>Correlation Comparison</h1>"
- body += "<p>Run at %s UTC</p>" % time.asctime(time.gmtime())
- body += """
- <p>The correlation comparison tool identifies intersecting sets of traits that are
-correlated with your selections at a specified threshold. A correlation comparison
-involves the following steps:</p>
-<ol>
-<li><p>
-<b>Correlate:</b>
-Choose a <i>Target Database</i>, a <i>Correlation Type</i>, and a <i>Correlation
-Threshold</i>. For your initial correlation, leave <i>Number of Subsets to Show</i> and
-<i>Traits to Show per Subset</i> at their default values of 10. Using the Correlation
-Options panel, you can adjust the <i>Correlation Threshold</i>, <i>Number of Subsets to
-Show</i>, and <i>Traits to Show per Subset</i>.
-</p></li>
-
-<li><p>
-<b>Add to Collection:</b>
-You can use the check boxes in the <i>Correlation
-Comparison Details</i> panel and the buttons at the bottom of the page to add these
-results to your selections page for further analysis in WebQTL.
-</p></li>
-
-<li><p>
-<b>Filter:</b>
-Using the <i>Correlation Comparison Summary</i> panel, choose which
-subsets you would like to display for export. Note that if you change the
-parameters in the <i>Correlation Options</i> panel, you will need to re-apply your filter.
-</p></li>
-
-<li><p>
-<b>Export:</b>
-Once you are satisfied with your report, use the export link at
-the bottom of the page to save the report as a comma-separated (CSV) text file
-which you can then import into Excel or another tool. Note: the exported report
-will list all subsets in the summary view and only those traits in the subsets
-you have selected in the Filter step.
-</p></li>
-</ol>
-"""
-
-# body += """
-# <p>The correlation
-# comparison tool identifies the intersecting sets of traits that are
-# correlated with your selections. A correlation comparison involves
-# the following steps:</p>
-# <ol>
-# <li><p><b>Correlate:</b> Choose a <i>Target Database</i>, a <i>Correlation Type</i>, and a <i>Correlation Threshold</i>.
-# For the initial correlation, leave <i>Subsets to Show</i> and <i>Traits to Show per Subset</i>
-# at their default values of 10.</p></li>
-# <li><p><b>Refine Correlation:</b> Using the <i>Correlation Options</i> panel,
-# adjust the <i>Correlation Threshold</i>, <i>Subsets to Show</i>, and <i>Traits to
-# Show per Subset</i> until you have a reasonable number of traits.</p></li>
-# <li><p><b>Filter:</b> Using the <i>Correlation Comparison Summary</i> panel, choose which subsets you would
-# like to see. Note that if you change the parameters in the <i>Correlation Options</i> panel, you will
-# loose the filter you have selected.</p></li>
-# <li><p><b>Export:</b> Once you are satisfied with your report, use the export
-# link at the bottom of the page to save the report as a comma-separated (CSV) text file which
-# you can then import into Excel or another tool. Note: the exported report
-# will show all subsets in the summary view and all traits in each subset you have
-# selected in the Filter step.
-# <li><p><b>Shopping Cart:</b> In addition, you can use the
-# check boxes in the <i>Correlation Comparison Details</i> panel and the
-# buttons at the bottom of the page to add the traits you have found to the shopping cart.</p>
-# </li>
-# </ol>
-# """
-
- body += self.showOptionPanel(p, cursor, inbredSetName)
- body += self.showSelectedTraits(traits, p, inbredSetName)
-
- if p["firstRun"] == 0:
- body += self.showCorrelationResults(p, inbredSetName, traits, traitArray)
-
- exportParams = copy.copy(p)
- exportParams["outputType"] = "text"
-
- body += ('''
- <h2>Export these results</h2>
- <p>
- <a href="/image/%s">Download a text version of the above results in CSV format</a>. This text version differs from
- the version you see on this page in two ways. First, the summary view shows all subsets. Second, the details
- view shows all traits in the subsets that you have selected.
- </p>
- '''
- % txtFilename)
-
-
-
- body += "</td>"
- self.dict["body"] = body
-
-
- # showOptionPanel: ParamDict -> Cursor -> String -> String
- # to build an option panel for the multitrait correlation
- # we expect the database list to be a list of 2-tuples
- # the first element of each tuple is the short name
- # and the second element of the tuple is the long name
- def showOptionPanel(self, params, cursor, inbredSetName):
- output = '''
- <h2>Correlation Options</h2>
- <FORM METHOD="POST" ACTION="%s%s" ENCTYPE="multipart/form-data">
- <INPUT TYPE="hidden" NAME="FormID" VALUE="compCorr2">
- <input type="hidden" name="filename" value="%s">
- <input type="hidden" name="firstRun" value="0">
- <input type="hidden" name="progress" value="1">
- <table>
- <tr>
- <td>Target Database:</td><td>
- ''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, params["filename"])
-
- output += htmlModule.genDatabaseMenu(db = cursor,
- public=0,
- RISetgp = inbredSetName,
- selectname="targetDatabase",
- selected=params["targetDatabase"])
- output += "</td></tr>"
-
- corrSelected = ["",""]
-
- if params["correlation"] == "pearson":
- corrSelected[0] = "SELECTED"
- else:
- corrSelected[1] = "SELECTED"
-
- output += ('''
- <tr>
- <td>Correlation Method:</td>
- <td><select name="correlation">
- <option value="pearson" %s>Pearson</option>
- <!--<option value="spearman" %s>Spearman</option>-->
- </select></td></tr>
- ''' % (corrSelected[0], corrSelected[1]))
- output += ('<tr><td>Correlation Threshold:</td><td><input name="threshold" value="%s" /></td></tr>'
- % params["threshold"])
- output += ('<tr><td>Subsets to Show (-1 to show all subsets):</td><td><input name="subsetCount" value="%s" /></td></tr>'
- % params["subsetCount"])
- output += ('<tr><td>Traits to Show per Subset (-1 to show all traits):</td><td><input name="subsetSize" value="%s" /></td></tr>'
- % params["subsetSize"])
-
- # a cosmetic change to hopefully make this form a bit easier to use
-# if params["firstRun"] == 1:
-# applyName = "Correlate"
-# else:
-# applyName = "Refine Correlation"
-
- output += '''
- <tr>
- <td colspan="2"><input class="button" type="submit" value="Correlate" /></td>
- </tr>
- </table>
- </form>
- '''
-
- return output
-
- # showSelectedTraits: listof Trait -> string
- # to show a list of the selected traits
- def showSelectedTraits(self, traits, p, inbredSetName):
- output = '''
- <form action="%s%s" method="post" name="showDatabase">
- <INPUT TYPE="hidden" NAME="FormID" VALUE="showDatabase">
-
- <input type="hidden" name="incparentsf1" value="ON">
- <input type="hidden" name="ShowStrains" value="ON">
- <input type="hidden" name="ShowLine" value="ON">
- <input type="hidden" name="database" value="">
- <input type="hidden" name="ProbeSetID" value="">
- <input type="hidden" name="RISet" value="%s">
- <input type="hidden" name="CellID" value="">
- <input type="hidden" name="database2" value="">
- <input type="hidden" name="rankOrder" value="">
- <input type="hidden" name="ProbeSetID2" value="">
- <input type="hidden" name="CellID2" value="">
- ''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, inbredSetName)
-
- output += "<h2>Selected Traits</h2>"
- output += '<table cellpadding="2" cellspacing="0"><tr bgcolor="FFFFFF"><th>Database</th><th>Trait</th></tr>'
- flip = 1
- colors = ["FFFFFF", "cccccc"]
-
- for trait in traits:
- # we take advantage of the secret dbName attribute that
- # loadDatabase fills in
- descriptionString = trait.genHTML()
- if trait.db.type == 'Publish' and trait.confidential:
- descriptionString = trait.genHTML(privilege=self.privilege, userName=self.userName, authorized_users=trait.authorized_users)
- output += '''
- <tr bgcolor="%s"><td><a href="/dbdoc/%s.html">%s</a></td>
- <td><a href="javascript:showDatabase2('%s', '%s', '')">%s</a></td>
- </tr>
- ''' % (colors[flip], trait.db.name, trait.db.name, trait.db.name, trait.name, descriptionString)
- flip = not flip
-
- output += "</table></form>"
- return output
-
-
- # showSummaryCorrelationResults
- # show just the number of traits in each subarray
- def showSummaryCorrelationResults(self, p, traits, traitArray):
- output = '''
- <form action="%s%s" method="post">
- <INPUT TYPE="hidden" NAME="FormID" VALUE="compCorr2">
- <input type="hidden" name="filename" value="%s">
- <input type="hidden" name="firstRun" value="0">
- <input type="hidden" name="progress" value="1">
- <input type="hidden" name="correlation" value="%s">
- <input type="hidden" name="threshold" value="%s">
- <input type="hidden" name="rankOrder" value="">
- <input type="hidden" name="subsetCount" value="%s">
- <input type="hidden" name="subsetSize" value="%s">
- <input type="hidden" name="targetDatabase" value="%s">
- ''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, p["filename"], p["correlation"], p["threshold"],
- p["subsetCount"], p["subsetSize"], p["targetDatabase"])
-
- output += '''
- <table cellpadding="2" cellspacing="0">
- <tr>
- <th>Trait Subsets</th>
- <th colspan="2">Intersecting Set Size</th>
- </tr>
- '''
- # figure out a scale for the summary graph
- # for now we set max = 300 pixels wide
- if p["subsetCount"] != -1:
- ourSubsetCount = min(p["subsetCount"], len(traitArray))
- else:
- ourSubsetCount = len(traitArray)
-
- screenWidth = 600
- lengths = []
- for j in range(ourSubsetCount):
- lengths.append(len(traitArray[j][1]))
- maxLength = max(lengths)
-
- displayDecomposition = binaryDecompose(p["displaySets"])
- flip = 0
- colors = ["FFFFFF", "cccccc"]
-
- for j in range(ourSubsetCount):
- i = traitArray[j][0]
- traitSubarray = traitArray[j][1]
-
- if len(traitSubarray) == 0:
- continue
-
- targetTraits = decomposeIndex(traits, i)
- traitDesc = string.join(map(webqtlTrait.displayName, targetTraits),
- ", ")
-
- if j in displayDecomposition:
- checked = "CHECKED"
- else:
- checked = ""
-
- barWidth = (len(traitSubarray) * screenWidth) / maxLength
- output += ('''<tr bgcolor="%s">
- <td><input type="checkbox" name="display%d" value="1" %s>%s</input></td>
- <td>%s</td>
- <td><img src="/images/blue.png" width="%d" height="25"></td></tr>'''
- % (colors[flip], j, checked, traitDesc, len(traitSubarray), barWidth))
- flip = not flip
-
- output += '''
- <tr>
- <td colspan="3">
- <input class="button" type="submit" value="Filter" /></td>
- </tr>
- </table></form>
- '''
- return output
-
- # showDetailedCorrelationResults
- # actually show the traits in each subarray
- def showDetailedCorrelationResults(self, p, inbredSetName, traits,
- traitArray):
- output = "<h2>Correlation Comparison Details</h2>"
-
- # the hidden form below powers all of the JavaScript links,
- # the shopping cart links, and the correlation plot links
-
- output += '''
- <form action="%s%s" method="post">
- <input type="hidden" name="database" value="%s">
- <input type="hidden" name="FormID" value="showDatabase">
- <input type="hidden" name="traitfile" value="">
- <input type="hidden" name="incparentsf1" value="ON">
- <input type="hidden" name="ShowStrains" value="ON">
- <input type="hidden" name="ProbeSetID" value="">
- <input type="hidden" name="ShowLine" value="ON">
- <input type="hidden" name="RISet" value="%s">
- <input type="hidden" name="CellID" value="">
- <input type="hidden" name="database2" value="">
- <input type="hidden" name="rankOrder" value="">
- <input type="hidden" name="ProbeSetID2" value="">
- <input type="hidden" name="CellID2" value="">
- ''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, p["targetDatabase"], inbredSetName)
-
-
- displayDecomposition = binaryDecompose(p["displaySets"])
-
- # necessary to ensure that subset order is the same in the
- # summary and the detailed view
- displayDecomposition.sort()
-
- # here's a trick: the first trait we show must have the widest row because it correlates
- # with the largest set of input traits
- firstSubset = traitArray[displayDecomposition[0]]
- firstTrait = firstSubset[1][0][0]
- extraColumnCount = firstSubset[2]
- totalColumnCount = 1 + len(firstTrait.row()) + extraColumnCount
-
- output += "<table cellpadding=2 cellspacing=0>\n"
- for j in displayDecomposition:
- i = traitArray[j][0]
- traitSubarray = traitArray[j][1]
-
- # we don't display trait combinations for which there are
- # no correlations
- if len(traitSubarray) == 0:
- continue
-
- # generate a description of the traits that this particular array
- # matches highly
- targetTraits = decomposeIndex(traits, i)
- extraColumnHeaders = map(webqtlTrait.displayName, targetTraits)
- traitDesc = string.join(extraColumnHeaders, ", ")
-
- # massage extraColumnHeaders so that they can be wrapped
- for i in range(len(extraColumnHeaders)):
- ech = extraColumnHeaders[i]
- ech = ech.replace("-", " ")
- ech = ech.replace("_", " ")
- extraColumnHeaders[i] = ech
-
- # pad extraColumnHeaders if we have less columns than the max
- paddingNeeded = extraColumnCount - len(extraColumnHeaders)
- if paddingNeeded > 0:
- extraColumnHeaders.extend(paddingNeeded * ["&nbsp;"])
-
- # we limit the output to the top ones
- if(p["subsetSize"] != -1 and len(traitSubarray) > p["subsetSize"]):
- traitDesc += " (showing top %s of %s)" % (p["subsetSize"], len(traitSubarray))
- traitSubarray = traitSubarray[0:p["subsetSize"]]
-
- # combine that description with actual database traits themselves
- # and the correlation values
- output += '<tr><td colspan="%d"><h3>%s</h3></td></tr>' % (totalColumnCount, traitDesc)
- #output += '<h3>%s</h3>\n<table cellpadding=2 cellspacing=0>\n'% traitDesc
-
- # we assume that every trait in traitSubarray is the same type
- # of trait
- flip = 0
- colors = ["FFFFFF", "cccccc"]
-
- output += traitSubarray[0][0].tableRowHeader(["&nbsp;"], extraColumnHeaders, colors[0])
-
- for traitPair in traitSubarray:
- corr = []
- traitPair[0].dbName = p['targetDatabase']
- trait = traitPair[0]
-
- for i in range(len(traitPair[1])):
- corrValue = traitPair[1][i]
- corrPlotLink = ('''
- <a href="javascript:showCorrelationPlot2(db='%s',ProbeSetID='%s',CellID='',db2='%s',ProbeSetID2='%s',CellID2='',rank='%s')">%.2f</a>
- ''' % (p["targetDatabaseName"], trait.name, targetTraits[i].db.name, targetTraits[i].name, "0", corrValue))
- corr.append(corrPlotLink)
-
- corr.extend(paddingNeeded * ["&nbsp;"])
-
- checkbox = ('<INPUT TYPE="checkbox" NAME="searchResult" VALUE="%s:%s" />'
- % (p["targetDatabaseName"], trait.name))
- flip = not flip
- output += traitPair[0].tableRow([checkbox], corr, colors[flip])
-
- #output += "</table>"
- i += 1
- output += '<tr><td colspan="%d">&nbsp;</td></tr>' % totalColumnCount
-
- output += "</table>"
-
- # print form buttons if there were checkboxes above
- output += '''
- <div align="left">
- <INPUT TYPE="button" NAME="addselect" CLASS="button" VALUE="Add to Collection"
- onClick="addRmvSelection('%s',this.form, 'addToSelection');">
- <INPUT TYPE="button" NAME="selectall" CLASS="button" VALUE="Select All" onClick="checkAll(this.form);">
- <INPUT TYPE="reset" CLASS="button" VALUE="Select None">
- </div>
- </form>
- ''' % inbredSetName
-
- return output
-
- # showCorrelationResults: ParamDict -> listof Trait -> tupleof (int,arrayof trait) -> String
- # to build an output display for the multitrait correlation results
- def showCorrelationResults(self, p, inbredSetName, traits, traitArray):
- output = '''
- <h2>Correlation Comparison Summary</h2>
- <p>
- %s correlations were computed for each of the selected traits with each trait in
- the <a href="/dbdoc/%s.html">%s</a> database.
- Subsets of database traits for which correlations were higher than %s
- or lower than -%s are shown below based on which traits
- they correlated highly with. The top %s subsets, ranked by the number of input traits that
- they correspond with, are shown, and at most %s traits in each subset are shown. </p>
- ''' % (p["correlationName"],
- p["targetDatabase"], p["targetDatabaseName"],
- p["threshold"], p["threshold"], p["subsetCount"],
- p["subsetSize"])
-
-
- totalTraits = 0
- for j in range(len(traitArray)):
- totalTraits += len(traitArray[j][1])
-
- if totalTraits == 0:
- output += """
- <p>
- No shared corrrelates were found with your given traits at this
- threshold. You may wish to lower the correlation threshold or choose different traits.
- </p>
- """
- else:
- output += self.showSummaryCorrelationResults(p, traits, traitArray)
- output += self.showDetailedCorrelationResults(p, inbredSetName,
- traits, traitArray)
-
- return output
-
-# decomposeIndex: (listof Trait) -> Int ->
-# (listof Trait)
-# to use i to partition T into a sublist
-# each bit in i controls the inclusion or exclusion of a trait
-def decomposeIndex(traits, i):
- targetTraits = []
-
- for j in range(len(traits)):
- # look, mom, a bitwise and!
- # expression below tests whether the jth bit is
- # set in i
- # see runCorrelation for how we decompose the
- # array index
- if (i & pow(2,j)) == pow(2,j):
- targetTraits.append(traits[j])
-
- return targetTraits
-
-# binaryDecompose: int -> (listof int)
-# to decompose a number into its constituent powers of 2
-# returns a list of the exponents a_1...a_n such that the input m
-# is m = 2^a_1 + ... + 2^a_n
-def binaryDecompose(n):
- if n == 0:
- return []
-
- # we start with the highest power of 2 <= this number
- # and work our way down, subtracting powers of 2
- start = long(math.floor(math.log(n)/math.log(2)))
-
- exponents = []
- while start >= 0:
- if n >= long(math.pow(2, start)):
- n -= math.pow(2,start)
- exponents.append(start)
- start -= 1
- return exponents
-
-# powerOf : int -> int -> boolean
-# to determine whether m is a power of n;
-# more precisely, whether there exists z in Z s.t.
-# n^z = m
-def powerOf(m, n):
- trialZ = math.floor(math.log(m)/math.log(n))
- return pow(n,trialZ) == m
-
-
-class compCorrPage(templatePage.templatePage):
- def __init__(self,fd):
- templatePage.templatePage.__init__(self, fd)
-
- if not self.openMysql():
- return
-
- cursor = self.cursor
- params = buildParamDict(cursor, fd)
-
- # get the input data
- inbredSetName, traits = readInputFile(cursor, RootDir + params["filename"])
-
- # and what we are comparing the data to
- dbTraits = []
- if params["targetDatabaseType"] != "ProbeSet":
- dbTraits = loadDatabase(cursor, params)
-
-
- # run the comparison itself
- strainCount = trait.queryStrainCount(cursor) # XZ, 09/10/2008: add module name
- if params["targetDatabaseType"] == "ProbeSet":
- results = runProbeSetCorrelations(cursor, params, traits)
- else:
- results = runCorrelations(params, strainCount, traits, dbTraits)
-
- # try to be smart about what to output:
- # we want to limit the number of traits shown, at least initially
- # and since traitArray is already sorted with most interesting
- # subsets first, we simply pick up the first 500 or so traits
- # that we find
- if params["displaySets"] == 0:
- selectedTraits = 0
- for j in range(len(results)):
- #print "Scanning subarray %d" % j
- if selectedTraits <= 200:
- params["displaySets"] += pow(2, j)
- selectedTraits += len(results[j][1])
-
- traitList = []
- for oneTrait in traits: # XZ, 09/10/2008: change the original variable name 'trait' to 'oneTrait'
- traitName = oneTrait.dbName+'::'+oneTrait.name # XZ, 09/10/2008: change the original variable name 'trait' to 'oneTrait'
- aTrait = webqtlTrait(cursor=self.cursor, fullname=traitName)
- traitList.append(aTrait)
-
- # and generate some output
- txtOutputFilename = tempfile.mktemp()
- txtOutputHandle = open(txtOutputFilename, "w")
- txtOutput = TraitCorrelationText(params, traits, results)
- txtOutputHandle.write(str(txtOutput))
- txtOutputHandle.close()
- txtOutputFilename = os.path.split(txtOutputFilename)[1]
-
- self.dict['body'] = TraitCorrelationPage(fd, params, cursor, traitList,
- results, inbredSetName,
- txtOutputFilename).dict['body']