diff options
Diffstat (limited to 'web/webqtl/compareCorrelates/multitrait.py')
-rwxr-xr-x | web/webqtl/compareCorrelates/multitrait.py | 1121 |
1 files changed, 0 insertions, 1121 deletions
diff --git a/web/webqtl/compareCorrelates/multitrait.py b/web/webqtl/compareCorrelates/multitrait.py deleted file mode 100755 index 047620af..00000000 --- a/web/webqtl/compareCorrelates/multitrait.py +++ /dev/null @@ -1,1121 +0,0 @@ -# multitrait.py -# a tool to analyze the correlations between several different traits and the traits -# in a given dataset -# -# Parameters: -# correlation -- either "pearson" or "spearman" depending on which ones we want to use -# -# filename -- an input file containing the traits to analyze -# -# progress -- if set, this parameter outputs a static progress page -# and uses a META redirect to trigger the real computation -# -# targetDatabaseType: -# one of "ProbeSet", "Publish", "Genotype" depending on the type of database -# we will use for the analysis -# -# targetDatabaseId: -# the id (*Freeze.Id in the database) of the particular database we will analyze -# -# threshold -- a float between 0 and 1 to determine which coefficents we wil l consider -# -# firstRun -- either 0 or 1 -# whether to automatically pick reasonable defaults for the other three parameters -# -# outputType -- either "html" or "text" -# -# Author: Stephen Pitts -# June 15, 2004 - -#Xiaodong changed the dependancy structure - -import copy -import sys -import cgi -import os -import os.path -import math -import time -import numarray -import tempfile -import string -import cgitb #all tracebacks come out as HTMLified CGI,useful when we have a random crash in the middle - -from base import templatePage -from base.webqtlTrait import webqtlTrait -from utility import webqtlUtil -from base import webqtlConfig -import trait -import correlation -import htmlModule - -cgitb.enable() - - -# where this program's data files are -RootDir = webqtlConfig.IMGDIR # XZ, 09/10/2008: add module name 'webqtlConfig.' -RootDirURL = "/image/" # XZ, 09/10/2008: This parameter is not used in this module - -tempfile.tempdir = RootDir -tempfile.template = "multitrait" - -# MultitraitException: used if something goes wrong -# maybe in the future we should make exceptions more granular -class MultitraitException(Exception): - def __init__(self, message): - self.message = message - - def __repr__(self): - return "MultitraitException: %s" % self.message - -# buildParamDict: Cursor -> ParamDict -# to process and validate CGI arguments -# see the comment at the top of this file for valid cgi -# parameters -def buildParamDict(cursor, fd): - params = {} - fs = fd.formdata #cgi.FieldStorage() - params["progress"] = fs.getfirst("progress", "0") - params["filename"] = fs.getfirst("filename", "") - if params["filename"] == "": - raise MultitraitException("Required parameter filename missing.") - - params["targetDatabase"] = fs.getfirst("targetDatabase", "U74Av2RMA_Raw_ProbeSet_March04") - params["firstRun"] = webqtlUtil.safeInt(fs.getfirst("firstRun", "0"),0) - params["threshold"] = webqtlUtil.safeFloat(fs.getfirst("threshold", "0.5"), 0.5) - params["subsetSize"] = webqtlUtil.safeInt(fs.getfirst("subsetSize", "10"), 10) - - if params["subsetSize"] < -1: - params["subsetSize"] = -1 - - params["correlation"] = fs.getfirst("correlation", "pearson") - params["subsetCount"] = webqtlUtil.safeInt(fs.getfirst("subsetCount", 10), 10) - - if params["subsetCount"] < -1: - params["subsetCount"] = -1 - - #params["outputType"] = fs.getfirst("outputType", "html") - - #if params["outputType"] not in ("html", "text"): - # params["outputType"] = "html" - - if params["correlation"] not in ("pearson", "spearman"): - params["correlation"] = "pearson" - - params["correlationName"] = params["correlation"].capitalize() - - # one of two cases: - # 1) We have just come from a submit, so there are a bunch of display* - # but no displaySets. Thus, the code down there converts the display* - # to displaySets so the GET request doesn't get too long - # 2) We have just been redirected from a progress page which already has - # a converted displaySets for us. - - displaySets = webqtlUtil.safeInt(fs.getfirst("displaySets","0"), 0) - - if displaySets == 0: - for key in fs.keys(): - if key[:7] == "display": - #print "Hit display key %s<br>" % key - try: - whichSet = int(key[7:]) - - # prevent malicious attacks - whichSet = min(whichSet, 512) - displaySets += pow(2, whichSet) - - except ValueError: pass - - params["displaySets"] = displaySets - #print "In the beginning, display sets was %s: %s<br>" % (displaySets, - # str(binaryDecompose(displaySets))) - - # if we are just gonna display a progress page, then there's no - # reason to look up detailed database information - #if params["progress"] == "1": - # return params - - a,b = trait.dbNameToTypeId(cursor, params["targetDatabase"]) # XZ, 09/10/2008: add module name - params["targetDatabaseType"] = a - params["targetDatabaseId"] = b - params["targetDatabaseName"] = params["targetDatabase"] - - return params - -# readInputFile: DB cursor -> string -> string, (arrayof Trait) -def readInputFile(cursor, filename): - """ - To read an input file with n lines in the following format - <databasetype>,<databaseid>,<traitname> - and retrieve and populate traits with appropriate data - from the database - - Also, for our purposes. we store the database type and - database id in fields attached to the trait instances. We use - this information to generate Javascript popups with trait - information. - - In addition, we read the strain of mice that the traits are - from so we can only show those databases to correlate against. - """ - handle = open(filename) - line = handle.readline() - inbredSetName = line.strip() - line = handle.readline() - traits = [] - -# XZ, 09/10/2008: In this while loop block, I changed the original variable name 'trait' to 'oneTrait' - while line != "": - line = line.strip() - dbType, dbId, tName = line.split(",") - - if dbType == "ProbeSet": - oneTrait = trait.queryProbeSetTraitByName(cursor, tName) # XZ, 09/10/2008: add module name - oneTrait.populateDataId(cursor, dbId) - oneTrait.dbName = trait.dbTypeIdToName(cursor, dbType, dbId) # XZ, 09/10/2008: add module name - elif dbType == "Geno": - speciesId = trait.getSpeciesIdByDbTypeId(cursor, dbType, dbId) - oneTrait = trait.queryGenotypeTraitByName(cursor, speciesId, tName) # XZ, 09/10/2008: add module name - oneTrait.populateDataId(cursor, dbId) - oneTrait.dbName = trait.dbTypeIdToName(cursor, dbType, dbId) # XZ, 09/10/2008: add module name - elif dbType == "Publish": - oneTrait = trait.queryPublishTraitByName(cursor, dbId, tName) # XZ, 09/10/2008: add module name - oneTrait.populateDataId(cursor, dbId) - oneTrait.dbName = trait.dbTypeIdToName(cursor, dbType, dbId) # XZ, 09/10/2008: add module name - elif dbType == "Temp": - oneTrait = trait.queryTempTraitByName(cursor, tName) # XZ, 09/10/2008: add module name - oneTrait.populateDataId(cursor, dbId) - oneTrait.dbName = "Temp" - - oneTrait.populateStrainData(cursor) - traits.append(oneTrait) - - line = handle.readline() - - return inbredSetName, traits - -# loadDatabase: Cursor -> ParamDict -> arrayof Trait -def loadDatabase(cursor, p): - """ - To load a set of traits as specified by the - targetDatabaseId - and targetDatabaseType parameters - - Cursor should be a fastCursor from the webqtl library (i.e. - a MySQLdb SSCursor). - - Calling populateStrainData 20,000 or so times on a ProbeSet - is really inefficent, so I wrote an optimized queryPopulatedProbeSetTraits - in the trait module that uses a join to get all of the rows in - bulk, store the resultset on the server, and do all sorts of nice buffering. - It's about two or three times faster. - """ - if p["targetDatabaseType"] == "ProbeSet": # XZ, 09/10/2008: add module name - dbTraits = trait.queryPopulatedProbeSetTraits(cursor, - p["targetDatabaseId"]) - elif p["targetDatabaseType"] == "Publish": # XZ, 09/10/2008: add module name - dbTraits = trait.queryPublishTraits(cursor, - p["targetDatabaseId"]) - psd = trait.PublishTrait.populateStrainData - elif p["targetDatabaseType"] == "Geno": # XZ, 09/10/2008: add module name - dbTraits = trait.queryGenotypeTraits(cursor, - p["targetDatabaseId"]) - psd = trait.GenotypeTrait.populateStrainData - else: - print "Unknown target database type %s" % p["targetDatabaseType"] - - if p["targetDatabaseType"] != "ProbeSet": - map(psd, dbTraits, [cursor]*len(dbTraits)) - - return dbTraits - -def runProbeSetCorrelations(cursor, p, traits): - """ - To run the correlations between the traits and the database. - This function computes a correlation coefficent between each - trait and every entry in the database, and partitions the database - into a disjoint array of arrays which it returns. - - The length of the return array is 2^n, where n is the length of - the trait array. Which constitutent element a of the return array - a given trait ends up in is determined by the following formula - i = i_02^0 + ... + i_(n-1)2^(n-1) - where i_0 is 1 if corr(a,trait 0) >= threshold and 0 otherwise - - Since most of the several thousand database traits will end up - with i=0, we don't return them, so the first element of the - return array will be empty. - - A particular element of subarray j of the return array contains - a 2-tuple (trait,kvalues). The variable trait is obviously the - particular database trait that matches the user traits l_1, ..., l_m - to which subarray j corresponds. kvalues is a list of the correlation - values linking trait to l_1, ..., l_m, so the length of kvalues is - the number of 1s in the binary representation of j (there must be - a better way to describe this length). - - The return array is an array of 2-tuples. The first element of - each tuple is the index of the particular subarray, and the second - element is the subarray itself. The array is sorted in descending - order by the number of 1's in the binary representation of the - index so the first few subarrays are the ones that correspond to - the largest sets. Each subarray is then sorted by the average of - the magnitude of the individual correlation values. - """ - - kMin = p["threshold"] - traitArrays = {} - - # TODO: Add Spearman support - freezeId = p["targetDatabaseId"] - if p["correlation"] == "pearson": - correlations = correlation.calcProbeSetPearsonMatrix(cursor, freezeId, traits) #XZ, 09/10/2008: add module name - else: - correlations = correlation.calcProbeSetSpearmanMatrix(freezeId, traits) #XZ, 09/10/2008: add module name - - # now we test all of the correlations in bulk - test = numarray.absolute(correlations) - test = numarray.greater_equal(test, kMin) - test = test.astype(numarray.Int8) - #print test - - db = trait.queryProbeSetTraits(cursor, freezeId) #XZ, 09/10/2008: add module name - for i in range(len(db)): - cIndex = 0 - prods = [] - for j in range(len(traits)): - if test[i,j] == 1: - cIndex += pow(2, j) - prods.append(correlations[i,j]) - if cIndex != 0: - if not traitArrays.has_key(cIndex): - traitArrays[cIndex] = [] - - traitArrays[cIndex].append((db[i], prods)) - - - # sort each inner list of traitArrays - # so the matched traits appear in descending order by the - # average magnitude of the correlation - def customCmp(traitPair, traitPair2): - magAvg1 = numarray.average(map(abs, traitPair[1])) - magAvg2 = numarray.average(map(abs, traitPair2[1])) - - # invert the sign to get descending order - return -cmp(magAvg1, magAvg2) - - for traitArray in traitArrays.values(): - traitArray.sort(customCmp) - - # sort the outer list of traitArrays - traitArrays2 = [] - i = 0 - for key in traitArrays.keys(): - a = traitArrays[key] - if len(a) > 0: - traitArrays2.append((key,a,len(binaryDecompose(key)), - len(a))) - - # we sort by the number of 1's in the binary output - # and then by the size of the list, both in descending order - def customCmp2(aL,bL): - a = -cmp(aL[2], bL[2]) - if a == 0: - return -cmp(aL[3], bL[3]) - else: - return a - - traitArrays2.sort(customCmp2) - - return traitArrays2 - -def runCorrelations(p, strainCount, traits, db): - """ - To run the correlations between the traits and the database. - This function computes a correlation coefficent between each - trait and every entry in the database, and partitions the database - into a disjoint array of arrays which it returns. - - The length of the return array is 2^n, where n is the length of - the trait array. Which constitutent element a of the return array - a given trait ends up in is determined by the following formula - i = i_02^0 + ... + i_(n-1)2^(n-1) - where i_0 is 1 if corr(a,trait 0) >= threshold and 0 otherwise - - Since most of the several thousand database traits will end up - with i=0, we don't return them, so the first element of the - return array will be empty. - - A particular element of subarray j of the return array contains - a 2-tuple (trait,kvalues). The variable trait is obviously the - particular database trait that matches the user traits l_1, ..., l_m - to which subarray j corresponds. kvalues is a list of the correlation - values linking trait to l_1, ..., l_m, so the length of kvalues is - the number of 1s in the binary representation of j (there must be - a better way to describe this length). - - The return array is an array of 2-tuples. The first element of - each tuple is the index of the particular subarray, and the second - element is the subarray itself. The array is sorted in descending - order by the number of 1's in the binary representation of the - index so the first few subarrays are the ones that correspond to - the largest sets. Each subarray is then sorted by the average of - the magnitude of the individual correlation values. - """ - kMin = p["threshold"] - traitArrays = {} - - # TODO: Add Spearman support - if p["correlation"] == "pearson": - correlations = correlation.calcPearsonMatrix(db, traits, strainCount) #XZ, 09/10/2008: add module name - else: - correlations = correlation.calcSpearmanMatrix(db, traits, strainCount) #XZ, 09/10/2008: add module name - - # now we test all of the correlations in bulk - test = numarray.absolute(correlations) - test = numarray.greater_equal(test, kMin) - test = test.astype(numarray.Int8) - #print test - - - for i in range(len(db)): - cIndex = 0 - prods = [] - for j in range(len(traits)): - if test[i,j] == 1: - cIndex += pow(2, j) - prods.append(correlations[i,j]) - if cIndex != 0: - if not traitArrays.has_key(cIndex): - traitArrays[cIndex] = [] - - traitArrays[cIndex].append((db[i], prods)) - - # sort each inner list of traitArrays - # so the matched traits appear in descending order by the - # average magnitude of the correlation - def customCmp(traitPair, traitPair2): - magAvg1 = numarray.average(map(abs, traitPair[1])) - magAvg2 = numarray.average(map(abs, traitPair2[1])) - - # invert the sign to get descending order - return -cmp(magAvg1, magAvg2) - - for traitArray in traitArrays.values(): - traitArray.sort(customCmp) - - # sort the outer list of traitArrays - traitArrays2 = [] - i = 0 - for key in traitArrays.keys(): - a = traitArrays[key] - if len(a) > 0: - traitArrays2.append((key,a,len(binaryDecompose(key)), - len(a))) - - # we sort by the number of 1's in the binary output - # and then by the size of the list, both in descending order - def customCmp2(aL,bL): - a = -cmp(aL[2], bL[2]) - if a == 0: - return -cmp(aL[3], bL[3]) - else: - return a - - traitArrays2.sort(customCmp2) - - return traitArrays2 - - -# XZ, 09/09/2008: In multiple trait correlation result page, -# XZ, 09/09/2008: click "Download a text version of the above results in CSV format" - -# TraitCorrelationText: a class to display trait correlations -# as textual output -class TraitCorrelationText: - # build a text shell to describe the given trait correlations - # this method sets self.output; use str(self) to actually - # get the text page - # - # traits is a list of traits and traitArray is a - # list of 3-tuples: index, traits', garbage - # where index is a binary-encoded description of which subset of - # traits the list traits' matches - # - # traits' is a list of 3-tuples as well: trait, correlations, garbage - # where trait is a particular trait and correlations is a list of float - # correlations (matching traits above) - def __init__(self, p, traits, traitArray): - output = "Correlation Comparison\n" - output += "from WebQTL and the University of Tennessee Health Science Center\n" - output += "initiated at " + time.asctime(time.gmtime()) + " UTC\n\n" - - output += self.showOptionPanel(p) - output += self.showSelectedTraits(traits) - output += self.showSummaryCorrelationResults(p, traits, traitArray) - output += self.showDetailedCorrelationResults(p, traits, traitArray) - - self.output = output - - # showOptionPanel: ParamDict -> string - # to display the options used to run this correlation - def showOptionPanel(self, params): - output = "Correlation Comparison Options:\n" - output += "Target database,%s\n" % params["targetDatabase"] - output += "Correlation type,%s\n" % params["correlationName"] - output += "Threshold,%f\n" % params["threshold"] - #output += "Subsets to Show,%d\n" % params["subsetCount"] - #output += "Traits to Show Per Subset,%d\n\n" % params["subsetSize"] - return output - - # showSelectedTraits: (listof Trait) -> string - # to display the traits compared with the database - # note: we can't use tabular output because the traits could be of - # different types and produce different fields - def showSelectedTraits(self, traits): - output = "Selected Traits:\n" - for trait in traits: - output += '"' + trait.longName() + '"' + "\n" - output += "\n" - return output - - # showSummaryCorrelationResults: ParamDict -> (listof Trait) -> - # TraitArray -> string - # see comment for __init__ for a description of TraitArray - # - # to show a summary (sets and sizes) of the correlation results - # as well as an X to indicate whether they will be included - # in the detailed output - def showSummaryCorrelationResults(self, p, traits, traitArray): - output = "Correlation Comparison Summary:\n" - - #if p["subsetCount"] != -1: - # ourSubsetCount = min(p["subsetCount"], len(traitArray)) - #else: - - ourSubsetCount = len(traitArray) - - displayDecomposition = binaryDecompose(p["displaySets"]) - for j in range(ourSubsetCount): - i = traitArray[j][0] - traitSubarray = traitArray[j][1] - if len(traitSubarray) == 0: - continue - - targetTraits = decomposeIndex(traits, i) - traitDesc = string.join(map(trait.Trait.shortName, targetTraits), # XZ, 09/10/2008: add module name - ", ") - if j in displayDecomposition: - checked = "X" - else: - checked = "" - - output += '"%s","%s","%d"\n' % (checked, traitDesc, len(traitSubarray)) - - output += "\n" - return output - - # showDetailedCorrelationResults: ParamDict -> (listof Trait) -> - # TraitArray -> string - # - # to show a detailed list of the correlation results; that is, - # to completely enumerate each subset of traitArray using the - # filtering parameters in p - def showDetailedCorrelationResults(self, p, traits, traitArray): - output = "Correlation Comparison Details:\n" - displayDecomposition = binaryDecompose(p["displaySets"]) - displayDecomposition.sort() - - def formatCorr(c): - return "%.4f" % c - - for j in displayDecomposition: - i = traitArray[j][0] - traitSubarray = traitArray[j][1] - - if len(traitSubarray) == 0: - continue - - targetTraits = decomposeIndex(traits, i) - extraColumnHeaders = map(trait.Trait.shortName, targetTraits) # XZ, 09/10/2008: add module name - traitDesc = string.join(extraColumnHeaders, ", ") - - #if(p["subsetSize"] != -1 and len(traitSubarray) > p["subsetSize"]): - # traitDesc += ",(showing top %s of %s)" % (p["subsetSize"], - # len(traitSubarray)) - # traitSubarray = traitSubarray[0:p["subsetSize"]] - - output += "%s\n" % traitDesc - output += traitSubarray[0][0].csvHeader([], extraColumnHeaders) - output += "\n" - for oneTrait, corr in traitSubarray:#XZ, 09/10/2008: change original variable name 'trait' to 'oneTrait' - corr = map(formatCorr, corr) - output += oneTrait.csvRow([], corr) + "\n" - - output += "\n" - - return output - - # __str__ : string - # to return self.output as the string representation of this page - # self.output is built in __init__ - def __str__(self): - return self.output - -# TraitCorrelationPage: a class to display trait correlations -# for now this is just one HTML file, so we don't even write it -# to a temporary file somewhere -class TraitCorrelationPage(templatePage.templatePage): - """ - Using the templatePage class, we build an HTML shell for - the core data here: the trait correlation lists. - - The way templatePage works, we build the page in pieces in - the __init__ method and later on use the inherited write - method to render the page. - """ - def __init__(self, fd, p, cursor, traits, traitArray, inbredSetName, txtFilename): - - templatePage.templatePage.__init__(self, fd) - - self.dict["title"] = "Correlation Comparison" - self.dict["basehref"] = "" - # NL: deleted js1 content part, since it has not been used in this project - self.dict["js1"] = "" - self.dict["js2"] = "" - - body = "<td><h1>Correlation Comparison</h1>" - body += "<p>Run at %s UTC</p>" % time.asctime(time.gmtime()) - body += """ - <p>The correlation comparison tool identifies intersecting sets of traits that are -correlated with your selections at a specified threshold. A correlation comparison -involves the following steps:</p> -<ol> -<li><p> -<b>Correlate:</b> -Choose a <i>Target Database</i>, a <i>Correlation Type</i>, and a <i>Correlation -Threshold</i>. For your initial correlation, leave <i>Number of Subsets to Show</i> and -<i>Traits to Show per Subset</i> at their default values of 10. Using the Correlation -Options panel, you can adjust the <i>Correlation Threshold</i>, <i>Number of Subsets to -Show</i>, and <i>Traits to Show per Subset</i>. -</p></li> - -<li><p> -<b>Add to Collection:</b> -You can use the check boxes in the <i>Correlation -Comparison Details</i> panel and the buttons at the bottom of the page to add these -results to your selections page for further analysis in WebQTL. -</p></li> - -<li><p> -<b>Filter:</b> -Using the <i>Correlation Comparison Summary</i> panel, choose which -subsets you would like to display for export. Note that if you change the -parameters in the <i>Correlation Options</i> panel, you will need to re-apply your filter. -</p></li> - -<li><p> -<b>Export:</b> -Once you are satisfied with your report, use the export link at -the bottom of the page to save the report as a comma-separated (CSV) text file -which you can then import into Excel or another tool. Note: the exported report -will list all subsets in the summary view and only those traits in the subsets -you have selected in the Filter step. -</p></li> -</ol> -""" - -# body += """ -# <p>The correlation -# comparison tool identifies the intersecting sets of traits that are -# correlated with your selections. A correlation comparison involves -# the following steps:</p> -# <ol> -# <li><p><b>Correlate:</b> Choose a <i>Target Database</i>, a <i>Correlation Type</i>, and a <i>Correlation Threshold</i>. -# For the initial correlation, leave <i>Subsets to Show</i> and <i>Traits to Show per Subset</i> -# at their default values of 10.</p></li> -# <li><p><b>Refine Correlation:</b> Using the <i>Correlation Options</i> panel, -# adjust the <i>Correlation Threshold</i>, <i>Subsets to Show</i>, and <i>Traits to -# Show per Subset</i> until you have a reasonable number of traits.</p></li> -# <li><p><b>Filter:</b> Using the <i>Correlation Comparison Summary</i> panel, choose which subsets you would -# like to see. Note that if you change the parameters in the <i>Correlation Options</i> panel, you will -# loose the filter you have selected.</p></li> -# <li><p><b>Export:</b> Once you are satisfied with your report, use the export -# link at the bottom of the page to save the report as a comma-separated (CSV) text file which -# you can then import into Excel or another tool. Note: the exported report -# will show all subsets in the summary view and all traits in each subset you have -# selected in the Filter step. -# <li><p><b>Shopping Cart:</b> In addition, you can use the -# check boxes in the <i>Correlation Comparison Details</i> panel and the -# buttons at the bottom of the page to add the traits you have found to the shopping cart.</p> -# </li> -# </ol> -# """ - - body += self.showOptionPanel(p, cursor, inbredSetName) - body += self.showSelectedTraits(traits, p, inbredSetName) - - if p["firstRun"] == 0: - body += self.showCorrelationResults(p, inbredSetName, traits, traitArray) - - exportParams = copy.copy(p) - exportParams["outputType"] = "text" - - body += (''' - <h2>Export these results</h2> - <p> - <a href="/image/%s">Download a text version of the above results in CSV format</a>. This text version differs from - the version you see on this page in two ways. First, the summary view shows all subsets. Second, the details - view shows all traits in the subsets that you have selected. - </p> - ''' - % txtFilename) - - - - body += "</td>" - self.dict["body"] = body - - - # showOptionPanel: ParamDict -> Cursor -> String -> String - # to build an option panel for the multitrait correlation - # we expect the database list to be a list of 2-tuples - # the first element of each tuple is the short name - # and the second element of the tuple is the long name - def showOptionPanel(self, params, cursor, inbredSetName): - output = ''' - <h2>Correlation Options</h2> - <FORM METHOD="POST" ACTION="%s%s" ENCTYPE="multipart/form-data"> - <INPUT TYPE="hidden" NAME="FormID" VALUE="compCorr2"> - <input type="hidden" name="filename" value="%s"> - <input type="hidden" name="firstRun" value="0"> - <input type="hidden" name="progress" value="1"> - <table> - <tr> - <td>Target Database:</td><td> - ''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, params["filename"]) - - output += htmlModule.genDatabaseMenu(db = cursor, - public=0, - RISetgp = inbredSetName, - selectname="targetDatabase", - selected=params["targetDatabase"]) - output += "</td></tr>" - - corrSelected = ["",""] - - if params["correlation"] == "pearson": - corrSelected[0] = "SELECTED" - else: - corrSelected[1] = "SELECTED" - - output += (''' - <tr> - <td>Correlation Method:</td> - <td><select name="correlation"> - <option value="pearson" %s>Pearson</option> - <!--<option value="spearman" %s>Spearman</option>--> - </select></td></tr> - ''' % (corrSelected[0], corrSelected[1])) - output += ('<tr><td>Correlation Threshold:</td><td><input name="threshold" value="%s" /></td></tr>' - % params["threshold"]) - output += ('<tr><td>Subsets to Show (-1 to show all subsets):</td><td><input name="subsetCount" value="%s" /></td></tr>' - % params["subsetCount"]) - output += ('<tr><td>Traits to Show per Subset (-1 to show all traits):</td><td><input name="subsetSize" value="%s" /></td></tr>' - % params["subsetSize"]) - - # a cosmetic change to hopefully make this form a bit easier to use -# if params["firstRun"] == 1: -# applyName = "Correlate" -# else: -# applyName = "Refine Correlation" - - output += ''' - <tr> - <td colspan="2"><input class="button" type="submit" value="Correlate" /></td> - </tr> - </table> - </form> - ''' - - return output - - # showSelectedTraits: listof Trait -> string - # to show a list of the selected traits - def showSelectedTraits(self, traits, p, inbredSetName): - output = ''' - <form action="%s%s" method="post" name="showDatabase"> - <INPUT TYPE="hidden" NAME="FormID" VALUE="showDatabase"> - - <input type="hidden" name="incparentsf1" value="ON"> - <input type="hidden" name="ShowStrains" value="ON"> - <input type="hidden" name="ShowLine" value="ON"> - <input type="hidden" name="database" value=""> - <input type="hidden" name="ProbeSetID" value=""> - <input type="hidden" name="RISet" value="%s"> - <input type="hidden" name="CellID" value=""> - <input type="hidden" name="database2" value=""> - <input type="hidden" name="rankOrder" value=""> - <input type="hidden" name="ProbeSetID2" value=""> - <input type="hidden" name="CellID2" value=""> - ''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, inbredSetName) - - output += "<h2>Selected Traits</h2>" - output += '<table cellpadding="2" cellspacing="0"><tr bgcolor="FFFFFF"><th>Database</th><th>Trait</th></tr>' - flip = 1 - colors = ["FFFFFF", "cccccc"] - - for trait in traits: - # we take advantage of the secret dbName attribute that - # loadDatabase fills in - descriptionString = trait.genHTML() - if trait.db.type == 'Publish' and trait.confidential: - descriptionString = trait.genHTML(privilege=self.privilege, userName=self.userName, authorized_users=trait.authorized_users) - output += ''' - <tr bgcolor="%s"><td><a href="/dbdoc/%s.html">%s</a></td> - <td><a href="javascript:showDatabase2('%s', '%s', '')">%s</a></td> - </tr> - ''' % (colors[flip], trait.db.name, trait.db.name, trait.db.name, trait.name, descriptionString) - flip = not flip - - output += "</table></form>" - return output - - - # showSummaryCorrelationResults - # show just the number of traits in each subarray - def showSummaryCorrelationResults(self, p, traits, traitArray): - output = ''' - <form action="%s%s" method="post"> - <INPUT TYPE="hidden" NAME="FormID" VALUE="compCorr2"> - <input type="hidden" name="filename" value="%s"> - <input type="hidden" name="firstRun" value="0"> - <input type="hidden" name="progress" value="1"> - <input type="hidden" name="correlation" value="%s"> - <input type="hidden" name="threshold" value="%s"> - <input type="hidden" name="rankOrder" value=""> - <input type="hidden" name="subsetCount" value="%s"> - <input type="hidden" name="subsetSize" value="%s"> - <input type="hidden" name="targetDatabase" value="%s"> - ''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, p["filename"], p["correlation"], p["threshold"], - p["subsetCount"], p["subsetSize"], p["targetDatabase"]) - - output += ''' - <table cellpadding="2" cellspacing="0"> - <tr> - <th>Trait Subsets</th> - <th colspan="2">Intersecting Set Size</th> - </tr> - ''' - # figure out a scale for the summary graph - # for now we set max = 300 pixels wide - if p["subsetCount"] != -1: - ourSubsetCount = min(p["subsetCount"], len(traitArray)) - else: - ourSubsetCount = len(traitArray) - - screenWidth = 600 - lengths = [] - for j in range(ourSubsetCount): - lengths.append(len(traitArray[j][1])) - maxLength = max(lengths) - - displayDecomposition = binaryDecompose(p["displaySets"]) - flip = 0 - colors = ["FFFFFF", "cccccc"] - - for j in range(ourSubsetCount): - i = traitArray[j][0] - traitSubarray = traitArray[j][1] - - if len(traitSubarray) == 0: - continue - - targetTraits = decomposeIndex(traits, i) - traitDesc = string.join(map(webqtlTrait.displayName, targetTraits), - ", ") - - if j in displayDecomposition: - checked = "CHECKED" - else: - checked = "" - - barWidth = (len(traitSubarray) * screenWidth) / maxLength - output += ('''<tr bgcolor="%s"> - <td><input type="checkbox" name="display%d" value="1" %s>%s</input></td> - <td>%s</td> - <td><img src="/images/blue.png" width="%d" height="25"></td></tr>''' - % (colors[flip], j, checked, traitDesc, len(traitSubarray), barWidth)) - flip = not flip - - output += ''' - <tr> - <td colspan="3"> - <input class="button" type="submit" value="Filter" /></td> - </tr> - </table></form> - ''' - return output - - # showDetailedCorrelationResults - # actually show the traits in each subarray - def showDetailedCorrelationResults(self, p, inbredSetName, traits, - traitArray): - output = "<h2>Correlation Comparison Details</h2>" - - # the hidden form below powers all of the JavaScript links, - # the shopping cart links, and the correlation plot links - - output += ''' - <form action="%s%s" method="post"> - <input type="hidden" name="database" value="%s"> - <input type="hidden" name="FormID" value="showDatabase"> - <input type="hidden" name="traitfile" value=""> - <input type="hidden" name="incparentsf1" value="ON"> - <input type="hidden" name="ShowStrains" value="ON"> - <input type="hidden" name="ProbeSetID" value=""> - <input type="hidden" name="ShowLine" value="ON"> - <input type="hidden" name="RISet" value="%s"> - <input type="hidden" name="CellID" value=""> - <input type="hidden" name="database2" value=""> - <input type="hidden" name="rankOrder" value=""> - <input type="hidden" name="ProbeSetID2" value=""> - <input type="hidden" name="CellID2" value=""> - ''' % (webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE, p["targetDatabase"], inbredSetName) - - - displayDecomposition = binaryDecompose(p["displaySets"]) - - # necessary to ensure that subset order is the same in the - # summary and the detailed view - displayDecomposition.sort() - - # here's a trick: the first trait we show must have the widest row because it correlates - # with the largest set of input traits - firstSubset = traitArray[displayDecomposition[0]] - firstTrait = firstSubset[1][0][0] - extraColumnCount = firstSubset[2] - totalColumnCount = 1 + len(firstTrait.row()) + extraColumnCount - - output += "<table cellpadding=2 cellspacing=0>\n" - for j in displayDecomposition: - i = traitArray[j][0] - traitSubarray = traitArray[j][1] - - # we don't display trait combinations for which there are - # no correlations - if len(traitSubarray) == 0: - continue - - # generate a description of the traits that this particular array - # matches highly - targetTraits = decomposeIndex(traits, i) - extraColumnHeaders = map(webqtlTrait.displayName, targetTraits) - traitDesc = string.join(extraColumnHeaders, ", ") - - # massage extraColumnHeaders so that they can be wrapped - for i in range(len(extraColumnHeaders)): - ech = extraColumnHeaders[i] - ech = ech.replace("-", " ") - ech = ech.replace("_", " ") - extraColumnHeaders[i] = ech - - # pad extraColumnHeaders if we have less columns than the max - paddingNeeded = extraColumnCount - len(extraColumnHeaders) - if paddingNeeded > 0: - extraColumnHeaders.extend(paddingNeeded * [" "]) - - # we limit the output to the top ones - if(p["subsetSize"] != -1 and len(traitSubarray) > p["subsetSize"]): - traitDesc += " (showing top %s of %s)" % (p["subsetSize"], len(traitSubarray)) - traitSubarray = traitSubarray[0:p["subsetSize"]] - - # combine that description with actual database traits themselves - # and the correlation values - output += '<tr><td colspan="%d"><h3>%s</h3></td></tr>' % (totalColumnCount, traitDesc) - #output += '<h3>%s</h3>\n<table cellpadding=2 cellspacing=0>\n'% traitDesc - - # we assume that every trait in traitSubarray is the same type - # of trait - flip = 0 - colors = ["FFFFFF", "cccccc"] - - output += traitSubarray[0][0].tableRowHeader([" "], extraColumnHeaders, colors[0]) - - for traitPair in traitSubarray: - corr = [] - traitPair[0].dbName = p['targetDatabase'] - trait = traitPair[0] - - for i in range(len(traitPair[1])): - corrValue = traitPair[1][i] - corrPlotLink = (''' - <a href="javascript:showCorrelationPlot2(db='%s',ProbeSetID='%s',CellID='',db2='%s',ProbeSetID2='%s',CellID2='',rank='%s')">%.2f</a> - ''' % (p["targetDatabaseName"], trait.name, targetTraits[i].db.name, targetTraits[i].name, "0", corrValue)) - corr.append(corrPlotLink) - - corr.extend(paddingNeeded * [" "]) - - checkbox = ('<INPUT TYPE="checkbox" NAME="searchResult" VALUE="%s:%s" />' - % (p["targetDatabaseName"], trait.name)) - flip = not flip - output += traitPair[0].tableRow([checkbox], corr, colors[flip]) - - #output += "</table>" - i += 1 - output += '<tr><td colspan="%d"> </td></tr>' % totalColumnCount - - output += "</table>" - - # print form buttons if there were checkboxes above - output += ''' - <div align="left"> - <INPUT TYPE="button" NAME="addselect" CLASS="button" VALUE="Add to Collection" - onClick="addRmvSelection('%s',this.form, 'addToSelection');"> - <INPUT TYPE="button" NAME="selectall" CLASS="button" VALUE="Select All" onClick="checkAll(this.form);"> - <INPUT TYPE="reset" CLASS="button" VALUE="Select None"> - </div> - </form> - ''' % inbredSetName - - return output - - # showCorrelationResults: ParamDict -> listof Trait -> tupleof (int,arrayof trait) -> String - # to build an output display for the multitrait correlation results - def showCorrelationResults(self, p, inbredSetName, traits, traitArray): - output = ''' - <h2>Correlation Comparison Summary</h2> - <p> - %s correlations were computed for each of the selected traits with each trait in - the <a href="/dbdoc/%s.html">%s</a> database. - Subsets of database traits for which correlations were higher than %s - or lower than -%s are shown below based on which traits - they correlated highly with. The top %s subsets, ranked by the number of input traits that - they correspond with, are shown, and at most %s traits in each subset are shown. </p> - ''' % (p["correlationName"], - p["targetDatabase"], p["targetDatabaseName"], - p["threshold"], p["threshold"], p["subsetCount"], - p["subsetSize"]) - - - totalTraits = 0 - for j in range(len(traitArray)): - totalTraits += len(traitArray[j][1]) - - if totalTraits == 0: - output += """ - <p> - No shared corrrelates were found with your given traits at this - threshold. You may wish to lower the correlation threshold or choose different traits. - </p> - """ - else: - output += self.showSummaryCorrelationResults(p, traits, traitArray) - output += self.showDetailedCorrelationResults(p, inbredSetName, - traits, traitArray) - - return output - -# decomposeIndex: (listof Trait) -> Int -> -# (listof Trait) -# to use i to partition T into a sublist -# each bit in i controls the inclusion or exclusion of a trait -def decomposeIndex(traits, i): - targetTraits = [] - - for j in range(len(traits)): - # look, mom, a bitwise and! - # expression below tests whether the jth bit is - # set in i - # see runCorrelation for how we decompose the - # array index - if (i & pow(2,j)) == pow(2,j): - targetTraits.append(traits[j]) - - return targetTraits - -# binaryDecompose: int -> (listof int) -# to decompose a number into its constituent powers of 2 -# returns a list of the exponents a_1...a_n such that the input m -# is m = 2^a_1 + ... + 2^a_n -def binaryDecompose(n): - if n == 0: - return [] - - # we start with the highest power of 2 <= this number - # and work our way down, subtracting powers of 2 - start = long(math.floor(math.log(n)/math.log(2))) - - exponents = [] - while start >= 0: - if n >= long(math.pow(2, start)): - n -= math.pow(2,start) - exponents.append(start) - start -= 1 - return exponents - -# powerOf : int -> int -> boolean -# to determine whether m is a power of n; -# more precisely, whether there exists z in Z s.t. -# n^z = m -def powerOf(m, n): - trialZ = math.floor(math.log(m)/math.log(n)) - return pow(n,trialZ) == m - - -class compCorrPage(templatePage.templatePage): - def __init__(self,fd): - templatePage.templatePage.__init__(self, fd) - - if not self.openMysql(): - return - - cursor = self.cursor - params = buildParamDict(cursor, fd) - - # get the input data - inbredSetName, traits = readInputFile(cursor, RootDir + params["filename"]) - - # and what we are comparing the data to - dbTraits = [] - if params["targetDatabaseType"] != "ProbeSet": - dbTraits = loadDatabase(cursor, params) - - - # run the comparison itself - strainCount = trait.queryStrainCount(cursor) # XZ, 09/10/2008: add module name - if params["targetDatabaseType"] == "ProbeSet": - results = runProbeSetCorrelations(cursor, params, traits) - else: - results = runCorrelations(params, strainCount, traits, dbTraits) - - # try to be smart about what to output: - # we want to limit the number of traits shown, at least initially - # and since traitArray is already sorted with most interesting - # subsets first, we simply pick up the first 500 or so traits - # that we find - if params["displaySets"] == 0: - selectedTraits = 0 - for j in range(len(results)): - #print "Scanning subarray %d" % j - if selectedTraits <= 200: - params["displaySets"] += pow(2, j) - selectedTraits += len(results[j][1]) - - traitList = [] - for oneTrait in traits: # XZ, 09/10/2008: change the original variable name 'trait' to 'oneTrait' - traitName = oneTrait.dbName+'::'+oneTrait.name # XZ, 09/10/2008: change the original variable name 'trait' to 'oneTrait' - aTrait = webqtlTrait(cursor=self.cursor, fullname=traitName) - traitList.append(aTrait) - - # and generate some output - txtOutputFilename = tempfile.mktemp() - txtOutputHandle = open(txtOutputFilename, "w") - txtOutput = TraitCorrelationText(params, traits, results) - txtOutputHandle.write(str(txtOutput)) - txtOutputHandle.close() - txtOutputFilename = os.path.split(txtOutputFilename)[1] - - self.dict['body'] = TraitCorrelationPage(fd, params, cursor, traitList, - results, inbredSetName, - txtOutputFilename).dict['body'] |