From be095620bc8126026514fdee43e06a9a9f443f97 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Wed, 29 Aug 2012 17:49:43 -0500 Subject: Fixed so that outliers are now correctly highlighted --- wqflask/base/webqtlCaseData.py | 26 ++-- wqflask/utility/Plot.py | 100 ++++++------- wqflask/wqflask/show_trait/DataEditingPage.py | 154 ++++++--------------- .../wqflask/templates/trait_data_and_analysis.html | 2 +- 4 files changed, 103 insertions(+), 179 deletions(-) (limited to 'wqflask') diff --git a/wqflask/base/webqtlCaseData.py b/wqflask/base/webqtlCaseData.py index 7805df06..25665c55 100755 --- a/wqflask/base/webqtlCaseData.py +++ b/wqflask/base/webqtlCaseData.py @@ -37,6 +37,7 @@ class webqtlCaseData(object): self.variance = variance # Trait Variance self.num_cases = num_cases # Number of individuals/cases self.this_id = None # Set a sane default (can't be just "id" cause that's a reserved word) + self.outlier = None # Not set to True/False until later def __repr__(self): str = "" @@ -48,6 +49,14 @@ class webqtlCaseData(object): str += " ndata=%d" % self.num_cases return str + @property + def class_outlier(self): + """Template helper""" + if self.outlier: + return "outlier" + else: + return "" + @property def display_value(self): if self.value: @@ -63,20 +72,3 @@ class webqtlCaseData(object): return "x" - #try: - # traitVar = thisvar - # dispVar = "%2.3f" % thisvar - #except: - # traitVar = '' - # dispVar = 'x' - - #try: - # traitVal = thisval - # dispVal = "%2.3f" % thisval - #except: - # traitVal = '' - # dispVal = 'x' - - - #def this_val_full(self): - # strain_name = \ No newline at end of file diff --git a/wqflask/utility/Plot.py b/wqflask/utility/Plot.py index 086f3d57..51a57a6d 100755 --- a/wqflask/utility/Plot.py +++ b/wqflask/utility/Plot.py @@ -25,6 +25,13 @@ # Last updated by GeneNetwork Core Team 2010/10/20 #import piddle as pid + +from __future__ import print_function + +from pprint import pformat as pf + +print("Lysol") + from math import * import random import sys, os @@ -32,6 +39,9 @@ from numarray import linear_algebra as la from numarray import ones, array, dot, swapaxes import reaper +sys.path.append("..") +print(sys.path) +from basicStatistics import corestats import svg import webqtlUtil @@ -254,6 +264,7 @@ def gmedian(lst2): return lst[(N-1)/2] def gpercentile(lst2, np): + """Obsolete - use percentile in corestats instead""" lst = lst2[:] N = len(lst) if N == 0 or np > 100 or np < 0: @@ -270,61 +281,41 @@ def gpercentile(lst2, np): else: return lst[k-1] + d*(lst[k] - lst[k-1]) -def findOutliers(vals): - - valsOnly = [] - dataXZ = vals[:] - for i in range(len(dataXZ)): - valsOnly.append(dataXZ[i][1]) - - data = [('', valsOnly[:])] - - for item in data: - itemvalue = item[1] - nValue = len(itemvalue) - catValue = [] - - for item2 in itemvalue: - try: - tstrain, tvalue = item2 - except: - tvalue = item2 - if nValue <= 4: - continue - else: - catValue.append(tvalue) - - if catValue != []: - lowHinge = gpercentile(catValue, 25) - upHinge = gpercentile(catValue, 75) - Hstep = 1.5*(upHinge - lowHinge) +def find_outliers(vals): + """Calculates the upper and lower bounds of a set of sample/case values + + + >>> find_outliers([3.504, 5.234, 6.123, 7.234, 3.542, 5.341, 7.852, 4.555, 12.537]) + (11.252500000000001, 0.5364999999999993) + + >>> >>> find_outliers([9,12,15,17,31,50,7,5,6,8]) + (32.0, -8.0) + + If there are no vals, returns None for the upper and lower bounds, + which code that calls it will have to deal with. + >>> find_outliers([]) + (None, None) + + """ - outlier = [] - extreme = [] + print("xerxes vals is:", pf(vals)) - upperBound = upHinge + Hstep - lowerBound = lowHinge - Hstep + if vals: + #print("vals is:", pf(vals)) + stats = corestats.Stats(vals) + low_hinge = stats.percentile(25) + up_hinge = stats.percentile(75) + hstep = 1.5 * (up_hinge - low_hinge) - for item in catValue: - if item >= upHinge + 2*Hstep: - extreme.append(item) - elif item >= upHinge + Hstep: - outlier.append(item) - else: - pass + upper_bound = up_hinge + hstep + lower_bound = low_hinge - hstep - for item in catValue: - if item <= lowHinge - 2*Hstep: - extreme.append(item) - elif item <= lowHinge - Hstep: - outlier.append(item) - else: - pass - else: - upperBound = 1000 - lowerBound = -1000 + else: + upper_bound = None + lower_bound = None - return upperBound, lowerBound + print(pf(locals())) + return upper_bound, lower_bound def plotBoxPlot(canvas, data, offset= (40, 40, 40, 40), XLabel="Category", YLabel="Value"): @@ -1281,3 +1272,12 @@ def BWSpectrum(n=100): out.append(pid.Color(x,x,x)); x += step return out + + +def _test(): + import doctest + doctest.testmod() + + +if __name__=="__main__": + _test() diff --git a/wqflask/wqflask/show_trait/DataEditingPage.py b/wqflask/wqflask/show_trait/DataEditingPage.py index 43f05f14..bb6156c3 100755 --- a/wqflask/wqflask/show_trait/DataEditingPage.py +++ b/wqflask/wqflask/show_trait/DataEditingPage.py @@ -1,5 +1,7 @@ from __future__ import absolute_import, print_function, division +print("Google") + import string import os import cPickle @@ -1673,7 +1675,7 @@ class DataEditingPage(templatePage): #showHideMenuOptions.append(HT.Bold("  Options:"), " "*5, showHideNoValue, " "*5, showHideOutliers, " "*5, resetButton, " "*5, exportButton) #traitTableOptions.append(showHideMenuOptions,HT.BR(),HT.BR()) - #traitTableOptions.append(HT.Span("  Outliers highlighted in ", HT.Bold(" yellow ", style="background-color:yellow;"), " can be hidden using the ", + #traitTableOptions.append(HT.Span("  Outliers highlighted in ", HT.Bold(" red ", style="background-color:red;"), " can be hidden using the ", # HT.Strong(" Hide Outliers "), " button,",HT.BR(),"  and samples with no value (x) can be hidden by clicking ", # HT.Strong(" Hide No Value "), "."), HT.BR()) @@ -1703,7 +1705,7 @@ class DataEditingPage(templatePage): primary_strainlist = fd.parlist + allstrainlist_neworder - primary_strains = self.addTrait2Table(fd=fd, + primary_strains = self.create_strain_objects(fd=fd, varianceDataPage=varianceDataPage, strainlist=primary_strainlist, mainForm=mainForm, @@ -1712,6 +1714,7 @@ class DataEditingPage(templatePage): attribute_ids=attribute_ids, attribute_names=attribute_names, strains='primary') + other_strains = [] for strain in thisTrait.data.keys(): @@ -1727,7 +1730,7 @@ class DataEditingPage(templatePage): other_strains.sort() #Sort other strains other_strains = par_f1_strains + other_strains - other_strains = self.addTrait2Table(fd=fd, + other_strains = self.create_strain_objects(fd=fd, varianceDataPage=varianceDataPage, strainlist=other_strains, mainForm=mainForm, @@ -1736,6 +1739,7 @@ class DataEditingPage(templatePage): attribute_names=attribute_names, strains='other') + #TODO: Figure out why this if statement is written this way - Zach if (other_strains or (fd.f1list and thisTrait.data.has_key(fd.f1list[0])) or (fd.f1list and thisTrait.data.has_key(fd.f1list[1]))): @@ -1747,7 +1751,7 @@ class DataEditingPage(templatePage): self.other_strains = other_strains - def addTrait2Table(self, fd, varianceDataPage, strainlist, mainForm, thisTrait, + def create_strain_objects(self, fd, varianceDataPage, strainlist, mainForm, thisTrait, other_strainsExist=None, attribute_ids=None, attribute_names=None, strains='primary'): @@ -1759,23 +1763,26 @@ class DataEditingPage(templatePage): #XZ, Aug 23, 2010: I commented the code related to the display of animal case #strainInfo = thisTrait.has_key('strainInfo') and thisTrait.strainInfo - print("in addTrait2Table") - table_body = [] - vals = [] - - - #################### Only used to find upperBound and lowerBound + print("in create_strain_objects") + #table_body = [] + + ################### Only used to find upperBound and lowerBound + #vals = [] #for strainNameOrig in strainlist: # strainName = strainNameOrig.replace("_2nd_", "") # print("pen: %s - %s" % (strainNameOrig, strainName)) - # thisval = thisTrait.data[strainName].value - # thisvar = thisTrait.data[strainName].variance - # thisValFull = [strainName, thisval, thisvar] - # - # vals.append(thisValFull) + # try: + # thisval = thisTrait.data[strainName].value + # thisvar = thisTrait.data[strainName].variance + # thisValFull = [strainName, thisval, thisvar] + # + # vals.append(thisValFull) + # except KeyError: + # print("**x** Skipping:", strainName) # #upperBound, lowerBound = Plot.findOutliers(vals) # ZS: Values greater than upperBound or less than lowerBound are considered outliers. + the_strains = [] for counter, strainNameOrig in enumerate(strainlist, 1): @@ -1790,107 +1797,15 @@ class DataEditingPage(templatePage): print("No strain %s, let's create it now" % strainName) strain = webqtlCaseData.webqtlCaseData(strainName) print("zyt - strainNameOrig:", strainNameOrig) - #trId = strainNameOrig - #selectCheck = HT.Input(type="checkbox", name="selectCheck", value=trId, Class="checkbox", onClick="highlight(this)") - - - #try: - # thisval, thisvar, thisNP = thisTrait.data[strainName].value, thisTrait.data[strainName].var, thisTrait.data[strainName].N - # if thisNP: - # mainForm.append(HT.Input(name='N'+strainName, value=thisNP, type='hidden')) - # else: - # pass - #except: - # thisval = thisvar = 'x' - - #thisval = thisTrait.data[strainName].value - #thisvar = thisTrait.data[strainName].variance - #thisTrait.data[strainName].num_cases - - #strain['strain_name'] = strainName - #strainNameDisp = HT.Span(strainName, Class='fs14 fwn ffl') - - #if varianceDataPage: - #try: - # traitVar = thisvar - # dispVar = "%2.3f" % thisvar - #except: - # traitVar = '' - # dispVar = 'x' - - #if thisval == 'x': - # traitVar = '' #ZS: Used to be 0, but it doesn't seem like a good idea for values of 0 to *always* be at the bottom when you sort; it makes more sense to put "nothing" - # - # #className = 'fs13 b1 c222 ' - # #valueClassName = 'fs13 b1 c222 valueField ' - # #rowClassName = 'novalue ' - #else: - # if (thisval >= upperBound) or (thisval <= lowerBound): - # strain['outlier'] = "outlier" # We're going to use this as a class, so we want it to be a word - # #className = 'fs13 b1 c222 outlier ' - # #valueClassName = 'fs13 b1 c222 valueField ' - # #rowClassName = 'outlier' - # else: - # strain['outlier'] = "not_outlier" - # #className = 'fs13 b1 c222 ' - # #valueClassName = 'fs13 b1 c222 valueField ' - # #rowClassName = ' ' - # - #if varianceDataPage: - # varClassName = valueClassName + str(traitVar) - #valueClassName += str(traitVal) - - #if strainNameOrig == strainName: - # if other_strainsExist and strainNameOrig in (fd.parlist + fd.f1list): - # ######################################################################################################################################################## - # # ZS: Append value and variance to the value and variance input fields' list of classes; this is so the javascript can update the value when the user - # # changes it. The updated value is then used when the table is sorted (tablesorter.js). This needs to be done because the "value" attribute is immutable. - # ######################################################################################################################################################### - # - # #valueField = HT.Input(name=strainNameOrig, size=8, maxlength=8, style="text-align:right; background-color:#FFFFFF;", value=dispVal, - # # onChange= "javascript:this.form['_2nd_%s'].value=this.form['%s'].value;" % (strainNameOrig.replace("/", ""), strainNameOrig.replace("/", "")), Class=valueClassName) - # if varianceDataPage: - # pass - # #seField = HT.Input(name='V'+strainNameOrig, size=8, maxlength=8, style="text-align:right", value=dispVar, - # # onChange= "javascript:this.form['V_2nd_%s'].value=this.form['V%s'].value;" % (strainNameOrig.replace("/", ""), strainNameOrig.replace("/", "")), Class=varClassName) - # else: - # pass - # #valueField = HT.Input(name=strainNameOrig, size=8, maxlength=8, style="text-align:right; background-color:#FFFFFF;", value=dispVal, Class=valueClassName) - # if varianceDataPage: - # pass - # #seField = HT.Input(name='V'+strainNameOrig, size=8, maxlength=8, style="text-align:right", value=dispVar, Class=varClassName) - #else: - # pass - # #valueField = HT.Input(name=strainNameOrig, size=8, maxlength=8, style="text-align:right", value=dispVal, - # #onChange= "javascript:this.form['%s'].value=this.form['%s'].value;" % (strainNameOrig.replace("/", ""), strainNameOrig.replace("/", "")), Class=valueClassName) - # if varianceDataPage: - # pass - # #seField = HT.Input(name='V'+strainNameOrig, size=8, maxlength=8, style="text-align:right", value=dispVar, - # # onChange= "javascript:this.form['V%s'].value=this.form['V%s'].value;" % (strainNameOrig.replace("/", ""), strainNameOrig.replace("/", "")), Class=varClassName) + if strains == 'primary': strain.this_id = "Primary_" + str(counter) - #table_row = HT.TR(Id="Primary_"+str(i+1), Class=rowClassName) else: strain.this_id = "Other_" + str(counter) - #table_row = HT.TR(Id="Other_"+str(i+1), Class=rowClassName) - - #strain['value'] = traitVal - # - #strain['se'] = dispVar - #if varianceDataPage: - #table_row.append(HT.TD(str(i+1), selectCheck, width=45, align='right', Class=className)) - #table_row.append(HT.TD(strainNameDisp, strainNameAdd, align='right', width=100, Class=className)) - #table_row.append(HT.TD(valueField, width=70, align='right', Id="value_"+str(i)+"_"+strains, Class=className)) - #table_row.append(HT.TD("±", width=20, align='center', Class=className)) - #table_row.append(HT.TD(seField, width=80, align='right', Id="SE_"+str(i)+"_"+strains, Class=className)) - #pass - #else: - #table_row.append(HT.TD(str(i+1), selectCheck, width=45, align='right', Class=className)) - #table_row.append(HT.TD(strainNameDisp, strainNameAdd, align='right', width=100, Class=className)) - #table_row.append(HT.TD(valueField, width=70, align='right', Id="value_"+str(i)+"_"+strains, Class=className)) - #pass - if thisTrait and thisTrait.db and thisTrait.db.type =='ProbeSet': + + #### For extra attribute columns; currently only used by two human datasets - Zach + if thisTrait and thisTrait.db and thisTrait.db.type == 'ProbeSet': if len(attribute_ids) > 0: #ZS: Get StrainId value for the next query @@ -1929,6 +1844,8 @@ class DataEditingPage(templatePage): attr_counter += 1 the_strains.append(strain) #table_body.append(table_row) + + do_outliers(the_strains) print("*the_strains are [%i]: %s" % (len(the_strains), pf(the_strains))) return the_strains @@ -1975,3 +1892,18 @@ class DataEditingPage(templatePage): sortby = ("", "") return sortby + + + +def do_outliers(strain_objects): + values = [strain.value for strain in strain_objects if strain.value != None] + upper_bound, lower_bound = Plot.find_outliers(values) + + for strain in strain_objects: + if strain.value: + if upper_bound and strain.value > upper_bound: + strain.outlier = True + elif lower_bound and strain.value < lower_bound: + strain.outlier = True + else: + strain.outlier = False diff --git a/wqflask/wqflask/templates/trait_data_and_analysis.html b/wqflask/wqflask/templates/trait_data_and_analysis.html index 94ba0aad..3644b436 100644 --- a/wqflask/wqflask/templates/trait_data_and_analysis.html +++ b/wqflask/wqflask/templates/trait_data_and_analysis.html @@ -3076,7 +3076,7 @@ {% for strain in strain_type %} - + {{ loop.index }} -- cgit v1.2.3