about summary refs log tree commit diff
diff options
context:
space:
mode:
author Zachary Sloan 2012-08-29 17:49:43 -0500
committer Zachary Sloan 2012-08-29 17:49:43 -0500
commit be095620bc8126026514fdee43e06a9a9f443f97 (patch)
tree e4b57f292ef3da812b1ec11d1c5f676083a8d853
parent 4cc46d84810ca9492e9a38e1a1f88ab36d214791 (diff)
download genenetwork2-be095620bc8126026514fdee43e06a9a9f443f97.tar.gz
Fixed so that outliers are now correctly highlighted
-rwxr-xr-x wqflask/base/webqtlCaseData.py 26
-rwxr-xr-x wqflask/utility/Plot.py 100
-rwxr-xr-x wqflask/wqflask/show_trait/DataEditingPage.py 154
-rw-r--r-- wqflask/wqflask/templates/trait_data_and_analysis.html 2
4 files changed, 103 insertions, 179 deletions
diff --git a/wqflask/base/webqtlCaseData.py b/wqflask/base/webqtlCaseData.py
index 7805df06..25665c55 100755
--- a/wqflask/base/webqtlCaseData.py
+++ b/wqflask/base/webqtlCaseData.py
@@ -37,6 +37,7 @@ class webqtlCaseData(object):
         self.variance = variance            # Trait Variance
         self.num_cases = num_cases          # Number of individuals/cases
         self.this_id = None   # Set a sane default (can't be just "id" cause that's a reserved word)
+        self.outlier = None   # Not set to True/False until later
 
     def __repr__(self):
         str = ""
@@ -49,6 +50,14 @@ class webqtlCaseData(object):
         return str
     
     @property
+    def class_outlier(self):
+        """Template helper: CSS class for the row ("outlier" when self.outlier is truthy, otherwise empty)."""
+        if self.outlier:
+            return "outlier"
+        else:
+            return ""
+    
+    @property
     def display_value(self):
         if self.value:
             return "%2.3f" % self.value
@@ -63,20 +72,3 @@ class webqtlCaseData(object):
             return "x"
         
         
-              #try:
-                #    traitVar = thisvar
-                #    dispVar = "%2.3f" % thisvar
-                #except:
-                #    traitVar = ''
-                #    dispVar = 'x'
-        
-        #try:
-        #    traitVal = thisval
-        #    dispVal = "%2.3f" % thisval
-        #except:
-        #    traitVal = ''
-        #    dispVal = 'x'
-
-
-    #def this_val_full(self):
-    #    strain_name = 
\ No newline at end of file
diff --git a/wqflask/utility/Plot.py b/wqflask/utility/Plot.py
index 086f3d57..51a57a6d 100755
--- a/wqflask/utility/Plot.py
+++ b/wqflask/utility/Plot.py
@@ -25,6 +25,13 @@
 # Last updated by GeneNetwork Core Team 2010/10/20
 
 #import piddle as pid
+
+from __future__ import print_function
+
+from pprint import pformat as pf
+
+print("Lysol")
+
 from math import *
 import random
 import sys, os
@@ -32,6 +39,9 @@ from numarray import linear_algebra as la
 from numarray import ones, array, dot, swapaxes
 
 import reaper
+sys.path.append("..")
+print(sys.path)
+from basicStatistics import corestats
 
 import svg
 import webqtlUtil
@@ -254,6 +264,7 @@ def gmedian(lst2):
             return lst[(N-1)/2]
 
 def gpercentile(lst2, np):
+    """Obsolete - use percentile in corestats instead"""
     lst = lst2[:]
     N = len(lst)
     if N == 0 or np > 100 or np < 0:
@@ -270,61 +281,41 @@ def gpercentile(lst2, np):
         else:
             return lst[k-1] + d*(lst[k] - lst[k-1])
 
-def findOutliers(vals):
-
-    valsOnly = []
-    dataXZ = vals[:]
-    for i in range(len(dataXZ)):
-        valsOnly.append(dataXZ[i][1])
-
-    data = [('', valsOnly[:])]
-
-    for item in data:
-        itemvalue = item[1]
-        nValue = len(itemvalue)
-        catValue = []
-
-        for item2 in itemvalue:
-            try:
-                tstrain, tvalue = item2
-            except:
-                tvalue = item2
-            if nValue <= 4:
-                continue
-            else:
-                catValue.append(tvalue)
-
-        if catValue != []:
-            lowHinge = gpercentile(catValue, 25)
-            upHinge = gpercentile(catValue, 75)
-            Hstep = 1.5*(upHinge - lowHinge)
+def find_outliers(vals):
+    """Calculates the upper and lower bounds of a set of sample/case values
+    
+    
+    >>> find_outliers([3.504, 5.234, 6.123, 7.234, 3.542, 5.341, 7.852, 4.555, 12.537])
+    (11.252500000000001, 0.5364999999999993)
+    
+    >>> find_outliers([9,12,15,17,31,50,7,5,6,8])
+    (32.0, -8.0)
+
+    If there are no vals, returns None for the upper and lower bounds,
+    which code that calls it will have to deal with.
+    >>> find_outliers([])
+    (None, None)
+    
+    """
 
-            outlier = []
-            extreme = []
+    print("xerxes vals is:", pf(vals))
 
-            upperBound = upHinge + Hstep
-            lowerBound = lowHinge - Hstep
+    if vals:
+        #print("vals is:", pf(vals))
+        stats = corestats.Stats(vals)
+        low_hinge = stats.percentile(25)
+        up_hinge = stats.percentile(75)
+        hstep = 1.5 * (up_hinge - low_hinge)
 
-            for item in catValue:
-                if item >= upHinge + 2*Hstep:
-                    extreme.append(item)
-                elif item >= upHinge + Hstep:
-                    outlier.append(item)
-                else:
-                    pass
+        upper_bound = up_hinge + hstep
+        lower_bound = low_hinge - hstep
 
-            for item in catValue:
-                if item <= lowHinge - 2*Hstep:
-                    extreme.append(item)
-                elif item <= lowHinge - Hstep:
-                    outlier.append(item)
-                else:
-                    pass
-        else:
-            upperBound = 1000
-            lowerBound = -1000
+    else:
+        upper_bound = None
+        lower_bound = None
 
-    return upperBound, lowerBound
+    print(pf(locals()))
+    return upper_bound, lower_bound
 
 
 def plotBoxPlot(canvas, data, offset= (40, 40, 40, 40), XLabel="Category", YLabel="Value"):
@@ -1281,3 +1272,12 @@ def BWSpectrum(n=100):
         out.append(pid.Color(x,x,x));
         x += step
     return out
+
+
+def _test():
+    import doctest  # imported here so it is only loaded when the tests are run
+    doctest.testmod()  # with no argument, runs the doctests of the __main__ module
+
+
+if __name__=="__main__":
+    _test()
diff --git a/wqflask/wqflask/show_trait/DataEditingPage.py b/wqflask/wqflask/show_trait/DataEditingPage.py
index 43f05f14..bb6156c3 100755
--- a/wqflask/wqflask/show_trait/DataEditingPage.py
+++ b/wqflask/wqflask/show_trait/DataEditingPage.py
@@ -1,5 +1,7 @@
 from __future__ import absolute_import, print_function, division
 
+print("Google")
+
 import string
 import os
 import cPickle
@@ -1673,7 +1675,7 @@ class DataEditingPage(templatePage):
         #showHideMenuOptions.append(HT.Bold("&nbsp;&nbsp;Options:"), "&nbsp;"*5, showHideNoValue, "&nbsp;"*5, showHideOutliers, "&nbsp;"*5, resetButton, "&nbsp;"*5, exportButton)
 
         #traitTableOptions.append(showHideMenuOptions,HT.BR(),HT.BR())
-        #traitTableOptions.append(HT.Span("&nbsp;&nbsp;Outliers highlighted in ", HT.Bold("&nbsp;yellow&nbsp;", style="background-color:yellow;"), " can be hidden using the ",
+        #traitTableOptions.append(HT.Span("&nbsp;&nbsp;Outliers highlighted in ", HT.Bold("&nbsp;red&nbsp;", style="background-color:red;"), " can be hidden using the ",
         #                                                    HT.Strong(" Hide Outliers "), " button,",HT.BR(),"&nbsp;&nbsp;and samples with no value (x) can be hidden by clicking ",
         #                                                    HT.Strong(" Hide No Value "), "."), HT.BR())
 
@@ -1703,7 +1705,7 @@ class DataEditingPage(templatePage):
 
         primary_strainlist = fd.parlist + allstrainlist_neworder
 
-        primary_strains = self.addTrait2Table(fd=fd,
+        primary_strains = self.create_strain_objects(fd=fd,
                                               varianceDataPage=varianceDataPage,
                                               strainlist=primary_strainlist,
                                               mainForm=mainForm,
@@ -1712,6 +1714,7 @@ class DataEditingPage(templatePage):
                                               attribute_ids=attribute_ids,
                                               attribute_names=attribute_names,
                                               strains='primary')
+        
 
         other_strains = []
         for strain in thisTrait.data.keys():
@@ -1727,7 +1730,7 @@ class DataEditingPage(templatePage):
             other_strains.sort() #Sort other strains
             other_strains = par_f1_strains + other_strains
 
-            other_strains = self.addTrait2Table(fd=fd,
+            other_strains = self.create_strain_objects(fd=fd,
                                                 varianceDataPage=varianceDataPage,
                                                 strainlist=other_strains,
                                                 mainForm=mainForm,
@@ -1736,6 +1739,7 @@ class DataEditingPage(templatePage):
                                                 attribute_names=attribute_names,
                                                 strains='other')
 
+
         #TODO: Figure out why this if statement is written this way - Zach
         if (other_strains or (fd.f1list and thisTrait.data.has_key(fd.f1list[0])) 
                 or (fd.f1list and thisTrait.data.has_key(fd.f1list[1]))):
@@ -1747,7 +1751,7 @@ class DataEditingPage(templatePage):
         self.other_strains = other_strains
 
 
-    def addTrait2Table(self, fd, varianceDataPage, strainlist, mainForm, thisTrait,
+    def create_strain_objects(self, fd, varianceDataPage, strainlist, mainForm, thisTrait,
                        other_strainsExist=None, attribute_ids=None,
                        attribute_names=None, strains='primary'):
 
@@ -1759,23 +1763,26 @@ class DataEditingPage(templatePage):
 
         #XZ, Aug 23, 2010: I commented the code related to the display of animal case
         #strainInfo = thisTrait.has_key('strainInfo') and thisTrait.strainInfo
-        print("in addTrait2Table")
-        table_body = []
-        vals = []
-
-
-        #################### Only used to find upperBound and lowerBound
+        print("in create_strain_objects")
+        #table_body = []
+        
+        ################### Only used to find upperBound and lowerBound
+        #vals = []
         #for strainNameOrig in strainlist:
         #    strainName = strainNameOrig.replace("_2nd_", "")
         #    print("pen: %s - %s" % (strainNameOrig, strainName))
-        #    thisval = thisTrait.data[strainName].value
-        #    thisvar = thisTrait.data[strainName].variance
-        #    thisValFull = [strainName, thisval, thisvar]
-        #
-        #    vals.append(thisValFull)
+        #    try:
+        #        thisval = thisTrait.data[strainName].value
+        #        thisvar = thisTrait.data[strainName].variance
+        #        thisValFull = [strainName, thisval, thisvar]
+        #    
+        #        vals.append(thisValFull)
+        #    except KeyError:
+        #        print("**x** Skipping:", strainName)
         #
         #upperBound, lowerBound = Plot.findOutliers(vals) # ZS: Values greater than upperBound or less than lowerBound are considered outliers.
 
+
         the_strains = []
 
         for counter, strainNameOrig in enumerate(strainlist, 1):
@@ -1790,107 +1797,15 @@ class DataEditingPage(templatePage):
                 print("No strain %s, let's create it now" % strainName)
                 strain = webqtlCaseData.webqtlCaseData(strainName)
             print("zyt - strainNameOrig:", strainNameOrig)
-            #trId = strainNameOrig
-            #selectCheck = HT.Input(type="checkbox", name="selectCheck", value=trId, Class="checkbox", onClick="highlight(this)")
-
-            
-            #try:
-            #    thisval, thisvar, thisNP = thisTrait.data[strainName].value, thisTrait.data[strainName].var, thisTrait.data[strainName].N
-            #    if thisNP:
-            #        mainForm.append(HT.Input(name='N'+strainName, value=thisNP, type='hidden'))
-            #    else:
-            #        pass
-            #except:
-            #    thisval = thisvar = 'x'
-
-            #thisval = thisTrait.data[strainName].value
-            #thisvar = thisTrait.data[strainName].variance
-            #thisTrait.data[strainName].num_cases
-
-            #strain['strain_name'] = strainName
-            #strainNameDisp = HT.Span(strainName, Class='fs14 fwn ffl')
-
-            #if varianceDataPage:
-                #try:
-                #    traitVar = thisvar
-                #    dispVar = "%2.3f" % thisvar
-                #except:
-                #    traitVar = ''
-                #    dispVar = 'x'
-
-            #if thisval == 'x':
-            #    traitVar = '' #ZS: Used to be 0, but it doesn't seem like a good idea for values of 0 to *always* be at the bottom when you sort; it makes more sense to put "nothing"
-            #
-            #    #className = 'fs13 b1 c222 '
-            #    #valueClassName = 'fs13 b1 c222 valueField '
-            #    #rowClassName = 'novalue '
-            #else:
-            #    if (thisval >= upperBound) or (thisval <= lowerBound):
-            #        strain['outlier'] = "outlier"  # We're going to use this as a class, so we want it to be a word
-            #        #className = 'fs13 b1 c222 outlier '
-            #        #valueClassName = 'fs13 b1 c222 valueField '
-            #        #rowClassName = 'outlier'
-            #    else:
-            #        strain['outlier'] = "not_outlier"
-            #        #className = 'fs13 b1 c222 '
-            #        #valueClassName = 'fs13 b1 c222 valueField '
-            #        #rowClassName = ' '
-            #
-            #if varianceDataPage:
-            #    varClassName = valueClassName + str(traitVar)
-            #valueClassName += str(traitVal)
-
-            #if strainNameOrig == strainName:
-            #    if other_strainsExist and strainNameOrig in (fd.parlist + fd.f1list):
-            #        ########################################################################################################################################################
-            #          # ZS: Append value and variance to the value and variance input fields' list of classes; this is so the javascript can update the value when the user
-            #          # changes it. The updated value is then used when the table is sorted (tablesorter.js). This needs to be done because the "value" attribute is immutable.
-            #        #########################################################################################################################################################
-            #
-            #        #valueField = HT.Input(name=strainNameOrig, size=8, maxlength=8, style="text-align:right; background-color:#FFFFFF;", value=dispVal,
-            #        #        onChange= "javascript:this.form['_2nd_%s'].value=this.form['%s'].value;" % (strainNameOrig.replace("/", ""), strainNameOrig.replace("/", "")), Class=valueClassName)
-            #        if varianceDataPage:
-            #            pass
-            #            #seField = HT.Input(name='V'+strainNameOrig, size=8, maxlength=8, style="text-align:right", value=dispVar,
-            #            #        onChange= "javascript:this.form['V_2nd_%s'].value=this.form['V%s'].value;" % (strainNameOrig.replace("/", ""), strainNameOrig.replace("/", "")), Class=varClassName)
-            #    else:
-            #        pass
-            #        #valueField = HT.Input(name=strainNameOrig, size=8, maxlength=8, style="text-align:right; background-color:#FFFFFF;", value=dispVal, Class=valueClassName)
-            #        if varianceDataPage:
-            #            pass
-            #            #seField = HT.Input(name='V'+strainNameOrig, size=8, maxlength=8, style="text-align:right", value=dispVar, Class=varClassName)
-            #else:
-            #    pass
-            #    #valueField = HT.Input(name=strainNameOrig, size=8, maxlength=8, style="text-align:right", value=dispVal,
-            #                          #onChange= "javascript:this.form['%s'].value=this.form['%s'].value;" % (strainNameOrig.replace("/", ""), strainNameOrig.replace("/", "")), Class=valueClassName)
-            #    if varianceDataPage:
-            #        pass
-            #        #seField = HT.Input(name='V'+strainNameOrig, size=8, maxlength=8, style="text-align:right", value=dispVar,
-            #        #        onChange= "javascript:this.form['V%s'].value=this.form['V%s'].value;" % (strainNameOrig.replace("/", ""), strainNameOrig.replace("/", "")), Class=varClassName)
+          
 
             if strains == 'primary':
                 strain.this_id = "Primary_" + str(counter)
-                #table_row = HT.TR(Id="Primary_"+str(i+1), Class=rowClassName)
             else:
                 strain.this_id = "Other_" + str(counter)
-                #table_row = HT.TR(Id="Other_"+str(i+1), Class=rowClassName)
-
-            #strain['value'] = traitVal
-            #
-            #strain['se'] = dispVar
-            #if varianceDataPage:
-                #table_row.append(HT.TD(str(i+1), selectCheck, width=45, align='right', Class=className))
-                #table_row.append(HT.TD(strainNameDisp, strainNameAdd, align='right', width=100, Class=className))
-                #table_row.append(HT.TD(valueField, width=70, align='right', Id="value_"+str(i)+"_"+strains, Class=className))
-                #table_row.append(HT.TD("&plusmn;", width=20, align='center', Class=className))
-                #table_row.append(HT.TD(seField, width=80, align='right', Id="SE_"+str(i)+"_"+strains, Class=className))
-                #pass
-            #else:
-                #table_row.append(HT.TD(str(i+1), selectCheck, width=45, align='right', Class=className))
-                #table_row.append(HT.TD(strainNameDisp, strainNameAdd, align='right', width=100, Class=className))
-                #table_row.append(HT.TD(valueField, width=70, align='right', Id="value_"+str(i)+"_"+strains, Class=className))
-                #pass
-            if thisTrait and thisTrait.db and thisTrait.db.type =='ProbeSet':
+                
+            #### For extra attribute columns; currently only used by two human datasets - Zach
+            if thisTrait and thisTrait.db and thisTrait.db.type == 'ProbeSet':
                 if len(attribute_ids) > 0:
 
                     #ZS: Get StrainId value for the next query
@@ -1929,6 +1844,8 @@ class DataEditingPage(templatePage):
                         attr_counter += 1
                 the_strains.append(strain)
             #table_body.append(table_row)
+        
+        do_outliers(the_strains)
         print("*the_strains are [%i]: %s" % (len(the_strains), pf(the_strains)))
         return the_strains
 
@@ -1975,3 +1892,18 @@ class DataEditingPage(templatePage):
         sortby = ("", "")
 
         return sortby
+    
+    
+    
+def do_outliers(strain_objects):
+    """Set ``outlier`` to True/False on every strain that has a value.
+
+    Bounds come from Plot.find_outliers; strains whose value is None are left
+    untouched. None is tested explicitly because 0 is a valid value and bound.
+    """
+    values = [strain.value for strain in strain_objects if strain.value is not None]
+    upper_bound, lower_bound = Plot.find_outliers(values)
+    for strain in (s for s in strain_objects if s.value is not None):
+        above = upper_bound is not None and strain.value > upper_bound
+        below = lower_bound is not None and strain.value < lower_bound
+        strain.outlier = above or below
diff --git a/wqflask/wqflask/templates/trait_data_and_analysis.html b/wqflask/wqflask/templates/trait_data_and_analysis.html
index 94ba0aad..3644b436 100644
--- a/wqflask/wqflask/templates/trait_data_and_analysis.html
+++ b/wqflask/wqflask/templates/trait_data_and_analysis.html
@@ -3076,7 +3076,7 @@
                             </tr>
 
 							{% for strain in strain_type %}
-                            <tr class="{{ strain.outlier }} value_se" id="{{ strain.this_id }}">
+                            <tr class="{{ strain.class_outlier }} value_se" id="{{ strain.this_id }}">
                               <td class="fs13 b1 c222" align="right" width="45">
 								{{ loop.index }}
 								<input type="checkbox" name="selectCheck" class="checkbox edit_strain_checkbox" value="{{ strain.name }}" checked="checked">