about summary refs log tree commit diff
path: root/wqflask/utility
diff options
context:
space:
mode:
author: Zachary Sloan, 2012-08-29 17:49:43 -0500
committer: Zachary Sloan, 2012-08-29 17:49:43 -0500
commit: be095620bc8126026514fdee43e06a9a9f443f97 (patch)
tree: e4b57f292ef3da812b1ec11d1c5f676083a8d853 /wqflask/utility
parent: 4cc46d84810ca9492e9a38e1a1f88ab36d214791 (diff)
download: genenetwork2-be095620bc8126026514fdee43e06a9a9f443f97.tar.gz
Fixed so that outliers are now correctly highlighted
Diffstat (limited to 'wqflask/utility')
-rwxr-xr-x wqflask/utility/Plot.py 100
1 files changed, 50 insertions, 50 deletions
diff --git a/wqflask/utility/Plot.py b/wqflask/utility/Plot.py
index 086f3d57..51a57a6d 100755
--- a/wqflask/utility/Plot.py
+++ b/wqflask/utility/Plot.py
@@ -25,6 +25,13 @@
 # Last updated by GeneNetwork Core Team 2010/10/20
 
 #import piddle as pid
+
+from __future__ import print_function
+
+from pprint import pformat as pf
+
+print("Lysol")
+
 from math import *
 import random
 import sys, os
@@ -32,6 +39,9 @@ from numarray import linear_algebra as la
 from numarray import ones, array, dot, swapaxes
 
 import reaper
+sys.path.append("..")
+print(sys.path)
+from basicStatistics import corestats
 
 import svg
 import webqtlUtil
@@ -254,6 +264,7 @@ def gmedian(lst2):
             return lst[(N-1)/2]
 
 def gpercentile(lst2, np):
+    """Obsolete - use percentile in corestats instead"""
     lst = lst2[:]
     N = len(lst)
     if N == 0 or np > 100 or np < 0:
@@ -270,61 +281,41 @@ def gpercentile(lst2, np):
         else:
             return lst[k-1] + d*(lst[k] - lst[k-1])
 
-def findOutliers(vals):
-
-    valsOnly = []
-    dataXZ = vals[:]
-    for i in range(len(dataXZ)):
-        valsOnly.append(dataXZ[i][1])
-
-    data = [('', valsOnly[:])]
-
-    for item in data:
-        itemvalue = item[1]
-        nValue = len(itemvalue)
-        catValue = []
-
-        for item2 in itemvalue:
-            try:
-                tstrain, tvalue = item2
-            except:
-                tvalue = item2
-            if nValue <= 4:
-                continue
-            else:
-                catValue.append(tvalue)
-
-        if catValue != []:
-            lowHinge = gpercentile(catValue, 25)
-            upHinge = gpercentile(catValue, 75)
-            Hstep = 1.5*(upHinge - lowHinge)
+def find_outliers(vals):
+    """Calculates the upper and lower bounds of a set of sample/case values
+    
+    
+    >>> find_outliers([3.504, 5.234, 6.123, 7.234, 3.542, 5.341, 7.852, 4.555, 12.537])
+    (11.252500000000001, 0.5364999999999993)
+    
+    >>> find_outliers([9,12,15,17,31,50,7,5,6,8])
+    (32.0, -8.0)
+
+    If there are no vals, returns None for the upper and lower bounds,
+    which code that calls it will have to deal with.
+    >>> find_outliers([])
+    (None, None)
+    
+    """
 
-            outlier = []
-            extreme = []
+    print("xerxes vals is:", pf(vals))
 
-            upperBound = upHinge + Hstep
-            lowerBound = lowHinge - Hstep
+    if vals:
+        #print("vals is:", pf(vals))
+        stats = corestats.Stats(vals)
+        low_hinge = stats.percentile(25)
+        up_hinge = stats.percentile(75)
+        hstep = 1.5 * (up_hinge - low_hinge)
 
-            for item in catValue:
-                if item >= upHinge + 2*Hstep:
-                    extreme.append(item)
-                elif item >= upHinge + Hstep:
-                    outlier.append(item)
-                else:
-                    pass
+        upper_bound = up_hinge + hstep
+        lower_bound = low_hinge - hstep
 
-            for item in catValue:
-                if item <= lowHinge - 2*Hstep:
-                    extreme.append(item)
-                elif item <= lowHinge - Hstep:
-                    outlier.append(item)
-                else:
-                    pass
-        else:
-            upperBound = 1000
-            lowerBound = -1000
+    else:
+        upper_bound = None
+        lower_bound = None
 
-    return upperBound, lowerBound
+    print(pf(locals()))
+    return upper_bound, lower_bound
 
 
 def plotBoxPlot(canvas, data, offset= (40, 40, 40, 40), XLabel="Category", YLabel="Value"):
@@ -1281,3 +1272,12 @@ def BWSpectrum(n=100):
         out.append(pid.Color(x,x,x));
         x += step
     return out
+
+
+def _test():
+    import doctest
+    doctest.testmod()
+
+
+if __name__=="__main__":
+    _test()