about | summary | refs | log | tree | commit | diff
path: root/wqflask/utility
diff options
context:
space:
mode:
author: Zachary Sloan, 2012-08-29 17:49:43 -0500
committer: Zachary Sloan, 2012-08-29 17:49:43 -0500
commit: be095620bc8126026514fdee43e06a9a9f443f97 (patch)
tree: e4b57f292ef3da812b1ec11d1c5f676083a8d853 /wqflask/utility
parent: 4cc46d84810ca9492e9a38e1a1f88ab36d214791 (diff)
download: genenetwork2-be095620bc8126026514fdee43e06a9a9f443f97.tar.gz
Fixed so that outliers are now correctly highlighted
Diffstat (limited to 'wqflask/utility')
-rwxr-xr-x wqflask/utility/Plot.py 100
1 files changed, 50 insertions, 50 deletions
diff --git a/wqflask/utility/Plot.py b/wqflask/utility/Plot.py
index 086f3d57..51a57a6d 100755
--- a/wqflask/utility/Plot.py
+++ b/wqflask/utility/Plot.py
@@ -25,6 +25,13 @@
# Last updated by GeneNetwork Core Team 2010/10/20
#import piddle as pid
+
+from __future__ import print_function
+
+from pprint import pformat as pf
+
+print("Lysol")
+
from math import *
import random
import sys, os
@@ -32,6 +39,9 @@ from numarray import linear_algebra as la
from numarray import ones, array, dot, swapaxes
import reaper
+sys.path.append("..")
+print(sys.path)
+from basicStatistics import corestats
import svg
import webqtlUtil
@@ -254,6 +264,7 @@ def gmedian(lst2):
return lst[(N-1)/2]
def gpercentile(lst2, np):
+ """Obsolete - use percentile in corestats instead"""
lst = lst2[:]
N = len(lst)
if N == 0 or np > 100 or np < 0:
@@ -270,61 +281,41 @@ def gpercentile(lst2, np):
else:
return lst[k-1] + d*(lst[k] - lst[k-1])
-def findOutliers(vals):
-
- valsOnly = []
- dataXZ = vals[:]
- for i in range(len(dataXZ)):
- valsOnly.append(dataXZ[i][1])
-
- data = [('', valsOnly[:])]
-
- for item in data:
- itemvalue = item[1]
- nValue = len(itemvalue)
- catValue = []
-
- for item2 in itemvalue:
- try:
- tstrain, tvalue = item2
- except:
- tvalue = item2
- if nValue <= 4:
- continue
- else:
- catValue.append(tvalue)
-
- if catValue != []:
- lowHinge = gpercentile(catValue, 25)
- upHinge = gpercentile(catValue, 75)
- Hstep = 1.5*(upHinge - lowHinge)
+def find_outliers(vals):
+ """Calculates the upper and lower bounds of a set of sample/case values
+
+
+ >>> find_outliers([3.504, 5.234, 6.123, 7.234, 3.542, 5.341, 7.852, 4.555, 12.537])
+ (11.252500000000001, 0.5364999999999993)
+
+ >>> find_outliers([9,12,15,17,31,50,7,5,6,8])
+ (32.0, -8.0)
+
+ If there are no vals, returns None for the upper and lower bounds,
+ which code that calls it will have to deal with.
+ >>> find_outliers([])
+ (None, None)
+
+ """
- outlier = []
- extreme = []
+ print("xerxes vals is:", pf(vals))
- upperBound = upHinge + Hstep
- lowerBound = lowHinge - Hstep
+ if vals:
+ #print("vals is:", pf(vals))
+ stats = corestats.Stats(vals)
+ low_hinge = stats.percentile(25)
+ up_hinge = stats.percentile(75)
+ hstep = 1.5 * (up_hinge - low_hinge)
- for item in catValue:
- if item >= upHinge + 2*Hstep:
- extreme.append(item)
- elif item >= upHinge + Hstep:
- outlier.append(item)
- else:
- pass
+ upper_bound = up_hinge + hstep
+ lower_bound = low_hinge - hstep
- for item in catValue:
- if item <= lowHinge - 2*Hstep:
- extreme.append(item)
- elif item <= lowHinge - Hstep:
- outlier.append(item)
- else:
- pass
- else:
- upperBound = 1000
- lowerBound = -1000
+ else:
+ upper_bound = None
+ lower_bound = None
- return upperBound, lowerBound
+ print(pf(locals()))
+ return upper_bound, lower_bound
def plotBoxPlot(canvas, data, offset= (40, 40, 40, 40), XLabel="Category", YLabel="Value"):
@@ -1281,3 +1272,12 @@ def BWSpectrum(n=100):
out.append(pid.Color(x,x,x));
x += step
return out
+
+
+def _test():
+ import doctest
+ doctest.testmod()
+
+
+if __name__=="__main__":
+ _test()