diff options
author | Zachary Sloan | 2012-08-29 17:49:43 -0500 |
---|---|---|
committer | Zachary Sloan | 2012-08-29 17:49:43 -0500 |
commit | be095620bc8126026514fdee43e06a9a9f443f97 (patch) | |
tree | e4b57f292ef3da812b1ec11d1c5f676083a8d853 /wqflask/utility | |
parent | 4cc46d84810ca9492e9a38e1a1f88ab36d214791 (diff) | |
download | genenetwork2-be095620bc8126026514fdee43e06a9a9f443f97.tar.gz |
Fixed so that outliers are now correctly highlighted
Diffstat (limited to 'wqflask/utility')
-rwxr-xr-x | wqflask/utility/Plot.py | 100 |
1 file changed, 50 insertions, 50 deletions
diff --git a/wqflask/utility/Plot.py b/wqflask/utility/Plot.py index 086f3d57..51a57a6d 100755 --- a/wqflask/utility/Plot.py +++ b/wqflask/utility/Plot.py @@ -25,6 +25,13 @@ # Last updated by GeneNetwork Core Team 2010/10/20 #import piddle as pid + +from __future__ import print_function + +from pprint import pformat as pf + +print("Lysol") + from math import * import random import sys, os @@ -32,6 +39,9 @@ from numarray import linear_algebra as la from numarray import ones, array, dot, swapaxes import reaper +sys.path.append("..") +print(sys.path) +from basicStatistics import corestats import svg import webqtlUtil @@ -254,6 +264,7 @@ def gmedian(lst2): return lst[(N-1)/2] def gpercentile(lst2, np): + """Obsolete - use percentile in corestats instead""" lst = lst2[:] N = len(lst) if N == 0 or np > 100 or np < 0: @@ -270,61 +281,41 @@ def gpercentile(lst2, np): else: return lst[k-1] + d*(lst[k] - lst[k-1]) -def findOutliers(vals): - - valsOnly = [] - dataXZ = vals[:] - for i in range(len(dataXZ)): - valsOnly.append(dataXZ[i][1]) - - data = [('', valsOnly[:])] - - for item in data: - itemvalue = item[1] - nValue = len(itemvalue) - catValue = [] - - for item2 in itemvalue: - try: - tstrain, tvalue = item2 - except: - tvalue = item2 - if nValue <= 4: - continue - else: - catValue.append(tvalue) - - if catValue != []: - lowHinge = gpercentile(catValue, 25) - upHinge = gpercentile(catValue, 75) - Hstep = 1.5*(upHinge - lowHinge) +def find_outliers(vals): + """Calculates the upper and lower bounds of a set of sample/case values + + + >>> find_outliers([3.504, 5.234, 6.123, 7.234, 3.542, 5.341, 7.852, 4.555, 12.537]) + (11.252500000000001, 0.5364999999999993) + + >>> >>> find_outliers([9,12,15,17,31,50,7,5,6,8]) + (32.0, -8.0) + + If there are no vals, returns None for the upper and lower bounds, + which code that calls it will have to deal with. 
+ >>> find_outliers([]) + (None, None) + + """ - outlier = [] - extreme = [] + print("xerxes vals is:", pf(vals)) - upperBound = upHinge + Hstep - lowerBound = lowHinge - Hstep + if vals: + #print("vals is:", pf(vals)) + stats = corestats.Stats(vals) + low_hinge = stats.percentile(25) + up_hinge = stats.percentile(75) + hstep = 1.5 * (up_hinge - low_hinge) - for item in catValue: - if item >= upHinge + 2*Hstep: - extreme.append(item) - elif item >= upHinge + Hstep: - outlier.append(item) - else: - pass + upper_bound = up_hinge + hstep + lower_bound = low_hinge - hstep - for item in catValue: - if item <= lowHinge - 2*Hstep: - extreme.append(item) - elif item <= lowHinge - Hstep: - outlier.append(item) - else: - pass - else: - upperBound = 1000 - lowerBound = -1000 + else: + upper_bound = None + lower_bound = None - return upperBound, lowerBound + print(pf(locals())) + return upper_bound, lower_bound def plotBoxPlot(canvas, data, offset= (40, 40, 40, 40), XLabel="Category", YLabel="Value"): @@ -1281,3 +1272,12 @@ def BWSpectrum(n=100): out.append(pid.Color(x,x,x)); x += step return out + + +def _test(): + import doctest + doctest.testmod() + + +if __name__=="__main__": + _test() |