diff options
Diffstat (limited to 'gn2/utility/corestats.py')
-rw-r--r-- | gn2/utility/corestats.py | 86 |
1 files changed, 86 insertions, 0 deletions
diff --git a/gn2/utility/corestats.py b/gn2/utility/corestats.py new file mode 100644 index 00000000..da0a21db --- /dev/null +++ b/gn2/utility/corestats.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python + +# corestats.py (COREy STATS) +# Copyright (c) 2006-2007, Corey Goldberg (corey@goldb.org) +# +# statistical calculation class +# for processing numeric sequences +# +# license: GNU LGPL +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +import sys + +# ZS: Should switch to using some third party library for this; maybe scipy has an equivalent + + +class Stats: + + def __init__(self, sequence): + # sequence of numbers we will process + # convert all items to floats for numerical processing + self.sequence = [float(item) for item in sequence] + + def sum(self): + if len(self.sequence) < 1: + return None + else: + return sum(self.sequence) + + def count(self): + return len(self.sequence) + + def min(self): + if len(self.sequence) < 1: + return None + else: + return min(self.sequence) + + def max(self): + if len(self.sequence) < 1: + return None + else: + return max(self.sequence) + + def avg(self): + if len(self.sequence) < 1: + return None + else: + return sum(self.sequence) / len(self.sequence) + + def stdev(self): + if len(self.sequence) < 1: + return None + else: + avg = self.avg() + sdsq = sum([(i - avg) ** 2 for i in self.sequence]) + stdev = (sdsq / (len(self.sequence) - 1)) ** .5 + return stdev + + def percentile(self, percentile): + if len(self.sequence) < 1: + value = None + elif (percentile >= 100): + sys.stderr.write( + 'ERROR: percentile must be < 100. you supplied: %s\n' % percentile) + value = None + else: + element_idx = int(len(self.sequence) * (percentile / 100.0)) + self.sequence.sort() + value = self.sequence[element_idx] + return value + +# Sample script using this class: +# ------------------------------------------- +# #!/usr/bin/env python +# import corestats +# +# sequence = [1, 2.5, 7, 13.4, 8.0] +# stats = corestats.Stats(sequence) +# print stats.avg() +# print stats.percentile(90) +# ------------------------------------------- |