about summary refs log tree commit diff
path: root/gn2/utility/corestats.py
diff options
context:
space:
mode:
authorArun Isaac2023-12-29 18:55:37 +0000
committerArun Isaac2023-12-29 19:01:46 +0000
commit204a308be0f741726b9a620d88fbc22b22124c81 (patch)
treeb3cf66906674020b530c844c2bb4982c8a0e2d39 /gn2/utility/corestats.py
parent83062c75442160427b50420161bfcae2c5c34c84 (diff)
downloadgenenetwork2-204a308be0f741726b9a620d88fbc22b22124c81.tar.gz
Namespace all modules under gn2.
We move all modules under a gn2 directory. This is important for
"correct" packaging and deployment as a Guix service.
Diffstat (limited to 'gn2/utility/corestats.py')
-rw-r--r--gn2/utility/corestats.py86
1 files changed, 86 insertions, 0 deletions
diff --git a/gn2/utility/corestats.py b/gn2/utility/corestats.py
new file mode 100644
index 00000000..da0a21db
--- /dev/null
+++ b/gn2/utility/corestats.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+
+#  corestats.py (COREy STATS)
+#  Copyright (c) 2006-2007, Corey Goldberg (corey@goldb.org)
+#
+#    statistical calculation class
+#    for processing numeric sequences
+#
+#  license: GNU LGPL
+#
+#  This library is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU Lesser General Public
+#  License as published by the Free Software Foundation; either
+#  version 2.1 of the License, or (at your option) any later version.
+
+import sys
+
+# ZS: Should switch to using some third party library for this; maybe scipy has an equivalent
+
+
+class Stats:
+    """Basic descriptive statistics over a sequence of numbers.
+
+    Every item is converted to ``float`` on construction.  Methods that
+    need data return ``None`` instead of raising when the sequence does
+    not hold enough items to compute a result.
+    """
+
+    def __init__(self, sequence):
+        """Store *sequence* as a list of floats.
+
+        Items that ``float()`` cannot convert make the constructor raise
+        the corresponding ``TypeError`` or ``ValueError``.
+        """
+        # convert all items to floats for numerical processing
+        self.sequence = [float(item) for item in sequence]
+
+    def sum(self):
+        """Return the sum of the items, or None for an empty sequence."""
+        if not self.sequence:
+            return None
+        return sum(self.sequence)
+
+    def count(self):
+        """Return the number of items."""
+        return len(self.sequence)
+
+    def min(self):
+        """Return the smallest item, or None for an empty sequence."""
+        if not self.sequence:
+            return None
+        return min(self.sequence)
+
+    def max(self):
+        """Return the largest item, or None for an empty sequence."""
+        if not self.sequence:
+            return None
+        return max(self.sequence)
+
+    def avg(self):
+        """Return the arithmetic mean, or None for an empty sequence."""
+        if not self.sequence:
+            return None
+        return sum(self.sequence) / len(self.sequence)
+
+    def stdev(self):
+        """Return the sample standard deviation (n - 1 denominator).
+
+        Returns None when there are fewer than two items: the sample
+        standard deviation is undefined there, and the ``n - 1`` divisor
+        would be zero for a single item.
+        """
+        count = len(self.sequence)
+        if count < 2:
+            return None
+        avg = self.avg()
+        sdsq = sum((i - avg) ** 2 for i in self.sequence)
+        return (sdsq / (count - 1)) ** .5
+
+    def percentile(self, percentile):
+        """Return the item found at *percentile* of the sorted data.
+
+        The item is picked by index, ``int(count * percentile / 100)``,
+        from a sorted copy of the data; no interpolation is done.
+
+        Returns None for an empty sequence.  A *percentile* outside
+        ``0 <= percentile < 100`` writes an error line to stderr and
+        returns None.
+        """
+        if not self.sequence:
+            return None
+        if percentile >= 100:
+            sys.stderr.write(
+                'ERROR: percentile must be < 100.  you supplied: %s\n' % percentile)
+            return None
+        if percentile < 0:
+            # A negative percentile would give a negative index, which
+            # silently wraps around to the top end of the sorted data.
+            sys.stderr.write(
+                'ERROR: percentile must be >= 0.  you supplied: %s\n' % percentile)
+            return None
+        element_idx = int(len(self.sequence) * (percentile / 100.0))
+        # Sort a copy so that asking for a percentile does not reorder
+        # self.sequence as a side effect.
+        return sorted(self.sequence)[element_idx]
+
+# Sample script using this class:
+# -------------------------------------------
+#    #!/usr/bin/env python
+#    from gn2.utility import corestats
+#
+#    sequence = [1, 2.5, 7, 13.4, 8.0]
+#    stats = corestats.Stats(sequence)
+#    print(stats.avg())
+#    print(stats.percentile(90))
+# -------------------------------------------