aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2021-11-09 10:06:01 +0300
committerFrederick Muriuki Muriithi2021-11-09 10:06:01 +0300
commitd9ea8744e04d891d1d13c710e24a0a7c85127140 (patch)
tree9168713bdc58708378e4802fff09af78666729b5
parent9b590d894f1e68ca5d7d00cb6d268f7fb6e6730c (diff)
downloadgenenetwork3-d9ea8744e04d891d1d13c710e24a0a7c85127140.tar.gz
Add new data processing utility
Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi
* gn3/data_helpers.py: New function (`partition_by`).
* tests/unit/test_data_helpers.py: Tests for the new function.
Add a function that approximates Clojure's `partition-by` function, to help with processing the data in a more functional way.
-rw-r--r--gn3/data_helpers.py15
-rw-r--r--tests/unit/test_data_helpers.py15
2 files changed, 29 insertions, 1 deletions
diff --git a/gn3/data_helpers.py b/gn3/data_helpers.py
index d3f942b..b72fbc5 100644
--- a/gn3/data_helpers.py
+++ b/gn3/data_helpers.py
@@ -24,6 +24,21 @@ def partition_all(num: int, items: Sequence[Any]) -> Tuple[Tuple[Any, ...], ...]
in reduce(
__compute_start_stop__, iterations, tuple())])
def partition_by(partition_fn, items):
    """
    Split `items` into consecutive groups, opening a new group at every item
    for which `partition_fn` returns a truthy value.

    Items that precede the first match are collected into a leading group of
    their own, so no input is dropped and no error is raised when the first
    item does not satisfy `partition_fn`.

    This is an approximation of Clojure's `partition-by` function.

    Parameters:
        partition_fn: one-argument callable, called once per item, in order.
        items: any iterable of values to partition.

    Returns:
        A tuple of tuples holding the items in their original order; an empty
        tuple when `items` is empty.
    """
    partitions = []
    for item in items:
        # `partition_fn` is evaluated first so it is called for every item.
        # The `not partitions` fallback opens the leading group when the
        # sequence does not begin with a matching item.
        if partition_fn(item) or not partitions:
            partitions.append([item])
        else:
            partitions[-1].append(item)
    # Groups are built as lists to avoid re-copying the whole result on every
    # item; freeze them into tuples only once, at the end.
    return tuple(tuple(group) for group in partitions)
+
def parse_csv_line(
line: str, delimiter: str = ",",
quoting: Optional[str] = '"') -> Tuple[str, ...]:
diff --git a/tests/unit/test_data_helpers.py b/tests/unit/test_data_helpers.py
index 39aea45..3f76344 100644
--- a/tests/unit/test_data_helpers.py
+++ b/tests/unit/test_data_helpers.py
@@ -4,7 +4,7 @@ Test functions in gn3.data_helpers
from unittest import TestCase
-from gn3.data_helpers import partition_all, parse_csv_line
+from gn3.data_helpers import partition_by, partition_all, parse_csv_line
class TestDataHelpers(TestCase):
"""
@@ -59,3 +59,16 @@ class TestDataHelpers(TestCase):
parse_csv_line(
line=line, delimiter=delimiter, quoting=quoting),
expected)
+
def test_partition_by(self):
    """
    Check that `partition_by` groups items so that each group begins at an
    item for which the partitioning function returns True.
    """
    # Each case is (partitioning function, input items, expected partitions).
    cases = (
        (lambda s: s.startswith("----"),
         ("------", "a", "b", "-----", "c", "----", "d", "e", "---", "f"),
         (("------", "a", "b"), ("-----", "c"),
          ("----", "d", "e", "---", "f"))),
        (lambda x: (x % 2) == 0,
         (0, 1, 3, 2, 4, 5, 7, 6, 9, 1),
         ((0, 1, 3), (2,), (4, 5, 7), (6, 9, 1))),
    )
    for predicate, sample, wanted in cases:
        with self.subTest(items=sample):
            self.assertEqual(partition_by(predicate, sample), wanted)