# Provenance: gn3/data_helpers.py, commit d9ea8744e04d891d1d13c710e24a0a7c85127140
# ("Add new data processing utility", Frederick Muriuki Muriithi, 2021-11-09).
# Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi
def partition_by(partition_fn, items):
    """
    Split `items` into consecutive groups, opening a new group at every item
    for which `partition_fn(item)` is truthy.

    Returns a tuple of tuples. Order is preserved and every item appears in
    exactly one inner tuple. The first item of each inner tuple satisfies
    `partition_fn`, with one exception: if `items` begins with items that do
    not satisfy `partition_fn`, they are collected into a leading group of
    their own rather than raising `IndexError`.

    `partition_fn` is called exactly once per item, in order. `items` may be
    any iterable; an empty iterable yields an empty tuple.

    This is only an approximation of Clojure's `partition-by`: Clojure starts
    a new partition whenever the function's return value *changes*, whereas
    this function starts one whenever the return value is truthy.

    >>> partition_by(lambda x: x % 3 == 0, [0, 1, 2, 3, 4, 5, 6])
    ((0, 1, 2), (3, 4, 5), (6,))
    >>> partition_by(lambda x: x > 2, [1, 2, 3])
    ((1, 2), (3,))
    """
    # Build with lists and freeze once at the end; concatenating tuples on
    # every step would copy the whole accumulator each time (quadratic).
    partitions = []
    for item in items:
        # `partition_fn` is evaluated first so it runs for every item. The
        # `not partitions` fallback covers leading items that do not open a
        # partition themselves: there is no previous group to extend yet.
        if partition_fn(item) or not partitions:
            partitions.append([item])
        else:
            partitions[-1].append(item)
    return tuple(tuple(partition) for partition in partitions)