From d9ea8744e04d891d1d13c710e24a0a7c85127140 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Tue, 9 Nov 2021 10:06:01 +0300
Subject: Add new data processing utility

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* gn3/data_helpers.py: New function (`partition_by`)
* tests/unit/test_data_helpers.py: Tests for new function

  Add a function that approximates Clojure's `partition-by` function, to help
  with processing the data in a more functional way.
---
 gn3/data_helpers.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'gn3')

diff --git a/gn3/data_helpers.py b/gn3/data_helpers.py
index d3f942b..b72fbc5 100644
--- a/gn3/data_helpers.py
+++ b/gn3/data_helpers.py
@@ -24,6 +24,21 @@ def partition_all(num: int, items: Sequence[Any]) -> Tuple[Tuple[Any, ...], ...]
         in reduce(
             __compute_start_stop__, iterations, tuple())])
 
+def partition_by(partition_fn, items):
+    """
+    Split `items` into a tuple of tuples, starting a new inner tuple at each
+    item for which `partition_fn` returns a truthy value. Items before the
+    first such item form a leading tuple. Empty `items` gives an empty tuple.
+
+    This is an approximation of Clojure's `partition-by` function.
+    """
+    def __partitioner__(accumulator, item):
+        if partition_fn(item) or not accumulator:
+            return accumulator + ((item,),)
+        return accumulator[:-1] + (accumulator[-1] + (item,),)
+
+    return reduce(__partitioner__, items, tuple())
+
 def parse_csv_line(
         line: str, delimiter: str = ",",
         quoting: Optional[str] = '"') -> Tuple[str, ...]:
-- 
cgit 1.4.1