about summary refs log tree commit diff
path: root/gn3/data_helpers.py
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2021-11-09 10:06:01 +0300
committerFrederick Muriuki Muriithi2021-11-09 10:06:01 +0300
commitd9ea8744e04d891d1d13c710e24a0a7c85127140 (patch)
tree9168713bdc58708378e4802fff09af78666729b5 /gn3/data_helpers.py
parent9b590d894f1e68ca5d7d00cb6d268f7fb6e6730c (diff)
downloadgenenetwork3-d9ea8744e04d891d1d13c710e24a0a7c85127140.tar.gz
Add new data processing utility
Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* gn3/data_helpers.py: New function (`partition_by`)
* tests/unit/test_data_helpers.py: Tests for new function

  Add a function that approximates Clojure's `partition-by` function, to help
  with processing the data in a more functional way.
Diffstat (limited to 'gn3/data_helpers.py')
-rw-r--r--gn3/data_helpers.py15
1 files changed, 15 insertions, 0 deletions
diff --git a/gn3/data_helpers.py b/gn3/data_helpers.py
index d3f942b..b72fbc5 100644
--- a/gn3/data_helpers.py
+++ b/gn3/data_helpers.py
@@ -24,6 +24,21 @@ def partition_all(num: int, items: Sequence[Any]) -> Tuple[Tuple[Any, ...], ...]
         in reduce(
             __compute_start_stop__, iterations, tuple())])
 
+def partition_by(partition_fn, items):
+    """
+    Given a sequence `items`, return a tuple of tuples, each of which contains
+    the values in `items` partitioned such that the first item in each internal
+    tuple, when passed to `partition_fn`, returns True.
+
+    This is an approximation of Clojure's `partition-by` function.
+    """
+    def __partitioner__(accumulator, item):
+        if partition_fn(item):
+            return accumulator + ((item,),)
+        return accumulator[:-1] + (accumulator[-1] + (item,),)
+
+    return reduce(__partitioner__, items, tuple())
+
 def parse_csv_line(
         line: str, delimiter: str = ",",
         quoting: Optional[str] = '"') -> Tuple[str, ...]: