about summary refs log tree commit diff
path: root/gn3/data_helpers.py
diff options
context:
space:
mode:
Diffstat (limited to 'gn3/data_helpers.py')
-rw-r--r--gn3/data_helpers.py28
1 files changed, 21 insertions, 7 deletions
diff --git a/gn3/data_helpers.py b/gn3/data_helpers.py
index d3f942b..268a0bb 100644
--- a/gn3/data_helpers.py
+++ b/gn3/data_helpers.py
@@ -5,9 +5,9 @@ data structures.
 
 from math import ceil
 from functools import reduce
-from typing import Any, Tuple, Sequence, Optional
+from typing import Any, Tuple, Sequence, Optional, Generator
 
-def partition_all(num: int, items: Sequence[Any]) -> Tuple[Tuple[Any, ...], ...]:
+def partition_all(num: int, items: Sequence[Any]) -> Generator:
     """
     Given a sequence `items`, return a new sequence of the same type as `items`
     with the data partitioned into sections of `n` items per partition.
@@ -19,10 +19,24 @@ def partition_all(num: int, items: Sequence[Any]) -> Tuple[Tuple[Any, ...], ...]
         return acc + ((start, start + num),)
 
     iterations = range(ceil(len(items) / num))
-    return tuple([# type: ignore[misc]
-        tuple(items[start:stop]) for start, stop # type: ignore[has-type]
-        in reduce(
-            __compute_start_stop__, iterations, tuple())])
+    for start, stop in reduce(# type: ignore[misc]
+            __compute_start_stop__, iterations, tuple()):
+        yield tuple(items[start:stop]) # type: ignore[has-type]
+
+def partition_by(partition_fn, items):
+    """
+    Given a sequence `items`, return a tuple of tuples, each of which contains
+    the values in `items` partitioned such that the first item in each internal
+    tuple, when passed to `partition_fn`, returns True.
+
+    This is an approximation of Clojure's `partition-by` function.
+    """
+    def __partitioner__(accumulator, item):
+        if partition_fn(item):
+            return accumulator + ((item,),)
+        return accumulator[:-1] + (accumulator[-1] + (item,),)
+
+    return reduce(__partitioner__, items, tuple())
 
 def parse_csv_line(
         line: str, delimiter: str = ",",
@@ -34,4 +48,4 @@ def parse_csv_line(
     function in GeneNetwork1.
     """
     return tuple(
-        col.strip("{} \t\n".format(quoting)) for col in line.split(delimiter))
+        col.strip(f"{quoting} \t\n") for col in line.split(delimiter))