aboutsummaryrefslogtreecommitdiff
path: root/gn3/data_helpers.py
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2021-11-09 10:06:01 +0300
committer: Frederick Muriuki Muriithi, 2021-11-09 10:06:01 +0300
commitd9ea8744e04d891d1d13c710e24a0a7c85127140 (patch)
tree9168713bdc58708378e4802fff09af78666729b5 /gn3/data_helpers.py
parent9b590d894f1e68ca5d7d00cb6d268f7fb6e6730c (diff)
downloadgenenetwork3-d9ea8744e04d891d1d13c710e24a0a7c85127140.tar.gz
Add new data processing utility
Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* gn3/data_helpers.py: New function (`partition_by`)
* tests/unit/test_data_helpers.py: Tests for new function

Add a function that approximates Clojure's `partition-by` function, to help with processing the data in a more functional way.
Diffstat (limited to 'gn3/data_helpers.py')
-rw-r--r--gn3/data_helpers.py15
1 files changed, 15 insertions, 0 deletions
diff --git a/gn3/data_helpers.py b/gn3/data_helpers.py
index d3f942b..b72fbc5 100644
--- a/gn3/data_helpers.py
+++ b/gn3/data_helpers.py
@@ -24,6 +24,21 @@ def partition_all(num: int, items: Sequence[Any]) -> Tuple[Tuple[Any, ...], ...]
in reduce(
__compute_start_stop__, iterations, tuple())])
def partition_by(partition_fn, items):
    """
    Split `items` into consecutive groups, starting a new group at every item
    for which `partition_fn` returns a truthy value.

    Returns a tuple of tuples that preserves the order of `items`. Each inner
    tuple begins with an item that satisfies `partition_fn`, except possibly
    the first one: any items that come before the first match are collected
    into a leading group of their own rather than raising an `IndexError`.
    An empty `items` gives an empty tuple.

    `partition_fn` is called exactly once per item, in order.

    This is an approximation of Clojure's `partition-by` function.
    """
    # Build with mutable lists so each item is appended in O(1); re-creating
    # the accumulated tuple for every item would be quadratic in len(items).
    groups = []
    for item in items:
        # `not groups` covers leading items seen before any match: there is
        # no open group to extend yet, so one has to be opened for them.
        if partition_fn(item) or not groups:
            groups.append([item])
        else:
            groups[-1].append(item)
    return tuple(tuple(group) for group in groups)
+
def parse_csv_line(
line: str, delimiter: str = ",",
quoting: Optional[str] = '"') -> Tuple[str, ...]: