aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2021-10-25 19:12:24 +0300
committer: Frederick Muriuki Muriithi, 2021-10-25 19:12:24 +0300
commit: 783f302c5d4729eb0b5fb6ba79180b7cd97764a5 (patch)
tree: a36567f82d76c3013bbb6021fa6d08eb09500201
parent: 5a472ebab04c68cd5228f253cc98d0ae22a520d7 (diff)
download: genenetwork3-783f302c5d4729eb0b5fb6ba79180b7cd97764a5.tar.gz
Implement `partition_all` function
Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* gn3/data_helpers.py: new function (partition_all)
* tests/unit/test_data_helpers.py: tests for function `gn3.data_helpers.partition_all`

As part of migrating some functions that access the database, this commit extracts generic processes that can be performed on data, and implements the `partition_all` function, which is equivalent to Clojure's `partition-all` function.
-rw-r--r--  gn3/data_helpers.py  25
-rw-r--r--  tests/unit/test_data_helpers.py  37
2 files changed, 62 insertions, 0 deletions
diff --git a/gn3/data_helpers.py b/gn3/data_helpers.py
new file mode 100644
index 0000000..f0d971e
--- /dev/null
+++ b/gn3/data_helpers.py
@@ -0,0 +1,25 @@
+"""
+This module will hold generic functions that can operate on a wide array of
+data structures.
+"""
+
+from math import ceil
+from functools import reduce
+from typing import Any, Tuple, Sequence
+
+def partition_all(num: int, items: Sequence[Any]) -> Tuple[Tuple[Any, ...], ...]:
+    """
+    Split `items` into consecutive tuples of at most `num` items each.
+
+    Returns a tuple of tuples in the original order; only the last partition
+    may hold fewer than `num` items, and empty `items` yields `()`. Raises
+    `ValueError` if `num` is not a positive integer (a zero or negative step
+    cannot partition anything).
+
+    This is an approximation of clojure's `partition-all` function.
+    """
+    if num < 1:
+        raise ValueError(f"`num` must be a positive integer, got {num!r}")
+    # Stepping by `num` gives each partition's start index directly.
+    return tuple(
+        tuple(items[start:start + num]) for start in range(0, len(items), num))
diff --git a/tests/unit/test_data_helpers.py b/tests/unit/test_data_helpers.py
new file mode 100644
index 0000000..1eec3cc
--- /dev/null
+++ b/tests/unit/test_data_helpers.py
@@ -0,0 +1,37 @@
+"""
+Test functions in gn3.data_helpers
+"""
+
+from unittest import TestCase
+
+from gn3.data_helpers import partition_all
+
+class TestDataHelpers(TestCase):
+    """
+    Unit tests for the generic helpers in `gn3.data_helpers`.
+    """
+
+    def test_partition_all(self):
+        """
+        Check that `gn3.data_helpers.partition_all` chunks sequences correctly.
+
+        Given:
+            - `num`: the maximum number of items allowed in a partition
+            - `items`: a list or tuple of items to split up
+        When:
+            - both are passed to `gn3.data_helpers.partition_all`
+        Then:
+            - the result is a tuple of tuples that keeps the items in their
+              original order, every partition holding `num` items except
+              possibly the last, which may hold fewer.
+        """
+        ten_items = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+        # Each case is (num, items, expected partitions).
+        cases = (
+            (1, [0, 1, 2, 3], ((0,), (1,), (2,), (3,))),
+            (3, tuple(ten_items), ((0, 1, 2), (3, 4, 5), (6, 7, 8), (9, ))),
+            (4, ten_items, ((0, 1, 2, 3), (4, 5, 6, 7), (8, 9))),
+            (13, ten_items, ((0, 1, 2, 3, 4, 5, 6, 7, 8, 9), )))
+        for size, sequence, partitions in cases:
+            with self.subTest(n=size, items=sequence):
+                self.assertEqual(partition_all(size, sequence), partitions)