From 783f302c5d4729eb0b5fb6ba79180b7cd97764a5 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 25 Oct 2021 19:12:24 +0300 Subject: Implement `partition_all` function Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * gn3/data_helpers.py: new function (partition_all) * tests/unit/test_data_helpers.py: tests for function `gn3.data_helpers.partition_all` As part of migrating some functions that access the database, this commit extracts generic processes that can be accomplished on data, and implements the `partition_all` function, that is equivalent to Clojure's `partition-all` function. --- tests/unit/test_data_helpers.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 tests/unit/test_data_helpers.py (limited to 'tests/unit/test_data_helpers.py') diff --git a/tests/unit/test_data_helpers.py b/tests/unit/test_data_helpers.py new file mode 100644 index 0000000..1eec3cc --- /dev/null +++ b/tests/unit/test_data_helpers.py @@ -0,0 +1,37 @@ +""" +Test functions in gn3.data_helpers +""" + +from unittest import TestCase + +from gn3.data_helpers import partition_all + +class TestDataHelpers(TestCase): + """ + Test functions in gn3.data_helpers + """ + + def test_partition_all(self): + """ + Test that `gn3.data_helpers.partition_all` partitions sequences as expected. + + Given: + - `num`: The number of items per partition + - `items`: A sequence of items + When: + - The arguments above are passed to the `gn3.data_helpers.partition_all` + Then: + - Return a new sequence with partitions, each of which has `num` + items in the same order as those in `items`, save for the last + partition which might have fewer items than `num`. + """ + for count, items, expected in ( + (1, [0, 1, 2, 3], ((0,), (1,), (2,), (3,))), + (3, (0, 1, 2, 3, 4, 5, 6, 7, 8, 9), + ((0, 1, 2), (3, 4, 5), (6, 7, 8), (9, ))), + (4, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + ((0, 1, 2, 3), (4, 5, 6, 7), (8, 9))), + (13, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + ((0, 1, 2, 3, 4, 5, 6, 7, 8, 9), ))): + with self.subTest(n=count, items=items): + self.assertEqual(partition_all(count, items), expected) -- cgit v1.2.3 From ff23701bbfd90799f04fdf21c482f113bb408e34 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 1 Nov 2021 07:09:26 +0300 Subject: Parse single line from CSV file Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * gn3/data_helpers.py: New function (parse_csv_line) * tests/unit/test_data_helpers.py: Add tests for new function (parse_csv_line) Add a function to parse a single line from a CSV file. --- gn3/data_helpers.py | 13 ++++++++++++- tests/unit/test_data_helpers.py | 26 +++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 2 deletions(-) (limited to 'tests/unit/test_data_helpers.py') diff --git a/gn3/data_helpers.py b/gn3/data_helpers.py index f0d971e..741a885 100644 --- a/gn3/data_helpers.py +++ b/gn3/data_helpers.py @@ -5,7 +5,7 @@ data structures. from math import ceil from functools import reduce -from typing import Any, Tuple, Sequence +from typing import Any, Tuple, Sequence, Optional def partition_all(num: int, items: Sequence[Any]) -> Tuple[Tuple[Any, ...], ...]: """ @@ -23,3 +23,14 @@ def partition_all(num: int, items: Sequence[Any]) -> Tuple[Tuple[Any, ...], ...] tuple(items[start:stop]) for start, stop # type: ignore[has-type] in reduce( __compute_start_stop__, iterations, tuple())]) + +def parse_csv_line( + line:str, delimiter: str = ",", quoting:Optional[str] = '"') -> Tuple[str, ...]: + """ + Parses a line from a CSV file into a tuple of strings. + + This is a migration of the `web.webqtl.utility.webqtlUtil.readLineCSV` + function in GeneNetwork1. + """ + return tuple( + col.strip("{} \t\n".format(quoting)) for col in line.split(delimiter)) diff --git a/tests/unit/test_data_helpers.py b/tests/unit/test_data_helpers.py index 1eec3cc..39aea45 100644 --- a/tests/unit/test_data_helpers.py +++ b/tests/unit/test_data_helpers.py @@ -4,7 +4,7 @@ Test functions in gn3.data_helpers from unittest import TestCase -from gn3.data_helpers import partition_all +from gn3.data_helpers import partition_all, parse_csv_line class TestDataHelpers(TestCase): """ @@ -35,3 +35,27 @@ class TestDataHelpers(TestCase): ((0, 1, 2, 3, 4, 5, 6, 7, 8, 9), ))): with self.subTest(n=count, items=items): self.assertEqual(partition_all(count, items), expected) + + def test_parse_csv_line(self): + """ + Test parsing a single line from a CSV file + + Given: + - `line`: a line read from a csv file + - `delimiter`: the expected delimiter in the csv file + - `quoting`: the quoting enclosing each column in the csv file + When: + - `line` is parsed with the `parse_csv_file` with the given + parameters + Then: + - return a tuple of the columns in the CSV file, without the + delimiter and quoting + """ + for line, delimiter, quoting, expected in ( + ('"this","is","a","test"', ",", '"', ("this", "is", "a", "test")), + ('"this","is","a","test"', ",", None, ('"this"', '"is"', '"a"', '"test"'))): + with self.subTest(line=line, delimiter=delimiter, quoting=quoting): + self.assertEqual( + parse_csv_line( + line=line, delimiter=delimiter, quoting=quoting), + expected) -- cgit v1.2.3