From 21c8c341956bb3c9cac427ab5b951976d70f4245 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 1 Nov 2021 07:09:26 +0300 Subject: Parse single line from CSV file Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * gn3/data_helpers.py: New function (parse_csv_line) * tests/unit/test_data_helpers.py: Add tests for new function (parse_csv_line) Add a function to parse a single line from a CSV file. --- gn3/data_helpers.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'gn3/data_helpers.py') diff --git a/gn3/data_helpers.py b/gn3/data_helpers.py index f0d971e..741a885 100644 --- a/gn3/data_helpers.py +++ b/gn3/data_helpers.py @@ -5,7 +5,7 @@ data structures. from math import ceil from functools import reduce -from typing import Any, Tuple, Sequence +from typing import Any, Tuple, Sequence, Optional def partition_all(num: int, items: Sequence[Any]) -> Tuple[Tuple[Any, ...], ...]: """ @@ -23,3 +23,14 @@ def partition_all(num: int, items: Sequence[Any]) -> Tuple[Tuple[Any, ...], ...] tuple(items[start:stop]) for start, stop # type: ignore[has-type] in reduce( __compute_start_stop__, iterations, tuple())]) + +def parse_csv_line( + line:str, delimiter: str = ",", quoting:Optional[str] = '"') -> Tuple[str, ...]: + """ + Parses a line from a CSV file into a tuple of strings. + + This is a migration of the `web.webqtl.utility.webqtlUtil.readLineCSV` + function in GeneNetwork1. 
+ """ + return tuple( + col.strip("{} \t\n".format(quoting)) for col in line.split(delimiter)) -- cgit v1.2.3 From d4919b2b59facb79ccad155e1d2826a97119ec28 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 1 Nov 2021 08:17:41 +0300 Subject: Fix some linting errors Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi --- gn3/data_helpers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'gn3/data_helpers.py') diff --git a/gn3/data_helpers.py b/gn3/data_helpers.py index 741a885..d3f942b 100644 --- a/gn3/data_helpers.py +++ b/gn3/data_helpers.py @@ -25,7 +25,8 @@ def partition_all(num: int, items: Sequence[Any]) -> Tuple[Tuple[Any, ...], ...] __compute_start_stop__, iterations, tuple())]) def parse_csv_line( - line:str, delimiter: str = ",", quoting:Optional[str] = '"') -> Tuple[str, ...]: + line: str, delimiter: str = ",", + quoting: Optional[str] = '"') -> Tuple[str, ...]: """ Parses a line from a CSV file into a tuple of strings. -- cgit v1.2.3 From d9ea8744e04d891d1d13c710e24a0a7c85127140 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Tue, 9 Nov 2021 10:06:01 +0300 Subject: Add new data processing utility Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * gn3/data_helpers.py: New function (`partition_by`) * tests/unit/test_data_helpers.py: Tests for new function Add a function that approximates Clojure's `partition-by` function, to help with processing the data in a more functional way. 
--- gn3/data_helpers.py | 15 +++++++++++++++ tests/unit/test_data_helpers.py | 15 ++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) (limited to 'gn3/data_helpers.py') diff --git a/gn3/data_helpers.py b/gn3/data_helpers.py index d3f942b..b72fbc5 100644 --- a/gn3/data_helpers.py +++ b/gn3/data_helpers.py @@ -24,6 +24,21 @@ def partition_all(num: int, items: Sequence[Any]) -> Tuple[Tuple[Any, ...], ...] in reduce( __compute_start_stop__, iterations, tuple())]) +def partition_by(partition_fn, items): + """ + Given a sequence `items`, return a tuple of tuples, each of which contains + the values in `items` partitioned such that the first item in each internal + tuple, when passed to `partition_fn`, returns True. + + This is an approximation of Clojure's `partition-by` function. + """ + def __partitioner__(accumulator, item): + if partition_fn(item): + return accumulator + ((item,),) + return accumulator[:-1] + (accumulator[-1] + (item,),) + + return reduce(__partitioner__, items, tuple()) + def parse_csv_line( line: str, delimiter: str = ",", quoting: Optional[str] = '"') -> Tuple[str, ...]: diff --git a/tests/unit/test_data_helpers.py b/tests/unit/test_data_helpers.py index 39aea45..3f76344 100644 --- a/tests/unit/test_data_helpers.py +++ b/tests/unit/test_data_helpers.py @@ -4,7 +4,7 @@ Test functions in gn3.data_helpers from unittest import TestCase -from gn3.data_helpers import partition_all, parse_csv_line +from gn3.data_helpers import partition_by, partition_all, parse_csv_line class TestDataHelpers(TestCase): """ @@ -59,3 +59,16 @@ class TestDataHelpers(TestCase): parse_csv_line( line=line, delimiter=delimiter, quoting=quoting), expected) + + def test_partition_by(self): + for part_fn, items, expected in ( + (lambda s: s.startswith("----"), + ("------", "a", "b", "-----", "c", "----", "d", "e", "---", + "f"), + (("------", "a", "b"), ("-----", "c"), + ("----", "d", "e", "---", "f"))), + (lambda x: (x % 2) == 0, + (0, 1, 3, 2, 4, 5, 7, 6, 9, 1), 
+ ((0, 1, 3), (2,), (4, 5, 7), (6, 9, 1))),): + with self.subTest(items=items): + self.assertEqual(partition_by(part_fn, items), expected) -- cgit v1.2.3