From 21c8c341956bb3c9cac427ab5b951976d70f4245 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 1 Nov 2021 07:09:26 +0300
Subject: Parse single line from CSV file

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* gn3/data_helpers.py: New function (parse_csv_line)
* tests/unit/test_data_helpers.py: Add tests for new function (parse_csv_line)

  Add a function to parse a single line from a CSV file.
---
 gn3/data_helpers.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'gn3/data_helpers.py')

diff --git a/gn3/data_helpers.py b/gn3/data_helpers.py
index f0d971e..741a885 100644
--- a/gn3/data_helpers.py
+++ b/gn3/data_helpers.py
@@ -5,7 +5,7 @@ data structures.
 
 from math import ceil
 from functools import reduce
-from typing import Any, Tuple, Sequence
+from typing import Any, Tuple, Sequence, Optional
 
 def partition_all(num: int, items: Sequence[Any]) -> Tuple[Tuple[Any, ...], ...]:
     """
@@ -23,3 +23,14 @@ def partition_all(num: int, items: Sequence[Any]) -> Tuple[Tuple[Any, ...], ...]
         tuple(items[start:stop]) for start, stop # type: ignore[has-type]
         in reduce(
             __compute_start_stop__, iterations, tuple())])
+
+def parse_csv_line(
+        line:str, delimiter: str = ",", quoting:Optional[str] = '"') -> Tuple[str, ...]:
+    """
+    Parses a line from a CSV file into a tuple of strings.
+
+    Splits on `delimiter`, then strips `quoting`, spaces, tabs and newlines.
+    Migrated from GeneNetwork1's `web.webqtl.utility.webqtlUtil.readLineCSV`.
+    """
+    return tuple(  # `or ""` keeps None from being stripped as the text "None"
+        col.strip((quoting or "") + " \t\n") for col in line.split(delimiter))
-- 
cgit 1.4.1


From d4919b2b59facb79ccad155e1d2826a97119ec28 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 1 Nov 2021 08:17:41 +0300
Subject: Fix some linting errors

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi
---
 gn3/data_helpers.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'gn3/data_helpers.py')

diff --git a/gn3/data_helpers.py b/gn3/data_helpers.py
index 741a885..d3f942b 100644
--- a/gn3/data_helpers.py
+++ b/gn3/data_helpers.py
@@ -25,7 +25,8 @@ def partition_all(num: int, items: Sequence[Any]) -> Tuple[Tuple[Any, ...], ...]
             __compute_start_stop__, iterations, tuple())])
 
 def parse_csv_line(
-        line:str, delimiter: str = ",", quoting:Optional[str] = '"') -> Tuple[str, ...]:
+        line: str, delimiter: str = ",",
+        quoting: Optional[str] = '"') -> Tuple[str, ...]:
     """
     Parses a line from a CSV file into a tuple of strings.
 
-- 
cgit 1.4.1


From d9ea8744e04d891d1d13c710e24a0a7c85127140 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Tue, 9 Nov 2021 10:06:01 +0300
Subject: Add new data processing utility

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* gn3/data_helpers.py: New function (`partition_by`)
* tests/unit/test_data_helpers.py: Tests for new function

  Add a function that approximates Clojure's `partition-by` function, to help
  with processing the data in a more functional way.
---
 gn3/data_helpers.py             | 15 +++++++++++++++
 tests/unit/test_data_helpers.py | 15 ++++++++++++++-
 2 files changed, 29 insertions(+), 1 deletion(-)

(limited to 'gn3/data_helpers.py')

diff --git a/gn3/data_helpers.py b/gn3/data_helpers.py
index d3f942b..b72fbc5 100644
--- a/gn3/data_helpers.py
+++ b/gn3/data_helpers.py
@@ -24,6 +24,21 @@ def partition_all(num: int, items: Sequence[Any]) -> Tuple[Tuple[Any, ...], ...]
         in reduce(
             __compute_start_stop__, iterations, tuple())])
 
+def partition_by(partition_fn, items):
+    """
+    Split `items` into a tuple of tuples, opening a new inner tuple at every
+    item for which `partition_fn` returns True. Items before the first such
+    item form the leading tuple. Approximates Clojure's `partition-by`.
+    """
+    def __partitioner__(accumulator, item):
+        # An empty accumulator has no open partition to extend, so start one
+        # instead of failing on `accumulator[-1]`.
+        if partition_fn(item) or not accumulator:
+            return accumulator + ((item,),)
+        return accumulator[:-1] + (accumulator[-1] + (item,),)
+
+    return reduce(__partitioner__, items, tuple())
+
 def parse_csv_line(
         line: str, delimiter: str = ",",
         quoting: Optional[str] = '"') -> Tuple[str, ...]:
diff --git a/tests/unit/test_data_helpers.py b/tests/unit/test_data_helpers.py
index 39aea45..3f76344 100644
--- a/tests/unit/test_data_helpers.py
+++ b/tests/unit/test_data_helpers.py
@@ -4,7 +4,7 @@ Test functions in gn3.data_helpers
 
 from unittest import TestCase
 
-from gn3.data_helpers import partition_all, parse_csv_line
+from gn3.data_helpers import partition_by, partition_all, parse_csv_line
 
 class TestDataHelpers(TestCase):
     """
@@ -59,3 +59,16 @@ class TestDataHelpers(TestCase):
                     parse_csv_line(
                         line=line, delimiter=delimiter, quoting=quoting),
                     expected)
+
+    def test_partition_by(self):  # checks `partition_by` against fixed cases
+        for part_fn, items, expected in (
+                (lambda s: s.startswith("----"),  # "---" must not split
+                 ("------", "a", "b", "-----", "c", "----", "d", "e", "---",
+                  "f"),
+                 (("------", "a", "b"), ("-----", "c"),
+                  ("----", "d", "e", "---", "f"))),
+                (lambda x: (x % 2) == 0,  # each even number opens a group
+                 (0, 1, 3, 2, 4, 5, 7, 6, 9, 1),
+                 ((0, 1, 3), (2,), (4, 5, 7), (6, 9, 1))),):
+            with self.subTest(items=items):  # report each case separately
+                self.assertEqual(partition_by(part_fn, items), expected)
-- 
cgit 1.4.1