about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--gn3/data_helpers.py13
-rw-r--r--tests/unit/test_data_helpers.py26
2 files changed, 37 insertions, 2 deletions
diff --git a/gn3/data_helpers.py b/gn3/data_helpers.py
index f0d971e..741a885 100644
--- a/gn3/data_helpers.py
+++ b/gn3/data_helpers.py
@@ -5,7 +5,7 @@ data structures.
 
 from math import ceil
 from functools import reduce
-from typing import Any, Tuple, Sequence
+from typing import Any, Tuple, Sequence, Optional
 
 def partition_all(num: int, items: Sequence[Any]) -> Tuple[Tuple[Any, ...], ...]:
     """
@@ -23,3 +23,14 @@ def partition_all(num: int, items: Sequence[Any]) -> Tuple[Tuple[Any, ...], ...]
         tuple(items[start:stop]) for start, stop # type: ignore[has-type]
         in reduce(
             __compute_start_stop__, iterations, tuple())])
+
+def parse_csv_line(
+        line:str, delimiter: str = ",", quoting:Optional[str] = '"') -> Tuple[str, ...]:
+    """
+    Parse a CSV `line` into a tuple of columns: split on `delimiter`, then
+    strip spaces, tabs, newlines and any `quoting` characters off both ends
+    of each column. `quoting=None` strips whitespace only. Migrated from
+    `web.webqtl.utility.webqtlUtil.readLineCSV` in GeneNetwork1.
+    """
+    # `quoting or ""`: formatting None would add "N", "o", "n", "e" to the strip set
+    return tuple(col.strip((quoting or "") + " \t\n") for col in line.split(delimiter))
diff --git a/tests/unit/test_data_helpers.py b/tests/unit/test_data_helpers.py
index 1eec3cc..39aea45 100644
--- a/tests/unit/test_data_helpers.py
+++ b/tests/unit/test_data_helpers.py
@@ -4,7 +4,7 @@ Test functions in gn3.data_helpers
 
 from unittest import TestCase
 
-from gn3.data_helpers import partition_all
+from gn3.data_helpers import partition_all, parse_csv_line
 
 class TestDataHelpers(TestCase):
     """
@@ -35,3 +35,27 @@ class TestDataHelpers(TestCase):
                  ((0, 1, 2, 3, 4, 5, 6, 7, 8, 9), ))):
             with self.subTest(n=count, items=items):
                 self.assertEqual(partition_all(count, items), expected)
+
+    def test_parse_csv_line(self):
+        """
+        Check that a single CSV line is split into its columns.
+
+        Given:
+            - `line`: one line of text as read from a csv file
+            - `delimiter`: the column separator used in that file
+            - `quoting`: the character wrapped around each column, or None
+        When:
+            - `line` is passed to `parse_csv_line` together with the
+              `delimiter` and `quoting` values
+        Then:
+            - the result is a tuple of the columns, with the delimiter
+              removed and the quoting stripped (kept when `quoting` is None)
+        """
+        sample = '"this","is","a","test"'
+        cases = (
+            (sample, ",", '"', ("this", "is", "a", "test")),
+            (sample, ",", None, ('"this"', '"is"', '"a"', '"test"')))
+        for line, delimiter, quoting, expected in cases:
+            with self.subTest(line=line, delimiter=delimiter, quoting=quoting):
+                actual = parse_csv_line(line=line, delimiter=delimiter, quoting=quoting)
+                self.assertEqual(actual, expected)