| author    | Frederick Muriuki Muriithi | 2024-01-03 04:52:07 +0300 |
|-----------|----------------------------|---------------------------|
| committer | Frederick Muriuki Muriithi | 2024-01-03 04:52:07 +0300 |
| commit    | 9481d1705f735a1087ced871bcb169d147e44dd0 (patch) | |
| tree      | a4de0e380ca1d379aef2b6917462a2b50b3bd82b /r_qtl | |
| parent    | 645e98ab0bf341bdc4f739e5002c47e08fd6159b (diff) | |
| download  | gn-uploader-9481d1705f735a1087ced871bcb169d147e44dd0.tar.gz | |
Refactor: Extract potentially reusable functions
The processing of transposed files is probably going to be very
similar, so extract some reusable code from the geno-file-specific
function in preparation.
Diffstat (limited to 'r_qtl')
-rw-r--r-- | r_qtl/r_qtl2.py | 30 |
1 file changed, 18 insertions(+), 12 deletions(-)
diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py
index 4d609fd..16bb652 100644
--- a/r_qtl/r_qtl2.py
+++ b/r_qtl/r_qtl2.py
@@ -47,6 +47,22 @@ def with_non_transposed(zfile: ZipFile,
         for row in reader:
             yield func(row)
 
+def __make_organise_by_id__(id_key):
+    """Return a function to use with `reduce` to organise values by some
+    identifier."""
+    def __organiser__(acc, item):
+        row = acc.get(item[id_key], {})
+        return {**acc, item[id_key]: {**row, **item}}
+    return __organiser__
+
+def __batch_of_n__(iterable: Iterable, num):
+    """Return a batch of `num` items or less from the `iterable`."""
+    while True:
+        items = take(iterable, num)
+        if len(items) <= 0:
+            break
+        yield items
+
 def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]:
     """Load the genotype file, making use of the control data."""
     def replace_genotype_codes(val):
@@ -78,13 +94,6 @@ def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]:
                      for item in items)))
                 for items in zip(samples, line[1:]))
 
-    def __n_batch__(iterable: Iterable, num):
-        while True:
-            items = take(iterable, num)
-            if len(items) <= 0:
-                break
-            yield items
-
     if cdata.get("geno_transposed", False):
         with zfile.open(cdata["geno"]) as genofile:
             lines = (line.strip().split(cdata.get("sep", ","))
@@ -93,13 +102,10 @@ def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]:
             try:
                 id_line = next(lines)
                 id_key, samples = id_line[0], id_line[1:]
-                def __organise_by_id__(acc, item):
-                    row = acc.get(item[id_key], {})
-                    return {**acc, item[id_key]: {**row, **item}}
                 for _key, row in reduce(# type: ignore[var-annotated]
-                        __organise_by_id__,
+                        __make_organise_by_id__(id_key),
                         (row
-                         for batch in __n_batch__(lines, 300)
+                         for batch in __batch_of_n__(lines, 300)
                          for line in batch
                          for row in __merge__(id_key, samples, line)),
                         {}).items():
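To make the intent of the two extracted helpers concrete, the following is a minimal, self-contained sketch of how `__make_organise_by_id__` and `__batch_of_n__` combine with `functools.reduce` to organise per-sample fragments by identifier. The `take` stand-in (a thin wrapper over `itertools.islice`) and the example `fragments` data are assumptions for illustration only; in the actual module `take` is the project's own helper and the fragments are produced by `__merge__`.

```python
from functools import reduce
from itertools import islice
from typing import Iterable, Iterator

def take(iterable: Iterable, num: int) -> list:
    """Take at most `num` items from `iterable`.

    Stand-in for the project's own `take` helper, assumed here for the sketch.
    """
    return list(islice(iterable, num))

def __make_organise_by_id__(id_key):
    """Return a reducer that merges items sharing the same `id_key` value."""
    def __organiser__(acc, item):
        row = acc.get(item[id_key], {})
        return {**acc, item[id_key]: {**row, **item}}
    return __organiser__

def __batch_of_n__(iterable: Iterable, num: int) -> Iterator[list]:
    """Yield successive batches of at most `num` items from `iterable`."""
    while True:
        items = take(iterable, num)
        if len(items) <= 0:
            break
        yield items

# Hypothetical per-sample fragments, roughly the shape `__merge__` yields for a
# transposed file: each dict carries the identifier plus one marker's value.
fragments = iter([
    {"id": "S1", "rs1": "A"},
    {"id": "S2", "rs1": "B"},
    {"id": "S1", "rs2": "H"},
    {"id": "S2", "rs2": "A"},
])

organised = reduce(
    __make_organise_by_id__("id"),
    (item
     for batch in __batch_of_n__(fragments, 300)
     for item in batch),
    {})
print(organised)
# {'S1': {'id': 'S1', 'rs1': 'A', 'rs2': 'H'},
#  'S2': {'id': 'S2', 'rs1': 'B', 'rs2': 'A'}}
```

The factory closes over `id_key`, which is what lets the reducer live at module level instead of being redefined inside `genotype_data` for every file, so the same helpers can be reused when other transposed files get similar treatment.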