about summary refs log tree commit diff
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2024-01-03 04:52:07 +0300
committer Frederick Muriuki Muriithi 2024-01-03 04:52:07 +0300
commit 9481d1705f735a1087ced871bcb169d147e44dd0 (patch)
tree a4de0e380ca1d379aef2b6917462a2b50b3bd82b
parent 645e98ab0bf341bdc4f739e5002c47e08fd6159b (diff)
download gn-uploader-9481d1705f735a1087ced871bcb169d147e44dd0.tar.gz
Refactor: Extract potentially reusable functions
The processing of transposed files is probably going to be very
similar, thus the need to extract some reusable code from the
geno-file-specific function in preparation.
-rw-r--r-- README.org 2
-rw-r--r-- r_qtl/r_qtl2.py 30
2 files changed, 19 insertions, 13 deletions
diff --git a/README.org b/README.org
index d0ccbf5..1f899f5 100644
--- a/README.org
+++ b/README.org
@@ -72,7 +72,7 @@ Run unit tests with:
 
 To run the linter over the code base, run:
 #+BEGIN_SRC shell
-  pylint *.py tests quality_control qc_app r_qtl scripts
+  pylint setup.py wsgi.py tests quality_control qc_app r_qtl scripts
 #+END_SRC
 
 To check for correct type usage in the application, run:
diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py
index 4d609fd..16bb652 100644
--- a/r_qtl/r_qtl2.py
+++ b/r_qtl/r_qtl2.py
@@ -47,6 +47,22 @@ def with_non_transposed(zfile: ZipFile,
         for row in reader:
             yield func(row)
 
+def __make_organise_by_id__(id_key):
+    """Return a function to use with `reduce` to organise values by some
+    identifier."""
+    def __organiser__(acc, item):
+        row = acc.get(item[id_key], {})
+        return {**acc, item[id_key]: {**row, **item}}
+    return __organiser__
+
+def __batch_of_n__(iterable: Iterable, num):
+    """Return a batch of `num` items or less from the `iterable`."""
+    while True:
+        items = take(iterable, num)
+        if len(items) <= 0:
+            break
+        yield items
+
 def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]:
     """Load the genotype file, making use of the control data."""
     def replace_genotype_codes(val):
@@ -78,13 +94,6 @@ def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]:
                  for item in items)))
             for items in zip(samples, line[1:]))
 
-    def __n_batch__(iterable: Iterable, num):
-        while True:
-            items = take(iterable, num)
-            if len(items) <= 0:
-                break
-            yield items
-
     if cdata.get("geno_transposed", False):
         with zfile.open(cdata["geno"]) as genofile:
             lines = (line.strip().split(cdata.get("sep", ","))
@@ -93,13 +102,10 @@ def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]:
             try:
                 id_line = next(lines)
                 id_key, samples = id_line[0], id_line[1:]
-                def __organise_by_id__(acc, item):
-                    row = acc.get(item[id_key], {})
-                    return {**acc, item[id_key]: {**row, **item}}
                 for _key, row in reduce(# type: ignore[var-annotated]
-                        __organise_by_id__,
+                        __make_organise_by_id__(id_key),
                         (row
-                         for batch in __n_batch__(lines, 300)
+                         for batch in __batch_of_n__(lines, 300)
                          for line in batch
                          for row in __merge__(id_key, samples, line)),
                         {}).items():