aboutsummaryrefslogtreecommitdiff
path: root/r_qtl
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi 2024-01-03 04:52:07 +0300
committer: Frederick Muriuki Muriithi 2024-01-03 04:52:07 +0300
commit: 9481d1705f735a1087ced871bcb169d147e44dd0 (patch)
tree: a4de0e380ca1d379aef2b6917462a2b50b3bd82b /r_qtl
parent: 645e98ab0bf341bdc4f739e5002c47e08fd6159b (diff)
download: gn-uploader-9481d1705f735a1087ced871bcb169d147e44dd0.tar.gz
Refactor: Extract potentially reusable functions
The processing of transposed files is probably going to be very similar, thus the need to extract some reusable code from the geno-file-specific function in preparation.
Diffstat (limited to 'r_qtl')
-rw-r--r-- r_qtl/r_qtl2.py | 30 lines
1 file changed, 18 insertions, 12 deletions
diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py
index 4d609fd..16bb652 100644
--- a/r_qtl/r_qtl2.py
+++ b/r_qtl/r_qtl2.py
@@ -47,6 +47,22 @@ def with_non_transposed(zfile: ZipFile,
for row in reader:
yield func(row)
+def __make_organise_by_id__(id_key):
+ """Return a function to use with `reduce` to organise values by some
+ identifier."""
+ def __organiser__(acc, item):
+ row = acc.get(item[id_key], {})
+ return {**acc, item[id_key]: {**row, **item}}
+ return __organiser__
+
+def __batch_of_n__(iterable: Iterable, num):
+ """Return a batch of `num` items or less from the `iterable`."""
+ while True:
+ items = take(iterable, num)
+ if len(items) <= 0:
+ break
+ yield items
+
def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]:
"""Load the genotype file, making use of the control data."""
def replace_genotype_codes(val):
@@ -78,13 +94,6 @@ def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]:
for item in items)))
for items in zip(samples, line[1:]))
- def __n_batch__(iterable: Iterable, num):
- while True:
- items = take(iterable, num)
- if len(items) <= 0:
- break
- yield items
-
if cdata.get("geno_transposed", False):
with zfile.open(cdata["geno"]) as genofile:
lines = (line.strip().split(cdata.get("sep", ","))
@@ -93,13 +102,10 @@ def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]:
try:
id_line = next(lines)
id_key, samples = id_line[0], id_line[1:]
- def __organise_by_id__(acc, item):
- row = acc.get(item[id_key], {})
- return {**acc, item[id_key]: {**row, **item}}
for _key, row in reduce(# type: ignore[var-annotated]
- __organise_by_id__,
+ __make_organise_by_id__(id_key),
(row
- for batch in __n_batch__(lines, 300)
+ for batch in __batch_of_n__(lines, 300)
for line in batch
for row in __merge__(id_key, samples, line)),
{}).items():