about summary refs log tree commit diff
path: root/r_qtl/r_qtl2.py
diff options
context:
space:
mode:
Diffstat (limited to 'r_qtl/r_qtl2.py')
-rw-r--r-- r_qtl/r_qtl2.py 30
1 files changed, 18 insertions, 12 deletions
diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py
index 4d609fd..16bb652 100644
--- a/r_qtl/r_qtl2.py
+++ b/r_qtl/r_qtl2.py
@@ -47,6 +47,22 @@ def with_non_transposed(zfile: ZipFile,
for row in reader:
yield func(row)
+def __make_organise_by_id__(id_key):
+ """Return a function to use with `reduce` to organise values by some
+ identifier."""
+ def __organiser__(acc, item):
+ row = acc.get(item[id_key], {})
+ return {**acc, item[id_key]: {**row, **item}}
+ return __organiser__
+
+def __batch_of_n__(iterable: Iterable, num):
+ """Yield successive batches of at most `num` items from the `iterable`."""
+ while True:
+ items = take(iterable, num)
+ if len(items) <= 0:
+ break
+ yield items
+
def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]:
"""Load the genotype file, making use of the control data."""
def replace_genotype_codes(val):
@@ -78,13 +94,6 @@ def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]:
for item in items)))
for items in zip(samples, line[1:]))
- def __n_batch__(iterable: Iterable, num):
- while True:
- items = take(iterable, num)
- if len(items) <= 0:
- break
- yield items
-
if cdata.get("geno_transposed", False):
with zfile.open(cdata["geno"]) as genofile:
lines = (line.strip().split(cdata.get("sep", ","))
@@ -93,13 +102,10 @@ def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]:
try:
id_line = next(lines)
id_key, samples = id_line[0], id_line[1:]
- def __organise_by_id__(acc, item):
- row = acc.get(item[id_key], {})
- return {**acc, item[id_key]: {**row, **item}}
for _key, row in reduce(# type: ignore[var-annotated]
- __organise_by_id__,
+ __make_organise_by_id__(id_key),
(row
- for batch in __n_batch__(lines, 300)
+ for batch in __batch_of_n__(lines, 300)
for line in batch
for row in __merge__(id_key, samples, line)),
{}).items():