about | summary | refs | log | tree | commit | diff
path: root/r_qtl/r_qtl2.py
diff options
context:
space:
mode:
Diffstat (limited to 'r_qtl/r_qtl2.py')
-rw-r--r-- r_qtl/r_qtl2.py 24
1 files changed, 22 insertions, 2 deletions
diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py
index 93b8c8e..8c17362 100644
--- a/r_qtl/r_qtl2.py
+++ b/r_qtl/r_qtl2.py
@@ -5,7 +5,7 @@ import json
from pathlib import Path
from zipfile import ZipFile
from functools import reduce, partial
-from typing import Iterator, Iterable, Callable, Optional
+from typing import Union, Iterator, Iterable, Callable, Optional
import yaml
@@ -302,7 +302,7 @@ def raw_file_data(zipfilepath: Union[str, Path],
zfile.open(memberfilename) as innerfile):
wrappedfile = io.TextIOWrapper(innerfile)
for line in wrappedfile:
- yield line.strip()
+ yield line
def strip_comments(rawdata: Iterator[str], commentchar) -> Iterator[str]:
"""Remove comments from raw text."""
@@ -334,3 +334,23 @@ def read_control_file(zipfilepath: Union[str, Path]) -> dict:
if bool(cdata.get(ftype))
}
}
+
+
+def read_file_data(
+        zipfilepath: Union[str, Path],
+        memberfilename: str,
+        processfile: Callable[[Iterator[str]], Iterator[str]] = lambda itr: itr,
+        processline: Callable[[str], str] = lambda line: line,
+        processfield: Callable[
+            [Optional[str]], Optional[str]] = lambda val: val) -> Iterator[
+                tuple[Optional[str], ...]]:
+    """Lazily read one member file of the bundle as tuples of fields.
+
+    The bundle's control file supplies the field separator (`sep`) and the
+    comment character (`comment.char`). Raw lines have their comments
+    removed, then the three hooks are applied in order:
+
+    * `processfile` receives the whole iterator of comment-free lines,
+    * `processline` receives each whitespace-stripped line before splitting,
+    * `processfield` receives each whitespace-stripped field after splitting.
+
+    Returns an iterator with one tuple of processed fields per line.
+    """
+    cdata = read_control_file(zipfilepath)
+    # Build the line pipeline up front; the lines themselves are consumed
+    # only as the returned iterator is advanced.
+    uncommented = strip_comments(
+        raw_file_data(zipfilepath, memberfilename), cdata["comment.char"])
+    lines = processfile(uncommented)
+
+    def __fields__(line: str) -> tuple[Optional[str], ...]:
+        """Split a single line on the separator and process each field."""
+        cells = processline(line.strip()).split(cdata["sep"])
+        return tuple(processfield(cell.strip()) for cell in cells)
+
+    return (__fields__(line) for line in lines)