about | summary | refs | log | tree | commit | diff
path: root/r_qtl
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2024-01-02 09:14:21 +0300
committer: Frederick Muriuki Muriithi, 2024-01-02 09:14:21 +0300
commit: b3b9ba3b5b4e516d6220668155f9b5c57a51eb7d (patch)
tree: f76e7fbf8e3fd67fbf6aa57bb31626bc45e252a9 /r_qtl
parent: 7a2bcc9e86bde0eb9c0d370f83df4684e5522f26 (diff)
download: gn-uploader-b3b9ba3b5b4e516d6220668155f9b5c57a51eb7d.tar.gz
Abstract away non-transposed file processing
Since the processing of non-transposed files is mostly similar, abstract away the common operations into a separate function and use the function instead of repeating the same pattern of code throughout the codebase.
Diffstat (limited to 'r_qtl')
-rw-r--r-- r_qtl/r_qtl2.py 53
1 files changed, 29 insertions, 24 deletions
diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py
index 22cf62c..4d609fd 100644
--- a/r_qtl/r_qtl2.py
+++ b/r_qtl/r_qtl2.py
@@ -32,6 +32,21 @@ def control_data(zfile: ZipFile) -> dict:
if files[0].endswith(".json")
else yaml.safe_load(zfile.read(files[0])))
+def with_non_transposed(zfile: ZipFile,
+ member_key: str,
+ cdata: dict,
+ func: Callable[[dict], dict] = lambda val: val) -> Iterator[dict]:
+ """Abstracts away common file-opening for non-transposed R/qtl2 files."""
+ def not_comment_line(line):
+ return not line.startswith(cdata.get("comment.char", "#"))
+
+ with zfile.open(cdata[member_key]) as innerfile:
+ reader = csv.DictReader(
+ filter(not_comment_line, io.TextIOWrapper(innerfile)),
+ delimiter=cdata.get("sep", ","))
+ for row in reader:
+ yield func(row)
+
def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]:
"""Load the genotype file, making use of the control data."""
def replace_genotype_codes(val):
@@ -44,20 +59,15 @@ def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]:
return val
if not cdata.get("geno_transposed", False):
- with zfile.open(cdata["geno"]) as genofile:
- reader = csv.DictReader(
- filter(lambda line: not line.startswith("#"),
- io.TextIOWrapper(genofile)),
- delimiter=cdata.get("sep", ","))
- for row in reader:
- yield {
- key: thread_op(
- value,
- replace_genotype_codes,
- replace_na_strings)
- for key,value
- in row.items()
- }
+ for line in with_non_transposed(
+ zfile,
+ "geno",
+ cdata,
+ lambda row: {
+ key: thread_op(value, replace_genotype_codes, replace_na_strings)
+ for key,value in row.items()
+ }):
+ yield line
def __merge__(key, samples, line):
marker = line[0]
@@ -100,23 +110,18 @@ def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]:
def map_data(zfile: ZipFile, map_type: str, cdata: dict) -> tuple[dict, ...]:
"""Read gmap files to get the genome mapping data"""
assert map_type in ("genetic-map", "physical-map"), "Invalid map type"
- map_file = cdata[{
+ map_file_key = {
"genetic-map": "gmap",
"physical-map": "pmap"
- }[map_type]]
+ }[map_type]
transposed_dict = {
"genetic-map": "gmap_transposed",
"physical-map": "pmap_transposed"
}
if not cdata.get(transposed_dict[map_type], False):
- with zfile.open(map_file) as gmapfile:
- reader = csv.DictReader(
- filter(lambda line: not line.startswith("#"),
- io.TextIOWrapper(gmapfile)),
- delimiter=cdata.get("sep", ","))
- return tuple(row for row in reader)
-
- with zfile.open(map_file) as gmapfile:
+ return tuple(with_non_transposed(zfile, map_file_key, cdata))
+
+ with zfile.open(cdata[map_file_key]) as gmapfile:
lines = [[field.strip() for field in
line.strip().split(cdata.get("sep", ","))]
for line in