From b3b9ba3b5b4e516d6220668155f9b5c57a51eb7d Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Tue, 2 Jan 2024 09:14:21 +0300 Subject: Abstract away non-transposed file processing Since the processing of non-transposed files is mostly similar, abstract away the common operations into a separate function and use the function instead of repeating the same pattern of code throughout the codebase. --- r_qtl/r_qtl2.py | 53 +++++++++++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 24 deletions(-) (limited to 'r_qtl') diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py index 22cf62c..4d609fd 100644 --- a/r_qtl/r_qtl2.py +++ b/r_qtl/r_qtl2.py @@ -32,6 +32,21 @@ def control_data(zfile: ZipFile) -> dict: if files[0].endswith(".json") else yaml.safe_load(zfile.read(files[0]))) +def with_non_transposed(zfile: ZipFile, + member_key: str, + cdata: dict, + func: Callable[[dict], dict] = lambda val: val) -> Iterator[dict]: + """Abstracts away common file-opening for non-transposed R/qtl2 files.""" + def not_comment_line(line): + return not line.startswith(cdata.get("comment.char", "#")) + + with zfile.open(cdata[member_key]) as innerfile: + reader = csv.DictReader( + filter(not_comment_line, io.TextIOWrapper(innerfile)), + delimiter=cdata.get("sep", ",")) + for row in reader: + yield func(row) + def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]: """Load the genotype file, making use of the control data.""" def replace_genotype_codes(val): @@ -44,20 +59,15 @@ def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]: return val if not cdata.get("geno_transposed", False): - with zfile.open(cdata["geno"]) as genofile: - reader = csv.DictReader( - filter(lambda line: not line.startswith("#"), - io.TextIOWrapper(genofile)), - delimiter=cdata.get("sep", ",")) - for row in reader: - yield { - key: thread_op( - value, - replace_genotype_codes, - replace_na_strings) - for key,value - in row.items() - } + for line in with_non_transposed( + zfile, + "geno", + cdata, + lambda row: { + key: thread_op(value, replace_genotype_codes, replace_na_strings) + for key,value in row.items() + }): + yield line def __merge__(key, samples, line): marker = line[0] @@ -100,23 +110,18 @@ def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]: def map_data(zfile: ZipFile, map_type: str, cdata: dict) -> tuple[dict, ...]: """Read gmap files to get the genome mapping data""" assert map_type in ("genetic-map", "physical-map"), "Invalid map type" - map_file = cdata[{ + map_file_key = { "genetic-map": "gmap", "physical-map": "pmap" - }[map_type]] + }[map_type] transposed_dict = { "genetic-map": "gmap_transposed", "physical-map": "pmap_transposed" } if not cdata.get(transposed_dict[map_type], False): - with zfile.open(map_file) as gmapfile: - reader = csv.DictReader( - filter(lambda line: not line.startswith("#"), - io.TextIOWrapper(gmapfile)), - delimiter=cdata.get("sep", ",")) - return tuple(row for row in reader) - - with zfile.open(map_file) as gmapfile: + return tuple(with_non_transposed(zfile, map_file_key, cdata)) + + with zfile.open(cdata[map_file_key]) as gmapfile: lines = [[field.strip() for field in line.strip().split(cdata.get("sep", ","))] for line in -- cgit v1.2.3