diff options
author | Frederick Muriuki Muriithi | 2024-01-02 09:14:21 +0300 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2024-01-02 09:14:21 +0300 |
commit | b3b9ba3b5b4e516d6220668155f9b5c57a51eb7d (patch) | |
tree | f76e7fbf8e3fd67fbf6aa57bb31626bc45e252a9 /r_qtl | |
parent | 7a2bcc9e86bde0eb9c0d370f83df4684e5522f26 (diff) | |
download | gn-uploader-b3b9ba3b5b4e516d6220668155f9b5c57a51eb7d.tar.gz |
Abstract away non-transposed file processing
Since the processing of non-transposed files is mostly similar,
abstract away the common operations into a separate function and use
the function instead of repeating the same pattern of code throughout
the codebase.
Diffstat (limited to 'r_qtl')
-rw-r--r-- | r_qtl/r_qtl2.py | 53 |
1 file changed, 29 insertions, 24 deletions
diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py index 22cf62c..4d609fd 100644 --- a/r_qtl/r_qtl2.py +++ b/r_qtl/r_qtl2.py @@ -32,6 +32,21 @@ def control_data(zfile: ZipFile) -> dict: if files[0].endswith(".json") else yaml.safe_load(zfile.read(files[0]))) +def with_non_transposed(zfile: ZipFile, + member_key: str, + cdata: dict, + func: Callable[[dict], dict] = lambda val: val) -> Iterator[dict]: + """Abstracts away common file-opening for non-transposed R/qtl2 files.""" + def not_comment_line(line): + return not line.startswith(cdata.get("comment.char", "#")) + + with zfile.open(cdata[member_key]) as innerfile: + reader = csv.DictReader( + filter(not_comment_line, io.TextIOWrapper(innerfile)), + delimiter=cdata.get("sep", ",")) + for row in reader: + yield func(row) + def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]: """Load the genotype file, making use of the control data.""" def replace_genotype_codes(val): @@ -44,20 +59,15 @@ def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]: return val if not cdata.get("geno_transposed", False): - with zfile.open(cdata["geno"]) as genofile: - reader = csv.DictReader( - filter(lambda line: not line.startswith("#"), - io.TextIOWrapper(genofile)), - delimiter=cdata.get("sep", ",")) - for row in reader: - yield { - key: thread_op( - value, - replace_genotype_codes, - replace_na_strings) - for key,value - in row.items() - } + for line in with_non_transposed( + zfile, + "geno", + cdata, + lambda row: { + key: thread_op(value, replace_genotype_codes, replace_na_strings) + for key,value in row.items() + }): + yield line def __merge__(key, samples, line): marker = line[0] @@ -100,23 +110,18 @@ def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]: def map_data(zfile: ZipFile, map_type: str, cdata: dict) -> tuple[dict, ...]: """Read gmap files to get the genome mapping data""" assert map_type in ("genetic-map", "physical-map"), "Invalid map type" - map_file = cdata[{ + map_file_key = { 
"genetic-map": "gmap", "physical-map": "pmap" - }[map_type]] + }[map_type] transposed_dict = { "genetic-map": "gmap_transposed", "physical-map": "pmap_transposed" } if not cdata.get(transposed_dict[map_type], False): - with zfile.open(map_file) as gmapfile: - reader = csv.DictReader( - filter(lambda line: not line.startswith("#"), - io.TextIOWrapper(gmapfile)), - delimiter=cdata.get("sep", ",")) - return tuple(row for row in reader) - - with zfile.open(map_file) as gmapfile: + return tuple(with_non_transposed(zfile, map_file_key, cdata)) + + with zfile.open(cdata[map_file_key]) as gmapfile: lines = [[field.strip() for field in line.strip().split(cdata.get("sep", ","))] for line in |