From 27f6e9e28f2a3244bdd00336cf918de97b2ceed6 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Thu, 19 May 2022 10:25:18 +0300 Subject: Extract progress indication from the parsing Since progress indication is not part of the parsing, this commit extracts the progress indication into functions with well defined input arguments that hide the progress indication logic from the parsing function. --- quality_control/parsing.py | 12 +++--------- quality_control/utils.py | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 9 deletions(-) create mode 100644 quality_control/utils.py (limited to 'quality_control') diff --git a/quality_control/parsing.py b/quality_control/parsing.py index 655b98a..5b1809b 100644 --- a/quality_control/parsing.py +++ b/quality_control/parsing.py @@ -65,7 +65,7 @@ def se_errors(line_number, fields): def collect_errors( filepath: str, filetype: FileType, strains: list, - updater: Union[Callable, None] = None) -> Generator: + update_progress: Union[Callable, None] = None) -> Generator: """Run checks against file and collect all the errors""" errors = tuple() def __process_errors__(line_number, line, error_checker_fn, errors = tuple()): @@ -78,8 +78,6 @@ def collect_errors( return errors + tuple(error for error in errs if error is not None) return errors + (errs,) - filesize = os.stat(filepath).st_size - processed_size = 0 with open(filepath, encoding="utf-8") as input_file: for line_number, line in enumerate(input_file, start=1): if line_number == 1: @@ -96,12 +94,8 @@ def collect_errors( errors): yield error - processed_size = processed_size + len(line) - if updater: - updater({ - "line_number": line_number, - "percent": (processed_size/filesize) * 100 - }) + if update_progress: + update_progress(line_number, line) def take(iterable: Iterable, num: int) -> list: """Take at most `num` items from `iterable`.""" diff --git a/quality_control/utils.py b/quality_control/utils.py new file mode 100644 index 0000000..0072608 --- 
# quality_control/utils.py
"""Utilities that might be useful elsewhere."""

from collections import namedtuple
from typing import Callable

# Snapshot of how far through a file processing has got:
#   filesize      - total size the calculator was created with
#   processedsize - running total of `len()` of every line seen so far
#   currentline   - line number passed to the most recent call
#   percent       - processedsize as a percentage of filesize
ProgressIndicator = namedtuple(
    "ProgressIndicator",
    ("filesize", "processedsize", "currentline", "percent"))


def make_progress_calculator(
        filesize: int) -> Callable[[int, str], ProgressIndicator]:
    """
    Build a stateful progress calculator for a file of size `filesize`.

    Returns a function that takes two arguments, `linenumber` and `linetext`,
    and returns a `ProgressIndicator` object with the computed progress. Each
    call adds `len(linetext)` to a running total that is private to the
    returned function, so every calculator tracks exactly one pass over one
    file.

    When `filesize` is zero there is nothing to process, so the reported
    percentage is `100.0` rather than raising `ZeroDivisionError`.

    NOTE(review): `len(linetext)` counts characters. If `filesize` is a byte
    count, the percentage can under-report for text containing multi-byte
    characters -- confirm against the caller.
    """
    processedsize = 0

    def __calculator__(linenumber: int, linetext: str) -> ProgressIndicator:
        nonlocal processedsize
        processedsize = processedsize + len(linetext)
        if filesize == 0:
            # Guard the division: an empty file is treated as fully processed.
            percent = 100.0
        else:
            percent = (processedsize / filesize) * 100
        return ProgressIndicator(
            filesize, processedsize, linenumber, percent)

    return __calculator__