diff options
Diffstat (limited to 'quality_control')
-rw-r--r-- | quality_control/parsing.py | 12 | ||||
-rw-r--r-- | quality_control/utils.py | 21 |
2 files changed, 24 insertions, 9 deletions
diff --git a/quality_control/parsing.py b/quality_control/parsing.py index 655b98a..5b1809b 100644 --- a/quality_control/parsing.py +++ b/quality_control/parsing.py @@ -65,7 +65,7 @@ def se_errors(line_number, fields): def collect_errors( filepath: str, filetype: FileType, strains: list, - updater: Union[Callable, None] = None) -> Generator: + update_progress: Union[Callable, None] = None) -> Generator: """Run checks against file and collect all the errors""" errors = tuple() def __process_errors__(line_number, line, error_checker_fn, errors = tuple()): @@ -78,8 +78,6 @@ def collect_errors( return errors + tuple(error for error in errs if error is not None) return errors + (errs,) - filesize = os.stat(filepath).st_size - processed_size = 0 with open(filepath, encoding="utf-8") as input_file: for line_number, line in enumerate(input_file, start=1): if line_number == 1: @@ -96,12 +94,8 @@ def collect_errors( errors): yield error - processed_size = processed_size + len(line) - if updater: - updater({ - "line_number": line_number, - "percent": (processed_size/filesize) * 100 - }) + if update_progress: + update_progress(line_number, line) def take(iterable: Iterable, num: int) -> list: """Take at most `num` items from `iterable`.""" diff --git a/quality_control/utils.py b/quality_control/utils.py new file mode 100644 index 0000000..0072608 --- /dev/null +++ b/quality_control/utils.py @@ -0,0 +1,21 @@ +"""Utilities that might be useful elsewhere.""" + +from collections import namedtuple + +ProgressIndicator = namedtuple( + "ProgressIndicator", ("filesize", "processedsize", "currentline", "percent")) + +def make_progress_calculator(filesize: int): + """ + Returns a function that takes two arguments, `linenumber` and `linetext` and + return a `ProgressIndicator` object with the computed progress. + """ + processedsize = 0 + def __calculator__(linenumber: int, linetext: str) -> ProgressIndicator: + nonlocal processedsize + processedsize = processedsize + len(linetext) + return ProgressIndicator( + filesize, processedsize, linenumber, + ((processedsize/filesize) * 100)) + + return __calculator__ |