diff options
-rw-r--r-- | quality_control/parsing.py | 12 | ||||
-rw-r--r-- | quality_control/utils.py | 21 | ||||
-rw-r--r-- | scripts/qc.py | 20 |
3 files changed, 39 insertions, 14 deletions
diff --git a/quality_control/parsing.py b/quality_control/parsing.py index 655b98a..5b1809b 100644 --- a/quality_control/parsing.py +++ b/quality_control/parsing.py @@ -65,7 +65,7 @@ def se_errors(line_number, fields): def collect_errors( filepath: str, filetype: FileType, strains: list, - updater: Union[Callable, None] = None) -> Generator: + update_progress: Union[Callable, None] = None) -> Generator: """Run checks against file and collect all the errors""" errors = tuple() def __process_errors__(line_number, line, error_checker_fn, errors = tuple()): @@ -78,8 +78,6 @@ def collect_errors( return errors + tuple(error for error in errs if error is not None) return errors + (errs,) - filesize = os.stat(filepath).st_size - processed_size = 0 with open(filepath, encoding="utf-8") as input_file: for line_number, line in enumerate(input_file, start=1): if line_number == 1: @@ -96,12 +94,8 @@ def collect_errors( errors): yield error - processed_size = processed_size + len(line) - if updater: - updater({ - "line_number": line_number, - "percent": (processed_size/filesize) * 100 - }) + if update_progress: + update_progress(line_number, line) def take(iterable: Iterable, num: int) -> list: """Take at most `num` items from `iterable`.""" diff --git a/quality_control/utils.py b/quality_control/utils.py new file mode 100644 index 0000000..0072608 --- /dev/null +++ b/quality_control/utils.py @@ -0,0 +1,21 @@ +"""Utilities that might be useful elsewhere.""" + +from collections import namedtuple + +ProgressIndicator = namedtuple( + "ProgressIndicator", ("filesize", "processedsize", "currentline", "percent")) + +def make_progress_calculator(filesize: int): + """ + Returns a function that takes two arguments, `linenumber` and `linetext` and + return a `ProgressIndicator` object with the computed progress. + """ + processedsize = 0 + def __calculator__(linenumber: int, linetext: str) -> ProgressIndicator: + nonlocal processedsize + processedsize = processedsize + len(linetext) + return ProgressIndicator( + filesize, processedsize, linenumber, + ((processedsize/filesize) * 100)) + + return __calculator__ diff --git a/scripts/qc.py b/scripts/qc.py index 9d57f4b..9bad55e 100644 --- a/scripts/qc.py +++ b/scripts/qc.py @@ -2,10 +2,12 @@ import os import sys import argparse +from typing import Union, Callable import magic from quality_control.errors import InvalidValue +from quality_control.utils import make_progress_calculator from quality_control.parsing import ( take, FileType, @@ -54,10 +56,18 @@ def cli_argument_parser(): default=False, action="store_true") return parser -def progress_indicator(msg): +def make_progress_indicator( + verbose: bool, progress_calc_fn: Callable) -> Union[Callable, None]: """Utility to display the progress""" - print(f"LINE: {msg['line_number']} ({msg['percent']:.2f}%)", end="\r") - return msg + if not verbose: + return None + + def __indicator__(linenumber, linetext): + msg = progress_calc_fn(linenumber, linetext) + print(f"LINE: {msg.currentline} ({msg.percent:.2f}%)", end="\r") + return msg + + return __indicator__ def print_errors(errors, verbose): """Print out the errors""" @@ -78,8 +88,8 @@ def print_errors(errors, verbose): def check(filepath, filetype, strains, count, verbose=False): """Check the file and print out results""" - if verbose: - updater = progress_indicator + updater = make_progress_indicator( + verbose, make_progress_calculator(os.stat(filepath).st_size)) if count > 0: return print_errors( |