aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2022-05-19 10:25:18 +0300
committerFrederick Muriuki Muriithi2022-05-19 15:23:47 +0300
commit27f6e9e28f2a3244bdd00336cf918de97b2ceed6 (patch)
tree35ad7aeea324b0cbe60c44d652b4a5387321f4bd
parent6865d8621e6fadb915813951068ee950c781ee0d (diff)
downloadgn-uploader-27f6e9e28f2a3244bdd00336cf918de97b2ceed6.tar.gz
Extract progress indication from the parsing
Progress indication is not part of the parsing, so this commit extracts it into functions with well-defined input arguments, hiding the progress-indication logic from the parsing function.
-rw-r--r--quality_control/parsing.py12
-rw-r--r--quality_control/utils.py21
-rw-r--r--scripts/qc.py20
3 files changed, 39 insertions, 14 deletions
diff --git a/quality_control/parsing.py b/quality_control/parsing.py
index 655b98a..5b1809b 100644
--- a/quality_control/parsing.py
+++ b/quality_control/parsing.py
@@ -65,7 +65,7 @@ def se_errors(line_number, fields):
def collect_errors(
filepath: str, filetype: FileType, strains: list,
- updater: Union[Callable, None] = None) -> Generator:
+ update_progress: Union[Callable, None] = None) -> Generator:
"""Run checks against file and collect all the errors"""
errors = tuple()
def __process_errors__(line_number, line, error_checker_fn, errors = tuple()):
@@ -78,8 +78,6 @@ def collect_errors(
return errors + tuple(error for error in errs if error is not None)
return errors + (errs,)
- filesize = os.stat(filepath).st_size
- processed_size = 0
with open(filepath, encoding="utf-8") as input_file:
for line_number, line in enumerate(input_file, start=1):
if line_number == 1:
@@ -96,12 +94,8 @@ def collect_errors(
errors):
yield error
- processed_size = processed_size + len(line)
- if updater:
- updater({
- "line_number": line_number,
- "percent": (processed_size/filesize) * 100
- })
+ if update_progress:
+ update_progress(line_number, line)
def take(iterable: Iterable, num: int) -> list:
"""Take at most `num` items from `iterable`."""
diff --git a/quality_control/utils.py b/quality_control/utils.py
new file mode 100644
index 0000000..0072608
--- /dev/null
+++ b/quality_control/utils.py
@@ -0,0 +1,21 @@
+"""Utilities that might be useful elsewhere."""
+
+from collections import namedtuple
+
+ProgressIndicator = namedtuple(
+ "ProgressIndicator", ("filesize", "processedsize", "currentline", "percent"))
+
+def make_progress_calculator(filesize: int):
+ """
+ Returns a function that takes two arguments, `linenumber` and `linetext` and
+ return a `ProgressIndicator` object with the computed progress.
+ """
+ processedsize = 0
+ def __calculator__(linenumber: int, linetext: str) -> ProgressIndicator:
+ nonlocal processedsize
+ processedsize = processedsize + len(linetext)
+ return ProgressIndicator(
+ filesize, processedsize, linenumber,
+ ((processedsize/filesize) * 100))
+
+ return __calculator__
diff --git a/scripts/qc.py b/scripts/qc.py
index 9d57f4b..9bad55e 100644
--- a/scripts/qc.py
+++ b/scripts/qc.py
@@ -2,10 +2,12 @@
import os
import sys
import argparse
+from typing import Union, Callable
import magic
from quality_control.errors import InvalidValue
+from quality_control.utils import make_progress_calculator
from quality_control.parsing import (
take,
FileType,
@@ -54,10 +56,18 @@ def cli_argument_parser():
default=False, action="store_true")
return parser
-def progress_indicator(msg):
+def make_progress_indicator(
+ verbose: bool, progress_calc_fn: Callable) -> Union[Callable, None]:
"""Utility to display the progress"""
- print(f"LINE: {msg['line_number']} ({msg['percent']:.2f}%)", end="\r")
- return msg
+ if not verbose:
+ return None
+
+ def __indicator__(linenumber, linetext):
+ msg = progress_calc_fn(linenumber, linetext)
+ print(f"LINE: {msg.currentline} ({msg.percent:.2f}%)", end="\r")
+ return msg
+
+ return __indicator__
def print_errors(errors, verbose):
"""Print out the errors"""
@@ -78,8 +88,8 @@ def print_errors(errors, verbose):
def check(filepath, filetype, strains, count, verbose=False):
"""Check the file and print out results"""
- if verbose:
- updater = progress_indicator
+ updater = make_progress_indicator(
+ verbose, make_progress_calculator(os.stat(filepath).st_size))
if count > 0:
return print_errors(