about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- quality_control/parsing.py 12
-rw-r--r-- quality_control/utils.py 21
-rw-r--r-- scripts/qc.py 20
3 files changed, 39 insertions, 14 deletions
diff --git a/quality_control/parsing.py b/quality_control/parsing.py
index 655b98a..5b1809b 100644
--- a/quality_control/parsing.py
+++ b/quality_control/parsing.py
@@ -65,7 +65,7 @@ def se_errors(line_number, fields):
def collect_errors(
filepath: str, filetype: FileType, strains: list,
- updater: Union[Callable, None] = None) -> Generator:
+ update_progress: Union[Callable, None] = None) -> Generator:
"""Run checks against file and collect all the errors"""
errors = tuple()
def __process_errors__(line_number, line, error_checker_fn, errors = tuple()):
@@ -78,8 +78,6 @@ def collect_errors(
return errors + tuple(error for error in errs if error is not None)
return errors + (errs,)
- filesize = os.stat(filepath).st_size
- processed_size = 0
with open(filepath, encoding="utf-8") as input_file:
for line_number, line in enumerate(input_file, start=1):
if line_number == 1:
@@ -96,12 +94,8 @@ def collect_errors(
errors):
yield error
- processed_size = processed_size + len(line)
- if updater:
- updater({
- "line_number": line_number,
- "percent": (processed_size/filesize) * 100
- })
+ if update_progress:
+ update_progress(line_number, line)
def take(iterable: Iterable, num: int) -> list:
"""Take at most `num` items from `iterable`."""
diff --git a/quality_control/utils.py b/quality_control/utils.py
new file mode 100644
index 0000000..0072608
--- /dev/null
+++ b/quality_control/utils.py
@@ -0,0 +1,21 @@
+"""Utilities that might be useful elsewhere."""
+
+from collections import namedtuple
+
+ProgressIndicator = namedtuple(
+ "ProgressIndicator", ("filesize", "processedsize", "currentline", "percent"))
+
+def make_progress_calculator(filesize: int):
+    """
+    Return a function of `linenumber` and `linetext` that returns a
+    `ProgressIndicator` with the running progress through `filesize` bytes.
+    """
+    processedsize = 0
+    def __calculator__(linenumber: int, linetext: str) -> ProgressIndicator:
+        nonlocal processedsize
+        # `filesize` is a byte count (`st_size`), so count bytes, not chars.
+        processedsize = processedsize + len(linetext.encode("utf-8"))
+        # A zero `filesize` cannot be divided by; report it as complete.
+        percent = ((processedsize/filesize) * 100) if filesize else 100.0
+        return ProgressIndicator(filesize, processedsize, linenumber, percent)
+    return __calculator__
diff --git a/scripts/qc.py b/scripts/qc.py
index 9d57f4b..9bad55e 100644
--- a/scripts/qc.py
+++ b/scripts/qc.py
@@ -2,10 +2,12 @@
import os
import sys
import argparse
+from typing import Union, Callable
import magic
from quality_control.errors import InvalidValue
+from quality_control.utils import make_progress_calculator
from quality_control.parsing import (
take,
FileType,
@@ -54,10 +56,18 @@ def cli_argument_parser():
default=False, action="store_true")
return parser
-def progress_indicator(msg):
+def make_progress_indicator(
+ verbose: bool, progress_calc_fn: Callable) -> Union[Callable, None]:
"""Utility to display the progress"""
- print(f"LINE: {msg['line_number']} ({msg['percent']:.2f}%)", end="\r")
- return msg
+ if not verbose:  # no indicator is built unless verbose output was requested
+ return None
+
+ def __indicator__(linenumber, linetext):  # same two arguments that are forwarded to progress_calc_fn
+ msg = progress_calc_fn(linenumber, linetext)  # progress record; its fields are read below
+ print(f"LINE: {msg.currentline} ({msg.percent:.2f}%)", end="\r")  # ends with "\r" instead of a newline
+ return msg  # hand the computed record back to the caller
+
+ return __indicator__
def print_errors(errors, verbose):
"""Print out the errors"""
@@ -78,8 +88,8 @@ def print_errors(errors, verbose):
def check(filepath, filetype, strains, count, verbose=False):
"""Check the file and print out results"""
- if verbose:
- updater = progress_indicator
+ updater = make_progress_indicator(
+ verbose, make_progress_calculator(os.stat(filepath).st_size))
if count > 0:
return print_errors(