about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- quality_control/parsing.py 12
-rw-r--r-- quality_control/utils.py 21
-rw-r--r-- scripts/qc.py 20
3 files changed, 39 insertions, 14 deletions
diff --git a/quality_control/parsing.py b/quality_control/parsing.py
index 655b98a..5b1809b 100644
--- a/quality_control/parsing.py
+++ b/quality_control/parsing.py
@@ -65,7 +65,7 @@ def se_errors(line_number, fields):
 
 def collect_errors(
         filepath: str, filetype: FileType, strains: list,
-        updater: Union[Callable, None] = None) -> Generator:
+        update_progress: Union[Callable, None] = None) -> Generator:
     """Run checks against file and collect all the errors"""
     errors = tuple()
     def __process_errors__(line_number, line, error_checker_fn, errors = tuple()):
@@ -78,8 +78,6 @@ def collect_errors(
             return errors + tuple(error for error in errs if error is not None)
         return errors + (errs,)
 
-    filesize = os.stat(filepath).st_size
-    processed_size = 0
     with open(filepath, encoding="utf-8") as input_file:
         for line_number, line in enumerate(input_file, start=1):
             if line_number == 1:
@@ -96,12 +94,8 @@ def collect_errors(
                         errors):
                     yield error
 
-            processed_size = processed_size + len(line)
-            if updater:
-                updater({
-                    "line_number": line_number,
-                    "percent": (processed_size/filesize) * 100
-                })
+            if update_progress:
+                update_progress(line_number, line)
 
 def take(iterable: Iterable, num: int) -> list:
     """Take at most `num` items from `iterable`."""
diff --git a/quality_control/utils.py b/quality_control/utils.py
new file mode 100644
index 0000000..0072608
--- /dev/null
+++ b/quality_control/utils.py
@@ -0,0 +1,21 @@
+"""Utilities that might be useful elsewhere."""
+
+from collections import namedtuple
+
+ProgressIndicator = namedtuple(
+    "ProgressIndicator", ("filesize", "processedsize", "currentline", "percent"))
+
+def make_progress_calculator(filesize: int):
+    """
+    Returns a function that takes two arguments, `linenumber` and `linetext` and
+    returns a `ProgressIndicator` object with the computed progress.
+    """
+    processedsize = 0
+    def __calculator__(linenumber: int, linetext: str) -> ProgressIndicator:
+        nonlocal processedsize
+        processedsize = processedsize + len(linetext)
+        return ProgressIndicator(
+            filesize, processedsize, linenumber,
+            ((processedsize/filesize) * 100))
+
+    return __calculator__
diff --git a/scripts/qc.py b/scripts/qc.py
index 9d57f4b..9bad55e 100644
--- a/scripts/qc.py
+++ b/scripts/qc.py
@@ -2,10 +2,12 @@
 import os
 import sys
 import argparse
+from typing import Union, Callable
 
 import magic
 
 from quality_control.errors import InvalidValue
+from quality_control.utils import make_progress_calculator
 from quality_control.parsing import (
     take,
     FileType,
@@ -54,10 +56,18 @@ def cli_argument_parser():
         default=False, action="store_true")
     return parser
 
-def progress_indicator(msg):
+def make_progress_indicator(
+        verbose: bool, progress_calc_fn: Callable) -> Union[Callable, None]:
     """Utility to display the progress"""
-    print(f"LINE: {msg['line_number']} ({msg['percent']:.2f}%)", end="\r")
-    return msg
+    if not verbose:
+        return None
+
+    def __indicator__(linenumber, linetext):
+        msg = progress_calc_fn(linenumber, linetext)
+        print(f"LINE: {msg.currentline} ({msg.percent:.2f}%)", end="\r")
+        return msg
+
+    return __indicator__
 
 def print_errors(errors, verbose):
     """Print out the errors"""
@@ -78,8 +88,8 @@ def print_errors(errors, verbose):
 
 def check(filepath, filetype, strains, count, verbose=False):
     """Check the file and print out results"""
-    if verbose:
-        updater = progress_indicator
+    updater = make_progress_indicator(
+        verbose, make_progress_calculator(os.stat(filepath).st_size))
 
     if count > 0:
         return print_errors(