about summary refs log tree commit diff
path: root/quality_control
diff options
context:
space:
mode:
Diffstat (limited to 'quality_control')
-rw-r--r-- quality_control/parsing.py 12
-rw-r--r-- quality_control/utils.py 21
2 files changed, 24 insertions, 9 deletions
diff --git a/quality_control/parsing.py b/quality_control/parsing.py
index 655b98a..5b1809b 100644
--- a/quality_control/parsing.py
+++ b/quality_control/parsing.py
@@ -65,7 +65,7 @@ def se_errors(line_number, fields):
 
 def collect_errors(
         filepath: str, filetype: FileType, strains: list,
-        updater: Union[Callable, None] = None) -> Generator:
+        update_progress: Union[Callable, None] = None) -> Generator:
     """Run checks against file and collect all the errors"""
     errors = tuple()
     def __process_errors__(line_number, line, error_checker_fn, errors = tuple()):
@@ -78,8 +78,6 @@ def collect_errors(
             return errors + tuple(error for error in errs if error is not None)
         return errors + (errs,)
 
-    filesize = os.stat(filepath).st_size
-    processed_size = 0
     with open(filepath, encoding="utf-8") as input_file:
         for line_number, line in enumerate(input_file, start=1):
             if line_number == 1:
@@ -96,12 +94,8 @@ def collect_errors(
                         errors):
                     yield error
 
-            processed_size = processed_size + len(line)
-            if updater:
-                updater({
-                    "line_number": line_number,
-                    "percent": (processed_size/filesize) * 100
-                })
+            if update_progress:
+                update_progress(line_number, line)
 
 def take(iterable: Iterable, num: int) -> list:
     """Take at most `num` items from `iterable`."""
diff --git a/quality_control/utils.py b/quality_control/utils.py
new file mode 100644
index 0000000..0072608
--- /dev/null
+++ b/quality_control/utils.py
@@ -0,0 +1,21 @@
+"""Utilities that might be useful elsewhere."""
+
+from collections import namedtuple
+
+ProgressIndicator = namedtuple(
+    "ProgressIndicator", ("filesize", "processedsize", "currentline", "percent"))
+
+def make_progress_calculator(filesize: int):
+    """
+    Returns a function that takes two arguments, `linenumber` and `linetext`, and
+    returns a `ProgressIndicator` object with the computed progress.
+    """
+    processedsize = 0
+    def __calculator__(linenumber: int, linetext: str) -> ProgressIndicator:
+        nonlocal processedsize
+        processedsize = processedsize + len(linetext)
+        return ProgressIndicator(
+            filesize, processedsize, linenumber,
+            ((processedsize/filesize) * 100))
+
+    return __calculator__