aboutsummaryrefslogtreecommitdiff
path: root/quality_control
diff options
context:
space:
mode:
Diffstat (limited to 'quality_control')
-rw-r--r-- quality_control/parsing.py | 12
-rw-r--r-- quality_control/utils.py | 21
2 files changed, 24 insertions, 9 deletions
diff --git a/quality_control/parsing.py b/quality_control/parsing.py
index 655b98a..5b1809b 100644
--- a/quality_control/parsing.py
+++ b/quality_control/parsing.py
@@ -65,7 +65,7 @@ def se_errors(line_number, fields):
def collect_errors(
filepath: str, filetype: FileType, strains: list,
- updater: Union[Callable, None] = None) -> Generator:
+ update_progress: Union[Callable, None] = None) -> Generator:
"""Run checks against file and collect all the errors"""
errors = tuple()
def __process_errors__(line_number, line, error_checker_fn, errors = tuple()):
@@ -78,8 +78,6 @@ def collect_errors(
return errors + tuple(error for error in errs if error is not None)
return errors + (errs,)
- filesize = os.stat(filepath).st_size
- processed_size = 0
with open(filepath, encoding="utf-8") as input_file:
for line_number, line in enumerate(input_file, start=1):
if line_number == 1:
@@ -96,12 +94,8 @@ def collect_errors(
errors):
yield error
- processed_size = processed_size + len(line)
- if updater:
- updater({
- "line_number": line_number,
- "percent": (processed_size/filesize) * 100
- })
+ if update_progress:
+ update_progress(line_number, line)
def take(iterable: Iterable, num: int) -> list:
"""Take at most `num` items from `iterable`."""
diff --git a/quality_control/utils.py b/quality_control/utils.py
new file mode 100644
index 0000000..0072608
--- /dev/null
+++ b/quality_control/utils.py
@@ -0,0 +1,21 @@
+"""Utilities that might be useful elsewhere."""
+
+from collections import namedtuple
+
+ProgressIndicator = namedtuple(
+ "ProgressIndicator", ("filesize", "processedsize", "currentline", "percent"))
+
+def make_progress_calculator(filesize: int):
+ """
+ Returns a function that takes two arguments, `linenumber` and `linetext`, and
+ returns a `ProgressIndicator` object with the computed progress.
+ """
+ processedsize = 0
+ def __calculator__(linenumber: int, linetext: str) -> ProgressIndicator:
+ nonlocal processedsize
+ processedsize = processedsize + len(linetext)
+ return ProgressIndicator(
+ filesize, processedsize, linenumber,
+ ((processedsize/filesize) * 100))
+
+ return __calculator__