diff options
Diffstat (limited to 'quality_control')
-rw-r--r-- | quality_control/average.py | 15 | ||||
-rw-r--r-- | quality_control/headers.py | 5 | ||||
-rw-r--r-- | quality_control/parsing.py | 12 | ||||
-rw-r--r-- | quality_control/standard_error.py | 15 | ||||
-rw-r--r-- | quality_control/utils.py | 12 |
5 files changed, 31 insertions, 28 deletions
diff --git a/quality_control/average.py b/quality_control/average.py index 06b0a47..2b098db 100644 --- a/quality_control/average.py +++ b/quality_control/average.py @@ -1,19 +1,16 @@ """Contain logic for checking average files""" -import re from typing import Union +from .utils import cell_error from .errors import InvalidValue def invalid_value(line_number: int, column_number: int, val: str) -> Union[ InvalidValue, None]: """Return an `InvalidValue` object if `val` is not a valid "averages" value.""" - if re.search(r"^[0-9]+\.[0-9]{3}$", val): - return None - if re.search(r"^0\.0+$", val) or re.search("^0+$", val): - return None - return InvalidValue( - line_number, column_number, val, ( + return cell_error( + r"^[0-9]+\.[0-9]{3}$", val, line=line_number, column=column_number, + value=val, message=( f"Invalid value '{val}'. " - "Expected string representing a number with exactly three decimal " - "places.")) + "Expected string representing a number with exactly three " + "decimal places.")) diff --git a/quality_control/headers.py b/quality_control/headers.py index 79d7e43..f4f4dad 100644 --- a/quality_control/headers.py +++ b/quality_control/headers.py @@ -27,14 +27,15 @@ def invalid_headings( enumerate(headings, start=2) if header not in strains) def duplicate_headings( - line_number: int, headers: Sequence[str]) -> Union[InvalidValue, None]: + line_number: int, + headers: Sequence[str]) -> Tuple[DuplicateHeading, ...]: """Return a tuple of `DuplicateHeading` objects for each column heading that is a duplicate of another column heading.""" def __update_columns__(acc, item): if item[1] in acc.keys(): return {**acc, item[1]: acc[item[1]] + (item[0],)} return {**acc, item[1]: (item[0],)} - repeated = { + repeated = {# type: ignore[var-annotated] heading: columns for heading, columns in reduce(__update_columns__, enumerate(headers, start=1), {}).items() if len(columns) > 1 diff --git a/quality_control/parsing.py b/quality_control/parsing.py index f1f4f79..ba22e0c 100644 --- a/quality_control/parsing.py +++ b/quality_control/parsing.py @@ -1,15 +1,13 @@ """Module handling the high-level parsing of the files""" - -import os import collections from enum import Enum from functools import partial from zipfile import ZipFile, is_zipfile -from typing import Iterable, Generator, Callable, Optional +from typing import Tuple, Union, Iterable, Generator, Callable, Optional import quality_control.average as avg import quality_control.standard_error as se -from quality_control.errors import InvalidValue +from quality_control.errors import InvalidValue, DuplicateHeading from quality_control.headers import ( invalid_header, invalid_headings, duplicate_headings) @@ -67,16 +65,16 @@ def se_errors(line_number, fields): def collect_errors( filepath: str, filetype: FileType, strains: list, update_progress: Optional[Callable] = None, - user_aborted: Optional[Callable] = lambda: False) -> Generator: + user_aborted: Callable = lambda: False) -> Generator: """Run checks against file and collect all the errors""" - errors = tuple() + errors:Tuple[Union[InvalidValue, DuplicateHeading], ...] = tuple() def __process_errors__(line_number, line, error_checker_fn, errors = tuple()): errs = error_checker_fn( line_number, tuple(field.strip() for field in line.split("\t"))) if errs is None: return errors - if isinstance(errs, collections.Sequence): + if isinstance(errs, collections.abc.Sequence): return errors + tuple(error for error in errs if error is not None) return errors + (errs,) diff --git a/quality_control/standard_error.py b/quality_control/standard_error.py index aa7df3c..7e059ad 100644 --- a/quality_control/standard_error.py +++ b/quality_control/standard_error.py @@ -1,7 +1,7 @@ """Contain logic for checking standard error files""" -import re from typing import Union +from .utils import cell_error from .errors import InvalidValue def invalid_value( @@ -12,12 +12,9 @@ def invalid_value( `val` is not a valid input for standard error files, otherwise, it returns `None`. """ - if re.search(r"^[0-9]+\.[0-9]{6,}$", val): - return None - if re.search(r"^0\.0+$", val) or re.search("^0+$", val): - return None - return InvalidValue( - line_number, column_number, val, ( + return cell_error( + r"^[0-9]+\.[0-9]{6,}$", val, line=line_number, column=column_number, + value=val, message=( f"Invalid value '{val}'. " - "Expected string representing a number with at least six decimal " - "places.")) + "Expected string representing a number with at least six " + "decimal places.")) diff --git a/quality_control/utils.py b/quality_control/utils.py index 0072608..67301d6 100644 --- a/quality_control/utils.py +++ b/quality_control/utils.py @@ -1,7 +1,9 @@ """Utilities that might be useful elsewhere.""" - +import re from collections import namedtuple +from .errors import InvalidValue + ProgressIndicator = namedtuple( "ProgressIndicator", ("filesize", "processedsize", "currentline", "percent")) @@ -19,3 +21,11 @@ def make_progress_calculator(filesize: int): ((processedsize/filesize) * 100)) return __calculator__ + +def cell_error(pattern, val, **error_kwargs): + "Return the error in the cell" + if re.search(pattern, val): + return None + if re.search(r"^0\.0+$", val) or re.search("^0+$", val): + return None + return InvalidValue(**error_kwargs) |