From c52570a4069abb6b8953e486adb326392ce6714c Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 18 Jul 2022 18:47:11 +0300 Subject: Implement data insertion (averages/means) * quality_control/file_utils.py (new file): extract common file utilities. * quality_control/parsing.py (refactor): extract common file utilities. * scripts/insert_data.py: Implement data insertion for averages/means --- quality_control/file_utils.py | 13 +++++++++++++ quality_control/parsing.py | 11 ++--------- 2 files changed, 15 insertions(+), 9 deletions(-) create mode 100644 quality_control/file_utils.py (limited to 'quality_control') diff --git a/quality_control/file_utils.py b/quality_control/file_utils.py new file mode 100644 index 0000000..fdce1e1 --- /dev/null +++ b/quality_control/file_utils.py @@ -0,0 +1,13 @@ +"Common file utilities" +from typing import Union +from pathlib import Path +from io import TextIOWrapper +from zipfile import ZipFile, is_zipfile + +def open_file(filepath: Union[str, Path]) -> Union[ZipFile, TextIOWrapper]: + "Transparently open both TSV and ZIP files" + if not is_zipfile(filepath): + return open(filepath, encoding="utf-8") + + with ZipFile(filepath, "r") as zfile: + return zfile.open(zfile.infolist()[0], "r") diff --git a/quality_control/parsing.py b/quality_control/parsing.py index 28a311e..5fc5f62 100644 --- a/quality_control/parsing.py +++ b/quality_control/parsing.py @@ -2,10 +2,10 @@ import collections from enum import Enum from functools import partial -from zipfile import ZipFile, is_zipfile from typing import Tuple, Union, Iterable, Generator, Callable, Optional import quality_control.average as avg +from quality_control.file_utils import open_file import quality_control.standard_error as se from quality_control.errors import ( InvalidValue, DuplicateHeading, InconsistentColumns) @@ -92,14 +92,7 @@ def collect_errors( return errors + tuple(error for error in errs if error is not None) return errors + (errs,) - def __open_file__(filepath): - if not is_zipfile(filepath): - return open(filepath, encoding="utf-8") - - with ZipFile(filepath, "r") as zfile: - return zfile.open(zfile.infolist()[0], "r") - - with __open_file__(filepath) as input_file: + with open_file(filepath) as input_file: for line_number, line in enumerate(input_file, start=1): if user_aborted(): break -- cgit v1.2.3