about summary refs log tree commit diff
path: root/quality_control
diff options
context:
space:
mode:
Diffstat (limited to 'quality_control')
-rw-r--r-- quality_control/file_utils.py 13
-rw-r--r-- quality_control/parsing.py 11
2 files changed, 15 insertions, 9 deletions
diff --git a/quality_control/file_utils.py b/quality_control/file_utils.py
new file mode 100644
index 0000000..fdce1e1
--- /dev/null
+++ b/quality_control/file_utils.py
@@ -0,0 +1,13 @@
+"Common file utilities"
+from typing import Union
+from pathlib import Path
+from io import TextIOWrapper
+from zipfile import ZipFile, is_zipfile
+
+def open_file(filepath: Union[str, Path]) -> Union[ZipFile, TextIOWrapper]:
+ "Open `filepath` for reading: a non-ZIP file (e.g. TSV) is opened as UTF-8 text; for a ZIP archive the first member listed by infolist() is opened."
+ if not is_zipfile(filepath):
+ return open(filepath, encoding="utf-8")  # non-ZIP input: text mode, UTF-8
+
+ with ZipFile(filepath, "r") as zfile:  # NOTE(review): the member handle below is returned from inside this with-block -- confirm it stays readable once the archive context exits
+ return zfile.open(zfile.infolist()[0], "r")  # only the first member is opened; NOTE(review): presumably a binary stream, unlike the text-mode branch above -- confirm callers handle both
diff --git a/quality_control/parsing.py b/quality_control/parsing.py
index 28a311e..5fc5f62 100644
--- a/quality_control/parsing.py
+++ b/quality_control/parsing.py
@@ -2,10 +2,10 @@
import collections
from enum import Enum
from functools import partial
-from zipfile import ZipFile, is_zipfile
from typing import Tuple, Union, Iterable, Generator, Callable, Optional
import quality_control.average as avg
+from quality_control.file_utils import open_file
import quality_control.standard_error as se
from quality_control.errors import (
InvalidValue, DuplicateHeading, InconsistentColumns)
@@ -92,14 +92,7 @@ def collect_errors(
return errors + tuple(error for error in errs if error is not None)
return errors + (errs,)
- def __open_file__(filepath):
- if not is_zipfile(filepath):
- return open(filepath, encoding="utf-8")
-
- with ZipFile(filepath, "r") as zfile:
- return zfile.open(zfile.infolist()[0], "r")
-
- with __open_file__(filepath) as input_file:
+ with open_file(filepath) as input_file:
for line_number, line in enumerate(input_file, start=1):
if user_aborted():
break