about summary refs log tree commit diff
path: root/quality_control
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2022-07-18 18:47:11 +0300
committer Frederick Muriuki Muriithi 2022-07-19 05:10:32 +0300
commit c52570a4069abb6b8953e486adb326392ce6714c (patch)
tree 260ebe385a1c9b8a1782765fa3072f59bfa3555b /quality_control
parent 044184ef28a091519b7632d582387c26bf1543ea (diff)
download gn-uploader-c52570a4069abb6b8953e486adb326392ce6714c.tar.gz
Implement data insertion (averages/means)
* quality_control/file_utils.py (new file): extract common file
  utilities.
* quality_control/parsing.py (refactor): extract common file
  utilities.
* scripts/insert_data.py: Implement data insertion for averages/means
Diffstat (limited to 'quality_control')
-rw-r--r-- quality_control/file_utils.py 13
-rw-r--r-- quality_control/parsing.py 11
2 files changed, 15 insertions, 9 deletions
diff --git a/quality_control/file_utils.py b/quality_control/file_utils.py
new file mode 100644
index 0000000..fdce1e1
--- /dev/null
+++ b/quality_control/file_utils.py
@@ -0,0 +1,13 @@
+"Common file utilities"
+from typing import Union
+from pathlib import Path
+from io import TextIOWrapper
+from zipfile import ZipFile, is_zipfile
+
+def open_file(filepath: Union[str, Path]) -> Union[ZipFile, TextIOWrapper]:
+    "Transparently open both TSV and ZIP files"
+    if not is_zipfile(filepath):
+        return open(filepath, encoding="utf-8")
+
+    with ZipFile(filepath, "r") as zfile:
+        return zfile.open(zfile.infolist()[0], "r")
diff --git a/quality_control/parsing.py b/quality_control/parsing.py
index 28a311e..5fc5f62 100644
--- a/quality_control/parsing.py
+++ b/quality_control/parsing.py
@@ -2,10 +2,10 @@
 import collections
 from enum import Enum
 from functools import partial
-from zipfile import ZipFile, is_zipfile
 from typing import Tuple, Union, Iterable, Generator, Callable, Optional
 
 import quality_control.average as avg
+from quality_control.file_utils import open_file
 import quality_control.standard_error as se
 from quality_control.errors import (
     InvalidValue, DuplicateHeading, InconsistentColumns)
@@ -92,14 +92,7 @@ def collect_errors(
             return errors + tuple(error for error in errs if error is not None)
         return errors + (errs,)
 
-    def __open_file__(filepath):
-        if not is_zipfile(filepath):
-            return open(filepath, encoding="utf-8")
-
-        with ZipFile(filepath, "r") as zfile:
-            return zfile.open(zfile.infolist()[0], "r")
-
-    with __open_file__(filepath) as input_file:
+    with open_file(filepath) as input_file:
         for line_number, line in enumerate(input_file, start=1):
             if user_aborted():
                 break