about | summary | refs | log | tree | commit | diff
path: root/quality_control/parsing.py
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2022-06-10 08:06:47 +0300
committer: Frederick Muriuki Muriithi, 2022-06-10 08:06:47 +0300
commit: 557d1d5c19ab518fa7abb3229c6d9042867e6c00 (patch)
tree: 97b2ca39fe49600da74dbfa9ca358fa97f114a92 /quality_control/parsing.py
parent: 35a9cf67a9e055925f11a12c9fb964e5dbeb5525 (diff)
download: gn-uploader-557d1d5c19ab518fa7abb3229c6d9042867e6c00.tar.gz
Enable upload of zipfiles
Diffstat (limited to 'quality_control/parsing.py')
-rw-r--r-- quality_control/parsing.py 13
1 files changed, 12 insertions, 1 deletions
diff --git a/quality_control/parsing.py b/quality_control/parsing.py
index 9f8e8ee..f1f4f79 100644
--- a/quality_control/parsing.py
+++ b/quality_control/parsing.py
@@ -4,6 +4,7 @@ import os
import collections
from enum import Enum
from functools import partial
+from zipfile import ZipFile, is_zipfile
from typing import Iterable, Generator, Callable, Optional
import quality_control.average as avg
@@ -79,11 +80,21 @@ def collect_errors(
return errors + tuple(error for error in errs if error is not None)
return errors + (errs,)
- with open(filepath, encoding="utf-8") as input_file:
+ def __open_file__(filepath):
+ if not is_zipfile(filepath):
+ return open(filepath, encoding="utf-8")
+
+ with ZipFile(filepath, "r") as zfile:
+ return zfile.open(zfile.infolist()[0], "r")
+
+ with __open_file__(filepath) as input_file:
for line_number, line in enumerate(input_file, start=1):
if user_aborted():
break
+ if isinstance(line, bytes):
+ line = line.decode("utf-8")
+
if line_number == 1:
for error in __process_errors__(
line_number, line, partial(header_errors, strains=strains),