about summary refs log tree commit diff
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2022-06-10 08:06:47 +0300
committer Frederick Muriuki Muriithi 2022-06-10 08:06:47 +0300
commit 557d1d5c19ab518fa7abb3229c6d9042867e6c00 (patch)
tree 97b2ca39fe49600da74dbfa9ca358fa97f114a92
parent 35a9cf67a9e055925f11a12c9fb964e5dbeb5525 (diff)
download gn-uploader-557d1d5c19ab518fa7abb3229c6d9042867e6c00.tar.gz
Enable upload of zipfiles
-rw-r--r-- qc_app/entry.py 96
-rw-r--r-- qc_app/templates/index.html 2
-rw-r--r-- quality_control/parsing.py 13
-rw-r--r-- scripts/worker.py 8
4 files changed, 95 insertions, 24 deletions
diff --git a/qc_app/entry.py b/qc_app/entry.py
index b7b4b6f..25e2eed 100644
--- a/qc_app/entry.py
+++ b/qc_app/entry.py
@@ -1,5 +1,10 @@
 """Entry-point module"""
 import os
+import random
+import string
+import mimetypes
+from typing import Tuple
+from zipfile import ZipFile, is_zipfile
 
 from werkzeug.utils import secure_filename
 from flask import (
@@ -13,38 +18,87 @@ from flask import (
 
 entrybp = Blueprint("entry", __name__)
 
+def errors(request) -> Tuple[str, ...]:
+    """Return a tuple of the errors found in the `request`. If no error is
+    found, then an empty tuple is returned."""
+    def __filetype_error__():
+        return (
+            ("Invalid file type provided.",)
+            if request.form["filetype"] not in ("average", "standard-error")
+            else tuple())
+
+    def __file_missing_error__():
+        return (
+            ("No file was uploaded.",)
+            if ("qc_text_file" not in request.files or
+                request.files["qc_text_file"].filename == "")
+            else tuple())
+
+    def __file_mimetype_error__():
+        text_file = request.files["qc_text_file"]
+        return (
+            (
+                ("Invalid file! Expected a tab-separated-values file, or a zip "
+                 "file of the a tab-separated-values file."),)
+            if text_file.mimetype
+            not in ("text/tab-separated-values", "application/zip")
+            else tuple())
+
+    return (
+        __filetype_error__() +
+        (__file_missing_error__() or __file_mimetype_error__()))
+
+def zip_file_errors(filepath, upload_dir) -> Tuple[str, ...]:
+    """Check the uploaded zip file for errors."""
+    zfile_errors = ("Fail always!!",)
+    if is_zipfile(filepath):
+        zfile = ZipFile(filepath, "r")
+        infolist = zfile.infolist()
+        if len(infolist) != 1:
+            zfile_errors = zfile_errors + (
+                ("Expected exactly one (1) member file within the uploaded zip "
+                 "file. Got {len(infolist)} member files."))
+        if len(infolist) == 1 and infolist[0].is_dir():
+            zfile_errors = zfile_errors + (
+                ("Expected a member text file in the uploaded zip file. Got a "
+                 "directory/folder."))
+
+        if len(infolist) == 1 and not infolist[0].is_dir():
+            zfile.extract(infolist[0], path=upload_dir)
+            mime = mimetypes.guess_type(f"{upload_dir}/{infolist[0].filename}")
+            if mime[0] != "text/tab-separated-values":
+                zfile_errors = zfile_errors + (
+                    ("Expected the member text file in the uploaded zip file to"
+                     " be a tab-separated file."))
+
+    return zfile_errors
+
 @entrybp.route("/", methods=["GET", "POST"])
 def upload_file():
     """Enables uploading the files"""
+    upload_dir = app.config["UPLOAD_FOLDER"]
     if request.method == "GET":
         return render_template("index.html")
 
-    errors = False
-    if request.form["filetype"] not in ("average", "standard-error"):
-        flash("Invalid file type provided.", "alert-error")
-        errors = True
+    request_errors = errors(request)
+    if request_errors:
+        for error in request_errors:
+            flash(error, "alert-error")
+        return render_template("index.html")
 
-    if ("qc_text_file" not in request.files or
-        request.files["qc_text_file"].filename == ""):
-        flash("No file was uploaded.", "alert-error")
-        errors = True
+    filename = secure_filename(request.files["qc_text_file"].filename)
+    if not os.path.exists(upload_dir):
+        os.mkdir(upload_dir)
 
-    text_file = request.files["qc_text_file"]
-    if text_file.mimetype != "text/tab-separated-values":
-        flash("Invalid file! Expected a tab-separated-values file.",
-              "alert-error")
-        errors = True
+    filepath = os.path.join(upload_dir, filename)
+    request.files["qc_text_file"].save(os.path.join(upload_dir, filename))
 
-    if errors:
+    zip_errors = zip_file_errors(filepath, upload_dir)
+    if zip_errors:
+        for error in zip_errors:
+            flash(error, "alert-error")
         return render_template("index.html")
 
-    filename = secure_filename(text_file.filename)
-    if not os.path.exists(app.config["UPLOAD_FOLDER"]):
-        os.mkdir(app.config["UPLOAD_FOLDER"])
-
-    filepath = os.path.join(app.config["UPLOAD_FOLDER"], filename)
-    text_file.save(os.path.join(app.config["UPLOAD_FOLDER"], filename))
-
     return redirect(url_for(
         "parse.parse", filename=filename,
         filetype=request.form["filetype"]))
diff --git a/qc_app/templates/index.html b/qc_app/templates/index.html
index 28aaa7f..b14f3d4 100644
--- a/qc_app/templates/index.html
+++ b/qc_app/templates/index.html
@@ -31,7 +31,7 @@
 
   <label for="file_upload">select file</label>
   <input type="file" name="qc_text_file" id="file_upload"
-	 accept="text/tab-separated-values" />
+	 accept="text/tab-separated-values, application/zip" />
 
   <input type="submit" value="upload file" class="btn btn-main" />
 </form>
diff --git a/quality_control/parsing.py b/quality_control/parsing.py
index 9f8e8ee..f1f4f79 100644
--- a/quality_control/parsing.py
+++ b/quality_control/parsing.py
@@ -4,6 +4,7 @@ import os
 import collections
 from enum import Enum
 from functools import partial
+from zipfile import ZipFile, is_zipfile
 from typing import Iterable, Generator, Callable, Optional
 
 import quality_control.average as avg
@@ -79,11 +80,21 @@ def collect_errors(
             return errors + tuple(error for error in errs if error is not None)
         return errors + (errs,)
 
-    with open(filepath, encoding="utf-8") as input_file:
+    def __open_file__(filepath):
+        if not is_zipfile(filepath):
+            return open(filepath, encoding="utf-8")
+
+        with ZipFile(filepath, "r") as zfile:
+            return zfile.open(zfile.infolist()[0], "r")
+
+    with __open_file__(filepath) as input_file:
         for line_number, line in enumerate(input_file, start=1):
             if user_aborted():
                 break
 
+            if isinstance(line, bytes):
+                line = line.decode("utf-8")
+
             if line_number == 1:
                 for error in __process_errors__(
                         line_number, line, partial(header_errors, strains=strains),
diff --git a/scripts/worker.py b/scripts/worker.py
index 0ef5ae5..ecdfaa2 100644
--- a/scripts/worker.py
+++ b/scripts/worker.py
@@ -1,6 +1,7 @@
 import os
 import sys
 from typing import Callable
+from zipfile import Path, ZipFile, is_zipfile
 
 import jsonpickle
 from redis import Redis
@@ -64,6 +65,10 @@ def make_user_aborted(redis_conn, job_id):
         return user_aborted
     return __aborted__
 
+def get_zipfile_size(filepath):
+    with ZipFile(filepath, "r") as zfile:
+        return zfile.infolist()[0].file_size
+
 def main():
     args = process_cli_arguments()
     if args is None:
@@ -72,7 +77,8 @@ def main():
 
     with Redis.from_url(args.redisurl) as redis_conn:
         progress_calculator = make_progress_calculator(
-            os.stat(args.filepath).st_size)
+            get_zipfile_size(args.filepath) if is_zipfile(args.filepath)
+            else os.stat(args.filepath).st_size)
         progress_indicator = make_progress_indicator(
             redis_conn, args.job_id, progress_calculator)
         count = args.count