aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2022-06-10 08:06:47 +0300
committer Frederick Muriuki Muriithi 2022-06-10 08:06:47 +0300
commit 557d1d5c19ab518fa7abb3229c6d9042867e6c00 (patch)
tree 97b2ca39fe49600da74dbfa9ca358fa97f114a92
parent 35a9cf67a9e055925f11a12c9fb964e5dbeb5525 (diff)
download gn-uploader-557d1d5c19ab518fa7abb3229c6d9042867e6c00.tar.gz
Enable upload of zipfiles
-rw-r--r-- qc_app/entry.py 96
-rw-r--r-- qc_app/templates/index.html 2
-rw-r--r-- quality_control/parsing.py 13
-rw-r--r-- scripts/worker.py 8
4 files changed, 95 insertions, 24 deletions
diff --git a/qc_app/entry.py b/qc_app/entry.py
index b7b4b6f..25e2eed 100644
--- a/qc_app/entry.py
+++ b/qc_app/entry.py
@@ -1,5 +1,10 @@
"""Entry-point module"""
import os
+import random
+import string
+import mimetypes
+from typing import Tuple
+from zipfile import ZipFile, is_zipfile
from werkzeug.utils import secure_filename
from flask import (
@@ -13,38 +18,87 @@ from flask import (
entrybp = Blueprint("entry", __name__)
+def errors(request) -> Tuple[str, ...]:
+    """Return a tuple of the errors found in the `request`.
+
+    The checks performed are:
+
+    * the `filetype` form field is either "average" or "standard-error",
+    * a file was uploaded under the `qc_text_file` field, and
+    * the mimetype declared for that file is tab-separated-values or zip.
+
+    If no error is found, then an empty tuple is returned.
+    """
+    def __filetype_error__():
+        # `.get` so that a request without the `filetype` field is reported
+        # with the same message as an invalid value, rather than raising on
+        # the key lookup.
+        return (
+            ("Invalid file type provided.",)
+            if request.form.get("filetype") not in (
+                "average", "standard-error")
+            else tuple())
+
+    def __file_missing_error__():
+        return (
+            ("No file was uploaded.",)
+            if ("qc_text_file" not in request.files or
+                request.files["qc_text_file"].filename == "")
+            else tuple())
+
+    def __file_mimetype_error__():
+        text_file = request.files["qc_text_file"]
+        return (
+            (
+                ("Invalid file! Expected a tab-separated-values file, or a "
+                 "zip file of a tab-separated-values file."),)
+            if text_file.mimetype
+            not in ("text/tab-separated-values", "application/zip")
+            else tuple())
+
+    # The mimetype can only be inspected when a file was actually uploaded,
+    # so that check runs only if the "missing file" check found nothing.
+    return (
+        __filetype_error__() +
+        (__file_missing_error__() or __file_mimetype_error__()))
+
+def zip_file_errors(filepath, upload_dir) -> Tuple[str, ...]:
+    """Check the uploaded zip file for errors.
+
+    `filepath` is the path of the uploaded file and `upload_dir` is the
+    directory into which the single member of the zip file is extracted.
+
+    Returns a tuple of error messages; the tuple is empty if `filepath` is
+    not a zip file at all, or if it is a zip file holding exactly one
+    member whose name is recognised as a tab-separated-values file.
+    """
+    if not is_zipfile(filepath):
+        # Plain (non-zip) uploads have nothing to check here.
+        return tuple()
+
+    # The context manager guarantees the archive is closed on every path.
+    with ZipFile(filepath, "r") as zfile:
+        infolist = zfile.infolist()
+        if len(infolist) != 1:
+            return (
+                ("Expected exactly one (1) member file within the uploaded "
+                 f"zip file. Got {len(infolist)} member files."),)
+
+        member = infolist[0]
+        if member.is_dir():
+            return (
+                ("Expected a member text file in the uploaded zip file. Got "
+                 "a directory/folder."),)
+
+        zfile.extract(member, path=upload_dir)
+
+    # The type is guessed from the member's file name only.
+    mime = mimetypes.guess_type(f"{upload_dir}/{member.filename}")
+    if mime[0] != "text/tab-separated-values":
+        return (
+            ("Expected the member text file in the uploaded zip file to"
+             " be a tab-separated file."),)
+
+    return tuple()
+
@entrybp.route("/", methods=["GET", "POST"])
def upload_file():
"""Enables uploading the files"""
+ upload_dir = app.config["UPLOAD_FOLDER"]
if request.method == "GET":
return render_template("index.html")
- errors = False
- if request.form["filetype"] not in ("average", "standard-error"):
- flash("Invalid file type provided.", "alert-error")
- errors = True
+ request_errors = errors(request)
+ if request_errors:
+ for error in request_errors:
+ flash(error, "alert-error")
+ return render_template("index.html")
- if ("qc_text_file" not in request.files or
- request.files["qc_text_file"].filename == ""):
- flash("No file was uploaded.", "alert-error")
- errors = True
+ filename = secure_filename(request.files["qc_text_file"].filename)
+ # Creates missing parent directories and tolerates the directory existing.
+ os.makedirs(upload_dir, exist_ok=True)
- text_file = request.files["qc_text_file"]
- if text_file.mimetype != "text/tab-separated-values":
- flash("Invalid file! Expected a tab-separated-values file.",
- "alert-error")
- errors = True
+ filepath = os.path.join(upload_dir, filename)
+ request.files["qc_text_file"].save(filepath)
- if errors:
+ zip_errors = zip_file_errors(filepath, upload_dir)
+ if zip_errors:
+ for error in zip_errors:
+ flash(error, "alert-error")
return render_template("index.html")
- filename = secure_filename(text_file.filename)
- if not os.path.exists(app.config["UPLOAD_FOLDER"]):
- os.mkdir(app.config["UPLOAD_FOLDER"])
-
- filepath = os.path.join(app.config["UPLOAD_FOLDER"], filename)
- text_file.save(os.path.join(app.config["UPLOAD_FOLDER"], filename))
-
return redirect(url_for(
"parse.parse", filename=filename,
filetype=request.form["filetype"]))
diff --git a/qc_app/templates/index.html b/qc_app/templates/index.html
index 28aaa7f..b14f3d4 100644
--- a/qc_app/templates/index.html
+++ b/qc_app/templates/index.html
@@ -31,7 +31,7 @@
<label for="file_upload">select file</label>
<input type="file" name="qc_text_file" id="file_upload"
- accept="text/tab-separated-values" />
+ accept="text/tab-separated-values, application/zip" />
<input type="submit" value="upload file" class="btn btn-main" />
</form>
diff --git a/quality_control/parsing.py b/quality_control/parsing.py
index 9f8e8ee..f1f4f79 100644
--- a/quality_control/parsing.py
+++ b/quality_control/parsing.py
@@ -4,6 +4,7 @@ import os
import collections
from enum import Enum
from functools import partial
+from zipfile import ZipFile, is_zipfile
from typing import Iterable, Generator, Callable, Optional
import quality_control.average as avg
@@ -79,11 +80,21 @@ def collect_errors(
return errors + tuple(error for error in errs if error is not None)
return errors + (errs,)
- with open(filepath, encoding="utf-8") as input_file:
+ def __open_file__(filepath):
+ if not is_zipfile(filepath):
+ return open(filepath, encoding="utf-8")
+
+ with ZipFile(filepath, "r") as zfile:
+ return zfile.open(zfile.infolist()[0], "r")
+
+ with __open_file__(filepath) as input_file:
for line_number, line in enumerate(input_file, start=1):
if user_aborted():
break
+ if isinstance(line, bytes):
+ line = line.decode("utf-8")
+
if line_number == 1:
for error in __process_errors__(
line_number, line, partial(header_errors, strains=strains),
diff --git a/scripts/worker.py b/scripts/worker.py
index 0ef5ae5..ecdfaa2 100644
--- a/scripts/worker.py
+++ b/scripts/worker.py
@@ -1,6 +1,7 @@
import os
import sys
from typing import Callable
+from zipfile import Path, ZipFile, is_zipfile
import jsonpickle
from redis import Redis
@@ -64,6 +65,10 @@ def make_user_aborted(redis_conn, job_id):
return user_aborted
return __aborted__
+def get_zipfile_size(filepath):
+    """Return the `file_size` recorded for the first member of the zip file
+    found at `filepath`."""
+    with ZipFile(filepath, "r") as archive:
+        members = archive.infolist()
+        return members[0].file_size
+
def main():
args = process_cli_arguments()
if args is None:
@@ -72,7 +77,8 @@ def main():
with Redis.from_url(args.redisurl) as redis_conn:
progress_calculator = make_progress_calculator(
- os.stat(args.filepath).st_size)
+ get_zipfile_size(args.filepath) if is_zipfile(args.filepath)
+ else os.stat(args.filepath).st_size)
progress_indicator = make_progress_indicator(
redis_conn, args.job_id, progress_calculator)
count = args.count