diff options
author | Frederick Muriuki Muriithi | 2022-06-10 08:06:47 +0300 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2022-06-10 08:06:47 +0300 |
commit | 557d1d5c19ab518fa7abb3229c6d9042867e6c00 (patch) | |
tree | 97b2ca39fe49600da74dbfa9ca358fa97f114a92 | |
parent | 35a9cf67a9e055925f11a12c9fb964e5dbeb5525 (diff) | |
download | gn-uploader-557d1d5c19ab518fa7abb3229c6d9042867e6c00.tar.gz |
Enable upload of zipfiles
-rw-r--r-- | qc_app/entry.py | 96 | ||||
-rw-r--r-- | qc_app/templates/index.html | 2 | ||||
-rw-r--r-- | quality_control/parsing.py | 13 | ||||
-rw-r--r-- | scripts/worker.py | 8 |
4 files changed, 95 insertions, 24 deletions
diff --git a/qc_app/entry.py b/qc_app/entry.py index b7b4b6f..25e2eed 100644 --- a/qc_app/entry.py +++ b/qc_app/entry.py @@ -1,5 +1,10 @@ """Entry-point module""" import os +import random +import string +import mimetypes +from typing import Tuple +from zipfile import ZipFile, is_zipfile from werkzeug.utils import secure_filename from flask import ( @@ -13,38 +18,87 @@ from flask import ( entrybp = Blueprint("entry", __name__) +def errors(request) -> Tuple[str, ...]: + """Return a tuple of the errors found in the `request`. If no error is + found, then an empty tuple is returned.""" + def __filetype_error__(): + return ( + ("Invalid file type provided.",) + if request.form["filetype"] not in ("average", "standard-error") + else tuple()) + + def __file_missing_error__(): + return ( + ("No file was uploaded.",) + if ("qc_text_file" not in request.files or + request.files["qc_text_file"].filename == "") + else tuple()) + + def __file_mimetype_error__(): + text_file = request.files["qc_text_file"] + return ( + ( + ("Invalid file! Expected a tab-separated-values file, or a zip " + "file of the a tab-separated-values file."),) + if text_file.mimetype + not in ("text/tab-separated-values", "application/zip") + else tuple()) + + return ( + __filetype_error__() + + (__file_missing_error__() or __file_mimetype_error__())) + +def zip_file_errors(filepath, upload_dir) -> Tuple[str, ...]: + """Check the uploaded zip file for errors.""" + zfile_errors = ("Fail always!!",) + if is_zipfile(filepath): + zfile = ZipFile(filepath, "r") + infolist = zfile.infolist() + if len(infolist) != 1: + zfile_errors = zfile_errors + ( + ("Expected exactly one (1) member file within the uploaded zip " + "file. Got {len(infolist)} member files.")) + if len(infolist) == 1 and infolist[0].is_dir(): + zfile_errors = zfile_errors + ( + ("Expected a member text file in the uploaded zip file. Got a " + "directory/folder.")) + + if len(infolist) == 1 and not infolist[0].is_dir(): + zfile.extract(infolist[0], path=upload_dir) + mime = mimetypes.guess_type(f"{upload_dir}/{infolist[0].filename}") + if mime[0] != "text/tab-separated-values": + zfile_errors = zfile_errors + ( + ("Expected the member text file in the uploaded zip file to" + " be a tab-separated file.")) + + return zfile_errors + @entrybp.route("/", methods=["GET", "POST"]) def upload_file(): """Enables uploading the files""" + upload_dir = app.config["UPLOAD_FOLDER"] if request.method == "GET": return render_template("index.html") - errors = False - if request.form["filetype"] not in ("average", "standard-error"): - flash("Invalid file type provided.", "alert-error") - errors = True + request_errors = errors(request) + if request_errors: + for error in request_errors: + flash(error, "alert-error") + return render_template("index.html") - if ("qc_text_file" not in request.files or - request.files["qc_text_file"].filename == ""): - flash("No file was uploaded.", "alert-error") - errors = True + filename = secure_filename(request.files["qc_text_file"].filename) + if not os.path.exists(upload_dir): + os.mkdir(upload_dir) - text_file = request.files["qc_text_file"] - if text_file.mimetype != "text/tab-separated-values": - flash("Invalid file! Expected a tab-separated-values file.", - "alert-error") - errors = True + filepath = os.path.join(upload_dir, filename) + request.files["qc_text_file"].save(os.path.join(upload_dir, filename)) - if errors: + zip_errors = zip_file_errors(filepath, upload_dir) + if zip_errors: + for error in zip_errors: + flash(error, "alert-error") return render_template("index.html") - filename = secure_filename(text_file.filename) - if not os.path.exists(app.config["UPLOAD_FOLDER"]): - os.mkdir(app.config["UPLOAD_FOLDER"]) - - filepath = os.path.join(app.config["UPLOAD_FOLDER"], filename) - text_file.save(os.path.join(app.config["UPLOAD_FOLDER"], filename)) - return redirect(url_for( "parse.parse", filename=filename, filetype=request.form["filetype"])) diff --git a/qc_app/templates/index.html b/qc_app/templates/index.html index 28aaa7f..b14f3d4 100644 --- a/qc_app/templates/index.html +++ b/qc_app/templates/index.html @@ -31,7 +31,7 @@ <label for="file_upload">select file</label> <input type="file" name="qc_text_file" id="file_upload" - accept="text/tab-separated-values" /> + accept="text/tab-separated-values, application/zip" /> <input type="submit" value="upload file" class="btn btn-main" /> </form> diff --git a/quality_control/parsing.py b/quality_control/parsing.py index 9f8e8ee..f1f4f79 100644 --- a/quality_control/parsing.py +++ b/quality_control/parsing.py @@ -4,6 +4,7 @@ import os import collections from enum import Enum from functools import partial +from zipfile import ZipFile, is_zipfile from typing import Iterable, Generator, Callable, Optional import quality_control.average as avg @@ -79,11 +80,21 @@ def collect_errors( return errors + tuple(error for error in errs if error is not None) return errors + (errs,) - with open(filepath, encoding="utf-8") as input_file: + def __open_file__(filepath): + if not is_zipfile(filepath): + return open(filepath, encoding="utf-8") + + with ZipFile(filepath, "r") as zfile: + return zfile.open(zfile.infolist()[0], "r") + + with __open_file__(filepath) as input_file: for line_number, line in enumerate(input_file, start=1): if user_aborted(): break + if isinstance(line, bytes): + line = line.decode("utf-8") + if line_number == 1: for error in __process_errors__( line_number, line, partial(header_errors, strains=strains), diff --git a/scripts/worker.py b/scripts/worker.py index 0ef5ae5..ecdfaa2 100644 --- a/scripts/worker.py +++ b/scripts/worker.py @@ -1,6 +1,7 @@ import os import sys from typing import Callable +from zipfile import Path, ZipFile, is_zipfile import jsonpickle from redis import Redis @@ -64,6 +65,10 @@ def make_user_aborted(redis_conn, job_id): return user_aborted return __aborted__ +def get_zipfile_size(filepath): + with ZipFile(filepath, "r") as zfile: + return zfile.infolist()[0].file_size + def main(): args = process_cli_arguments() if args is None: @@ -72,7 +77,8 @@ def main(): with Redis.from_url(args.redisurl) as redis_conn: progress_calculator = make_progress_calculator( - os.stat(args.filepath).st_size) + get_zipfile_size(args.filepath) if is_zipfile(args.filepath) + else os.stat(args.filepath).st_size) progress_indicator = make_progress_indicator( redis_conn, args.job_id, progress_calculator) count = args.count |