diff options
Diffstat (limited to 'uploader/files')
-rw-r--r-- | uploader/files/__init__.py | 4 | ||||
-rw-r--r-- | uploader/files/chunks.py | 32 | ||||
-rw-r--r-- | uploader/files/functions.py | 39 | ||||
-rw-r--r-- | uploader/files/views.py | 97 |
4 files changed, 172 insertions, 0 deletions
# This span is a diff that adds four new files under ``uploader/files/``.
# Each section below holds the contents of one of those files; the diff
# header of each file is kept as a comment banner.

# ---------------------------------------------------------------------------
# File: uploader/files/__init__.py  (new file, mode 100644)
# ---------------------------------------------------------------------------
from .chunks import chunked_binary_read
from .functions import (fullpath,
                        save_file,
                        sha256_digest_over_file)

# ---------------------------------------------------------------------------
# File: uploader/files/chunks.py  (new file, mode 100644)
# ---------------------------------------------------------------------------
"""Functions dealing with chunking of files."""
from pathlib import Path
from typing import Iterator

from flask import current_app as app
from werkzeug.utils import secure_filename


def chunked_binary_read(
        filepath: Path, chunksize: int = 2048) -> Iterator[bytes]:
    """Read a file in binary mode in chunks.

    Yields whatever ``read(chunksize)`` returns, one piece at a time, and
    stops at the first empty read (end of file).
    """
    with open(filepath, "rb") as inputfile:
        while data := inputfile.read(chunksize):
            yield data


def chunk_name(uploadfilename: str, chunkno: int) -> str:
    """Generate chunk name from original filename and chunk number.

    The filename is passed through ``secure_filename`` and the chunk number
    is zero-padded to five digits, e.g. ``<name>_part_00001``.

    Raises:
        ValueError: if `uploadfilename` is empty or `chunkno` is below 1.
    """
    if uploadfilename == "":
        raise ValueError("Name cannot be empty!")
    if chunkno < 1:
        raise ValueError("Chunk number must be greater than zero")
    return f"{secure_filename(uploadfilename)}_part_{chunkno:05d}"


def chunks_directory(uniqueidentifier: str) -> Path:
    """Compute the directory where chunks are temporarily stored.

    The result is ``<UPLOAD_FOLDER>/tempdir_<uniqueidentifier>``, with
    ``UPLOAD_FOLDER`` read from the current Flask app's config.

    The identifier is placed in the path verbatim, so callers handling
    untrusted input must make sure it is a single, plain path component.

    Raises:
        ValueError: if `uniqueidentifier` is empty.
    """
    if uniqueidentifier == "":
        raise ValueError("Unique identifier cannot be empty!")
    return Path(app.config["UPLOAD_FOLDER"], f"tempdir_{uniqueidentifier}")

# ---------------------------------------------------------------------------
# File: uploader/files/functions.py  (new file, mode 100644)
# ---------------------------------------------------------------------------
"""Utilities to deal with uploaded files."""
import hashlib
from pathlib import Path
from datetime import datetime

from flask import current_app

from werkzeug.utils import secure_filename
from werkzeug.datastructures import FileStorage

from .chunks import chunked_binary_read


def save_file(fileobj: FileStorage, upload_dir: Path) -> Path:
    """Save the uploaded file and return the path.

    The file is stored under a name derived from the SHA-512 digest of the
    original filename and the current timestamp, not under the name sent by
    the client. `upload_dir` is created (including parents) if missing.
    """
    # NOTE(review): `assert` is stripped under `python -O`. It is kept here
    # so that callers keep seeing the same exception type (AssertionError).
    assert bool(fileobj), "Invalid file object!"
    hashed_name = hashlib.sha512(
        f"{fileobj.filename}::{datetime.now().isoformat()}".encode("utf8")
    ).hexdigest()
    filename = Path(secure_filename(hashed_name))  # type: ignore[arg-type]
    # `exist_ok` avoids the check-then-create race between concurrent
    # uploads; `parents` lets a nested upload directory be created.
    upload_dir.mkdir(parents=True, exist_ok=True)

    filepath = Path(upload_dir, filename)
    fileobj.save(filepath)
    return filepath


def fullpath(filename: str) -> Path:
    """Get a file's full path. This makes use of `flask.current_app`."""
    return Path(current_app.config["UPLOAD_FOLDER"], filename).absolute()


def sha256_digest_over_file(filepath: Path) -> str:
    """Compute the sha256 digest over a file's contents.

    The file is read through `chunked_binary_read`, so it is never loaded
    into memory as a whole. Returns the hexadecimal digest.
    """
    filehash = hashlib.sha256()
    for chunk in chunked_binary_read(filepath):
        filehash.update(chunk)

    return filehash.hexdigest()

# ---------------------------------------------------------------------------
# File: uploader/files/views.py  (new file, mode 100644)
# ---------------------------------------------------------------------------
"""Module for generic files endpoints."""
import shutil
import traceback
from pathlib import Path

from flask import request, jsonify, Blueprint, current_app as app

from .chunks import chunk_name, chunks_directory

files = Blueprint("files", __name__)


def _is_single_path_component(value: str) -> bool:
    """Tell whether `value` is exactly one plain path component.

    Rejects the empty string, ``.``, ``..`` and anything containing a path
    separator (or, on Windows, a drive/anchor), so that a client-supplied
    identifier cannot point outside the directory it is joined to.
    """
    return value not in ("", ".", "..") and Path(value).name == value


@files.route("/upload/resumable", methods=["GET"])
def resumable_upload_get():
    """Check whether one specific chunk has already been uploaded.

    Reads `resumableIdentifier`, `resumableFilename` and
    `resumableChunkNumber` from the query string.

    Responses:
        200 ("OK"): the chunk file exists in the chunks directory.
        400: a parameter is missing or invalid.
        404: the chunk file does not exist.
    """
    fileid = request.args.get("resumableIdentifier", type=str) or ""
    filename = request.args.get("resumableFilename", type=str) or ""
    chunk = request.args.get("resumableChunkNumber", type=int) or 0
    # ALL three parameters are required: `chunk_name` raises on an empty
    # filename or a chunk number below 1, and the identifier ends up in a
    # filesystem path.
    if not (_is_single_path_component(fileid) and filename and chunk >= 1):
        return jsonify({
            "message": (
                "At least one required query parameter is missing or "
                "invalid."),
            "error": "BadRequest",
            "statuscode": 400
        }), 400

    if Path(chunks_directory(fileid),
            chunk_name(filename, chunk)).exists():
        return "OK"

    return jsonify({
        "message": f"Chunk {chunk} was not found.",
        "error": "NotFound",
        "statuscode": 404
    }), 404


def __merge_chunks__(targetfile: Path, chunkpaths: tuple[Path, ...]) -> Path:
    """Merge the chunks into a single file.

    The chunks are appended to `targetfile` in the order given, and each
    chunk file is deleted right after it has been appended. Returns
    `targetfile`.
    """
    with open(targetfile, "ab") as _target:
        for chunkfile in chunkpaths:
            with open(chunkfile, "rb") as _chunkdata:
                # Stream the copy rather than loading a whole chunk into
                # memory at once.
                shutil.copyfileobj(_chunkdata, _target)

            chunkfile.unlink()
    return targetfile


@files.route("/upload/resumable", methods=["POST"])
def resumable_upload_post():
    """Do the actual chunks upload here.

    Saves the uploaded chunk (form part `file`) into the chunks directory
    for `resumableIdentifier`. Once every chunk from 1 to
    `resumableTotalChunks` is present, the chunks are merged into
    ``<UPLOAD_FOLDER>/<resumableIdentifier>`` and the chunks directory is
    removed.

    Responses:
        200: chunk stored; the body says whether the file is now complete.
        400: a parameter is missing/invalid, or the target file exists.
        500: an unexpected error occurred while storing/merging chunks.
    """
    _totalchunks = request.form.get("resumableTotalChunks", type=int) or 0
    _chunk = request.form.get("resumableChunkNumber", default=1, type=int)
    _uploadfilename = request.form.get(
        "resumableFilename", default="", type=str) or ""
    _fileid = request.form.get(
        "resumableIdentifier", default="", type=str) or ""

    # Validate before touching the filesystem:
    # - the identifier is joined into paths, so it must not be able to
    #   escape the upload folder;
    # - with zero total chunks `all(())` below would be true and an empty
    #   target file would be created;
    # - an empty filename or a chunk number below 1 makes `chunk_name`
    #   raise.
    if (not _is_single_path_component(_fileid)
            or _uploadfilename == ""
            or _totalchunks < 1
            or not 1 <= _chunk <= _totalchunks
            or "file" not in request.files):
        return jsonify({
            "message": (
                "At least one required form parameter is missing or "
                "invalid."),
            "error": "BadRequest",
            "statuscode": 400
        }), 400

    _targetfile = Path(app.config["UPLOAD_FOLDER"], _fileid)

    if _targetfile.exists():
        return jsonify({
            "message": (
                "A file with a similar unique identifier has previously been "
                "uploaded and possibly is/has being/been processed."),
            "error": "BadRequest",
            "statuscode": 400
        }), 400

    try:
        _chunksdir = chunks_directory(_fileid)
        _chunksdir.mkdir(exist_ok=True, parents=True)
        request.files["file"].save(
            Path(_chunksdir, chunk_name(_uploadfilename, _chunk)))

        # Check whether upload is complete
        chunkpaths = tuple(
            Path(_chunksdir, chunk_name(_uploadfilename, _achunk))
            for _achunk in range(1, _totalchunks + 1))
        if all(_file.exists() for _file in chunkpaths):
            # merge_files and clean up chunks
            __merge_chunks__(_targetfile, chunkpaths)
            _chunksdir.rmdir()
            return jsonify({
                "uploaded-file": _targetfile.name,
                "message": "File was uploaded successfully!",
                "statuscode": 200
            }), 200
        # No explicit status is given, so this is sent with HTTP 200.
        # NOTE(review): presumably intentional, so that the client counts
        # this chunk as accepted while others are still pending -- confirm.
        return jsonify({
            "message": "Some chunks were not uploaded!",
            "error": "ChunksUploadError",
            "error-description": "Some chunks were not uploaded!"
        })
    except Exception as exc:  # pylint: disable=[broad-except]
        msg = "Error processing uploaded file chunks."
        app.logger.error(msg, exc_info=True, stack_info=True)
        # NOTE(review): the full traceback is returned to the client, which
        # discloses server internals; consider limiting it to debug mode.
        return jsonify({
            "message": msg,
            "error": type(exc).__name__,
            "error-description": " ".join(str(arg) for arg in exc.args),
            "error-trace": traceback.format_exception(exc)
        }), 500