diff options
Diffstat (limited to 'uploader/files')
| -rw-r--r-- | uploader/files/chunks.py | 4 | ||||
| -rw-r--r-- | uploader/files/functions.py | 4 | ||||
| -rw-r--r-- | uploader/files/views.py | 59 |
3 files changed, 57 insertions, 10 deletions
diff --git a/uploader/files/chunks.py b/uploader/files/chunks.py index c4360b5..f63f32f 100644 --- a/uploader/files/chunks.py +++ b/uploader/files/chunks.py @@ -5,6 +5,8 @@ from typing import Iterator from flask import current_app as app from werkzeug.utils import secure_filename +from uploader.configutils import uploads_dir + def chunked_binary_read(filepath: Path, chunksize: int = 2048) -> Iterator: """Read a file in binary mode in chunks.""" @@ -29,4 +31,4 @@ def chunks_directory(uniqueidentifier: str) -> Path: """Compute the directory where chunks are temporarily stored.""" if uniqueidentifier == "": raise ValueError("Unique identifier cannot be empty!") - return Path(app.config["UPLOAD_FOLDER"], f"tempdir_{uniqueidentifier}") + return Path(uploads_dir(app), f"tempdir_{uniqueidentifier}") diff --git a/uploader/files/functions.py b/uploader/files/functions.py index 7b9f06b..68f4e16 100644 --- a/uploader/files/functions.py +++ b/uploader/files/functions.py @@ -8,6 +8,8 @@ from flask import current_app from werkzeug.utils import secure_filename from werkzeug.datastructures import FileStorage +from uploader.configutils import uploads_dir + from .chunks import chunked_binary_read def save_file(fileobj: FileStorage, upload_dir: Path, hashed: bool = True) -> Path: @@ -30,7 +32,7 @@ def save_file(fileobj: FileStorage, upload_dir: Path, hashed: bool = True) -> Pa def fullpath(filename: str): """Get a file's full path. 
This makes use of `flask.current_app`.""" - return Path(current_app.config["UPLOAD_FOLDER"], filename).absolute() + return Path(uploads_dir(current_app), filename).absolute() def sha256_digest_over_file(filepath: Path) -> str: diff --git a/uploader/files/views.py b/uploader/files/views.py index ddf5350..ea0e827 100644 --- a/uploader/files/views.py +++ b/uploader/files/views.py @@ -1,16 +1,20 @@ """Module for generic files endpoints.""" +import time +import random import traceback from pathlib import Path from flask import request, jsonify, Blueprint, current_app as app +from uploader.configutils import uploads_dir + from .chunks import chunk_name, chunks_directory files = Blueprint("files", __name__) def target_file(fileid: str) -> Path: """Compute the full path for the target file.""" - return Path(app.config["UPLOAD_FOLDER"], fileid) + return Path(uploads_dir(app), fileid) @files.route("/upload/resumable", methods=["GET"]) @@ -56,10 +60,13 @@ def __merge_chunks__(targetfile: Path, chunkpaths: tuple[Path, ...]) -> Path: """Merge the chunks into a single file.""" with open(targetfile, "ab") as _target: for chunkfile in chunkpaths: + app.logger.error("Merging chunk: %s", chunkfile) with open(chunkfile, "rb") as _chunkdata: _target.write(_chunkdata.read()) - chunkfile.unlink(missing_ok=True) + chunkfile.unlink() # Don't use `missing_ok=True` — chunk MUST exist + # If chunk doesn't exist, it might indicate a race condition. Handle + # that instead. 
return targetfile @@ -92,15 +99,51 @@ def resumable_upload_post(): Path(chunks_directory(_fileid), chunk_name(_uploadfilename, _achunk)) for _achunk in range(1, _totalchunks+1)) if all(_file.exists() for _file in chunkpaths): - # merge_files and clean up chunks - __merge_chunks__(_targetfile, chunkpaths) - chunks_directory(_fileid).rmdir() + ### HACK: Break possible race condition ### + # Looks like sometimes, there are multiple threads/requests trying + # to merge one file, leading to race conditions and in some rare + # instances, actual data corruption. This hack is meant to break + # that race condition. + _delays = ( + 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, + 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, + 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293) + _lockfile = Path(chunks_directory(_fileid), "merge.lock") + while True: + time.sleep(random.choice(_delays) / 1000) + if (chunks_directory(_fileid).exists() + and not (_lockfile.exists() and _targetfile.exists())): + # merge_files and clean up chunks + _lockfile.touch() + __merge_chunks__(_targetfile, chunkpaths) + _lockfile.unlink() + chunks_directory(_fileid).rmdir() + continue + + if (_targetfile.exists() + and not ( + chunks_directory(_fileid).exists() + and _lockfile.exists())): + # merge complete + break + + # There is still a thread that's merging this file + continue + ### END: HACK: Break possible race condition ### + + if _targetfile.exists(): + return jsonify({ + "uploaded-file": _targetfile.name, + "original-name": _uploadfilename, + "message": "File was uploaded successfully!", + "statuscode": 200 + }), 200 return jsonify({ "uploaded-file": _targetfile.name, "original-name": _uploadfilename, - "message": "File was uploaded successfully!", - "statuscode": 200 - }), 200 + "message": "Uploaded file is missing!", + "statuscode": 404 + }), 404 return jsonify({ "message": f"Chunk {int(_chunk)} uploaded successfully.", "statuscode": 201 |
