about summary refs log tree commit diff
path: root/uploader/files
diff options
context:
space:
mode:
Diffstat (limited to 'uploader/files')
-rw-r--r--uploader/files/chunks.py4
-rw-r--r--uploader/files/functions.py4
-rw-r--r--uploader/files/views.py59
3 files changed, 57 insertions, 10 deletions
diff --git a/uploader/files/chunks.py b/uploader/files/chunks.py
index c4360b5..f63f32f 100644
--- a/uploader/files/chunks.py
+++ b/uploader/files/chunks.py
@@ -5,6 +5,8 @@ from typing import Iterator
 from flask import current_app as app
 from werkzeug.utils import secure_filename
 
+from uploader.configutils import uploads_dir
+
 
 def chunked_binary_read(filepath: Path, chunksize: int = 2048) -> Iterator:
     """Read a file in binary mode in chunks."""
@@ -29,4 +31,4 @@ def chunks_directory(uniqueidentifier: str) -> Path:
     """Compute the directory where chunks are temporarily stored."""
     if uniqueidentifier == "":
         raise ValueError("Unique identifier cannot be empty!")
-    return Path(app.config["UPLOAD_FOLDER"], f"tempdir_{uniqueidentifier}")
+    return Path(uploads_dir(app), f"tempdir_{uniqueidentifier}")
diff --git a/uploader/files/functions.py b/uploader/files/functions.py
index 7b9f06b..68f4e16 100644
--- a/uploader/files/functions.py
+++ b/uploader/files/functions.py
@@ -8,6 +8,8 @@ from flask import current_app
 from werkzeug.utils import secure_filename
 from werkzeug.datastructures import FileStorage
 
+from uploader.configutils import uploads_dir
+
 from .chunks import chunked_binary_read
 
 def save_file(fileobj: FileStorage, upload_dir: Path, hashed: bool = True) -> Path:
@@ -30,7 +32,7 @@ def save_file(fileobj: FileStorage, upload_dir: Path, hashed: bool = True) -> Pa
 
 def fullpath(filename: str):
     """Get a file's full path. This makes use of `flask.current_app`."""
-    return Path(current_app.config["UPLOAD_FOLDER"], filename).absolute()
+    return Path(uploads_dir(current_app), filename).absolute()
 
 
 def sha256_digest_over_file(filepath: Path) -> str:
diff --git a/uploader/files/views.py b/uploader/files/views.py
index ddf5350..ea0e827 100644
--- a/uploader/files/views.py
+++ b/uploader/files/views.py
@@ -1,16 +1,20 @@
 """Module for generic files endpoints."""
+import time
+import random
 import traceback
 from pathlib import Path
 
 from flask import request, jsonify, Blueprint, current_app as app
 
+from uploader.configutils import uploads_dir
+
 from .chunks import chunk_name, chunks_directory
 
 files = Blueprint("files", __name__)
 
 def target_file(fileid: str) -> Path:
     """Compute the full path for the target file."""
-    return Path(app.config["UPLOAD_FOLDER"], fileid)
+    return Path(uploads_dir(app), fileid)
 
 
 @files.route("/upload/resumable", methods=["GET"])
@@ -56,10 +60,13 @@ def __merge_chunks__(targetfile: Path, chunkpaths: tuple[Path, ...]) -> Path:
     """Merge the chunks into a single file."""
     with open(targetfile, "ab") as _target:
         for chunkfile in chunkpaths:
+            app.logger.error("Merging chunk: %s", chunkfile)
             with open(chunkfile, "rb") as _chunkdata:
                 _target.write(_chunkdata.read())
 
-            chunkfile.unlink(missing_ok=True)
+            chunkfile.unlink() # Don't use `missing_ok=True` — chunk MUST exist
+            # If chunk doesn't exist, it might indicate a race condition. Handle
+            # that instead.
     return targetfile
 
 
@@ -92,15 +99,51 @@ def resumable_upload_post():
             Path(chunks_directory(_fileid), chunk_name(_uploadfilename, _achunk))
             for _achunk in range(1, _totalchunks+1))
         if all(_file.exists() for _file in chunkpaths):
-            # merge_files and clean up chunks
-            __merge_chunks__(_targetfile, chunkpaths)
-            chunks_directory(_fileid).rmdir()
+            ### HACK: Break possible race condition ###
+            # Looks like sometimes, there are multiple threads/requests trying
+            # to merge one file, leading to race conditions and in some rare
+            # instances, actual data corruption. This hack is meant to break
+            # that race condition.
+            _delays = (
+                101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163,
+                167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233,
+                239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293)
+            _lockfile = Path(chunks_directory(_fileid), "merge.lock")
+            while True:
+                time.sleep(random.choice(_delays) / 1000)
+                if (chunks_directory(_fileid).exists()
+                    and not (_lockfile.exists() and _targetfile.exists())):
+                    # merge_files and clean up chunks
+                    _lockfile.touch()
+                    __merge_chunks__(_targetfile, chunkpaths)
+                    _lockfile.unlink()
+                    chunks_directory(_fileid).rmdir()
+                    continue
+
+                if (_targetfile.exists()
+                    and not (
+                        chunks_directory(_fileid).exists()
+                        and _lockfile.exists())):
+                    # merge complete
+                    break
+
+                # There is still a thread that's merging this file
+                continue
+            ### END: HACK: Break possible race condition ###
+
+            if _targetfile.exists():
+                return jsonify({
+                    "uploaded-file": _targetfile.name,
+                    "original-name": _uploadfilename,
+                    "message": "File was uploaded successfully!",
+                    "statuscode": 200
+                }), 200
             return jsonify({
                 "uploaded-file": _targetfile.name,
                 "original-name": _uploadfilename,
-                "message": "File was uploaded successfully!",
-                "statuscode": 200
-            }), 200
+                "message": "Uploaded file is missing!",
+                "statuscode": 404
+            }), 404
         return jsonify({
             "message": f"Chunk {int(_chunk)} uploaded successfully.",
             "statuscode": 201