aboutsummaryrefslogtreecommitdiff
path: root/uploader/files
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2025-05-12 10:48:26 -0500
committerFrederick Muriuki Muriithi2025-05-12 10:52:58 -0500
commitbb16f9e7488d7e5b9138025d08e326b5588beff4 (patch)
treee36d94a5b273a25fa2073ae09becc5c2ccd35520 /uploader/files
parent5cedf63b71cb10d6183919f5a499c8bddb9fa602 (diff)
downloadgn-uploader-bb16f9e7488d7e5b9138025d08e326b5588beff4.tar.gz
Break "merge file" race-condition
Multiple requests/threads were attempting to merge the same file, leading to race conditions and, in rare cases, actual data corruption.
Diffstat (limited to 'uploader/files')
-rw-r--r--uploader/files/views.py54
1 files changed, 47 insertions, 7 deletions
diff --git a/uploader/files/views.py b/uploader/files/views.py
index cebd325..29059c7 100644
--- a/uploader/files/views.py
+++ b/uploader/files/views.py
@@ -1,4 +1,6 @@
"""Module for generic files endpoints."""
+import time
+import random
import traceback
from pathlib import Path
@@ -60,7 +62,9 @@ def __merge_chunks__(targetfile: Path, chunkpaths: tuple[Path, ...]) -> Path:
with open(chunkfile, "rb") as _chunkdata:
_target.write(_chunkdata.read())
- chunkfile.unlink(missing_ok=True)
+ chunkfile.unlink() # Don't use `missing_ok=True` — chunk MUST exist
+ # If a chunk doesn't exist, it might indicate a race condition. Handle
+ # that instead.
return targetfile
@@ -93,15 +97,51 @@ def resumable_upload_post():
Path(chunks_directory(_fileid), chunk_name(_uploadfilename, _achunk))
for _achunk in range(1, _totalchunks+1))
if all(_file.exists() for _file in chunkpaths):
- # merge_files and clean up chunks
- __merge_chunks__(_targetfile, chunkpaths)
- chunks_directory(_fileid).rmdir()
+ ### HACK: Break possible race condition ###
+ # Looks like sometimes, there are multiple threads/requests trying
+ # to merge one file, leading to race conditions and in some rare
+ # instances, actual data corruption. This hack is meant to break
+ # that race condition.
+ _delays = (
+ 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163,
+ 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233,
+ 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293)
+ _lockfile = Path(chunks_directory(_fileid), "merge.lock")
+ while True:
+ time.sleep(random.choice(_delays) / 1000)
+ if (chunks_directory(_fileid).exists()
+ and not (_lockfile.exists() and _targetfile.exists())):
+ # merge_files and clean up chunks
+ _lockfile.touch()
+ __merge_chunks__(_targetfile, chunkpaths)
+ _lockfile.unlink()
+ chunks_directory(_fileid).rmdir()
+ continue
+
+ if (_targetfile.exists()
+ and not (
+ chunks_directory(_fileid).exists()
+ and _lockfile.exists())):
+ # merge complete
+ break
+
+ # There is still a thread that's merging this file
+ continue
+ ### END: HACK: Break possible race condition ###
+
+ if _targetfile.exists():
+ return jsonify({
+ "uploaded-file": _targetfile.name,
+ "original-name": _uploadfilename,
+ "message": "File was uploaded successfully!",
+ "statuscode": 200
+ }), 200
return jsonify({
"uploaded-file": _targetfile.name,
"original-name": _uploadfilename,
- "message": "File was uploaded successfully!",
- "statuscode": 200
- }), 200
+ "message": "Uploaded file is missing!",
+ "statuscode": 404
+ }), 404
return jsonify({
"message": f"Chunk {int(_chunk)} uploaded successfully.",
"statuscode": 201