diff options
author | Frederick Muriuki Muriithi | 2025-05-12 10:48:26 -0500 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2025-05-12 10:52:58 -0500 |
commit | bb16f9e7488d7e5b9138025d08e326b5588beff4 (patch) | |
tree | e36d94a5b273a25fa2073ae09becc5c2ccd35520 /uploader/files | |
parent | 5cedf63b71cb10d6183919f5a499c8bddb9fa602 (diff) | |
download | gn-uploader-bb16f9e7488d7e5b9138025d08e326b5588beff4.tar.gz |
Break "merge file" race-condition
Multiple requests/threads were attempting to merge the same file,
leading to race conditions, and sometimes, rarely, it would lead to
actual data corruption.
Diffstat (limited to 'uploader/files')
-rw-r--r-- | uploader/files/views.py | 54 |
1 files changed, 47 insertions, 7 deletions
diff --git a/uploader/files/views.py b/uploader/files/views.py index cebd325..29059c7 100644 --- a/uploader/files/views.py +++ b/uploader/files/views.py @@ -1,4 +1,6 @@ """Module for generic files endpoints.""" +import time +import random import traceback from pathlib import Path @@ -60,7 +62,9 @@ def __merge_chunks__(targetfile: Path, chunkpaths: tuple[Path, ...]) -> Path: with open(chunkfile, "rb") as _chunkdata: _target.write(_chunkdata.read()) - chunkfile.unlink(missing_ok=True) + chunkfile.unlink() # Don't use `missing_ok=True` — chunk MUST exist + # If chunk doesn't exist, it might indicate a race condition. Handle + # that instead. return targetfile @@ -93,15 +97,51 @@ def resumable_upload_post(): Path(chunks_directory(_fileid), chunk_name(_uploadfilename, _achunk)) for _achunk in range(1, _totalchunks+1)) if all(_file.exists() for _file in chunkpaths): - # merge_files and clean up chunks - __merge_chunks__(_targetfile, chunkpaths) - chunks_directory(_fileid).rmdir() + ### HACK: Break possible race condition ### + # Looks like sometimes, there are multiple threads/requests trying + # to merge one file, leading to race conditions and in some rare + # instances, actual data corruption. This hack is meant to break + # that race condition. 
+ _delays = ( + 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, + 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, + 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293) + _lockfile = Path(chunks_directory(_fileid), "merge.lock") + while True: + time.sleep(random.choice(_delays) / 1000) + if (chunks_directory(_fileid).exists() + and not (_lockfile.exists() and _targetfile.exists())): + # merge_files and clean up chunks + _lockfile.touch() + __merge_chunks__(_targetfile, chunkpaths) + _lockfile.unlink() + chunks_directory(_fileid).rmdir() + continue + + if (_targetfile.exists() + and not ( + chunks_directory(_fileid).exists() + and _lockfile.exists())): + # merge complete + break + + # There is still a thread that's merging this file + continue + ### END: HACK: Break possible race condition ### + + if _targetfile.exists(): + return jsonify({ + "uploaded-file": _targetfile.name, + "original-name": _uploadfilename, + "message": "File was uploaded successfully!", + "statuscode": 200 + }), 200 return jsonify({ "uploaded-file": _targetfile.name, "original-name": _uploadfilename, - "message": "File was uploaded successfully!", - "statuscode": 200 - }), 200 + "message": "Uploaded file is missing!", + "statuscode": 404 + }), 404 return jsonify({ "message": f"Chunk {int(_chunk)} uploaded successfully.", "statuscode": 201 |