author     Frederick Muriuki Muriithi  2024-06-27 16:40:52 -0500
committer  Frederick Muriuki Muriithi  2024-06-27 16:40:52 -0500
commit     47c2ea64682064d7cb609e5459d7bd2e49efa17e (patch)
tree       b067922e2609188cdc00319aa9883210e656891f
parent     59b345294cda9cf25b20ae7bfd617f62655ad6da (diff)
download   gn-uploader-47c2ea64682064d7cb609e5459d7bd2e49efa17e.tar.gz
Handle chunks on backend and update UI with progress
Handle the uploaded chunks, enabling resumption of the upload if
incomplete.

Update the UI with the progress of the upload.
-rw-r--r--  qc_app/templates/rqtl2/upload-rqtl2-bundle-step-01.html  |  48
-rw-r--r--  qc_app/upload/rqtl2.py                                   | 160
2 files changed, 175 insertions(+), 33 deletions(-)
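
For context (this is an illustrative sketch, not part of the commit), a client following the Resumable.js test-before-upload convention can probe the new GET endpoint to find chunks already stored on the server and re-send only the missing ones, which is what makes resumption possible. The base URL, IDs, and any blueprint URL prefix below are hypothetical placeholders; the path shape follows the routes added in this commit.

# Sketch only: assumes a hypothetical deployment at BASE_URL and the
# rqtl2-bundle-chunked routes introduced in this commit.
import requests

BASE_URL = "http://localhost:8080"          # hypothetical
SPECIES_ID, POPULATION_ID = 1, 1            # hypothetical
ENDPOINT = (f"{BASE_URL}/upload/species/{SPECIES_ID}"
            f"/population/{POPULATION_ID}/rqtl2-bundle-chunked")

def chunk_already_uploaded(fileid: str, filename: str, chunkno: int) -> bool:
    """Ask the server whether a given chunk already exists on disk."""
    response = requests.get(ENDPOINT, params={
        "resumableIdentifier": fileid,
        "resumableFilename": filename,
        "resumableChunkNumber": chunkno,
    })
    # The GET view returns "OK" (HTTP 200) when the chunk file exists,
    # and 404 otherwise.
    return response.status_code == 200

# A resuming client would POST (multipart, field name "file") only the
# chunks for which this returns False, to the same endpoint.

The actual changes follow.
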
diff --git a/qc_app/templates/rqtl2/upload-rqtl2-bundle-step-01.html b/qc_app/templates/rqtl2/upload-rqtl2-bundle-step-01.html
index b9320ea..c6c79e5 100644
--- a/qc_app/templates/rqtl2/upload-rqtl2-bundle-step-01.html
+++ b/qc_app/templates/rqtl2/upload-rqtl2-bundle-step-01.html
@@ -58,13 +58,13 @@
             class="btn btn-danger"
             style="display: none">cancel upload</button>
   </div>
-  <div class="progress" style="display: none">
+  <div id="resumable-progress-bar" class="progress" style="display: none">
     <div class="progress-bar"
          role="progress-bar"
          aria-valuenow="60"
          aria-valuemin="0"
          aria-valuemax="100"
-         style="width: 60%;">
+         style="width: 0%;">
       Uploading: 60%
     </div>
   </div>
@@ -76,7 +76,10 @@
 	      population_id=population.InbredSetId)}}"
       method="POST"
       enctype="multipart/form-data"
-      data-resumable-action="/no/such/endpoint">
+      data-resumable-target="{{url_for(
+                             'upload.rqtl2.upload_rqtl2_bundle_chunked_post',
+                             species_id=species.SpeciesId,
+                             population_id=population.InbredSetId)}}">
   <input type="hidden" name="species_id" value="{{species.SpeciesId}}" />
   <input type="hidden" name="population_id"
 	 value="{{population.InbredSetId}}" />
@@ -138,7 +141,7 @@
   }
 
   var r = Resumable({
-      target: $("#frm-upload-rqtl2-bundle").attr("data-resumable-action"),
+      target: $("#frm-upload-rqtl2-bundle").attr("data-resumable-target"),
       fileType: ["zip"],
       maxFiles: 1,
       forceChunkSize: true,
@@ -164,7 +167,8 @@
       r.assignBrowse(document.getElementById("resumable-browse-button"));
 
       // Event handlers
-      r.on("filesAdded", function(files) {
+
+      function display_files(files) {
           displayArea = $("#resumable-selected-files")
           displayArea.empty();
           files.forEach((file) => {
@@ -190,10 +194,14 @@
                             + "</li>"));
               displayElement.find(".panel-body").append(list);
               displayElement.appendTo("#resumable-selected-files");
-              $("#resumable-upload-button").css("display", "");
-              $("#resumable-upload-button").on("click", (event) => {
-                  r.upload()
-              });
+          });
+      }
+
+      r.on("filesAdded", function(files) {
+          display_files(files);
+          $("#resumable-upload-button").css("display", "");
+          $("#resumable-upload-button").on("click", (event) => {
+              r.upload();
           });
       });
 
@@ -207,9 +215,31 @@
                   }
               });
               $("#resumable-cancel-upload-button").css("display", "none");
+              $("#resumable-upload-button").on("click", (event) => {
+                  r.files.forEach((file) => {file.retry();});
+              });
               $("#resumable-upload-button").css("display", "");
           });
       });
+
+      r.on("progress", () => {
+          var progress = (r.progress() * 100).toFixed(2);
+          var pbar = $("#resumable-progress-bar > .progress-bar");
+          $("#resumable-progress-bar").css("display", "");
+          pbar.css("width", progress+"%");
+          pbar.attr("aria-valuenow", progress);
+          pbar.text("Uploading: " + progress + "%");
+      })
+
+      r.on("fileSuccess", (file, message) => {
+          if(message != "OK") {
+              var uri = (window.location.protocol
+                         + "//"
+                         + window.location.host
+                         + message);
+              window.location.replace(uri);
+          }
+      });
   } else {
       setup_upload_handlers(
           "frm-upload-rqtl2-bundle", make_data_uploader(
diff --git a/qc_app/upload/rqtl2.py b/qc_app/upload/rqtl2.py
index e79f1db..4ff7ba3 100644
--- a/qc_app/upload/rqtl2.py
+++ b/qc_app/upload/rqtl2.py
@@ -12,6 +12,8 @@ from typing import Union, Callable, Optional
 import MySQLdb as mdb
 from redis import Redis
 from MySQLdb.cursors import DictCursor
+from werkzeug.utils import secure_filename
+from werkzeug.exceptions import NotFound, BadRequest
 from flask import (
     flash,
     escape,
@@ -178,30 +180,140 @@ def upload_rqtl2_bundle(species_id: int, population_id: int):
             app.logger.debug("The file is not a zip file.")
             raise __RequestError__("Invalid file! Expected a zip file.")
 
-        redisuri = app.config["REDIS_URL"]
-        with Redis.from_url(redisuri, decode_responses=True) as rconn:
-            jobid = str(uuid4())
-            redis_ttl_seconds = app.config["JOBS_TTL_SECONDS"]
-            jobs.launch_job(
-                jobs.initialise_job(
-                    rconn,
-                    jobs.jobsnamespace(),
-                    jobid,
-                    [sys.executable, "-m", "scripts.qc_on_rqtl2_bundle",
-                     app.config["SQL_URI"], app.config["REDIS_URL"],
-                     jobs.jobsnamespace(), jobid, "--redisexpiry",
-                     str(redis_ttl_seconds)],
-                    "rqtl2-bundle-qc-job",
-                    redis_ttl_seconds,
-                    {"job-metadata": json.dumps({
-                        "speciesid": species_id,
-                        "populationid": population_id,
-                        "rqtl2-bundle-file": str(the_file.absolute()),
-                        "original-filename": request.files["rqtl2_bundle_file"].filename})}),
-                redisuri,
-                f"{app.config['UPLOAD_FOLDER']}/job_errors")
-            return redirect(url_for(
-                "upload.rqtl2.rqtl2_bundle_qc_status", jobid=jobid))
+        jobid = trigger_rqtl2_bundle_qc(
+            species_id,
+            population_id,
+            the_file,
+            request.files["rqtl2_bundle_file"].filename)#type: ignore[arg-type]
+        return redirect(url_for(
+            "upload.rqtl2.rqtl2_bundle_qc_status", jobid=jobid))
+
+
+def trigger_rqtl2_bundle_qc(
+        species_id: int,
+        population_id: int,
+        rqtl2bundle: Path,
+        originalfilename: str
+) -> UUID:
+    """Trigger QC on the R/qtl2 bundle."""
+    redisuri = app.config["REDIS_URL"]
+    with Redis.from_url(redisuri, decode_responses=True) as rconn:
+        jobid = uuid4()
+        redis_ttl_seconds = app.config["JOBS_TTL_SECONDS"]
+        jobs.launch_job(
+            jobs.initialise_job(
+                rconn,
+                jobs.jobsnamespace(),
+                str(jobid),
+                [sys.executable, "-m", "scripts.qc_on_rqtl2_bundle",
+                 app.config["SQL_URI"], app.config["REDIS_URL"],
+                 jobs.jobsnamespace(), str(jobid), "--redisexpiry",
+                 str(redis_ttl_seconds)],
+                "rqtl2-bundle-qc-job",
+                redis_ttl_seconds,
+                {"job-metadata": json.dumps({
+                    "speciesid": species_id,
+                    "populationid": population_id,
+                    "rqtl2-bundle-file": str(rqtl2bundle.absolute()),
+                    "original-filename": originalfilename})}),
+            redisuri,
+            f"{app.config['UPLOAD_FOLDER']}/job_errors")
+        return jobid
+
+
+def chunk_name(uploadfilename: str, chunkno: int) -> str:
+    """Generate chunk name from original filename and chunk number"""
+    if uploadfilename == "":
+        raise ValueError("Name cannot be empty!")
+    if chunkno < 1:
+        raise ValueError("Chunk number must be greater than zero")
+    return f"{secure_filename(uploadfilename)}_part_{chunkno:05d}"
+
+
+def chunks_directory(uniqueidentifier: str) -> Path:
+    """Compute the directory where chunks are temporarily stored."""
+    if uniqueidentifier == "":
+        raise ValueError("Unique identifier cannot be empty!")
+    return Path(app.config["UPLOAD_FOLDER"], f"tempdir_{uniqueidentifier}")
+
+
+@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
+              "/rqtl2-bundle-chunked"),
+             methods=["GET"])
+def upload_rqtl2_bundle_chunked_get(# pylint: disable=["unused-argument"]
+        species_id: int,
+        population_id: int
+):
+    """
+    Extension to the `upload_rqtl2_bundle` endpoint above that provides a way
+    for testing whether all the chunks have been uploaded and to assist with
+    resuming a failed upload.
+    """
+    fileid = request.args.get("resumableIdentifier", type=str) or ""
+    filename = request.args.get("resumableFilename", type=str) or ""
+    chunk = request.args.get("resumableChunkNumber", type=int) or 0
+    if not(fileid or filename or chunk):
+        raise BadRequest("At least one required query parameter is missing.")
+
+    if Path(chunks_directory(fileid),
+            chunk_name(filename, chunk)).exists():
+        return "OK"
+
+    raise NotFound(description=f"Chunk {chunk} was not found.")
+
+
+def __merge_chunks__(targetfile: Path, chunkpaths: tuple[Path, ...]) -> Path:
+    """Merge the chunks into a single file."""
+    with open(targetfile, "ab") as _target:
+        for chunkfile in chunkpaths:
+            with open(chunkfile, "rb") as _chunkdata:
+                _target.write(_chunkdata.read())
+
+            chunkfile.unlink()
+    return targetfile
+
+
+@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
+              "/rqtl2-bundle-chunked"),
+             methods=["POST"])
+def upload_rqtl2_bundle_chunked_post(species_id: int, population_id: int):
+    """
+    Extension to the `upload_rqtl2_bundle` endpoint above that allows large
+    files to be uploaded in chunks.
+
+    This should hopefully speed up uploads, and if done right, even enable
+    resumable uploads
+    """
+    _totalchunks = request.form.get("resumableTotalChunks", type=int) or 0
+    _chunk = request.form.get("resumableChunkNumber", default=1, type=int)
+    _uploadfilename = request.form.get(
+        "resumableFilename", default="", type=str) or ""
+    _fileid = request.form.get(
+        "resumableIdentifier", default="", type=str) or ""
+    _targetfile = Path(app.config["UPLOAD_FOLDER"], _fileid)
+
+    if _targetfile.exists():
+        raise BadRequest("The file has already been uploaded.")
+
+    # save chunk data
+    chunks_directory(_fileid).mkdir(exist_ok=True)
+    request.files["file"].save(Path(chunks_directory(_fileid),
+                                    chunk_name(_uploadfilename, _chunk)))
+
+    # Check whether upload is complete
+    chunkpaths = tuple(
+        Path(chunks_directory(_fileid), chunk_name(_uploadfilename, _achunk))
+        for _achunk in range(1, _totalchunks+1))
+    if all(_file.exists() for _file in chunkpaths):
+        # merge_files and clean up chunks
+        __merge_chunks__(_targetfile, chunkpaths)
+        chunks_directory(_fileid).rmdir()
+        jobid = trigger_rqtl2_bundle_qc(
+            species_id, population_id, _targetfile, _uploadfilename)
+        return url_for(
+            "upload.rqtl2.rqtl2_bundle_qc_status", jobid=jobid)
+
+    return "OK"
 
 
 @rqtl2.route("/upload/species/rqtl2-bundle/qc-status/<uuid:jobid>",