From 3da6848381b6103fbb58eeab8d7051cba0bded58 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Thu, 4 Aug 2022 08:51:15 +0300 Subject: Implement data insertion - Hook up external data insertion script to webserver code - Provide rudimentary status indication - Generalise some job creation details --- qc_app/dbinsert.py | 43 +++++++++++++++++++++++++++++++- qc_app/jobs.py | 39 +++++++++++++++++++++++------ qc_app/templates/final_confirmation.html | 2 +- qc_app/templates/insert_progress.html | 24 ++++++++++++++++++ scripts/worker.py | 2 +- 5 files changed, 99 insertions(+), 11 deletions(-) create mode 100644 qc_app/templates/insert_progress.html diff --git a/qc_app/dbinsert.py b/qc_app/dbinsert.py index 0b87c20..70aeb6d 100644 --- a/qc_app/dbinsert.py +++ b/qc_app/dbinsert.py @@ -5,7 +5,6 @@ from typing import Union from functools import reduce from datetime import datetime -import requests from redis import Redis from MySQLdb.cursors import DictCursor from flask import ( @@ -359,3 +358,45 @@ def final_confirmation(): "DataScale"))) except AssertionError as aserr: return render_error(f"Missing data: {aserr.args[0]}") + +@dbinsertbp.route("/insert-data", methods=["POST"]) +def insert_data(): + "Trigger data insertion" + form = request.form + try: + assert form.get("filename"), "filename" + assert form.get("filetype"), "filetype" + assert form.get("species"), "species" + assert form.get("genechipid"), "platform" + assert form.get("studyid"), "study" + assert form.get("datasetid"), "dataset" + + filename = form["filename"] + filepath = f"{app.config['UPLOAD_FOLDER']}/{filename}" + redisurl = app.config["REDIS_URL"] + if os.path.exists(filepath): + with Redis.from_url(redisurl, decode_responses=True) as rconn: + job = jobs.launch_job( + jobs.data_insertion_job( + rconn, filepath, form.get("filetype"), + form.get("species"), form.get("genechipid"), + form.get("datasetid"), app.config["SQL_URI"], redisurl, + app.config["JOBS_TTL_SECONDS"]), + redisurl, f"{app.config['UPLOAD_FOLDER']}/job_errors") + + return redirect(url_for("dbinsert.insert_status", job_id=job["job_id"])) + return render_error(f"File '{filename}' no longer exists.") + except AssertionError as aserr: + return render_error(f"Missing data: {aserr.args[0]}") + +@dbinsertbp.route("/status/", methods=["GET"]) +def insert_status(job_id: str): + "Retrieve status of data insertion." + with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn: + job = jobs.job(rconn, job_id) + + if job: + if job["status"] == "success": + return render_template("insert_success.html", job=job) + return render_template("insert_progress.html", job=job) + return render_template("no_such_job.html", job_id=job_id), 400 diff --git a/qc_app/jobs.py b/qc_app/jobs.py index 406874a..9b350b7 100644 --- a/qc_app/jobs.py +++ b/qc_app/jobs.py @@ -11,6 +11,18 @@ def error_filename(job_id, error_dir): "Compute the path of the file where errors will be dumped." return f"{error_dir}/job_{job_id}.error" +def __init_job__(# pylint: disable=[too-many-arguments] + redis_conn: Redis, job_id: str, command: list, job_type: str, + ttl_seconds: int, extra_meta: dict) -> dict: + "Initialise a job 'object' and put in on redis" + the_job = { + "job_id": job_id, "command": shlex.join(command), "status": "pending", + "percent": 0, "job-type": job_type, **extra_meta + } + redis_conn.hset(name=the_job["job_id"], mapping=the_job) + redis_conn.expire(name=the_job["job_id"], time=timedelta(seconds=ttl_seconds)) + return the_job + def build_file_verification_job( redis_conn: Redis, filepath: str, filetype: str, redisurl: str, ttl_seconds: int): @@ -20,14 +32,25 @@ def build_file_verification_job( "python3", "-m", "scripts.validate_file", filetype, filepath, redisurl, job_id ] - the_job = { - "job_id": job_id, "command": shlex.join(command), "status": "pending", - "filename": os.path.basename(filepath), "percent": 0, - "filetype": filetype, "job-type": "file-verification" - } - redis_conn.hset(name=the_job["job_id"], mapping=the_job) - redis_conn.expire(name=the_job["job_id"], time=timedelta(seconds=ttl_seconds)) - return the_job + return __init_job__( + redis_conn, job_id, command, "file-verification", ttl_seconds, { + "filetype": filetype, + "filename": os.path.basename(filepath), "percent": 0 + }) + +def data_insertion_job(# pylint: disable=[too-many-arguments] + redis_conn: Redis, filepath: str, filetype: str, speciesid: int, + platformid: int, datasetid: int, databaseuri: str, redisuri: str, + ttl_seconds: int) -> dict: + "Build a data insertion job" + command = [ + "python3", "-m", "scripts.insert_data", filetype, filepath, speciesid, + platformid, datasetid, databaseuri, redisuri + ] + return __init_job__( + redis_conn, str(uuid4()), command, "data-insertion", ttl_seconds, { + "filename": os.path.basename(filepath), "filetype": filetype, + }) def launch_job(the_job: dict, redisurl: str, error_dir): """Launch a job in the background""" diff --git a/qc_app/templates/final_confirmation.html b/qc_app/templates/final_confirmation.html index f2de028..018f8d6 100644 --- a/qc_app/templates/final_confirmation.html +++ b/qc_app/templates/final_confirmation.html @@ -35,7 +35,7 @@ -
+ {{hidden_fields( filename, filetype, species=species, genechipid=genechipid, studyid=studyid,datasetid=datasetid)}} diff --git a/qc_app/templates/insert_progress.html b/qc_app/templates/insert_progress.html new file mode 100644 index 0000000..1ac6212 --- /dev/null +++ b/qc_app/templates/insert_progress.html @@ -0,0 +1,24 @@ +{%extends "base.html"%} + +{%block extrameta%} + +{%endblock%} + +{%block title%}Job Status{%endblock%} + +{%block contents%} +

{{job_name}}

+ + +{{job_status}}: {{message}}
+ +{{job}} + +{{job["percent"]}} +{{"%.2f" | format(job['percent'] | float)}}% + +
+ {{job.stderr}} +
+ +{%endblock%} diff --git a/scripts/worker.py b/scripts/worker.py index 879442a..03751d2 100644 --- a/scripts/worker.py +++ b/scripts/worker.py @@ -54,7 +54,7 @@ def run_job(job, rconn): os.remove(stderrpath) return process.poll() - except Exception as exc: + except Exception as exc:# pylint: disable=[broad-except,unused-variable] update_stdout_stderr( traceback.format_exc().encode("utf-8"), "stderr", rconn, job_id) print(traceback.format_exc(), file=sys.stderr) -- cgit v1.2.3