author     Frederick Muriuki Muriithi  2022-04-28 15:32:35 +0300
committer  Frederick Muriuki Muriithi  2022-04-28 15:32:35 +0300
commit     5632dcab27058875de99d63cbd263acfa3a9a2d5 (patch)
tree       93cce3204086c14760c3e76497b8b106100b4a96  /qc_app/parse.py
parent     defc1cf0c1635f3262200a9ba25d8bd0c6fc0a93 (diff)
download   gn-uploader-5632dcab27058875de99d63cbd263acfa3a9a2d5.tar.gz
Use sqlite3 to save the jobs metadata
* Use sqlite3 to save the jobs metadata and enable UI updates of the
  progress for large files
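
The handlers in the diff below lean on three helpers from qc_app/jobs.py that are not part of this diff: jobs.create_jobs_table, jobs.retrieve_meta and jobs.update_meta. A minimal sketch of what such helpers could look like, assuming a single `jobs` table keyed by job id with the metadata stored as one JSON blob (the schema and signatures are illustrative guesses, not the actual module):

import json
import sqlite3

def create_jobs_table(dbpath: str):
    """Create the jobs metadata table if it does not exist yet."""
    dbconn = sqlite3.connect(dbpath)
    with dbconn:  # commits the CREATE TABLE on success
        dbconn.execute(
            "CREATE TABLE IF NOT EXISTS jobs "
            "(job_id TEXT PRIMARY KEY, meta TEXT)")
    dbconn.close()

def retrieve_meta(dbconn: sqlite3.Connection, job_id: str) -> dict:
    """Return the metadata dict stored for `job_id`, or an empty dict."""
    row = dbconn.execute(
        "SELECT meta FROM jobs WHERE job_id = ?", (job_id,)).fetchone()
    return json.loads(row[0]) if row else {}

def update_meta(dbconn: sqlite3.Connection, job_id: str, **kwargs) -> dict:
    """Merge `kwargs` into the stored metadata and return the merged dict."""
    meta = {**retrieve_meta(dbconn, job_id), **kwargs}
    dbconn.execute(
        "INSERT INTO jobs (job_id, meta) VALUES (?, ?) "
        "ON CONFLICT (job_id) DO UPDATE SET meta = excluded.meta",
        (job_id, json.dumps(meta)))
    dbconn.commit()
    return meta

Storing the whole metadata dict as one JSON value keeps update_meta a read-merge-write of a single row, which matches how the call sites below use its return value as the full, updated dict.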
Diffstat (limited to 'qc_app/parse.py')
-rw-r--r--  qc_app/parse.py | 217
1 file changed, 139 insertions(+), 78 deletions(-)
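
jobs.enqueue_job is likewise defined in qc_app/jobs.py; after this change its callers drop the additional_jobs_meta keyword and write that metadata to sqlite instead. A rough sketch of such a helper on top of RQ (the queue name, signature and the app import are assumptions, not the real code):

from redis import Redis
from rq import Queue

from qc_app import app  # assumed location of the Flask app object

def enqueue_job(function_path: str, *args):
    """Sketch: enqueue the function at dotted path `function_path` on RQ."""
    with Redis.from_url(app.config["REDIS_URL"]) as rconn:
        # RQ resolves the dotted string to the worker-side function, so the
        # web process never has to import qc_app.parse itself.
        return Queue("default", connection=rconn).enqueue(function_path, *args)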
diff --git a/qc_app/parse.py b/qc_app/parse.py
index 3398918..795cc01 100644
--- a/qc_app/parse.py
+++ b/qc_app/parse.py
@@ -1,5 +1,6 @@
 """File parsing module"""
 import os
+import sqlite3
 from functools import reduce
 
 from redis import Redis
@@ -23,32 +24,41 @@ from quality_control.parsing import (
 
 parsebp = Blueprint("parse", __name__)
 
-def queued_parse(filepath, filetype, strainsfile, redis_url):
+def queued_parse(
+        filepath: str, filetype: FileType, strainsfile: str, redis_url: str,
+        dbpath: str):
     job = get_current_job()
+    job_id = job.get_id()
     with Redis.from_url(redis_url) as rconn:
-        jobs.update_meta(rconn, job, status = "in-progress", progress = 0)
-        job.save_meta()
+        dbconn = sqlite3.connect(dbpath)
         try:
+            job_meta = jobs.update_meta(
+                dbconn, job_id, status = "in-progress", progress = 0)
             parsed = parse_file(
                 filepath, filetype, strain_names(parse_strains(strainsfile)))
             for line, curr_size in parsed:
-                jobs.update_meta(
-                    rconn, job, progress = (curr_size/job.meta["filesize"]) * 100,
+                job_meta = jobs.update_meta(
+                    dbconn, job_id,
+                    progress=((curr_size/job_meta["filesize"]) * 100),
                     message = f"Parsed {curr_size} bytes")
-                print(f"Progress: {curr_size} bytes: {(curr_size/job.meta['filesize']) * 100}%")
 
             os.remove(filepath)
-            jobs.update_meta(
-                rconn, job, progress = 10, status = "success",
+            job_meta = jobs.update_meta(
+                dbconn, job_id, progress = 100, status = "success",
                 message = "no errors found")
         except ParseError as pe:
             pe_dict = pe.args[0]
-            jobs.update_meta(
-                rconn, job, status = "parse-error", results = {
+            job_meta = jobs.update_meta(
+                dbconn, job_id, status = "parse-error", results = {
                     "filename": os.path.basename(filepath), "filetype": filetype,
                     "position": pe_dict["position"],
                     "line_number": pe_dict["line_number"]
                 })
+        finally:
+            dbconn.close()
+
+def retrieve_dbpath():
+    return os.path.join(app.config["UPLOAD_FOLDER"], "jobs.db")
 
 @parsebp.route("/parse", methods=["GET"])
 def parse():
@@ -73,94 +83,145 @@ def parse():
     if errors:
         return redirect(url_for("entry.index"))
 
+    jobs.create_jobs_table(retrieve_dbpath())
     filetype = (
         FileType.AVERAGE if filetype == "average" else FileType.STANDARD_ERROR)
     job = jobs.enqueue_job(
         "qc_app.parse.queued_parse", filepath, filetype,
         os.path.join(os.getcwd(), "etc/strains.csv"), app.config["REDIS_URL"],
-        additional_jobs_meta = {
-            "filename": filename, "filesize": os.stat(filepath).st_size})
+        retrieve_dbpath())
+    dbconn = sqlite3.connect(retrieve_dbpath())
+    try:
+        jobs.update_meta(
+            dbconn, job.get_id(), filename=filename, filesize=os.stat(filepath).st_size,
+            status="enqueued", progress=0)
+    except Exception:
+        import traceback
+        print(traceback.format_exc())
+        dbconn.rollback()
+    finally:
+        dbconn.close()
+
     return redirect(url_for("parse.parse_status", job_id=job.get_id()))
 
 @parsebp.route("/status/<job_id>", methods=["GET"])
-def parse_status(job_id):
-    job = jobs.job(job_id)
-    if job:
-        job_id = job.get_id()
-        progress = job.meta["progress"]
-        status = job.meta["status"]
-        filename = job.meta.get("filename", "uploaded file")
-        if status == "success":
-            return redirect(url_for("parse.results", job_id=job_id))
-
-        if status == "parse-error":
-            return redirect(url_for("parse.fail", job_id=job_id))
-
-        return render_template(
-            "job_progress.html",
-            job_id = job_id,
-            job_status = status,
-            progress = progress,
-            message = job.meta.get("message", ""),
-            job_name = f"Parsing '{filename}'")
-
-    return render_template("no_such_job.html", job_id=job_id)
+def parse_status(job_id: str):
+    dbconn = sqlite3.connect(retrieve_dbpath(), timeout=10)
+    try:
+        job = jobs.job(job_id)
+        if job:
+            job_id = job.get_id()
+            job_meta = jobs.retrieve_meta(dbconn, job_id)
+            progress = job_meta["progress"]
+            status = job_meta["status"]
+            filename = job_meta.get("filename", "uploaded file")
+            if status == "success":
+                return redirect(url_for("parse.results", job_id=job_id))
+
+            if status == "parse-error":
+                return redirect(url_for("parse.fail", job_id=job_id))
+
+            return render_template(
+                "job_progress.html",
+                job_id = job_id,
+                job_status = status,
+                progress = progress,
+                message = job_meta.get("message", ""),
+                job_name = f"Parsing '{filename}'")
+
+        return render_template("no_such_job.html", job_id=job_id)
+    except sqlite3.OperationalError:
+        return redirect(url_for("parse.parse_status", job_id=job_id))
+    except Exception:
+        import traceback
+        print(traceback.format_exc())
+        raise
+    finally:
+        dbconn.close()
 
 @parsebp.route("/results/<job_id>", methods=["GET"])
-def results(job_id):
+def results(job_id: str):
     """Show results of parsing..."""
     job = jobs.job(job_id)
     if job:
-        filename = job.meta["filename"]
-        errors = job.meta.get("errors", [])
-        return render_template(
-            "parse_results.html",
-            errors=errors,
-            job_name = f"Parsing '{filename}'",
-            starting_line_number=job.meta.get("starting_line_number", 0))
+        dbconn = sqlite3.connect(retrieve_dbpath())
+        try:
+            job_meta = jobs.retrieve_meta(dbconn, job_id)
+            filename = job_meta["filename"]
+            errors = job_meta.get("errors", [])
+            return render_template(
+                "parse_results.html",
+                errors=errors,
+                job_name = f"Parsing '{filename}'",
+                starting_line_number=job_meta.get("starting_line_number", 0))
+        except Exception:
+            import traceback
+            print(traceback.format_exc())
+        finally:
+            dbconn.close()
 
     return render_template("no_such_job.html", job_id=job_id)
 
-def queued_collect_errors(filepath, filetype, strainsfile, redis_url, seek_pos=0):
+def queued_collect_errors(
+        filepath: str, filetype: FileType, strainsfile: str, redis_url: str,
+        dbpath: str, seek_pos: int = 0):
     job = get_current_job()
+    job_id = job.get_id()
     errors = []
     count = 0
     with Redis.from_url(redis_url) as rconn:
-        for error in parse_errors(
-                filepath, filetype, strain_names(parse_strains(strainsfile)),
-                seek_pos):
-            count = count + 1
-            jobs.update_meta(
-                rconn, job, message = f"Collected {count} errors", progress = (
-                    (error["position"] / job.meta["filesize"]) * 100))
-            errors.append(error)
-
-        jobs.update_meta(
-            rconn, job, errors = errors, progress = 100, status = "success")
+        dbconn = sqlite3.connect(dbpath)
+        try:
+            job_meta = jobs.retrieve_meta(dbconn, job_id)
+            for error in parse_errors(
+                    filepath, filetype, strain_names(parse_strains(strainsfile)),
+                    seek_pos):
+                count = count + 1
+                progress = ((error["position"] / job_meta["filesize"]) * 100)
+                print(f"CURRENT PROGRESS: {progress}")
+                job_meta = jobs.update_meta(
+                    dbconn, job_id, message = f"Collected {count} errors",
+                    progress = progress)
+                errors.append(error)
+
+            job_meta = jobs.update_meta(
+                dbconn, job_id, errors = errors, progress = 100, status = "success")
+            os.remove(filepath)
+        except Exception:
+            dbconn.rollback()
+        finally:
+            dbconn.close()
 
 @parsebp.route("/fail/<job_id>", methods=["GET"])
-def fail(job_id):
+def fail(job_id: str):
     """Handle parsing failure"""
-    old_job = jobs.job(job_id)
-    if old_job:
-        old_job.refresh()
-        job_id = old_job.get_id()
-        progress = old_job.meta.get("progress", 0)
-        status = old_job.meta["status"]
-        results = old_job.meta["results"]
-        filename = old_job.meta.get("filename", "uploaded file")
-
-        new_job = jobs.enqueue_job(
-            "qc_app.parse.queued_collect_errors",
-            os.path.join(
-                app.config["UPLOAD_FOLDER"], old_job.meta["filename"]),
-            results["filetype"], os.path.join(os.getcwd(), "etc/strains.csv"),
-            app.config["REDIS_URL"], results["position"],
-            additional_jobs_meta = {
-                "status": "Collecting Errors",
-                "filename": old_job.meta["filename"],
-                "filesize": old_job.meta["filesize"],
-                "starting_line_number": results["line_number"]})
-        return redirect(url_for("parse.parse_status", job_id=new_job.get_id()))
-
-    return render_template("no_such_job.html", job_id=job_id)
+    dbpath = retrieve_dbpath()
+    dbconn = sqlite3.connect(dbpath)
+    try:
+        old_job = jobs.job(job_id)
+        if old_job:
+            job_id = old_job.get_id()
+            old_meta = jobs.retrieve_meta(dbconn, job_id)
+            progress = old_meta["progress"]
+            status = old_meta["status"]
+            results = old_meta["results"]
+            filename = old_meta["filename"]
+
+            new_job = jobs.enqueue_job(
+                "qc_app.parse.queued_collect_errors",
+                os.path.join(
+                    app.config["UPLOAD_FOLDER"], old_meta["filename"]),
+                results["filetype"], os.path.join(os.getcwd(), "etc/strains.csv"),
+                app.config["REDIS_URL"], dbpath, results["position"])
+            jobs.update_meta(
+                dbconn, new_job.get_id(), status =  "Collecting Errors",
+                filename = old_meta["filename"], filesize = old_meta["filesize"],
+                starting_line_number = results["line_number"],
+                progress = progress)
+            return redirect(url_for("parse.parse_status", job_id=new_job.get_id()))
+
+        return render_template("no_such_job.html", job_id=job_id)
+    except Exception:
+        dbconn.rollback()
+    finally:
+        dbconn.close()
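
A note on the sqlite locking this commit has to live with: the RQ worker writes progress rows while the Flask views read them, so parse_status opens the database with timeout=10 and redirects to itself when an sqlite3.OperationalError ("database is locked") still gets through. An optional hardening step, not part of this commit, is to put jobs.db into write-ahead-logging mode so readers never block on the writer; a sketch:

import sqlite3

def connect_jobs_db(dbpath: str) -> sqlite3.Connection:
    """Sketch: open jobs.db with one-writer/many-readers friendly settings."""
    dbconn = sqlite3.connect(dbpath, timeout=10)
    # In WAL mode the views can keep reading job metadata while the worker
    # commits progress updates, instead of hitting "database is locked".
    dbconn.execute("PRAGMA journal_mode=WAL")
    return dbconn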