about summary refs log tree commit diff
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2024-02-13 04:09:34 +0300
committer Frederick Muriuki Muriithi 2024-02-13 04:09:34 +0300
commit 971d1383aa81947a1d43725150bcfa6eceec24f0 (patch)
tree 0c538514aa416a31429422727a81a3570db899f9
parent 8b637a760362d08fb92152288d0b04f90299da5f (diff)
download gn-uploader-971d1383aa81947a1d43725150bcfa6eceec24f0.tar.gz
Provide nice UI progress indicators.
-rw-r--r-- qc_app/templates/rqtl2/rqtl2-qc-job-status.html 31
-rw-r--r-- qc_app/upload/rqtl2.py 22
-rw-r--r-- scripts/qc_on_rqtl2_bundle.py 64
3 files changed, 102 insertions, 15 deletions
diff --git a/qc_app/templates/rqtl2/rqtl2-qc-job-status.html b/qc_app/templates/rqtl2/rqtl2-qc-job-status.html
index 85b8864..4bdc983 100644
--- a/qc_app/templates/rqtl2/rqtl2-qc-job-status.html
+++ b/qc_app/templates/rqtl2/rqtl2-qc-job-status.html
@@ -10,14 +10,29 @@
 {%block contents%}
 <h1 class="heading">R/qtl2 bundle: QC job status</h1>
 
-<h2 class="heading">R/qtl2 bundle: QC Job Status</h2>
-
-<hr />
-<p>The job:</p>
-<hr />
-{{job}}
-<hr />
-<hr />
+{%if geno_percent%}
+<p>
+  <h2>Checking 'geno' file:</h2>
+  <progress id="prg-geno-checking" value="{{geno_percent}}" max="100">
+    {{geno_percent}}%</progress>
+  {{geno_percent}}%</p>
+{%endif%}
+
+{%if pheno_percent%}
+<p>
+  <h2>Checking 'pheno' file:</h2>
+  <progress id="prg-pheno-checking" value="{{pheno_percent}}" max="100">
+    {{pheno_percent}}%</progress>
+  {{pheno_percent}}%</p>
+{%endif%}
+
+{%if phenose_percent%}
+<p>
+  <h2>Checking 'phenose' file:</h2>
+  <progress id="prg-phenose-checking" value="{{phenose_percent}}" max="100">
+    {{phenose_percent}}%</progress>
+  {{phenose_percent}}%</p>
+{%endif%}
 
 <h4>Log</h4>
 <div class="cli-output">
diff --git a/qc_app/upload/rqtl2.py b/qc_app/upload/rqtl2.py
index 66b219d..a32019f 100644
--- a/qc_app/upload/rqtl2.py
+++ b/qc_app/upload/rqtl2.py
@@ -1,6 +1,7 @@
 """Module to handle uploading of R/qtl2 bundles."""
 import sys
 import json
+from typing import Union
 from pathlib import Path
 from datetime import date
 from uuid import UUID, uuid4
@@ -227,9 +228,24 @@ def rqtl2_bundle_qc_status(jobid: UUID):
                     rqtl2bundle=Path(jobmeta["rqtl2-bundle-file"]),
                     rqtl2bundleorig=jobmeta["original-filename"])
 
-            return render_template("rqtl2/rqtl2-qc-job-status.html",
-                                   job=thejob,
-                                   messages=tuple())
+            def compute_percentage(thejob, filetype) -> Union[str, None]:
+                if f"{filetype}-linecount" in thejob:
+                    return "100"
+                if f"{filetype}-filesize" in thejob:
+                    percent = ((int(thejob.get(f"{filetype}-checked", 0))
+                                /
+                                int(thejob.get(f"{filetype}-filesize", 1)))
+                               * 100)
+                    return f"{percent:.2f}"
+                return None
+
+            return render_template(
+                "rqtl2/rqtl2-qc-job-status.html",
+                job=thejob,
+                geno_percent=compute_percentage(thejob, "geno"),
+                pheno_percent=compute_percentage(thejob, "pheno"),
+                phenose_percent=compute_percentage(thejob, "phenose"),
+                messages=tuple())
         except jobs.JobNotFound:
             return render_template("rqtl2/no-such-job.html", jobid=jobid)
 
diff --git a/scripts/qc_on_rqtl2_bundle.py b/scripts/qc_on_rqtl2_bundle.py
index c3e8b66..b5b2059 100644
--- a/scripts/qc_on_rqtl2_bundle.py
+++ b/scripts/qc_on_rqtl2_bundle.py
@@ -2,18 +2,21 @@
 import sys
 import json
 from zipfile import ZipFile
+from functools import partial
 from argparse import Namespace
-from typing import Union, Sequence
 from logging import Logger, getLogger, StreamHandler
+from typing import Union, Sequence, Callable, Iterator
 
 from redis import Redis
 
 from quality_control.errors import InvalidValue
+from quality_control.checks import decimal_points_error
 
 from qc_app import jobs
 from qc_app.db_utils import database_connection
 from qc_app.check_connections import check_db, check_redis
 
+from r_qtl import errors as rqe
 from r_qtl import r_qtl2 as rqtl2
 from r_qtl import r_qtl2_qc as rqc
 from r_qtl import fileerrors as rqfe
@@ -57,12 +60,61 @@ def qc_missing_files(rconn: Redis,
         return True
     return False
 
+def compute_filesize(zfile: ZipFile, filetype: str) -> int:
+    """Compute the total file size."""
+    cdata = rqtl2.control_data(zfile)
+    if isinstance(cdata[filetype], str):
+        return zfile.getinfo(cdata[filetype]).file_size
+
+    return sum(zfile.getinfo(afile).file_size for afile in cdata[filetype])
+
+def retrieve_errors_with_progress(rconn: Redis,#pylint: disable=[too-many-locals]
+                                  fqjobid: str,
+                                  zfile: ZipFile,
+                                  filetype: str,
+                                  checkers: tuple[Callable]) -> Iterator[Union[
+                                      InvalidValue, rqfe.MissingFile]]:
+    """Filter the errors while also counting the number of lines in the file."""
+    assert filetype in rqtl2.FILE_TYPES, f"Invalid file type {filetype}."
+    count = 0
+    checked = 0
+    cdata = rqtl2.control_data(zfile)
+    rconn.hset(fqjobid, f"{filetype}-filesize", compute_filesize(zfile, filetype))
+    def __update_processed__(value):
+        nonlocal checked
+        checked = checked + len(value)
+        rconn.hset(fqjobid, f"{filetype}-checked", checked)
+
+    try:# pylint: disable=[too-many-nested-blocks]
+        for lineno, row in enumerate(
+                rqtl2.file_data(zfile, filetype, cdata), start=1):
+            count = count + 1
+            for field, value in row.items():
+                if field == "id":
+                    __update_processed__(value)
+                    continue
+                if value is not None:
+                    for checker in checkers:
+                        error = checker(lineno, field, value)
+                        if bool(error):
+                            yield error
+                        __update_processed__(value)
+
+        rconn.hset(fqjobid, f"{filetype}-linecount", count)
+    except rqe.MissingFileError:
+        fname = cdata.get(filetype)
+        yield rqfe.MissingFile(filetype, fname, (
+            f"The file '{fname}' does not exist in the bundle despite it being "
+            f"listed under '{filetype}' in the control file."))
+
 def qc_geno_errors(rconn, fqjobid, zfile, logger) -> bool:
     """Check for errors in `geno` file(s)."""
     logger.info("Checking for errors in the 'geno' file…")
     cdata = rqtl2.control_data(zfile)
     if "geno" in cdata:
-        gerrs = tuple(rqc.geno_errors(zfile))
+        gerrs = tuple(retrieve_errors_with_progress(
+            rconn, fqjobid, zfile, "geno",
+            (rqc.make_genocode_checker(cdata.get("genotypes", {})),)))
         add_to_errors(rconn, fqjobid, "errors-generic", tuple(
             err for err in gerrs if isinstance(err, rqfe.MissingFile)))
         add_to_errors(rconn, fqjobid, "errors-geno", tuple(
@@ -79,7 +131,9 @@ def qc_pheno_errors(rconn, fqjobid, zfile, logger) -> bool:
     logger.info("Checking for errors in the 'pheno' file…")
     cdata = rqtl2.control_data(zfile)
     if "pheno" in cdata:
-        perrs = tuple(rqc.pheno_errors(zfile))
+        perrs = tuple(retrieve_errors_with_progress(
+            rconn,fqjobid, zfile, "pheno",
+            (partial(decimal_points_error, mini=3),)))
         add_to_errors(rconn, fqjobid, "errors-generic", tuple(
             err for err in perrs if isinstance(err, rqfe.MissingFile)))
         add_to_errors(rconn, fqjobid, "errors-pheno", tuple(
@@ -96,7 +150,9 @@ def qc_phenose_errors(rconn, fqjobid, zfile, logger) -> bool:
     logger.info("Checking for errors in the 'phenose' file…")
     cdata = rqtl2.control_data(zfile)
     if "phenose" in cdata:
-        perrs = tuple(rqc.phenose_errors(zfile))
+        perrs = tuple(retrieve_errors_with_progress(
+            rconn,fqjobid, zfile, "phenose",
+            (partial(decimal_points_error, mini=6),)))
         add_to_errors(rconn, fqjobid, "errors-generic", tuple(
             err for err in perrs if isinstance(err, rqfe.MissingFile)))
         add_to_errors(rconn, fqjobid, "errors-phenose", tuple(