From 971d1383aa81947a1d43725150bcfa6eceec24f0 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Tue, 13 Feb 2024 04:09:34 +0300
Subject: Provide nice UI progress indicators.

---
 qc_app/templates/rqtl2/rqtl2-qc-job-status.html | 31 ++++++++----
 qc_app/upload/rqtl2.py                          | 22 +++++++--
 scripts/qc_on_rqtl2_bundle.py                   | 64 +++++++++++++++++++++++--
 3 files changed, 102 insertions(+), 15 deletions(-)

diff --git a/qc_app/templates/rqtl2/rqtl2-qc-job-status.html b/qc_app/templates/rqtl2/rqtl2-qc-job-status.html
index 85b8864..4bdc983 100644
--- a/qc_app/templates/rqtl2/rqtl2-qc-job-status.html
+++ b/qc_app/templates/rqtl2/rqtl2-qc-job-status.html
@@ -10,14 +10,29 @@ {%block contents%}

R/qtl2 bundle: QC job status

-

R/qtl2 bundle: QC Job Status

- -
-

The job:

-
-{{job}} -
-
+{%if geno_percent%} +

+

Checking 'geno' file:

+ + {{geno_percent}}% + {{geno_percent}}%

+{%endif%} + +{%if pheno_percent%} +

+

Checking 'pheno' file:

+ + {{pheno_percent}}% + {{pheno_percent}}%

+{%endif%} + +{%if phenose_percent%} +

+

Checking 'phenose' file:

+ + {{phenose_percent}}% + {{phenose_percent}}%

+{%endif%}

Log

diff --git a/qc_app/upload/rqtl2.py b/qc_app/upload/rqtl2.py index 66b219d..a32019f 100644 --- a/qc_app/upload/rqtl2.py +++ b/qc_app/upload/rqtl2.py @@ -1,6 +1,7 @@ """Module to handle uploading of R/qtl2 bundles.""" import sys import json +from typing import Union from pathlib import Path from datetime import date from uuid import UUID, uuid4 @@ -227,9 +228,24 @@ def rqtl2_bundle_qc_status(jobid: UUID): rqtl2bundle=Path(jobmeta["rqtl2-bundle-file"]), rqtl2bundleorig=jobmeta["original-filename"]) - return render_template("rqtl2/rqtl2-qc-job-status.html", - job=thejob, - messages=tuple()) + def compute_percentage(thejob, filetype) -> Union[str, None]: + if f"{filetype}-linecount" in thejob: + return "100" + if f"{filetype}-filesize" in thejob: + percent = ((int(thejob.get(f"{filetype}-checked", 0)) + / + int(thejob.get(f"{filetype}-filesize", 1))) + * 100) + return f"{percent:.2f}" + return None + + return render_template( + "rqtl2/rqtl2-qc-job-status.html", + job=thejob, + geno_percent=compute_percentage(thejob, "geno"), + pheno_percent=compute_percentage(thejob, "pheno"), + phenose_percent=compute_percentage(thejob, "phenose"), + messages=tuple()) except jobs.JobNotFound: return render_template("rqtl2/no-such-job.html", jobid=jobid) diff --git a/scripts/qc_on_rqtl2_bundle.py b/scripts/qc_on_rqtl2_bundle.py index c3e8b66..b5b2059 100644 --- a/scripts/qc_on_rqtl2_bundle.py +++ b/scripts/qc_on_rqtl2_bundle.py @@ -2,18 +2,21 @@ import sys import json from zipfile import ZipFile +from functools import partial from argparse import Namespace -from typing import Union, Sequence from logging import Logger, getLogger, StreamHandler +from typing import Union, Sequence, Callable, Iterator from redis import Redis from quality_control.errors import InvalidValue +from quality_control.checks import decimal_points_error from qc_app import jobs from qc_app.db_utils import database_connection from qc_app.check_connections import check_db, check_redis +from r_qtl import errors as rqe 
from r_qtl import r_qtl2 as rqtl2 from r_qtl import r_qtl2_qc as rqc from r_qtl import fileerrors as rqfe @@ -57,12 +60,61 @@ def qc_missing_files(rconn: Redis, return True return False +def compute_filesize(zfile: ZipFile, filetype: str) -> int: + """Compute the total file size.""" + cdata = rqtl2.control_data(zfile) + if isinstance(cdata[filetype], str): + return zfile.getinfo(cdata[filetype]).file_size + + return sum(zfile.getinfo(afile).file_size for afile in cdata[filetype]) + +def retrieve_errors_with_progress(rconn: Redis,#pylint: disable=[too-many-locals] + fqjobid: str, + zfile: ZipFile, + filetype: str, + checkers: tuple[Callable]) -> Iterator[Union[ + InvalidValue, rqfe.MissingFile]]: + """Filter the errors while also counting the number of lines in the file.""" + assert filetype in rqtl2.FILE_TYPES, f"Invalid file type {filetype}." + count = 0 + checked = 0 + cdata = rqtl2.control_data(zfile) + rconn.hset(fqjobid, f"{filetype}-filesize", compute_filesize(zfile, filetype)) + def __update_processed__(value): + nonlocal checked + checked = checked + len(value) + rconn.hset(fqjobid, f"{filetype}-checked", checked) + + try:# pylint: disable=[too-many-nested-blocks] + for lineno, row in enumerate( + rqtl2.file_data(zfile, filetype, cdata), start=1): + count = count + 1 + for field, value in row.items(): + if field == "id": + __update_processed__(value) + continue + if value is not None: + for checker in checkers: + error = checker(lineno, field, value) + if bool(error): + yield error + __update_processed__(value) + + rconn.hset(fqjobid, f"{filetype}-linecount", count) + except rqe.MissingFileError: + fname = cdata.get(filetype) + yield rqfe.MissingFile(filetype, fname, ( + f"The file '{fname}' does not exist in the bundle despite it being " + f"listed under '{filetype}' in the control file.")) + def qc_geno_errors(rconn, fqjobid, zfile, logger) -> bool: """Check for errors in `geno` file(s).""" logger.info("Checking for errors in the 'geno' file…") cdata = 
rqtl2.control_data(zfile) if "geno" in cdata: - gerrs = tuple(rqc.geno_errors(zfile)) + gerrs = tuple(retrieve_errors_with_progress( + rconn, fqjobid, zfile, "geno", + (rqc.make_genocode_checker(cdata.get("genotypes", {})),))) add_to_errors(rconn, fqjobid, "errors-generic", tuple( err for err in gerrs if isinstance(err, rqfe.MissingFile))) add_to_errors(rconn, fqjobid, "errors-geno", tuple( @@ -79,7 +131,9 @@ def qc_pheno_errors(rconn, fqjobid, zfile, logger) -> bool: logger.info("Checking for errors in the 'pheno' file…") cdata = rqtl2.control_data(zfile) if "pheno" in cdata: - perrs = tuple(rqc.pheno_errors(zfile)) + perrs = tuple(retrieve_errors_with_progress( + rconn,fqjobid, zfile, "pheno", + (partial(decimal_points_error, mini=3),))) add_to_errors(rconn, fqjobid, "errors-generic", tuple( err for err in perrs if isinstance(err, rqfe.MissingFile))) add_to_errors(rconn, fqjobid, "errors-pheno", tuple( @@ -96,7 +150,9 @@ def qc_phenose_errors(rconn, fqjobid, zfile, logger) -> bool: logger.info("Checking for errors in the 'phenose' file…") cdata = rqtl2.control_data(zfile) if "phenose" in cdata: - perrs = tuple(rqc.phenose_errors(zfile)) + perrs = tuple(retrieve_errors_with_progress( + rconn,fqjobid, zfile, "phenose", + (partial(decimal_points_error, mini=6),))) add_to_errors(rconn, fqjobid, "errors-generic", tuple( err for err in perrs if isinstance(err, rqfe.MissingFile))) add_to_errors(rconn, fqjobid, "errors-phenose", tuple( -- cgit v1.2.3