diff options
author | Frederick Muriuki Muriithi | 2024-02-13 04:09:34 +0300 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2024-02-13 04:09:34 +0300 |
commit | 971d1383aa81947a1d43725150bcfa6eceec24f0 (patch) | |
tree | 0c538514aa416a31429422727a81a3570db899f9 | |
parent | 8b637a760362d08fb92152288d0b04f90299da5f (diff) | |
download | gn-uploader-971d1383aa81947a1d43725150bcfa6eceec24f0.tar.gz |
Provide nice UI progress indicators.
-rw-r--r-- | qc_app/templates/rqtl2/rqtl2-qc-job-status.html | 31 | ||||
-rw-r--r-- | qc_app/upload/rqtl2.py | 22 | ||||
-rw-r--r-- | scripts/qc_on_rqtl2_bundle.py | 64 |
3 files changed, 102 insertions, 15 deletions
diff --git a/qc_app/templates/rqtl2/rqtl2-qc-job-status.html b/qc_app/templates/rqtl2/rqtl2-qc-job-status.html index 85b8864..4bdc983 100644 --- a/qc_app/templates/rqtl2/rqtl2-qc-job-status.html +++ b/qc_app/templates/rqtl2/rqtl2-qc-job-status.html @@ -10,14 +10,29 @@ {%block contents%} <h1 class="heading">R/qtl2 bundle: QC job status</h1> -<h2 class="heading">R/qtl2 bundle: QC Job Status</h2> - -<hr /> -<p>The job:</p> -<hr /> -{{job}} -<hr /> -<hr /> +{%if geno_percent%} +<p> + <h2>Checking 'geno' file:</h2> + <progress id="prg-geno-checking" value="{{geno_percent}}" max="100"> + {{geno_percent}}%</progress> + {{geno_percent}}%</p> +{%endif%} + +{%if pheno_percent%} +<p> + <h2>Checking 'pheno' file:</h2> + <progress id="prg-pheno-checking" value="{{pheno_percent}}" max="100"> + {{pheno_percent}}%</progress> + {{pheno_percent}}%</p> +{%endif%} + +{%if phenose_percent%} +<p> + <h2>Checking 'phenose' file:</h2> + <progress id="prg-phenose-checking" value="{{phenose_percent}}" max="100"> + {{phenose_percent}}%</progress> + {{phenose_percent}}%</p> +{%endif%} <h4>Log</h4> <div class="cli-output"> diff --git a/qc_app/upload/rqtl2.py b/qc_app/upload/rqtl2.py index 66b219d..a32019f 100644 --- a/qc_app/upload/rqtl2.py +++ b/qc_app/upload/rqtl2.py @@ -1,6 +1,7 @@ """Module to handle uploading of R/qtl2 bundles.""" import sys import json +from typing import Union from pathlib import Path from datetime import date from uuid import UUID, uuid4 @@ -227,9 +228,24 @@ def rqtl2_bundle_qc_status(jobid: UUID): rqtl2bundle=Path(jobmeta["rqtl2-bundle-file"]), rqtl2bundleorig=jobmeta["original-filename"]) - return render_template("rqtl2/rqtl2-qc-job-status.html", - job=thejob, - messages=tuple()) + def compute_percentage(thejob, filetype) -> Union[str, None]: + if f"{filetype}-linecount" in thejob: + return "100" + if f"{filetype}-filesize" in thejob: + percent = ((int(thejob.get(f"{filetype}-checked", 0)) + / + int(thejob.get(f"{filetype}-filesize", 1))) + * 100) + return f"{percent:.2f}" + return None + + return render_template( + "rqtl2/rqtl2-qc-job-status.html", + job=thejob, + geno_percent=compute_percentage(thejob, "geno"), + pheno_percent=compute_percentage(thejob, "pheno"), + phenose_percent=compute_percentage(thejob, "phenose"), + messages=tuple()) except jobs.JobNotFound: return render_template("rqtl2/no-such-job.html", jobid=jobid) diff --git a/scripts/qc_on_rqtl2_bundle.py b/scripts/qc_on_rqtl2_bundle.py index c3e8b66..b5b2059 100644 --- a/scripts/qc_on_rqtl2_bundle.py +++ b/scripts/qc_on_rqtl2_bundle.py @@ -2,18 +2,21 @@ import sys import json from zipfile import ZipFile +from functools import partial from argparse import Namespace -from typing import Union, Sequence from logging import Logger, getLogger, StreamHandler +from typing import Union, Sequence, Callable, Iterator from redis import Redis from quality_control.errors import InvalidValue +from quality_control.checks import decimal_points_error from qc_app import jobs from qc_app.db_utils import database_connection from qc_app.check_connections import check_db, check_redis +from r_qtl import errors as rqe from r_qtl import r_qtl2 as rqtl2 from r_qtl import r_qtl2_qc as rqc from r_qtl import fileerrors as rqfe @@ -57,12 +60,61 @@ def qc_missing_files(rconn: Redis, return True return False +def compute_filesize(zfile: ZipFile, filetype: str) -> int: + """Compute the total file size.""" + cdata = rqtl2.control_data(zfile) + if isinstance(cdata[filetype], str): + return zfile.getinfo(cdata[filetype]).file_size + + return sum(zfile.getinfo(afile).file_size for afile in cdata[filetype]) + +def retrieve_errors_with_progress(rconn: Redis,#pylint: disable=[too-many-locals] + fqjobid: str, + zfile: ZipFile, + filetype: str, + checkers: tuple[Callable]) -> Iterator[Union[ + InvalidValue, rqfe.MissingFile]]: + """Filter the errors while also counting the number of lines in the file.""" + assert filetype in rqtl2.FILE_TYPES, f"Invalid file type {filetype}." + count = 0 + checked = 0 + cdata = rqtl2.control_data(zfile) + rconn.hset(fqjobid, f"{filetype}-filesize", compute_filesize(zfile, filetype)) + def __update_processed__(value): + nonlocal checked + checked = checked + len(value) + rconn.hset(fqjobid, f"{filetype}-checked", checked) + + try:# pylint: disable=[too-many-nested-blocks] + for lineno, row in enumerate( + rqtl2.file_data(zfile, filetype, cdata), start=1): + count = count + 1 + for field, value in row.items(): + if field == "id": + __update_processed__(value) + continue + if value is not None: + for checker in checkers: + error = checker(lineno, field, value) + if bool(error): + yield error + __update_processed__(value) + + rconn.hset(fqjobid, f"{filetype}-linecount", count) + except rqe.MissingFileError: + fname = cdata.get(filetype) + yield rqfe.MissingFile(filetype, fname, ( + f"The file '{fname}' does not exist in the bundle despite it being " + f"listed under '{filetype}' in the control file.")) + def qc_geno_errors(rconn, fqjobid, zfile, logger) -> bool: """Check for errors in `geno` file(s).""" logger.info("Checking for errors in the 'geno' file…") cdata = rqtl2.control_data(zfile) if "geno" in cdata: - gerrs = tuple(rqc.geno_errors(zfile)) + gerrs = tuple(retrieve_errors_with_progress( + rconn, fqjobid, zfile, "geno", + (rqc.make_genocode_checker(cdata.get("genotypes", {})),))) add_to_errors(rconn, fqjobid, "errors-generic", tuple( err for err in gerrs if isinstance(err, rqfe.MissingFile))) add_to_errors(rconn, fqjobid, "errors-geno", tuple( @@ -79,7 +131,9 @@ def qc_pheno_errors(rconn, fqjobid, zfile, logger) -> bool: logger.info("Checking for errors in the 'pheno' file…") cdata = rqtl2.control_data(zfile) if "pheno" in cdata: - perrs = tuple(rqc.pheno_errors(zfile)) + perrs = tuple(retrieve_errors_with_progress( + rconn,fqjobid, zfile, "pheno", + (partial(decimal_points_error, mini=3),))) add_to_errors(rconn, fqjobid, "errors-generic", tuple( err for err in perrs if isinstance(err, rqfe.MissingFile))) add_to_errors(rconn, fqjobid, "errors-pheno", tuple( @@ -96,7 +150,9 @@ def qc_phenose_errors(rconn, fqjobid, zfile, logger) -> bool: logger.info("Checking for errors in the 'phenose' file…") cdata = rqtl2.control_data(zfile) if "phenose" in cdata: - perrs = tuple(rqc.phenose_errors(zfile)) + perrs = tuple(retrieve_errors_with_progress( + rconn,fqjobid, zfile, "phenose", + (partial(decimal_points_error, mini=6),))) add_to_errors(rconn, fqjobid, "errors-generic", tuple( err for err in perrs if isinstance(err, rqfe.MissingFile))) add_to_errors(rconn, fqjobid, "errors-phenose", tuple( |