aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--qc_app/templates/rqtl2/rqtl2-qc-job-status.html31
-rw-r--r--qc_app/upload/rqtl2.py22
-rw-r--r--scripts/qc_on_rqtl2_bundle.py64
3 files changed, 102 insertions, 15 deletions
diff --git a/qc_app/templates/rqtl2/rqtl2-qc-job-status.html b/qc_app/templates/rqtl2/rqtl2-qc-job-status.html
index 85b8864..4bdc983 100644
--- a/qc_app/templates/rqtl2/rqtl2-qc-job-status.html
+++ b/qc_app/templates/rqtl2/rqtl2-qc-job-status.html
@@ -10,14 +10,29 @@
{%block contents%}
<h1 class="heading">R/qtl2 bundle: QC job status</h1>
-<h2 class="heading">R/qtl2 bundle: QC Job Status</h2>
-
-<hr />
-<p>The job:</p>
-<hr />
-{{job}}
-<hr />
-<hr />
+{%if geno_percent%}
+<p>
+ <h2>Checking 'geno' file:</h2>
+ <progress id="prg-geno-checking" value="{{geno_percent}}" max="100">
+ {{geno_percent}}%</progress>
+ {{geno_percent}}%</p>
+{%endif%}
+
+{%if pheno_percent%}
+<p>
+ <h2>Checking 'pheno' file:</h2>
+ <progress id="prg-pheno-checking" value="{{pheno_percent}}" max="100">
+ {{pheno_percent}}%</progress>
+ {{pheno_percent}}%</p>
+{%endif%}
+
+{%if phenose_percent%}
+<p>
+ <h2>Checking 'phenose' file:</h2>
+ <progress id="prg-phenose-checking" value="{{phenose_percent}}" max="100">
+ {{phenose_percent}}%</progress>
+ {{phenose_percent}}%</p>
+{%endif%}
<h4>Log</h4>
<div class="cli-output">
diff --git a/qc_app/upload/rqtl2.py b/qc_app/upload/rqtl2.py
index 66b219d..a32019f 100644
--- a/qc_app/upload/rqtl2.py
+++ b/qc_app/upload/rqtl2.py
@@ -1,6 +1,7 @@
"""Module to handle uploading of R/qtl2 bundles."""
import sys
import json
+from typing import Union
from pathlib import Path
from datetime import date
from uuid import UUID, uuid4
@@ -227,9 +228,24 @@ def rqtl2_bundle_qc_status(jobid: UUID):
rqtl2bundle=Path(jobmeta["rqtl2-bundle-file"]),
rqtl2bundleorig=jobmeta["original-filename"])
- return render_template("rqtl2/rqtl2-qc-job-status.html",
- job=thejob,
- messages=tuple())
+ def compute_percentage(thejob, filetype) -> Union[str, None]:
+     """
+     Compute how far the check of the `filetype` file(s) has progressed.
+
+     `thejob` is the mapping of job details; the keys read here are
+     `<filetype>-linecount`, `<filetype>-filesize` and `<filetype>-checked`.
+
+     Returns the percentage as a string, or `None` if the check of the
+     `filetype` file(s) has not recorded any progress details yet.
+     """
+     if f"{filetype}-linecount" in thejob:
+         # The QC script records the line count only after it has gone
+         # through every row, so its presence means the check is complete.
+         return "100"
+     if f"{filetype}-filesize" in thejob:
+         filesize = int(thejob[f"{filetype}-filesize"])
+         if filesize <= 0:
+             # An empty file gives nothing to measure progress against;
+             # avoid dividing by zero.
+             return "0.00"
+         checked = int(thejob.get(f"{filetype}-checked", 0))
+         percent = (checked / filesize) * 100
+         return f"{percent:.2f}"
+     return None
+
+ return render_template(
+ "rqtl2/rqtl2-qc-job-status.html",
+ job=thejob,
+ geno_percent=compute_percentage(thejob, "geno"),
+ pheno_percent=compute_percentage(thejob, "pheno"),
+ phenose_percent=compute_percentage(thejob, "phenose"),
+ messages=tuple())
except jobs.JobNotFound:
return render_template("rqtl2/no-such-job.html", jobid=jobid)
diff --git a/scripts/qc_on_rqtl2_bundle.py b/scripts/qc_on_rqtl2_bundle.py
index c3e8b66..b5b2059 100644
--- a/scripts/qc_on_rqtl2_bundle.py
+++ b/scripts/qc_on_rqtl2_bundle.py
@@ -2,18 +2,21 @@
import sys
import json
from zipfile import ZipFile
+from functools import partial
from argparse import Namespace
-from typing import Union, Sequence
from logging import Logger, getLogger, StreamHandler
+from typing import Union, Sequence, Callable, Iterator
from redis import Redis
from quality_control.errors import InvalidValue
+from quality_control.checks import decimal_points_error
from qc_app import jobs
from qc_app.db_utils import database_connection
from qc_app.check_connections import check_db, check_redis
+from r_qtl import errors as rqe
from r_qtl import r_qtl2 as rqtl2
from r_qtl import r_qtl2_qc as rqc
from r_qtl import fileerrors as rqfe
@@ -57,12 +60,61 @@ def qc_missing_files(rconn: Redis,
return True
return False
+def compute_filesize(zfile: ZipFile, filetype: str) -> int:
+    """Return the combined size of the bundle's `filetype` file(s)."""
+    members = rqtl2.control_data(zfile)[filetype]
+    if isinstance(members, str):
+        # A single file is listed by name: treat it as a one-item collection.
+        members = (members,)
+
+    return sum(zfile.getinfo(member).file_size for member in members)
+
+def retrieve_errors_with_progress(rconn: Redis,#pylint: disable=[too-many-locals]
+                                  fqjobid: str,
+                                  zfile: ZipFile,
+                                  filetype: str,
+                                  checkers: tuple[Callable]) -> Iterator[Union[
+                                      InvalidValue, rqfe.MissingFile]]:
+    """
+    Yield the errors found in the `filetype` file(s), recording progress.
+
+    Progress is written to the Redis hash at `fqjobid`:
+    * `<filetype>-filesize`: total size of the file(s), set before any row
+      is read,
+    * `<filetype>-checked`: running total of the lengths of the values gone
+      through so far, refreshed after every row,
+    * `<filetype>-linecount`: number of rows, set only after every row has
+      been gone through.
+
+    Every non-`None` value outside the "id" field is passed to each of the
+    `checkers` as `checker(lineno, field, value)`, and every truthy result is
+    yielded. If `rqe.MissingFileError` is raised while reading the rows, a
+    `rqfe.MissingFile` error is yielded and the line count is not recorded.
+
+    This is a generator: nothing is read or written until it is iterated.
+    """
+    assert filetype in rqtl2.FILE_TYPES, f"Invalid file type {filetype}."
+    count = 0
+    checked = 0
+    cdata = rqtl2.control_data(zfile)
+    rconn.hset(fqjobid, f"{filetype}-filesize", compute_filesize(zfile, filetype))
+
+    try:# pylint: disable=[too-many-nested-blocks]
+        for lineno, row in enumerate(
+                rqtl2.file_data(zfile, filetype, cdata), start=1):
+            count = count + 1
+            for field, value in row.items():
+                if value is None:
+                    # Nothing to check, and `len(None)` would raise TypeError.
+                    continue
+                if field != "id":
+                    for checker in checkers:
+                        error = checker(lineno, field, value)
+                        if bool(error):
+                            yield error
+                checked = checked + len(value)
+            # One Redis write per row, rather than one per field, keeps the
+            # number of round-trips proportional to the number of rows.
+            rconn.hset(fqjobid, f"{filetype}-checked", checked)
+
+        rconn.hset(fqjobid, f"{filetype}-linecount", count)
+    except rqe.MissingFileError:
+        fname = cdata.get(filetype)
+        yield rqfe.MissingFile(filetype, fname, (
+            f"The file '{fname}' does not exist in the bundle despite it being "
+            f"listed under '{filetype}' in the control file."))
+
def qc_geno_errors(rconn, fqjobid, zfile, logger) -> bool:
"""Check for errors in `geno` file(s)."""
logger.info("Checking for errors in the 'geno' file…")
cdata = rqtl2.control_data(zfile)
if "geno" in cdata:
- gerrs = tuple(rqc.geno_errors(zfile))
+ gerrs = tuple(retrieve_errors_with_progress(
+ rconn, fqjobid, zfile, "geno",
+ (rqc.make_genocode_checker(cdata.get("genotypes", {})),)))
add_to_errors(rconn, fqjobid, "errors-generic", tuple(
err for err in gerrs if isinstance(err, rqfe.MissingFile)))
add_to_errors(rconn, fqjobid, "errors-geno", tuple(
@@ -79,7 +131,9 @@ def qc_pheno_errors(rconn, fqjobid, zfile, logger) -> bool:
logger.info("Checking for errors in the 'pheno' file…")
cdata = rqtl2.control_data(zfile)
if "pheno" in cdata:
- perrs = tuple(rqc.pheno_errors(zfile))
+ perrs = tuple(retrieve_errors_with_progress(
+ rconn,fqjobid, zfile, "pheno",
+ (partial(decimal_points_error, mini=3),)))
add_to_errors(rconn, fqjobid, "errors-generic", tuple(
err for err in perrs if isinstance(err, rqfe.MissingFile)))
add_to_errors(rconn, fqjobid, "errors-pheno", tuple(
@@ -96,7 +150,9 @@ def qc_phenose_errors(rconn, fqjobid, zfile, logger) -> bool:
logger.info("Checking for errors in the 'phenose' file…")
cdata = rqtl2.control_data(zfile)
if "phenose" in cdata:
- perrs = tuple(rqc.phenose_errors(zfile))
+ perrs = tuple(retrieve_errors_with_progress(
+ rconn,fqjobid, zfile, "phenose",
+ (partial(decimal_points_error, mini=6),)))
add_to_errors(rconn, fqjobid, "errors-generic", tuple(
err for err in perrs if isinstance(err, rqfe.MissingFile)))
add_to_errors(rconn, fqjobid, "errors-phenose", tuple(