From 4729abd0ab7a8fbeb700a278ac8bdfcf62ab79ac Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Sat, 10 Feb 2024 06:57:23 +0300 Subject: Collect and display errors on 'geno' files in the bundle. --- scripts/qc_on_rqtl2_bundle.py | 88 +++++++++++++++++++++++++++---------------- 1 file changed, 55 insertions(+), 33 deletions(-) (limited to 'scripts') diff --git a/scripts/qc_on_rqtl2_bundle.py b/scripts/qc_on_rqtl2_bundle.py index 43f766a..02c8c3a 100644 --- a/scripts/qc_on_rqtl2_bundle.py +++ b/scripts/qc_on_rqtl2_bundle.py @@ -1,7 +1,6 @@ """Run Quality Control checks on R/qtl2 bundle.""" import sys import json -from pathlib import Path from zipfile import ZipFile from argparse import Namespace from typing import Union, Sequence @@ -9,6 +8,8 @@ from logging import Logger, getLogger, StreamHandler from redis import Redis +from quality_control.errors import InvalidValue + from qc_app import jobs from qc_app.db_utils import database_connection from qc_app.check_connections import check_db, check_redis @@ -20,38 +21,59 @@ from scripts.cli_parser import init_cli_parser from scripts.process_rqtl2_bundle import parse_job from scripts.redis_logger import setup_redis_logger -def add_to_errors(rconn: Redis, fqjobid: str, key: str, errors: Sequence[rqfe.MissingFile]): +def dict2tuple(dct: dict) -> tuple: + """Utility to convert items in dicts to pairs of tuples.""" + return tuple((key, val) for key,val in dct.items()) + +def add_to_errors(rconn: Redis, + fqjobid: str, + key: str, + errors: Sequence[Union[InvalidValue, rqfe.MissingFile]]): """Add `errors` to a given list of errors""" - errs = tuple(set( - json.loads(rconn.hget(fqjobid, key) or "[]") + - [error.message for error in errors])) + errs = tuple(dict(item) for item in set( + [dict2tuple(old) for old in + json.loads(rconn.hget(fqjobid, key) or "[]")] + + [dict2tuple({"type": type(error).__name__, **error._asdict()}) + for error in errors])) rconn.hset(fqjobid, key, json.dumps(errs)) -def qc_missing_files(rconn: Redis, fqjobid: str, - bundlefilepath: Union[str, Path]) -> tuple[ - tuple[str, str], ...]: +def qc_missing_files(rconn: Redis, + fqjobid: str, + zfile: ZipFile, + logger: Logger) -> bool: """Run QC for files listed in control file that don't exist in bundle.""" - with ZipFile(str(bundlefilepath), "r") as zfile: - missing = rqc.missing_files(zfile) - add_to_errors(rconn, fqjobid, "errors-generic", tuple( - rqfe.MissingFile( - mfile[0], mfile[1], ( - f"File '{mfile[1]}' is listed in the control file under " - f"the '{mfile[0]}' key, but it does not actually exist in " - "the bundle.")) - for mfile in missing)) - - return missing - -def qc_geno_errors(_rconn, _fqjobid, _job) -> bool: + logger.info("Checking for missing files…") + missing = rqc.missing_files(zfile) + add_to_errors(rconn, fqjobid, "errors-generic", tuple( + rqfe.MissingFile( + mfile[0], mfile[1], ( + f"File '{mfile[1]}' is listed in the control file under " + f"the '{mfile[0]}' key, but it does not actually exist in " + "the bundle.")) + for mfile in missing)) + if len(missing) > 0: + logger.error("Missing files in the bundle!") + return True + return False + +def qc_geno_errors(rconn, fqjobid, zfile, logger) -> bool: """Check for errors in `geno` file(s).""" + logger.info("Checking for geno errors…") + gerrs = tuple(rqc.geno_errors(zfile)) + add_to_errors(rconn, fqjobid, "errors-generic", tuple( + err for err in gerrs if isinstance(err, rqfe.MissingFile))) + add_to_errors(rconn, fqjobid, "errors-geno", tuple( + err for err in gerrs if not isinstance(err, rqfe.MissingFile))) + if len(gerrs) > 0: + logger.error("The 'geno' file has errors.") + return True return False -def qc_pheno_errors(_rconn, _fqjobid, _job) -> bool: +def qc_pheno_errors(_rconn, _fqjobid, _zfile, _logger) -> bool: """Check for errors in `pheno` file(s).""" return False -def qc_phenocovar_errors(_rconn, _fqjobid, _job) -> bool: +def qc_phenocovar_errors(_rconn, _fqjobid, _zfile, _logger) -> bool: """Check for errors in `phenocovar` file(s).""" return False @@ -63,16 +85,16 @@ def run_qc(rconn: Redis, thejob = parse_job(rconn, args.redisprefix, args.jobid) jobmeta = thejob["job-metadata"] - if len(qc_missing_files(rconn, fqjobid, jobmeta["rqtl2-bundle-file"])) > 0: - logger.error("Missing files in the bundle!") - return 1 - - return ( - 1 if any(( - qc_geno_errors(rconn, fqjobid, thejob), - qc_pheno_errors(rconn, fqjobid, thejob), - qc_phenocovar_errors(rconn, fqjobid, thejob))) - else 0) + with ZipFile(jobmeta["rqtl2-bundle-file"], "r") as zfile: + if qc_missing_files(rconn, fqjobid, zfile, logger): + return 1 + + return ( + 1 if any(( + qc_geno_errors(rconn, fqjobid, zfile, logger), + qc_pheno_errors(rconn, fqjobid, zfile, logger), + qc_phenocovar_errors(rconn, fqjobid, zfile, logger))) + else 0) if __name__ == "__main__": def main(): -- cgit v1.2.3