aboutsummaryrefslogtreecommitdiff
path: root/scripts/qc_on_rqtl2_bundle.py
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2024-02-10 06:57:23 +0300
committer Frederick Muriuki Muriithi 2024-02-12 18:17:40 +0300
commit 4729abd0ab7a8fbeb700a278ac8bdfcf62ab79ac (patch)
tree b9fbf2d0a312afcc0336d8ee3d3d97d1673a229a /scripts/qc_on_rqtl2_bundle.py
parent 445a28579e2139654132643cf9595acfd402c283 (diff)
download gn-uploader-4729abd0ab7a8fbeb700a278ac8bdfcf62ab79ac.tar.gz
Collect and display errors on 'geno' files in the bundle.
Diffstat (limited to 'scripts/qc_on_rqtl2_bundle.py')
-rw-r--r-- scripts/qc_on_rqtl2_bundle.py 88
1 files changed, 55 insertions, 33 deletions
diff --git a/scripts/qc_on_rqtl2_bundle.py b/scripts/qc_on_rqtl2_bundle.py
index 43f766a..02c8c3a 100644
--- a/scripts/qc_on_rqtl2_bundle.py
+++ b/scripts/qc_on_rqtl2_bundle.py
@@ -1,7 +1,6 @@
"""Run Quality Control checks on R/qtl2 bundle."""
import sys
import json
-from pathlib import Path
from zipfile import ZipFile
from argparse import Namespace
from typing import Union, Sequence
@@ -9,6 +8,8 @@ from logging import Logger, getLogger, StreamHandler
from redis import Redis
+from quality_control.errors import InvalidValue
+
from qc_app import jobs
from qc_app.db_utils import database_connection
from qc_app.check_connections import check_db, check_redis
@@ -20,38 +21,59 @@ from scripts.cli_parser import init_cli_parser
from scripts.process_rqtl2_bundle import parse_job
from scripts.redis_logger import setup_redis_logger
-def add_to_errors(rconn: Redis, fqjobid: str, key: str, errors: Sequence[rqfe.MissingFile]):
+def dict2tuple(dct: dict) -> tuple:
+ """Utility to convert items in dicts to pairs of tuples."""
+ return tuple((key, val) for key,val in dct.items())
+
+def add_to_errors(rconn: Redis,
+ fqjobid: str,
+ key: str,
+ errors: Sequence[Union[InvalidValue, rqfe.MissingFile]]):
"""Add `errors` to a given list of errors"""
- errs = tuple(set(
- json.loads(rconn.hget(fqjobid, key) or "[]") +
- [error.message for error in errors]))
+ errs = tuple(dict(item) for item in set(
+ [dict2tuple(old) for old in
+ json.loads(rconn.hget(fqjobid, key) or "[]")] +
+ [dict2tuple({"type": type(error).__name__, **error._asdict()})
+ for error in errors]))
rconn.hset(fqjobid, key, json.dumps(errs))
-def qc_missing_files(rconn: Redis, fqjobid: str,
- bundlefilepath: Union[str, Path]) -> tuple[
- tuple[str, str], ...]:
+def qc_missing_files(rconn: Redis,
+ fqjobid: str,
+ zfile: ZipFile,
+ logger: Logger) -> bool:
"""Run QC for files listed in control file that don't exist in bundle."""
- with ZipFile(str(bundlefilepath), "r") as zfile:
- missing = rqc.missing_files(zfile)
- add_to_errors(rconn, fqjobid, "errors-generic", tuple(
- rqfe.MissingFile(
- mfile[0], mfile[1], (
- f"File '{mfile[1]}' is listed in the control file under "
- f"the '{mfile[0]}' key, but it does not actually exist in "
- "the bundle."))
- for mfile in missing))
-
- return missing
-
-def qc_geno_errors(_rconn, _fqjobid, _job) -> bool:
+ logger.info("Checking for missing files…")
+ missing = rqc.missing_files(zfile)
+ add_to_errors(rconn, fqjobid, "errors-generic", tuple(
+ rqfe.MissingFile(
+ mfile[0], mfile[1], (
+ f"File '{mfile[1]}' is listed in the control file under "
+ f"the '{mfile[0]}' key, but it does not actually exist in "
+ "the bundle."))
+ for mfile in missing))
+ if len(missing) > 0:
+ logger.error("Missing files in the bundle!")
+ return True
+ return False
+
+def qc_geno_errors(rconn, fqjobid, zfile, logger) -> bool:
"""Check for errors in `geno` file(s)."""
+ logger.info("Checking for geno errors…")
+ gerrs = tuple(rqc.geno_errors(zfile))
+ add_to_errors(rconn, fqjobid, "errors-generic", tuple(
+ err for err in gerrs if isinstance(err, rqfe.MissingFile)))
+ add_to_errors(rconn, fqjobid, "errors-geno", tuple(
+ err for err in gerrs if not isinstance(err, rqfe.MissingFile)))
+ if len(gerrs) > 0:
+ logger.error("The 'geno' file has errors.")
+ return True
return False
-def qc_pheno_errors(_rconn, _fqjobid, _job) -> bool:
+def qc_pheno_errors(_rconn, _fqjobid, _zfile, _logger) -> bool:
"""Check for errors in `pheno` file(s)."""
return False
-def qc_phenocovar_errors(_rconn, _fqjobid, _job) -> bool:
+def qc_phenocovar_errors(_rconn, _fqjobid, _zfile, _logger) -> bool:
"""Check for errors in `phenocovar` file(s)."""
return False
@@ -63,16 +85,16 @@ def run_qc(rconn: Redis,
thejob = parse_job(rconn, args.redisprefix, args.jobid)
jobmeta = thejob["job-metadata"]
- if len(qc_missing_files(rconn, fqjobid, jobmeta["rqtl2-bundle-file"])) > 0:
- logger.error("Missing files in the bundle!")
- return 1
-
- return (
- 1 if any((
- qc_geno_errors(rconn, fqjobid, thejob),
- qc_pheno_errors(rconn, fqjobid, thejob),
- qc_phenocovar_errors(rconn, fqjobid, thejob)))
- else 0)
+ with ZipFile(jobmeta["rqtl2-bundle-file"], "r") as zfile:
+ if qc_missing_files(rconn, fqjobid, zfile, logger):
+ return 1
+
+ return (
+ 1 if any((
+ qc_geno_errors(rconn, fqjobid, zfile, logger),
+ qc_pheno_errors(rconn, fqjobid, zfile, logger),
+ qc_phenocovar_errors(rconn, fqjobid, zfile, logger)))
+ else 0)
if __name__ == "__main__":
def main():