about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2024-02-10 06:57:23 +0300
committer: Frederick Muriuki Muriithi, 2024-02-12 18:17:40 +0300
commit: 4729abd0ab7a8fbeb700a278ac8bdfcf62ab79ac (patch)
tree: b9fbf2d0a312afcc0336d8ee3d3d97d1673a229a /scripts
parent: 445a28579e2139654132643cf9595acfd402c283 (diff)
download: gn-uploader-4729abd0ab7a8fbeb700a278ac8bdfcf62ab79ac.tar.gz
Collect and display errors on 'geno' files in the bundle.
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/qc_on_rqtl2_bundle.py 88
1 files changed, 55 insertions, 33 deletions
diff --git a/scripts/qc_on_rqtl2_bundle.py b/scripts/qc_on_rqtl2_bundle.py
index 43f766a..02c8c3a 100644
--- a/scripts/qc_on_rqtl2_bundle.py
+++ b/scripts/qc_on_rqtl2_bundle.py
@@ -1,7 +1,6 @@
 """Run Quality Control checks on R/qtl2 bundle."""
 import sys
 import json
-from pathlib import Path
 from zipfile import ZipFile
 from argparse import Namespace
 from typing import Union, Sequence
@@ -9,6 +8,8 @@ from logging import Logger, getLogger, StreamHandler
 
 from redis import Redis
 
+from quality_control.errors import InvalidValue
+
 from qc_app import jobs
 from qc_app.db_utils import database_connection
 from qc_app.check_connections import check_db, check_redis
@@ -20,38 +21,59 @@ from scripts.cli_parser import init_cli_parser
 from scripts.process_rqtl2_bundle import parse_job
 from scripts.redis_logger import setup_redis_logger
 
-def add_to_errors(rconn: Redis, fqjobid: str, key: str, errors: Sequence[rqfe.MissingFile]):
+def dict2tuple(dct: dict) -> tuple:
+    """Utility to convert items in dicts to pairs of tuples."""
+    return tuple((key, val) for key,val in dct.items())
+
+def add_to_errors(rconn: Redis,
+                  fqjobid: str,
+                  key: str,
+                  errors: Sequence[Union[InvalidValue, rqfe.MissingFile]]):
     """Add `errors` to a given list of errors"""
-    errs = tuple(set(
-        json.loads(rconn.hget(fqjobid, key) or "[]") +
-        [error.message for error in errors]))
+    errs = tuple(dict(item) for item in set(
+        [dict2tuple(old) for old in
+         json.loads(rconn.hget(fqjobid, key) or "[]")] +
+        [dict2tuple({"type": type(error).__name__, **error._asdict()})
+         for error in errors]))
     rconn.hset(fqjobid, key, json.dumps(errs))
 
-def qc_missing_files(rconn: Redis, fqjobid: str,
-                     bundlefilepath: Union[str, Path]) -> tuple[
-                         tuple[str, str], ...]:
+def qc_missing_files(rconn: Redis,
+                     fqjobid: str,
+                     zfile: ZipFile,
+                     logger: Logger) -> bool:
     """Run QC for files listed in control file that don't exist in bundle."""
-    with ZipFile(str(bundlefilepath), "r") as zfile:
-        missing = rqc.missing_files(zfile)
-        add_to_errors(rconn, fqjobid, "errors-generic", tuple(
-            rqfe.MissingFile(
-                mfile[0], mfile[1], (
-                    f"File '{mfile[1]}' is listed in the control file under "
-                    f"the '{mfile[0]}' key, but it does not actually exist in "
-                    "the bundle."))
-            for mfile in missing))
-
-    return missing
-
-def qc_geno_errors(_rconn, _fqjobid, _job) -> bool:
+    logger.info("Checking for missing files…")
+    missing = rqc.missing_files(zfile)
+    add_to_errors(rconn, fqjobid, "errors-generic", tuple(
+        rqfe.MissingFile(
+            mfile[0], mfile[1], (
+                f"File '{mfile[1]}' is listed in the control file under "
+                f"the '{mfile[0]}' key, but it does not actually exist in "
+                "the bundle."))
+        for mfile in missing))
+    if len(missing) > 0:
+        logger.error("Missing files in the bundle!")
+        return True
+    return False
+
+def qc_geno_errors(rconn, fqjobid, zfile, logger) -> bool:
     """Check for errors in `geno` file(s)."""
+    logger.info("Checking for geno errors…")
+    gerrs = tuple(rqc.geno_errors(zfile))
+    add_to_errors(rconn, fqjobid, "errors-generic", tuple(
+        err for err in gerrs if isinstance(err, rqfe.MissingFile)))
+    add_to_errors(rconn, fqjobid, "errors-geno", tuple(
+        err for err in gerrs if not isinstance(err, rqfe.MissingFile)))
+    if len(gerrs) > 0:
+        logger.error("The 'geno' file has errors.")
+        return True
     return False
 
-def qc_pheno_errors(_rconn, _fqjobid, _job) -> bool:
+def qc_pheno_errors(_rconn, _fqjobid, _zfile, _logger) -> bool:
     """Check for errors in `pheno` file(s)."""
     return False
 
-def qc_phenocovar_errors(_rconn, _fqjobid, _job) -> bool:
+def qc_phenocovar_errors(_rconn, _fqjobid, _zfile, _logger) -> bool:
     """Check for errors in `phenocovar` file(s)."""
     return False
 
@@ -63,16 +85,16 @@ def run_qc(rconn: Redis,
     thejob = parse_job(rconn, args.redisprefix, args.jobid)
     jobmeta = thejob["job-metadata"]
 
-    if len(qc_missing_files(rconn, fqjobid, jobmeta["rqtl2-bundle-file"])) > 0:
-        logger.error("Missing files in the bundle!")
-        return 1
-
-    return (
-        1 if any((
-            qc_geno_errors(rconn, fqjobid, thejob),
-            qc_pheno_errors(rconn, fqjobid, thejob),
-            qc_phenocovar_errors(rconn, fqjobid, thejob)))
-        else 0)
+    with ZipFile(jobmeta["rqtl2-bundle-file"], "r") as zfile:
+        if qc_missing_files(rconn, fqjobid, zfile, logger):
+            return 1
+
+        return (
+            1 if any((
+                qc_geno_errors(rconn, fqjobid, zfile, logger),
+                qc_pheno_errors(rconn, fqjobid, zfile, logger),
+                qc_phenocovar_errors(rconn, fqjobid, zfile, logger)))
+            else 0)
 
 if __name__ == "__main__":
     def main():