about summary refs log tree commit diff
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2024-02-05 12:13:16 +0300
committer Frederick Muriuki Muriithi 2024-02-05 12:13:16 +0300
commit 3245182e967f5ac8a296cf47ce3e622c3cb754ed (patch)
tree 5ac4092f4aef5f22229886b7d1bb492fd2559ca3
parent 949ea71eb721a3f5bdff3ba10f7dd7c70899d560 (diff)
download gn-uploader-3245182e967f5ac8a296cf47ce3e622c3cb754ed.tar.gz
Add a QC step: Check all listed files exist
Add a QC step to the R/qtl2 bundle processing - check that all files
that are listed in the control file also exist in the bundle.
-rw-r--r-- scripts/process_rqtl2_bundle.py 12
1 files changed, 12 insertions, 0 deletions
diff --git a/scripts/process_rqtl2_bundle.py b/scripts/process_rqtl2_bundle.py
index 1b26264..6ba746a 100644
--- a/scripts/process_rqtl2_bundle.py
+++ b/scripts/process_rqtl2_bundle.py
@@ -5,11 +5,14 @@ import json
 import traceback
 from typing import Any
 from pathlib import Path
+from zipfile import ZipFile
 from logging import Logger, getLogger, StreamHandler
 
 import MySQLdb as mdb
 from redis import Redis
 
+import r_qtl.r_qtl2_qc as rqc
+
 from qc_app import jobs
 from qc_app.db_utils import database_connection
 from qc_app.check_connections import check_db, check_redis
@@ -49,6 +52,14 @@ def percent_completion(geno: float, pheno: float) -> float:
     """Compute the total completion percent."""
     return 0.5 * (geno + pheno)
 
+def qc_r_qtl2_bundle(bundlefilepath, logger):
+    """Run QC checks on the R/qtl2 bundle."""
+    with ZipFile(bundlefilepath, "r") as zfile:
+        logger.info("Validating the bundle ...")
+        rqc.validate_bundle(zfile)
+        logger.info(
+            "Bundle successfully validated. All listed files are present.")
+
 def process_bundle(dbconn: mdb.Connection,
                    rconn: Redis,
                    rprefix: str,
@@ -58,6 +69,7 @@ def process_bundle(dbconn: mdb.Connection,
     try:
         thejob = parse_job(rconn, rprefix, jobid)
         meta = thejob["bundle-metadata"]
+        qc_r_qtl2_bundle(meta["rqtl2-bundle-file"], logger)
         rconn.hset(jobs.job_key(rprefix, jobid), "geno-percent", "0")
         rconn.hset(jobs.job_key(rprefix, jobid), "pheno-percent", "0")