From 3245182e967f5ac8a296cf47ce3e622c3cb754ed Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 5 Feb 2024 12:13:16 +0300
Subject: Add a QC step: Check all listed files exist

Add a QC step to the R/qtl2 bundle processing - check that all files
that are listed in the control file also exist in the bundle.
---
 scripts/process_rqtl2_bundle.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'scripts/process_rqtl2_bundle.py')

diff --git a/scripts/process_rqtl2_bundle.py b/scripts/process_rqtl2_bundle.py
index 1b26264..6ba746a 100644
--- a/scripts/process_rqtl2_bundle.py
+++ b/scripts/process_rqtl2_bundle.py
@@ -5,11 +5,14 @@ import json
 import traceback
 from typing import Any
 from pathlib import Path
+from zipfile import ZipFile
 from logging import Logger, getLogger, StreamHandler
 
 import MySQLdb as mdb
 from redis import Redis
 
+import r_qtl.r_qtl2_qc as rqc
+
 from qc_app import jobs
 from qc_app.db_utils import database_connection
 from qc_app.check_connections import check_db, check_redis
@@ -49,6 +52,14 @@ def percent_completion(geno: float, pheno: float) -> float:
     """Compute the total completion percent."""
     return 0.5 * (geno + pheno)
 
+def qc_r_qtl2_bundle(bundlefilepath, logger):
+    """Run QC checks on the R/qtl2 bundle."""
+    with ZipFile(bundlefilepath, "r") as zfile:
+        logger.info("Validating the bundle ...")
+        rqc.validate_bundle(zfile)
+        logger.info(
+            "Bundle successfully validated. All listed files are present.")
+
 def process_bundle(dbconn: mdb.Connection,
                    rconn: Redis,
                    rprefix: str,
@@ -58,6 +69,7 @@ def process_bundle(dbconn: mdb.Connection,
     try:
         thejob = parse_job(rconn, rprefix, jobid)
         meta = thejob["bundle-metadata"]
+        qc_r_qtl2_bundle(meta["rqtl2-bundle-file"], logger)
         rconn.hset(jobs.job_key(rprefix, jobid), "geno-percent", "0")
         rconn.hset(jobs.job_key(rprefix, jobid), "pheno-percent", "0")
 
-- 
cgit v1.2.3