about | summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2024-02-05 12:13:16 +0300
committer: Frederick Muriuki Muriithi, 2024-02-05 12:13:16 +0300
commit: 3245182e967f5ac8a296cf47ce3e622c3cb754ed (patch)
tree: 5ac4092f4aef5f22229886b7d1bb492fd2559ca3 /scripts
parent: 949ea71eb721a3f5bdff3ba10f7dd7c70899d560 (diff)
download: gn-uploader-3245182e967f5ac8a296cf47ce3e622c3cb754ed.tar.gz
Add a QC step: Check all listed files exist
Add a QC step to the R/qtl2 bundle processing - check that all files that are listed in the control file also exist in the bundle.
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/process_rqtl2_bundle.py | 12
1 files changed, 12 insertions, 0 deletions
diff --git a/scripts/process_rqtl2_bundle.py b/scripts/process_rqtl2_bundle.py
index 1b26264..6ba746a 100644
--- a/scripts/process_rqtl2_bundle.py
+++ b/scripts/process_rqtl2_bundle.py
@@ -5,11 +5,14 @@ import json
import traceback
from typing import Any
from pathlib import Path
+from zipfile import ZipFile
from logging import Logger, getLogger, StreamHandler
import MySQLdb as mdb
from redis import Redis
+import r_qtl.r_qtl2_qc as rqc
+
from qc_app import jobs
from qc_app.db_utils import database_connection
from qc_app.check_connections import check_db, check_redis
@@ -49,6 +52,14 @@ def percent_completion(geno: float, pheno: float) -> float:
"""Compute the total completion percent."""
return 0.5 * (geno + pheno)
+def qc_r_qtl2_bundle(bundlefilepath, logger):
+ """Run QC checks on the R/qtl2 bundle: open the zip archive at `bundlefilepath`, pass it to `rqc.validate_bundle`, and report progress on `logger`."""
+ with ZipFile(bundlefilepath, "r") as zfile:
+ logger.info("Validating the bundle ...")
+ rqc.validate_bundle(zfile)  # NOTE(review): presumably raises when a listed file is missing -- confirm in r_qtl.r_qtl2_qc
+ logger.info(
+ "Bundle successfully validated. All listed files are present.")
+
def process_bundle(dbconn: mdb.Connection,
rconn: Redis,
rprefix: str,
@@ -58,6 +69,7 @@ def process_bundle(dbconn: mdb.Connection,
try:
thejob = parse_job(rconn, rprefix, jobid)
meta = thejob["bundle-metadata"]
+ qc_r_qtl2_bundle(meta["rqtl2-bundle-file"], logger)
rconn.hset(jobs.job_key(rprefix, jobid), "geno-percent", "0")
rconn.hset(jobs.job_key(rprefix, jobid), "pheno-percent", "0")