From 427f73fcc3435b20cc5fb8b70fac16f251bbac3d Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Thu, 17 Oct 2024 14:11:42 -0500 Subject: Extract the R/qtl2 bundle for processing. To allow the files to be processed individually, this commit extracts the bundle's files into a known working directory, in which all further processing will take place. --- scripts/rqtl2/phenotypes_qc.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/scripts/rqtl2/phenotypes_qc.py b/scripts/rqtl2/phenotypes_qc.py index 6a804df..be57513 100644 --- a/scripts/rqtl2/phenotypes_qc.py +++ b/scripts/rqtl2/phenotypes_qc.py @@ -2,6 +2,7 @@ import sys from pathlib import Path from argparse import ArgumentParser +from zipfile import ZipFile from logging import Logger, getLogger, StreamHandler import MySQLdb as mdb @@ -11,6 +12,7 @@ from r_qtl import r_qtl2_qc as rqc from r_qtl import exceptions as rqe from scripts.cli_parser import init_cli_parser +from uploader.files import sha256_digest_over_file from scripts.rqtl2.entry import build_main from scripts.rqtl2.cli_parser import add_bundle_argument @@ -90,7 +92,12 @@ def check_for_averages_files( } -def run_qc(dbconn: mdb.Connection, phenobundle: Path, logger: Logger) -> int: +def extract_bundle(bundle: Path, workdir: Path) -> tuple[Path, tuple[Path, ...]]: + """Extract the bundle.""" + with ZipFile(bundle) as zfile: + extractiondir = workdir.joinpath( + f"{sha256_digest_over_file(bundle)}_phenotype_qc_{bundle.name}") + return extractiondir, rqtl2.extract(zfile, extractiondir) """Run quality control checks on the bundle.""" results = check_for_averages_files( **check_for_mandatory_pheno_keys( @@ -103,6 +110,7 @@ def run_qc(dbconn: mdb.Connection, phenobundle: Path, logger: Logger) -> int: # TODO: Run QC on actual values # Steps: # - Extract file to specific directory + extractiondir, *_bundlefiles = extract_bundle(phenobundle, workingdir) # - For every pheno, phenocovar, phenose, phenonum file, undo 
# transposition where relevant # - Check that `description` and `units` is present in phenocovar for -- cgit v1.2.3