about summary refs log tree commit diff
path: root/scripts/rqtl2
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2024-10-17 14:11:42 -0500
committer: Frederick Muriuki Muriithi, 2024-10-17 14:39:41 -0500
commit: 427f73fcc3435b20cc5fb8b70fac16f251bbac3d (patch)
tree: fc956aaa8bf5b10e509afeeeb787dfa034cb551b /scripts/rqtl2
parent: cd0d415e691f46941fc3bd4a132ed3bdacd97931 (diff)
download: gn-uploader-427f73fcc3435b20cc5fb8b70fac16f251bbac3d.tar.gz
Extract the R/qtl2 bundle for processing.
To allow the files to be processed individually, this commit extracts
the bundle's files into a known working directory in which all further
processing will take place.
Diffstat (limited to 'scripts/rqtl2')
-rw-r--r-- scripts/rqtl2/phenotypes_qc.py 10
1 files changed, 9 insertions, 1 deletions
diff --git a/scripts/rqtl2/phenotypes_qc.py b/scripts/rqtl2/phenotypes_qc.py
index 6a804df..be57513 100644
--- a/scripts/rqtl2/phenotypes_qc.py
+++ b/scripts/rqtl2/phenotypes_qc.py
@@ -2,6 +2,7 @@
 import sys
 from pathlib import Path
 from argparse import ArgumentParser
+from zipfile import ZipFile
 from logging import Logger, getLogger, StreamHandler
 
 import MySQLdb as mdb
@@ -11,6 +12,7 @@ from r_qtl import r_qtl2_qc as rqc
 from r_qtl import exceptions as rqe
 
 from scripts.cli_parser import init_cli_parser
+from uploader.files import sha256_digest_over_file
 from scripts.rqtl2.entry import build_main
 from scripts.rqtl2.cli_parser import add_bundle_argument
 
@@ -90,7 +92,12 @@ def check_for_averages_files(
     }
 
 
-def run_qc(dbconn: mdb.Connection, phenobundle: Path, logger: Logger) -> int:
+def extract_bundle(bundle: Path, workdir: Path) -> tuple[Path, tuple[Path, ...]]:
+    """Extract the bundle."""
+    with ZipFile(bundle) as zfile:
+        extractiondir = workdir.joinpath(
+            f"{sha256_digest_over_file(bundle)}_phenotype_qc_{bundle.name}")
+        return extractiondir, rqtl2.extract(zfile, extractiondir)
     """Run quality control checks on the bundle."""
     results = check_for_averages_files(
         **check_for_mandatory_pheno_keys(
@@ -103,6 +110,7 @@ def run_qc(dbconn: mdb.Connection, phenobundle: Path, logger: Logger) -> int:
     # TODO: Run QC on actual values
     #       Steps:
     #       - Extract file to specific directory
+    extractiondir, *_bundlefiles = extract_bundle(phenobundle, workingdir)
     #       - For every pheno, phenocovar, phenose, phenonum file, undo
     #         transposition where relevant
     #       - Check that `description` and `units` is present in phenocovar for