about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- scripts/rqtl2/phenotypes_qc.py 23
1 files changed, 23 insertions, 0 deletions
diff --git a/scripts/rqtl2/phenotypes_qc.py b/scripts/rqtl2/phenotypes_qc.py
index 438d4da..e495a97 100644
--- a/scripts/rqtl2/phenotypes_qc.py
+++ b/scripts/rqtl2/phenotypes_qc.py
@@ -3,6 +3,7 @@ import sys
import tempfile
from pathlib import Path
from zipfile import ZipFile
+import multiprocessing as mproc
from logging import Logger, getLogger, StreamHandler
import MySQLdb as mdb
@@ -15,6 +16,7 @@ from uploader.files import sha256_digest_over_file
from scripts.rqtl2.entry import build_main
from scripts.rqtl2.cli_parser import add_bundle_argument
from scripts.cli_parser import init_cli_parser, add_global_data_arguments
+from scripts.rqtl2.bundleutils import build_line_joiner, build_line_splitter
def validate(phenobundle: Path, logger: Logger) -> dict:
"""Check that the bundle is generally valid"""
@@ -98,6 +100,19 @@ def extract_bundle(bundle: Path, workdir: Path) -> tuple[Path, tuple[Path, ...]]
extractiondir = workdir.joinpath(
f"{sha256_digest_over_file(bundle)}_phenotype_qc_{bundle.name}")
return extractiondir, rqtl2.extract(zfile, extractiondir)
+
+
+def undo_transpose(filetype: str, cdata: dict, extractiondir):
+    """Undo transposition of all `filetype` files the bundle flags as transposed."""
+    if cdata.get(filetype) and cdata.get(f"{filetype}_transposed", False):
+        # Splitter/joiner depend only on `cdata`: build once, not once per file.
+        splitter = build_line_splitter(cdata)
+        joiner = build_line_joiner(cdata)
+        for _file in cdata[filetype]:
+            rqtl2.transpose_csv_with_rename(
+                extractiondir.joinpath(_file), splitter, joiner)
+
+
def run_qc(# pylint: disable=[too-many-arguments]
dbconn: mdb.Connection,
phenobundle: Path,
@@ -119,8 +134,16 @@ def run_qc(# pylint: disable=[too-many-arguments]
# Steps:
# - Extract file to specific directory
extractiondir, *_bundlefiles = extract_bundle(phenobundle, workingdir)
+
# - For every pheno, phenocovar, phenose, phenonum file, undo
# transposition where relevant
+ cdata = rqtl2.control_data(extractiondir)
+ with mproc.Pool(mproc.cpu_count() - 1) as pool:
+ pool.starmap(
+ undo_transpose,
+ ((ftype, cdata, extractiondir)
+ for ftype in ("pheno", "phenocovar", "phenose", "phenonum")))
+
# - Check that `description` and `units` is present in phenocovar for
# all phenotypes
# - Check all phenotypes in pheno files exist in phenocovar files