about summary refs log tree commit diff
path: root/scripts/rqtl2
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/rqtl2')
-rw-r--r-- scripts/rqtl2/phenotypes_qc.py 23
1 files changed, 23 insertions, 0 deletions
diff --git a/scripts/rqtl2/phenotypes_qc.py b/scripts/rqtl2/phenotypes_qc.py
index 438d4da..e495a97 100644
--- a/scripts/rqtl2/phenotypes_qc.py
+++ b/scripts/rqtl2/phenotypes_qc.py
@@ -3,6 +3,7 @@ import sys
 import tempfile
 from pathlib import Path
 from zipfile import ZipFile
+import multiprocessing as mproc
 from logging import Logger, getLogger, StreamHandler
 
 import MySQLdb as mdb
@@ -15,6 +16,7 @@ from uploader.files import sha256_digest_over_file
 from scripts.rqtl2.entry import build_main
 from scripts.rqtl2.cli_parser import add_bundle_argument
 from scripts.cli_parser import init_cli_parser, add_global_data_arguments
+from scripts.rqtl2.bundleutils import build_line_joiner, build_line_splitter
 
 def validate(phenobundle: Path, logger: Logger) -> dict:
     """Check that the bundle is generally valid"""
@@ -98,6 +100,19 @@ def extract_bundle(bundle: Path, workdir: Path) -> tuple[Path, tuple[Path, ...]]
         extractiondir = workdir.joinpath(
             f"{sha256_digest_over_file(bundle)}_phenotype_qc_{bundle.name}")
         return extractiondir, rqtl2.extract(zfile, extractiondir)
+
+
+def undo_transpose(filetype: str, cdata: dict, extractiondir):
+    """Undo transposition of all files of type `filetype` in the bundle."""
+    if len(cdata.get(filetype, [])) > 0 and cdata.get(f"{filetype}_transposed", False):
+        files = (extractiondir.joinpath(_file) for _file in cdata[filetype])
+        for _file in files:
+            rqtl2.transpose_csv_with_rename(
+                _file,
+                build_line_splitter(cdata),
+                build_line_joiner(cdata))
+
+
 def run_qc(# pylint: disable=[too-many-arguments]
         dbconn: mdb.Connection,
         phenobundle: Path,
@@ -119,8 +134,16 @@ def run_qc(# pylint: disable=[too-many-arguments]
     #       Steps:
     #       - Extract file to specific directory
     extractiondir, *_bundlefiles = extract_bundle(phenobundle, workingdir)
+
     #       - For every pheno, phenocovar, phenose, phenonum file, undo
     #         transposition where relevant
+    cdata = rqtl2.control_data(extractiondir)
+    with mproc.Pool(mproc.cpu_count() - 1) as pool:
+        pool.starmap(
+            undo_transpose,
+            ((ftype, cdata, extractiondir)
+             for ftype in ("pheno", "phenocovar", "phenose", "phenonum")))
+
     #       - Check that `description` and `units` is present in phenocovar for
     #         all phenotypes
     #       - Check all phenotypes in pheno files exist in phenocovar files