diff options
Diffstat (limited to 'scripts/rqtl2/phenotypes_qc.py')
-rw-r--r-- | scripts/rqtl2/phenotypes_qc.py | 23 |
1 files changed, 23 insertions, 0 deletions
diff --git a/scripts/rqtl2/phenotypes_qc.py b/scripts/rqtl2/phenotypes_qc.py index 438d4da..e495a97 100644 --- a/scripts/rqtl2/phenotypes_qc.py +++ b/scripts/rqtl2/phenotypes_qc.py @@ -3,6 +3,7 @@ import sys import tempfile from pathlib import Path from zipfile import ZipFile +import multiprocessing as mproc from logging import Logger, getLogger, StreamHandler import MySQLdb as mdb @@ -15,6 +16,7 @@ from uploader.files import sha256_digest_over_file from scripts.rqtl2.entry import build_main from scripts.rqtl2.cli_parser import add_bundle_argument from scripts.cli_parser import init_cli_parser, add_global_data_arguments +from scripts.rqtl2.bundleutils import build_line_joiner, build_line_splitter def validate(phenobundle: Path, logger: Logger) -> dict: """Check that the bundle is generally valid""" @@ -98,6 +100,19 @@ def extract_bundle(bundle: Path, workdir: Path) -> tuple[Path, tuple[Path, ...]] extractiondir = workdir.joinpath( f"{sha256_digest_over_file(bundle)}_phenotype_qc_{bundle.name}") return extractiondir, rqtl2.extract(zfile, extractiondir) + + +def undo_transpose(filetype: str, cdata: dict, extractiondir): + """Undo transposition of all files of type `filetype` in the bundle.""" + if len(cdata.get(filetype, [])) > 0 and cdata.get(f"{filetype}_transposed", False): + files = (extractiondir.joinpath(_file) for _file in cdata[filetype]) + for _file in files: + rqtl2.transpose_csv_with_rename( + _file, + build_line_splitter(cdata), + build_line_joiner(cdata)) + + def run_qc(# pylint: disable=[too-many-arguments] dbconn: mdb.Connection, phenobundle: Path, @@ -119,8 +134,16 @@ def run_qc(# pylint: disable=[too-many-arguments] # Steps: # - Extract file to specific directory extractiondir, *_bundlefiles = extract_bundle(phenobundle, workingdir) + # - For every pheno, phenocovar, phenose, phenonum file, undo # transposition where relevant + cdata = rqtl2.control_data(extractiondir) + with mproc.Pool(mproc.cpu_count() - 1) as pool: + pool.starmap( + undo_transpose, 
+ ((ftype, cdata, extractiondir) + for ftype in ("pheno", "phenocovar", "phenose", "phenonum"))) + # - Check that `description` and `units` is present in phenocovar for # all phenotypes # - Check all phenotypes in pheno files exist in phenocovar files |