aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2024-10-14 13:26:08 -0500
committerFrederick Muriuki Muriithi2024-10-14 13:27:45 -0500
commitb8299e6c7cdb0819883bfd8e142fc71c43440746 (patch)
tree9da6b21b1d021dcf8fa1ea86224bb73a2922780b
parent681b4812214d377ea84801acf6a4b2d8fc688775 (diff)
downloadgn-uploader-b8299e6c7cdb0819883bfd8e142fc71c43440746.tar.gz
Initialise background script for running QC on phenotype bundles.
-rw-r--r--scripts/rqtl2/phenotypes_qc.py134
1 files changed, 134 insertions, 0 deletions
diff --git a/scripts/rqtl2/phenotypes_qc.py b/scripts/rqtl2/phenotypes_qc.py
new file mode 100644
index 0000000..6a804df
--- /dev/null
+++ b/scripts/rqtl2/phenotypes_qc.py
@@ -0,0 +1,134 @@
+"""Run quality control on phenotypes-specific files in the bundle."""
+import sys
+from pathlib import Path
+from argparse import ArgumentParser
+from logging import Logger, getLogger, StreamHandler
+
+import MySQLdb as mdb
+
+from r_qtl import r_qtl2 as rqtl2
+from r_qtl import r_qtl2_qc as rqc
+from r_qtl import exceptions as rqe
+
+from scripts.cli_parser import init_cli_parser
+from scripts.rqtl2.entry import build_main
+from scripts.rqtl2.cli_parser import add_bundle_argument
+
+def validate(phenobundle: Path, logger: Logger) -> dict:
+ """Check that the bundle is generally valid"""
+ try:
+ rqc.validate_bundle(phenobundle)
+ except rqe.RQTLError as rqtlerr:
+ # logger.error("Bundle file validation failed!", exc_info=True)
+ return {
+ "skip": True,
+ "logger": logger,
+ "phenobundle": phenobundle,
+ "errors": (" ".join(rqtlerr.args),)
+ }
+ return {
+ "errors": tuple(),
+ "skip": False,
+ "phenobundle": phenobundle,
+ "logger": logger
+ }
+
+
+def check_for_mandatory_pheno_keys(
+ phenobundle: Path,
+ logger: Logger,
+ **kwargs
+) -> dict:
+ """Check that the mandatory keys exist for phenotypes."""
+ if kwargs.get("skip", False):
+ return {
+ **kwargs,
+ "logger": logger,
+ "phenobundle": phenobundle
+ }
+
+ _mandatory_keys = ("pheno", "phenocovar")
+ _cdata = rqtl2.read_control_file(phenobundle)
+ _errors = kwargs.get("errors", tuple()) + tuple(
+ f"Expected '{key}' file(s) are not declared in the bundle."
+ for key in _mandatory_keys if key not in _cdata.keys())
+ return {
+ **kwargs,
+ "logger": logger,
+ "phenobundle": phenobundle,
+ "errors": _errors,
+ "skip": len(_errors) > 0
+ }
+
+
+def check_for_averages_files(
+ phenobundle: Path,
+ logger: Logger,
+ **kwargs
+) -> dict:
+ """Check that averages files appear together"""
+ if kwargs.get("skip", False):
+ return {
+ **kwargs,
+ "logger": logger,
+ "phenobundle": phenobundle
+ }
+
+ _together = (("phenose", "phenonum"), ("phenonum", "phenose"))
+ _cdata = rqtl2.read_control_file(phenobundle)
+ _errors = kwargs.get("errors", tuple()) + tuple(
+ f"'{first}' is defined in the control file but there is no "
+ f"corresponding '{second}'"
+ for first, second in _together
+ if ((first in _cdata.keys()) and (second not in _cdata.keys())))
+ return {
+ **kwargs,
+ "logger": logger,
+ "phenobundle": phenobundle,
+ "errors": _errors,
+ "skip": len(_errors) > 0
+ }
+
+
+def run_qc(dbconn: mdb.Connection, phenobundle: Path, logger: Logger) -> int:
+ """Run quality control checks on the bundle."""
+ results = check_for_averages_files(
+ **check_for_mandatory_pheno_keys(
+ **validate(phenobundle, logger)))
+ errors = results.get("errors", tuple())
+ if len(errors) > 0:
+ logger.error("We found the following errors:\n%s",
+ "\n".join(f" - {error}" for error in errors))
+ return 1
+ # TODO: Run QC on actual values
+ # Steps:
+ # - Extract file to specific directory
+ # - For every pheno, phenocovar, phenose, phenonum file, undo
+ # transposition where relevant
+ # - Check that `description` and `units` is present in phenocovar for
+ # all phenotypes
+ # - Check all phenotypes in pheno files exist in phenocovar files
+ # - Check all numeric values
+ raise NotImplementedError("WIP!")
+
+
+if __name__ == "__main__":
+
+ def cli_args():
+ """Process command-line arguments for `install_phenos`"""
+ parser = add_bundle_argument(init_cli_parser(
+ program="PhenotypesQC",
+ description=(
+ "Perform Quality Control checks on a phenotypes bundle file")))
+ return parser.parse_args()
+
+ _logger = getLogger("phenotypes_qc")
+ _logger.addHandler(StreamHandler(stream=sys.stderr))
+
+ main = build_main(
+ cli_args(),
+ lambda dbconn, args: run_qc(dbconn, args.rqtl2bundle, _logger),
+ _logger,
+ "DEBUG")
+ sys.exit(main())
+