6 files changed, 719 insertions, 109 deletions
diff --git a/scripts/rqtl2/bundleutils.py b/scripts/rqtl2/bundleutils.py
new file mode 100644
index 0000000..17faa7c
--- /dev/null
+++ b/scripts/rqtl2/bundleutils.py
@@ -0,0 +1,44 @@
+"""Common utilities to operate in R/qtl2 bundles."""
+from typing import Union, Callable
+
+def build_line_splitter(cdata: dict) -> Callable[[str], tuple[Union[str, None], ...]]:
+    """Build and return a function to use to split data in the files.
+
+    Parameters
+    ----------
+    cdata: A dict holding the control information included with the R/qtl2
+        bundle.
+
+    Returns
+    -------
+    A function that takes a string and returns a tuple of strings (NA as None).
+    """
+    separator = cdata["sep"]
+    na_strings = cdata["na.strings"]
+    def __splitter__(line: str) -> tuple[Union[str, None], ...]:
+        return tuple(
+            item if item not in na_strings else None
+            for item in
+            (field.strip() for field in line.strip().split(separator)))
+    return __splitter__
+
+
+def build_line_joiner(cdata: dict) -> Callable[[tuple[Union[str, None], ...]], str]:
+    """Build and return a function to use to join fields into a line of data.
+
+    Parameters
+    ----------
+    cdata: A dict holding the control information included with the R/qtl2
+        bundle.
+
+    Returns
+    -------
+    A function that takes a tuple of fields and returns the joined string.
+    """
+    separator = cdata["sep"]
+    na_strings = cdata["na.strings"]
+    def __joiner__(row: tuple[Union[str, None], ...]) -> str:
+        return separator.join(
+            (na_strings[0] if item is None else item)
+            for item in row)
+    return __joiner__
diff --git a/scripts/rqtl2/cli_parser.py b/scripts/rqtl2/cli_parser.py
index bcc7a4f..9bb60a3 100644
--- a/scripts/rqtl2/cli_parser.py
+++ b/scripts/rqtl2/cli_parser.py
@@ -2,12 +2,22 @@
 from pathlib import Path
 from argparse import ArgumentParser
 
-def add_common_arguments(parser: ArgumentParser) -> ArgumentParser:
-    """Add common arguments to the CLI parser."""
-    parser.add_argument("datasetid",
-                        type=int,
-                        help="The dataset to which the data belongs.")
+def add_bundle_argument(parser: ArgumentParser) -> ArgumentParser:
+    """Add the `rqtl2bundle` argument."""
     parser.add_argument("rqtl2bundle",
                         type=Path,
                         help="Path to R/qtl2 bundle zip file.")
     return parser
+
+
+def add_datasetid_argument(parser: ArgumentParser) -> ArgumentParser:
+    """Add the `datasetid` argument."""
+    parser.add_argument("datasetid",
+                        type=int,
+                        help="The dataset to which the data belongs.")
+    return parser
+
+
+def add_common_arguments(parser: ArgumentParser) -> ArgumentParser:
+    """Add common arguments to the CLI parser."""
+    return add_bundle_argument(add_datasetid_argument(parser))
diff --git a/scripts/rqtl2/entry.py b/scripts/rqtl2/entry.py
index 93fc130..e0e00e7 100644
--- a/scripts/rqtl2/entry.py
+++ b/scripts/rqtl2/entry.py
@@ -1,38 +1,58 @@
 """Build common script-entry structure."""
-from logging import Logger
+import sys
+import logging
 from typing import Callable
 from argparse import Namespace
+from logging import StreamHandler
 
 from redis import Redis
 from MySQLdb import Connection
+from gn_libs.mysqldb import database_connection
 
-from qc_app import jobs
-from qc_app.db_utils import database_connection
-from qc_app.check_connections import check_db, check_redis
+from uploader import jobs
+from uploader.check_connections import check_db, check_redis
 
 from scripts.redis_logger import setup_redis_logger
 
-def build_main(args: Namespace,
-               run_fn: Callable[[Connection, Namespace], int],
-               logger: Logger,
-               loglevel: str = "INFO") -> Callable[[],int]:
+def build_main(
+        args: Namespace,
+        run_fn: Callable[
+            [Redis, Connection, str, Namespace, logging.Logger],
+            int
+        ],
+        logger: logging.Logger
+) -> Callable[[],int]:
     """Build a function to be used as an entry-point for scripts."""
     def main():
-        check_db(args.databaseuri)
-        check_redis(args.redisuri)
-        if not args.rqtl2bundle.exists():
-            logger.error("File not found: '%s'.", args.rqtl2bundle)
-            return 2
-
         with (Redis.from_url(args.redisuri, decode_responses=True) as rconn,
               database_connection(args.databaseuri) as dbconn):
-            fqjobid = jobs.job_key(jobs.jobsnamespace(), args.jobid)
-            logger.addHandler(setup_redis_logger(
-                rconn,
-                fqjobid,
-                f"{fqjobid}:log-messages",
-                args.redisexpiry))
-            logger.setLevel(loglevel)
-            return run_fn(dbconn, args)
+            logger.setLevel(args.loglevel.upper())
+            fqjobid = jobs.job_key(args.redisprefix, args.jobid)
+
+            try:
+                rconn.hset(fqjobid, "status", "started")
+                logger.addHandler(setup_redis_logger(
+                    rconn,
+                    fqjobid,
+                    f"{fqjobid}:log-messages",
+                    args.redisexpiry))
+                logger.addHandler(StreamHandler(stream=sys.stderr))
+
+                check_db(args.databaseuri)
+                check_redis(args.redisuri)
+                if not args.rqtl2bundle.exists():
+                    logger.error("File not found: '%s'.", args.rqtl2bundle)
+                    return 2
+
+                returncode = run_fn(rconn, dbconn, fqjobid, args)
+                if returncode == 0:
+                    rconn.hset(fqjobid, "status", "completed:success")
+                    return returncode
+                rconn.hset(fqjobid, "status", "completed:error")
+                return returncode
+            except Exception as _exc:# pylint: disable=[broad-except]
+                logger.error("The process failed!", exc_info=True)
+                rconn.hset(fqjobid, "status", "completed:error")
+                return 4
 
     return main
diff --git a/scripts/rqtl2/install_genotypes.py b/scripts/rqtl2/install_genotypes.py
index d0731a2..8762655 100644
--- a/scripts/rqtl2/install_genotypes.py
+++ b/scripts/rqtl2/install_genotypes.py
@@ -1,12 +1,13 @@
 """Load genotypes from R/qtl2 bundle into the database."""
 import sys
+import argparse
 import traceback
-from pathlib import Path
 from zipfile import ZipFile
 from functools import reduce
 from typing import Iterator, Optional
-from logging import Logger, getLogger, StreamHandler
+from logging import Logger, getLogger
 
+from redis import Redis
 import MySQLdb as mdb
 from MySQLdb.cursors import DictCursor
 
@@ -19,10 +20,15 @@ from scripts.rqtl2.entry import build_main
 from scripts.rqtl2.cli_parser import add_common_arguments
 from scripts.cli_parser import init_cli_parser, add_global_data_arguments
 
-def insert_markers(dbconn: mdb.Connection,
-                   speciesid: int,
-                   markers: tuple[str, ...],
-                   pmapdata: Optional[Iterator[dict]]) -> int:
+__MODULE__ = "scripts.rqtl2.install_genotypes"
+
+def insert_markers(
+        dbconn: mdb.Connection,
+        speciesid: int,
+        markers: tuple[str, ...],
+        pmapdata: Optional[Iterator[dict]],
+        _logger: Logger
+) -> int:
     """Insert genotype and genotype values into the database."""
     mdata = reduce(#type: ignore[var-annotated]
         lambda acc, row: ({#type: ignore[arg-type, return-value]
@@ -40,16 +46,20 @@ def insert_markers(dbconn: mdb.Connection,
             "VALUES (%(speciesid)s, %(marker)s, %(marker)s, %(chr)s, %(pos)s) "
             "ON DUPLICATE KEY UPDATE SpeciesId=SpeciesId",
             tuple({
-                "speciesid": speciesid,
-                "marker": marker,
-                "chr": mdata.get(marker, {}).get("chr"),
-                "pos": mdata.get(marker, {}).get("pos")
-            } for marker in markers))
+                (speciesid, marker): {
+                    "speciesid": speciesid,
+                    "marker": marker,
+                    "chr": mdata.get(marker, {}).get("chr"),
+                    "pos": mdata.get(marker, {}).get("pos")
+                } for marker in markers}.values()))
         return cursor.rowcount
 
-def insert_individuals(dbconn: mdb.Connection,
-                       speciesid: int,
-                       individuals: tuple[str, ...]) -> int:
+def insert_individuals(
+        dbconn: mdb.Connection,
+        speciesid: int,
+        individuals: tuple[str, ...],
+        _logger: Logger
+) -> int:
     """Insert individuals/samples into the database."""
     with dbconn.cursor() as cursor:
         cursor.executemany(
@@ -60,10 +70,13 @@ def insert_individuals(dbconn: mdb.Connection,
                   for individual in individuals))
         return cursor.rowcount
 
-def cross_reference_individuals(dbconn: mdb.Connection,
-                                speciesid: int,
-                                populationid: int,
-                                individuals: tuple[str, ...]) -> int:
+def cross_reference_individuals(
+        dbconn: mdb.Connection,
+        speciesid: int,
+        populationid: int,
+        individuals: tuple[str, ...],
+        _logger: Logger
+) -> int:
     """Cross reference any inserted individuals."""
     with dbconn.cursor(cursorclass=DictCursor) as cursor:
         paramstr = ", ".join(["%s"] * len(individuals))
@@ -79,11 +92,13 @@ def cross_reference_individuals(dbconn: mdb.Connection,
             tuple(ids))
         return cursor.rowcount
 
-def insert_genotype_data(dbconn: mdb.Connection,
-                         speciesid: int,
-                         genotypes: tuple[dict, ...],
-                         individuals: tuple[str, ...]) -> tuple[
-                             int, tuple[dict, ...]]:
+def insert_genotype_data(
+        dbconn: mdb.Connection,
+        speciesid: int,
+        genotypes: tuple[dict, ...],
+        individuals: tuple[str, ...],
+        _logger: Logger
+) -> tuple[int, tuple[dict, ...]]:
     """Insert the genotype data values into the database."""
     with dbconn.cursor(cursorclass=DictCursor) as cursor:
         paramstr = ", ".join(["%s"] * len(individuals))
@@ -119,11 +134,14 @@ def insert_genotype_data(dbconn: mdb.Connection,
             "markerid": row["markerid"]
         } for row in data)
 
-def cross_reference_genotypes(dbconn: mdb.Connection,
-                              speciesid: int,
-                              datasetid: int,
-                              dataids: tuple[dict, ...],
-                              gmapdata: Optional[Iterator[dict]]) -> int:
+def cross_reference_genotypes(
+        dbconn: mdb.Connection,
+        speciesid: int,
+        datasetid: int,
+        dataids: tuple[dict, ...],
+        gmapdata: Optional[Iterator[dict]],
+        _logger: Logger
+) -> int:
     """Cross-reference the data to the relevant dataset."""
     _rows, markers, mdata = reduce(#type: ignore[var-annotated]
         lambda acc, row: (#type: ignore[return-value,arg-type]
@@ -139,31 +157,45 @@ def cross_reference_genotypes(dbconn: mdb.Connection,
         (tuple(), tuple(), {}))
 
     with dbconn.cursor(cursorclass=DictCursor) as cursor:
-        paramstr = ", ".join(["%s"] * len(markers))
-        cursor.execute("SELECT Id, Name FROM Geno "
-                       f"WHERE SpeciesId=%s AND Name IN ({paramstr})",
-                       (speciesid,) + markers)
-        markersdict = {row["Id"]: row["Name"] for row in cursor.fetchall()}
-        cursor.executemany(
+        markersdict = {}
+        if len(markers) > 0:
+            paramstr = ", ".join(["%s"] * len(markers))
+            insertparams = (speciesid,) + markers
+            selectquery = ("SELECT Id, Name FROM Geno "
+                     f"WHERE SpeciesId=%s AND Name IN ({paramstr})")
+            _logger.debug(
+                "The select query was\n\t%s\n\nwith the parameters\n\t%s",
+                selectquery,
+                (speciesid,) + markers)
+            cursor.execute(selectquery, insertparams)
+            markersdict = {row["Id"]: row["Name"] for row in cursor.fetchall()}
+
+        insertquery = (
             "INSERT INTO GenoXRef(GenoFreezeId, GenoId, DataId, cM) "
             "VALUES(%(datasetid)s, %(markerid)s, %(dataid)s, %(pos)s) "
-            "ON DUPLICATE KEY UPDATE GenoFreezeId=GenoFreezeId",
-            tuple({
-                **row,
-                "datasetid": datasetid,
-                "pos": mdata.get(markersdict.get(
-                    row.get("markerid"), {}), {}).get("pos")
-            } for row in dataids))
+            "ON DUPLICATE KEY UPDATE GenoFreezeId=GenoFreezeId")
+        insertparams = tuple({
+            **row,
+            "datasetid": datasetid,
+            "pos": mdata.get(markersdict.get(
+                row.get("markerid"), "nosuchkey"), {}).get("pos")
+        } for row in dataids)
+        _logger.debug(
+            "The insert query was\n\t%s\n\nwith the parameters\n\t%s",
+            insertquery, insertparams)
+        cursor.executemany(insertquery, insertparams)
         return cursor.rowcount
 
-def install_genotypes(#pylint: disable=[too-many-arguments, too-many-locals]
+def install_genotypes(#pylint: disable=[too-many-locals]
+        rconn: Redis,#pylint: disable=[unused-argument]
         dbconn: mdb.Connection,
-                      speciesid: int,
-                      populationid: int,
-                      datasetid: int,
-                      rqtl2bundle: Path,
-                      logger: Logger = getLogger()) -> int:
+        fullyqualifiedjobid: str,#pylint: disable=[unused-argument]
+        args: argparse.Namespace,
+        logger: Logger = getLogger(__name__)
+) -> int:
     """Load any existing genotypes into the database."""
+    (speciesid, populationid, datasetid, rqtl2bundle) = (
+         args.speciesid, args.populationid, args.datasetid, args.rqtl2bundle)
     count = 0
     with ZipFile(str(rqtl2bundle.absolute()), "r") as zfile:
         try:
@@ -188,20 +220,22 @@ def install_genotypes(#pylint: disable=[too-many-arguments, too-many-locals]
                     speciesid,
                     tuple(key for key in batch[0].keys() if key != "id"),
                     (rqtl2.file_data(zfile, "pmap", cdata) if "pmap" in cdata
-                     else None))
+                     else None),
+                    logger)
                 individuals = tuple(row["id"] for row in batch)
-                insert_individuals(dbconn, speciesid, individuals)
+                insert_individuals(dbconn, speciesid, individuals, logger)
                 cross_reference_individuals(
-                    dbconn, speciesid, populationid, individuals)
+                    dbconn, speciesid, populationid, individuals, logger)
                 _num_rows, dataids = insert_genotype_data(
-                    dbconn, speciesid, batch, individuals)
+                    dbconn, speciesid, batch, individuals, logger)
                 cross_reference_genotypes(
                     dbconn,
                     speciesid,
                     datasetid,
                     dataids,
                     (rqtl2.file_data(zfile, "gmap", cdata)
-                     if "gmap" in cdata else None))
+                     if "gmap" in cdata else None),
+                    logger)
                 count = count + len(batch)
         except rqtl2.InvalidFormat as exc:
             logger.error(str(exc))
@@ -223,15 +257,5 @@ if __name__ == "__main__":
 
         return parser.parse_args()
 
-    thelogger = getLogger("install_genotypes")
-    thelogger.addHandler(StreamHandler(stream=sys.stderr))
-    main = build_main(
-        cli_args(),
-        lambda dbconn, args: install_genotypes(dbconn,
-                                               args.speciesid,
-                                               args.populationid,
-                                               args.datasetid,
-                                               args.rqtl2bundle),
-        thelogger,
-        "INFO")
+    main = build_main(cli_args(), install_genotypes, __MODULE__)
     sys.exit(main())
diff --git a/scripts/rqtl2/install_phenos.py b/scripts/rqtl2/install_phenos.py
index b5cab8e..9059cd6 100644
--- a/scripts/rqtl2/install_phenos.py
+++ b/scripts/rqtl2/install_phenos.py
@@ -1,11 +1,12 @@
 """Load pheno from R/qtl2 bundle into the database."""
 import sys
+import argparse
 import traceback
-from pathlib import Path
 from zipfile import ZipFile
 from functools import reduce
-from logging import Logger, getLogger, StreamHandler
+from logging import Logger, getLogger
 
+from redis import Redis
 import MySQLdb as mdb
 from MySQLdb.cursors import DictCursor
 
@@ -18,6 +19,8 @@ from r_qtl import r_qtl2_qc as rqc
 
 from functional_tools import take
 
+__MODULE__ = "scripts.rqtl2.install_phenos"
+
 def insert_probesets(dbconn: mdb.Connection,
                      platformid: int,
                      phenos: tuple[str, ...]) -> int:
@@ -93,14 +96,15 @@ def cross_reference_probeset_data(dbconn: mdb.Connection,
             } for row in dataids))
         return cursor.rowcount
 
-def install_pheno_files(#pylint: disable=[too-many-arguments, too-many-locals]
+def install_pheno_files(#pylint: disable=[too-many-locals]
+        rconn: Redis,#pylint: disable=[unused-argument]
         dbconn: mdb.Connection,
-        speciesid: int,
-        platformid: int,
-        datasetid: int,
-        rqtl2bundle: Path,
+        fullyqualifiedjobid: str,#pylint: disable=[unused-argument]
+        args: argparse.Namespace,
         logger: Logger = getLogger()) -> int:
     """Load data in `pheno` files and other related files into the database."""
+    (speciesid, platformid, datasetid, rqtl2bundle) = (
+        args.speciesid, args.platformid, args.datasetid, args.rqtl2bundle)
     with ZipFile(str(rqtl2bundle), "r") as zfile:
         try:
             rqc.validate_bundle(zfile)
@@ -155,16 +159,5 @@ if __name__ == "__main__":
 
         return parser.parse_args()
 
-    thelogger = getLogger("install_phenos")
-    thelogger.addHandler(StreamHandler(stream=sys.stderr))
-    main = build_main(
-        cli_args(),
-        lambda dbconn, args: install_pheno_files(dbconn,
-                                                 args.speciesid,
-                                                 args.platformid,
-                                                 args.datasetid,
-                                                 args.rqtl2bundle,
-                                                 thelogger),
-        thelogger,
-        "DEBUG")
+    main = build_main(cli_args(), install_pheno_files, __MODULE__)
     sys.exit(main())
diff --git a/scripts/rqtl2/phenotypes_qc.py b/scripts/rqtl2/phenotypes_qc.py
new file mode 100644
index 0000000..9f11f57
--- /dev/null
+++ b/scripts/rqtl2/phenotypes_qc.py
@@ -0,0 +1,519 @@
+"""Run quality control on phenotypes-specific files in the bundle."""
+import sys
+import uuid
+import json
+import shutil
+import logging
+import tempfile
+import contextlib
+from pathlib import Path
+from logging import Logger
+from zipfile import ZipFile
+from argparse import Namespace
+import multiprocessing as mproc
+from functools import reduce, partial
+from typing import Union, Iterator, Callable, Optional, Sequence
+
+import MySQLdb as mdb
+from redis import Redis
+
+from r_qtl import r_qtl2 as rqtl2
+from r_qtl import r_qtl2_qc as rqc
+from r_qtl import exceptions as rqe
+from r_qtl.fileerrors import InvalidValue
+
+from functional_tools import chain
+
+from quality_control.checks import decimal_places_pattern
+
+from uploader.files import sha256_digest_over_file
+from uploader.samples.models import samples_by_species_and_population
+
+from scripts.rqtl2.entry import build_main
+from scripts.redis_logger import RedisMessageListHandler
+from scripts.rqtl2.cli_parser import add_bundle_argument
+from scripts.cli_parser import init_cli_parser, add_global_data_arguments
+from scripts.rqtl2.bundleutils import build_line_joiner, build_line_splitter
+
+__MODULE__ = "scripts.rqtl2.phenotypes_qc"
+logging.basicConfig(
+    format=("%(asctime)s - %(levelname)s %(name)s: "
+            "(%(pathname)s: %(lineno)d) %(message)s"))
+logger = logging.getLogger(__MODULE__)
+
+def validate(
+        phenobundle: Path,
+        logger: Logger# pylint: disable=[redefined-outer-name]
+) -> dict:
+    """Check that the bundle is generally valid"""
+    try:
+        rqc.validate_bundle(phenobundle)
+    except rqe.RQTLError as rqtlerr:
+        # logger.error("Bundle file validation failed!", exc_info=True)
+        return {
+            "skip": True,
+            "logger": logger,
+            "phenobundle": phenobundle,
+            "errors": (" ".join(rqtlerr.args),)
+        }
+    return {
+        "errors": tuple(),
+        "skip": False,
+        "phenobundle": phenobundle,
+        "logger": logger
+    }
+
+
+def check_for_mandatory_pheno_keys(
+        phenobundle: Path,
+        logger: Logger,# pylint: disable=[redefined-outer-name]
+        **kwargs
+) -> dict:
+    """Check that the mandatory keys exist for phenotypes."""
+    if kwargs.get("skip", False):
+        return {
+            **kwargs,
+            "logger": logger,
+            "phenobundle": phenobundle
+        }
+
+    _mandatory_keys = ("pheno", "phenocovar")
+    _cdata = rqtl2.read_control_file(phenobundle)
+    _errors = kwargs.get("errors", tuple()) + tuple(
+        f"Expected '{key}' file(s) are not declared in the bundle."
+        for key in _mandatory_keys if key not in _cdata.keys())
+    return {
+        **kwargs,
+        "logger": logger,
+        "phenobundle": phenobundle,
+        "errors": _errors,
+        "skip": len(_errors) > 0
+    }
+
+
+def check_for_averages_files(
+        phenobundle: Path,
+        logger: Logger,# pylint: disable=[redefined-outer-name]
+        **kwargs
+) -> dict:
+    """Check that the `phenose` and `phenonum` files are declared together."""
+    if kwargs.get("skip", False):
+        return {
+            **kwargs,
+            "logger": logger,
+            "phenobundle": phenobundle
+        }
+
+    _together = (("phenose", "phenonum"), ("phenonum", "phenose"))
+    _cdata = rqtl2.read_control_file(phenobundle)
+    _errors = kwargs.get("errors", tuple()) + tuple(
+        f"'{first}' is defined in the control file but there is no "
+        f"corresponding '{second}'"
+        for first, second in _together
+        if ((first in _cdata.keys()) and (second not in _cdata.keys())))
+    return {
+        **kwargs,
+        "logger": logger,
+        "phenobundle": phenobundle,
+        "errors": _errors,
+        "skip": len(_errors) > 0
+    }
+
+
+def extract_bundle(
+        bundle: Path, workdir: Path, jobid: uuid.UUID
+) -> tuple[Path, tuple[Path, ...]]:
+    """Extract the bundle."""
+    with ZipFile(bundle) as zfile:
+        extractiondir = workdir.joinpath(
+            f"{str(jobid)}-{sha256_digest_over_file(bundle)}-{bundle.name}")
+        return extractiondir, rqtl2.extract(zfile, extractiondir)
+
+
+def undo_transpose(filetype: str, cdata: dict, extractiondir):
+    """Undo transposition of all files of type `filetype` in the bundle."""
+    if len(cdata.get(filetype, [])) > 0 and cdata.get(f"{filetype}_transposed", False):
+        files = (extractiondir.joinpath(_file) for _file in cdata[filetype])
+        for _file in files:
+            rqtl2.transpose_csv_with_rename(
+                _file,
+                build_line_splitter(cdata),
+                build_line_joiner(cdata))
+
+
+@contextlib.contextmanager
+def redis_logger(
+        redisuri: str, loggername: str, filename: str, fqkey: str
+) -> Iterator[logging.Logger]:
+    """Build a Redis message-list logger."""
+    rconn = Redis.from_url(redisuri, decode_responses=True)
+    _logger = logging.getLogger(loggername)
+    _logger.propagate = False
+    handler = RedisMessageListHandler(
+        rconn,
+        fullyqualifiedkey(fqkey, filename))#type: ignore[arg-type]
+    handler.setFormatter(logging.getLogger().handlers[0].formatter)
+    _logger.addHandler(handler)
+    try:
+        yield _logger
+    finally:
+        rconn.close()
+
+
+def push_error(rconn: Redis, fqkey: str, error: InvalidValue) -> InvalidValue:
+    """Persist the error in redis."""
+    rconn.rpush(fqkey, json.dumps(error._asdict()))
+    return error
+
+
+def file_fqkey(prefix: str, section: str, filepath: Path) -> str:
+    """Build a file's fully-qualified key in a consistent manner."""
+    return f"{prefix}:{section}:{filepath.name}"
+
+
+def qc_phenocovar_file(
+        filepath: Path,
+        redisuri,
+        fqkey: str,
+        separator: str,
+        comment_char: str):
+    """Check that `phenocovar` files are structured correctly."""
+    with (redis_logger(
+            redisuri,
+            f"{__MODULE__}.qc_phenocovar_file",
+            filepath.name,
+            f"{fqkey}:logs") as _logger,
+          Redis.from_url(redisuri, decode_responses=True) as rconn):
+        print("Running QC on file: ", filepath.name)
+        _csvfile = rqtl2.read_csv_file(filepath, separator, comment_char)
+        _headings = tuple(heading.lower() for heading in next(_csvfile))
+        _errors: tuple[InvalidValue, ...] = tuple()
+        save_error = partial(
+            push_error, rconn, file_fqkey(fqkey, "errors", filepath))
+        for heading in ("description", "units"):
+            if heading not in _headings:
+                _errors = (save_error(InvalidValue(
+                    filepath.name,
+                    "header row",
+                    "-",
+                    "-",
+                    (f"File {filepath.name} is missing the {heading} heading "
+                     "in the header line."))),)
+
+        def collect_errors(errors_and_linecount, line):
+            _errs, _lc = errors_and_linecount
+            _logger.info("Testing record '%s'", line[0])
+            if len(line) != len(_headings):
+                _errs = _errs + (save_error(InvalidValue(
+                    filepath.name,
+                    line[0],
+                    "-",
+                    "-",
+                    (f"Record {_lc} in file {filepath.name} has a different "
+                     "number of columns than the number of headings"))),)
+            _line = dict(zip(_headings, line))
+            if not bool(_line.get("description")):
+                _errs = _errs + (
+                    save_error(InvalidValue(filepath.name,
+                                            _line[_headings[0]],
+                                            "description",
+                                            _line.get("description"),
+                                            "The description is not provided!")),)
+
+            rconn.hset(file_fqkey(fqkey, "metadata", filepath),
+                       mapping={
+                           "status": "checking",
+                           "linecount": _lc+1,
+                           "total-errors": len(_errs)
+                       })
+            return _errs, _lc+1
+
+        _errors, _linecount = reduce(collect_errors, _csvfile, (_errors, 1))
+        rconn.hset(file_fqkey(fqkey, "metadata", filepath),
+                   mapping={
+                       "status": "completed",
+                       "linecount": _linecount,
+                       "total-errors": len(_errors)
+                   })
+        return {filepath.name: {"errors": _errors, "linecount": _linecount}}
+
+
+def merge_dicts(*dicts):
+    """Merge multiple dicts into a single one."""
+    return reduce(lambda merged, dct: {**merged, **dct}, dicts, {})
+
+
+def decimal_points_error(# pylint: disable=[too-many-arguments,too-many-positional-arguments]
+        filename: str,
+        rowtitle: str,
+        coltitle: str,
+        cellvalue: str,
+        message: str,
+        decimal_places: int = 1
+) -> Optional[InvalidValue]:
+    """Return an error if the value fails the decimal-places pattern check."""
+    if not bool(decimal_places_pattern(decimal_places).match(cellvalue)):
+        return InvalidValue(filename, rowtitle, coltitle, cellvalue, message)
+    return None
+
+
+def integer_error(
+        filename: str,
+        rowtitle: str,
+        coltitle: str,
+        cellvalue: str,
+        message: str
+) -> Optional[InvalidValue]:
+    """Return an error if the value is not a positive, non-zero integer."""
+    try:
+        value = int(cellvalue)
+        if value <= 0:
+            raise ValueError("Must be a non-zero, positive number.")
+        return None
+    except ValueError as _verr:
+        return InvalidValue(filename, rowtitle, coltitle, cellvalue, message)
+
+
+def qc_pheno_file(# pylint: disable=[too-many-locals, too-many-arguments, too-many-positional-arguments]
+        filepath: Path,
+        redisuri: str,
+        fqkey: str,
+        samples: tuple[str, ...],
+        phenonames: tuple[str, ...],
+        separator: str,
+        comment_char: str,
+        na_strings: Sequence[str],
+        error_fn: Callable = decimal_points_error
+):
+    """Run QC/QA on a `pheno` file."""
+    with (redis_logger(
+            redisuri,
+            f"{__MODULE__}.qc_pheno_file",
+            filepath.name,
+            f"{fqkey}:logs") as _logger,
+          Redis.from_url(redisuri, decode_responses=True) as rconn):
+        print("Running QC on file: ", filepath.name)
+        save_error = partial(
+            push_error, rconn, file_fqkey(fqkey, "errors", filepath))
+        _csvfile = rqtl2.read_csv_file(filepath, separator, comment_char)
+        _headings: tuple[str, ...] = tuple(
+            # select lowercase for comparison purposes
+            heading.lower() for heading in next(_csvfile))
+        _errors: tuple[InvalidValue, ...] = tuple()
+
+        _absent = tuple(pheno for pheno in _headings[1:] if pheno
+                        not in tuple(
+                            # lower to have consistent case with headings for
+                            # comparison
+                            phe.lower() for phe in phenonames))
+        if len(_absent) > 0:
+            _errors = _errors + (save_error(InvalidValue(
+                filepath.name,
+                "header row",
+                "-",
+                ", ".join(_absent),
+                ("The following phenotype names do not exist in any of the "
+                 f"provided phenocovar files: ({', '.join(_absent)})"))),)
+
+        def collect_errors(errors_and_linecount, line):
+            _errs, _lc = errors_and_linecount
+            _logger.debug("Checking row %s", line[0])
+            if line[0] not in samples:
+                _errs = _errs + (save_error(InvalidValue(
+                filepath.name,
+                line[0],
+                _headings[0],
+                line[0],
+                (f"The sample named '{line[0]}' does not exist in the database. "
+                 "You will need to upload that first."))),)
+
+            for field, value in zip(_headings[1:], line[1:]):
+                if value in na_strings:
+                    continue
+                _err = error_fn(
+                    filepath.name,
+                    line[0],
+                    field,
+                    value)
+                _errs = _errs + ((save_error(_err),) if bool(_err) else tuple())
+
+            rconn.hset(file_fqkey(fqkey, "metadata", filepath),
+                       mapping={
+                           "status": "checking",
+                           "linecount": _lc+1,
+                           "total-errors": len(_errs)
+                       })
+            return _errs, _lc+1
+
+        _errors, _linecount = reduce(collect_errors, _csvfile, (_errors, 1))
+        rconn.hset(file_fqkey(fqkey, "metadata", filepath),
+                   mapping={
+                       "status": "completed",
+                       "linecount": _linecount,
+                       "total-errors": len(_errors)
+                   })
+        return {filepath.name: {"errors": _errors, "linecount": _linecount}}
+
+
+def phenotype_names(filepath: Path,
+                    separator: str,
+                    comment_char: str) -> tuple[str, ...]:
+    """Read phenotype names (first field of each row after the first) from file."""
+    # A generator avoids rebuilding the whole tuple once per row (quadratic).
+    rows = rqtl2.read_csv_file(filepath, separator, comment_char)
+    return tuple(row[0] for row in rows)[1:]
+
+def fullyqualifiedkey(
+        prefix: str,
+        rest: Optional[str] = None
+) -> Union[Callable[[str], str], str]:
+    """Compute fully qualified Redis key, or a key-builder if `rest` is falsy."""
+    if rest:
+        return f"{prefix}:{rest}"
+    return lambda _rest: f"{prefix}:{_rest}"
+
+def run_qc(# pylint: disable=[too-many-locals]
+        rconn: Redis,
+        dbconn: mdb.Connection,
+        fullyqualifiedjobid: str,
+        args: Namespace
+) -> int:
+    """Run quality control checks on the bundle at `args.rqtl2bundle`.
+
+    Returns 1 if validation reports errors, else 0 (per-file results are unused).
+    """
+    print("Beginning the quality assurance checks.")
+    results = check_for_averages_files(
+        **check_for_mandatory_pheno_keys(
+            **validate(args.rqtl2bundle, logger)))
+    errors = results.get("errors", tuple())
+    if len(errors) > 0:
+        logger.error("We found the following errors:\n%s",
+                     "\n".join(f" - {error}" for error in errors))
+        return 1
+    # Run QC on actual values. Steps:
+    #       - Extract file to specific directory
+    extractiondir, *_bundlefiles = extract_bundle(
+        args.rqtl2bundle, args.workingdir, args.jobid)
+
+    #       - Undo transposition where relevant (pheno/phenocovar/phenose/
+    #         phenonum). max(..., 1): a single-CPU host would give 0 workers.
+    cdata = rqtl2.control_data(extractiondir)
+    with mproc.Pool(max(mproc.cpu_count() - 1, 1)) as pool:
+        pool.starmap(
+            undo_transpose,
+            ((ftype, cdata, extractiondir)
+             for ftype in ("pheno", "phenocovar", "phenose", "phenonum")))
+
+    #       - Fetch samples/individuals from database.
+    print("Fetching samples/individuals from the database.")
+    samples = tuple(#type: ignore[var-annotated]
+        item for item in set(reduce(
+            lambda acc, item: acc + (
+                item["Name"], item["Name2"], item["Symbol"], item["Alias"]),
+            samples_by_species_and_population(
+                dbconn, args.speciesid, args.populationid),
+            tuple()))
+        if bool(item))
+
+    #       - Check that `description` and `units` is present in phenocovar for
+    #         all phenotypes
+    rconn.hset(fullyqualifiedjobid,
+               "fully-qualified-keys:phenocovar",
+               json.dumps(tuple(f"{fullyqualifiedjobid}:phenocovar:{_file}"
+                                for _file in cdata.get("phenocovar", []))))
+    with mproc.Pool(max(mproc.cpu_count() - 1, 1)) as pool:  # >= 1 worker
+        print("Check for errors in 'phenocovar' file(s).")
+        _phenocovar_qc_res = merge_dicts(*pool.starmap(qc_phenocovar_file, tuple(
+            (extractiondir.joinpath(_file),
+             args.redisuri,
+             f"{fullyqualifiedjobid}:phenocovar",
+             cdata["sep"],
+             cdata["comment.char"])
+            for _file in cdata.get("phenocovar", []))))
+
+        #       - Check all samples in pheno files exist in database
+        #       - Check all phenotypes in pheno files exist in phenocovar files
+        #       - Check all numeric values in pheno files
+        phenonames = tuple(set(
+            name for names in pool.starmap(phenotype_names, tuple(
+            (extractiondir.joinpath(_file), cdata["sep"], cdata["comment.char"])
+            for _file in cdata.get("phenocovar", [])))
+            for name in names))
+
+        dec_err_fn = partial(decimal_points_error, message=(
+            "Expected a non-negative number with at least one decimal "
+            "place."))
+
+        print("Check for errors in 'pheno' file(s).")
+        _pheno_qc_res = merge_dicts(*pool.starmap(qc_pheno_file, tuple((
+            extractiondir.joinpath(_file),
+            args.redisuri,
+            chain(
+                "pheno",
+                fullyqualifiedkey(args.jobid),
+                fullyqualifiedkey(args.redisprefix)),
+            samples,
+            phenonames,
+            cdata["sep"],
+            cdata["comment.char"],
+            cdata["na.strings"],
+            dec_err_fn
+        ) for _file in cdata.get("pheno", []))))
+
+        #       - Check the 3 checks above for phenose and phenonum values too
+        print("Check for errors in 'phenose' file(s).")
+        _phenose_qc_res = merge_dicts(*pool.starmap(qc_pheno_file, tuple((
+            extractiondir.joinpath(_file),
+            args.redisuri,
+            chain(
+                "phenose",
+                fullyqualifiedkey(args.jobid),
+                fullyqualifiedkey(args.redisprefix)),
+            samples,
+            phenonames,
+            cdata["sep"],
+            cdata["comment.char"],
+            cdata["na.strings"],
+            dec_err_fn
+        ) for _file in cdata.get("phenose", []))))
+
+        print("Check for errors in 'phenonum' file(s).")
+        _phenonum_qc_res = merge_dicts(*pool.starmap(qc_pheno_file, tuple((
+            extractiondir.joinpath(_file),
+            args.redisuri,
+            chain(
+                "phenonum",
+                fullyqualifiedkey(args.jobid),
+                fullyqualifiedkey(args.redisprefix)),
+            samples,
+            phenonames,
+            cdata["sep"],
+            cdata["comment.char"],
+            cdata["na.strings"],
+            partial(integer_error, message=(
+                "Expected a non-negative, non-zero integer value."))
+        ) for _file in cdata.get("phenonum", []))))
+
+    #       - Delete all extracted files
+    shutil.rmtree(extractiondir)
+    return 0
+
+
+if __name__ == "__main__":
+    def cli_args():
+        """Parse the command-line arguments for the phenotypes QC script."""
+        argparser = init_cli_parser(
+            program="PhenotypesQC",
+            description="Perform Quality Control checks on a phenotypes bundle file")
+        argparser = add_bundle_argument(add_global_data_arguments(argparser))
+        argparser.add_argument(
+            "--workingdir",
+            default=f"{tempfile.gettempdir()}/phenotypes_qc",
+            help=("The directory where this script will put its intermediate "
+                  "files."),
+            type=Path)
+        return argparser.parse_args()
+
+    main = build_main(cli_args(), run_qc, logger)
+    sys.exit(main())