aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--scripts/rqtl2/phenotypes_qc.py40
1 files changed, 25 insertions, 15 deletions
diff --git a/scripts/rqtl2/phenotypes_qc.py b/scripts/rqtl2/phenotypes_qc.py
index 12b1803..26c049b 100644
--- a/scripts/rqtl2/phenotypes_qc.py
+++ b/scripts/rqtl2/phenotypes_qc.py
@@ -1,6 +1,7 @@
"""Run quality control on phenotypes-specific files in the bundle."""
import sys
import uuid
+import json
import shutil
import logging
import tempfile
@@ -152,6 +153,12 @@ def redis_logger(
rconn.close()
+def push_error(rconn: Redis, fqkey: str, error: InvalidValue) -> InvalidValue:
+ """Persist the error in redis."""
+ rconn.rpush(fqkey, json.dumps(error._asdict()))
+ return error
+
+
def qc_phenocovar_file(
filepath: Path,
redisuri,
@@ -159,44 +166,46 @@ def qc_phenocovar_file(
separator: str,
comment_char: str):
"""Check that `phenocovar` files are structured correctly."""
- with redis_logger(
+ with (redis_logger(
redisuri,
f"{__MODULE__}.qc_phenocovar_file",
filepath.name,
- fqkey) as logger:
+ f"{fqkey}:logs") as logger,
+ Redis.from_url(redisuri, decode_responses=True) as rconn):
logger.info("Running QC on file: %s", filepath.name)
_csvfile = rqtl2.read_csv_file(filepath, separator, comment_char)
_headings = tuple(heading.lower() for heading in next(_csvfile))
_errors: tuple[InvalidValue, ...] = tuple()
+ save_error = partial(push_error, rconn, f"{fqkey}:errors:{filepath.name}")
for heading in ("description", "units"):
if heading not in _headings:
- _errors = (InvalidValue(
+ _errors = (save_error(InvalidValue(
filepath.name,
"header row",
"-",
"-",
(f"File {filepath.name} is missing the {heading} heading "
- "in the header line.")),)
+ "in the header line."))),)
def collect_errors(errors_and_linecount, line):
_errs, _lc = errors_and_linecount
logger.info("Testing record '%s'", line[0])
if len(line) != len(_headings):
- _errs = _errs + (InvalidValue(
+ _errs = _errs + (save_error(InvalidValue(
filepath.name,
line[0],
"-",
"-",
(f"Record {_lc} in file {filepath.name} has a different "
- "number of columns than the number of headings")),)
+ "number of columns than the number of headings"))),)
_line = dict(zip(_headings, line))
if not bool(_line["description"]):
_errs = _errs + (
- InvalidValue(filepath.name,
- _line[_headings[0]],
- "description",
- _line["description"],
- "The description is not provided!"),)
+ save_error(InvalidValue(filepath.name,
+ _line[_headings[0]],
+ "description",
+ _line["description"],
+ "The description is not provided!")),)
return _errs, _lc+1
@@ -368,15 +377,16 @@ def run_qc(# pylint: disable=[too-many-locals]
# - Check that `description` and `units` is present in phenocovar for
# all phenotypes
+ rconn.hset(fullyqualifiedjobid,
+ "fully-qualified-keys:phenocovar",
+ json.dumps(tuple(f"{fullyqualifiedjobid}:phenocovar:{_file}"
+ for _file in cdata.get("phenocovar", []))))
with mproc.Pool(mproc.cpu_count() - 1) as pool:
logger.debug("Check for errors in 'phenocovar' file(s).")
_phenocovar_qc_res = merge_dicts(*pool.starmap(qc_phenocovar_file, tuple(
(extractiondir.joinpath(_file),
args.redisuri,
- chain(
- "phenocovar",
- fullyqualifiedkey(args.jobid),
- fullyqualifiedkey(args.redisprefix)),
+ f"{fullyqualifiedjobid}:phenocovar",
cdata["sep"],
cdata["comment.char"])
for _file in cdata.get("phenocovar", []))))