about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2024-12-03 12:23:16 -0600
committerFrederick Muriuki Muriithi2024-12-03 15:11:24 -0600
commitc6437873142ad90c6ce29dcf0e700e0e6a1c658a (patch)
tree2df0a81034509f67ae54a0baf47c6e8cd636d13f /scripts
parentc7ff9222b74402e068630352ba37de0f15d07b88 (diff)
downloadgn-uploader-c6437873142ad90c6ce29dcf0e700e0e6a1c658a.tar.gz
Save phenocovar errors in redis as they are found.
Diffstat (limited to 'scripts')
-rw-r--r--scripts/rqtl2/phenotypes_qc.py40
1 files changed, 25 insertions, 15 deletions
diff --git a/scripts/rqtl2/phenotypes_qc.py b/scripts/rqtl2/phenotypes_qc.py
index 12b1803..26c049b 100644
--- a/scripts/rqtl2/phenotypes_qc.py
+++ b/scripts/rqtl2/phenotypes_qc.py
@@ -1,6 +1,7 @@
 """Run quality control on phenotypes-specific files in the bundle."""
 import sys
 import uuid
+import json
 import shutil
 import logging
 import tempfile
@@ -152,6 +153,12 @@ def redis_logger(
         rconn.close()
 
 
+def push_error(rconn: Redis, fqkey: str, error: InvalidValue) -> InvalidValue:
+    """Persist the error in redis."""
+    rconn.rpush(fqkey, json.dumps(error._asdict()))
+    return error
+
+
 def qc_phenocovar_file(
         filepath: Path,
         redisuri,
@@ -159,44 +166,46 @@ def qc_phenocovar_file(
         separator: str,
         comment_char: str):
     """Check that `phenocovar` files are structured correctly."""
-    with redis_logger(
+    with (redis_logger(
             redisuri,
             f"{__MODULE__}.qc_phenocovar_file",
             filepath.name,
-            fqkey) as logger:
+            f"{fqkey}:logs") as logger,
+          Redis.from_url(redisuri, decode_responses=True) as rconn):
         logger.info("Running QC on file: %s", filepath.name)
         _csvfile = rqtl2.read_csv_file(filepath, separator, comment_char)
         _headings = tuple(heading.lower() for heading in next(_csvfile))
         _errors: tuple[InvalidValue, ...] = tuple()
+        save_error = partial(push_error, rconn, f"{fqkey}:errors:{filepath.name}")
         for heading in ("description", "units"):
             if heading not in _headings:
-                _errors = (InvalidValue(
+                _errors = (save_error(InvalidValue(
                     filepath.name,
                     "header row",
                     "-",
                     "-",
                     (f"File {filepath.name} is missing the {heading} heading "
-                     "in the header line.")),)
+                     "in the header line."))),)
 
         def collect_errors(errors_and_linecount, line):
             _errs, _lc = errors_and_linecount
             logger.info("Testing record '%s'", line[0])
             if len(line) != len(_headings):
-                _errs = _errs + (InvalidValue(
+                _errs = _errs + (save_error(InvalidValue(
                     filepath.name,
                     line[0],
                     "-",
                     "-",
                     (f"Record {_lc} in file {filepath.name} has a different "
-                        "number of columns than the number of headings")),)
+                     "number of columns than the number of headings"))),)
             _line = dict(zip(_headings, line))
             if not bool(_line["description"]):
                 _errs = _errs + (
-                    InvalidValue(filepath.name,
-                                 _line[_headings[0]],
-                                 "description",
-                                 _line["description"],
-                                 "The description is not provided!"),)
+                    save_error(InvalidValue(filepath.name,
+                                            _line[_headings[0]],
+                                            "description",
+                                            _line["description"],
+                                            "The description is not provided!")),)
 
             return _errs, _lc+1
 
@@ -368,15 +377,16 @@ def run_qc(# pylint: disable=[too-many-locals]
 
     #       - Check that `description` and `units` is present in phenocovar for
     #         all phenotypes
+    rconn.hset(fullyqualifiedjobid,
+               "fully-qualified-keys:phenocovar",
+               json.dumps(tuple(f"{fullyqualifiedjobid}:phenocovar:{_file}"
+                                for _file in cdata.get("phenocovar", []))))
     with mproc.Pool(mproc.cpu_count() - 1) as pool:
         logger.debug("Check for errors in 'phenocovar' file(s).")
         _phenocovar_qc_res = merge_dicts(*pool.starmap(qc_phenocovar_file, tuple(
             (extractiondir.joinpath(_file),
              args.redisuri,
-             chain(
-                 "phenocovar",
-                 fullyqualifiedkey(args.jobid),
-                 fullyqualifiedkey(args.redisprefix)),
+             f"{fullyqualifiedjobid}:phenocovar",
              cdata["sep"],
              cdata["comment.char"])
             for _file in cdata.get("phenocovar", []))))