diff options
-rw-r--r-- | scripts/rqtl2/phenotypes_qc.py | 28 |
1 files changed, 16 insertions, 12 deletions
diff --git a/scripts/rqtl2/phenotypes_qc.py b/scripts/rqtl2/phenotypes_qc.py index bfbfab6..2c09835 100644 --- a/scripts/rqtl2/phenotypes_qc.py +++ b/scripts/rqtl2/phenotypes_qc.py @@ -264,12 +264,14 @@ def qc_pheno_file(# pylint: disable=[too-many-arguments] error_fn: Callable = decimal_points_error ): """Run QC/QA on a `pheno` file.""" - with redis_logger( + with (redis_logger( redisuri, f"{__MODULE__}.qc_pheno_file", filepath.name, - fqkey) as logger: + f"{fqkey}:logs") as logger, + Redis.from_url(redisuri, decode_responses=True) as rconn): logger.info("Running QC on file: %s", filepath.name) + save_error = partial(push_error, rconn, f"{fqkey}:errors:{filepath.name}") _csvfile = rqtl2.read_csv_file(filepath, separator, comment_char) _headings: tuple[str, ...] = tuple( heading.lower() for heading in next(_csvfile)) @@ -277,24 +279,25 @@ def qc_pheno_file(# pylint: disable=[too-many-arguments] _absent = tuple(pheno for pheno in _headings[1:] if pheno not in phenonames) if len(_absent) > 0: - _errors = _errors + (InvalidValue( + _errors = _errors + (save_error(InvalidValue( filepath.name, "header row", "-", ", ".join(_absent), (f"The phenotype names ({', '.join(samples)}) do not exist in any " - "of the provided phenocovar files.")),) + "of the provided phenocovar files."))),) def collect_errors(errors_and_linecount, line): _errs, _lc = errors_and_linecount + logger.debug("Checking row %s", line[0]) if line[0] not in samples: - _errs = _errs + (InvalidValue( + _errs = _errs + (save_error(InvalidValue( filepath.name, line[0], _headings[0], line[0], (f"The sample named '{line[0]}' does not exist in the database. " - "You will need to upload that first.")),) + "You will need to upload that first."))),) for field, value in zip(_headings[1:], line[1:]): if value in na_strings: @@ -304,15 +307,16 @@ def qc_pheno_file(# pylint: disable=[too-many-arguments] line[0], field, value) - _errs = _errs + ((_err,) if bool(_err) else tuple()) + _errs = _errs + ((save_error(_err),) if bool(_err) else tuple()) + rconn.hset(f"{fqkey}:metadata", "linecount", _lc+1) + rconn.hset(f"{fqkey}:metadata", "total-errors", len(_errs)) return _errs, _lc+1 - return { - filepath.name: dict(zip( - ("errors", "linecount"), - reduce(collect_errors, _csvfile, (_errors, 1)))) - } + logger.debug(f"[{filepath.name}] Collecting errors") + _errors, _linecount = reduce(collect_errors, _csvfile, (_errors, 1)) + logger.debug(f"[{filepath.name}] Finished collecting errors. Returning results …") + return {filepath.name: {"errors": _errors, "linecount": linecount}} def phenotype_names(filepath: Path, |