diff options
-rw-r--r-- | scripts/rqtl2/phenotypes_qc.py | 63 |
1 files changed, 52 insertions, 11 deletions
diff --git a/scripts/rqtl2/phenotypes_qc.py b/scripts/rqtl2/phenotypes_qc.py
index 668fca0..ccd2110 100644
--- a/scripts/rqtl2/phenotypes_qc.py
+++ b/scripts/rqtl2/phenotypes_qc.py
@@ -4,9 +4,9 @@ import shutil
 import tempfile
 from pathlib import Path
 from zipfile import ZipFile
-from functools import reduce
 import multiprocessing as mproc
-from typing import Optional, Sequence
+from functools import reduce, partial
+from typing import Callable, Optional, Sequence
 from logging import Logger, getLogger, StreamHandler
 
 import MySQLdb as mdb
@@ -183,13 +183,32 @@ def decimal_points_error(
     return None
 
 
+def integer_error(
+        filename: str,
+        rowtitle: str,
+        coltitle: str,
+        cellvalue: str,
+        message: str,
+        decimal_places: int = 1
+) -> Optional[InvalidValue]:
+    """Returns an error if the value does not meet the checks."""
+    try:
+        value = int(cellvalue)
+        if value <= 0:
+            raise ValueError("Must be a non-zero, positive number.")
+        return None
+    except ValueError as _verr:
+        return InvalidValue(filename, rowtitle, coltitle, cellvalue, message)
+
+
 def qc_pheno_file(
         filepath: Path,
         samples: tuple[str, ...],
         phenonames: tuple[str, ...],
         separator: str,
         comment_char: str,
-        na_strings: Sequence[str]
+        na_strings: Sequence[str],
+        error_fn: Callable = decimal_points_error
 ):
     """Run QC/QA on a `pheno` file."""
     _csvfile = rqtl2.read_csv_file(filepath, separator, comment_char)
@@ -220,13 +239,11 @@ def qc_pheno_file(
         for field, value in zip(_headings[1:], line[1:]):
             if value in na_strings:
                 continue
-            _err = decimal_points_error(
+            _err = error_fn(
                 filepath.name,
                 line[0],
                 field,
-                value,
-                ("Expected a non-negative number with at least one decimal "
-                 "place."))
+                value)
             _errs = _errs + ((_err,) if bool(_err) else tuple())
 
     return _errs, _lc+1
@@ -303,18 +320,42 @@ def run_qc(# pylint: disable=[too-many-arguments]
                 for _file in cdata.get("phenocovar", [])))
             for name in names))
 
+        dec_err_fn = partial(decimal_points_error, message=(
+            "Expected a non-negative number with at least one decimal "
+            "place."))
         pheno_qc_res = merge_dicts(*pool.starmap(qc_pheno_file, tuple((
             extractiondir.joinpath(_file),
             samples,
             phenonames,
             cdata["sep"],
             cdata["comment.char"],
-            cdata["na.strings"]
+            cdata["na.strings"],
+            dec_err_fn
         ) for _file in cdata.get("pheno", []))))
 
-    # - Check the 3 checks above for phenose and phenonum values too
-    # qc_phenose_files(…)
-    # qc_phenonum_files(…)
+        # - Check the 3 checks above for phenose and phenonum values too
+        # qc_phenose_files(…)
+        # qc_phenonum_files(…)
+        phenose_qc_res = merge_dicts(*pool.starmap(qc_pheno_file, tuple((
+            extractiondir.joinpath(_file),
+            samples,
+            phenonames,
+            cdata["sep"],
+            cdata["comment.char"],
+            cdata["na.strings"],
+            dec_err_fn
+        ) for _file in cdata.get("phenose", []))))
+
+        phenonum_qc_res = merge_dicts(*pool.starmap(qc_pheno_file, tuple((
+            extractiondir.joinpath(_file),
+            samples,
+            phenonames,
+            cdata["sep"],
+            cdata["comment.char"],
+            cdata["na.strings"],
+            partial(integer_error, message=(
+                "Expected a non-negative, non-zero integer value."))
+        ) for _file in cdata.get("phenonum", []))))
 
     # - Delete all extracted files
     shutil.rmtree(extractiondir)