From 1b61b59dcc8e92cbeaedfa7183df281555ba2828 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Thu, 8 Feb 2024 04:00:20 +0300 Subject: Use error objects rather than plain tuple values. --- r_qtl/fileerrors.py | 5 +++++ r_qtl/r_qtl2_qc.py | 24 ++++++++++++++---------- tests/r_qtl/test_r_qtl2_qc.py | 21 ++++++++++++--------- 3 files changed, 31 insertions(+), 19 deletions(-) create mode 100644 r_qtl/fileerrors.py diff --git a/r_qtl/fileerrors.py b/r_qtl/fileerrors.py new file mode 100644 index 0000000..e76676c --- /dev/null +++ b/r_qtl/fileerrors.py @@ -0,0 +1,5 @@ +"""QC errors as distinguished from actual exceptions""" +from collections import namedtuple + +MissingFile = namedtuple( + "MissingFile", ("controlfilekey", "filename", "message")) diff --git a/r_qtl/r_qtl2_qc.py b/r_qtl/r_qtl2_qc.py index b45c17a..8d4fc19 100644 --- a/r_qtl/r_qtl2_qc.py +++ b/r_qtl/r_qtl2_qc.py @@ -2,11 +2,14 @@ import re from zipfile import ZipFile from functools import reduce -from typing import Union, Sequence, Iterator, Optional +from typing import Union, Sequence, Iterator from r_qtl import errors as rqe from r_qtl import r_qtl2 as rqtl2 from r_qtl.r_qtl2 import __FILE_TYPES__ +from r_qtl.fileerrors import MissingFile + +from quality_control.errors import InvalidValue def bundle_files_list(cdata: dict) -> tuple[str, ...]: """Retrieve files listed in control file.""" @@ -56,8 +59,7 @@ def validate_bundle(zfile: ZipFile): "The following files do not exist in the bundle: " + ", ".join(missing)) -def geno_errors(zfile: ZipFile) -> Iterator[ - tuple[Optional[int], Optional[str], str]]: +def geno_errors(zfile: ZipFile) -> Iterator[Union[InvalidValue, MissingFile]]: """Check for and retrieve geno errors.""" cdata = rqtl2.control_data(zfile) genotypes = tuple(cdata.get("genotypes", {}).keys()) @@ -68,13 +70,14 @@ def geno_errors(zfile: ZipFile) -> Iterator[ if field == "id": continue if value is not None and value not in genotypes: - yield (lineno, field, ( - f"Invalid value '{value}'. Expected one of {genotypes}")) + yield InvalidValue(lineno, field, value, ( + f"Invalid value '{value}'. Expected one of " + f"{genotypes}.")) except rqe.MissingFileError: - yield (None, None, "Missing 'geno' file.") + fname = cdata.get("geno") + yield MissingFile("geno", fname, f"Missing 'geno' file '{fname}'.") -def pheno_errors(zfile: ZipFile) -> Iterator[ - tuple[Optional[int], Optional[str], str]]: +def pheno_errors(zfile: ZipFile) -> Iterator[Union[InvalidValue, MissingFile]]: """Check for and retrieve pheno errors.""" cdata = rqtl2.control_data(zfile) try: @@ -87,8 +90,9 @@ def pheno_errors(zfile: ZipFile) -> Iterator[ re.search(r"^([0-9]+\.[0-9]{3,}|[0-9]+\.?0*)$", value) or re.search(r"^0\.0+$", value) or re.search("^0+$", value)): - yield (lineno, field, ( + yield InvalidValue(lineno, field, value, ( f"Invalid value '{value}'. Expected numerical value " "with at least 3 decimal places.")) except rqe.MissingFileError: - yield (None, None, "Missing 'pheno' file.") + fname = cdata.get("pheno") + yield MissingFile("pheno", fname, f"Missing 'pheno' file '{fname}'.") diff --git a/tests/r_qtl/test_r_qtl2_qc.py b/tests/r_qtl/test_r_qtl2_qc.py index 1c96a86..554cfc4 100644 --- a/tests/r_qtl/test_r_qtl2_qc.py +++ b/tests/r_qtl/test_r_qtl2_qc.py @@ -6,6 +6,9 @@ from zipfile import ZipFile from r_qtl import r_qtl2 as rqtl2 from r_qtl import r_qtl2_qc as rqc +from r_qtl.fileerrors import MissingFile + +from quality_control.errors import InvalidValue @pytest.mark.unit_test @pytest.mark.parametrize( @@ -85,13 +88,13 @@ def test_missing_files(filepath, expected): @pytest.mark.parametrize( "filepath,expected", (("tests/r_qtl/test_files/empty_control_file_yaml.zip", - ((None, None, "Missing 'geno' file."),)), + (MissingFile("geno", None, "Missing 'geno' file 'None'."),)), ("tests/r_qtl/test_files/test_geno.zip", tuple()), ("tests/r_qtl/test_files/geno_with_missing_genotypes.zip", - ((1, "AXR-1", "Invalid value 'X'. Expected one of ('L', 'C')"), - (2, "EC.480C", "Invalid value 'Y'. Expected one of ('L', 'C')"), - (6, "HH.335C-Col/PhyA", f"Invalid value 'H'. Expected one of ('L', 'C')"))))) + (InvalidValue(1, "AXR-1", "X", "Invalid value 'X'. Expected one of ('L', 'C')."), + InvalidValue(2, "EC.480C", "Y", "Invalid value 'Y'. Expected one of ('L', 'C')."), + InvalidValue(6, "HH.335C-Col/PhyA", "H", f"Invalid value 'H'. Expected one of ('L', 'C')."))))) def test_geno_errors(filepath, expected): """ GIVEN: A R/qtl2 bundle @@ -106,14 +109,14 @@ def test_geno_errors(filepath, expected): @pytest.mark.parametrize( "filepath,expected", (("tests/r_qtl/test_files/empty_control_file_yaml.zip", - ((None, None, "Missing 'pheno' file."),)), + (MissingFile("pheno", None, "Missing 'pheno' file 'None'."),)), ("tests/r_qtl/test_files/pheno_without_errors.zip", tuple()), ("tests/r_qtl/test_files/pheno_with_errors.zip", - ((1, "liver", ("Invalid value '61.92'. Expected numerical value " - "with at least 3 decimal places.")), - (2, "spleen", ("Invalid value 'brrr'. Expected numerical value " - "with at least 3 decimal places.")))))) + (InvalidValue(1, "liver", "61.92", ("Invalid value '61.92'. Expected numerical value " + "with at least 3 decimal places.")), + InvalidValue(2, "spleen", "brrr", ("Invalid value 'brrr'. Expected numerical value " + "with at least 3 decimal places.")))))) def test_pheno_errors(filepath, expected): """ GIVEN: A R/qtl2 bundle -- cgit v1.2.3