diff options
Diffstat (limited to 'r_qtl/r_qtl2_qc.py')
-rw-r--r-- | r_qtl/r_qtl2_qc.py | 24 |
1 files changed, 14 insertions, 10 deletions
diff --git a/r_qtl/r_qtl2_qc.py b/r_qtl/r_qtl2_qc.py
index b45c17a..8d4fc19 100644
--- a/r_qtl/r_qtl2_qc.py
+++ b/r_qtl/r_qtl2_qc.py
@@ -2,11 +2,14 @@
 import re
 from zipfile import ZipFile
 from functools import reduce
-from typing import Union, Sequence, Iterator, Optional
+from typing import Union, Sequence, Iterator
 
 from r_qtl import errors as rqe
 from r_qtl import r_qtl2 as rqtl2
 from r_qtl.r_qtl2 import __FILE_TYPES__
+from r_qtl.fileerrors import MissingFile
+
+from quality_control.errors import InvalidValue
 
 def bundle_files_list(cdata: dict) -> tuple[str, ...]:
     """Retrieve files listed in control file."""
@@ -56,8 +59,7 @@ def validate_bundle(zfile: ZipFile):
             "The following files do not exist in the bundle: " +
             ", ".join(missing))
 
-def geno_errors(zfile: ZipFile) -> Iterator[
-        tuple[Optional[int], Optional[str], str]]:
+def geno_errors(zfile: ZipFile) -> Iterator[Union[InvalidValue, MissingFile]]:
     """Check for and retrieve geno errors."""
     cdata = rqtl2.control_data(zfile)
     genotypes = tuple(cdata.get("genotypes", {}).keys())
@@ -68,13 +70,14 @@ def geno_errors(zfile: ZipFile) -> Iterator[
                 if field == "id":
                     continue
                 if value is not None and value not in genotypes:
-                    yield (lineno, field, (
-                        f"Invalid value '{value}'. Expected one of {genotypes}"))
+                    yield InvalidValue(lineno, field, value, (
+                        f"Invalid value '{value}'. Expected one of "
+                        f"{genotypes}."))
     except rqe.MissingFileError:
-        yield (None, None, "Missing 'geno' file.")
+        fname = cdata.get("geno")
+        yield MissingFile("geno", fname, f"Missing 'geno' file '{fname}'.")
 
-def pheno_errors(zfile: ZipFile) -> Iterator[
-        tuple[Optional[int], Optional[str], str]]:
+def pheno_errors(zfile: ZipFile) -> Iterator[Union[InvalidValue, MissingFile]]:
     """Check for and retrieve pheno errors."""
     cdata = rqtl2.control_data(zfile)
     try:
@@ -87,8 +90,9 @@ def pheno_errors(zfile: ZipFile) -> Iterator[
                         re.search(r"^([0-9]+\.[0-9]{3,}|[0-9]+\.?0*)$", value)
                         or re.search(r"^0\.0+$", value)
                         or re.search("^0+$", value)):
-                    yield (lineno, field, (
+                    yield InvalidValue(lineno, field, value, (
                         f"Invalid value '{value}'. Expected numerical value "
                         "with at least 3 decimal places."))
     except rqe.MissingFileError:
-        yield (None, None, "Missing 'pheno' file.")
+        fname = cdata.get("pheno")
+        yield MissingFile("pheno", fname, f"Missing 'pheno' file '{fname}'.")