aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--r_qtl/fileerrors.py5
-rw-r--r--r_qtl/r_qtl2_qc.py24
-rw-r--r--tests/r_qtl/test_r_qtl2_qc.py21
3 files changed, 31 insertions, 19 deletions
diff --git a/r_qtl/fileerrors.py b/r_qtl/fileerrors.py
new file mode 100644
index 0000000..e76676c
--- /dev/null
+++ b/r_qtl/fileerrors.py
@@ -0,0 +1,5 @@
+"""QC errors as distinguished from actual exceptions"""
+from collections import namedtuple
+
+MissingFile = namedtuple(
+ "MissingFile", ("controlfilekey", "filename", "message"))
diff --git a/r_qtl/r_qtl2_qc.py b/r_qtl/r_qtl2_qc.py
index b45c17a..8d4fc19 100644
--- a/r_qtl/r_qtl2_qc.py
+++ b/r_qtl/r_qtl2_qc.py
@@ -2,11 +2,14 @@
import re
from zipfile import ZipFile
from functools import reduce
-from typing import Union, Sequence, Iterator, Optional
+from typing import Union, Sequence, Iterator
from r_qtl import errors as rqe
from r_qtl import r_qtl2 as rqtl2
from r_qtl.r_qtl2 import __FILE_TYPES__
+from r_qtl.fileerrors import MissingFile
+
+from quality_control.errors import InvalidValue
def bundle_files_list(cdata: dict) -> tuple[str, ...]:
"""Retrieve files listed in control file."""
@@ -56,8 +59,7 @@ def validate_bundle(zfile: ZipFile):
"The following files do not exist in the bundle: " +
", ".join(missing))
-def geno_errors(zfile: ZipFile) -> Iterator[
- tuple[Optional[int], Optional[str], str]]:
+def geno_errors(zfile: ZipFile) -> Iterator[Union[InvalidValue, MissingFile]]:
"""Check for and retrieve geno errors."""
cdata = rqtl2.control_data(zfile)
genotypes = tuple(cdata.get("genotypes", {}).keys())
@@ -68,13 +70,14 @@ def geno_errors(zfile: ZipFile) -> Iterator[
if field == "id":
continue
if value is not None and value not in genotypes:
- yield (lineno, field, (
- f"Invalid value '{value}'. Expected one of {genotypes}"))
+ yield InvalidValue(lineno, field, value, (
+ f"Invalid value '{value}'. Expected one of "
+ f"{genotypes}."))
except rqe.MissingFileError:
- yield (None, None, "Missing 'geno' file.")
+ fname = cdata.get("geno")
+ yield MissingFile("geno", fname, f"Missing 'geno' file '{fname}'.")
-def pheno_errors(zfile: ZipFile) -> Iterator[
- tuple[Optional[int], Optional[str], str]]:
+def pheno_errors(zfile: ZipFile) -> Iterator[Union[InvalidValue, MissingFile]]:
"""Check for and retrieve pheno errors."""
cdata = rqtl2.control_data(zfile)
try:
@@ -87,8 +90,9 @@ def pheno_errors(zfile: ZipFile) -> Iterator[
re.search(r"^([0-9]+\.[0-9]{3,}|[0-9]+\.?0*)$", value)
or re.search(r"^0\.0+$", value)
or re.search("^0+$", value)):
- yield (lineno, field, (
+ yield InvalidValue(lineno, field, value, (
f"Invalid value '{value}'. Expected numerical value "
"with at least 3 decimal places."))
except rqe.MissingFileError:
- yield (None, None, "Missing 'pheno' file.")
+ fname = cdata.get("pheno")
+ yield MissingFile("pheno", fname, f"Missing 'pheno' file '{fname}'.")
diff --git a/tests/r_qtl/test_r_qtl2_qc.py b/tests/r_qtl/test_r_qtl2_qc.py
index 1c96a86..554cfc4 100644
--- a/tests/r_qtl/test_r_qtl2_qc.py
+++ b/tests/r_qtl/test_r_qtl2_qc.py
@@ -6,6 +6,9 @@ from zipfile import ZipFile
from r_qtl import r_qtl2 as rqtl2
from r_qtl import r_qtl2_qc as rqc
+from r_qtl.fileerrors import MissingFile
+
+from quality_control.errors import InvalidValue
@pytest.mark.unit_test
@pytest.mark.parametrize(
@@ -85,13 +88,13 @@ def test_missing_files(filepath, expected):
@pytest.mark.parametrize(
"filepath,expected",
(("tests/r_qtl/test_files/empty_control_file_yaml.zip",
- ((None, None, "Missing 'geno' file."),)),
+ (MissingFile("geno", None, "Missing 'geno' file 'None'."),)),
("tests/r_qtl/test_files/test_geno.zip",
tuple()),
("tests/r_qtl/test_files/geno_with_missing_genotypes.zip",
- ((1, "AXR-1", "Invalid value 'X'. Expected one of ('L', 'C')"),
- (2, "EC.480C", "Invalid value 'Y'. Expected one of ('L', 'C')"),
- (6, "HH.335C-Col/PhyA", f"Invalid value 'H'. Expected one of ('L', 'C')")))))
+ (InvalidValue(1, "AXR-1", "X", "Invalid value 'X'. Expected one of ('L', 'C')."),
+ InvalidValue(2, "EC.480C", "Y", "Invalid value 'Y'. Expected one of ('L', 'C')."),
+ InvalidValue(6, "HH.335C-Col/PhyA", "H", f"Invalid value 'H'. Expected one of ('L', 'C').")))))
def test_geno_errors(filepath, expected):
"""
GIVEN: A R/qtl2 bundle
@@ -106,14 +109,14 @@ def test_geno_errors(filepath, expected):
@pytest.mark.parametrize(
"filepath,expected",
(("tests/r_qtl/test_files/empty_control_file_yaml.zip",
- ((None, None, "Missing 'pheno' file."),)),
+ (MissingFile("pheno", None, "Missing 'pheno' file 'None'."),)),
("tests/r_qtl/test_files/pheno_without_errors.zip",
tuple()),
("tests/r_qtl/test_files/pheno_with_errors.zip",
- ((1, "liver", ("Invalid value '61.92'. Expected numerical value "
- "with at least 3 decimal places.")),
- (2, "spleen", ("Invalid value 'brrr'. Expected numerical value "
- "with at least 3 decimal places."))))))
+ (InvalidValue(1, "liver", "61.92", ("Invalid value '61.92'. Expected numerical value "
+ "with at least 3 decimal places.")),
+ InvalidValue(2, "spleen", "brrr", ("Invalid value 'brrr'. Expected numerical value "
+ "with at least 3 decimal places."))))))
def test_pheno_errors(filepath, expected):
"""
GIVEN: A R/qtl2 bundle