about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- r_qtl/r_qtl2_qc.py 24
-rw-r--r-- tests/r_qtl/test_files/pheno_with_errors.zip bin 0 -> 522 bytes
-rw-r--r-- tests/r_qtl/test_files/pheno_without_errors.zip bin 0 -> 539 bytes
-rw-r--r-- tests/r_qtl/test_r_qtl2_qc.py 30
4 files changed, 47 insertions, 7 deletions
diff --git a/r_qtl/r_qtl2_qc.py b/r_qtl/r_qtl2_qc.py
index 70a00f9..b45c17a 100644
--- a/r_qtl/r_qtl2_qc.py
+++ b/r_qtl/r_qtl2_qc.py
@@ -1,4 +1,5 @@
 """Quality control checks for R/qtl2 data bundles."""
+import re
 from zipfile import ZipFile
 from functools import reduce
 from typing import Union, Sequence, Iterator, Optional
@@ -55,7 +56,8 @@ def validate_bundle(zfile: ZipFile):
                         "The following files do not exist in the bundle: " +
                         ", ".join(missing))
 
-def geno_errors(zfile: ZipFile) -> Iterator[tuple[Optional[int], Optional[str], str]]:
+def geno_errors(zfile: ZipFile) -> Iterator[
+        tuple[Optional[int], Optional[str], str]]:
     """Check for and retrieve geno errors."""
     cdata = rqtl2.control_data(zfile)
     genotypes = tuple(cdata.get("genotypes", {}).keys())
@@ -70,3 +72,23 @@ def geno_errors(zfile: ZipFile) -> Iterator[tuple[Optional[int], Optional[str],
                         f"Invalid value '{value}'. Expected one of {genotypes}"))
     except rqe.MissingFileError:
         yield (None, None, "Missing 'geno' file.")
+
+def pheno_errors(zfile: ZipFile) -> Iterator[
+        tuple[Optional[int], Optional[str], str]]:
+    """Check for and retrieve pheno errors."""
+    cdata = rqtl2.control_data(zfile)
+    try:
+        for lineno, row in enumerate(
+                rqtl2.file_data(zfile, "pheno", cdata), start=1):
+            for field, value in row.items():
+                if field == "id":
+                    continue
+                if value is not None and not(
+                        re.search(r"^([0-9]+\.[0-9]{3,}|[0-9]+\.?0*)$", value)
+                        or re.search(r"^0\.0+$", value)
+                        or re.search("^0+$", value)):
+                    yield (lineno, field, (
+                        f"Invalid value '{value}'. Expected numerical value "
+                        "with at least 3 decimal places."))
+    except rqe.MissingFileError:
+        yield (None, None, "Missing 'pheno' file.")
diff --git a/tests/r_qtl/test_files/pheno_with_errors.zip b/tests/r_qtl/test_files/pheno_with_errors.zip
new file mode 100644
index 0000000..321e44d
--- /dev/null
+++ b/tests/r_qtl/test_files/pheno_with_errors.zip
Binary files differ
diff --git a/tests/r_qtl/test_files/pheno_without_errors.zip b/tests/r_qtl/test_files/pheno_without_errors.zip
new file mode 100644
index 0000000..9d9d027
--- /dev/null
+++ b/tests/r_qtl/test_files/pheno_without_errors.zip
Binary files differ
diff --git a/tests/r_qtl/test_r_qtl2_qc.py b/tests/r_qtl/test_r_qtl2_qc.py
index bcbcbac..1c96a86 100644
--- a/tests/r_qtl/test_r_qtl2_qc.py
+++ b/tests/r_qtl/test_r_qtl2_qc.py
@@ -7,10 +7,6 @@ from zipfile import ZipFile
 from r_qtl import r_qtl2 as rqtl2
 from r_qtl import r_qtl2_qc as rqc
 
-###### DO NOT COMMIT THIS ######
-from quality_control.debug import __pk__
-###### END: DO NOT COMMIT THIS ######
-
 @pytest.mark.unit_test
 @pytest.mark.parametrize(
     "filepath,expected",
@@ -93,8 +89,8 @@ def test_missing_files(filepath, expected):
      ("tests/r_qtl/test_files/test_geno.zip",
       tuple()),
      ("tests/r_qtl/test_files/geno_with_missing_genotypes.zip",
-      ((1, "AXR-1", f"Invalid value 'X'. Expected one of ('L', 'C')"),
-       (2, "EC.480C", f"Invalid value 'Y'. Expected one of ('L', 'C')"),
+      ((1, "AXR-1", "Invalid value 'X'. Expected one of ('L', 'C')"),
+       (2, "EC.480C", "Invalid value 'Y'. Expected one of ('L', 'C')"),
        (6, "HH.335C-Col/PhyA", f"Invalid value 'H'. Expected one of ('L', 'C')")))))
 def test_geno_errors(filepath, expected):
     """
@@ -105,3 +101,25 @@ def test_geno_errors(filepath, expected):
     """
     with ZipFile(Path(filepath).absolute(), "r") as zfile:
         assert tuple(rqc.geno_errors(zfile)) == expected
+
+@pytest.mark.unit_test
+@pytest.mark.parametrize(
+    "filepath,expected",
+    (("tests/r_qtl/test_files/empty_control_file_yaml.zip",
+      ((None, None, "Missing 'pheno' file."),)),
+     ("tests/r_qtl/test_files/pheno_without_errors.zip",
+      tuple()),
+     ("tests/r_qtl/test_files/pheno_with_errors.zip",
+      ((1, "liver", ("Invalid value '61.92'. Expected numerical value "
+                     "with at least 3 decimal places.")),
+       (2, "spleen", ("Invalid value 'brrr'. Expected numerical value "
+                     "with at least 3 decimal places."))))))
+def test_pheno_errors(filepath, expected):
+    """
+    GIVEN: A R/qtl2 bundle
+    WHEN: we check for pheno errors
+    THEN: We should get a sequence of all errors present in the pheno file, or
+        an empty sequence if no errors exist.
+    """
+    with ZipFile(Path(filepath).absolute(), "r") as zfile:
+        assert tuple(rqc.pheno_errors(zfile)) == expected