about summary refs log tree commit diff
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2024-02-12 12:10:44 +0300
committer Frederick Muriuki Muriithi 2024-02-12 18:17:42 +0300
commit 68696caeedde3636aff34db048a4490fbf51edf3 (patch)
tree 8a7fd555aa6b867043a6e4cf662102a2a0d004fa
parent abb55d7e03bf207ebf00b4c71f1bbdd8f58a0ad3 (diff)
download gn-uploader-68696caeedde3636aff34db048a4490fbf51edf3.tar.gz
Build generic decimal places checker.
-rw-r--r-- quality_control/checks.py 51
-rw-r--r-- tests/qc/test_checks.py 50
2 files changed, 101 insertions, 0 deletions
diff --git a/quality_control/checks.py b/quality_control/checks.py
new file mode 100644
index 0000000..28b9ab5
--- /dev/null
+++ b/quality_control/checks.py
@@ -0,0 +1,51 @@
+"""Quality control checks for data."""
+import re
+from typing import Optional
+
+def decimal_places_pattern(mini: int, maxi: Optional[int] = None) -> re.Pattern:
+    """
+    Generate a regular expression for checking numbers
+
+    Generates a regular expression that matches:
+    a) Whole numbers, e.g. 2, 54343, 25, etc
+    b) Zeroes e.g. 0, 000, 0.0, 0.000, 0.00000, etc
+    c) Numbers with at least 'mini' decimal places
+    d) If 'maxi' is provided, then numbers with decimal places between
+       'mini' and 'maxi'.
+
+    To test for a match of exactly 'n' decimal places, set both 'mini' and
+    'maxi' to 'n'.
+
+    ARGUMENTS:
+    - mini [int]: The minimum number of decimal places allowed. This is
+      mandatory.
+    - maxi [int]: The maximum number of decimal places allowed. This is an
+      optional argument. If not provided, then a (theoretically) unlimited
+      number of decimal places is allowed.
+    """
+    try:
+        assert isinstance(mini, int), "The argument 'mini' MUST be a integer"
+        assert maxi is None or isinstance(maxi, int), "The argument 'maxi' MUST be a integer"
+    except AssertionError as exc:
+        raise TypeError(*exc.args) from exc
+
+    try:
+        assert mini > 0, "The argument 'mini' must be greater than zero (0)."
+        if isinstance(maxi, int):
+            assert maxi > 0, "The argument 'maxi' must be greater than zero (0)."
+            assert maxi >= mini, "'maxi' MUST be greater than or equal to 'mini'."
+    except AssertionError as exc:
+        raise ValueError(*exc.args) from exc
+
+    return re.compile(
+        r"^("
+        r"0+" # All zeroes, no decimal places
+        + r"|0+\.0+" # 0.00…
+        + r"|[0-9]+\.?0*" # Whole numbers, or all zeroes after decimal point
+        + r"|[0-9]+\.[0-9]{"
+        + str(mini)
+        + r","
+        + (str(maxi) if maxi is not None else r"")
+        + r"}"
+        + r")$"
+    )
diff --git a/tests/qc/test_checks.py b/tests/qc/test_checks.py
new file mode 100644
index 0000000..e91cd19
--- /dev/null
+++ b/tests/qc/test_checks.py
@@ -0,0 +1,50 @@
+"""Test that the checks run correctly."""
+import pytest
+from hypothesis import given
+from hypothesis import strategies as st
+
+from quality_control.checks import decimal_places_pattern
+
+@pytest.mark.unit_test
+@given(numstr=st.from_regex(r"^[0-9]+$", fullmatch=True))
+def test_matches_whole_numbers(numstr):
+    """
+    GIVEN: 'numstr' is an arbitrary string that represents a whole number,
+        e.g. 45342
+    WHEN: We test for a match made by 'decimal_places_pattern'
+    THEN: The string in 'numstr' always matches.
+    """
+    assert bool(decimal_places_pattern(3, 6).search(numstr))
+
+@pytest.mark.unit_test
+@given(numstr=st.from_regex(r"^0+\.?0*$", fullmatch=True))
+def test_matches_zeroes(numstr):
+    """
+    GIVEN: 'numstr' is an arbitrary string that represents zero,
+        e.g. 0, 00, 000, 0.0, 00.00, 000.0
+    WHEN: We test for a match made by 'decimal_places_pattern'
+    THEN: The string in 'numstr' always matches.
+    """
+    assert bool(decimal_places_pattern(3, 6).search(numstr))
+
+@pytest.mark.unit_test
+@given(numstr=st.from_regex(r"^[0-9]+\.[1-9]{1,5}$", fullmatch=True))
+def test_does_not_match_fewer_decimal_places(numstr):
+    """
+    GIVEN: 'numstr' is an arbitrary string that represents a non-whole decimal
+        number with fewer decimal places than allowed
+    WHEN: We test for a match made by 'decimal_places_pattern'
+    THEN: The string in 'numstr' always fails to match
+    """
+    assert decimal_places_pattern(6).match(numstr) is None
+
+@pytest.mark.unit_test
+@given(numstr=st.from_regex(r"^[0-9]+\.[1-9]{7}$", fullmatch=True))
+def test_does_not_match_more_decimal_places(numstr):
+    """
+    GIVEN: 'numstr' is an arbitrary string that represents a non-whole decimal
+        number with more decimal places than allowed
+    WHEN: We test for a match made by 'decimal_places_pattern'
+    THEN: The string in 'numstr' always fails to match
+    """
+    assert decimal_places_pattern(3, 6).match(numstr) is None