aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2024-02-12 12:10:44 +0300
committer Frederick Muriuki Muriithi 2024-02-12 18:17:42 +0300
commit 68696caeedde3636aff34db048a4490fbf51edf3 (patch)
tree 8a7fd555aa6b867043a6e4cf662102a2a0d004fa
parent abb55d7e03bf207ebf00b4c71f1bbdd8f58a0ad3 (diff)
download gn-uploader-68696caeedde3636aff34db048a4490fbf51edf3.tar.gz
Build generic decimal places checker.
-rw-r--r-- quality_control/checks.py 51
-rw-r--r-- tests/qc/test_checks.py 50
2 files changed, 101 insertions, 0 deletions
diff --git a/quality_control/checks.py b/quality_control/checks.py
new file mode 100644
index 0000000..28b9ab5
--- /dev/null
+++ b/quality_control/checks.py
@@ -0,0 +1,51 @@
+"""Quality control checks for data."""
+import re
+from typing import Optional
+
def decimal_places_pattern(mini: int, maxi: Optional[int] = None) -> re.Pattern:
    """
    Generate a regular expression for checking numbers.

    The compiled pattern is anchored with '^' and '$' and matches strings
    made of ASCII digits in one of these shapes:
    a) Whole numbers, e.g. 2, 54343, 25, etc.
    b) Zeroes, e.g. 0, 000, 0.0, 0.000, 0.00000, etc.
    c) Digits followed by an optional decimal point and only zeroes (or
       nothing) after it, e.g. 25.0, 25.000, 25.
    d) Numbers with at least 'mini' decimal places.
    e) If 'maxi' is provided, numbers with between 'mini' and 'maxi'
       decimal places (both inclusive).

    To test for a match of exactly 'n' decimal places, set both 'mini' and
    'maxi' to 'n'.

    NOTE: Python's '$' also matches just before a single trailing newline,
    so "1.234\\n" is accepted wherever "1.234" is.

    ARGUMENTS:
    - mini [int]: The minimum number of decimal places allowed. This is
      mandatory and must be greater than zero.
    - maxi [int]: The maximum number of decimal places allowed. This is an
      optional argument. If not provided, no upper bound is placed on the
      number of decimal places.

    RETURNS:
    - The compiled regular expression (re.Pattern).

    RAISES:
    - TypeError: if 'mini' is not an integer, or 'maxi' is neither None nor
      an integer. Booleans are rejected even though bool subclasses int.
    - ValueError: if 'mini' or 'maxi' is not greater than zero, or if 'maxi'
      is less than 'mini'.
    """
    # Validate with explicit raises rather than `assert`: assert statements
    # are removed when Python runs with -O, which would let bad arguments
    # through and silently build a meaningless pattern.
    # bool is a subclass of int, so it needs to be excluded explicitly;
    # str(True) would otherwise end up inside the '{m,n}' quantifier.
    if isinstance(mini, bool) or not isinstance(mini, int):
        raise TypeError("The argument 'mini' MUST be a integer")
    if maxi is not None and (
            isinstance(maxi, bool) or not isinstance(maxi, int)):
        raise TypeError("The argument 'maxi' MUST be a integer")

    if mini <= 0:
        raise ValueError("The argument 'mini' must be greater than zero (0).")
    if maxi is not None:
        if maxi <= 0:
            raise ValueError(
                "The argument 'maxi' must be greater than zero (0).")
        if maxi < mini:
            raise ValueError(
                "'maxi' MUST be greater than or equal to 'mini'.")

    # An empty upper bound gives the open-ended quantifier '{mini,}'.
    upper = "" if maxi is None else str(maxi)
    quantifier = "{" + str(mini) + "," + upper + "}"

    return re.compile(
        r"^("
        r"0+"                 # All zeroes, no decimal places
        r"|0+\.0+"            # 0.00…
        r"|[0-9]+\.?0*"       # Whole numbers, or all zeroes after decimal point
        r"|[0-9]+\.[0-9]"     # Digits, a point, then the counted decimals
        + quantifier
        + r")$"
    )
diff --git a/tests/qc/test_checks.py b/tests/qc/test_checks.py
new file mode 100644
index 0000000..e91cd19
--- /dev/null
+++ b/tests/qc/test_checks.py
@@ -0,0 +1,50 @@
+"""Test that the checks run correctly."""
+import pytest
+from hypothesis import given
+from hypothesis import strategies as st
+
+from quality_control.checks import decimal_places_pattern
+
@pytest.mark.unit_test
@given(numstr=st.from_regex(r"^[0-9]+$", fullmatch=True))
def test_matches_whole_numbers(numstr):
    """
    GIVEN: 'numstr' is a generated string consisting only of digits, i.e. a
        whole number such as 45342
    WHEN: It is searched with the pattern built by
        'decimal_places_pattern(3, 6)'
    THEN: A match is always found.
    """
    pattern = decimal_places_pattern(3, 6)
    found = pattern.search(numstr)
    assert found is not None
+
@pytest.mark.unit_test
@given(numstr=st.from_regex(r"^0+\.?0*$", fullmatch=True))
def test_matches_zeroes(numstr):
    """
    GIVEN: 'numstr' is a generated string that spells zero, with or without
        a decimal point, e.g. 0, 00, 000, 0.0, 00.00, 000.0
    WHEN: It is searched with the pattern built by
        'decimal_places_pattern(3, 6)'
    THEN: A match is always found.
    """
    pattern = decimal_places_pattern(3, 6)
    found = pattern.search(numstr)
    assert found is not None
+
@pytest.mark.unit_test
@given(numstr=st.from_regex(r"^[0-9]+\.[1-9]{1,5}$", fullmatch=True))
def test_does_not_match_fewer_decimal_places(numstr):
    """
    GIVEN: 'numstr' is a generated decimal number with one to five non-zero
        digits after the point, i.e. fewer decimal places than the
        minimum of six
    WHEN: It is matched against the pattern built by
        'decimal_places_pattern(6)'
    THEN: No match is ever produced.
    """
    pattern = decimal_places_pattern(6)
    outcome = pattern.match(numstr)
    assert outcome is None
+
@pytest.mark.unit_test
@given(numstr=st.from_regex(r"^[0-9]+\.[1-9]{7}$", fullmatch=True))
def test_does_not_match_more_decimal_places(numstr):
    """
    GIVEN: 'numstr' is a generated decimal number with exactly seven
        non-zero digits after the point, i.e. more decimal places than
        the maximum of six
    WHEN: It is matched against the pattern built by
        'decimal_places_pattern(3, 6)'
    THEN: No match is ever produced.
    """
    pattern = decimal_places_pattern(3, 6)
    outcome = pattern.match(numstr)
    assert outcome is None