From 68696caeedde3636aff34db048a4490fbf51edf3 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 12 Feb 2024 12:10:44 +0300
Subject: Build generic decimal places checker.

---
Review note: argument validation now uses explicit raises instead of
`assert` (assertions are removed under `python -O`), and booleans are
rejected as arguments. The blob ids on the `index` lines are those of the
original commit and do not describe this revised content.

 quality_control/checks.py | 59 ++++++++++++++++++++++++++++++++++++++
 tests/qc/test_checks.py   | 74 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 133 insertions(+)
 create mode 100644 quality_control/checks.py
 create mode 100644 tests/qc/test_checks.py

diff --git a/quality_control/checks.py b/quality_control/checks.py
new file mode 100644
index 0000000..28b9ab5
--- /dev/null
+++ b/quality_control/checks.py
@@ -0,0 +1,59 @@
+"""Quality control checks for data."""
+import re
+from typing import Optional
+
+def decimal_places_pattern(mini: int, maxi: Optional[int] = None) -> re.Pattern:
+    """
+    Generate a regular expression for checking numbers
+
+    Generates a regular expression that matches:
+    a) Whole numbers, e.g. 2, 54343, 25, etc
+    b) Zeroes e.g. 0, 000, 0.0, 0.000, 0.00000, etc
+    c) Numbers with at least 'mini' decimal places
+    d) If 'maxi' is provided, then numbers with decimal places between
+       'mini' and 'maxi'.
+
+    To test for a match of exactly 'n' decimal places, then both 'mini' and
+    'maxi' should be set to 'n'.
+
+    ARGUMENTS:
+    - mini [int]: The minimum number of decimal places allowed. This is
+      mandatory.
+    - maxi [int]: The maximum number of decimal places allowed. This is an
+      optional argument. If not provided, then an infinite (theoretically)
+      number of decimal places is allowed.
+
+    RAISES:
+    - TypeError: If 'mini' is not an integer, or if 'maxi' is neither None
+      nor an integer. Booleans are rejected too.
+    - ValueError: If 'mini' or 'maxi' is less than one, or if 'maxi' is less
+      than 'mini'.
+    """
+    # Validate with explicit raises rather than `assert`: assertions are
+    # removed when Python runs with -O, which would skip these checks.
+    # `bool` is an `int` subclass, but str(True) would corrupt the quantifier.
+    if isinstance(mini, bool) or not isinstance(mini, int):
+        raise TypeError("The argument 'mini' MUST be a integer")
+    if maxi is not None and (
+            isinstance(maxi, bool) or not isinstance(maxi, int)):
+        raise TypeError("The argument 'maxi' MUST be a integer")
+
+    if mini <= 0:
+        raise ValueError("The argument 'mini' must be greater than zero (0).")
+    if maxi is not None:
+        if maxi <= 0:
+            raise ValueError(
+                "The argument 'maxi' must be greater than zero (0).")
+        if maxi < mini:
+            raise ValueError(
+                "'maxi' MUST be greater than or equal to 'mini'.")
+
+    upper_bound = "" if maxi is None else str(maxi)
+    return re.compile(
+        r"^("
+        r"0+" # All zeroes, no decimal places
+        + r"|0+\.0+" # 0.00…
+        + r"|[0-9]+\.?0*" # Whole numbers, or all zeroes after decimal point
+        + r"|[0-9]+\.[0-9]{" + str(mini) + r"," + upper_bound + r"}"
+        + r")$"
+    )
diff --git a/tests/qc/test_checks.py b/tests/qc/test_checks.py
new file mode 100644
index 0000000..e91cd19
--- /dev/null
+++ b/tests/qc/test_checks.py
@@ -0,0 +1,74 @@
+"""Test that the checks run correctly."""
+import pytest
+from hypothesis import given
+from hypothesis import strategies as st
+
+from quality_control.checks import decimal_places_pattern
+
+@pytest.mark.unit_test
+@given(numstr=st.from_regex(r"^[0-9]+$", fullmatch=True))
+def test_matches_whole_numbers(numstr):
+    """
+    GIVEN: 'numstr' is an arbitrary string that represents a whole number,
+      e.g. 45342
+    WHEN: We test for a match made by 'decimal_places_pattern'
+    THEN: The string in 'numstr' always matches.
+    """
+    assert bool(decimal_places_pattern(3, 6).search(numstr))
+
+@pytest.mark.unit_test
+@given(numstr=st.from_regex(r"^0+\.?0*$", fullmatch=True))
+def test_matches_zeroes(numstr):
+    """
+    GIVEN: 'numstr' is an arbitrary string that represents zero,
+      e.g. 0, 00, 000, 0.0, 00.00, 000.0
+    WHEN: We test for a match made by 'decimal_places_pattern'
+    THEN: The string in 'numstr' always matches.
+    """
+    assert bool(decimal_places_pattern(3, 6).search(numstr))
+
+@pytest.mark.unit_test
+@given(numstr=st.from_regex(r"^[0-9]+\.[1-9]{1,5}$", fullmatch=True))
+def test_does_not_match_fewer_decimal_places(numstr):
+    """
+    GIVEN: 'numstr' is an arbitrary string that represents a non-whole decimal
+      number with fewer decimal places than allowed
+    WHEN: We test for a match made by 'decimal_places_pattern'
+    THEN: The string in 'numstr' always fails to match
+    """
+    assert decimal_places_pattern(6).match(numstr) is None
+
+@pytest.mark.unit_test
+@given(numstr=st.from_regex(r"^[0-9]+\.[1-9]{7}$", fullmatch=True))
+def test_does_not_match_more_decimal_places(numstr):
+    """
+    GIVEN: 'numstr' is an arbitrary string that represents a non-whole decimal
+      number with more decimal places than allowed
+    WHEN: We test for a match made by 'decimal_places_pattern'
+    THEN: The string in 'numstr' always fails to match
+    """
+    assert decimal_places_pattern(3, 6).match(numstr) is None
+
+@pytest.mark.unit_test
+@pytest.mark.parametrize(
+    "mini,maxi",
+    ((1.5, None), ("3", None), (True, None), (3, 6.0), (3, "6"), (3, True)))
+def test_rejects_non_integer_arguments(mini, maxi):
+    """
+    GIVEN: 'mini' or 'maxi' is not an integer
+    WHEN: We call 'decimal_places_pattern'
+    THEN: A TypeError is raised
+    """
+    with pytest.raises(TypeError):
+        decimal_places_pattern(mini, maxi)
+
+@pytest.mark.unit_test
+@pytest.mark.parametrize("mini,maxi", ((0, None), (-1, None), (3, 0), (6, 3)))
+def test_rejects_out_of_range_arguments(mini, maxi):
+    """
+    GIVEN: 'mini' or 'maxi' is below one, or 'maxi' is less than 'mini'
+    WHEN: We call 'decimal_places_pattern'
+    THEN: A ValueError is raised
+    """
+    with pytest.raises(ValueError):
+        decimal_places_pattern(mini, maxi)
-- 
cgit v1.2.3