aboutsummaryrefslogtreecommitdiff
path: root/quality_control/checks.py
blob: 475eb9ed40e55617a32fb464649a1dea323dc0fc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
"""Quality control checks for data."""
import re
from typing import Optional

from .errors import InvalidValue

def decimal_places_pattern(mini: int, maxi: Optional[int] = None) -> re.Pattern:
    """
    Generate a regular expression for checking numbers

    Generates a regular expression that matches:
    a) Whole numbers, e.g. 2, 54343, 25, etc
    b) Zeroes e.g. 0, 000, 0.0, 0.000, 0.00000, etc
    c) Whole numbers followed by a decimal point and nothing but zeroes
       (possibly none), e.g. 5.000, 12.0, 5.
    d) Numbers with at least 'mini' decimal places
    e) If 'maxi' is provided, then numbers with decimal places between
       'mini' and 'maxi' (both inclusive).

    To test for a match of exactly 'n' decimal places, then both 'mini' and
    'maxi' should be set to 'n'.

    ARGUMENTS:
    - mini [int]: The minimum number of decimal places allowed. This is
      mandatory and must be greater than zero.
    - maxi [int]: The maximum number of decimal places allowed. This is an
      optional argument. If not provided, then an infinite (theoretically)
      number of decimal places is allowed. When provided it must be greater
      than zero and not less than 'mini'.

    RETURNS:
    - A compiled pattern anchored with '^' and '$'.

    RAISES:
    - TypeError: if 'mini' is not an integer, or 'maxi' is neither None nor
      an integer. Booleans are rejected even though bool subclasses int.
    - ValueError: if 'mini' or 'maxi' is not greater than zero, or if 'maxi'
      is less than 'mini'.
    """
    # Validation uses explicit raises rather than `assert`: assertions are
    # removed when Python runs with -O, which would let bad arguments through
    # and silently produce a wrong pattern.
    #
    # bool is a subclass of int, but str(True) would inject the literal text
    # "True" into the `{mini,maxi}` quantifier, so booleans are rejected too.
    if isinstance(mini, bool) or not isinstance(mini, int):
        raise TypeError("The argument 'mini' MUST be a integer")
    if maxi is not None and (isinstance(maxi, bool) or not isinstance(maxi, int)):
        raise TypeError("The argument 'maxi' MUST be a integer")

    if mini <= 0:
        raise ValueError("The argument 'mini' must be greater than zero (0).")
    if maxi is not None:
        if maxi <= 0:
            raise ValueError("The argument 'maxi' must be greater than zero (0).")
        if maxi < mini:
            raise ValueError("'maxi' MUST be greater than or equal to 'mini'.")

    # An empty upper bound gives the open-ended quantifier `{mini,}`.
    upper = "" if maxi is None else str(maxi)
    alternatives = (
        r"0+",  # All zeroes, no decimal places
        r"0+\.0+",  # 0.00…
        r"[0-9]+\.?0*",  # Whole numbers, or all zeroes after decimal point
        r"[0-9]+\.[0-9]{" + f"{mini},{upper}" + r"}",  # Bounded decimal places
    )
    return re.compile(r"^(" + "|".join(alternatives) + r")$")

def decimal_points_error(lineno: int,
                         field: str,
                         value: str,
                         mini: int,
                         maxi: Optional[int] = None) -> Optional[InvalidValue]:
    """
    Check that 'value' is a number with an acceptable count of decimal places.

    The check is delegated to the pattern built by
    'decimal_places_pattern(mini, maxi)'.

    ARGUMENTS:
    - lineno [int]: Passed through, unchanged, to the returned error.
    - field [str]: Passed through, unchanged, to the returned error.
    - value [str]: The text to check against the pattern.
    - mini [int]: Lower bound on decimal places, forwarded to
      'decimal_places_pattern'.
    - maxi [int]: Optional upper bound on decimal places, forwarded to
      'decimal_places_pattern'.

    RETURNS:
    - None when 'value' matches the pattern, otherwise an 'InvalidValue'
      built from 'lineno', 'field', 'value' and a descriptive message.
    """
    pattern = decimal_places_pattern(mini, maxi)
    if pattern.match(value):
        return None

    # Only mention the upper bound when the caller actually supplied one.
    upper_clause = (
        f" and at most {maxi} decimal places" if maxi is not None else "")
    message = (
        f"Invalid value '{value}'. Expected numerical value "
        f"with at least {mini} decimal places"
        f"{upper_clause}.")
    return InvalidValue(lineno, field, value, message)