aboutsummaryrefslogtreecommitdiff
from typing import Optional, Union, TYPE_CHECKING
import unicodedata

from .exceptions_types import EmailSyntaxError, ValidatedEmail
from .syntax import split_email, validate_email_local_part, validate_email_domain_name, validate_email_domain_literal, validate_email_length
from .rfc_constants import CASE_INSENSITIVE_MAILBOX_NAMES

if TYPE_CHECKING:
    import dns.resolver
    _Resolver = dns.resolver.Resolver
else:
    _Resolver = object


def validate_email(
    email: Union[str, bytes],
    /,  # prior arguments are positional-only
    *,  # subsequent arguments are keyword-only
    allow_smtputf8: Optional[bool] = None,
    allow_empty_local: bool = False,
    allow_quoted_local: Optional[bool] = None,
    allow_domain_literal: Optional[bool] = None,
    allow_display_name: Optional[bool] = None,
    check_deliverability: Optional[bool] = None,
    test_environment: Optional[bool] = None,
    globally_deliverable: Optional[bool] = None,
    timeout: Optional[int] = None,
    dns_resolver: Optional[_Resolver] = None
) -> ValidatedEmail:
    """
    Given an email address, and some options, returns a ValidatedEmail instance
    with information about the address if it is valid or, if the address is not
    valid, raises an EmailNotValidError. This is the main function of the module.
    """

    # Fill in default values of arguments.
    from . import ALLOW_SMTPUTF8, ALLOW_QUOTED_LOCAL, ALLOW_DOMAIN_LITERAL, ALLOW_DISPLAY_NAME, \
        GLOBALLY_DELIVERABLE, CHECK_DELIVERABILITY, TEST_ENVIRONMENT, DEFAULT_TIMEOUT
    if allow_smtputf8 is None:
        allow_smtputf8 = ALLOW_SMTPUTF8
    if allow_quoted_local is None:
        allow_quoted_local = ALLOW_QUOTED_LOCAL
    if allow_domain_literal is None:
        allow_domain_literal = ALLOW_DOMAIN_LITERAL
    if allow_display_name is None:
        allow_display_name = ALLOW_DISPLAY_NAME
    if check_deliverability is None:
        check_deliverability = CHECK_DELIVERABILITY
    if test_environment is None:
        test_environment = TEST_ENVIRONMENT
    if globally_deliverable is None:
        globally_deliverable = GLOBALLY_DELIVERABLE
    if timeout is None and dns_resolver is None:
        timeout = DEFAULT_TIMEOUT

    # Allow email to be a str or bytes instance. If bytes,
    # it must be ASCII because that's how the bytes work
    # on the wire with SMTP.
    if not isinstance(email, str):
        try:
            email = email.decode("ascii")
        except ValueError as e:
            raise EmailSyntaxError("The email address is not valid ASCII.") from e

    # Split the address into the display name (or None), the local part
    # (before the @-sign), and the domain part (after the @-sign).
    # Normally, there is only one @-sign. But the awkward "quoted string"
    # local part form (RFC 5321 4.1.2) allows @-signs in the local
    # part if the local part is quoted.
    display_name, local_part, domain_part, is_quoted_local_part \
        = split_email(email)

    # Collect return values in this instance.
    ret = ValidatedEmail()
    ret.original = ((local_part if not is_quoted_local_part
                    else ('"' + local_part + '"'))
                    + "@" + domain_part)  # drop the display name, if any, for email length tests at the end
    ret.display_name = display_name

    # Validate the email address's local part syntax and get a normalized form.
    # If the original address was quoted and the decoded local part is a valid
    # unquoted local part, then we'll get back a normalized (unescaped) local
    # part.
    local_part_info = validate_email_local_part(local_part,
                                                allow_smtputf8=allow_smtputf8,
                                                allow_empty_local=allow_empty_local,
                                                quoted_local_part=is_quoted_local_part)
    ret.local_part = local_part_info["local_part"]
    ret.ascii_local_part = local_part_info["ascii_local_part"]
    ret.smtputf8 = local_part_info["smtputf8"]

    # RFC 6532 section 3.1 says that Unicode NFC normalization should be applied,
    # so we'll return the NFC-normalized local part. Since the caller may use that
    # string in place of the original string, ensure it is also valid.
    normalized_local_part = unicodedata.normalize("NFC", ret.local_part)
    if normalized_local_part != ret.local_part:
        try:
            validate_email_local_part(normalized_local_part,
                                      allow_smtputf8=allow_smtputf8,
                                      allow_empty_local=allow_empty_local,
                                      quoted_local_part=is_quoted_local_part)
        except EmailSyntaxError as e:
            raise EmailSyntaxError("After Unicode normalization: " + str(e)) from e
        ret.local_part = normalized_local_part

    # If a quoted local part isn't allowed but is present, now raise an exception.
    # This is done after any exceptions raised by validate_email_local_part so
    # that mandatory checks have highest precedence.
    if is_quoted_local_part and not allow_quoted_local:
        raise EmailSyntaxError("Quoting the part before the @-sign is not allowed here.")

    # Some local parts are required to be case-insensitive, so we should normalize
    # to lowercase.
    # RFC 2142
    if ret.ascii_local_part is not None \
       and ret.ascii_local_part.lower() in CASE_INSENSITIVE_MAILBOX_NAMES \
       and ret.local_part is not None:
        ret.ascii_local_part = ret.ascii_local_part.lower()
        ret.local_part = ret.local_part.lower()

    # Validate the email address's domain part syntax and get a normalized form.
    is_domain_literal = False
    if len(domain_part) == 0:
        raise EmailSyntaxError("There must be something after the @-sign.")

    elif domain_part.startswith("[") and domain_part.endswith("]"):
        # Parse the address in the domain literal and get back a normalized domain.
        domain_literal_info = validate_email_domain_literal(domain_part[1:-1])
        if not allow_domain_literal:
            raise EmailSyntaxError("A bracketed IP address after the @-sign is not allowed here.")
        ret.domain = domain_literal_info["domain"]
        ret.ascii_domain = domain_literal_info["domain"]  # Domain literals are always ASCII.
        ret.domain_address = domain_literal_info["domain_address"]
        is_domain_literal = True  # Prevent deliverability checks.

    else:
        # Check the syntax of the domain and get back a normalized
        # internationalized and ASCII form.
        domain_name_info = validate_email_domain_name(domain_part, test_environment=test_environment, globally_deliverable=globally_deliverable)
        ret.domain = domain_name_info["domain"]
        ret.ascii_domain = domain_name_info["ascii_domain"]

    # Construct the complete normalized form.
    ret.normalized = ret.local_part + "@" + ret.domain

    # If the email address has an ASCII form, add it.
    if not ret.smtputf8:
        if not ret.ascii_domain:
            raise Exception("Missing ASCII domain.")
        ret.ascii_email = (ret.ascii_local_part or "") + "@" + ret.ascii_domain
    else:
        ret.ascii_email = None

    # Check the length of the address.
    validate_email_length(ret)

    # Check that a display name is permitted. It's the last syntax check
    # because we always check against optional parsing features last.
    if display_name is not None and not allow_display_name:
        raise EmailSyntaxError("A display name and angle brackets around the email address are not permitted here.")

    if check_deliverability and not test_environment:
        # Validate the email address's deliverability using DNS
        # and update the returned ValidatedEmail object with metadata.

        if is_domain_literal:
            # There is nothing to check --- skip deliverability checks.
            return ret

        # Lazy load `deliverability` as it is slow to import (due to dns.resolver)
        from .deliverability import validate_email_deliverability
        deliverability_info = validate_email_deliverability(
            ret.ascii_domain, ret.domain, timeout, dns_resolver
        )
        mx = deliverability_info.get("mx")
        if mx is not None:
            ret.mx = mx
        ret.mx_fallback_type = deliverability_info.get("mx_fallback_type")

    return ret