diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/azure/ai/ml/_utils/_pathspec.py')
-rw-r--r-- | .venv/lib/python3.12/site-packages/azure/ai/ml/_utils/_pathspec.py | 587 |
1 files changed, 587 insertions, 0 deletions
# ---------------------------------------------------------
# Copyright (c) 2013-2022 Caleb P. Burns credits dahlia <https://github.com/dahlia>
# Licensed under the MPLv2 License. See License.txt in the project root for
# license information.
# ---------------------------------------------------------
"""
This file code has been vendored from pathspec repo.
Please do not edit it, unless really necessary
"""
import dataclasses
import os
import posixpath
import re
import warnings
from typing import Any, AnyStr, Iterable
from typing import Match as MatchHint
from typing import Optional
from typing import Pattern as PatternHint
from typing import Tuple, Union

# Every platform path separator that differs from the POSIX one ('/').
NORMALIZE_PATH_SEPS = [sep for sep in [os.sep, os.altsep] if sep and sep != posixpath.sep]

# The encoding to use when parsing a byte string pattern.
# This provides the base definition for patterns.
_BYTES_ENCODING = "latin1"


class Pattern(object):
    """
    The :class:`Pattern` class is the abstract definition of a pattern.
    """

    # Make the class dict-less.
    __slots__ = ("include",)

    def __init__(self, include: Optional[bool]) -> None:
        """
        Initializes the :class:`Pattern` instance.
        *include* (:class:`bool` or :data:`None`) is whether the matched
        files should be included (:data:`True`), excluded (:data:`False`),
        or is a null-operation (:data:`None`).
        """

        self.include = include
        """
        *include* (:class:`bool` or :data:`None`) is whether the matched
        files should be included (:data:`True`), excluded (:data:`False`),
        or is a null-operation (:data:`None`).
        """

    def match(self, files: Iterable[str]) -> Iterable[str]:
        """Matches this pattern against the specified files.

        :param files: Contains each file relative to the root directory (e.g. :data:`"relative/path/to/file"`).
        :type files: Iterable[str]
        :return: The matched file paths
        :rtype: Iterable[str]

        .. deprecated::

            This method is no longer used and has been replaced by :meth:`.match_file`. Use the :meth:`.match_file`
            method with a loop for similar results.
        """
        # NOTE: This is a generator, so the warning is only emitted once the
        # returned iterable is first advanced.
        warnings.warn(
            (
                "{0.__module__}.{0.__qualname__}.match() is deprecated. Use "
                "{0.__module__}.{0.__qualname__}.match_file() with a loop for "
                "similar results."
            ).format(self.__class__),
            DeprecationWarning,
            stacklevel=2,
        )

        for file in files:
            if self.match_file(file) is not None:
                yield file

    def match_file(self, file: str) -> Optional[Any]:
        """Matches this pattern against the specified file.

        :param file: The normalized file path to match against.
        :type file: str
        :return: Returns the match result if *file* matched; otherwise, :data:`None`.
        :rtype: Optional[Any]
        :raises NotImplementedError: Always; subclasses must override this method.
        """
        raise NotImplementedError(
            ("{0.__module__}.{0.__qualname__} must override match_file().").format(self.__class__)
        )


class RegexPattern(Pattern):
    """
    The :class:`RegexPattern` class is an implementation of a pattern
    using regular expressions.
    """

    # Keep the class dict-less.
    __slots__ = ("regex",)

    def __init__(
        self,
        pattern: Union[AnyStr, PatternHint, None],
        include: Optional[bool] = None,
    ) -> None:
        """
        Initializes the :class:`RegexPattern` instance.
        *pattern* (:class:`str`, :class:`bytes`, :class:`re.Pattern`, or
        :data:`None`) is the pattern to compile into a regular expression.
        *include* (:class:`bool` or :data:`None`) must be :data:`None`
        unless *pattern* is a precompiled regular expression (:class:`re.Pattern`)
        in which case it is whether matched files should be included
        (:data:`True`), excluded (:data:`False`), or is a null operation
        (:data:`None`).
        .. NOTE:: Subclasses do not need to support the *include*
        parameter.
        """

        if isinstance(pattern, (str, bytes)):
            assert include is None, ("include:{!r} must be null when pattern:{!r} is a string.").format(
                include, pattern
            )
            regex, include = self.pattern_to_regex(pattern)
            # NOTE: Make sure to allow a null regular expression to be
            # returned for a null-operation.
            if include is not None:
                regex = re.compile(regex)

        elif pattern is not None and hasattr(pattern, "match"):
            # Assume pattern is a precompiled regular expression.
            # - NOTE: Used specified *include*.
            regex = pattern

        elif pattern is None:
            # NOTE: Make sure to allow a null pattern to be passed for a
            # null-operation.
            assert include is None, ("include:{!r} must be null when pattern:{!r} is null.").format(include, pattern)
            # A null pattern has no regular expression. Without this
            # assignment the attribute store below would raise
            # UnboundLocalError instead of building a null-operation.
            regex = None

        else:
            raise TypeError("pattern:{!r} is not a string, re.Pattern, or None.".format(pattern))

        super(RegexPattern, self).__init__(include)

        self.regex: Optional[PatternHint] = regex
        """
        *regex* (:class:`re.Pattern` or :data:`None`) is the regular
        expression for the pattern. It is :data:`None` for a null-operation.
        """

    def __eq__(self, other: object) -> bool:
        """Tests the equality of this regex pattern with *other*

        :param other: The regex pattern to test against
        :type other: RegexPattern
        :return: Return True if :attr:`~Pattern.include` and :attr:`~RegexPattern.regex`
            are equal. False otherwise.
        :rtype: bool
        """
        if isinstance(other, RegexPattern):
            return self.include == other.include and self.regex == other.regex
        return NotImplemented

    def match_file(self, file: str) -> Optional["RegexMatchResult"]:
        """Matches this pattern against the specified file.

        :param file: File relative to the root directory (e.g., "relative/path/to/file").
        :type file: str
        :return: Returns the match result (:class:`RegexMatchResult`) if *file*
            matched; otherwise, :data:`None`.
        :rtype: Optional[RegexMatchResult]
        """
        # A null-operation (include is None) never matches anything.
        if self.include is not None:
            match = self.regex.match(file)
            if match is not None:
                return RegexMatchResult(match)

        return None

    @classmethod
    def pattern_to_regex(cls, pattern: str) -> Tuple[str, bool]:
        """Convert the pattern into an uncompiled regular expression.

        :param pattern: The pattern to convert into a regular expression.
        :type pattern: str
        :return: Returns the uncompiled regular expression (:class:`str` or :data:`None`),
            and whether matched files should be included (:data:`True`),
            excluded (:data:`False`), or is a null-operation (:data:`None`).
        :rtype: Tuple[str, bool]

        .. NOTE::

            The default implementation simply returns *pattern* and :data:`True`.
        """
        return pattern, True


@dataclasses.dataclass()
class RegexMatchResult(object):
    """
    The :class:`RegexMatchResult` data class is used to return information
    about the matched regular expression.
    """

    # Keep the class dict-less.
    __slots__ = ("match",)

    match: MatchHint
    """
    *match* (:class:`re.Match`) is the regex match result.
    """


class GitWildMatchPatternError(ValueError):
    """
    The :class:`GitWildMatchPatternError` indicates an invalid git wild match
    pattern.
    """


class GitWildMatchPattern(RegexPattern):
    """
    The :class:`GitWildMatchPattern` class represents a compiled Git
    wildmatch pattern.
    """

    # Keep the dict-less class hierarchy.
    __slots__ = ()

    @classmethod
    # pylint: disable=too-many-branches,too-many-statements
    def pattern_to_regex(
        cls,
        pattern: AnyStr,
    ) -> Tuple[Optional[AnyStr], Optional[bool]]:
        """Convert the pattern into a regular expression.

        :param pattern: Pattern to convert into a regular expression.
        :type pattern: AnyStr
        :return: A 2-tuple of:
            * the uncompiled regular expression (:class:`str`, :class:`bytes`,
              or :data:`None`)
            * whether matched files should be included (:data:`True`), excluded (:data:`False`), or if it is a
              null-operation (:data:`None`).
        :rtype: Tuple[Optional[AnyStr], Optional[bool]]
        :raises TypeError: If *pattern* is neither :class:`str` nor :class:`bytes`.
        :raises GitWildMatchPatternError: If *pattern* is not a valid git pattern.
        """
        if isinstance(pattern, str):
            return_type = str
        elif isinstance(pattern, bytes):
            return_type = bytes
            pattern = pattern.decode(_BYTES_ENCODING)
        else:
            raise TypeError(f"pattern:{pattern!r} is not a unicode or byte string.")

        original_pattern = pattern
        pattern = pattern.strip()

        if pattern.startswith("#"):
            # A pattern starting with a hash ('#') serves as a comment
            # (neither includes nor excludes files). Escape the hash with a
            # back-slash to match a literal hash (i.e., '\#').
            regex = None
            include = None

        elif pattern == "/":
            # EDGE CASE: According to `git check-ignore` (v2.4.1), a single
            # '/' does not match any file.
            regex = None
            include = None

        elif pattern:
            if pattern.startswith("!"):
                # A pattern starting with an exclamation mark ('!') negates the
                # pattern (exclude instead of include). Escape the exclamation
                # mark with a back-slash to match a literal exclamation mark
                # (i.e., '\!').
                include = False
                # Remove leading exclamation mark.
                pattern = pattern[1:]
            else:
                include = True

            # Allow a regex override for edge cases that cannot be handled
            # through normalization.
            override_regex = None

            # Split pattern into segments.
            pattern_segs = pattern.split("/")

            # Normalize pattern to make processing easier.

            # EDGE CASE: Deal with duplicate double-asterisk sequences.
            # Collapse each sequence down to one double-asterisk. Iterate over
            # the segments in reverse and remove the duplicate double
            # asterisks as we go.
            for i in range(len(pattern_segs) - 1, 0, -1):
                prev = pattern_segs[i - 1]
                seg = pattern_segs[i]
                if prev == "**" and seg == "**":
                    del pattern_segs[i]

            if len(pattern_segs) == 2 and pattern_segs[0] == "**" and not pattern_segs[1]:
                # EDGE CASE: The '**/' pattern should match everything except
                # individual files in the root directory. This case cannot be
                # adequately handled through normalization. Use the override.
                override_regex = "^.+(?P<ps_d>/).*$"

            if not pattern_segs[0]:
                # A pattern beginning with a slash ('/') will only match paths
                # directly on the root directory instead of any descendant
                # paths. So, remove empty first segment to make pattern relative
                # to root.
                del pattern_segs[0]

            elif len(pattern_segs) == 1 or (len(pattern_segs) == 2 and not pattern_segs[1]):
                # A single pattern without a beginning slash ('/') will match
                # any descendant path. This is equivalent to "**/{pattern}". So,
                # prepend with double-asterisks to make pattern relative to
                # root.
                # EDGE CASE: This also holds for a single pattern with a
                # trailing slash (e.g. dir/).
                if pattern_segs[0] != "**":
                    pattern_segs.insert(0, "**")

            else:
                # EDGE CASE: A pattern without a beginning slash ('/') but
                # contains at least one prepended directory (e.g.
                # "dir/{pattern}") should not match "**/dir/{pattern}",
                # according to `git check-ignore` (v2.4.1).
                pass

            if not pattern_segs:
                # After resolving the edge cases, we end up with no pattern at
                # all. This must be because the pattern is invalid.
                raise GitWildMatchPatternError(f"Invalid git pattern: {original_pattern!r}")

            if not pattern_segs[-1] and len(pattern_segs) > 1:
                # A pattern ending with a slash ('/') will match all descendant
                # paths if it is a directory but not if it is a regular file.
                # This is equivalent to "{pattern}/**". So, set last segment to
                # a double-asterisk to include all descendants.
                pattern_segs[-1] = "**"

            if override_regex is None:
                # Build regular expression from pattern.
                output = ["^"]
                need_slash = False
                end = len(pattern_segs) - 1
                for i, seg in enumerate(pattern_segs):
                    if seg == "**":
                        if i == 0 and i == end:
                            # A pattern consisting solely of double-asterisks ('**')
                            # will match every path.
                            output.append(".+")
                        elif i == 0:
                            # A normalized pattern beginning with double-asterisks
                            # ('**') will match any leading path segments.
                            output.append("(?:.+/)?")
                            need_slash = False
                        elif i == end:
                            # A normalized pattern ending with double-asterisks ('**')
                            # will match any trailing path segments.
                            output.append("(?P<ps_d>/).*")
                        else:
                            # A pattern with inner double-asterisks ('**') will match
                            # multiple (or zero) inner path segments.
                            output.append("(?:/.+)?")
                            need_slash = True

                    elif seg == "*":
                        # Match single path segment.
                        if need_slash:
                            output.append("/")

                        output.append("[^/]+")

                        if i == end:
                            # A pattern ending without a slash ('/') will match a file
                            # or a directory (with paths underneath it). E.g., "foo"
                            # matches "foo", "foo/bar", "foo/bar/baz", etc.
                            output.append("(?:(?P<ps_d>/).*)?")

                        need_slash = True

                    else:
                        # Match segment glob pattern.
                        if need_slash:
                            output.append("/")

                        try:
                            output.append(cls._translate_segment_glob(seg))
                        except ValueError as e:
                            raise GitWildMatchPatternError(f"Invalid git pattern: {original_pattern!r}") from e

                        if i == end:
                            # A pattern ending without a slash ('/') will match a file
                            # or a directory (with paths underneath it). E.g., "foo"
                            # matches "foo", "foo/bar", "foo/bar/baz", etc.
                            output.append("(?:(?P<ps_d>/).*)?")

                        need_slash = True

                output.append("$")
                regex = "".join(output)

            else:
                # Use regex override.
                regex = override_regex

        else:
            # A blank pattern is a null-operation (neither includes nor
            # excludes files).
            regex = None
            include = None

        if regex is not None and return_type is bytes:
            regex = regex.encode(_BYTES_ENCODING)

        return regex, include

    @staticmethod
    def _translate_segment_glob(pattern: str) -> str:
        """Translates the glob pattern to a regular expression. This is used in
        the constructor to translate a path segment glob pattern to its
        corresponding regular expression.

        :param pattern: The glob pattern.
        :type pattern: str
        :return: The regular expression
        :rtype: str
        :raises ValueError: If *pattern* ends with an unpaired escape character.
        """
        # NOTE: This is derived from `fnmatch.translate()` and is similar to
        # the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.

        escape = False
        regex = ""
        i, end = 0, len(pattern)
        while i < end:
            # Get next character.
            char = pattern[i]
            i += 1

            if escape:
                # Escape the character.
                escape = False
                regex += re.escape(char)

            elif char == "\\":
                # Escape character, escape next character.
                escape = True

            elif char == "*":
                # Multi-character wildcard. Match any string (except slashes),
                # including an empty string.
                regex += "[^/]*"

            elif char == "?":
                # Single-character wildcard. Match any single character (except
                # a slash).
                regex += "[^/]"

            elif char == "[":
                # Bracket expression wildcard. Except for the beginning
                # exclamation mark, the whole bracket expression can be used
                # directly as regex but we have to find where the expression
                # ends.
                # - "[][!]" matches ']', '[' and '!'.
                # - "[]-]" matches ']' and '-'.
                # - "[!]a-]" matches any character except ']', 'a' and '-'.
                j = i
                # Pass back expression negation.
                if j < end and pattern[j] == "!":
                    j += 1
                # Pass first closing bracket if it is at the beginning of the
                # expression.
                if j < end and pattern[j] == "]":
                    j += 1
                # Find closing bracket. Stop once we reach the end or find it.
                while j < end and pattern[j] != "]":
                    j += 1

                if j < end:
                    # Found end of bracket expression. Increment j to be one past
                    # the closing bracket:
                    #
                    #  [...]
                    #   ^   ^
                    #   i   j
                    #
                    j += 1
                    expr = "["

                    if pattern[i] == "!":
                        # Bracket expression needs to be negated.
                        expr += "^"
                        i += 1
                    elif pattern[i] == "^":
                        # POSIX declares that the regex bracket expression negation
                        # "[^...]" is undefined in a glob pattern. Python's
                        # `fnmatch.translate()` escapes the caret ('^') as a
                        # literal. To maintain consistency with undefined behavior,
                        # I am escaping the '^' as well.
                        expr += "\\^"
                        i += 1

                    # Build regex bracket expression. Escape slashes so they are
                    # treated as literal slashes by regex as defined by POSIX.
                    expr += pattern[i:j].replace("\\", "\\\\")

                    # Add regex bracket expression to regex result.
                    regex += expr

                    # Set i to one past the closing bracket.
                    i = j

                else:
                    # Failed to find closing bracket, treat opening bracket as a
                    # bracket literal instead of as an expression.
                    regex += "\\["

            else:
                # Regular character, escape it for regex.
                regex += re.escape(char)

        if escape:
            raise ValueError(f"Escape character found with no next character to escape: {pattern!r}")

        return regex

    @staticmethod
    def escape(s: AnyStr) -> AnyStr:
        """Escape special characters in the given string.

        :param s: a filename or a string that you want to escape, usually before adding it to a ".gitignore".
        :type s: AnyStr
        :return: The escaped string
        :rtype: Union[str, bytes]
        :raises TypeError: If *s* is neither :class:`str` nor :class:`bytes`.
        """
        if isinstance(s, str):
            return_type = str
            string = s
        elif isinstance(s, bytes):
            return_type = bytes
            string = s.decode(_BYTES_ENCODING)
        else:
            raise TypeError(f"s:{s!r} is not a unicode or byte string.")

        # Reference: https://git-scm.com/docs/gitignore#_pattern_format
        meta_characters = r"[]!*#?"

        out_string = "".join("\\" + x if x in meta_characters else x for x in string)

        if return_type is bytes:
            return out_string.encode(_BYTES_ENCODING)
        return out_string


def normalize_file(file: Union[str, os.PathLike], separators: Optional[Iterable[str]] = None) -> str:
    """Normalizes the file path to use the POSIX path separator (i.e.,
    ``'/'``), and make the paths relative (remove leading ``'/'``).

    :param file: The file path.
    :type file: Union[str, os.PathLike]
    :param separators: The path separators to normalize. This does not need to include the POSIX path separator
        (``'/'``), but including it will not affect the results. Default is :data:`None` for
        :data:`NORMALIZE_PATH_SEPS`. To prevent normalization, pass an empty container (e.g., an empty tuple ``()``).
    :type separators: Optional[Iterable[str]]
    :return: The normalized file path.
    :rtype: str
    """
    # Normalize path separators.
    if separators is None:
        separators = NORMALIZE_PATH_SEPS

    # Convert path object to string.
    norm_file = str(file)

    for sep in separators:
        norm_file = norm_file.replace(sep, posixpath.sep)

    if norm_file.startswith("/"):
        # Make path relative.
        norm_file = norm_file[1:]

    elif norm_file.startswith("./"):
        # Remove current directory prefix.
        norm_file = norm_file[2:]

    return norm_file