aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/azure/ai/ml/_utils/_pathspec.py
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/azure/ai/ml/_utils/_pathspec.py')
-rw-r--r-- .venv/lib/python3.12/site-packages/azure/ai/ml/_utils/_pathspec.py 587
1 files changed, 587 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/_utils/_pathspec.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/_utils/_pathspec.py
new file mode 100644
index 00000000..a9a18239
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/_utils/_pathspec.py
@@ -0,0 +1,587 @@
+# ---------------------------------------------------------
+# Copyright (c) 2013-2022 Caleb P. Burns credits dahlia <https://github.com/dahlia>
+# Licensed under the MPLv2 License. See License.txt in the project root for
+# license information.
+# ---------------------------------------------------------
+"""
+The code in this file has been vendored from the pathspec repository.
+Please do not edit it unless really necessary.
+"""
+import dataclasses
+import os
+import posixpath
+import re
+import warnings
+from typing import Any, AnyStr, Iterable
+from typing import Match as MatchHint
+from typing import Optional
+from typing import Pattern as PatternHint
+from typing import Tuple, Union
+
# Path separators of the current platform that differ from the POSIX
# separator ('/') and therefore have to be rewritten when a file path is
# normalized. Unset separators (e.g. ``os.altsep`` being ``None``) are skipped.
NORMALIZE_PATH_SEPS = [
    separator
    for separator in (os.sep, os.altsep)
    if separator and separator != posixpath.sep
]

# The encoding used to decode a byte-string pattern before it is parsed, and
# to encode the resulting regular expression back into bytes.
_BYTES_ENCODING = "latin1"
+
+
class Pattern:
    """
    Abstract base for every pattern: it stores the *include* flag and
    declares the matching interface that concrete patterns implement.
    """

    # No per-instance ``__dict__``; the only attribute is *include*.
    __slots__ = ("include",)

    def __init__(self, include: Optional[bool]) -> None:
        """
        Initializes the :class:`Pattern` instance.
        *include* (:class:`bool` or :data:`None`) tells whether matched
        files are included (:data:`True`), excluded (:data:`False`), or
        whether the pattern is a null-operation (:data:`None`).
        """
        # *include*: True -> include matches, False -> exclude matches,
        # None -> the pattern does nothing.
        self.include = include

    def match(self, files: Iterable[str]) -> Iterable[str]:
        """Lazily yields the entries of *files* accepted by :meth:`.match_file`.

        :param files: Contains each file relative to the root directory (e.g. :data:`"relative/path/to/file"`).
        :type files: Iterable[str]
        :return: The matched file paths
        :rtype: Iterable[str]

        .. deprecated::

            Superseded by :meth:`.match_file`; call that method in a loop to get similar results.
        """
        # This is a generator, so the warning is raised when iteration
        # starts. It has to be issued from this frame for ``stacklevel=2``
        # to point at the code that consumes the generator.
        message_template = (
            "{0.__module__}.{0.__qualname__}.match() is deprecated. Use "
            "{0.__module__}.{0.__qualname__}.match_file() with a loop for "
            "similar results."
        )
        warnings.warn(message_template.format(self.__class__), DeprecationWarning, stacklevel=2)

        yield from (candidate for candidate in files if self.match_file(candidate) is not None)

    def match_file(self, file: str) -> Optional[Any]:
        """Matches this pattern against a single file; subclasses must override it.

        :param file: The normalized file path to match against.
        :type file: str
        :return: Returns the match result if *file* matched; otherwise, :data:`None`.
        :rtype: Optional[Any]
        :raises NotImplementedError: Always, in this base implementation.
        """
        message_template = "{0.__module__}.{0.__qualname__} must override match_file()."
        raise NotImplementedError(message_template.format(self.__class__))
+
+
class RegexPattern(Pattern):
    """
    The :class:`RegexPattern` class is an implementation of a pattern
    using regular expressions.
    """

    # Keep the class dict-less.
    __slots__ = ("regex",)

    def __init__(
        self,
        pattern: Union[AnyStr, PatternHint, None],
        include: Optional[bool] = None,
    ) -> None:
        """
        Initializes the :class:`RegexPattern` instance.
        *pattern* (:class:`str`, :class:`bytes`, :class:`re.Pattern`, or
        :data:`None`) is the pattern to compile into a regular expression.
        A :data:`None` pattern produces a null-operation whose *regex* is
        :data:`None`.
        *include* (:class:`bool` or :data:`None`) must be :data:`None`
        unless *pattern* is a precompiled regular expression (:class:`re.Pattern`)
        in which case it is whether matched files should be included
        (:data:`True`), excluded (:data:`False`), or is a null operation
        (:data:`None`).
        Raises :class:`TypeError` if *pattern* is none of the supported
        types.
        .. NOTE:: Subclasses do not need to support the *include*
        parameter.
        """

        if isinstance(pattern, (str, bytes)):
            assert include is None, ("include:{!r} must be null when pattern:{!r} is a string.").format(
                include, pattern
            )
            regex, include = self.pattern_to_regex(pattern)
            # NOTE: Make sure to allow a null regular expression to be
            # returned for a null-operation.
            if include is not None:
                regex = re.compile(regex)

        elif pattern is not None and hasattr(pattern, "match"):
            # Assume pattern is a precompiled regular expression.
            # - NOTE: Used specified *include*.
            regex = pattern

        elif pattern is None:
            # NOTE: Make sure to allow a null pattern to be passed for a
            # null-operation.
            assert include is None, ("include:{!r} must be null when pattern:{!r} is null.").format(include, pattern)
            # A null pattern has no regular expression. Binding the name
            # here keeps the assignment to ``self.regex`` below from
            # failing with an UnboundLocalError.
            regex = None

        else:
            raise TypeError("pattern:{!r} is not a string, re.Pattern, or None.".format(pattern))

        super(RegexPattern, self).__init__(include)

        # *regex* (:class:`re.Pattern` or :data:`None`) is the regular
        # expression for the pattern; it is None for a null-operation.
        self.regex: Optional[PatternHint] = regex

    def __eq__(self, other: "RegexPattern") -> bool:
        """Tests the equality of this regex pattern with *other*

        :param other: The regex pattern to test against
        :type other: RegexPattern
        :return: Return True if :attr:`~Pattern.include` and :attr:`~RegexPattern.regex`
            are equal. False otherwise. :data:`NotImplemented` is returned when *other*
            is not a :class:`RegexPattern`.
        :rtype: bool
        """
        if isinstance(other, RegexPattern):
            return self.include == other.include and self.regex == other.regex
        return NotImplemented

    def match_file(self, file: str) -> Optional["RegexMatchResult"]:
        """Matches this pattern against the specified file.

        :param file: File relative to the root directory (e.g., "relative/path/to/file").
        :type file: str
        :return: Returns the match result (:class:`RegexMatchResult`) if *file*
            matched; otherwise, :data:`None`.
        :rtype: Optional[RegexMatchResult]
        """
        # A null-operation (include is None) never matches anything.
        if self.include is not None:
            match = self.regex.match(file)
            if match is not None:
                return RegexMatchResult(match)

        return None

    @classmethod
    def pattern_to_regex(cls, pattern: str) -> Tuple[str, bool]:
        """Convert the pattern into an uncompiled regular expression.

        :param pattern: The pattern to convert into a regular expression.
        :type pattern: str
        :return: Returns the uncompiled regular expression (:class:`str` or :data:`None`),
            and whether matched files should be included (:data:`True`),
            excluded (:data:`False`), or is a null-operation (:data:`None`).
        :rtype: Tuple[str, bool]

        .. NOTE::

            The default implementation simply returns *pattern* and :data:`True`.
        """
        return pattern, True
+
+
@dataclasses.dataclass
class RegexMatchResult:
    """
    Data class wrapping the :class:`re.Match` object produced when a
    regular-expression pattern matches a file.
    """

    # No per-instance ``__dict__``; the single field lives in a slot.
    __slots__ = ("match",)

    # *match* (:class:`re.Match`) is the regex match result.
    match: MatchHint
+
+
class GitWildMatchPatternError(ValueError):
    """
    Raised when a git wildmatch pattern is invalid. It derives from
    :class:`ValueError`, so callers catching that exception keep working.
    """
+
+
class GitWildMatchPattern(RegexPattern):
    """
    The :class:`GitWildMatchPattern` class represents a compiled Git
    wildmatch pattern.
    """

    # Keep the dict-less class hierarchy.
    __slots__ = ()

    @staticmethod
    def _strip_pattern(pattern: str) -> str:
        """Strip surrounding whitespace but keep a backslash-quoted trailing space.

        :param pattern: The decoded pattern text.
        :type pattern: str
        :return: The pattern without leading and trailing whitespace. When the
            first stripped trailing character is a space preceded by an odd number
            of back-slashes, that single space is kept because it is quoted.
        :rtype: str
        """
        pattern = pattern.lstrip()
        trimmed = pattern.rstrip()
        if len(trimmed) < len(pattern) and pattern[len(trimmed)] == " ":
            # An odd number of trailing back-slashes means the last one
            # quotes the space; an even number means they quote each other.
            trailing_backslashes = len(trimmed) - len(trimmed.rstrip("\\"))
            if trailing_backslashes % 2 == 1:
                trimmed += " "
        return trimmed

    @classmethod
    # pylint: disable=too-many-branches,too-many-statements
    def pattern_to_regex(
        cls,
        pattern: AnyStr,
    ) -> Tuple[Optional[AnyStr], Optional[bool]]:
        """Convert the pattern into a regular expression.

        :param pattern: Pattern to convert into a regular expression.
        :type pattern: AnyStr
        :return: A 2-tuple of:
            * the uncompiled regular expression (:class:`str`, :class:`bytes`,
              or :data:`None`)
            * whether matched files should be included (:data:`True`), excluded (:data:`False`), or if it is a
              null-operation (:data:`None`).
        :rtype: Tuple[Optional[AnyStr], Optional[bool]]
        :raises TypeError: If *pattern* is neither :class:`str` nor :class:`bytes`.
        :raises GitWildMatchPatternError: If *pattern* is not a valid git pattern.
        """
        if isinstance(pattern, str):
            return_type = str
        elif isinstance(pattern, bytes):
            return_type = bytes
            pattern = pattern.decode(_BYTES_ENCODING)
        else:
            raise TypeError(f"pattern:{pattern!r} is not a unicode or byte string.")

        original_pattern = pattern
        # Trailing spaces are ignored unless they are quoted with a
        # back-slash, so a plain strip() would corrupt such a pattern.
        pattern = cls._strip_pattern(pattern)

        if pattern.startswith("#"):
            # A pattern starting with a hash ('#') serves as a comment
            # (neither includes nor excludes files). Escape the hash with a
            # back-slash to match a literal hash (i.e., '\#').
            regex = None
            include = None

        elif pattern == "/":
            # EDGE CASE: According to `git check-ignore` (v2.4.1), a single
            # '/' does not match any file.
            regex = None
            include = None

        elif pattern:
            if pattern.startswith("!"):
                # A pattern starting with an exclamation mark ('!') negates the
                # pattern (exclude instead of include). Escape the exclamation
                # mark with a back-slash to match a literal exclamation mark
                # (i.e., '\!').
                include = False
                # Remove leading exclamation mark.
                pattern = pattern[1:]
            else:
                include = True

            # Allow a regex override for edge cases that cannot be handled
            # through normalization.
            override_regex = None

            # Split pattern into segments.
            pattern_segs = pattern.split("/")

            # Normalize pattern to make processing easier.

            # EDGE CASE: Deal with duplicate double-asterisk sequences.
            # Collapse each sequence down to one double-asterisk. Iterate over
            # the segments in reverse and remove the duplicate double
            # asterisks as we go.
            for i in range(len(pattern_segs) - 1, 0, -1):
                prev = pattern_segs[i - 1]
                seg = pattern_segs[i]
                if prev == "**" and seg == "**":
                    del pattern_segs[i]

            if len(pattern_segs) == 2 and pattern_segs[0] == "**" and not pattern_segs[1]:
                # EDGE CASE: The '**/' pattern should match everything except
                # individual files in the root directory. This case cannot be
                # adequately handled through normalization. Use the override.
                override_regex = "^.+(?P<ps_d>/).*$"

            if not pattern_segs[0]:
                # A pattern beginning with a slash ('/') will only match paths
                # directly on the root directory instead of any descendant
                # paths. So, remove empty first segment to make pattern relative
                # to root.
                del pattern_segs[0]

            elif len(pattern_segs) == 1 or (len(pattern_segs) == 2 and not pattern_segs[1]):
                # A single pattern without a beginning slash ('/') will match
                # any descendant path. This is equivalent to "**/{pattern}". So,
                # prepend with double-asterisks to make pattern relative to
                # root.
                # EDGE CASE: This also holds for a single pattern with a
                # trailing slash (e.g. dir/).
                if pattern_segs[0] != "**":
                    pattern_segs.insert(0, "**")

            else:
                # EDGE CASE: A pattern without a beginning slash ('/') but
                # contains at least one prepended directory (e.g.
                # "dir/{pattern}") should not match "**/dir/{pattern}",
                # according to `git check-ignore` (v2.4.1).
                pass

            if not pattern_segs:
                # After resolving the edge cases, we end up with no pattern at
                # all. This must be because the pattern is invalid.
                raise GitWildMatchPatternError(f"Invalid git pattern: {original_pattern!r}")

            if not pattern_segs[-1] and len(pattern_segs) > 1:
                # A pattern ending with a slash ('/') will match all descendant
                # paths if it is a directory but not if it is a regular file.
                # This is equivalent to "{pattern}/**". So, set last segment to
                # a double-asterisk to include all descendants.
                pattern_segs[-1] = "**"

            if override_regex is None:
                # Build regular expression from pattern.
                output = ["^"]
                need_slash = False
                end = len(pattern_segs) - 1
                for i, seg in enumerate(pattern_segs):
                    if seg == "**":
                        if i == 0 and i == end:
                            # A pattern consisting solely of double-asterisks ('**')
                            # will match every path.
                            output.append(".+")
                        elif i == 0:
                            # A normalized pattern beginning with double-asterisks
                            # ('**') will match any leading path segments.
                            output.append("(?:.+/)?")
                            need_slash = False
                        elif i == end:
                            # A normalized pattern ending with double-asterisks ('**')
                            # will match any trailing path segments.
                            output.append("(?P<ps_d>/).*")
                        else:
                            # A pattern with inner double-asterisks ('**') will match
                            # multiple (or zero) inner path segments.
                            output.append("(?:/.+)?")
                            need_slash = True

                    elif seg == "*":
                        # Match single path segment.
                        if need_slash:
                            output.append("/")

                        output.append("[^/]+")

                        if i == end:
                            # A pattern ending without a slash ('/') will match a file
                            # or a directory (with paths underneath it). E.g., "foo"
                            # matches "foo", "foo/bar", "foo/bar/baz", etc.
                            output.append("(?:(?P<ps_d>/).*)?")

                        need_slash = True

                    else:
                        # Match segment glob pattern.
                        if need_slash:
                            output.append("/")

                        try:
                            output.append(cls._translate_segment_glob(seg))
                        except ValueError as e:
                            raise GitWildMatchPatternError(f"Invalid git pattern: {original_pattern!r}") from e

                        if i == end:
                            # A pattern ending without a slash ('/') will match a file
                            # or a directory (with paths underneath it). E.g., "foo"
                            # matches "foo", "foo/bar", "foo/bar/baz", etc.
                            output.append("(?:(?P<ps_d>/).*)?")

                        need_slash = True

                output.append("$")
                regex = "".join(output)

            else:
                # Use regex override.
                regex = override_regex

        else:
            # A blank pattern is a null-operation (neither includes nor
            # excludes files).
            regex = None
            include = None

        if regex is not None and return_type is bytes:
            regex = regex.encode(_BYTES_ENCODING)

        return regex, include

    @staticmethod
    def _translate_segment_glob(pattern: str) -> str:
        """Translates the glob pattern to a regular expression. This is used in
        the constructor to translate a path segment glob pattern to its
        corresponding regular expression.

        :param pattern: The glob pattern.
        :type pattern: str
        :return: The regular expression
        :rtype: str
        :raises ValueError: If *pattern* ends with an escape character that has
            no following character to escape.
        """
        # NOTE: This is derived from `fnmatch.translate()` and is similar to
        # the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.

        escape = False
        regex = ""
        i, end = 0, len(pattern)
        while i < end:
            # Get next character.
            char = pattern[i]
            i += 1

            if escape:
                # Escape the character.
                escape = False
                regex += re.escape(char)

            elif char == "\\":
                # Escape character, escape next character.
                escape = True

            elif char == "*":
                # Multi-character wildcard. Match any string (except slashes),
                # including an empty string.
                regex += "[^/]*"

            elif char == "?":
                # Single-character wildcard. Match any single character (except
                # a slash).
                regex += "[^/]"

            elif char == "[":
                # Bracket expression wildcard. Except for the beginning
                # exclamation mark, the whole bracket expression can be used
                # directly as regex but we have to find where the expression
                # ends.
                # - "[][!]" matches ']', '[' and '!'.
                # - "[]-]" matches ']' and '-'.
                # - "[!]a-]" matches any character except ']', 'a' and '-'.
                j = i
                # Pass back expression negation.
                if j < end and pattern[j] == "!":
                    j += 1
                # Pass first closing bracket if it is at the beginning of the
                # expression.
                if j < end and pattern[j] == "]":
                    j += 1
                # Find closing bracket. Stop once we reach the end or find it.
                while j < end and pattern[j] != "]":
                    j += 1

                if j < end:
                    # Found end of bracket expression. Increment j to be one past
                    # the closing bracket:
                    #
                    #  [...]
                    #   ^   ^
                    #   i   j
                    #
                    j += 1
                    expr = "["

                    if pattern[i] == "!":
                        # Bracket expression needs to be negated.
                        expr += "^"
                        i += 1
                    elif pattern[i] == "^":
                        # POSIX declares that the regex bracket expression negation
                        # "[^...]" is undefined in a glob pattern. Python's
                        # `fnmatch.translate()` escapes the caret ('^') as a
                        # literal. To maintain consistency with undefined behavior,
                        # I am escaping the '^' as well.
                        expr += "\\^"
                        i += 1

                    # Build regex bracket expression. Escape back-slashes so
                    # they are treated as literal back-slashes by regex as
                    # defined by POSIX.
                    expr += pattern[i:j].replace("\\", "\\\\")

                    # Add regex bracket expression to regex result.
                    regex += expr

                    # Set i to one past the closing bracket.
                    i = j

                else:
                    # Failed to find closing bracket, treat opening bracket as a
                    # bracket literal instead of as an expression.
                    regex += "\\["

            else:
                # Regular character, escape it for regex.
                regex += re.escape(char)

        if escape:
            raise ValueError(f"Escape character found with no next character to escape: {pattern!r}")

        return regex

    @staticmethod
    def escape(s: AnyStr) -> AnyStr:
        """Escape special characters in the given string.

        :param s: a filename or a string that you want to escape, usually before adding it to a ".gitignore".
        :type s: AnyStr
        :return: The escaped string
        :rtype: Union[str, bytes]
        :raises TypeError: If *s* is neither :class:`str` nor :class:`bytes`.
        """
        if isinstance(s, str):
            return_type = str
            string = s
        elif isinstance(s, bytes):
            return_type = bytes
            string = s.decode(_BYTES_ENCODING)
        else:
            raise TypeError(f"s:{s!r} is not a unicode or byte string.")

        # Reference: https://git-scm.com/docs/gitignore#_pattern_format
        meta_characters = r"[]!*#?"

        out_string = "".join("\\" + x if x in meta_characters else x for x in string)

        if return_type is bytes:
            return out_string.encode(_BYTES_ENCODING)
        return out_string
+
+
def normalize_file(file: Union[str, os.PathLike], separators: Optional[Iterable[str]] = None) -> str:
    """Return *file* as a relative path that uses the POSIX separator (``'/'``).

    :param file: The file path.
    :type file: Union[str, os.PathLike]
    :param separators: The path separators to rewrite to ``'/'``. Listing ``'/'`` itself is harmless.
        :data:`None` (the default) selects :data:`NORMALIZE_PATH_SEPS`. Pass an empty container
        (e.g., an empty tuple ``()``) to skip separator rewriting.
    :type separators: Optional[Iterable[str]]
    :return: The normalized file path.
    :rtype: str
    """
    active_separators = NORMALIZE_PATH_SEPS if separators is None else separators

    # Path objects are turned into their string form first.
    normalized = str(file)
    for separator in active_separators:
        normalized = normalized.replace(separator, posixpath.sep)

    # Only one prefix is removed: a single leading '/' (making the path
    # relative) or, failing that, a leading './' (current directory).
    if normalized.startswith("/"):
        return normalized[1:]
    if normalized.startswith("./"):
        return normalized[2:]
    return normalized