Diffstat (limited to '.venv/lib/python3.12/site-packages/regex/regex.py')
-rw-r--r--  .venv/lib/python3.12/site-packages/regex/regex.py  746
1 file changed, 746 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/regex/regex.py b/.venv/lib/python3.12/site-packages/regex/regex.py
new file mode 100644
index 00000000..0fdb4da9
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/regex/regex.py
@@ -0,0 +1,746 @@
+#
+# Secret Labs' Regular Expression Engine
+#
+# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
+#
+# This version of the SRE library can be redistributed under CNRI's
+# Python 1.6 license. For any other use, please contact Secret Labs
+# AB (info@pythonware.com).
+#
+# Portions of this engine have been developed in cooperation with
+# CNRI. Hewlett-Packard provided funding for 1.6 integration and
+# other compatibility work.
+#
+# 2010-01-16 mrab Python front-end re-written and extended
+
+r"""Support for regular expressions (RE).
+
+This module provides regular expression matching operations similar to those
+found in Perl. It supports both 8-bit and Unicode strings; both the pattern and
+the strings being processed can contain null bytes and characters outside the
+US ASCII range.
+
+Regular expressions can contain both special and ordinary characters. Most
+ordinary characters, like "A", "a", or "0", are the simplest regular
+expressions; they simply match themselves. You can concatenate ordinary
+characters, so last matches the string 'last'.
+
+There are a few differences between the old (legacy) behaviour and the new
+(enhanced) behaviour, which are indicated by VERSION0 or VERSION1.
+
+The special characters are:
+ "." Matches any character except a newline.
+ "^" Matches the start of the string.
+ "$" Matches the end of the string or just before the
+ newline at the end of the string.
+ "*" Matches 0 or more (greedy) repetitions of the preceding
+ RE. Greedy means that it will match as many repetitions
+ as possible.
+ "+" Matches 1 or more (greedy) repetitions of the preceding
+ RE.
+ "?" Matches 0 or 1 (greedy) of the preceding RE.
+ *?,+?,?? Non-greedy versions of the previous three special
+ characters.
+ *+,++,?+ Possessive versions of the previous three special
+ characters.
+ {m,n} Matches from m to n repetitions of the preceding RE.
+ {m,n}? Non-greedy version of the above.
+ {m,n}+ Possessive version of the above.
+ {...} Fuzzy matching constraints.
+ "\\" Either escapes special characters or signals a special
+ sequence.
+ [...] Indicates a set of characters. A "^" as the first
+ character indicates a complementing set.
+ "|" A|B, creates an RE that will match either A or B.
+ (...) Matches the RE inside the parentheses. The contents are
+ captured and can be retrieved or matched later in the
+ string.
+ (?flags-flags) VERSION1: Sets/clears the flags for the remainder of
+ the group or pattern; VERSION0: Sets the flags for the
+ entire pattern.
+ (?:...) Non-capturing version of regular parentheses.
+ (?>...) Atomic non-capturing version of regular parentheses.
+ (?flags-flags:...) Non-capturing version of regular parentheses with local
+ flags.
+ (?P<name>...) The substring matched by the group is accessible by
+ name.
+ (?<name>...) The substring matched by the group is accessible by
+ name.
+ (?P=name) Matches the text matched earlier by the group named
+ name.
+ (?#...) A comment; ignored.
+ (?=...) Matches if ... matches next, but doesn't consume the
+ string.
+ (?!...) Matches if ... doesn't match next.
+ (?<=...) Matches if preceded by ....
+ (?<!...) Matches if not preceded by ....
+ (?(id)yes|no) Matches yes pattern if group id matched, the (optional)
+ no pattern otherwise.
+ (?(DEFINE)...) If there's no group called "DEFINE", then ... will be
+ ignored, but any group definitions will be available.
+ (?|...|...) (?|A|B), creates an RE that will match either A or B,
+ but reuses capture group numbers across the
+ alternatives.
+ (*FAIL) Forces matching to fail, which means immediate
+ backtracking.
+ (*F) Abbreviation for (*FAIL).
+ (*PRUNE) Discards the current backtracking information. Its
+ effect doesn't extend outside an atomic group or a
+ lookaround.
+ (*SKIP) Similar to (*PRUNE), except that it also sets where in
+ the text the next attempt at matching the entire
+ pattern will start. Its effect doesn't extend outside
+ an atomic group or a lookaround.
+
+The fuzzy matching constraints are: "i" to permit insertions, "d" to permit
+deletions, "s" to permit substitutions, "e" to permit any of these. Limits are
+optional and are given with "<=" or "<". If any error type is specified, then
+any type not specified is not permitted.
+
+A cost equation may be provided.
+
+Examples:
+ (?:fuzzy){i<=2}
+ (?:fuzzy){i<=1,s<=2,d<=1,1i+1s+1d<3}
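+
+For illustration only (a sketch; 'fuzy' and 'fzy' are sample inputs):
+    regex.search(r'(?:fuzzy){e<=1}', 'fuzy')    # matches: one error is enough
+    regex.search(r'(?:fuzzy){e<=1}', 'fzy')     # no match: two errors needed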
+
+VERSION1: Set operators are supported, and a set can include nested sets. The
+set operators, in order of increasing precedence, are:
+ || Set union ("x||y" means "x or y").
+ ~~ (double tilde) Symmetric set difference ("x~~y" means "x or y, but not
+ both").
+ && Set intersection ("x&&y" means "x and y").
+ -- (double dash) Set difference ("x--y" means "x but not y").
+
+Implicit union, i.e. simple juxtaposition as in [ab], has the highest
+precedence.
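+
+For illustration only (a sketch): in VERSION1 mode,
+    regex.findall(r'(?V1)[[a-z]--[aeiou]]', 'regex')
+returns the consonants ['r', 'g', 'x'], because [[a-z]--[aeiou]] is the set of
+lowercase letters minus the vowels.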
+
+VERSION0 and VERSION1:
+The special sequences consist of "\\" and a character from the list below. If
+the character following "\\" is not on the list, then the resulting RE will
+match that character.
+ \number Matches the contents of the group of the same number if
+ number is no more than 2 digits, otherwise the character
+ with the 3-digit octal code.
+ \a Matches the bell character.
+ \A Matches only at the start of the string.
+ \b Matches the empty string, but only at the start or end of a
+ word.
+ \B Matches the empty string, but not at the start or end of a
+ word.
+ \d Matches any decimal digit; equivalent to the set [0-9] when
+ matching a bytestring or a Unicode string with the ASCII
+ flag, or the whole range of Unicode digits when matching a
+ Unicode string.
+ \D Matches any non-digit character; equivalent to [^\d].
+ \f Matches the formfeed character.
+ \g<name> Matches the text matched by the group named name.
+ \G Matches the empty string, but only at the position where
+ the search started.
+ \h Matches horizontal whitespace.
+ \K Keeps only what follows for the entire match.
+ \L<name> Named list. The list is provided as a keyword argument.
+ \m Matches the empty string, but only at the start of a word.
+ \M Matches the empty string, but only at the end of a word.
+ \n Matches the newline character.
+ \N{name} Matches the named character.
+ \p{name=value} Matches the character if its property has the specified
+ value.
+ \P{name=value} Matches the character if its property hasn't the specified
+ value.
+ \r Matches the carriage-return character.
+ \s Matches any whitespace character; equivalent to
+ [ \t\n\r\f\v].
+ \S Matches any non-whitespace character; equivalent to [^\s].
+ \t Matches the tab character.
+ \uXXXX Matches the Unicode codepoint with 4-digit hex code XXXX.
+ \UXXXXXXXX Matches the Unicode codepoint with 8-digit hex code
+ XXXXXXXX.
+ \v Matches the vertical tab character.
+ \w Matches any alphanumeric character; equivalent to
+ [a-zA-Z0-9_] when matching a bytestring or a Unicode string
+ with the ASCII flag, or the whole range of Unicode
+ alphanumeric characters (letters plus digits plus
+ underscore) when matching a Unicode string. With LOCALE, it
+ will match the set [0-9_] plus characters defined as
+ letters for the current locale.
+ \W Matches the complement of \w; equivalent to [^\w].
+ \xXX Matches the character with 2-digit hex code XX.
+ \X Matches a grapheme.
+ \Z Matches only at the end of the string.
+ \\ Matches a literal backslash.
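+
+For illustration only (a sketch of the named-list feature; the name 'colours'
+is arbitrary):
+    regex.search(r'\L<colours>', 'a bright blue sky', colours=['red', 'blue'])
+matches 'blue'; the list is supplied as a keyword argument of the same name.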
+
+This module exports the following functions:
+ match Match a regular expression pattern at the beginning of a string.
+ fullmatch Match a regular expression pattern against all of a string.
+ search Search a string for the presence of a pattern.
+ sub Substitute occurrences of a pattern found in a string using a
+ template string.
+ subf Substitute occurrences of a pattern found in a string using a
+ format string.
+ subn Same as sub, but also return the number of substitutions made.
+ subfn Same as subf, but also return the number of substitutions made.
+ split Split a string by the occurrences of a pattern. VERSION1: will
+ split at zero-width match; VERSION0: won't split at zero-width
+ match.
+ splititer Return an iterator yielding the parts of a split string.
+ findall Find all occurrences of a pattern in a string.
+ finditer Return an iterator yielding a match object for each match.
+ compile Compile a pattern into a Pattern object.
+ purge Clear the regular expression cache.
+ escape Backslash all non-alphanumerics or special characters in a
+ string.
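+
+For illustration only (a sketch of sub versus subf; both return '<one> <two>'):
+    regex.sub(r'(?P<word>\w+)', r'<\g<word>>', 'one two')
+    regex.subf(r'(?P<word>\w+)', '<{word}>', 'one two')
+sub takes a backslash-escaped template string, subf a str.format-style format
+string.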
+
+Most of the functions support a concurrent parameter: if True, the GIL will be
+released during matching, allowing other Python threads to run concurrently. If
+the string changes during matching, the behaviour is undefined. This parameter
+is not needed when working on the builtin (immutable) string classes.
+
+Some of the functions in this module take flags as optional parameters. Most of
+these flags can also be set within an RE:
+ A a ASCII Make \w, \W, \b, \B, \d, and \D match the
+ corresponding ASCII character categories. Default
+ when matching a bytestring.
+ B b BESTMATCH Find the best fuzzy match (default is first).
+ D DEBUG Print the parsed pattern.
+ E e ENHANCEMATCH Attempt to improve the fit after finding the first
+ fuzzy match.
+ F f FULLCASE Use full case-folding when performing
+ case-insensitive matching in Unicode.
+ I i IGNORECASE Perform case-insensitive matching.
+ L L LOCALE Make \w, \W, \b, \B, \d, and \D dependent on the
+ current locale. (One byte per character only.)
+ M m MULTILINE "^" matches the beginning of lines (after a newline)
+ as well as the string. "$" matches the end of lines
+ (before a newline) as well as the end of the string.
+ P p POSIX Perform POSIX-standard matching (leftmost longest).
+ R r REVERSE Searches backwards.
+ S s DOTALL "." matches any character at all, including the
+ newline.
+ U u UNICODE Make \w, \W, \b, \B, \d, and \D dependent on the
+ Unicode locale. Default when matching a Unicode
+ string.
+ V0 V0 VERSION0 Turn on the old legacy behaviour.
+ V1 V1 VERSION1 Turn on the new enhanced behaviour. This flag
+ includes the FULLCASE flag.
+ W w WORD Make \b and \B work with default Unicode word breaks
+ and make ".", "^" and "$" work with Unicode line
+ breaks.
+ X x VERBOSE Ignore whitespace and comments for nicer looking REs.
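+
+For illustration only (a sketch; the two calls below are equivalent):
+    regex.findall('cat', 'Cat CAT cat', flags=regex.IGNORECASE)
+    regex.findall('(?i)cat', 'Cat CAT cat')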
+
+This module also defines an exception 'error'.
+
+"""
+
+# Public symbols.
+__all__ = ["cache_all", "compile", "DEFAULT_VERSION", "escape", "findall",
+ "finditer", "fullmatch", "match", "purge", "search", "split", "splititer",
+ "sub", "subf", "subfn", "subn", "template", "Scanner", "A", "ASCII", "B",
+ "BESTMATCH", "D", "DEBUG", "E", "ENHANCEMATCH", "S", "DOTALL", "F",
+ "FULLCASE", "I", "IGNORECASE", "L", "LOCALE", "M", "MULTILINE", "P", "POSIX",
+ "R", "REVERSE", "T", "TEMPLATE", "U", "UNICODE", "V0", "VERSION0", "V1",
+ "VERSION1", "X", "VERBOSE", "W", "WORD", "error", "Regex", "__version__",
+ "__doc__", "RegexFlag"]
+
+__version__ = "2.5.148"
+
+# --------------------------------------------------------------------
+# Public interface.
+
+def match(pattern, string, flags=0, pos=None, endpos=None, partial=False,
+ concurrent=None, timeout=None, ignore_unused=False, **kwargs):
+ """Try to apply the pattern at the start of the string, returning a match
+ object, or None if no match was found."""
+ pat = _compile(pattern, flags, ignore_unused, kwargs, True)
+ return pat.match(string, pos, endpos, concurrent, partial, timeout)
+
+def fullmatch(pattern, string, flags=0, pos=None, endpos=None, partial=False,
+ concurrent=None, timeout=None, ignore_unused=False, **kwargs):
+ """Try to apply the pattern against all of the string, returning a match
+ object, or None if no match was found."""
+ pat = _compile(pattern, flags, ignore_unused, kwargs, True)
+ return pat.fullmatch(string, pos, endpos, concurrent, partial, timeout)
+
+def search(pattern, string, flags=0, pos=None, endpos=None, partial=False,
+ concurrent=None, timeout=None, ignore_unused=False, **kwargs):
+ """Search through string looking for a match to the pattern, returning a
+ match object, or None if no match was found."""
+ pat = _compile(pattern, flags, ignore_unused, kwargs, True)
+ return pat.search(string, pos, endpos, concurrent, partial, timeout)
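+
+# For illustration only (a sketch, not part of the module): pos and endpos
+# restrict the region of the string that is considered, e.g.
+#     regex.search(r'\w+', 'hello world', pos=6)    # matches 'world'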
+
+def sub(pattern, repl, string, count=0, flags=0, pos=None, endpos=None,
+ concurrent=None, timeout=None, ignore_unused=False, **kwargs):
+ """Return the string obtained by replacing the leftmost (or rightmost with a
+ reverse pattern) non-overlapping occurrences of the pattern in string by the
+ replacement repl. repl can be either a string or a callable; if a string,
+ backslash escapes in it are processed; if a callable, it's passed the match
+ object and must return a replacement string to be used."""
+ pat = _compile(pattern, flags, ignore_unused, kwargs, True)
+ return pat.sub(repl, string, count, pos, endpos, concurrent, timeout)
+
+def subf(pattern, format, string, count=0, flags=0, pos=None, endpos=None,
+ concurrent=None, timeout=None, ignore_unused=False, **kwargs):
+ """Return the string obtained by replacing the leftmost (or rightmost with a
+ reverse pattern) non-overlapping occurrences of the pattern in string by the
+ replacement format. format can be either a string or a callable; if a string,
+ it's treated as a format string; if a callable, it's passed the match object
+ and must return a replacement string to be used."""
+ pat = _compile(pattern, flags, ignore_unused, kwargs, True)
+ return pat.subf(format, string, count, pos, endpos, concurrent, timeout)
+
+def subn(pattern, repl, string, count=0, flags=0, pos=None, endpos=None,
+ concurrent=None, timeout=None, ignore_unused=False, **kwargs):
+ """Return a 2-tuple containing (new_string, number). new_string is the string
+ obtained by replacing the leftmost (or rightmost with a reverse pattern)
+ non-overlapping occurrences of the pattern in the source string by the
+ replacement repl. number is the number of substitutions that were made. repl
+ can be either a string or a callable; if a string, backslash escapes in it
+ are processed; if a callable, it's passed the match object and must return a
+ replacement string to be used."""
+ pat = _compile(pattern, flags, ignore_unused, kwargs, True)
+ return pat.subn(repl, string, count, pos, endpos, concurrent, timeout)
+
+def subfn(pattern, format, string, count=0, flags=0, pos=None, endpos=None,
+ concurrent=None, timeout=None, ignore_unused=False, **kwargs):
+ """Return a 2-tuple containing (new_string, number). new_string is the string
+ obtained by replacing the leftmost (or rightmost with a reverse pattern)
+ non-overlapping occurrences of the pattern in the source string by the
+ replacement format. number is the number of substitutions that were made. format
+ can be either a string or a callable; if a string, it's treated as a format
+ string; if a callable, it's passed the match object and must return a
+ replacement string to be used."""
+ pat = _compile(pattern, flags, ignore_unused, kwargs, True)
+ return pat.subfn(format, string, count, pos, endpos, concurrent, timeout)
+
+def split(pattern, string, maxsplit=0, flags=0, concurrent=None, timeout=None,
+ ignore_unused=False, **kwargs):
+ """Split the source string by the occurrences of the pattern, returning a
+ list containing the resulting substrings. If capturing parentheses are used
+ in pattern, then the text of all groups in the pattern are also returned as
+ part of the resulting list. If maxsplit is nonzero, at most maxsplit splits
+ occur, and the remainder of the string is returned as the final element of
+ the list."""
+ pat = _compile(pattern, flags, ignore_unused, kwargs, True)
+ return pat.split(string, maxsplit, concurrent, timeout)
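+
+# For illustration only (a sketch of the VERSION0/VERSION1 difference noted in
+# the module docstring): under VERSION1 a zero-width match such as r'\b' splits
+# the string, whereas under VERSION0 (the default) it does not, e.g.
+#     regex.split(r'(?V1)\b', 'a-b')    # splits at each word boundary
+#     regex.split(r'\b', 'a-b')         # no zero-width splits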
+
+def splititer(pattern, string, maxsplit=0, flags=0, concurrent=None,
+ timeout=None, ignore_unused=False, **kwargs):
+ "Return an iterator yielding the parts of a split string."
+ pat = _compile(pattern, flags, ignore_unused, kwargs, True)
+ return pat.splititer(string, maxsplit, concurrent, timeout)
+
+def findall(pattern, string, flags=0, pos=None, endpos=None, overlapped=False,
+ concurrent=None, timeout=None, ignore_unused=False, **kwargs):
+ """Return a list of all matches in the string. The matches may be overlapped
+ if overlapped is True. If one or more groups are present in the pattern,
+ return a list of groups; this will be a list of tuples if the pattern has
+ more than one group. Empty matches are included in the result."""
+ pat = _compile(pattern, flags, ignore_unused, kwargs, True)
+ return pat.findall(string, pos, endpos, overlapped, concurrent, timeout)
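+
+# For illustration only (a sketch of the 'overlapped' parameter):
+#     regex.findall(r'..', 'abcde', overlapped=True)    # ['ab', 'bc', 'cd', 'de']
+#     regex.findall(r'..', 'abcde')                     # ['ab', 'cd']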
+
+def finditer(pattern, string, flags=0, pos=None, endpos=None, overlapped=False,
+ partial=False, concurrent=None, timeout=None, ignore_unused=False, **kwargs):
+ """Return an iterator over all matches in the string. The matches may be
+ overlapped if overlapped is True. For each match, the iterator returns a
+ match object. Empty matches are included in the result."""
+ pat = _compile(pattern, flags, ignore_unused, kwargs, True)
+ return pat.finditer(string, pos, endpos, overlapped, concurrent, partial,
+ timeout)
+
+def compile(pattern, flags=0, ignore_unused=False, cache_pattern=None, **kwargs):
+ "Compile a regular expression pattern, returning a pattern object."
+ if cache_pattern is None:
+ cache_pattern = _cache_all
+ return _compile(pattern, flags, ignore_unused, kwargs, cache_pattern)
+
+def purge():
+ "Clear the regular expression cache"
+ _cache.clear()
+ _locale_sensitive.clear()
+
+# Whether to cache all patterns.
+_cache_all = True
+
+def cache_all(value=True):
+    """Sets whether to cache all patterns, even those that are compiled
+    explicitly. Passing None has no effect, but returns the current setting."""
+ global _cache_all
+
+ if value is None:
+ return _cache_all
+
+ _cache_all = value
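+
+# For illustration only (a sketch): cache_all(False) stops explicitly compiled
+# patterns from being added to the cache, and cache_all(None) reports the
+# current setting without changing it.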
+
+def template(pattern, flags=0):
+ "Compile a template pattern, returning a pattern object."
+ return _compile(pattern, flags | TEMPLATE, False, {}, False)
+
+def escape(pattern, special_only=True, literal_spaces=False):
+ """Escape a string for use as a literal in a pattern. If special_only is
+ True, escape only special characters, else escape all non-alphanumeric
+ characters. If literal_spaces is True, don't escape spaces."""
+ # Convert it to Unicode.
+ if isinstance(pattern, bytes):
+ p = pattern.decode("latin-1")
+ else:
+ p = pattern
+
+ s = []
+ if special_only:
+ for c in p:
+ if c == " " and literal_spaces:
+ s.append(c)
+ elif c in _METACHARS or c.isspace():
+ s.append("\\")
+ s.append(c)
+ else:
+ s.append(c)
+ else:
+ for c in p:
+ if c == " " and literal_spaces:
+ s.append(c)
+ elif c in _ALNUM:
+ s.append(c)
+ else:
+ s.append("\\")
+ s.append(c)
+
+ r = "".join(s)
+ # Convert it back to bytes if necessary.
+ if isinstance(pattern, bytes):
+ r = r.encode("latin-1")
+
+ return r
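+
+# For illustration only (a sketch of the two escaping modes; '=' is not one of
+# the metacharacters, so it is escaped only when special_only is False):
+#     regex.escape('a=b.c')                        # gives  a=b\.c
+#     regex.escape('a=b.c', special_only=False)    # gives  a\=b\.c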
+
+# --------------------------------------------------------------------
+# Internals.
+
+import regex._regex_core as _regex_core
+import regex._regex as _regex
+from threading import RLock as _RLock
+from locale import getpreferredencoding as _getpreferredencoding
+from regex._regex_core import *
+from regex._regex_core import (_ALL_VERSIONS, _ALL_ENCODINGS, _FirstSetError,
+ _UnscopedFlagSet, _check_group_features, _compile_firstset,
+ _compile_replacement, _flatten_code, _fold_case, _get_required_string,
+ _parse_pattern, _shrink_cache)
+from regex._regex_core import (ALNUM as _ALNUM, Info as _Info, OP as _OP, Source
+ as _Source, Fuzzy as _Fuzzy)
+
+# Version 0 is the old behaviour, compatible with the original 're' module.
+# Version 1 is the new behaviour, which differs slightly.
+
+DEFAULT_VERSION = VERSION0
+
+_METACHARS = frozenset("()[]{}?*+|^$\\.-#&~")
+
+_regex_core.DEFAULT_VERSION = DEFAULT_VERSION
+
+# Caches for the patterns and replacements.
+_cache = {}
+_cache_lock = _RLock()
+_named_args = {}
+_replacement_cache = {}
+_locale_sensitive = {}
+
+# Maximum size of the cache.
+_MAXCACHE = 500
+_MAXREPCACHE = 500
+
+def _compile(pattern, flags, ignore_unused, kwargs, cache_it):
+ "Compiles a regular expression to a PatternObject."
+
+ global DEFAULT_VERSION
+ try:
+ from regex import DEFAULT_VERSION
+ except ImportError:
+ pass
+
+ # We won't bother to cache the pattern if we're debugging.
+ if (flags & DEBUG) != 0:
+ cache_it = False
+
+ # What locale is this pattern using?
+ locale_key = (type(pattern), pattern)
+ if _locale_sensitive.get(locale_key, True) or (flags & LOCALE) != 0:
+ # This pattern is, or might be, locale-sensitive.
+ pattern_locale = _getpreferredencoding()
+ else:
+ # This pattern is definitely not locale-sensitive.
+ pattern_locale = None
+
+ def complain_unused_args():
+ if ignore_unused:
+ return
+
+ # Complain about any unused keyword arguments, possibly resulting from a typo.
+ unused_kwargs = set(kwargs) - {k for k, v in args_needed}
+ if unused_kwargs:
+ any_one = next(iter(unused_kwargs))
+ raise ValueError('unused keyword argument {!a}'.format(any_one))
+
+ if cache_it:
+ try:
+ # Do we know what keyword arguments are needed?
+ args_key = pattern, type(pattern), flags
+ args_needed = _named_args[args_key]
+
+ # Are we being provided with its required keyword arguments?
+ args_supplied = set()
+ if args_needed:
+ for k, v in args_needed:
+ try:
+ args_supplied.add((k, frozenset(kwargs[k])))
+ except KeyError:
+ raise error("missing named list: {!r}".format(k))
+
+ complain_unused_args()
+
+ args_supplied = frozenset(args_supplied)
+
+ # Have we already seen this regular expression and named list?
+ pattern_key = (pattern, type(pattern), flags, args_supplied,
+ DEFAULT_VERSION, pattern_locale)
+ return _cache[pattern_key]
+ except KeyError:
+ # It's a new pattern, or new named list for a known pattern.
+ pass
+
+ # Guess the encoding from the class of the pattern string.
+ if isinstance(pattern, str):
+ guess_encoding = UNICODE
+ elif isinstance(pattern, bytes):
+ guess_encoding = ASCII
+ elif isinstance(pattern, Pattern):
+ if flags:
+ raise ValueError("cannot process flags argument with a compiled pattern")
+
+ return pattern
+ else:
+ raise TypeError("first argument must be a string or compiled pattern")
+
+ # Set the default version in the core code in case it has been changed.
+ _regex_core.DEFAULT_VERSION = DEFAULT_VERSION
+
+ global_flags = flags
+
+ while True:
+ caught_exception = None
+ try:
+ source = _Source(pattern)
+ info = _Info(global_flags, source.char_type, kwargs)
+ info.guess_encoding = guess_encoding
+ source.ignore_space = bool(info.flags & VERBOSE)
+ parsed = _parse_pattern(source, info)
+ break
+ except _UnscopedFlagSet:
+ # Remember the global flags for the next attempt.
+ global_flags = info.global_flags
+ except error as e:
+ caught_exception = e
+
+ if caught_exception:
+ raise error(caught_exception.msg, caught_exception.pattern,
+ caught_exception.pos)
+
+ if not source.at_end():
+ raise error("unbalanced parenthesis", pattern, source.pos)
+
+ # Check the global flags for conflicts.
+ version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION
+ if version not in (0, VERSION0, VERSION1):
+ raise ValueError("VERSION0 and VERSION1 flags are mutually incompatible")
+
+ if (info.flags & _ALL_ENCODINGS) not in (0, ASCII, LOCALE, UNICODE):
+ raise ValueError("ASCII, LOCALE and UNICODE flags are mutually incompatible")
+
+ if isinstance(pattern, bytes) and (info.flags & UNICODE):
+ raise ValueError("cannot use UNICODE flag with a bytes pattern")
+
+ if not (info.flags & _ALL_ENCODINGS):
+ if isinstance(pattern, str):
+ info.flags |= UNICODE
+ else:
+ info.flags |= ASCII
+
+ reverse = bool(info.flags & REVERSE)
+ fuzzy = isinstance(parsed, _Fuzzy)
+
+    # Remember whether this pattern has an inline locale flag.
+ _locale_sensitive[locale_key] = info.inline_locale
+
+ # Fix the group references.
+ caught_exception = None
+ try:
+ parsed.fix_groups(pattern, reverse, False)
+ except error as e:
+ caught_exception = e
+
+ if caught_exception:
+ raise error(caught_exception.msg, caught_exception.pattern,
+ caught_exception.pos)
+
+ # Should we print the parsed pattern?
+ if flags & DEBUG:
+ parsed.dump(indent=0, reverse=reverse)
+
+ # Optimise the parsed pattern.
+ parsed = parsed.optimise(info, reverse)
+ parsed = parsed.pack_characters(info)
+
+ # Get the required string.
+ req_offset, req_chars, req_flags = _get_required_string(parsed, info.flags)
+
+ # Build the named lists.
+ named_lists = {}
+ named_list_indexes = [None] * len(info.named_lists_used)
+ args_needed = set()
+ for key, index in info.named_lists_used.items():
+ name, case_flags = key
+ values = frozenset(kwargs[name])
+ if case_flags:
+ items = frozenset(_fold_case(info, v) for v in values)
+ else:
+ items = values
+ named_lists[name] = values
+ named_list_indexes[index] = items
+ args_needed.add((name, values))
+
+ complain_unused_args()
+
+ # Check the features of the groups.
+ _check_group_features(info, parsed)
+
+ # Compile the parsed pattern. The result is a list of tuples.
+ code = parsed.compile(reverse)
+
+ # Is there a group call to the pattern as a whole?
+ key = (0, reverse, fuzzy)
+ ref = info.call_refs.get(key)
+ if ref is not None:
+ code = [(_OP.CALL_REF, ref)] + code + [(_OP.END, )]
+
+ # Add the final 'success' opcode.
+ code += [(_OP.SUCCESS, )]
+
+ # Compile the additional copies of the groups that we need.
+ for group, rev, fuz in info.additional_groups:
+ code += group.compile(rev, fuz)
+
+ # Flatten the code into a list of ints.
+ code = _flatten_code(code)
+
+ if not parsed.has_simple_start():
+ # Get the first set, if possible.
+ try:
+ fs_code = _compile_firstset(info, parsed.get_firstset(reverse))
+ fs_code = _flatten_code(fs_code)
+ code = fs_code + code
+ except _FirstSetError:
+ pass
+
+ # The named capture groups.
+ index_group = dict((v, n) for n, v in info.group_index.items())
+
+ # Create the PatternObject.
+ #
+ # Local flags like IGNORECASE affect the code generation, but aren't needed
+ # by the PatternObject itself. Conversely, global flags like LOCALE _don't_
+ # affect the code generation but _are_ needed by the PatternObject.
+ compiled_pattern = _regex.compile(pattern, info.flags | version, code,
+ info.group_index, index_group, named_lists, named_list_indexes,
+ req_offset, req_chars, req_flags, info.group_count)
+
+ # Do we need to reduce the size of the cache?
+ if len(_cache) >= _MAXCACHE:
+ with _cache_lock:
+ _shrink_cache(_cache, _named_args, _locale_sensitive, _MAXCACHE)
+
+ if cache_it:
+ if (info.flags & LOCALE) == 0:
+ pattern_locale = None
+
+ args_needed = frozenset(args_needed)
+
+ # Store this regular expression and named list.
+ pattern_key = (pattern, type(pattern), flags, args_needed,
+ DEFAULT_VERSION, pattern_locale)
+ _cache[pattern_key] = compiled_pattern
+
+ # Store what keyword arguments are needed.
+ _named_args[args_key] = args_needed
+
+ return compiled_pattern
+
+def _compile_replacement_helper(pattern, template):
+ "Compiles a replacement template."
+ # This function is called by the _regex module.
+
+ # Have we seen this before?
+ key = pattern.pattern, pattern.flags, template
+ compiled = _replacement_cache.get(key)
+ if compiled is not None:
+ return compiled
+
+ if len(_replacement_cache) >= _MAXREPCACHE:
+ _replacement_cache.clear()
+
+ is_unicode = isinstance(template, str)
+ source = _Source(template)
+ if is_unicode:
+ def make_string(char_codes):
+ return "".join(chr(c) for c in char_codes)
+ else:
+ def make_string(char_codes):
+ return bytes(char_codes)
+
+ compiled = []
+ literal = []
+ while True:
+ ch = source.get()
+ if not ch:
+ break
+ if ch == "\\":
+ # '_compile_replacement' will return either an int group reference
+ # or a string literal. It returns items (plural) in order to handle
+ # a 2-character literal (an invalid escape sequence).
+ is_group, items = _compile_replacement(source, pattern, is_unicode)
+ if is_group:
+ # It's a group, so first flush the literal.
+ if literal:
+ compiled.append(make_string(literal))
+ literal = []
+ compiled.extend(items)
+ else:
+ literal.extend(items)
+ else:
+ literal.append(ord(ch))
+
+ # Flush the literal.
+ if literal:
+ compiled.append(make_string(literal))
+
+ _replacement_cache[key] = compiled
+
+ return compiled
+
+# We define Pattern here after all the support objects have been defined.
+_pat = _compile('', 0, False, {}, False)
+Pattern = type(_pat)
+Match = type(_pat.match(''))
+del _pat
+
+# Make Pattern public for typing annotations.
+__all__.append("Pattern")
+__all__.append("Match")
+
+# We'll define an alias for the 'compile' function so that the repr of a
+# pattern object is eval-able.
+Regex = compile
+
+# Register myself for pickling.
+import copyreg as _copy_reg
+
+def _pickle(pattern):
+ return _regex.compile, pattern._pickled_data
+
+_copy_reg.pickle(Pattern, _pickle)