author     S. Solomon Darnell  2025-03-28 21:52:21 -0500
committer  S. Solomon Darnell  2025-03-28 21:52:21 -0500
commit     4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree       ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/regex/regex.py
parent     cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download   gn-ai-master.tar.gz
two versions of R2R are here (HEAD, master)
Diffstat (limited to '.venv/lib/python3.12/site-packages/regex/regex.py')
-rw-r--r--  .venv/lib/python3.12/site-packages/regex/regex.py  746
1 file changed, 746 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/regex/regex.py b/.venv/lib/python3.12/site-packages/regex/regex.py
new file mode 100644
index 00000000..0fdb4da9
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/regex/regex.py
@@ -0,0 +1,746 @@
+#
+# Secret Labs' Regular Expression Engine
+#
+# Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
+#
+# This version of the SRE library can be redistributed under CNRI's
+# Python 1.6 license.  For any other use, please contact Secret Labs
+# AB (info@pythonware.com).
+#
+# Portions of this engine have been developed in cooperation with
+# CNRI.  Hewlett-Packard provided funding for 1.6 integration and
+# other compatibility work.
+#
+# 2010-01-16 mrab Python front-end re-written and extended
+
+r"""Support for regular expressions (RE).
+
+This module provides regular expression matching operations similar to those
+found in Perl. It supports both 8-bit and Unicode strings; both the pattern and
+the strings being processed can contain null bytes and characters outside the
+US ASCII range.
+
+Regular expressions can contain both special and ordinary characters. Most
+ordinary characters, like "A", "a", or "0", are the simplest regular
+expressions; they simply match themselves. You can concatenate ordinary
+characters, so last matches the string 'last'.
+
+There are a few differences between the old (legacy) behaviour and the new
+(enhanced) behaviour, which are indicated by VERSION0 or VERSION1.
+
+The special characters are:
+    "."                 Matches any character except a newline.
+    "^"                 Matches the start of the string.
+    "$"                 Matches the end of the string or just before the
+                        newline at the end of the string.
+    "*"                 Matches 0 or more (greedy) repetitions of the preceding
+                        RE. Greedy means that it will match as many repetitions
+                        as possible.
+    "+"                 Matches 1 or more (greedy) repetitions of the preceding
+                        RE.
+    "?"                 Matches 0 or 1 (greedy) of the preceding RE.
+    *?,+?,??            Non-greedy versions of the previous three special
+                        characters.
+    *+,++,?+            Possessive versions of the previous three special
+                        characters.
+    {m,n}               Matches from m to n repetitions of the preceding RE.
+    {m,n}?              Non-greedy version of the above.
+    {m,n}+              Possessive version of the above.
+    {...}               Fuzzy matching constraints.
+    "\\"                Either escapes special characters or signals a special
+                        sequence.
+    [...]               Indicates a set of characters. A "^" as the first
+                        character indicates a complementing set.
+    "|"                 A|B, creates an RE that will match either A or B.
+    (...)               Matches the RE inside the parentheses. The contents are
+                        captured and can be retrieved or matched later in the
+                        string.
+    (?flags-flags)      VERSION1: Sets/clears the flags for the remainder of
+                        the group or pattern; VERSION0: Sets the flags for the
+                        entire pattern.
+    (?:...)             Non-capturing version of regular parentheses.
+    (?>...)             Atomic non-capturing version of regular parentheses.
+    (?flags-flags:...)  Non-capturing version of regular parentheses with local
+                        flags.
+    (?P<name>...)       The substring matched by the group is accessible by
+                        name.
+    (?<name>...)        The substring matched by the group is accessible by
+                        name.
+    (?P=name)           Matches the text matched earlier by the group named
+                        name.
+    (?#...)             A comment; ignored.
+    (?=...)             Matches if ... matches next, but doesn't consume the
+                        string.
+    (?!...)             Matches if ... doesn't match next.
+    (?<=...)            Matches if preceded by ....
+    (?<!...)            Matches if not preceded by ....
+    (?(id)yes|no)       Matches yes pattern if group id matched, the (optional)
+                        no pattern otherwise.
+    (?(DEFINE)...)      If there's no group called "DEFINE", then ... will be
+                        ignored, but any group definitions will be available.
+    (?|...|...)         (?|A|B), creates an RE that will match either A or B,
+                        but reuses capture group numbers across the
+                        alternatives.
+    (*FAIL)             Forces matching to fail, which means immediate
+                        backtracking.
+    (*F)                Abbreviation for (*FAIL).
+    (*PRUNE)            Discards the current backtracking information. Its
+                        effect doesn't extend outside an atomic group or a
+                        lookaround.
+    (*SKIP)             Similar to (*PRUNE), except that it also sets where in
+                        the text the next attempt at matching the entire
+                        pattern will start. Its effect doesn't extend outside
+                        an atomic group or a lookaround.
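+
+For example (illustrative), the pattern "(?P<word>\w+) (?P=word)" combines a
+named group with a named backreference and matches a repeated word such as
+"the the".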
+
+The fuzzy matching constraints are: "i" to permit insertions, "d" to permit
+deletions, "s" to permit substitutions, "e" to permit any of these. Limits are
+optional with "<=" and "<". If any type of error is provided then any type not
+provided is not permitted.
+
+A cost equation may be provided.
+
+Examples:
+    (?:fuzzy){i<=2}
+    (?:fuzzy){i<=1,s<=2,d<=1,1i+1s+1d<3}
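+
+For instance (illustrative), regex.search(r'(?:colour){e<=1}', 'color') finds a
+match, because 'color' is within one permitted error of 'colour'.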
+
+VERSION1: Set operators are supported, and a set can include nested sets. The
+set operators, in order of increasing precedence, are:
+    ||  Set union ("x||y" means "x or y").
+    ~~  (double tilde) Symmetric set difference ("x~~y" means "x or y, but not
+        both").
+    &&  Set intersection ("x&&y" means "x and y").
+    --  (double dash) Set difference ("x--y" means "x but not y").
+
+Implicit union, i.e. simple juxtaposition as in [ab], has the highest
+precedence.
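+
+For example (illustrative), under VERSION1:
+    regex.findall(r'(?V1)[[a-z]--[aeiou]]+', 'hello world')
+returns the runs of lowercase consonants: ['h', 'll', 'w', 'rld'].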
+
+VERSION0 and VERSION1:
+The special sequences consist of "\\" and a character from the list below. If
+the ordinary character is not on the list, then the resulting RE will match the
+second character.
+    \number         Matches the contents of the group of the same number if
+                    number is no more than 2 digits, otherwise the character
+                    with the 3-digit octal code.
+    \a              Matches the bell character.
+    \A              Matches only at the start of the string.
+    \b              Matches the empty string, but only at the start or end of a
+                    word.
+    \B              Matches the empty string, but not at the start or end of a
+                    word.
+    \d              Matches any decimal digit; equivalent to the set [0-9] when
+                    matching a bytestring or a Unicode string with the ASCII
+                    flag, or the whole range of Unicode digits when matching a
+                    Unicode string.
+    \D              Matches any non-digit character; equivalent to [^\d].
+    \f              Matches the formfeed character.
+    \g<name>        Matches the text matched by the group named name.
+    \G              Matches the empty string, but only at the position where
+                    the search started.
+    \h              Matches horizontal whitespace.
+    \K              Keeps only what follows for the entire match.
+    \L<name>        Named list. The list is provided as a keyword argument.
+    \m              Matches the empty string, but only at the start of a word.
+    \M              Matches the empty string, but only at the end of a word.
+    \n              Matches the newline character.
+    \N{name}        Matches the named character.
+    \p{name=value}  Matches the character if its property has the specified
+                    value.
+    \P{name=value}  Matches the character if its property doesn't have the
+                    specified value.
+    \r              Matches the carriage-return character.
+    \s              Matches any whitespace character; equivalent to
+                    [ \t\n\r\f\v].
+    \S              Matches any non-whitespace character; equivalent to [^\s].
+    \t              Matches the tab character.
+    \uXXXX          Matches the Unicode codepoint with 4-digit hex code XXXX.
+    \UXXXXXXXX      Matches the Unicode codepoint with 8-digit hex code
+                    XXXXXXXX.
+    \v              Matches the vertical tab character.
+    \w              Matches any alphanumeric character; equivalent to
+                    [a-zA-Z0-9_] when matching a bytestring or a Unicode string
+                    with the ASCII flag, or the whole range of Unicode
+                    alphanumeric characters (letters plus digits plus
+                    underscore) when matching a Unicode string. With LOCALE, it
+                    will match the set [0-9_] plus characters defined as
+                    letters for the current locale.
+    \W              Matches the complement of \w; equivalent to [^\w].
+    \xXX            Matches the character with 2-digit hex code XX.
+    \X              Matches a grapheme.
+    \Z              Matches only at the end of the string.
+    \\              Matches a literal backslash.
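+
+For example (illustrative), a named list is supplied as a keyword argument
+whose name is chosen by the caller (here, 'colours'):
+    regex.search(r'\L<colours>', 'a red door', colours=['red', 'green', 'blue'])
+matches 'red'.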
+
+This module exports the following functions:
+    match      Match a regular expression pattern at the beginning of a string.
+    fullmatch  Match a regular expression pattern against all of a string.
+    search     Search a string for the presence of a pattern.
+    sub        Substitute occurrences of a pattern found in a string using a
+               template string.
+    subf       Substitute occurrences of a pattern found in a string using a
+               format string.
+    subn       Same as sub, but also return the number of substitutions made.
+    subfn      Same as subf, but also return the number of substitutions made.
+    split      Split a string by the occurrences of a pattern. VERSION1: will
+               split at zero-width match; VERSION0: won't split at zero-width
+               match.
+    splititer  Return an iterator yielding the parts of a split string.
+    findall    Find all occurrences of a pattern in a string.
+    finditer   Return an iterator yielding a match object for each match.
+    compile    Compile a pattern into a Pattern object.
+    purge      Clear the regular expression cache.
+    escape     Backslash all non-alphanumerics or special characters in a
+               string.
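+
+A few illustrative calls (the patterns shown are only examples):
+    regex.sub(r'\d+', '#', 'a1b22c')                       # -> 'a#b#c'
+    regex.subf(r'(\w+) (\w+)', '{2} {1}', 'first second')  # -> 'second first'
+    regex.split(r',\s*', 'a, b,c')                         # -> ['a', 'b', 'c']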
+
+Most of the functions support a concurrent parameter: if True, the GIL will be
+released during matching, allowing other Python threads to run concurrently. If
+the string changes during matching, the behaviour is undefined. This parameter
+is not needed when working on the builtin (immutable) string classes.
+
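+For example (illustrative), with 'data' standing for some large string or
+buffer object:
+    regex.search(rb'[0-9]+', data, concurrent=True)
+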
+Some of the functions in this module take flags as optional parameters. Most of
+these flags can also be set within an RE:
+    A   a   ASCII         Make \w, \W, \b, \B, \d, and \D match the
+                          corresponding ASCII character categories. Default
+                          when matching a bytestring.
+    B   b   BESTMATCH     Find the best fuzzy match (default is first).
+    D       DEBUG         Print the parsed pattern.
+    E   e   ENHANCEMATCH  Attempt to improve the fit after finding the first
+                          fuzzy match.
+    F   f   FULLCASE      Use full case-folding when performing
+                          case-insensitive matching in Unicode.
+    I   i   IGNORECASE    Perform case-insensitive matching.
+    L   L   LOCALE        Make \w, \W, \b, \B, \d, and \D dependent on the
+                          current locale. (One byte per character only.)
+    M   m   MULTILINE     "^" matches the beginning of lines (after a newline)
+                          as well as the string. "$" matches the end of lines
+                          (before a newline) as well as the end of the string.
+    P   p   POSIX         Perform POSIX-standard matching (leftmost longest).
+    R   r   REVERSE       Searches backwards.
+    S   s   DOTALL        "." matches any character at all, including the
+                          newline.
+    U   u   UNICODE       Make \w, \W, \b, \B, \d, and \D dependent on the
+                          Unicode locale. Default when matching a Unicode
+                          string.
+    V0  V0  VERSION0      Turn on the old legacy behaviour.
+    V1  V1  VERSION1      Turn on the new enhanced behaviour. This flag
+                          includes the FULLCASE flag.
+    W   w   WORD          Make \b and \B work with default Unicode word breaks
+                          and make ".", "^" and "$" work with Unicode line
+                          breaks.
+    X   x   VERBOSE       Ignore whitespace and comments for nicer looking REs.
+
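+For example (illustrative), the following two calls are equivalent:
+    regex.findall(r'cat', 'Cat CAT cat', flags=regex.IGNORECASE)
+    regex.findall(r'(?i)cat', 'Cat CAT cat')
+Both return ['Cat', 'CAT', 'cat'].
+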
+This module also defines an exception 'error'.
+
+"""
+
+# Public symbols.
+__all__ = ["cache_all", "compile", "DEFAULT_VERSION", "escape", "findall",
+  "finditer", "fullmatch", "match", "purge", "search", "split", "splititer",
+  "sub", "subf", "subfn", "subn", "template", "Scanner", "A", "ASCII", "B",
+  "BESTMATCH", "D", "DEBUG", "E", "ENHANCEMATCH", "S", "DOTALL", "F",
+  "FULLCASE", "I", "IGNORECASE", "L", "LOCALE", "M", "MULTILINE", "P", "POSIX",
+  "R", "REVERSE", "T", "TEMPLATE", "U", "UNICODE", "V0", "VERSION0", "V1",
+  "VERSION1", "X", "VERBOSE", "W", "WORD", "error", "Regex", "__version__",
+  "__doc__", "RegexFlag"]
+
+__version__ = "2.5.148"
+
+# --------------------------------------------------------------------
+# Public interface.
+
+def match(pattern, string, flags=0, pos=None, endpos=None, partial=False,
+  concurrent=None, timeout=None, ignore_unused=False, **kwargs):
+    """Try to apply the pattern at the start of the string, returning a match
+    object, or None if no match was found."""
+    pat = _compile(pattern, flags, ignore_unused, kwargs, True)
+    return pat.match(string, pos, endpos, concurrent, partial, timeout)
+
+def fullmatch(pattern, string, flags=0, pos=None, endpos=None, partial=False,
+  concurrent=None, timeout=None, ignore_unused=False, **kwargs):
+    """Try to apply the pattern against all of the string, returning a match
+    object, or None if no match was found."""
+    pat = _compile(pattern, flags, ignore_unused, kwargs, True)
+    return pat.fullmatch(string, pos, endpos, concurrent, partial, timeout)
+
+def search(pattern, string, flags=0, pos=None, endpos=None, partial=False,
+  concurrent=None, timeout=None, ignore_unused=False, **kwargs):
+    """Search through string looking for a match to the pattern, returning a
+    match object, or None if no match was found."""
+    pat = _compile(pattern, flags, ignore_unused, kwargs, True)
+    return pat.search(string, pos, endpos, concurrent, partial, timeout)
+
+def sub(pattern, repl, string, count=0, flags=0, pos=None, endpos=None,
+  concurrent=None, timeout=None, ignore_unused=False, **kwargs):
+    """Return the string obtained by replacing the leftmost (or rightmost with a
+    reverse pattern) non-overlapping occurrences of the pattern in string by the
+    replacement repl. repl can be either a string or a callable; if a string,
+    backslash escapes in it are processed; if a callable, it's passed the match
+    object and must return a replacement string to be used."""
+    pat = _compile(pattern, flags, ignore_unused, kwargs, True)
+    return pat.sub(repl, string, count, pos, endpos, concurrent, timeout)
+
+def subf(pattern, format, string, count=0, flags=0, pos=None, endpos=None,
+  concurrent=None, timeout=None, ignore_unused=False, **kwargs):
+    """Return the string obtained by replacing the leftmost (or rightmost with a
+    reverse pattern) non-overlapping occurrences of the pattern in string by the
+    replacement format. format can be either a string or a callable; if a string,
+    it's treated as a format string; if a callable, it's passed the match object
+    and must return a replacement string to be used."""
+    pat = _compile(pattern, flags, ignore_unused, kwargs, True)
+    return pat.subf(format, string, count, pos, endpos, concurrent, timeout)
+
+def subn(pattern, repl, string, count=0, flags=0, pos=None, endpos=None,
+  concurrent=None, timeout=None, ignore_unused=False, **kwargs):
+    """Return a 2-tuple containing (new_string, number). new_string is the string
+    obtained by replacing the leftmost (or rightmost with a reverse pattern)
+    non-overlapping occurrences of the pattern in the source string by the
+    replacement repl. number is the number of substitutions that were made. repl
+    can be either a string or a callable; if a string, backslash escapes in it
+    are processed; if a callable, it's passed the match object and must return a
+    replacement string to be used."""
+    pat = _compile(pattern, flags, ignore_unused, kwargs, True)
+    return pat.subn(repl, string, count, pos, endpos, concurrent, timeout)
+
+def subfn(pattern, format, string, count=0, flags=0, pos=None, endpos=None,
+  concurrent=None, timeout=None, ignore_unused=False, **kwargs):
+    """Return a 2-tuple containing (new_string, number). new_string is the string
+    obtained by replacing the leftmost (or rightmost with a reverse pattern)
+    non-overlapping occurrences of the pattern in the source string by the
+    replacement format. number is the number of substitutions that were made. format
+    can be either a string or a callable; if a string, it's treated as a format
+    string; if a callable, it's passed the match object and must return a
+    replacement string to be used."""
+    pat = _compile(pattern, flags, ignore_unused, kwargs, True)
+    return pat.subfn(format, string, count, pos, endpos, concurrent, timeout)
+
+def split(pattern, string, maxsplit=0, flags=0, concurrent=None, timeout=None,
+  ignore_unused=False, **kwargs):
+    """Split the source string by the occurrences of the pattern, returning a
+    list containing the resulting substrings.  If capturing parentheses are used
+    in pattern, then the text of all groups in the pattern are also returned as
+    part of the resulting list.  If maxsplit is nonzero, at most maxsplit splits
+    occur, and the remainder of the string is returned as the final element of
+    the list."""
+    pat = _compile(pattern, flags, ignore_unused, kwargs, True)
+    return pat.split(string, maxsplit, concurrent, timeout)
+
+def splititer(pattern, string, maxsplit=0, flags=0, concurrent=None,
+  timeout=None, ignore_unused=False, **kwargs):
+    "Return an iterator yielding the parts of a split string."
+    pat = _compile(pattern, flags, ignore_unused, kwargs, True)
+    return pat.splititer(string, maxsplit, concurrent, timeout)
+
+def findall(pattern, string, flags=0, pos=None, endpos=None, overlapped=False,
+  concurrent=None, timeout=None, ignore_unused=False, **kwargs):
+    """Return a list of all matches in the string. The matches may be overlapped
+    if overlapped is True. If one or more groups are present in the pattern,
+    return a list of groups; this will be a list of tuples if the pattern has
+    more than one group. Empty matches are included in the result."""
+    pat = _compile(pattern, flags, ignore_unused, kwargs, True)
+    return pat.findall(string, pos, endpos, overlapped, concurrent, timeout)
+
+def finditer(pattern, string, flags=0, pos=None, endpos=None, overlapped=False,
+  partial=False, concurrent=None, timeout=None, ignore_unused=False, **kwargs):
+    """Return an iterator over all matches in the string. The matches may be
+    overlapped if overlapped is True. For each match, the iterator returns a
+    match object. Empty matches are included in the result."""
+    pat = _compile(pattern, flags, ignore_unused, kwargs, True)
+    return pat.finditer(string, pos, endpos, overlapped, concurrent, partial,
+      timeout)
+
+def compile(pattern, flags=0, ignore_unused=False, cache_pattern=None, **kwargs):
+    "Compile a regular expression pattern, returning a pattern object."
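+    # For example (illustrative): compile(r'\d+').findall('a1b22c') -> ['1', '22'].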
+    if cache_pattern is None:
+        cache_pattern = _cache_all
+    return _compile(pattern, flags, ignore_unused, kwargs, cache_pattern)
+
+def purge():
+    "Clear the regular expression cache."
+    _cache.clear()
+    _locale_sensitive.clear()
+
+# Whether to cache all patterns.
+_cache_all = True
+
+def cache_all(value=True):
+    """Sets whether to cache all patterns, even those that are compiled
+    explicitly. Passing None has no effect, but returns the current setting."""
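+    # For example: cache_all(False) stops caching of explicitly compiled
+    # patterns, while cache_all(None) simply reports the current setting.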
+    global _cache_all
+
+    if value is None:
+        return _cache_all
+
+    _cache_all = value
+
+def template(pattern, flags=0):
+    "Compile a template pattern, returning a pattern object."
+    return _compile(pattern, flags | TEMPLATE, False, {}, False)
+
+def escape(pattern, special_only=True, literal_spaces=False):
+    """Escape a string for use as a literal in a pattern. If special_only is
+    True, escape only special characters, else escape all non-alphanumeric
+    characters. If literal_spaces is True, don't escape spaces."""
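+    # Illustrative examples (default special_only=True unless noted):
+    #   escape("file.txt")                  ==  r'file\.txt'
+    #   escape("a+b", special_only=False)   ==  r'a\+b'
+    #   escape("1 2", literal_spaces=True)  ==  '1 2'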
+    # Convert it to Unicode.
+    if isinstance(pattern, bytes):
+        p = pattern.decode("latin-1")
+    else:
+        p = pattern
+
+    s = []
+    if special_only:
+        for c in p:
+            if c == " " and literal_spaces:
+                s.append(c)
+            elif c in _METACHARS or c.isspace():
+                s.append("\\")
+                s.append(c)
+            else:
+                s.append(c)
+    else:
+        for c in p:
+            if c == " " and literal_spaces:
+                s.append(c)
+            elif c in _ALNUM:
+                s.append(c)
+            else:
+                s.append("\\")
+                s.append(c)
+
+    r = "".join(s)
+    # Convert it back to bytes if necessary.
+    if isinstance(pattern, bytes):
+        r = r.encode("latin-1")
+
+    return r
+
+# --------------------------------------------------------------------
+# Internals.
+
+import regex._regex_core as _regex_core
+import regex._regex as _regex
+from threading import RLock as _RLock
+from locale import getpreferredencoding as _getpreferredencoding
+from regex._regex_core import *
+from regex._regex_core import (_ALL_VERSIONS, _ALL_ENCODINGS, _FirstSetError,
+  _UnscopedFlagSet, _check_group_features, _compile_firstset,
+  _compile_replacement, _flatten_code, _fold_case, _get_required_string,
+  _parse_pattern, _shrink_cache)
+from regex._regex_core import (ALNUM as _ALNUM, Info as _Info, OP as _OP, Source
+  as _Source, Fuzzy as _Fuzzy)
+
+# Version 0 is the old behaviour, compatible with the original 're' module.
+# Version 1 is the new behaviour, which differs slightly.
+
+DEFAULT_VERSION = VERSION0
+
+_METACHARS = frozenset("()[]{}?*+|^$\\.-#&~")
+
+_regex_core.DEFAULT_VERSION = DEFAULT_VERSION
+
+# Caches for the patterns and replacements.
+_cache = {}
+_cache_lock = _RLock()
+_named_args = {}
+_replacement_cache = {}
+_locale_sensitive = {}
+
+# Maximum size of the cache.
+_MAXCACHE = 500
+_MAXREPCACHE = 500
+
+def _compile(pattern, flags, ignore_unused, kwargs, cache_it):
+    "Compiles a regular expression to a PatternObject."
+
+    global DEFAULT_VERSION
+    try:
+        from regex import DEFAULT_VERSION
+    except ImportError:
+        pass
+
+    # We won't bother to cache the pattern if we're debugging.
+    if (flags & DEBUG) != 0:
+        cache_it = False
+
+    # What locale is this pattern using?
+    locale_key = (type(pattern), pattern)
+    if _locale_sensitive.get(locale_key, True) or (flags & LOCALE) != 0:
+        # This pattern is, or might be, locale-sensitive.
+        pattern_locale = _getpreferredencoding()
+    else:
+        # This pattern is definitely not locale-sensitive.
+        pattern_locale = None
+
+    def complain_unused_args():
+        if ignore_unused:
+            return
+
+        # Complain about any unused keyword arguments, possibly resulting from a typo.
+        unused_kwargs = set(kwargs) - {k for k, v in args_needed}
+        if unused_kwargs:
+            any_one = next(iter(unused_kwargs))
+            raise ValueError('unused keyword argument {!a}'.format(any_one))
+
+    if cache_it:
+        try:
+            # Do we know what keyword arguments are needed?
+            args_key = pattern, type(pattern), flags
+            args_needed = _named_args[args_key]
+
+            # Are we being provided with its required keyword arguments?
+            args_supplied = set()
+            if args_needed:
+                for k, v in args_needed:
+                    try:
+                        args_supplied.add((k, frozenset(kwargs[k])))
+                    except KeyError:
+                        raise error("missing named list: {!r}".format(k))
+
+            complain_unused_args()
+
+            args_supplied = frozenset(args_supplied)
+
+            # Have we already seen this regular expression and named list?
+            pattern_key = (pattern, type(pattern), flags, args_supplied,
+              DEFAULT_VERSION, pattern_locale)
+            return _cache[pattern_key]
+        except KeyError:
+            # It's a new pattern, or new named list for a known pattern.
+            pass
+
+    # Guess the encoding from the class of the pattern string.
+    if isinstance(pattern, str):
+        guess_encoding = UNICODE
+    elif isinstance(pattern, bytes):
+        guess_encoding = ASCII
+    elif isinstance(pattern, Pattern):
+        if flags:
+            raise ValueError("cannot process flags argument with a compiled pattern")
+
+        return pattern
+    else:
+        raise TypeError("first argument must be a string or compiled pattern")
+
+    # Set the default version in the core code in case it has been changed.
+    _regex_core.DEFAULT_VERSION = DEFAULT_VERSION
+
+    global_flags = flags
+
+    while True:
+        caught_exception = None
+        try:
+            source = _Source(pattern)
+            info = _Info(global_flags, source.char_type, kwargs)
+            info.guess_encoding = guess_encoding
+            source.ignore_space = bool(info.flags & VERBOSE)
+            parsed = _parse_pattern(source, info)
+            break
+        except _UnscopedFlagSet:
+            # Remember the global flags for the next attempt.
+            global_flags = info.global_flags
+        except error as e:
+            caught_exception = e
+
+        if caught_exception:
+            raise error(caught_exception.msg, caught_exception.pattern,
+              caught_exception.pos)
+
+    if not source.at_end():
+        raise error("unbalanced parenthesis", pattern, source.pos)
+
+    # Check the global flags for conflicts.
+    version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION
+    if version not in (0, VERSION0, VERSION1):
+        raise ValueError("VERSION0 and VERSION1 flags are mutually incompatible")
+
+    if (info.flags & _ALL_ENCODINGS) not in (0, ASCII, LOCALE, UNICODE):
+        raise ValueError("ASCII, LOCALE and UNICODE flags are mutually incompatible")
+
+    if isinstance(pattern, bytes) and (info.flags & UNICODE):
+        raise ValueError("cannot use UNICODE flag with a bytes pattern")
+
+    if not (info.flags & _ALL_ENCODINGS):
+        if isinstance(pattern, str):
+            info.flags |= UNICODE
+        else:
+            info.flags |= ASCII
+
+    reverse = bool(info.flags & REVERSE)
+    fuzzy = isinstance(parsed, _Fuzzy)
+
+    # Remember whether this pattern has an inline locale flag.
+    _locale_sensitive[locale_key] = info.inline_locale
+
+    # Fix the group references.
+    caught_exception = None
+    try:
+        parsed.fix_groups(pattern, reverse, False)
+    except error as e:
+        caught_exception = e
+
+    if caught_exception:
+        raise error(caught_exception.msg, caught_exception.pattern,
+          caught_exception.pos)
+
+    # Should we print the parsed pattern?
+    if flags & DEBUG:
+        parsed.dump(indent=0, reverse=reverse)
+
+    # Optimise the parsed pattern.
+    parsed = parsed.optimise(info, reverse)
+    parsed = parsed.pack_characters(info)
+
+    # Get the required string.
+    req_offset, req_chars, req_flags = _get_required_string(parsed, info.flags)
+
+    # Build the named lists.
+    named_lists = {}
+    named_list_indexes = [None] * len(info.named_lists_used)
+    args_needed = set()
+    for key, index in info.named_lists_used.items():
+        name, case_flags = key
+        values = frozenset(kwargs[name])
+        if case_flags:
+            items = frozenset(_fold_case(info, v) for v in values)
+        else:
+            items = values
+        named_lists[name] = values
+        named_list_indexes[index] = items
+        args_needed.add((name, values))
+
+    complain_unused_args()
+
+    # Check the features of the groups.
+    _check_group_features(info, parsed)
+
+    # Compile the parsed pattern. The result is a list of tuples.
+    code = parsed.compile(reverse)
+
+    # Is there a group call to the pattern as a whole?
+    key = (0, reverse, fuzzy)
+    ref = info.call_refs.get(key)
+    if ref is not None:
+        code = [(_OP.CALL_REF, ref)] + code + [(_OP.END, )]
+
+    # Add the final 'success' opcode.
+    code += [(_OP.SUCCESS, )]
+
+    # Compile the additional copies of the groups that we need.
+    for group, rev, fuz in info.additional_groups:
+        code += group.compile(rev, fuz)
+
+    # Flatten the code into a list of ints.
+    code = _flatten_code(code)
+
+    if not parsed.has_simple_start():
+        # Get the first set, if possible.
+        try:
+            fs_code = _compile_firstset(info, parsed.get_firstset(reverse))
+            fs_code = _flatten_code(fs_code)
+            code = fs_code + code
+        except _FirstSetError:
+            pass
+
+    # The named capture groups.
+    index_group = dict((v, n) for n, v in info.group_index.items())
+
+    # Create the PatternObject.
+    #
+    # Local flags like IGNORECASE affect the code generation, but aren't needed
+    # by the PatternObject itself. Conversely, global flags like LOCALE _don't_
+    # affect the code generation but _are_ needed by the PatternObject.
+    compiled_pattern = _regex.compile(pattern, info.flags | version, code,
+      info.group_index, index_group, named_lists, named_list_indexes,
+      req_offset, req_chars, req_flags, info.group_count)
+
+    # Do we need to reduce the size of the cache?
+    if len(_cache) >= _MAXCACHE:
+        with _cache_lock:
+            _shrink_cache(_cache, _named_args, _locale_sensitive, _MAXCACHE)
+
+    if cache_it:
+        if (info.flags & LOCALE) == 0:
+            pattern_locale = None
+
+        args_needed = frozenset(args_needed)
+
+        # Store this regular expression and named list.
+        pattern_key = (pattern, type(pattern), flags, args_needed,
+          DEFAULT_VERSION, pattern_locale)
+        _cache[pattern_key] = compiled_pattern
+
+        # Store what keyword arguments are needed.
+        _named_args[args_key] = args_needed
+
+    return compiled_pattern
+
+def _compile_replacement_helper(pattern, template):
+    "Compiles a replacement template."
+    # This function is called by the _regex module.
+
+    # Have we seen this before?
+    key = pattern.pattern, pattern.flags, template
+    compiled = _replacement_cache.get(key)
+    if compiled is not None:
+        return compiled
+
+    if len(_replacement_cache) >= _MAXREPCACHE:
+        _replacement_cache.clear()
+
+    is_unicode = isinstance(template, str)
+    source = _Source(template)
+    if is_unicode:
+        def make_string(char_codes):
+            return "".join(chr(c) for c in char_codes)
+    else:
+        def make_string(char_codes):
+            return bytes(char_codes)
+
+    compiled = []
+    literal = []
+    while True:
+        ch = source.get()
+        if not ch:
+            break
+        if ch == "\\":
+            # '_compile_replacement' will return either an int group reference
+            # or a string literal. It returns items (plural) in order to handle
+            # a 2-character literal (an invalid escape sequence).
+            is_group, items = _compile_replacement(source, pattern, is_unicode)
+            if is_group:
+                # It's a group, so first flush the literal.
+                if literal:
+                    compiled.append(make_string(literal))
+                    literal = []
+                compiled.extend(items)
+            else:
+                literal.extend(items)
+        else:
+            literal.append(ord(ch))
+
+    # Flush the literal.
+    if literal:
+        compiled.append(make_string(literal))
+
+    _replacement_cache[key] = compiled
+
+    return compiled
+
+# We define Pattern here after all the support objects have been defined.
+_pat = _compile('', 0, False, {}, False)
+Pattern = type(_pat)
+Match = type(_pat.match(''))
+del _pat
+
+# Make Pattern public for typing annotations.
+__all__.append("Pattern")
+__all__.append("Match")
+
+# We'll define an alias for the 'compile' function so that the repr of a
+# pattern object is eval-able.
+Regex = compile
+
+# Register myself for pickling.
+import copyreg as _copy_reg
+
+def _pickle(pattern):
+    return _regex.compile, pattern._pickled_data
+
+_copy_reg.pickle(Pattern, _pickle)
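+
+# For illustration: with this registration a compiled pattern can be pickled
+# and unpickled, e.g. pickle.loads(pickle.dumps(compile(r"\d+"))) (using the
+# standard 'pickle' module) yields an equivalent compiled pattern.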