# NOTE(review): reconstructed from a whitespace-mangled `git format-patch` blob
# (commit 4a52a71956a8d46fcb7294ac71734504bb09bcc2, "two version of R2R are here",
# S. Solomon Darnell, 2025-03-28) which created
# .venv/lib/python3.12/site-packages/lark/utils.py (387 lines).  The patch's
# newlines had been collapsed into spaces; this is the restored, valid module.
"""General-purpose helpers for lark: safe (de)serialization, Unicode
identifier checks, small collection utilities, and regexp introspection.

The ``###{standalone`` / ``###}`` markers delimit the region copied into
lark's generated standalone parsers — do not remove them.
"""
import hashlib
import unicodedata
import os
from functools import reduce
from collections import deque

###{standalone
import sys, re
import logging
from io import open
logger = logging.getLogger("lark")
logger.addHandler(logging.StreamHandler())
# Set to highest level, since we have some warnings amongst the code
# By default, we should not output any log messages
logger.setLevel(logging.CRITICAL)

if sys.version_info[0] > 2:
    from abc import ABC, abstractmethod
else:
    from abc import ABCMeta, abstractmethod
    class ABC(object):  # Provide Python27 compatibility
        __slots__ = ()
        # Fixed typo: the patch had `__metclass__`, which Python 2 ignores,
        # silently leaving the class without the ABCMeta metaclass.
        __metaclass__ = ABCMeta


# True on CPython 3.6+ (dicts keep insertion order, f-strings available).
Py36 = (sys.version_info[:2] >= (3, 6))

# Sentinel distinct from None for "no value supplied".
NO_VALUE = object()


def classify(seq, key=None, value=None):
    """Group the items of `seq` into a dict of lists.

    `key(item)` picks the bucket (defaults to the item itself) and
    `value(item)` picks what is stored in the bucket (defaults to the item).
    """
    d = {}
    for item in seq:
        k = key(item) if (key is not None) else item
        v = value(item) if (value is not None) else item
        if k in d:
            d[k].append(v)
        else:
            d[k] = [v]
    return d


def _deserialize(data, namespace, memo):
    """Recursively rebuild objects serialized by `_serialize`.

    Dicts carrying a '__type__' key are instantiated via the matching class
    in `namespace`; dicts carrying '@' are resolved from `memo`.
    """
    if isinstance(data, dict):
        if '__type__' in data:  # Object
            class_ = namespace[data['__type__']]
            return class_.deserialize(data, memo)
        elif '@' in data:
            return memo[data['@']]
        return {key: _deserialize(value, namespace, memo) for key, value in data.items()}
    elif isinstance(data, list):
        return [_deserialize(value, namespace, memo) for value in data]
    return data


class Serialize(object):
    """Safe-ish serialization interface that doesn't rely on Pickle

    Attributes:
        __serialize_fields__ (List[str]): Fields (aka attributes) to serialize.
        __serialize_namespace__ (list): List of classes that deserialization is allowed to instantiate.
                                        Should include all field types that aren't builtin types.
    """

    def memo_serialize(self, types_to_memoize):
        """Serialize `self`, memoizing instances of `types_to_memoize`.

        Returns a `(data, memo_data)` pair.
        """
        memo = SerializeMemoizer(types_to_memoize)
        return self.serialize(memo), memo.serialize()

    def serialize(self, memo=None):
        """Serialize the fields listed in `__serialize_fields__` into a dict.

        If `memo` covers this object's type, only a memo reference ('@') is
        emitted.  A `_serialize` hook, when present, may post-process the dict.
        """
        if memo and memo.in_types(self):
            return {'@': memo.memoized.get(self)}

        fields = getattr(self, '__serialize_fields__')
        res = {f: _serialize(getattr(self, f), memo) for f in fields}
        res['__type__'] = type(self).__name__
        if hasattr(self, '_serialize'):
            self._serialize(res, memo)
        return res

    @classmethod
    def deserialize(cls, data, memo):
        """Rebuild an instance from `data`, bypassing `__init__`.

        Raises KeyError if a field listed in `__serialize_fields__` is absent.
        """
        namespace = getattr(cls, '__serialize_namespace__', [])
        namespace = {c.__name__: c for c in namespace}

        fields = getattr(cls, '__serialize_fields__')

        if '@' in data:
            return memo[data['@']]

        inst = cls.__new__(cls)
        for f in fields:
            try:
                setattr(inst, f, _deserialize(data[f], namespace, memo))
            except KeyError as e:
                raise KeyError("Cannot find key for class", cls, e)

        if hasattr(inst, '_deserialize'):
            inst._deserialize()

        return inst


class SerializeMemoizer(Serialize):
    "A version of serialize that memoizes objects to reduce space"

    __serialize_fields__ = 'memoized',

    def __init__(self, types_to_memoize):
        self.types_to_memoize = tuple(types_to_memoize)
        self.memoized = Enumerator()

    def in_types(self, value):
        # True if `value` should be replaced by a memo reference.
        return isinstance(value, self.types_to_memoize)

    def serialize(self):
        # Emit the inverse mapping (index -> object) for later lookup.
        return _serialize(self.memoized.reversed(), None)

    @classmethod
    def deserialize(cls, data, namespace, memo):
        return _deserialize(data, namespace, memo)


try:
    STRING_TYPE = basestring
except NameError:  # Python 3
    STRING_TYPE = str


import types
from functools import wraps, partial
from contextlib import contextmanager

Str = type(u'')
try:
    classtype = types.ClassType  # Python2
except AttributeError:
    classtype = type  # Python3


def smart_decorator(f, create_decorator):
    """Wrap callable `f` with `create_decorator(func, add_self)`.

    Dispatches on the kind of callable so the decorator receives a plain
    function plus a flag saying whether a `self` argument is involved.
    """
    if isinstance(f, types.FunctionType):
        return wraps(f)(create_decorator(f, True))

    elif isinstance(f, (classtype, type, types.BuiltinFunctionType)):
        return wraps(f)(create_decorator(f, False))

    elif isinstance(f, types.MethodType):
        return wraps(f)(create_decorator(f.__func__, True))

    elif isinstance(f, partial):
        # wraps does not work for partials in 2.7: https://bugs.python.org/issue3445
        return wraps(f.func)(create_decorator(lambda *args, **kw: f(*args[1:], **kw), True))

    else:
        return create_decorator(f.__func__.__call__, True)


try:
    import regex
except ImportError:
    regex = None

import sre_parse
import sre_constants
categ_pattern = re.compile(r'\\p{[A-Za-z_]+}')

def get_regexp_width(expr):
    """Return the (min, max) match lengths of the regexp `expr`.

    Normally returns a list `[min, max]`; on the `regex`-module fallback path
    it returns a tuple — callers unpack two values either way.
    Raises ImportError if `expr` uses Unicode categories (``\\p{...}``) and
    the `regex` module is unavailable, and ValueError if `expr` cannot be
    parsed at all.
    """
    if regex:
        # Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with
        # a simple letter, which makes no difference as we are only trying to get the possible lengths of the regex
        # match here below.
        regexp_final = re.sub(categ_pattern, 'A', expr)
    else:
        if re.search(categ_pattern, expr):
            raise ImportError('`regex` module must be installed in order to use Unicode categories.', expr)
        regexp_final = expr
    try:
        return [int(x) for x in sre_parse.parse(regexp_final).getwidth()]
    except sre_constants.error:
        if not regex:
            raise ValueError(expr)
        else:
            # sre_parse does not support the new features in regex. To not completely fail in that case,
            # we manually test for the most important info (whether the empty string is matched)
            c = regex.compile(regexp_final)
            if c.match('') is None:
                return 1, sre_constants.MAXREPEAT
            else:
                return 0, sre_constants.MAXREPEAT

###}


# Unicode general categories making up ID_START / ID_CONTINUE (PEP 3131).
_ID_START = 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Mn', 'Mc', 'Pc'
_ID_CONTINUE = _ID_START + ('Nd', 'Nl',)

def _test_unicode_category(s, categories):
    # '_' is accepted explicitly; multi-char strings must pass char by char.
    if len(s) != 1:
        return all(_test_unicode_category(char, categories) for char in s)
    return s == '_' or unicodedata.category(s) in categories

def is_id_continue(s):
    """
    Checks if all characters in `s` are alphanumeric characters (Unicode standard, so diacritics, indian vowels, non-latin
    numbers, etc. all pass). Synonymous with a Python `ID_CONTINUE` identifier. See PEP 3131 for details.
    """
    return _test_unicode_category(s, _ID_CONTINUE)

def is_id_start(s):
    """
    Checks if all characters in `s` are alphabetic characters (Unicode standard, so diacritics, indian vowels, non-latin
    numbers, etc. all pass). Synonymous with a Python `ID_START` identifier. See PEP 3131 for details.
    """
    return _test_unicode_category(s, _ID_START)


def dedup_list(l):
    """Remove duplicates from list `l`, preserving the original order.
    Assumes the list entries are hashable."""
    # set.add returns None (falsy), so the `or` both records and admits
    # each first occurrence in a single pass.
    dedup = set()
    return [x for x in l if not (x in dedup or dedup.add(x))]


try:
    from contextlib import suppress  # Python 3
except ImportError:
    @contextmanager
    def suppress(*excs):
        '''Catch and dismiss the provided exception

        >>> x = 'hello'
        >>> with suppress(IndexError):
        ...     x = x[10]
        >>> x
        'hello'
        '''
        try:
            yield
        except excs:
            pass


class Enumerator(Serialize):
    """Assigns a stable, dense integer id to each distinct item it sees."""

    def __init__(self):
        self.enums = {}

    def get(self, item):
        # First sighting of `item` gets the next sequential id.
        if item not in self.enums:
            self.enums[item] = len(self.enums)
        return self.enums[item]

    def __len__(self):
        return len(self.enums)

    def reversed(self):
        """Return the inverse mapping (id -> item); ids must be unique."""
        r = {v: k for k, v in self.enums.items()}
        assert len(r) == len(self.enums)
        return r



def combine_alternatives(lists):
    """
    Accepts a list of alternatives, and enumerates all their possible concatenations.

    Examples:
        >>> combine_alternatives([range(2), [4,5]])
        [[0, 4], [0, 5], [1, 4], [1, 5]]

        >>> combine_alternatives(["abc", "xy", '$'])
        [['a', 'x', '$'], ['a', 'y', '$'], ['b', 'x', '$'], ['b', 'y', '$'], ['c', 'x', '$'], ['c', 'y', '$']]

        >>> combine_alternatives([])
        [[]]
    """
    if not lists:
        return [[]]
    assert all(l for l in lists), lists
    init = [[x] for x in lists[0]]
    return reduce(lambda a, b: [i + [j] for i in a for j in b], lists[1:], init)


try:
    import atomicwrites
except ImportError:
    atomicwrites = None

class FS:
    """Thin filesystem facade; writes go through `atomicwrites` if available."""
    exists = staticmethod(os.path.exists)

    @staticmethod
    def open(name, mode="r", **kwargs):
        if atomicwrites and "w" in mode:
            return atomicwrites.atomic_write(name, mode=mode, overwrite=True, **kwargs)
        else:
            return open(name, mode, **kwargs)



def isascii(s):
    """ str.isascii only exists in python3.7+ """
    try:
        return s.isascii()
    except AttributeError:
        try:
            s.encode('ascii')
            return True
        except (UnicodeDecodeError, UnicodeEncodeError):
            return False


class fzset(frozenset):
    """frozenset with a set-literal style repr, e.g. ``{1, 2}``."""
    def __repr__(self):
        return '{%s}' % ', '.join(map(repr, self))


def classify_bool(seq, pred):
    """Split `seq` into `(true_elems, false_elems)` according to `pred`."""
    true_elems = []
    false_elems = []

    for elem in seq:
        if pred(elem):
            true_elems.append(elem)
        else:
            false_elems.append(elem)

    return true_elems, false_elems


def bfs(initial, expand):
    """Breadth-first traversal from `initial`, yielding each node once.

    `expand(node)` returns the node's neighbors; `visited` prevents repeats.
    """
    open_q = deque(list(initial))
    visited = set(open_q)
    while open_q:
        node = open_q.popleft()
        yield node
        for next_node in expand(node):
            if next_node not in visited:
                visited.add(next_node)
                open_q.append(next_node)

def bfs_all_unique(initial, expand):
    "bfs, but doesn't keep track of visited (aka seen), because there can be no repetitions"
    open_q = deque(list(initial))
    while open_q:
        node = open_q.popleft()
        yield node
        open_q += expand(node)


def _serialize(value, memo):
    # Recursive counterpart of `_deserialize`; Serialize objects dispatch to
    # their own .serialize, containers recurse, everything else passes through.
    if isinstance(value, Serialize):
        return value.serialize(memo)
    elif isinstance(value, list):
        return [_serialize(elem, memo) for elem in value]
    elif isinstance(value, frozenset):
        return list(value)  # TODO reversible?
    elif isinstance(value, dict):
        return {key: _serialize(elem, memo) for key, elem in value.items()}
    # assert value is None or isinstance(value, (int, float, str, tuple)), value
    return value




def small_factors(n, max_factor):
    """
    Splits n up into smaller factors and summands <= max_factor.
    Returns a list of [(a, b), ...]
    so that the following code returns n:

    n = 1
    for a, b in values:
        n = n * a + b

    Currently, we also keep a + b <= max_factor, but that might change
    """
    assert n >= 0
    assert max_factor > 2
    if n <= max_factor:
        return [(n, 0)]

    # Greedily peel off the largest admissible factor `a` with remainder `b`,
    # then factorize the quotient recursively.
    for a in range(max_factor, 1, -1):
        r, b = divmod(n, a)
        if a + b <= max_factor:
            return small_factors(r, max_factor) + [(a, b)]
    assert False, "Failed to factorize %s" % n