author     S. Solomon Darnell  2025-03-28 21:52:21 -0500
committer  S. Solomon Darnell  2025-03-28 21:52:21 -0500
commit     4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree       ee3dc5af3b6313e921cd920906356f5d4febc4ed  /.venv/lib/python3.12/site-packages/lark
parent     cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download   gn-ai-master.tar.gz
two versions of R2R are here (HEAD, master)
Diffstat (limited to '.venv/lib/python3.12/site-packages/lark')
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/__init__.py | 10
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/__pyinstaller/__init__.py | 6
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/__pyinstaller/hook-lark.py | 14
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/ast_utils.py | 55
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/common.py | 59
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/exceptions.py | 267
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/grammar.py | 105
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/grammars/common.lark | 59
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/grammars/lark.lark | 59
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/grammars/python.lark | 19
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/grammars/unicode.lark | 7
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/indenter.py | 67
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/lark.py | 602
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/lexer.py | 506
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/load_grammar.py | 1354
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/parse_tree_builder.py | 387
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/parser_frontends.py | 239
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/parsers/__init__.py | 0
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/parsers/cyk.py | 345
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/parsers/earley.py | 326
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/parsers/earley_common.py | 75
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/parsers/earley_forest.py | 790
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/parsers/grammar_analysis.py | 185
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/parsers/lalr_analysis.py | 304
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/parsers/lalr_interactive_parser.py | 132
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py | 200
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/parsers/lalr_puppet.py | 3
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/parsers/xearley.py | 157
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/reconstruct.py | 101
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/tools/__init__.py | 65
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/tools/nearley.py | 201
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/tools/serialize.py | 34
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/tools/standalone.py | 196
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/tree.py | 231
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/tree_matcher.py | 186
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/utils.py | 387
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/visitors.py | 529
37 files changed, 8262 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/lark/__init__.py b/.venv/lib/python3.12/site-packages/lark/__init__.py
new file mode 100644
index 00000000..909d4106
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/__init__.py
@@ -0,0 +1,10 @@
+from .utils import logger
+from .tree import Tree
+from .visitors import Transformer, Visitor, v_args, Discard, Transformer_NonRecursive
+from .visitors import InlineTransformer, inline_args # XXX Deprecated
+from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken,
+ UnexpectedInput, UnexpectedCharacters, UnexpectedEOF, LarkError)
+from .lexer import Token
+from .lark import Lark
+
+__version__ = "0.12.0"
diff --git a/.venv/lib/python3.12/site-packages/lark/__pyinstaller/__init__.py b/.venv/lib/python3.12/site-packages/lark/__pyinstaller/__init__.py
new file mode 100644
index 00000000..fa02fc92
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/__pyinstaller/__init__.py
@@ -0,0 +1,6 @@
+# For usage of lark with PyInstaller. See https://pyinstaller-sample-hook.readthedocs.io/en/latest/index.html
+
+import os
+
+def get_hook_dirs():
+ return [os.path.dirname(__file__)]
\ No newline at end of file
diff --git a/.venv/lib/python3.12/site-packages/lark/__pyinstaller/hook-lark.py b/.venv/lib/python3.12/site-packages/lark/__pyinstaller/hook-lark.py
new file mode 100644
index 00000000..cf3d8e3d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/__pyinstaller/hook-lark.py
@@ -0,0 +1,14 @@
+#-----------------------------------------------------------------------------
+# Copyright (c) 2017-2020, PyInstaller Development Team.
+#
+# Distributed under the terms of the GNU General Public License (version 2
+# or later) with exception for distributing the bootloader.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+#
+# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
+#-----------------------------------------------------------------------------
+
+from PyInstaller.utils.hooks import collect_data_files
+
+datas = collect_data_files('lark')
diff --git a/.venv/lib/python3.12/site-packages/lark/ast_utils.py b/.venv/lib/python3.12/site-packages/lark/ast_utils.py
new file mode 100644
index 00000000..0c03d458
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/ast_utils.py
@@ -0,0 +1,55 @@
+"""
+ Module of utilities for transforming a lark.Tree into a custom Abstract Syntax Tree
+"""
+
+import inspect, re
+
+from lark import Transformer, v_args
+
+class Ast(object):
+ """Abstract class
+
+ Subclasses will be collected by `create_transformer()`
+ """
+ pass
+
+class AsList(object):
+ """Abstract class
+
+ Subclasses will be instantiated with the parse results as a single list, instead of as arguments.
+ """
+
+class WithMeta(object):
+ """Abstract class
+
+ Subclasses will be instantiated with the Meta instance of the tree. (see ``v_args`` for more detail)
+ """
+ pass
+
+def camel_to_snake(name):
+ return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower()
+
+def create_transformer(ast_module, transformer=None, decorator_factory=v_args):
+ """Collects `Ast` subclasses from the given module, and creates a Lark transformer that builds the AST.
+
+ For each class, we create a corresponding rule in the transformer, with a matching name.
+ CamelCase names will be converted into snake_case. Example: "CodeBlock" -> "code_block".
+
+ Classes starting with an underscore (`_`) will be skipped.
+
+ Parameters:
+ ast_module: A Python module containing all the subclasses of ``ast_utils.Ast``
+ transformer (Optional[Transformer]): An initial transformer. Its attributes may be overwritten.
+ decorator_factory (Callable): An optional callable accepting two booleans, inline, and meta,
+ and returning a decorator for the methods of ``transformer``. (default: ``v_args``).
+ """
+ t = transformer or Transformer()
+
+ for name, obj in inspect.getmembers(ast_module):
+ if not name.startswith('_') and inspect.isclass(obj):
+ if issubclass(obj, Ast):
+ wrapper = decorator_factory(inline=not issubclass(obj, AsList), meta=issubclass(obj, WithMeta))
+ obj = wrapper(obj).__get__(t)
+ setattr(t, camel_to_snake(name), obj)
+
+ return t
\ No newline at end of file
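A minimal usage sketch for create_transformer(): the grammar and the Name/CodeBlock classes below are illustrative, not part of the diff. It collects the Ast subclasses of the current module and turns a parse tree into AST objects.

# Sketch only: grammar and AST class names are illustrative.
import sys
from dataclasses import dataclass

from lark import Lark, Transformer, ast_utils

class _Ast(ast_utils.Ast):
    # Leading underscore: skipped by create_transformer()
    pass

@dataclass
class Name(_Ast):
    value: str

@dataclass
class CodeBlock(_Ast, ast_utils.AsList):
    # AsList: the rule's children arrive as a single list
    statements: list

parser = Lark(r'''
    ?start: code_block
    code_block: name+
    name: /[a-z]+/

    %import common.WS
    %ignore WS
''', parser='lalr')

# Collect the Ast subclasses of this module into a transformer
to_ast = ast_utils.create_transformer(sys.modules[__name__], Transformer())
print(to_ast.transform(parser.parse("foo bar baz")))
# CodeBlock(statements=[Name(...), Name(...), Name(...)])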
diff --git a/.venv/lib/python3.12/site-packages/lark/common.py b/.venv/lib/python3.12/site-packages/lark/common.py
new file mode 100644
index 00000000..cb408d91
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/common.py
@@ -0,0 +1,59 @@
+from warnings import warn
+from copy import deepcopy
+
+from .utils import Serialize
+from .lexer import TerminalDef
+
+###{standalone
+
+
+class LexerConf(Serialize):
+ __serialize_fields__ = 'terminals', 'ignore', 'g_regex_flags', 'use_bytes', 'lexer_type'
+ __serialize_namespace__ = TerminalDef,
+
+ def __init__(self, terminals, re_module, ignore=(), postlex=None, callbacks=None, g_regex_flags=0, skip_validation=False, use_bytes=False):
+ self.terminals = terminals
+ self.terminals_by_name = {t.name: t for t in self.terminals}
+ assert len(self.terminals) == len(self.terminals_by_name)
+ self.ignore = ignore
+ self.postlex = postlex
+ self.callbacks = callbacks or {}
+ self.g_regex_flags = g_regex_flags
+ self.re_module = re_module
+ self.skip_validation = skip_validation
+ self.use_bytes = use_bytes
+ self.lexer_type = None
+
+ @property
+ def tokens(self):
+ warn("LexerConf.tokens is deprecated. Use LexerConf.terminals instead", DeprecationWarning)
+ return self.terminals
+
+ def _deserialize(self):
+ self.terminals_by_name = {t.name: t for t in self.terminals}
+
+ def __deepcopy__(self, memo=None):
+ return type(self)(
+ deepcopy(self.terminals, memo),
+ self.re_module,
+ deepcopy(self.ignore, memo),
+ deepcopy(self.postlex, memo),
+ deepcopy(self.callbacks, memo),
+ deepcopy(self.g_regex_flags, memo),
+ deepcopy(self.skip_validation, memo),
+ deepcopy(self.use_bytes, memo),
+ )
+
+
+class ParserConf(Serialize):
+ __serialize_fields__ = 'rules', 'start', 'parser_type'
+
+ def __init__(self, rules, callbacks, start):
+ assert isinstance(start, list)
+ self.rules = rules
+ self.callbacks = callbacks
+ self.start = start
+
+ self.parser_type = None
+
+###}
diff --git a/.venv/lib/python3.12/site-packages/lark/exceptions.py b/.venv/lib/python3.12/site-packages/lark/exceptions.py
new file mode 100644
index 00000000..9f187531
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/exceptions.py
@@ -0,0 +1,267 @@
+from warnings import warn
+
+from .utils import STRING_TYPE, logger, NO_VALUE
+
+
+###{standalone
+
+
+class LarkError(Exception):
+ pass
+
+
+class ConfigurationError(LarkError, ValueError):
+ pass
+
+
+def assert_config(value, options, msg='Got %r, expected one of %s'):
+ if value not in options:
+ raise ConfigurationError(msg % (value, options))
+
+
+class GrammarError(LarkError):
+ pass
+
+
+class ParseError(LarkError):
+ pass
+
+
+class LexError(LarkError):
+ pass
+
+
+class UnexpectedInput(LarkError):
+ """UnexpectedInput Error.
+
+ Used as a base class for the following exceptions:
+
+ - ``UnexpectedCharacters``: The lexer encountered an unexpected string
+ - ``UnexpectedToken``: The parser received an unexpected token
+ - ``UnexpectedEOF``: The parser expected a token, but the input ended
+
+ After catching one of these exceptions, you may call the following helper methods to create a nicer error message.
+ """
+ pos_in_stream = None
+ _terminals_by_name = None
+
+ def get_context(self, text, span=40):
+ """Returns a pretty string pinpointing the error in the text,
+ with span amount of context characters around it.
+
+ Note:
+ The parser doesn't hold a copy of the text it has to parse,
+ so you have to provide it again
+ """
+ assert self.pos_in_stream is not None, self
+ pos = self.pos_in_stream
+ start = max(pos - span, 0)
+ end = pos + span
+ if not isinstance(text, bytes):
+ before = text[start:pos].rsplit('\n', 1)[-1]
+ after = text[pos:end].split('\n', 1)[0]
+ return before + after + '\n' + ' ' * len(before.expandtabs()) + '^\n'
+ else:
+ before = text[start:pos].rsplit(b'\n', 1)[-1]
+ after = text[pos:end].split(b'\n', 1)[0]
+ return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace")
+
+ def match_examples(self, parse_fn, examples, token_type_match_fallback=False, use_accepts=False):
+ """Allows you to detect what's wrong in the input text by matching
+ against example errors.
+
+ Given a parser instance and a dictionary mapping labels to lists of
+ malformed syntax examples, it'll return the label for the
+ example that best matches the current error. The function will
+ iterate the dictionary until it finds a matching error, and
+ return the corresponding value.
+
+ For an example usage, see `examples/error_reporting_lalr.py`
+
+ Parameters:
+ parse_fn: parse function (usually ``lark_instance.parse``)
+ examples: dictionary of ``{'example_string': value}``.
+ use_accepts: Recommended to call this with ``use_accepts=True``.
+ The default is ``False`` for backwards compatibility.
+ """
+ assert self.state is not None, "Not supported for this exception"
+
+ if isinstance(examples, dict):
+ examples = examples.items()
+
+ candidate = (None, False)
+ for i, (label, example) in enumerate(examples):
+ assert not isinstance(example, STRING_TYPE)
+
+ for j, malformed in enumerate(example):
+ try:
+ parse_fn(malformed)
+ except UnexpectedInput as ut:
+ if ut.state == self.state:
+ if use_accepts and hasattr(self, 'accepts') and ut.accepts != self.accepts:
+ logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
+ (self.state, self.accepts, ut.accepts, i, j))
+ continue
+ try:
+ if ut.token == self.token: # Try exact match first
+ logger.debug("Exact Match at example [%s][%s]" % (i, j))
+ return label
+
+ if token_type_match_fallback:
+ # Fallback to token types match
+ if (ut.token.type == self.token.type) and not candidate[-1]:
+ logger.debug("Token Type Fallback at example [%s][%s]" % (i, j))
+ candidate = label, True
+
+ except AttributeError:
+ pass
+ if candidate[0] is None:
+ logger.debug("Same State match at example [%s][%s]" % (i, j))
+ candidate = label, False
+
+ return candidate[0]
+
+ def _format_expected(self, expected):
+ if self._terminals_by_name:
+ d = self._terminals_by_name
+ expected = [d[t_name].user_repr() if t_name in d else t_name for t_name in expected]
+ return "Expected one of: \n\t* %s\n" % '\n\t* '.join(expected)
+
+
+class UnexpectedEOF(ParseError, UnexpectedInput):
+ """An exception that is raised by the parser, when the input ends while it still expects a token.
+ """
+
+ def __init__(self, expected, state=None, terminals_by_name=None):
+ super(UnexpectedEOF, self).__init__()
+
+ self.expected = expected
+ self.state = state
+ from .lexer import Token
+ self.token = Token("<EOF>", "") # , line=-1, column=-1, pos_in_stream=-1)
+ self.pos_in_stream = -1
+ self.line = -1
+ self.column = -1
+ self._terminals_by_name = terminals_by_name
+
+
+ def __str__(self):
+ message = "Unexpected end-of-input. "
+ message += self._format_expected(self.expected)
+ return message
+
+
+class UnexpectedCharacters(LexError, UnexpectedInput):
+ """An exception that is raised by the lexer, when it cannot match the next
+ string of characters to any of its terminals.
+ """
+
+ def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None,
+ terminals_by_name=None, considered_rules=None):
+ super(UnexpectedCharacters, self).__init__()
+
+ # TODO considered_tokens and allowed can be figured out using state
+ self.line = line
+ self.column = column
+ self.pos_in_stream = lex_pos
+ self.state = state
+ self._terminals_by_name = terminals_by_name
+
+ self.allowed = allowed
+ self.considered_tokens = considered_tokens
+ self.considered_rules = considered_rules
+ self.token_history = token_history
+
+ if isinstance(seq, bytes):
+ self.char = seq[lex_pos:lex_pos + 1].decode("ascii", "backslashreplace")
+ else:
+ self.char = seq[lex_pos]
+ self._context = self.get_context(seq)
+
+
+ def __str__(self):
+ message = "No terminal matches '%s' in the current parser context, at line %d col %d" % (self.char, self.line, self.column)
+ message += '\n\n' + self._context
+ if self.allowed:
+ message += self._format_expected(self.allowed)
+ if self.token_history:
+ message += '\nPrevious tokens: %s\n' % ', '.join(repr(t) for t in self.token_history)
+ return message
+
+
+class UnexpectedToken(ParseError, UnexpectedInput):
+ """An exception that is raised by the parser, when the token it received
+ doesn't match any valid step forward.
+
+ Parameters:
+ token: The mismatched token
+ expected: The set of expected tokens
+ considered_rules: Which rules were considered, to deduce the expected tokens
+ state: A value representing the parser state. Do not rely on its value or type.
+ interactive_parser: An instance of ``InteractiveParser``, that is initialized to the point of failure,
+ and can be used for debugging and error handling.
+
+ Note: These parameters are available as attributes of the instance.
+ """
+
+ def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None):
+ super(UnexpectedToken, self).__init__()
+
+ # TODO considered_rules and expected can be figured out using state
+ self.line = getattr(token, 'line', '?')
+ self.column = getattr(token, 'column', '?')
+ self.pos_in_stream = getattr(token, 'start_pos', None)
+ self.state = state
+
+ self.token = token
+ self.expected = expected # XXX deprecate? `accepts` is better
+ self._accepts = NO_VALUE
+ self.considered_rules = considered_rules
+ self.interactive_parser = interactive_parser
+ self._terminals_by_name = terminals_by_name
+ self.token_history = token_history
+
+
+ @property
+ def accepts(self):
+ if self._accepts is NO_VALUE:
+ self._accepts = self.interactive_parser and self.interactive_parser.accepts()
+ return self._accepts
+
+ def __str__(self):
+ message = ("Unexpected token %r at line %s, column %s.\n%s"
+ % (self.token, self.line, self.column, self._format_expected(self.accepts or self.expected)))
+ if self.token_history:
+ message += "Previous tokens: %r\n" % self.token_history
+
+ return message
+
+ @property
+ def puppet(self):
+ warn("UnexpectedToken.puppet attribute has been renamed to interactive_parser", DeprecationWarning)
+ return self.interactive_parser
+
+
+
+class VisitError(LarkError):
+ """VisitError is raised when visitors are interrupted by an exception
+
+ It provides the following attributes for inspection:
+
+ Parameters:
+ rule: the name of the visit rule that failed
+ obj: the tree-node or token that was being processed
+ orig_exc: the exception that caused it to fail
+
+ Note: These parameters are available as attributes
+ """
+
+ def __init__(self, rule, obj, orig_exc):
+ message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc)
+ super(VisitError, self).__init__(message)
+
+ self.rule = rule
+ self.obj = obj
+ self.orig_exc = orig_exc
+
+###}
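The UnexpectedInput helpers above are meant to be called from an except block around parse(). A minimal sketch, assuming a toy grammar; the error labels and example strings are illustrative:

from lark import Lark, UnexpectedInput

parser = Lark(r'''
    start: "a" "b" "c"

    %import common.WS
    %ignore WS
''', parser='lalr')

# Label -> list of malformed inputs known to trigger that kind of error
error_examples = {
    'missing b': ['a c'],
    'missing c': ['a b'],
}

text = 'a c'
try:
    parser.parse(text)
except UnexpectedInput as u:
    # Pinpoint the error position in the original text
    print(u.get_context(text))
    # Classify the error by matching it against the known examples
    print(u.match_examples(parser.parse, error_examples, use_accepts=True))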
diff --git a/.venv/lib/python3.12/site-packages/lark/grammar.py b/.venv/lib/python3.12/site-packages/lark/grammar.py
new file mode 100644
index 00000000..405086a2
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/grammar.py
@@ -0,0 +1,105 @@
+from .utils import Serialize
+
+###{standalone
+
+class Symbol(Serialize):
+ __slots__ = ('name',)
+
+ is_term = NotImplemented
+
+ def __init__(self, name):
+ self.name = name
+
+ def __eq__(self, other):
+ assert isinstance(other, Symbol), other
+ return self.is_term == other.is_term and self.name == other.name
+
+ def __ne__(self, other):
+ return not (self == other)
+
+ def __hash__(self):
+ return hash(self.name)
+
+ def __repr__(self):
+ return '%s(%r)' % (type(self).__name__, self.name)
+
+ fullrepr = property(__repr__)
+
+
+class Terminal(Symbol):
+ __serialize_fields__ = 'name', 'filter_out'
+
+ is_term = True
+
+ def __init__(self, name, filter_out=False):
+ self.name = name
+ self.filter_out = filter_out
+
+ @property
+ def fullrepr(self):
+ return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out)
+
+
+class NonTerminal(Symbol):
+ __serialize_fields__ = 'name',
+
+ is_term = False
+
+
+class RuleOptions(Serialize):
+ __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices'
+
+ def __init__(self, keep_all_tokens=False, expand1=False, priority=None, template_source=None, empty_indices=()):
+ self.keep_all_tokens = keep_all_tokens
+ self.expand1 = expand1
+ self.priority = priority
+ self.template_source = template_source
+ self.empty_indices = empty_indices
+
+ def __repr__(self):
+ return 'RuleOptions(%r, %r, %r, %r)' % (
+ self.keep_all_tokens,
+ self.expand1,
+ self.priority,
+ self.template_source
+ )
+
+
+class Rule(Serialize):
+ """
+ origin : a symbol
+ expansion : a list of symbols
+ order : index of this expansion amongst all rules of the same name
+ """
+ __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash')
+
+ __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options'
+ __serialize_namespace__ = Terminal, NonTerminal, RuleOptions
+
+ def __init__(self, origin, expansion, order=0, alias=None, options=None):
+ self.origin = origin
+ self.expansion = expansion
+ self.alias = alias
+ self.order = order
+ self.options = options or RuleOptions()
+ self._hash = hash((self.origin, tuple(self.expansion)))
+
+ def _deserialize(self):
+ self._hash = hash((self.origin, tuple(self.expansion)))
+
+ def __str__(self):
+ return '<%s : %s>' % (self.origin.name, ' '.join(x.name for x in self.expansion))
+
+ def __repr__(self):
+ return 'Rule(%r, %r, %r, %r)' % (self.origin, self.expansion, self.alias, self.options)
+
+ def __hash__(self):
+ return self._hash
+
+ def __eq__(self, other):
+ if not isinstance(other, Rule):
+ return False
+ return self.origin == other.origin and self.expansion == other.expansion
+
+
+###}
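Rule, Terminal and NonTerminal are the internal BNF representation that load_grammar emits and the parser frontends consume. They are not normally built by hand; the sketch below only illustrates how they compose:

from lark.grammar import NonTerminal, Terminal, Rule, RuleOptions

# start : WORD WORD   (hand-constructed purely for illustration)
rule = Rule(NonTerminal('start'), [Terminal('WORD'), Terminal('WORD')],
            order=0, options=RuleOptions(keep_all_tokens=True))

print(rule)                        # <start : WORD WORD>
print(rule.origin.is_term)         # False -- NonTerminal
print(rule.expansion[0].is_term)   # True  -- Terminal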
diff --git a/.venv/lib/python3.12/site-packages/lark/grammars/common.lark b/.venv/lib/python3.12/site-packages/lark/grammars/common.lark
new file mode 100644
index 00000000..d2e86d17
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/grammars/common.lark
@@ -0,0 +1,59 @@
+// Basic terminals for common use
+
+
+//
+// Numbers
+//
+
+DIGIT: "0".."9"
+HEXDIGIT: "a".."f"|"A".."F"|DIGIT
+
+INT: DIGIT+
+SIGNED_INT: ["+"|"-"] INT
+DECIMAL: INT "." INT? | "." INT
+
+// float = /-?\d+(\.\d+)?([eE][+-]?\d+)?/
+_EXP: ("e"|"E") SIGNED_INT
+FLOAT: INT _EXP | DECIMAL _EXP?
+SIGNED_FLOAT: ["+"|"-"] FLOAT
+
+NUMBER: FLOAT | INT
+SIGNED_NUMBER: ["+"|"-"] NUMBER
+
+//
+// Strings
+//
+_STRING_INNER: /.*?/
+_STRING_ESC_INNER: _STRING_INNER /(?<!\\)(\\\\)*?/
+
+ESCAPED_STRING : "\"" _STRING_ESC_INNER "\""
+
+
+//
+// Names (Variables)
+//
+LCASE_LETTER: "a".."z"
+UCASE_LETTER: "A".."Z"
+
+LETTER: UCASE_LETTER | LCASE_LETTER
+WORD: LETTER+
+
+CNAME: ("_"|LETTER) ("_"|LETTER|DIGIT)*
+
+
+//
+// Whitespace
+//
+WS_INLINE: (" "|/\t/)+
+WS: /[ \t\f\r\n]/+
+
+CR : /\r/
+LF : /\n/
+NEWLINE: (CR? LF)+
+
+
+// Comments
+SH_COMMENT: /#[^\n]*/
+CPP_COMMENT: /\/\/[^\n]*/
+C_COMMENT: "/*" /(.|\n)*?/ "*/"
+SQL_COMMENT: /--[^\n]*/
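These terminals are intended to be pulled into user grammars with %import common.<NAME>. A minimal sketch, using a small key=value grammar that is illustrative rather than part of the package:

from lark import Lark

parser = Lark(r'''
    start: pair ("," pair)*
    pair: CNAME "=" SIGNED_NUMBER

    %import common.CNAME
    %import common.SIGNED_NUMBER
    %import common.WS
    %ignore WS
''', parser='lalr')

print(parser.parse("x = 1, y = -2.5").pretty())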
diff --git a/.venv/lib/python3.12/site-packages/lark/grammars/lark.lark b/.venv/lib/python3.12/site-packages/lark/grammars/lark.lark
new file mode 100644
index 00000000..68588469
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/grammars/lark.lark
@@ -0,0 +1,59 @@
+start: (_item? _NL)* _item?
+
+_item: rule
+ | token
+ | statement
+
+rule: RULE rule_params priority? ":" expansions
+token: TOKEN token_params priority? ":" expansions
+
+rule_params: ["{" RULE ("," RULE)* "}"]
+token_params: ["{" TOKEN ("," TOKEN)* "}"]
+
+priority: "." NUMBER
+
+statement: "%ignore" expansions -> ignore
+ | "%import" import_path ["->" name] -> import
+ | "%import" import_path name_list -> multi_import
+ | "%override" rule -> override_rule
+ | "%declare" name+ -> declare
+
+!import_path: "."? name ("." name)*
+name_list: "(" name ("," name)* ")"
+
+?expansions: alias (_VBAR alias)*
+
+?alias: expansion ["->" RULE]
+
+?expansion: expr*
+
+?expr: atom [OP | "~" NUMBER [".." NUMBER]]
+
+?atom: "(" expansions ")"
+ | "[" expansions "]" -> maybe
+ | value
+
+?value: STRING ".." STRING -> literal_range
+ | name
+ | (REGEXP | STRING) -> literal
+ | name "{" value ("," value)* "}" -> template_usage
+
+name: RULE
+ | TOKEN
+
+_VBAR: _NL? "|"
+OP: /[+*]|[?](?![a-z])/
+RULE: /!?[_?]?[a-z][_a-z0-9]*/
+TOKEN: /_?[A-Z][_A-Z0-9]*/
+STRING: _STRING "i"?
+REGEXP: /\/(?!\/)(\\\/|\\\\|[^\/])*?\/[imslux]*/
+_NL: /(\r?\n)+\s*/
+
+%import common.ESCAPED_STRING -> _STRING
+%import common.SIGNED_INT -> NUMBER
+%import common.WS_INLINE
+
+COMMENT: /\s*/ "//" /[^\n]/*
+
+%ignore WS_INLINE
+%ignore COMMENT
diff --git a/.venv/lib/python3.12/site-packages/lark/grammars/python.lark b/.venv/lib/python3.12/site-packages/lark/grammars/python.lark
new file mode 100644
index 00000000..785728c5
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/grammars/python.lark
@@ -0,0 +1,19 @@
+// Python terminals
+
+NAME: /[a-zA-Z_]\w*/
+COMMENT: /#[^\n]*/
+
+STRING : /[ubf]?r?("(?!"").*?(?<!\\)(\\\\)*?"|'(?!'').*?(?<!\\)(\\\\)*?')/i
+LONG_STRING: /[ubf]?r?(""".*?(?<!\\)(\\\\)*?"""|'''.*?(?<!\\)(\\\\)*?''')/is
+
+DEC_NUMBER: /0|[1-9][\d_]*/i
+HEX_NUMBER.2: /0x[\da-f]*/i
+OCT_NUMBER.2: /0o[0-7]*/i
+BIN_NUMBER.2 : /0b[0-1]*/i
+FLOAT_NUMBER.2: /((\d+\.[\d_]*|\.[\d_]+)([eE][-+]?\d+)?|\d+([eE][-+]?\d+))/
+IMAG_NUMBER.2: /\d+[jJ]/ | FLOAT_NUMBER /[jJ]/
+
+
+// Comma-separated list (with an optional trailing comma)
+cs_list{item}: item ("," item)* ","?
+_cs_list{item}: item ("," item)* ","?
\ No newline at end of file
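cs_list/_cs_list above are rule templates: rules parameterized by {item} and expanded once per argument they are used with. A minimal sketch of the same pattern in a standalone grammar (default Earley parser; the grammar is illustrative rather than taken from python.lark):

from lark import Lark

parser = Lark(r'''
    start: "[" cs_list{NUMBER} "]"

    // same shape as the template defined in python.lark
    cs_list{item}: item ("," item)* ","?

    %import common.NUMBER
    %import common.WS
    %ignore WS
''')

print(parser.parse("[1, 2, 3,]").pretty())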
diff --git a/.venv/lib/python3.12/site-packages/lark/grammars/unicode.lark b/.venv/lib/python3.12/site-packages/lark/grammars/unicode.lark
new file mode 100644
index 00000000..0ab849e3
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/grammars/unicode.lark
@@ -0,0 +1,7 @@
+// TODO: LETTER, WORD, etc.
+
+//
+// Whitespace
+//
+WS_INLINE: /[ \t\xa0]/+
+WS: /[ \t\xa0\f\r\n]/+
diff --git a/.venv/lib/python3.12/site-packages/lark/indenter.py b/.venv/lib/python3.12/site-packages/lark/indenter.py
new file mode 100644
index 00000000..7e1263dd
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/indenter.py
@@ -0,0 +1,67 @@
+"Provides Indentation services for languages with indentation similar to Python"
+
+from .exceptions import LarkError
+from .lark import PostLex
+from .lexer import Token
+
+###{standalone
+class DedentError(LarkError):
+ pass
+
+class Indenter(PostLex):
+ def __init__(self):
+ self.paren_level = None
+ self.indent_level = None
+ assert self.tab_len > 0
+
+ def handle_NL(self, token):
+ if self.paren_level > 0:
+ return
+
+ yield token
+
+ indent_str = token.rsplit('\n', 1)[1] # Tabs and spaces
+ indent = indent_str.count(' ') + indent_str.count('\t') * self.tab_len
+
+ if indent > self.indent_level[-1]:
+ self.indent_level.append(indent)
+ yield Token.new_borrow_pos(self.INDENT_type, indent_str, token)
+ else:
+ while indent < self.indent_level[-1]:
+ self.indent_level.pop()
+ yield Token.new_borrow_pos(self.DEDENT_type, indent_str, token)
+
+ if indent != self.indent_level[-1]:
+ raise DedentError('Unexpected dedent to column %s. Expected dedent to %s' % (indent, self.indent_level[-1]))
+
+ def _process(self, stream):
+ for token in stream:
+ if token.type == self.NL_type:
+ for t in self.handle_NL(token):
+ yield t
+ else:
+ yield token
+
+ if token.type in self.OPEN_PAREN_types:
+ self.paren_level += 1
+ elif token.type in self.CLOSE_PAREN_types:
+ self.paren_level -= 1
+ assert self.paren_level >= 0
+
+ while len(self.indent_level) > 1:
+ self.indent_level.pop()
+ yield Token(self.DEDENT_type, '')
+
+ assert self.indent_level == [0], self.indent_level
+
+ def process(self, stream):
+ self.paren_level = 0
+ self.indent_level = [0]
+ return self._process(stream)
+
+ # XXX Hack for ContextualLexer. Maybe there's a more elegant solution?
+ @property
+ def always_accept(self):
+ return (self.NL_type,)
+
+###}
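Indenter is abstract: a subclass supplies the terminal names and tab width as class attributes, and the grammar must %declare the INDENT/DEDENT terminals. A minimal sketch, closely mirroring the indented-tree example from Lark's documentation:

from lark import Lark
from lark.indenter import Indenter

tree_grammar = r'''
    ?start: _NL* tree
    tree: NAME _NL [_INDENT tree+ _DEDENT]

    %import common.CNAME -> NAME
    %import common.WS_INLINE
    %declare _INDENT _DEDENT
    %ignore WS_INLINE

    _NL: /(\r?\n[\t ]*)+/
'''

class TreeIndenter(Indenter):
    NL_type = '_NL'
    OPEN_PAREN_types = []
    CLOSE_PAREN_types = []
    INDENT_type = '_INDENT'
    DEDENT_type = '_DEDENT'
    tab_len = 8

parser = Lark(tree_grammar, parser='lalr', postlex=TreeIndenter())

sample = '''
a
    b
    c
        d
    e
'''

print(parser.parse(sample).pretty())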
diff --git a/.venv/lib/python3.12/site-packages/lark/lark.py b/.venv/lib/python3.12/site-packages/lark/lark.py
new file mode 100644
index 00000000..744cf4b4
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/lark.py
@@ -0,0 +1,602 @@
+from __future__ import absolute_import
+
+
+from lark.exceptions import ConfigurationError, assert_config
+
+import sys, os, pickle, hashlib
+from io import open
+import tempfile
+from warnings import warn
+
+from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger, ABC, abstractmethod
+from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files
+from .tree import Tree
+from .common import LexerConf, ParserConf
+
+from .lexer import Lexer, TraditionalLexer, TerminalDef, LexerThread
+from .parse_tree_builder import ParseTreeBuilder
+from .parser_frontends import get_frontend, _get_lexer_callbacks
+from .grammar import Rule
+
+import re
+try:
+ import regex
+except ImportError:
+ regex = None
+
+
+###{standalone
+
+
+class LarkOptions(Serialize):
+ """Specifies the options for Lark
+
+ """
+ OPTIONS_DOC = """
+ **=== General Options ===**
+
+ start
+ The start symbol. Either a string, or a list of strings for multiple possible starts (Default: "start")
+ debug
+ Display debug information and extra warnings. Use only when debugging (default: False)
+ When used with Earley, it generates a forest graph as "sppf.png", if 'dot' is installed.
+ transformer
+ Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster)
+ propagate_positions
+ Propagates (line, column, end_line, end_column) attributes into all tree branches.
+ Accepts ``False``, ``True``, or a callable, which will filter which nodes to ignore when propagating.
+ maybe_placeholders
+ When ``True``, the ``[]`` operator returns ``None`` when not matched.
+
+ When ``False``, ``[]`` behaves like the ``?`` operator, and returns no value at all.
+ (default= ``False``. Recommended to set to ``True``)
+ cache
+ Cache the results of the Lark grammar analysis, for x2 to x3 faster loading. LALR only for now.
+
+ - When ``False``, does nothing (default)
+ - When ``True``, caches to a temporary file in the local directory
+ - When given a string, caches to the path pointed by the string
+ regex
+ When True, uses the ``regex`` module instead of the stdlib ``re``.
+ g_regex_flags
+ Flags that are applied to all terminals (both regex and strings)
+ keep_all_tokens
+ Prevent the tree builder from automagically removing "punctuation" tokens (default: False)
+ tree_class
+ Lark will produce trees comprised of instances of this class instead of the default ``lark.Tree``.
+
+ **=== Algorithm Options ===**
+
+ parser
+ Decides which parser engine to use. Accepts "earley" or "lalr". (Default: "earley").
+ (there is also a "cyk" option for legacy)
+ lexer
+ Decides whether or not to use a lexer stage
+
+ - "auto" (default): Choose for me based on the parser
+ - "standard": Use a standard lexer
+ - "contextual": Stronger lexer (only works with parser="lalr")
+ - "dynamic": Flexible and powerful (only with parser="earley")
+ - "dynamic_complete": Same as dynamic, but tries *every* variation of tokenizing possible.
+ ambiguity
+ Decides how to handle ambiguity in the parse. Only relevant if parser="earley"
+
+ - "resolve": The parser will automatically choose the simplest derivation
+ (it chooses consistently: greedy for tokens, non-greedy for rules)
+ - "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest).
+ - "forest": The parser will return the root of the shared packed parse forest.
+
+ **=== Misc. / Domain Specific Options ===**
+
+ postlex
+ Lexer post-processing (Default: None). Only works with the standard and contextual lexers.
+ priority
+ How priorities should be evaluated - auto, none, normal, invert (Default: auto)
+ lexer_callbacks
+ Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution.
+ use_bytes
+ Accept an input of type ``bytes`` instead of ``str`` (Python 3 only).
+ edit_terminals
+ A callback for editing the terminals before parse.
+ import_paths
+ A list of either paths or loader functions to specify from where grammars are imported
+ source_path
+ Override the source from which the grammar was loaded. Useful for relative imports and unconventional grammar loading
+ **=== End of Options ===**
+ """
+ if __doc__:
+ __doc__ += OPTIONS_DOC
+
+
+ # Adding a new option needs to be done in multiple places:
+ # - In the dictionary below. This is the primary truth of which options `Lark.__init__` accepts
+ # - In the docstring above. It is used both for the docstring of `LarkOptions` and `Lark`, and in readthedocs
+ # - In `lark-stubs/lark.pyi`:
+ # - As attribute to `LarkOptions`
+ # - As parameter to `Lark.__init__`
+ # - Potentially in `_LOAD_ALLOWED_OPTIONS` below this class, when the option doesn't change how the grammar is loaded
+ # - Potentially in `lark.tools.__init__`, if it makes sense, and it can easily be passed as a cmd argument
+ _defaults = {
+ 'debug': False,
+ 'keep_all_tokens': False,
+ 'tree_class': None,
+ 'cache': False,
+ 'postlex': None,
+ 'parser': 'earley',
+ 'lexer': 'auto',
+ 'transformer': None,
+ 'start': 'start',
+ 'priority': 'auto',
+ 'ambiguity': 'auto',
+ 'regex': False,
+ 'propagate_positions': False,
+ 'lexer_callbacks': {},
+ 'maybe_placeholders': False,
+ 'edit_terminals': None,
+ 'g_regex_flags': 0,
+ 'use_bytes': False,
+ 'import_paths': [],
+ 'source_path': None,
+ }
+
+ def __init__(self, options_dict):
+ o = dict(options_dict)
+
+ options = {}
+ for name, default in self._defaults.items():
+ if name in o:
+ value = o.pop(name)
+ if isinstance(default, bool) and name not in ('cache', 'use_bytes', 'propagate_positions'):
+ value = bool(value)
+ else:
+ value = default
+
+ options[name] = value
+
+ if isinstance(options['start'], STRING_TYPE):
+ options['start'] = [options['start']]
+
+ self.__dict__['options'] = options
+
+
+ assert_config(self.parser, ('earley', 'lalr', 'cyk', None))
+
+ if self.parser == 'earley' and self.transformer:
+ raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm. '
+ 'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)')
+
+ if o:
+ raise ConfigurationError("Unknown options: %s" % o.keys())
+
+ def __getattr__(self, name):
+ try:
+ return self.__dict__['options'][name]
+ except KeyError as e:
+ raise AttributeError(e)
+
+ def __setattr__(self, name, value):
+ assert_config(name, self.options.keys(), "%r isn't a valid option. Expected one of: %s")
+ self.options[name] = value
+
+ def serialize(self, memo):
+ return self.options
+
+ @classmethod
+ def deserialize(cls, data, memo):
+ return cls(data)
+
+
+# Options that can be passed to the Lark parser, even when it was loaded from cache/standalone.
+# These options are only used outside of `load_grammar`.
+_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'lexer_callbacks', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'tree_class'}
+
+_VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None)
+_VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest')
+
+
+class PostLex(ABC):
+ @abstractmethod
+ def process(self, stream):
+ return stream
+
+ always_accept = ()
+
+
+class Lark(Serialize):
+ """Main interface for the library.
+
+ It's mostly a thin wrapper for the many different parsers, and for the tree constructor.
+
+ Parameters:
+ grammar: a string or file-object containing the grammar spec (using Lark's ebnf syntax)
+ options: a dictionary controlling various aspects of Lark.
+
+ Example:
+ >>> Lark(r'''start: "foo" ''')
+ Lark(...)
+ """
+ def __init__(self, grammar, **options):
+ self.options = LarkOptions(options)
+
+ # Set regex or re module
+ use_regex = self.options.regex
+ if use_regex:
+ if regex:
+ re_module = regex
+ else:
+ raise ImportError('`regex` module must be installed if calling `Lark(regex=True)`.')
+ else:
+ re_module = re
+
+ # Some, but not all file-like objects have a 'name' attribute
+ if self.options.source_path is None:
+ try:
+ self.source_path = grammar.name
+ except AttributeError:
+ self.source_path = '<string>'
+ else:
+ self.source_path = self.options.source_path
+
+ # Drain file-like objects to get their contents
+ try:
+ read = grammar.read
+ except AttributeError:
+ pass
+ else:
+ grammar = read()
+
+ cache_fn = None
+ cache_md5 = None
+ if isinstance(grammar, STRING_TYPE):
+ self.source_grammar = grammar
+ if self.options.use_bytes:
+ if not isascii(grammar):
+ raise ConfigurationError("Grammar must be ascii only, when use_bytes=True")
+ if sys.version_info[0] == 2 and self.options.use_bytes != 'force':
+ raise ConfigurationError("`use_bytes=True` may have issues on python2. "
+ "Use `use_bytes='force'` to use it at your own risk.")
+
+ if self.options.cache:
+ if self.options.parser != 'lalr':
+ raise ConfigurationError("cache only works with parser='lalr' for now")
+
+ unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals')
+ options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable)
+ from . import __version__
+ s = grammar + options_str + __version__ + str(sys.version_info[:2])
+ cache_md5 = hashlib.md5(s.encode('utf8')).hexdigest()
+
+ if isinstance(self.options.cache, STRING_TYPE):
+ cache_fn = self.options.cache
+ else:
+ if self.options.cache is not True:
+ raise ConfigurationError("cache argument must be bool or str")
+ # Python2.7 doesn't support * syntax in tuples
+ cache_fn = tempfile.gettempdir() + '/.lark_cache_%s_%s_%s.tmp' % ((cache_md5,) + sys.version_info[:2])
+
+ if FS.exists(cache_fn):
+ logger.debug('Loading grammar from cache: %s', cache_fn)
+ # Remove options that aren't relevant for loading from cache
+ for name in (set(options) - _LOAD_ALLOWED_OPTIONS):
+ del options[name]
+ with FS.open(cache_fn, 'rb') as f:
+ old_options = self.options
+ try:
+ file_md5 = f.readline().rstrip(b'\n')
+ cached_used_files = pickle.load(f)
+ if file_md5 == cache_md5.encode('utf8') and verify_used_files(cached_used_files):
+ cached_parser_data = pickle.load(f)
+ self._load(cached_parser_data, **options)
+ return
+ except Exception: # We should probably narrow down which errors we catch here.
+ logger.exception("Failed to load Lark from cache: %r. We will try to carry on." % cache_fn)
+
+ # In theory, the Lark instance might have been messed up by the call to `_load`.
+ # In practice the only relevant thing that might have been overridden should be `options`
+ self.options = old_options
+
+
+ # Parse the grammar file and compose the grammars
+ self.grammar, used_files = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens)
+ else:
+ assert isinstance(grammar, Grammar)
+ self.grammar = grammar
+
+
+ if self.options.lexer == 'auto':
+ if self.options.parser == 'lalr':
+ self.options.lexer = 'contextual'
+ elif self.options.parser == 'earley':
+ if self.options.postlex is not None:
+ logger.info("postlex can't be used with the dynamic lexer, so we use standard instead. "
+ "Consider using lalr with contextual instead of earley")
+ self.options.lexer = 'standard'
+ else:
+ self.options.lexer = 'dynamic'
+ elif self.options.parser == 'cyk':
+ self.options.lexer = 'standard'
+ else:
+ assert False, self.options.parser
+ lexer = self.options.lexer
+ if isinstance(lexer, type):
+ assert issubclass(lexer, Lexer) # XXX Is this really important? Maybe just ensure interface compliance
+ else:
+ assert_config(lexer, ('standard', 'contextual', 'dynamic', 'dynamic_complete'))
+ if self.options.postlex is not None and 'dynamic' in lexer:
+ raise ConfigurationError("Can't use postlex with a dynamic lexer. Use standard or contextual instead")
+
+ if self.options.ambiguity == 'auto':
+ if self.options.parser == 'earley':
+ self.options.ambiguity = 'resolve'
+ else:
+ assert_config(self.options.parser, ('earley', 'cyk'), "%r doesn't support disambiguation. Use one of these parsers instead: %s")
+
+ if self.options.priority == 'auto':
+ self.options.priority = 'normal'
+
+ if self.options.priority not in _VALID_PRIORITY_OPTIONS:
+ raise ConfigurationError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS))
+ assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"'
+ if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS:
+ raise ConfigurationError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS))
+
+ if self.options.parser is None:
+ terminals_to_keep = '*'
+ elif self.options.postlex is not None:
+ terminals_to_keep = set(self.options.postlex.always_accept)
+ else:
+ terminals_to_keep = set()
+
+ # Compile the EBNF grammar into BNF
+ self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start, terminals_to_keep)
+
+ if self.options.edit_terminals:
+ for t in self.terminals:
+ self.options.edit_terminals(t)
+
+ self._terminals_dict = {t.name: t for t in self.terminals}
+
+ # If the user asked to invert the priorities, negate them all here.
+ # This replaces the old 'resolve__antiscore_sum' option.
+ if self.options.priority == 'invert':
+ for rule in self.rules:
+ if rule.options.priority is not None:
+ rule.options.priority = -rule.options.priority
+ # Else, if the user asked to disable priorities, strip them from the
+ # rules. This allows the Earley parsers to skip an extra forest walk
+ # for improved performance, if you don't need them (or didn't specify any).
+ elif self.options.priority is None:
+ for rule in self.rules:
+ if rule.options.priority is not None:
+ rule.options.priority = None
+
+ # TODO Deprecate lexer_callbacks?
+ self.lexer_conf = LexerConf(
+ self.terminals, re_module, self.ignore_tokens, self.options.postlex,
+ self.options.lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes
+ )
+
+ if self.options.parser:
+ self.parser = self._build_parser()
+ elif lexer:
+ self.lexer = self._build_lexer()
+
+ if cache_fn:
+ logger.debug('Saving grammar to cache: %s', cache_fn)
+ with FS.open(cache_fn, 'wb') as f:
+ f.write(cache_md5.encode('utf8') + b'\n')
+ pickle.dump(used_files, f)
+ self.save(f)
+
+ if __doc__:
+ __doc__ += "\n\n" + LarkOptions.OPTIONS_DOC
+
+ __serialize_fields__ = 'parser', 'rules', 'options'
+
+ def _build_lexer(self, dont_ignore=False):
+ lexer_conf = self.lexer_conf
+ if dont_ignore:
+ from copy import copy
+ lexer_conf = copy(lexer_conf)
+ lexer_conf.ignore = ()
+ return TraditionalLexer(lexer_conf)
+
+ def _prepare_callbacks(self):
+ self._callbacks = {}
+ # we don't need these callbacks if we aren't building a tree
+ if self.options.ambiguity != 'forest':
+ self._parse_tree_builder = ParseTreeBuilder(
+ self.rules,
+ self.options.tree_class or Tree,
+ self.options.propagate_positions,
+ self.options.parser != 'lalr' and self.options.ambiguity == 'explicit',
+ self.options.maybe_placeholders
+ )
+ self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)
+ self._callbacks.update(_get_lexer_callbacks(self.options.transformer, self.terminals))
+
+ def _build_parser(self):
+ self._prepare_callbacks()
+ parser_class = get_frontend(self.options.parser, self.options.lexer)
+ parser_conf = ParserConf(self.rules, self._callbacks, self.options.start)
+ return parser_class(self.lexer_conf, parser_conf, options=self.options)
+
+ def save(self, f):
+ """Saves the instance into the given file object
+
+ Useful for caching and multiprocessing.
+ """
+ data, m = self.memo_serialize([TerminalDef, Rule])
+ pickle.dump({'data': data, 'memo': m}, f, protocol=pickle.HIGHEST_PROTOCOL)
+
+ @classmethod
+ def load(cls, f):
+ """Loads an instance from the given file object
+
+ Useful for caching and multiprocessing.
+ """
+ inst = cls.__new__(cls)
+ return inst._load(f)
+
+ def _deserialize_lexer_conf(self, data, memo, options):
+ lexer_conf = LexerConf.deserialize(data['lexer_conf'], memo)
+ lexer_conf.callbacks = options.lexer_callbacks or {}
+ lexer_conf.re_module = regex if options.regex else re
+ lexer_conf.use_bytes = options.use_bytes
+ lexer_conf.g_regex_flags = options.g_regex_flags
+ lexer_conf.skip_validation = True
+ lexer_conf.postlex = options.postlex
+ return lexer_conf
+
+ def _load(self, f, **kwargs):
+ if isinstance(f, dict):
+ d = f
+ else:
+ d = pickle.load(f)
+ memo_json = d['memo']
+ data = d['data']
+
+ assert memo_json
+ memo = SerializeMemoizer.deserialize(memo_json, {'Rule': Rule, 'TerminalDef': TerminalDef}, {})
+ options = dict(data['options'])
+ if (set(kwargs) - _LOAD_ALLOWED_OPTIONS) & set(LarkOptions._defaults):
+ raise ConfigurationError("Some options are not allowed when loading a Parser: {}"
+ .format(set(kwargs) - _LOAD_ALLOWED_OPTIONS))
+ options.update(kwargs)
+ self.options = LarkOptions.deserialize(options, memo)
+ self.rules = [Rule.deserialize(r, memo) for r in data['rules']]
+ self.source_path = '<deserialized>'
+ parser_class = get_frontend(self.options.parser, self.options.lexer)
+ self.lexer_conf = self._deserialize_lexer_conf(data['parser'], memo, self.options)
+ self.terminals = self.lexer_conf.terminals
+ self._prepare_callbacks()
+ self._terminals_dict = {t.name: t for t in self.terminals}
+ self.parser = parser_class.deserialize(
+ data['parser'],
+ memo,
+ self.lexer_conf,
+ self._callbacks,
+ self.options, # Not all, but multiple attributes are used
+ )
+ return self
+
+ @classmethod
+ def _load_from_dict(cls, data, memo, **kwargs):
+ inst = cls.__new__(cls)
+ return inst._load({'data': data, 'memo': memo}, **kwargs)
+
+ @classmethod
+ def open(cls, grammar_filename, rel_to=None, **options):
+ """Create an instance of Lark with the grammar given by its filename
+
+ If ``rel_to`` is provided, the function will find the grammar filename in relation to it.
+
+ Example:
+
+ >>> Lark.open("grammar_file.lark", rel_to=__file__, parser="lalr")
+ Lark(...)
+
+ """
+ if rel_to:
+ basepath = os.path.dirname(rel_to)
+ grammar_filename = os.path.join(basepath, grammar_filename)
+ with open(grammar_filename, encoding='utf8') as f:
+ return cls(f, **options)
+
+ @classmethod
+ def open_from_package(cls, package, grammar_path, search_paths=("",), **options):
+ """Create an instance of Lark with the grammar loaded from within the package `package`.
+ This allows grammar loading from zipapps.
+
+ Imports in the grammar will use the `package` and `search_paths` provided, through `FromPackageLoader`
+
+ Example:
+
+ Lark.open_from_package(__name__, "example.lark", ("grammars",), parser=...)
+ """
+ package_loader = FromPackageLoader(package, search_paths)
+ full_path, text = package_loader(None, grammar_path)
+ options.setdefault('source_path', full_path)
+ options.setdefault('import_paths', [])
+ options['import_paths'].append(package_loader)
+ return cls(text, **options)
+
+ def __repr__(self):
+ return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer)
+
+
+ def lex(self, text, dont_ignore=False):
+ """Only lex (and postlex) the text, without parsing it. Only relevant when lexer='standard'
+
+ When dont_ignore=True, the lexer will return all tokens, even those marked for %ignore.
+
+ :raises UnexpectedCharacters: In case the lexer cannot find a suitable match.
+ """
+ if not hasattr(self, 'lexer') or dont_ignore:
+ lexer = self._build_lexer(dont_ignore)
+ else:
+ lexer = self.lexer
+ lexer_thread = LexerThread(lexer, text)
+ stream = lexer_thread.lex(None)
+ if self.options.postlex:
+ return self.options.postlex.process(stream)
+ return stream
+
+ def get_terminal(self, name):
+ """Get information about a terminal"""
+ return self._terminals_dict[name]
+
+ def parse_interactive(self, text=None, start=None):
+ """Start an interactive parsing session.
+
+ Parameters:
+ text (str, optional): Text to be parsed. Required for ``resume_parse()``.
+ start (str, optional): Start symbol
+
+ Returns:
+ A new InteractiveParser instance.
+
+ See Also: ``Lark.parse()``
+ """
+ return self.parser.parse_interactive(text, start=start)
+
+ def parse(self, text, start=None, on_error=None):
+ """Parse the given text, according to the options provided.
+
+ Parameters:
+ text (str): Text to be parsed.
+ start (str, optional): Required if Lark was given multiple possible start symbols (using the start option).
+ on_error (function, optional): if provided, will be called on UnexpectedToken error. Return true to resume parsing.
+ LALR only. See examples/advanced/error_handling.py for an example of how to use on_error.
+
+ Returns:
+ If a transformer is supplied to ``__init__``, returns whatever is the
+ result of the transformation. Otherwise, returns a Tree instance.
+
+ :raises UnexpectedInput: On a parse error, one of these sub-exceptions will be raised:
+ ``UnexpectedCharacters``, ``UnexpectedToken``, or ``UnexpectedEOF``.
+ For convenience, these sub-exceptions also inherit from ``ParseError`` and ``LexError``.
+
+ """
+ return self.parser.parse(text, start=start, on_error=on_error)
+
+ @property
+ def source(self):
+ warn("Attribute Lark.source was renamed to Lark.source_path", DeprecationWarning)
+ return self.source_path
+
+ @source.setter
+ def source(self, value):
+ self.source_path = value
+
+ @property
+ def grammar_source(self):
+ warn("Attribute Lark.grammar_source was renamed to Lark.source_grammar", DeprecationWarning)
+ return self.source_grammar
+
+ @grammar_source.setter
+ def grammar_source(self, value):
+ self.source_grammar = value
+
+
+###}
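Tying the options above together: a minimal sketch of an LALR parser with an embedded transformer (allowed for LALR, rejected for Earley as enforced in LarkOptions.__init__) and maybe_placeholders=True, so an unmatched [] arrives as None. The grammar and transformer are illustrative:

from lark import Lark, Transformer, v_args

grammar = r'''
    ?start: assign
    assign: NAME "=" NUMBER [COMMENT]

    COMMENT: /#[^\n]*/
    %import common.CNAME -> NAME
    %import common.NUMBER
    %import common.WS
    %ignore WS
'''

@v_args(inline=True)
class ToTuple(Transformer):
    def assign(self, name, number, comment):
        # With maybe_placeholders=True, the unmatched [COMMENT] is passed as None
        return (str(name), float(number), None if comment is None else str(comment))

parser = Lark(grammar, parser='lalr', maybe_placeholders=True, transformer=ToTuple())

print(parser.parse("x = 3.5"))        # ('x', 3.5, None)
print(parser.parse("y = 1  # note"))  # ('y', 1.0, '# note')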
diff --git a/.venv/lib/python3.12/site-packages/lark/lexer.py b/.venv/lib/python3.12/site-packages/lark/lexer.py
new file mode 100644
index 00000000..a82cc180
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/lexer.py
@@ -0,0 +1,506 @@
+# Lexer Implementation
+
+import re
+
+from .utils import Str, classify, get_regexp_width, Py36, Serialize, suppress
+from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken
+
+###{standalone
+from warnings import warn
+from copy import copy
+
+
+class Pattern(Serialize):
+ raw = None
+ type = None
+
+ def __init__(self, value, flags=(), raw=None):
+ self.value = value
+ self.flags = frozenset(flags)
+ self.raw = raw
+
+ def __repr__(self):
+ return repr(self.to_regexp())
+
+ # Pattern Hashing assumes all subclasses have a different priority!
+ def __hash__(self):
+ return hash((type(self), self.value, self.flags))
+
+ def __eq__(self, other):
+ return type(self) == type(other) and self.value == other.value and self.flags == other.flags
+
+ def to_regexp(self):
+ raise NotImplementedError()
+
+ def min_width(self):
+ raise NotImplementedError()
+
+ def max_width(self):
+ raise NotImplementedError()
+
+ if Py36:
+ # Python 3.6 changed syntax for flags in regular expression
+ def _get_flags(self, value):
+ for f in self.flags:
+ value = ('(?%s:%s)' % (f, value))
+ return value
+
+ else:
+ def _get_flags(self, value):
+ for f in self.flags:
+ value = ('(?%s)' % f) + value
+ return value
+
+
+
+class PatternStr(Pattern):
+ __serialize_fields__ = 'value', 'flags'
+
+ type = "str"
+
+ def to_regexp(self):
+ return self._get_flags(re.escape(self.value))
+
+ @property
+ def min_width(self):
+ return len(self.value)
+ max_width = min_width
+
+
+class PatternRE(Pattern):
+ __serialize_fields__ = 'value', 'flags', '_width'
+
+ type = "re"
+
+ def to_regexp(self):
+ return self._get_flags(self.value)
+
+ _width = None
+ def _get_width(self):
+ if self._width is None:
+ self._width = get_regexp_width(self.to_regexp())
+ return self._width
+
+ @property
+ def min_width(self):
+ return self._get_width()[0]
+
+ @property
+ def max_width(self):
+ return self._get_width()[1]
+
+
+class TerminalDef(Serialize):
+ __serialize_fields__ = 'name', 'pattern', 'priority'
+ __serialize_namespace__ = PatternStr, PatternRE
+
+ def __init__(self, name, pattern, priority=1):
+ assert isinstance(pattern, Pattern), pattern
+ self.name = name
+ self.pattern = pattern
+ self.priority = priority
+
+ def __repr__(self):
+ return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)
+
+ def user_repr(self):
+ if self.name.startswith('__'): # We represent a generated terminal
+ return self.pattern.raw or self.name
+ else:
+ return self.name
+
+
+class Token(Str):
+ """A string with meta-information, that is produced by the lexer.
+
+ When parsing text, the resulting chunks of the input that haven't been discarded,
+ will end up in the tree as Token instances. The Token class inherits from Python's ``str``,
+ so normal string comparisons and operations will work as expected.
+
+ Attributes:
+ type: Name of the token (as specified in grammar)
+ value: Value of the token (redundant, as ``token.value == token`` will always be true)
+ start_pos: The index of the token in the text
+ line: The line of the token in the text (starting with 1)
+ column: The column of the token in the text (starting with 1)
+ end_line: The line where the token ends
+ end_column: The next column after the end of the token. For example,
+ if the token is a single character with a column value of 4,
+ end_column will be 5.
+ end_pos: the index where the token ends (basically ``start_pos + len(token)``)
+ """
+ __slots__ = ('type', 'start_pos', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos')
+
+ def __new__(cls, type_, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None, pos_in_stream=None):
+ try:
+ inst = super(Token, cls).__new__(cls, value)
+ except UnicodeDecodeError:
+ value = value.decode('latin1')
+ inst = super(Token, cls).__new__(cls, value)
+
+ inst.type = type_
+ inst.start_pos = start_pos if start_pos is not None else pos_in_stream
+ inst.value = value
+ inst.line = line
+ inst.column = column
+ inst.end_line = end_line
+ inst.end_column = end_column
+ inst.end_pos = end_pos
+ return inst
+
+ @property
+ def pos_in_stream(self):
+ warn("Attribute Token.pos_in_stream was renamed to Token.start_pos", DeprecationWarning, 2)
+ return self.start_pos
+
+ def update(self, type_=None, value=None):
+ return Token.new_borrow_pos(
+ type_ if type_ is not None else self.type,
+ value if value is not None else self.value,
+ self
+ )
+
+ @classmethod
+ def new_borrow_pos(cls, type_, value, borrow_t):
+ return cls(type_, value, borrow_t.start_pos, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)
+
+ def __reduce__(self):
+ return (self.__class__, (self.type, self.value, self.start_pos, self.line, self.column))
+
+ def __repr__(self):
+ return 'Token(%r, %r)' % (self.type, self.value)
+
+ def __deepcopy__(self, memo):
+ return Token(self.type, self.value, self.start_pos, self.line, self.column)
+
+ def __eq__(self, other):
+ if isinstance(other, Token) and self.type != other.type:
+ return False
+
+ return Str.__eq__(self, other)
+
+ __hash__ = Str.__hash__
+
+
+class LineCounter:
+ __slots__ = 'char_pos', 'line', 'column', 'line_start_pos', 'newline_char'
+
+ def __init__(self, newline_char):
+ self.newline_char = newline_char
+ self.char_pos = 0
+ self.line = 1
+ self.column = 1
+ self.line_start_pos = 0
+
+ def __eq__(self, other):
+ if not isinstance(other, LineCounter):
+ return NotImplemented
+
+ return self.char_pos == other.char_pos and self.newline_char == other.newline_char
+
+ def feed(self, token, test_newline=True):
+ """Consume a token and calculate the new line & column.
+
+ As an optional optimization, set test_newline=False if token doesn't contain a newline.
+ """
+ if test_newline:
+ newlines = token.count(self.newline_char)
+ if newlines:
+ self.line += newlines
+ self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1
+
+ self.char_pos += len(token)
+ self.column = self.char_pos - self.line_start_pos + 1
+
+
+class UnlessCallback:
+ def __init__(self, scanner):
+ self.scanner = scanner
+
+ def __call__(self, t):
+ res = self.scanner.match(t.value, 0)
+ if res:
+ _value, t.type = res
+ return t
+
+
+class CallChain:
+ def __init__(self, callback1, callback2, cond):
+ self.callback1 = callback1
+ self.callback2 = callback2
+ self.cond = cond
+
+ def __call__(self, t):
+ t2 = self.callback1(t)
+ return self.callback2(t) if self.cond(t2) else t2
+
+
+def _get_match(re_, regexp, s, flags):
+ m = re_.match(regexp, s, flags)
+ if m:
+ return m.group(0)
+
+def _create_unless(terminals, g_regex_flags, re_, use_bytes):
+ tokens_by_type = classify(terminals, lambda t: type(t.pattern))
+ assert len(tokens_by_type) <= 2, tokens_by_type.keys()
+ embedded_strs = set()
+ callback = {}
+ for retok in tokens_by_type.get(PatternRE, []):
+ unless = []
+ for strtok in tokens_by_type.get(PatternStr, []):
+ if strtok.priority > retok.priority:
+ continue
+ s = strtok.pattern.value
+ if s == _get_match(re_, retok.pattern.to_regexp(), s, g_regex_flags):
+ unless.append(strtok)
+ if strtok.pattern.flags <= retok.pattern.flags:
+ embedded_strs.add(strtok)
+ if unless:
+ callback[retok.name] = UnlessCallback(Scanner(unless, g_regex_flags, re_, match_whole=True, use_bytes=use_bytes))
+
+ new_terminals = [t for t in terminals if t not in embedded_strs]
+ return new_terminals, callback
+
+
+
+class Scanner:
+ def __init__(self, terminals, g_regex_flags, re_, use_bytes, match_whole=False):
+ self.terminals = terminals
+ self.g_regex_flags = g_regex_flags
+ self.re_ = re_
+ self.use_bytes = use_bytes
+ self.match_whole = match_whole
+
+ self.allowed_types = {t.name for t in self.terminals}
+
+ self._mres = self._build_mres(terminals, len(terminals))
+
+ def _build_mres(self, terminals, max_size):
+ # Python sets an unreasonable group limit (currently 100) in its re module
+ # Worse, the only way to know we reached it is by catching an AssertionError!
+        # This function recursively tries fewer and fewer groups until it succeeds.
+ postfix = '$' if self.match_whole else ''
+ mres = []
+ while terminals:
+ pattern = u'|'.join(u'(?P<%s>%s)' % (t.name, t.pattern.to_regexp() + postfix) for t in terminals[:max_size])
+ if self.use_bytes:
+ pattern = pattern.encode('latin-1')
+ try:
+ mre = self.re_.compile(pattern, self.g_regex_flags)
+ except AssertionError: # Yes, this is what Python provides us.. :/
+ return self._build_mres(terminals, max_size//2)
+
+ mres.append((mre, {i: n for n, i in mre.groupindex.items()}))
+ terminals = terminals[max_size:]
+ return mres
+
+ def match(self, text, pos):
+ for mre, type_from_index in self._mres:
+ m = mre.match(text, pos)
+ if m:
+ return m.group(0), type_from_index[m.lastindex]
+
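+# Editor's note: an illustrative sketch (not part of lark), assuming the
+# TerminalDef / PatternRE / PatternStr classes defined earlier in this module.
+# Scanner.match() returns the matched value and the terminal name, or None.
+#
+#     import re
+#     terms = [TerminalDef('INT', PatternRE(r'\d+')), TerminalDef('PLUS', PatternStr('+'))]
+#     sc = Scanner(terms, 0, re, use_bytes=False)
+#     assert sc.match("12+3", 0) == ('12', 'INT')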
+
+def _regexp_has_newline(r):
+ r"""Expressions that may indicate newlines in a regexp:
+ - newlines (\n)
+ - escaped newline (\\n)
+ - anything but ([^...])
+ - any-char (.) when the flag (?s) exists
+ - spaces (\s)
+ """
+ return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r)
+
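+# Editor's note: illustrative checks (not part of lark) for the heuristic above:
+#
+#     _regexp_has_newline(r'\n')      # True  (escaped newline)
+#     _regexp_has_newline(r'[^"]*')   # True  ("anything but" may span lines)
+#     _regexp_has_newline(r'[a-z]+')  # False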
+
+class Lexer(object):
+ """Lexer interface
+
+ Method Signatures:
+ lex(self, text) -> Iterator[Token]
+ """
+ lex = NotImplemented
+
+ def make_lexer_state(self, text):
+ line_ctr = LineCounter(b'\n' if isinstance(text, bytes) else '\n')
+ return LexerState(text, line_ctr)
+
+
+class TraditionalLexer(Lexer):
+
+ def __init__(self, conf):
+ terminals = list(conf.terminals)
+ assert all(isinstance(t, TerminalDef) for t in terminals), terminals
+
+ self.re = conf.re_module
+
+ if not conf.skip_validation:
+ # Sanitization
+ for t in terminals:
+ try:
+ self.re.compile(t.pattern.to_regexp(), conf.g_regex_flags)
+ except self.re.error:
+ raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern))
+
+ if t.pattern.min_width == 0:
+ raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern))
+
+ if not (set(conf.ignore) <= {t.name for t in terminals}):
+ raise LexError("Ignore terminals are not defined: %s" % (set(conf.ignore) - {t.name for t in terminals}))
+
+ # Init
+ self.newline_types = frozenset(t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp()))
+ self.ignore_types = frozenset(conf.ignore)
+
+ terminals.sort(key=lambda x: (-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name))
+ self.terminals = terminals
+ self.user_callbacks = conf.callbacks
+ self.g_regex_flags = conf.g_regex_flags
+ self.use_bytes = conf.use_bytes
+ self.terminals_by_name = conf.terminals_by_name
+
+ self._scanner = None
+
+ def _build_scanner(self):
+ terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, self.re, self.use_bytes)
+ assert all(self.callback.values())
+
+ for type_, f in self.user_callbacks.items():
+ if type_ in self.callback:
+ # Already a callback there, probably UnlessCallback
+ self.callback[type_] = CallChain(self.callback[type_], f, lambda t: t.type == type_)
+ else:
+ self.callback[type_] = f
+
+ self._scanner = Scanner(terminals, self.g_regex_flags, self.re, self.use_bytes)
+
+ @property
+ def scanner(self):
+ if self._scanner is None:
+ self._build_scanner()
+ return self._scanner
+
+ def match(self, text, pos):
+ return self.scanner.match(text, pos)
+
+ def lex(self, state, parser_state):
+ with suppress(EOFError):
+ while True:
+ yield self.next_token(state, parser_state)
+
+ def next_token(self, lex_state, parser_state=None):
+ line_ctr = lex_state.line_ctr
+ while line_ctr.char_pos < len(lex_state.text):
+ res = self.match(lex_state.text, line_ctr.char_pos)
+ if not res:
+ allowed = self.scanner.allowed_types - self.ignore_types
+ if not allowed:
+ allowed = {"<END-OF-FILE>"}
+ raise UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column,
+ allowed=allowed, token_history=lex_state.last_token and [lex_state.last_token],
+ state=parser_state, terminals_by_name=self.terminals_by_name)
+
+ value, type_ = res
+
+ if type_ not in self.ignore_types:
+ t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
+ line_ctr.feed(value, type_ in self.newline_types)
+ t.end_line = line_ctr.line
+ t.end_column = line_ctr.column
+ t.end_pos = line_ctr.char_pos
+ if t.type in self.callback:
+ t = self.callback[t.type](t)
+ if not isinstance(t, Token):
+ raise LexError("Callbacks must return a token (returned %r)" % t)
+ lex_state.last_token = t
+ return t
+ else:
+ if type_ in self.callback:
+ t2 = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
+ self.callback[type_](t2)
+ line_ctr.feed(value, type_ in self.newline_types)
+
+ # EOF
+ raise EOFError(self)
+
+
+class LexerState(object):
+ __slots__ = 'text', 'line_ctr', 'last_token'
+
+ def __init__(self, text, line_ctr, last_token=None):
+ self.text = text
+ self.line_ctr = line_ctr
+ self.last_token = last_token
+
+ def __eq__(self, other):
+ if not isinstance(other, LexerState):
+ return NotImplemented
+
+ return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token
+
+ def __copy__(self):
+ return type(self)(self.text, copy(self.line_ctr), self.last_token)
+
+
+class ContextualLexer(Lexer):
+
+ def __init__(self, conf, states, always_accept=()):
+ terminals = list(conf.terminals)
+ terminals_by_name = conf.terminals_by_name
+
+ trad_conf = copy(conf)
+ trad_conf.terminals = terminals
+
+ lexer_by_tokens = {}
+ self.lexers = {}
+ for state, accepts in states.items():
+ key = frozenset(accepts)
+ try:
+ lexer = lexer_by_tokens[key]
+ except KeyError:
+ accepts = set(accepts) | set(conf.ignore) | set(always_accept)
+ lexer_conf = copy(trad_conf)
+ lexer_conf.terminals = [terminals_by_name[n] for n in accepts if n in terminals_by_name]
+ lexer = TraditionalLexer(lexer_conf)
+ lexer_by_tokens[key] = lexer
+
+ self.lexers[state] = lexer
+
+ assert trad_conf.terminals is terminals
+ self.root_lexer = TraditionalLexer(trad_conf)
+
+ def make_lexer_state(self, text):
+ return self.root_lexer.make_lexer_state(text)
+
+ def lex(self, lexer_state, parser_state):
+ try:
+ while True:
+ lexer = self.lexers[parser_state.position]
+ yield lexer.next_token(lexer_state, parser_state)
+ except EOFError:
+ pass
+ except UnexpectedCharacters as e:
+ # In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined, but not in the current context.
+ # This tests the input against the global context, to provide a nicer error.
+ try:
+ last_token = lexer_state.last_token # Save last_token. Calling root_lexer.next_token will change this to the wrong token
+ token = self.root_lexer.next_token(lexer_state, parser_state)
+ raise UnexpectedToken(token, e.allowed, state=parser_state, token_history=[last_token], terminals_by_name=self.root_lexer.terminals_by_name)
+ except UnexpectedCharacters:
+ raise e # Raise the original UnexpectedCharacters. The root lexer raises it with the wrong expected set.
+
+class LexerThread(object):
+ """A thread that ties a lexer instance and a lexer state, to be used by the parser"""
+
+ def __init__(self, lexer, text):
+ self.lexer = lexer
+ self.state = lexer.make_lexer_state(text)
+
+ def lex(self, parser_state):
+ return self.lexer.lex(self.state, parser_state)
+
+ def __copy__(self):
+ copied = object.__new__(LexerThread)
+ copied.lexer = self.lexer
+ copied.state = copy(self.state)
+ return copied
+###}
diff --git a/.venv/lib/python3.12/site-packages/lark/load_grammar.py b/.venv/lib/python3.12/site-packages/lark/load_grammar.py
new file mode 100644
index 00000000..1ae832f6
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/load_grammar.py
@@ -0,0 +1,1354 @@
+"""Parses and creates Grammar objects"""
+import hashlib
+import os.path
+import sys
+from collections import namedtuple
+from copy import copy, deepcopy
+from io import open
+import pkgutil
+from ast import literal_eval
+from numbers import Integral
+
+from .utils import bfs, Py36, logger, classify_bool, is_id_continue, is_id_start, bfs_all_unique, small_factors
+from .lexer import Token, TerminalDef, PatternStr, PatternRE
+
+from .parse_tree_builder import ParseTreeBuilder
+from .parser_frontends import ParsingFrontend
+from .common import LexerConf, ParserConf
+from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol
+from .utils import classify, suppress, dedup_list, Str
+from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken, ParseError
+
+from .tree import Tree, SlottedTree as ST
+from .visitors import Transformer, Visitor, v_args, Transformer_InPlace, Transformer_NonRecursive
+inline_args = v_args(inline=True)
+
+__path__ = os.path.dirname(__file__)
+IMPORT_PATHS = ['grammars']
+
+EXT = '.lark'
+
+_RE_FLAGS = 'imslux'
+
+_EMPTY = Symbol('__empty__')
+
+_TERMINAL_NAMES = {
+ '.' : 'DOT',
+ ',' : 'COMMA',
+ ':' : 'COLON',
+ ';' : 'SEMICOLON',
+ '+' : 'PLUS',
+ '-' : 'MINUS',
+ '*' : 'STAR',
+ '/' : 'SLASH',
+ '\\' : 'BACKSLASH',
+ '|' : 'VBAR',
+ '?' : 'QMARK',
+ '!' : 'BANG',
+ '@' : 'AT',
+ '#' : 'HASH',
+ '$' : 'DOLLAR',
+ '%' : 'PERCENT',
+ '^' : 'CIRCUMFLEX',
+ '&' : 'AMPERSAND',
+ '_' : 'UNDERSCORE',
+ '<' : 'LESSTHAN',
+ '>' : 'MORETHAN',
+ '=' : 'EQUAL',
+ '"' : 'DBLQUOTE',
+ '\'' : 'QUOTE',
+ '`' : 'BACKQUOTE',
+ '~' : 'TILDE',
+ '(' : 'LPAR',
+ ')' : 'RPAR',
+ '{' : 'LBRACE',
+ '}' : 'RBRACE',
+ '[' : 'LSQB',
+ ']' : 'RSQB',
+ '\n' : 'NEWLINE',
+ '\r\n' : 'CRLF',
+ '\t' : 'TAB',
+ ' ' : 'SPACE',
+}
+
+# Grammar Parser
+TERMINALS = {
+ '_LPAR': r'\(',
+ '_RPAR': r'\)',
+ '_LBRA': r'\[',
+ '_RBRA': r'\]',
+ '_LBRACE': r'\{',
+ '_RBRACE': r'\}',
+ 'OP': '[+*]|[?](?![a-z])',
+ '_COLON': ':',
+ '_COMMA': ',',
+ '_OR': r'\|',
+ '_DOT': r'\.(?!\.)',
+ '_DOTDOT': r'\.\.',
+ 'TILDE': '~',
+ 'RULE': '!?[_?]?[a-z][_a-z0-9]*',
+ 'TERMINAL': '_?[A-Z][_A-Z0-9]*',
+ 'STRING': r'"(\\"|\\\\|[^"\n])*?"i?',
+ 'REGEXP': r'/(?!/)(\\/|\\\\|[^/])*?/[%s]*' % _RE_FLAGS,
+ '_NL': r'(\r?\n)+\s*',
+ '_NL_OR': r'(\r?\n)+\s*\|',
+ 'WS': r'[ \t]+',
+ 'COMMENT': r'\s*//[^\n]*',
+ 'BACKSLASH': r'\\[ ]*\n',
+ '_TO': '->',
+ '_IGNORE': r'%ignore',
+ '_OVERRIDE': r'%override',
+ '_DECLARE': r'%declare',
+ '_EXTEND': r'%extend',
+ '_IMPORT': r'%import',
+ 'NUMBER': r'[+-]?\d+',
+}
+
+RULES = {
+ 'start': ['_list'],
+ '_list': ['_item', '_list _item'],
+ '_item': ['rule', 'term', 'ignore', 'import', 'declare', 'override', 'extend', '_NL'],
+
+ 'rule': ['RULE template_params _COLON expansions _NL',
+ 'RULE template_params _DOT NUMBER _COLON expansions _NL'],
+ 'template_params': ['_LBRACE _template_params _RBRACE',
+ ''],
+ '_template_params': ['RULE',
+ '_template_params _COMMA RULE'],
+ 'expansions': ['_expansions'],
+ '_expansions': ['alias',
+ '_expansions _OR alias',
+ '_expansions _NL_OR alias'],
+
+ '?alias': ['expansion _TO RULE', 'expansion'],
+ 'expansion': ['_expansion'],
+
+ '_expansion': ['', '_expansion expr'],
+
+ '?expr': ['atom',
+ 'atom OP',
+ 'atom TILDE NUMBER',
+ 'atom TILDE NUMBER _DOTDOT NUMBER',
+ ],
+
+ '?atom': ['_LPAR expansions _RPAR',
+ 'maybe',
+ 'value'],
+
+ 'value': ['terminal',
+ 'nonterminal',
+ 'literal',
+ 'range',
+ 'template_usage'],
+
+ 'terminal': ['TERMINAL'],
+ 'nonterminal': ['RULE'],
+
+ '?name': ['RULE', 'TERMINAL'],
+
+ 'maybe': ['_LBRA expansions _RBRA'],
+ 'range': ['STRING _DOTDOT STRING'],
+
+ 'template_usage': ['RULE _LBRACE _template_args _RBRACE'],
+ '_template_args': ['value',
+ '_template_args _COMMA value'],
+
+ 'term': ['TERMINAL _COLON expansions _NL',
+ 'TERMINAL _DOT NUMBER _COLON expansions _NL'],
+ 'override': ['_OVERRIDE rule',
+ '_OVERRIDE term'],
+ 'extend': ['_EXTEND rule',
+ '_EXTEND term'],
+ 'ignore': ['_IGNORE expansions _NL'],
+ 'declare': ['_DECLARE _declare_args _NL'],
+ 'import': ['_IMPORT _import_path _NL',
+ '_IMPORT _import_path _LPAR name_list _RPAR _NL',
+ '_IMPORT _import_path _TO name _NL'],
+
+ '_import_path': ['import_lib', 'import_rel'],
+ 'import_lib': ['_import_args'],
+ 'import_rel': ['_DOT _import_args'],
+ '_import_args': ['name', '_import_args _DOT name'],
+
+ 'name_list': ['_name_list'],
+ '_name_list': ['name', '_name_list _COMMA name'],
+
+ '_declare_args': ['name', '_declare_args name'],
+ 'literal': ['REGEXP', 'STRING'],
+}
+
+
+# Value 5 keeps the number of states in the lalr parser somewhat minimal
+# It isn't optimal, but close to it. See PR #949
+SMALL_FACTOR_THRESHOLD = 5
+# The threshold above which a repeat (via ~) is split up into several rules.
+# 50 is chosen since it keeps the number of states low and therefore lalr analysis time low,
+# while not being too aggressive and unnecessarily creating rules that might cause shift/reduce conflicts.
+# (See PR #949)
+REPEAT_BREAK_THRESHOLD = 50
+
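+# Editor's note (illustrative, not part of lark): small_factors decomposes a
+# repeat count n into pairs [(a, b), ...] such that iterating x -> x*a + b from
+# x=1 reproduces n, with each factor kept <= SMALL_FACTOR_THRESHOLD. For example,
+# [(5, 0), (5, 0), (2, 0)] is one valid decomposition of 50, since
+# ((1*5+0)*5+0)*2+0 == 50. The exact pairs returned are an implementation detail.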
+
+@inline_args
+class EBNF_to_BNF(Transformer_InPlace):
+ def __init__(self):
+ self.new_rules = []
+ self.rules_cache = {}
+ self.prefix = 'anon'
+ self.i = 0
+ self.rule_options = None
+
+ def _name_rule(self, inner):
+ new_name = '__%s_%s_%d' % (self.prefix, inner, self.i)
+ self.i += 1
+ return new_name
+
+ def _add_rule(self, key, name, expansions):
+ t = NonTerminal(name)
+ self.new_rules.append((name, expansions, self.rule_options))
+ self.rules_cache[key] = t
+ return t
+
+ def _add_recurse_rule(self, type_, expr):
+ try:
+ return self.rules_cache[expr]
+ except KeyError:
+ new_name = self._name_rule(type_)
+ t = NonTerminal(new_name)
+ tree = ST('expansions', [
+ ST('expansion', [expr]),
+ ST('expansion', [t, expr])
+ ])
+ return self._add_rule(expr, new_name, tree)
+
+ def _add_repeat_rule(self, a, b, target, atom):
+ """Generate a rule that repeats target ``a`` times, and repeats atom ``b`` times.
+
+ When called recursively (into target), it repeats atom for x(n) times, where:
+ x(0) = 1
+ x(n) = a(n) * x(n-1) + b
+
+ Example rule when a=3, b=4:
+
+ new_rule: target target target atom atom atom atom
+
+ """
+ key = (a, b, target, atom)
+ try:
+ return self.rules_cache[key]
+ except KeyError:
+ new_name = self._name_rule('repeat_a%d_b%d' % (a, b))
+ tree = ST('expansions', [ST('expansion', [target] * a + [atom] * b)])
+ return self._add_rule(key, new_name, tree)
+
+ def _add_repeat_opt_rule(self, a, b, target, target_opt, atom):
+ """Creates a rule that matches atom 0 to (a*n+b)-1 times.
+
+        Assuming ``target`` matches atom exactly n times, and ``target_opt`` matches atom 0 to n-1 times:
+
+ First we generate target * i followed by target_opt, for i from 0 to a-1
+ These match 0 to n*a - 1 times atom
+
+ Then we generate target * a followed by atom * i, for i from 0 to b-1
+ These match n*a to n*a + b-1 times atom
+
+ The created rule will not have any shift/reduce conflicts so that it can be used with lalr
+
+ Example rule when a=3, b=4:
+
+ new_rule: target_opt
+ | target target_opt
+ | target target target_opt
+
+ | target target target
+ | target target target atom
+ | target target target atom atom
+ | target target target atom atom atom
+
+ """
+ key = (a, b, target, atom, "opt")
+ try:
+ return self.rules_cache[key]
+ except KeyError:
+ new_name = self._name_rule('repeat_a%d_b%d_opt' % (a, b))
+ tree = ST('expansions', [
+ ST('expansion', [target]*i + [target_opt]) for i in range(a)
+ ] + [
+ ST('expansion', [target]*a + [atom]*i) for i in range(b)
+ ])
+ return self._add_rule(key, new_name, tree)
+
+ def _generate_repeats(self, rule, mn, mx):
+ """Generates a rule tree that repeats ``rule`` exactly between ``mn`` to ``mx`` times.
+ """
+ # For a small number of repeats, we can take the naive approach
+ if mx < REPEAT_BREAK_THRESHOLD:
+ return ST('expansions', [ST('expansion', [rule] * n) for n in range(mn, mx + 1)])
+
+ # For large repeat values, we break the repetition into sub-rules.
+ # We treat ``rule~mn..mx`` as ``rule~mn rule~0..(diff=mx-mn)``.
+        # We then use small_factors to split mn and diff into values [(a, b), ...]
+        # These values are used, with the help of _add_repeat_rule and _add_repeat_opt_rule,
+ # to generate a complete rule/expression that matches the corresponding number of repeats
+ mn_target = rule
+ for a, b in small_factors(mn, SMALL_FACTOR_THRESHOLD):
+ mn_target = self._add_repeat_rule(a, b, mn_target, rule)
+ if mx == mn:
+ return mn_target
+
+ diff = mx - mn + 1 # We add one because _add_repeat_opt_rule generates rules that match one less
+ diff_factors = small_factors(diff, SMALL_FACTOR_THRESHOLD)
+        diff_target = rule # Match rule exactly once
+        diff_opt_target = ST('expansion', []) # Match rule 0 times (i.e. up to 1-1 times)
+ for a, b in diff_factors[:-1]:
+ diff_opt_target = self._add_repeat_opt_rule(a, b, diff_target, diff_opt_target, rule)
+ diff_target = self._add_repeat_rule(a, b, diff_target, rule)
+
+ a, b = diff_factors[-1]
+ diff_opt_target = self._add_repeat_opt_rule(a, b, diff_target, diff_opt_target, rule)
+
+ return ST('expansions', [ST('expansion', [mn_target] + [diff_opt_target])])
+
+ def expr(self, rule, op, *args):
+ if op.value == '?':
+ empty = ST('expansion', [])
+ return ST('expansions', [rule, empty])
+ elif op.value == '+':
+ # a : b c+ d
+ # -->
+ # a : b _c d
+ # _c : _c c | c;
+ return self._add_recurse_rule('plus', rule)
+ elif op.value == '*':
+ # a : b c* d
+ # -->
+ # a : b _c? d
+ # _c : _c c | c;
+ new_name = self._add_recurse_rule('star', rule)
+ return ST('expansions', [new_name, ST('expansion', [])])
+ elif op.value == '~':
+ if len(args) == 1:
+ mn = mx = int(args[0])
+ else:
+ mn, mx = map(int, args)
+ if mx < mn or mn < 0:
+ raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (rule, mn, mx))
+
+ return self._generate_repeats(rule, mn, mx)
+
+ assert False, op
+
+ def maybe(self, rule):
+ keep_all_tokens = self.rule_options and self.rule_options.keep_all_tokens
+
+ def will_not_get_removed(sym):
+ if isinstance(sym, NonTerminal):
+ return not sym.name.startswith('_')
+ if isinstance(sym, Terminal):
+ return keep_all_tokens or not sym.filter_out
+ assert False
+
+ if any(rule.scan_values(will_not_get_removed)):
+ empty = _EMPTY
+ else:
+ empty = ST('expansion', [])
+
+ return ST('expansions', [rule, empty])
+
+
+class SimplifyRule_Visitor(Visitor):
+
+ @staticmethod
+ def _flatten(tree):
+ while tree.expand_kids_by_data(tree.data):
+ pass
+
+ def expansion(self, tree):
+ # rules_list unpacking
+ # a : b (c|d) e
+ # -->
+ # a : b c e | b d e
+ #
+ # In AST terms:
+ # expansion(b, expansions(c, d), e)
+ # -->
+ # expansions( expansion(b, c, e), expansion(b, d, e) )
+
+ self._flatten(tree)
+
+ for i, child in enumerate(tree.children):
+ if isinstance(child, Tree) and child.data == 'expansions':
+ tree.data = 'expansions'
+ tree.children = [self.visit(ST('expansion', [option if i == j else other
+ for j, other in enumerate(tree.children)]))
+ for option in dedup_list(child.children)]
+ self._flatten(tree)
+ break
+
+ def alias(self, tree):
+ rule, alias_name = tree.children
+ if rule.data == 'expansions':
+ aliases = []
+ for child in tree.children[0].children:
+ aliases.append(ST('alias', [child, alias_name]))
+ tree.data = 'expansions'
+ tree.children = aliases
+
+ def expansions(self, tree):
+ self._flatten(tree)
+ # Ensure all children are unique
+ if len(set(tree.children)) != len(tree.children):
+ tree.children = dedup_list(tree.children) # dedup is expensive, so try to minimize its use
+
+
+class RuleTreeToText(Transformer):
+ def expansions(self, x):
+ return x
+
+ def expansion(self, symbols):
+ return symbols, None
+
+ def alias(self, x):
+ (expansion, _alias), alias = x
+ assert _alias is None, (alias, expansion, '-', _alias) # Double alias not allowed
+ return expansion, alias.value
+
+
+class PrepareAnonTerminals(Transformer_InPlace):
+ """Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them"""
+
+ def __init__(self, terminals):
+ self.terminals = terminals
+ self.term_set = {td.name for td in self.terminals}
+ self.term_reverse = {td.pattern: td for td in terminals}
+ self.i = 0
+ self.rule_options = None
+
+ @inline_args
+ def pattern(self, p):
+ value = p.value
+ if p in self.term_reverse and p.flags != self.term_reverse[p].pattern.flags:
+ raise GrammarError(u'Conflicting flags for the same terminal: %s' % p)
+
+ term_name = None
+
+ if isinstance(p, PatternStr):
+ try:
+ # If already defined, use the user-defined terminal name
+ term_name = self.term_reverse[p].name
+ except KeyError:
+ # Try to assign an indicative anon-terminal name
+ try:
+ term_name = _TERMINAL_NAMES[value]
+ except KeyError:
+ if value and is_id_continue(value) and is_id_start(value[0]) and value.upper() not in self.term_set:
+ term_name = value.upper()
+
+ if term_name in self.term_set:
+ term_name = None
+
+ elif isinstance(p, PatternRE):
+            if p in self.term_reverse: # Reuse the name of an already-defined terminal with the same pattern
+ term_name = self.term_reverse[p].name
+ else:
+ assert False, p
+
+ if term_name is None:
+ term_name = '__ANON_%d' % self.i
+ self.i += 1
+
+ if term_name not in self.term_set:
+ assert p not in self.term_reverse
+ self.term_set.add(term_name)
+ termdef = TerminalDef(term_name, p)
+ self.term_reverse[p] = termdef
+ self.terminals.append(termdef)
+
+ filter_out = False if self.rule_options and self.rule_options.keep_all_tokens else isinstance(p, PatternStr)
+
+ return Terminal(term_name, filter_out=filter_out)
+
+
+class _ReplaceSymbols(Transformer_InPlace):
+ """Helper for ApplyTemplates"""
+
+ def __init__(self):
+ self.names = {}
+
+ def value(self, c):
+ if len(c) == 1 and isinstance(c[0], Token) and c[0].value in self.names:
+ return self.names[c[0].value]
+ return self.__default__('value', c, None)
+
+ def template_usage(self, c):
+ if c[0] in self.names:
+ return self.__default__('template_usage', [self.names[c[0]].name] + c[1:], None)
+ return self.__default__('template_usage', c, None)
+
+
+class ApplyTemplates(Transformer_InPlace):
+ """Apply the templates, creating new rules that represent the used templates"""
+
+ def __init__(self, rule_defs):
+ self.rule_defs = rule_defs
+ self.replacer = _ReplaceSymbols()
+ self.created_templates = set()
+
+ def template_usage(self, c):
+ name = c[0]
+ args = c[1:]
+ result_name = "%s{%s}" % (name, ",".join(a.name for a in args))
+ if result_name not in self.created_templates:
+ self.created_templates.add(result_name)
+ (_n, params, tree, options) ,= (t for t in self.rule_defs if t[0] == name)
+ assert len(params) == len(args), args
+ result_tree = deepcopy(tree)
+ self.replacer.names = dict(zip(params, args))
+ self.replacer.transform(result_tree)
+ self.rule_defs.append((result_name, [], result_tree, deepcopy(options)))
+ return NonTerminal(result_name)
+
+
+def _rfind(s, choices):
+ return max(s.rfind(c) for c in choices)
+
+
+def eval_escaping(s):
+ w = ''
+ i = iter(s)
+ for n in i:
+ w += n
+ if n == '\\':
+ try:
+ n2 = next(i)
+ except StopIteration:
+ raise GrammarError("Literal ended unexpectedly (bad escaping): `%r`" % s)
+ if n2 == '\\':
+ w += '\\\\'
+ elif n2 not in 'Uuxnftr':
+ w += '\\'
+ w += n2
+ w = w.replace('\\"', '"').replace("'", "\\'")
+
+ to_eval = "u'''%s'''" % w
+ try:
+ s = literal_eval(to_eval)
+ except SyntaxError as e:
+ raise GrammarError(s, e)
+
+ return s
+
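+# Editor's note: illustrative behaviour (not part of lark). Known escape
+# sequences are evaluated, unknown ones keep their backslash:
+#
+#     eval_escaping(r'\n')   # -> '\n'  (a real newline)
+#     eval_escaping(r'\.')   # -> '\\.' (backslash + dot, left for the regexp engine)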
+
+def _literal_to_pattern(literal):
+ v = literal.value
+ flag_start = _rfind(v, '/"')+1
+ assert flag_start > 0
+ flags = v[flag_start:]
+ assert all(f in _RE_FLAGS for f in flags), flags
+
+ if literal.type == 'STRING' and '\n' in v:
+ raise GrammarError('You cannot put newlines in string literals')
+
+ if literal.type == 'REGEXP' and '\n' in v and 'x' not in flags:
+ raise GrammarError('You can only use newlines in regular expressions '
+ 'with the `x` (verbose) flag')
+
+ v = v[:flag_start]
+ assert v[0] == v[-1] and v[0] in '"/'
+ x = v[1:-1]
+
+ s = eval_escaping(x)
+
+ if s == "":
+ raise GrammarError("Empty terminals are not allowed (%s)" % literal)
+
+ if literal.type == 'STRING':
+ s = s.replace('\\\\', '\\')
+ return PatternStr(s, flags, raw=literal.value)
+ elif literal.type == 'REGEXP':
+ return PatternRE(s, flags, raw=literal.value)
+ else:
+ assert False, 'Invariant failed: literal.type not in ["STRING", "REGEXP"]'
+
+
+@inline_args
+class PrepareLiterals(Transformer_InPlace):
+ def literal(self, literal):
+ return ST('pattern', [_literal_to_pattern(literal)])
+
+ def range(self, start, end):
+ assert start.type == end.type == 'STRING'
+ start = start.value[1:-1]
+ end = end.value[1:-1]
+ assert len(eval_escaping(start)) == len(eval_escaping(end)) == 1
+ regexp = '[%s-%s]' % (start, end)
+ return ST('pattern', [PatternRE(regexp)])
+
+
+def _make_joined_pattern(regexp, flags_set):
+ # In Python 3.6, a new syntax for flags was introduced, that allows us to restrict the scope
+ # of flags to a specific regexp group. We are already using it in `lexer.Pattern._get_flags`
+ # However, for prior Python versions, we still need to use global flags, so we have to make sure
+ # that there are no flag collisions when we merge several terminals.
+ flags = ()
+ if not Py36:
+ if len(flags_set) > 1:
+ raise GrammarError("Lark doesn't support joining terminals with conflicting flags in python <3.6!")
+ elif len(flags_set) == 1:
+ flags ,= flags_set
+
+ return PatternRE(regexp, flags)
+
+class TerminalTreeToPattern(Transformer_NonRecursive):
+ def pattern(self, ps):
+ p ,= ps
+ return p
+
+ def expansion(self, items):
+ assert items
+ if len(items) == 1:
+ return items[0]
+
+ pattern = ''.join(i.to_regexp() for i in items)
+ return _make_joined_pattern(pattern, {i.flags for i in items})
+
+ def expansions(self, exps):
+ if len(exps) == 1:
+ return exps[0]
+
+ # Do a bit of sorting to make sure that the longest option is returned
+ # (Python's re module otherwise prefers just 'l' when given (l|ll) and both could match)
+ exps.sort(key=lambda x: (-x.max_width, -x.min_width, -len(x.value)))
+
+ pattern = '(?:%s)' % ('|'.join(i.to_regexp() for i in exps))
+ return _make_joined_pattern(pattern, {i.flags for i in exps})
+
+ def expr(self, args):
+ inner, op = args[:2]
+ if op == '~':
+ if len(args) == 3:
+ op = "{%d}" % int(args[2])
+ else:
+ mn, mx = map(int, args[2:])
+ if mx < mn:
+ raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (inner, mn, mx))
+ op = "{%d,%d}" % (mn, mx)
+ else:
+ assert len(args) == 2
+ return PatternRE('(?:%s)%s' % (inner.to_regexp(), op), inner.flags)
+
+ def maybe(self, expr):
+ return self.expr(expr + ['?'])
+
+ def alias(self, t):
+ raise GrammarError("Aliasing not allowed in terminals (You used -> in the wrong place)")
+
+ def value(self, v):
+ return v[0]
+
+
+class PrepareSymbols(Transformer_InPlace):
+ def value(self, v):
+ v ,= v
+ if isinstance(v, Tree):
+ return v
+ elif v.type == 'RULE':
+ return NonTerminal(Str(v.value))
+ elif v.type == 'TERMINAL':
+ return Terminal(Str(v.value), filter_out=v.startswith('_'))
+ assert False
+
+
+def nr_deepcopy_tree(t):
+ """Deepcopy tree `t` without recursion"""
+ return Transformer_NonRecursive(False).transform(t)
+
+
+class Grammar:
+ def __init__(self, rule_defs, term_defs, ignore):
+ self.term_defs = term_defs
+ self.rule_defs = rule_defs
+ self.ignore = ignore
+
+ def compile(self, start, terminals_to_keep):
+ # We change the trees in-place (to support huge grammars)
+ # So deepcopy allows calling compile more than once.
+ term_defs = [(n, (nr_deepcopy_tree(t), p)) for n, (t, p) in self.term_defs]
+ rule_defs = [(n, p, nr_deepcopy_tree(t), o) for n, p, t, o in self.rule_defs]
+
+ # ===================
+ # Compile Terminals
+ # ===================
+
+ # Convert terminal-trees to strings/regexps
+
+ for name, (term_tree, priority) in term_defs:
+ if term_tree is None: # Terminal added through %declare
+ continue
+ expansions = list(term_tree.find_data('expansion'))
+ if len(expansions) == 1 and not expansions[0].children:
+ raise GrammarError("Terminals cannot be empty (%s)" % name)
+
+ transformer = PrepareLiterals() * TerminalTreeToPattern()
+ terminals = [TerminalDef(name, transformer.transform(term_tree), priority)
+ for name, (term_tree, priority) in term_defs if term_tree]
+
+ # =================
+ # Compile Rules
+ # =================
+
+ # 1. Pre-process terminals
+ anon_tokens_transf = PrepareAnonTerminals(terminals)
+ transformer = PrepareLiterals() * PrepareSymbols() * anon_tokens_transf # Adds to terminals
+
+ # 2. Inline Templates
+
+ transformer *= ApplyTemplates(rule_defs)
+
+ # 3. Convert EBNF to BNF (and apply step 1 & 2)
+ ebnf_to_bnf = EBNF_to_BNF()
+ rules = []
+ i = 0
+ while i < len(rule_defs): # We have to do it like this because rule_defs might grow due to templates
+ name, params, rule_tree, options = rule_defs[i]
+ i += 1
+ if len(params) != 0: # Dont transform templates
+ continue
+ rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None
+ ebnf_to_bnf.rule_options = rule_options
+ ebnf_to_bnf.prefix = name
+ anon_tokens_transf.rule_options = rule_options
+ tree = transformer.transform(rule_tree)
+ res = ebnf_to_bnf.transform(tree)
+ rules.append((name, res, options))
+ rules += ebnf_to_bnf.new_rules
+
+ assert len(rules) == len({name for name, _t, _o in rules}), "Whoops, name collision"
+
+ # 4. Compile tree to Rule objects
+ rule_tree_to_text = RuleTreeToText()
+
+ simplify_rule = SimplifyRule_Visitor()
+ compiled_rules = []
+ for rule_content in rules:
+ name, tree, options = rule_content
+ simplify_rule.visit(tree)
+ expansions = rule_tree_to_text.transform(tree)
+
+ for i, (expansion, alias) in enumerate(expansions):
+ if alias and name.startswith('_'):
+ raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)"% (name, alias))
+
+ empty_indices = [x==_EMPTY for x in expansion]
+ if any(empty_indices):
+ exp_options = copy(options) or RuleOptions()
+ exp_options.empty_indices = empty_indices
+ expansion = [x for x in expansion if x!=_EMPTY]
+ else:
+ exp_options = options
+
+ for sym in expansion:
+ assert isinstance(sym, Symbol)
+ if sym.is_term and exp_options and exp_options.keep_all_tokens:
+ sym.filter_out = False
+ rule = Rule(NonTerminal(name), expansion, i, alias, exp_options)
+ compiled_rules.append(rule)
+
+ # Remove duplicates of empty rules, throw error for non-empty duplicates
+ if len(set(compiled_rules)) != len(compiled_rules):
+ duplicates = classify(compiled_rules, lambda x: x)
+ for dups in duplicates.values():
+ if len(dups) > 1:
+ if dups[0].expansion:
+ raise GrammarError("Rules defined twice: %s\n\n(Might happen due to colliding expansion of optionals: [] or ?)"
+ % ''.join('\n * %s' % i for i in dups))
+
+ # Empty rule; assert all other attributes are equal
+ assert len({(r.alias, r.order, r.options) for r in dups}) == len(dups)
+
+ # Remove duplicates
+ compiled_rules = list(set(compiled_rules))
+
+ # Filter out unused rules
+ while True:
+ c = len(compiled_rules)
+ used_rules = {s for r in compiled_rules
+ for s in r.expansion
+ if isinstance(s, NonTerminal)
+ and s != r.origin}
+ used_rules |= {NonTerminal(s) for s in start}
+ compiled_rules, unused = classify_bool(compiled_rules, lambda r: r.origin in used_rules)
+ for r in unused:
+ logger.debug("Unused rule: %s", r)
+ if len(compiled_rules) == c:
+ break
+
+ # Filter out unused terminals
+ if terminals_to_keep != '*':
+ used_terms = {t.name for r in compiled_rules
+ for t in r.expansion
+ if isinstance(t, Terminal)}
+ terminals, unused = classify_bool(terminals, lambda t: t.name in used_terms or t.name in self.ignore or t.name in terminals_to_keep)
+ if unused:
+ logger.debug("Unused terminals: %s", [t.name for t in unused])
+
+ return terminals, compiled_rules, self.ignore
+
+
+PackageResource = namedtuple('PackageResource', 'pkg_name path')
+
+
+class FromPackageLoader(object):
+ """
+ Provides a simple way of creating custom import loaders that load from packages via ``pkgutil.get_data`` instead of using `open`.
+ This allows them to be compatible even from within zip files.
+
+ Relative imports are handled, so you can just freely use them.
+
+ pkg_name: The name of the package. You can probably provide `__name__` most of the time
+    search_paths: All the paths that will be searched on absolute imports.
+ """
+ def __init__(self, pkg_name, search_paths=("", )):
+ self.pkg_name = pkg_name
+ self.search_paths = search_paths
+
+ def __repr__(self):
+ return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths)
+
+ def __call__(self, base_path, grammar_path):
+ if base_path is None:
+ to_try = self.search_paths
+ else:
+ # Check whether or not the importing grammar was loaded by this module.
+ if not isinstance(base_path, PackageResource) or base_path.pkg_name != self.pkg_name:
+                # Technically false, but FileNotFoundError doesn't exist in python2.7, and this message should never reach the end user anyway
+ raise IOError()
+ to_try = [base_path.path]
+ for path in to_try:
+ full_path = os.path.join(path, grammar_path)
+ try:
+ text = pkgutil.get_data(self.pkg_name, full_path)
+ except IOError:
+ continue
+ else:
+ return PackageResource(self.pkg_name, full_path), text.decode()
+ raise IOError()
+
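+# Editor's note: a hedged usage sketch (not part of lark). A package that ships
+# grammars under e.g. `mypackage/grammars/` could expose them to %import by
+# passing a loader through Lark's `import_paths` option; the names below are
+# hypothetical.
+#
+#     loader = FromPackageLoader('mypackage', ('grammars',))
+#     # Lark(grammar_text, import_paths=[loader])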
+
+stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)
+
+
+
+def resolve_term_references(term_dict):
+ # TODO Solve with transitive closure (maybe)
+
+ while True:
+ changed = False
+ for name, token_tree in term_dict.items():
+ if token_tree is None: # Terminal added through %declare
+ continue
+ for exp in token_tree.find_data('value'):
+ item ,= exp.children
+ if isinstance(item, Token):
+ if item.type == 'RULE':
+ raise GrammarError("Rules aren't allowed inside terminals (%s in %s)" % (item, name))
+ if item.type == 'TERMINAL':
+ try:
+ term_value = term_dict[item]
+ except KeyError:
+ raise GrammarError("Terminal used but not defined: %s" % item)
+ assert term_value is not None
+ exp.children[0] = term_value
+ changed = True
+ if not changed:
+ break
+
+ for name, term in term_dict.items():
+ if term: # Not just declared
+ for child in term.children:
+ ids = [id(x) for x in child.iter_subtrees()]
+ if id(term) in ids:
+ raise GrammarError("Recursion in terminal '%s' (recursion is only allowed in rules, not terminals)" % name)
+
+
+def options_from_rule(name, params, *x):
+ if len(x) > 1:
+ priority, expansions = x
+ priority = int(priority)
+ else:
+ expansions ,= x
+ priority = None
+ params = [t.value for t in params.children] if params is not None else [] # For the grammar parser
+
+ keep_all_tokens = name.startswith('!')
+ name = name.lstrip('!')
+ expand1 = name.startswith('?')
+ name = name.lstrip('?')
+
+ return name, params, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority,
+ template_source=(name if params else None))
+
+
+def symbols_from_strcase(expansion):
+ return [Terminal(x, filter_out=x.startswith('_')) if x.isupper() else NonTerminal(x) for x in expansion]
+
+
+@inline_args
+class PrepareGrammar(Transformer_InPlace):
+ def terminal(self, name):
+ return name
+
+ def nonterminal(self, name):
+ return name
+
+
+def _find_used_symbols(tree):
+ assert tree.data == 'expansions'
+ return {t for x in tree.find_data('expansion')
+ for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))}
+
+
+def _get_parser():
+ try:
+ return _get_parser.cache
+ except AttributeError:
+ terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()]
+
+ rules = [options_from_rule(name, None, x) for name, x in RULES.items()]
+ rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o)
+ for r, _p, xs, o in rules for i, x in enumerate(xs)]
+ callback = ParseTreeBuilder(rules, ST).create_callback()
+ import re
+ lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT', 'BACKSLASH'])
+ parser_conf = ParserConf(rules, callback, ['start'])
+ lexer_conf.lexer_type = 'standard'
+ parser_conf.parser_type = 'lalr'
+ _get_parser.cache = ParsingFrontend(lexer_conf, parser_conf, None)
+ return _get_parser.cache
+
+GRAMMAR_ERRORS = [
+ ('Incorrect type of value', ['a: 1\n']),
+ ('Unclosed parenthesis', ['a: (\n']),
+ ('Unmatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']),
+ ('Expecting rule or terminal definition (missing colon)', ['a\n', 'A\n', 'a->\n', 'A->\n', 'a A\n']),
+ ('Illegal name for rules or terminals', ['Aa:\n']),
+ ('Alias expects lowercase name', ['a: -> "a"\n']),
+ ('Unexpected colon', ['a::\n', 'a: b:\n', 'a: B:\n', 'a: "a":\n']),
+ ('Misplaced operator', ['a: b??', 'a: b(?)', 'a:+\n', 'a:?\n', 'a:*\n', 'a:|*\n']),
+ ('Expecting option ("|") or a new rule or terminal definition', ['a:a\n()\n']),
+ ('Terminal names cannot contain dots', ['A.B\n']),
+ ('Expecting rule or terminal definition', ['"a"\n']),
+ ('%import expects a name', ['%import "a"\n']),
+ ('%ignore expects a value', ['%ignore %import\n']),
+ ]
+
+def _translate_parser_exception(parse, e):
+ error = e.match_examples(parse, GRAMMAR_ERRORS, use_accepts=True)
+ if error:
+ return error
+ elif 'STRING' in e.expected:
+ return "Expecting a value"
+
+def _parse_grammar(text, name, start='start'):
+ try:
+ tree = _get_parser().parse(text + '\n', start)
+ except UnexpectedCharacters as e:
+ context = e.get_context(text)
+ raise GrammarError("Unexpected input at line %d column %d in %s: \n\n%s" %
+ (e.line, e.column, name, context))
+ except UnexpectedToken as e:
+ context = e.get_context(text)
+ error = _translate_parser_exception(_get_parser().parse, e)
+ if error:
+ raise GrammarError("%s, at line %s column %s\n\n%s" % (error, e.line, e.column, context))
+ raise
+
+ return PrepareGrammar().transform(tree)
+
+
+def _error_repr(error):
+ if isinstance(error, UnexpectedToken):
+ error2 = _translate_parser_exception(_get_parser().parse, error)
+ if error2:
+ return error2
+ expected = ', '.join(error.accepts or error.expected)
+ return "Unexpected token %r. Expected one of: {%s}" % (str(error.token), expected)
+ else:
+ return str(error)
+
+def _search_interactive_parser(interactive_parser, predicate):
+ def expand(node):
+ path, p = node
+ for choice in p.choices():
+ t = Token(choice, '')
+ try:
+ new_p = p.feed_token(t)
+ except ParseError: # Illegal
+ pass
+ else:
+ yield path + (choice,), new_p
+
+ for path, p in bfs_all_unique([((), interactive_parser)], expand):
+ if predicate(p):
+ return path, p
+
+def find_grammar_errors(text, start='start'):
+ errors = []
+ def on_error(e):
+ errors.append((e, _error_repr(e)))
+
+ # recover to a new line
+ token_path, _ = _search_interactive_parser(e.interactive_parser.as_immutable(), lambda p: '_NL' in p.choices())
+ for token_type in token_path:
+ e.interactive_parser.feed_token(Token(token_type, ''))
+ e.interactive_parser.feed_token(Token('_NL', '\n'))
+ return True
+
+ _tree = _get_parser().parse(text + '\n', start, on_error=on_error)
+
+ errors_by_line = classify(errors, lambda e: e[0].line)
+ errors = [el[0] for el in errors_by_line.values()] # already sorted
+
+ for e in errors:
+ e[0].interactive_parser = None
+ return errors
+
+
+def _get_mangle(prefix, aliases, base_mangle=None):
+ def mangle(s):
+ if s in aliases:
+ s = aliases[s]
+ else:
+ if s[0] == '_':
+ s = '_%s__%s' % (prefix, s[1:])
+ else:
+ s = '%s__%s' % (prefix, s)
+ if base_mangle is not None:
+ s = base_mangle(s)
+ return s
+ return mangle
+
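+# Editor's note: illustrative values (not part of lark), assuming an empty
+# aliases dict:
+#
+#     m = _get_mangle('common', {})
+#     m('NUMBER')  # -> 'common__NUMBER'
+#     m('_sep')    # -> '_common__sep'
+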
+def _mangle_exp(exp, mangle):
+ if mangle is None:
+ return exp
+ exp = deepcopy(exp) # TODO: is this needed
+ for t in exp.iter_subtrees():
+ for i, c in enumerate(t.children):
+ if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'):
+ t.children[i] = Token(c.type, mangle(c.value))
+ return exp
+
+
+
+class GrammarBuilder:
+ def __init__(self, global_keep_all_tokens=False, import_paths=None, used_files=None):
+ self.global_keep_all_tokens = global_keep_all_tokens
+ self.import_paths = import_paths or []
+ self.used_files = used_files or {}
+
+ self._definitions = {}
+ self._ignore_names = []
+
+ def _is_term(self, name):
+ # Imported terminals are of the form `Path__to__Grammar__file__TERMINAL_NAME`
+ # Only the last part is the actual name, and the rest might contain mixed case
+ return name.rpartition('__')[-1].isupper()
+
+ def _grammar_error(self, msg, *names):
+ args = {}
+ for i, name in enumerate(names, start=1):
+ postfix = '' if i == 1 else str(i)
+ args['name' + postfix] = name
+ args['type' + postfix] = lowercase_type = ("rule", "terminal")[self._is_term(name)]
+ args['Type' + postfix] = lowercase_type.title()
+ raise GrammarError(msg.format(**args))
+
+ def _check_options(self, name, options):
+ if self._is_term(name):
+ if options is None:
+ options = 1
+ # if we don't use Integral here, we run into python2.7/python3 problems with long vs int
+ elif not isinstance(options, Integral):
+ raise GrammarError("Terminal require a single int as 'options' (e.g. priority), got %s" % (type(options),))
+ else:
+ if options is None:
+ options = RuleOptions()
+ elif not isinstance(options, RuleOptions):
+ raise GrammarError("Rules require a RuleOptions instance as 'options'")
+ if self.global_keep_all_tokens:
+ options.keep_all_tokens = True
+ return options
+
+
+ def _define(self, name, exp, params=(), options=None, override=False):
+ if name in self._definitions:
+ if not override:
+ self._grammar_error("{Type} '{name}' defined more than once", name)
+ elif override:
+ self._grammar_error("Cannot override a nonexisting {type} {name}", name)
+
+ if name.startswith('__'):
+ self._grammar_error('Names starting with double-underscore are reserved (Error at {name})', name)
+
+ self._definitions[name] = (params, exp, self._check_options(name, options))
+
+ def _extend(self, name, exp, params=(), options=None):
+ if name not in self._definitions:
+ self._grammar_error("Can't extend {type} {name} as it wasn't defined before", name)
+ if tuple(params) != tuple(self._definitions[name][0]):
+ self._grammar_error("Cannot extend {type} with different parameters: {name}", name)
+ # TODO: think about what to do with 'options'
+ base = self._definitions[name][1]
+
+ assert isinstance(base, Tree) and base.data == 'expansions'
+ base.children.insert(0, exp)
+
+ def _ignore(self, exp_or_name):
+ if isinstance(exp_or_name, str):
+ self._ignore_names.append(exp_or_name)
+ else:
+ assert isinstance(exp_or_name, Tree)
+ t = exp_or_name
+ if t.data == 'expansions' and len(t.children) == 1:
+ t2 ,= t.children
+ if t2.data=='expansion' and len(t2.children) == 1:
+ item ,= t2.children
+ if item.data == 'value':
+ item ,= item.children
+ if isinstance(item, Token) and item.type == 'TERMINAL':
+ self._ignore_names.append(item.value)
+ return
+
+ name = '__IGNORE_%d'% len(self._ignore_names)
+ self._ignore_names.append(name)
+ self._definitions[name] = ((), t, 1)
+
+ def _declare(self, *names):
+ for name in names:
+ self._define(name, None)
+
+ def _unpack_import(self, stmt, grammar_name):
+ if len(stmt.children) > 1:
+ path_node, arg1 = stmt.children
+ else:
+ path_node, = stmt.children
+ arg1 = None
+
+ if isinstance(arg1, Tree): # Multi import
+ dotted_path = tuple(path_node.children)
+ names = arg1.children
+ aliases = dict(zip(names, names)) # Can't have aliased multi import, so all aliases will be the same as names
+ else: # Single import
+ dotted_path = tuple(path_node.children[:-1])
+ if not dotted_path:
+ name ,= path_node.children
+ raise GrammarError("Nothing was imported from grammar `%s`" % name)
+ name = path_node.children[-1] # Get name from dotted path
+ aliases = {name.value: (arg1 or name).value} # Aliases if exist
+
+ if path_node.data == 'import_lib': # Import from library
+ base_path = None
+ else: # Relative import
+ if grammar_name == '<string>': # Import relative to script file path if grammar is coded in script
+ try:
+ base_file = os.path.abspath(sys.modules['__main__'].__file__)
+ except AttributeError:
+ base_file = None
+ else:
+ base_file = grammar_name # Import relative to grammar file path if external grammar file
+ if base_file:
+ if isinstance(base_file, PackageResource):
+ base_path = PackageResource(base_file.pkg_name, os.path.split(base_file.path)[0])
+ else:
+ base_path = os.path.split(base_file)[0]
+ else:
+ base_path = os.path.abspath(os.path.curdir)
+
+ return dotted_path, base_path, aliases
+
+ def _unpack_definition(self, tree, mangle):
+ if tree.data == 'rule':
+ name, params, exp, opts = options_from_rule(*tree.children)
+ else:
+ name = tree.children[0].value
+ params = () # TODO terminal templates
+ opts = int(tree.children[1]) if len(tree.children) == 3 else 1 # priority
+ exp = tree.children[-1]
+
+ if mangle is not None:
+ params = tuple(mangle(p) for p in params)
+ name = mangle(name)
+
+ exp = _mangle_exp(exp, mangle)
+ return name, exp, params, opts
+
+
+ def load_grammar(self, grammar_text, grammar_name="<?>", mangle=None):
+ tree = _parse_grammar(grammar_text, grammar_name)
+
+ imports = {}
+ for stmt in tree.children:
+ if stmt.data == 'import':
+ dotted_path, base_path, aliases = self._unpack_import(stmt, grammar_name)
+ try:
+ import_base_path, import_aliases = imports[dotted_path]
+ assert base_path == import_base_path, 'Inconsistent base_path for %s.' % '.'.join(dotted_path)
+ import_aliases.update(aliases)
+ except KeyError:
+ imports[dotted_path] = base_path, aliases
+
+ for dotted_path, (base_path, aliases) in imports.items():
+ self.do_import(dotted_path, base_path, aliases, mangle)
+
+ for stmt in tree.children:
+ if stmt.data in ('term', 'rule'):
+ self._define(*self._unpack_definition(stmt, mangle))
+ elif stmt.data == 'override':
+ r ,= stmt.children
+ self._define(*self._unpack_definition(r, mangle), override=True)
+ elif stmt.data == 'extend':
+ r ,= stmt.children
+ self._extend(*self._unpack_definition(r, mangle))
+ elif stmt.data == 'ignore':
+ # if mangle is not None, we shouldn't apply ignore, since we aren't in a toplevel grammar
+ if mangle is None:
+ self._ignore(*stmt.children)
+ elif stmt.data == 'declare':
+ names = [t.value for t in stmt.children]
+ if mangle is None:
+ self._declare(*names)
+ else:
+ self._declare(*map(mangle, names))
+ elif stmt.data == 'import':
+ pass
+ else:
+ assert False, stmt
+
+
+ term_defs = { name: exp
+ for name, (_params, exp, _options) in self._definitions.items()
+ if self._is_term(name)
+ }
+ resolve_term_references(term_defs)
+
+
+ def _remove_unused(self, used):
+ def rule_dependencies(symbol):
+ if self._is_term(symbol):
+ return []
+ try:
+ params, tree,_ = self._definitions[symbol]
+ except KeyError:
+ return []
+ return _find_used_symbols(tree) - set(params)
+
+ _used = set(bfs(used, rule_dependencies))
+ self._definitions = {k: v for k, v in self._definitions.items() if k in _used}
+
+
+ def do_import(self, dotted_path, base_path, aliases, base_mangle=None):
+ assert dotted_path
+ mangle = _get_mangle('__'.join(dotted_path), aliases, base_mangle)
+ grammar_path = os.path.join(*dotted_path) + EXT
+ to_try = self.import_paths + ([base_path] if base_path is not None else []) + [stdlib_loader]
+ for source in to_try:
+ try:
+ if callable(source):
+ joined_path, text = source(base_path, grammar_path)
+ else:
+ joined_path = os.path.join(source, grammar_path)
+ with open(joined_path, encoding='utf8') as f:
+ text = f.read()
+ except IOError:
+ continue
+ else:
+ h = hashlib.md5(text.encode('utf8')).hexdigest()
+ if self.used_files.get(joined_path, h) != h:
+ raise RuntimeError("Grammar file was changed during importing")
+ self.used_files[joined_path] = h
+
+ gb = GrammarBuilder(self.global_keep_all_tokens, self.import_paths, self.used_files)
+ gb.load_grammar(text, joined_path, mangle)
+ gb._remove_unused(map(mangle, aliases))
+ for name in gb._definitions:
+ if name in self._definitions:
+ raise GrammarError("Cannot import '%s' from '%s': Symbol already defined." % (name, grammar_path))
+
+ self._definitions.update(**gb._definitions)
+ break
+ else:
+ # Search failed. Make Python throw a nice error.
+ open(grammar_path, encoding='utf8')
+ assert False, "Couldn't import grammar %s, but a corresponding file was found at a place where lark doesn't search for it" % (dotted_path,)
+
+
+ def validate(self):
+ for name, (params, exp, _options) in self._definitions.items():
+ for i, p in enumerate(params):
+ if p in self._definitions:
+ raise GrammarError("Template Parameter conflicts with rule %s (in template %s)" % (p, name))
+ if p in params[:i]:
+ raise GrammarError("Duplicate Template Parameter %s (in template %s)" % (p, name))
+
+ if exp is None: # Remaining checks don't apply to abstract rules/terminals
+ continue
+
+ for temp in exp.find_data('template_usage'):
+ sym = temp.children[0]
+ args = temp.children[1:]
+ if sym not in params:
+ if sym not in self._definitions:
+ self._grammar_error("Template '%s' used but not defined (in {type} {name})" % sym, name)
+ if len(args) != len(self._definitions[sym][0]):
+ expected, actual = len(self._definitions[sym][0]), len(args)
+ self._grammar_error("Wrong number of template arguments used for {name} "
+ "(expected %s, got %s) (in {type2} {name2})" % (expected, actual), sym, name)
+
+ for sym in _find_used_symbols(exp):
+ if sym not in self._definitions and sym not in params:
+ self._grammar_error("{Type} '{name}' used but not defined (in {type2} {name2})", sym, name)
+
+ if not set(self._definitions).issuperset(self._ignore_names):
+ raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(self._ignore_names) - set(self._definitions)))
+
+ def build(self):
+ self.validate()
+ rule_defs = []
+ term_defs = []
+ for name, (params, exp, options) in self._definitions.items():
+ if self._is_term(name):
+ assert len(params) == 0
+ term_defs.append((name, (exp, options)))
+ else:
+ rule_defs.append((name, params, exp, options))
+ # resolve_term_references(term_defs)
+ return Grammar(rule_defs, term_defs, self._ignore_names)
+
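+# Editor's note: a hedged usage sketch (not part of lark) of the builder API
+# above; the one-rule grammar is hypothetical.
+#
+#     gb = GrammarBuilder()
+#     gb.load_grammar('start: "a"+\n')
+#     grammar = gb.build()
+#     terminals, rules, ignore = grammar.compile(['start'], '*')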
+
+def verify_used_files(file_hashes):
+ for path, old in file_hashes.items():
+ text = None
+ if isinstance(path, str) and os.path.exists(path):
+ with open(path, encoding='utf8') as f:
+ text = f.read()
+ elif isinstance(path, PackageResource):
+ with suppress(IOError):
+ text = pkgutil.get_data(*path).decode('utf-8')
+ if text is None: # We don't know how to load the path. ignore it.
+ continue
+
+ current = hashlib.md5(text.encode()).hexdigest()
+ if old != current:
+ logger.info("File %r changed, rebuilding Parser" % path)
+ return False
+ return True
+
+def list_grammar_imports(grammar, import_paths=[]):
+ "Returns a list of paths to the lark grammars imported by the given grammar (recursively)"
+ builder = GrammarBuilder(False, import_paths)
+ builder.load_grammar(grammar, '<string>')
+ return list(builder.used_files.keys())
+
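+# Editor's note: illustrative usage (not part of lark):
+#
+#     list_grammar_imports('%import common.NUMBER\nstart: NUMBER\n')
+#
+# would return the resolved location of lark's bundled grammars/common.lark
+# (plus anything that grammar imports in turn).
+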
+def load_grammar(grammar, source, import_paths, global_keep_all_tokens):
+ builder = GrammarBuilder(global_keep_all_tokens, import_paths)
+ builder.load_grammar(grammar, source)
+ return builder.build(), builder.used_files
diff --git a/.venv/lib/python3.12/site-packages/lark/parse_tree_builder.py b/.venv/lib/python3.12/site-packages/lark/parse_tree_builder.py
new file mode 100644
index 00000000..fa526b0c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/parse_tree_builder.py
@@ -0,0 +1,387 @@
+from .exceptions import GrammarError, ConfigurationError
+from .lexer import Token
+from .tree import Tree
+from .visitors import InlineTransformer # XXX Deprecated
+from .visitors import Transformer_InPlace
+from .visitors import _vargs_meta, _vargs_meta_inline
+
+###{standalone
+from functools import partial, wraps
+from itertools import repeat, product
+
+
+class ExpandSingleChild:
+ def __init__(self, node_builder):
+ self.node_builder = node_builder
+
+ def __call__(self, children):
+ if len(children) == 1:
+ return children[0]
+ else:
+ return self.node_builder(children)
+
+
+
+class PropagatePositions:
+ def __init__(self, node_builder, node_filter=None):
+ self.node_builder = node_builder
+ self.node_filter = node_filter
+
+ def __call__(self, children):
+ res = self.node_builder(children)
+
+ if isinstance(res, Tree):
+ # Calculate positions while the tree is streaming, according to the rule:
+ # - nodes start at the start of their first child's container,
+ # and end at the end of their last child's container.
+ # Containers are nodes that take up space in text, but have been inlined in the tree.
+
+ res_meta = res.meta
+
+ first_meta = self._pp_get_meta(children)
+ if first_meta is not None:
+ if not hasattr(res_meta, 'line'):
+                    # meta might already have been set, probably because the rule was inlined (e.g. `?rule`); only fill it in if it wasn't
+ res_meta.line = getattr(first_meta, 'container_line', first_meta.line)
+ res_meta.column = getattr(first_meta, 'container_column', first_meta.column)
+ res_meta.start_pos = getattr(first_meta, 'container_start_pos', first_meta.start_pos)
+ res_meta.empty = False
+
+ res_meta.container_line = getattr(first_meta, 'container_line', first_meta.line)
+ res_meta.container_column = getattr(first_meta, 'container_column', first_meta.column)
+
+ last_meta = self._pp_get_meta(reversed(children))
+ if last_meta is not None:
+ if not hasattr(res_meta, 'end_line'):
+ res_meta.end_line = getattr(last_meta, 'container_end_line', last_meta.end_line)
+ res_meta.end_column = getattr(last_meta, 'container_end_column', last_meta.end_column)
+ res_meta.end_pos = getattr(last_meta, 'container_end_pos', last_meta.end_pos)
+ res_meta.empty = False
+
+ res_meta.container_end_line = getattr(last_meta, 'container_end_line', last_meta.end_line)
+ res_meta.container_end_column = getattr(last_meta, 'container_end_column', last_meta.end_column)
+
+ return res
+
+ def _pp_get_meta(self, children):
+ for c in children:
+ if self.node_filter is not None and not self.node_filter(c):
+ continue
+ if isinstance(c, Tree):
+ if not c.meta.empty:
+ return c.meta
+ elif isinstance(c, Token):
+ return c
+
+def make_propagate_positions(option):
+ if callable(option):
+ return partial(PropagatePositions, node_filter=option)
+ elif option is True:
+ return PropagatePositions
+ elif option is False:
+ return None
+
+ raise ConfigurationError('Invalid option for propagate_positions: %r' % option)
+
+
+class ChildFilter:
+ def __init__(self, to_include, append_none, node_builder):
+ self.node_builder = node_builder
+ self.to_include = to_include
+ self.append_none = append_none
+
+ def __call__(self, children):
+ filtered = []
+
+ for i, to_expand, add_none in self.to_include:
+ if add_none:
+ filtered += [None] * add_none
+ if to_expand:
+ filtered += children[i].children
+ else:
+ filtered.append(children[i])
+
+ if self.append_none:
+ filtered += [None] * self.append_none
+
+ return self.node_builder(filtered)
+
+
+class ChildFilterLALR(ChildFilter):
+ """Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"""
+
+ def __call__(self, children):
+ filtered = []
+ for i, to_expand, add_none in self.to_include:
+ if add_none:
+ filtered += [None] * add_none
+ if to_expand:
+ if filtered:
+ filtered += children[i].children
+ else: # Optimize for left-recursion
+ filtered = children[i].children
+ else:
+ filtered.append(children[i])
+
+ if self.append_none:
+ filtered += [None] * self.append_none
+
+ return self.node_builder(filtered)
+
+
+class ChildFilterLALR_NoPlaceholders(ChildFilter):
+ "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"
+ def __init__(self, to_include, node_builder):
+ self.node_builder = node_builder
+ self.to_include = to_include
+
+ def __call__(self, children):
+ filtered = []
+ for i, to_expand in self.to_include:
+ if to_expand:
+ if filtered:
+ filtered += children[i].children
+ else: # Optimize for left-recursion
+ filtered = children[i].children
+ else:
+ filtered.append(children[i])
+ return self.node_builder(filtered)
+
+
+def _should_expand(sym):
+ return not sym.is_term and sym.name.startswith('_')
+
+
+def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices):
+ # Prepare empty_indices as: How many Nones to insert at each index?
+ if _empty_indices:
+ assert _empty_indices.count(False) == len(expansion)
+ s = ''.join(str(int(b)) for b in _empty_indices)
+ empty_indices = [len(ones) for ones in s.split('0')]
+ assert len(empty_indices) == len(expansion)+1, (empty_indices, len(expansion))
+ else:
+ empty_indices = [0] * (len(expansion)+1)
+
+ to_include = []
+ nones_to_add = 0
+ for i, sym in enumerate(expansion):
+ nones_to_add += empty_indices[i]
+ if keep_all_tokens or not (sym.is_term and sym.filter_out):
+ to_include.append((i, _should_expand(sym), nones_to_add))
+ nones_to_add = 0
+
+ nones_to_add += empty_indices[len(expansion)]
+
+ if _empty_indices or len(to_include) < len(expansion) or any(to_expand for i, to_expand,_ in to_include):
+ if _empty_indices or ambiguous:
+ return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include, nones_to_add)
+ else:
+ # LALR without placeholders
+ return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include])
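+
+# A minimal sketch (editor's note, not part of the upstream source) of the
+# placeholder behaviour these filters implement, driven through the public
+# ``maybe_placeholders`` option of ``lark.Lark``:
+#
+#     from lark import Lark
+#
+#     parser = Lark(r"""
+#         start: "a" [B] "c"
+#         B: "b"
+#     """, parser="lalr", maybe_placeholders=True)
+#
+#     print(parser.parse("abc").children)   # [Token('B', 'b')]
+#     print(parser.parse("ac").children)    # [None] -- placeholder for the missing [B]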
+
+
+class AmbiguousExpander:
+ """Deal with the case where we're expanding children ('_rule') into a parent but the children
+ are ambiguous, i.e. (parent->_ambig->_expand_this_rule). In this case, make the parent itself
+ ambiguous with as many copies as there are ambiguous children, and then copy the ambiguous children
+ into the right parents in the right places, essentially shifting the ambiguity up the tree."""
+ def __init__(self, to_expand, tree_class, node_builder):
+ self.node_builder = node_builder
+ self.tree_class = tree_class
+ self.to_expand = to_expand
+
+ def __call__(self, children):
+ def _is_ambig_tree(t):
+ return hasattr(t, 'data') and t.data == '_ambig'
+
+ # -- When we're repeatedly expanding ambiguities we can end up with nested ambiguities.
+ # All children of an '_ambig' node are derivations of that node, so an '_ambig' node
+ # nested within another '_ambig' node can simply be expanded into the parent '_ambig'
+ # node as an alternative derivation.
+ ambiguous = []
+ for i, child in enumerate(children):
+ if _is_ambig_tree(child):
+ if i in self.to_expand:
+ ambiguous.append(i)
+
+ child.expand_kids_by_data('_ambig')
+
+ if not ambiguous:
+ return self.node_builder(children)
+
+ expand = [iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children)]
+ return self.tree_class('_ambig', [self.node_builder(list(f[0])) for f in product(zip(*expand))])
+
+
+def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens):
+ to_expand = [i for i, sym in enumerate(expansion)
+ if keep_all_tokens or ((not (sym.is_term and sym.filter_out)) and _should_expand(sym))]
+ if to_expand:
+ return partial(AmbiguousExpander, to_expand, tree_class)
+
+
+class AmbiguousIntermediateExpander:
+ """
+ Propagate ambiguous intermediate nodes and their derivations up to the
+ current rule.
+
+ In general, converts
+
+ rule
+ _iambig
+ _inter
+ someChildren1
+ ...
+ _inter
+ someChildren2
+ ...
+ someChildren3
+ ...
+
+ to
+
+ _ambig
+ rule
+ someChildren1
+ ...
+ someChildren3
+ ...
+ rule
+ someChildren2
+ ...
+ someChildren3
+ ...
+ rule
+ childrenFromNestedIambigs
+ ...
+ someChildren3
+ ...
+ ...
+
+ propagating up any nested '_iambig' nodes along the way.
+ """
+
+ def __init__(self, tree_class, node_builder):
+ self.node_builder = node_builder
+ self.tree_class = tree_class
+
+ def __call__(self, children):
+ def _is_iambig_tree(child):
+ return hasattr(child, 'data') and child.data == '_iambig'
+
+ def _collapse_iambig(children):
+ """
+ Recursively flatten the derivations of the parent of an '_iambig'
+ node. Returns a list of '_inter' nodes guaranteed not
+ to contain any nested '_iambig' nodes, or None if children does
+ not contain an '_iambig' node.
+ """
+
+ # Due to the structure of the SPPF,
+ # an '_iambig' node can only appear as the first child
+ if children and _is_iambig_tree(children[0]):
+ iambig_node = children[0]
+ result = []
+ for grandchild in iambig_node.children:
+ collapsed = _collapse_iambig(grandchild.children)
+ if collapsed:
+ for child in collapsed:
+ child.children += children[1:]
+ result += collapsed
+ else:
+ new_tree = self.tree_class('_inter', grandchild.children + children[1:])
+ result.append(new_tree)
+ return result
+
+ collapsed = _collapse_iambig(children)
+ if collapsed:
+ processed_nodes = [self.node_builder(c.children) for c in collapsed]
+ return self.tree_class('_ambig', processed_nodes)
+
+ return self.node_builder(children)
+
+
+def ptb_inline_args(func):
+ @wraps(func)
+ def f(children):
+ return func(*children)
+ return f
+
+
+def inplace_transformer(func):
+ @wraps(func)
+ def f(children):
+ # function name in a Transformer is a rule name.
+ tree = Tree(func.__name__, children)
+ return func(tree)
+ return f
+
+
+def apply_visit_wrapper(func, name, wrapper):
+ if wrapper is _vargs_meta or wrapper is _vargs_meta_inline:
+ raise NotImplementedError("Meta args not supported for internal transformer")
+
+ @wraps(func)
+ def f(children):
+ return wrapper(func, name, children, None)
+ return f
+
+
+class ParseTreeBuilder:
+ def __init__(self, rules, tree_class, propagate_positions=False, ambiguous=False, maybe_placeholders=False):
+ self.tree_class = tree_class
+ self.propagate_positions = propagate_positions
+ self.ambiguous = ambiguous
+ self.maybe_placeholders = maybe_placeholders
+
+ self.rule_builders = list(self._init_builders(rules))
+
+ def _init_builders(self, rules):
+ propagate_positions = make_propagate_positions(self.propagate_positions)
+
+ for rule in rules:
+ options = rule.options
+ keep_all_tokens = options.keep_all_tokens
+ expand_single_child = options.expand1
+
+ wrapper_chain = list(filter(None, [
+ (expand_single_child and not rule.alias) and ExpandSingleChild,
+ maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None),
+ propagate_positions,
+ self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens),
+ self.ambiguous and partial(AmbiguousIntermediateExpander, self.tree_class)
+ ]))
+
+ yield rule, wrapper_chain
+
+ def create_callback(self, transformer=None):
+ callbacks = {}
+
+ for rule, wrapper_chain in self.rule_builders:
+
+ user_callback_name = rule.alias or rule.options.template_source or rule.origin.name
+ try:
+ f = getattr(transformer, user_callback_name)
+ # XXX InlineTransformer is deprecated!
+ wrapper = getattr(f, 'visit_wrapper', None)
+ if wrapper is not None:
+ f = apply_visit_wrapper(f, user_callback_name, wrapper)
+ else:
+ if isinstance(transformer, InlineTransformer):
+ f = ptb_inline_args(f)
+ elif isinstance(transformer, Transformer_InPlace):
+ f = inplace_transformer(f)
+ except AttributeError:
+ f = partial(self.tree_class, user_callback_name)
+
+ for w in wrapper_chain:
+ f = w(f)
+
+ if rule in callbacks:
+ raise GrammarError("Rule '%s' already exists" % (rule,))
+
+ callbacks[rule] = f
+
+ return callbacks
+
+###}
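+
+# A usage sketch (editor's note, not part of the upstream source). The callbacks
+# built by ParseTreeBuilder.create_callback are what make ``transformer=`` work
+# when passed directly to the public ``lark.Lark`` constructor (LALR only),
+# transforming the tree while it is being built:
+#
+#     from lark import Lark, Transformer
+#
+#     class SumInts(Transformer):
+#         def start(self, children):
+#             return sum(int(tok) for tok in children)
+#
+#     parser = Lark(r"""
+#         start: INT ("+" INT)*
+#         %import common.INT
+#         %import common.WS
+#         %ignore WS
+#     """, parser="lalr", transformer=SumInts())
+#
+#     print(parser.parse("1 + 2 + 3"))   # expected: 6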
diff --git a/.venv/lib/python3.12/site-packages/lark/parser_frontends.py b/.venv/lib/python3.12/site-packages/lark/parser_frontends.py
new file mode 100644
index 00000000..0e53dd58
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/parser_frontends.py
@@ -0,0 +1,239 @@
+from .exceptions import ConfigurationError, GrammarError, assert_config
+from .utils import get_regexp_width, Serialize
+from .parsers.grammar_analysis import GrammarAnalyzer
+from .lexer import LexerThread, TraditionalLexer, ContextualLexer, Lexer, Token, TerminalDef
+from .parsers import earley, xearley, cyk
+from .parsers.lalr_parser import LALR_Parser
+from .tree import Tree
+from .common import LexerConf, ParserConf
+try:
+ import regex
+except ImportError:
+ regex = None
+import re
+
+###{standalone
+
+def _wrap_lexer(lexer_class):
+ future_interface = getattr(lexer_class, '__future_interface__', False)
+ if future_interface:
+ return lexer_class
+ else:
+ class CustomLexerWrapper(Lexer):
+ def __init__(self, lexer_conf):
+ self.lexer = lexer_class(lexer_conf)
+ def lex(self, lexer_state, parser_state):
+ return self.lexer.lex(lexer_state.text)
+ return CustomLexerWrapper
+
+
+class MakeParsingFrontend:
+ def __init__(self, parser_type, lexer_type):
+ self.parser_type = parser_type
+ self.lexer_type = lexer_type
+
+ def __call__(self, lexer_conf, parser_conf, options):
+ assert isinstance(lexer_conf, LexerConf)
+ assert isinstance(parser_conf, ParserConf)
+ parser_conf.parser_type = self.parser_type
+ lexer_conf.lexer_type = self.lexer_type
+ return ParsingFrontend(lexer_conf, parser_conf, options)
+
+ def deserialize(self, data, memo, lexer_conf, callbacks, options):
+ parser_conf = ParserConf.deserialize(data['parser_conf'], memo)
+ parser = LALR_Parser.deserialize(data['parser'], memo, callbacks, options.debug)
+ parser_conf.callbacks = callbacks
+ return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser)
+
+
+
+
+class ParsingFrontend(Serialize):
+ __serialize_fields__ = 'lexer_conf', 'parser_conf', 'parser', 'options'
+
+ def __init__(self, lexer_conf, parser_conf, options, parser=None):
+ self.parser_conf = parser_conf
+ self.lexer_conf = lexer_conf
+ self.options = options
+
+ # Set-up parser
+ if parser: # From cache
+ self.parser = parser
+ else:
+ create_parser = {
+ 'lalr': create_lalr_parser,
+ 'earley': create_earley_parser,
+ 'cyk': CYK_FrontEnd,
+ }[parser_conf.parser_type]
+ self.parser = create_parser(lexer_conf, parser_conf, options)
+
+ # Set-up lexer
+ lexer_type = lexer_conf.lexer_type
+ self.skip_lexer = False
+ if lexer_type in ('dynamic', 'dynamic_complete'):
+ assert lexer_conf.postlex is None
+ self.skip_lexer = True
+ return
+
+ try:
+ create_lexer = {
+ 'standard': create_traditional_lexer,
+ 'contextual': create_contextual_lexer,
+ }[lexer_type]
+ except KeyError:
+ assert issubclass(lexer_type, Lexer), lexer_type
+ self.lexer = _wrap_lexer(lexer_type)(lexer_conf)
+ else:
+ self.lexer = create_lexer(lexer_conf, self.parser, lexer_conf.postlex)
+
+ if lexer_conf.postlex:
+ self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex)
+
+ def _verify_start(self, start=None):
+ if start is None:
+ start_decls = self.parser_conf.start
+ if len(start_decls) > 1:
+ raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start_decls)
+ start, = start_decls
+ elif start not in self.parser_conf.start:
+ raise ConfigurationError("Unknown start rule %s. Must be one of %r" % (start, self.parser_conf.start))
+ return start
+
+ def parse(self, text, start=None, on_error=None):
+ chosen_start = self._verify_start(start)
+ stream = text if self.skip_lexer else LexerThread(self.lexer, text)
+ kw = {} if on_error is None else {'on_error': on_error}
+ return self.parser.parse(stream, chosen_start, **kw)
+
+ def parse_interactive(self, text=None, start=None):
+ chosen_start = self._verify_start(start)
+ if self.parser_conf.parser_type != 'lalr':
+ raise ConfigurationError("parse_interactive() currently only works with parser='lalr' ")
+ stream = text if self.skip_lexer else LexerThread(self.lexer, text)
+ return self.parser.parse_interactive(stream, chosen_start)
+
+
+def get_frontend(parser, lexer):
+ assert_config(parser, ('lalr', 'earley', 'cyk'))
+ if not isinstance(lexer, type): # not custom lexer?
+ expected = {
+ 'lalr': ('standard', 'contextual'),
+ 'earley': ('standard', 'dynamic', 'dynamic_complete'),
+ 'cyk': ('standard', ),
+ }[parser]
+ assert_config(lexer, expected, 'Parser %r does not support lexer %%r, expected one of %%s' % parser)
+
+ return MakeParsingFrontend(parser, lexer)
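+
+# A minimal sketch (editor's note, not part of the upstream source) of how these
+# parser/lexer combinations are chosen through the public ``lark.Lark`` API:
+#
+#     from lark import Lark
+#
+#     grammar = r"""
+#         start: NAME "=" NUMBER
+#         %import common.CNAME -> NAME
+#         %import common.NUMBER
+#         %import common.WS
+#         %ignore WS
+#     """
+#
+#     # LALR(1) driven by the contextual lexer:
+#     lalr = Lark(grammar, parser="lalr", lexer="contextual")
+#
+#     # Earley with the dynamic lexer, which skips the LexerThread machinery above:
+#     earley = Lark(grammar, parser="earley", lexer="dynamic")
+#
+#     print(lalr.parse("x = 1").pretty())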
+
+
+def _get_lexer_callbacks(transformer, terminals):
+ result = {}
+ for terminal in terminals:
+ callback = getattr(transformer, terminal.name, None)
+ if callback is not None:
+ result[terminal.name] = callback
+ return result
+
+class PostLexConnector:
+ def __init__(self, lexer, postlexer):
+ self.lexer = lexer
+ self.postlexer = postlexer
+
+ def make_lexer_state(self, text):
+ return self.lexer.make_lexer_state(text)
+
+ def lex(self, lexer_state, parser_state):
+ i = self.lexer.lex(lexer_state, parser_state)
+ return self.postlexer.process(i)
+
+
+
+def create_traditional_lexer(lexer_conf, parser, postlex):
+ return TraditionalLexer(lexer_conf)
+
+def create_contextual_lexer(lexer_conf, parser, postlex):
+ states = {idx:list(t.keys()) for idx, t in parser._parse_table.states.items()}
+ always_accept = postlex.always_accept if postlex else ()
+ return ContextualLexer(lexer_conf, states, always_accept=always_accept)
+
+def create_lalr_parser(lexer_conf, parser_conf, options=None):
+ debug = options.debug if options else False
+ return LALR_Parser(parser_conf, debug=debug)
+
+
+create_earley_parser = NotImplemented
+CYK_FrontEnd = NotImplemented
+###}
+
+class EarleyRegexpMatcher:
+ def __init__(self, lexer_conf):
+ self.regexps = {}
+ for t in lexer_conf.terminals:
+ if t.priority != 1:
+ raise GrammarError("Dynamic Earley doesn't support weights on terminals", t, t.priority)
+ regexp = t.pattern.to_regexp()
+ try:
+ width = get_regexp_width(regexp)[0]
+ except ValueError:
+ raise GrammarError("Bad regexp in token %s: %s" % (t.name, regexp))
+ else:
+ if width == 0:
+ raise GrammarError("Dynamic Earley doesn't allow zero-width regexps", t)
+ if lexer_conf.use_bytes:
+ regexp = regexp.encode('utf-8')
+
+ self.regexps[t.name] = lexer_conf.re_module.compile(regexp, lexer_conf.g_regex_flags)
+
+ def match(self, term, text, index=0):
+ return self.regexps[term.name].match(text, index)
+
+
+def create_earley_parser__dynamic(lexer_conf, parser_conf, options=None, **kw):
+ earley_matcher = EarleyRegexpMatcher(lexer_conf)
+ return xearley.Parser(parser_conf, earley_matcher.match, ignore=lexer_conf.ignore, **kw)
+
+def _match_earley_basic(term, token):
+ return term.name == token.type
+
+def create_earley_parser__basic(lexer_conf, parser_conf, options, **kw):
+ return earley.Parser(parser_conf, _match_earley_basic, **kw)
+
+def create_earley_parser(lexer_conf, parser_conf, options):
+ resolve_ambiguity = options.ambiguity == 'resolve'
+ debug = options.debug if options else False
+ tree_class = options.tree_class or Tree if options.ambiguity != 'forest' else None
+
+ extra = {}
+ if lexer_conf.lexer_type == 'dynamic':
+ f = create_earley_parser__dynamic
+ elif lexer_conf.lexer_type == 'dynamic_complete':
+ extra['complete_lex'] = True
+ f = create_earley_parser__dynamic
+ else:
+ f = create_earley_parser__basic
+
+ return f(lexer_conf, parser_conf, options, resolve_ambiguity=resolve_ambiguity, debug=debug, tree_class=tree_class, **extra)
+
+
+
+class CYK_FrontEnd:
+ def __init__(self, lexer_conf, parser_conf, options=None):
+ self._analysis = GrammarAnalyzer(parser_conf)
+ self.parser = cyk.Parser(parser_conf.rules)
+
+ self.callbacks = parser_conf.callbacks
+
+ def parse(self, lexer_thread, start):
+ tokens = list(lexer_thread.lex(None))
+ tree = self.parser.parse(tokens, start)
+ return self._transform(tree)
+
+ def _transform(self, tree):
+ subtrees = list(tree.iter_subtrees())
+ for subtree in subtrees:
+ subtree.children = [self._apply_callback(c) if isinstance(c, Tree) else c for c in subtree.children]
+
+ return self._apply_callback(tree)
+
+ def _apply_callback(self, tree):
+ return self.callbacks[tree.rule](tree.children)
diff --git a/.venv/lib/python3.12/site-packages/lark/parsers/__init__.py b/.venv/lib/python3.12/site-packages/lark/parsers/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/parsers/__init__.py
diff --git a/.venv/lib/python3.12/site-packages/lark/parsers/cyk.py b/.venv/lib/python3.12/site-packages/lark/parsers/cyk.py
new file mode 100644
index 00000000..ff0924f2
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/parsers/cyk.py
@@ -0,0 +1,345 @@
+"""This module implements a CYK parser."""
+
+# Author: https://github.com/ehudt (2018)
+#
+# Adapted by Erez
+
+
+from collections import defaultdict
+import itertools
+
+from ..exceptions import ParseError
+from ..lexer import Token
+from ..tree import Tree
+from ..grammar import Terminal as T, NonTerminal as NT, Symbol
+
+try:
+ xrange
+except NameError:
+ xrange = range
+
+def match(t, s):
+ assert isinstance(t, T)
+ return t.name == s.type
+
+
+class Rule(object):
+ """Context-free grammar rule."""
+
+ def __init__(self, lhs, rhs, weight, alias):
+ super(Rule, self).__init__()
+ assert isinstance(lhs, NT), lhs
+ assert all(isinstance(x, NT) or isinstance(x, T) for x in rhs), rhs
+ self.lhs = lhs
+ self.rhs = rhs
+ self.weight = weight
+ self.alias = alias
+
+ def __str__(self):
+ return '%s -> %s' % (str(self.lhs), ' '.join(str(x) for x in self.rhs))
+
+ def __repr__(self):
+ return str(self)
+
+ def __hash__(self):
+ return hash((self.lhs, tuple(self.rhs)))
+
+ def __eq__(self, other):
+ return self.lhs == other.lhs and self.rhs == other.rhs
+
+ def __ne__(self, other):
+ return not (self == other)
+
+
+class Grammar(object):
+ """Context-free grammar."""
+
+ def __init__(self, rules):
+ self.rules = frozenset(rules)
+
+ def __eq__(self, other):
+ return self.rules == other.rules
+
+ def __str__(self):
+ return '\n' + '\n'.join(sorted(repr(x) for x in self.rules)) + '\n'
+
+ def __repr__(self):
+ return str(self)
+
+
+# Parse tree data structures
+class RuleNode(object):
+ """A node in the parse tree, which also contains the full rhs rule."""
+
+ def __init__(self, rule, children, weight=0):
+ self.rule = rule
+ self.children = children
+ self.weight = weight
+
+ def __repr__(self):
+ return 'RuleNode(%s, [%s])' % (repr(self.rule.lhs), ', '.join(str(x) for x in self.children))
+
+
+
+class Parser(object):
+ """Parser wrapper."""
+
+ def __init__(self, rules):
+ super(Parser, self).__init__()
+ self.orig_rules = {rule: rule for rule in rules}
+ rules = [self._to_rule(rule) for rule in rules]
+ self.grammar = to_cnf(Grammar(rules))
+
+ def _to_rule(self, lark_rule):
+ """Converts a lark rule, (lhs, rhs, callback, options), to a Rule."""
+ assert isinstance(lark_rule.origin, NT)
+ assert all(isinstance(x, Symbol) for x in lark_rule.expansion)
+ return Rule(
+ lark_rule.origin, lark_rule.expansion,
+ weight=lark_rule.options.priority if lark_rule.options.priority else 0,
+ alias=lark_rule)
+
+ def parse(self, tokenized, start): # pylint: disable=invalid-name
+ """Parses input, which is a list of tokens."""
+ assert start
+ start = NT(start)
+
+ table, trees = _parse(tokenized, self.grammar)
+ # Check if the parse succeeded.
+ if all(r.lhs != start for r in table[(0, len(tokenized) - 1)]):
+ raise ParseError('Parsing failed.')
+ parse = trees[(0, len(tokenized) - 1)][start]
+ return self._to_tree(revert_cnf(parse))
+
+ def _to_tree(self, rule_node):
+ """Converts a RuleNode parse tree to a lark Tree."""
+ orig_rule = self.orig_rules[rule_node.rule.alias]
+ children = []
+ for child in rule_node.children:
+ if isinstance(child, RuleNode):
+ children.append(self._to_tree(child))
+ else:
+ assert isinstance(child.name, Token)
+ children.append(child.name)
+ t = Tree(orig_rule.origin, children)
+ t.rule = orig_rule
+ return t
+
+
+def print_parse(node, indent=0):
+ if isinstance(node, RuleNode):
+ print(' ' * (indent * 2) + str(node.rule.lhs))
+ for child in node.children:
+ print_parse(child, indent + 1)
+ else:
+ print(' ' * (indent * 2) + str(node.s))
+
+
+def _parse(s, g):
+ """Parses sentence 's' using CNF grammar 'g'."""
+ # The CYK table. Indexed with a 2-tuple: (start pos, end pos)
+ table = defaultdict(set)
+ # Top-level structure is similar to the CYK table. Each cell is a dict from
+ # rule name to the best (lightest) tree for that rule.
+ trees = defaultdict(dict)
+ # Populate base case with existing terminal production rules
+ for i, w in enumerate(s):
+ for terminal, rules in g.terminal_rules.items():
+ if match(terminal, w):
+ for rule in rules:
+ table[(i, i)].add(rule)
+ if (rule.lhs not in trees[(i, i)] or
+ rule.weight < trees[(i, i)][rule.lhs].weight):
+ trees[(i, i)][rule.lhs] = RuleNode(rule, [T(w)], weight=rule.weight)
+
+ # Iterate over lengths of sub-sentences
+ for l in xrange(2, len(s) + 1):
+ # Iterate over sub-sentences with the given length
+ for i in xrange(len(s) - l + 1):
+ # Choose partition of the sub-sentence in [1, l)
+ for p in xrange(i + 1, i + l):
+ span1 = (i, p - 1)
+ span2 = (p, i + l - 1)
+ for r1, r2 in itertools.product(table[span1], table[span2]):
+ for rule in g.nonterminal_rules.get((r1.lhs, r2.lhs), []):
+ table[(i, i + l - 1)].add(rule)
+ r1_tree = trees[span1][r1.lhs]
+ r2_tree = trees[span2][r2.lhs]
+ rule_total_weight = rule.weight + r1_tree.weight + r2_tree.weight
+ if (rule.lhs not in trees[(i, i + l - 1)]
+ or rule_total_weight < trees[(i, i + l - 1)][rule.lhs].weight):
+ trees[(i, i + l - 1)][rule.lhs] = RuleNode(rule, [r1_tree, r2_tree], weight=rule_total_weight)
+ return table, trees
+
+
+# This section implements context-free grammar converter to Chomsky normal form.
+# It also implements a conversion of parse trees from its CNF to the original
+# grammar.
+# Overview:
+# Applies the following operations in this order:
+# * TERM: Eliminates non-solitary terminals from all rules
+# * BIN: Eliminates rules with more than 2 symbols on their right-hand-side.
+# * UNIT: Eliminates non-terminal unit rules
+#
+# The following grammar characteristics aren't featured:
+# * Start symbol appears on RHS
+# * Empty rules (epsilon rules)
+
+
+class CnfWrapper(object):
+ """CNF wrapper for grammar.
+
+ Validates that the input grammar is CNF and provides helper data structures.
+ """
+
+ def __init__(self, grammar):
+ super(CnfWrapper, self).__init__()
+ self.grammar = grammar
+ self.rules = grammar.rules
+ self.terminal_rules = defaultdict(list)
+ self.nonterminal_rules = defaultdict(list)
+ for r in self.rules:
+ # Validate that the grammar is CNF and populate auxiliary data structures.
+ assert isinstance(r.lhs, NT), r
+ if len(r.rhs) not in [1, 2]:
+ raise ParseError("CYK doesn't support empty rules")
+ if len(r.rhs) == 1 and isinstance(r.rhs[0], T):
+ self.terminal_rules[r.rhs[0]].append(r)
+ elif len(r.rhs) == 2 and all(isinstance(x, NT) for x in r.rhs):
+ self.nonterminal_rules[tuple(r.rhs)].append(r)
+ else:
+ assert False, r
+
+ def __eq__(self, other):
+ return self.grammar == other.grammar
+
+ def __repr__(self):
+ return repr(self.grammar)
+
+
+class UnitSkipRule(Rule):
+ """A rule that records NTs that were skipped during transformation."""
+
+ def __init__(self, lhs, rhs, skipped_rules, weight, alias):
+ super(UnitSkipRule, self).__init__(lhs, rhs, weight, alias)
+ self.skipped_rules = skipped_rules
+
+ def __eq__(self, other):
+ return isinstance(other, type(self)) and self.skipped_rules == other.skipped_rules
+
+ __hash__ = Rule.__hash__
+
+
+def build_unit_skiprule(unit_rule, target_rule):
+ skipped_rules = []
+ if isinstance(unit_rule, UnitSkipRule):
+ skipped_rules += unit_rule.skipped_rules
+ skipped_rules.append(target_rule)
+ if isinstance(target_rule, UnitSkipRule):
+ skipped_rules += target_rule.skipped_rules
+ return UnitSkipRule(unit_rule.lhs, target_rule.rhs, skipped_rules,
+ weight=unit_rule.weight + target_rule.weight, alias=unit_rule.alias)
+
+
+def get_any_nt_unit_rule(g):
+ """Returns a non-terminal unit rule from 'g', or None if there is none."""
+ for rule in g.rules:
+ if len(rule.rhs) == 1 and isinstance(rule.rhs[0], NT):
+ return rule
+ return None
+
+
+def _remove_unit_rule(g, rule):
+ """Removes 'rule' from 'g' without changing the langugage produced by 'g'."""
+ new_rules = [x for x in g.rules if x != rule]
+ refs = [x for x in g.rules if x.lhs == rule.rhs[0]]
+ new_rules += [build_unit_skiprule(rule, ref) for ref in refs]
+ return Grammar(new_rules)
+
+
+def _split(rule):
+ """Splits a rule whose len(rhs) > 2 into shorter rules."""
+ rule_str = str(rule.lhs) + '__' + '_'.join(str(x) for x in rule.rhs)
+ rule_name = '__SP_%s' % (rule_str) + '_%d'
+ yield Rule(rule.lhs, [rule.rhs[0], NT(rule_name % 1)], weight=rule.weight, alias=rule.alias)
+ for i in xrange(1, len(rule.rhs) - 2):
+ yield Rule(NT(rule_name % i), [rule.rhs[i], NT(rule_name % (i + 1))], weight=0, alias='Split')
+ yield Rule(NT(rule_name % (len(rule.rhs) - 2)), rule.rhs[-2:], weight=0, alias='Split')
+
+
+def _term(g):
+ """Applies the TERM rule on 'g' (see top comment)."""
+ all_t = {x for rule in g.rules for x in rule.rhs if isinstance(x, T)}
+ t_rules = {t: Rule(NT('__T_%s' % str(t)), [t], weight=0, alias='Term') for t in all_t}
+ new_rules = []
+ for rule in g.rules:
+ if len(rule.rhs) > 1 and any(isinstance(x, T) for x in rule.rhs):
+ new_rhs = [t_rules[x].lhs if isinstance(x, T) else x for x in rule.rhs]
+ new_rules.append(Rule(rule.lhs, new_rhs, weight=rule.weight, alias=rule.alias))
+ new_rules.extend(v for k, v in t_rules.items() if k in rule.rhs)
+ else:
+ new_rules.append(rule)
+ return Grammar(new_rules)
+
+
+def _bin(g):
+ """Applies the BIN rule to 'g' (see top comment)."""
+ new_rules = []
+ for rule in g.rules:
+ if len(rule.rhs) > 2:
+ new_rules += _split(rule)
+ else:
+ new_rules.append(rule)
+ return Grammar(new_rules)
+
+
+def _unit(g):
+ """Applies the UNIT rule to 'g' (see top comment)."""
+ nt_unit_rule = get_any_nt_unit_rule(g)
+ while nt_unit_rule:
+ g = _remove_unit_rule(g, nt_unit_rule)
+ nt_unit_rule = get_any_nt_unit_rule(g)
+ return g
+
+
+def to_cnf(g):
+ """Creates a CNF grammar from a general context-free grammar 'g'."""
+ g = _unit(_bin(_term(g)))
+ return CnfWrapper(g)
+
+
+def unroll_unit_skiprule(lhs, orig_rhs, skipped_rules, children, weight, alias):
+ if not skipped_rules:
+ return RuleNode(Rule(lhs, orig_rhs, weight=weight, alias=alias), children, weight=weight)
+ else:
+ weight = weight - skipped_rules[0].weight
+ return RuleNode(
+ Rule(lhs, [skipped_rules[0].lhs], weight=weight, alias=alias), [
+ unroll_unit_skiprule(skipped_rules[0].lhs, orig_rhs,
+ skipped_rules[1:], children,
+ skipped_rules[0].weight, skipped_rules[0].alias)
+ ], weight=weight)
+
+
+def revert_cnf(node):
+ """Reverts a parse tree (RuleNode) to its original non-CNF form (Node)."""
+ if isinstance(node, T):
+ return node
+ # Reverts TERM rule.
+ if node.rule.lhs.name.startswith('__T_'):
+ return node.children[0]
+ else:
+ children = []
+ for child in map(revert_cnf, node.children):
+ # Reverts BIN rule.
+ if isinstance(child, RuleNode) and child.rule.lhs.name.startswith('__SP_'):
+ children += child.children
+ else:
+ children.append(child)
+ # Reverts UNIT rule.
+ if isinstance(node.rule, UnitSkipRule):
+ return unroll_unit_skiprule(node.rule.lhs, node.rule.rhs,
+ node.rule.skipped_rules, children,
+ node.rule.weight, node.rule.alias)
+ else:
+ return RuleNode(node.rule, children)
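+
+# A minimal usage sketch (editor's note, not part of the upstream source).
+# The CYK backend is selected through the public ``lark.Lark`` API; it uses the
+# standard lexer and runs in O(n^3) time in the length of the token stream:
+#
+#     from lark import Lark
+#
+#     parser = Lark(r"""
+#         start: NAME "=" NAME
+#         %import common.CNAME -> NAME
+#         %import common.WS
+#         %ignore WS
+#     """, parser="cyk")
+#
+#     print(parser.parse("x = y").pretty())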
diff --git a/.venv/lib/python3.12/site-packages/lark/parsers/earley.py b/.venv/lib/python3.12/site-packages/lark/parsers/earley.py
new file mode 100644
index 00000000..5961d5d5
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/parsers/earley.py
@@ -0,0 +1,326 @@
+"""This module implements an Earley parser.
+
+The core Earley algorithm used here is based on Elizabeth Scott's implementation, here:
+ https://www.sciencedirect.com/science/article/pii/S1571066108001497
+
+That is probably the best reference for understanding the algorithm here.
+
+The Earley parser outputs an SPPF-tree as per that document. The SPPF tree format
+is explained here: https://lark-parser.readthedocs.io/en/latest/_static/sppf/sppf.html
+"""
+
+from collections import deque
+
+from ..tree import Tree
+from ..exceptions import UnexpectedEOF, UnexpectedToken
+from ..utils import logger
+from .grammar_analysis import GrammarAnalyzer
+from ..grammar import NonTerminal
+from .earley_common import Item, TransitiveItem
+from .earley_forest import ForestSumVisitor, SymbolNode, ForestToParseTree
+
+class Parser:
+ def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, debug=False, tree_class=Tree):
+ analysis = GrammarAnalyzer(parser_conf)
+ self.parser_conf = parser_conf
+ self.resolve_ambiguity = resolve_ambiguity
+ self.debug = debug
+ self.tree_class = tree_class
+
+ self.FIRST = analysis.FIRST
+ self.NULLABLE = analysis.NULLABLE
+ self.callbacks = parser_conf.callbacks
+ self.predictions = {}
+
+ ## These could be moved to the grammar analyzer. Pre-computing these is *much* faster than
+ # the slow 'isupper' in is_terminal.
+ self.TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if sym.is_term }
+ self.NON_TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if not sym.is_term }
+
+ self.forest_sum_visitor = None
+ for rule in parser_conf.rules:
+ if rule.origin not in self.predictions:
+ self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)]
+
+ ## Detect if any rules have priorities set. If the user specified priority = "none" then
+ # the priorities will be stripped from all rules before they reach us, allowing us to
+ # skip the extra tree walk. We'll also skip this if the user just didn't specify priorities
+ # on any rules.
+ if self.forest_sum_visitor is None and rule.options.priority is not None:
+ self.forest_sum_visitor = ForestSumVisitor
+
+ self.term_matcher = term_matcher
+
+
+ def predict_and_complete(self, i, to_scan, columns, transitives):
+ """The core Earley Predictor and Completer.
+
+ At each stage of the input, we handle any completed items (things
+ that matched on the last cycle) and use those to predict what should
+ come next in the input stream. The completions and any predicted
+ non-terminals are recursively processed until we reach a set of items
+ expecting terminals, which can be added to the scan list for the next scanner cycle."""
+ # Held Completions (H in E. Scott's paper).
+ node_cache = {}
+ held_completions = {}
+
+ column = columns[i]
+ # R (items) = Ei (column.items)
+ items = deque(column)
+ while items:
+ item = items.pop() # remove an element, A say, from R
+
+ ### The Earley completer
+ if item.is_complete: ### (item.s == string)
+ if item.node is None:
+ label = (item.s, item.start, i)
+ item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
+ item.node.add_family(item.s, item.rule, item.start, None, None)
+
+ # create_leo_transitives(item.rule.origin, item.start)
+
+ ###R Joop Leo right recursion Completer
+ if item.rule.origin in transitives[item.start]:
+ transitive = transitives[item.start][item.s]
+ if transitive.previous in transitives[transitive.column]:
+ root_transitive = transitives[transitive.column][transitive.previous]
+ else:
+ root_transitive = transitive
+
+ new_item = Item(transitive.rule, transitive.ptr, transitive.start)
+ label = (root_transitive.s, root_transitive.start, i)
+ new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
+ new_item.node.add_path(root_transitive, item.node)
+ if new_item.expect in self.TERMINALS:
+ # Add (B :: aC.B, h, y) to Q
+ to_scan.add(new_item)
+ elif new_item not in column:
+ # Add (B :: aC.B, h, y) to Ei and R
+ column.add(new_item)
+ items.append(new_item)
+ ###R Regular Earley completer
+ else:
+ # Empty has 0 length. If we complete an empty symbol in a particular
+ # parse step, we need to be able to use that same empty symbol to complete
+ # any predictions that result, that themselves require empty. Avoids
+ # infinite recursion on empty symbols.
+ # held_completions is 'H' in E.Scott's paper.
+ is_empty_item = item.start == i
+ if is_empty_item:
+ held_completions[item.rule.origin] = item.node
+
+ originators = [originator for originator in columns[item.start] if originator.expect is not None and originator.expect == item.s]
+ for originator in originators:
+ new_item = originator.advance()
+ label = (new_item.s, originator.start, i)
+ new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
+ new_item.node.add_family(new_item.s, new_item.rule, i, originator.node, item.node)
+ if new_item.expect in self.TERMINALS:
+ # Add (B :: aC.B, h, y) to Q
+ to_scan.add(new_item)
+ elif new_item not in column:
+ # Add (B :: aC.B, h, y) to Ei and R
+ column.add(new_item)
+ items.append(new_item)
+
+ ### The Earley predictor
+ elif item.expect in self.NON_TERMINALS: ### (item.s == lr0)
+ new_items = []
+ for rule in self.predictions[item.expect]:
+ new_item = Item(rule, 0, i)
+ new_items.append(new_item)
+
+ # Process any held completions (H).
+ if item.expect in held_completions:
+ new_item = item.advance()
+ label = (new_item.s, item.start, i)
+ new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
+ new_item.node.add_family(new_item.s, new_item.rule, new_item.start, item.node, held_completions[item.expect])
+ new_items.append(new_item)
+
+ for new_item in new_items:
+ if new_item.expect in self.TERMINALS:
+ to_scan.add(new_item)
+ elif new_item not in column:
+ column.add(new_item)
+ items.append(new_item)
+
+ def _parse(self, lexer, columns, to_scan, start_symbol=None):
+ def is_quasi_complete(item):
+ if item.is_complete:
+ return True
+
+ quasi = item.advance()
+ while not quasi.is_complete:
+ if quasi.expect not in self.NULLABLE:
+ return False
+ if quasi.rule.origin == start_symbol and quasi.expect == start_symbol:
+ return False
+ quasi = quasi.advance()
+ return True
+
+ def create_leo_transitives(origin, start):
+ visited = set()
+ to_create = []
+ trule = None
+ previous = None
+
+ ### Recursively walk backwards through the Earley sets until we find the
+ # first transitive candidate. If this is done continuously, we shouldn't
+ # have to walk more than 1 hop.
+ while True:
+ if origin in transitives[start]:
+ previous = trule = transitives[start][origin]
+ break
+
+ is_empty_rule = not self.FIRST[origin]
+ if is_empty_rule:
+ break
+
+ candidates = [ candidate for candidate in columns[start] if candidate.expect is not None and origin == candidate.expect ]
+ if len(candidates) != 1:
+ break
+ originator = next(iter(candidates))
+
+ if originator is None or originator in visited:
+ break
+
+ visited.add(originator)
+ if not is_quasi_complete(originator):
+ break
+
+ trule = originator.advance()
+ if originator.start != start:
+ visited.clear()
+
+ to_create.append((origin, start, originator))
+ origin = originator.rule.origin
+ start = originator.start
+
+ # If a suitable Transitive candidate is not found, bail.
+ if trule is None:
+ return
+
+ #### Now walk forwards and create Transitive Items in each set we walked through; and link
+ # each transitive item to the next set forwards.
+ while to_create:
+ origin, start, originator = to_create.pop()
+ titem = None
+ if previous is not None:
+ titem = previous.next_titem = TransitiveItem(origin, trule, originator, previous.column)
+ else:
+ titem = TransitiveItem(origin, trule, originator, start)
+ previous = transitives[start][origin] = titem
+
+
+
+ def scan(i, token, to_scan):
+ """The core Earley Scanner.
+
+ This is a custom implementation of the scanner that uses the
+ Lark lexer to match tokens. The scan list is built by the
+ Earley predictor, based on the previously completed tokens.
+ This ensures that at each phase of the parse we have a custom
+ lexer context, allowing for more complex ambiguities."""
+ next_to_scan = set()
+ next_set = set()
+ columns.append(next_set)
+ transitives.append({})
+ node_cache = {}
+
+ for item in set(to_scan):
+ if match(item.expect, token):
+ new_item = item.advance()
+ label = (new_item.s, new_item.start, i)
+ new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
+ new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token)
+
+ if new_item.expect in self.TERMINALS:
+ # add (B ::= Aai+1.B, h, y) to Q'
+ next_to_scan.add(new_item)
+ else:
+ # add (B ::= Aa+1.B, h, y) to Ei+1
+ next_set.add(new_item)
+
+ if not next_set and not next_to_scan:
+ expect = {i.expect.name for i in to_scan}
+ raise UnexpectedToken(token, expect, considered_rules=set(to_scan), state=frozenset(i.s for i in to_scan))
+
+ return next_to_scan
+
+
+ # Define parser functions
+ match = self.term_matcher
+
+ # Cache for nodes & tokens created in a particular parse step.
+ transitives = [{}]
+
+ ## The main Earley loop.
+ # Run the Prediction/Completion cycle for any Items in the current Earley set.
+ # Completions will be added to the SPPF tree, and predictions will be recursively
+ # processed down to terminals/empty nodes to be added to the scanner for the next
+ # step.
+ expects = {i.expect for i in to_scan}
+ i = 0
+ for token in lexer.lex(expects):
+ self.predict_and_complete(i, to_scan, columns, transitives)
+
+ to_scan = scan(i, token, to_scan)
+ i += 1
+
+ expects.clear()
+ expects |= {i.expect for i in to_scan}
+
+ self.predict_and_complete(i, to_scan, columns, transitives)
+
+ ## Column is now the final column in the parse.
+ assert i == len(columns)-1
+ return to_scan
+
+ def parse(self, lexer, start):
+ assert start, start
+ start_symbol = NonTerminal(start)
+
+ columns = [set()]
+ to_scan = set() # The scan buffer. 'Q' in E.Scott's paper.
+
+ ## Predict for the start_symbol.
+ # Add predicted items to the first Earley set (for the predictor) if they
+ # result in a non-terminal, or the scanner if they result in a terminal.
+ for rule in self.predictions[start_symbol]:
+ item = Item(rule, 0, 0)
+ if item.expect in self.TERMINALS:
+ to_scan.add(item)
+ else:
+ columns[0].add(item)
+
+ to_scan = self._parse(lexer, columns, to_scan, start_symbol)
+
+ # If the parse was successful, the start
+ # symbol should have been completed in the last step of the Earley cycle, and will be in
+ # this column. Find the item for the start_symbol, which is the root of the SPPF tree.
+ solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0]
+ if not solutions:
+ expected_terminals = [t.expect.name for t in to_scan]
+ raise UnexpectedEOF(expected_terminals, state=frozenset(i.s for i in to_scan))
+
+ if self.debug:
+ from .earley_forest import ForestToPyDotVisitor
+ try:
+ debug_walker = ForestToPyDotVisitor()
+ except ImportError:
+ logger.warning("Cannot find dependency 'pydot', will not generate sppf debug image")
+ else:
+ debug_walker.visit(solutions[0], "sppf.png")
+
+
+ if len(solutions) > 1:
+ assert False, 'Earley should not generate multiple start symbol items!'
+
+ if self.tree_class is not None:
+ # Perform our SPPF -> AST conversion
+ transformer = ForestToParseTree(self.tree_class, self.callbacks, self.forest_sum_visitor and self.forest_sum_visitor(), self.resolve_ambiguity)
+ return transformer.transform(solutions[0])
+
+ # return the root of the SPPF
+ return solutions[0]
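+
+# A minimal sketch (editor's note, not part of the upstream source) of how the
+# SPPF built here surfaces through the public API when ambiguity is kept:
+#
+#     from lark import Lark
+#
+#     parser = Lark(r"""
+#         start: word word
+#         word: "a"     -> a
+#             | "a" "a" -> aa
+#     """, parser="earley", ambiguity="explicit")
+#
+#     # "aaa" can split as a+aa or aa+a, so the tree should contain an '_ambig' node
+#     print(parser.parse("aaa").pretty())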
diff --git a/.venv/lib/python3.12/site-packages/lark/parsers/earley_common.py b/.venv/lib/python3.12/site-packages/lark/parsers/earley_common.py
new file mode 100644
index 00000000..6bd614ba
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/parsers/earley_common.py
@@ -0,0 +1,75 @@
+"This module implements an Earley Parser"
+
+# The parser uses a parse-forest to keep track of derivations and ambiguations.
+# When the parse ends successfully, a disambiguation stage resolves all ambiguity
+# (right now ambiguity resolution is not developed beyond the needs of lark)
+# Afterwards the parse tree is reduced (transformed) according to user callbacks.
+# I use the no-recursion version of Transformer, because the tree might be
+# deeper than Python's recursion limit (a bit absurd, but that's life)
+#
+# The algorithm keeps track of each state set, using a corresponding Column instance.
+# Column keeps track of new items using NewsList instances.
+#
+# Author: Erez Shinan (2017)
+# Email : erezshin@gmail.com
+
+from ..grammar import NonTerminal, Terminal
+
+class Item(object):
+ "An Earley Item, the atom of the algorithm."
+
+ __slots__ = ('s', 'rule', 'ptr', 'start', 'is_complete', 'expect', 'previous', 'node', '_hash')
+ def __init__(self, rule, ptr, start):
+ self.is_complete = len(rule.expansion) == ptr
+ self.rule = rule # rule
+ self.ptr = ptr # ptr
+ self.start = start # j
+ self.node = None # w
+ if self.is_complete:
+ self.s = rule.origin
+ self.expect = None
+ self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None
+ else:
+ self.s = (rule, ptr)
+ self.expect = rule.expansion[ptr]
+ self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None
+ self._hash = hash((self.s, self.start))
+
+ def advance(self):
+ return Item(self.rule, self.ptr + 1, self.start)
+
+ def __eq__(self, other):
+ return self is other or (self.s == other.s and self.start == other.start)
+
+ def __hash__(self):
+ return self._hash
+
+ def __repr__(self):
+ before = ( expansion.name for expansion in self.rule.expansion[:self.ptr] )
+ after = ( expansion.name for expansion in self.rule.expansion[self.ptr:] )
+ symbol = "{} ::= {}* {}".format(self.rule.origin.name, ' '.join(before), ' '.join(after))
+ return '%s (%d)' % (symbol, self.start)
+
+
+class TransitiveItem(Item):
+ __slots__ = ('recognized', 'reduction', 'column', 'next_titem')
+ def __init__(self, recognized, trule, originator, start):
+ super(TransitiveItem, self).__init__(trule.rule, trule.ptr, trule.start)
+ self.recognized = recognized
+ self.reduction = originator
+ self.column = start
+ self.next_titem = None
+ self._hash = hash((self.s, self.start, self.recognized))
+
+ def __eq__(self, other):
+ if not isinstance(other, TransitiveItem):
+ return False
+ return self is other or (type(self.s) == type(other.s) and self.s == other.s and self.start == other.start and self.recognized == other.recognized)
+
+ def __hash__(self):
+ return self._hash
+
+ def __repr__(self):
+ before = ( expansion.name for expansion in self.rule.expansion[:self.ptr] )
+ after = ( expansion.name for expansion in self.rule.expansion[self.ptr:] )
+ return '{} : {} -> {}* {} ({}, {})'.format(self.recognized.name, self.rule.origin.name, ' '.join(before), ' '.join(after), self.column, self.start)
diff --git a/.venv/lib/python3.12/site-packages/lark/parsers/earley_forest.py b/.venv/lib/python3.12/site-packages/lark/parsers/earley_forest.py
new file mode 100644
index 00000000..f39f4eb1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/parsers/earley_forest.py
@@ -0,0 +1,790 @@
+""""This module implements an SPPF implementation
+
+This is used as the primary output mechanism for the Earley parser
+in order to store complex ambiguities.
+
+Full reference and more details are here:
+http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/
+"""
+
+from random import randint
+from math import isinf
+from collections import deque
+from operator import attrgetter
+from importlib import import_module
+from functools import partial
+
+from ..parse_tree_builder import AmbiguousIntermediateExpander
+from ..visitors import Discard
+from ..lexer import Token
+from ..utils import logger
+from ..tree import Tree
+
+class ForestNode(object):
+ pass
+
+class SymbolNode(ForestNode):
+ """
+ A Symbol Node represents a symbol (or Intermediate LR0).
+
+ Symbol nodes are keyed by the symbol (s). For intermediate nodes
+ s will be an LR0, stored as a tuple of (rule, ptr). For completed symbol
+ nodes, s will be a string representing the non-terminal origin (i.e.
+ the left hand side of the rule).
+
+ The children of a Symbol or Intermediate Node will always be Packed Nodes;
+ with each Packed Node child representing a single derivation of a production.
+
+ Hence a Symbol Node with a single child is unambiguous.
+
+ :ivar s: A Symbol, or a tuple of (rule, ptr) for an intermediate node.
+ :ivar start: The index of the start of the substring matched by this
+ symbol (inclusive).
+ :ivar end: The index of the end of the substring matched by this
+ symbol (exclusive).
+ :ivar is_intermediate: True if this node is an intermediate node.
+ :ivar priority: The priority of the node's symbol.
+ """
+ __slots__ = ('s', 'start', 'end', '_children', 'paths', 'paths_loaded', 'priority', 'is_intermediate', '_hash')
+ def __init__(self, s, start, end):
+ self.s = s
+ self.start = start
+ self.end = end
+ self._children = set()
+ self.paths = set()
+ self.paths_loaded = False
+
+ ### We use inf here as it can be safely negated without resorting to conditionals,
+ # unlike None or float('NaN'), and sorts appropriately.
+ self.priority = float('-inf')
+ self.is_intermediate = isinstance(s, tuple)
+ self._hash = hash((self.s, self.start, self.end))
+
+ def add_family(self, lr0, rule, start, left, right):
+ self._children.add(PackedNode(self, lr0, rule, start, left, right))
+
+ def add_path(self, transitive, node):
+ self.paths.add((transitive, node))
+
+ def load_paths(self):
+ for transitive, node in self.paths:
+ if transitive.next_titem is not None:
+ vn = SymbolNode(transitive.next_titem.s, transitive.next_titem.start, self.end)
+ vn.add_path(transitive.next_titem, node)
+ self.add_family(transitive.reduction.rule.origin, transitive.reduction.rule, transitive.reduction.start, transitive.reduction.node, vn)
+ else:
+ self.add_family(transitive.reduction.rule.origin, transitive.reduction.rule, transitive.reduction.start, transitive.reduction.node, node)
+ self.paths_loaded = True
+
+ @property
+ def is_ambiguous(self):
+ """Returns True if this node is ambiguous."""
+ return len(self.children) > 1
+
+ @property
+ def children(self):
+ """Returns a list of this node's children sorted from greatest to
+ least priority."""
+ if not self.paths_loaded: self.load_paths()
+ return sorted(self._children, key=attrgetter('sort_key'))
+
+ def __iter__(self):
+ return iter(self._children)
+
+ def __eq__(self, other):
+ if not isinstance(other, SymbolNode):
+ return False
+ return self is other or (type(self.s) == type(other.s) and self.s == other.s and self.start == other.start and self.end is other.end)
+
+ def __hash__(self):
+ return self._hash
+
+ def __repr__(self):
+ if self.is_intermediate:
+ rule = self.s[0]
+ ptr = self.s[1]
+ before = ( expansion.name for expansion in rule.expansion[:ptr] )
+ after = ( expansion.name for expansion in rule.expansion[ptr:] )
+ symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after))
+ else:
+ symbol = self.s.name
+ return "({}, {}, {}, {})".format(symbol, self.start, self.end, self.priority)
+
+class PackedNode(ForestNode):
+ """
+ A Packed Node represents a single derivation in a symbol node.
+
+ :ivar rule: The rule associated with this node.
+ :ivar parent: The parent of this node.
+ :ivar left: The left child of this node. ``None`` if one does not exist.
+ :ivar right: The right child of this node. ``None`` if one does not exist.
+ :ivar priority: The priority of this node.
+ """
+ __slots__ = ('parent', 's', 'rule', 'start', 'left', 'right', 'priority', '_hash')
+ def __init__(self, parent, s, rule, start, left, right):
+ self.parent = parent
+ self.s = s
+ self.start = start
+ self.rule = rule
+ self.left = left
+ self.right = right
+ self.priority = float('-inf')
+ self._hash = hash((self.left, self.right))
+
+ @property
+ def is_empty(self):
+ return self.left is None and self.right is None
+
+ @property
+ def sort_key(self):
+ """
+ Used to sort PackedNode children of SymbolNodes.
+ A SymbolNode has multiple PackedNodes if it matched
+ ambiguously. Hence, we use the sort order to identify
+ the order in which ambiguous children should be considered.
+ """
+ return self.is_empty, -self.priority, self.rule.order
+
+ @property
+ def children(self):
+ """Returns a list of this node's children."""
+ return [x for x in [self.left, self.right] if x is not None]
+
+ def __iter__(self):
+ yield self.left
+ yield self.right
+
+ def __eq__(self, other):
+ if not isinstance(other, PackedNode):
+ return False
+ return self is other or (self.left == other.left and self.right == other.right)
+
+ def __hash__(self):
+ return self._hash
+
+ def __repr__(self):
+ if isinstance(self.s, tuple):
+ rule = self.s[0]
+ ptr = self.s[1]
+ before = ( expansion.name for expansion in rule.expansion[:ptr] )
+ after = ( expansion.name for expansion in rule.expansion[ptr:] )
+ symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after))
+ else:
+ symbol = self.s.name
+ return "({}, {}, {}, {})".format(symbol, self.start, self.priority, self.rule.order)
+
+class ForestVisitor(object):
+ """
+ An abstract base class for building forest visitors.
+
+ This class performs a controllable depth-first walk of an SPPF.
+ The visitor will not enter cycles and will backtrack if one is encountered.
+ Subclasses are notified of cycles through the ``on_cycle`` method.
+
+ Behavior for visit events is defined by overriding the
+ ``visit*node*`` functions.
+
+ The walk is controlled by the return values of the ``visit*node_in``
+ methods: returning a node (or an iterable of nodes) schedules them to be
+ visited. The visitor will begin to backtrack if no nodes are returned.
+
+ :ivar single_visit: If ``True``, non-Token nodes will only be visited once.
+ """
+
+ def __init__(self, single_visit=False):
+ self.single_visit = single_visit
+
+ def visit_token_node(self, node):
+ """Called when a ``Token`` is visited. ``Token`` nodes are always leaves."""
+ pass
+
+ def visit_symbol_node_in(self, node):
+ """Called when a symbol node is visited. Nodes that are returned
+ will be scheduled to be visited. If ``visit_intermediate_node_in``
+ is not implemented, this function will be called for intermediate
+ nodes as well."""
+ pass
+
+ def visit_symbol_node_out(self, node):
+ """Called after all nodes returned from a corresponding ``visit_symbol_node_in``
+ call have been visited. If ``visit_intermediate_node_out``
+ is not implemented, this function will be called for intermediate
+ nodes as well."""
+ pass
+
+ def visit_packed_node_in(self, node):
+ """Called when a packed node is visited. Nodes that are returned
+ will be scheduled to be visited. """
+ pass
+
+ def visit_packed_node_out(self, node):
+ """Called after all nodes returned from a corresponding ``visit_packed_node_in``
+ call have been visited."""
+ pass
+
+ def on_cycle(self, node, path):
+ """Called when a cycle is encountered.
+
+ :param node: The node that causes a cycle.
+ :param path: The list of nodes being visited: nodes that have been
+ entered but not exited. The first element is the root in a forest
+ visit, and the last element is the node visited most recently.
+ ``path`` should be treated as read-only.
+ """
+ pass
+
+ def get_cycle_in_path(self, node, path):
+ """A utility function for use in ``on_cycle`` to obtain a slice of
+ ``path`` that only contains the nodes that make up the cycle."""
+ index = len(path) - 1
+ while id(path[index]) != id(node):
+ index -= 1
+ return path[index:]
+
+ def visit(self, root):
+ # Visiting is a list of IDs of all symbol/intermediate nodes currently in
+ # the stack. It serves two purposes: to detect when we 'recurse' in and out
+ # of a symbol/intermediate so that we can process both up and down. Also,
+ # since the SPPF can have cycles it allows us to detect if we're trying
+ # to recurse into a node that's already on the stack (infinite recursion).
+ visiting = set()
+
+ # set of all nodes that have been visited
+ visited = set()
+
+ # a list of nodes that are currently being visited
+ # used for the `on_cycle` callback
+ path = []
+
+ # We do not use recursion here to walk the Forest due to the limited
+ # stack size in python. Therefore input_stack is essentially our stack.
+ input_stack = deque([root])
+
+ # It is much faster to cache these as locals since they are called
+ # many times in large parses.
+ vpno = getattr(self, 'visit_packed_node_out')
+ vpni = getattr(self, 'visit_packed_node_in')
+ vsno = getattr(self, 'visit_symbol_node_out')
+ vsni = getattr(self, 'visit_symbol_node_in')
+ vino = getattr(self, 'visit_intermediate_node_out', vsno)
+ vini = getattr(self, 'visit_intermediate_node_in', vsni)
+ vtn = getattr(self, 'visit_token_node')
+ oc = getattr(self, 'on_cycle')
+
+ while input_stack:
+ current = next(reversed(input_stack))
+ try:
+ next_node = next(current)
+ except StopIteration:
+ input_stack.pop()
+ continue
+ except TypeError:
+ ### If the current object is not an iterator, pass through to Token/SymbolNode
+ pass
+ else:
+ if next_node is None:
+ continue
+
+ if id(next_node) in visiting:
+ oc(next_node, path)
+ continue
+
+ input_stack.append(next_node)
+ continue
+
+ if not isinstance(current, ForestNode):
+ vtn(current)
+ input_stack.pop()
+ continue
+
+ current_id = id(current)
+ if current_id in visiting:
+ if isinstance(current, PackedNode):
+ vpno(current)
+ elif current.is_intermediate:
+ vino(current)
+ else:
+ vsno(current)
+ input_stack.pop()
+ path.pop()
+ visiting.remove(current_id)
+ visited.add(current_id)
+ elif self.single_visit and current_id in visited:
+ input_stack.pop()
+ else:
+ visiting.add(current_id)
+ path.append(current)
+ if isinstance(current, PackedNode):
+ next_node = vpni(current)
+ elif current.is_intermediate:
+ next_node = vini(current)
+ else:
+ next_node = vsni(current)
+ if next_node is None:
+ continue
+
+ if not isinstance(next_node, ForestNode) and \
+ not isinstance(next_node, Token):
+ next_node = iter(next_node)
+ elif id(next_node) in visiting:
+ oc(next_node, path)
+ continue
+
+ input_stack.append(next_node)
+
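+# A minimal subclass sketch (editor's note, not part of the upstream source):
+# an SPPF walk that counts ambiguous symbol nodes by returning each node's
+# children from the ``visit_*_in`` hooks so the visitor descends into them.
+#
+#     class AmbiguityCounter(ForestVisitor):
+#         def __init__(self):
+#             super(AmbiguityCounter, self).__init__(single_visit=True)
+#             self.count = 0
+#
+#         def visit_symbol_node_in(self, node):
+#             if node.is_ambiguous:
+#                 self.count += 1
+#             return node.children
+#
+#         def visit_packed_node_in(self, node):
+#             return node.children
+#
+#     # usage (given an SPPF root SymbolNode):
+#     #     counter = AmbiguityCounter(); counter.visit(sppf_root); print(counter.count)
+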
+class ForestTransformer(ForestVisitor):
+ """The base class for a bottom-up forest transformation. Most users will
+ want to use ``TreeForestTransformer`` instead as it has a friendlier
+ interface and covers most use cases.
+
+ Transformations are applied via inheritance and overriding of the
+ ``transform*node`` methods.
+
+ ``transform_token_node`` receives a ``Token`` as an argument.
+ All other methods receive the node that is being transformed and
+ a list of the results of the transformations of that node's children.
+ The return value of these methods are the resulting transformations.
+
+ If ``Discard`` is raised in a node's transformation, no data from that node
+ will be passed to its parent's transformation.
+ """
+
+ def __init__(self):
+ super(ForestTransformer, self).__init__()
+ # results of transformations
+ self.data = dict()
+ # used to track parent nodes
+ self.node_stack = deque()
+
+ def transform(self, root):
+ """Perform a transformation on an SPPF."""
+ self.node_stack.append('result')
+ self.data['result'] = []
+ self.visit(root)
+ assert len(self.data['result']) <= 1
+ if self.data['result']:
+ return self.data['result'][0]
+
+ def transform_symbol_node(self, node, data):
+ """Transform a symbol node."""
+ return node
+
+ def transform_intermediate_node(self, node, data):
+ """Transform an intermediate node."""
+ return node
+
+ def transform_packed_node(self, node, data):
+ """Transform a packed node."""
+ return node
+
+ def transform_token_node(self, node):
+ """Transform a ``Token``."""
+ return node
+
+ def visit_symbol_node_in(self, node):
+ self.node_stack.append(id(node))
+ self.data[id(node)] = []
+ return node.children
+
+ def visit_packed_node_in(self, node):
+ self.node_stack.append(id(node))
+ self.data[id(node)] = []
+ return node.children
+
+ def visit_token_node(self, node):
+ try:
+ transformed = self.transform_token_node(node)
+ except Discard:
+ pass
+ else:
+ self.data[self.node_stack[-1]].append(transformed)
+
+ def visit_symbol_node_out(self, node):
+ self.node_stack.pop()
+ try:
+ transformed = self.transform_symbol_node(node, self.data[id(node)])
+ except Discard:
+ pass
+ else:
+ self.data[self.node_stack[-1]].append(transformed)
+ finally:
+ del self.data[id(node)]
+
+ def visit_intermediate_node_out(self, node):
+ self.node_stack.pop()
+ try:
+ transformed = self.transform_intermediate_node(node, self.data[id(node)])
+ except Discard:
+ pass
+ else:
+ self.data[self.node_stack[-1]].append(transformed)
+ finally:
+ del self.data[id(node)]
+
+ def visit_packed_node_out(self, node):
+ self.node_stack.pop()
+ try:
+ transformed = self.transform_packed_node(node, self.data[id(node)])
+ except Discard:
+ pass
+ else:
+ self.data[self.node_stack[-1]].append(transformed)
+ finally:
+ del self.data[id(node)]
+
+class ForestSumVisitor(ForestVisitor):
+ """
+ A visitor for prioritizing ambiguous parts of the Forest.
+
+ This visitor is used when support for explicit priorities on
+ rules is requested (whether normal, or invert). It walks the
+ forest (or subsets thereof) and cascades properties upwards
+ from the leaves.
+
+ It would be ideal to do this during parsing, however this would
+ require processing each Earley item multiple times. That's
+ a big performance drawback; so running a forest walk is the
+ lesser of two evils: there can be significantly more Earley
+ items created during parsing than there are SPPF nodes in the
+ final tree.
+ """
+ def __init__(self):
+ super(ForestSumVisitor, self).__init__(single_visit=True)
+
+ def visit_packed_node_in(self, node):
+ yield node.left
+ yield node.right
+
+ def visit_symbol_node_in(self, node):
+ return iter(node.children)
+
+ def visit_packed_node_out(self, node):
+ priority = node.rule.options.priority if not node.parent.is_intermediate and node.rule.options.priority else 0
+ priority += getattr(node.right, 'priority', 0)
+ priority += getattr(node.left, 'priority', 0)
+ node.priority = priority
+
+ def visit_symbol_node_out(self, node):
+ node.priority = max(child.priority for child in node.children)
+
+class PackedData():
+ """Used in transformationss of packed nodes to distinguish the data
+ that comes from the left child and the right child.
+ """
+
+ class _NoData():
+ pass
+
+ NO_DATA = _NoData()
+
+ def __init__(self, node, data):
+ self.left = self.NO_DATA
+ self.right = self.NO_DATA
+ if data:
+ if node.left is not None:
+ self.left = data[0]
+ if len(data) > 1:
+ self.right = data[1]
+ else:
+ self.right = data[0]
+
+class ForestToParseTree(ForestTransformer):
+    """Used by the Earley parser when ambiguity equals 'resolve' or
+ 'explicit'. Transforms an SPPF into an (ambiguous) parse tree.
+
+ tree_class: The tree class to use for construction
+ callbacks: A dictionary of rules to functions that output a tree
+ prioritizer: A ``ForestVisitor`` that manipulates the priorities of
+ ForestNodes
+ resolve_ambiguity: If True, ambiguities will be resolved based on
+ priorities. Otherwise, `_ambig` nodes will be in the resulting
+ tree.
+ use_cache: If True, the results of packed node transformations will be
+ cached.
+ """
+
+ def __init__(self, tree_class=Tree, callbacks=dict(), prioritizer=ForestSumVisitor(), resolve_ambiguity=True, use_cache=True):
+ super(ForestToParseTree, self).__init__()
+ self.tree_class = tree_class
+ self.callbacks = callbacks
+ self.prioritizer = prioritizer
+ self.resolve_ambiguity = resolve_ambiguity
+ self._use_cache = use_cache
+ self._cache = {}
+ self._on_cycle_retreat = False
+ self._cycle_node = None
+ self._successful_visits = set()
+
+ def visit(self, root):
+ if self.prioritizer:
+ self.prioritizer.visit(root)
+ super(ForestToParseTree, self).visit(root)
+ self._cache = {}
+
+ def on_cycle(self, node, path):
+ logger.debug("Cycle encountered in the SPPF at node: %s. "
+ "As infinite ambiguities cannot be represented in a tree, "
+ "this family of derivations will be discarded.", node)
+ self._cycle_node = node
+ self._on_cycle_retreat = True
+
+ def _check_cycle(self, node):
+ if self._on_cycle_retreat:
+ if id(node) == id(self._cycle_node) or id(node) in self._successful_visits:
+ self._cycle_node = None
+ self._on_cycle_retreat = False
+ return
+ raise Discard()
+
+ def _collapse_ambig(self, children):
+ new_children = []
+ for child in children:
+ if hasattr(child, 'data') and child.data == '_ambig':
+ new_children += child.children
+ else:
+ new_children.append(child)
+ return new_children
+
+ def _call_rule_func(self, node, data):
+ # called when transforming children of symbol nodes
+ # data is a list of trees or tokens that correspond to the
+ # symbol's rule expansion
+ return self.callbacks[node.rule](data)
+
+ def _call_ambig_func(self, node, data):
+ # called when transforming a symbol node
+ # data is a list of trees where each tree's data is
+ # equal to the name of the symbol or one of its aliases.
+ if len(data) > 1:
+ return self.tree_class('_ambig', data)
+ elif data:
+ return data[0]
+ raise Discard()
+
+ def transform_symbol_node(self, node, data):
+ if id(node) not in self._successful_visits:
+ raise Discard()
+ self._check_cycle(node)
+ self._successful_visits.remove(id(node))
+ data = self._collapse_ambig(data)
+ return self._call_ambig_func(node, data)
+
+ def transform_intermediate_node(self, node, data):
+ if id(node) not in self._successful_visits:
+ raise Discard()
+ self._check_cycle(node)
+ self._successful_visits.remove(id(node))
+ if len(data) > 1:
+ children = [self.tree_class('_inter', c) for c in data]
+ return self.tree_class('_iambig', children)
+ return data[0]
+
+ def transform_packed_node(self, node, data):
+ self._check_cycle(node)
+ if self.resolve_ambiguity and id(node.parent) in self._successful_visits:
+ raise Discard()
+ if self._use_cache and id(node) in self._cache:
+ return self._cache[id(node)]
+ children = []
+ assert len(data) <= 2
+ data = PackedData(node, data)
+ if data.left is not PackedData.NO_DATA:
+ if node.left.is_intermediate and isinstance(data.left, list):
+ children += data.left
+ else:
+ children.append(data.left)
+ if data.right is not PackedData.NO_DATA:
+ children.append(data.right)
+ if node.parent.is_intermediate:
+ return self._cache.setdefault(id(node), children)
+ return self._cache.setdefault(id(node), self._call_rule_func(node, children))
+
+ def visit_symbol_node_in(self, node):
+ super(ForestToParseTree, self).visit_symbol_node_in(node)
+ if self._on_cycle_retreat:
+ return
+ return node.children
+
+ def visit_packed_node_in(self, node):
+ self._on_cycle_retreat = False
+ to_visit = super(ForestToParseTree, self).visit_packed_node_in(node)
+ if not self.resolve_ambiguity or id(node.parent) not in self._successful_visits:
+ if not self._use_cache or id(node) not in self._cache:
+ return to_visit
+
+ def visit_packed_node_out(self, node):
+ super(ForestToParseTree, self).visit_packed_node_out(node)
+ if not self._on_cycle_retreat:
+ self._successful_visits.add(id(node.parent))
+
+def handles_ambiguity(func):
+ """Decorator for methods of subclasses of ``TreeForestTransformer``.
+ Denotes that the method should receive a list of transformed derivations."""
+ func.handles_ambiguity = True
+ return func
+
+class TreeForestTransformer(ForestToParseTree):
+ """A ``ForestTransformer`` with a tree ``Transformer``-like interface.
+ By default, it will construct a tree.
+
+ Methods provided via inheritance are called based on the rule/symbol
+ names of nodes in the forest.
+
+ Methods that act on rules will receive a list of the results of the
+ transformations of the rule's children. By default, trees and tokens.
+
+ Methods that act on tokens will receive a token.
+
+ Alternatively, methods that act on rules may be annotated with
+ ``handles_ambiguity``. In this case, the function will receive a list
+ of all the transformations of all the derivations of the rule.
+ By default, a list of trees where each tree.data is equal to the
+ rule name or one of its aliases.
+
+ Non-tree transformations are made possible by override of
+ ``__default__``, ``__default_token__``, and ``__default_ambig__``.
+
+ .. note::
+
+ Tree shaping features such as inlined rules and token filtering are
+ not built into the transformation. Positions are also not
+ propagated.
+
+ :param tree_class: The tree class to use for construction
+ :param prioritizer: A ``ForestVisitor`` that manipulates the priorities of
+ nodes in the SPPF.
+ :param resolve_ambiguity: If True, ambiguities will be resolved based on
+ priorities.
+ :param use_cache: If True, caches the results of some transformations,
+ potentially improving performance when ``resolve_ambiguity==False``.
+        Only use if you know what you are doing: i.e. all transformation
+ functions are pure and referentially transparent.
+ """
+
+ def __init__(self, tree_class=Tree, prioritizer=ForestSumVisitor(), resolve_ambiguity=True, use_cache=False):
+ super(TreeForestTransformer, self).__init__(tree_class, dict(), prioritizer, resolve_ambiguity, use_cache)
+
+ def __default__(self, name, data):
+ """Default operation on tree (for override).
+
+ Returns a tree with name with data as children.
+ """
+ return self.tree_class(name, data)
+
+ def __default_ambig__(self, name, data):
+ """Default operation on ambiguous rule (for override).
+
+        Wraps data in an '_ambig' node if it contains more than
+ one element.
+ """
+ if len(data) > 1:
+ return self.tree_class('_ambig', data)
+ elif data:
+ return data[0]
+ raise Discard()
+
+ def __default_token__(self, node):
+ """Default operation on ``Token`` (for override).
+
+ Returns ``node``.
+ """
+ return node
+
+ def transform_token_node(self, node):
+ return getattr(self, node.type, self.__default_token__)(node)
+
+ def _call_rule_func(self, node, data):
+ name = node.rule.alias or node.rule.options.template_source or node.rule.origin.name
+ user_func = getattr(self, name, self.__default__)
+ if user_func == self.__default__ or hasattr(user_func, 'handles_ambiguity'):
+ user_func = partial(self.__default__, name)
+ if not self.resolve_ambiguity:
+ wrapper = partial(AmbiguousIntermediateExpander, self.tree_class)
+ user_func = wrapper(user_func)
+ return user_func(data)
+
+ def _call_ambig_func(self, node, data):
+ name = node.s.name
+ user_func = getattr(self, name, self.__default_ambig__)
+ if user_func == self.__default_ambig__ or not hasattr(user_func, 'handles_ambiguity'):
+ user_func = partial(self.__default_ambig__, name)
+ return user_func(data)
+
+class ForestToPyDotVisitor(ForestVisitor):
+ """
+ A Forest visitor which writes the SPPF to a PNG.
+
+ The SPPF can get really large, really quickly because
+ of the amount of meta-data it stores, so this is probably
+ only useful for trivial trees and learning how the SPPF
+ is structured.
+ """
+ def __init__(self, rankdir="TB"):
+ super(ForestToPyDotVisitor, self).__init__(single_visit=True)
+ self.pydot = import_module('pydot')
+ self.graph = self.pydot.Dot(graph_type='digraph', rankdir=rankdir)
+
+ def visit(self, root, filename):
+ super(ForestToPyDotVisitor, self).visit(root)
+ try:
+ self.graph.write_png(filename)
+ except FileNotFoundError as e:
+            logger.error("Could not write png: %s", e)
+
+ def visit_token_node(self, node):
+ graph_node_id = str(id(node))
+ graph_node_label = "\"{}\"".format(node.value.replace('"', '\\"'))
+ graph_node_color = 0x808080
+ graph_node_style = "\"filled,rounded\""
+ graph_node_shape = "diamond"
+ graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
+ self.graph.add_node(graph_node)
+
+ def visit_packed_node_in(self, node):
+ graph_node_id = str(id(node))
+ graph_node_label = repr(node)
+ graph_node_color = 0x808080
+ graph_node_style = "filled"
+ graph_node_shape = "diamond"
+ graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
+ self.graph.add_node(graph_node)
+ yield node.left
+ yield node.right
+
+ def visit_packed_node_out(self, node):
+ graph_node_id = str(id(node))
+ graph_node = self.graph.get_node(graph_node_id)[0]
+ for child in [node.left, node.right]:
+ if child is not None:
+ child_graph_node_id = str(id(child))
+ child_graph_node = self.graph.get_node(child_graph_node_id)[0]
+ self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))
+ else:
+                #### Try to pick an ID above the Python object ID range; probably implementation-specific, but maybe this is okay.
+ child_graph_node_id = str(randint(100000000000000000000000000000,123456789012345678901234567890))
+ child_graph_node_style = "invis"
+ child_graph_node = self.pydot.Node(child_graph_node_id, style=child_graph_node_style, label="None")
+ child_edge_style = "invis"
+ self.graph.add_node(child_graph_node)
+ self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node, style=child_edge_style))
+
+ def visit_symbol_node_in(self, node):
+ graph_node_id = str(id(node))
+ graph_node_label = repr(node)
+ graph_node_color = 0x808080
+ graph_node_style = "\"filled\""
+ if node.is_intermediate:
+ graph_node_shape = "ellipse"
+ else:
+ graph_node_shape = "rectangle"
+ graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
+ self.graph.add_node(graph_node)
+ return iter(node.children)
+
+ def visit_symbol_node_out(self, node):
+ graph_node_id = str(id(node))
+ graph_node = self.graph.get_node(graph_node_id)[0]
+ for child in node.children:
+ child_graph_node_id = str(id(child))
+ child_graph_node = self.graph.get_node(child_graph_node_id)[0]
+ self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))
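A minimal usage sketch of the ``TreeForestTransformer`` interface defined above. The grammar, rule name and input are illustrative only, and it assumes the Earley parser is run with ``ambiguity='forest'`` so that ``parse()`` returns the root of the SPPF:

    from lark import Lark
    from lark.parsers.earley_forest import TreeForestTransformer, handles_ambiguity

    parser = Lark("""
        start: expr
        expr: expr "+" expr
            | NUMBER
        %import common.NUMBER
    """, parser='earley', ambiguity='forest')

    forest = parser.parse("1+2+3")          # root SymbolNode of the SPPF

    class PickFirst(TreeForestTransformer):
        @handles_ambiguity
        def expr(self, derivations):
            # receives one transformed tree per derivation of `expr`; keep the first
            return derivations[0]

    tree = PickFirst(resolve_ambiguity=False).transform(forest)
    print(tree.pretty())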
diff --git a/.venv/lib/python3.12/site-packages/lark/parsers/grammar_analysis.py b/.venv/lib/python3.12/site-packages/lark/parsers/grammar_analysis.py
new file mode 100644
index 00000000..737cb02a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/parsers/grammar_analysis.py
@@ -0,0 +1,185 @@
+from collections import Counter, defaultdict
+
+from ..utils import bfs, fzset, classify
+from ..exceptions import GrammarError
+from ..grammar import Rule, Terminal, NonTerminal
+
+
+class RulePtr(object):
+ __slots__ = ('rule', 'index')
+
+ def __init__(self, rule, index):
+ assert isinstance(rule, Rule)
+ assert index <= len(rule.expansion)
+ self.rule = rule
+ self.index = index
+
+ def __repr__(self):
+ before = [x.name for x in self.rule.expansion[:self.index]]
+ after = [x.name for x in self.rule.expansion[self.index:]]
+ return '<%s : %s * %s>' % (self.rule.origin.name, ' '.join(before), ' '.join(after))
+
+ @property
+ def next(self):
+ return self.rule.expansion[self.index]
+
+ def advance(self, sym):
+ assert self.next == sym
+ return RulePtr(self.rule, self.index+1)
+
+ @property
+ def is_satisfied(self):
+ return self.index == len(self.rule.expansion)
+
+ def __eq__(self, other):
+ return self.rule == other.rule and self.index == other.index
+ def __hash__(self):
+ return hash((self.rule, self.index))
+
+
+# state generation ensures no duplicate LR0ItemSets
+class LR0ItemSet(object):
+ __slots__ = ('kernel', 'closure', 'transitions', 'lookaheads')
+
+ def __init__(self, kernel, closure):
+ self.kernel = fzset(kernel)
+ self.closure = fzset(closure)
+ self.transitions = {}
+ self.lookaheads = defaultdict(set)
+
+ def __repr__(self):
+ return '{%s | %s}' % (', '.join([repr(r) for r in self.kernel]), ', '.join([repr(r) for r in self.closure]))
+
+
+def update_set(set1, set2):
+ if not set2 or set1 > set2:
+ return False
+
+ copy = set(set1)
+ set1 |= set2
+ return set1 != copy
+
+def calculate_sets(rules):
+    """Calculate the FIRST, FOLLOW and NULLABLE sets for the given rules.
+
+ Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets"""
+ symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules}
+
+ # foreach grammar rule X ::= Y(1) ... Y(k)
+ # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
+ # NULLABLE = NULLABLE union {X}
+ # for i = 1 to k
+ # if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
+ # FIRST(X) = FIRST(X) union FIRST(Y(i))
+ # for j = i+1 to k
+ # if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
+ # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
+ # if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
+ # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
+ # until none of NULLABLE,FIRST,FOLLOW changed in last iteration
+
+ NULLABLE = set()
+ FIRST = {}
+ FOLLOW = {}
+ for sym in symbols:
+ FIRST[sym]={sym} if sym.is_term else set()
+ FOLLOW[sym]=set()
+
+ # Calculate NULLABLE and FIRST
+ changed = True
+ while changed:
+ changed = False
+
+ for rule in rules:
+ if set(rule.expansion) <= NULLABLE:
+ if update_set(NULLABLE, {rule.origin}):
+ changed = True
+
+ for i, sym in enumerate(rule.expansion):
+ if set(rule.expansion[:i]) <= NULLABLE:
+ if update_set(FIRST[rule.origin], FIRST[sym]):
+ changed = True
+ else:
+ break
+
+ # Calculate FOLLOW
+ changed = True
+ while changed:
+ changed = False
+
+ for rule in rules:
+ for i, sym in enumerate(rule.expansion):
+ if i==len(rule.expansion)-1 or set(rule.expansion[i+1:]) <= NULLABLE:
+ if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
+ changed = True
+
+ for j in range(i+1, len(rule.expansion)):
+ if set(rule.expansion[i+1:j]) <= NULLABLE:
+ if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
+ changed = True
+
+ return FIRST, FOLLOW, NULLABLE
+
+
+class GrammarAnalyzer(object):
+ def __init__(self, parser_conf, debug=False):
+ self.debug = debug
+
+ root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal('$END')])
+ for start in parser_conf.start}
+
+ rules = parser_conf.rules + list(root_rules.values())
+ self.rules_by_origin = classify(rules, lambda r: r.origin)
+
+ if len(rules) != len(set(rules)):
+ duplicates = [item for item, count in Counter(rules).items() if count > 1]
+ raise GrammarError("Rules defined twice: %s" % ', '.join(str(i) for i in duplicates))
+
+ for r in rules:
+ for sym in r.expansion:
+ if not (sym.is_term or sym in self.rules_by_origin):
+ raise GrammarError("Using an undefined rule: %s" % sym)
+
+ self.start_states = {start: self.expand_rule(root_rule.origin)
+ for start, root_rule in root_rules.items()}
+
+ self.end_states = {start: fzset({RulePtr(root_rule, len(root_rule.expansion))})
+ for start, root_rule in root_rules.items()}
+
+ lr0_root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start)])
+ for start in parser_conf.start}
+
+ lr0_rules = parser_conf.rules + list(lr0_root_rules.values())
+ assert(len(lr0_rules) == len(set(lr0_rules)))
+
+ self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin)
+
+ # cache RulePtr(r, 0) in r (no duplicate RulePtr objects)
+ self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
+ for start, root_rule in lr0_root_rules.items()}
+
+ self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)
+
+ def expand_rule(self, source_rule, rules_by_origin=None):
+ "Returns all init_ptrs accessible by rule (recursive)"
+
+ if rules_by_origin is None:
+ rules_by_origin = self.rules_by_origin
+
+ init_ptrs = set()
+ def _expand_rule(rule):
+ assert not rule.is_term, rule
+
+ for r in rules_by_origin[rule]:
+ init_ptr = RulePtr(r, 0)
+ init_ptrs.add(init_ptr)
+
+ if r.expansion: # if not empty rule
+ new_r = init_ptr.next
+ if not new_r.is_term:
+ yield new_r
+
+ for _ in bfs([source_rule], _expand_rule):
+ pass
+
+ return fzset(init_ptrs)
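A small worked example of what ``calculate_sets`` above computes; the toy grammar and symbol names are made up for illustration:

    from lark.grammar import Rule, Terminal, NonTerminal
    from lark.parsers.grammar_analysis import calculate_sets

    s, a, b = NonTerminal('s'), NonTerminal('a'), NonTerminal('b')
    A, B = Terminal('A'), Terminal('B')

    rules = [
        Rule(s, [a, b]),   # s : a b
        Rule(a, [A]),      # a : "A"
        Rule(a, []),       # a : <empty>  -> a is nullable
        Rule(b, [B]),      # b : "B"
    ]

    FIRST, FOLLOW, NULLABLE = calculate_sets(rules)
    assert a in NULLABLE
    assert FIRST[s] == {A, B}    # "A" may be skipped, so "B" can also start an s
    assert FOLLOW[a] == {B}      # whatever follows a must start with "B"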
diff --git a/.venv/lib/python3.12/site-packages/lark/parsers/lalr_analysis.py b/.venv/lib/python3.12/site-packages/lark/parsers/lalr_analysis.py
new file mode 100644
index 00000000..f6a993b9
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/parsers/lalr_analysis.py
@@ -0,0 +1,304 @@
+"""This module builds a LALR(1) transition-table for lalr_parser.py
+
+For now, shift/reduce conflicts are automatically resolved as shifts.
+"""
+
+# Author: Erez Shinan (2017)
+# Email : erezshin@gmail.com
+
+from collections import defaultdict
+
+from ..utils import classify, classify_bool, bfs, fzset, Enumerator, logger
+from ..exceptions import GrammarError
+
+from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet
+from ..grammar import Rule
+
+###{standalone
+
+class Action:
+ def __init__(self, name):
+ self.name = name
+ def __str__(self):
+ return self.name
+ def __repr__(self):
+ return str(self)
+
+Shift = Action('Shift')
+Reduce = Action('Reduce')
+
+
+class ParseTable:
+ def __init__(self, states, start_states, end_states):
+ self.states = states
+ self.start_states = start_states
+ self.end_states = end_states
+
+ def serialize(self, memo):
+ tokens = Enumerator()
+ rules = Enumerator()
+
+ states = {
+ state: {tokens.get(token): ((1, arg.serialize(memo)) if action is Reduce else (0, arg))
+ for token, (action, arg) in actions.items()}
+ for state, actions in self.states.items()
+ }
+
+ return {
+ 'tokens': tokens.reversed(),
+ 'states': states,
+ 'start_states': self.start_states,
+ 'end_states': self.end_states,
+ }
+
+ @classmethod
+ def deserialize(cls, data, memo):
+ tokens = data['tokens']
+ states = {
+ state: {tokens[token]: ((Reduce, Rule.deserialize(arg, memo)) if action==1 else (Shift, arg))
+ for token, (action, arg) in actions.items()}
+ for state, actions in data['states'].items()
+ }
+ return cls(states, data['start_states'], data['end_states'])
+
+
+class IntParseTable(ParseTable):
+
+ @classmethod
+ def from_ParseTable(cls, parse_table):
+ enum = list(parse_table.states)
+ state_to_idx = {s:i for i,s in enumerate(enum)}
+ int_states = {}
+
+ for s, la in parse_table.states.items():
+ la = {k:(v[0], state_to_idx[v[1]]) if v[0] is Shift else v
+ for k,v in la.items()}
+ int_states[ state_to_idx[s] ] = la
+
+
+ start_states = {start:state_to_idx[s] for start, s in parse_table.start_states.items()}
+ end_states = {start:state_to_idx[s] for start, s in parse_table.end_states.items()}
+ return cls(int_states, start_states, end_states)
+
+###}
+
+
+# digraph and traverse, see The Theory and Practice of Compiler Writing
+
+# computes F(x) = G(x) union (union { G(y) | x R y })
+# X: nodes
+# R: relation (function mapping node -> list of nodes that satisfy the relation)
+# G: set valued function
+def digraph(X, R, G):
+ F = {}
+ S = []
+ N = {}
+ for x in X:
+ N[x] = 0
+ for x in X:
+ # this is always true for the first iteration, but N[x] may be updated in traverse below
+ if N[x] == 0:
+ traverse(x, S, N, X, R, G, F)
+ return F
+
+# x: single node
+# S: stack
+# N: weights
+# X: nodes
+# R: relation (see above)
+# G: set valued function
+# F: set valued function we are computing (map of input -> output)
+def traverse(x, S, N, X, R, G, F):
+ S.append(x)
+ d = len(S)
+ N[x] = d
+ F[x] = G[x]
+ for y in R[x]:
+ if N[y] == 0:
+ traverse(y, S, N, X, R, G, F)
+ n_x = N[x]
+ assert(n_x > 0)
+ n_y = N[y]
+ assert(n_y != 0)
+ if (n_y > 0) and (n_y < n_x):
+ N[x] = n_y
+ F[x].update(F[y])
+ if N[x] == d:
+ f_x = F[x]
+ while True:
+ z = S.pop()
+ N[z] = -1
+ F[z] = f_x
+ if z == x:
+ break
+
+
+class LALR_Analyzer(GrammarAnalyzer):
+ def __init__(self, parser_conf, debug=False):
+ GrammarAnalyzer.__init__(self, parser_conf, debug)
+ self.nonterminal_transitions = []
+ self.directly_reads = defaultdict(set)
+ self.reads = defaultdict(set)
+ self.includes = defaultdict(set)
+ self.lookback = defaultdict(set)
+
+
+ def compute_lr0_states(self):
+ self.lr0_states = set()
+ # map of kernels to LR0ItemSets
+ cache = {}
+
+ def step(state):
+ _, unsat = classify_bool(state.closure, lambda rp: rp.is_satisfied)
+
+ d = classify(unsat, lambda rp: rp.next)
+ for sym, rps in d.items():
+ kernel = fzset({rp.advance(sym) for rp in rps})
+ new_state = cache.get(kernel, None)
+ if new_state is None:
+ closure = set(kernel)
+ for rp in kernel:
+ if not rp.is_satisfied and not rp.next.is_term:
+ closure |= self.expand_rule(rp.next, self.lr0_rules_by_origin)
+ new_state = LR0ItemSet(kernel, closure)
+ cache[kernel] = new_state
+
+ state.transitions[sym] = new_state
+ yield new_state
+
+ self.lr0_states.add(state)
+
+ for _ in bfs(self.lr0_start_states.values(), step):
+ pass
+
+ def compute_reads_relations(self):
+ # handle start state
+ for root in self.lr0_start_states.values():
+ assert(len(root.kernel) == 1)
+ for rp in root.kernel:
+ assert(rp.index == 0)
+ self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ])
+
+ for state in self.lr0_states:
+ seen = set()
+ for rp in state.closure:
+ if rp.is_satisfied:
+ continue
+ s = rp.next
+            # if s is not a nonterminal
+ if s not in self.lr0_rules_by_origin:
+ continue
+ if s in seen:
+ continue
+ seen.add(s)
+ nt = (state, s)
+ self.nonterminal_transitions.append(nt)
+ dr = self.directly_reads[nt]
+ r = self.reads[nt]
+ next_state = state.transitions[s]
+ for rp2 in next_state.closure:
+ if rp2.is_satisfied:
+ continue
+ s2 = rp2.next
+ # if s2 is a terminal
+ if s2 not in self.lr0_rules_by_origin:
+ dr.add(s2)
+ if s2 in self.NULLABLE:
+ r.add((next_state, s2))
+
+ def compute_includes_lookback(self):
+ for nt in self.nonterminal_transitions:
+ state, nonterminal = nt
+ includes = []
+ lookback = self.lookback[nt]
+ for rp in state.closure:
+ if rp.rule.origin != nonterminal:
+ continue
+ # traverse the states for rp(.rule)
+ state2 = state
+ for i in range(rp.index, len(rp.rule.expansion)):
+ s = rp.rule.expansion[i]
+ nt2 = (state2, s)
+ state2 = state2.transitions[s]
+ if nt2 not in self.reads:
+ continue
+ for j in range(i + 1, len(rp.rule.expansion)):
+ if not rp.rule.expansion[j] in self.NULLABLE:
+ break
+ else:
+ includes.append(nt2)
+ # state2 is at the final state for rp.rule
+ if rp.index == 0:
+ for rp2 in state2.closure:
+ if (rp2.rule == rp.rule) and rp2.is_satisfied:
+ lookback.add((state2, rp2.rule))
+ for nt2 in includes:
+ self.includes[nt2].add(nt)
+
+ def compute_lookaheads(self):
+ read_sets = digraph(self.nonterminal_transitions, self.reads, self.directly_reads)
+ follow_sets = digraph(self.nonterminal_transitions, self.includes, read_sets)
+
+ for nt, lookbacks in self.lookback.items():
+ for state, rule in lookbacks:
+ for s in follow_sets[nt]:
+ state.lookaheads[s].add(rule)
+
+ def compute_lalr1_states(self):
+ m = {}
+ reduce_reduce = []
+ for state in self.lr0_states:
+ actions = {}
+ for la, next_state in state.transitions.items():
+ actions[la] = (Shift, next_state.closure)
+ for la, rules in state.lookaheads.items():
+ if len(rules) > 1:
+ # Try to resolve conflict based on priority
+ p = [(r.options.priority or 0, r) for r in rules]
+ p.sort(key=lambda r: r[0], reverse=True)
+ best, second_best = p[:2]
+ if best[0] > second_best[0]:
+ rules = [best[1]]
+ else:
+ reduce_reduce.append((state, la, rules))
+ if la in actions:
+ if self.debug:
+ logger.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)
+ logger.warning(' * %s', list(rules)[0])
+ else:
+ actions[la] = (Reduce, list(rules)[0])
+ m[state] = { k.name: v for k, v in actions.items() }
+
+ if reduce_reduce:
+ msgs = []
+ for state, la, rules in reduce_reduce:
+ msg = 'Reduce/Reduce collision in %s between the following rules: %s' % (la, ''.join([ '\n\t- ' + str(r) for r in rules ]))
+ if self.debug:
+ msg += '\n collision occurred in state: {%s\n }' % ''.join(['\n\t' + str(x) for x in state.closure])
+ msgs.append(msg)
+ raise GrammarError('\n\n'.join(msgs))
+
+ states = { k.closure: v for k, v in m.items() }
+
+ # compute end states
+ end_states = {}
+ for state in states:
+ for rp in state:
+ for start in self.lr0_start_states:
+ if rp.rule.origin.name == ('$root_' + start) and rp.is_satisfied:
+                        assert start not in end_states
+ end_states[start] = state
+
+ _parse_table = ParseTable(states, { start: state.closure for start, state in self.lr0_start_states.items() }, end_states)
+
+ if self.debug:
+ self.parse_table = _parse_table
+ else:
+ self.parse_table = IntParseTable.from_ParseTable(_parse_table)
+
+ def compute_lalr(self):
+ self.compute_lr0_states()
+ self.compute_reads_relations()
+ self.compute_includes_lookback()
+ self.compute_lookaheads()
+ self.compute_lalr1_states()
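A tiny sketch of the ``digraph``/``traverse`` helpers above, outside any grammar: F(x) is G(x) plus G(y) for every y reachable from x through R. The relation and the set-valued function are given as plain dicts, which is how the analyzer indexes them (note that ``digraph`` updates the sets in G in place):

    from lark.parsers.lalr_analysis import digraph

    X = ['a', 'b', 'c']
    R = {'a': ['b'], 'b': ['c'], 'c': []}    # a R b,  b R c
    G = {'a': {1}, 'b': {2}, 'c': {3}}

    F = digraph(X, R, G)
    assert F == {'a': {1, 2, 3}, 'b': {2, 3}, 'c': {3}}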
diff --git a/.venv/lib/python3.12/site-packages/lark/parsers/lalr_interactive_parser.py b/.venv/lib/python3.12/site-packages/lark/parsers/lalr_interactive_parser.py
new file mode 100644
index 00000000..d6780cba
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/parsers/lalr_interactive_parser.py
@@ -0,0 +1,132 @@
+# This module provides a LALR interactive parser, which is used for debugging and error handling
+
+from copy import copy
+
+from .. import Token
+from ..exceptions import UnexpectedToken
+
+
+class InteractiveParser(object):
+ """InteractiveParser gives you advanced control over parsing and error handling when parsing with LALR.
+
+ For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``.
+ """
+ def __init__(self, parser, parser_state, lexer_state):
+ self.parser = parser
+ self.parser_state = parser_state
+ self.lexer_state = lexer_state
+
+ def feed_token(self, token):
+ """Feed the parser with a token, and advance it to the next state, as if it received it from the lexer.
+
+ Note that ``token`` has to be an instance of ``Token``.
+ """
+ return self.parser_state.feed_token(token, token.type == '$END')
+
+ def exhaust_lexer(self):
+ """Try to feed the rest of the lexer state into the interactive parser.
+
+ Note that this modifies the instance in place and does not feed an '$END' Token"""
+ for token in self.lexer_state.lex(self.parser_state):
+ self.parser_state.feed_token(token)
+
+ def feed_eof(self, last_token=None):
+ """Feed a '$END' Token. Borrows from 'last_token' if given."""
+ eof = Token.new_borrow_pos('$END', '', last_token) if last_token is not None else Token('$END', '', 0, 1, 1)
+ return self.feed_token(eof)
+
+
+ def __copy__(self):
+ """Create a new interactive parser with a separate state.
+
+ Calls to feed_token() won't affect the old instance, and vice-versa.
+ """
+ return type(self)(
+ self.parser,
+ copy(self.parser_state),
+ copy(self.lexer_state),
+ )
+
+ def copy(self):
+ return copy(self)
+
+ def __eq__(self, other):
+ if not isinstance(other, InteractiveParser):
+ return False
+
+ return self.parser_state == other.parser_state and self.lexer_state == other.lexer_state
+
+ def as_immutable(self):
+ """Convert to an ``ImmutableInteractiveParser``."""
+ p = copy(self)
+ return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_state)
+
+ def pretty(self):
+ """Print the output of ``choices()`` in a way that's easier to read."""
+ out = ["Parser choices:"]
+ for k, v in self.choices().items():
+ out.append('\t- %s -> %r' % (k, v))
+ out.append('stack size: %s' % len(self.parser_state.state_stack))
+ return '\n'.join(out)
+
+ def choices(self):
+ """Returns a dictionary of token types, matched to their action in the parser.
+
+ Only returns token types that are accepted by the current state.
+
+ Updated by ``feed_token()``.
+ """
+ return self.parser_state.parse_conf.parse_table.states[self.parser_state.position]
+
+ def accepts(self):
+ """Returns the set of possible tokens that will advance the parser into a new valid state."""
+ accepts = set()
+ for t in self.choices():
+ if t.isupper(): # is terminal?
+ new_cursor = copy(self)
+ try:
+ new_cursor.feed_token(Token(t, ''))
+ except UnexpectedToken:
+ pass
+ else:
+ accepts.add(t)
+ return accepts
+
+ def resume_parse(self):
+ """Resume automated parsing from the current state."""
+ return self.parser.parse_from_state(self.parser_state)
+
+
+
+class ImmutableInteractiveParser(InteractiveParser):
+ """Same as ``InteractiveParser``, but operations create a new instance instead
+ of changing it in-place.
+ """
+
+ result = None
+
+ def __hash__(self):
+ return hash((self.parser_state, self.lexer_state))
+
+ def feed_token(self, token):
+ c = copy(self)
+ c.result = InteractiveParser.feed_token(c, token)
+ return c
+
+ def exhaust_lexer(self):
+ """Try to feed the rest of the lexer state into the parser.
+
+ Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token"""
+ cursor = self.as_mutable()
+ cursor.exhaust_lexer()
+ return cursor.as_immutable()
+
+ def as_mutable(self):
+ """Convert to an ``InteractiveParser``."""
+ p = copy(self)
+ return InteractiveParser(p.parser, p.parser_state, p.lexer_state)
+
+
+# Deprecated class names for the interactive parser
+ParserPuppet = InteractiveParser
+ImmutableParserPuppet = ImmutableInteractiveParser
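A minimal sketch of driving the interactive parser by hand. The grammar and input are illustrative, and it assumes ``Lark.parse_interactive()`` is the front end used to obtain an ``InteractiveParser``:

    from lark import Lark

    parser = Lark('start: "a" "b"', parser='lalr')
    ip = parser.parse_interactive("ab")

    print(ip.accepts())    # terminal names the current state can shift
    ip.exhaust_lexer()     # feed the remaining tokens (does not feed '$END')
    tree = ip.feed_eof()   # feed '$END' and get the finished parse result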
diff --git a/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py b/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py
new file mode 100644
index 00000000..d916b463
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py
@@ -0,0 +1,200 @@
+"""This module implements a LALR(1) Parser
+"""
+# Author: Erez Shinan (2017)
+# Email : erezshin@gmail.com
+from copy import deepcopy, copy
+from ..exceptions import UnexpectedInput, UnexpectedToken
+from ..lexer import Token
+from ..utils import Serialize
+
+from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
+from .lalr_interactive_parser import InteractiveParser
+from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken
+
+###{standalone
+
+class LALR_Parser(Serialize):
+ def __init__(self, parser_conf, debug=False):
+ analysis = LALR_Analyzer(parser_conf, debug=debug)
+ analysis.compute_lalr()
+ callbacks = parser_conf.callbacks
+
+ self._parse_table = analysis.parse_table
+ self.parser_conf = parser_conf
+ self.parser = _Parser(analysis.parse_table, callbacks, debug)
+
+ @classmethod
+ def deserialize(cls, data, memo, callbacks, debug=False):
+ inst = cls.__new__(cls)
+ inst._parse_table = IntParseTable.deserialize(data, memo)
+ inst.parser = _Parser(inst._parse_table, callbacks, debug)
+ return inst
+
+ def serialize(self, memo):
+ return self._parse_table.serialize(memo)
+
+ def parse_interactive(self, lexer, start):
+ return self.parser.parse(lexer, start, start_interactive=True)
+
+ def parse(self, lexer, start, on_error=None):
+ try:
+ return self.parser.parse(lexer, start)
+ except UnexpectedInput as e:
+ if on_error is None:
+ raise
+
+ while True:
+ if isinstance(e, UnexpectedCharacters):
+ s = e.interactive_parser.lexer_state.state
+ p = s.line_ctr.char_pos
+
+ if not on_error(e):
+ raise e
+
+                if isinstance(e, UnexpectedCharacters):
+                    # If the user didn't change the character position, skip the offending character ourselves
+                    if p == s.line_ctr.char_pos:
+                        s.line_ctr.feed(s.text[p:p+1])
+
+ try:
+ return e.interactive_parser.resume_parse()
+ except UnexpectedToken as e2:
+ if (isinstance(e, UnexpectedToken)
+ and e.token.type == e2.token.type == '$END'
+ and e.interactive_parser == e2.interactive_parser):
+ # Prevent infinite loop
+ raise e2
+ e = e2
+ except UnexpectedCharacters as e2:
+ e = e2
+
+
+class ParseConf(object):
+ __slots__ = 'parse_table', 'callbacks', 'start', 'start_state', 'end_state', 'states'
+
+ def __init__(self, parse_table, callbacks, start):
+ self.parse_table = parse_table
+
+ self.start_state = self.parse_table.start_states[start]
+ self.end_state = self.parse_table.end_states[start]
+ self.states = self.parse_table.states
+
+ self.callbacks = callbacks
+ self.start = start
+
+
+class ParserState(object):
+ __slots__ = 'parse_conf', 'lexer', 'state_stack', 'value_stack'
+
+ def __init__(self, parse_conf, lexer, state_stack=None, value_stack=None):
+ self.parse_conf = parse_conf
+ self.lexer = lexer
+ self.state_stack = state_stack or [self.parse_conf.start_state]
+ self.value_stack = value_stack or []
+
+ @property
+ def position(self):
+ return self.state_stack[-1]
+
+ # Necessary for match_examples() to work
+ def __eq__(self, other):
+ if not isinstance(other, ParserState):
+ return NotImplemented
+ return len(self.state_stack) == len(other.state_stack) and self.position == other.position
+
+ def __copy__(self):
+ return type(self)(
+ self.parse_conf,
+ self.lexer, # XXX copy
+ copy(self.state_stack),
+ deepcopy(self.value_stack),
+ )
+
+ def copy(self):
+ return copy(self)
+
+ def feed_token(self, token, is_end=False):
+ state_stack = self.state_stack
+ value_stack = self.value_stack
+ states = self.parse_conf.states
+ end_state = self.parse_conf.end_state
+ callbacks = self.parse_conf.callbacks
+
+ while True:
+ state = state_stack[-1]
+ try:
+ action, arg = states[state][token.type]
+ except KeyError:
+ expected = {s for s in states[state].keys() if s.isupper()}
+ raise UnexpectedToken(token, expected, state=self, interactive_parser=None)
+
+ assert arg != end_state
+
+ if action is Shift:
+ # shift once and return
+ assert not is_end
+ state_stack.append(arg)
+ value_stack.append(token if token.type not in callbacks else callbacks[token.type](token))
+ return
+ else:
+ # reduce+shift as many times as necessary
+ rule = arg
+ size = len(rule.expansion)
+ if size:
+ s = value_stack[-size:]
+ del state_stack[-size:]
+ del value_stack[-size:]
+ else:
+ s = []
+
+ value = callbacks[rule](s)
+
+ _action, new_state = states[state_stack[-1]][rule.origin.name]
+ assert _action is Shift
+ state_stack.append(new_state)
+ value_stack.append(value)
+
+ if is_end and state_stack[-1] == end_state:
+ return value_stack[-1]
+
+class _Parser(object):
+ def __init__(self, parse_table, callbacks, debug=False):
+ self.parse_table = parse_table
+ self.callbacks = callbacks
+ self.debug = debug
+
+ def parse(self, lexer, start, value_stack=None, state_stack=None, start_interactive=False):
+ parse_conf = ParseConf(self.parse_table, self.callbacks, start)
+ parser_state = ParserState(parse_conf, lexer, state_stack, value_stack)
+ if start_interactive:
+ return InteractiveParser(self, parser_state, parser_state.lexer)
+ return self.parse_from_state(parser_state)
+
+
+ def parse_from_state(self, state):
+ # Main LALR-parser loop
+ try:
+ token = None
+ for token in state.lexer.lex(state):
+ state.feed_token(token)
+
+ end_token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
+ return state.feed_token(end_token, True)
+ except UnexpectedInput as e:
+ try:
+ e.interactive_parser = InteractiveParser(self, state, state.lexer)
+ except NameError:
+ pass
+ raise e
+ except Exception as e:
+ if self.debug:
+ print("")
+ print("STATE STACK DUMP")
+ print("----------------")
+ for i, s in enumerate(state.state_stack):
+ print('%d)' % i , s)
+ print("")
+
+ raise
+###}
+
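A short sketch of the ``on_error`` recovery loop in ``LALR_Parser.parse`` above, as reached through ``Lark.parse``; the grammar and input are illustrative:

    from lark import Lark, UnexpectedInput

    parser = Lark('start: "a"+', parser='lalr')

    def skip_errors(e):
        assert isinstance(e, UnexpectedInput)
        return True     # drop the offending input and resume parsing

    tree = parser.parse("aaxa", on_error=skip_errors)   # the stray "x" is skipped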
diff --git a/.venv/lib/python3.12/site-packages/lark/parsers/lalr_puppet.py b/.venv/lib/python3.12/site-packages/lark/parsers/lalr_puppet.py
new file mode 100644
index 00000000..6ea6d894
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/parsers/lalr_puppet.py
@@ -0,0 +1,3 @@
+# Deprecated
+
+from .lalr_interactive_parser import ParserPuppet, ImmutableParserPuppet \ No newline at end of file
diff --git a/.venv/lib/python3.12/site-packages/lark/parsers/xearley.py b/.venv/lib/python3.12/site-packages/lark/parsers/xearley.py
new file mode 100644
index 00000000..335d9ad4
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/parsers/xearley.py
@@ -0,0 +1,157 @@
+"""This module implements an experimental Earley parser with a dynamic lexer
+
+The core Earley algorithm used here is based on Elizabeth Scott's implementation, here:
+ https://www.sciencedirect.com/science/article/pii/S1571066108001497
+
+That is probably the best reference for understanding the algorithm here.
+
+The Earley parser outputs an SPPF-tree as per that document. The SPPF tree format
+is better documented here:
+ http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/
+
+Instead of running a lexer beforehand, or using a costly char-by-char method, this parser
+uses regular expressions by necessity, achieving high performance while maintaining all of
+Earley's power in parsing any CFG.
+"""
+
+from collections import defaultdict
+
+from ..tree import Tree
+from ..exceptions import UnexpectedCharacters
+from ..lexer import Token
+from ..grammar import Terminal
+from .earley import Parser as BaseParser
+from .earley_forest import SymbolNode
+
+
+class Parser(BaseParser):
+ def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, ignore = (), complete_lex = False, debug=False, tree_class=Tree):
+ BaseParser.__init__(self, parser_conf, term_matcher, resolve_ambiguity, debug, tree_class)
+ self.ignore = [Terminal(t) for t in ignore]
+ self.complete_lex = complete_lex
+
+ def _parse(self, stream, columns, to_scan, start_symbol=None):
+
+ def scan(i, to_scan):
+ """The core Earley Scanner.
+
+ This is a custom implementation of the scanner that uses the
+ Lark lexer to match tokens. The scan list is built by the
+ Earley predictor, based on the previously completed tokens.
+ This ensures that at each phase of the parse we have a custom
+ lexer context, allowing for more complex ambiguities."""
+
+ node_cache = {}
+
+ # 1) Loop the expectations and ask the lexer to match.
+ # Since regexp is forward looking on the input stream, and we only
+ # want to process tokens when we hit the point in the stream at which
+ # they complete, we push all tokens into a buffer (delayed_matches), to
+ # be held possibly for a later parse step when we reach the point in the
+ # input stream at which they complete.
+ for item in set(to_scan):
+ m = match(item.expect, stream, i)
+ if m:
+ t = Token(item.expect.name, m.group(0), i, text_line, text_column)
+ delayed_matches[m.end()].append( (item, i, t) )
+
+ if self.complete_lex:
+ s = m.group(0)
+ for j in range(1, len(s)):
+ m = match(item.expect, s[:-j])
+ if m:
+ t = Token(item.expect.name, m.group(0), i, text_line, text_column)
+ delayed_matches[i+m.end()].append( (item, i, t) )
+
+ # XXX The following 3 lines were commented out for causing a bug. See issue #768
+ # # Remove any items that successfully matched in this pass from the to_scan buffer.
+ # # This ensures we don't carry over tokens that already matched, if we're ignoring below.
+ # to_scan.remove(item)
+
+ # 3) Process any ignores. This is typically used for e.g. whitespace.
+ # We carry over any unmatched items from the to_scan buffer to be matched again after
+ # the ignore. This should allow us to use ignored symbols in non-terminals to implement
+ # e.g. mandatory spacing.
+ for x in self.ignore:
+ m = match(x, stream, i)
+ if m:
+ # Carry over any items still in the scan buffer, to past the end of the ignored items.
+ delayed_matches[m.end()].extend([(item, i, None) for item in to_scan ])
+
+                    # If we're ignoring up to the end of the file, carry over the start symbol if it already completed.
+ delayed_matches[m.end()].extend([(item, i, None) for item in columns[i] if item.is_complete and item.s == start_symbol])
+
+ next_to_scan = set()
+ next_set = set()
+ columns.append(next_set)
+ transitives.append({})
+
+ ## 4) Process Tokens from delayed_matches.
+ # This is the core of the Earley scanner. Create an SPPF node for each Token,
+ # and create the symbol node in the SPPF tree. Advance the item that completed,
+ # and add the resulting new item to either the Earley set (for processing by the
+ # completer/predictor) or the to_scan buffer for the next parse step.
+ for item, start, token in delayed_matches[i+1]:
+ if token is not None:
+ token.end_line = text_line
+ token.end_column = text_column + 1
+ token.end_pos = i + 1
+
+ new_item = item.advance()
+ label = (new_item.s, new_item.start, i)
+ new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
+ new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token)
+ else:
+ new_item = item
+
+ if new_item.expect in self.TERMINALS:
+ # add (B ::= Aai+1.B, h, y) to Q'
+ next_to_scan.add(new_item)
+ else:
+ # add (B ::= Aa+1.B, h, y) to Ei+1
+ next_set.add(new_item)
+
+ del delayed_matches[i+1] # No longer needed, so unburden memory
+
+ if not next_set and not delayed_matches and not next_to_scan:
+ considered_rules = list(sorted(to_scan, key=lambda key: key.rule.origin.name))
+ raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan},
+ set(to_scan), state=frozenset(i.s for i in to_scan),
+ considered_rules=considered_rules
+ )
+
+ return next_to_scan
+
+
+ delayed_matches = defaultdict(list)
+ match = self.term_matcher
+
+ # Cache for nodes & tokens created in a particular parse step.
+ transitives = [{}]
+
+ text_line = 1
+ text_column = 1
+
+ ## The main Earley loop.
+ # Run the Prediction/Completion cycle for any Items in the current Earley set.
+ # Completions will be added to the SPPF tree, and predictions will be recursively
+ # processed down to terminals/empty nodes to be added to the scanner for the next
+ # step.
+ i = 0
+ for token in stream:
+ self.predict_and_complete(i, to_scan, columns, transitives)
+
+ to_scan = scan(i, to_scan)
+
+ if token == '\n':
+ text_line += 1
+ text_column = 1
+ else:
+ text_column += 1
+ i += 1
+
+ self.predict_and_complete(i, to_scan, columns, transitives)
+
+ ## Column is now the final column in the parse.
+ assert i == len(columns)-1
+ return to_scan
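A small sketch of selecting this dynamic-lexer Earley parser through the ``Lark`` front end; the grammar is illustrative. ``lexer='dynamic'`` routes to this module, and ``lexer='dynamic_complete'`` additionally enables ``complete_lex``:

    from lark import Lark

    parser = Lark(r'''
        start: WORD "=" WORD
        WORD: /\w+/
        %ignore " "
    ''', parser='earley', lexer='dynamic')

    print(parser.parse("answer = 42").pretty())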
diff --git a/.venv/lib/python3.12/site-packages/lark/reconstruct.py b/.venv/lib/python3.12/site-packages/lark/reconstruct.py
new file mode 100644
index 00000000..ab2fb38b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/reconstruct.py
@@ -0,0 +1,101 @@
+"""Reconstruct text from a tree, based on Lark grammar"""
+
+import unicodedata
+
+from .tree import Tree
+from .visitors import Transformer_InPlace
+from .lexer import Token, PatternStr
+from .grammar import Terminal, NonTerminal
+
+from .tree_matcher import TreeMatcher, is_discarded_terminal
+from .utils import is_id_continue
+
+def is_iter_empty(i):
+ try:
+ _ = next(i)
+ return False
+ except StopIteration:
+ return True
+
+
+class WriteTokensTransformer(Transformer_InPlace):
+    "Inserts discarded tokens into their correct place, according to the rules of the grammar"
+
+ def __init__(self, tokens, term_subs):
+ self.tokens = tokens
+ self.term_subs = term_subs
+
+ def __default__(self, data, children, meta):
+ if not getattr(meta, 'match_tree', False):
+ return Tree(data, children)
+
+ iter_args = iter(children)
+ to_write = []
+ for sym in meta.orig_expansion:
+ if is_discarded_terminal(sym):
+ try:
+ v = self.term_subs[sym.name](sym)
+ except KeyError:
+ t = self.tokens[sym.name]
+ if not isinstance(t.pattern, PatternStr):
+ raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t)
+
+ v = t.pattern.value
+ to_write.append(v)
+ else:
+ x = next(iter_args)
+ if isinstance(x, list):
+ to_write += x
+ else:
+ if isinstance(x, Token):
+ assert Terminal(x.type) == sym, x
+ else:
+ assert NonTerminal(x.data) == sym, (sym, x)
+ to_write.append(x)
+
+ assert is_iter_empty(iter_args)
+ return to_write
+
+
+class Reconstructor(TreeMatcher):
+ """
+ A Reconstructor that will, given a full parse Tree, generate source code.
+
+ Note:
+        The reconstructor cannot generate values from regexps. If you need to produce discarded
+        terminals defined by regexps, such as newlines, use `term_subs` to provide default values for them.
+
+    Parameters:
+ parser: a Lark instance
+ term_subs: a dictionary of [Terminal name as str] to [output text as str]
+ """
+
+ def __init__(self, parser, term_subs=None):
+ TreeMatcher.__init__(self, parser)
+
+ self.write_tokens = WriteTokensTransformer({t.name:t for t in self.tokens}, term_subs or {})
+
+ def _reconstruct(self, tree):
+ unreduced_tree = self.match_tree(tree, tree.data)
+
+ res = self.write_tokens.transform(unreduced_tree)
+ for item in res:
+ if isinstance(item, Tree):
+ # TODO use orig_expansion.rulename to support templates
+ for x in self._reconstruct(item):
+ yield x
+ else:
+ yield item
+
+ def reconstruct(self, tree, postproc=None, insert_spaces=True):
+ x = self._reconstruct(tree)
+ if postproc:
+ x = postproc(x)
+ y = []
+ prev_item = ''
+ for item in x:
+ if insert_spaces and prev_item and item and is_id_continue(prev_item[-1]) and is_id_continue(item[0]):
+ y.append(' ')
+ y.append(item)
+ prev_item = item
+ return ''.join(y)
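A hedged round-trip sketch for the ``Reconstructor`` above; the grammar and input are illustrative, and ``maybe_placeholders=False`` is passed because the tree matcher expects it:

    from lark import Lark
    from lark.reconstruct import Reconstructor

    grammar = r'''
        start: (WORD ";")+
        WORD: /[a-z]+/
        %ignore " "
    '''
    parser = Lark(grammar, parser='lalr', maybe_placeholders=False)
    tree = parser.parse("foo; bar;")
    print(Reconstructor(parser).reconstruct(tree))   # prints something like "foo;bar;" -- ignored spaces are not restored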
diff --git a/.venv/lib/python3.12/site-packages/lark/tools/__init__.py b/.venv/lib/python3.12/site-packages/lark/tools/__init__.py
new file mode 100644
index 00000000..4ecf13d4
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/tools/__init__.py
@@ -0,0 +1,65 @@
+import sys
+from argparse import ArgumentParser, FileType
+try:
+ from textwrap import indent
+except ImportError:
+ def indent(text, prefix):
+ return ''.join(prefix + line for line in text.splitlines(True))
+from logging import DEBUG, INFO, WARN, ERROR
+import warnings
+
+from lark import Lark, logger
+
+lalr_argparser = ArgumentParser(add_help=False, epilog='Look at the Lark documentation for more info on the options')
+
+flags = [
+ ('d', 'debug'),
+ 'keep_all_tokens',
+ 'regex',
+ 'propagate_positions',
+ 'maybe_placeholders',
+ 'use_bytes'
+]
+
+options = ['start', 'lexer']
+
+lalr_argparser.add_argument('-v', '--verbose', action='count', default=0, help="Increase Logger output level, up to three times")
+lalr_argparser.add_argument('-s', '--start', action='append', default=[])
+lalr_argparser.add_argument('-l', '--lexer', default='contextual', choices=('standard', 'contextual'))
+k = {'encoding': 'utf-8'} if sys.version_info > (3, 4) else {}
+lalr_argparser.add_argument('-o', '--out', type=FileType('w', **k), default=sys.stdout, help='the output file (default=stdout)')
+lalr_argparser.add_argument('grammar_file', type=FileType('r', **k), help='A valid .lark file')
+
+for f in flags:
+ if isinstance(f, tuple):
+ options.append(f[1])
+ lalr_argparser.add_argument('-' + f[0], '--' + f[1], action='store_true')
+ else:
+ options.append(f)
+ lalr_argparser.add_argument('--' + f, action='store_true')
+
+
+def build_lalr(namespace):
+ logger.setLevel((ERROR, WARN, INFO, DEBUG)[min(namespace.verbose, 3)])
+ if len(namespace.start) == 0:
+ namespace.start.append('start')
+ kwargs = {n: getattr(namespace, n) for n in options}
+ return Lark(namespace.grammar_file, parser='lalr', **kwargs), namespace.out
+
+
+def showwarning_as_comment(message, category, filename, lineno, file=None, line=None):
+ # Based on warnings._showwarnmsg_impl
+ text = warnings.formatwarning(message, category, filename, lineno, line)
+ text = indent(text, '# ')
+ if file is None:
+ file = sys.stderr
+ if file is None:
+ return
+ try:
+ file.write(text)
+ except OSError:
+ pass
+
+
+def make_warnings_comments():
+ warnings.showwarning = showwarning_as_comment
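A hedged sketch of reusing ``lalr_argparser`` and ``build_lalr`` above in a custom command-line tool; the tool itself is made up (``serialize.py`` below does the same thing for real):

    import sys
    from argparse import ArgumentParser

    from lark.tools import lalr_argparser, build_lalr

    argparser = ArgumentParser(parents=[lalr_argparser], description='Parse stdin and print the tree')

    def main():
        ns = argparser.parse_args()
        lark_inst, out = build_lalr(ns)      # a Lark instance and the chosen output file
        out.write(lark_inst.parse(sys.stdin.read()).pretty())

    if __name__ == '__main__':
        main()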
diff --git a/.venv/lib/python3.12/site-packages/lark/tools/nearley.py b/.venv/lib/python3.12/site-packages/lark/tools/nearley.py
new file mode 100644
index 00000000..f0779dc5
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/tools/nearley.py
@@ -0,0 +1,201 @@
+"Converts Nearley grammars to Lark"
+
+import os.path
+import sys
+import codecs
+import argparse
+
+
+from lark import Lark, InlineTransformer
+
+nearley_grammar = r"""
+ start: (ruledef|directive)+
+
+ directive: "@" NAME (STRING|NAME)
+ | "@" JS -> js_code
+ ruledef: NAME "->" expansions
+ | NAME REGEXP "->" expansions -> macro
+ expansions: expansion ("|" expansion)*
+
+ expansion: expr+ js
+
+ ?expr: item (":" /[+*?]/)?
+
+ ?item: rule|string|regexp|null
+ | "(" expansions ")"
+
+ rule: NAME
+ string: STRING
+ regexp: REGEXP
+ null: "null"
+ JS: /{%.*?%}/s
+ js: JS?
+
+ NAME: /[a-zA-Z_$]\w*/
+ COMMENT: /#[^\n]*/
+ REGEXP: /\[.*?\]/
+
+ STRING: _STRING "i"?
+
+ %import common.ESCAPED_STRING -> _STRING
+ %import common.WS
+ %ignore WS
+ %ignore COMMENT
+
+ """
+
+nearley_grammar_parser = Lark(nearley_grammar, parser='earley', lexer='standard')
+
+def _get_rulename(name):
+ name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name)
+ return 'n_' + name.replace('$', '__DOLLAR__').lower()
+
+class NearleyToLark(InlineTransformer):
+ def __init__(self):
+ self._count = 0
+ self.extra_rules = {}
+ self.extra_rules_rev = {}
+ self.alias_js_code = {}
+
+ def _new_function(self, code):
+ name = 'alias_%d' % self._count
+ self._count += 1
+
+ self.alias_js_code[name] = code
+ return name
+
+ def _extra_rule(self, rule):
+ if rule in self.extra_rules_rev:
+ return self.extra_rules_rev[rule]
+
+ name = 'xrule_%d' % len(self.extra_rules)
+ assert name not in self.extra_rules
+ self.extra_rules[name] = rule
+ self.extra_rules_rev[rule] = name
+ return name
+
+ def rule(self, name):
+ return _get_rulename(name)
+
+ def ruledef(self, name, exps):
+ return '!%s: %s' % (_get_rulename(name), exps)
+
+ def expr(self, item, op):
+ rule = '(%s)%s' % (item, op)
+ return self._extra_rule(rule)
+
+ def regexp(self, r):
+ return '/%s/' % r
+
+ def null(self):
+ return ''
+
+ def string(self, s):
+ return self._extra_rule(s)
+
+ def expansion(self, *x):
+ x, js = x[:-1], x[-1]
+ if js.children:
+ js_code ,= js.children
+ js_code = js_code[2:-2]
+ alias = '-> ' + self._new_function(js_code)
+ else:
+ alias = ''
+ return ' '.join(x) + alias
+
+ def expansions(self, *x):
+ return '%s' % ('\n |'.join(x))
+
+ def start(self, *rules):
+ return '\n'.join(filter(None, rules))
+
+def _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, includes):
+ rule_defs = []
+
+ tree = nearley_grammar_parser.parse(g)
+ for statement in tree.children:
+ if statement.data == 'directive':
+ directive, arg = statement.children
+ if directive in ('builtin', 'include'):
+ folder = builtin_path if directive == 'builtin' else folder_path
+ path = os.path.join(folder, arg[1:-1])
+ if path not in includes:
+ includes.add(path)
+ with codecs.open(path, encoding='utf8') as f:
+ text = f.read()
+ rule_defs += _nearley_to_lark(text, builtin_path, n2l, js_code, os.path.abspath(os.path.dirname(path)), includes)
+ else:
+ assert False, directive
+ elif statement.data == 'js_code':
+ code ,= statement.children
+ code = code[2:-2]
+ js_code.append(code)
+ elif statement.data == 'macro':
+ pass # TODO Add support for macros!
+ elif statement.data == 'ruledef':
+ rule_defs.append( n2l.transform(statement) )
+ else:
+ raise Exception("Unknown statement: %s" % statement)
+
+ return rule_defs
+
+
+def create_code_for_nearley_grammar(g, start, builtin_path, folder_path, es6=False):
+ import js2py
+
+ emit_code = []
+ def emit(x=None):
+ if x:
+ emit_code.append(x)
+ emit_code.append('\n')
+
+ js_code = ['function id(x) {return x[0];}']
+ n2l = NearleyToLark()
+ rule_defs = _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, set())
+ lark_g = '\n'.join(rule_defs)
+ lark_g += '\n'+'\n'.join('!%s: %s' % item for item in n2l.extra_rules.items())
+
+ emit('from lark import Lark, Transformer')
+ emit()
+ emit('grammar = ' + repr(lark_g))
+ emit()
+
+ for alias, code in n2l.alias_js_code.items():
+ js_code.append('%s = (%s);' % (alias, code))
+
+ if es6:
+ emit(js2py.translate_js6('\n'.join(js_code)))
+ else:
+ emit(js2py.translate_js('\n'.join(js_code)))
+ emit('class TransformNearley(Transformer):')
+ for alias in n2l.alias_js_code:
+ emit(" %s = var.get('%s').to_python()" % (alias, alias))
+ emit(" __default__ = lambda self, n, c, m: c if c else None")
+
+ emit()
+ emit('parser = Lark(grammar, start="n_%s", maybe_placeholders=False)' % start)
+ emit('def parse(text):')
+ emit(' return TransformNearley().transform(parser.parse(text))')
+
+ return ''.join(emit_code)
+
+def main(fn, start, nearley_lib, es6=False):
+ with codecs.open(fn, encoding='utf8') as f:
+ grammar = f.read()
+ return create_code_for_nearley_grammar(grammar, start, os.path.join(nearley_lib, 'builtin'), os.path.abspath(os.path.dirname(fn)), es6=es6)
+
+def get_arg_parser():
+ parser = argparse.ArgumentParser(description='Reads a Nearley grammar (with js functions), and outputs an equivalent lark parser.')
+ parser.add_argument('nearley_grammar', help='Path to the file containing the nearley grammar')
+ parser.add_argument('start_rule', help='Rule within the nearley grammar to make the base rule')
+ parser.add_argument('nearley_lib', help='Path to root directory of nearley codebase (used for including builtins)')
+ parser.add_argument('--es6', help='Enable experimental ES6 support', action='store_true')
+ return parser
+
+if __name__ == '__main__':
+ parser = get_arg_parser()
+ if len(sys.argv)==1:
+ parser.print_help(sys.stderr)
+ sys.exit(1)
+ args = parser.parse_args()
+ print(main(fn=args.nearley_grammar, start=args.start_rule, nearley_lib=args.nearley_lib, es6=args.es6))
diff --git a/.venv/lib/python3.12/site-packages/lark/tools/serialize.py b/.venv/lib/python3.12/site-packages/lark/tools/serialize.py
new file mode 100644
index 00000000..61540242
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/tools/serialize.py
@@ -0,0 +1,34 @@
+import codecs
+import sys
+import json
+
+from lark import Lark
+from lark.grammar import RuleOptions, Rule
+from lark.lexer import TerminalDef
+from lark.tools import lalr_argparser, build_lalr
+
+import argparse
+
+argparser = argparse.ArgumentParser(prog='python -m lark.tools.serialize', parents=[lalr_argparser],
+ description="Lark Serialization Tool - Stores Lark's internal state & LALR analysis as a JSON file",
+ epilog='Look at the Lark documentation for more info on the options')
+
+
+def serialize(lark_inst, outfile):
+ data, memo = lark_inst.memo_serialize([TerminalDef, Rule])
+ outfile.write('{\n')
+ outfile.write(' "data": %s,\n' % json.dumps(data))
+ outfile.write(' "memo": %s\n' % json.dumps(memo))
+ outfile.write('}\n')
+
+
+def main():
+ if len(sys.argv)==1:
+ argparser.print_help(sys.stderr)
+ sys.exit(1)
+ ns = argparser.parse_args()
+ serialize(*build_lalr(ns))
+
+
+if __name__ == '__main__':
+ main()
diff --git a/.venv/lib/python3.12/site-packages/lark/tools/standalone.py b/.venv/lib/python3.12/site-packages/lark/tools/standalone.py
new file mode 100644
index 00000000..c86d7d7a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/tools/standalone.py
@@ -0,0 +1,196 @@
+from __future__ import print_function
+
+###{standalone
+#
+#
+# Lark Stand-alone Generator Tool
+# ----------------------------------
+# Generates a stand-alone LALR(1) parser with a standard lexer
+#
+# Git: https://github.com/erezsh/lark
+# Author: Erez Shinan (erezshin@gmail.com)
+#
+#
+# >>> LICENSE
+#
+# This tool and its generated code use a separate license from Lark,
+# and are subject to the terms of the Mozilla Public License, v. 2.0.
+# If a copy of the MPL was not distributed with this
+# file, You can obtain one at https://mozilla.org/MPL/2.0/.
+#
+# If you wish to purchase a commercial license for this tool and its
+# generated code, you may contact me via email or otherwise.
+#
+# If MPL2 is incompatible with your free or open-source project,
+# contact me and we'll work it out.
+#
+#
+
+from io import open
+###}
+
+import sys
+import token, tokenize
+import os
+from os import path
+from collections import defaultdict
+from functools import partial
+from argparse import ArgumentParser, SUPPRESS
+from warnings import warn
+
+import lark
+from lark import Lark
+from lark.tools import lalr_argparser, build_lalr, make_warnings_comments
+
+
+from lark.grammar import RuleOptions, Rule
+from lark.lexer import TerminalDef
+
+_dir = path.dirname(__file__)
+_larkdir = path.join(_dir, path.pardir)
+
+
+EXTRACT_STANDALONE_FILES = [
+ 'tools/standalone.py',
+ 'exceptions.py',
+ 'utils.py',
+ 'tree.py',
+ 'visitors.py',
+ 'grammar.py',
+ 'lexer.py',
+ 'common.py',
+ 'parse_tree_builder.py',
+ 'parsers/lalr_parser.py',
+ 'parsers/lalr_analysis.py',
+ 'parser_frontends.py',
+ 'lark.py',
+ 'indenter.py',
+]
+
+def extract_sections(lines):
+ section = None
+ text = []
+ sections = defaultdict(list)
+ for l in lines:
+ if l.startswith('###'):
+ if l[3] == '{':
+ section = l[4:].strip()
+ elif l[3] == '}':
+ sections[section] += text
+ section = None
+ text = []
+ else:
+ raise ValueError(l)
+ elif section:
+ text.append(l)
+
+ return {name:''.join(text) for name, text in sections.items()}
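+
+# Editor's illustration of the marker format handled above (not part of upstream lark):
+# a file containing the lines
+#   ###{standalone
+#   x = 1
+#   ###}
+# yields {'standalone': 'x = 1\n'} when its lines are passed to extract_sections().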
+
+
+def strip_docstrings(line_gen):
+ """ Strip comments and docstrings from a file.
+ Based on code from: https://stackoverflow.com/questions/1769332/script-to-remove-python-comments-docstrings
+ """
+ res = []
+
+ prev_toktype = token.INDENT
+ last_lineno = -1
+ last_col = 0
+
+ tokgen = tokenize.generate_tokens(line_gen)
+ for toktype, ttext, (slineno, scol), (elineno, ecol), ltext in tokgen:
+ if slineno > last_lineno:
+ last_col = 0
+ if scol > last_col:
+ res.append(" " * (scol - last_col))
+ if toktype == token.STRING and prev_toktype == token.INDENT:
+ # Docstring
+ res.append("#--")
+ elif toktype == tokenize.COMMENT:
+ # Comment
+ res.append("##\n")
+ else:
+ res.append(ttext)
+ prev_toktype = toktype
+ last_col = ecol
+ last_lineno = elineno
+
+ return ''.join(res)
+
+
+def main(fobj, start, print=print):
+ warn('`lark.tools.standalone.main` is being redesigned. Use `gen_standalone`', DeprecationWarning)
+ lark_inst = Lark(fobj, parser="lalr", lexer="contextual", start=start)
+ gen_standalone(lark_inst, print)
+
+def gen_standalone(lark_inst, output=None, out=sys.stdout, compress=False):
+ if output is None:
+ output = partial(print, file=out)
+
+ import pickle, zlib, base64
+ def compressed_output(obj):
+ s = pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
+ c = zlib.compress(s)
+ output(repr(base64.b64encode(c)))
+
+ def output_decompress(name):
+ output('%(name)s = pickle.loads(zlib.decompress(base64.b64decode(%(name)s)))' % locals())
+
+ output('# The file was automatically generated by Lark v%s' % lark.__version__)
+ output('__version__ = "%s"' % lark.__version__)
+ output()
+
+ for i, pyfile in enumerate(EXTRACT_STANDALONE_FILES):
+ with open(os.path.join(_larkdir, pyfile)) as f:
+ code = extract_sections(f)['standalone']
+ if i: # strip docstrings from every extracted file except this one (standalone.py)
+ code = strip_docstrings(partial(next, iter(code.splitlines(True))))
+ output(code)
+
+ data, m = lark_inst.memo_serialize([TerminalDef, Rule])
+ output('import pickle, zlib, base64')
+ if compress:
+ output('DATA = (')
+ compressed_output(data)
+ output(')')
+ output_decompress('DATA')
+ output('MEMO = (')
+ compressed_output(m)
+ output(')')
+ output_decompress('MEMO')
+ else:
+ output('DATA = (')
+ output(data)
+ output(')')
+ output('MEMO = (')
+ output(m)
+ output(')')
+
+
+ output('Shift = 0')
+ output('Reduce = 1')
+ output("def Lark_StandAlone(**kwargs):")
+ output(" return Lark._load_from_dict(DATA, MEMO, **kwargs)")
+
+
+
+
+def main():
+ make_warnings_comments()
+ parser = ArgumentParser(prog='python -m lark.tools.standalone', description="Lark Stand-alone Generator Tool",
+ parents=[lalr_argparser], epilog='Look at the Lark documentation for more info on the options')
+ parser.add_argument("old_start", nargs='?', help=SUPPRESS)
+ parser.add_argument('-c', '--compress', action='store_true', default=0, help="Enable compression")
+ if len(sys.argv)==1:
+ parser.print_help(sys.stderr)
+ sys.exit(1)
+ ns = parser.parse_args()
+ if ns.old_start is not None:
+ warn('The syntax `python -m lark.tools.standalone <grammar-file> <start>` is deprecated. Use the -s option')
+ ns.start.append(ns.old_start)
+
+ lark_inst, out = build_lalr(ns)
+ gen_standalone(lark_inst, out=out, compress=ns.compress)
+
+if __name__ == '__main__':
+ main()
\ No newline at end of file
diff --git a/.venv/lib/python3.12/site-packages/lark/tree.py b/.venv/lib/python3.12/site-packages/lark/tree.py
new file mode 100644
index 00000000..0937b859
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/tree.py
@@ -0,0 +1,231 @@
+try:
+ from future_builtins import filter
+except ImportError:
+ pass
+
+from copy import deepcopy
+
+
+###{standalone
+from collections import OrderedDict
+
+
+class Meta:
+ def __init__(self):
+ self.empty = True
+
+
+class Tree(object):
+ """The main tree class.
+
+ Creates a new tree, and stores "data" and "children" in attributes of the same name.
+ Trees can be hashed and compared.
+
+ Parameters:
+ data: The name of the rule or alias
+ children: List of matched sub-rules and terminals
+ meta: Line & Column numbers (if ``propagate_positions`` is enabled).
+ meta attributes: line, column, start_pos, end_line, end_column, end_pos
+ """
+ def __init__(self, data, children, meta=None):
+ self.data = data
+ self.children = children
+ self._meta = meta
+
+ @property
+ def meta(self):
+ if self._meta is None:
+ self._meta = Meta()
+ return self._meta
+
+ def __repr__(self):
+ return 'Tree(%r, %r)' % (self.data, self.children)
+
+ def _pretty_label(self):
+ return self.data
+
+ def _pretty(self, level, indent_str):
+ if len(self.children) == 1 and not isinstance(self.children[0], Tree):
+ return [indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n']
+
+ l = [indent_str*level, self._pretty_label(), '\n']
+ for n in self.children:
+ if isinstance(n, Tree):
+ l += n._pretty(level+1, indent_str)
+ else:
+ l += [indent_str*(level+1), '%s' % (n,), '\n']
+
+ return l
+
+ def pretty(self, indent_str=' '):
+ """Returns an indented string representation of the tree.
+
+ Great for debugging.
+ """
+ return ''.join(self._pretty(0, indent_str))
+
+ def __eq__(self, other):
+ try:
+ return self.data == other.data and self.children == other.children
+ except AttributeError:
+ return False
+
+ def __ne__(self, other):
+ return not (self == other)
+
+ def __hash__(self):
+ return hash((self.data, tuple(self.children)))
+
+ def iter_subtrees(self):
+ """Depth-first iteration.
+
+ Iterates over all the subtrees, never returning to the same node twice (Lark's parse-tree is actually a DAG).
+ """
+ queue = [self]
+ subtrees = OrderedDict()
+ for subtree in queue:
+ subtrees[id(subtree)] = subtree
+ queue += [c for c in reversed(subtree.children)
+ if isinstance(c, Tree) and id(c) not in subtrees]
+
+ del queue
+ return reversed(list(subtrees.values()))
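+ # Editor's illustration of the bottom-up order (assuming the small tree below):
+ # >>> t = Tree('a', [Tree('b', []), Tree('c', [])])
+ # >>> [st.data for st in t.iter_subtrees()]
+ # ['b', 'c', 'a']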
+
+ def find_pred(self, pred):
+ """Returns all nodes of the tree that evaluate pred(node) as true."""
+ return filter(pred, self.iter_subtrees())
+
+ def find_data(self, data):
+ """Returns all nodes of the tree whose data equals the given data."""
+ return self.find_pred(lambda t: t.data == data)
+
+###}
+
+ def expand_kids_by_index(self, *indices):
+ """Expand (inline) children at the given indices"""
+ for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices
+ kid = self.children[i]
+ self.children[i:i+1] = kid.children
+
+ def expand_kids_by_data(self, *data_values):
+ """Expand (inline) children with any of the given data values. Returns True if anything changed"""
+ changed = False
+ for i in range(len(self.children)-1, -1, -1):
+ child = self.children[i]
+ if isinstance(child, Tree) and child.data in data_values:
+ self.children[i:i+1] = child.children
+ changed = True
+ return changed
+
+
+ def scan_values(self, pred):
+ """Return all values in the tree that evaluate pred(value) as true.
+
+ This can be used to find all the tokens in the tree.
+
+ Example:
+ >>> all_tokens = tree.scan_values(lambda v: isinstance(v, Token))
+ """
+ for c in self.children:
+ if isinstance(c, Tree):
+ for t in c.scan_values(pred):
+ yield t
+ else:
+ if pred(c):
+ yield c
+
+ def iter_subtrees_topdown(self):
+ """Breadth-first iteration.
+
+ Iterates over all the subtrees, return nodes in order like pretty() does.
+ """
+ stack = [self]
+ while stack:
+ node = stack.pop()
+ if not isinstance(node, Tree):
+ continue
+ yield node
+ for n in reversed(node.children):
+ stack.append(n)
+
+ def __deepcopy__(self, memo):
+ return type(self)(self.data, deepcopy(self.children, memo), meta=self._meta)
+
+ def copy(self):
+ return type(self)(self.data, self.children)
+
+ def set(self, data, children):
+ self.data = data
+ self.children = children
+
+ # XXX Deprecated! Here for backwards compatibility <0.6.0
+ @property
+ def line(self):
+ return self.meta.line
+
+ @property
+ def column(self):
+ return self.meta.column
+
+ @property
+ def end_line(self):
+ return self.meta.end_line
+
+ @property
+ def end_column(self):
+ return self.meta.end_column
+
+
+class SlottedTree(Tree):
+ __slots__ = 'data', 'children', 'rule', '_meta'
+
+
+def pydot__tree_to_png(tree, filename, rankdir="LR", **kwargs):
+ graph = pydot__tree_to_graph(tree, rankdir, **kwargs)
+ graph.write_png(filename)
+
+
+def pydot__tree_to_dot(tree, filename, rankdir="LR", **kwargs):
+ graph = pydot__tree_to_graph(tree, rankdir, **kwargs)
+ graph.write(filename)
+
+
+def pydot__tree_to_graph(tree, rankdir="LR", **kwargs):
+ """Creates a colorful image that represents the tree (data+children, without meta)
+
+ Possible values for `rankdir` are "TB", "LR", "BT", "RL", corresponding to
+ directed graphs drawn from top to bottom, from left to right, from bottom to
+ top, and from right to left, respectively.
+
+ `kwargs` can be any graph attribute (e. g. `dpi=200`). For a list of
+ possible attributes, see https://www.graphviz.org/doc/info/attrs.html.
+ """
+
+ import pydot
+ graph = pydot.Dot(graph_type='digraph', rankdir=rankdir, **kwargs)
+
+ i = [0]
+
+ def new_leaf(leaf):
+ node = pydot.Node(i[0], label=repr(leaf))
+ i[0] += 1
+ graph.add_node(node)
+ return node
+
+ def _to_pydot(subtree):
+ color = hash(subtree.data) & 0xffffff
+ color |= 0x808080
+
+ subnodes = [_to_pydot(child) if isinstance(child, Tree) else new_leaf(child)
+ for child in subtree.children]
+ node = pydot.Node(i[0], style="filled", fillcolor="#%x" % color, label=subtree.data)
+ i[0] += 1
+ graph.add_node(node)
+
+ for subnode in subnodes:
+ graph.add_edge(pydot.Edge(node, subnode))
+
+ return node
+
+ _to_pydot(tree)
+ return graph
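+
+
+# Editor's usage sketch (hypothetical output path; requires the optional `pydot` package):
+# pydot__tree_to_png(parse_tree, "/tmp/parse_tree.png", rankdir="TB", dpi=200)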
diff --git a/.venv/lib/python3.12/site-packages/lark/tree_matcher.py b/.venv/lib/python3.12/site-packages/lark/tree_matcher.py
new file mode 100644
index 00000000..02f95885
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/tree_matcher.py
@@ -0,0 +1,186 @@
+"""Tree matcher based on Lark grammar"""
+
+import re
+from collections import defaultdict
+
+from . import Tree, Token
+from .common import ParserConf
+from .parsers import earley
+from .grammar import Rule, Terminal, NonTerminal
+
+
+def is_discarded_terminal(t):
+ return t.is_term and t.filter_out
+
+
+class _MakeTreeMatch:
+ def __init__(self, name, expansion):
+ self.name = name
+ self.expansion = expansion
+
+ def __call__(self, args):
+ t = Tree(self.name, args)
+ t.meta.match_tree = True
+ t.meta.orig_expansion = self.expansion
+ return t
+
+
+def _best_from_group(seq, group_key, cmp_key):
+ d = {}
+ for item in seq:
+ key = group_key(item)
+ if key in d:
+ v1 = cmp_key(item)
+ v2 = cmp_key(d[key])
+ if v2 > v1:
+ d[key] = item
+ else:
+ d[key] = item
+ return list(d.values())
+
+
+def _best_rules_from_group(rules):
+ rules = _best_from_group(rules, lambda r: r, lambda r: -len(r.expansion))
+ rules.sort(key=lambda r: len(r.expansion))
+ return rules
+
+
+def _match(term, token):
+ if isinstance(token, Tree):
+ name, _args = parse_rulename(term.name)
+ return token.data == name
+ elif isinstance(token, Token):
+ return term == Terminal(token.type)
+ assert False, (term, token)
+
+
+def make_recons_rule(origin, expansion, old_expansion):
+ return Rule(origin, expansion, alias=_MakeTreeMatch(origin.name, old_expansion))
+
+
+def make_recons_rule_to_term(origin, term):
+ return make_recons_rule(origin, [Terminal(term.name)], [term])
+
+
+def parse_rulename(s):
+ "Parse rule names that may contain a template syntax (like rule{a, b, ...})"
+ name, args_str = re.match(r'(\w+)(?:{(.+)})?', s).groups()
+ args = args_str and [a.strip() for a in args_str.split(',')]
+ return name, args
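+
+# Editor's illustration of the expected splits (not part of upstream lark):
+# >>> parse_rulename('rule{a, b}')
+# ('rule', ['a', 'b'])
+# >>> parse_rulename('rule')
+# ('rule', None)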
+
+
+
+class ChildrenLexer:
+ def __init__(self, children):
+ self.children = children
+
+ def lex(self, parser_state):
+ return self.children
+
+class TreeMatcher:
+ """Match the elements of a tree node, based on an ontology
+ provided by a Lark grammar.
+
+ Supports templates and inlined rules (`rule{a, b, ...}` and `_rule`)
+
+ Initialize with an instance of Lark.
+ """
+
+ def __init__(self, parser):
+ # XXX TODO calling compile twice returns different results!
+ assert parser.options.maybe_placeholders == False
+ # XXX TODO: we just ignore the potential existence of a postlexer
+ self.tokens, rules, _extra = parser.grammar.compile(parser.options.start, set())
+
+ self.rules_for_root = defaultdict(list)
+
+ self.rules = list(self._build_recons_rules(rules))
+ self.rules.reverse()
+
+ # Choose the best rule from each group of {rule => [rule.alias]}, since we only really need one derivation.
+ self.rules = _best_rules_from_group(self.rules)
+
+ self.parser = parser
+ self._parser_cache = {}
+
+ def _build_recons_rules(self, rules):
+ "Convert tree-parsing/construction rules to tree-matching rules"
+ expand1s = {r.origin for r in rules if r.options.expand1}
+
+ aliases = defaultdict(list)
+ for r in rules:
+ if r.alias:
+ aliases[r.origin].append(r.alias)
+
+ rule_names = {r.origin for r in rules}
+ nonterminals = {sym for sym in rule_names
+ if sym.name.startswith('_') or sym in expand1s or sym in aliases}
+
+ seen = set()
+ for r in rules:
+ recons_exp = [sym if sym in nonterminals else Terminal(sym.name)
+ for sym in r.expansion if not is_discarded_terminal(sym)]
+
+ # Skip self-recursive constructs
+ if recons_exp == [r.origin] and r.alias is None:
+ continue
+
+ sym = NonTerminal(r.alias) if r.alias else r.origin
+ rule = make_recons_rule(sym, recons_exp, r.expansion)
+
+ if sym in expand1s and len(recons_exp) != 1:
+ self.rules_for_root[sym.name].append(rule)
+
+ if sym.name not in seen:
+ yield make_recons_rule_to_term(sym, sym)
+ seen.add(sym.name)
+ else:
+ if sym.name.startswith('_') or sym in expand1s:
+ yield rule
+ else:
+ self.rules_for_root[sym.name].append(rule)
+
+ for origin, rule_aliases in aliases.items():
+ for alias in rule_aliases:
+ yield make_recons_rule_to_term(origin, NonTerminal(alias))
+ yield make_recons_rule_to_term(origin, origin)
+
+ def match_tree(self, tree, rulename):
+ """Match the elements of `tree` to the symbols of rule `rulename`.
+
+ Parameters:
+ tree (Tree): the tree node to match
+ rulename (str): The expected full rule name (including template args)
+
+ Returns:
+ Tree: an unreduced tree that matches `rulename`
+
+ Raises:
+ UnexpectedToken: If no match was found.
+
+ Note:
+ It's the caller's responsibility to match the tree recursively.
+ """
+ if rulename:
+ # validate
+ name, _args = parse_rulename(rulename)
+ assert tree.data == name
+ else:
+ rulename = tree.data
+
+ # TODO: ambiguity?
+ try:
+ parser = self._parser_cache[rulename]
+ except KeyError:
+ rules = self.rules + _best_rules_from_group(self.rules_for_root[rulename])
+
+ # TODO pass callbacks through dict, instead of alias?
+ callbacks = {rule: rule.alias for rule in rules}
+ conf = ParserConf(rules, callbacks, [rulename])
+ parser = earley.Parser(conf, _match, resolve_ambiguity=True)
+ self._parser_cache[rulename] = parser
+
+ # find a full derivation
+ unreduced_tree = parser.parse(ChildrenLexer(tree.children), rulename)
+ assert unreduced_tree.data == rulename
+ return unreduced_tree
diff --git a/.venv/lib/python3.12/site-packages/lark/utils.py b/.venv/lib/python3.12/site-packages/lark/utils.py
new file mode 100644
index 00000000..051adfa1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/utils.py
@@ -0,0 +1,387 @@
+import hashlib
+import unicodedata
+import os
+from functools import reduce
+from collections import deque
+
+###{standalone
+import sys, re
+import logging
+from io import open
+logger = logging.getLogger("lark")
+logger.addHandler(logging.StreamHandler())
+# Set to highest level, since we have some warnings amongst the code
+# By default, we should not output any log messages
+logger.setLevel(logging.CRITICAL)
+
+if sys.version_info[0]>2:
+ from abc import ABC, abstractmethod
+else:
+ from abc import ABCMeta, abstractmethod
+ class ABC(object): # Provide Python27 compatibility
+ __slots__ = ()
+ __metaclass__ = ABCMeta
+
+
+Py36 = (sys.version_info[:2] >= (3, 6))
+
+NO_VALUE = object()
+
+
+def classify(seq, key=None, value=None):
+ d = {}
+ for item in seq:
+ k = key(item) if (key is not None) else item
+ v = value(item) if (value is not None) else item
+ if k in d:
+ d[k].append(v)
+ else:
+ d[k] = [v]
+ return d
+
+
+def _deserialize(data, namespace, memo):
+ if isinstance(data, dict):
+ if '__type__' in data: # Object
+ class_ = namespace[data['__type__']]
+ return class_.deserialize(data, memo)
+ elif '@' in data:
+ return memo[data['@']]
+ return {key:_deserialize(value, namespace, memo) for key, value in data.items()}
+ elif isinstance(data, list):
+ return [_deserialize(value, namespace, memo) for value in data]
+ return data
+
+
+class Serialize(object):
+ """Safe-ish serialization interface that doesn't rely on Pickle
+
+ Attributes:
+ __serialize_fields__ (List[str]): Fields (aka attributes) to serialize.
+ __serialize_namespace__ (list): List of classes that deserialization is allowed to instantiate.
+ Should include all field types that aren't builtin types.
+ """
+
+ def memo_serialize(self, types_to_memoize):
+ memo = SerializeMemoizer(types_to_memoize)
+ return self.serialize(memo), memo.serialize()
+
+ def serialize(self, memo=None):
+ if memo and memo.in_types(self):
+ return {'@': memo.memoized.get(self)}
+
+ fields = getattr(self, '__serialize_fields__')
+ res = {f: _serialize(getattr(self, f), memo) for f in fields}
+ res['__type__'] = type(self).__name__
+ if hasattr(self, '_serialize'):
+ self._serialize(res, memo)
+ return res
+
+ @classmethod
+ def deserialize(cls, data, memo):
+ namespace = getattr(cls, '__serialize_namespace__', [])
+ namespace = {c.__name__:c for c in namespace}
+
+ fields = getattr(cls, '__serialize_fields__')
+
+ if '@' in data:
+ return memo[data['@']]
+
+ inst = cls.__new__(cls)
+ for f in fields:
+ try:
+ setattr(inst, f, _deserialize(data[f], namespace, memo))
+ except KeyError as e:
+ raise KeyError("Cannot find key for class", cls, e)
+
+ if hasattr(inst, '_deserialize'):
+ inst._deserialize()
+
+ return inst
+
+
+class SerializeMemoizer(Serialize):
+ "A version of serialize that memoizes objects to reduce space"
+
+ __serialize_fields__ = 'memoized',
+
+ def __init__(self, types_to_memoize):
+ self.types_to_memoize = tuple(types_to_memoize)
+ self.memoized = Enumerator()
+
+ def in_types(self, value):
+ return isinstance(value, self.types_to_memoize)
+
+ def serialize(self):
+ return _serialize(self.memoized.reversed(), None)
+
+ @classmethod
+ def deserialize(cls, data, namespace, memo):
+ return _deserialize(data, namespace, memo)
+
+
+try:
+ STRING_TYPE = basestring
+except NameError: # Python 3
+ STRING_TYPE = str
+
+
+import types
+from functools import wraps, partial
+from contextlib import contextmanager
+
+Str = type(u'')
+try:
+ classtype = types.ClassType # Python2
+except AttributeError:
+ classtype = type # Python3
+
+
+def smart_decorator(f, create_decorator):
+ if isinstance(f, types.FunctionType):
+ return wraps(f)(create_decorator(f, True))
+
+ elif isinstance(f, (classtype, type, types.BuiltinFunctionType)):
+ return wraps(f)(create_decorator(f, False))
+
+ elif isinstance(f, types.MethodType):
+ return wraps(f)(create_decorator(f.__func__, True))
+
+ elif isinstance(f, partial):
+ # wraps does not work for partials in 2.7: https://bugs.python.org/issue3445
+ return wraps(f.func)(create_decorator(lambda *args, **kw: f(*args[1:], **kw), True))
+
+ else:
+ return create_decorator(f.__func__.__call__, True)
+
+
+try:
+ import regex
+except ImportError:
+ regex = None
+
+import sre_parse
+import sre_constants
+categ_pattern = re.compile(r'\\p{[A-Za-z_]+}')
+
+def get_regexp_width(expr):
+ if regex:
+ # Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with
+ # a simple letter, which makes no difference as we are only trying to get the possible lengths of the regex
+ # match here below.
+ regexp_final = re.sub(categ_pattern, 'A', expr)
+ else:
+ if re.search(categ_pattern, expr):
+ raise ImportError('`regex` module must be installed in order to use Unicode categories.', expr)
+ regexp_final = expr
+ try:
+ return [int(x) for x in sre_parse.parse(regexp_final).getwidth()]
+ except sre_constants.error:
+ if not regex:
+ raise ValueError(expr)
+ else:
+ # sre_parse does not support the new features in regex. To not completely fail in that case,
+ # we manually test for the most important info (whether the empty string is matched)
+ c = regex.compile(regexp_final)
+ if c.match('') is None:
+ return 1, sre_constants.MAXREPEAT
+ else:
+ return 0, sre_constants.MAXREPEAT
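+
+# Editor's illustration (minimum and maximum match lengths of the pattern):
+# >>> get_regexp_width(r'ab?')
+# [1, 2]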
+
+###}
+
+
+_ID_START = 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Mn', 'Mc', 'Pc'
+_ID_CONTINUE = _ID_START + ('Nd', 'Nl',)
+
+def _test_unicode_category(s, categories):
+ if len(s) != 1:
+ return all(_test_unicode_category(char, categories) for char in s)
+ return s == '_' or unicodedata.category(s) in categories
+
+def is_id_continue(s):
+ """
+ Checks if all characters in `s` are alphanumeric characters (Unicode standard, so diacritics, Indian vowels, non-Latin
+ numbers, etc. all pass). Synonymous with a Python `ID_CONTINUE` identifier. See PEP 3131 for details.
+ """
+ return _test_unicode_category(s, _ID_CONTINUE)
+
+def is_id_start(s):
+ """
+ Checks if all characters in `s` are alphabetic characters (Unicode standard, so diacritics, Indian vowels, non-Latin
+ numbers, etc. all pass). Synonymous with a Python `ID_START` identifier. See PEP 3131 for details.
+ """
+ return _test_unicode_category(s, _ID_START)
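+
+# Editor's illustration (not part of upstream lark):
+# >>> is_id_start('foo'), is_id_start('2foo'), is_id_continue('foo2')
+# (True, False, True)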
+
+
+def dedup_list(l):
+ """Given a list (l) will removing duplicates from the list,
+ preserving the original order of the list. Assumes that
+ the list entries are hashable."""
+ dedup = set()
+ return [x for x in l if not (x in dedup or dedup.add(x))]
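+
+# Editor's illustration:
+# >>> dedup_list([1, 2, 1, 3, 2])
+# [1, 2, 3]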
+
+
+try:
+ from contextlib import suppress # Python 3
+except ImportError:
+ @contextmanager
+ def suppress(*excs):
+ '''Catch and dismiss the provided exception
+
+ >>> x = 'hello'
+ >>> with suppress(IndexError):
+ ... x = x[10]
+ >>> x
+ 'hello'
+ '''
+ try:
+ yield
+ except excs:
+ pass
+
+
+class Enumerator(Serialize):
+ def __init__(self):
+ self.enums = {}
+
+ def get(self, item):
+ if item not in self.enums:
+ self.enums[item] = len(self.enums)
+ return self.enums[item]
+
+ def __len__(self):
+ return len(self.enums)
+
+ def reversed(self):
+ r = {v: k for k, v in self.enums.items()}
+ assert len(r) == len(self.enums)
+ return r
+
+
+
+def combine_alternatives(lists):
+ """
+ Accepts a list of alternatives, and enumerates all their possible concatenations.
+
+ Examples:
+ >>> combine_alternatives([range(2), [4,5]])
+ [[0, 4], [0, 5], [1, 4], [1, 5]]
+
+ >>> combine_alternatives(["abc", "xy", '$'])
+ [['a', 'x', '$'], ['a', 'y', '$'], ['b', 'x', '$'], ['b', 'y', '$'], ['c', 'x', '$'], ['c', 'y', '$']]
+
+ >>> combine_alternatives([])
+ [[]]
+ """
+ if not lists:
+ return [[]]
+ assert all(l for l in lists), lists
+ init = [[x] for x in lists[0]]
+ return reduce(lambda a,b: [i+[j] for i in a for j in b], lists[1:], init)
+
+
+try:
+ import atomicwrites
+except ImportError:
+ atomicwrites = None
+
+class FS:
+ exists = staticmethod(os.path.exists)
+
+ @staticmethod
+ def open(name, mode="r", **kwargs):
+ if atomicwrites and "w" in mode:
+ return atomicwrites.atomic_write(name, mode=mode, overwrite=True, **kwargs)
+ else:
+ return open(name, mode, **kwargs)
+
+
+
+def isascii(s):
+ """ str.isascii only exists in python3.7+ """
+ try:
+ return s.isascii()
+ except AttributeError:
+ try:
+ s.encode('ascii')
+ return True
+ except (UnicodeDecodeError, UnicodeEncodeError):
+ return False
+
+
+class fzset(frozenset):
+ def __repr__(self):
+ return '{%s}' % ', '.join(map(repr, self))
+
+
+def classify_bool(seq, pred):
+ true_elems = []
+ false_elems = []
+
+ for elem in seq:
+ if pred(elem):
+ true_elems.append(elem)
+ else:
+ false_elems.append(elem)
+
+ return true_elems, false_elems
+
+
+def bfs(initial, expand):
+ open_q = deque(list(initial))
+ visited = set(open_q)
+ while open_q:
+ node = open_q.popleft()
+ yield node
+ for next_node in expand(node):
+ if next_node not in visited:
+ visited.add(next_node)
+ open_q.append(next_node)
+
+def bfs_all_unique(initial, expand):
+ "bfs, but doesn't keep track of visited (aka seen), because there can be no repetitions"
+ open_q = deque(list(initial))
+ while open_q:
+ node = open_q.popleft()
+ yield node
+ open_q += expand(node)
+
+
+def _serialize(value, memo):
+ if isinstance(value, Serialize):
+ return value.serialize(memo)
+ elif isinstance(value, list):
+ return [_serialize(elem, memo) for elem in value]
+ elif isinstance(value, frozenset):
+ return list(value) # TODO reversible?
+ elif isinstance(value, dict):
+ return {key:_serialize(elem, memo) for key, elem in value.items()}
+ # assert value is None or isinstance(value, (int, float, str, tuple)), value
+ return value
+
+
+
+
+def small_factors(n, max_factor):
+ """
+ Splits n up into smaller factors and summands <= max_factor.
+ Returns a list of [(a, b), ...]
+ so that the following code returns n:
+
+ n = 1
+ for a, b in values:
+ n = n * a + b
+
+ Currently, we also keep a + b <= max_factor, but that might change
+ """
+ assert n >= 0
+ assert max_factor > 2
+ if n <= max_factor:
+ return [(n, 0)]
+
+ for a in range(max_factor, 1, -1):
+ r, b = divmod(n, a)
+ if a + b <= max_factor:
+ return small_factors(r, max_factor) + [(a, b)]
+ assert False, "Failed to factorize %s" % n
diff --git a/.venv/lib/python3.12/site-packages/lark/visitors.py b/.venv/lib/python3.12/site-packages/lark/visitors.py
new file mode 100644
index 00000000..d45bb193
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/visitors.py
@@ -0,0 +1,529 @@
+from functools import wraps
+
+from .utils import smart_decorator, combine_alternatives
+from .tree import Tree
+from .exceptions import VisitError, GrammarError
+from .lexer import Token
+
+###{standalone
+from inspect import getmembers, getmro
+
+
+class Discard(Exception):
+ """When raising the Discard exception in a transformer callback,
+ that node is discarded and won't appear in the parent.
+ """
+ pass
+
+# Transformers
+
+
+class _Decoratable:
+ "Provides support for decorating methods with @v_args"
+
+ @classmethod
+ def _apply_decorator(cls, decorator, **kwargs):
+ mro = getmro(cls)
+ assert mro[0] is cls
+ libmembers = {name for _cls in mro[1:] for name, _ in getmembers(_cls)}
+ for name, value in getmembers(cls):
+
+ # Make sure the function isn't inherited (unless it's overwritten)
+ if name.startswith('_') or (name in libmembers and name not in cls.__dict__):
+ continue
+ if not callable(value):
+ continue
+
+ # Skip if v_args already applied (at the function level)
+ if hasattr(cls.__dict__[name], 'vargs_applied') or hasattr(value, 'vargs_applied'):
+ continue
+
+ static = isinstance(cls.__dict__[name], (staticmethod, classmethod))
+ setattr(cls, name, decorator(value, static=static, **kwargs))
+ return cls
+
+ def __class_getitem__(cls, _):
+ return cls
+
+
+class Transformer(_Decoratable):
+ """Transformers visit each node of the tree, and run the appropriate method on it according to the node's data.
+
+ Methods are provided by the user via inheritance, and called according to ``tree.data``.
+ The returned value from each method replaces the node in the tree structure.
+
+ Transformers work bottom-up (or depth-first), starting with the leaves and ending at the root of the tree.
+ Transformers can be used to implement map & reduce patterns. Because nodes are reduced from leaf to root,
+ at any point the callbacks may assume the children have already been transformed (if applicable).
+
+ ``Transformer`` can do anything ``Visitor`` can do, but because it reconstructs the tree,
+ it is slightly less efficient.
+
+ All these classes implement the transformer interface:
+
+ - ``Transformer`` - Recursively transforms the tree. This is the one you probably want.
+ - ``Transformer_InPlace`` - Non-recursive. Changes the tree in-place instead of returning new instances
+ - ``Transformer_InPlaceRecursive`` - Recursive. Changes the tree in-place instead of returning new instances
+
+ Parameters:
+ visit_tokens (bool, optional): Should the transformer visit tokens in addition to rules.
+ Setting this to ``False`` is slightly faster. Defaults to ``True``.
+ (For processing ignored tokens, use the ``lexer_callbacks`` options)
+
+ NOTE: A transformer without methods essentially performs a non-memoized partial deepcopy.
+ """
+ __visit_tokens__ = True # For backwards compatibility
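+
+ # Editor's usage sketch (hypothetical rule names, not part of upstream lark):
+ #   class CalcTransformer(Transformer):
+ #       def number(self, children):
+ #           return int(children[0])
+ #       def add(self, children):
+ #           return children[0] + children[1]
+ #   result = CalcTransformer().transform(parse_tree)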
+
+ def __init__(self, visit_tokens=True):
+ self.__visit_tokens__ = visit_tokens
+
+ def _call_userfunc(self, tree, new_children=None):
+ # Assumes tree is already transformed
+ children = new_children if new_children is not None else tree.children
+ try:
+ f = getattr(self, tree.data)
+ except AttributeError:
+ return self.__default__(tree.data, children, tree.meta)
+ else:
+ try:
+ wrapper = getattr(f, 'visit_wrapper', None)
+ if wrapper is not None:
+ return f.visit_wrapper(f, tree.data, children, tree.meta)
+ else:
+ return f(children)
+ except (GrammarError, Discard):
+ raise
+ except Exception as e:
+ raise VisitError(tree.data, tree, e)
+
+ def _call_userfunc_token(self, token):
+ try:
+ f = getattr(self, token.type)
+ except AttributeError:
+ return self.__default_token__(token)
+ else:
+ try:
+ return f(token)
+ except (GrammarError, Discard):
+ raise
+ except Exception as e:
+ raise VisitError(token.type, token, e)
+
+ def _transform_children(self, children):
+ for c in children:
+ try:
+ if isinstance(c, Tree):
+ yield self._transform_tree(c)
+ elif self.__visit_tokens__ and isinstance(c, Token):
+ yield self._call_userfunc_token(c)
+ else:
+ yield c
+ except Discard:
+ pass
+
+ def _transform_tree(self, tree):
+ children = list(self._transform_children(tree.children))
+ return self._call_userfunc(tree, children)
+
+ def transform(self, tree):
+ "Transform the given tree, and return the final result"
+ return self._transform_tree(tree)
+
+ def __mul__(self, other):
+ """Chain two transformers together, returning a new transformer.
+ """
+ return TransformerChain(self, other)
+
+ def __default__(self, data, children, meta):
+ """Default function that is called if there is no attribute matching ``data``
+
+ Can be overridden. Defaults to creating a new copy of the tree node (i.e. ``return Tree(data, children, meta)``)
+ """
+ return Tree(data, children, meta)
+
+ def __default_token__(self, token):
+ """Default function that is called if there is no attribute matching ``token.type``
+
+ Can be overridden. Defaults to returning the token as-is.
+ """
+ return token
+
+
+def merge_transformers(base_transformer=None, **transformers_to_merge):
+ """Merge a collection of transformers into the base_transformer, each into its own 'namespace'.
+
+ When called, it will collect the methods from each transformer, and assign them to base_transformer,
+ with their name prefixed with the given keyword, as ``prefix__methodname``.
+
+ This function is especially useful for processing grammars that import other grammars,
+ thereby creating some of their rules in a 'namespace' (i.e. with a consistent name prefix).
+ In this case, the key for the transformer should match the name of the imported grammar.
+
+ Parameters:
+ base_transformer (Transformer, optional): The transformer that all other transformers will be added to.
+ **transformers_to_merge: Keyword arguments, in the form of ``name_prefix = transformer``.
+
+ Raises:
+ AttributeError: In case of a name collision in the merged methods
+
+ Example:
+ ::
+
+ class TBase(Transformer):
+ def start(self, children):
+ return children[0] + 'bar'
+
+ class TImportedGrammar(Transformer):
+ def foo(self, children):
+ return "foo"
+
+ composed_transformer = merge_transformers(TBase(), imported=TImportedGrammar())
+
+ t = Tree('start', [ Tree('imported__foo', []) ])
+
+ assert composed_transformer.transform(t) == 'foobar'
+
+ """
+ if base_transformer is None:
+ base_transformer = Transformer()
+ for prefix, transformer in transformers_to_merge.items():
+ for method_name in dir(transformer):
+ method = getattr(transformer, method_name)
+ if not callable(method):
+ continue
+ if method_name.startswith("_") or method_name == "transform":
+ continue
+ prefixed_method = prefix + "__" + method_name
+ if hasattr(base_transformer, prefixed_method):
+ raise AttributeError("Cannot merge: method '%s' appears more than once" % prefixed_method)
+
+ setattr(base_transformer, prefixed_method, method)
+
+ return base_transformer
+
+
+class InlineTransformer(Transformer): # XXX Deprecated
+ def _call_userfunc(self, tree, new_children=None):
+ # Assumes tree is already transformed
+ children = new_children if new_children is not None else tree.children
+ try:
+ f = getattr(self, tree.data)
+ except AttributeError:
+ return self.__default__(tree.data, children, tree.meta)
+ else:
+ return f(*children)
+
+
+class TransformerChain(object):
+ def __init__(self, *transformers):
+ self.transformers = transformers
+
+ def transform(self, tree):
+ for t in self.transformers:
+ tree = t.transform(tree)
+ return tree
+
+ def __mul__(self, other):
+ return TransformerChain(*self.transformers + (other,))
+
+
+class Transformer_InPlace(Transformer):
+ """Same as Transformer, but non-recursive, and changes the tree in-place instead of returning new instances
+
+ Useful for huge trees. Conservative in memory.
+ """
+ def _transform_tree(self, tree): # Cancel recursion
+ return self._call_userfunc(tree)
+
+ def transform(self, tree):
+ for subtree in tree.iter_subtrees():
+ subtree.children = list(self._transform_children(subtree.children))
+
+ return self._transform_tree(tree)
+
+
+class Transformer_NonRecursive(Transformer):
+ """Same as Transformer but non-recursive.
+
+ Like Transformer, it doesn't change the original tree.
+
+ Useful for huge trees.
+ """
+
+ def transform(self, tree):
+ # Tree to postfix
+ rev_postfix = []
+ q = [tree]
+ while q:
+ t = q.pop()
+ rev_postfix.append(t)
+ if isinstance(t, Tree):
+ q += t.children
+
+ # Postfix to tree
+ stack = []
+ for x in reversed(rev_postfix):
+ if isinstance(x, Tree):
+ size = len(x.children)
+ if size:
+ args = stack[-size:]
+ del stack[-size:]
+ else:
+ args = []
+ stack.append(self._call_userfunc(x, args))
+ elif self.__visit_tokens__ and isinstance(x, Token):
+ stack.append(self._call_userfunc_token(x))
+ else:
+ stack.append(x)
+
+ t, = stack # We should have only one tree remaining
+ return t
+
+
+class Transformer_InPlaceRecursive(Transformer):
+ "Same as Transformer, recursive, but changes the tree in-place instead of returning new instances"
+ def _transform_tree(self, tree):
+ tree.children = list(self._transform_children(tree.children))
+ return self._call_userfunc(tree)
+
+
+# Visitors
+
+class VisitorBase:
+ def _call_userfunc(self, tree):
+ return getattr(self, tree.data, self.__default__)(tree)
+
+ def __default__(self, tree):
+ """Default function that is called if there is no attribute matching ``tree.data``
+
+ Can be overridden. Defaults to doing nothing.
+ """
+ return tree
+
+ def __class_getitem__(cls, _):
+ return cls
+
+
+class Visitor(VisitorBase):
+ """Tree visitor, non-recursive (can handle huge trees).
+
+ Visiting a node calls its methods (provided by the user via inheritance) according to ``tree.data``
+ """
+
+ def visit(self, tree):
+ "Visits the tree, starting with the leaves and finally the root (bottom-up)"
+ for subtree in tree.iter_subtrees():
+ self._call_userfunc(subtree)
+ return tree
+
+ def visit_topdown(self,tree):
+ "Visit the tree, starting at the root, and ending at the leaves (top-down)"
+ for subtree in tree.iter_subtrees_topdown():
+ self._call_userfunc(subtree)
+ return tree
+
+
+class Visitor_Recursive(VisitorBase):
+ """Bottom-up visitor, recursive.
+
+ Visiting a node calls its methods (provided by the user via inheritance) according to ``tree.data``
+
+ Slightly faster than the non-recursive version.
+ """
+
+ def visit(self, tree):
+ "Visits the tree, starting with the leaves and finally the root (bottom-up)"
+ for child in tree.children:
+ if isinstance(child, Tree):
+ self.visit(child)
+
+ self._call_userfunc(tree)
+ return tree
+
+ def visit_topdown(self,tree):
+ "Visit the tree, starting at the root, and ending at the leaves (top-down)"
+ self._call_userfunc(tree)
+
+ for child in tree.children:
+ if isinstance(child, Tree):
+ self.visit_topdown(child)
+
+ return tree
+
+
+def visit_children_decor(func):
+ "See Interpreter"
+ @wraps(func)
+ def inner(cls, tree):
+ values = cls.visit_children(tree)
+ return func(cls, values)
+ return inner
+
+
+class Interpreter(_Decoratable):
+ """Interpreter walks the tree starting at the root.
+
+ Visits the tree, starting with the root and finally the leaves (top-down)
+
+ For each tree node, it calls its methods (provided by user via inheritance) according to ``tree.data``.
+
+ Unlike ``Transformer`` and ``Visitor``, the Interpreter doesn't automatically visit its sub-branches.
+ The user has to explicitly call ``visit``, ``visit_children``, or use the ``@visit_children_decor``.
+ This allows the user to implement branching and loops.
+ """
+
+ def visit(self, tree):
+ f = getattr(self, tree.data)
+ wrapper = getattr(f, 'visit_wrapper', None)
+ if wrapper is not None:
+ return f.visit_wrapper(f, tree.data, tree.children, tree.meta)
+ else:
+ return f(tree)
+
+ def visit_children(self, tree):
+ return [self.visit(child) if isinstance(child, Tree) else child
+ for child in tree.children]
+
+ def __getattr__(self, name):
+ return self.__default__
+
+ def __default__(self, tree):
+ return self.visit_children(tree)
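+
+
+# Editor's usage sketch (hypothetical rule name, not part of upstream lark):
+# class IfInterpreter(Interpreter):
+#     def if_stmt(self, tree):
+#         cond, then_branch = tree.children
+#         if self.visit(cond):
+#             return self.visit(then_branch)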
+
+
+# Decorators
+
+def _apply_decorator(obj, decorator, **kwargs):
+ try:
+ _apply = obj._apply_decorator
+ except AttributeError:
+ return decorator(obj, **kwargs)
+ else:
+ return _apply(decorator, **kwargs)
+
+
+def _inline_args__func(func):
+ @wraps(func)
+ def create_decorator(_f, with_self):
+ if with_self:
+ def f(self, children):
+ return _f(self, *children)
+ else:
+ def f(self, children):
+ return _f(*children)
+ return f
+
+ return smart_decorator(func, create_decorator)
+
+
+def inline_args(obj): # XXX Deprecated
+ return _apply_decorator(obj, _inline_args__func)
+
+
+def _visitor_args_func_dec(func, visit_wrapper=None, static=False):
+ def create_decorator(_f, with_self):
+ if with_self:
+ def f(self, *args, **kwargs):
+ return _f(self, *args, **kwargs)
+ else:
+ def f(self, *args, **kwargs):
+ return _f(*args, **kwargs)
+ return f
+
+ if static:
+ f = wraps(func)(create_decorator(func, False))
+ else:
+ f = smart_decorator(func, create_decorator)
+ f.vargs_applied = True
+ f.visit_wrapper = visit_wrapper
+ return f
+
+
+def _vargs_inline(f, _data, children, _meta):
+ return f(*children)
+def _vargs_meta_inline(f, _data, children, meta):
+ return f(meta, *children)
+def _vargs_meta(f, _data, children, meta):
+ return f(children, meta) # TODO swap these for consistency? Backwards incompatible!
+def _vargs_tree(f, data, children, meta):
+ return f(Tree(data, children, meta))
+
+
+def v_args(inline=False, meta=False, tree=False, wrapper=None):
+ """A convenience decorator factory for modifying the behavior of user-supplied visitor methods.
+
+ By default, callback methods of transformers/visitors accept one argument - a list of the node's children.
+
+ ``v_args`` can modify this behavior. When used on a transformer/visitor class definition,
+ it applies to all the callback methods inside it.
+
+ ``v_args`` can be applied to a single method, or to an entire class. When applied to both,
+ the options given to the method take precedence.
+
+ Parameters:
+ inline (bool, optional): Children are provided as ``*args`` instead of a list argument (not recommended for very long lists).
+ meta (bool, optional): Provides two arguments: ``children`` and ``meta`` (instead of just the first)
+ tree (bool, optional): Provides the entire tree as the argument, instead of the children.
+ wrapper (function, optional): Provide a function to decorate all methods.
+
+ Example:
+ ::
+
+ @v_args(inline=True)
+ class SolveArith(Transformer):
+ def add(self, left, right):
+ return left + right
+
+
+ class ReverseNotation(Transformer_InPlace):
+ @v_args(tree=True)
+ def tree_node(self, tree):
+ tree.children = tree.children[::-1]
+ """
+ if tree and (meta or inline):
+ raise ValueError("Visitor functions cannot combine 'tree' with 'meta' or 'inline'.")
+
+ func = None
+ if meta:
+ if inline:
+ func = _vargs_meta_inline
+ else:
+ func = _vargs_meta
+ elif inline:
+ func = _vargs_inline
+ elif tree:
+ func = _vargs_tree
+
+ if wrapper is not None:
+ if func is not None:
+ raise ValueError("Cannot use 'wrapper' along with 'tree', 'meta' or 'inline'.")
+ func = wrapper
+
+ def _visitor_args_dec(obj):
+ return _apply_decorator(obj, _visitor_args_func_dec, visit_wrapper=func)
+ return _visitor_args_dec
+
+
+###}
+
+
+# --- Visitor Utilities ---
+
+class CollapseAmbiguities(Transformer):
+ """
+ Transforms a tree that contains any number of _ambig nodes into a list of trees,
+ each one containing an unambiguous tree.
+
+ The length of the resulting list is the product of the length of all _ambig nodes.
+
+ Warning: This may quickly explode for highly ambiguous trees.
+
+ """
+ def _ambig(self, options):
+ return sum(options, [])
+
+ def __default__(self, data, children_lists, meta):
+ return [Tree(data, children, meta) for children in combine_alternatives(children_lists)]
+
+ def __default_token__(self, t):
+ return [t]
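+
+# Editor's usage sketch: when parsing with ambiguity='explicit',
+#   trees = CollapseAmbiguities().transform(ambiguous_tree)
+# yields a list of unambiguous Tree instances.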