path: root/.venv/lib/python3.12/site-packages/lark/parsers
author    S. Solomon Darnell  2025-03-28 21:52:21 -0500
committer S. Solomon Darnell  2025-03-28 21:52:21 -0500
commit    4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree      ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/lark/parsers
parent    cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download  gn-ai-master.tar.gz
two versions of R2R are here (HEAD, master)
Diffstat (limited to '.venv/lib/python3.12/site-packages/lark/parsers')
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/parsers/__init__.py               0
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/parsers/cyk.py                  345
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/parsers/earley.py               326
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/parsers/earley_common.py         75
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/parsers/earley_forest.py        790
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/parsers/grammar_analysis.py     185
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/parsers/lalr_analysis.py        304
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/parsers/lalr_interactive_parser.py  132
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py           200
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/parsers/lalr_puppet.py             3
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/parsers/xearley.py               157
11 files changed, 2517 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/lark/parsers/__init__.py b/.venv/lib/python3.12/site-packages/lark/parsers/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/parsers/__init__.py
diff --git a/.venv/lib/python3.12/site-packages/lark/parsers/cyk.py b/.venv/lib/python3.12/site-packages/lark/parsers/cyk.py
new file mode 100644
index 00000000..ff0924f2
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/parsers/cyk.py
@@ -0,0 +1,345 @@
+"""This module implements a CYK parser."""
+
+# Author: https://github.com/ehudt (2018)
+#
+# Adapted by Erez
+
+
+from collections import defaultdict
+import itertools
+
+from ..exceptions import ParseError
+from ..lexer import Token
+from ..tree import Tree
+from ..grammar import Terminal as T, NonTerminal as NT, Symbol
+
+try:
+ xrange
+except NameError:
+ xrange = range
+
+def match(t, s):
+ assert isinstance(t, T)
+ return t.name == s.type
+
+
+class Rule(object):
+ """Context-free grammar rule."""
+
+ def __init__(self, lhs, rhs, weight, alias):
+ super(Rule, self).__init__()
+ assert isinstance(lhs, NT), lhs
+ assert all(isinstance(x, NT) or isinstance(x, T) for x in rhs), rhs
+ self.lhs = lhs
+ self.rhs = rhs
+ self.weight = weight
+ self.alias = alias
+
+ def __str__(self):
+ return '%s -> %s' % (str(self.lhs), ' '.join(str(x) for x in self.rhs))
+
+ def __repr__(self):
+ return str(self)
+
+ def __hash__(self):
+ return hash((self.lhs, tuple(self.rhs)))
+
+ def __eq__(self, other):
+ return self.lhs == other.lhs and self.rhs == other.rhs
+
+ def __ne__(self, other):
+ return not (self == other)
+
+
+class Grammar(object):
+ """Context-free grammar."""
+
+ def __init__(self, rules):
+ self.rules = frozenset(rules)
+
+ def __eq__(self, other):
+ return self.rules == other.rules
+
+ def __str__(self):
+ return '\n' + '\n'.join(sorted(repr(x) for x in self.rules)) + '\n'
+
+ def __repr__(self):
+ return str(self)
+
+
+# Parse tree data structures
+class RuleNode(object):
+ """A node in the parse tree, which also contains the full rhs rule."""
+
+ def __init__(self, rule, children, weight=0):
+ self.rule = rule
+ self.children = children
+ self.weight = weight
+
+ def __repr__(self):
+ return 'RuleNode(%s, [%s])' % (repr(self.rule.lhs), ', '.join(str(x) for x in self.children))
+
+
+
+class Parser(object):
+ """Parser wrapper."""
+
+ def __init__(self, rules):
+ super(Parser, self).__init__()
+ self.orig_rules = {rule: rule for rule in rules}
+ rules = [self._to_rule(rule) for rule in rules]
+ self.grammar = to_cnf(Grammar(rules))
+
+ def _to_rule(self, lark_rule):
+ """Converts a lark rule, (lhs, rhs, callback, options), to a Rule."""
+ assert isinstance(lark_rule.origin, NT)
+ assert all(isinstance(x, Symbol) for x in lark_rule.expansion)
+ return Rule(
+ lark_rule.origin, lark_rule.expansion,
+ weight=lark_rule.options.priority if lark_rule.options.priority else 0,
+ alias=lark_rule)
+
+ def parse(self, tokenized, start): # pylint: disable=invalid-name
+ """Parses input, which is a list of tokens."""
+ assert start
+ start = NT(start)
+
+ table, trees = _parse(tokenized, self.grammar)
+ # Check if the parse succeeded.
+ if all(r.lhs != start for r in table[(0, len(tokenized) - 1)]):
+ raise ParseError('Parsing failed.')
+ parse = trees[(0, len(tokenized) - 1)][start]
+ return self._to_tree(revert_cnf(parse))
+
+ def _to_tree(self, rule_node):
+ """Converts a RuleNode parse tree to a lark Tree."""
+ orig_rule = self.orig_rules[rule_node.rule.alias]
+ children = []
+ for child in rule_node.children:
+ if isinstance(child, RuleNode):
+ children.append(self._to_tree(child))
+ else:
+ assert isinstance(child.name, Token)
+ children.append(child.name)
+ t = Tree(orig_rule.origin, children)
+ t.rule=orig_rule
+ return t
+
+
+def print_parse(node, indent=0):
+ if isinstance(node, RuleNode):
+ print(' ' * (indent * 2) + str(node.rule.lhs))
+ for child in node.children:
+ print_parse(child, indent + 1)
+ else:
+        print(' ' * (indent * 2) + str(node.name))
+
+
+def _parse(s, g):
+ """Parses sentence 's' using CNF grammar 'g'."""
+ # The CYK table. Indexed with a 2-tuple: (start pos, end pos)
+ table = defaultdict(set)
+ # Top-level structure is similar to the CYK table. Each cell is a dict from
+ # rule name to the best (lightest) tree for that rule.
+ trees = defaultdict(dict)
+ # Populate base case with existing terminal production rules
+ for i, w in enumerate(s):
+ for terminal, rules in g.terminal_rules.items():
+ if match(terminal, w):
+ for rule in rules:
+ table[(i, i)].add(rule)
+ if (rule.lhs not in trees[(i, i)] or
+ rule.weight < trees[(i, i)][rule.lhs].weight):
+ trees[(i, i)][rule.lhs] = RuleNode(rule, [T(w)], weight=rule.weight)
+
+ # Iterate over lengths of sub-sentences
+ for l in xrange(2, len(s) + 1):
+ # Iterate over sub-sentences with the given length
+ for i in xrange(len(s) - l + 1):
+ # Choose partition of the sub-sentence in [1, l)
+ for p in xrange(i + 1, i + l):
+ span1 = (i, p - 1)
+ span2 = (p, i + l - 1)
+ for r1, r2 in itertools.product(table[span1], table[span2]):
+ for rule in g.nonterminal_rules.get((r1.lhs, r2.lhs), []):
+ table[(i, i + l - 1)].add(rule)
+ r1_tree = trees[span1][r1.lhs]
+ r2_tree = trees[span2][r2.lhs]
+ rule_total_weight = rule.weight + r1_tree.weight + r2_tree.weight
+ if (rule.lhs not in trees[(i, i + l - 1)]
+ or rule_total_weight < trees[(i, i + l - 1)][rule.lhs].weight):
+ trees[(i, i + l - 1)][rule.lhs] = RuleNode(rule, [r1_tree, r2_tree], weight=rule_total_weight)
+ return table, trees
+
+
+# This section implements context-free grammar converter to Chomsky normal form.
+# It also implements a conversion of parse trees from its CNF to the original
+# grammar.
+# Overview:
+# Applies the following operations in this order:
+# * TERM: Eliminates non-solitary terminals from all rules
+# * BIN: Eliminates rules with more than 2 symbols on their right-hand-side.
+# * UNIT: Eliminates non-terminal unit rules
+#
+# The following grammar characteristics aren't featured:
+# * Start symbol appears on RHS
+# * Empty rules (epsilon rules)
+
+
+class CnfWrapper(object):
+ """CNF wrapper for grammar.
+
+ Validates that the input grammar is CNF and provides helper data structures.
+ """
+
+ def __init__(self, grammar):
+ super(CnfWrapper, self).__init__()
+ self.grammar = grammar
+ self.rules = grammar.rules
+ self.terminal_rules = defaultdict(list)
+ self.nonterminal_rules = defaultdict(list)
+ for r in self.rules:
+ # Validate that the grammar is CNF and populate auxiliary data structures.
+ assert isinstance(r.lhs, NT), r
+ if len(r.rhs) not in [1, 2]:
+ raise ParseError("CYK doesn't support empty rules")
+ if len(r.rhs) == 1 and isinstance(r.rhs[0], T):
+ self.terminal_rules[r.rhs[0]].append(r)
+ elif len(r.rhs) == 2 and all(isinstance(x, NT) for x in r.rhs):
+ self.nonterminal_rules[tuple(r.rhs)].append(r)
+ else:
+ assert False, r
+
+ def __eq__(self, other):
+ return self.grammar == other.grammar
+
+ def __repr__(self):
+ return repr(self.grammar)
+
+
+class UnitSkipRule(Rule):
+ """A rule that records NTs that were skipped during transformation."""
+
+ def __init__(self, lhs, rhs, skipped_rules, weight, alias):
+ super(UnitSkipRule, self).__init__(lhs, rhs, weight, alias)
+ self.skipped_rules = skipped_rules
+
+ def __eq__(self, other):
+ return isinstance(other, type(self)) and self.skipped_rules == other.skipped_rules
+
+ __hash__ = Rule.__hash__
+
+
+def build_unit_skiprule(unit_rule, target_rule):
+ skipped_rules = []
+ if isinstance(unit_rule, UnitSkipRule):
+ skipped_rules += unit_rule.skipped_rules
+ skipped_rules.append(target_rule)
+ if isinstance(target_rule, UnitSkipRule):
+ skipped_rules += target_rule.skipped_rules
+ return UnitSkipRule(unit_rule.lhs, target_rule.rhs, skipped_rules,
+ weight=unit_rule.weight + target_rule.weight, alias=unit_rule.alias)
+
+
+def get_any_nt_unit_rule(g):
+ """Returns a non-terminal unit rule from 'g', or None if there is none."""
+ for rule in g.rules:
+ if len(rule.rhs) == 1 and isinstance(rule.rhs[0], NT):
+ return rule
+ return None
+
+
+def _remove_unit_rule(g, rule):
+    """Removes 'rule' from 'g' without changing the language produced by 'g'."""
+ new_rules = [x for x in g.rules if x != rule]
+ refs = [x for x in g.rules if x.lhs == rule.rhs[0]]
+ new_rules += [build_unit_skiprule(rule, ref) for ref in refs]
+ return Grammar(new_rules)
+
+
+def _split(rule):
+ """Splits a rule whose len(rhs) > 2 into shorter rules."""
+ rule_str = str(rule.lhs) + '__' + '_'.join(str(x) for x in rule.rhs)
+ rule_name = '__SP_%s' % (rule_str) + '_%d'
+ yield Rule(rule.lhs, [rule.rhs[0], NT(rule_name % 1)], weight=rule.weight, alias=rule.alias)
+ for i in xrange(1, len(rule.rhs) - 2):
+ yield Rule(NT(rule_name % i), [rule.rhs[i], NT(rule_name % (i + 1))], weight=0, alias='Split')
+ yield Rule(NT(rule_name % (len(rule.rhs) - 2)), rule.rhs[-2:], weight=0, alias='Split')
+
+
+def _term(g):
+ """Applies the TERM rule on 'g' (see top comment)."""
+ all_t = {x for rule in g.rules for x in rule.rhs if isinstance(x, T)}
+ t_rules = {t: Rule(NT('__T_%s' % str(t)), [t], weight=0, alias='Term') for t in all_t}
+ new_rules = []
+ for rule in g.rules:
+ if len(rule.rhs) > 1 and any(isinstance(x, T) for x in rule.rhs):
+ new_rhs = [t_rules[x].lhs if isinstance(x, T) else x for x in rule.rhs]
+ new_rules.append(Rule(rule.lhs, new_rhs, weight=rule.weight, alias=rule.alias))
+ new_rules.extend(v for k, v in t_rules.items() if k in rule.rhs)
+ else:
+ new_rules.append(rule)
+ return Grammar(new_rules)
+
+
+def _bin(g):
+ """Applies the BIN rule to 'g' (see top comment)."""
+ new_rules = []
+ for rule in g.rules:
+ if len(rule.rhs) > 2:
+ new_rules += _split(rule)
+ else:
+ new_rules.append(rule)
+ return Grammar(new_rules)
+
+
+def _unit(g):
+ """Applies the UNIT rule to 'g' (see top comment)."""
+ nt_unit_rule = get_any_nt_unit_rule(g)
+ while nt_unit_rule:
+ g = _remove_unit_rule(g, nt_unit_rule)
+ nt_unit_rule = get_any_nt_unit_rule(g)
+ return g
+
+
+def to_cnf(g):
+ """Creates a CNF grammar from a general context-free grammar 'g'."""
+ g = _unit(_bin(_term(g)))
+ return CnfWrapper(g)
+
+
+def unroll_unit_skiprule(lhs, orig_rhs, skipped_rules, children, weight, alias):
+ if not skipped_rules:
+ return RuleNode(Rule(lhs, orig_rhs, weight=weight, alias=alias), children, weight=weight)
+ else:
+ weight = weight - skipped_rules[0].weight
+ return RuleNode(
+ Rule(lhs, [skipped_rules[0].lhs], weight=weight, alias=alias), [
+ unroll_unit_skiprule(skipped_rules[0].lhs, orig_rhs,
+ skipped_rules[1:], children,
+ skipped_rules[0].weight, skipped_rules[0].alias)
+ ], weight=weight)
+
+
+def revert_cnf(node):
+ """Reverts a parse tree (RuleNode) to its original non-CNF form (Node)."""
+ if isinstance(node, T):
+ return node
+ # Reverts TERM rule.
+ if node.rule.lhs.name.startswith('__T_'):
+ return node.children[0]
+ else:
+ children = []
+ for child in map(revert_cnf, node.children):
+ # Reverts BIN rule.
+ if isinstance(child, RuleNode) and child.rule.lhs.name.startswith('__SP_'):
+ children += child.children
+ else:
+ children.append(child)
+ # Reverts UNIT rule.
+ if isinstance(node.rule, UnitSkipRule):
+ return unroll_unit_skiprule(node.rule.lhs, node.rule.rhs,
+ node.rule.skipped_rules, children,
+ node.rule.weight, node.rule.alias)
+ else:
+ return RuleNode(node.rule, children)
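
The table-filling loop in _parse() above is easier to follow on a toy example. The sketch below is a minimal, standalone CYK recognizer over a hand-written CNF grammar; the dictionaries and names are illustrative only and do not use lark's Rule/Grammar classes.

from collections import defaultdict
from itertools import product

# Toy CNF grammar: S -> A B,  A -> 'a',  B -> 'b'
terminal_rules = {'a': {'A'}, 'b': {'B'}}      # terminal -> set of LHS non-terminals
nonterminal_rules = {('A', 'B'): {'S'}}        # (NT, NT) pair -> set of LHS non-terminals

def cyk_recognize(tokens, start='S'):
    table = defaultdict(set)                   # (start pos, end pos) -> non-terminals
    # Base case: spans of length 1 come straight from the terminal rules.
    for i, tok in enumerate(tokens):
        table[(i, i)] |= terminal_rules.get(tok, set())
    # Longer spans combine two adjacent sub-spans, as in _parse() above.
    for length in range(2, len(tokens) + 1):
        for i in range(len(tokens) - length + 1):
            j = i + length - 1
            for p in range(i + 1, j + 1):
                for lhs1, lhs2 in product(table[(i, p - 1)], table[(p, j)]):
                    table[(i, j)] |= nonterminal_rules.get((lhs1, lhs2), set())
    return start in table[(0, len(tokens) - 1)]

assert cyk_recognize(['a', 'b']) is True
assert cyk_recognize(['b', 'a']) is False
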
diff --git a/.venv/lib/python3.12/site-packages/lark/parsers/earley.py b/.venv/lib/python3.12/site-packages/lark/parsers/earley.py
new file mode 100644
index 00000000..5961d5d5
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/parsers/earley.py
@@ -0,0 +1,326 @@
+"""This module implements an Earley parser.
+
+The core Earley algorithm used here is based on Elizabeth Scott's implementation, here:
+ https://www.sciencedirect.com/science/article/pii/S1571066108001497
+
+That is probably the best reference for understanding the algorithm here.
+
+The Earley parser outputs an SPPF-tree as per that document. The SPPF tree format
+is explained here: https://lark-parser.readthedocs.io/en/latest/_static/sppf/sppf.html
+"""
+
+from collections import deque
+
+from ..tree import Tree
+from ..exceptions import UnexpectedEOF, UnexpectedToken
+from ..utils import logger
+from .grammar_analysis import GrammarAnalyzer
+from ..grammar import NonTerminal
+from .earley_common import Item, TransitiveItem
+from .earley_forest import ForestSumVisitor, SymbolNode, ForestToParseTree
+
+class Parser:
+ def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, debug=False, tree_class=Tree):
+ analysis = GrammarAnalyzer(parser_conf)
+ self.parser_conf = parser_conf
+ self.resolve_ambiguity = resolve_ambiguity
+ self.debug = debug
+ self.tree_class = tree_class
+
+ self.FIRST = analysis.FIRST
+ self.NULLABLE = analysis.NULLABLE
+ self.callbacks = parser_conf.callbacks
+ self.predictions = {}
+
+ ## These could be moved to the grammar analyzer. Pre-computing these is *much* faster than
+ # the slow 'isupper' in is_terminal.
+ self.TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if sym.is_term }
+ self.NON_TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if not sym.is_term }
+
+ self.forest_sum_visitor = None
+ for rule in parser_conf.rules:
+ if rule.origin not in self.predictions:
+ self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)]
+
+ ## Detect if any rules have priorities set. If the user specified priority = "none" then
+ # the priorities will be stripped from all rules before they reach us, allowing us to
+ # skip the extra tree walk. We'll also skip this if the user just didn't specify priorities
+ # on any rules.
+ if self.forest_sum_visitor is None and rule.options.priority is not None:
+ self.forest_sum_visitor = ForestSumVisitor
+
+ self.term_matcher = term_matcher
+
+
+ def predict_and_complete(self, i, to_scan, columns, transitives):
+ """The core Earley Predictor and Completer.
+
+        At each stage of the input, we handle any completed items (things
+        that matched on the last cycle) and use those to predict what should
+        come next in the input stream. The completions and any predicted
+        non-terminals are recursively processed until we reach a set of items
+        that expect terminals, which can be added to the scan list for the
+        next scanner cycle."""
+        # Held Completions (H in E. Scott's paper).
+ node_cache = {}
+ held_completions = {}
+
+ column = columns[i]
+ # R (items) = Ei (column.items)
+ items = deque(column)
+ while items:
+ item = items.pop() # remove an element, A say, from R
+
+ ### The Earley completer
+ if item.is_complete: ### (item.s == string)
+ if item.node is None:
+ label = (item.s, item.start, i)
+ item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
+ item.node.add_family(item.s, item.rule, item.start, None, None)
+
+ # create_leo_transitives(item.rule.origin, item.start)
+
+ ###R Joop Leo right recursion Completer
+ if item.rule.origin in transitives[item.start]:
+ transitive = transitives[item.start][item.s]
+ if transitive.previous in transitives[transitive.column]:
+ root_transitive = transitives[transitive.column][transitive.previous]
+ else:
+ root_transitive = transitive
+
+ new_item = Item(transitive.rule, transitive.ptr, transitive.start)
+ label = (root_transitive.s, root_transitive.start, i)
+ new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
+ new_item.node.add_path(root_transitive, item.node)
+ if new_item.expect in self.TERMINALS:
+ # Add (B :: aC.B, h, y) to Q
+ to_scan.add(new_item)
+ elif new_item not in column:
+ # Add (B :: aC.B, h, y) to Ei and R
+ column.add(new_item)
+ items.append(new_item)
+ ###R Regular Earley completer
+ else:
+ # Empty has 0 length. If we complete an empty symbol in a particular
+ # parse step, we need to be able to use that same empty symbol to complete
+ # any predictions that result, that themselves require empty. Avoids
+ # infinite recursion on empty symbols.
+ # held_completions is 'H' in E.Scott's paper.
+ is_empty_item = item.start == i
+ if is_empty_item:
+ held_completions[item.rule.origin] = item.node
+
+ originators = [originator for originator in columns[item.start] if originator.expect is not None and originator.expect == item.s]
+ for originator in originators:
+ new_item = originator.advance()
+ label = (new_item.s, originator.start, i)
+ new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
+ new_item.node.add_family(new_item.s, new_item.rule, i, originator.node, item.node)
+ if new_item.expect in self.TERMINALS:
+ # Add (B :: aC.B, h, y) to Q
+ to_scan.add(new_item)
+ elif new_item not in column:
+ # Add (B :: aC.B, h, y) to Ei and R
+ column.add(new_item)
+ items.append(new_item)
+
+ ### The Earley predictor
+ elif item.expect in self.NON_TERMINALS: ### (item.s == lr0)
+ new_items = []
+ for rule in self.predictions[item.expect]:
+ new_item = Item(rule, 0, i)
+ new_items.append(new_item)
+
+ # Process any held completions (H).
+ if item.expect in held_completions:
+ new_item = item.advance()
+ label = (new_item.s, item.start, i)
+ new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
+ new_item.node.add_family(new_item.s, new_item.rule, new_item.start, item.node, held_completions[item.expect])
+ new_items.append(new_item)
+
+ for new_item in new_items:
+ if new_item.expect in self.TERMINALS:
+ to_scan.add(new_item)
+ elif new_item not in column:
+ column.add(new_item)
+ items.append(new_item)
+
+ def _parse(self, lexer, columns, to_scan, start_symbol=None):
+ def is_quasi_complete(item):
+ if item.is_complete:
+ return True
+
+ quasi = item.advance()
+ while not quasi.is_complete:
+ if quasi.expect not in self.NULLABLE:
+ return False
+ if quasi.rule.origin == start_symbol and quasi.expect == start_symbol:
+ return False
+ quasi = quasi.advance()
+ return True
+
+ def create_leo_transitives(origin, start):
+ visited = set()
+ to_create = []
+ trule = None
+ previous = None
+
+ ### Recursively walk backwards through the Earley sets until we find the
+ # first transitive candidate. If this is done continuously, we shouldn't
+ # have to walk more than 1 hop.
+ while True:
+ if origin in transitives[start]:
+ previous = trule = transitives[start][origin]
+ break
+
+ is_empty_rule = not self.FIRST[origin]
+ if is_empty_rule:
+ break
+
+ candidates = [ candidate for candidate in columns[start] if candidate.expect is not None and origin == candidate.expect ]
+ if len(candidates) != 1:
+ break
+ originator = next(iter(candidates))
+
+ if originator is None or originator in visited:
+ break
+
+ visited.add(originator)
+ if not is_quasi_complete(originator):
+ break
+
+ trule = originator.advance()
+ if originator.start != start:
+ visited.clear()
+
+ to_create.append((origin, start, originator))
+ origin = originator.rule.origin
+ start = originator.start
+
+ # If a suitable Transitive candidate is not found, bail.
+ if trule is None:
+ return
+
+ #### Now walk forwards and create Transitive Items in each set we walked through; and link
+ # each transitive item to the next set forwards.
+ while to_create:
+ origin, start, originator = to_create.pop()
+ titem = None
+ if previous is not None:
+ titem = previous.next_titem = TransitiveItem(origin, trule, originator, previous.column)
+ else:
+ titem = TransitiveItem(origin, trule, originator, start)
+ previous = transitives[start][origin] = titem
+
+
+
+ def scan(i, token, to_scan):
+ """The core Earley Scanner.
+
+ This is a custom implementation of the scanner that uses the
+ Lark lexer to match tokens. The scan list is built by the
+ Earley predictor, based on the previously completed tokens.
+ This ensures that at each phase of the parse we have a custom
+ lexer context, allowing for more complex ambiguities."""
+ next_to_scan = set()
+ next_set = set()
+ columns.append(next_set)
+ transitives.append({})
+ node_cache = {}
+
+ for item in set(to_scan):
+ if match(item.expect, token):
+ new_item = item.advance()
+ label = (new_item.s, new_item.start, i)
+ new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
+ new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token)
+
+ if new_item.expect in self.TERMINALS:
+ # add (B ::= Aai+1.B, h, y) to Q'
+ next_to_scan.add(new_item)
+ else:
+ # add (B ::= Aa+1.B, h, y) to Ei+1
+ next_set.add(new_item)
+
+ if not next_set and not next_to_scan:
+ expect = {i.expect.name for i in to_scan}
+ raise UnexpectedToken(token, expect, considered_rules=set(to_scan), state=frozenset(i.s for i in to_scan))
+
+ return next_to_scan
+
+
+ # Define parser functions
+ match = self.term_matcher
+
+ # Cache for nodes & tokens created in a particular parse step.
+ transitives = [{}]
+
+ ## The main Earley loop.
+ # Run the Prediction/Completion cycle for any Items in the current Earley set.
+ # Completions will be added to the SPPF tree, and predictions will be recursively
+ # processed down to terminals/empty nodes to be added to the scanner for the next
+ # step.
+ expects = {i.expect for i in to_scan}
+ i = 0
+ for token in lexer.lex(expects):
+ self.predict_and_complete(i, to_scan, columns, transitives)
+
+ to_scan = scan(i, token, to_scan)
+ i += 1
+
+ expects.clear()
+ expects |= {i.expect for i in to_scan}
+
+ self.predict_and_complete(i, to_scan, columns, transitives)
+
+ ## Column is now the final column in the parse.
+ assert i == len(columns)-1
+ return to_scan
+
+ def parse(self, lexer, start):
+ assert start, start
+ start_symbol = NonTerminal(start)
+
+ columns = [set()]
+ to_scan = set() # The scan buffer. 'Q' in E.Scott's paper.
+
+ ## Predict for the start_symbol.
+ # Add predicted items to the first Earley set (for the predictor) if they
+ # result in a non-terminal, or the scanner if they result in a terminal.
+ for rule in self.predictions[start_symbol]:
+ item = Item(rule, 0, 0)
+ if item.expect in self.TERMINALS:
+ to_scan.add(item)
+ else:
+ columns[0].add(item)
+
+ to_scan = self._parse(lexer, columns, to_scan, start_symbol)
+
+ # If the parse was successful, the start
+ # symbol should have been completed in the last step of the Earley cycle, and will be in
+ # this column. Find the item for the start_symbol, which is the root of the SPPF tree.
+ solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0]
+ if not solutions:
+ expected_terminals = [t.expect.name for t in to_scan]
+ raise UnexpectedEOF(expected_terminals, state=frozenset(i.s for i in to_scan))
+
+ if self.debug:
+ from .earley_forest import ForestToPyDotVisitor
+ try:
+ debug_walker = ForestToPyDotVisitor()
+ except ImportError:
+ logger.warning("Cannot find dependency 'pydot', will not generate sppf debug image")
+ else:
+ debug_walker.visit(solutions[0], "sppf.png")
+
+
+ if len(solutions) > 1:
+ assert False, 'Earley should not generate multiple start symbol items!'
+
+ if self.tree_class is not None:
+ # Perform our SPPF -> AST conversion
+ transformer = ForestToParseTree(self.tree_class, self.callbacks, self.forest_sum_visitor and self.forest_sum_visitor(), self.resolve_ambiguity)
+ return transformer.transform(solutions[0])
+
+ # return the root of the SPPF
+ return solutions[0]
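
The predict/complete/scan cycle described in the docstrings above can be illustrated with a small standalone recognizer. This is a toy sketch over a hand-written grammar, not lark's Parser class: it builds no SPPF, skips Leo's right-recursion optimization, and all names are illustrative.

GRAMMAR = {                       # non-terminal -> list of expansions
    'S': [['S', '+', 'N'], ['N']],
    'N': [['1']],
}

def _add(column, item):
    """Add an item to an Earley set, reporting whether it was new."""
    if item in column:
        return False
    column.add(item)
    return True

def earley_recognize(tokens, start='S'):
    # An item is (lhs, expansion, dot, origin), mirroring Item(rule, ptr, start).
    columns = [set() for _ in range(len(tokens) + 1)]
    columns[0] = {(start, tuple(exp), 0, 0) for exp in GRAMMAR[start]}
    for i in range(len(tokens) + 1):
        changed = True
        while changed:                         # run predictor + completer to a fixpoint
            changed = False
            for lhs, exp, dot, origin in list(columns[i]):
                if dot < len(exp) and exp[dot] in GRAMMAR:            # predict
                    for new_exp in GRAMMAR[exp[dot]]:
                        changed |= _add(columns[i], (exp[dot], tuple(new_exp), 0, i))
                elif dot == len(exp):                                 # complete
                    for lhs2, exp2, dot2, origin2 in list(columns[origin]):
                        if dot2 < len(exp2) and exp2[dot2] == lhs:
                            changed |= _add(columns[i], (lhs2, exp2, dot2 + 1, origin2))
        if i < len(tokens):                                           # scan
            for lhs, exp, dot, origin in columns[i]:
                if dot < len(exp) and exp[dot] == tokens[i]:
                    columns[i + 1].add((lhs, exp, dot + 1, origin))
    return any(lhs == start and dot == len(exp) and origin == 0
               for lhs, exp, dot, origin in columns[-1])

assert earley_recognize(['1', '+', '1']) is True
assert earley_recognize(['+', '1']) is False
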
diff --git a/.venv/lib/python3.12/site-packages/lark/parsers/earley_common.py b/.venv/lib/python3.12/site-packages/lark/parsers/earley_common.py
new file mode 100644
index 00000000..6bd614ba
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/parsers/earley_common.py
@@ -0,0 +1,75 @@
+"This module implements an Earley Parser"
+
+# The parser uses a parse-forest to keep track of derivations and ambiguities.
+# When the parse ends successfully, a disambiguation stage resolves all ambiguity
+# (right now ambiguity resolution is not developed beyond the needs of lark)
+# Afterwards the parse tree is reduced (transformed) according to user callbacks.
+# I use the no-recursion version of Transformer, because the tree might be
+# deeper than Python's recursion limit (a bit absurd, but that's life)
+#
+# The algorithm keeps track of each state set, using a corresponding Column instance.
+# Column keeps track of new items using NewsList instances.
+#
+# Author: Erez Shinan (2017)
+# Email : erezshin@gmail.com
+
+from ..grammar import NonTerminal, Terminal
+
+class Item(object):
+ "An Earley Item, the atom of the algorithm."
+
+ __slots__ = ('s', 'rule', 'ptr', 'start', 'is_complete', 'expect', 'previous', 'node', '_hash')
+ def __init__(self, rule, ptr, start):
+ self.is_complete = len(rule.expansion) == ptr
+ self.rule = rule # rule
+ self.ptr = ptr # ptr
+ self.start = start # j
+ self.node = None # w
+ if self.is_complete:
+ self.s = rule.origin
+ self.expect = None
+ self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None
+ else:
+ self.s = (rule, ptr)
+ self.expect = rule.expansion[ptr]
+ self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None
+ self._hash = hash((self.s, self.start))
+
+ def advance(self):
+ return Item(self.rule, self.ptr + 1, self.start)
+
+ def __eq__(self, other):
+ return self is other or (self.s == other.s and self.start == other.start)
+
+ def __hash__(self):
+ return self._hash
+
+ def __repr__(self):
+ before = ( expansion.name for expansion in self.rule.expansion[:self.ptr] )
+ after = ( expansion.name for expansion in self.rule.expansion[self.ptr:] )
+ symbol = "{} ::= {}* {}".format(self.rule.origin.name, ' '.join(before), ' '.join(after))
+ return '%s (%d)' % (symbol, self.start)
+
+
+class TransitiveItem(Item):
+ __slots__ = ('recognized', 'reduction', 'column', 'next_titem')
+ def __init__(self, recognized, trule, originator, start):
+ super(TransitiveItem, self).__init__(trule.rule, trule.ptr, trule.start)
+ self.recognized = recognized
+ self.reduction = originator
+ self.column = start
+ self.next_titem = None
+ self._hash = hash((self.s, self.start, self.recognized))
+
+ def __eq__(self, other):
+ if not isinstance(other, TransitiveItem):
+ return False
+ return self is other or (type(self.s) == type(other.s) and self.s == other.s and self.start == other.start and self.recognized == other.recognized)
+
+ def __hash__(self):
+ return self._hash
+
+ def __repr__(self):
+ before = ( expansion.name for expansion in self.rule.expansion[:self.ptr] )
+ after = ( expansion.name for expansion in self.rule.expansion[self.ptr:] )
+ return '{} : {} -> {}* {} ({}, {})'.format(self.recognized.name, self.rule.origin.name, ' '.join(before), ' '.join(after), self.column, self.start)
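
For readers unfamiliar with Earley items, the short standalone sketch below mirrors the dotted-rule bookkeeping that Item implements above, using plain strings and lists instead of lark's Rule objects; all names are illustrative.

class ToyItem:
    """A dotted rule plus the column ('start') where its derivation began."""
    def __init__(self, origin, expansion, ptr, start):
        self.origin, self.expansion, self.ptr, self.start = origin, expansion, ptr, start
        self.is_complete = ptr == len(expansion)
        # 's' is the merge key: the origin symbol once complete, the LR(0) core otherwise.
        self.s = origin if self.is_complete else (origin, tuple(expansion), ptr)
        self.expect = None if self.is_complete else expansion[ptr]

    def advance(self):
        return ToyItem(self.origin, self.expansion, self.ptr + 1, self.start)

    def __repr__(self):
        before, after = self.expansion[:self.ptr], self.expansion[self.ptr:]
        return '%s ::= %s * %s (%d)' % (self.origin, ' '.join(before), ' '.join(after), self.start)

item = ToyItem('sum', ['sum', 'PLUS', 'product'], 1, 0)
print(item)            # sum ::= sum * PLUS product (0)
print(item.advance())  # sum ::= sum PLUS * product (0)
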
diff --git a/.venv/lib/python3.12/site-packages/lark/parsers/earley_forest.py b/.venv/lib/python3.12/site-packages/lark/parsers/earley_forest.py
new file mode 100644
index 00000000..f39f4eb1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/parsers/earley_forest.py
@@ -0,0 +1,790 @@
+"""This module implements an SPPF (Shared Packed Parse Forest).
+
+This is used as the primary output mechanism for the Earley parser
+in order to store complex ambiguities.
+
+A full reference with more details is here:
+http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/
+"""
+
+from random import randint
+from math import isinf
+from collections import deque
+from operator import attrgetter
+from importlib import import_module
+from functools import partial
+
+from ..parse_tree_builder import AmbiguousIntermediateExpander
+from ..visitors import Discard
+from ..lexer import Token
+from ..utils import logger
+from ..tree import Tree
+
+class ForestNode(object):
+ pass
+
+class SymbolNode(ForestNode):
+ """
+ A Symbol Node represents a symbol (or Intermediate LR0).
+
+ Symbol nodes are keyed by the symbol (s). For intermediate nodes
+ s will be an LR0, stored as a tuple of (rule, ptr). For completed symbol
+ nodes, s will be a string representing the non-terminal origin (i.e.
+ the left hand side of the rule).
+
+ The children of a Symbol or Intermediate Node will always be Packed Nodes;
+ with each Packed Node child representing a single derivation of a production.
+
+ Hence a Symbol Node with a single child is unambiguous.
+
+ :ivar s: A Symbol, or a tuple of (rule, ptr) for an intermediate node.
+ :ivar start: The index of the start of the substring matched by this
+ symbol (inclusive).
+ :ivar end: The index of the end of the substring matched by this
+ symbol (exclusive).
+ :ivar is_intermediate: True if this node is an intermediate node.
+ :ivar priority: The priority of the node's symbol.
+ """
+ __slots__ = ('s', 'start', 'end', '_children', 'paths', 'paths_loaded', 'priority', 'is_intermediate', '_hash')
+ def __init__(self, s, start, end):
+ self.s = s
+ self.start = start
+ self.end = end
+ self._children = set()
+ self.paths = set()
+ self.paths_loaded = False
+
+ ### We use inf here as it can be safely negated without resorting to conditionals,
+ # unlike None or float('NaN'), and sorts appropriately.
+ self.priority = float('-inf')
+ self.is_intermediate = isinstance(s, tuple)
+ self._hash = hash((self.s, self.start, self.end))
+
+ def add_family(self, lr0, rule, start, left, right):
+ self._children.add(PackedNode(self, lr0, rule, start, left, right))
+
+ def add_path(self, transitive, node):
+ self.paths.add((transitive, node))
+
+ def load_paths(self):
+ for transitive, node in self.paths:
+ if transitive.next_titem is not None:
+ vn = SymbolNode(transitive.next_titem.s, transitive.next_titem.start, self.end)
+ vn.add_path(transitive.next_titem, node)
+ self.add_family(transitive.reduction.rule.origin, transitive.reduction.rule, transitive.reduction.start, transitive.reduction.node, vn)
+ else:
+ self.add_family(transitive.reduction.rule.origin, transitive.reduction.rule, transitive.reduction.start, transitive.reduction.node, node)
+ self.paths_loaded = True
+
+ @property
+ def is_ambiguous(self):
+ """Returns True if this node is ambiguous."""
+ return len(self.children) > 1
+
+ @property
+ def children(self):
+ """Returns a list of this node's children sorted from greatest to
+ least priority."""
+ if not self.paths_loaded: self.load_paths()
+ return sorted(self._children, key=attrgetter('sort_key'))
+
+ def __iter__(self):
+ return iter(self._children)
+
+ def __eq__(self, other):
+ if not isinstance(other, SymbolNode):
+ return False
+        return self is other or (type(self.s) == type(other.s) and self.s == other.s and self.start == other.start and self.end == other.end)
+
+ def __hash__(self):
+ return self._hash
+
+ def __repr__(self):
+ if self.is_intermediate:
+ rule = self.s[0]
+ ptr = self.s[1]
+ before = ( expansion.name for expansion in rule.expansion[:ptr] )
+ after = ( expansion.name for expansion in rule.expansion[ptr:] )
+ symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after))
+ else:
+ symbol = self.s.name
+ return "({}, {}, {}, {})".format(symbol, self.start, self.end, self.priority)
+
+class PackedNode(ForestNode):
+ """
+ A Packed Node represents a single derivation in a symbol node.
+
+ :ivar rule: The rule associated with this node.
+ :ivar parent: The parent of this node.
+ :ivar left: The left child of this node. ``None`` if one does not exist.
+ :ivar right: The right child of this node. ``None`` if one does not exist.
+ :ivar priority: The priority of this node.
+ """
+ __slots__ = ('parent', 's', 'rule', 'start', 'left', 'right', 'priority', '_hash')
+ def __init__(self, parent, s, rule, start, left, right):
+ self.parent = parent
+ self.s = s
+ self.start = start
+ self.rule = rule
+ self.left = left
+ self.right = right
+ self.priority = float('-inf')
+ self._hash = hash((self.left, self.right))
+
+ @property
+ def is_empty(self):
+ return self.left is None and self.right is None
+
+ @property
+ def sort_key(self):
+ """
+ Used to sort PackedNode children of SymbolNodes.
+ A SymbolNode has multiple PackedNodes if it matched
+ ambiguously. Hence, we use the sort order to identify
+ the order in which ambiguous children should be considered.
+ """
+ return self.is_empty, -self.priority, self.rule.order
+
+ @property
+ def children(self):
+ """Returns a list of this node's children."""
+ return [x for x in [self.left, self.right] if x is not None]
+
+ def __iter__(self):
+ yield self.left
+ yield self.right
+
+ def __eq__(self, other):
+ if not isinstance(other, PackedNode):
+ return False
+ return self is other or (self.left == other.left and self.right == other.right)
+
+ def __hash__(self):
+ return self._hash
+
+ def __repr__(self):
+ if isinstance(self.s, tuple):
+ rule = self.s[0]
+ ptr = self.s[1]
+ before = ( expansion.name for expansion in rule.expansion[:ptr] )
+ after = ( expansion.name for expansion in rule.expansion[ptr:] )
+ symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after))
+ else:
+ symbol = self.s.name
+ return "({}, {}, {}, {})".format(symbol, self.start, self.priority, self.rule.order)
+
+class ForestVisitor(object):
+ """
+ An abstract base class for building forest visitors.
+
+ This class performs a controllable depth-first walk of an SPPF.
+ The visitor will not enter cycles and will backtrack if one is encountered.
+ Subclasses are notified of cycles through the ``on_cycle`` method.
+
+ Behavior for visit events is defined by overriding the
+ ``visit*node*`` functions.
+
+ The walk is controlled by the return values of the ``visit*node_in``
+    methods. Returning a node, or an iterable of nodes, will schedule them to be
+    visited. The visitor will begin to backtrack if no nodes are returned.
+
+ :ivar single_visit: If ``True``, non-Token nodes will only be visited once.
+ """
+
+ def __init__(self, single_visit=False):
+ self.single_visit = single_visit
+
+ def visit_token_node(self, node):
+ """Called when a ``Token`` is visited. ``Token`` nodes are always leaves."""
+ pass
+
+ def visit_symbol_node_in(self, node):
+ """Called when a symbol node is visited. Nodes that are returned
+ will be scheduled to be visited. If ``visit_intermediate_node_in``
+ is not implemented, this function will be called for intermediate
+ nodes as well."""
+ pass
+
+ def visit_symbol_node_out(self, node):
+ """Called after all nodes returned from a corresponding ``visit_symbol_node_in``
+ call have been visited. If ``visit_intermediate_node_out``
+ is not implemented, this function will be called for intermediate
+ nodes as well."""
+ pass
+
+ def visit_packed_node_in(self, node):
+ """Called when a packed node is visited. Nodes that are returned
+ will be scheduled to be visited. """
+ pass
+
+ def visit_packed_node_out(self, node):
+ """Called after all nodes returned from a corresponding ``visit_packed_node_in``
+ call have been visited."""
+ pass
+
+ def on_cycle(self, node, path):
+ """Called when a cycle is encountered.
+
+ :param node: The node that causes a cycle.
+ :param path: The list of nodes being visited: nodes that have been
+ entered but not exited. The first element is the root in a forest
+ visit, and the last element is the node visited most recently.
+ ``path`` should be treated as read-only.
+ """
+ pass
+
+ def get_cycle_in_path(self, node, path):
+ """A utility function for use in ``on_cycle`` to obtain a slice of
+ ``path`` that only contains the nodes that make up the cycle."""
+ index = len(path) - 1
+ while id(path[index]) != id(node):
+ index -= 1
+ return path[index:]
+
+ def visit(self, root):
+ # Visiting is a list of IDs of all symbol/intermediate nodes currently in
+ # the stack. It serves two purposes: to detect when we 'recurse' in and out
+ # of a symbol/intermediate so that we can process both up and down. Also,
+ # since the SPPF can have cycles it allows us to detect if we're trying
+ # to recurse into a node that's already on the stack (infinite recursion).
+ visiting = set()
+
+ # set of all nodes that have been visited
+ visited = set()
+
+ # a list of nodes that are currently being visited
+ # used for the `on_cycle` callback
+ path = []
+
+ # We do not use recursion here to walk the Forest due to the limited
+ # stack size in python. Therefore input_stack is essentially our stack.
+ input_stack = deque([root])
+
+ # It is much faster to cache these as locals since they are called
+ # many times in large parses.
+ vpno = getattr(self, 'visit_packed_node_out')
+ vpni = getattr(self, 'visit_packed_node_in')
+ vsno = getattr(self, 'visit_symbol_node_out')
+ vsni = getattr(self, 'visit_symbol_node_in')
+ vino = getattr(self, 'visit_intermediate_node_out', vsno)
+ vini = getattr(self, 'visit_intermediate_node_in', vsni)
+ vtn = getattr(self, 'visit_token_node')
+ oc = getattr(self, 'on_cycle')
+
+ while input_stack:
+ current = next(reversed(input_stack))
+ try:
+ next_node = next(current)
+ except StopIteration:
+ input_stack.pop()
+ continue
+ except TypeError:
+ ### If the current object is not an iterator, pass through to Token/SymbolNode
+ pass
+ else:
+ if next_node is None:
+ continue
+
+ if id(next_node) in visiting:
+ oc(next_node, path)
+ continue
+
+ input_stack.append(next_node)
+ continue
+
+ if not isinstance(current, ForestNode):
+ vtn(current)
+ input_stack.pop()
+ continue
+
+ current_id = id(current)
+ if current_id in visiting:
+ if isinstance(current, PackedNode):
+ vpno(current)
+ elif current.is_intermediate:
+ vino(current)
+ else:
+ vsno(current)
+ input_stack.pop()
+ path.pop()
+ visiting.remove(current_id)
+ visited.add(current_id)
+ elif self.single_visit and current_id in visited:
+ input_stack.pop()
+ else:
+ visiting.add(current_id)
+ path.append(current)
+ if isinstance(current, PackedNode):
+ next_node = vpni(current)
+ elif current.is_intermediate:
+ next_node = vini(current)
+ else:
+ next_node = vsni(current)
+ if next_node is None:
+ continue
+
+ if not isinstance(next_node, ForestNode) and \
+ not isinstance(next_node, Token):
+ next_node = iter(next_node)
+ elif id(next_node) in visiting:
+ oc(next_node, path)
+ continue
+
+ input_stack.append(next_node)
+
+class ForestTransformer(ForestVisitor):
+ """The base class for a bottom-up forest transformation. Most users will
+ want to use ``TreeForestTransformer`` instead as it has a friendlier
+ interface and covers most use cases.
+
+ Transformations are applied via inheritance and overriding of the
+ ``transform*node`` methods.
+
+ ``transform_token_node`` receives a ``Token`` as an argument.
+ All other methods receive the node that is being transformed and
+ a list of the results of the transformations of that node's children.
+ The return value of these methods are the resulting transformations.
+
+ If ``Discard`` is raised in a node's transformation, no data from that node
+ will be passed to its parent's transformation.
+ """
+
+ def __init__(self):
+ super(ForestTransformer, self).__init__()
+ # results of transformations
+ self.data = dict()
+ # used to track parent nodes
+ self.node_stack = deque()
+
+ def transform(self, root):
+ """Perform a transformation on an SPPF."""
+ self.node_stack.append('result')
+ self.data['result'] = []
+ self.visit(root)
+ assert len(self.data['result']) <= 1
+ if self.data['result']:
+ return self.data['result'][0]
+
+ def transform_symbol_node(self, node, data):
+ """Transform a symbol node."""
+ return node
+
+ def transform_intermediate_node(self, node, data):
+ """Transform an intermediate node."""
+ return node
+
+ def transform_packed_node(self, node, data):
+ """Transform a packed node."""
+ return node
+
+ def transform_token_node(self, node):
+ """Transform a ``Token``."""
+ return node
+
+ def visit_symbol_node_in(self, node):
+ self.node_stack.append(id(node))
+ self.data[id(node)] = []
+ return node.children
+
+ def visit_packed_node_in(self, node):
+ self.node_stack.append(id(node))
+ self.data[id(node)] = []
+ return node.children
+
+ def visit_token_node(self, node):
+ try:
+ transformed = self.transform_token_node(node)
+ except Discard:
+ pass
+ else:
+ self.data[self.node_stack[-1]].append(transformed)
+
+ def visit_symbol_node_out(self, node):
+ self.node_stack.pop()
+ try:
+ transformed = self.transform_symbol_node(node, self.data[id(node)])
+ except Discard:
+ pass
+ else:
+ self.data[self.node_stack[-1]].append(transformed)
+ finally:
+ del self.data[id(node)]
+
+ def visit_intermediate_node_out(self, node):
+ self.node_stack.pop()
+ try:
+ transformed = self.transform_intermediate_node(node, self.data[id(node)])
+ except Discard:
+ pass
+ else:
+ self.data[self.node_stack[-1]].append(transformed)
+ finally:
+ del self.data[id(node)]
+
+ def visit_packed_node_out(self, node):
+ self.node_stack.pop()
+ try:
+ transformed = self.transform_packed_node(node, self.data[id(node)])
+ except Discard:
+ pass
+ else:
+ self.data[self.node_stack[-1]].append(transformed)
+ finally:
+ del self.data[id(node)]
+
+class ForestSumVisitor(ForestVisitor):
+ """
+ A visitor for prioritizing ambiguous parts of the Forest.
+
+ This visitor is used when support for explicit priorities on
+ rules is requested (whether normal, or invert). It walks the
+ forest (or subsets thereof) and cascades properties upwards
+ from the leaves.
+
+ It would be ideal to do this during parsing, however this would
+ require processing each Earley item multiple times. That's
+ a big performance drawback; so running a forest walk is the
+ lesser of two evils: there can be significantly more Earley
+ items created during parsing than there are SPPF nodes in the
+ final tree.
+ """
+ def __init__(self):
+ super(ForestSumVisitor, self).__init__(single_visit=True)
+
+ def visit_packed_node_in(self, node):
+ yield node.left
+ yield node.right
+
+ def visit_symbol_node_in(self, node):
+ return iter(node.children)
+
+ def visit_packed_node_out(self, node):
+ priority = node.rule.options.priority if not node.parent.is_intermediate and node.rule.options.priority else 0
+ priority += getattr(node.right, 'priority', 0)
+ priority += getattr(node.left, 'priority', 0)
+ node.priority = priority
+
+ def visit_symbol_node_out(self, node):
+ node.priority = max(child.priority for child in node.children)
+
+class PackedData():
+    """Used in transformations of packed nodes to distinguish the data
+ that comes from the left child and the right child.
+ """
+
+ class _NoData():
+ pass
+
+ NO_DATA = _NoData()
+
+ def __init__(self, node, data):
+ self.left = self.NO_DATA
+ self.right = self.NO_DATA
+ if data:
+ if node.left is not None:
+ self.left = data[0]
+ if len(data) > 1:
+ self.right = data[1]
+ else:
+ self.right = data[0]
+
+class ForestToParseTree(ForestTransformer):
+ """Used by the earley parser when ambiguity equals 'resolve' or
+ 'explicit'. Transforms an SPPF into an (ambiguous) parse tree.
+
+ tree_class: The tree class to use for construction
+ callbacks: A dictionary of rules to functions that output a tree
+ prioritizer: A ``ForestVisitor`` that manipulates the priorities of
+ ForestNodes
+ resolve_ambiguity: If True, ambiguities will be resolved based on
+ priorities. Otherwise, `_ambig` nodes will be in the resulting
+ tree.
+ use_cache: If True, the results of packed node transformations will be
+ cached.
+ """
+
+ def __init__(self, tree_class=Tree, callbacks=dict(), prioritizer=ForestSumVisitor(), resolve_ambiguity=True, use_cache=True):
+ super(ForestToParseTree, self).__init__()
+ self.tree_class = tree_class
+ self.callbacks = callbacks
+ self.prioritizer = prioritizer
+ self.resolve_ambiguity = resolve_ambiguity
+ self._use_cache = use_cache
+ self._cache = {}
+ self._on_cycle_retreat = False
+ self._cycle_node = None
+ self._successful_visits = set()
+
+ def visit(self, root):
+ if self.prioritizer:
+ self.prioritizer.visit(root)
+ super(ForestToParseTree, self).visit(root)
+ self._cache = {}
+
+ def on_cycle(self, node, path):
+ logger.debug("Cycle encountered in the SPPF at node: %s. "
+ "As infinite ambiguities cannot be represented in a tree, "
+ "this family of derivations will be discarded.", node)
+ self._cycle_node = node
+ self._on_cycle_retreat = True
+
+ def _check_cycle(self, node):
+ if self._on_cycle_retreat:
+ if id(node) == id(self._cycle_node) or id(node) in self._successful_visits:
+ self._cycle_node = None
+ self._on_cycle_retreat = False
+ return
+ raise Discard()
+
+ def _collapse_ambig(self, children):
+ new_children = []
+ for child in children:
+ if hasattr(child, 'data') and child.data == '_ambig':
+ new_children += child.children
+ else:
+ new_children.append(child)
+ return new_children
+
+ def _call_rule_func(self, node, data):
+ # called when transforming children of symbol nodes
+ # data is a list of trees or tokens that correspond to the
+ # symbol's rule expansion
+ return self.callbacks[node.rule](data)
+
+ def _call_ambig_func(self, node, data):
+ # called when transforming a symbol node
+ # data is a list of trees where each tree's data is
+ # equal to the name of the symbol or one of its aliases.
+ if len(data) > 1:
+ return self.tree_class('_ambig', data)
+ elif data:
+ return data[0]
+ raise Discard()
+
+ def transform_symbol_node(self, node, data):
+ if id(node) not in self._successful_visits:
+ raise Discard()
+ self._check_cycle(node)
+ self._successful_visits.remove(id(node))
+ data = self._collapse_ambig(data)
+ return self._call_ambig_func(node, data)
+
+ def transform_intermediate_node(self, node, data):
+ if id(node) not in self._successful_visits:
+ raise Discard()
+ self._check_cycle(node)
+ self._successful_visits.remove(id(node))
+ if len(data) > 1:
+ children = [self.tree_class('_inter', c) for c in data]
+ return self.tree_class('_iambig', children)
+ return data[0]
+
+ def transform_packed_node(self, node, data):
+ self._check_cycle(node)
+ if self.resolve_ambiguity and id(node.parent) in self._successful_visits:
+ raise Discard()
+ if self._use_cache and id(node) in self._cache:
+ return self._cache[id(node)]
+ children = []
+ assert len(data) <= 2
+ data = PackedData(node, data)
+ if data.left is not PackedData.NO_DATA:
+ if node.left.is_intermediate and isinstance(data.left, list):
+ children += data.left
+ else:
+ children.append(data.left)
+ if data.right is not PackedData.NO_DATA:
+ children.append(data.right)
+ if node.parent.is_intermediate:
+ return self._cache.setdefault(id(node), children)
+ return self._cache.setdefault(id(node), self._call_rule_func(node, children))
+
+ def visit_symbol_node_in(self, node):
+ super(ForestToParseTree, self).visit_symbol_node_in(node)
+ if self._on_cycle_retreat:
+ return
+ return node.children
+
+ def visit_packed_node_in(self, node):
+ self._on_cycle_retreat = False
+ to_visit = super(ForestToParseTree, self).visit_packed_node_in(node)
+ if not self.resolve_ambiguity or id(node.parent) not in self._successful_visits:
+ if not self._use_cache or id(node) not in self._cache:
+ return to_visit
+
+ def visit_packed_node_out(self, node):
+ super(ForestToParseTree, self).visit_packed_node_out(node)
+ if not self._on_cycle_retreat:
+ self._successful_visits.add(id(node.parent))
+
+def handles_ambiguity(func):
+ """Decorator for methods of subclasses of ``TreeForestTransformer``.
+ Denotes that the method should receive a list of transformed derivations."""
+ func.handles_ambiguity = True
+ return func
+
+class TreeForestTransformer(ForestToParseTree):
+ """A ``ForestTransformer`` with a tree ``Transformer``-like interface.
+ By default, it will construct a tree.
+
+ Methods provided via inheritance are called based on the rule/symbol
+ names of nodes in the forest.
+
+ Methods that act on rules will receive a list of the results of the
+ transformations of the rule's children. By default, trees and tokens.
+
+ Methods that act on tokens will receive a token.
+
+ Alternatively, methods that act on rules may be annotated with
+ ``handles_ambiguity``. In this case, the function will receive a list
+ of all the transformations of all the derivations of the rule.
+ By default, a list of trees where each tree.data is equal to the
+ rule name or one of its aliases.
+
+ Non-tree transformations are made possible by override of
+ ``__default__``, ``__default_token__``, and ``__default_ambig__``.
+
+ .. note::
+
+ Tree shaping features such as inlined rules and token filtering are
+ not built into the transformation. Positions are also not
+ propagated.
+
+ :param tree_class: The tree class to use for construction
+ :param prioritizer: A ``ForestVisitor`` that manipulates the priorities of
+ nodes in the SPPF.
+ :param resolve_ambiguity: If True, ambiguities will be resolved based on
+ priorities.
+ :param use_cache: If True, caches the results of some transformations,
+ potentially improving performance when ``resolve_ambiguity==False``.
+ Only use if you know what you are doing: i.e. All transformation
+ functions are pure and referentially transparent.
+ """
+
+ def __init__(self, tree_class=Tree, prioritizer=ForestSumVisitor(), resolve_ambiguity=True, use_cache=False):
+ super(TreeForestTransformer, self).__init__(tree_class, dict(), prioritizer, resolve_ambiguity, use_cache)
+
+ def __default__(self, name, data):
+ """Default operation on tree (for override).
+
+ Returns a tree with name with data as children.
+ """
+ return self.tree_class(name, data)
+
+ def __default_ambig__(self, name, data):
+ """Default operation on ambiguous rule (for override).
+
+        Wraps data in an '_ambig' node if it contains more than
+ one element.
+ """
+ if len(data) > 1:
+ return self.tree_class('_ambig', data)
+ elif data:
+ return data[0]
+ raise Discard()
+
+ def __default_token__(self, node):
+ """Default operation on ``Token`` (for override).
+
+ Returns ``node``.
+ """
+ return node
+
+ def transform_token_node(self, node):
+ return getattr(self, node.type, self.__default_token__)(node)
+
+ def _call_rule_func(self, node, data):
+ name = node.rule.alias or node.rule.options.template_source or node.rule.origin.name
+ user_func = getattr(self, name, self.__default__)
+ if user_func == self.__default__ or hasattr(user_func, 'handles_ambiguity'):
+ user_func = partial(self.__default__, name)
+ if not self.resolve_ambiguity:
+ wrapper = partial(AmbiguousIntermediateExpander, self.tree_class)
+ user_func = wrapper(user_func)
+ return user_func(data)
+
+ def _call_ambig_func(self, node, data):
+ name = node.s.name
+ user_func = getattr(self, name, self.__default_ambig__)
+ if user_func == self.__default_ambig__ or not hasattr(user_func, 'handles_ambiguity'):
+ user_func = partial(self.__default_ambig__, name)
+ return user_func(data)
+
+class ForestToPyDotVisitor(ForestVisitor):
+ """
+ A Forest visitor which writes the SPPF to a PNG.
+
+ The SPPF can get really large, really quickly because
+ of the amount of meta-data it stores, so this is probably
+ only useful for trivial trees and learning how the SPPF
+ is structured.
+ """
+ def __init__(self, rankdir="TB"):
+ super(ForestToPyDotVisitor, self).__init__(single_visit=True)
+ self.pydot = import_module('pydot')
+ self.graph = self.pydot.Dot(graph_type='digraph', rankdir=rankdir)
+
+ def visit(self, root, filename):
+ super(ForestToPyDotVisitor, self).visit(root)
+ try:
+ self.graph.write_png(filename)
+ except FileNotFoundError as e:
+            logger.error("Could not write png: %s", e)
+
+ def visit_token_node(self, node):
+ graph_node_id = str(id(node))
+ graph_node_label = "\"{}\"".format(node.value.replace('"', '\\"'))
+ graph_node_color = 0x808080
+ graph_node_style = "\"filled,rounded\""
+ graph_node_shape = "diamond"
+ graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
+ self.graph.add_node(graph_node)
+
+ def visit_packed_node_in(self, node):
+ graph_node_id = str(id(node))
+ graph_node_label = repr(node)
+ graph_node_color = 0x808080
+ graph_node_style = "filled"
+ graph_node_shape = "diamond"
+ graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
+ self.graph.add_node(graph_node)
+ yield node.left
+ yield node.right
+
+ def visit_packed_node_out(self, node):
+ graph_node_id = str(id(node))
+ graph_node = self.graph.get_node(graph_node_id)[0]
+ for child in [node.left, node.right]:
+ if child is not None:
+ child_graph_node_id = str(id(child))
+ child_graph_node = self.graph.get_node(child_graph_node_id)[0]
+ self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))
+ else:
+ #### Try and be above the Python object ID range; probably impl. specific, but maybe this is okay.
+ child_graph_node_id = str(randint(100000000000000000000000000000,123456789012345678901234567890))
+ child_graph_node_style = "invis"
+ child_graph_node = self.pydot.Node(child_graph_node_id, style=child_graph_node_style, label="None")
+ child_edge_style = "invis"
+ self.graph.add_node(child_graph_node)
+ self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node, style=child_edge_style))
+
+ def visit_symbol_node_in(self, node):
+ graph_node_id = str(id(node))
+ graph_node_label = repr(node)
+ graph_node_color = 0x808080
+ graph_node_style = "\"filled\""
+ if node.is_intermediate:
+ graph_node_shape = "ellipse"
+ else:
+ graph_node_shape = "rectangle"
+ graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
+ self.graph.add_node(graph_node)
+ return iter(node.children)
+
+ def visit_symbol_node_out(self, node):
+ graph_node_id = str(id(node))
+ graph_node = self.graph.get_node(graph_node_id)[0]
+ for child in node.children:
+ child_graph_node_id = str(id(child))
+ child_graph_node = self.graph.get_node(child_graph_node_id)[0]
+ self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))
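
A brief usage sketch of the machinery above, assuming lark's documented ambiguity='forest' option (which is expected to return the SPPF root rather than a Tree) and an ambiguous toy grammar; the grammar, rule names, and the PickFirst class are made up for illustration.

from lark import Lark
from lark.parsers.earley_forest import TreeForestTransformer, handles_ambiguity

# Deliberately ambiguous toy grammar: "1+2+3" can associate either way.
grammar = """
    start: expr
    expr: expr "+" expr
        | NUMBER
    %import common.NUMBER
"""

parser = Lark(grammar, parser='earley', ambiguity='forest')
sppf_root = parser.parse("1+2+3")     # expected to be the SPPF root (a SymbolNode)

class PickFirst(TreeForestTransformer):
    @handles_ambiguity
    def expr(self, derivations):
        # Receives one transformed tree per derivation of 'expr'; keep the first.
        return derivations[0]

tree = PickFirst(resolve_ambiguity=False).transform(sppf_root)
print(tree.pretty())
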
diff --git a/.venv/lib/python3.12/site-packages/lark/parsers/grammar_analysis.py b/.venv/lib/python3.12/site-packages/lark/parsers/grammar_analysis.py
new file mode 100644
index 00000000..737cb02a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/parsers/grammar_analysis.py
@@ -0,0 +1,185 @@
+from collections import Counter, defaultdict
+
+from ..utils import bfs, fzset, classify
+from ..exceptions import GrammarError
+from ..grammar import Rule, Terminal, NonTerminal
+
+
+class RulePtr(object):
+ __slots__ = ('rule', 'index')
+
+ def __init__(self, rule, index):
+ assert isinstance(rule, Rule)
+ assert index <= len(rule.expansion)
+ self.rule = rule
+ self.index = index
+
+ def __repr__(self):
+ before = [x.name for x in self.rule.expansion[:self.index]]
+ after = [x.name for x in self.rule.expansion[self.index:]]
+ return '<%s : %s * %s>' % (self.rule.origin.name, ' '.join(before), ' '.join(after))
+
+ @property
+ def next(self):
+ return self.rule.expansion[self.index]
+
+ def advance(self, sym):
+ assert self.next == sym
+ return RulePtr(self.rule, self.index+1)
+
+ @property
+ def is_satisfied(self):
+ return self.index == len(self.rule.expansion)
+
+ def __eq__(self, other):
+ return self.rule == other.rule and self.index == other.index
+ def __hash__(self):
+ return hash((self.rule, self.index))
+
+
+# state generation ensures no duplicate LR0ItemSets
+class LR0ItemSet(object):
+ __slots__ = ('kernel', 'closure', 'transitions', 'lookaheads')
+
+ def __init__(self, kernel, closure):
+ self.kernel = fzset(kernel)
+ self.closure = fzset(closure)
+ self.transitions = {}
+ self.lookaheads = defaultdict(set)
+
+ def __repr__(self):
+ return '{%s | %s}' % (', '.join([repr(r) for r in self.kernel]), ', '.join([repr(r) for r in self.closure]))
+
+
+def update_set(set1, set2):
+ if not set2 or set1 > set2:
+ return False
+
+ copy = set(set1)
+ set1 |= set2
+ return set1 != copy
+
+def calculate_sets(rules):
+ """Calculate FOLLOW sets.
+
+ Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets"""
+ symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules}
+
+ # foreach grammar rule X ::= Y(1) ... Y(k)
+ # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
+ # NULLABLE = NULLABLE union {X}
+ # for i = 1 to k
+ # if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
+ # FIRST(X) = FIRST(X) union FIRST(Y(i))
+ # for j = i+1 to k
+ # if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
+ # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
+ # if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
+ # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
+ # until none of NULLABLE,FIRST,FOLLOW changed in last iteration
+
+ NULLABLE = set()
+ FIRST = {}
+ FOLLOW = {}
+ for sym in symbols:
+ FIRST[sym]={sym} if sym.is_term else set()
+ FOLLOW[sym]=set()
+
+ # Calculate NULLABLE and FIRST
+ changed = True
+ while changed:
+ changed = False
+
+ for rule in rules:
+ if set(rule.expansion) <= NULLABLE:
+ if update_set(NULLABLE, {rule.origin}):
+ changed = True
+
+ for i, sym in enumerate(rule.expansion):
+ if set(rule.expansion[:i]) <= NULLABLE:
+ if update_set(FIRST[rule.origin], FIRST[sym]):
+ changed = True
+ else:
+ break
+
+ # Calculate FOLLOW
+ changed = True
+ while changed:
+ changed = False
+
+ for rule in rules:
+ for i, sym in enumerate(rule.expansion):
+ if i==len(rule.expansion)-1 or set(rule.expansion[i+1:]) <= NULLABLE:
+ if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
+ changed = True
+
+ for j in range(i+1, len(rule.expansion)):
+ if set(rule.expansion[i+1:j]) <= NULLABLE:
+ if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
+ changed = True
+
+ return FIRST, FOLLOW, NULLABLE
+
+
+class GrammarAnalyzer(object):
+ def __init__(self, parser_conf, debug=False):
+ self.debug = debug
+
+ root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal('$END')])
+ for start in parser_conf.start}
+
+ rules = parser_conf.rules + list(root_rules.values())
+ self.rules_by_origin = classify(rules, lambda r: r.origin)
+
+ if len(rules) != len(set(rules)):
+ duplicates = [item for item, count in Counter(rules).items() if count > 1]
+ raise GrammarError("Rules defined twice: %s" % ', '.join(str(i) for i in duplicates))
+
+ for r in rules:
+ for sym in r.expansion:
+ if not (sym.is_term or sym in self.rules_by_origin):
+ raise GrammarError("Using an undefined rule: %s" % sym)
+
+ self.start_states = {start: self.expand_rule(root_rule.origin)
+ for start, root_rule in root_rules.items()}
+
+ self.end_states = {start: fzset({RulePtr(root_rule, len(root_rule.expansion))})
+ for start, root_rule in root_rules.items()}
+
+ lr0_root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start)])
+ for start in parser_conf.start}
+
+ lr0_rules = parser_conf.rules + list(lr0_root_rules.values())
+ assert(len(lr0_rules) == len(set(lr0_rules)))
+
+ self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin)
+
+ # cache RulePtr(r, 0) in r (no duplicate RulePtr objects)
+ self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
+ for start, root_rule in lr0_root_rules.items()}
+
+ self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)
+
+ def expand_rule(self, source_rule, rules_by_origin=None):
+ "Returns all init_ptrs accessible by rule (recursive)"
+
+ if rules_by_origin is None:
+ rules_by_origin = self.rules_by_origin
+
+ init_ptrs = set()
+ def _expand_rule(rule):
+ assert not rule.is_term, rule
+
+ for r in rules_by_origin[rule]:
+ init_ptr = RulePtr(r, 0)
+ init_ptrs.add(init_ptr)
+
+ if r.expansion: # if not empty rule
+ new_r = init_ptr.next
+ if not new_r.is_term:
+ yield new_r
+
+ for _ in bfs([source_rule], _expand_rule):
+ pass
+
+ return fzset(init_ptrs)
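
To make the FIRST/FOLLOW computation above concrete, here is a small worked example — a sketch that assumes lark.grammar.Rule can be constructed positionally as Rule(origin, expansion), as in this vendored version:

    from lark.grammar import Rule, Terminal, NonTerminal
    from lark.parsers.grammar_analysis import calculate_sets

    s, A = NonTerminal('s'), Terminal('A')
    rules = [Rule(s, [A, s]),   # s -> A s
             Rule(s, [])]       # s -> (empty)

    FIRST, FOLLOW, NULLABLE = calculate_sets(rules)
    assert NULLABLE == {s}      # s derives the empty string
    assert FIRST[s] == {A}      # every non-empty derivation of s starts with A
    assert FOLLOW[A] == {A}     # in 'A s', A may be followed by FIRST(s)
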
diff --git a/.venv/lib/python3.12/site-packages/lark/parsers/lalr_analysis.py b/.venv/lib/python3.12/site-packages/lark/parsers/lalr_analysis.py
new file mode 100644
index 00000000..f6a993b9
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/parsers/lalr_analysis.py
@@ -0,0 +1,304 @@
+"""This module builds a LALR(1) transition-table for lalr_parser.py
+
+For now, shift/reduce conflicts are automatically resolved as shifts.
+"""
+
+# Author: Erez Shinan (2017)
+# Email : erezshin@gmail.com
+
+from collections import defaultdict
+
+from ..utils import classify, classify_bool, bfs, fzset, Enumerator, logger
+from ..exceptions import GrammarError
+
+from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet
+from ..grammar import Rule
+
+###{standalone
+
+class Action:
+ def __init__(self, name):
+ self.name = name
+ def __str__(self):
+ return self.name
+ def __repr__(self):
+ return str(self)
+
+Shift = Action('Shift')
+Reduce = Action('Reduce')
+
+
+class ParseTable:
+ def __init__(self, states, start_states, end_states):
+ self.states = states
+ self.start_states = start_states
+ self.end_states = end_states
+
+ def serialize(self, memo):
+ tokens = Enumerator()
+ rules = Enumerator()
+
+ states = {
+ state: {tokens.get(token): ((1, arg.serialize(memo)) if action is Reduce else (0, arg))
+ for token, (action, arg) in actions.items()}
+ for state, actions in self.states.items()
+ }
+
+ return {
+ 'tokens': tokens.reversed(),
+ 'states': states,
+ 'start_states': self.start_states,
+ 'end_states': self.end_states,
+ }
+
+ @classmethod
+ def deserialize(cls, data, memo):
+ tokens = data['tokens']
+ states = {
+ state: {tokens[token]: ((Reduce, Rule.deserialize(arg, memo)) if action==1 else (Shift, arg))
+ for token, (action, arg) in actions.items()}
+ for state, actions in data['states'].items()
+ }
+ return cls(states, data['start_states'], data['end_states'])
+
+
+class IntParseTable(ParseTable):
+
+ @classmethod
+ def from_ParseTable(cls, parse_table):
+ enum = list(parse_table.states)
+ state_to_idx = {s:i for i,s in enumerate(enum)}
+ int_states = {}
+
+ for s, la in parse_table.states.items():
+ la = {k:(v[0], state_to_idx[v[1]]) if v[0] is Shift else v
+ for k,v in la.items()}
+ int_states[ state_to_idx[s] ] = la
+
+
+ start_states = {start:state_to_idx[s] for start, s in parse_table.start_states.items()}
+ end_states = {start:state_to_idx[s] for start, s in parse_table.end_states.items()}
+ return cls(int_states, start_states, end_states)
+
+###}
+
+
+# digraph and traverse, see The Theory and Practice of Compiler Writing
+
+# computes F(x) = G(x) union (union { F(y) | x R y }),
+# i.e. G accumulated over the transitive closure of R
+# X: nodes
+# R: relation (mapping of node -> list of nodes related to it)
+# G: set-valued function
+def digraph(X, R, G):
+ F = {}
+ S = []
+ N = {}
+ for x in X:
+ N[x] = 0
+ for x in X:
+ # this is always true for the first iteration, but N[x] may be updated in traverse below
+ if N[x] == 0:
+ traverse(x, S, N, X, R, G, F)
+ return F
+
+# x: single node
+# S: stack
+# N: per-node marks (0 = unvisited, >0 = depth/lowlink while on the stack, -1 = done)
+# X: nodes
+# R: relation (see above)
+# G: set-valued function
+# F: set-valued function we are computing (map of input -> output)
+def traverse(x, S, N, X, R, G, F):
+ S.append(x)
+ d = len(S)
+ N[x] = d
+ F[x] = G[x]
+ for y in R[x]:
+ if N[y] == 0:
+ traverse(y, S, N, X, R, G, F)
+ n_x = N[x]
+ assert(n_x > 0)
+ n_y = N[y]
+ assert(n_y != 0)
+ if (n_y > 0) and (n_y < n_x):
+ N[x] = n_y
+ F[x].update(F[y])
+ if N[x] == d:
+ f_x = F[x]
+ while True:
+ z = S.pop()
+ N[z] = -1
+ F[z] = f_x
+ if z == x:
+ break
+
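+# A worked example (editorial sketch, not part of the original module): given
+#     X = ['a', 'b', 'c']
+#     R = {'a': ['b'], 'b': ['c'], 'c': []}
+#     G = {'a': {1}, 'b': {2}, 'c': {3}}
+# digraph(X, R, G) propagates G along the transitive closure of R and returns
+#     {'a': {1, 2, 3}, 'b': {2, 3}, 'c': {3}}
+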
+
+class LALR_Analyzer(GrammarAnalyzer):
+ def __init__(self, parser_conf, debug=False):
+ GrammarAnalyzer.__init__(self, parser_conf, debug)
+ self.nonterminal_transitions = []
+ self.directly_reads = defaultdict(set)
+ self.reads = defaultdict(set)
+ self.includes = defaultdict(set)
+ self.lookback = defaultdict(set)
+
+
+ def compute_lr0_states(self):
+ self.lr0_states = set()
+ # map of kernels to LR0ItemSets
+ cache = {}
+
+ def step(state):
+ _, unsat = classify_bool(state.closure, lambda rp: rp.is_satisfied)
+
+ d = classify(unsat, lambda rp: rp.next)
+ for sym, rps in d.items():
+ kernel = fzset({rp.advance(sym) for rp in rps})
+ new_state = cache.get(kernel, None)
+ if new_state is None:
+ closure = set(kernel)
+ for rp in kernel:
+ if not rp.is_satisfied and not rp.next.is_term:
+ closure |= self.expand_rule(rp.next, self.lr0_rules_by_origin)
+ new_state = LR0ItemSet(kernel, closure)
+ cache[kernel] = new_state
+
+ state.transitions[sym] = new_state
+ yield new_state
+
+ self.lr0_states.add(state)
+
+ for _ in bfs(self.lr0_start_states.values(), step):
+ pass
+
+ def compute_reads_relations(self):
+ # handle start state
+ for root in self.lr0_start_states.values():
+ assert(len(root.kernel) == 1)
+ for rp in root.kernel:
+ assert(rp.index == 0)
+ self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ])
+
+ for state in self.lr0_states:
+ seen = set()
+ for rp in state.closure:
+ if rp.is_satisfied:
+ continue
+ s = rp.next
+ # if s is not a nonterminal
+ if s not in self.lr0_rules_by_origin:
+ continue
+ if s in seen:
+ continue
+ seen.add(s)
+ nt = (state, s)
+ self.nonterminal_transitions.append(nt)
+ dr = self.directly_reads[nt]
+ r = self.reads[nt]
+ next_state = state.transitions[s]
+ for rp2 in next_state.closure:
+ if rp2.is_satisfied:
+ continue
+ s2 = rp2.next
+ # if s2 is a terminal
+ if s2 not in self.lr0_rules_by_origin:
+ dr.add(s2)
+ if s2 in self.NULLABLE:
+ r.add((next_state, s2))
+
+ def compute_includes_lookback(self):
+ for nt in self.nonterminal_transitions:
+ state, nonterminal = nt
+ includes = []
+ lookback = self.lookback[nt]
+ for rp in state.closure:
+ if rp.rule.origin != nonterminal:
+ continue
+ # walk the transitions along the remaining symbols of rp.rule
+ state2 = state
+ for i in range(rp.index, len(rp.rule.expansion)):
+ s = rp.rule.expansion[i]
+ nt2 = (state2, s)
+ state2 = state2.transitions[s]
+ if nt2 not in self.reads:
+ continue
+ for j in range(i + 1, len(rp.rule.expansion)):
+ if not rp.rule.expansion[j] in self.NULLABLE:
+ break
+ else:
+ includes.append(nt2)
+ # state2 is at the final state for rp.rule
+ if rp.index == 0:
+ for rp2 in state2.closure:
+ if (rp2.rule == rp.rule) and rp2.is_satisfied:
+ lookback.add((state2, rp2.rule))
+ for nt2 in includes:
+ self.includes[nt2].add(nt)
+
+ def compute_lookaheads(self):
+ read_sets = digraph(self.nonterminal_transitions, self.reads, self.directly_reads)
+ follow_sets = digraph(self.nonterminal_transitions, self.includes, read_sets)
+
+ for nt, lookbacks in self.lookback.items():
+ for state, rule in lookbacks:
+ for s in follow_sets[nt]:
+ state.lookaheads[s].add(rule)
+
+ def compute_lalr1_states(self):
+ m = {}
+ reduce_reduce = []
+ for state in self.lr0_states:
+ actions = {}
+ for la, next_state in state.transitions.items():
+ actions[la] = (Shift, next_state.closure)
+ for la, rules in state.lookaheads.items():
+ if len(rules) > 1:
+ # Try to resolve conflict based on priority
+ p = [(r.options.priority or 0, r) for r in rules]
+ p.sort(key=lambda r: r[0], reverse=True)
+ best, second_best = p[:2]
+ if best[0] > second_best[0]:
+ rules = [best[1]]
+ else:
+ reduce_reduce.append((state, la, rules))
+ if la in actions:
+ if self.debug:
+ logger.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)
+ logger.warning(' * %s', list(rules)[0])
+ else:
+ actions[la] = (Reduce, list(rules)[0])
+ m[state] = { k.name: v for k, v in actions.items() }
+
+ if reduce_reduce:
+ msgs = []
+ for state, la, rules in reduce_reduce:
+ msg = 'Reduce/Reduce collision in %s between the following rules: %s' % (la, ''.join([ '\n\t- ' + str(r) for r in rules ]))
+ if self.debug:
+ msg += '\n collision occurred in state: {%s\n }' % ''.join(['\n\t' + str(x) for x in state.closure])
+ msgs.append(msg)
+ raise GrammarError('\n\n'.join(msgs))
+
+ states = { k.closure: v for k, v in m.items() }
+
+ # compute end states
+ end_states = {}
+ for state in states:
+ for rp in state:
+ for start in self.lr0_start_states:
+ if rp.rule.origin.name == ('$root_' + start) and rp.is_satisfied:
+ assert(not start in end_states)
+ end_states[start] = state
+
+ _parse_table = ParseTable(states, { start: state.closure for start, state in self.lr0_start_states.items() }, end_states)
+
+ if self.debug:
+ self.parse_table = _parse_table
+ else:
+ self.parse_table = IntParseTable.from_ParseTable(_parse_table)
+
+ def compute_lalr(self):
+ self.compute_lr0_states()
+ self.compute_reads_relations()
+ self.compute_includes_lookback()
+ self.compute_lookaheads()
+ self.compute_lalr1_states()
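
A sketch of driving the construction above directly (normally Lark(..., parser='lalr') does this internally). The parser_conf object here is hypothetical and only assumed to expose the .rules, .start and .callbacks attributes that GrammarAnalyzer and the LALR parser read:

    from lark.parsers.lalr_analysis import LALR_Analyzer

    analyzer = LALR_Analyzer(parser_conf, debug=True)  # parser_conf: hypothetical ParserConf-like object
    analyzer.compute_lalr()          # LR(0) states -> reads -> includes/lookback -> lookaheads -> LALR(1) table
    table = analyzer.parse_table     # with debug=True, states are keyed by their item-set closures
    for state, actions in table.states.items():
        for terminal_name, (action, arg) in actions.items():
            print(terminal_name, action)   # action is Shift or Reduce; arg is a next state or a Rule
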
diff --git a/.venv/lib/python3.12/site-packages/lark/parsers/lalr_interactive_parser.py b/.venv/lib/python3.12/site-packages/lark/parsers/lalr_interactive_parser.py
new file mode 100644
index 00000000..d6780cba
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/parsers/lalr_interactive_parser.py
@@ -0,0 +1,132 @@
+# This module provides a LALR interactive parser, which is used for debugging and error handling
+
+from copy import copy
+
+from .. import Token
+from ..exceptions import UnexpectedToken
+
+
+class InteractiveParser(object):
+ """InteractiveParser gives you advanced control over parsing and error handling when parsing with LALR.
+
+ For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``.
+ """
+ def __init__(self, parser, parser_state, lexer_state):
+ self.parser = parser
+ self.parser_state = parser_state
+ self.lexer_state = lexer_state
+
+ def feed_token(self, token):
+ """Feed the parser with a token, and advance it to the next state, as if it received it from the lexer.
+
+ Note that ``token`` has to be an instance of ``Token``.
+ """
+ return self.parser_state.feed_token(token, token.type == '$END')
+
+ def exhaust_lexer(self):
+ """Try to feed the rest of the lexer state into the interactive parser.
+
+ Note that this modifies the instance in place and does not feed an '$END' Token"""
+ for token in self.lexer_state.lex(self.parser_state):
+ self.parser_state.feed_token(token)
+
+ def feed_eof(self, last_token=None):
+ """Feed a '$END' Token. Borrows from 'last_token' if given."""
+ eof = Token.new_borrow_pos('$END', '', last_token) if last_token is not None else Token('$END', '', 0, 1, 1)
+ return self.feed_token(eof)
+
+
+ def __copy__(self):
+ """Create a new interactive parser with a separate state.
+
+ Calls to feed_token() won't affect the old instance, and vice-versa.
+ """
+ return type(self)(
+ self.parser,
+ copy(self.parser_state),
+ copy(self.lexer_state),
+ )
+
+ def copy(self):
+ return copy(self)
+
+ def __eq__(self, other):
+ if not isinstance(other, InteractiveParser):
+ return False
+
+ return self.parser_state == other.parser_state and self.lexer_state == other.lexer_state
+
+ def as_immutable(self):
+ """Convert to an ``ImmutableInteractiveParser``."""
+ p = copy(self)
+ return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_state)
+
+ def pretty(self):
+ """Print the output of ``choices()`` in a way that's easier to read."""
+ out = ["Parser choices:"]
+ for k, v in self.choices().items():
+ out.append('\t- %s -> %r' % (k, v))
+ out.append('stack size: %s' % len(self.parser_state.state_stack))
+ return '\n'.join(out)
+
+ def choices(self):
+ """Returns a dictionary of token types, matched to their action in the parser.
+
+ Only returns token types that are accepted by the current state.
+
+ Updated by ``feed_token()``.
+ """
+ return self.parser_state.parse_conf.parse_table.states[self.parser_state.position]
+
+ def accepts(self):
+ """Returns the set of possible tokens that will advance the parser into a new valid state."""
+ accepts = set()
+ for t in self.choices():
+ if t.isupper(): # is terminal?
+ new_cursor = copy(self)
+ try:
+ new_cursor.feed_token(Token(t, ''))
+ except UnexpectedToken:
+ pass
+ else:
+ accepts.add(t)
+ return accepts
+
+ def resume_parse(self):
+ """Resume automated parsing from the current state."""
+ return self.parser.parse_from_state(self.parser_state)
+
+
+
+class ImmutableInteractiveParser(InteractiveParser):
+ """Same as ``InteractiveParser``, but operations create a new instance instead
+ of changing it in-place.
+ """
+
+ result = None
+
+ def __hash__(self):
+ return hash((self.parser_state, self.lexer_state))
+
+ def feed_token(self, token):
+ c = copy(self)
+ c.result = InteractiveParser.feed_token(c, token)
+ return c
+
+ def exhaust_lexer(self):
+ """Try to feed the rest of the lexer state into the parser.
+
+ Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token"""
+ cursor = self.as_mutable()
+ cursor.exhaust_lexer()
+ return cursor.as_immutable()
+
+ def as_mutable(self):
+ """Convert to an ``InteractiveParser``."""
+ p = copy(self)
+ return InteractiveParser(p.parser, p.parser_state, p.lexer_state)
+
+
+# Deprecated class names for the interactive parser
+ParserPuppet = InteractiveParser
+ImmutableParserPuppet = ImmutableInteractiveParser
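
A short usage sketch, assuming the public Lark.parse_interactive() entry point (which constructs an InteractiveParser like the one above):

    from lark import Lark

    parser = Lark("start: 'a' 'b'", parser='lalr')
    ip = parser.parse_interactive("ab")
    ip.exhaust_lexer()          # feed all lexed tokens; does not feed '$END'
    print(ip.pretty())          # human-readable view of choices() for the current state
    result = ip.feed_eof()      # feed '$END' and obtain the finished parse result
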
diff --git a/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py b/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py
new file mode 100644
index 00000000..d916b463
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py
@@ -0,0 +1,200 @@
+"""This module implements a LALR(1) Parser
+"""
+# Author: Erez Shinan (2017)
+# Email : erezshin@gmail.com
+from copy import deepcopy, copy
+from ..exceptions import UnexpectedInput, UnexpectedToken
+from ..lexer import Token
+from ..utils import Serialize
+
+from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
+from .lalr_interactive_parser import InteractiveParser
+from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken
+
+###{standalone
+
+class LALR_Parser(Serialize):
+ def __init__(self, parser_conf, debug=False):
+ analysis = LALR_Analyzer(parser_conf, debug=debug)
+ analysis.compute_lalr()
+ callbacks = parser_conf.callbacks
+
+ self._parse_table = analysis.parse_table
+ self.parser_conf = parser_conf
+ self.parser = _Parser(analysis.parse_table, callbacks, debug)
+
+ @classmethod
+ def deserialize(cls, data, memo, callbacks, debug=False):
+ inst = cls.__new__(cls)
+ inst._parse_table = IntParseTable.deserialize(data, memo)
+ inst.parser = _Parser(inst._parse_table, callbacks, debug)
+ return inst
+
+ def serialize(self, memo):
+ return self._parse_table.serialize(memo)
+
+ def parse_interactive(self, lexer, start):
+ return self.parser.parse(lexer, start, start_interactive=True)
+
+ def parse(self, lexer, start, on_error=None):
+ try:
+ return self.parser.parse(lexer, start)
+ except UnexpectedInput as e:
+ if on_error is None:
+ raise
+
+ while True:
+ if isinstance(e, UnexpectedCharacters):
+ s = e.interactive_parser.lexer_state.state
+ p = s.line_ctr.char_pos
+
+ if not on_error(e):
+ raise e
+
+ if isinstance(e, UnexpectedCharacters):
+ # If the user's handler didn't advance the character position, skip one character so we can make progress
+ if p == s.line_ctr.char_pos:
+ s.line_ctr.feed(s.text[p:p+1])
+
+ try:
+ return e.interactive_parser.resume_parse()
+ except UnexpectedToken as e2:
+ if (isinstance(e, UnexpectedToken)
+ and e.token.type == e2.token.type == '$END'
+ and e.interactive_parser == e2.interactive_parser):
+ # Prevent infinite loop
+ raise e2
+ e = e2
+ except UnexpectedCharacters as e2:
+ e = e2
+
+
+class ParseConf(object):
+ __slots__ = 'parse_table', 'callbacks', 'start', 'start_state', 'end_state', 'states'
+
+ def __init__(self, parse_table, callbacks, start):
+ self.parse_table = parse_table
+
+ self.start_state = self.parse_table.start_states[start]
+ self.end_state = self.parse_table.end_states[start]
+ self.states = self.parse_table.states
+
+ self.callbacks = callbacks
+ self.start = start
+
+
+class ParserState(object):
+ __slots__ = 'parse_conf', 'lexer', 'state_stack', 'value_stack'
+
+ def __init__(self, parse_conf, lexer, state_stack=None, value_stack=None):
+ self.parse_conf = parse_conf
+ self.lexer = lexer
+ self.state_stack = state_stack or [self.parse_conf.start_state]
+ self.value_stack = value_stack or []
+
+ @property
+ def position(self):
+ return self.state_stack[-1]
+
+ # Necessary for match_examples() to work
+ def __eq__(self, other):
+ if not isinstance(other, ParserState):
+ return NotImplemented
+ return len(self.state_stack) == len(other.state_stack) and self.position == other.position
+
+ def __copy__(self):
+ return type(self)(
+ self.parse_conf,
+ self.lexer, # XXX copy
+ copy(self.state_stack),
+ deepcopy(self.value_stack),
+ )
+
+ def copy(self):
+ return copy(self)
+
+ def feed_token(self, token, is_end=False):
+ state_stack = self.state_stack
+ value_stack = self.value_stack
+ states = self.parse_conf.states
+ end_state = self.parse_conf.end_state
+ callbacks = self.parse_conf.callbacks
+
+ while True:
+ state = state_stack[-1]
+ try:
+ action, arg = states[state][token.type]
+ except KeyError:
+ expected = {s for s in states[state].keys() if s.isupper()}
+ raise UnexpectedToken(token, expected, state=self, interactive_parser=None)
+
+ assert arg != end_state
+
+ if action is Shift:
+ # shift once and return
+ assert not is_end
+ state_stack.append(arg)
+ value_stack.append(token if token.type not in callbacks else callbacks[token.type](token))
+ return
+ else:
+ # reduce+shift as many times as necessary
+ rule = arg
+ size = len(rule.expansion)
+ if size:
+ s = value_stack[-size:]
+ del state_stack[-size:]
+ del value_stack[-size:]
+ else:
+ s = []
+
+ value = callbacks[rule](s)
+
+ _action, new_state = states[state_stack[-1]][rule.origin.name]
+ assert _action is Shift
+ state_stack.append(new_state)
+ value_stack.append(value)
+
+ if is_end and state_stack[-1] == end_state:
+ return value_stack[-1]
+
+class _Parser(object):
+ def __init__(self, parse_table, callbacks, debug=False):
+ self.parse_table = parse_table
+ self.callbacks = callbacks
+ self.debug = debug
+
+ def parse(self, lexer, start, value_stack=None, state_stack=None, start_interactive=False):
+ parse_conf = ParseConf(self.parse_table, self.callbacks, start)
+ parser_state = ParserState(parse_conf, lexer, state_stack, value_stack)
+ if start_interactive:
+ return InteractiveParser(self, parser_state, parser_state.lexer)
+ return self.parse_from_state(parser_state)
+
+
+ def parse_from_state(self, state):
+ # Main LALR-parser loop
+ try:
+ token = None
+ for token in state.lexer.lex(state):
+ state.feed_token(token)
+
+ end_token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
+ return state.feed_token(end_token, True)
+ except UnexpectedInput as e:
+ try:
+ e.interactive_parser = InteractiveParser(self, state, state.lexer)
+ except NameError:
+ pass
+ raise e
+ except Exception as e:
+ if self.debug:
+ print("")
+ print("STATE STACK DUMP")
+ print("----------------")
+ for i, s in enumerate(state.state_stack):
+ print('%d)' % i , s)
+ print("")
+
+ raise
+###}
+
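
The on_error hook in parse() above enables simple error recovery; a sketch, assuming the standard Lark front end forwards on_error to this method:

    from lark import Lark

    parser = Lark("start: 'a'+", parser='lalr')

    def handle(e):              # e is an UnexpectedInput (UnexpectedToken or UnexpectedCharacters)
        print("error at position", e.pos_in_stream)
        return True             # True: try to resume from e.interactive_parser; falsy: re-raise

    tree = parser.parse("aaXaa", on_error=handle)
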
diff --git a/.venv/lib/python3.12/site-packages/lark/parsers/lalr_puppet.py b/.venv/lib/python3.12/site-packages/lark/parsers/lalr_puppet.py
new file mode 100644
index 00000000..6ea6d894
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/parsers/lalr_puppet.py
@@ -0,0 +1,3 @@
+# Deprecated
+
+from .lalr_interactive_parser import ParserPuppet, ImmutableParserPuppet \ No newline at end of file
diff --git a/.venv/lib/python3.12/site-packages/lark/parsers/xearley.py b/.venv/lib/python3.12/site-packages/lark/parsers/xearley.py
new file mode 100644
index 00000000..335d9ad4
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/parsers/xearley.py
@@ -0,0 +1,157 @@
+"""This module implements an experimental Earley parser with a dynamic lexer
+
+The core Earley algorithm used here is based on Elizabeth Scott's implementation, here:
+ https://www.sciencedirect.com/science/article/pii/S1571066108001497
+
+That is probably the best reference for understanding the algorithm here.
+
+The Earley parser outputs an SPPF-tree as per that document. The SPPF tree format
+is better documented here:
+ http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/
+
+Instead of running a lexer beforehand, or using a costly char-by-char method, this parser
+matches terminals with regular expressions as it scans, achieving high performance while
+maintaining all of Earley's power in parsing any CFG.
+"""
+
+from collections import defaultdict
+
+from ..tree import Tree
+from ..exceptions import UnexpectedCharacters
+from ..lexer import Token
+from ..grammar import Terminal
+from .earley import Parser as BaseParser
+from .earley_forest import SymbolNode
+
+
+class Parser(BaseParser):
+ def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, ignore = (), complete_lex = False, debug=False, tree_class=Tree):
+ BaseParser.__init__(self, parser_conf, term_matcher, resolve_ambiguity, debug, tree_class)
+ self.ignore = [Terminal(t) for t in ignore]
+ self.complete_lex = complete_lex
+
+ def _parse(self, stream, columns, to_scan, start_symbol=None):
+
+ def scan(i, to_scan):
+ """The core Earley Scanner.
+
+ This is a custom implementation of the scanner that uses the
+ Lark lexer to match tokens. The scan list is built by the
+ Earley predictor, based on the previously completed tokens.
+ This ensures that at each phase of the parse we have a custom
+ lexer context, allowing for more complex ambiguities."""
+
+ node_cache = {}
+
+ # 1) Loop over the expected terminals and ask the matcher to match them.
+ # Since the regexps look forward in the input stream, and we only want to
+ # process a token once we reach the point in the stream at which it
+ # completes, we push every match into a buffer (delayed_matches) to be
+ # held, possibly for a later parse step.
+ for item in set(to_scan):
+ m = match(item.expect, stream, i)
+ if m:
+ t = Token(item.expect.name, m.group(0), i, text_line, text_column)
+ delayed_matches[m.end()].append( (item, i, t) )
+
+ if self.complete_lex:
+ s = m.group(0)
+ for j in range(1, len(s)):
+ m = match(item.expect, s[:-j])
+ if m:
+ t = Token(item.expect.name, m.group(0), i, text_line, text_column)
+ delayed_matches[i+m.end()].append( (item, i, t) )
+
+ # XXX The following 3 lines were commented out for causing a bug. See issue #768
+ # # Remove any items that successfully matched in this pass from the to_scan buffer.
+ # # This ensures we don't carry over tokens that already matched, if we're ignoring below.
+ # to_scan.remove(item)
+
+ # 3) Process any ignores. This is typically used for e.g. whitespace.
+ # We carry over any unmatched items from the to_scan buffer to be matched again after
+ # the ignore. This should allow us to use ignored symbols in non-terminals to implement
+ # e.g. mandatory spacing.
+ for x in self.ignore:
+ m = match(x, stream, i)
+ if m:
+ # Carry over any items still in the scan buffer, to past the end of the ignored items.
+ delayed_matches[m.end()].extend([(item, i, None) for item in to_scan ])
+
+ # If we're ignoring up to the end of the file, carry over the start symbol if it already completed.
+ delayed_matches[m.end()].extend([(item, i, None) for item in columns[i] if item.is_complete and item.s == start_symbol])
+
+ next_to_scan = set()
+ next_set = set()
+ columns.append(next_set)
+ transitives.append({})
+
+ ## 4) Process Tokens from delayed_matches.
+ # This is the core of the Earley scanner. Create an SPPF node for each Token,
+ # and create the symbol node in the SPPF tree. Advance the item that completed,
+ # and add the resulting new item to either the Earley set (for processing by the
+ # completer/predictor) or the to_scan buffer for the next parse step.
+ for item, start, token in delayed_matches[i+1]:
+ if token is not None:
+ token.end_line = text_line
+ token.end_column = text_column + 1
+ token.end_pos = i + 1
+
+ new_item = item.advance()
+ label = (new_item.s, new_item.start, i)
+ new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
+ new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token)
+ else:
+ new_item = item
+
+ if new_item.expect in self.TERMINALS:
+ # next expected symbol is a terminal: add the item to Q' (to be scanned at the next position)
+ next_to_scan.add(new_item)
+ else:
+ # next expected symbol is a nonterminal: add the item to E_{i+1} (the next Earley set)
+ next_set.add(new_item)
+
+ del delayed_matches[i+1] # No longer needed, so unburden memory
+
+ if not next_set and not delayed_matches and not next_to_scan:
+ considered_rules = list(sorted(to_scan, key=lambda key: key.rule.origin.name))
+ raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan},
+ set(to_scan), state=frozenset(i.s for i in to_scan),
+ considered_rules=considered_rules
+ )
+
+ return next_to_scan
+
+
+ delayed_matches = defaultdict(list)
+ match = self.term_matcher
+
+ # Per-Earley-set cache of transitive items, consumed by predict_and_complete() in earley.py.
+ transitives = [{}]
+
+ text_line = 1
+ text_column = 1
+
+ ## The main Earley loop.
+ # Run the Prediction/Completion cycle for any Items in the current Earley set.
+ # Completions will be added to the SPPF tree, and predictions will be recursively
+ # processed down to terminals/empty nodes to be added to the scanner for the next
+ # step.
+ i = 0
+ for token in stream:
+ self.predict_and_complete(i, to_scan, columns, transitives)
+
+ to_scan = scan(i, to_scan)
+
+ if token == '\n':
+ text_line += 1
+ text_column = 1
+ else:
+ text_column += 1
+ i += 1
+
+ self.predict_and_complete(i, to_scan, columns, transitives)
+
+ ## Column is now the final column in the parse.
+ assert i == len(columns)-1
+ return to_scan
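
To reach this parser through the public API — a sketch, assuming lexer='dynamic' maps to this class and lexer='dynamic_complete' additionally sets complete_lex=True:

    from lark import Lark

    parser = Lark(r"""
        start: NAME "=" NAME
        NAME: /[a-z]+/
    """, parser='earley', lexer='dynamic_complete')

    print(parser.parse("foo=bar").pretty())
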