diff --git a/.venv/lib/python3.12/site-packages/lark/load_grammar.py b/.venv/lib/python3.12/site-packages/lark/load_grammar.py
new file mode 100644
index 00000000..1ae832f6
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/load_grammar.py
@@ -0,0 +1,1354 @@
+"""Parses and creates Grammar objects"""
+import hashlib
+import os.path
+import sys
+from collections import namedtuple
+from copy import copy, deepcopy
+from io import open
+import pkgutil
+from ast import literal_eval
+from numbers import Integral
+
+from .utils import bfs, Py36, logger, classify_bool, is_id_continue, is_id_start, bfs_all_unique, small_factors
+from .lexer import Token, TerminalDef, PatternStr, PatternRE
+
+from .parse_tree_builder import ParseTreeBuilder
+from .parser_frontends import ParsingFrontend
+from .common import LexerConf, ParserConf
+from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol
+from .utils import classify, suppress, dedup_list, Str
+from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken, ParseError
+
+from .tree import Tree, SlottedTree as ST
+from .visitors import Transformer, Visitor, v_args, Transformer_InPlace, Transformer_NonRecursive
+inline_args = v_args(inline=True)
+
+__path__ = os.path.dirname(__file__)
+IMPORT_PATHS = ['grammars']
+
+EXT = '.lark'
+
+_RE_FLAGS = 'imslux'
+
+_EMPTY = Symbol('__empty__')
+
+_TERMINAL_NAMES = {
+ '.' : 'DOT',
+ ',' : 'COMMA',
+ ':' : 'COLON',
+ ';' : 'SEMICOLON',
+ '+' : 'PLUS',
+ '-' : 'MINUS',
+ '*' : 'STAR',
+ '/' : 'SLASH',
+ '\\' : 'BACKSLASH',
+ '|' : 'VBAR',
+ '?' : 'QMARK',
+ '!' : 'BANG',
+ '@' : 'AT',
+ '#' : 'HASH',
+ '$' : 'DOLLAR',
+ '%' : 'PERCENT',
+ '^' : 'CIRCUMFLEX',
+ '&' : 'AMPERSAND',
+ '_' : 'UNDERSCORE',
+ '<' : 'LESSTHAN',
+ '>' : 'MORETHAN',
+ '=' : 'EQUAL',
+ '"' : 'DBLQUOTE',
+ '\'' : 'QUOTE',
+ '`' : 'BACKQUOTE',
+ '~' : 'TILDE',
+ '(' : 'LPAR',
+ ')' : 'RPAR',
+ '{' : 'LBRACE',
+ '}' : 'RBRACE',
+ '[' : 'LSQB',
+ ']' : 'RSQB',
+ '\n' : 'NEWLINE',
+ '\r\n' : 'CRLF',
+ '\t' : 'TAB',
+ ' ' : 'SPACE',
+}
+
+# Grammar Parser
+TERMINALS = {
+ '_LPAR': r'\(',
+ '_RPAR': r'\)',
+ '_LBRA': r'\[',
+ '_RBRA': r'\]',
+ '_LBRACE': r'\{',
+ '_RBRACE': r'\}',
+ 'OP': '[+*]|[?](?![a-z])',
+ '_COLON': ':',
+ '_COMMA': ',',
+ '_OR': r'\|',
+ '_DOT': r'\.(?!\.)',
+ '_DOTDOT': r'\.\.',
+ 'TILDE': '~',
+ 'RULE': '!?[_?]?[a-z][_a-z0-9]*',
+ 'TERMINAL': '_?[A-Z][_A-Z0-9]*',
+ 'STRING': r'"(\\"|\\\\|[^"\n])*?"i?',
+ 'REGEXP': r'/(?!/)(\\/|\\\\|[^/])*?/[%s]*' % _RE_FLAGS,
+ '_NL': r'(\r?\n)+\s*',
+ '_NL_OR': r'(\r?\n)+\s*\|',
+ 'WS': r'[ \t]+',
+ 'COMMENT': r'\s*//[^\n]*',
+ 'BACKSLASH': r'\\[ ]*\n',
+ '_TO': '->',
+ '_IGNORE': r'%ignore',
+ '_OVERRIDE': r'%override',
+ '_DECLARE': r'%declare',
+ '_EXTEND': r'%extend',
+ '_IMPORT': r'%import',
+ 'NUMBER': r'[+-]?\d+',
+}
+
+RULES = {
+ 'start': ['_list'],
+ '_list': ['_item', '_list _item'],
+ '_item': ['rule', 'term', 'ignore', 'import', 'declare', 'override', 'extend', '_NL'],
+
+ 'rule': ['RULE template_params _COLON expansions _NL',
+ 'RULE template_params _DOT NUMBER _COLON expansions _NL'],
+ 'template_params': ['_LBRACE _template_params _RBRACE',
+ ''],
+ '_template_params': ['RULE',
+ '_template_params _COMMA RULE'],
+ 'expansions': ['_expansions'],
+ '_expansions': ['alias',
+ '_expansions _OR alias',
+ '_expansions _NL_OR alias'],
+
+ '?alias': ['expansion _TO RULE', 'expansion'],
+ 'expansion': ['_expansion'],
+
+ '_expansion': ['', '_expansion expr'],
+
+ '?expr': ['atom',
+ 'atom OP',
+ 'atom TILDE NUMBER',
+ 'atom TILDE NUMBER _DOTDOT NUMBER',
+ ],
+
+ '?atom': ['_LPAR expansions _RPAR',
+ 'maybe',
+ 'value'],
+
+ 'value': ['terminal',
+ 'nonterminal',
+ 'literal',
+ 'range',
+ 'template_usage'],
+
+ 'terminal': ['TERMINAL'],
+ 'nonterminal': ['RULE'],
+
+ '?name': ['RULE', 'TERMINAL'],
+
+ 'maybe': ['_LBRA expansions _RBRA'],
+ 'range': ['STRING _DOTDOT STRING'],
+
+ 'template_usage': ['RULE _LBRACE _template_args _RBRACE'],
+ '_template_args': ['value',
+ '_template_args _COMMA value'],
+
+ 'term': ['TERMINAL _COLON expansions _NL',
+ 'TERMINAL _DOT NUMBER _COLON expansions _NL'],
+ 'override': ['_OVERRIDE rule',
+ '_OVERRIDE term'],
+ 'extend': ['_EXTEND rule',
+ '_EXTEND term'],
+ 'ignore': ['_IGNORE expansions _NL'],
+ 'declare': ['_DECLARE _declare_args _NL'],
+ 'import': ['_IMPORT _import_path _NL',
+ '_IMPORT _import_path _LPAR name_list _RPAR _NL',
+ '_IMPORT _import_path _TO name _NL'],
+
+ '_import_path': ['import_lib', 'import_rel'],
+ 'import_lib': ['_import_args'],
+ 'import_rel': ['_DOT _import_args'],
+ '_import_args': ['name', '_import_args _DOT name'],
+
+ 'name_list': ['_name_list'],
+ '_name_list': ['name', '_name_list _COMMA name'],
+
+ '_declare_args': ['name', '_declare_args name'],
+ 'literal': ['REGEXP', 'STRING'],
+}
+
+
+# Value 5 keeps the number of states in the LALR parser somewhat minimal.
+# It isn't optimal, but close to it. See PR #949
+SMALL_FACTOR_THRESHOLD = 5
+# The threshold at which repeats via ~ are split up into dedicated rules.
+# 50 is chosen because it keeps the number of states, and therefore the LALR analysis time, low,
+# while not being so aggressive that it unnecessarily creates rules that might cause shift/reduce conflicts.
+# (See PR #949)
+REPEAT_BREAK_THRESHOLD = 50
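+# A hedged illustration of what these thresholds control (behavior shapes assumed
+# from _generate_repeats below, not guaranteed here):
+#
+#     x~3..10   ->  mx < REPEAT_BREAK_THRESHOLD: naive expansion into alternatives
+#                   x x x | x x x x | ... | x x x x x x x x x x
+#     x~200     ->  mx >= REPEAT_BREAK_THRESHOLD: the repeat is decomposed into
+#                   helper rules via small_factors() (see _generate_repeats)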
+
+
+@inline_args
+class EBNF_to_BNF(Transformer_InPlace):
+ def __init__(self):
+ self.new_rules = []
+ self.rules_cache = {}
+ self.prefix = 'anon'
+ self.i = 0
+ self.rule_options = None
+
+ def _name_rule(self, inner):
+ new_name = '__%s_%s_%d' % (self.prefix, inner, self.i)
+ self.i += 1
+ return new_name
+
+ def _add_rule(self, key, name, expansions):
+ t = NonTerminal(name)
+ self.new_rules.append((name, expansions, self.rule_options))
+ self.rules_cache[key] = t
+ return t
+
+ def _add_recurse_rule(self, type_, expr):
+ try:
+ return self.rules_cache[expr]
+ except KeyError:
+ new_name = self._name_rule(type_)
+ t = NonTerminal(new_name)
+ tree = ST('expansions', [
+ ST('expansion', [expr]),
+ ST('expansion', [t, expr])
+ ])
+ return self._add_rule(expr, new_name, tree)
+
+ def _add_repeat_rule(self, a, b, target, atom):
+ """Generate a rule that repeats target ``a`` times, and repeats atom ``b`` times.
+
+ When called recursively (into target), it repeats atom for x(n) times, where:
+ x(0) = 1
+ x(n) = a(n) * x(n-1) + b
+
+ Example rule when a=3, b=4:
+
+ new_rule: target target target atom atom atom atom
+
+ """
+ key = (a, b, target, atom)
+ try:
+ return self.rules_cache[key]
+ except KeyError:
+ new_name = self._name_rule('repeat_a%d_b%d' % (a, b))
+ tree = ST('expansions', [ST('expansion', [target] * a + [atom] * b)])
+ return self._add_rule(key, new_name, tree)
+
+ def _add_repeat_opt_rule(self, a, b, target, target_opt, atom):
+ """Creates a rule that matches atom 0 to (a*n+b)-1 times.
+
+ When target matches n times atom, and target_opt 0 to n-1 times target_opt,
+
+ First we generate target * i followed by target_opt, for i from 0 to a-1
+ These match 0 to n*a - 1 times atom
+
+ Then we generate target * a followed by atom * i, for i from 0 to b-1
+ These match n*a to n*a + b-1 times atom
+
+ The created rule will not have any shift/reduce conflicts so that it can be used with lalr
+
+ Example rule when a=3, b=4:
+
+ new_rule: target_opt
+ | target target_opt
+ | target target target_opt
+
+ | target target target
+ | target target target atom
+ | target target target atom atom
+ | target target target atom atom atom
+
+ """
+ key = (a, b, target, atom, "opt")
+ try:
+ return self.rules_cache[key]
+ except KeyError:
+ new_name = self._name_rule('repeat_a%d_b%d_opt' % (a, b))
+ tree = ST('expansions', [
+ ST('expansion', [target]*i + [target_opt]) for i in range(a)
+ ] + [
+ ST('expansion', [target]*a + [atom]*i) for i in range(b)
+ ])
+ return self._add_rule(key, new_name, tree)
+
+ def _generate_repeats(self, rule, mn, mx):
+ """Generates a rule tree that repeats ``rule`` exactly between ``mn`` to ``mx`` times.
+ """
+ # For a small number of repeats, we can take the naive approach
+ if mx < REPEAT_BREAK_THRESHOLD:
+ return ST('expansions', [ST('expansion', [rule] * n) for n in range(mn, mx + 1)])
+
+        # For large repeat values, we break the repetition into sub-rules.
+        # We treat ``rule~mn..mx`` as ``rule~mn rule~0..(diff=mx-mn)``.
+        # We then use small_factors to split mn and diff up into values [(a, b), ...]
+        # These values are used with the help of _add_repeat_rule and _add_repeat_opt_rule
+        # to generate a complete rule/expression that matches the corresponding number of repeats.
+ mn_target = rule
+ for a, b in small_factors(mn, SMALL_FACTOR_THRESHOLD):
+ mn_target = self._add_repeat_rule(a, b, mn_target, rule)
+ if mx == mn:
+ return mn_target
+
+ diff = mx - mn + 1 # We add one because _add_repeat_opt_rule generates rules that match one less
+ diff_factors = small_factors(diff, SMALL_FACTOR_THRESHOLD)
+        diff_target = rule  # Matches rule exactly once
+        diff_opt_target = ST('expansion', [])  # Matches rule 0 times (i.e. up to 1 - 1 = 0 times)
+ for a, b in diff_factors[:-1]:
+ diff_opt_target = self._add_repeat_opt_rule(a, b, diff_target, diff_opt_target, rule)
+ diff_target = self._add_repeat_rule(a, b, diff_target, rule)
+
+ a, b = diff_factors[-1]
+ diff_opt_target = self._add_repeat_opt_rule(a, b, diff_target, diff_opt_target, rule)
+
+ return ST('expansions', [ST('expansion', [mn_target] + [diff_opt_target])])
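+    # Hedged sketch of the decomposition above, assuming small_factors' documented
+    # contract: folding n = n*a + b over its (a, b) pairs, starting from n = 1,
+    # reproduces the input. For example, small_factors(50, 5) might return:
+    #
+    #     [(5, 0), (5, 0), (2, 0)]      # ((1*5 + 0)*5 + 0)*2 + 0 == 50
+    #
+    # so ``rule~50`` becomes three stacked helper rules of 5, 5 and 2 repetitions,
+    # instead of a single 50-symbol expansion.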
+
+ def expr(self, rule, op, *args):
+ if op.value == '?':
+ empty = ST('expansion', [])
+ return ST('expansions', [rule, empty])
+ elif op.value == '+':
+ # a : b c+ d
+ # -->
+ # a : b _c d
+ # _c : _c c | c;
+ return self._add_recurse_rule('plus', rule)
+ elif op.value == '*':
+ # a : b c* d
+ # -->
+ # a : b _c? d
+ # _c : _c c | c;
+ new_name = self._add_recurse_rule('star', rule)
+ return ST('expansions', [new_name, ST('expansion', [])])
+ elif op.value == '~':
+ if len(args) == 1:
+ mn = mx = int(args[0])
+ else:
+ mn, mx = map(int, args)
+ if mx < mn or mn < 0:
+ raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (rule, mn, mx))
+
+ return self._generate_repeats(rule, mn, mx)
+
+ assert False, op
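+    # Illustrative sketch of the transformations performed above (output shapes are
+    # read off the code, not normative):
+    #
+    #     c?      ->  expansions(c, expansion())                        # c or nothing
+    #     c~2..3  ->  expansions(expansion(c, c), expansion(c, c, c))   # naive, below REPEAT_BREAK_THRESHOLD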
+
+ def maybe(self, rule):
+ keep_all_tokens = self.rule_options and self.rule_options.keep_all_tokens
+
+ def will_not_get_removed(sym):
+ if isinstance(sym, NonTerminal):
+ return not sym.name.startswith('_')
+ if isinstance(sym, Terminal):
+ return keep_all_tokens or not sym.filter_out
+ assert False
+
+ if any(rule.scan_values(will_not_get_removed)):
+ empty = _EMPTY
+ else:
+ empty = ST('expansion', [])
+
+ return ST('expansions', [rule, empty])
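+    # Illustrative sketch: in `a: b [c]`, the `[c]` part becomes expansions(c, empty),
+    # i.e. roughly `a: b c | b`. Whether `empty` is the _EMPTY placeholder (so the
+    # missing slot can later be remembered via empty_indices) or a plain empty
+    # expansion depends on will_not_get_removed() above.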
+
+
+class SimplifyRule_Visitor(Visitor):
+
+ @staticmethod
+ def _flatten(tree):
+ while tree.expand_kids_by_data(tree.data):
+ pass
+
+ def expansion(self, tree):
+ # rules_list unpacking
+ # a : b (c|d) e
+ # -->
+ # a : b c e | b d e
+ #
+ # In AST terms:
+ # expansion(b, expansions(c, d), e)
+ # -->
+ # expansions( expansion(b, c, e), expansion(b, d, e) )
+
+ self._flatten(tree)
+
+ for i, child in enumerate(tree.children):
+ if isinstance(child, Tree) and child.data == 'expansions':
+ tree.data = 'expansions'
+ tree.children = [self.visit(ST('expansion', [option if i == j else other
+ for j, other in enumerate(tree.children)]))
+ for option in dedup_list(child.children)]
+ self._flatten(tree)
+ break
+
+ def alias(self, tree):
+ rule, alias_name = tree.children
+ if rule.data == 'expansions':
+ aliases = []
+ for child in tree.children[0].children:
+ aliases.append(ST('alias', [child, alias_name]))
+ tree.data = 'expansions'
+ tree.children = aliases
+
+ def expansions(self, tree):
+ self._flatten(tree)
+ # Ensure all children are unique
+ if len(set(tree.children)) != len(tree.children):
+ tree.children = dedup_list(tree.children) # dedup is expensive, so try to minimize its use
+
+
+class RuleTreeToText(Transformer):
+ def expansions(self, x):
+ return x
+
+ def expansion(self, symbols):
+ return symbols, None
+
+ def alias(self, x):
+ (expansion, _alias), alias = x
+ assert _alias is None, (alias, expansion, '-', _alias) # Double alias not allowed
+ return expansion, alias.value
+
+
+class PrepareAnonTerminals(Transformer_InPlace):
+ """Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them"""
+
+ def __init__(self, terminals):
+ self.terminals = terminals
+ self.term_set = {td.name for td in self.terminals}
+ self.term_reverse = {td.pattern: td for td in terminals}
+ self.i = 0
+ self.rule_options = None
+
+ @inline_args
+ def pattern(self, p):
+ value = p.value
+ if p in self.term_reverse and p.flags != self.term_reverse[p].pattern.flags:
+ raise GrammarError(u'Conflicting flags for the same terminal: %s' % p)
+
+ term_name = None
+
+ if isinstance(p, PatternStr):
+ try:
+ # If already defined, use the user-defined terminal name
+ term_name = self.term_reverse[p].name
+ except KeyError:
+ # Try to assign an indicative anon-terminal name
+ try:
+ term_name = _TERMINAL_NAMES[value]
+ except KeyError:
+ if value and is_id_continue(value) and is_id_start(value[0]) and value.upper() not in self.term_set:
+ term_name = value.upper()
+
+ if term_name in self.term_set:
+ term_name = None
+
+ elif isinstance(p, PatternRE):
+            if p in self.term_reverse:  # Kind of a weird placement
+ term_name = self.term_reverse[p].name
+ else:
+ assert False, p
+
+ if term_name is None:
+ term_name = '__ANON_%d' % self.i
+ self.i += 1
+
+ if term_name not in self.term_set:
+ assert p not in self.term_reverse
+ self.term_set.add(term_name)
+ termdef = TerminalDef(term_name, p)
+ self.term_reverse[p] = termdef
+ self.terminals.append(termdef)
+
+ filter_out = False if self.rule_options and self.rule_options.keep_all_tokens else isinstance(p, PatternStr)
+
+ return Terminal(term_name, filter_out=filter_out)
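+    # Hedged examples of the naming logic above (assuming default rule options):
+    #
+    #     ","     in a rule  ->  Terminal('COMMA')      # via _TERMINAL_NAMES
+    #     "while" in a rule  ->  Terminal('WHILE')      # upper-cased identifier
+    #     /\d+/   in a rule  ->  Terminal('__ANON_0')   # no indicative name available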
+
+
+class _ReplaceSymbols(Transformer_InPlace):
+ """Helper for ApplyTemplates"""
+
+ def __init__(self):
+ self.names = {}
+
+ def value(self, c):
+ if len(c) == 1 and isinstance(c[0], Token) and c[0].value in self.names:
+ return self.names[c[0].value]
+ return self.__default__('value', c, None)
+
+ def template_usage(self, c):
+ if c[0] in self.names:
+ return self.__default__('template_usage', [self.names[c[0]].name] + c[1:], None)
+ return self.__default__('template_usage', c, None)
+
+
+class ApplyTemplates(Transformer_InPlace):
+ """Apply the templates, creating new rules that represent the used templates"""
+
+ def __init__(self, rule_defs):
+ self.rule_defs = rule_defs
+ self.replacer = _ReplaceSymbols()
+ self.created_templates = set()
+
+ def template_usage(self, c):
+ name = c[0]
+ args = c[1:]
+ result_name = "%s{%s}" % (name, ",".join(a.name for a in args))
+ if result_name not in self.created_templates:
+ self.created_templates.add(result_name)
+ (_n, params, tree, options) ,= (t for t in self.rule_defs if t[0] == name)
+ assert len(params) == len(args), args
+ result_tree = deepcopy(tree)
+ self.replacer.names = dict(zip(params, args))
+ self.replacer.transform(result_tree)
+ self.rule_defs.append((result_name, [], result_tree, deepcopy(options)))
+ return NonTerminal(result_name)
+
+
+def _rfind(s, choices):
+ return max(s.rfind(c) for c in choices)
+
+
+def eval_escaping(s):
+ w = ''
+ i = iter(s)
+ for n in i:
+ w += n
+ if n == '\\':
+ try:
+ n2 = next(i)
+ except StopIteration:
+ raise GrammarError("Literal ended unexpectedly (bad escaping): `%r`" % s)
+ if n2 == '\\':
+ w += '\\\\'
+ elif n2 not in 'Uuxnftr':
+ w += '\\'
+ w += n2
+ w = w.replace('\\"', '"').replace("'", "\\'")
+
+ to_eval = "u'''%s'''" % w
+ try:
+ s = literal_eval(to_eval)
+ except SyntaxError as e:
+ raise GrammarError(s, e)
+
+ return s
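+# A minimal sketch of eval_escaping's behavior, hand-checked against the code above
+# (not an authoritative spec):
+#
+#     eval_escaping(r'\n')  == '\n'      # known escapes are evaluated
+#     eval_escaping(r'\d')  == '\\d'     # unknown escapes keep their backslash
+#     eval_escaping(r'\\')  == '\\\\'    # backslashes stay doubled here; undone for
+#                                        # STRING literals in _literal_to_pattern()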
+
+
+def _literal_to_pattern(literal):
+ v = literal.value
+ flag_start = _rfind(v, '/"')+1
+ assert flag_start > 0
+ flags = v[flag_start:]
+ assert all(f in _RE_FLAGS for f in flags), flags
+
+ if literal.type == 'STRING' and '\n' in v:
+ raise GrammarError('You cannot put newlines in string literals')
+
+ if literal.type == 'REGEXP' and '\n' in v and 'x' not in flags:
+ raise GrammarError('You can only use newlines in regular expressions '
+ 'with the `x` (verbose) flag')
+
+ v = v[:flag_start]
+ assert v[0] == v[-1] and v[0] in '"/'
+ x = v[1:-1]
+
+ s = eval_escaping(x)
+
+ if s == "":
+ raise GrammarError("Empty terminals are not allowed (%s)" % literal)
+
+ if literal.type == 'STRING':
+ s = s.replace('\\\\', '\\')
+ return PatternStr(s, flags, raw=literal.value)
+ elif literal.type == 'REGEXP':
+ return PatternRE(s, flags, raw=literal.value)
+ else:
+ assert False, 'Invariant failed: literal.type not in ["STRING", "REGEXP"]'
+
+
+@inline_args
+class PrepareLiterals(Transformer_InPlace):
+ def literal(self, literal):
+ return ST('pattern', [_literal_to_pattern(literal)])
+
+ def range(self, start, end):
+ assert start.type == end.type == 'STRING'
+ start = start.value[1:-1]
+ end = end.value[1:-1]
+ assert len(eval_escaping(start)) == len(eval_escaping(end)) == 1
+ regexp = '[%s-%s]' % (start, end)
+ return ST('pattern', [PatternRE(regexp)])
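+# Hedged sketch of the patterns produced above (shapes read off the code, not normative):
+#
+#     "abc"     ->  pattern(PatternStr('abc'))
+#     "abc"i    ->  pattern(PatternStr('abc', flags='i'))
+#     /a+b/im   ->  pattern(PatternRE('a+b', flags='im'))
+#     "a".."z"  ->  pattern(PatternRE('[a-z]'))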
+
+
+def _make_joined_pattern(regexp, flags_set):
+    # In Python 3.6, a new syntax was introduced for flags, which allows us to restrict the scope
+    # of flags to a specific regexp group. We are already using it in `lexer.Pattern._get_flags`.
+    # However, for prior Python versions we still need to use global flags, so we have to make sure
+    # that there are no flag collisions when we merge several terminals.
+ flags = ()
+ if not Py36:
+ if len(flags_set) > 1:
+ raise GrammarError("Lark doesn't support joining terminals with conflicting flags in python <3.6!")
+ elif len(flags_set) == 1:
+ flags ,= flags_set
+
+ return PatternRE(regexp, flags)
+
+class TerminalTreeToPattern(Transformer_NonRecursive):
+ def pattern(self, ps):
+ p ,= ps
+ return p
+
+ def expansion(self, items):
+ assert items
+ if len(items) == 1:
+ return items[0]
+
+ pattern = ''.join(i.to_regexp() for i in items)
+ return _make_joined_pattern(pattern, {i.flags for i in items})
+
+ def expansions(self, exps):
+ if len(exps) == 1:
+ return exps[0]
+
+ # Do a bit of sorting to make sure that the longest option is returned
+ # (Python's re module otherwise prefers just 'l' when given (l|ll) and both could match)
+ exps.sort(key=lambda x: (-x.max_width, -x.min_width, -len(x.value)))
+
+ pattern = '(?:%s)' % ('|'.join(i.to_regexp() for i in exps))
+ return _make_joined_pattern(pattern, {i.flags for i in exps})
+
+ def expr(self, args):
+ inner, op = args[:2]
+ if op == '~':
+ if len(args) == 3:
+ op = "{%d}" % int(args[2])
+ else:
+ mn, mx = map(int, args[2:])
+ if mx < mn:
+ raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (inner, mn, mx))
+ op = "{%d,%d}" % (mn, mx)
+ else:
+ assert len(args) == 2
+ return PatternRE('(?:%s)%s' % (inner.to_regexp(), op), inner.flags)
+
+ def maybe(self, expr):
+ return self.expr(expr + ['?'])
+
+ def alias(self, t):
+ raise GrammarError("Aliasing not allowed in terminals (You used -> in the wrong place)")
+
+ def value(self, v):
+ return v[0]
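+# Hedged sketch of how a terminal's tree collapses into a single pattern
+# (regexp shapes are assumed from the methods above):
+#
+#     AB: "a" "b"      ->  ab          # expansion: sub-patterns concatenated
+#     AB: "a" | "ab"   ->  (?:ab|a)    # expansions: alternatives, longest first
+#     AB: "a"+         ->  (?:a)+      # expr: operator appended to a group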
+
+
+class PrepareSymbols(Transformer_InPlace):
+ def value(self, v):
+ v ,= v
+ if isinstance(v, Tree):
+ return v
+ elif v.type == 'RULE':
+ return NonTerminal(Str(v.value))
+ elif v.type == 'TERMINAL':
+ return Terminal(Str(v.value), filter_out=v.startswith('_'))
+ assert False
+
+
+def nr_deepcopy_tree(t):
+ """Deepcopy tree `t` without recursion"""
+ return Transformer_NonRecursive(False).transform(t)
+
+
+class Grammar:
+ def __init__(self, rule_defs, term_defs, ignore):
+ self.term_defs = term_defs
+ self.rule_defs = rule_defs
+ self.ignore = ignore
+
+ def compile(self, start, terminals_to_keep):
+ # We change the trees in-place (to support huge grammars)
+ # So deepcopy allows calling compile more than once.
+ term_defs = [(n, (nr_deepcopy_tree(t), p)) for n, (t, p) in self.term_defs]
+ rule_defs = [(n, p, nr_deepcopy_tree(t), o) for n, p, t, o in self.rule_defs]
+
+ # ===================
+ # Compile Terminals
+ # ===================
+
+ # Convert terminal-trees to strings/regexps
+
+ for name, (term_tree, priority) in term_defs:
+ if term_tree is None: # Terminal added through %declare
+ continue
+ expansions = list(term_tree.find_data('expansion'))
+ if len(expansions) == 1 and not expansions[0].children:
+ raise GrammarError("Terminals cannot be empty (%s)" % name)
+
+ transformer = PrepareLiterals() * TerminalTreeToPattern()
+ terminals = [TerminalDef(name, transformer.transform(term_tree), priority)
+ for name, (term_tree, priority) in term_defs if term_tree]
+
+ # =================
+ # Compile Rules
+ # =================
+
+ # 1. Pre-process terminals
+ anon_tokens_transf = PrepareAnonTerminals(terminals)
+ transformer = PrepareLiterals() * PrepareSymbols() * anon_tokens_transf # Adds to terminals
+
+ # 2. Inline Templates
+
+ transformer *= ApplyTemplates(rule_defs)
+
+ # 3. Convert EBNF to BNF (and apply step 1 & 2)
+ ebnf_to_bnf = EBNF_to_BNF()
+ rules = []
+ i = 0
+ while i < len(rule_defs): # We have to do it like this because rule_defs might grow due to templates
+ name, params, rule_tree, options = rule_defs[i]
+ i += 1
+ if len(params) != 0: # Dont transform templates
+ continue
+ rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None
+ ebnf_to_bnf.rule_options = rule_options
+ ebnf_to_bnf.prefix = name
+ anon_tokens_transf.rule_options = rule_options
+ tree = transformer.transform(rule_tree)
+ res = ebnf_to_bnf.transform(tree)
+ rules.append((name, res, options))
+ rules += ebnf_to_bnf.new_rules
+
+ assert len(rules) == len({name for name, _t, _o in rules}), "Whoops, name collision"
+
+ # 4. Compile tree to Rule objects
+ rule_tree_to_text = RuleTreeToText()
+
+ simplify_rule = SimplifyRule_Visitor()
+ compiled_rules = []
+ for rule_content in rules:
+ name, tree, options = rule_content
+ simplify_rule.visit(tree)
+ expansions = rule_tree_to_text.transform(tree)
+
+ for i, (expansion, alias) in enumerate(expansions):
+ if alias and name.startswith('_'):
+ raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)"% (name, alias))
+
+ empty_indices = [x==_EMPTY for x in expansion]
+ if any(empty_indices):
+ exp_options = copy(options) or RuleOptions()
+ exp_options.empty_indices = empty_indices
+ expansion = [x for x in expansion if x!=_EMPTY]
+ else:
+ exp_options = options
+
+ for sym in expansion:
+ assert isinstance(sym, Symbol)
+ if sym.is_term and exp_options and exp_options.keep_all_tokens:
+ sym.filter_out = False
+ rule = Rule(NonTerminal(name), expansion, i, alias, exp_options)
+ compiled_rules.append(rule)
+
+ # Remove duplicates of empty rules, throw error for non-empty duplicates
+ if len(set(compiled_rules)) != len(compiled_rules):
+ duplicates = classify(compiled_rules, lambda x: x)
+ for dups in duplicates.values():
+ if len(dups) > 1:
+ if dups[0].expansion:
+ raise GrammarError("Rules defined twice: %s\n\n(Might happen due to colliding expansion of optionals: [] or ?)"
+ % ''.join('\n * %s' % i for i in dups))
+
+ # Empty rule; assert all other attributes are equal
+ assert len({(r.alias, r.order, r.options) for r in dups}) == len(dups)
+
+ # Remove duplicates
+ compiled_rules = list(set(compiled_rules))
+
+ # Filter out unused rules
+ while True:
+ c = len(compiled_rules)
+ used_rules = {s for r in compiled_rules
+ for s in r.expansion
+ if isinstance(s, NonTerminal)
+ and s != r.origin}
+ used_rules |= {NonTerminal(s) for s in start}
+ compiled_rules, unused = classify_bool(compiled_rules, lambda r: r.origin in used_rules)
+ for r in unused:
+ logger.debug("Unused rule: %s", r)
+ if len(compiled_rules) == c:
+ break
+
+ # Filter out unused terminals
+ if terminals_to_keep != '*':
+ used_terms = {t.name for r in compiled_rules
+ for t in r.expansion
+ if isinstance(t, Terminal)}
+ terminals, unused = classify_bool(terminals, lambda t: t.name in used_terms or t.name in self.ignore or t.name in terminals_to_keep)
+ if unused:
+ logger.debug("Unused terminals: %s", [t.name for t in unused])
+
+ return terminals, compiled_rules, self.ignore
+
+
+PackageResource = namedtuple('PackageResource', 'pkg_name path')
+
+
+class FromPackageLoader(object):
+ """
+ Provides a simple way of creating custom import loaders that load from packages via ``pkgutil.get_data`` instead of using `open`.
+ This allows them to be compatible even from within zip files.
+
+ Relative imports are handled, so you can just freely use them.
+
+    pkg_name: The name of the package. You can probably provide `__name__` most of the time.
+    search_paths: All the paths that will be searched on absolute imports.
+ """
+ def __init__(self, pkg_name, search_paths=("", )):
+ self.pkg_name = pkg_name
+ self.search_paths = search_paths
+
+ def __repr__(self):
+ return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths)
+
+ def __call__(self, base_path, grammar_path):
+ if base_path is None:
+ to_try = self.search_paths
+ else:
+ # Check whether or not the importing grammar was loaded by this module.
+ if not isinstance(base_path, PackageResource) or base_path.pkg_name != self.pkg_name:
+                # Technically false, but FileNotFoundError doesn't exist in Python 2.7, and this message should never reach the end user anyway
+ raise IOError()
+ to_try = [base_path.path]
+ for path in to_try:
+ full_path = os.path.join(path, grammar_path)
+ try:
+ text = pkgutil.get_data(self.pkg_name, full_path)
+ except IOError:
+ continue
+ else:
+ return PackageResource(self.pkg_name, full_path), text.decode()
+ raise IOError()
+
+
+stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)
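+# A hedged usage sketch for custom loaders. `import_paths` is the Lark constructor
+# option that feeds this module's machinery; `grammar_text` is a placeholder:
+#
+#     from lark import Lark
+#     from lark.load_grammar import FromPackageLoader
+#
+#     # Resolve %import statements from my_package/grammars/*.lark, even inside a zip:
+#     loader = FromPackageLoader('my_package', ('grammars',))
+#     parser = Lark(grammar_text, import_paths=[loader])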
+
+
+
+def resolve_term_references(term_dict):
+ # TODO Solve with transitive closure (maybe)
+
+ while True:
+ changed = False
+ for name, token_tree in term_dict.items():
+ if token_tree is None: # Terminal added through %declare
+ continue
+ for exp in token_tree.find_data('value'):
+ item ,= exp.children
+ if isinstance(item, Token):
+ if item.type == 'RULE':
+ raise GrammarError("Rules aren't allowed inside terminals (%s in %s)" % (item, name))
+ if item.type == 'TERMINAL':
+ try:
+ term_value = term_dict[item]
+ except KeyError:
+ raise GrammarError("Terminal used but not defined: %s" % item)
+ assert term_value is not None
+ exp.children[0] = term_value
+ changed = True
+ if not changed:
+ break
+
+ for name, term in term_dict.items():
+ if term: # Not just declared
+ for child in term.children:
+ ids = [id(x) for x in child.iter_subtrees()]
+ if id(term) in ids:
+ raise GrammarError("Recursion in terminal '%s' (recursion is only allowed in rules, not terminals)" % name)
+
+
+def options_from_rule(name, params, *x):
+ if len(x) > 1:
+ priority, expansions = x
+ priority = int(priority)
+ else:
+ expansions ,= x
+ priority = None
+ params = [t.value for t in params.children] if params is not None else [] # For the grammar parser
+
+ keep_all_tokens = name.startswith('!')
+ name = name.lstrip('!')
+ expand1 = name.startswith('?')
+ name = name.lstrip('?')
+
+ return name, params, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority,
+ template_source=(name if params else None))
+
+
+def symbols_from_strcase(expansion):
+ return [Terminal(x, filter_out=x.startswith('_')) if x.isupper() else NonTerminal(x) for x in expansion]
+
+
+@inline_args
+class PrepareGrammar(Transformer_InPlace):
+ def terminal(self, name):
+ return name
+
+ def nonterminal(self, name):
+ return name
+
+
+def _find_used_symbols(tree):
+ assert tree.data == 'expansions'
+ return {t for x in tree.find_data('expansion')
+ for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))}
+
+
+def _get_parser():
+ try:
+ return _get_parser.cache
+ except AttributeError:
+ terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()]
+
+ rules = [options_from_rule(name, None, x) for name, x in RULES.items()]
+ rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o)
+ for r, _p, xs, o in rules for i, x in enumerate(xs)]
+ callback = ParseTreeBuilder(rules, ST).create_callback()
+ import re
+ lexer_conf = LexerConf(terminals, re, ['WS', 'COMMENT', 'BACKSLASH'])
+ parser_conf = ParserConf(rules, callback, ['start'])
+ lexer_conf.lexer_type = 'standard'
+ parser_conf.parser_type = 'lalr'
+ _get_parser.cache = ParsingFrontend(lexer_conf, parser_conf, None)
+ return _get_parser.cache
+
+GRAMMAR_ERRORS = [
+ ('Incorrect type of value', ['a: 1\n']),
+ ('Unclosed parenthesis', ['a: (\n']),
+ ('Unmatched closing parenthesis', ['a: )\n', 'a: [)\n', 'a: (]\n']),
+ ('Expecting rule or terminal definition (missing colon)', ['a\n', 'A\n', 'a->\n', 'A->\n', 'a A\n']),
+ ('Illegal name for rules or terminals', ['Aa:\n']),
+ ('Alias expects lowercase name', ['a: -> "a"\n']),
+ ('Unexpected colon', ['a::\n', 'a: b:\n', 'a: B:\n', 'a: "a":\n']),
+ ('Misplaced operator', ['a: b??', 'a: b(?)', 'a:+\n', 'a:?\n', 'a:*\n', 'a:|*\n']),
+ ('Expecting option ("|") or a new rule or terminal definition', ['a:a\n()\n']),
+ ('Terminal names cannot contain dots', ['A.B\n']),
+ ('Expecting rule or terminal definition', ['"a"\n']),
+ ('%import expects a name', ['%import "a"\n']),
+ ('%ignore expects a value', ['%ignore %import\n']),
+ ]
+
+def _translate_parser_exception(parse, e):
+ error = e.match_examples(parse, GRAMMAR_ERRORS, use_accepts=True)
+ if error:
+ return error
+ elif 'STRING' in e.expected:
+ return "Expecting a value"
+
+def _parse_grammar(text, name, start='start'):
+ try:
+ tree = _get_parser().parse(text + '\n', start)
+ except UnexpectedCharacters as e:
+ context = e.get_context(text)
+ raise GrammarError("Unexpected input at line %d column %d in %s: \n\n%s" %
+ (e.line, e.column, name, context))
+ except UnexpectedToken as e:
+ context = e.get_context(text)
+ error = _translate_parser_exception(_get_parser().parse, e)
+ if error:
+ raise GrammarError("%s, at line %s column %s\n\n%s" % (error, e.line, e.column, context))
+ raise
+
+ return PrepareGrammar().transform(tree)
+
+
+def _error_repr(error):
+ if isinstance(error, UnexpectedToken):
+ error2 = _translate_parser_exception(_get_parser().parse, error)
+ if error2:
+ return error2
+ expected = ', '.join(error.accepts or error.expected)
+ return "Unexpected token %r. Expected one of: {%s}" % (str(error.token), expected)
+ else:
+ return str(error)
+
+def _search_interactive_parser(interactive_parser, predicate):
+ def expand(node):
+ path, p = node
+ for choice in p.choices():
+ t = Token(choice, '')
+ try:
+ new_p = p.feed_token(t)
+ except ParseError: # Illegal
+ pass
+ else:
+ yield path + (choice,), new_p
+
+ for path, p in bfs_all_unique([((), interactive_parser)], expand):
+ if predicate(p):
+ return path, p
+
+def find_grammar_errors(text, start='start'):
+ errors = []
+ def on_error(e):
+ errors.append((e, _error_repr(e)))
+
+ # recover to a new line
+ token_path, _ = _search_interactive_parser(e.interactive_parser.as_immutable(), lambda p: '_NL' in p.choices())
+ for token_type in token_path:
+ e.interactive_parser.feed_token(Token(token_type, ''))
+ e.interactive_parser.feed_token(Token('_NL', '\n'))
+ return True
+
+ _tree = _get_parser().parse(text + '\n', start, on_error=on_error)
+
+ errors_by_line = classify(errors, lambda e: e[0].line)
+ errors = [el[0] for el in errors_by_line.values()] # already sorted
+
+ for e in errors:
+ e[0].interactive_parser = None
+ return errors
+
+
+def _get_mangle(prefix, aliases, base_mangle=None):
+ def mangle(s):
+ if s in aliases:
+ s = aliases[s]
+ else:
+ if s[0] == '_':
+ s = '_%s__%s' % (prefix, s[1:])
+ else:
+ s = '%s__%s' % (prefix, s)
+ if base_mangle is not None:
+ s = base_mangle(s)
+ return s
+ return mangle
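+# Hedged examples of the mangling scheme above, for an import with prefix 'common'
+# and no aliases (names are illustrative):
+#
+#     mangle = _get_mangle('common', aliases={})
+#     mangle('NUMBER')  ->  'common__NUMBER'
+#     mangle('_sep')    ->  '_common__sep'    # leading underscore stays in front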
+
+def _mangle_exp(exp, mangle):
+ if mangle is None:
+ return exp
+ exp = deepcopy(exp) # TODO: is this needed
+ for t in exp.iter_subtrees():
+ for i, c in enumerate(t.children):
+ if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'):
+ t.children[i] = Token(c.type, mangle(c.value))
+ return exp
+
+
+
+class GrammarBuilder:
+ def __init__(self, global_keep_all_tokens=False, import_paths=None, used_files=None):
+ self.global_keep_all_tokens = global_keep_all_tokens
+ self.import_paths = import_paths or []
+ self.used_files = used_files or {}
+
+ self._definitions = {}
+ self._ignore_names = []
+
+ def _is_term(self, name):
+ # Imported terminals are of the form `Path__to__Grammar__file__TERMINAL_NAME`
+ # Only the last part is the actual name, and the rest might contain mixed case
+ return name.rpartition('__')[-1].isupper()
+
+ def _grammar_error(self, msg, *names):
+ args = {}
+ for i, name in enumerate(names, start=1):
+ postfix = '' if i == 1 else str(i)
+ args['name' + postfix] = name
+ args['type' + postfix] = lowercase_type = ("rule", "terminal")[self._is_term(name)]
+ args['Type' + postfix] = lowercase_type.title()
+ raise GrammarError(msg.format(**args))
+
+ def _check_options(self, name, options):
+ if self._is_term(name):
+ if options is None:
+ options = 1
+ # if we don't use Integral here, we run into python2.7/python3 problems with long vs int
+ elif not isinstance(options, Integral):
+ raise GrammarError("Terminal require a single int as 'options' (e.g. priority), got %s" % (type(options),))
+ else:
+ if options is None:
+ options = RuleOptions()
+ elif not isinstance(options, RuleOptions):
+ raise GrammarError("Rules require a RuleOptions instance as 'options'")
+ if self.global_keep_all_tokens:
+ options.keep_all_tokens = True
+ return options
+
+
+ def _define(self, name, exp, params=(), options=None, override=False):
+ if name in self._definitions:
+ if not override:
+ self._grammar_error("{Type} '{name}' defined more than once", name)
+ elif override:
+ self._grammar_error("Cannot override a nonexisting {type} {name}", name)
+
+ if name.startswith('__'):
+ self._grammar_error('Names starting with double-underscore are reserved (Error at {name})', name)
+
+ self._definitions[name] = (params, exp, self._check_options(name, options))
+
+ def _extend(self, name, exp, params=(), options=None):
+ if name not in self._definitions:
+ self._grammar_error("Can't extend {type} {name} as it wasn't defined before", name)
+ if tuple(params) != tuple(self._definitions[name][0]):
+ self._grammar_error("Cannot extend {type} with different parameters: {name}", name)
+ # TODO: think about what to do with 'options'
+ base = self._definitions[name][1]
+
+ assert isinstance(base, Tree) and base.data == 'expansions'
+ base.children.insert(0, exp)
+
+ def _ignore(self, exp_or_name):
+ if isinstance(exp_or_name, str):
+ self._ignore_names.append(exp_or_name)
+ else:
+ assert isinstance(exp_or_name, Tree)
+ t = exp_or_name
+ if t.data == 'expansions' and len(t.children) == 1:
+ t2 ,= t.children
+ if t2.data=='expansion' and len(t2.children) == 1:
+ item ,= t2.children
+ if item.data == 'value':
+ item ,= item.children
+ if isinstance(item, Token) and item.type == 'TERMINAL':
+ self._ignore_names.append(item.value)
+ return
+
+ name = '__IGNORE_%d'% len(self._ignore_names)
+ self._ignore_names.append(name)
+ self._definitions[name] = ((), t, 1)
+
+ def _declare(self, *names):
+ for name in names:
+ self._define(name, None)
+
+ def _unpack_import(self, stmt, grammar_name):
+ if len(stmt.children) > 1:
+ path_node, arg1 = stmt.children
+ else:
+ path_node, = stmt.children
+ arg1 = None
+
+ if isinstance(arg1, Tree): # Multi import
+ dotted_path = tuple(path_node.children)
+ names = arg1.children
+ aliases = dict(zip(names, names)) # Can't have aliased multi import, so all aliases will be the same as names
+ else: # Single import
+ dotted_path = tuple(path_node.children[:-1])
+ if not dotted_path:
+ name ,= path_node.children
+ raise GrammarError("Nothing was imported from grammar `%s`" % name)
+ name = path_node.children[-1] # Get name from dotted path
+            aliases = {name.value: (arg1 or name).value}  # Aliases, if any exist
+
+ if path_node.data == 'import_lib': # Import from library
+ base_path = None
+ else: # Relative import
+ if grammar_name == '<string>': # Import relative to script file path if grammar is coded in script
+ try:
+ base_file = os.path.abspath(sys.modules['__main__'].__file__)
+ except AttributeError:
+ base_file = None
+ else:
+ base_file = grammar_name # Import relative to grammar file path if external grammar file
+ if base_file:
+ if isinstance(base_file, PackageResource):
+ base_path = PackageResource(base_file.pkg_name, os.path.split(base_file.path)[0])
+ else:
+ base_path = os.path.split(base_file)[0]
+ else:
+ base_path = os.path.abspath(os.path.curdir)
+
+ return dotted_path, base_path, aliases
+
+ def _unpack_definition(self, tree, mangle):
+ if tree.data == 'rule':
+ name, params, exp, opts = options_from_rule(*tree.children)
+ else:
+ name = tree.children[0].value
+ params = () # TODO terminal templates
+ opts = int(tree.children[1]) if len(tree.children) == 3 else 1 # priority
+ exp = tree.children[-1]
+
+ if mangle is not None:
+ params = tuple(mangle(p) for p in params)
+ name = mangle(name)
+
+ exp = _mangle_exp(exp, mangle)
+ return name, exp, params, opts
+
+
+ def load_grammar(self, grammar_text, grammar_name="<?>", mangle=None):
+ tree = _parse_grammar(grammar_text, grammar_name)
+
+ imports = {}
+ for stmt in tree.children:
+ if stmt.data == 'import':
+ dotted_path, base_path, aliases = self._unpack_import(stmt, grammar_name)
+ try:
+ import_base_path, import_aliases = imports[dotted_path]
+ assert base_path == import_base_path, 'Inconsistent base_path for %s.' % '.'.join(dotted_path)
+ import_aliases.update(aliases)
+ except KeyError:
+ imports[dotted_path] = base_path, aliases
+
+ for dotted_path, (base_path, aliases) in imports.items():
+ self.do_import(dotted_path, base_path, aliases, mangle)
+
+ for stmt in tree.children:
+ if stmt.data in ('term', 'rule'):
+ self._define(*self._unpack_definition(stmt, mangle))
+ elif stmt.data == 'override':
+ r ,= stmt.children
+ self._define(*self._unpack_definition(r, mangle), override=True)
+ elif stmt.data == 'extend':
+ r ,= stmt.children
+ self._extend(*self._unpack_definition(r, mangle))
+ elif stmt.data == 'ignore':
+ # if mangle is not None, we shouldn't apply ignore, since we aren't in a toplevel grammar
+ if mangle is None:
+ self._ignore(*stmt.children)
+ elif stmt.data == 'declare':
+ names = [t.value for t in stmt.children]
+ if mangle is None:
+ self._declare(*names)
+ else:
+ self._declare(*map(mangle, names))
+ elif stmt.data == 'import':
+ pass
+ else:
+ assert False, stmt
+
+
+ term_defs = { name: exp
+ for name, (_params, exp, _options) in self._definitions.items()
+ if self._is_term(name)
+ }
+ resolve_term_references(term_defs)
+
+
+ def _remove_unused(self, used):
+ def rule_dependencies(symbol):
+ if self._is_term(symbol):
+ return []
+ try:
+ params, tree,_ = self._definitions[symbol]
+ except KeyError:
+ return []
+ return _find_used_symbols(tree) - set(params)
+
+ _used = set(bfs(used, rule_dependencies))
+ self._definitions = {k: v for k, v in self._definitions.items() if k in _used}
+
+
+ def do_import(self, dotted_path, base_path, aliases, base_mangle=None):
+ assert dotted_path
+ mangle = _get_mangle('__'.join(dotted_path), aliases, base_mangle)
+ grammar_path = os.path.join(*dotted_path) + EXT
+ to_try = self.import_paths + ([base_path] if base_path is not None else []) + [stdlib_loader]
+ for source in to_try:
+ try:
+ if callable(source):
+ joined_path, text = source(base_path, grammar_path)
+ else:
+ joined_path = os.path.join(source, grammar_path)
+ with open(joined_path, encoding='utf8') as f:
+ text = f.read()
+ except IOError:
+ continue
+ else:
+ h = hashlib.md5(text.encode('utf8')).hexdigest()
+ if self.used_files.get(joined_path, h) != h:
+ raise RuntimeError("Grammar file was changed during importing")
+ self.used_files[joined_path] = h
+
+ gb = GrammarBuilder(self.global_keep_all_tokens, self.import_paths, self.used_files)
+ gb.load_grammar(text, joined_path, mangle)
+ gb._remove_unused(map(mangle, aliases))
+ for name in gb._definitions:
+ if name in self._definitions:
+ raise GrammarError("Cannot import '%s' from '%s': Symbol already defined." % (name, grammar_path))
+
+ self._definitions.update(**gb._definitions)
+ break
+ else:
+ # Search failed. Make Python throw a nice error.
+ open(grammar_path, encoding='utf8')
+ assert False, "Couldn't import grammar %s, but a corresponding file was found at a place where lark doesn't search for it" % (dotted_path,)
+
+
+ def validate(self):
+ for name, (params, exp, _options) in self._definitions.items():
+ for i, p in enumerate(params):
+ if p in self._definitions:
+ raise GrammarError("Template Parameter conflicts with rule %s (in template %s)" % (p, name))
+ if p in params[:i]:
+ raise GrammarError("Duplicate Template Parameter %s (in template %s)" % (p, name))
+
+ if exp is None: # Remaining checks don't apply to abstract rules/terminals
+ continue
+
+ for temp in exp.find_data('template_usage'):
+ sym = temp.children[0]
+ args = temp.children[1:]
+ if sym not in params:
+ if sym not in self._definitions:
+ self._grammar_error("Template '%s' used but not defined (in {type} {name})" % sym, name)
+ if len(args) != len(self._definitions[sym][0]):
+ expected, actual = len(self._definitions[sym][0]), len(args)
+ self._grammar_error("Wrong number of template arguments used for {name} "
+ "(expected %s, got %s) (in {type2} {name2})" % (expected, actual), sym, name)
+
+ for sym in _find_used_symbols(exp):
+ if sym not in self._definitions and sym not in params:
+ self._grammar_error("{Type} '{name}' used but not defined (in {type2} {name2})", sym, name)
+
+ if not set(self._definitions).issuperset(self._ignore_names):
+ raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(self._ignore_names) - set(self._definitions)))
+
+ def build(self):
+ self.validate()
+ rule_defs = []
+ term_defs = []
+ for name, (params, exp, options) in self._definitions.items():
+ if self._is_term(name):
+ assert len(params) == 0
+ term_defs.append((name, (exp, options)))
+ else:
+ rule_defs.append((name, params, exp, options))
+ # resolve_term_references(term_defs)
+ return Grammar(rule_defs, term_defs, self._ignore_names)
+
+
+def verify_used_files(file_hashes):
+ for path, old in file_hashes.items():
+ text = None
+ if isinstance(path, str) and os.path.exists(path):
+ with open(path, encoding='utf8') as f:
+ text = f.read()
+ elif isinstance(path, PackageResource):
+ with suppress(IOError):
+ text = pkgutil.get_data(*path).decode('utf-8')
+        if text is None:  # We don't know how to load the path; ignore it.
+ continue
+
+ current = hashlib.md5(text.encode()).hexdigest()
+ if old != current:
+ logger.info("File %r changed, rebuilding Parser" % path)
+ return False
+ return True
+
+def list_grammar_imports(grammar, import_paths=[]):
+ "Returns a list of paths to the lark grammars imported by the given grammar (recursively)"
+ builder = GrammarBuilder(False, import_paths)
+ builder.load_grammar(grammar, '<string>')
+ return list(builder.used_files.keys())
+
+def load_grammar(grammar, source, import_paths, global_keep_all_tokens):
+ builder = GrammarBuilder(global_keep_all_tokens, import_paths)
+ builder.load_grammar(grammar, source)
+ return builder.build(), builder.used_files
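+# A minimal, hedged usage sketch of this module's entry point (normally it is called
+# internally by lark.Lark; shown here only for orientation):
+#
+#     grammar, used_files = load_grammar(
+#         'start: "a"+\n', source='<string>', import_paths=[], global_keep_all_tokens=False)
+#     terminals, rules, ignore = grammar.compile(['start'], terminals_to_keep='*')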