From 4a52a71956a8d46fcb7294ac71734504bb09bcc2 Mon Sep 17 00:00:00 2001 From: S. Solomon Darnell Date: Fri, 28 Mar 2025 21:52:21 -0500 Subject: two version of R2R are here --- .../python3.12/site-packages/lark/tools/nearley.py | 201 +++++++++++++++++++++ 1 file changed, 201 insertions(+) create mode 100644 .venv/lib/python3.12/site-packages/lark/tools/nearley.py (limited to '.venv/lib/python3.12/site-packages/lark/tools/nearley.py') diff --git a/.venv/lib/python3.12/site-packages/lark/tools/nearley.py b/.venv/lib/python3.12/site-packages/lark/tools/nearley.py new file mode 100644 index 00000000..f0779dc5 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lark/tools/nearley.py @@ -0,0 +1,201 @@ +"Converts Nearley grammars to Lark" + +import os.path +import sys +import codecs +import argparse + + +from lark import Lark, InlineTransformer + +nearley_grammar = r""" + start: (ruledef|directive)+ + + directive: "@" NAME (STRING|NAME) + | "@" JS -> js_code + ruledef: NAME "->" expansions + | NAME REGEXP "->" expansions -> macro + expansions: expansion ("|" expansion)* + + expansion: expr+ js + + ?expr: item (":" /[+*?]/)? + + ?item: rule|string|regexp|null + | "(" expansions ")" + + rule: NAME + string: STRING + regexp: REGEXP + null: "null" + JS: /{%.*?%}/s + js: JS? + + NAME: /[a-zA-Z_$]\w*/ + COMMENT: /#[^\n]*/ + REGEXP: /\[.*?\]/ + + STRING: _STRING "i"? + + %import common.ESCAPED_STRING -> _STRING + %import common.WS + %ignore WS + %ignore COMMENT + + """ + +nearley_grammar_parser = Lark(nearley_grammar, parser='earley', lexer='standard') + +def _get_rulename(name): + name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name) + return 'n_' + name.replace('$', '__DOLLAR__').lower() + +class NearleyToLark(InlineTransformer): + def __init__(self): + self._count = 0 + self.extra_rules = {} + self.extra_rules_rev = {} + self.alias_js_code = {} + + def _new_function(self, code): + name = 'alias_%d' % self._count + self._count += 1 + + self.alias_js_code[name] = code + return name + + def _extra_rule(self, rule): + if rule in self.extra_rules_rev: + return self.extra_rules_rev[rule] + + name = 'xrule_%d' % len(self.extra_rules) + assert name not in self.extra_rules + self.extra_rules[name] = rule + self.extra_rules_rev[rule] = name + return name + + def rule(self, name): + return _get_rulename(name) + + def ruledef(self, name, exps): + return '!%s: %s' % (_get_rulename(name), exps) + + def expr(self, item, op): + rule = '(%s)%s' % (item, op) + return self._extra_rule(rule) + + def regexp(self, r): + return '/%s/' % r + + def null(self): + return '' + + def string(self, s): + return self._extra_rule(s) + + def expansion(self, *x): + x, js = x[:-1], x[-1] + if js.children: + js_code ,= js.children + js_code = js_code[2:-2] + alias = '-> ' + self._new_function(js_code) + else: + alias = '' + return ' '.join(x) + alias + + def expansions(self, *x): + return '%s' % ('\n |'.join(x)) + + def start(self, *rules): + return '\n'.join(filter(None, rules)) + +def _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, includes): + rule_defs = [] + + tree = nearley_grammar_parser.parse(g) + for statement in tree.children: + if statement.data == 'directive': + directive, arg = statement.children + if directive in ('builtin', 'include'): + folder = builtin_path if directive == 'builtin' else folder_path + path = os.path.join(folder, arg[1:-1]) + if path not in includes: + includes.add(path) + with codecs.open(path, encoding='utf8') as f: + text = f.read() + rule_defs += _nearley_to_lark(text, builtin_path, n2l, js_code, os.path.abspath(os.path.dirname(path)), includes) + else: + assert False, directive + elif statement.data == 'js_code': + code ,= statement.children + code = code[2:-2] + js_code.append(code) + elif statement.data == 'macro': + pass # TODO Add support for macros! + elif statement.data == 'ruledef': + rule_defs.append( n2l.transform(statement) ) + else: + raise Exception("Unknown statement: %s" % statement) + + return rule_defs + + +def create_code_for_nearley_grammar(g, start, builtin_path, folder_path, es6=False): + import js2py + + emit_code = [] + def emit(x=None): + if x: + emit_code.append(x) + emit_code.append('\n') + + js_code = ['function id(x) {return x[0];}'] + n2l = NearleyToLark() + rule_defs = _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, set()) + lark_g = '\n'.join(rule_defs) + lark_g += '\n'+'\n'.join('!%s: %s' % item for item in n2l.extra_rules.items()) + + emit('from lark import Lark, Transformer') + emit() + emit('grammar = ' + repr(lark_g)) + emit() + + for alias, code in n2l.alias_js_code.items(): + js_code.append('%s = (%s);' % (alias, code)) + + if es6: + emit(js2py.translate_js6('\n'.join(js_code))) + else: + emit(js2py.translate_js('\n'.join(js_code))) + emit('class TransformNearley(Transformer):') + for alias in n2l.alias_js_code: + emit(" %s = var.get('%s').to_python()" % (alias, alias)) + emit(" __default__ = lambda self, n, c, m: c if c else None") + + emit() + emit('parser = Lark(grammar, start="n_%s", maybe_placeholders=False)' % start) + emit('def parse(text):') + emit(' return TransformNearley().transform(parser.parse(text))') + + return ''.join(emit_code) + +def main(fn, start, nearley_lib, es6=False): + with codecs.open(fn, encoding='utf8') as f: + grammar = f.read() + return create_code_for_nearley_grammar(grammar, start, os.path.join(nearley_lib, 'builtin'), os.path.abspath(os.path.dirname(fn)), es6=es6) + +def get_arg_parser(): + parser = argparse.ArgumentParser(description='Reads a Nearley grammar (with js functions), and outputs an equivalent lark parser.') + parser.add_argument('nearley_grammar', help='Path to the file containing the nearley grammar') + parser.add_argument('start_rule', help='Rule within the nearley grammar to make the base rule') + parser.add_argument('nearley_lib', help='Path to root directory of nearley codebase (used for including builtins)') + parser.add_argument('--es6', help='Enable experimental ES6 support', action='store_true') + return parser + +if __name__ == '__main__': + parser = get_arg_parser() + if len(sys.argv)==1: + parser.print_help(sys.stderr) + sys.exit(1) + args = parser.parse_args() + print(main(fn=args.nearley_grammar, start=args.start_rule, nearley_lib=args.nearley_lib, es6=args.es6)) -- cgit v1.2.3