aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/lark/tools/nearley.py
diff options
context:
space:
mode:
authorS. Solomon Darnell2025-03-28 21:52:21 -0500
committerS. Solomon Darnell2025-03-28 21:52:21 -0500
commit4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
treeee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/lark/tools/nearley.py
parentcc961e04ba734dd72309fb548a2f97d67d578813 (diff)
downloadgn-ai-master.tar.gz
two version of R2R are here (HEAD, master)
Diffstat (limited to '.venv/lib/python3.12/site-packages/lark/tools/nearley.py')
-rw-r--r--.venv/lib/python3.12/site-packages/lark/tools/nearley.py201
1 files changed, 201 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/lark/tools/nearley.py b/.venv/lib/python3.12/site-packages/lark/tools/nearley.py
new file mode 100644
index 00000000..f0779dc5
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/tools/nearley.py
@@ -0,0 +1,201 @@
+"Converts Nearley grammars to Lark"
+
+import os.path
+import sys
+import codecs
+import argparse
+
+
+from lark import Lark, InlineTransformer
+
# Lark grammar for the subset of Nearley syntax this converter understands.
# A Nearley file is a sequence of rule definitions and "@" directives:
#   * ``@name "arg"`` / ``@name arg``  -> ``directive`` node
#   * ``@{% ... %}``                    -> ``js_code`` node (top-level JavaScript)
#   * ``name -> a b | c``               -> ``ruledef`` node
#   * ``name[...] -> ...``              -> ``macro`` node
# Every expansion ends with a ``js`` node that holds the optional
# ``{% ... %}`` postprocessor (the node has no children when it is absent).
nearley_grammar = r"""
 start: (ruledef|directive)+

 directive: "@" NAME (STRING|NAME)
 | "@" JS -> js_code
 ruledef: NAME "->" expansions
 | NAME REGEXP "->" expansions -> macro
 expansions: expansion ("|" expansion)*

 expansion: expr+ js

 ?expr: item (":" /[+*?]/)?

 ?item: rule|string|regexp|null
 | "(" expansions ")"

 rule: NAME
 string: STRING
 regexp: REGEXP
 null: "null"
 JS: /{%.*?%}/s
 js: JS?

 NAME: /[a-zA-Z_$]\w*/
 COMMENT: /#[^\n]*/
 REGEXP: /\[.*?\]/

 STRING: _STRING "i"?

 %import common.ESCAPED_STRING -> _STRING
 %import common.WS
 %ignore WS
 %ignore COMMENT

 """

# Built once at import time and reused for every grammar file that is parsed,
# including files pulled in through directives.
nearley_grammar_parser = Lark(
    nearley_grammar,
    parser='earley',
    lexer='standard',
)
+
+def _get_rulename(name):
+ name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name)
+ return 'n_' + name.replace('$', '__DOLLAR__').lower()
+
class NearleyToLark(InlineTransformer):
    """Turn parse-tree nodes of a Nearley grammar into Lark grammar text.

    Each public method handles the tree node of the same name and returns a
    string fragment. While transforming, the instance accumulates:

    * ``extra_rules``     -- generated ``xrule_N`` name -> rule body
    * ``extra_rules_rev`` -- rule body -> generated name, so an identical body
      is only registered once
    * ``alias_js_code``   -- generated ``alias_N`` name -> JavaScript source of
      a postprocessor
    """

    def __init__(self):
        self.alias_js_code = {}
        self.extra_rules = {}
        self.extra_rules_rev = {}
        # Number of aliases handed out so far; also the next alias index.
        self._count = 0

    def _new_function(self, code):
        """Register *code* under a fresh ``alias_N`` name and return the name."""
        alias_name = 'alias_%d' % self._count
        self.alias_js_code[alias_name] = code
        self._count += 1
        return alias_name

    def _extra_rule(self, rule):
        """Return the ``xrule_N`` name for *rule*, creating it on first use."""
        if rule not in self.extra_rules_rev:
            name = 'xrule_%d' % len(self.extra_rules)
            assert name not in self.extra_rules
            self.extra_rules[name] = rule
            self.extra_rules_rev[rule] = name
        return self.extra_rules_rev[rule]

    def rule(self, name):
        """A reference to another rule becomes that rule's Lark name."""
        return _get_rulename(name)

    def ruledef(self, name, exps):
        """Render a full rule definition line."""
        lark_name = _get_rulename(name)
        return '!%s: %s' % (lark_name, exps)

    def expr(self, item, op):
        """An item with a ``+``/``*``/``?`` operator is moved into its own extra rule."""
        quantified = '(%s)%s' % (item, op)
        return self._extra_rule(quantified)

    def regexp(self, r):
        """Wrap the matched text in slashes."""
        return '/%s/' % r

    def null(self):
        """The ``null`` item contributes nothing to the expansion."""
        return ''

    def string(self, s):
        """A string literal is moved into its own extra rule."""
        return self._extra_rule(s)

    def expansion(self, *x):
        """Join the items of one alternative and append its alias, if any."""
        # The last child is always the ``js`` node; everything before it is an
        # already-rendered item.
        js = x[-1]
        items = x[:-1]

        alias = ''
        if js.children:
            (js_token,) = js.children
            # Drop the two-character delimiters around the JavaScript source.
            alias = '-> ' + self._new_function(js_token[2:-2])
        return ' '.join(items) + alias

    def expansions(self, *x):
        """Put each alternative on its own line, separated by ``|``."""
        joined = '\n |'.join(x)
        return '%s' % joined

    def start(self, *rules):
        """Join the non-empty top-level results, one per line."""
        return '\n'.join([r for r in rules if r])
+
def _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, includes):
    """Convert Nearley grammar text into a list of Lark rule definitions.

    Args:
        g: Nearley grammar source text.
        builtin_path: Directory against which ``builtin`` directive paths are
            resolved.
        n2l: Transformer applied to every rule definition; the same instance
            is passed on to included files, so its state is shared.
        js_code: List that receives the body of every top-level JavaScript
            block, in order of appearance. Mutated in place.
        folder_path: Directory against which ``include`` directive paths are
            resolved.
        includes: Set of file paths that were already pulled in. Mutated in
            place; a path that is already present is not read again.

    Returns:
        The transformed rule definitions, with the definitions of an included
        file placed where its directive appeared.

    Raises:
        AssertionError: For a directive other than ``builtin`` or ``include``.
        Exception: For a statement kind this function does not know.
    """
    rule_defs = []

    tree = nearley_grammar_parser.parse(g)
    for statement in tree.children:
        kind = statement.data

        if kind == 'directive':
            directive, arg = statement.children
            if directive not in ('builtin', 'include'):
                # Raised explicitly instead of ``assert False`` so the check
                # is not stripped (and the directive silently ignored) when
                # Python runs with -O. The exception type is unchanged.
                raise AssertionError('Unsupported directive: %s' % directive)

            folder = builtin_path if directive == 'builtin' else folder_path
            # The argument token still carries its surrounding quotes.
            path = os.path.join(folder, arg[1:-1])
            if path in includes:
                continue
            includes.add(path)
            with codecs.open(path, encoding='utf8') as f:
                text = f.read()
            # Nested includes are resolved relative to the included file.
            rule_defs += _nearley_to_lark(
                text, builtin_path, n2l, js_code,
                os.path.abspath(os.path.dirname(path)), includes)
        elif kind == 'js_code':
            code ,= statement.children
            # Drop the two-character delimiters around the JavaScript source.
            js_code.append(code[2:-2])
        elif kind == 'macro':
            pass    # TODO Add support for macros!
        elif kind == 'ruledef':
            rule_defs.append(n2l.transform(statement))
        else:
            raise Exception("Unknown statement: %s" % statement)

    return rule_defs
+
+
def create_code_for_nearley_grammar(g, start, builtin_path, folder_path, es6=False):
    """Generate Python source for a Lark parser equivalent to a Nearley grammar.

    Args:
        g: Nearley grammar source text.
        start: Name of the Nearley rule to use as the start rule, spelled as
            in the Nearley grammar.
        builtin_path: Directory against which ``builtin`` directive paths are
            resolved.
        folder_path: Directory against which ``include`` directive paths are
            resolved.
        es6: When true, the JavaScript is translated with
            ``js2py.translate_js6`` instead of ``js2py.translate_js``.

    Returns:
        The generated module as one string. It defines ``grammar``, the
        translated JavaScript, a ``TransformNearley`` class, ``parser`` and a
        ``parse(text)`` function.
    """
    # Imported here so that the module can be imported without js2py.
    import js2py

    emit_code = []

    def emit(x=None):
        """Append *x* (if non-empty) followed by a newline to the output."""
        if x:
            emit_code.append(x)
        emit_code.append('\n')

    # ``id`` is defined up front for use by grammar postprocessors.
    js_code = ['function id(x) {return x[0];}']
    n2l = NearleyToLark()
    rule_defs = _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, set())
    lark_g = '\n'.join(rule_defs)
    lark_g += '\n'+'\n'.join('!%s: %s' % item for item in n2l.extra_rules.items())

    emit('from lark import Lark, Transformer')
    emit()
    emit('grammar = ' + repr(lark_g))
    emit()

    for alias, code in n2l.alias_js_code.items():
        js_code.append('%s = (%s);' % (alias, code))

    translate = js2py.translate_js6 if es6 else js2py.translate_js
    emit(translate('\n'.join(js_code)))

    emit('class TransformNearley(Transformer):')
    for alias in n2l.alias_js_code:
        emit(" %s = var.get('%s').to_python()" % (alias, alias))
    emit(" __default__ = lambda self, n, c, m: c if c else None")

    emit()
    # Rule definitions are named through _get_rulename (lower-cased, '$'
    # replaced, '_'/'__' aliased), so the start rule must be mapped the same
    # way or it would point at a rule the generated grammar does not contain.
    emit('parser = Lark(grammar, start="%s", maybe_placeholders=False)' % _get_rulename(start))
    emit('def parse(text):')
    emit(' return TransformNearley().transform(parser.parse(text))')

    return ''.join(emit_code)
+
def main(fn, start, nearley_lib, es6=False):
    """Read the Nearley grammar file *fn* and return generated parser source.

    Builtins are looked up in the ``builtin`` directory under *nearley_lib*;
    includes are looked up next to *fn*. *start* and *es6* are passed through
    to ``create_code_for_nearley_grammar``.
    """
    with codecs.open(fn, encoding='utf8') as grammar_file:
        grammar = grammar_file.read()

    builtin_dir = os.path.join(nearley_lib, 'builtin')
    grammar_dir = os.path.abspath(os.path.dirname(fn))
    return create_code_for_nearley_grammar(grammar, start, builtin_dir, grammar_dir, es6=es6)
+
def get_arg_parser():
    """Build the command-line parser: three positional arguments and ``--es6``."""
    arg_parser = argparse.ArgumentParser(
        description='Reads a Nearley grammar (with js functions), and outputs an equivalent lark parser.',
    )

    # Positional arguments, in the order they must appear on the command line.
    positionals = (
        ('nearley_grammar', 'Path to the file containing the nearley grammar'),
        ('start_rule', 'Rule within the nearley grammar to make the base rule'),
        ('nearley_lib', 'Path to root directory of nearley codebase (used for including builtins)'),
    )
    for dest, help_text in positionals:
        arg_parser.add_argument(dest, help=help_text)

    arg_parser.add_argument('--es6', action='store_true', help='Enable experimental ES6 support')
    return arg_parser
+
if __name__ == '__main__':
    parser = get_arg_parser()

    # Called without any argument: print the full help to stderr and exit
    # with status 1 before argument parsing is attempted.
    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)

    args = parser.parse_args()
    generated_source = main(
        fn=args.nearley_grammar,
        start=args.start_rule,
        nearley_lib=args.nearley_lib,
        es6=args.es6,
    )
    print(generated_source)