about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/lark/tools/nearley.py
diff options
context:
space:
mode:
author: S. Solomon Darnell, 2025-03-28 21:52:21 -0500
committer: S. Solomon Darnell, 2025-03-28 21:52:21 -0500
commit: 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree: ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/lark/tools/nearley.py
parent: cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download: gn-ai-master.tar.gz
two version of R2R are here HEAD master
Diffstat (limited to '.venv/lib/python3.12/site-packages/lark/tools/nearley.py')
-rw-r--r--.venv/lib/python3.12/site-packages/lark/tools/nearley.py201
1 files changed, 201 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/lark/tools/nearley.py b/.venv/lib/python3.12/site-packages/lark/tools/nearley.py
new file mode 100644
index 00000000..f0779dc5
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/tools/nearley.py
@@ -0,0 +1,201 @@
+"Converts Nearley grammars to Lark"
+
+import os.path
+import sys
+import codecs
+import argparse
+
+
+from lark import Lark, InlineTransformer
+
+# Lark grammar for the parts of the Nearley grammar format that this tool
+# reads: "@" directives, "@{% ... %}" JavaScript blocks (aliased `js_code`),
+# rule definitions ("NAME -> expansions") and definitions of the form
+# "NAME REGEXP -> expansions" (aliased `macro`). Each expansion may end with
+# an optional "{% ... %}" JavaScript block. "#" comments and whitespace are
+# ignored.
+nearley_grammar = r"""
+    start: (ruledef|directive)+
+
+    directive: "@" NAME (STRING|NAME)
+             | "@" JS  -> js_code
+    ruledef: NAME "->" expansions
+           | NAME REGEXP "->" expansions -> macro
+    expansions: expansion ("|" expansion)*
+
+    expansion: expr+ js
+
+    ?expr: item (":" /[+*?]/)?
+
+    ?item: rule|string|regexp|null
+         | "(" expansions ")"
+
+    rule: NAME
+    string: STRING
+    regexp: REGEXP
+    null: "null"
+    JS: /{%.*?%}/s
+    js: JS?
+
+    NAME: /[a-zA-Z_$]\w*/
+    COMMENT: /#[^\n]*/
+    REGEXP: /\[.*?\]/
+
+    STRING: _STRING "i"?
+
+    %import common.ESCAPED_STRING -> _STRING
+    %import common.WS
+    %ignore WS
+    %ignore COMMENT
+
+    """
+
+# Parser for Nearley source text, built once at import time from the grammar
+# above (Earley parser, 'standard' lexer).
+nearley_grammar_parser = Lark(nearley_grammar, parser='earley', lexer='standard')
+
+def _get_rulename(name):
+    """Return the Lark rule name used for the Nearley rule called `name`.
+
+    The Nearley names ``_`` and ``__`` are first mapped to ``_ws_maybe`` and
+    ``_ws``. Every ``$`` is then replaced with ``__DOLLAR__``, and the result
+    is lower-cased and prefixed with ``n_``.
+    """
+    if name == '_':
+        name = '_ws_maybe'
+    elif name == '__':
+        name = '_ws'
+    escaped = name.replace('$', '__DOLLAR__')
+    return 'n_' + escaped.lower()
+
+class NearleyToLark(InlineTransformer):
+    """Transformer that turns parsed Nearley rule definitions into Lark grammar text.
+
+    State kept on the instance:
+      extra_rules     -- generated rule name ('xrule_N') -> rule body
+      extra_rules_rev -- reverse mapping, rule body -> generated rule name
+      alias_js_code   -- generated alias ('alias_N') -> JavaScript source taken
+                         from the block that ends an expansion
+    """
+
+    def __init__(self):
+        self.alias_js_code = {}
+        self.extra_rules = {}
+        self.extra_rules_rev = {}
+        self._count = 0     # number of aliases handed out so far
+
+    def _new_function(self, code):
+        """Store `code` under a fresh ``alias_N`` name and return that name."""
+        alias = 'alias_%d' % self._count
+        self.alias_js_code[alias] = code
+        self._count += 1
+        return alias
+
+    def _extra_rule(self, rule):
+        """Return the ``xrule_N`` name for `rule`, registering it on first use."""
+        known = self.extra_rules_rev.get(rule)
+        if known is not None:
+            return known
+
+        new_name = 'xrule_%d' % len(self.extra_rules)
+        assert new_name not in self.extra_rules
+        self.extra_rules_rev[rule] = new_name
+        self.extra_rules[new_name] = rule
+        return new_name
+
+    def rule(self, name):
+        """Reference to a rule: emit its Lark name."""
+        lark_name = _get_rulename(name)
+        return lark_name
+
+    def ruledef(self, name, exps):
+        """Rule definition: emit ``!<lark name>: <expansions>``."""
+        lark_name = _get_rulename(name)
+        return '!%s: %s' % (lark_name, exps)
+
+    def expr(self, item, op):
+        """Item followed by an operator: register ``(item)op`` as an extra rule."""
+        wrapped = '(%s)%s' % (item, op)
+        return self._extra_rule(wrapped)
+
+    def regexp(self, r):
+        """Wrap the matched text in ``/.../``."""
+        return '/%s/' % r
+
+    def null(self):
+        """``null`` contributes nothing to the expansion."""
+        return ''
+
+    def string(self, s):
+        """String literals are registered as extra rules."""
+        return self._extra_rule(s)
+
+    def expansion(self, *x):
+        """Join the items with spaces and append ``-> alias_N`` when JS code is attached."""
+        items, js = x[:-1], x[-1]
+        alias = ''
+        if js.children:
+            (js_code,) = js.children
+            # Drop the first and last two characters (the "{%" and "%}" markers).
+            alias = '-> ' + self._new_function(js_code[2:-2])
+        return ' '.join(items) + alias
+
+    def expansions(self, *x):
+        """Put each alternative on its own line, separated by ``|``."""
+        return '\n    |'.join(x)
+
+    def start(self, *rules):
+        """Join the non-empty converted rules with newlines."""
+        return '\n'.join(r for r in rules if r)
+
+def _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, includes):
+    """Convert Nearley grammar text into a list of Lark rule definitions.
+
+    Parameters:
+        g            -- Nearley grammar source text.
+        builtin_path -- directory that ``@builtin`` paths are resolved against.
+        n2l          -- transformer whose ``transform`` method is applied to
+                        each rule definition.
+        js_code      -- list that the code of top-level ``@{% ... %}`` blocks
+                        is appended to (mutated in place).
+        folder_path  -- directory that ``@include`` paths are resolved against.
+        includes     -- set of file paths already loaded; mutated in place so
+                        that each file is converted at most once.
+
+    Returns the converted rule definitions, including those that come from
+    included files. Macro definitions are skipped (not supported yet).
+
+    Raises AssertionError for a directive other than ``builtin``/``include``
+    and Exception for an unrecognised statement type.
+    """
+    rule_defs = []
+
+    tree = nearley_grammar_parser.parse(g)
+    for statement in tree.children:
+        if statement.data == 'directive':
+            directive, arg = statement.children
+            if directive in ('builtin', 'include'):
+                folder = builtin_path if directive == 'builtin' else folder_path
+                # arg[1:-1] drops the first and last character, i.e. the
+                # quotes around a string argument.
+                path = os.path.join(folder, arg[1:-1])
+                if path not in includes:
+                    includes.add(path)
+                    with codecs.open(path, encoding='utf8') as f:
+                        text = f.read()
+                    # Includes inside the loaded file are resolved relative to
+                    # that file's own directory.
+                    rule_defs += _nearley_to_lark(text, builtin_path, n2l, js_code, os.path.abspath(os.path.dirname(path)), includes)
+            else:
+                # Raised explicitly instead of `assert False`: assert
+                # statements are removed under `python -O`, which would let an
+                # unsupported directive be ignored silently.
+                raise AssertionError(directive)
+        elif statement.data == 'js_code':
+            code ,= statement.children
+            code = code[2:-2]   # drop the "{%" and "%}" markers
+            js_code.append(code)
+        elif statement.data == 'macro':
+            pass    # TODO Add support for macros!
+        elif statement.data == 'ruledef':
+            rule_defs.append( n2l.transform(statement) )
+        else:
+            raise Exception("Unknown statement: %s" % statement)
+
+    return rule_defs
+
+
+def create_code_for_nearley_grammar(g, start, builtin_path, folder_path, es6=False):
+    """Generate Python source for a Lark-based parser equivalent to a Nearley grammar.
+
+    Parameters:
+        g            -- Nearley grammar source text.
+        start        -- name of the Nearley rule to use as the start rule.
+        builtin_path -- directory that ``@builtin`` paths are resolved against.
+        folder_path  -- directory that ``@include`` paths are resolved against.
+        es6          -- translate the JavaScript with ``js2py.translate_js6``
+                        instead of ``js2py.translate_js``.
+
+    Returns the generated module as one string. It defines ``grammar``, the
+    translated JavaScript, a ``TransformNearley`` transformer, ``parser`` and
+    a ``parse(text)`` function.
+
+    Requires the ``js2py`` package, which is imported only when this function
+    is called.
+    """
+    import js2py
+
+    emit_code = []
+    def emit(x=None):
+        # Append `x` (if given) followed by a newline to the generated source.
+        if x:
+            emit_code.append(x)
+        emit_code.append('\n')
+
+    # `id` is predefined for the grammar's JavaScript post-processing code.
+    js_code = ['function id(x) {return x[0];}']
+    n2l = NearleyToLark()
+    rule_defs = _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, set())
+    lark_g = '\n'.join(rule_defs)
+    # Helper rules created during the conversion are appended after the user's rules.
+    lark_g += '\n'+'\n'.join('!%s: %s' % item for item in n2l.extra_rules.items())
+
+    emit('from lark import Lark, Transformer')
+    emit()
+    emit('grammar = ' + repr(lark_g))
+    emit()
+
+    # Bind every post-processing function to its alias name in the JavaScript source.
+    for alias, code in n2l.alias_js_code.items():
+        js_code.append('%s = (%s);' % (alias, code))
+
+    if es6:
+        emit(js2py.translate_js6('\n'.join(js_code)))
+    else:
+        emit(js2py.translate_js('\n'.join(js_code)))
+    emit('class TransformNearley(Transformer):')
+    for alias in n2l.alias_js_code:
+        emit("    %s = var.get('%s').to_python()" % (alias, alias))
+    emit("    __default__ = lambda self, n, c, m: c if c else None")
+
+    emit()
+    # The start rule goes through the same name mapping as the rule definitions
+    # (lower-casing, `$` escaping, `_`/`__` renaming); otherwise a start rule
+    # such as `Main` would refer to `n_Main` while the grammar defines `n_main`.
+    emit('parser = Lark(grammar, start="%s", maybe_placeholders=False)' % _get_rulename(start))
+    emit('def parse(text):')
+    emit('    return TransformNearley().transform(parser.parse(text))')
+
+    return ''.join(emit_code)
+
+def main(fn, start, nearley_lib, es6=False):
+    """Read the Nearley grammar file `fn` and return the generated Python code.
+
+    Builtins are looked up in the ``builtin`` directory under `nearley_lib`;
+    includes are resolved relative to the directory that contains `fn`.
+    `start` and `es6` are passed on to `create_code_for_nearley_grammar`.
+    """
+    with codecs.open(fn, encoding='utf8') as grammar_file:
+        source = grammar_file.read()
+
+    builtin_dir = os.path.join(nearley_lib, 'builtin')
+    grammar_dir = os.path.abspath(os.path.dirname(fn))
+    return create_code_for_nearley_grammar(source, start, builtin_dir, grammar_dir, es6=es6)
+
+def get_arg_parser():
+    """Build the command-line parser: three positional arguments and an ``--es6`` flag."""
+    arg_parser = argparse.ArgumentParser(description='Reads a Nearley grammar (with js functions), and outputs an equivalent lark parser.')
+
+    positionals = (
+        ('nearley_grammar', 'Path to the file containing the nearley grammar'),
+        ('start_rule', 'Rule within the nearley grammar to make the base rule'),
+        ('nearley_lib', 'Path to root directory of nearley codebase (used for including builtins)'),
+    )
+    for arg_name, help_text in positionals:
+        arg_parser.add_argument(arg_name, help=help_text)
+
+    arg_parser.add_argument('--es6', action='store_true', help='Enable experimental ES6 support')
+    return arg_parser
+
+if __name__ == '__main__':
+    # Script entry point. With no command-line arguments at all, print the
+    # help text to stderr and exit with status 1; otherwise print the
+    # generated Python code to stdout.
+    cli = get_arg_parser()
+    if len(sys.argv) == 1:
+        cli.print_help(sys.stderr)
+        sys.exit(1)
+    opts = cli.parse_args()
+    generated = main(fn=opts.nearley_grammar, start=opts.start_rule, nearley_lib=opts.nearley_lib, es6=opts.es6)
+    print(generated)