Diffstat (limited to '.venv/lib/python3.12/site-packages/lark/tools')
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/tools/__init__.py   |  65 +
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/tools/nearley.py    | 201 +
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/tools/serialize.py  |  34 +
-rw-r--r--  .venv/lib/python3.12/site-packages/lark/tools/standalone.py | 196 +
4 files changed, 496 insertions(+), 0 deletions(-)
diff --git a/.venv/lib/python3.12/site-packages/lark/tools/__init__.py b/.venv/lib/python3.12/site-packages/lark/tools/__init__.py
new file mode 100644
index 00000000..4ecf13d4
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/tools/__init__.py
@@ -0,0 +1,65 @@
+import sys
+from argparse import ArgumentParser, FileType
+try:
+    from textwrap import indent
+except ImportError:
+    def indent(text, prefix):
+        return ''.join(prefix + line for line in text.splitlines(True))
+from logging import DEBUG, INFO, WARN, ERROR
+import warnings
+
+from lark import Lark, logger
+
+lalr_argparser = ArgumentParser(add_help=False, epilog='Look at the Lark documentation for more info on the options')
+
+flags = [
+    ('d', 'debug'),
+    'keep_all_tokens',
+    'regex',
+    'propagate_positions',
+    'maybe_placeholders',
+    'use_bytes'
+]
+
+options = ['start', 'lexer']
+
+lalr_argparser.add_argument('-v', '--verbose', action='count', default=0, help="Increase logger output level (may be given up to three times)")
+lalr_argparser.add_argument('-s', '--start', action='append', default=[])
+lalr_argparser.add_argument('-l', '--lexer', default='contextual', choices=('standard', 'contextual'))
+k = {'encoding': 'utf-8'} if sys.version_info > (3, 4) else {}
+lalr_argparser.add_argument('-o', '--out', type=FileType('w', **k), default=sys.stdout, help='the output file (default=stdout)')
+lalr_argparser.add_argument('grammar_file', type=FileType('r', **k), help='A valid .lark file')
+
+for f in flags:
+    if isinstance(f, tuple):
+        options.append(f[1])
+        lalr_argparser.add_argument('-' + f[0], '--' + f[1], action='store_true')
+    else:
+        options.append(f)
+        lalr_argparser.add_argument('--' + f, action='store_true')
+
+
+def build_lalr(namespace):
+    logger.setLevel((ERROR, WARN, INFO, DEBUG)[min(namespace.verbose, 3)])
+    if len(namespace.start) == 0:
+        namespace.start.append('start')
+    kwargs = {n: getattr(namespace, n) for n in options}
+    return Lark(namespace.grammar_file, parser='lalr', **kwargs), namespace.out
+
+
+def showwarning_as_comment(message, category, filename, lineno, file=None, line=None):
+    # Based on warnings._showwarnmsg_impl
+    text = warnings.formatwarning(message, category, filename, lineno, line)
+    text = indent(text, '# ')
+    if file is None:
+        file = sys.stderr
+        if file is None:
+            return 
+    try:
+        file.write(text)
+    except OSError:
+        pass
+
+
+def make_warnings_comments():
+    warnings.showwarning = showwarning_as_comment
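
The `lalr_argparser`/`build_lalr` pair defined above is the shared CLI plumbing that the serialize and standalone tools below build on. A minimal sketch of reusing it from a third-party command (the tool name is hypothetical; `build_lalr` returns the built Lark instance together with the chosen output file):

    # hypothetical_tool.py -- reuse Lark's shared LALR argument parser (sketch)
    import sys
    from argparse import ArgumentParser
    from lark.tools import lalr_argparser, build_lalr

    parser = ArgumentParser(prog='hypothetical_tool', parents=[lalr_argparser])
    ns = parser.parse_args()            # e.g. `hypothetical_tool my_grammar.lark -s start`
    lark_inst, out = build_lalr(ns)     # (Lark instance, output file from -o/--out)
    out.write(lark_inst.parse(sys.stdin.read()).pretty())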
diff --git a/.venv/lib/python3.12/site-packages/lark/tools/nearley.py b/.venv/lib/python3.12/site-packages/lark/tools/nearley.py
new file mode 100644
index 00000000..f0779dc5
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/tools/nearley.py
@@ -0,0 +1,201 @@
+"Converts Nearley grammars to Lark"
+
+import os.path
+import sys
+import codecs
+import argparse
+
+
+from lark import Lark, InlineTransformer
+
+nearley_grammar = r"""
+    start: (ruledef|directive)+
+
+    directive: "@" NAME (STRING|NAME)
+             | "@" JS  -> js_code
+    ruledef: NAME "->" expansions
+           | NAME REGEXP "->" expansions -> macro
+    expansions: expansion ("|" expansion)*
+
+    expansion: expr+ js
+
+    ?expr: item (":" /[+*?]/)?
+
+    ?item: rule|string|regexp|null
+         | "(" expansions ")"
+
+    rule: NAME
+    string: STRING
+    regexp: REGEXP
+    null: "null"
+    JS: /{%.*?%}/s
+    js: JS?
+
+    NAME: /[a-zA-Z_$]\w*/
+    COMMENT: /#[^\n]*/
+    REGEXP: /\[.*?\]/
+
+    STRING: _STRING "i"?
+
+    %import common.ESCAPED_STRING -> _STRING
+    %import common.WS
+    %ignore WS
+    %ignore COMMENT
+
+    """
+
+nearley_grammar_parser = Lark(nearley_grammar, parser='earley', lexer='standard')
+
+def _get_rulename(name):
+    name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name)
+    return 'n_' + name.replace('$', '__DOLLAR__').lower()
+
+class NearleyToLark(InlineTransformer):
+    def __init__(self):
+        self._count = 0
+        self.extra_rules = {}
+        self.extra_rules_rev = {}
+        self.alias_js_code = {}
+
+    def _new_function(self, code):
+        name = 'alias_%d' % self._count
+        self._count += 1
+
+        self.alias_js_code[name] = code
+        return name
+
+    def _extra_rule(self, rule):
+        if rule in self.extra_rules_rev:
+            return self.extra_rules_rev[rule]
+
+        name = 'xrule_%d' % len(self.extra_rules)
+        assert name not in self.extra_rules
+        self.extra_rules[name] = rule
+        self.extra_rules_rev[rule] = name
+        return name
+
+    def rule(self, name):
+        return _get_rulename(name)
+
+    def ruledef(self, name, exps):
+        return '!%s: %s' % (_get_rulename(name), exps)
+
+    def expr(self, item, op):
+        rule = '(%s)%s' % (item, op)
+        return self._extra_rule(rule)
+
+    def regexp(self, r):
+        return '/%s/' % r
+
+    def null(self):
+        return ''
+
+    def string(self, s):
+        return self._extra_rule(s)
+
+    def expansion(self, *x):
+        x, js = x[:-1], x[-1]
+        if js.children:
+            js_code, = js.children
+            js_code = js_code[2:-2]
+            alias = '-> ' + self._new_function(js_code)
+        else:
+            alias = ''
+        return ' '.join(x) + alias
+
+    def expansions(self, *x):
+        return '%s' % ('\n    |'.join(x))
+
+    def start(self, *rules):
+        return '\n'.join(filter(None, rules))
+
+def _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, includes):
+    rule_defs = []
+
+    tree = nearley_grammar_parser.parse(g)
+    for statement in tree.children:
+        if statement.data == 'directive':
+            directive, arg = statement.children
+            if directive in ('builtin', 'include'):
+                folder = builtin_path if directive == 'builtin' else folder_path
+                path = os.path.join(folder, arg[1:-1])
+                if path not in includes:
+                    includes.add(path)
+                    with codecs.open(path, encoding='utf8') as f:
+                        text = f.read()
+                    rule_defs += _nearley_to_lark(text, builtin_path, n2l, js_code, os.path.abspath(os.path.dirname(path)), includes)
+            else:
+                assert False, directive
+        elif statement.data == 'js_code':
+            code, = statement.children
+            code = code[2:-2]
+            js_code.append(code)
+        elif statement.data == 'macro':
+            pass    # TODO Add support for macros!
+        elif statement.data == 'ruledef':
+            rule_defs.append(n2l.transform(statement))
+        else:
+            raise Exception("Unknown statement: %s" % statement)
+
+    return rule_defs
+
+
+def create_code_for_nearley_grammar(g, start, builtin_path, folder_path, es6=False):
+    import js2py
+
+    emit_code = []
+    def emit(x=None):
+        if x:
+            emit_code.append(x)
+        emit_code.append('\n')
+
+    js_code = ['function id(x) {return x[0];}']
+    n2l = NearleyToLark()
+    rule_defs = _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, set())
+    lark_g = '\n'.join(rule_defs)
+    lark_g += '\n'+'\n'.join('!%s: %s' % item for item in n2l.extra_rules.items())
+
+    emit('from lark import Lark, Transformer')
+    emit()
+    emit('grammar = ' + repr(lark_g))
+    emit()
+
+    for alias, code in n2l.alias_js_code.items():
+        js_code.append('%s = (%s);' % (alias, code))
+
+    if es6:
+        emit(js2py.translate_js6('\n'.join(js_code)))
+    else:
+        emit(js2py.translate_js('\n'.join(js_code)))
+    emit('class TransformNearley(Transformer):')
+    for alias in n2l.alias_js_code:
+        emit("    %s = var.get('%s').to_python()" % (alias, alias))
+    emit("    __default__ = lambda self, n, c, m: c if c else None")
+
+    emit()
+    emit('parser = Lark(grammar, start="n_%s", maybe_placeholders=False)' % start)
+    emit('def parse(text):')
+    emit('    return TransformNearley().transform(parser.parse(text))')
+
+    return ''.join(emit_code)
+
+def main(fn, start, nearley_lib, es6=False):
+    with codecs.open(fn, encoding='utf8') as f:
+        grammar = f.read()
+    return create_code_for_nearley_grammar(grammar, start, os.path.join(nearley_lib, 'builtin'), os.path.abspath(os.path.dirname(fn)), es6=es6)
+
+def get_arg_parser():
+    parser = argparse.ArgumentParser(description='Reads a Nearley grammar (with js functions), and outputs an equivalent lark parser.')
+    parser.add_argument('nearley_grammar', help='Path to the file containing the nearley grammar')
+    parser.add_argument('start_rule', help='Rule within the nearley grammar to make the base rule')
+    parser.add_argument('nearley_lib', help='Path to root directory of nearley codebase (used for including builtins)')
+    parser.add_argument('--es6', help='Enable experimental ES6 support', action='store_true')
+    return parser
+
+if __name__ == '__main__':
+    parser = get_arg_parser()
+    if len(sys.argv)==1:
+        parser.print_help(sys.stderr)
+        sys.exit(1)
+    args = parser.parse_args()
+    print(main(fn=args.nearley_grammar, start=args.start_rule, nearley_lib=args.nearley_lib, es6=args.es6))
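
As a reference point, `main` above can also be driven programmatically rather than via the CLI; a minimal sketch (the file paths are hypothetical, and a local nearley checkout plus the `js2py` package are assumed):

    # Convert a Nearley grammar into a self-contained Python parser module (sketch)
    from lark.tools.nearley import main

    code = main(fn='csv.ne',                      # hypothetical Nearley grammar file
                start='main',                     # start rule inside that grammar
                nearley_lib='/path/to/nearley')   # checkout root; resolves @builtin includes
    with open('csv_parser.py', 'w') as f:
        f.write(code)                             # generated module exposes parse(text)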
diff --git a/.venv/lib/python3.12/site-packages/lark/tools/serialize.py b/.venv/lib/python3.12/site-packages/lark/tools/serialize.py
new file mode 100644
index 00000000..61540242
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/tools/serialize.py
@@ -0,0 +1,34 @@
+import codecs
+import sys
+import json
+
+from lark import Lark
+from lark.grammar import RuleOptions, Rule
+from lark.lexer import TerminalDef
+from lark.tools import lalr_argparser, build_lalr
+
+import argparse
+
+argparser = argparse.ArgumentParser(prog='python -m lark.tools.serialize', parents=[lalr_argparser],
+                                    description="Lark Serialization Tool - Stores Lark's internal state & LALR analysis as a JSON file",
+                                    epilog='Look at the Lark documentation for more info on the options')
+
+
+def serialize(lark_inst, outfile):
+    data, memo = lark_inst.memo_serialize([TerminalDef, Rule])
+    outfile.write('{\n')
+    outfile.write('  "data": %s,\n' % json.dumps(data))
+    outfile.write('  "memo": %s\n' % json.dumps(memo))
+    outfile.write('}\n')
+
+
+def main():
+    if len(sys.argv)==1:
+        argparser.print_help(sys.stderr)
+        sys.exit(1)
+    ns = argparser.parse_args()
+    serialize(*build_lalr(ns))
+
+
+if __name__ == '__main__':
+    main()
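
The JSON written by `serialize` mirrors `Lark.memo_serialize([TerminalDef, Rule])`; a minimal sketch of inspecting it (the output filename is hypothetical, e.g. produced by `python -m lark.tools.serialize grammar.lark -o parser.json`):

    # Load the serialized parser state back for inspection (sketch)
    import json

    with open('parser.json') as f:   # hypothetical file written with -o parser.json
        blob = json.load(f)
    print(sorted(blob))              # ['data', 'memo'] -- the two top-level sections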
diff --git a/.venv/lib/python3.12/site-packages/lark/tools/standalone.py b/.venv/lib/python3.12/site-packages/lark/tools/standalone.py
new file mode 100644
index 00000000..c86d7d7a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/tools/standalone.py
@@ -0,0 +1,196 @@
+from __future__ import print_function
+
+###{standalone
+#
+#
+#   Lark Stand-alone Generator Tool
+# ----------------------------------
+# Generates a stand-alone LALR(1) parser with a standard lexer
+#
+# Git:    https://github.com/erezsh/lark
+# Author: Erez Shinan (erezshin@gmail.com)
+#
+#
+#    >>> LICENSE
+#
+#    This tool and its generated code use a separate license from Lark,
+#    and are subject to the terms of the Mozilla Public License, v. 2.0.
+#    If a copy of the MPL was not distributed with this
+#    file, You can obtain one at https://mozilla.org/MPL/2.0/.
+#
+#    If you wish to purchase a commercial license for this tool and its
+#    generated code, you may contact me via email or otherwise.
+#
+#    If MPL2 is incompatible with your free or open-source project,
+#    contact me and we'll work it out.
+#
+#
+
+from io import open
+###}
+
+import sys
+import token, tokenize
+import os
+from os import path
+from collections import defaultdict
+from functools import partial
+from argparse import ArgumentParser, SUPPRESS
+from warnings import warn
+
+import lark
+from lark import Lark
+from lark.tools import lalr_argparser, build_lalr, make_warnings_comments
+
+
+from lark.grammar import RuleOptions, Rule
+from lark.lexer import TerminalDef
+
+_dir = path.dirname(__file__)
+_larkdir = path.join(_dir, path.pardir)
+
+
+EXTRACT_STANDALONE_FILES = [
+    'tools/standalone.py',
+    'exceptions.py',
+    'utils.py',
+    'tree.py',
+    'visitors.py',
+    'grammar.py',
+    'lexer.py',
+    'common.py',
+    'parse_tree_builder.py',
+    'parsers/lalr_parser.py',
+    'parsers/lalr_analysis.py',
+    'parser_frontends.py',
+    'lark.py',
+    'indenter.py',
+]
+
+def extract_sections(lines):
+    section = None
+    text = []
+    sections = defaultdict(list)
+    for l in lines:
+        if l.startswith('###'):
+            if l[3] == '{':
+                section = l[4:].strip()
+            elif l[3] == '}':
+                sections[section] += text
+                section = None
+                text = []
+            else:
+                raise ValueError(l)
+        elif section:
+            text.append(l)
+
+    return {name:''.join(text) for name, text in sections.items()}
+
+
+def strip_docstrings(line_gen):
+    """ Strip comments and docstrings from a file.
+    Based on code from: https://stackoverflow.com/questions/1769332/script-to-remove-python-comments-docstrings
+    """
+    res = []
+
+    prev_toktype = token.INDENT
+    last_lineno = -1
+    last_col = 0
+
+    tokgen = tokenize.generate_tokens(line_gen)
+    for toktype, ttext, (slineno, scol), (elineno, ecol), ltext in tokgen:
+        if slineno > last_lineno:
+            last_col = 0
+        if scol > last_col:
+            res.append(" " * (scol - last_col))
+        if toktype == token.STRING and prev_toktype == token.INDENT:
+            # Docstring
+            res.append("#--")
+        elif toktype == tokenize.COMMENT:
+            # Comment
+            res.append("##\n")
+        else:
+            res.append(ttext)
+        prev_toktype = toktype
+        last_col = ecol
+        last_lineno = elineno
+
+    return ''.join(res)
+
+
+def main(fobj, start, print=print):
+    warn('`lark.tools.standalone.main` is being redesigned. Use `gen_standalone`', DeprecationWarning)
+    lark_inst = Lark(fobj, parser="lalr", lexer="contextual", start=start)
+    gen_standalone(lark_inst, print)
+
+def gen_standalone(lark_inst, output=None, out=sys.stdout, compress=False):
+    if output is None:
+        output = partial(print, file=out)
+
+    import pickle, zlib, base64
+    def compressed_output(obj):
+        s = pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
+        c = zlib.compress(s)
+        output(repr(base64.b64encode(c)))
+
+    def output_decompress(name):
+        output('%(name)s = pickle.loads(zlib.decompress(base64.b64decode(%(name)s)))' % locals())
+
+    output('# The file was automatically generated by Lark v%s' % lark.__version__)
+    output('__version__ = "%s"' % lark.__version__)
+    output()
+
+    for i, pyfile in enumerate(EXTRACT_STANDALONE_FILES):
+        with open(os.path.join(_larkdir, pyfile)) as f:
+            code = extract_sections(f)['standalone']
+            if i:   # if not this file
+                code = strip_docstrings(partial(next, iter(code.splitlines(True))))
+            output(code)
+
+    data, m = lark_inst.memo_serialize([TerminalDef, Rule])
+    output('import pickle, zlib, base64')
+    if compress:
+        output('DATA = (')
+        compressed_output(data)
+        output(')')
+        output_decompress('DATA')
+        output('MEMO = (')
+        compressed_output(m)
+        output(')')
+        output_decompress('MEMO')
+    else:
+        output('DATA = (')
+        output(data)
+        output(')')
+        output('MEMO = (')
+        output(m)
+        output(')')
+
+
+    output('Shift = 0')
+    output('Reduce = 1')
+    output("def Lark_StandAlone(**kwargs):")
+    output("  return Lark._load_from_dict(DATA, MEMO, **kwargs)")
+
+
+
+
+def main():
+    make_warnings_comments()
+    parser = ArgumentParser(prog='python -m lark.tools.standalone', description="Lark Stand-alone Generator Tool",
+                            parents=[lalr_argparser], epilog='Look at the Lark documentation for more info on the options')
+    parser.add_argument("old_start", nargs='?', help=SUPPRESS)
+    parser.add_argument('-c', '--compress', action='store_true', default=False, help="Enable compression")
+    if len(sys.argv)==1:
+        parser.print_help(sys.stderr)
+        sys.exit(1)
+    ns = parser.parse_args()
+    if ns.old_start is not None:
+        warn('The syntax `python -m lark.tools.standalone <grammar-file> <start>` is deprecated. Use the -s option')
+        ns.start.append(ns.old_start)
+
+    lark_inst, out = build_lalr(ns)
+    gen_standalone(lark_inst, out=out, compress=ns.compress)
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
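
Once a parser has been generated (e.g. `python -m lark.tools.standalone grammar.lark -o my_parser.py`), the output file is importable with no lark dependency at runtime; a minimal usage sketch (the module name and input are hypothetical):

    # Use the generated standalone parser (sketch)
    from my_parser import Lark_StandAlone   # rebuilds the parser from the embedded DATA/MEMO

    parser = Lark_StandAlone()
    tree = parser.parse('input text matching the grammar')
    print(tree.pretty())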