Diffstat (limited to '.venv/lib/python3.12/site-packages/lark/tools')
4 files changed, 496 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/lark/tools/__init__.py b/.venv/lib/python3.12/site-packages/lark/tools/__init__.py
new file mode 100644
index 00000000..4ecf13d4
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/tools/__init__.py
@@ -0,0 +1,65 @@
+import sys
+from argparse import ArgumentParser, FileType
+try:
+    from textwrap import indent
+except ImportError:
+    def indent(text, prefix):
+        return ''.join(prefix + line for line in text.splitlines(True))
+from logging import DEBUG, INFO, WARN, ERROR
+import warnings
+
+from lark import Lark, logger
+
+lalr_argparser = ArgumentParser(add_help=False, epilog='Look at the Lark documentation for more info on the options')
+
+flags = [
+    ('d', 'debug'),
+    'keep_all_tokens',
+    'regex',
+    'propagate_positions',
+    'maybe_placeholders',
+    'use_bytes'
+]
+
+options = ['start', 'lexer']
+
+lalr_argparser.add_argument('-v', '--verbose', action='count', default=0, help="Increase Logger output level, up to three times")
+lalr_argparser.add_argument('-s', '--start', action='append', default=[])
+lalr_argparser.add_argument('-l', '--lexer', default='contextual', choices=('standard', 'contextual'))
+k = {'encoding': 'utf-8'} if sys.version_info > (3, 4) else {}
+lalr_argparser.add_argument('-o', '--out', type=FileType('w', **k), default=sys.stdout, help='the output file (default=stdout)')
+lalr_argparser.add_argument('grammar_file', type=FileType('r', **k), help='A valid .lark file')
+
+for f in flags:
+    if isinstance(f, tuple):
+        options.append(f[1])
+        lalr_argparser.add_argument('-' + f[0], '--' + f[1], action='store_true')
+    else:
+        options.append(f)
+        lalr_argparser.add_argument('--' + f, action='store_true')
+
+
+def build_lalr(namespace):
+    logger.setLevel((ERROR, WARN, INFO, DEBUG)[min(namespace.verbose, 3)])
+    if len(namespace.start) == 0:
+        namespace.start.append('start')
+    kwargs = {n: getattr(namespace, n) for n in options}
+    return Lark(namespace.grammar_file, parser='lalr', **kwargs), namespace.out
+
+
+def showwarning_as_comment(message, category, filename, lineno, file=None, line=None):
+    # Based on warnings._showwarnmsg_impl
+    text = warnings.formatwarning(message, category, filename, lineno, line)
+    text = indent(text, '# ')
+    if file is None:
+        file = sys.stderr
+        if file is None:
+            return
+    try:
+        file.write(text)
+    except OSError:
+        pass
+
+
+def make_warnings_comments():
+    warnings.showwarning = showwarning_as_comment
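
The `lalr_argparser`/`build_lalr` pair above is the shared CLI plumbing reused by the other tools in this package. A minimal sketch of consuming it from a custom script, assuming a grammar file named `my_grammar.lark` exists (the tool name and paths are placeholders):

    from argparse import ArgumentParser
    from lark.tools import lalr_argparser, build_lalr

    # Inherit the common options (-v, -s, -l, -o and the boolean flags)
    parser = ArgumentParser(prog='my-tool', parents=[lalr_argparser])
    ns = parser.parse_args(['-s', 'start', 'my_grammar.lark'])

    # build_lalr returns the constructed Lark instance and the -o/--out stream
    lark_inst, out = build_lalr(ns)
    tree = lark_inst.parse('some input text')  # parse with the resulting LALR parser
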
diff --git a/.venv/lib/python3.12/site-packages/lark/tools/nearley.py b/.venv/lib/python3.12/site-packages/lark/tools/nearley.py
new file mode 100644
index 00000000..f0779dc5
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/tools/nearley.py
@@ -0,0 +1,201 @@
+"Converts Nearley grammars to Lark"
+
+import os.path
+import sys
+import codecs
+import argparse
+
+
+from lark import Lark, InlineTransformer
+
+nearley_grammar = r"""
+    start: (ruledef|directive)+
+
+    directive: "@" NAME (STRING|NAME)
+             | "@" JS  -> js_code
+    ruledef: NAME "->" expansions
+           | NAME REGEXP "->" expansions -> macro
+    expansions: expansion ("|" expansion)*
+
+    expansion: expr+ js
+
+    ?expr: item (":" /[+*?]/)?
+
+    ?item: rule|string|regexp|null
+         | "(" expansions ")"
+
+    rule: NAME
+    string: STRING
+    regexp: REGEXP
+    null: "null"
+    JS: /{%.*?%}/s
+    js: JS?
+
+    NAME: /[a-zA-Z_$]\w*/
+    COMMENT: /#[^\n]*/
+    REGEXP: /\[.*?\]/
+
+    STRING: _STRING "i"?
+
+    %import common.ESCAPED_STRING -> _STRING
+    %import common.WS
+    %ignore WS
+    %ignore COMMENT
+
+    """
+
+nearley_grammar_parser = Lark(nearley_grammar, parser='earley', lexer='standard')
+
+def _get_rulename(name):
+    name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name)
+    return 'n_' + name.replace('$', '__DOLLAR__').lower()
+
+class NearleyToLark(InlineTransformer):
+    def __init__(self):
+        self._count = 0
+        self.extra_rules = {}
+        self.extra_rules_rev = {}
+        self.alias_js_code = {}
+
+    def _new_function(self, code):
+        name = 'alias_%d' % self._count
+        self._count += 1
+
+        self.alias_js_code[name] = code
+        return name
+
+    def _extra_rule(self, rule):
+        if rule in self.extra_rules_rev:
+            return self.extra_rules_rev[rule]
+
+        name = 'xrule_%d' % len(self.extra_rules)
+        assert name not in self.extra_rules
+        self.extra_rules[name] = rule
+        self.extra_rules_rev[rule] = name
+        return name
+
+    def rule(self, name):
+        return _get_rulename(name)
+
+    def ruledef(self, name, exps):
+        return '!%s: %s' % (_get_rulename(name), exps)
+
+    def expr(self, item, op):
+        rule = '(%s)%s' % (item, op)
+        return self._extra_rule(rule)
+
+    def regexp(self, r):
+        return '/%s/' % r
+
+    def null(self):
+        return ''
+
+    def string(self, s):
+        return self._extra_rule(s)
+
+    def expansion(self, *x):
+        x, js = x[:-1], x[-1]
+        if js.children:
+            js_code ,= js.children
+            js_code = js_code[2:-2]
+            alias = '-> ' + self._new_function(js_code)
+        else:
+            alias = ''
+        return ' '.join(x) + alias
+
+    def expansions(self, *x):
+        return '%s' % ('\n    |'.join(x))
+
+    def start(self, *rules):
+        return '\n'.join(filter(None, rules))
+
+def _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, includes):
+    rule_defs = []
+
+    tree = nearley_grammar_parser.parse(g)
+    for statement in tree.children:
+        if statement.data == 'directive':
+            directive, arg = statement.children
+            if directive in ('builtin', 'include'):
+                folder = builtin_path if directive == 'builtin' else folder_path
+                path = os.path.join(folder, arg[1:-1])
+                if path not in includes:
+                    includes.add(path)
+                    with codecs.open(path, encoding='utf8') as f:
+                        text = f.read()
+                    rule_defs += _nearley_to_lark(text, builtin_path, n2l, js_code, os.path.abspath(os.path.dirname(path)), includes)
+            else:
+                assert False, directive
+        elif statement.data == 'js_code':
+            code ,= statement.children
+            code = code[2:-2]
+            js_code.append(code)
+        elif statement.data == 'macro':
+            pass # TODO Add support for macros!
+        elif statement.data == 'ruledef':
+            rule_defs.append( n2l.transform(statement) )
+        else:
+            raise Exception("Unknown statement: %s" % statement)
+
+    return rule_defs
+
+
+def create_code_for_nearley_grammar(g, start, builtin_path, folder_path, es6=False):
+    import js2py
+
+    emit_code = []
+    def emit(x=None):
+        if x:
+            emit_code.append(x)
+        emit_code.append('\n')
+
+    js_code = ['function id(x) {return x[0];}']
+    n2l = NearleyToLark()
+    rule_defs = _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, set())
+    lark_g = '\n'.join(rule_defs)
+    lark_g += '\n'+'\n'.join('!%s: %s' % item for item in n2l.extra_rules.items())
+
+    emit('from lark import Lark, Transformer')
+    emit()
+    emit('grammar = ' + repr(lark_g))
+    emit()
+
+    for alias, code in n2l.alias_js_code.items():
+        js_code.append('%s = (%s);' % (alias, code))
+
+    if es6:
+        emit(js2py.translate_js6('\n'.join(js_code)))
+    else:
+        emit(js2py.translate_js('\n'.join(js_code)))
+    emit('class TransformNearley(Transformer):')
+    for alias in n2l.alias_js_code:
+        emit("    %s = var.get('%s').to_python()" % (alias, alias))
+    emit("    __default__ = lambda self, n, c, m: c if c else None")
+
+    emit()
+    emit('parser = Lark(grammar, start="n_%s", maybe_placeholders=False)' % start)
+    emit('def parse(text):')
+    emit('    return TransformNearley().transform(parser.parse(text))')
+
+    return ''.join(emit_code)
+
+def main(fn, start, nearley_lib, es6=False):
+    with codecs.open(fn, encoding='utf8') as f:
+        grammar = f.read()
+    return create_code_for_nearley_grammar(grammar, start, os.path.join(nearley_lib, 'builtin'), os.path.abspath(os.path.dirname(fn)), es6=es6)
+
+def get_arg_parser():
+    parser = argparse.ArgumentParser(description='Reads a Nearley grammar (with js functions), and outputs an equivalent lark parser.')
+    parser.add_argument('nearley_grammar', help='Path to the file containing the nearley grammar')
+    parser.add_argument('start_rule', help='Rule within the nearley grammar to make the base rule')
+    parser.add_argument('nearley_lib', help='Path to root directory of nearley codebase (used for including builtins)')
+    parser.add_argument('--es6', help='Enable experimental ES6 support', action='store_true')
+    return parser
+
+if __name__ == '__main__':
+    parser = get_arg_parser()
+    if len(sys.argv)==1:
+        parser.print_help(sys.stderr)
+        sys.exit(1)
+    args = parser.parse_args()
+    print(main(fn=args.nearley_grammar, start=args.start_rule, nearley_lib=args.nearley_lib, es6=args.es6))
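
For reference, converting a Nearley grammar with the `main` helper above; the grammar path and the nearley checkout location are hypothetical, and `nearley_lib` must point at a checkout of the nearley repo so builtins can be included:

    from lark.tools.nearley import main

    # 'csscolor.ne' and './nearley' are placeholder paths
    code = main(fn='csscolor.ne', start='csscolor', nearley_lib='./nearley')
    with open('csscolor_parser.py', 'w') as f:
        f.write(code)
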
diff --git a/.venv/lib/python3.12/site-packages/lark/tools/serialize.py b/.venv/lib/python3.12/site-packages/lark/tools/serialize.py
new file mode 100644
index 00000000..61540242
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/tools/serialize.py
@@ -0,0 +1,34 @@
+import codecs
+import sys
+import json
+
+from lark import Lark
+from lark.grammar import RuleOptions, Rule
+from lark.lexer import TerminalDef
+from lark.tools import lalr_argparser, build_lalr
+
+import argparse
+
+argparser = argparse.ArgumentParser(prog='python -m lark.tools.serialize', parents=[lalr_argparser],
+                                    description="Lark Serialization Tool - Stores Lark's internal state & LALR analysis as a JSON file",
+                                    epilog='Look at the Lark documentation for more info on the options')
+
+
+def serialize(lark_inst, outfile):
+    data, memo = lark_inst.memo_serialize([TerminalDef, Rule])
+    outfile.write('{\n')
+    outfile.write('  "data": %s,\n' % json.dumps(data))
+    outfile.write('  "memo": %s\n' % json.dumps(memo))
+    outfile.write('}\n')
+
+
+def main():
+    if len(sys.argv)==1:
+        argparser.print_help(sys.stderr)
+        sys.exit(1)
+    ns = argparser.parse_args()
+    serialize(*build_lalr(ns))
+
+
+if __name__ == '__main__':
+    main()
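
A programmatic equivalent of running `python -m lark.tools.serialize grammar.lark`, sketched under the assumption that `grammar.lark` exists (the path is a placeholder):

    import sys
    from lark import Lark
    from lark.tools.serialize import serialize

    # serialize() writes {"data": ..., "memo": ...} as JSON to the given stream
    lark_inst = Lark(open('grammar.lark'), parser='lalr')
    serialize(lark_inst, sys.stdout)
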
diff --git a/.venv/lib/python3.12/site-packages/lark/tools/standalone.py b/.venv/lib/python3.12/site-packages/lark/tools/standalone.py
new file mode 100644
index 00000000..c86d7d7a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/tools/standalone.py
@@ -0,0 +1,196 @@
+from __future__ import print_function
+
+###{standalone
+#
+#
+#   Lark Stand-alone Generator Tool
+# ----------------------------------
+# Generates a stand-alone LALR(1) parser with a standard lexer
+#
+# Git:    https://github.com/erezsh/lark
+# Author: Erez Shinan (erezshin@gmail.com)
+#
+#
+#    >>> LICENSE
+#
+#    This tool and its generated code use a separate license from Lark,
+#    and are subject to the terms of the Mozilla Public License, v. 2.0.
+#    If a copy of the MPL was not distributed with this
+#    file, You can obtain one at https://mozilla.org/MPL/2.0/.
+#
+#    If you wish to purchase a commercial license for this tool and its
+#    generated code, you may contact me via email or otherwise.
+#
+#    If MPL2 is incompatible with your free or open-source project,
+#    contact me and we'll work it out.
+#
+#
+
+from io import open
+###}
+
+import sys
+import token, tokenize
+import os
+from os import path
+from collections import defaultdict
+from functools import partial
+from argparse import ArgumentParser, SUPPRESS
+from warnings import warn
+
+import lark
+from lark import Lark
+from lark.tools import lalr_argparser, build_lalr, make_warnings_comments
+
+
+from lark.grammar import RuleOptions, Rule
+from lark.lexer import TerminalDef
+
+_dir = path.dirname(__file__)
+_larkdir = path.join(_dir, path.pardir)
+
+
+EXTRACT_STANDALONE_FILES = [
+    'tools/standalone.py',
+    'exceptions.py',
+    'utils.py',
+    'tree.py',
+    'visitors.py',
+    'grammar.py',
+    'lexer.py',
+    'common.py',
+    'parse_tree_builder.py',
+    'parsers/lalr_parser.py',
+    'parsers/lalr_analysis.py',
+    'parser_frontends.py',
+    'lark.py',
+    'indenter.py',
+]
+
+def extract_sections(lines):
+    section = None
+    text = []
+    sections = defaultdict(list)
+    for l in lines:
+        if l.startswith('###'):
+            if l[3] == '{':
+                section = l[4:].strip()
+            elif l[3] == '}':
+                sections[section] += text
+                section = None
+                text = []
+            else:
+                raise ValueError(l)
+        elif section:
+            text.append(l)
+
+    return {name:''.join(text) for name, text in sections.items()}
+
+
+def strip_docstrings(line_gen):
+    """ Strip comments and docstrings from a file.
+    Based on code from: https://stackoverflow.com/questions/1769332/script-to-remove-python-comments-docstrings
+    """
+    res = []
+
+    prev_toktype = token.INDENT
+    last_lineno = -1
+    last_col = 0
+
+    tokgen = tokenize.generate_tokens(line_gen)
+    for toktype, ttext, (slineno, scol), (elineno, ecol), ltext in tokgen:
+        if slineno > last_lineno:
+            last_col = 0
+        if scol > last_col:
+            res.append(" " * (scol - last_col))
+        if toktype == token.STRING and prev_toktype == token.INDENT:
+            # Docstring
+            res.append("#--")
+        elif toktype == tokenize.COMMENT:
+            # Comment
+            res.append("##\n")
+        else:
+            res.append(ttext)
+        prev_toktype = toktype
+        last_col = ecol
+        last_lineno = elineno
+
+    return ''.join(res)
+
+
+def main(fobj, start, print=print):
+    warn('`lark.tools.standalone.main` is being redesigned. Use `gen_standalone`', DeprecationWarning)
+    lark_inst = Lark(fobj, parser="lalr", lexer="contextual", start=start)
+    gen_standalone(lark_inst, print)
+
+def gen_standalone(lark_inst, output=None, out=sys.stdout, compress=False):
+    if output is None:
+        output = partial(print, file=out)
+
+    import pickle, zlib, base64
+    def compressed_output(obj):
+        s = pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
+        c = zlib.compress(s)
+        output(repr(base64.b64encode(c)))
+
+    def output_decompress(name):
+        output('%(name)s = pickle.loads(zlib.decompress(base64.b64decode(%(name)s)))' % locals())
+
+    output('# The file was automatically generated by Lark v%s' % lark.__version__)
+    output('__version__ = "%s"' % lark.__version__)
+    output()
+
+    for i, pyfile in enumerate(EXTRACT_STANDALONE_FILES):
+        with open(os.path.join(_larkdir, pyfile)) as f:
+            code = extract_sections(f)['standalone']
+            if i:   # if not this file
+                code = strip_docstrings(partial(next, iter(code.splitlines(True))))
+            output(code)
+
+    data, m = lark_inst.memo_serialize([TerminalDef, Rule])
+    output('import pickle, zlib, base64')
+    if compress:
+        output('DATA = (')
+        compressed_output(data)
+        output(')')
+        output_decompress('DATA')
+        output('MEMO = (')
+        compressed_output(m)
+        output(')')
+        output_decompress('MEMO')
+    else:
+        output('DATA = (')
+        output(data)
+        output(')')
+        output('MEMO = (')
+        output(m)
+        output(')')
+
+
+    output('Shift = 0')
+    output('Reduce = 1')
+    output("def Lark_StandAlone(**kwargs):")
+    output("  return Lark._load_from_dict(DATA, MEMO, **kwargs)")
+
+
+
+
+def main():
+    make_warnings_comments()
+    parser = ArgumentParser(prog='python -m lark.tools.standalone', description="Lark Stand-alone Generator Tool",
+                            parents=[lalr_argparser], epilog='Look at the Lark documentation for more info on the options')
+    parser.add_argument("old_start", nargs='?', help=SUPPRESS)
+    parser.add_argument('-c', '--compress', action='store_true', default=0, help="Enable compression")
+    if len(sys.argv)==1:
+        parser.print_help(sys.stderr)
+        sys.exit(1)
+    ns = parser.parse_args()
+    if ns.old_start is not None:
+        warn('The syntax `python -m lark.tools.standalone <grammar-file> <start>` is deprecated. Use the -s option')
+        ns.start.append(ns.old_start)
+
+    lark_inst, out = build_lalr(ns)
+    gen_standalone(lark_inst, out=out, compress=ns.compress)
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
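
Finally, a minimal sketch of driving `gen_standalone` directly instead of via the CLI; `json.lark` and `json_parser.py` are placeholder names:

    from lark import Lark
    from lark.tools.standalone import gen_standalone

    lark_inst = Lark(open('json.lark'), parser='lalr', lexer='contextual', start='start')
    with open('json_parser.py', 'w') as f:
        gen_standalone(lark_inst, out=f)

    # The generated module is self-contained:
    #   from json_parser import Lark_StandAlone
    #   parser = Lark_StandAlone()
    #   tree = parser.parse('{"key": ["value"]}')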