diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/lark/reconstruct.py')
-rw-r--r-- | .venv/lib/python3.12/site-packages/lark/reconstruct.py | 101 |
1 files changed, 101 insertions, 0 deletions
# Recovered from diff: .venv/lib/python3.12/site-packages/lark/reconstruct.py (new file)
"""Reconstruct text from a tree, based on Lark grammar"""

import unicodedata

from .tree import Tree
from .visitors import Transformer_InPlace
from .lexer import Token, PatternStr
from .grammar import Terminal, NonTerminal

from .tree_matcher import TreeMatcher, is_discarded_terminal
from .utils import is_id_continue


def is_iter_empty(i):
    """Return True if the iterator ``i`` has no more items.

    Note:
        This probes the iterator with ``next()``, so when it is *not* empty
        one item is consumed and discarded.
    """
    try:
        next(i)
    except StopIteration:
        return True
    return False


class WriteTokensTransformer(Transformer_InPlace):
    "Inserts discarded tokens into their correct place, according to the rules of grammar"

    def __init__(self, tokens, term_subs):
        # tokens: mapping of terminal name -> terminal definition (read via `.pattern`).
        # term_subs: mapping of terminal name -> substitution (callable or plain text).
        self.tokens = tokens
        self.term_subs = term_subs

    def __default__(self, data, children, meta):
        """Rebuild the flat list of items to write for one matched rule.

        Nodes whose ``meta`` carries no truthy ``match_tree`` attribute are
        returned as an ordinary ``Tree``. Otherwise the rule's
        ``meta.orig_expansion`` is walked symbol by symbol: discarded terminals
        are re-created as text, every other symbol takes the next child.

        Raises:
            NotImplementedError: a discarded terminal has no substitution in
                ``term_subs`` and its pattern is not a plain string.
        """
        if not getattr(meta, 'match_tree', False):
            return Tree(data, children)

        iter_args = iter(children)
        to_write = []
        for sym in meta.orig_expansion:
            if is_discarded_terminal(sym):
                try:
                    sub = self.term_subs[sym.name]
                    # Accept both a callable (called with the symbol) and the
                    # plain output text that the Reconstructor docs describe.
                    # The call stays inside the `try` so a KeyError raised by
                    # the callable still falls back to the terminal's pattern,
                    # exactly as before.
                    v = sub(sym) if callable(sub) else sub
                except KeyError:
                    t = self.tokens[sym.name]
                    if not isinstance(t.pattern, PatternStr):
                        raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t)

                    v = t.pattern.value
                to_write.append(v)
            else:
                x = next(iter_args)
                if isinstance(x, list):
                    # A child that is already a list is spliced in flat.
                    to_write += x
                else:
                    # Sanity check: the child must correspond to the symbol
                    # the expansion expects at this position.
                    if isinstance(x, Token):
                        assert Terminal(x.type) == sym, x
                    else:
                        assert NonTerminal(x.data) == sym, (sym, x)
                    to_write.append(x)

        # Every child must have been consumed by the expansion.
        assert is_iter_empty(iter_args)
        return to_write


class Reconstructor(TreeMatcher):
    """
    A Reconstructor that will, given a full parse Tree, generate source code.

    Note:
        The reconstructor cannot generate values from regexps. If you need to produce discarded
        regexes, such as newlines, use `term_subs` and provide default values for them.

    Parameters:
        parser: a Lark instance
        term_subs: a dictionary of [Terminal name as str] to either
            [output text as str] or a callable that receives the terminal
            symbol and returns the output text
    """

    def __init__(self, parser, term_subs=None):
        TreeMatcher.__init__(self, parser)

        self.write_tokens = WriteTokensTransformer({t.name:t for t in self.tokens}, term_subs or {})

    def _reconstruct(self, tree):
        """Yield the output items for ``tree``, recursing into sub-trees."""
        unreduced_tree = self.match_tree(tree, tree.data)

        res = self.write_tokens.transform(unreduced_tree)
        for item in res:
            if isinstance(item, Tree):
                # TODO use orig_expansion.rulename to support templates
                for x in self._reconstruct(item):
                    yield x
            else:
                yield item

    def reconstruct(self, tree, postproc=None, insert_spaces=True):
        """Return the text reconstructed from ``tree`` as a single string.

        Parameters:
            tree: the parse tree to turn back into text
            postproc: optional callable; if given, it receives the iterable of
                output items and its return value is iterated instead
            insert_spaces: when true, a single space is inserted between two
                adjacent non-empty items if the last character of the first
                and the first character of the second both satisfy
                ``is_id_continue``
        """
        x = self._reconstruct(tree)
        if postproc:
            x = postproc(x)
        y = []
        prev_item = ''
        for item in x:
            if insert_spaces and prev_item and item and is_id_continue(prev_item[-1]) and is_id_continue(item[0]):
                y.append(' ')
            y.append(item)
            prev_item = item
        return ''.join(y)