diff options
author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
---|---|---|
committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch) | |
tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/lark/parsers/grammar_analysis.py | |
parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff) | |
download | gn-ai-master.tar.gz |
Diffstat (limited to '.venv/lib/python3.12/site-packages/lark/parsers/grammar_analysis.py')
-rw-r--r-- | .venv/lib/python3.12/site-packages/lark/parsers/grammar_analysis.py | 185 |
1 files changed, 185 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/lark/parsers/grammar_analysis.py b/.venv/lib/python3.12/site-packages/lark/parsers/grammar_analysis.py new file mode 100644 index 00000000..737cb02a --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lark/parsers/grammar_analysis.py @@ -0,0 +1,185 @@ +from collections import Counter, defaultdict + +from ..utils import bfs, fzset, classify +from ..exceptions import GrammarError +from ..grammar import Rule, Terminal, NonTerminal + + +class RulePtr(object): + __slots__ = ('rule', 'index') + + def __init__(self, rule, index): + assert isinstance(rule, Rule) + assert index <= len(rule.expansion) + self.rule = rule + self.index = index + + def __repr__(self): + before = [x.name for x in self.rule.expansion[:self.index]] + after = [x.name for x in self.rule.expansion[self.index:]] + return '<%s : %s * %s>' % (self.rule.origin.name, ' '.join(before), ' '.join(after)) + + @property + def next(self): + return self.rule.expansion[self.index] + + def advance(self, sym): + assert self.next == sym + return RulePtr(self.rule, self.index+1) + + @property + def is_satisfied(self): + return self.index == len(self.rule.expansion) + + def __eq__(self, other): + return self.rule == other.rule and self.index == other.index + def __hash__(self): + return hash((self.rule, self.index)) + + +# state generation ensures no duplicate LR0ItemSets +class LR0ItemSet(object): + __slots__ = ('kernel', 'closure', 'transitions', 'lookaheads') + + def __init__(self, kernel, closure): + self.kernel = fzset(kernel) + self.closure = fzset(closure) + self.transitions = {} + self.lookaheads = defaultdict(set) + + def __repr__(self): + return '{%s | %s}' % (', '.join([repr(r) for r in self.kernel]), ', '.join([repr(r) for r in self.closure])) + + +def update_set(set1, set2): + if not set2 or set1 > set2: + return False + + copy = set(set1) + set1 |= set2 + return set1 != copy + +def calculate_sets(rules): + """Calculate FOLLOW sets. + + Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets""" + symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules} + + # foreach grammar rule X ::= Y(1) ... Y(k) + # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then + # NULLABLE = NULLABLE union {X} + # for i = 1 to k + # if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then + # FIRST(X) = FIRST(X) union FIRST(Y(i)) + # for j = i+1 to k + # if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then + # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X) + # if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then + # FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j)) + # until none of NULLABLE,FIRST,FOLLOW changed in last iteration + + NULLABLE = set() + FIRST = {} + FOLLOW = {} + for sym in symbols: + FIRST[sym]={sym} if sym.is_term else set() + FOLLOW[sym]=set() + + # Calculate NULLABLE and FIRST + changed = True + while changed: + changed = False + + for rule in rules: + if set(rule.expansion) <= NULLABLE: + if update_set(NULLABLE, {rule.origin}): + changed = True + + for i, sym in enumerate(rule.expansion): + if set(rule.expansion[:i]) <= NULLABLE: + if update_set(FIRST[rule.origin], FIRST[sym]): + changed = True + else: + break + + # Calculate FOLLOW + changed = True + while changed: + changed = False + + for rule in rules: + for i, sym in enumerate(rule.expansion): + if i==len(rule.expansion)-1 or set(rule.expansion[i+1:]) <= NULLABLE: + if update_set(FOLLOW[sym], FOLLOW[rule.origin]): + changed = True + + for j in range(i+1, len(rule.expansion)): + if set(rule.expansion[i+1:j]) <= NULLABLE: + if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]): + changed = True + + return FIRST, FOLLOW, NULLABLE + + +class GrammarAnalyzer(object): + def __init__(self, parser_conf, debug=False): + self.debug = debug + + root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal('$END')]) + for start in parser_conf.start} + + rules = parser_conf.rules + list(root_rules.values()) + self.rules_by_origin = classify(rules, lambda r: r.origin) + + if len(rules) != len(set(rules)): + duplicates = [item for item, count in Counter(rules).items() if count > 1] + raise GrammarError("Rules defined twice: %s" % ', '.join(str(i) for i in duplicates)) + + for r in rules: + for sym in r.expansion: + if not (sym.is_term or sym in self.rules_by_origin): + raise GrammarError("Using an undefined rule: %s" % sym) + + self.start_states = {start: self.expand_rule(root_rule.origin) + for start, root_rule in root_rules.items()} + + self.end_states = {start: fzset({RulePtr(root_rule, len(root_rule.expansion))}) + for start, root_rule in root_rules.items()} + + lr0_root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start)]) + for start in parser_conf.start} + + lr0_rules = parser_conf.rules + list(lr0_root_rules.values()) + assert(len(lr0_rules) == len(set(lr0_rules))) + + self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin) + + # cache RulePtr(r, 0) in r (no duplicate RulePtr objects) + self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin)) + for start, root_rule in lr0_root_rules.items()} + + self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules) + + def expand_rule(self, source_rule, rules_by_origin=None): + "Returns all init_ptrs accessible by rule (recursive)" + + if rules_by_origin is None: + rules_by_origin = self.rules_by_origin + + init_ptrs = set() + def _expand_rule(rule): + assert not rule.is_term, rule + + for r in rules_by_origin[rule]: + init_ptr = RulePtr(r, 0) + init_ptrs.add(init_ptr) + + if r.expansion: # if not empty rule + new_r = init_ptr.next + if not new_r.is_term: + yield new_r + + for _ in bfs([source_rule], _expand_rule): + pass + + return fzset(init_ptrs) |