| field | value | date |
|---|---|---|
| author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
| committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
| commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch) | |
| tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/lark/exceptions.py | |
| parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff) | |
| download | gn-ai-master.tar.gz | |
Diffstat (limited to '.venv/lib/python3.12/site-packages/lark/exceptions.py')
| mode | path | lines added |
|---|---|---|
| -rw-r--r-- | .venv/lib/python3.12/site-packages/lark/exceptions.py | 267 |

1 file changed, 267 insertions, 0 deletions
```diff
diff --git a/.venv/lib/python3.12/site-packages/lark/exceptions.py b/.venv/lib/python3.12/site-packages/lark/exceptions.py
new file mode 100644
index 00000000..9f187531
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lark/exceptions.py
@@ -0,0 +1,267 @@
+from warnings import warn
+
+from .utils import STRING_TYPE, logger, NO_VALUE
+
+
+###{standalone
+
+
+class LarkError(Exception):
+    pass
+
+
+class ConfigurationError(LarkError, ValueError):
+    pass
+
+
+def assert_config(value, options, msg='Got %r, expected one of %s'):
+    if value not in options:
+        raise ConfigurationError(msg % (value, options))
+
+
+class GrammarError(LarkError):
+    pass
+
+
+class ParseError(LarkError):
+    pass
+
+
+class LexError(LarkError):
+    pass
+
+
+class UnexpectedInput(LarkError):
+    """UnexpectedInput Error.
+
+    Used as a base class for the following exceptions:
+
+    - ``UnexpectedCharacters``: The lexer encountered an unexpected string
+    - ``UnexpectedToken``: The parser received an unexpected token
+    - ``UnexpectedEOF``: The parser expected a token, but the input ended
+
+    After catching one of these exceptions, you may call the following helper methods to create a nicer error message.
+    """
+    pos_in_stream = None
+    _terminals_by_name = None
+
+    def get_context(self, text, span=40):
+        """Returns a pretty string pinpointing the error in the text,
+        with span amount of context characters around it.
+
+        Note:
+            The parser doesn't hold a copy of the text it has to parse,
+            so you have to provide it again
+        """
+        assert self.pos_in_stream is not None, self
+        pos = self.pos_in_stream
+        start = max(pos - span, 0)
+        end = pos + span
+        if not isinstance(text, bytes):
+            before = text[start:pos].rsplit('\n', 1)[-1]
+            after = text[pos:end].split('\n', 1)[0]
+            return before + after + '\n' + ' ' * len(before.expandtabs()) + '^\n'
+        else:
+            before = text[start:pos].rsplit(b'\n', 1)[-1]
+            after = text[pos:end].split(b'\n', 1)[0]
+            return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace")
+
+    def match_examples(self, parse_fn, examples, token_type_match_fallback=False, use_accepts=False):
+        """Allows you to detect what's wrong in the input text by matching
+        against example errors.
+
+        Given a parser instance and a dictionary mapping some label with
+        some malformed syntax examples, it'll return the label for the
+        example that best matches the current error. The function will
+        iterate the dictionary until it finds a matching error, and
+        return the corresponding value.
+
+        For an example usage, see `examples/error_reporting_lalr.py`
+
+        Parameters:
+            parse_fn: parse function (usually ``lark_instance.parse``)
+            examples: dictionary of ``{label: [malformed_string, ...]}``.
+            use_accepts: Recommended to call this with ``use_accepts=True``.
+                The default is ``False`` for backwards compatibility.
+        """
+        assert self.state is not None, "Not supported for this exception"
+
+        if isinstance(examples, dict):
+            examples = examples.items()
+
+        candidate = (None, False)
+        for i, (label, example) in enumerate(examples):
+            assert not isinstance(example, STRING_TYPE)
+
+            for j, malformed in enumerate(example):
+                try:
+                    parse_fn(malformed)
+                except UnexpectedInput as ut:
+                    if ut.state == self.state:
+                        if use_accepts and hasattr(self, 'accepts') and ut.accepts != self.accepts:
+                            logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
+                                         (self.state, self.accepts, ut.accepts, i, j))
+                            continue
+                        try:
+                            if ut.token == self.token:  # Try exact match first
+                                logger.debug("Exact Match at example [%s][%s]" % (i, j))
+                                return label
+
+                            if token_type_match_fallback:
+                                # Fallback to token types match
+                                if (ut.token.type == self.token.type) and not candidate[-1]:
+                                    logger.debug("Token Type Fallback at example [%s][%s]" % (i, j))
+                                    candidate = label, True
+
+                        except AttributeError:
+                            pass
+                        if candidate[0] is None:
+                            logger.debug("Same State match at example [%s][%s]" % (i, j))
+                            candidate = label, False
+
+        return candidate[0]
+
+    def _format_expected(self, expected):
+        if self._terminals_by_name:
+            d = self._terminals_by_name
+            expected = [d[t_name].user_repr() if t_name in d else t_name for t_name in expected]
+        return "Expected one of: \n\t* %s\n" % '\n\t* '.join(expected)
+
+
+class UnexpectedEOF(ParseError, UnexpectedInput):
+    """An exception that is raised by the parser, when the input ends while it still expects a token.
+    """
+
+    def __init__(self, expected, state=None, terminals_by_name=None):
+        super(UnexpectedEOF, self).__init__()
+
+        self.expected = expected
+        self.state = state
+        from .lexer import Token
+        self.token = Token("<EOF>", "")  # , line=-1, column=-1, pos_in_stream=-1)
+        self.pos_in_stream = -1
+        self.line = -1
+        self.column = -1
+        self._terminals_by_name = terminals_by_name
+
+    def __str__(self):
+        message = "Unexpected end-of-input. "
+        message += self._format_expected(self.expected)
+        return message
+
+
+class UnexpectedCharacters(LexError, UnexpectedInput):
+    """An exception that is raised by the lexer, when it cannot match the next
+    string of characters to any of its terminals.
+    """
+
+    def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None,
+                 terminals_by_name=None, considered_rules=None):
+        super(UnexpectedCharacters, self).__init__()
+
+        # TODO considered_tokens and allowed can be figured out using state
+        self.line = line
+        self.column = column
+        self.pos_in_stream = lex_pos
+        self.state = state
+        self._terminals_by_name = terminals_by_name
+
+        self.allowed = allowed
+        self.considered_tokens = considered_tokens
+        self.considered_rules = considered_rules
+        self.token_history = token_history
+
+        if isinstance(seq, bytes):
+            self.char = seq[lex_pos:lex_pos + 1].decode("ascii", "backslashreplace")
+        else:
+            self.char = seq[lex_pos]
+        self._context = self.get_context(seq)
+
+    def __str__(self):
+        message = "No terminal matches '%s' in the current parser context, at line %d col %d" % (self.char, self.line, self.column)
+        message += '\n\n' + self._context
+        if self.allowed:
+            message += self._format_expected(self.allowed)
+        if self.token_history:
+            message += '\nPrevious tokens: %s\n' % ', '.join(repr(t) for t in self.token_history)
+        return message
+
+
+class UnexpectedToken(ParseError, UnexpectedInput):
+    """An exception that is raised by the parser, when the token it received
+    doesn't match any valid step forward.
+
+    Parameters:
+        token: The mismatched token
+        expected: The set of expected tokens
+        considered_rules: Which rules were considered, to deduce the expected tokens
+        state: A value representing the parser state. Do not rely on its value or type.
+        interactive_parser: An instance of ``InteractiveParser``, that is initialized to the point of failure,
+            and can be used for debugging and error handling.
+
+    Note: These parameters are available as attributes of the instance.
+    """
+
+    def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None):
+        super(UnexpectedToken, self).__init__()
+
+        # TODO considered_rules and expected can be figured out using state
+        self.line = getattr(token, 'line', '?')
+        self.column = getattr(token, 'column', '?')
+        self.pos_in_stream = getattr(token, 'start_pos', None)
+        self.state = state
+
+        self.token = token
+        self.expected = expected  # XXX deprecate? `accepts` is better
+        self._accepts = NO_VALUE
+        self.considered_rules = considered_rules
+        self.interactive_parser = interactive_parser
+        self._terminals_by_name = terminals_by_name
+        self.token_history = token_history
+
+    @property
+    def accepts(self):
+        if self._accepts is NO_VALUE:
+            self._accepts = self.interactive_parser and self.interactive_parser.accepts()
+        return self._accepts
+
+    def __str__(self):
+        message = ("Unexpected token %r at line %s, column %s.\n%s"
+                   % (self.token, self.line, self.column, self._format_expected(self.accepts or self.expected)))
+        if self.token_history:
+            message += "Previous tokens: %r\n" % self.token_history
+
+        return message
+
+    @property
+    def puppet(self):
+        warn("UnexpectedToken.puppet attribute has been renamed to interactive_parser", DeprecationWarning)
+        return self.interactive_parser
+
+
+class VisitError(LarkError):
+    """VisitError is raised when visitors are interrupted by an exception
+
+    It provides the following attributes for inspection:
+
+    Parameters:
+        rule: the name of the visit rule that failed
+        obj: the tree-node or token that was being processed
+        orig_exc: the exception that caused it to fail
+
+    Note: These parameters are available as attributes
+    """
+
+    def __init__(self, rule, obj, orig_exc):
+        message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc)
+        super(VisitError, self).__init__(message)
+
+        self.rule = rule
+        self.obj = obj
+        self.orig_exc = orig_exc
+
+###}
```