Diffstat (limited to '.venv/lib/python3.12/site-packages/jmespath/lexer.py')
-rw-r--r--  .venv/lib/python3.12/site-packages/jmespath/lexer.py  208
1 file changed, 208 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/jmespath/lexer.py b/.venv/lib/python3.12/site-packages/jmespath/lexer.py
new file mode 100644
index 00000000..8db05e37
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/jmespath/lexer.py
@@ -0,0 +1,208 @@
+import string
+import warnings
+from json import loads
+
+from jmespath.exceptions import LexerError, EmptyExpressionError
+
+
+class Lexer(object):
+    START_IDENTIFIER = set(string.ascii_letters + '_')
+    VALID_IDENTIFIER = set(string.ascii_letters + string.digits + '_')
+    VALID_NUMBER = set(string.digits)
+    WHITESPACE = set(" \t\n\r")
+    SIMPLE_TOKENS = {
+        '.': 'dot',
+        '*': 'star',
+        ']': 'rbracket',
+        ',': 'comma',
+        ':': 'colon',
+        '@': 'current',
+        '(': 'lparen',
+        ')': 'rparen',
+        '{': 'lbrace',
+        '}': 'rbrace',
+    }
+
+    def tokenize(self, expression):
+        self._initialize_for_expression(expression)
+        while self._current is not None:
+            if self._current in self.SIMPLE_TOKENS:
+                yield {'type': self.SIMPLE_TOKENS[self._current],
+                       'value': self._current,
+                       'start': self._position, 'end': self._position + 1}
+                self._next()
+            elif self._current in self.START_IDENTIFIER:
+                start = self._position
+                buff = self._current
+                while self._next() in self.VALID_IDENTIFIER:
+                    buff += self._current
+                yield {'type': 'unquoted_identifier', 'value': buff,
+                       'start': start, 'end': start + len(buff)}
+            elif self._current in self.WHITESPACE:
+                self._next()
+            elif self._current == '[':
+                start = self._position
+                next_char = self._next()
+                if next_char == ']':
+                    self._next()
+                    yield {'type': 'flatten', 'value': '[]',
+                           'start': start, 'end': start + 2}
+                elif next_char == '?':
+                    self._next()
+                    yield {'type': 'filter', 'value': '[?',
+                           'start': start, 'end': start + 2}
+                else:
+                    yield {'type': 'lbracket', 'value': '[',
+                           'start': start, 'end': start + 1}
+            elif self._current == "'":
+                yield self._consume_raw_string_literal()
+            elif self._current == '|':
+                yield self._match_or_else('|', 'or', 'pipe')
+            elif self._current == '&':
+                yield self._match_or_else('&', 'and', 'expref')
+            elif self._current == '`':
+                yield self._consume_literal()
+            elif self._current in self.VALID_NUMBER:
+                start = self._position
+                buff = self._consume_number()
+                yield {'type': 'number', 'value': int(buff),
+                       'start': start, 'end': start + len(buff)}
+            elif self._current == '-':
+                # Negative number.
+                start = self._position
+                buff = self._consume_number()
+                if len(buff) > 1:
+                    yield {'type': 'number', 'value': int(buff),
+                           'start': start, 'end': start + len(buff)}
+                else:
+                    raise LexerError(lexer_position=start,
+                                     lexer_value=buff,
+                                     message="Unknown token '%s'" % buff)
+            elif self._current == '"':
+                yield self._consume_quoted_identifier()
+            elif self._current == '<':
+                yield self._match_or_else('=', 'lte', 'lt')
+            elif self._current == '>':
+                yield self._match_or_else('=', 'gte', 'gt')
+            elif self._current == '!':
+                yield self._match_or_else('=', 'ne', 'not')
+            elif self._current == '=':
+                if self._next() == '=':
+                    yield {'type': 'eq', 'value': '==',
+                           'start': self._position - 1, 'end': self._position}
+                    self._next()
+                else:
+                    if self._current is None:
+                        # If we're at the EOF, we never advanced
+                        # the position so we don't need to rewind
+                        # it back one location.
+                        position = self._position
+                    else:
+                        position = self._position - 1
+                    raise LexerError(
+                        lexer_position=position,
+                        lexer_value='=',
+                        message="Unknown token '='")
+            else:
+                raise LexerError(lexer_position=self._position,
+                                 lexer_value=self._current,
+                                 message="Unknown token %s" % self._current)
+        yield {'type': 'eof', 'value': '',
+               'start': self._length, 'end': self._length}
+
+    def _consume_number(self):
+        start = self._position
+        buff = self._current
+        while self._next() in self.VALID_NUMBER:
+            buff += self._current
+        return buff
+
+    def _initialize_for_expression(self, expression):
+        if not expression:
+            raise EmptyExpressionError()
+        self._position = 0
+        self._expression = expression
+        self._chars = list(self._expression)
+        self._current = self._chars[self._position]
+        self._length = len(self._expression)
+
+    def _next(self):
+        if self._position == self._length - 1:
+            self._current = None
+        else:
+            self._position += 1
+            self._current = self._chars[self._position]
+        return self._current
+
+    def _consume_until(self, delimiter):
+        # Consume until the delimiter is reached,
+        # allowing for the delimiter to be escaped with "\".
+        start = self._position
+        buff = ''
+        self._next()
+        while self._current != delimiter:
+            if self._current == '\\':
+                buff += '\\'
+                self._next()
+            if self._current is None:
+                # We're at the EOF.
+                raise LexerError(lexer_position=start,
+                                 lexer_value=self._expression[start:],
+                                 message="Unclosed %s delimiter" % delimiter)
+            buff += self._current
+            self._next()
+        # Skip the closing delimiter.
+        self._next()
+        return buff
+
+    def _consume_literal(self):
+        start = self._position
+        lexeme = self._consume_until('`').replace('\\`', '`')
+        try:
+            # Assume it is valid JSON and attempt to parse.
+            parsed_json = loads(lexeme)
+        except ValueError:
+            try:
+                # Invalid JSON values should be converted to quoted
+                # JSON strings during the JEP-12 deprecation period.
+                parsed_json = loads('"%s"' % lexeme.lstrip())
+                warnings.warn("deprecated string literal syntax",
+                              PendingDeprecationWarning)
+            except ValueError:
+                raise LexerError(lexer_position=start,
+                                 lexer_value=self._expression[start:],
+                                 message="Bad token %s" % lexeme)
+        token_len = self._position - start
+        return {'type': 'literal', 'value': parsed_json,
+                'start': start, 'end': token_len}
+
+    def _consume_quoted_identifier(self):
+        start = self._position
+        lexeme = '"' + self._consume_until('"') + '"'
+        try:
+            token_len = self._position - start
+            return {'type': 'quoted_identifier', 'value': loads(lexeme),
+                    'start': start, 'end': token_len}
+        except ValueError as e:
+            error_message = str(e).split(':')[0]
+            raise LexerError(lexer_position=start,
+                             lexer_value=lexeme,
+                             message=error_message)
+
+    def _consume_raw_string_literal(self):
+        start = self._position
+        lexeme = self._consume_until("'").replace("\\'", "'")
+        token_len = self._position - start
+        return {'type': 'literal', 'value': lexeme,
+                'start': start, 'end': token_len}
+
+    def _match_or_else(self, expected, match_type, else_type):
+        start = self._position
+        current = self._current
+        next_char = self._next()
+        if next_char == expected:
+            self._next()
+            return {'type': match_type, 'value': current + next_char,
+                    'start': start, 'end': start + 1}
+        return {'type': else_type, 'value': current,
+                'start': start, 'end': start}
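A minimal usage sketch for the lexer added above. The Lexer class, the tokenize() generator, and the token dict keys ('type', 'value', 'start', 'end') all come straight from the diff; the expression strings are illustrative, and the sketch assumes LexerError exposes its lexer_position/lexer_value keyword arguments as attributes.

from jmespath.lexer import Lexer
from jmespath.exceptions import LexerError

lexer = Lexer()

# Walk the token stream for a filter expression; expected types, in
# order: unquoted_identifier, filter, unquoted_identifier, gt, literal,
# rbracket, dot, unquoted_identifier, eof.
for token in lexer.tokenize("foo[?age > `25`].name"):
    print(token['type'], repr(token['value']))

# Lexical errors are raised lazily, when the generator is consumed.
try:
    list(lexer.tokenize("foo ="))  # a lone '=' is not a valid token
except LexerError as e:
    # Assumed attributes, mirroring the keyword args in the raises above.
    print(e.lexer_position, e.lexer_value)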