aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/jmespath/lexer.py
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/jmespath/lexer.py')
-rw-r--r--.venv/lib/python3.12/site-packages/jmespath/lexer.py208
1 files changed, 208 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/jmespath/lexer.py b/.venv/lib/python3.12/site-packages/jmespath/lexer.py
new file mode 100644
index 00000000..8db05e37
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/jmespath/lexer.py
@@ -0,0 +1,208 @@
+import string
+import warnings
+from json import loads
+
+from jmespath.exceptions import LexerError, EmptyExpressionError
+
+
+class Lexer(object):
+ START_IDENTIFIER = set(string.ascii_letters + '_')
+ VALID_IDENTIFIER = set(string.ascii_letters + string.digits + '_')
+ VALID_NUMBER = set(string.digits)
+ WHITESPACE = set(" \t\n\r")
+ SIMPLE_TOKENS = {
+ '.': 'dot',
+ '*': 'star',
+ ']': 'rbracket',
+ ',': 'comma',
+ ':': 'colon',
+ '@': 'current',
+ '(': 'lparen',
+ ')': 'rparen',
+ '{': 'lbrace',
+ '}': 'rbrace',
+ }
+
+ def tokenize(self, expression):
+ self._initialize_for_expression(expression)
+ while self._current is not None:
+ if self._current in self.SIMPLE_TOKENS:
+ yield {'type': self.SIMPLE_TOKENS[self._current],
+ 'value': self._current,
+ 'start': self._position, 'end': self._position + 1}
+ self._next()
+ elif self._current in self.START_IDENTIFIER:
+ start = self._position
+ buff = self._current
+ while self._next() in self.VALID_IDENTIFIER:
+ buff += self._current
+ yield {'type': 'unquoted_identifier', 'value': buff,
+ 'start': start, 'end': start + len(buff)}
+ elif self._current in self.WHITESPACE:
+ self._next()
+ elif self._current == '[':
+ start = self._position
+ next_char = self._next()
+ if next_char == ']':
+ self._next()
+ yield {'type': 'flatten', 'value': '[]',
+ 'start': start, 'end': start + 2}
+ elif next_char == '?':
+ self._next()
+ yield {'type': 'filter', 'value': '[?',
+ 'start': start, 'end': start + 2}
+ else:
+ yield {'type': 'lbracket', 'value': '[',
+ 'start': start, 'end': start + 1}
+ elif self._current == "'":
+ yield self._consume_raw_string_literal()
+ elif self._current == '|':
+ yield self._match_or_else('|', 'or', 'pipe')
+ elif self._current == '&':
+ yield self._match_or_else('&', 'and', 'expref')
+ elif self._current == '`':
+ yield self._consume_literal()
+ elif self._current in self.VALID_NUMBER:
+ start = self._position
+ buff = self._consume_number()
+ yield {'type': 'number', 'value': int(buff),
+ 'start': start, 'end': start + len(buff)}
+ elif self._current == '-':
+ # Negative number.
+ start = self._position
+ buff = self._consume_number()
+ if len(buff) > 1:
+ yield {'type': 'number', 'value': int(buff),
+ 'start': start, 'end': start + len(buff)}
+ else:
+ raise LexerError(lexer_position=start,
+ lexer_value=buff,
+ message="Unknown token '%s'" % buff)
+ elif self._current == '"':
+ yield self._consume_quoted_identifier()
+ elif self._current == '<':
+ yield self._match_or_else('=', 'lte', 'lt')
+ elif self._current == '>':
+ yield self._match_or_else('=', 'gte', 'gt')
+ elif self._current == '!':
+ yield self._match_or_else('=', 'ne', 'not')
+ elif self._current == '=':
+ if self._next() == '=':
+ yield {'type': 'eq', 'value': '==',
+ 'start': self._position - 1, 'end': self._position}
+ self._next()
+ else:
+ if self._current is None:
+ # If we're at the EOF, we never advanced
+ # the position so we don't need to rewind
+ # it back one location.
+ position = self._position
+ else:
+ position = self._position - 1
+ raise LexerError(
+ lexer_position=position,
+ lexer_value='=',
+ message="Unknown token '='")
+ else:
+ raise LexerError(lexer_position=self._position,
+ lexer_value=self._current,
+ message="Unknown token %s" % self._current)
+ yield {'type': 'eof', 'value': '',
+ 'start': self._length, 'end': self._length}
+
+ def _consume_number(self):
+ start = self._position
+ buff = self._current
+ while self._next() in self.VALID_NUMBER:
+ buff += self._current
+ return buff
+
+ def _initialize_for_expression(self, expression):
+ if not expression:
+ raise EmptyExpressionError()
+ self._position = 0
+ self._expression = expression
+ self._chars = list(self._expression)
+ self._current = self._chars[self._position]
+ self._length = len(self._expression)
+
+ def _next(self):
+ if self._position == self._length - 1:
+ self._current = None
+ else:
+ self._position += 1
+ self._current = self._chars[self._position]
+ return self._current
+
+ def _consume_until(self, delimiter):
+ # Consume until the delimiter is reached,
+ # allowing for the delimiter to be escaped with "\".
+ start = self._position
+ buff = ''
+ self._next()
+ while self._current != delimiter:
+ if self._current == '\\':
+ buff += '\\'
+ self._next()
+ if self._current is None:
+ # We're at the EOF.
+ raise LexerError(lexer_position=start,
+ lexer_value=self._expression[start:],
+ message="Unclosed %s delimiter" % delimiter)
+ buff += self._current
+ self._next()
+ # Skip the closing delimiter.
+ self._next()
+ return buff
+
+ def _consume_literal(self):
+ start = self._position
+ lexeme = self._consume_until('`').replace('\\`', '`')
+ try:
+ # Assume it is valid JSON and attempt to parse.
+ parsed_json = loads(lexeme)
+ except ValueError:
+ try:
+ # Invalid JSON values should be converted to quoted
+ # JSON strings during the JEP-12 deprecation period.
+ parsed_json = loads('"%s"' % lexeme.lstrip())
+ warnings.warn("deprecated string literal syntax",
+ PendingDeprecationWarning)
+ except ValueError:
+ raise LexerError(lexer_position=start,
+ lexer_value=self._expression[start:],
+ message="Bad token %s" % lexeme)
+ token_len = self._position - start
+ return {'type': 'literal', 'value': parsed_json,
+ 'start': start, 'end': token_len}
+
+ def _consume_quoted_identifier(self):
+ start = self._position
+ lexeme = '"' + self._consume_until('"') + '"'
+ try:
+ token_len = self._position - start
+ return {'type': 'quoted_identifier', 'value': loads(lexeme),
+ 'start': start, 'end': token_len}
+ except ValueError as e:
+ error_message = str(e).split(':')[0]
+ raise LexerError(lexer_position=start,
+ lexer_value=lexeme,
+ message=error_message)
+
+ def _consume_raw_string_literal(self):
+ start = self._position
+ lexeme = self._consume_until("'").replace("\\'", "'")
+ token_len = self._position - start
+ return {'type': 'literal', 'value': lexeme,
+ 'start': start, 'end': token_len}
+
+ def _match_or_else(self, expected, match_type, else_type):
+ start = self._position
+ current = self._current
+ next_char = self._next()
+ if next_char == expected:
+ self._next()
+ return {'type': match_type, 'value': current + next_char,
+ 'start': start, 'end': start + 1}
+ return {'type': else_type, 'value': current,
+ 'start': start, 'end': start}