about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/jmespath/lexer.py
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/jmespath/lexer.py')
-rw-r--r--.venv/lib/python3.12/site-packages/jmespath/lexer.py208
1 file changed, 208 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/jmespath/lexer.py b/.venv/lib/python3.12/site-packages/jmespath/lexer.py
new file mode 100644
index 00000000..8db05e37
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/jmespath/lexer.py
@@ -0,0 +1,208 @@
+import string
+import warnings
+from json import loads
+
+from jmespath.exceptions import LexerError, EmptyExpressionError
+
+
class Lexer(object):
    """Tokenizer for JMESPath expressions.

    ``tokenize`` yields token dicts of the form
    ``{'type': ..., 'value': ..., 'start': ..., 'end': ...}`` where
    ``start``/``end`` are half-open character offsets into the original
    expression (``expression[start:end]`` is the token's text).
    """
    # Characters that may start / continue an unquoted identifier.
    START_IDENTIFIER = set(string.ascii_letters + '_')
    VALID_IDENTIFIER = set(string.ascii_letters + string.digits + '_')
    VALID_NUMBER = set(string.digits)
    WHITESPACE = set(" \t\n\r")
    # Single characters that map directly to a token type.
    SIMPLE_TOKENS = {
        '.': 'dot',
        '*': 'star',
        ']': 'rbracket',
        ',': 'comma',
        ':': 'colon',
        '@': 'current',
        '(': 'lparen',
        ')': 'rparen',
        '{': 'lbrace',
        '}': 'rbrace',
    }

    def tokenize(self, expression):
        """Yield token dicts for *expression*, ending with an ``eof`` token.

        :raises EmptyExpressionError: if *expression* is empty/falsy.
        :raises LexerError: on any unrecognized or malformed token.
        """
        self._initialize_for_expression(expression)
        while self._current is not None:
            if self._current in self.SIMPLE_TOKENS:
                yield {'type': self.SIMPLE_TOKENS[self._current],
                       'value': self._current,
                       'start': self._position, 'end': self._position + 1}
                self._next()
            elif self._current in self.START_IDENTIFIER:
                start = self._position
                buff = self._current
                while self._next() in self.VALID_IDENTIFIER:
                    buff += self._current
                yield {'type': 'unquoted_identifier', 'value': buff,
                       'start': start, 'end': start + len(buff)}
            elif self._current in self.WHITESPACE:
                self._next()
            elif self._current == '[':
                start = self._position
                next_char = self._next()
                if next_char == ']':
                    self._next()
                    yield {'type': 'flatten', 'value': '[]',
                           'start': start, 'end': start + 2}
                elif next_char == '?':
                    self._next()
                    yield {'type': 'filter', 'value': '[?',
                           'start': start, 'end': start + 2}
                else:
                    yield {'type': 'lbracket', 'value': '[',
                           'start': start, 'end': start + 1}
            elif self._current == "'":
                yield self._consume_raw_string_literal()
            elif self._current == '|':
                yield self._match_or_else('|', 'or', 'pipe')
            elif self._current == '&':
                yield self._match_or_else('&', 'and', 'expref')
            elif self._current == '`':
                yield self._consume_literal()
            elif self._current in self.VALID_NUMBER:
                start = self._position
                buff = self._consume_number()
                yield {'type': 'number', 'value': int(buff),
                       'start': start, 'end': start + len(buff)}
            elif self._current == '-':
                # Negative number: '-' must be followed by at least one digit.
                start = self._position
                buff = self._consume_number()
                if len(buff) > 1:
                    yield {'type': 'number', 'value': int(buff),
                           'start': start, 'end': start + len(buff)}
                else:
                    raise LexerError(lexer_position=start,
                                     lexer_value=buff,
                                     message="Unknown token '%s'" % buff)
            elif self._current == '"':
                yield self._consume_quoted_identifier()
            elif self._current == '<':
                yield self._match_or_else('=', 'lte', 'lt')
            elif self._current == '>':
                yield self._match_or_else('=', 'gte', 'gt')
            elif self._current == '!':
                yield self._match_or_else('=', 'ne', 'not')
            elif self._current == '=':
                if self._next() == '=':
                    # BUGFIX: 'end' previously equaled self._position, which
                    # points AT the second '=' rather than one past it.
                    yield {'type': 'eq', 'value': '==',
                           'start': self._position - 1,
                           'end': self._position + 1}
                    self._next()
                else:
                    if self._current is None:
                        # If we're at the EOF, we never advanced
                        # the position so we don't need to rewind
                        # it back one location.
                        position = self._position
                    else:
                        position = self._position - 1
                    raise LexerError(
                        lexer_position=position,
                        lexer_value='=',
                        message="Unknown token '='")
            else:
                raise LexerError(lexer_position=self._position,
                                 lexer_value=self._current,
                                 message="Unknown token %s" % self._current)
        yield {'type': 'eof', 'value': '',
               'start': self._length, 'end': self._length}

    def _consume_number(self):
        # Accumulate the current character plus any following digits.
        # (Removed an unused local 'start' from the original.)
        buff = self._current
        while self._next() in self.VALID_NUMBER:
            buff += self._current
        return buff

    def _initialize_for_expression(self, expression):
        # Reset all per-expression lexer state.
        if not expression:
            raise EmptyExpressionError()
        self._position = 0
        self._expression = expression
        self._chars = list(self._expression)
        self._current = self._chars[self._position]
        self._length = len(self._expression)

    def _next(self):
        # Advance to the next character.  At end of input _current becomes
        # None but _position intentionally stays on the last character.
        if self._position == self._length - 1:
            self._current = None
        else:
            self._position += 1
            self._current = self._chars[self._position]
        return self._current

    def _token_end(self):
        # End offset (exclusive) of a token whose final character was just
        # consumed.  Because _next() never advances _position past the last
        # character, a token that ends the expression ends at self._length.
        if self._current is None:
            return self._length
        return self._position

    def _consume_until(self, delimiter):
        # Consume until the delimiter is reached,
        # allowing for the delimiter to be escaped with "\".
        # Returns the raw text between the delimiters (escapes intact);
        # on return the lexer is positioned past the closing delimiter.
        start = self._position
        buff = ''
        self._next()
        while self._current != delimiter:
            if self._current == '\\':
                buff += '\\'
                self._next()
            if self._current is None:
                # We're at the EOF.
                raise LexerError(lexer_position=start,
                                 lexer_value=self._expression[start:],
                                 message="Unclosed %s delimiter" % delimiter)
            buff += self._current
            self._next()
        # Skip the closing delimiter.
        self._next()
        return buff

    def _consume_literal(self):
        # Backtick-delimited JSON literal, e.g. `[1, 2]`.
        start = self._position
        lexeme = self._consume_until('`').replace('\\`', '`')
        try:
            # Assume it is valid JSON and attempt to parse.
            parsed_json = loads(lexeme)
        except ValueError:
            try:
                # Invalid JSON values should be converted to quoted
                # JSON strings during the JEP-12 deprecation period.
                parsed_json = loads('"%s"' % lexeme.lstrip())
                warnings.warn("deprecated string literal syntax",
                              PendingDeprecationWarning)
            except ValueError:
                raise LexerError(lexer_position=start,
                                 lexer_value=self._expression[start:],
                                 message="Bad token %s" % lexeme)
        # BUGFIX: 'end' was previously the token *length*, not an offset.
        return {'type': 'literal', 'value': parsed_json,
                'start': start, 'end': self._token_end()}

    def _consume_quoted_identifier(self):
        # Double-quoted identifier; the contents must be a JSON string.
        start = self._position
        lexeme = '"' + self._consume_until('"') + '"'
        try:
            # BUGFIX: 'end' was previously the token *length*, not an offset.
            return {'type': 'quoted_identifier', 'value': loads(lexeme),
                    'start': start, 'end': self._token_end()}
        except ValueError as e:
            error_message = str(e).split(':')[0]
            raise LexerError(lexer_position=start,
                             lexer_value=lexeme,
                             message=error_message)

    def _consume_raw_string_literal(self):
        # Single-quoted raw string; only \' is unescaped.
        start = self._position
        lexeme = self._consume_until("'").replace("\\'", "'")
        # BUGFIX: 'end' was previously the token *length*, not an offset.
        return {'type': 'literal', 'value': lexeme,
                'start': start, 'end': self._token_end()}

    def _match_or_else(self, expected, match_type, else_type):
        # Two-character lookahead: 'X' followed by *expected* lexes as
        # *match_type*; otherwise 'X' alone lexes as *else_type*.
        start = self._position
        current = self._current
        next_char = self._next()
        if next_char == expected:
            self._next()
            # BUGFIX: a two-character token ends at start + 2 (was start + 1).
            return {'type': match_type, 'value': current + next_char,
                    'start': start, 'end': start + 2}
        # BUGFIX: a one-character token ends at start + 1 (was zero-width).
        return {'type': else_type, 'value': current,
                'start': start, 'end': start + 1}