diff options
author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
---|---|---|
committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch) | |
tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/jmespath | |
parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff) | |
download | gn-ai-master.tar.gz |
Diffstat (limited to '.venv/lib/python3.12/site-packages/jmespath')
-rw-r--r-- | .venv/lib/python3.12/site-packages/jmespath/__init__.py | 12 | ||||
-rw-r--r-- | .venv/lib/python3.12/site-packages/jmespath/ast.py | 90 | ||||
-rw-r--r-- | .venv/lib/python3.12/site-packages/jmespath/compat.py | 19 | ||||
-rw-r--r-- | .venv/lib/python3.12/site-packages/jmespath/exceptions.py | 122 | ||||
-rw-r--r-- | .venv/lib/python3.12/site-packages/jmespath/functions.py | 362 | ||||
-rw-r--r-- | .venv/lib/python3.12/site-packages/jmespath/lexer.py | 208 | ||||
-rw-r--r-- | .venv/lib/python3.12/site-packages/jmespath/parser.py | 527 | ||||
-rw-r--r-- | .venv/lib/python3.12/site-packages/jmespath/visitor.py | 328 |
8 files changed, 1668 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/jmespath/__init__.py b/.venv/lib/python3.12/site-packages/jmespath/__init__.py new file mode 100644 index 00000000..c2439e37 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/jmespath/__init__.py @@ -0,0 +1,12 @@ +from jmespath import parser +from jmespath.visitor import Options + +__version__ = '1.0.1' + + +def compile(expression): + return parser.Parser().parse(expression) + + +def search(expression, data, options=None): + return parser.Parser().parse(expression).search(data, options=options) diff --git a/.venv/lib/python3.12/site-packages/jmespath/ast.py b/.venv/lib/python3.12/site-packages/jmespath/ast.py new file mode 100644 index 00000000..dd56c6ed --- /dev/null +++ b/.venv/lib/python3.12/site-packages/jmespath/ast.py @@ -0,0 +1,90 @@ +# AST nodes have this structure: +# {"type": <node type>", children: [], "value": ""} + + +def comparator(name, first, second): + return {'type': 'comparator', 'children': [first, second], 'value': name} + + +def current_node(): + return {'type': 'current', 'children': []} + + +def expref(expression): + return {'type': 'expref', 'children': [expression]} + + +def function_expression(name, args): + return {'type': 'function_expression', 'children': args, 'value': name} + + +def field(name): + return {"type": "field", "children": [], "value": name} + + +def filter_projection(left, right, comparator): + return {'type': 'filter_projection', 'children': [left, right, comparator]} + + +def flatten(node): + return {'type': 'flatten', 'children': [node]} + + +def identity(): + return {"type": "identity", 'children': []} + + +def index(index): + return {"type": "index", "value": index, "children": []} + + +def index_expression(children): + return {"type": "index_expression", 'children': children} + + +def key_val_pair(key_name, node): + return {"type": "key_val_pair", 'children': [node], "value": key_name} + + +def literal(literal_value): + return {'type': 'literal', 'value': 
literal_value, 'children': []} + + +def multi_select_dict(nodes): + return {"type": "multi_select_dict", "children": nodes} + + +def multi_select_list(nodes): + return {"type": "multi_select_list", "children": nodes} + + +def or_expression(left, right): + return {"type": "or_expression", "children": [left, right]} + + +def and_expression(left, right): + return {"type": "and_expression", "children": [left, right]} + + +def not_expression(expr): + return {"type": "not_expression", "children": [expr]} + + +def pipe(left, right): + return {'type': 'pipe', 'children': [left, right]} + + +def projection(left, right): + return {'type': 'projection', 'children': [left, right]} + + +def subexpression(children): + return {"type": "subexpression", 'children': children} + + +def slice(start, end, step): + return {"type": "slice", "children": [start, end, step]} + + +def value_projection(left, right): + return {'type': 'value_projection', 'children': [left, right]} diff --git a/.venv/lib/python3.12/site-packages/jmespath/compat.py b/.venv/lib/python3.12/site-packages/jmespath/compat.py new file mode 100644 index 00000000..50f8f277 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/jmespath/compat.py @@ -0,0 +1,19 @@ +import sys +import inspect +from itertools import zip_longest + + +text_type = str +string_type = str + + +def with_str_method(cls): + # In python3, we don't need to do anything, we return a str type. 
+ return cls + +def with_repr_method(cls): + return cls + +def get_methods(cls): + for name, method in inspect.getmembers(cls, predicate=inspect.isfunction): + yield name, method diff --git a/.venv/lib/python3.12/site-packages/jmespath/exceptions.py b/.venv/lib/python3.12/site-packages/jmespath/exceptions.py new file mode 100644 index 00000000..01560159 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/jmespath/exceptions.py @@ -0,0 +1,122 @@ +from jmespath.compat import with_str_method + + +class JMESPathError(ValueError): + pass + + +@with_str_method +class ParseError(JMESPathError): + _ERROR_MESSAGE = 'Invalid jmespath expression' + def __init__(self, lex_position, token_value, token_type, + msg=_ERROR_MESSAGE): + super(ParseError, self).__init__(lex_position, token_value, token_type) + self.lex_position = lex_position + self.token_value = token_value + self.token_type = token_type.upper() + self.msg = msg + # Whatever catches the ParseError can fill in the full expression + self.expression = None + + def __str__(self): + # self.lex_position +1 to account for the starting double quote char. + underline = ' ' * (self.lex_position + 1) + '^' + return ( + '%s: Parse error at column %s, ' + 'token "%s" (%s), for expression:\n"%s"\n%s' % ( + self.msg, self.lex_position, self.token_value, self.token_type, + self.expression, underline)) + + +@with_str_method +class IncompleteExpressionError(ParseError): + def set_expression(self, expression): + self.expression = expression + self.lex_position = len(expression) + self.token_type = None + self.token_value = None + + def __str__(self): + # self.lex_position +1 to account for the starting double quote char. 
+ underline = ' ' * (self.lex_position + 1) + '^' + return ( + 'Invalid jmespath expression: Incomplete expression:\n' + '"%s"\n%s' % (self.expression, underline)) + + +@with_str_method +class LexerError(ParseError): + def __init__(self, lexer_position, lexer_value, message, expression=None): + self.lexer_position = lexer_position + self.lexer_value = lexer_value + self.message = message + super(LexerError, self).__init__(lexer_position, + lexer_value, + message) + # Whatever catches LexerError can set this. + self.expression = expression + + def __str__(self): + underline = ' ' * self.lexer_position + '^' + return 'Bad jmespath expression: %s:\n%s\n%s' % ( + self.message, self.expression, underline) + + +@with_str_method +class ArityError(ParseError): + def __init__(self, expected, actual, name): + self.expected_arity = expected + self.actual_arity = actual + self.function_name = name + self.expression = None + + def __str__(self): + return ("Expected %s %s for function %s(), " + "received %s" % ( + self.expected_arity, + self._pluralize('argument', self.expected_arity), + self.function_name, + self.actual_arity)) + + def _pluralize(self, word, count): + if count == 1: + return word + else: + return word + 's' + + +@with_str_method +class VariadictArityError(ArityError): + def __str__(self): + return ("Expected at least %s %s for function %s(), " + "received %s" % ( + self.expected_arity, + self._pluralize('argument', self.expected_arity), + self.function_name, + self.actual_arity)) + + +@with_str_method +class JMESPathTypeError(JMESPathError): + def __init__(self, function_name, current_value, actual_type, + expected_types): + self.function_name = function_name + self.current_value = current_value + self.actual_type = actual_type + self.expected_types = expected_types + + def __str__(self): + return ('In function %s(), invalid type for value: %s, ' + 'expected one of: %s, received: "%s"' % ( + self.function_name, self.current_value, + self.expected_types, 
self.actual_type)) + + +class EmptyExpressionError(JMESPathError): + def __init__(self): + super(EmptyExpressionError, self).__init__( + "Invalid JMESPath expression: cannot be empty.") + + +class UnknownFunctionError(JMESPathError): + pass diff --git a/.venv/lib/python3.12/site-packages/jmespath/functions.py b/.venv/lib/python3.12/site-packages/jmespath/functions.py new file mode 100644 index 00000000..11ab56ac --- /dev/null +++ b/.venv/lib/python3.12/site-packages/jmespath/functions.py @@ -0,0 +1,362 @@ +import math +import json + +from jmespath import exceptions +from jmespath.compat import string_type as STRING_TYPE +from jmespath.compat import get_methods + + +# python types -> jmespath types +TYPES_MAP = { + 'bool': 'boolean', + 'list': 'array', + 'dict': 'object', + 'NoneType': 'null', + 'unicode': 'string', + 'str': 'string', + 'float': 'number', + 'int': 'number', + 'long': 'number', + 'OrderedDict': 'object', + '_Projection': 'array', + '_Expression': 'expref', +} + + +# jmespath types -> python types +REVERSE_TYPES_MAP = { + 'boolean': ('bool',), + 'array': ('list', '_Projection'), + 'object': ('dict', 'OrderedDict',), + 'null': ('NoneType',), + 'string': ('unicode', 'str'), + 'number': ('float', 'int', 'long'), + 'expref': ('_Expression',), +} + + +def signature(*arguments): + def _record_signature(func): + func.signature = arguments + return func + return _record_signature + + +class FunctionRegistry(type): + def __init__(cls, name, bases, attrs): + cls._populate_function_table() + super(FunctionRegistry, cls).__init__(name, bases, attrs) + + def _populate_function_table(cls): + function_table = {} + # Any method with a @signature decorator that also + # starts with "_func_" is registered as a function. + # _func_max_by -> max_by function. 
+ for name, method in get_methods(cls): + if not name.startswith('_func_'): + continue + signature = getattr(method, 'signature', None) + if signature is not None: + function_table[name[6:]] = { + 'function': method, + 'signature': signature, + } + cls.FUNCTION_TABLE = function_table + + +class Functions(metaclass=FunctionRegistry): + + FUNCTION_TABLE = { + } + + def call_function(self, function_name, resolved_args): + try: + spec = self.FUNCTION_TABLE[function_name] + except KeyError: + raise exceptions.UnknownFunctionError( + "Unknown function: %s()" % function_name) + function = spec['function'] + signature = spec['signature'] + self._validate_arguments(resolved_args, signature, function_name) + return function(self, *resolved_args) + + def _validate_arguments(self, args, signature, function_name): + if signature and signature[-1].get('variadic'): + if len(args) < len(signature): + raise exceptions.VariadictArityError( + len(signature), len(args), function_name) + elif len(args) != len(signature): + raise exceptions.ArityError( + len(signature), len(args), function_name) + return self._type_check(args, signature, function_name) + + def _type_check(self, actual, signature, function_name): + for i in range(len(signature)): + allowed_types = signature[i]['types'] + if allowed_types: + self._type_check_single(actual[i], allowed_types, + function_name) + + def _type_check_single(self, current, types, function_name): + # Type checking involves checking the top level type, + # and in the case of arrays, potentially checking the types + # of each element. + allowed_types, allowed_subtypes = self._get_allowed_pytypes(types) + # We're not using isinstance() on purpose. + # The type model for jmespath does not map + # 1-1 with python types (booleans are considered + # integers in python for example). 
+ actual_typename = type(current).__name__ + if actual_typename not in allowed_types: + raise exceptions.JMESPathTypeError( + function_name, current, + self._convert_to_jmespath_type(actual_typename), types) + # If we're dealing with a list type, we can have + # additional restrictions on the type of the list + # elements (for example a function can require a + # list of numbers or a list of strings). + # Arrays are the only types that can have subtypes. + if allowed_subtypes: + self._subtype_check(current, allowed_subtypes, + types, function_name) + + def _get_allowed_pytypes(self, types): + allowed_types = [] + allowed_subtypes = [] + for t in types: + type_ = t.split('-', 1) + if len(type_) == 2: + type_, subtype = type_ + allowed_subtypes.append(REVERSE_TYPES_MAP[subtype]) + else: + type_ = type_[0] + allowed_types.extend(REVERSE_TYPES_MAP[type_]) + return allowed_types, allowed_subtypes + + def _subtype_check(self, current, allowed_subtypes, types, function_name): + if len(allowed_subtypes) == 1: + # The easy case, we know up front what type + # we need to validate. + allowed_subtypes = allowed_subtypes[0] + for element in current: + actual_typename = type(element).__name__ + if actual_typename not in allowed_subtypes: + raise exceptions.JMESPathTypeError( + function_name, element, actual_typename, types) + elif len(allowed_subtypes) > 1 and current: + # Dynamic type validation. Based on the first + # type we see, we validate that the remaining types + # match. 
+ first = type(current[0]).__name__ + for subtypes in allowed_subtypes: + if first in subtypes: + allowed = subtypes + break + else: + raise exceptions.JMESPathTypeError( + function_name, current[0], first, types) + for element in current: + actual_typename = type(element).__name__ + if actual_typename not in allowed: + raise exceptions.JMESPathTypeError( + function_name, element, actual_typename, types) + + @signature({'types': ['number']}) + def _func_abs(self, arg): + return abs(arg) + + @signature({'types': ['array-number']}) + def _func_avg(self, arg): + if arg: + return sum(arg) / float(len(arg)) + else: + return None + + @signature({'types': [], 'variadic': True}) + def _func_not_null(self, *arguments): + for argument in arguments: + if argument is not None: + return argument + + @signature({'types': []}) + def _func_to_array(self, arg): + if isinstance(arg, list): + return arg + else: + return [arg] + + @signature({'types': []}) + def _func_to_string(self, arg): + if isinstance(arg, STRING_TYPE): + return arg + else: + return json.dumps(arg, separators=(',', ':'), + default=str) + + @signature({'types': []}) + def _func_to_number(self, arg): + if isinstance(arg, (list, dict, bool)): + return None + elif arg is None: + return None + elif isinstance(arg, (int, float)): + return arg + else: + try: + return int(arg) + except ValueError: + try: + return float(arg) + except ValueError: + return None + + @signature({'types': ['array', 'string']}, {'types': []}) + def _func_contains(self, subject, search): + return search in subject + + @signature({'types': ['string', 'array', 'object']}) + def _func_length(self, arg): + return len(arg) + + @signature({'types': ['string']}, {'types': ['string']}) + def _func_ends_with(self, search, suffix): + return search.endswith(suffix) + + @signature({'types': ['string']}, {'types': ['string']}) + def _func_starts_with(self, search, suffix): + return search.startswith(suffix) + + @signature({'types': ['array', 'string']}) + def 
_func_reverse(self, arg): + if isinstance(arg, STRING_TYPE): + return arg[::-1] + else: + return list(reversed(arg)) + + @signature({"types": ['number']}) + def _func_ceil(self, arg): + return math.ceil(arg) + + @signature({"types": ['number']}) + def _func_floor(self, arg): + return math.floor(arg) + + @signature({"types": ['string']}, {"types": ['array-string']}) + def _func_join(self, separator, array): + return separator.join(array) + + @signature({'types': ['expref']}, {'types': ['array']}) + def _func_map(self, expref, arg): + result = [] + for element in arg: + result.append(expref.visit(expref.expression, element)) + return result + + @signature({"types": ['array-number', 'array-string']}) + def _func_max(self, arg): + if arg: + return max(arg) + else: + return None + + @signature({"types": ["object"], "variadic": True}) + def _func_merge(self, *arguments): + merged = {} + for arg in arguments: + merged.update(arg) + return merged + + @signature({"types": ['array-number', 'array-string']}) + def _func_min(self, arg): + if arg: + return min(arg) + else: + return None + + @signature({"types": ['array-string', 'array-number']}) + def _func_sort(self, arg): + return list(sorted(arg)) + + @signature({"types": ['array-number']}) + def _func_sum(self, arg): + return sum(arg) + + @signature({"types": ['object']}) + def _func_keys(self, arg): + # To be consistent with .values() + # should we also return the indices of a list? 
+ return list(arg.keys()) + + @signature({"types": ['object']}) + def _func_values(self, arg): + return list(arg.values()) + + @signature({'types': []}) + def _func_type(self, arg): + if isinstance(arg, STRING_TYPE): + return "string" + elif isinstance(arg, bool): + return "boolean" + elif isinstance(arg, list): + return "array" + elif isinstance(arg, dict): + return "object" + elif isinstance(arg, (float, int)): + return "number" + elif arg is None: + return "null" + + @signature({'types': ['array']}, {'types': ['expref']}) + def _func_sort_by(self, array, expref): + if not array: + return array + # sort_by allows for the expref to be either a number of + # a string, so we have some special logic to handle this. + # We evaluate the first array element and verify that it's + # either a string of a number. We then create a key function + # that validates that type, which requires that remaining array + # elements resolve to the same type as the first element. + required_type = self._convert_to_jmespath_type( + type(expref.visit(expref.expression, array[0])).__name__) + if required_type not in ['number', 'string']: + raise exceptions.JMESPathTypeError( + 'sort_by', array[0], required_type, ['string', 'number']) + keyfunc = self._create_key_func(expref, + [required_type], + 'sort_by') + return list(sorted(array, key=keyfunc)) + + @signature({'types': ['array']}, {'types': ['expref']}) + def _func_min_by(self, array, expref): + keyfunc = self._create_key_func(expref, + ['number', 'string'], + 'min_by') + if array: + return min(array, key=keyfunc) + else: + return None + + @signature({'types': ['array']}, {'types': ['expref']}) + def _func_max_by(self, array, expref): + keyfunc = self._create_key_func(expref, + ['number', 'string'], + 'max_by') + if array: + return max(array, key=keyfunc) + else: + return None + + def _create_key_func(self, expref, allowed_types, function_name): + def keyfunc(x): + result = expref.visit(expref.expression, x) + actual_typename = 
type(result).__name__ + jmespath_type = self._convert_to_jmespath_type(actual_typename) + # allowed_types is in term of jmespath types, not python types. + if jmespath_type not in allowed_types: + raise exceptions.JMESPathTypeError( + function_name, result, jmespath_type, allowed_types) + return result + return keyfunc + + def _convert_to_jmespath_type(self, pyobject): + return TYPES_MAP.get(pyobject, 'unknown') diff --git a/.venv/lib/python3.12/site-packages/jmespath/lexer.py b/.venv/lib/python3.12/site-packages/jmespath/lexer.py new file mode 100644 index 00000000..8db05e37 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/jmespath/lexer.py @@ -0,0 +1,208 @@ +import string +import warnings +from json import loads + +from jmespath.exceptions import LexerError, EmptyExpressionError + + +class Lexer(object): + START_IDENTIFIER = set(string.ascii_letters + '_') + VALID_IDENTIFIER = set(string.ascii_letters + string.digits + '_') + VALID_NUMBER = set(string.digits) + WHITESPACE = set(" \t\n\r") + SIMPLE_TOKENS = { + '.': 'dot', + '*': 'star', + ']': 'rbracket', + ',': 'comma', + ':': 'colon', + '@': 'current', + '(': 'lparen', + ')': 'rparen', + '{': 'lbrace', + '}': 'rbrace', + } + + def tokenize(self, expression): + self._initialize_for_expression(expression) + while self._current is not None: + if self._current in self.SIMPLE_TOKENS: + yield {'type': self.SIMPLE_TOKENS[self._current], + 'value': self._current, + 'start': self._position, 'end': self._position + 1} + self._next() + elif self._current in self.START_IDENTIFIER: + start = self._position + buff = self._current + while self._next() in self.VALID_IDENTIFIER: + buff += self._current + yield {'type': 'unquoted_identifier', 'value': buff, + 'start': start, 'end': start + len(buff)} + elif self._current in self.WHITESPACE: + self._next() + elif self._current == '[': + start = self._position + next_char = self._next() + if next_char == ']': + self._next() + yield {'type': 'flatten', 'value': '[]', + 
'start': start, 'end': start + 2} + elif next_char == '?': + self._next() + yield {'type': 'filter', 'value': '[?', + 'start': start, 'end': start + 2} + else: + yield {'type': 'lbracket', 'value': '[', + 'start': start, 'end': start + 1} + elif self._current == "'": + yield self._consume_raw_string_literal() + elif self._current == '|': + yield self._match_or_else('|', 'or', 'pipe') + elif self._current == '&': + yield self._match_or_else('&', 'and', 'expref') + elif self._current == '`': + yield self._consume_literal() + elif self._current in self.VALID_NUMBER: + start = self._position + buff = self._consume_number() + yield {'type': 'number', 'value': int(buff), + 'start': start, 'end': start + len(buff)} + elif self._current == '-': + # Negative number. + start = self._position + buff = self._consume_number() + if len(buff) > 1: + yield {'type': 'number', 'value': int(buff), + 'start': start, 'end': start + len(buff)} + else: + raise LexerError(lexer_position=start, + lexer_value=buff, + message="Unknown token '%s'" % buff) + elif self._current == '"': + yield self._consume_quoted_identifier() + elif self._current == '<': + yield self._match_or_else('=', 'lte', 'lt') + elif self._current == '>': + yield self._match_or_else('=', 'gte', 'gt') + elif self._current == '!': + yield self._match_or_else('=', 'ne', 'not') + elif self._current == '=': + if self._next() == '=': + yield {'type': 'eq', 'value': '==', + 'start': self._position - 1, 'end': self._position} + self._next() + else: + if self._current is None: + # If we're at the EOF, we never advanced + # the position so we don't need to rewind + # it back one location. 
+ position = self._position + else: + position = self._position - 1 + raise LexerError( + lexer_position=position, + lexer_value='=', + message="Unknown token '='") + else: + raise LexerError(lexer_position=self._position, + lexer_value=self._current, + message="Unknown token %s" % self._current) + yield {'type': 'eof', 'value': '', + 'start': self._length, 'end': self._length} + + def _consume_number(self): + start = self._position + buff = self._current + while self._next() in self.VALID_NUMBER: + buff += self._current + return buff + + def _initialize_for_expression(self, expression): + if not expression: + raise EmptyExpressionError() + self._position = 0 + self._expression = expression + self._chars = list(self._expression) + self._current = self._chars[self._position] + self._length = len(self._expression) + + def _next(self): + if self._position == self._length - 1: + self._current = None + else: + self._position += 1 + self._current = self._chars[self._position] + return self._current + + def _consume_until(self, delimiter): + # Consume until the delimiter is reached, + # allowing for the delimiter to be escaped with "\". + start = self._position + buff = '' + self._next() + while self._current != delimiter: + if self._current == '\\': + buff += '\\' + self._next() + if self._current is None: + # We're at the EOF. + raise LexerError(lexer_position=start, + lexer_value=self._expression[start:], + message="Unclosed %s delimiter" % delimiter) + buff += self._current + self._next() + # Skip the closing delimiter. + self._next() + return buff + + def _consume_literal(self): + start = self._position + lexeme = self._consume_until('`').replace('\\`', '`') + try: + # Assume it is valid JSON and attempt to parse. + parsed_json = loads(lexeme) + except ValueError: + try: + # Invalid JSON values should be converted to quoted + # JSON strings during the JEP-12 deprecation period. 
+ parsed_json = loads('"%s"' % lexeme.lstrip()) + warnings.warn("deprecated string literal syntax", + PendingDeprecationWarning) + except ValueError: + raise LexerError(lexer_position=start, + lexer_value=self._expression[start:], + message="Bad token %s" % lexeme) + token_len = self._position - start + return {'type': 'literal', 'value': parsed_json, + 'start': start, 'end': token_len} + + def _consume_quoted_identifier(self): + start = self._position + lexeme = '"' + self._consume_until('"') + '"' + try: + token_len = self._position - start + return {'type': 'quoted_identifier', 'value': loads(lexeme), + 'start': start, 'end': token_len} + except ValueError as e: + error_message = str(e).split(':')[0] + raise LexerError(lexer_position=start, + lexer_value=lexeme, + message=error_message) + + def _consume_raw_string_literal(self): + start = self._position + lexeme = self._consume_until("'").replace("\\'", "'") + token_len = self._position - start + return {'type': 'literal', 'value': lexeme, + 'start': start, 'end': token_len} + + def _match_or_else(self, expected, match_type, else_type): + start = self._position + current = self._current + next_char = self._next() + if next_char == expected: + self._next() + return {'type': match_type, 'value': current + next_char, + 'start': start, 'end': start + 1} + return {'type': else_type, 'value': current, + 'start': start, 'end': start} diff --git a/.venv/lib/python3.12/site-packages/jmespath/parser.py b/.venv/lib/python3.12/site-packages/jmespath/parser.py new file mode 100644 index 00000000..47066880 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/jmespath/parser.py @@ -0,0 +1,527 @@ +"""Top down operator precedence parser. + +This is an implementation of Vaughan R. Pratt's +"Top Down Operator Precedence" parser. +(http://dl.acm.org/citation.cfm?doid=512927.512931). 
+ +These are some additional resources that help explain the +general idea behind a Pratt parser: + +* http://effbot.org/zone/simple-top-down-parsing.htm +* http://javascript.crockford.com/tdop/tdop.html + +A few notes on the implementation. + +* All the nud/led tokens are on the Parser class itself, and are dispatched + using getattr(). This keeps all the parsing logic contained to a single + class. +* We use two passes through the data. One to create a list of token, + then one pass through the tokens to create the AST. While the lexer actually + yields tokens, we convert it to a list so we can easily implement two tokens + of lookahead. A previous implementation used a fixed circular buffer, but it + was significantly slower. Also, the average jmespath expression typically + does not have a large amount of token so this is not an issue. And + interestingly enough, creating a token list first is actually faster than + consuming from the token iterator one token at a time. + +""" +import random + +from jmespath import lexer +from jmespath.compat import with_repr_method +from jmespath import ast +from jmespath import exceptions +from jmespath import visitor + + +class Parser(object): + BINDING_POWER = { + 'eof': 0, + 'unquoted_identifier': 0, + 'quoted_identifier': 0, + 'literal': 0, + 'rbracket': 0, + 'rparen': 0, + 'comma': 0, + 'rbrace': 0, + 'number': 0, + 'current': 0, + 'expref': 0, + 'colon': 0, + 'pipe': 1, + 'or': 2, + 'and': 3, + 'eq': 5, + 'gt': 5, + 'lt': 5, + 'gte': 5, + 'lte': 5, + 'ne': 5, + 'flatten': 9, + # Everything above stops a projection. + 'star': 20, + 'filter': 21, + 'dot': 40, + 'not': 45, + 'lbrace': 50, + 'lbracket': 55, + 'lparen': 60, + } + # The maximum binding power for a token that can stop + # a projection. + _PROJECTION_STOP = 10 + # The _MAX_SIZE most recent expressions are cached in + # _CACHE dict. 
+ _CACHE = {} + _MAX_SIZE = 128 + + def __init__(self, lookahead=2): + self.tokenizer = None + self._tokens = [None] * lookahead + self._buffer_size = lookahead + self._index = 0 + + def parse(self, expression): + cached = self._CACHE.get(expression) + if cached is not None: + return cached + parsed_result = self._do_parse(expression) + self._CACHE[expression] = parsed_result + if len(self._CACHE) > self._MAX_SIZE: + self._free_cache_entries() + return parsed_result + + def _do_parse(self, expression): + try: + return self._parse(expression) + except exceptions.LexerError as e: + e.expression = expression + raise + except exceptions.IncompleteExpressionError as e: + e.set_expression(expression) + raise + except exceptions.ParseError as e: + e.expression = expression + raise + + def _parse(self, expression): + self.tokenizer = lexer.Lexer().tokenize(expression) + self._tokens = list(self.tokenizer) + self._index = 0 + parsed = self._expression(binding_power=0) + if not self._current_token() == 'eof': + t = self._lookahead_token(0) + raise exceptions.ParseError(t['start'], t['value'], t['type'], + "Unexpected token: %s" % t['value']) + return ParsedResult(expression, parsed) + + def _expression(self, binding_power=0): + left_token = self._lookahead_token(0) + self._advance() + nud_function = getattr( + self, '_token_nud_%s' % left_token['type'], + self._error_nud_token) + left = nud_function(left_token) + current_token = self._current_token() + while binding_power < self.BINDING_POWER[current_token]: + led = getattr(self, '_token_led_%s' % current_token, None) + if led is None: + error_token = self._lookahead_token(0) + self._error_led_token(error_token) + else: + self._advance() + left = led(left) + current_token = self._current_token() + return left + + def _token_nud_literal(self, token): + return ast.literal(token['value']) + + def _token_nud_unquoted_identifier(self, token): + return ast.field(token['value']) + + def _token_nud_quoted_identifier(self, token): + 
# NOTE: every ``def`` taking ``self``/``cls`` below is a method of the parser
# class whose header lies above this chunk; they are written at column 0
# because the enclosing ``class`` line is not visible from here.


def _token_nud_star(self, token):
    """Parse a leading ``*`` into a value projection over the identity node."""
    left = ast.identity()
    if self._current_token() == 'rbracket':
        right = ast.identity()
    else:
        right = self._parse_projection_rhs(self.BINDING_POWER['star'])
    return ast.value_projection(left, right)


def _token_nud_filter(self, token):
    """Parse a leading filter by delegating to the led handler with identity."""
    return self._token_led_filter(ast.identity())


def _token_nud_lbrace(self, token):
    """Parse a leading ``{`` as a multi-select hash."""
    return self._parse_multi_select_hash()


def _token_nud_lparen(self, token):
    """Parse a parenthesised expression and consume the closing ``)``."""
    expression = self._expression()
    self._match('rparen')
    return expression


def _token_nud_flatten(self, token):
    """Parse a leading flatten token into a projection over a flatten node."""
    left = ast.flatten(ast.identity())
    right = self._parse_projection_rhs(
        self.BINDING_POWER['flatten'])
    return ast.projection(left, right)


def _token_nud_not(self, token):
    """Parse a ``not`` prefix and wrap its operand in a not-expression node."""
    expr = self._expression(self.BINDING_POWER['not'])
    return ast.not_expression(expr)


def _token_nud_lbracket(self, token):
    """Parse a leading ``[``: index/slice, ``[*]`` projection or multi-select.

    The branch is chosen from the current token (and, for ``[*]``, one token
    of lookahead).
    """
    if self._current_token() in ['number', 'colon']:
        right = self._parse_index_expression()
        # We could optimize this and remove the identity() node.
        # We don't really need an index_expression node, we can
        # just emit an index node here if we're not dealing
        # with a slice.
        return self._project_if_slice(ast.identity(), right)
    elif self._current_token() == 'star' and \
            self._lookahead(1) == 'rbracket':
        # Skip over both the '*' and the ']'.
        self._advance()
        self._advance()
        right = self._parse_projection_rhs(self.BINDING_POWER['star'])
        return ast.projection(ast.identity(), right)
    else:
        return self._parse_multi_select_list()


def _parse_index_expression(self):
    """Parse ``[number]`` or hand off to the slice parser when a colon is near."""
    # We're here:
    # [<current>
    #  ^
    #  | current token
    if (self._lookahead(0) == 'colon' or
            self._lookahead(1) == 'colon'):
        return self._parse_slice_expression()
    else:
        # Parse the syntax [number]
        node = ast.index(self._lookahead_token(0)['value'])
        self._advance()
        self._match('rbracket')
        return node


def _parse_slice_expression(self):
    """Parse ``[start:end:step]`` and return a slice node.

    Each part is optional and left as ``None`` when absent. A third colon or
    any token other than a number/colon raises a parse error through
    ``_raise_parse_error_for_token``.
    """
    # [start:end:step]
    # Where start, end, and step are optional.
    # The last colon is optional as well.
    parts = [None, None, None]
    index = 0
    current_token = self._current_token()
    while not current_token == 'rbracket' and index < 3:
        if current_token == 'colon':
            index += 1
            if index == 3:
                self._raise_parse_error_for_token(
                    self._lookahead_token(0), 'syntax error')
            self._advance()
        elif current_token == 'number':
            parts[index] = self._lookahead_token(0)['value']
            self._advance()
        else:
            self._raise_parse_error_for_token(
                self._lookahead_token(0), 'syntax error')
        current_token = self._current_token()
    self._match('rbracket')
    return ast.slice(*parts)


def _token_nud_current(self, token):
    """Return a current-node AST node."""
    return ast.current_node()


def _token_nud_expref(self, token):
    """Parse the operand of an expression reference and wrap it in an expref."""
    expression = self._expression(self.BINDING_POWER['expref'])
    return ast.expref(expression)


def _token_led_dot(self, left):
    """Parse what follows a ``.``: a subexpression or a ``.*`` projection.

    When ``left`` is already a subexpression node the new right-hand side is
    appended to it in place instead of nesting another node.
    """
    if not self._current_token() == 'star':
        right = self._parse_dot_rhs(self.BINDING_POWER['dot'])
        if left['type'] == 'subexpression':
            left['children'].append(right)
            return left
        else:
            return ast.subexpression([left, right])
    else:
        # We're creating a projection.
        self._advance()
        right = self._parse_projection_rhs(
            self.BINDING_POWER['dot'])
        return ast.value_projection(left, right)


def _token_led_pipe(self, left):
    """Parse the right operand of a pipe and build a pipe node."""
    right = self._expression(self.BINDING_POWER['pipe'])
    return ast.pipe(left, right)


def _token_led_or(self, left):
    """Parse the right operand of an ``or`` and build an or-expression node."""
    right = self._expression(self.BINDING_POWER['or'])
    return ast.or_expression(left, right)


def _token_led_and(self, left):
    """Parse the right operand of an ``and`` and build an and-expression node."""
    right = self._expression(self.BINDING_POWER['and'])
    return ast.and_expression(left, right)


def _token_led_lparen(self, left):
    """Parse a function call's argument list; ``left`` must be a field node.

    Raises ``exceptions.ParseError`` when ``left`` is not a field node.
    """
    if left['type'] != 'field':
        #  0 - first func arg or closing paren.
        # -1 - '(' token
        # -2 - invalid function "name".
        prev_t = self._lookahead_token(-2)
        raise exceptions.ParseError(
            prev_t['start'], prev_t['value'], prev_t['type'],
            "Invalid function name '%s'" % prev_t['value'])
    name = left['value']
    args = []
    while not self._current_token() == 'rparen':
        expression = self._expression()
        # NOTE(review): the comma is consumed only when present, so this
        # loop does not itself enforce a separator between arguments.
        if self._current_token() == 'comma':
            self._match('comma')
        args.append(expression)
    self._match('rparen')
    function_node = ast.function_expression(name, args)
    return function_node


def _token_led_filter(self, left):
    """Parse a filter condition up to ``]`` and build a filter projection."""
    # Filters are projections.
    condition = self._expression(0)
    self._match('rbracket')
    if self._current_token() == 'flatten':
        right = ast.identity()
    else:
        right = self._parse_projection_rhs(self.BINDING_POWER['filter'])
    return ast.filter_projection(left, right, condition)


def _token_led_eq(self, left):
    """Build an ``eq`` comparator node with ``left`` as the first operand."""
    return self._parse_comparator(left, 'eq')


def _token_led_ne(self, left):
    """Build an ``ne`` comparator node with ``left`` as the first operand."""
    return self._parse_comparator(left, 'ne')


def _token_led_gt(self, left):
    """Build a ``gt`` comparator node with ``left`` as the first operand."""
    return self._parse_comparator(left, 'gt')


def _token_led_gte(self, left):
    """Build a ``gte`` comparator node with ``left`` as the first operand."""
    return self._parse_comparator(left, 'gte')


def _token_led_lt(self, left):
    """Build an ``lt`` comparator node with ``left`` as the first operand."""
    return self._parse_comparator(left, 'lt')


def _token_led_lte(self, left):
    """Build an ``lte`` comparator node with ``left`` as the first operand."""
    return self._parse_comparator(left, 'lte')


def _token_led_flatten(self, left):
    """Wrap ``left`` in a flatten node and project the following expression."""
    left = ast.flatten(left)
    right = self._parse_projection_rhs(
        self.BINDING_POWER['flatten'])
    return ast.projection(left, right)


def _token_led_lbracket(self, left):
    """Parse ``left[...]``: an index/slice, otherwise a ``[*]`` projection."""
    token = self._lookahead_token(0)
    if token['type'] in ['number', 'colon']:
        right = self._parse_index_expression()
        if left['type'] == 'index_expression':
            # Optimization: if the left node is an index expr,
            # we can avoid creating another node and instead just add
            # the right node as a child of the left.
            left['children'].append(right)
            return left
        else:
            return self._project_if_slice(left, right)
    else:
        # We have a projection
        self._match('star')
        self._match('rbracket')
        right = self._parse_projection_rhs(self.BINDING_POWER['star'])
        return ast.projection(left, right)


def _project_if_slice(self, left, right):
    """Build an index expression; wrap it in a projection if ``right`` is a slice."""
    index_expr = ast.index_expression([left, right])
    if right['type'] == 'slice':
        return ast.projection(
            index_expr,
            self._parse_projection_rhs(self.BINDING_POWER['star']))
    else:
        return index_expr


def _parse_comparator(self, left, comparator):
    """Parse the right operand at the comparator's binding power and build the node."""
    right = self._expression(self.BINDING_POWER[comparator])
    return ast.comparator(comparator, left, right)


def _parse_multi_select_list(self):
    """Parse comma-separated expressions up to ``]`` into a multi-select list."""
    expressions = []
    while True:
        expression = self._expression()
        expressions.append(expression)
        if self._current_token() == 'rbracket':
            break
        else:
            self._match('comma')
    self._match('rbracket')
    return ast.multi_select_list(expressions)


def _parse_multi_select_hash(self):
    """Parse ``key: expression`` pairs up to ``}`` into a multi-select dict."""
    pairs = []
    while True:
        key_token = self._lookahead_token(0)
        # Before getting the token value, verify it's
        # an identifier.
        self._match_multiple_tokens(
            token_types=['quoted_identifier', 'unquoted_identifier'])
        key_name = key_token['value']
        self._match('colon')
        value = self._expression(0)
        node = ast.key_val_pair(key_name=key_name, node=value)
        pairs.append(node)
        if self._current_token() == 'comma':
            self._match('comma')
        elif self._current_token() == 'rbrace':
            self._match('rbrace')
            break
    return ast.multi_select_dict(nodes=pairs)


def _parse_projection_rhs(self, binding_power):
    """Parse the right hand side of a projection.

    Returns an identity node when the current token's binding power is below
    ``_PROJECTION_STOP``; otherwise only ``[``, a filter or ``.`` may follow,
    and anything else raises a parse error.
    """
    if self.BINDING_POWER[self._current_token()] < self._PROJECTION_STOP:
        # BP of 10 are all the tokens that stop a projection.
        right = ast.identity()
    elif self._current_token() == 'lbracket':
        right = self._expression(binding_power)
    elif self._current_token() == 'filter':
        right = self._expression(binding_power)
    elif self._current_token() == 'dot':
        self._match('dot')
        right = self._parse_dot_rhs(binding_power)
    else:
        self._raise_parse_error_for_token(self._lookahead_token(0),
                                          'syntax error')
    return right


def _parse_dot_rhs(self, binding_power):
    """Parse the expression allowed after a ``.``; raise a parse error otherwise."""
    # From the grammar:
    # expression '.' ( identifier /
    #                  multi-select-list /
    #                  multi-select-hash /
    #                  function-expression /
    #                  *
    # In terms of tokens that means that after a '.',
    # you can have:
    lookahead = self._current_token()
    # Common case "foo.bar", so first check for an identifier.
    if lookahead in ['quoted_identifier', 'unquoted_identifier', 'star']:
        return self._expression(binding_power)
    elif lookahead == 'lbracket':
        self._match('lbracket')
        return self._parse_multi_select_list()
    elif lookahead == 'lbrace':
        self._match('lbrace')
        return self._parse_multi_select_hash()
    else:
        t = self._lookahead_token(0)
        # NOTE(review): 'star' is accepted above but is not listed in the
        # message below; the message text is left unchanged.
        allowed = ['quoted_identifier', 'unquoted_identifier',
                   'lbracket', 'lbrace']
        msg = (
            "Expecting: %s, got: %s" % (allowed, t['type'])
        )
        self._raise_parse_error_for_token(t, msg)


def _error_nud_token(self, token):
    """Raise for a token with no nud handler (incomplete expression on eof)."""
    if token['type'] == 'eof':
        raise exceptions.IncompleteExpressionError(
            token['start'], token['value'], token['type'])
    self._raise_parse_error_for_token(token, 'invalid token')


def _error_led_token(self, token):
    """Raise a parse error for a token with no led handler."""
    self._raise_parse_error_for_token(token, 'invalid token')


def _match(self, token_type=None):
    """Consume the current token if its type is ``token_type``, else raise."""
    if self._current_token() == token_type:
        self._advance()
    else:
        self._raise_parse_error_maybe_eof(
            token_type, self._lookahead_token(0))


def _match_multiple_tokens(self, token_types):
    """Consume the current token if its type is in ``token_types``, else raise."""
    if self._current_token() not in token_types:
        self._raise_parse_error_maybe_eof(
            token_types, self._lookahead_token(0))
    self._advance()


def _advance(self):
    """Move the token cursor forward by one."""
    self._index += 1


def _current_token(self):
    """Return the type of the token at the cursor."""
    return self._tokens[self._index]['type']


def _lookahead(self, number):
    """Return the type of the token ``number`` positions from the cursor."""
    return self._tokens[self._index + number]['type']


def _lookahead_token(self, number):
    """Return the whole token ``number`` positions from the cursor."""
    return self._tokens[self._index + number]


def _raise_parse_error_for_token(self, token, reason):
    """Raise ``exceptions.ParseError`` describing ``token`` with ``reason``."""
    lex_position = token['start']
    actual_value = token['value']
    actual_type = token['type']
    raise exceptions.ParseError(lex_position, actual_value,
                                actual_type, reason)


def _raise_parse_error_maybe_eof(self, expected_type, token):
    """Raise for an unexpected token.

    Raises ``exceptions.IncompleteExpressionError`` when the token is ``eof``
    and ``exceptions.ParseError`` (with an "Expecting ... got ..." message)
    otherwise.
    """
    lex_position = token['start']
    actual_value = token['value']
    actual_type = token['type']
    if actual_type == 'eof':
        raise exceptions.IncompleteExpressionError(
            lex_position, actual_value, actual_type)
    message = 'Expecting: %s, got: %s' % (expected_type,
                                          actual_type)
    raise exceptions.ParseError(
        lex_position, actual_value, actual_type, message)


def _free_cache_entries(self):
    """Evict randomly chosen entries from ``_CACHE``.

    At most ``int(_MAX_SIZE / 2)`` keys are removed. The count is clamped to
    the number of keys actually present because ``random.sample`` raises
    ``ValueError`` when asked for more items than the population holds.
    """
    keys = list(self._CACHE)
    evict_count = min(len(keys), int(self._MAX_SIZE / 2))
    for key in random.sample(keys, evict_count):
        # pop() with a default tolerates a key that has already gone.
        self._CACHE.pop(key, None)


@classmethod
def purge(cls):
    """Clear the expression compilation cache."""
    cls._CACHE.clear()


@with_repr_method
class ParsedResult(object):
    """Pairs an expression string with its parsed AST and evaluates it."""

    def __init__(self, expression, parsed):
        # expression: the original expression text.
        # parsed: the AST produced for it.
        self.expression = expression
        self.parsed = parsed

    def search(self, value, options=None):
        """Evaluate the parsed AST against ``value`` and return the result.

        ``options`` is passed straight to ``visitor.TreeInterpreter``.
        """
        interpreter = visitor.TreeInterpreter(options)
        result = interpreter.visit(self.parsed, value)
        return result

    def _render_dot_file(self):
        """Render the parsed AST as a dot file.

        Note that this is marked as an internal method because
        the AST is an implementation detail and is subject
        to change.  This method can be used to help troubleshoot
        or for development purposes, but is not considered part
        of the public supported API.  Use at your own risk.

        """
        renderer = visitor.GraphvizVisitor()
        contents = renderer.visit(self.parsed)
        return contents

    def __repr__(self):
        return repr(self.parsed)
def _equals(x, y):
    """Return ``x == y``, except that 0/1 never equal a boolean."""
    if _is_special_number_case(x, y):
        return False
    else:
        return x == y


def _is_special_number_case(x, y):
    """Return True when one operand is the number 0 or 1 and the other a bool."""
    # We need to special case comparing 0 or 1 to
    # True/False.  While normally comparing any
    # integer other than 0/1 to True/False will always
    # return False.  However 0/1 have this:
    # >>> 0 == True
    # False
    # >>> 0 == False
    # True
    # >>> 1 == True
    # True
    # >>> 1 == False
    # False
    #
    # Also need to consider that:
    # >>> 0 in [True, False]
    # True
    if _is_actual_number(x) and x in (0, 1):
        return isinstance(y, bool)
    elif _is_actual_number(y) and y in (0, 1):
        return isinstance(x, bool)
    # Neither operand is a 0/1 number: answer with an explicit boolean
    # instead of falling off the end and returning None.
    return False


def _is_comparable(x):
    """Return True when ``x`` may be used with an ordering operator."""
    # The spec doesn't officially support string types yet,
    # but enough people are relying on this behavior that
    # it's been added back.  This should eventually become
    # part of the official spec.
    return _is_actual_number(x) or isinstance(x, string_type)


def _is_actual_number(x):
    """Return True for ``Number`` instances that are not booleans."""
    # We need to handle python's quirkiness with booleans,
    # specifically:
    #
    # >>> isinstance(False, int)
    # True
    # >>> isinstance(True, int)
    # True
    if isinstance(x, bool):
        return False
    return isinstance(x, Number)


class Options(object):
    """Options to control how a JMESPath function is evaluated."""
    def __init__(self, dict_cls=None, custom_functions=None):
        #: The class to use when creating a dict.  The interpreter
        #  may create dictionaries during the evaluation of a JMESPath
        #  expression.  For example, a multi-select hash will
        #  create a dictionary.  By default we use a dict() type.
        #  You can set this value to change what dict type is used.
        #  The most common reason you would change this is if you
        #  want to set a collections.OrderedDict so that you can
        #  have predictable key ordering.
        self.dict_cls = dict_cls
        # Object used by the interpreter in place of the default
        # functions.Functions() when not None.
        self.custom_functions = custom_functions


class _Expression(object):
    """An unevaluated expression node bundled with its interpreter."""

    def __init__(self, expression, interpreter):
        self.expression = expression
        self.interpreter = interpreter

    def visit(self, node, *args, **kwargs):
        """Delegate to the stored interpreter's ``visit``."""
        return self.interpreter.visit(node, *args, **kwargs)


class Visitor(object):
    """Dispatches AST nodes to ``visit_<type>`` methods, caching the lookup."""

    def __init__(self):
        # node type -> bound visit method
        self._method_cache = {}

    def visit(self, node, *args, **kwargs):
        """Call the handler for ``node['type']`` (``default_visit`` if none)."""
        node_type = node['type']
        method = self._method_cache.get(node_type)
        if method is None:
            method = getattr(
                self, 'visit_%s' % node_type, self.default_visit)
            self._method_cache[node_type] = method
        return method(node, *args, **kwargs)

    def default_visit(self, node, *args, **kwargs):
        raise NotImplementedError("default_visit")


class TreeInterpreter(Visitor):
    """Evaluates an AST against a value, one ``visit_<type>`` per node type."""

    COMPARATOR_FUNC = {
        'eq': _equals,
        'ne': lambda x, y: not _equals(x, y),
        'lt': operator.lt,
        'gt': operator.gt,
        'lte': operator.le,
        'gte': operator.ge
    }
    _EQUALITY_OPS = ['eq', 'ne']
    MAP_TYPE = dict

    def __init__(self, options=None):
        """Configure the dict class and function table from ``options``."""
        super(TreeInterpreter, self).__init__()
        self._dict_cls = self.MAP_TYPE
        if options is None:
            options = Options()
        self._options = options
        if options.dict_cls is not None:
            self._dict_cls = self._options.dict_cls
        if options.custom_functions is not None:
            self._functions = self._options.custom_functions
        else:
            self._functions = functions.Functions()

    def default_visit(self, node, *args, **kwargs):
        raise NotImplementedError(node['type'])

    def visit_subexpression(self, node, value):
        """Feed ``value`` through each child in turn."""
        result = value
        for child in node['children']:
            result = self.visit(child, result)
        return result

    def visit_field(self, node, value):
        """Return ``value.get(name)``, or None when ``value`` has no ``get``."""
        try:
            return value.get(node['value'])
        except AttributeError:
            return None

    def visit_comparator(self, node, value):
        """Evaluate a comparison between the node's two children.

        ``eq``/``ne`` accept any operands. Ordering operators return None
        unless both operands are comparable (numbers or strings), and also
        when the two operands cannot be ordered against each other.
        """
        comparator_func = self.COMPARATOR_FUNC[node['value']]
        left = self.visit(node['children'][0], value)
        right = self.visit(node['children'][1], value)
        # Common case: comparator is == or !=
        if node['value'] in self._EQUALITY_OPS:
            return comparator_func(left, right)
        # Ordering operators are only valid for numbers (and strings, see
        # _is_comparable).  Evaluating any other type with a comparison
        # operator will yield a None value.
        if not (_is_comparable(left) and _is_comparable(right)):
            return None
        try:
            return comparator_func(left, right)
        except TypeError:
            # Both operands are individually comparable but not with each
            # other (e.g. a string against a number), which Python 3
            # rejects with TypeError.
            return None

    def visit_current(self, node, value):
        return value

    def visit_expref(self, node, value):
        """Wrap the child node, unevaluated, in an ``_Expression``."""
        return _Expression(node['children'][0], self)

    def visit_function_expression(self, node, value):
        """Evaluate every argument, then call the named function."""
        resolved_args = []
        for child in node['children']:
            current = self.visit(child, value)
            resolved_args.append(current)
        return self._functions.call_function(node['value'], resolved_args)

    def visit_filter_projection(self, node, value):
        """Project over list elements whose condition is true; drop None results.

        Returns None when the left child does not evaluate to a list.
        """
        base = self.visit(node['children'][0], value)
        if not isinstance(base, list):
            return None
        comparator_node = node['children'][2]
        collected = []
        for element in base:
            if self._is_true(self.visit(comparator_node, element)):
                current = self.visit(node['children'][1], element)
                if current is not None:
                    collected.append(current)
        return collected

    def visit_flatten(self, node, value):
        """Merge one level of nested lists; None if the base is not a list."""
        base = self.visit(node['children'][0], value)
        if not isinstance(base, list):
            # Can't flatten the object if it's not a list.
            return None
        merged_list = []
        for element in base:
            if isinstance(element, list):
                merged_list.extend(element)
            else:
                merged_list.append(element)
        return merged_list

    def visit_identity(self, node, value):
        return value

    def visit_index(self, node, value):
        """Index into a list; None for non-lists or out-of-range indexes."""
        # Even though we can index strings, we don't
        # want to support that.
        if not isinstance(value, list):
            return None
        try:
            return value[node['value']]
        except IndexError:
            return None

    def visit_index_expression(self, node, value):
        """Feed ``value`` through each child in turn."""
        result = value
        for child in node['children']:
            result = self.visit(child, result)
        return result

    def visit_slice(self, node, value):
        """Apply ``slice(*children)`` to a list; None for non-lists."""
        if not isinstance(value, list):
            return None
        s = slice(*node['children'])
        return value[s]

    def visit_key_val_pair(self, node, value):
        return self.visit(node['children'][0], value)

    def visit_literal(self, node, value):
        return node['value']

    def visit_multi_select_dict(self, node, value):
        """Build a ``_dict_cls`` keyed by each child's value; None for None."""
        if value is None:
            return None
        collected = self._dict_cls()
        for child in node['children']:
            collected[child['value']] = self.visit(child, value)
        return collected

    def visit_multi_select_list(self, node, value):
        """Build a list of each child's result; None when ``value`` is None."""
        if value is None:
            return None
        collected = []
        for child in node['children']:
            collected.append(self.visit(child, value))
        return collected

    def visit_or_expression(self, node, value):
        """Return the left result unless it is false, then the right one."""
        matched = self.visit(node['children'][0], value)
        if self._is_false(matched):
            matched = self.visit(node['children'][1], value)
        return matched

    def visit_and_expression(self, node, value):
        """Return the left result if it is false, else the right result."""
        matched = self.visit(node['children'][0], value)
        if self._is_false(matched):
            return matched
        return self.visit(node['children'][1], value)

    def visit_not_expression(self, node, value):
        """Negate the child's result, treating the number 0 as true."""
        original_result = self.visit(node['children'][0], value)
        if _is_actual_number(original_result) and original_result == 0:
            # Special case for 0, !0 should be false, not true.
            # 0 is not a special cased integer in jmespath.
            return False
        return not original_result

    def visit_pipe(self, node, value):
        """Feed ``value`` through each child in turn."""
        result = value
        for child in node['children']:
            result = self.visit(child, result)
        return result

    def visit_projection(self, node, value):
        """Apply the right child to each list element; drop None results.

        Returns None when the left child does not evaluate to a list.
        """
        base = self.visit(node['children'][0], value)
        if not isinstance(base, list):
            return None
        collected = []
        for element in base:
            current = self.visit(node['children'][1], element)
            if current is not None:
                collected.append(current)
        return collected

    def visit_value_projection(self, node, value):
        """Apply the right child to each of ``base.values()``; drop None results.

        Returns None when the left child's result has no ``values`` attribute.
        """
        base = self.visit(node['children'][0], value)
        try:
            base = base.values()
        except AttributeError:
            return None
        collected = []
        for element in base:
            current = self.visit(node['children'][1], element)
            if current is not None:
                collected.append(current)
        return collected

    def _is_false(self, value):
        # This looks weird, but we're explicitly using equality checks
        # because the truth/false values are different between
        # python and jmespath.
        return (value == '' or value == [] or value == {} or value is None or
                value is False)

    def _is_true(self, value):
        return not self._is_false(value)


class GraphvizVisitor(Visitor):
    """Renders an AST as the text of a Graphviz ``digraph``."""

    def __init__(self):
        super(GraphvizVisitor, self).__init__()
        self._lines = []
        self._count = 1

    def visit(self, node, *args, **kwargs):
        """Return the dot text for the tree rooted at ``node``."""
        # Start from a clean slate so that reusing one visitor does not
        # append a second graph to the output of an earlier call.
        self._lines = []
        self._count = 1
        self._lines.append('digraph AST {')
        current = '%s%s' % (node['type'], self._count)
        self._count += 1
        self._visit(node, current)
        self._lines.append('}')
        return '\n'.join(self._lines)

    def _visit(self, node, current):
        """Emit the label line for ``node`` and recurse into its children."""
        self._lines.append('%s [label="%s(%s)"]' % (
            current, node['type'], node.get('value', '')))
        for child in node.get('children', []):
            child_name = '%s%s' % (child['type'], self._count)
            self._count += 1
            self._lines.append('  %s -> %s' % (current, child_name))
            self._visit(child, child_name)