import logging from ast import literal_eval from functools import lru_cache logger = logging.getLogger(__name__) GETATTR = 'GETATTR' GET = 'GET' class PathExtractionError(ValueError): pass class RootCanNotBeModified(ValueError): pass def _add_to_elements(elements, elem, inside): # Ignore private items if not elem: return if not elem.startswith('__'): remove_quotes = False if '𝆺𝅥𝅯' in elem or '\\' in elem: remove_quotes = True else: try: elem = literal_eval(elem) remove_quotes = False except (ValueError, SyntaxError): remove_quotes = True if remove_quotes and elem[0] == elem[-1] and elem[0] in {'"', "'"}: elem = elem[1: -1] action = GETATTR if inside == '.' else GET elements.append((elem, action)) DEFAULT_FIRST_ELEMENT = ('root', GETATTR) @lru_cache(maxsize=1024 * 128) def _path_to_elements(path, root_element=DEFAULT_FIRST_ELEMENT): """ Given a path, it extracts the elements that form the path and their relevant most likely retrieval action. >>> from deepdiff import _path_to_elements >>> path = "root[4.3].b['a3']" >>> _path_to_elements(path, root_element=None) [(4.3, 'GET'), ('b', 'GETATTR'), ('a3', 'GET')] """ if isinstance(path, (tuple, list)): return path elements = [] if root_element: elements.append(root_element) elem = '' inside = False prev_char = None path = path[4:] # removing "root from the beginning" brackets = [] inside_quotes = False quote_used = '' for char in path: if prev_char == '𝆺𝅥𝅯': elem += char elif char in {'"', "'"}: elem += char # If we are inside and the quote is not what we expected, the quote is not closing if not(inside_quotes and quote_used != char): inside_quotes = not inside_quotes if inside_quotes: quote_used = char else: _add_to_elements(elements, elem, inside) elem = '' quote_used = '' elif inside_quotes: elem += char elif char == '[': if inside == '.': _add_to_elements(elements, elem, inside) inside = '[' elem = '' # we are already inside. The bracket is a part of the word. elif inside == '[': elem += char else: inside = '[' brackets.append('[') elem = '' elif char == '.': if inside == '[': elem += char elif inside == '.': _add_to_elements(elements, elem, inside) elem = '' else: inside = '.' elem = '' elif char == ']': if brackets and brackets[-1] == '[': brackets.pop() if brackets: elem += char else: _add_to_elements(elements, elem, inside) elem = '' inside = False else: elem += char prev_char = char if elem: _add_to_elements(elements, elem, inside) return tuple(elements) def _get_nested_obj(obj, elements, next_element=None): for (elem, action) in elements: if action == GET: obj = obj[elem] elif action == GETATTR: obj = getattr(obj, elem) return obj def _guess_type(elements, elem, index, next_element): # If we are not at the last elements if index < len(elements) - 1: # We assume it is a nested dictionary not a nested list return {} if isinstance(next_element, int): return [] return {} def _get_nested_obj_and_force(obj, elements, next_element=None): prev_elem = None prev_action = None prev_obj = obj for index, (elem, action) in enumerate(elements): _prev_obj = obj if action == GET: try: obj = obj[elem] prev_obj = _prev_obj except KeyError: obj[elem] = _guess_type(elements, elem, index, next_element) obj = obj[elem] prev_obj = _prev_obj except IndexError: if isinstance(obj, list) and isinstance(elem, int) and elem >= len(obj): obj.extend([None] * (elem - len(obj))) obj.append(_guess_type(elements, elem, index), next_element) obj = obj[-1] prev_obj = _prev_obj elif isinstance(obj, list) and len(obj) == 0 and prev_elem: # We ran into an empty list that should have been a dictionary # We need to change it from an empty list to a dictionary obj = {elem: _guess_type(elements, elem, index, next_element)} if prev_action == GET: prev_obj[prev_elem] = obj else: setattr(prev_obj, prev_elem, obj) obj = obj[elem] elif action == GETATTR: obj = getattr(obj, elem) prev_obj = _prev_obj prev_elem = elem prev_action = action return obj def extract(obj, path): """ Get the item from obj based on path. Example: >>> from deepdiff import extract >>> obj = {1: [{'2': 'b'}, 3], 2: [4, 5]} >>> path = "root[1][0]['2']" >>> extract(obj, path) 'b' Note that you can use extract in conjunction with DeepDiff results or even with the search and :ref:`deepsearch_label` modules. For example: >>> from deepdiff import grep >>> obj = {1: [{'2': 'b'}, 3], 2: [4, 5]} >>> result = obj | grep(5) >>> result {'matched_values': ['root[2][1]']} >>> result['matched_values'][0] 'root[2][1]' >>> path = result['matched_values'][0] >>> extract(obj, path) 5 .. note:: Note that even if DeepDiff tried gives you a path to an item in a set, there is no such thing in Python and hence you will get an error trying to extract that item from a set. If you want to be able to get items from sets, use the SetOrdered module to generate the sets. In fact Deepdiff uses SetOrdered as a dependency. >>> from deepdiff import grep, extract >>> obj = {"a", "b"} >>> obj | grep("b") Set item detected in the path.'set' objects do NOT support indexing. But DeepSearch will still report a path. {'matched_values': SetOrdered(['root[0]'])} >>> extract(obj, 'root[0]') Traceback (most recent call last): File "", line 1, in File "deepdiff/deepdiff/path.py", line 126, in extract return _get_nested_obj(obj, elements) File "deepdiff/deepdiff/path.py", line 84, in _get_nested_obj obj = obj[elem] TypeError: 'set' object is not subscriptable >>> from orderly_set import SetOrdered >>> obj = SetOrdered(["a", "b"]) >>> extract(obj, 'root[0]') 'a' """ elements = _path_to_elements(path, root_element=None) return _get_nested_obj(obj, elements) def parse_path(path, root_element=DEFAULT_FIRST_ELEMENT, include_actions=False): """ Parse a path to a format that is machine readable **Parameters** path : A string The path string such as "root[1][2]['age']" root_element: string, default='root' What the root is called in the path. include_actions: boolean, default=False If True, we return the action required to retrieve the item at each element of the path. **Examples** >>> from deepdiff import parse_path >>> parse_path("root[1][2]['age']") [1, 2, 'age'] >>> parse_path("root[1][2]['age']", include_actions=True) [{'element': 1, 'action': 'GET'}, {'element': 2, 'action': 'GET'}, {'element': 'age', 'action': 'GET'}] >>> >>> parse_path("root['joe'].age") ['joe', 'age'] >>> parse_path("root['joe'].age", include_actions=True) [{'element': 'joe', 'action': 'GET'}, {'element': 'age', 'action': 'GETATTR'}] """ result = _path_to_elements(path, root_element=root_element) result = iter(result) if root_element: next(result) # We don't want the root item if include_actions is False: return [i[0] for i in result] return [{'element': i[0], 'action': i[1]} for i in result] def stringify_element(param, quote_str=None): has_quote = "'" in param has_double_quote = '"' in param if has_quote and has_double_quote and not quote_str: new_param = [] for char in param: if char in {'"', "'"}: new_param.append('𝆺𝅥𝅯') new_param.append(char) result = '"' + ''.join(new_param) + '"' elif has_quote: result = f'"{param}"' elif has_double_quote: result = f"'{param}'" else: result = param if quote_str is None else quote_str.format(param) return result def stringify_path(path, root_element=DEFAULT_FIRST_ELEMENT, quote_str="'{}'"): """ Gets the path as an string. For example [1, 2, 'age'] should become root[1][2]['age'] """ if not path: return root_element[0] result = [root_element[0]] has_actions = False try: if path[0][1] in {GET, GETATTR}: has_actions = True except (KeyError, IndexError, TypeError): pass if not has_actions: path = [(i, GET) for i in path] path[0] = (path[0][0], root_element[1]) # The action for the first element might be a GET or GETATTR. We update the action based on the root_element. for element, action in path: if isinstance(element, str) and action == GET: element = stringify_element(element, quote_str) if action == GET: result.append(f"[{element}]") else: result.append(f".{element}") return ''.join(result)