import logging
from ast import literal_eval
from functools import lru_cache
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Action tags paired with each path element: GETATTR means the element is
# retrieved with getattr(obj, elem), GET means it is retrieved with obj[elem].
GETATTR = 'GETATTR'
GET = 'GET'
class PathExtractionError(ValueError):
    """ValueError subclass that, going by its name, signals a path extraction failure."""
class RootCanNotBeModified(ValueError):
    """ValueError subclass that, going by its name, signals an attempt to modify the root."""
def _add_to_elements(elements, elem, inside):
# Ignore private items
if not elem:
return
if not elem.startswith('__'):
remove_quotes = False
if '𝆺𝅥𝅯' in elem or '\\' in elem:
remove_quotes = True
else:
try:
elem = literal_eval(elem)
remove_quotes = False
except (ValueError, SyntaxError):
remove_quotes = True
if remove_quotes and elem[0] == elem[-1] and elem[0] in {'"', "'"}:
elem = elem[1: -1]
action = GETATTR if inside == '.' else GET
elements.append((elem, action))
# Default (name, action) pair used as the root element at the head of a path.
DEFAULT_FIRST_ELEMENT = ('root', GETATTR)
@lru_cache(maxsize=1024 * 128)
def _path_to_elements(path, root_element=DEFAULT_FIRST_ELEMENT):
    """
    Split a path string into its elements, each paired with the most likely retrieval action.

    A path that is already a tuple or list is returned unchanged. Otherwise the
    result is a tuple of ``(element, action)`` pairs, starting with
    ``root_element`` when that is truthy.

    Because the function is wrapped in ``lru_cache``, its arguments must be hashable.

    >>> from deepdiff import _path_to_elements
    >>> path = "root[4.3].b['a3']"
    >>> _path_to_elements(path, root_element=None)
    ((4.3, 'GET'), ('b', 'GETATTR'), ('a3', 'GET'))
    """
    if isinstance(path, (tuple, list)):
        return path

    parsed = [root_element] if root_element else []
    token = ''
    # False while outside any accessor, '[' inside brackets, '.' after a dot.
    mode = False
    last_char = None
    open_brackets = []
    in_quotes = False
    active_quote = ''

    # The first four characters (the "root" prefix) are dropped before scanning.
    for ch in path[4:]:
        if last_char == '𝆺𝅥𝅯':
            # The character right after the escape marker is taken literally.
            token += ch
        elif ch in {'"', "'"}:
            token += ch
            # A quote toggles the quoted state unless we are inside quotes
            # that were opened with the other quote character.
            if not in_quotes or active_quote == ch:
                in_quotes = not in_quotes
                if in_quotes:
                    active_quote = ch
                else:
                    _add_to_elements(parsed, token, mode)
                    token = ''
                    active_quote = ''
        elif in_quotes:
            token += ch
        elif ch == '[':
            if mode == '.':
                # A bracket ends the attribute name collected so far.
                _add_to_elements(parsed, token, mode)
                mode = '['
                token = ''
            elif mode == '[':
                # Already inside brackets: the bracket is part of the word.
                token += ch
            else:
                mode = '['
                open_brackets.append('[')
                token = ''
        elif ch == '.':
            if mode == '[':
                # A dot inside brackets belongs to the token (e.g. a float).
                token += ch
            elif mode == '.':
                _add_to_elements(parsed, token, mode)
                token = ''
            else:
                mode = '.'
                token = ''
        elif ch == ']':
            if open_brackets and open_brackets[-1] == '[':
                open_brackets.pop()
            if open_brackets:
                token += ch
            else:
                _add_to_elements(parsed, token, mode)
                token = ''
                mode = False
        else:
            token += ch
        last_char = ch

    # Flush whatever is left, e.g. a trailing attribute name.
    if token:
        _add_to_elements(parsed, token, mode)
    return tuple(parsed)
def _get_nested_obj(obj, elements, next_element=None):
for (elem, action) in elements:
if action == GET:
obj = obj[elem]
elif action == GETATTR:
obj = getattr(obj, elem)
return obj
def _guess_type(elements, elem, index, next_element):
# If we are not at the last elements
if index < len(elements) - 1:
# We assume it is a nested dictionary not a nested list
return {}
if isinstance(next_element, int):
return []
return {}
def _get_nested_obj_and_force(obj, elements, next_element=None):
    """
    Walk ``elements`` from ``obj`` like ``_get_nested_obj``, creating missing containers on the way.

    obj : the object to traverse; it may be mutated in place.
    elements : iterable of ``(element, action)`` pairs (it must support ``len``
        because ``_guess_type`` inspects its length).
    next_element : forwarded to ``_guess_type`` to help choose between a list
        and a dict for a newly created container.

    For GET steps:
      * a ``KeyError`` makes the missing key get a container from ``_guess_type``;
      * an ``IndexError`` on a list with an int index at or past the end pads
        the list with ``None`` and appends a new container at that index;
      * an ``IndexError`` on an empty list reached through a truthy previous
        element replaces that list on its parent with a dict holding the key.
    GETATTR steps use a plain ``getattr`` and create nothing.

    Returns the object reached after the last element.
    """
    prev_elem = None
    prev_action = None
    prev_obj = obj
    for index, (elem, action) in enumerate(elements):
        _prev_obj = obj
        if action == GET:
            try:
                obj = obj[elem]
                prev_obj = _prev_obj
            except KeyError:
                obj[elem] = _guess_type(elements, elem, index, next_element)
                obj = obj[elem]
                prev_obj = _prev_obj
            except IndexError:
                if isinstance(obj, list) and isinstance(elem, int) and elem >= len(obj):
                    # Pad so that the appended container lands exactly at index `elem`.
                    obj.extend([None] * (elem - len(obj)))
                    # Fix: next_element belongs to _guess_type (4 required args),
                    # not to list.append, which accepts exactly one argument.
                    obj.append(_guess_type(elements, elem, index, next_element))
                    obj = obj[-1]
                    prev_obj = _prev_obj
                elif isinstance(obj, list) and len(obj) == 0 and prev_elem:
                    # We ran into an empty list that should have been a dictionary
                    # We need to change it from an empty list to a dictionary
                    obj = {elem: _guess_type(elements, elem, index, next_element)}
                    if prev_action == GET:
                        prev_obj[prev_elem] = obj
                    else:
                        setattr(prev_obj, prev_elem, obj)
                    obj = obj[elem]
        elif action == GETATTR:
            obj = getattr(obj, elem)
            prev_obj = _prev_obj
        prev_elem = elem
        prev_action = action
    return obj
def extract(obj, path):
    """
    Get the item from obj based on path.

    Example:

        >>> from deepdiff import extract
        >>> obj = {1: [{'2': 'b'}, 3], 2: [4, 5]}
        >>> path = "root[1][0]['2']"
        >>> extract(obj, path)
        'b'

    Note that you can use extract in conjunction with DeepDiff results
    or even with the search and :ref:`deepsearch_label` modules. For example:

        >>> from deepdiff import grep
        >>> obj = {1: [{'2': 'b'}, 3], 2: [4, 5]}
        >>> result = obj | grep(5)
        >>> result
        {'matched_values': ['root[2][1]']}
        >>> result['matched_values'][0]
        'root[2][1]'
        >>> path = result['matched_values'][0]
        >>> extract(obj, path)
        5

    .. note::
        Even if DeepDiff gives you a path to an item in a set,
        there is no such thing in Python and hence you will get an error trying
        to extract that item from a set.
        If you want to be able to get items from sets, use the SetOrdered module
        to generate the sets.
        In fact Deepdiff uses SetOrdered as a dependency.

        >>> from deepdiff import grep, extract
        >>> obj = {"a", "b"}
        >>> obj | grep("b")
        Set item detected in the path.'set' objects do NOT support indexing. But DeepSearch will still report a path.
        {'matched_values': SetOrdered(['root[0]'])}
        >>> extract(obj, 'root[0]')
        Traceback (most recent call last):
          File "<stdin>", line 1, in <module>
          File "deepdiff/deepdiff/path.py", line 126, in extract
            return _get_nested_obj(obj, elements)
          File "deepdiff/deepdiff/path.py", line 84, in _get_nested_obj
            obj = obj[elem]
        TypeError: 'set' object is not subscriptable
        >>> from orderly_set import SetOrdered
        >>> obj = SetOrdered(["a", "b"])
        >>> extract(obj, 'root[0]')
        'a'
    """
    # Parse without a root entry so the walk starts directly at obj.
    return _get_nested_obj(obj, _path_to_elements(path, root_element=None))
def parse_path(path, root_element=DEFAULT_FIRST_ELEMENT, include_actions=False):
    """
    Parse a path to a format that is machine readable

    **Parameters**

    path : A string
        The path string such as "root[1][2]['age']"

    root_element: tuple, default=('root', 'GETATTR')
        The (name, action) pair handed to the path parser as the root entry.
        When it is truthy, the first parsed item is dropped from the result.

    include_actions: boolean, default=False
        If True, we return the action required to retrieve the item at each element of the path.

    **Examples**

        >>> from deepdiff import parse_path
        >>> parse_path("root[1][2]['age']")
        [1, 2, 'age']
        >>> parse_path("root[1][2]['age']", include_actions=True)
        [{'element': 1, 'action': 'GET'}, {'element': 2, 'action': 'GET'}, {'element': 'age', 'action': 'GET'}]
        >>>
        >>> parse_path("root['joe'].age")
        ['joe', 'age']
        >>> parse_path("root['joe'].age", include_actions=True)
        [{'element': 'joe', 'action': 'GET'}, {'element': 'age', 'action': 'GETATTR'}]

    """
    pairs = iter(_path_to_elements(path, root_element=root_element))
    if root_element:
        # We don't want the root item
        next(pairs)
    if include_actions is False:
        return [pair[0] for pair in pairs]
    return [{'element': pair[0], 'action': pair[1]} for pair in pairs]
def stringify_element(param, quote_str=None):
    """
    Quote a string path element so that it can be embedded in a path string.

    param : the element text.
    quote_str : optional format template (such as "'{}'") applied when
        ``param`` contains no quote characters at all.

    * Both quote kinds present and no ``quote_str``: every quote character is
      prefixed with the escape marker and the result is wrapped in double quotes.
    * Otherwise, a single quote present: wrapped in double quotes.
    * Otherwise, a double quote present: wrapped in single quotes.
    * No quotes: ``param`` itself when ``quote_str`` is None, else
      ``quote_str.format(param)``.
    """
    contains_single = "'" in param
    contains_double = '"' in param

    if contains_single and contains_double and not quote_str:
        escaped = ''.join(
            '𝆺𝅥𝅯' + ch if ch in {'"', "'"} else ch
            for ch in param
        )
        return '"' + escaped + '"'
    if contains_single:
        return f'"{param}"'
    if contains_double:
        return f"'{param}'"
    if quote_str is None:
        return param
    return quote_str.format(param)
def stringify_path(path, root_element=DEFAULT_FIRST_ELEMENT, quote_str="'{}'"):
    """
    Gets the path as a string.

    ``path`` is either a sequence of bare elements or a sequence of
    ``(element, action)`` pairs. It counts as having actions when
    ``path[0][1]`` is GET or GETATTR.

    When the path carries no actions, every element is given the GET action
    except the first one, which takes its action from ``root_element[1]``.
    So with ``root_element=('root', GET)``, ``[1, 2, 'age']`` becomes
    ``root[1][2]['age']``, while with the default root element (whose action
    is GETATTR) the first element is rendered with a dot instead.

    An empty path returns just the root name, ``root_element[0]``.
    """
    if not path:
        return root_element[0]
    pieces = [root_element[0]]

    try:
        has_actions = path[0][1] in {GET, GETATTR}
    except (KeyError, IndexError, TypeError):
        has_actions = False

    if not has_actions:
        path = [(item, GET) for item in path]
        # The action for the first element might be a GET or GETATTR.
        # We update the action based on the root_element.
        path[0] = (path[0][0], root_element[1])

    for element, action in path:
        if action == GET:
            # String keys are quoted; everything else is rendered as-is.
            if isinstance(element, str):
                element = stringify_element(element, quote_str)
            pieces.append(f"[{element}]")
        else:
            pieces.append(f".{element}")
    return ''.join(pieces)