1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
|
import logging
from ast import literal_eval
from functools import lru_cache
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Action tags paired with every path element:
#   GETATTR -> the element is reached with getattr(obj, elem)
#   GET     -> the element is reached with obj[elem]
GETATTR = 'GETATTR'
GET = 'GET'
class PathExtractionError(ValueError):
    """
    ValueError subclass named for failures while extracting a path.

    NOTE(review): nothing in this part of the module raises it; confirm the
    exact conditions against the callers that do.
    """
class RootCanNotBeModified(ValueError):
    """
    ValueError subclass named for attempts to modify the root object.

    NOTE(review): nothing in this part of the module raises it; confirm the
    exact conditions against the callers that do.
    """
def _add_to_elements(elements, elem, inside):
# Ignore private items
if not elem:
return
if not elem.startswith('__'):
remove_quotes = False
if '𝆺𝅥𝅯' in elem or '\\' in elem:
remove_quotes = True
else:
try:
elem = literal_eval(elem)
remove_quotes = False
except (ValueError, SyntaxError):
remove_quotes = True
if remove_quotes and elem[0] == elem[-1] and elem[0] in {'"', "'"}:
elem = elem[1: -1]
action = GETATTR if inside == '.' else GET
elements.append((elem, action))
# Default (name, action) pair describing the root of a path. It is the default
# ``root_element`` of _path_to_elements, parse_path and stringify_path.
DEFAULT_FIRST_ELEMENT = ('root', GETATTR)
@lru_cache(maxsize=1024 * 128)
def _path_to_elements(path, root_element=DEFAULT_FIRST_ELEMENT):
    """
    Given a path, it extracts the elements that form the path and their relevant most likely retrieval action.

    A ``path`` that is already a tuple or list is returned unchanged.
    Otherwise the first four characters of the string (expected to be the
    literal ``root``) are dropped and the rest is scanned one character at a
    time. ``[...]`` sections become GET elements and ``.name`` sections become
    GETATTR elements; each finished token is handed to ``_add_to_elements``.

    When ``root_element`` is truthy it is placed first in the result.

    Returns a tuple of ``(element, action)`` pairs.

    >>> from deepdiff import _path_to_elements
    >>> path = "root[4.3].b['a3']"
    >>> _path_to_elements(path, root_element=None)
    ((4.3, 'GET'), ('b', 'GETATTR'), ('a3', 'GET'))
    """
    # NOTE: results are memoised by lru_cache, which requires hashable
    # arguments. A list passed through the cached wrapper therefore raises
    # TypeError before this check runs; tuples reach it normally.
    if isinstance(path, (tuple, list)):
        return path
    elements = []
    if root_element:
        elements.append(root_element)
    elem = ''  # characters of the token currently being collected
    inside = False  # False, '[' or '.': which kind of section we are in
    prev_char = None
    path = path[4:]  # drop the first four characters, i.e. the leading "root"
    brackets = []  # currently open '[' that started a section
    inside_quotes = False
    quote_used = ''  # the quote character that opened the current quoted run
    for char in path:
        # The previous character was the escape marker that stringify_element
        # puts in front of quotes: take this character literally.
        if prev_char == '𝆺𝅥𝅯':
            elem += char
        elif char in {'"', "'"}:
            elem += char
            # If we are inside and the quote is not what we expected, the quote is not closing
            if not(inside_quotes and quote_used != char):
                inside_quotes = not inside_quotes
                if inside_quotes:
                    # Opening quote: remember which one must close it.
                    quote_used = char
                else:
                    # Closing quote: the quoted token is complete.
                    _add_to_elements(elements, elem, inside)
                    elem = ''
                    quote_used = ''
        elif inside_quotes:
            # Everything between quotes belongs to the token verbatim.
            elem += char
        elif char == '[':
            if inside == '.':
                # A bracket ends the attribute name collected so far.
                _add_to_elements(elements, elem, inside)
                inside = '['
                elem = ''
            # we are already inside. The bracket is a part of the word.
            elif inside == '[':
                elem += char
            else:
                inside = '['
                brackets.append('[')
                elem = ''
        elif char == '.':
            if inside == '[':
                # A dot inside brackets is part of the token (e.g. a float).
                elem += char
            elif inside == '.':
                # A second dot ends the previous attribute name.
                _add_to_elements(elements, elem, inside)
                elem = ''
            else:
                inside = '.'
                elem = ''
        elif char == ']':
            if brackets and brackets[-1] == '[':
                brackets.pop()
            if brackets:
                # An outer bracket is still open: keep ']' as part of the token.
                elem += char
            else:
                _add_to_elements(elements, elem, inside)
                elem = ''
                inside = False
        else:
            elem += char
        prev_char = char
    # Flush a trailing token, e.g. an attribute name at the end of the path.
    if elem:
        _add_to_elements(elements, elem, inside)
    return tuple(elements)
def _get_nested_obj(obj, elements, next_element=None):
for (elem, action) in elements:
if action == GET:
obj = obj[elem]
elif action == GETATTR:
obj = getattr(obj, elem)
return obj
def _guess_type(elements, elem, index, next_element):
# If we are not at the last elements
if index < len(elements) - 1:
# We assume it is a nested dictionary not a nested list
return {}
if isinstance(next_element, int):
return []
return {}
def _get_nested_obj_and_force(obj, elements, next_element=None):
    """
    Follow ``elements`` starting at ``obj`` like ``_get_nested_obj``, but
    create the containers that are missing along the way.

    For a GET action:

    - a missing key (KeyError) is filled with the container chosen by
      ``_guess_type`` and the walk descends into it;
    - a list index past the end (IndexError) pads the list with ``None`` up
      to that index, appends the container chosen by ``_guess_type`` and
      descends into it;
    - an IndexError on an empty list, when a previous element exists,
      replaces that list on its parent with ``{elem: <guessed container>}``
      and descends into the new entry;
    - any other IndexError leaves the current object unchanged and the walk
      simply moves on to the next element.

    A GETATTR action is a plain ``getattr``; nothing is created for it.

    ``next_element`` is the element that will follow the last entry of
    ``elements``; it is only passed through to ``_guess_type``.

    Returns the object reached at the end of the walk.
    """
    prev_elem = None
    prev_action = None
    prev_obj = obj
    for index, (elem, action) in enumerate(elements):
        _prev_obj = obj
        if action == GET:
            try:
                obj = obj[elem]
                prev_obj = _prev_obj
            except KeyError:
                obj[elem] = _guess_type(elements, elem, index, next_element)
                obj = obj[elem]
                prev_obj = _prev_obj
            except IndexError:
                if isinstance(obj, list) and isinstance(elem, int) and elem >= len(obj):
                    # Pad so that the new container lands exactly at index ``elem``.
                    obj.extend([None] * (elem - len(obj)))
                    # ``next_element`` belongs to _guess_type, not to append:
                    # with the parenthesis misplaced this line raised TypeError.
                    obj.append(_guess_type(elements, elem, index, next_element))
                    obj = obj[-1]
                    prev_obj = _prev_obj
                elif isinstance(obj, list) and len(obj) == 0 and prev_elem:
                    # We ran into an empty list that should have been a dictionary
                    # We need to change it from an empty list to a dictionary
                    obj = {elem: _guess_type(elements, elem, index, next_element)}
                    if prev_action == GET:
                        prev_obj[prev_elem] = obj
                    else:
                        setattr(prev_obj, prev_elem, obj)
                    obj = obj[elem]
        elif action == GETATTR:
            obj = getattr(obj, elem)
            prev_obj = _prev_obj
        prev_elem = elem
        prev_action = action
    return obj
def extract(obj, path):
    """
    Return the item inside ``obj`` that ``path`` points to.

    The path is split into its elements (the root itself is not included)
    and ``obj`` is then walked element by element.

    Example:

        >>> from deepdiff import extract
        >>> obj = {1: [{'2': 'b'}, 3], 2: [4, 5]}
        >>> path = "root[1][0]['2']"
        >>> extract(obj, path)
        'b'

    extract can be combined with DeepDiff results, or with the search and
    :ref:`deepsearch_label` modules. For example:

        >>> from deepdiff import grep
        >>> obj = {1: [{'2': 'b'}, 3], 2: [4, 5]}
        >>> result = obj | grep(5)
        >>> result
        {'matched_values': ['root[2][1]']}
        >>> path = result['matched_values'][0]
        >>> path
        'root[2][1]'
        >>> extract(obj, path)
        5

    .. note::
        DeepDiff may report a path to an item in a set, but Python sets do
        not support indexing, so extracting such a path raises an error.
        To be able to get items from sets, build them with the SetOrdered
        module, which DeepDiff itself uses as a dependency.

        >>> from deepdiff import grep, extract
        >>> obj = {"a", "b"}
        >>> obj | grep("b")
        Set item detected in the path.'set' objects do NOT support indexing. But DeepSearch will still report a path.
        {'matched_values': SetOrdered(['root[0]'])}
        >>> extract(obj, 'root[0]')
        Traceback (most recent call last):
            ...
        TypeError: 'set' object is not subscriptable
        >>> from orderly_set import SetOrdered
        >>> obj = SetOrdered(["a", "b"])
        >>> extract(obj, 'root[0]')
        'a'
    """
    return _get_nested_obj(obj, _path_to_elements(path, root_element=None))
def parse_path(path, root_element=DEFAULT_FIRST_ELEMENT, include_actions=False):
    """
    Parse a path into a machine readable list of its elements.

    **Parameters**

    path : A string
        The path string such as "root[1][2]['age']"

    root_element: tuple of (name, action), default=DEFAULT_FIRST_ELEMENT
        Describes the root of the path. When it is truthy, the first parsed
        element (the root itself) is left out of the result.

    include_actions: boolean, default=False
        When it is exactly False, only the elements are returned. Otherwise
        every element is returned together with the action required to
        retrieve it, as a dictionary with 'element' and 'action' keys.

    **Examples**

        >>> from deepdiff import parse_path
        >>> parse_path("root[1][2]['age']")
        [1, 2, 'age']
        >>> parse_path("root[1][2]['age']", include_actions=True)
        [{'element': 1, 'action': 'GET'}, {'element': 2, 'action': 'GET'}, {'element': 'age', 'action': 'GET'}]
        >>>
        >>> parse_path("root['joe'].age")
        ['joe', 'age']
        >>> parse_path("root['joe'].age", include_actions=True)
        [{'element': 'joe', 'action': 'GET'}, {'element': 'age', 'action': 'GETATTR'}]
    """
    remaining = iter(_path_to_elements(path, root_element=root_element))
    if root_element:
        # The first pair is the root itself; callers only want what follows.
        next(remaining)
    if include_actions is not False:
        return [{'element': pair[0], 'action': pair[1]} for pair in remaining]
    return [pair[0] for pair in remaining]
def stringify_element(param, quote_str=None):
    """
    Quote a single path element so it can be embedded in a path string.

    - Contains both quote kinds and no ``quote_str`` was given: every quote
      is prefixed with the escape marker and the result is wrapped in
      double quotes.
    - Contains a single quote: wrapped in double quotes.
    - Contains only double quotes: wrapped in single quotes.
    - Contains no quotes: returned unchanged when ``quote_str`` is None,
      otherwise formatted with ``quote_str.format(param)``.
    """
    contains_single = "'" in param
    contains_double = '"' in param

    if contains_single and contains_double and not quote_str:
        escaped = ''.join(
            '𝆺𝅥𝅯' + ch if ch in {'"', "'"} else ch
            for ch in param
        )
        return '"' + escaped + '"'
    if contains_single:
        return f'"{param}"'
    if contains_double:
        return f"'{param}'"
    if quote_str is None:
        return param
    return quote_str.format(param)
def stringify_path(path, root_element=DEFAULT_FIRST_ELEMENT, quote_str="'{}'"):
    """
    Render a path, given as a sequence of elements, as a path string.

    ``path`` holds either bare elements such as ``[1, 2, 'age']`` or
    ``(element, action)`` pairs; the latter is detected by looking at the
    first entry. Bare elements are all given the GET action, except the
    first one, which takes the action of ``root_element``.

    GET elements are rendered as ``[element]`` (strings are quoted through
    ``stringify_element`` with ``quote_str``), everything else as
    ``.element``. The result starts with the root's name; an empty path gives
    just the root's name.

    For example ``[(1, 'GET'), (2, 'GET'), ('age', 'GET')]`` becomes
    ``root[1][2]['age']``.
    """
    if not path:
        return root_element[0]
    pieces = [root_element[0]]

    try:
        has_actions = path[0][1] in {GET, GETATTR}
    except (KeyError, IndexError, TypeError):
        has_actions = False

    if not has_actions:
        path = [(item, GET) for item in path]
        # The action for the first element might be a GET or GETATTR.
        # It is taken from the root_element.
        path[0] = (path[0][0], root_element[1])

    for element, action in path:
        is_get = action == GET
        if is_get and isinstance(element, str):
            element = stringify_element(element, quote_str)
        pieces.append(f"[{element}]" if is_get else f".{element}")
    return ''.join(pieces)
|