aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/deepdiff/path.py
blob: ee63b5b990917743f8eed6513f7b4d157453e598 (about) (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
import logging
from ast import literal_eval
from functools import lru_cache

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Action tags stored next to every path element.
# GETATTR: the element is read with getattr(obj, elem).
# GET: the element is read with obj[elem].
GETATTR = 'GETATTR'
GET = 'GET'


class PathExtractionError(ValueError):
    """ValueError subclass named for path-extraction failures.

    It adds nothing to ValueError; it is not raised by the code visible in this module.
    """


class RootCanNotBeModified(ValueError):
    """ValueError subclass named for attempts to modify the root object.

    It adds nothing to ValueError; it is not raised by the code visible in this module.
    """


def _add_to_elements(elements, elem, inside):
    # Ignore private items
    if not elem:
        return
    if not elem.startswith('__'):
        remove_quotes = False
        if '𝆺𝅥𝅯' in elem or '\\' in elem:
            remove_quotes = True
        else:
            try:
                elem = literal_eval(elem)
                remove_quotes = False
            except (ValueError, SyntaxError):
                remove_quotes = True
        if remove_quotes and elem[0] == elem[-1] and elem[0] in {'"', "'"}:
            elem = elem[1: -1]
        action = GETATTR if inside == '.' else GET
        elements.append((elem, action))


# Default ``root_element`` of the path helpers in this module: an (element, action) pair.
DEFAULT_FIRST_ELEMENT = ('root', GETATTR)


@lru_cache(maxsize=1024 * 128)
def _path_to_elements(path, root_element=DEFAULT_FIRST_ELEMENT):
    """
    Given a path, extract the elements that form the path together with the
    most likely action needed to retrieve each one.

    path : a path string such as "root[4.3].b['a3']". A tuple (or list) is
        treated as already parsed and returned unchanged.
    root_element : an (element, action) pair placed first in the result when
        truthy; pass None to leave it out.

    Returns a tuple of (element, action) pairs, where action is GET or GETATTR.

        >>> from deepdiff.path import _path_to_elements
        >>> path = "root[4.3].b['a3']"
        >>> _path_to_elements(path, root_element=None)
        ((4.3, 'GET'), ('b', 'GETATTR'), ('a3', 'GET'))
    """
    # NOTE(review): lru_cache hashes its arguments, so a list path presumably raises
    # TypeError before this check is reached - confirm whether list input is expected.
    if isinstance(path, (tuple, list)):
        return path
    elements = []
    if root_element:
        elements.append(root_element)
    elem = ''  # text of the token currently being collected
    inside = False  # False, '[' or '.': which kind of accessor we are currently in
    prev_char = None
    path = path[4:]  # drop the first 4 characters (the length of the leading "root")
    brackets = []  # '[' characters opened while not inside any accessor
    inside_quotes = False
    quote_used = ''  # the quote character that opened the current quoted token
    for char in path:
        # The character right after the escape marker is always taken literally.
        if prev_char == '𝆺𝅥𝅯':
            elem += char
        elif char in {'"', "'"}:
            elem += char
            # If we are inside and the quote is not what we expected, the quote is not closing
            if not(inside_quotes and quote_used != char):
                inside_quotes = not inside_quotes
                if inside_quotes:
                    quote_used = char
                else:
                    # Closing quote: the quoted token is complete.
                    _add_to_elements(elements, elem, inside)
                    elem = ''
                    quote_used = ''
        elif inside_quotes:
            # Everything between quotes belongs to the token, brackets and dots included.
            elem += char
        elif char == '[':
            if inside == '.':
                # A bracket ends the attribute name collected so far.
                _add_to_elements(elements, elem, inside)
                inside = '['
                elem = ''
            # we are already inside. The bracket is a part of the word.
            elif inside == '[':
                elem += char
            else:
                inside = '['
                brackets.append('[')
                elem = ''
        elif char == '.':
            if inside == '[':
                # A dot inside brackets is part of the token, e.g. the float 4.3.
                elem += char
            elif inside == '.':
                # A second dot ends the previous attribute name.
                _add_to_elements(elements, elem, inside)
                elem = ''
            else:
                inside = '.'
                elem = ''
        elif char == ']':
            if brackets and brackets[-1] == '[':
                brackets.pop()
            if brackets:
                # Still nested: the bracket is part of the token.
                elem += char
            else:
                # Outermost bracket closed: emit the token (empty tokens are ignored downstream).
                _add_to_elements(elements, elem, inside)
                elem = ''
                inside = False
        else:
            elem += char
        prev_char = char
    # Flush a trailing token, e.g. an attribute name at the very end of the path.
    if elem:
        _add_to_elements(elements, elem, inside)
    return tuple(elements)


def _get_nested_obj(obj, elements, next_element=None):
    """
    Follow ``elements`` starting at ``obj`` and return whatever is reached.

    obj          : the object to start from.
    elements     : iterable of ``(element, action)`` pairs.
    next_element : accepted but not used by this function.

    A GET step indexes with ``[element]``, a GETATTR step uses ``getattr``.
    Steps with any other action leave the current object unchanged.
    Lookup errors are not caught here.
    """
    current = obj
    for step in elements:
        key, how = step
        if how == GET:
            current = current[key]
        elif how == GETATTR:
            current = getattr(current, key)
    return current


def _guess_type(elements, elem, index, next_element):
    # If we are not at the last elements
    if index < len(elements) - 1:
        # We assume it is a nested dictionary not a nested list
        return {}
    if isinstance(next_element, int):
        return []
    return {}


def _get_nested_obj_and_force(obj, elements, next_element=None):
    """
    Follow ``elements`` starting at ``obj``, creating missing containers on the
    way, and return whatever is reached.

    obj          : the object to start from; it may be mutated.
    elements     : sequence of ``(element, action)`` pairs.
    next_element : the element that will be accessed after the end of
        ``elements``; forwarded to ``_guess_type`` so that the last created
        container can be a list when an int index follows.

    Behaviour per step:

    - GET, item exists: descend into it.
    - GET, ``KeyError``: store a container chosen by ``_guess_type`` under the
      key and descend into it.
    - GET, ``IndexError`` on a list with an int index at or past the end: pad
      the list with ``None`` up to that index, append a container chosen by
      ``_guess_type`` and descend into it.
    - GET, ``IndexError`` on an empty list when the previous element is truthy:
      replace that empty list inside its parent with ``{elem: container}`` and
      descend into the container.
    - GET, any other ``IndexError``: the step is skipped and the current object
      stays as it was.
    - GETATTR: plain ``getattr``; errors are not caught.
    """
    prev_elem = None
    prev_action = None
    prev_obj = obj
    for index, (elem, action) in enumerate(elements):
        _prev_obj = obj
        if action == GET:
            try:
                obj = obj[elem]
                prev_obj = _prev_obj
            except KeyError:
                obj[elem] = _guess_type(elements, elem, index, next_element)
                obj = obj[elem]
                prev_obj = _prev_obj
            except IndexError:
                if isinstance(obj, list) and isinstance(elem, int) and elem >= len(obj):
                    # Pad so that the appended container lands exactly at index ``elem``.
                    obj.extend([None] * (elem - len(obj)))
                    # All four arguments belong to _guess_type; list.append takes exactly one.
                    obj.append(_guess_type(elements, elem, index, next_element))
                    obj = obj[-1]
                    prev_obj = _prev_obj
                elif isinstance(obj, list) and len(obj) == 0 and prev_elem:
                    # We ran into an empty list that should have been a dictionary
                    # We need to change it from an empty list to a dictionary
                    obj = {elem: _guess_type(elements, elem, index, next_element)}
                    # Re-attach the new dict to the parent the same way it was reached.
                    if prev_action == GET:
                        prev_obj[prev_elem] = obj
                    else:
                        setattr(prev_obj, prev_elem, obj)
                    obj = obj[elem]
        elif action == GETATTR:
            obj = getattr(obj, elem)
            prev_obj = _prev_obj
        prev_elem = elem
        prev_action = action
    return obj


def extract(obj, path):
    """
    Return the item of ``obj`` that ``path`` points to.

    The path is parsed without a root element and then followed one element at
    a time, using item access or attribute access as the path dictates.

    Example:

        >>> from deepdiff import extract
        >>> obj = {1: [{'2': 'b'}, 3], 2: [4, 5]}
        >>> path = "root[1][0]['2']"
        >>> extract(obj, path)
        'b'

    extract can be combined with DeepDiff results, or with the search and
    :ref:`deepsearch_label` modules. For example:

        >>> from deepdiff import grep
        >>> obj = {1: [{'2': 'b'}, 3], 2: [4, 5]}
        >>> result = obj | grep(5)
        >>> result
        {'matched_values': ['root[2][1]']}
        >>> result['matched_values'][0]
        'root[2][1]'
        >>> path = result['matched_values'][0]
        >>> extract(obj, path)
        5


    .. note::
        Even when DeepDiff gives you a path to an item in a set, Python sets
        cannot be indexed, so extracting that item from a set raises an error.
        If you want to be able to get items from sets, use the SetOrdered
        module to generate the sets.
        In fact DeepDiff uses SetOrdered as a dependency.

        >>> from deepdiff import grep, extract
        >>> obj = {"a", "b"}
        >>> obj | grep("b")
        Set item detected in the path.'set' objects do NOT support indexing. But DeepSearch will still report a path.
        {'matched_values': SetOrdered(['root[0]'])}
        >>> extract(obj, 'root[0]')
        Traceback (most recent call last):
          ...
        TypeError: 'set' object is not subscriptable
        >>> from orderly_set import SetOrdered
        >>> obj = SetOrdered(["a", "b"])
        >>> extract(obj, 'root[0]')
        'a'

    """
    return _get_nested_obj(obj, _path_to_elements(path, root_element=None))


def parse_path(path, root_element=DEFAULT_FIRST_ELEMENT, include_actions=False):
    """
    Parse a path into a machine-readable list.

    **Parameters**

    path : A string
        The path string such as "root[1][2]['age']"

    root_element: tuple, default=DEFAULT_FIRST_ELEMENT
        The (element, action) pair that stands for the root of the path.
        When it is truthy, the first parsed item (the root) is left out of the result.

    include_actions: boolean, default=False
        If it is exactly False, only the elements are returned. Otherwise each
        item is a dictionary holding the element and the action required to
        retrieve it.

    **Examples**

        >>> from deepdiff import parse_path
        >>> parse_path("root[1][2]['age']")
        [1, 2, 'age']
        >>> parse_path("root[1][2]['age']", include_actions=True)
        [{'element': 1, 'action': 'GET'}, {'element': 2, 'action': 'GET'}, {'element': 'age', 'action': 'GET'}]
        >>>
        >>> parse_path("root['joe'].age")
        ['joe', 'age']
        >>> parse_path("root['joe'].age", include_actions=True)
        [{'element': 'joe', 'action': 'GET'}, {'element': 'age', 'action': 'GETATTR'}]

    """
    parsed = iter(_path_to_elements(path, root_element=root_element))
    if root_element:
        # The root item itself is not part of the answer.
        next(parsed)
    if include_actions is not False:
        return [{'element': item[0], 'action': item[1]} for item in parsed]
    return [item[0] for item in parsed]


def stringify_element(param, quote_str=None):
    """
    Quote a string path element so it can be placed inside ``[...]``.

    param     : the element text.
    quote_str : optional format string such as ``"'{}'"`` used when ``param``
        holds no quote characters at all.

    Rules, checked in this order:

    - both quote kinds present and ``quote_str`` is falsy: every quote
      character is prefixed with the escape marker and the result is wrapped
      in double quotes;
    - a single quote is present: wrap in double quotes;
    - a double quote is present: wrap in single quotes;
    - no quotes: return ``param`` itself when ``quote_str`` is None, otherwise
      ``quote_str.format(param)``.
    """
    contains_single = "'" in param
    contains_double = '"' in param

    if contains_single and contains_double and not quote_str:
        escaped = ''.join(
            '𝆺𝅥𝅯' + ch if ch in {'"', "'"} else ch
            for ch in param
        )
        return '"' + escaped + '"'
    if contains_single:
        return f'"{param}"'
    if contains_double:
        return f"'{param}'"
    if quote_str is None:
        return param
    return quote_str.format(param)


def stringify_path(path, root_element=DEFAULT_FIRST_ELEMENT, quote_str="'{}'"):
    """
    Build the string form of a path.

    path         : sequence of plain elements, or of ``(element, action)``
        pairs. It counts as already carrying actions when the second item of
        its first entry is GET or GETATTR.
    root_element : ``(name, action)`` pair. The name starts the result; the
        action is applied to the first element when ``path`` has no actions.
    quote_str    : format string handed to ``stringify_element`` for string
        elements that are accessed with GET.

    An empty path yields just the root name. GET elements are rendered as
    ``[element]``, everything else as ``.element``.

    For example ``[1, 2, 'age']`` becomes ``root[1][2]['age']`` when the root
    action is GET, and ``root.1[2]['age']`` with the default GETATTR root action.
    """
    root_name = root_element[0]
    if not path:
        return root_name

    # Decide whether the caller already supplied (element, action) pairs.
    try:
        carries_actions = path[0][1] in {GET, GETATTR}
    except (KeyError, IndexError, TypeError):
        carries_actions = False

    if not carries_actions:
        tagged = [(item, GET) for item in path]
        # The action for the first element might be a GET or GETATTR; it follows the root_element.
        tagged[0] = (tagged[0][0], root_element[1])
        path = tagged

    pieces = [root_name]
    for element, action in path:
        if action == GET:
            if isinstance(element, str):
                element = stringify_element(element, quote_str)
            pieces.append(f"[{element}]")
        else:
            pieces.append(f".{element}")
    return ''.join(pieces)