1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
|
#!/usr/bin/env python
import re
from collections.abc import MutableMapping, Iterable
from deepdiff.helper import SetOrdered
import logging
from deepdiff.helper import (
strings, numbers, add_to_frozen_set, get_doc, dict_, RE_COMPILED_TYPE, ipranges
)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Documentation text returned by get_doc for 'search_doc.rst' (presumably the
# file's contents -- verify against deepdiff.helper.get_doc); it is reused
# below as the docstring of the `grep` class.
doc = get_doc('search_doc.rst')
class DeepSearch(dict):
    r"""
    **DeepSearch**

    Deep Search inside objects to find the item matching your criteria.

    **Parameters**

    obj : The object to search within

    item : The item to search for

    verbose_level : int >= 0, default = 1.
        Verbose level one shows the paths of found items.
        Verbose level 2 shows the path and value of the found items.

    exclude_paths: iterable, default = empty.
        List of paths to exclude from the report.

    exclude_regex_paths: iterable of regex patterns, default = empty.
        Every pattern is compiled with re.compile; a path for which any
        pattern's ``search`` succeeds is excluded.

    exclude_types: iterable, default = empty.
        List of object types to exclude from the report.

    case_sensitive: Boolean, default = False
        Only honoured when the item is a string; any other item is always
        compared case sensitively.

    match_string: Boolean, default = False
        If True, the value of the object or its children have to exactly match the item.
        If False, the value of the item can be a part of the value of the object or its children

    use_regexp: Boolean, default = False
        If True, the item is compiled as a regular expression and matched
        with ``search``. When the search is case insensitive the pattern is
        compiled with re.IGNORECASE (the pattern text itself is never altered).

    strict_checking: Boolean, default = True
        If True, it will check the type of the object to match, so when searching for '1234',
        it will NOT match the int 1234. Currently this only affects the numeric values searching.

    **Returns**

        A DeepSearch object that has the matched paths and matched values.

    **Supported data types**

    int, string, unicode, dictionary, list, tuple, set, frozenset, OrderedDict, NamedTuple and custom objects!

    **Examples**

    Importing
        >>> from deepdiff import DeepSearch
        >>> from pprint import pprint

    Search in list for string
        >>> obj = ["long somewhere", "string", 0, "somewhere great!"]
        >>> item = "somewhere"
        >>> ds = DeepSearch(obj, item, verbose_level=2)
        >>> print(ds)
        {'matched_values': {'root[3]': 'somewhere great!', 'root[0]': 'long somewhere'}}

    Search in nested data for string
        >>> obj = ["something somewhere", {"long": "somewhere", "string": 2, 0: 0, "somewhere": "around"}]
        >>> item = "somewhere"
        >>> ds = DeepSearch(obj, item, verbose_level=2)
        >>> pprint(ds, indent=2)
        { 'matched_paths': {"root[1]['somewhere']": 'around'},
          'matched_values': { 'root[0]': 'something somewhere',
                              "root[1]['long']": 'somewhere'}}

    """

    # Number of "set detected" warnings logged so far; the first increment on
    # an instance shadows this class-level value with an instance attribute.
    warning_num = 0

    def __init__(self,
                 obj,
                 item,
                 exclude_paths=SetOrdered(),
                 exclude_regex_paths=SetOrdered(),
                 exclude_types=SetOrdered(),
                 verbose_level=1,
                 case_sensitive=False,
                 match_string=False,
                 use_regexp=False,
                 strict_checking=True,
                 **kwargs):
        """Run the search immediately and store the results in this dict.

        Raises:
            ValueError: if any unknown keyword argument is passed.
            TypeError: if ``use_regexp`` is set and ``item`` cannot be
                compiled by ``re.compile``.
        """
        if kwargs:
            raise ValueError((
                "The following parameter(s) are not valid: %s\n"
                "The valid parameters are obj, item, exclude_paths, exclude_regex_paths,\n"
                "exclude_types, verbose_level, case_sensitive, match_string, use_regexp\n"
                "and strict_checking."
            ) % ', '.join(kwargs.keys()))

        self.obj = obj
        # Case folding only makes sense for string items.
        self.case_sensitive = case_sensitive if isinstance(item, strings) else True
        fold_case = not self.case_sensitive
        # Never lowercase a regular expression: that would turn \D, \W, \S,
        # \B, \A, \Z ... into different (or invalid) escapes. Regex patterns
        # get re.IGNORECASE below instead.
        if fold_case and not use_regexp:
            item = item.lower()
        self.exclude_paths = SetOrdered(exclude_paths)
        self.exclude_regex_paths = [re.compile(exclude_regex_path) for exclude_regex_path in exclude_regex_paths]
        self.exclude_types = SetOrdered(exclude_types)
        self.exclude_types_tuple = tuple(
            exclude_types)  # we need tuple for checking isinstance
        self.verbose_level = verbose_level
        self.update(
            matched_paths=self.__set_or_dict(),
            matched_values=self.__set_or_dict(),
            unprocessed=[])
        self.use_regexp = use_regexp
        if not strict_checking and (isinstance(item, numbers) or isinstance(item, ipranges)):
            item = str(item)
        if self.use_regexp:
            try:
                if fold_case:
                    item = re.compile(item, re.IGNORECASE)
                else:
                    item = re.compile(item)
            except TypeError as e:
                raise TypeError(f"The passed item of {item} is not usable for regex: {e}") from None
        self.strict_checking = strict_checking

        # Cases where user wants to match exact string item
        self.match_string = match_string

        self.__search(obj, item, parents_ids=frozenset({id(obj)}))

        # Drop the result categories that ended up empty.
        empty_keys = [k for k, v in self.items() if not v]

        for k in empty_keys:
            del self[k]

    def __set_or_dict(self):
        """Return the result container: a dict at verbose_level >= 2, else an ordered set."""
        return dict_() if self.verbose_level >= 2 else SetOrdered()

    def __report(self, report_key, key, value):
        """Record ``key`` under ``self[report_key]``; ``value`` is kept only at verbose_level >= 2."""
        if self.verbose_level >= 2:
            self[report_key][key] = value
        else:
            self[report_key].add(key)

    def __search_obj(self,
                     obj,
                     item,
                     parent,
                     parents_ids=frozenset(),
                     is_namedtuple=False):
        """Search objects

        The object is turned into a name -> value mapping (namedtuple fields,
        non-dunder attributes from ``dir``, or ``__slots__`` as a fallback)
        which is then searched like a dictionary, with attribute-style paths.
        """
        found = False
        if obj == item:
            found = True
            # We report the match but also continue inside the match to see if there are
            # further matches inside the `looped` object.
            self.__report(report_key='matched_values', key=parent, value=obj)

        try:
            if is_namedtuple:
                obj = obj._asdict()
            else:
                # Skip magic methods. Slightly hacky, but unless people are defining
                # new magic methods they want to search, it should work fine.
                obj = {i: getattr(obj, i) for i in dir(obj)
                       if not (i.startswith('__') and i.endswith('__'))}
        except AttributeError:
            try:
                obj = {i: getattr(obj, i) for i in obj.__slots__}
            except AttributeError:
                # Nothing to introspect: remember the path unless the object
                # itself already matched.
                if not found:
                    self['unprocessed'].append("%s" % parent)

                return

        self.__search_dict(
            obj, item, parent, parents_ids, print_as_attribute=True)

    def __skip_this(self, item, parent):
        """Return True when ``parent`` is an excluded path or ``item`` is an excluded type."""
        if parent in self.exclude_paths:
            return True
        if self.exclude_regex_paths and any(
                exclude_regex_path.search(parent) for exclude_regex_path in self.exclude_regex_paths):
            return True
        return isinstance(item, self.exclude_types_tuple)

    def __search_dict(self,
                      obj,
                      item,
                      parent,
                      parents_ids=frozenset(),
                      print_as_attribute=False):
        """Search dictionaries

        Each key's path is checked against the item (reported under
        'matched_paths') and each value is searched recursively.
        """
        if print_as_attribute:
            parent_text = "%s.%s"
        else:
            parent_text = "%s[%s]"

        obj_keys = SetOrdered(obj.keys())

        for item_key in obj_keys:
            if not print_as_attribute and isinstance(item_key, strings):
                item_key_str = "'%s'" % item_key
            else:
                item_key_str = item_key

            obj_child = obj[item_key]

            item_id = id(obj_child)

            # Do not descend into an object that is already on the current path.
            if parents_ids and item_id in parents_ids:
                continue

            parents_ids_added = add_to_frozen_set(parents_ids, item_id)

            new_parent = parent_text % (parent, item_key_str)
            new_parent_cased = new_parent if self.case_sensitive else new_parent.lower()

            if self.use_regexp:
                # `item` is a compiled pattern here; comparing its repr with
                # the path would be meaningless, so only the regex is used.
                path_matches = item.search(new_parent_cased)
            elif self.match_string:
                path_matches = str(item) == new_parent_cased
            else:
                path_matches = str(item) in new_parent_cased

            if path_matches:
                self.__report(
                    report_key='matched_paths',
                    key=new_parent,
                    value=obj_child)

            self.__search(
                obj_child,
                item,
                parent=new_parent,
                parents_ids=parents_ids_added)

    def __search_iterable(self,
                          obj,
                          item,
                          parent="root",
                          parents_ids=frozenset()):
        """Search iterables except dictionaries, sets and strings."""
        for i, thing in enumerate(obj):
            new_parent = "{}[{}]".format(parent, i)
            if self.__skip_this(thing, parent=new_parent):
                continue

            if self.case_sensitive or not isinstance(thing, strings):
                thing_cased = thing
            else:
                thing_cased = thing.lower()

            if not self.use_regexp and thing_cased == item:
                self.__report(
                    report_key='matched_values', key=new_parent, value=thing)
            else:
                item_id = id(thing)
                # Do not descend into an object that is already on the current path.
                if parents_ids and item_id in parents_ids:
                    continue
                parents_ids_added = add_to_frozen_set(parents_ids, item_id)
                self.__search(thing, item, new_parent, parents_ids_added)

    def __search_str(self, obj, item, parent):
        """Compare strings"""
        obj_text = obj if self.case_sensitive else obj.lower()

        is_matched = False
        if self.use_regexp:
            is_matched = item.search(obj_text)
        elif (self.match_string and item == obj_text) or (not self.match_string and item in obj_text):
            is_matched = True
        if is_matched:
            self.__report(report_key='matched_values', key=parent, value=obj)

    def __search_numbers(self, obj, item, parent):
        """Compare a number with the item; its string form is also tried when strict_checking is off."""
        if (
            item == obj or (
                not self.strict_checking and (
                    item == str(obj) or (
                        self.use_regexp and item.search(str(obj))
                    )
                )
            )
        ):
            self.__report(report_key='matched_values', key=parent, value=obj)

    def __search_tuple(self, obj, item, parent, parents_ids):
        """Search a tuple, treating it as a namedtuple when it exposes ``_asdict``."""
        # Checking to see if it has _asdict. Which probably means it is a named
        # tuple.
        try:
            obj._asdict
        # It must be a normal tuple
        except AttributeError:
            self.__search_iterable(obj, item, parent, parents_ids)
        # We assume it is a namedtuple then
        else:
            self.__search_obj(
                obj, item, parent, parents_ids, is_namedtuple=True)

    def __search(self, obj, item, parent="root", parents_ids=frozenset()):
        """The main search method"""
        # NOTE(review): the exclude_types part of this check is applied to the
        # searched-for `item`, whereas __search_iterable applies it to each
        # element being visited -- confirm which one is intended.
        if self.__skip_this(item, parent):
            return

        elif isinstance(obj, strings) and isinstance(item, (strings, RE_COMPILED_TYPE)):
            self.__search_str(obj, item, parent)

        elif isinstance(obj, strings) and isinstance(item, numbers):
            return

        elif isinstance(obj, ipranges):
            self.__search_str(str(obj), item, parent)

        elif isinstance(obj, numbers):
            self.__search_numbers(obj, item, parent)

        elif isinstance(obj, MutableMapping):
            self.__search_dict(obj, item, parent, parents_ids)

        elif isinstance(obj, tuple):
            self.__search_tuple(obj, item, parent, parents_ids)

        elif isinstance(obj, (set, frozenset)):
            # Warn at most 10 times per instance.
            if self.warning_num < 10:
                logger.warning(
                    "Set item detected in the path."
                    "'set' objects do NOT support indexing. But DeepSearch will still report a path."
                )
                self.warning_num += 1
            self.__search_iterable(obj, item, parent, parents_ids)

        elif isinstance(obj, Iterable) and not isinstance(obj, strings):
            self.__search_iterable(obj, item, parent, parents_ids)

        else:
            self.__search_obj(obj, item, parent, parents_ids)
class grep:
    # The class docstring is the module-level `doc` text rather than a literal.
    __doc__ = doc

    def __init__(self, item, **kwargs):
        # Keep the search target and the extra options so they can be
        # forwarded to DeepSearch once the object to search is known.
        self.item, self.kwargs = item, kwargs

    def __ror__(self, other):
        # Reflected `|`: called for `other | grep(...)`, where `other` is the
        # object to search within.
        target, options = self.item, self.kwargs
        return DeepSearch(obj=other, item=target, **options)
if __name__ == "__main__":  # pragma: no cover
    # When executed as a script, run the doctest examples found in this
    # module's docstrings.
    from doctest import testmod
    testmod()
|