#!/usr/bin/env python import re from collections.abc import MutableMapping, Iterable from deepdiff.helper import SetOrdered import logging from deepdiff.helper import ( strings, numbers, add_to_frozen_set, get_doc, dict_, RE_COMPILED_TYPE, ipranges ) logger = logging.getLogger(__name__) doc = get_doc('search_doc.rst') class DeepSearch(dict): r""" **DeepSearch** Deep Search inside objects to find the item matching your criteria. **Parameters** obj : The object to search within item : The item to search for verbose_level : int >= 0, default = 1. Verbose level one shows the paths of found items. Verbose level 2 shows the path and value of the found items. exclude_paths: list, default = None. List of paths to exclude from the report. exclude_types: list, default = None. List of object types to exclude from the report. case_sensitive: Boolean, default = False match_string: Boolean, default = False If True, the value of the object or its children have to exactly match the item. If False, the value of the item can be a part of the value of the object or its children use_regexp: Boolean, default = False strict_checking: Boolean, default = True If True, it will check the type of the object to match, so when searching for '1234', it will NOT match the int 1234. Currently this only affects the numeric values searching. **Returns** A DeepSearch object that has the matched paths and matched values. **Supported data types** int, string, unicode, dictionary, list, tuple, set, frozenset, OrderedDict, NamedTuple and custom objects! **Examples** Importing >>> from deepdiff import DeepSearch >>> from pprint import pprint Search in list for string >>> obj = ["long somewhere", "string", 0, "somewhere great!"] >>> item = "somewhere" >>> ds = DeepSearch(obj, item, verbose_level=2) >>> print(ds) {'matched_values': {'root[3]': 'somewhere great!', 'root[0]': 'long somewhere'}} Search in nested data for string >>> obj = ["something somewhere", {"long": "somewhere", "string": 2, 0: 0, "somewhere": "around"}] >>> item = "somewhere" >>> ds = DeepSearch(obj, item, verbose_level=2) >>> pprint(ds, indent=2) { 'matched_paths': {"root[1]['somewhere']": 'around'}, 'matched_values': { 'root[0]': 'something somewhere', "root[1]['long']": 'somewhere'}} """ warning_num = 0 def __init__(self, obj, item, exclude_paths=SetOrdered(), exclude_regex_paths=SetOrdered(), exclude_types=SetOrdered(), verbose_level=1, case_sensitive=False, match_string=False, use_regexp=False, strict_checking=True, **kwargs): if kwargs: raise ValueError(( "The following parameter(s) are not valid: %s\n" "The valid parameters are obj, item, exclude_paths, exclude_types,\n" "case_sensitive, match_string and verbose_level." ) % ', '.join(kwargs.keys())) self.obj = obj self.case_sensitive = case_sensitive if isinstance(item, strings) else True item = item if self.case_sensitive else item.lower() self.exclude_paths = SetOrdered(exclude_paths) self.exclude_regex_paths = [re.compile(exclude_regex_path) for exclude_regex_path in exclude_regex_paths] self.exclude_types = SetOrdered(exclude_types) self.exclude_types_tuple = tuple( exclude_types) # we need tuple for checking isinstance self.verbose_level = verbose_level self.update( matched_paths=self.__set_or_dict(), matched_values=self.__set_or_dict(), unprocessed=[]) self.use_regexp = use_regexp if not strict_checking and (isinstance(item, numbers) or isinstance(item, ipranges)): item = str(item) if self.use_regexp: try: item = re.compile(item) except TypeError as e: raise TypeError(f"The passed item of {item} is not usable for regex: {e}") from None self.strict_checking = strict_checking # Cases where user wants to match exact string item self.match_string = match_string self.__search(obj, item, parents_ids=frozenset({id(obj)})) empty_keys = [k for k, v in self.items() if not v] for k in empty_keys: del self[k] def __set_or_dict(self): return dict_() if self.verbose_level >= 2 else SetOrdered() def __report(self, report_key, key, value): if self.verbose_level >= 2: self[report_key][key] = value else: self[report_key].add(key) def __search_obj(self, obj, item, parent, parents_ids=frozenset(), is_namedtuple=False): """Search objects""" found = False if obj == item: found = True # We report the match but also continue inside the match to see if there are # further matches inside the `looped` object. self.__report(report_key='matched_values', key=parent, value=obj) try: if is_namedtuple: obj = obj._asdict() else: # Skip magic methods. Slightly hacky, but unless people are defining # new magic methods they want to search, it should work fine. obj = {i: getattr(obj, i) for i in dir(obj) if not (i.startswith('__') and i.endswith('__'))} except AttributeError: try: obj = {i: getattr(obj, i) for i in obj.__slots__} except AttributeError: if not found: self['unprocessed'].append("%s" % parent) return self.__search_dict( obj, item, parent, parents_ids, print_as_attribute=True) def __skip_this(self, item, parent): skip = False if parent in self.exclude_paths: skip = True elif self.exclude_regex_paths and any( [exclude_regex_path.search(parent) for exclude_regex_path in self.exclude_regex_paths]): skip = True else: if isinstance(item, self.exclude_types_tuple): skip = True return skip def __search_dict(self, obj, item, parent, parents_ids=frozenset(), print_as_attribute=False): """Search dictionaries""" if print_as_attribute: parent_text = "%s.%s" else: parent_text = "%s[%s]" obj_keys = SetOrdered(obj.keys()) for item_key in obj_keys: if not print_as_attribute and isinstance(item_key, strings): item_key_str = "'%s'" % item_key else: item_key_str = item_key obj_child = obj[item_key] item_id = id(obj_child) if parents_ids and item_id in parents_ids: continue parents_ids_added = add_to_frozen_set(parents_ids, item_id) new_parent = parent_text % (parent, item_key_str) new_parent_cased = new_parent if self.case_sensitive else new_parent.lower() str_item = str(item) if (self.match_string and str_item == new_parent_cased) or\ (not self.match_string and str_item in new_parent_cased) or\ (self.use_regexp and item.search(new_parent_cased)): self.__report( report_key='matched_paths', key=new_parent, value=obj_child) self.__search( obj_child, item, parent=new_parent, parents_ids=parents_ids_added) def __search_iterable(self, obj, item, parent="root", parents_ids=frozenset()): """Search iterables except dictionaries, sets and strings.""" for i, thing in enumerate(obj): new_parent = "{}[{}]".format(parent, i) if self.__skip_this(thing, parent=new_parent): continue if self.case_sensitive or not isinstance(thing, strings): thing_cased = thing else: thing_cased = thing.lower() if not self.use_regexp and thing_cased == item: self.__report( report_key='matched_values', key=new_parent, value=thing) else: item_id = id(thing) if parents_ids and item_id in parents_ids: continue parents_ids_added = add_to_frozen_set(parents_ids, item_id) self.__search(thing, item, "%s[%s]" % (parent, i), parents_ids_added) def __search_str(self, obj, item, parent): """Compare strings""" obj_text = obj if self.case_sensitive else obj.lower() is_matched = False if self.use_regexp: is_matched = item.search(obj_text) elif (self.match_string and item == obj_text) or (not self.match_string and item in obj_text): is_matched = True if is_matched: self.__report(report_key='matched_values', key=parent, value=obj) def __search_numbers(self, obj, item, parent): if ( item == obj or ( not self.strict_checking and ( item == str(obj) or ( self.use_regexp and item.search(str(obj)) ) ) ) ): self.__report(report_key='matched_values', key=parent, value=obj) def __search_tuple(self, obj, item, parent, parents_ids): # Checking to see if it has _fields. Which probably means it is a named # tuple. try: obj._asdict # It must be a normal tuple except AttributeError: self.__search_iterable(obj, item, parent, parents_ids) # We assume it is a namedtuple then else: self.__search_obj( obj, item, parent, parents_ids, is_namedtuple=True) def __search(self, obj, item, parent="root", parents_ids=frozenset()): """The main search method""" if self.__skip_this(item, parent): return elif isinstance(obj, strings) and isinstance(item, (strings, RE_COMPILED_TYPE)): self.__search_str(obj, item, parent) elif isinstance(obj, strings) and isinstance(item, numbers): return elif isinstance(obj, ipranges): self.__search_str(str(obj), item, parent) elif isinstance(obj, numbers): self.__search_numbers(obj, item, parent) elif isinstance(obj, MutableMapping): self.__search_dict(obj, item, parent, parents_ids) elif isinstance(obj, tuple): self.__search_tuple(obj, item, parent, parents_ids) elif isinstance(obj, (set, frozenset)): if self.warning_num < 10: logger.warning( "Set item detected in the path." "'set' objects do NOT support indexing. But DeepSearch will still report a path." ) self.warning_num += 1 self.__search_iterable(obj, item, parent, parents_ids) elif isinstance(obj, Iterable) and not isinstance(obj, strings): self.__search_iterable(obj, item, parent, parents_ids) else: self.__search_obj(obj, item, parent, parents_ids) class grep: __doc__ = doc def __init__(self, item, **kwargs): self.item = item self.kwargs = kwargs def __ror__(self, other): return DeepSearch(obj=other, item=self.item, **self.kwargs) if __name__ == "__main__": # pragma: no cover import doctest doctest.testmod()