about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/deepdiff/search.py
diff options
context:
space:
mode:
author S. Solomon Darnell 2025-03-28 21:52:21 -0500
committer S. Solomon Darnell 2025-03-28 21:52:21 -0500
commit 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/deepdiff/search.py
parent cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download gn-ai-master.tar.gz
two version of R2R are here HEAD master
Diffstat (limited to '.venv/lib/python3.12/site-packages/deepdiff/search.py')
-rw-r--r-- .venv/lib/python3.12/site-packages/deepdiff/search.py 358
1 files changed, 358 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/deepdiff/search.py b/.venv/lib/python3.12/site-packages/deepdiff/search.py
new file mode 100644
index 00000000..007c566c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/deepdiff/search.py
@@ -0,0 +1,358 @@
+#!/usr/bin/env python
+import re
+from collections.abc import MutableMapping, Iterable
+from deepdiff.helper import SetOrdered
+import logging
+
+from deepdiff.helper import (
+    strings, numbers, add_to_frozen_set, get_doc, dict_, RE_COMPILED_TYPE, ipranges
+)
+
+logger = logging.getLogger(__name__)  # module-level logger; used by DeepSearch for the set-indexing warning
+
+
+doc = get_doc('search_doc.rst')  # value returned by get_doc; assigned to grep.__doc__ further down
+
+
+class DeepSearch(dict):
+    r"""
+    **DeepSearch**
+
+    Deep Search inside objects to find the item matching your criteria.
+
+    **Parameters**
+
+    obj : The object to search within
+
+    item : The item to search for
+
+    verbose_level : int >= 0, default = 1.
+        Verbose level one shows the paths of found items.
+        Verbose level 2 shows the path and value of the found items.
+
+    exclude_paths: iterable of path strings, default = empty SetOrdered().
+        Paths to exclude from the report.
+
+    exclude_types: iterable of types, default = empty SetOrdered().
+        Object types to exclude from the report.
+
+    case_sensitive: Boolean, default = False
+
+    match_string: Boolean, default = False
+        If True, the value of the object or its children have to exactly match the item.
+        If False, the value of the item can be a part of the value of the object or its children
+
+    use_regexp: Boolean, default = False. If True, item is compiled with re.compile and matched with its search() method.
+
+    strict_checking: Boolean, default = True
+        If True, it will check the type of the object to match, so when searching for '1234',
+        it will NOT match the int 1234. Currently this only affects the numeric values searching.
+
+    **Returns**
+
+        A DeepSearch object that has the matched paths and matched values.
+
+    **Supported data types**
+
+    int, string, unicode, dictionary, list, tuple, set, frozenset, OrderedDict, NamedTuple and custom objects!
+
+    **Examples**
+
+    Importing
+        >>> from deepdiff import DeepSearch
+        >>> from pprint import pprint
+
+    Search in list for string
+        >>> obj = ["long somewhere", "string", 0, "somewhere great!"]
+        >>> item = "somewhere"
+        >>> ds = DeepSearch(obj, item, verbose_level=2)
+        >>> print(ds)
+        {'matched_values': {'root[3]': 'somewhere great!', 'root[0]': 'long somewhere'}}
+
+    Search in nested data for string
+        >>> obj = ["something somewhere", {"long": "somewhere", "string": 2, 0: 0, "somewhere": "around"}]
+        >>> item = "somewhere"
+        >>> ds = DeepSearch(obj, item, verbose_level=2)
+        >>> pprint(ds, indent=2)
+        { 'matched_paths': {"root[1]['somewhere']": 'around'},
+          'matched_values': { 'root[0]': 'something somewhere',
+                              "root[1]['long']": 'somewhere'}}
+
+    """
+
+    warning_num = 0  # count of set-indexing warnings logged so far; no more are logged once it reaches 10
+
+    def __init__(self,  # runs the whole search at construction time; results are stored as keys of self
+                 obj,
+                 item,
+                 exclude_paths=SetOrdered(),
+                 exclude_regex_paths=SetOrdered(),
+                 exclude_types=SetOrdered(),
+                 verbose_level=1,
+                 case_sensitive=False,
+                 match_string=False,
+                 use_regexp=False,
+                 strict_checking=True,
+                 **kwargs):  # any extra keyword is rejected with ValueError just below
+        if kwargs:
+            raise ValueError((
+                "The following parameter(s) are not valid: %s\n"
+                "The valid parameters are obj, item, exclude_paths, exclude_types,\n"
+                "case_sensitive, match_string and verbose_level."
+            ) % ', '.join(kwargs.keys()))  # NOTE(review): message omits exclude_regex_paths, use_regexp and strict_checking
+
+        self.obj = obj
+        self.case_sensitive = case_sensitive if isinstance(item, strings) else True  # items that are not `strings` force case_sensitive to True
+        item = item if self.case_sensitive else item.lower()  # lower-case the needle once; candidates are lower-cased when compared
+        self.exclude_paths = SetOrdered(exclude_paths)  # stored as a new SetOrdered built from the argument
+        self.exclude_regex_paths = [re.compile(exclude_regex_path) for exclude_regex_path in exclude_regex_paths]  # compiled once, reused by __skip_this
+        self.exclude_types = SetOrdered(exclude_types)
+        self.exclude_types_tuple = tuple(
+            exclude_types)  # we need tuple for checking isinstance
+        self.verbose_level = verbose_level
+        self.update(
+            matched_paths=self.__set_or_dict(),
+            matched_values=self.__set_or_dict(),
+            unprocessed=[])  # the three result buckets; the ones left empty are deleted at the end of __init__
+        self.use_regexp = use_regexp
+        if not strict_checking and (isinstance(item, numbers) or isinstance(item, ipranges)):  # lenient mode: compare by string form
+            item = str(item)
+        if self.use_regexp:
+            try:
+                item = re.compile(item)  # from here on item is a compiled pattern
+            except TypeError as e:
+                raise TypeError(f"The passed item of {item} is not usable for regex: {e}") from None  # re-raised with a clearer message, original context suppressed
+        self.strict_checking = strict_checking
+
+        # If True, a string must equal the item exactly instead of merely containing it
+        self.match_string = match_string
+
+        self.__search(obj, item, parents_ids=frozenset({id(obj)}))  # seed parents_ids with the root's id so references back to the root are skipped
+
+        empty_keys = [k for k, v in self.items() if not v]  # result buckets that stayed empty
+
+        for k in empty_keys:
+            del self[k]
+
+    def __set_or_dict(self):  # result container: dict_() (path -> value) at verbose_level >= 2, otherwise a SetOrdered of paths
+        return dict_() if self.verbose_level >= 2 else SetOrdered()
+
+    def __report(self, report_key, key, value):  # record one match in self[report_key]; key is the path string
+        if self.verbose_level >= 2:
+            self[report_key][key] = value  # verbose: keep path -> value
+        else:
+            self[report_key].add(key)  # terse: keep the path only
+
+    def __search_obj(self,
+                     obj,
+                     item,
+                     parent,
+                     parents_ids=frozenset(),
+                     is_namedtuple=False):
+        """Search a custom object (or a namedtuple) by searching a dict built from its attributes."""
+        found = False
+        if obj == item:
+            found = True
+            # The match is reported, but we still descend into the object because
+            # its attributes may contain further matches.
+            self.__report(report_key='matched_values', key=parent, value=obj)
+
+        try:
+            if is_namedtuple:
+                obj = obj._asdict()
+            else:
+                # Skip magic methods. Slightly hacky, but unless people are defining
+                # new magic methods they want to search, it should work fine.
+                obj = {i: getattr(obj, i) for i in dir(obj)
+                       if not (i.startswith('__') and i.endswith('__'))}
+        except AttributeError:  # collecting the attributes failed; fall back to __slots__
+            try:
+                obj = {i: getattr(obj, i) for i in obj.__slots__}
+            except AttributeError:  # the __slots__ fallback failed as well
+                if not found:
+                    self['unprocessed'].append("%s" % parent)  # remember the path we could not look inside
+
+                return
+
+        self.__search_dict(
+            obj, item, parent, parents_ids, print_as_attribute=True)  # child paths are rendered as parent.attr
+
+    def __skip_this(self, item, parent):  # True when parent is an excluded path / matches an excluded regex, or item is of an excluded type
+        skip = False
+        if parent in self.exclude_paths:
+            skip = True
+        elif self.exclude_regex_paths and any(
+                [exclude_regex_path.search(parent) for exclude_regex_path in self.exclude_regex_paths]):
+            skip = True
+        else:
+            if isinstance(item, self.exclude_types_tuple):  # isinstance against an empty tuple is always False
+                skip = True
+
+        return skip
+
+    def __search_dict(self,
+                      obj,
+                      item,
+                      parent,
+                      parents_ids=frozenset(),
+                      print_as_attribute=False):
+        """Search a mapping: each child's path string is tested against item, then the child value is searched."""
+        if print_as_attribute:
+            parent_text = "%s.%s"  # attribute style: parent.name
+        else:
+            parent_text = "%s[%s]"  # item style: parent[key]
+
+        obj_keys = SetOrdered(obj.keys())
+
+        for item_key in obj_keys:
+            if not print_as_attribute and isinstance(item_key, strings):
+                item_key_str = "'%s'" % item_key  # string keys are quoted in the path
+            else:
+                item_key_str = item_key
+
+            obj_child = obj[item_key]
+
+            item_id = id(obj_child)
+
+            if parents_ids and item_id in parents_ids:
+                continue  # this object is already on the current path: skip it so cycles do not recurse forever
+
+            parents_ids_added = add_to_frozen_set(parents_ids, item_id)
+
+            new_parent = parent_text % (parent, item_key_str)
+            new_parent_cased = new_parent if self.case_sensitive else new_parent.lower()  # lower-cased copy used only for matching
+
+            str_item = str(item)  # NOTE(review): compared with the whole path string (ancestors included), not just the key - confirm intended
+            if (self.match_string and str_item == new_parent_cased) or\
+               (not self.match_string and str_item in new_parent_cased) or\
+               (self.use_regexp and item.search(new_parent_cased)):
+                self.__report(
+                    report_key='matched_paths',
+                    key=new_parent,
+                    value=obj_child)
+
+            self.__search(
+                obj_child,
+                item,
+                parent=new_parent,
+                parents_ids=parents_ids_added)  # the value is always searched, whether or not the path matched
+
+    def __search_iterable(self,
+                          obj,
+                          item,
+                          parent="root",
+                          parents_ids=frozenset()):
+        """Search an iterable by enumeration (tuples, sets, frozensets and other non-string, non-mapping iterables)."""
+        for i, thing in enumerate(obj):
+            new_parent = "{}[{}]".format(parent, i)  # index-style path, used for sets too
+            if self.__skip_this(thing, parent=new_parent):
+                continue
+
+            if self.case_sensitive or not isinstance(thing, strings):
+                thing_cased = thing
+            else:
+                thing_cased = thing.lower()  # only string elements are lower-cased for comparison
+
+            if not self.use_regexp and thing_cased == item:  # direct equality hit: reported here and not descended into
+                self.__report(
+                    report_key='matched_values', key=new_parent, value=thing)
+            else:
+                item_id = id(thing)
+                if parents_ids and item_id in parents_ids:
+                    continue  # element is already on the current path (cycle)
+                parents_ids_added = add_to_frozen_set(parents_ids, item_id)
+                self.__search(thing, item, "%s[%s]" %
+                              (parent, i), parents_ids_added)  # same path text as new_parent
+
+    def __search_str(self, obj, item, parent):
+        """Match the string obj against item (compiled regex, exact string or substring) and report a hit under 'matched_values'."""
+        obj_text = obj if self.case_sensitive else obj.lower()  # lower-case the candidate for case-insensitive matching
+
+        is_matched = False
+        if self.use_regexp:
+            is_matched = item.search(obj_text)  # match object or None; only its truthiness is used
+        elif (self.match_string and item == obj_text) or (not self.match_string and item in obj_text):
+            is_matched = True
+        if is_matched:
+            self.__report(report_key='matched_values', key=parent, value=obj)
+
+    def __search_numbers(self, obj, item, parent):  # report a number equal to item, or (non-strict only) whose str() equals or regex-matches item
+        if (
+            item == obj or (
+                not self.strict_checking and (
+                    item == str(obj) or (
+                        self.use_regexp and item.search(str(obj))
+                    )
+                )
+            )
+        ):
+            self.__report(report_key='matched_values', key=parent, value=obj)
+
+    def __search_tuple(self, obj, item, parent, parents_ids):
+        # Checking to see if it has _asdict, which probably means it is a named
+        # tuple.
+        try:
+            obj._asdict
+        # It must be a normal tuple
+        except AttributeError:
+            self.__search_iterable(obj, item, parent, parents_ids)
+        # We assume it is a namedtuple then
+        else:
+            self.__search_obj(
+                obj, item, parent, parents_ids, is_namedtuple=True)
+
+    def __search(self, obj, item, parent="root", parents_ids=frozenset()):
+        """Dispatch obj to the type-specific search helper; the first matching branch below wins."""
+        if self.__skip_this(item, parent):  # NOTE(review): the exclude_types check here sees the search item, not obj - confirm intended
+            return
+
+        elif isinstance(obj, strings) and isinstance(item, (strings, RE_COMPILED_TYPE)):
+            self.__search_str(obj, item, parent)
+
+        elif isinstance(obj, strings) and isinstance(item, numbers):
+            return  # a number item is never matched against a string obj
+
+        elif isinstance(obj, ipranges):
+            self.__search_str(str(obj), item, parent)  # ipranges objects are matched by their string form
+
+        elif isinstance(obj, numbers):
+            self.__search_numbers(obj, item, parent)
+
+        elif isinstance(obj, MutableMapping):
+            self.__search_dict(obj, item, parent, parents_ids)
+
+        elif isinstance(obj, tuple):
+            self.__search_tuple(obj, item, parent, parents_ids)
+
+        elif isinstance(obj, (set, frozenset)):
+            if self.warning_num < 10:  # stop logging this warning once the counter reaches 10
+                logger.warning(
+                    "Set item detected in the path."
+                    "'set' objects do NOT support indexing. But DeepSearch will still report a path."
+                )
+                self.warning_num += 1
+            self.__search_iterable(obj, item, parent, parents_ids)
+
+        elif isinstance(obj, Iterable) and not isinstance(obj, strings):  # lists and any other non-string iterable
+            self.__search_iterable(obj, item, parent, parents_ids)
+
+        else:
+            self.__search_obj(obj, item, parent, parents_ids)  # anything else is searched as a custom object
+
+
+class grep:  # pipe-style front end: "obj | grep(item, **options)" builds DeepSearch(obj=obj, item=item, **options)
+    __doc__ = doc  # docstring is the module-level doc value
+
+    def __init__(self,
+                 item,
+                 **kwargs):  # kwargs are stored and forwarded unchanged to DeepSearch
+        self.item = item
+        self.kwargs = kwargs
+
+    def __ror__(self, other):  # reflected "|": invoked for "other | self" when other does not handle the operation itself
+        return DeepSearch(obj=other, item=self.item, **self.kwargs)
+
+
+if __name__ == "__main__":  # pragma: no cover
+    import doctest
+    doctest.testmod()  # run the docstring examples of this module when executed as a script