author    S. Solomon Darnell  2025-03-28 21:52:21 -0500
committer S. Solomon Darnell  2025-03-28 21:52:21 -0500
commit    4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree      ee3dc5af3b6313e921cd920906356f5d4febc4ed  /.venv/lib/python3.12/site-packages/deepdiff/diff.py
parent    cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download  gn-ai-4a52a71956a8d46fcb7294ac71734504bb09bcc2.tar.gz
two versions of R2R are here (HEAD, master)
Diffstat (limited to '.venv/lib/python3.12/site-packages/deepdiff/diff.py')
-rw-r--r--  .venv/lib/python3.12/site-packages/deepdiff/diff.py  1906
1 file changed, 1906 insertions(+), 0 deletions(-)
diff --git a/.venv/lib/python3.12/site-packages/deepdiff/diff.py b/.venv/lib/python3.12/site-packages/deepdiff/diff.py
new file mode 100644
index 00000000..d84ecc7e
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/deepdiff/diff.py
@@ -0,0 +1,1906 @@
+#!/usr/bin/env python
+
+# In order to run the docstrings:
+# python3 -m deepdiff.diff
+# You might need to run it several times, since dictionaries come out in a
+# different order on every run, while the docstrings expect a specific order
+# in order to pass.
+import difflib
+import logging
+import types
+import datetime
+from enum import Enum
+from copy import deepcopy
+from math import isclose as is_close
+from typing import List, Dict, Callable, Union, Any, Pattern, Tuple, Optional, Set, FrozenSet, TYPE_CHECKING, Protocol
+from collections.abc import Mapping, Iterable, Sequence
+from collections import defaultdict
+from inspect import getmembers
+from itertools import zip_longest
+from functools import lru_cache
+from deepdiff.helper import (strings, bytes_type, numbers, uuids, ListItemRemovedOrAdded, notpresent,
+ IndexedHash, unprocessed, add_to_frozen_set, basic_types,
+ convert_item_or_items_into_set_else_none, get_type,
+ convert_item_or_items_into_compiled_regexes_else_none,
+ type_is_subclass_of_type_group, type_in_type_group, get_doc,
+ number_to_string, datetime_normalize, KEY_TO_VAL_STR, booleans,
+ np_ndarray, np_floating, get_numpy_ndarray_rows, RepeatedTimer,
+ TEXT_VIEW, TREE_VIEW, DELTA_VIEW, detailed__dict__, add_root_to_paths,
+ np, get_truncate_datetime, dict_, CannotCompare, ENUM_INCLUDE_KEYS,
+ PydanticBaseModel, Opcode, SetOrdered, ipranges)
+from deepdiff.serialization import SerializationMixin
+from deepdiff.distance import DistanceMixin, logarithmic_similarity
+from deepdiff.model import (
+ RemapDict, ResultDict, TextResult, TreeResult, DiffLevel,
+ DictRelationship, AttributeRelationship, REPORT_KEYS,
+ SubscriptableIterableRelationship, NonSubscriptableIterableRelationship,
+ SetRelationship, NumpyArrayRelationship, CUSTOM_FIELD,
+ FORCE_DEFAULT,
+)
+from deepdiff.deephash import DeepHash, combine_hashes_lists
+from deepdiff.base import Base
+from deepdiff.lfucache import LFUCache, DummyLFU
+
+if TYPE_CHECKING:
+ from pytz.tzinfo import BaseTzInfo
+
+
+logger = logging.getLogger(__name__)
+
+MAX_PASSES_REACHED_MSG = (
+ 'DeepDiff has reached the max number of passes of {}. '
+ 'You can possibly get more accurate results by increasing the max_passes parameter.')
+
+MAX_DIFFS_REACHED_MSG = (
+ 'DeepDiff has reached the max number of diffs of {}. '
+ 'You can possibly get more accurate results by increasing the max_diffs parameter.')
+
+
+notpresent_indexed = IndexedHash(indexes=[0], item=notpresent)
+
+doc = get_doc('diff_doc.rst')
+
+
+PROGRESS_MSG = "DeepDiff {} seconds in progress. Pass #{}, Diff #{}"
+
+
+def _report_progress(_stats, progress_logger, duration):
+ """
+ Report the progress every few seconds.
+ """
+ progress_logger(PROGRESS_MSG.format(duration, _stats[PASSES_COUNT], _stats[DIFF_COUNT]))
+
+
+DISTANCE_CACHE_HIT_COUNT = 'DISTANCE CACHE HIT COUNT'
+DIFF_COUNT = 'DIFF COUNT'
+PASSES_COUNT = 'PASSES COUNT'
+MAX_PASS_LIMIT_REACHED = 'MAX PASS LIMIT REACHED'
+MAX_DIFF_LIMIT_REACHED = 'MAX DIFF LIMIT REACHED'
+DISTANCE_CACHE_ENABLED = 'DISTANCE CACHE ENABLED'
+PREVIOUS_DIFF_COUNT = 'PREVIOUS DIFF COUNT'
+PREVIOUS_DISTANCE_CACHE_HIT_COUNT = 'PREVIOUS DISTANCE CACHE HIT COUNT'
+CANT_FIND_NUMPY_MSG = 'Unable to import numpy. This must be a bug in DeepDiff since a numpy array is detected.'
+INVALID_VIEW_MSG = 'The only valid values for the view parameter are text and tree. But {} was passed.'
+CUTOFF_RANGE_ERROR_MSG = 'cutoff_distance_for_pairs needs to be a positive float max 1.'
+VERBOSE_LEVEL_RANGE_MSG = 'verbose_level should be 0, 1, or 2.'
+PURGE_LEVEL_RANGE_MSG = 'cache_purge_level should be 0, 1, or 2.'
+_ENABLE_CACHE_EVERY_X_DIFF = '_ENABLE_CACHE_EVERY_X_DIFF'
+
+model_fields_set = frozenset(["model_fields_set"])
+
+
+# What is the threshold to consider 2 items to be pairs. Only used when ignore_order = True.
+CUTOFF_DISTANCE_FOR_PAIRS_DEFAULT = 0.3
+
+# What is the threshold to calculate pairs of items between 2 iterables.
+# For example 2 iterables that have nothing in common, do not need their pairs to be calculated.
+CUTOFF_INTERSECTION_FOR_PAIRS_DEFAULT = 0.7
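+
+# An illustrative sketch of how these cutoffs come into play (hypothetical
+# data; exact output depends on the version, hence the doctest skip):
+#
+#     >>> from deepdiff import DeepDiff
+#     >>> t1 = [{'a': 1, 'b': 2}, {'x': 9}]
+#     >>> t2 = [{'a': 1, 'b': 3}, {'y': 9}]
+#     >>> # Lowering cutoff_distance_for_pairs makes pairing stricter, so a
+#     >>> # removed/added pair is more likely reported as separate items
+#     >>> # instead of values_changed.
+#     >>> DeepDiff(t1, t2, ignore_order=True,
+#     ...          cutoff_distance_for_pairs=0.3,
+#     ...          cutoff_intersection_for_pairs=0.7)  # doctest: +SKIP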
+
+DEEPHASH_PARAM_KEYS = (
+ 'exclude_types',
+ 'exclude_paths',
+ 'include_paths',
+ 'exclude_regex_paths',
+ 'hasher',
+ 'significant_digits',
+ 'number_format_notation',
+ 'ignore_string_type_changes',
+ 'ignore_numeric_type_changes',
+ 'use_enum_value',
+ 'ignore_type_in_groups',
+ 'ignore_type_subclasses',
+ 'ignore_string_case',
+ 'exclude_obj_callback',
+ 'ignore_private_variables',
+ 'encodings',
+ 'ignore_encoding_errors',
+ 'default_timezone',
+ 'custom_operators',
+)
+
+
+class DeepDiffProtocol(Protocol):
+ t1: Any
+ t2: Any
+ cutoff_distance_for_pairs: float
+ use_log_scale: bool
+ log_scale_similarity_threshold: float
+ view: str
+
+
+
+class DeepDiff(ResultDict, SerializationMixin, DistanceMixin, DeepDiffProtocol, Base):
+ __doc__ = doc
+
+ CACHE_AUTO_ADJUST_THRESHOLD = 0.25
+
+ def __init__(self,
+ t1: Any,
+ t2: Any,
+ _original_type=None,
+ cache_purge_level: int=1,
+ cache_size: int=0,
+ cache_tuning_sample_size: int=0,
+ custom_operators: Optional[List[Any]] =None,
+ cutoff_distance_for_pairs: float=CUTOFF_DISTANCE_FOR_PAIRS_DEFAULT,
+ cutoff_intersection_for_pairs: float=CUTOFF_INTERSECTION_FOR_PAIRS_DEFAULT,
+ default_timezone:Union[datetime.timezone, "BaseTzInfo"]=datetime.timezone.utc,
+ encodings: Optional[List[str]]=None,
+ exclude_obj_callback: Optional[Callable]=None,
+ exclude_obj_callback_strict: Optional[Callable]=None,
+ exclude_paths: Union[str, List[str], Set[str], FrozenSet[str], None]=None,
+ exclude_regex_paths: Union[str, List[str], Pattern[str], List[Pattern[str]], None]=None,
+ exclude_types: Optional[List[Any]]=None,
+ get_deep_distance: bool=False,
+ group_by: Union[str, Tuple[str, str], None]=None,
+ group_by_sort_key: Union[str, Callable, None]=None,
+ hasher: Optional[Callable]=None,
+ hashes: Optional[Dict]=None,
+ ignore_encoding_errors: bool=False,
+ ignore_nan_inequality: bool=False,
+ ignore_numeric_type_changes: bool=False,
+ ignore_order: bool=False,
+ ignore_order_func: Optional[Callable]=None,
+ ignore_private_variables: bool=True,
+ ignore_string_case: bool=False,
+ ignore_string_type_changes: bool=False,
+ ignore_type_in_groups: Optional[List[Tuple]]=None,
+ ignore_type_subclasses: bool=False,
+ include_obj_callback: Optional[Callable]=None,
+ include_obj_callback_strict: Optional[Callable]=None,
+ include_paths: Union[str, List[str], None]=None,
+ iterable_compare_func: Optional[Callable]=None,
+ log_frequency_in_sec: int=0,
+ log_scale_similarity_threshold: float=0.1,
+ log_stacktrace: bool=False,
+ math_epsilon: Optional[float]=None,
+ max_diffs: Optional[int]=None,
+ max_passes: int=10000000,
+ number_format_notation: str="f",
+ number_to_string_func: Optional[Callable]=None,
+ progress_logger: Callable=logger.info,
+ report_repetition: bool=False,
+ significant_digits: Optional[int]=None,
+ threshold_to_diff_deeper: float = 0.33,
+ truncate_datetime: Optional[str]=None,
+ use_enum_value: bool=False,
+ use_log_scale: bool=False,
+ verbose_level: int=1,
+ view: str=TEXT_VIEW,
+ zip_ordered_iterables: bool=False,
+ _parameters=None,
+ _shared_parameters=None,
+ **kwargs):
+ super().__init__()
+ if kwargs:
+ raise ValueError((
+ "The following parameter(s) are not valid: %s\n"
+ "The valid parameters are ignore_order, report_repetition, significant_digits, "
+ "number_format_notation, exclude_paths, include_paths, exclude_types, exclude_regex_paths, ignore_type_in_groups, "
+ "ignore_string_type_changes, ignore_numeric_type_changes, ignore_type_subclasses, truncate_datetime, "
+ "ignore_private_variables, ignore_nan_inequality, number_to_string_func, verbose_level, "
+ "view, hasher, hashes, max_passes, max_diffs, zip_ordered_iterables, "
+ "cutoff_distance_for_pairs, cutoff_intersection_for_pairs, log_frequency_in_sec, cache_size, "
+ "cache_tuning_sample_size, get_deep_distance, group_by, group_by_sort_key, cache_purge_level, log_stacktrace,"
+ "math_epsilon, iterable_compare_func, use_enum_value, _original_type, threshold_to_diff_deeper, default_timezone "
+ "ignore_order_func, custom_operators, encodings, ignore_encoding_errors, use_log_scale, log_scale_similarity_threshold "
+ "_parameters and _shared_parameters.") % ', '.join(kwargs.keys()))
+
+ if _parameters:
+ self.__dict__.update(_parameters)
+ else:
+ self.custom_operators = custom_operators or []
+ self.ignore_order = ignore_order
+
+ self.ignore_order_func = ignore_order_func
+
+ ignore_type_in_groups = ignore_type_in_groups or []
+ if numbers == ignore_type_in_groups or numbers in ignore_type_in_groups:
+ ignore_numeric_type_changes = True
+ self.ignore_numeric_type_changes = ignore_numeric_type_changes
+ if strings == ignore_type_in_groups or strings in ignore_type_in_groups:
+ ignore_string_type_changes = True
+ self.use_enum_value = use_enum_value
+ self.log_scale_similarity_threshold = log_scale_similarity_threshold
+ self.use_log_scale = use_log_scale
+ self.default_timezone = default_timezone
+ self.log_stacktrace = log_stacktrace
+ self.threshold_to_diff_deeper = threshold_to_diff_deeper
+ self.ignore_string_type_changes = ignore_string_type_changes
+ self.ignore_type_in_groups = self.get_ignore_types_in_groups(
+ ignore_type_in_groups=ignore_type_in_groups,
+ ignore_string_type_changes=ignore_string_type_changes,
+ ignore_numeric_type_changes=ignore_numeric_type_changes,
+ ignore_type_subclasses=ignore_type_subclasses)
+ self.report_repetition = report_repetition
+ self.exclude_paths = add_root_to_paths(convert_item_or_items_into_set_else_none(exclude_paths))
+ self.include_paths = add_root_to_paths(convert_item_or_items_into_set_else_none(include_paths))
+ self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths)
+ self.exclude_types = set(exclude_types) if exclude_types else None
+ self.exclude_types_tuple = tuple(exclude_types) if exclude_types else None # we need tuple for checking isinstance
+ self.ignore_type_subclasses = ignore_type_subclasses
+ self.type_check_func = type_in_type_group if ignore_type_subclasses else type_is_subclass_of_type_group
+ self.ignore_string_case = ignore_string_case
+ self.exclude_obj_callback = exclude_obj_callback
+ self.exclude_obj_callback_strict = exclude_obj_callback_strict
+ self.include_obj_callback = include_obj_callback
+ self.include_obj_callback_strict = include_obj_callback_strict
+ self.number_to_string = number_to_string_func or number_to_string
+ self.iterable_compare_func = iterable_compare_func
+ self.zip_ordered_iterables = zip_ordered_iterables
+ self.ignore_private_variables = ignore_private_variables
+ self.ignore_nan_inequality = ignore_nan_inequality
+ self.hasher = hasher
+ self.cache_tuning_sample_size = cache_tuning_sample_size
+ self.group_by = group_by
+ if callable(group_by_sort_key):
+ self.group_by_sort_key = group_by_sort_key
+ elif group_by_sort_key:
+ def _group_by_sort_key(x):
+ return x[group_by_sort_key]
+ self.group_by_sort_key = _group_by_sort_key
+ else:
+ self.group_by_sort_key = None
+ self.encodings = encodings
+ self.ignore_encoding_errors = ignore_encoding_errors
+
+ self.significant_digits = self.get_significant_digits(significant_digits, ignore_numeric_type_changes)
+ self.math_epsilon = math_epsilon
+ if self.math_epsilon is not None and self.ignore_order:
+ logger.warning("math_epsilon in conjunction with ignore_order=True is only used for flat object comparisons. Custom math_epsilon will not have an effect when comparing nested objects.")
+ self.truncate_datetime = get_truncate_datetime(truncate_datetime)
+ self.number_format_notation = number_format_notation
+ if verbose_level in {0, 1, 2}:
+ self.verbose_level = verbose_level
+ else:
+ raise ValueError(VERBOSE_LEVEL_RANGE_MSG)
+ if cache_purge_level not in {0, 1, 2}:
+ raise ValueError(PURGE_LEVEL_RANGE_MSG)
+ self.view = view
+ # Setting up the cache for dynamic programming. One dictionary per instance of root of DeepDiff running.
+ self.max_passes = max_passes
+ self.max_diffs = max_diffs
+ self.cutoff_distance_for_pairs = float(cutoff_distance_for_pairs)
+ self.cutoff_intersection_for_pairs = float(cutoff_intersection_for_pairs)
+ if self.cutoff_distance_for_pairs < 0 or self.cutoff_distance_for_pairs > 1:
+ raise ValueError(CUTOFF_RANGE_ERROR_MSG)
+ # _Parameters are the clean _parameters to initialize DeepDiff with so we avoid all the above
+ # cleaning functionalities when running DeepDiff recursively.
+ # However DeepHash has its own set of _parameters that are slightly different from DeepDiff's.
+ # DeepDiff _parameters are transformed to DeepHash _parameters via _get_deephash_params method.
+ self.progress_logger = progress_logger
+ self.cache_size = cache_size
+ _parameters = self.__dict__.copy()
+ _parameters['group_by'] = None # overwriting since these parameters will be passed on to other passes.
+ if log_stacktrace:
+ self.log_err = logger.exception
+ else:
+ self.log_err = logger.error
+
+ # Non-Root
+ if _shared_parameters:
+ self.is_root = False
+ self._shared_parameters = _shared_parameters
+ self.__dict__.update(_shared_parameters)
+ # We are in some pass other than root
+ progress_timer = None
+ # Root
+ else:
+ self.is_root = True
+ # Caching the DeepDiff results for dynamic programming
+ self._distance_cache = LFUCache(cache_size) if cache_size else DummyLFU()
+ self._stats = {
+ PASSES_COUNT: 0,
+ DIFF_COUNT: 0,
+ DISTANCE_CACHE_HIT_COUNT: 0,
+ PREVIOUS_DIFF_COUNT: 0,
+ PREVIOUS_DISTANCE_CACHE_HIT_COUNT: 0,
+ MAX_PASS_LIMIT_REACHED: False,
+ MAX_DIFF_LIMIT_REACHED: False,
+ DISTANCE_CACHE_ENABLED: bool(cache_size),
+ }
+ self.hashes = dict_() if hashes is None else hashes
+ self._numpy_paths = dict_() # if _numpy_paths is None else _numpy_paths
+ self._shared_parameters = {
+ 'hashes': self.hashes,
+ '_stats': self._stats,
+ '_distance_cache': self._distance_cache,
+ '_numpy_paths': self._numpy_paths,
+ _ENABLE_CACHE_EVERY_X_DIFF: self.cache_tuning_sample_size * 10,
+ }
+ if log_frequency_in_sec:
+ # Creating a progress log reporter that runs in a separate thread every log_frequency_in_sec seconds.
+ progress_timer = RepeatedTimer(log_frequency_in_sec, _report_progress, self._stats, progress_logger)
+ else:
+ progress_timer = None
+
+ self._parameters = _parameters
+ self.deephash_parameters = self._get_deephash_params()
+ self.tree = TreeResult()
+ self._iterable_opcodes = {}
+ if group_by and self.is_root:
+ try:
+ original_t1 = t1
+ t1 = self._group_iterable_to_dict(t1, group_by, item_name='t1')
+ except (KeyError, ValueError):
+ pass
+ else:
+ try:
+ t2 = self._group_iterable_to_dict(t2, group_by, item_name='t2')
+ except (KeyError, ValueError):
+ t1 = original_t1
+
+ self.t1 = t1
+ self.t2 = t2
+
+ try:
+ root = DiffLevel(t1, t2, verbose_level=self.verbose_level)
+ # _original_type is only used to pass the original type of the data. Currently only used for numpy arrays.
+ # The reason is that we convert the numpy array to python list and then later for distance calculations
+ # we convert only the last dimension of it into numpy arrays.
+ self._diff(root, parents_ids=frozenset({id(t1)}), _original_type=_original_type)
+
+ if get_deep_distance and view in {TEXT_VIEW, TREE_VIEW}:
+ self.tree['deep_distance'] = self._get_rough_distance()
+
+ self.tree.remove_empty_keys()
+ view_results = self._get_view_results(self.view)
+ self.update(view_results)
+ finally:
+ if self.is_root:
+ if cache_purge_level:
+ del self._distance_cache
+ del self.hashes
+ del self._shared_parameters
+ del self._parameters
+ for key in (PREVIOUS_DIFF_COUNT, PREVIOUS_DISTANCE_CACHE_HIT_COUNT,
+ DISTANCE_CACHE_ENABLED):
+ del self._stats[key]
+ if progress_timer:
+ duration = progress_timer.stop()
+ self._stats['DURATION SEC'] = duration
+ logger.info('stats {}'.format(self.get_stats()))
+ if cache_purge_level == 2:
+ self.__dict__.clear()
+
+ def _get_deephash_params(self):
+ result = {key: self._parameters[key] for key in DEEPHASH_PARAM_KEYS}
+ result['ignore_repetition'] = not self.report_repetition
+ result['number_to_string_func'] = self.number_to_string
+ return result
+
+ def _report_result(self, report_type, change_level, local_tree=None):
+ """
+ Add a detected change to the reference-style result dictionary.
+ report_type will be added to level.
+ (We'll create the text-style report from there later.)
+ :param report_type: A well defined string key describing the type of change.
+ Examples: "set_item_added", "values_changed"
+ :param change_level: A DiffLevel object describing the objects in question in their
+ before-change and after-change object structure.
+
+ :param local_tree: An optional TreeResult to report into instead of self.tree.
+ """
+
+ if not self._skip_this(change_level):
+ change_level.report_type = report_type
+ tree = self.tree if local_tree is None else local_tree
+ tree[report_type].add(change_level)
+
+ def custom_report_result(self, report_type, level, extra_info=None):
+ """
+ Add a detected change to the reference-style result dictionary.
+ report_type will be added to level.
+ (We'll create the text-style report from there later.)
+ :param report_type: A well defined string key describing the type of change.
+ Examples: "set_item_added", "values_changed"
+ :param level: A DiffLevel object describing the objects in question in their
+ before-change and after-change object structure.
+ :param extra_info: A dict that describes this result
+ :rtype: None
+ """
+
+ if not self._skip_this(level):
+ level.report_type = report_type
+ level.additional[CUSTOM_FIELD] = extra_info
+ self.tree[report_type].add(level)
+
+ @staticmethod
+ def _dict_from_slots(object):
+ def unmangle(attribute):
+ if attribute.startswith('__') and attribute != '__weakref__':
+ return '_{type}{attribute}'.format(
+ type=type(object).__name__,
+ attribute=attribute
+ )
+ return attribute
+
+ all_slots = []
+
+ if isinstance(object, type):
+ mro = object.__mro__ # pragma: no cover. I have not been able to write a test for this case. But we still check for it.
+ else:
+ mro = object.__class__.__mro__
+
+ for type_in_mro in mro:
+ slots = getattr(type_in_mro, '__slots__', None)
+ if slots:
+ if isinstance(slots, strings):
+ all_slots.append(slots)
+ else:
+ all_slots.extend(slots)
+
+ return {i: getattr(object, key) for i in all_slots if hasattr(object, key := unmangle(i))}
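+
+ # A minimal illustration of the objects this handles (hypothetical class):
+ # instances that keep attributes in __slots__ instead of __dict__ are still
+ # diffed attribute by attribute.
+ #
+ #     >>> class Point:
+ #     ...     __slots__ = ('x', 'y')
+ #     ...     def __init__(self, x, y):
+ #     ...         self.x, self.y = x, y
+ #     >>> DeepDiff(Point(1, 2), Point(1, 3))  # doctest: +SKIP
+ #     {'values_changed': {'root.y': {'new_value': 3, 'old_value': 2}}}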
+
+ def _diff_enum(self, level, parents_ids=frozenset(), local_tree=None):
+ t1 = detailed__dict__(level.t1, include_keys=ENUM_INCLUDE_KEYS)
+ t2 = detailed__dict__(level.t2, include_keys=ENUM_INCLUDE_KEYS)
+
+ self._diff_dict(
+ level,
+ parents_ids,
+ print_as_attribute=True,
+ override=True,
+ override_t1=t1,
+ override_t2=t2,
+ local_tree=local_tree,
+ )
+
+ def _diff_obj(self, level, parents_ids=frozenset(), is_namedtuple=False, local_tree=None, is_pydantic_object=False):
+ """Difference of 2 objects"""
+ processing_error = False
+ try:
+ if is_namedtuple:
+ t1 = level.t1._asdict()
+ t2 = level.t2._asdict()
+ elif is_pydantic_object:
+ t1 = detailed__dict__(level.t1, ignore_private_variables=self.ignore_private_variables, ignore_keys=model_fields_set)
+ t2 = detailed__dict__(level.t2, ignore_private_variables=self.ignore_private_variables, ignore_keys=model_fields_set)
+ elif all('__dict__' in dir(t) for t in level):
+ t1 = detailed__dict__(level.t1, ignore_private_variables=self.ignore_private_variables)
+ t2 = detailed__dict__(level.t2, ignore_private_variables=self.ignore_private_variables)
+ elif all('__slots__' in dir(t) for t in level):
+ t1 = self._dict_from_slots(level.t1)
+ t2 = self._dict_from_slots(level.t2)
+ else:
+ t1 = {k: v for k, v in getmembers(level.t1) if not callable(v)}
+ t2 = {k: v for k, v in getmembers(level.t2) if not callable(v)}
+ except AttributeError:
+ processing_error = True
+ if processing_error is True:
+ self._report_result('unprocessed', level, local_tree=local_tree)
+ return
+
+ self._diff_dict(
+ level,
+ parents_ids,
+ print_as_attribute=True,
+ override=True,
+ override_t1=t1,
+ override_t2=t2,
+ local_tree=local_tree,
+ )
+
+ def _skip_this(self, level):
+ """
+ Check whether this comparison should be skipped because one of the objects to compare meets exclusion criteria.
+ :rtype: bool
+ """
+ level_path = level.path()
+ skip = False
+ if self.exclude_paths and level_path in self.exclude_paths:
+ skip = True
+ if self.include_paths and level_path != 'root':
+ if level_path not in self.include_paths:
+ skip = True
+ for prefix in self.include_paths:
+ if prefix in level_path or level_path in prefix:
+ skip = False
+ break
+ elif self.exclude_regex_paths and any(
+ [exclude_regex_path.search(level_path) for exclude_regex_path in self.exclude_regex_paths]):
+ skip = True
+ elif self.exclude_types_tuple and \
+ (isinstance(level.t1, self.exclude_types_tuple) or isinstance(level.t2, self.exclude_types_tuple)):
+ skip = True
+ elif self.exclude_obj_callback and \
+ (self.exclude_obj_callback(level.t1, level_path) or self.exclude_obj_callback(level.t2, level_path)):
+ skip = True
+ elif self.exclude_obj_callback_strict and \
+ (self.exclude_obj_callback_strict(level.t1, level_path) and
+ self.exclude_obj_callback_strict(level.t2, level_path)):
+ skip = True
+ elif self.include_obj_callback and level_path != 'root':
+ skip = True
+ if (self.include_obj_callback(level.t1, level_path) or self.include_obj_callback(level.t2, level_path)):
+ skip = False
+ elif self.include_obj_callback_strict and level_path != 'root':
+ skip = True
+ if (self.include_obj_callback_strict(level.t1, level_path) and
+ self.include_obj_callback_strict(level.t2, level_path)):
+ skip = False
+
+ return skip
+
+ def _skip_this_key(self, level, key):
+ # if include_paths is not set, then treat every path as included
+ if self.include_paths is None:
+ return False
+ if "{}['{}']".format(level.path(), key) in self.include_paths:
+ return False
+ if level.path() in self.include_paths:
+ # matches e.g. level+key root['foo']['bar']['veg'] include_paths ["root['foo']['bar']"]
+ return False
+ for prefix in self.include_paths:
+ if "{}['{}']".format(level.path(), key) in prefix:
+ # matches as long as the prefix is longer than this object key
+ # e.g.: level+key root['foo']['bar'] matches prefix root['foo']['bar'] from include_paths
+ # level+key root['foo'] matches prefix root['foo']['bar'] from include_paths
+ # level+key root['foo']['bar'] DOES NOT match root['foo'] from include_paths This needs to be handled afterwards
+ return False
+ # check if a higher level is included as a whole (=without any sublevels specified)
+ # matches e.g. level+key root['foo']['bar']['veg'] include_paths ["root['foo']"]
+ # but does not match, if it is level+key root['foo']['bar']['veg'] include_paths ["root['foo']['bar']['fruits']"]
+ up = level.up
+ while up is not None:
+ if up.path() in self.include_paths:
+ return False
+ up = up.up
+ return True
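+
+ # An illustrative sketch of the include_paths semantics above (hypothetical
+ # data): only changes at or under the included path are reported.
+ #
+ #     >>> t1 = {'foo': {'bar': 1, 'other': 5}, 'ignored': 1}
+ #     >>> t2 = {'foo': {'bar': 2, 'other': 6}, 'ignored': 2}
+ #     >>> DeepDiff(t1, t2, include_paths="root['foo']['bar']")  # doctest: +SKIP
+ #     {'values_changed': {"root['foo']['bar']": {'new_value': 2, 'old_value': 1}}}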
+
+ def _get_clean_to_keys_mapping(self, keys, level):
+ """
+ Get a dictionary of cleaned value of keys to the keys themselves.
+ This is mainly used to transform the keys when the type changes of keys should be ignored.
+
+ TODO: needs also some key conversion for groups of types other than the built-in strings and numbers.
+ """
+ result = dict_()
+ for key in keys:
+ if self.ignore_string_type_changes and isinstance(key, bytes):
+ clean_key = key.decode('utf-8')
+ elif self.use_enum_value and isinstance(key, Enum):
+ clean_key = key.value
+ elif isinstance(key, numbers):
+ type_ = "number" if self.ignore_numeric_type_changes else key.__class__.__name__
+ clean_key = self.number_to_string(key, significant_digits=self.significant_digits,
+ number_format_notation=self.number_format_notation)
+ clean_key = KEY_TO_VAL_STR.format(type_, clean_key)
+ else:
+ clean_key = key
+ if self.ignore_string_case and isinstance(clean_key, str):
+ clean_key = clean_key.lower()
+ if clean_key in result:
+ logger.warning(('{} and {} in {} become the same key when ignore_string_type_changes '
+ 'or ignore_numeric_type_changes are set to be true.').format(
+ key, result[clean_key], level.path()))
+ else:
+ result[clean_key] = key
+ return result
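+
+ # A minimal sketch of the key cleaning above (hypothetical data): with
+ # ignore_string_type_changes, a bytes key and its str counterpart map to
+ # the same clean key and therefore compare as equal keys.
+ #
+ #     >>> DeepDiff({b'a': 1}, {'a': 1}, ignore_string_type_changes=True)  # doctest: +SKIP
+ #     {}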
+
+ def _diff_dict(
+ self,
+ level,
+ parents_ids=frozenset([]),
+ print_as_attribute=False,
+ override=False,
+ override_t1=None,
+ override_t2=None,
+ local_tree=None,
+ ):
+ """Difference of 2 dictionaries"""
+ if override:
+ # for special stuff like custom objects and named tuples we receive preprocessed t1 and t2
+ # but must not spoil the chain (=level) with it
+ t1 = override_t1
+ t2 = override_t2
+ else:
+ t1 = level.t1
+ t2 = level.t2
+
+ if print_as_attribute:
+ item_added_key = "attribute_added"
+ item_removed_key = "attribute_removed"
+ rel_class = AttributeRelationship
+ else:
+ item_added_key = "dictionary_item_added"
+ item_removed_key = "dictionary_item_removed"
+ rel_class = DictRelationship
+
+ if self.ignore_private_variables:
+ t1_keys = SetOrdered([key for key in t1 if not(isinstance(key, str) and key.startswith('__')) and not self._skip_this_key(level, key)])
+ t2_keys = SetOrdered([key for key in t2 if not(isinstance(key, str) and key.startswith('__')) and not self._skip_this_key(level, key)])
+ else:
+ t1_keys = SetOrdered([key for key in t1 if not self._skip_this_key(level, key)])
+ t2_keys = SetOrdered([key for key in t2 if not self._skip_this_key(level, key)])
+ if self.ignore_string_type_changes or self.ignore_numeric_type_changes or self.ignore_string_case:
+ t1_clean_to_keys = self._get_clean_to_keys_mapping(keys=t1_keys, level=level)
+ t2_clean_to_keys = self._get_clean_to_keys_mapping(keys=t2_keys, level=level)
+ t1_keys = SetOrdered(t1_clean_to_keys.keys())
+ t2_keys = SetOrdered(t2_clean_to_keys.keys())
+ else:
+ t1_clean_to_keys = t2_clean_to_keys = None
+
+ t_keys_intersect = t2_keys & t1_keys
+ t_keys_added = t2_keys - t_keys_intersect
+ t_keys_removed = t1_keys - t_keys_intersect
+
+ if self.threshold_to_diff_deeper:
+ if self.exclude_paths:
+ t_keys_union = {f"{level.path()}[{repr(key)}]" for key in (t2_keys | t1_keys)}
+ t_keys_union -= self.exclude_paths
+ t_keys_union_len = len(t_keys_union)
+ else:
+ t_keys_union_len = len(t2_keys | t1_keys)
+ if t_keys_union_len > 1 and len(t_keys_intersect) / t_keys_union_len < self.threshold_to_diff_deeper:
+ self._report_result('values_changed', level, local_tree=local_tree)
+ return
+
+ for key in t_keys_added:
+ if self._count_diff() is StopIteration:
+ return
+
+ key = t2_clean_to_keys[key] if t2_clean_to_keys else key
+ change_level = level.branch_deeper(
+ notpresent,
+ t2[key],
+ child_relationship_class=rel_class,
+ child_relationship_param=key,
+ child_relationship_param2=key,
+ )
+ self._report_result(item_added_key, change_level, local_tree=local_tree)
+
+ for key in t_keys_removed:
+ if self._count_diff() is StopIteration:
+ return # pragma: no cover. This is already covered for addition.
+
+ key = t1_clean_to_keys[key] if t1_clean_to_keys else key
+ change_level = level.branch_deeper(
+ t1[key],
+ notpresent,
+ child_relationship_class=rel_class,
+ child_relationship_param=key,
+ child_relationship_param2=key,
+ )
+ self._report_result(item_removed_key, change_level, local_tree=local_tree)
+
+ for key in t_keys_intersect: # key present in both dicts - need to compare values
+ if self._count_diff() is StopIteration:
+ return # pragma: no cover. This is already covered for addition.
+
+ key1 = t1_clean_to_keys[key] if t1_clean_to_keys else key
+ key2 = t2_clean_to_keys[key] if t2_clean_to_keys else key
+ item_id = id(t1[key1])
+ if parents_ids and item_id in parents_ids:
+ continue
+ parents_ids_added = add_to_frozen_set(parents_ids, item_id)
+
+ # Go one level deeper
+ next_level = level.branch_deeper(
+ t1[key1],
+ t2[key2],
+ child_relationship_class=rel_class,
+ child_relationship_param=key,
+ child_relationship_param2=key,
+ )
+ self._diff(next_level, parents_ids_added, local_tree=local_tree)
+
+ def _diff_set(self, level, local_tree=None):
+ """Difference of sets"""
+ t1_hashtable = self._create_hashtable(level, 't1')
+ t2_hashtable = self._create_hashtable(level, 't2')
+
+ t1_hashes = set(t1_hashtable.keys())
+ t2_hashes = set(t2_hashtable.keys())
+
+ hashes_added = t2_hashes - t1_hashes
+ hashes_removed = t1_hashes - t2_hashes
+
+ items_added = [t2_hashtable[i].item for i in hashes_added]
+ items_removed = [t1_hashtable[i].item for i in hashes_removed]
+
+ for item in items_added:
+ if self._count_diff() is StopIteration:
+ return # pragma: no cover. This is already covered for addition.
+
+ change_level = level.branch_deeper(
+ notpresent, item, child_relationship_class=SetRelationship)
+ self._report_result('set_item_added', change_level, local_tree=local_tree)
+
+ for item in items_removed:
+ if self._count_diff() is StopIteration:
+ return # pragma: no cover. This is already covered for addition.
+
+ change_level = level.branch_deeper(
+ item, notpresent, child_relationship_class=SetRelationship)
+ self._report_result('set_item_removed', change_level, local_tree=local_tree)
+
+ @staticmethod
+ def _iterables_subscriptable(t1, t2):
+ try:
+ if getattr(t1, '__getitem__') and getattr(t2, '__getitem__'):
+ return True
+ else: # pragma: no cover
+ return False # should never happen
+ except AttributeError:
+ return False
+
+ def _diff_iterable(self, level, parents_ids=frozenset(), _original_type=None, local_tree=None):
+ """Difference of iterables"""
+ if (self.ignore_order_func and self.ignore_order_func(level)) or self.ignore_order:
+ self._diff_iterable_with_deephash(level, parents_ids, _original_type=_original_type, local_tree=local_tree)
+ else:
+ self._diff_iterable_in_order(level, parents_ids, _original_type=_original_type, local_tree=local_tree)
+
+ def _compare_in_order(
+ self, level,
+ t1_from_index=None, t1_to_index=None,
+ t2_from_index=None, t2_to_index=None
+ ) -> List[Tuple[Tuple[int, int], Tuple[Any, Any]]]:
+ """
+ Default compare if `iterable_compare_func` is not provided.
+ This will compare in sequence order.
+ """
+ if t1_from_index is None:
+ return [((i, i), (x, y)) for i, (x, y) in enumerate(
+ zip_longest(
+ level.t1, level.t2, fillvalue=ListItemRemovedOrAdded))]
+ else:
+ t1_chunk = level.t1[t1_from_index:t1_to_index]
+ t2_chunk = level.t2[t2_from_index:t2_to_index]
+ return [((i + t1_from_index, i + t2_from_index), (x, y)) for i, (x, y) in enumerate(
+ zip_longest(
+ t1_chunk, t2_chunk, fillvalue=ListItemRemovedOrAdded))]
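+
+ # Sketch of the pair structure returned above (hypothetical inputs):
+ # zip_longest pads the shorter side with ListItemRemovedOrAdded, so
+ # t1=[1, 2] and t2=[1] yield
+ # [((0, 0), (1, 1)), ((1, 1), (2, ListItemRemovedOrAdded))].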
+
+ def _get_matching_pairs(
+ self, level,
+ t1_from_index=None, t1_to_index=None,
+ t2_from_index=None, t2_to_index=None
+ ) -> List[Tuple[Tuple[int, int], Tuple[Any, Any]]]:
+ """
+ Given a level get matching pairs. This returns list of two tuples in the form:
+ [
+ (t1 index, t2 index), (t1 item, t2 item)
+ ]
+
+ This will compare using the passed in `iterable_compare_func` if available.
+ By default it compares in sequence order.
+ """
+
+ if self.iterable_compare_func is None:
+ # Match in order if there is no compare function provided
+ return self._compare_in_order(
+ level,
+ t1_from_index=t1_from_index, t1_to_index=t1_to_index,
+ t2_from_index=t2_from_index, t2_to_index=t2_to_index,
+ )
+ try:
+ matches = []
+ y_matched = set()
+ y_index_matched = set()
+ for i, x in enumerate(level.t1):
+ x_found = False
+ for j, y in enumerate(level.t2):
+
+ if(j in y_index_matched):
+ # This ensures a one-to-one relationship of matches from t1 to t2.
+ # If this index in t2 has already been matched to another x,
+ # it cannot have another match, so just continue.
+ continue
+
+ if(self.iterable_compare_func(x, y, level)):
+ deep_hash = DeepHash(y,
+ hashes=self.hashes,
+ apply_hash=True,
+ **self.deephash_parameters,
+ )
+ y_index_matched.add(j)
+ y_matched.add(deep_hash[y])
+ matches.append(((i, j), (x, y)))
+ x_found = True
+ break
+
+ if(not x_found):
+ matches.append(((i, -1), (x, ListItemRemovedOrAdded)))
+ for j, y in enumerate(level.t2):
+
+ deep_hash = DeepHash(y,
+ hashes=self.hashes,
+ apply_hash=True,
+ **self.deephash_parameters,
+ )
+ if(deep_hash[y] not in y_matched):
+ matches.append(((-1, j), (ListItemRemovedOrAdded, y)))
+ return matches
+ except CannotCompare:
+ return self._compare_in_order(
+ level,
+ t1_from_index=t1_from_index, t1_to_index=t1_to_index,
+ t2_from_index=t2_from_index, t2_to_index=t2_to_index
+ )
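+
+ # An illustrative custom iterable_compare_func (hypothetical records
+ # matched by their 'id' field; raising CannotCompare falls back to the
+ # in-order comparison above):
+ #
+ #     >>> def compare_by_id(x, y, level):
+ #     ...     try:
+ #     ...         return x['id'] == y['id']
+ #     ...     except (TypeError, KeyError):
+ #     ...         raise CannotCompare() from None
+ #     >>> t1 = [{'id': 1, 'v': 'a'}, {'id': 2, 'v': 'b'}]
+ #     >>> t2 = [{'id': 2, 'v': 'b2'}, {'id': 1, 'v': 'a'}]
+ #     >>> DeepDiff(t1, t2, iterable_compare_func=compare_by_id)  # doctest: +SKIP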
+
+ def _diff_iterable_in_order(self, level, parents_ids=frozenset(), _original_type=None, local_tree=None):
+ # We're handling both subscriptable and non-subscriptable iterables. Which one is it?
+ subscriptable = self._iterables_subscriptable(level.t1, level.t2)
+ if subscriptable:
+ child_relationship_class = SubscriptableIterableRelationship
+ else:
+ child_relationship_class = NonSubscriptableIterableRelationship
+
+ if (
+ not self.zip_ordered_iterables
+ and isinstance(level.t1, Sequence)
+ and isinstance(level.t2, Sequence)
+ and self._all_values_basic_hashable(level.t1)
+ and self._all_values_basic_hashable(level.t2)
+ and self.iterable_compare_func is None
+ ):
+ local_tree_pass = TreeResult()
+ opcodes_with_values = self._diff_ordered_iterable_by_difflib(
+ level,
+ parents_ids=parents_ids,
+ _original_type=_original_type,
+ child_relationship_class=child_relationship_class,
+ local_tree=local_tree_pass,
+ )
+ # Sometimes the older pair-forming iterable diff does a better job than the difflib-based one.
+ if len(local_tree_pass) > 1:
+ local_tree_pass2 = TreeResult()
+ self._diff_by_forming_pairs_and_comparing_one_by_one(
+ level,
+ parents_ids=parents_ids,
+ _original_type=_original_type,
+ child_relationship_class=child_relationship_class,
+ local_tree=local_tree_pass2,
+ )
+ if len(local_tree_pass) >= len(local_tree_pass2):
+ local_tree_pass = local_tree_pass2
+ else:
+ self._iterable_opcodes[level.path(force=FORCE_DEFAULT)] = opcodes_with_values
+ for report_type, levels in local_tree_pass.items():
+ if levels:
+ self.tree[report_type] |= levels
+ else:
+ self._diff_by_forming_pairs_and_comparing_one_by_one(
+ level,
+ parents_ids=parents_ids,
+ _original_type=_original_type,
+ child_relationship_class=child_relationship_class,
+ local_tree=local_tree,
+ )
+
+ def _all_values_basic_hashable(self, iterable):
+ """
+ Are all items basic hashable types,
+ or are there custom types too?
+ """
+
+ # We don't want to exhaust a generator
+ if isinstance(iterable, types.GeneratorType):
+ return False
+ for item in iterable:
+ if not isinstance(item, basic_types):
+ return False
+ return True
+
+ def _diff_by_forming_pairs_and_comparing_one_by_one(
+ self, level, local_tree, parents_ids=frozenset(),
+ _original_type=None, child_relationship_class=None,
+ t1_from_index=None, t1_to_index=None,
+ t2_from_index=None, t2_to_index=None,
+ ):
+ for (i, j), (x, y) in self._get_matching_pairs(
+ level,
+ t1_from_index=t1_from_index, t1_to_index=t1_to_index,
+ t2_from_index=t2_from_index, t2_to_index=t2_to_index
+ ):
+ if self._count_diff() is StopIteration:
+ return # pragma: no cover. This is already covered for addition.
+
+ reference_param1 = i
+ reference_param2 = j
+ if y is ListItemRemovedOrAdded: # item removed completely
+ change_level = level.branch_deeper(
+ x,
+ notpresent,
+ child_relationship_class=child_relationship_class,
+ child_relationship_param=reference_param1,
+ child_relationship_param2=reference_param2,
+ )
+ self._report_result('iterable_item_removed', change_level, local_tree=local_tree)
+
+ elif x is ListItemRemovedOrAdded: # new item added
+ change_level = level.branch_deeper(
+ notpresent,
+ y,
+ child_relationship_class=child_relationship_class,
+ child_relationship_param=reference_param1,
+ child_relationship_param2=reference_param2,
+ )
+ self._report_result('iterable_item_added', change_level, local_tree=local_tree)
+
+ else: # check if item value has changed
+ if (i != j and ((x == y) or self.iterable_compare_func)):
+ # Item moved
+ change_level = level.branch_deeper(
+ x,
+ y,
+ child_relationship_class=child_relationship_class,
+ child_relationship_param=reference_param1,
+ child_relationship_param2=reference_param2
+ )
+ self._report_result('iterable_item_moved', change_level, local_tree=local_tree)
+
+ if self.iterable_compare_func:
+ # Intentionally setting j as the first child relationship param in cases of a moved item.
+ # If the item was moved using an iterable_compare_func then we want to make sure that the index
+ # is relative to t2.
+ reference_param1 = j
+ reference_param2 = i
+ else:
+ continue
+
+ item_id = id(x)
+ if parents_ids and item_id in parents_ids:
+ continue
+ parents_ids_added = add_to_frozen_set(parents_ids, item_id)
+
+ # Go one level deeper
+ next_level = level.branch_deeper(
+ x,
+ y,
+ child_relationship_class=child_relationship_class,
+ child_relationship_param=reference_param1,
+ child_relationship_param2=reference_param2
+ )
+ self._diff(next_level, parents_ids_added, local_tree=local_tree)
+
+ def _diff_ordered_iterable_by_difflib(
+ self, level, local_tree, parents_ids=frozenset(), _original_type=None, child_relationship_class=None,
+ ):
+
+ seq = difflib.SequenceMatcher(isjunk=None, a=level.t1, b=level.t2, autojunk=False)
+
+ opcodes = seq.get_opcodes()
+ opcodes_with_values = []
+
+ # TODO: this logic should be revisited so we detect reverse operations,
+ # like when a replacement happens at index X and a reverse replacement happens at index Y.
+ # In those cases we have an "iterable_item_moved" operation.
+ for tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index in opcodes:
+ if tag == 'equal':
+ opcodes_with_values.append(Opcode(
+ tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index,
+ ))
+ continue
+ # print('{:7} t1[{}:{}] --> t2[{}:{}] {!r:>8} --> {!r}'.format(
+ # tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index, level.t1[t1_from_index:t1_to_index], level.t2[t2_from_index:t2_to_index]))
+
+ opcodes_with_values.append(Opcode(
+ tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index,
+ old_values = level.t1[t1_from_index: t1_to_index],
+ new_values = level.t2[t2_from_index: t2_to_index],
+ ))
+
+ if tag == 'replace':
+ self._diff_by_forming_pairs_and_comparing_one_by_one(
+ level, local_tree=local_tree, parents_ids=parents_ids,
+ _original_type=_original_type, child_relationship_class=child_relationship_class,
+ t1_from_index=t1_from_index, t1_to_index=t1_to_index,
+ t2_from_index=t2_from_index, t2_to_index=t2_to_index,
+ )
+ elif tag == 'delete':
+ for index, x in enumerate(level.t1[t1_from_index:t1_to_index]):
+ change_level = level.branch_deeper(
+ x,
+ notpresent,
+ child_relationship_class=child_relationship_class,
+ child_relationship_param=index + t1_from_index,
+ child_relationship_param2=index + t1_from_index,
+ )
+ self._report_result('iterable_item_removed', change_level, local_tree=local_tree)
+ elif tag == 'insert':
+ for index, y in enumerate(level.t2[t2_from_index:t2_to_index]):
+ change_level = level.branch_deeper(
+ notpresent,
+ y,
+ child_relationship_class=child_relationship_class,
+ child_relationship_param=index + t2_from_index,
+ child_relationship_param2=index + t2_from_index,
+ )
+ self._report_result('iterable_item_added', change_level, local_tree=local_tree)
+ return opcodes_with_values
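+
+ # Sketch of the difflib opcodes consumed above (hypothetical sequences):
+ #
+ #     >>> import difflib
+ #     >>> difflib.SequenceMatcher(None, [1, 2, 3], [1, 3, 4]).get_opcodes()  # doctest: +SKIP
+ #     [('equal', 0, 1, 0, 1), ('delete', 1, 2, 1, 1), ('equal', 2, 3, 1, 2), ('insert', 3, 3, 2, 3)]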
+
+
+ def _diff_str(self, level, local_tree=None):
+ """Compare strings"""
+ if self.ignore_string_case:
+ level.t1 = level.t1.lower()
+ level.t2 = level.t2.lower()
+
+ if type(level.t1) == type(level.t2) and level.t1 == level.t2: # NOQA
+ return
+
+ # do we add a diff for convenience?
+ do_diff = True
+ t1_str = level.t1
+ t2_str = level.t2
+
+ if isinstance(level.t1, bytes_type):
+ try:
+ t1_str = level.t1.decode('ascii')
+ except UnicodeDecodeError:
+ do_diff = False
+
+ if isinstance(level.t2, bytes_type):
+ try:
+ t2_str = level.t2.decode('ascii')
+ except UnicodeDecodeError:
+ do_diff = False
+
+ if isinstance(level.t1, Enum):
+ t1_str = level.t1.value
+
+ if isinstance(level.t2, Enum):
+ t2_str = level.t2.value
+
+ if t1_str == t2_str:
+ return
+
+ if do_diff:
+ if '\n' in t1_str or isinstance(t2_str, str) and '\n' in t2_str:
+ diff = difflib.unified_diff(
+ t1_str.splitlines(), t2_str.splitlines(), lineterm='')
+ diff = list(diff)
+ if diff:
+ level.additional['diff'] = '\n'.join(diff)
+
+ self._report_result('values_changed', level, local_tree=local_tree)
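+
+ # An illustrative sketch (hypothetical strings): multiline values also get
+ # a unified diff attached under 'diff' in level.additional, next to the
+ # reported old/new values.
+ #
+ #     >>> DeepDiff('line1\nline2', 'line1\nline3')  # doctest: +SKIP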
+
+ def _diff_tuple(self, level, parents_ids, local_tree=None):
+ # Checking to see if it has _asdict, which probably means it is a named
+ # tuple.
+ try:
+ level.t1._asdict
+ # It must be a normal tuple
+ except AttributeError:
+ self._diff_iterable(level, parents_ids, local_tree=local_tree)
+ # We assume it is a namedtuple then
+ else:
+ self._diff_obj(level, parents_ids, is_namedtuple=True, local_tree=local_tree)
+
+ def _add_hash(self, hashes, item_hash, item, i):
+ if item_hash in hashes:
+ hashes[item_hash].indexes.append(i)
+ else:
+ hashes[item_hash] = IndexedHash(indexes=[i], item=item)
+
+ def _create_hashtable(self, level, t):
+ """Create hashtable of {item_hash: (indexes, item)}"""
+ obj = getattr(level, t)
+
+ local_hashes = dict_()
+ for (i, item) in enumerate(obj):
+ try:
+ parent = "{}[{}]".format(level.path(), i)
+ # Note: in DeepDiff we only calculate the hash of items when we have to.
+ # So self.hashes does not include hashes of all objects in t1 and t2.
+ # It only includes the ones needed when comparing iterables.
+ # The self.hashes dictionary gets shared between different runs of DeepHash
+ # So that any object that is already calculated to have a hash is not re-calculated.
+ deep_hash = DeepHash(
+ item,
+ hashes=self.hashes,
+ parent=parent,
+ apply_hash=True,
+ **self.deephash_parameters,
+ )
+ except UnicodeDecodeError as err:
+ err.reason = f"Can not produce a hash for {level.path()}: {err.reason}"
+ raise
+ except NotImplementedError:
+ raise
+ # except Exception as e: # pragma: no cover
+ # logger.error("Can not produce a hash for %s."
+ # "Not counting this object.\n %s" %
+ # (level.path(), e))
+ else:
+ try:
+ item_hash = deep_hash[item]
+ except KeyError:
+ pass
+ else:
+ if item_hash is unprocessed: # pragma: no cover
+ self.log_err("Item %s was not processed while hashing "
+ "thus not counting this object." %
+ level.path())
+ else:
+ self._add_hash(hashes=local_hashes, item_hash=item_hash, item=item, i=i)
+
+ # We also hash the iterables themselves so that we can later create cache keys from those hashes.
+ DeepHash(
+ obj,
+ hashes=self.hashes,
+ parent=level.path(),
+ apply_hash=True,
+ **self.deephash_parameters,
+ )
+ return local_hashes
+
+ @staticmethod
+ @lru_cache(maxsize=2028)
+ def _get_distance_cache_key(added_hash, removed_hash):
+ key1, key2 = (added_hash, removed_hash) if added_hash > removed_hash else (removed_hash, added_hash)
+ if isinstance(key1, int):
+ # If the hash function produces integers we convert them to hex values.
+ # This was used when the default hash function was Murmur3 128bit which produces integers.
+ key1 = hex(key1).encode('utf-8')
+ key2 = hex(key2).encode('utf-8')
+ elif isinstance(key1, str):
+ key1 = key1.encode('utf-8')
+ key2 = key2.encode('utf-8')
+ return key1 + b'--' + key2 + b'dc'
+
+ def _get_rough_distance_of_hashed_objs(
+ self, added_hash, removed_hash, added_hash_obj, removed_hash_obj, _original_type=None):
+ # We need the rough distance between the 2 objects to see if they qualify to be pairs or not
+ _distance = cache_key = None
+ if self._stats[DISTANCE_CACHE_ENABLED]:
+ cache_key = self._get_distance_cache_key(added_hash, removed_hash)
+ if cache_key in self._distance_cache:
+ self._stats[DISTANCE_CACHE_HIT_COUNT] += 1
+ _distance = self._distance_cache.get(cache_key)
+ if _distance is None:
+ # We can only cache the rough distance and not the actual diff result for reuse.
+ # The reason is that we have modified the parameters explicitly so they are different and can't
+ # be used for diff reporting
+ diff = DeepDiff(
+ removed_hash_obj.item, added_hash_obj.item,
+ _parameters=self._parameters,
+ _shared_parameters=self._shared_parameters,
+ view=DELTA_VIEW,
+ _original_type=_original_type,
+ iterable_compare_func=self.iterable_compare_func,
+ )
+ _distance = diff._get_rough_distance()
+ if cache_key and self._stats[DISTANCE_CACHE_ENABLED]:
+ self._distance_cache.set(cache_key, value=_distance)
+ return _distance
+
+ def _get_most_in_common_pairs_in_iterables(
+ self, hashes_added, hashes_removed, t1_hashtable, t2_hashtable, parents_ids, _original_type):
+ """
+ Get the closest pairs between items that are removed and items that are added.
+
+ Returns a dictionary of hashes that are closest to each other.
+ The dictionary is symmetrical, so any key is also a value and vice versa.
+
+ Note that due to the current reporting structure in DeepDiff, we don't compare an item that
+ was added to an item that is in both t1 and t2.
+
+ For example
+
+ [{1, 2}, {4, 5, 6}]
+ [{1, 2}, {1, 2, 3}]
+
+ is only compared between {4, 5, 6} and {1, 2, 3} even though technically {1, 2, 3} is
+ just one item different from {1, 2}.
+
+ Perhaps in future we can have a report key that is item duplicated and modified instead of just added.
+ """
+ cache_key = None
+ if self._stats[DISTANCE_CACHE_ENABLED]:
+ cache_key = combine_hashes_lists(items=[hashes_added, hashes_removed], prefix='pairs_cache')
+ if cache_key in self._distance_cache:
+ return self._distance_cache.get(cache_key).copy()
+
+ # A dictionary of hashes to distances and each distance to an ordered set of hashes.
+ # It tells us about the distance of each object from other objects.
+ # And the objects with the same distances are grouped together in an ordered set.
+ # It also includes a "max" key that is just the value of the biggest current distance in the
+ # most_in_common_pairs dictionary.
+ def defaultdict_orderedset():
+ return defaultdict(SetOrdered)
+ most_in_common_pairs = defaultdict(defaultdict_orderedset)
+ pairs = dict_()
+
+ pre_calced_distances = None
+ if hashes_added and hashes_removed and np and len(hashes_added) > 1 and len(hashes_removed) > 1:
+ # Pre-calculate distances ONLY for 1D arrays, whether an _original_type
+ # was explicitly passed or a homogeneous array is detected.
+ # Numpy is needed for this optimization.
+ pre_calced_distances = self._precalculate_numpy_arrays_distance(
+ hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type)
+
+ if hashes_added and hashes_removed \
+ and self.iterable_compare_func \
+ and len(hashes_added) > 0 and len(hashes_removed) > 0:
+ pre_calced_distances = self._precalculate_distance_by_custom_compare_func(
+ hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type)
+
+ for added_hash in hashes_added:
+ for removed_hash in hashes_removed:
+ added_hash_obj = t2_hashtable[added_hash]
+ removed_hash_obj = t1_hashtable[removed_hash]
+
+ # Loop is detected
+ if id(removed_hash_obj.item) in parents_ids:
+ continue
+
+ _distance = None
+ if pre_calced_distances:
+ _distance = pre_calced_distances.get("{}--{}".format(added_hash, removed_hash))
+ if _distance is None:
+ _distance = self._get_rough_distance_of_hashed_objs(
+ added_hash, removed_hash, added_hash_obj, removed_hash_obj, _original_type)
+ # Left for future debugging
+ # print(f'{Fore.RED}distance of {added_hash_obj.item} and {removed_hash_obj.item}: {_distance}{Style.RESET_ALL}')
+ # Discard potential pairs that are too far.
+ if _distance >= self.cutoff_distance_for_pairs:
+ continue
+ pairs_of_item = most_in_common_pairs[added_hash]
+ pairs_of_item[_distance].add(removed_hash)
+ used_to_hashes = set()
+
+ distances_to_from_hashes = defaultdict(SetOrdered)
+ for from_hash, distances_to_to_hashes in most_in_common_pairs.items():
+ # del distances_to_to_hashes['max']
+ for dist in distances_to_to_hashes:
+ distances_to_from_hashes[dist].add(from_hash)
+
+ for dist in sorted(distances_to_from_hashes.keys()):
+ from_hashes = distances_to_from_hashes[dist]
+ while from_hashes:
+ from_hash = from_hashes.pop()
+ if from_hash not in used_to_hashes:
+ to_hashes = most_in_common_pairs[from_hash][dist]
+ while to_hashes:
+ to_hash = to_hashes.pop()
+ if to_hash not in used_to_hashes:
+ used_to_hashes.add(from_hash)
+ used_to_hashes.add(to_hash)
+ # Left for future debugging:
+ # print(f'{bcolors.FAIL}Adding {t2_hashtable[from_hash].item} as a pairs of {t1_hashtable[to_hash].item} with distance of {dist}{bcolors.ENDC}')
+ pairs[from_hash] = to_hash
+
+ inverse_pairs = {v: k for k, v in pairs.items()}
+ pairs.update(inverse_pairs)
+ if cache_key and self._stats[DISTANCE_CACHE_ENABLED]:
+ self._distance_cache.set(cache_key, value=pairs)
+ return pairs.copy()
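+
+ # Sketch of the pairing effect, using the data from the docstring above:
+ # with ignore_order=True, {4, 5, 6} and {1, 2, 3} are picked as the
+ # closest removed/added pair and diffed against each other rather than
+ # being reported as one plain removal plus one addition.
+ #
+ #     >>> DeepDiff([{1, 2}, {4, 5, 6}], [{1, 2}, {1, 2, 3}], ignore_order=True)  # doctest: +SKIP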
+
+ def _diff_iterable_with_deephash(self, level, parents_ids, _original_type=None, local_tree=None):
+ """Diff of hashable or unhashable iterables. Only used when ignoring the order."""
+
+ full_t1_hashtable = self._create_hashtable(level, 't1')
+ full_t2_hashtable = self._create_hashtable(level, 't2')
+ t1_hashes = SetOrdered(full_t1_hashtable.keys())
+ t2_hashes = SetOrdered(full_t2_hashtable.keys())
+ hashes_added = t2_hashes - t1_hashes
+ hashes_removed = t1_hashes - t2_hashes
+
+ # Deciding whether to calculate pairs or not.
+ if (len(hashes_added) + len(hashes_removed)) / (len(full_t1_hashtable) + len(full_t2_hashtable) + 1) > self.cutoff_intersection_for_pairs:
+ get_pairs = False
+ else:
+ get_pairs = True
+
+ # reduce the size of hashtables
+ if self.report_repetition:
+ t1_hashtable = full_t1_hashtable
+ t2_hashtable = full_t2_hashtable
+ else:
+ t1_hashtable = {k: v for k, v in full_t1_hashtable.items() if k in hashes_removed}
+ t2_hashtable = {k: v for k, v in full_t2_hashtable.items() if k in hashes_added}
+ if self._stats[PASSES_COUNT] < self.max_passes and get_pairs:
+ self._stats[PASSES_COUNT] += 1
+ pairs = self._get_most_in_common_pairs_in_iterables(
+ hashes_added, hashes_removed, t1_hashtable, t2_hashtable, parents_ids, _original_type)
+ elif get_pairs:
+ if not self._stats[MAX_PASS_LIMIT_REACHED]:
+ self._stats[MAX_PASS_LIMIT_REACHED] = True
+ logger.warning(MAX_PASSES_REACHED_MSG.format(self.max_passes))
+ pairs = dict_()
+ else:
+ pairs = dict_()
+
+ def get_other_pair(hash_value, in_t1=True):
+ """
+ Gets the other paired indexed hash item to the hash_value in the pairs dictionary
+ in_t1: are we looking for the other pair in t1 or t2?
+ """
+ if in_t1:
+ hashtable = t1_hashtable
+ the_other_hashes = hashes_removed
+ else:
+ hashtable = t2_hashtable
+ the_other_hashes = hashes_added
+ other = pairs.pop(hash_value, notpresent)
+ if other is notpresent:
+ other = notpresent_indexed
+ else:
+ # The pairs are symmetrical.
+ # removing the other direction of pair
+ # so it does not get used.
+ del pairs[other]
+ the_other_hashes.remove(other)
+ other = hashtable[other]
+ return other
+
+ if self.report_repetition:
+ for hash_value in hashes_added:
+ if self._count_diff() is StopIteration:
+ return # pragma: no cover. This is already covered for addition (when report_repetition=False).
+ other = get_other_pair(hash_value)
+ item_id = id(other.item)
+ indexes = t2_hashtable[hash_value].indexes if other.item is notpresent else other.indexes
+ # When we report repetitions, we want the child_relationship_param2 only if there is no repetition.
+ # Because when there is a repetition, we report it in a different way (iterable_items_added_at_indexes for example).
+ # When there is no repetition, we want child_relationship_param2 so that we report the "new_path" correctly.
+ if len(t2_hashtable[hash_value].indexes) == 1:
+ index2 = t2_hashtable[hash_value].indexes[0]
+ else:
+ index2 = None
+ for i in indexes:
+ change_level = level.branch_deeper(
+ other.item,
+ t2_hashtable[hash_value].item,
+ child_relationship_class=SubscriptableIterableRelationship,
+ child_relationship_param=i,
+ child_relationship_param2=index2,
+ )
+ if other.item is notpresent:
+ self._report_result('iterable_item_added', change_level, local_tree=local_tree)
+ else:
+ parents_ids_added = add_to_frozen_set(parents_ids, item_id)
+ self._diff(change_level, parents_ids_added, local_tree=local_tree)
+ for hash_value in hashes_removed:
+ if self._count_diff() is StopIteration:
+ return # pragma: no cover. This is already covered for addition.
+ other = get_other_pair(hash_value, in_t1=False)
+ item_id = id(other.item)
+ # When we report repetitions, we want the child_relationship_param2 only if there is no repetition.
+ # Because when there is a repetition, we report it in a different way (iterable_items_added_at_indexes for example).
+ # When there is no repetition, we want child_relationship_param2 so that we report the "new_path" correctly.
+ if other.item is notpresent or len(other.indexes) > 1:
+ index2 = None
+ else:
+ index2 = other.indexes[0]
+ for i in t1_hashtable[hash_value].indexes:
+ change_level = level.branch_deeper(
+ t1_hashtable[hash_value].item,
+ other.item,
+ child_relationship_class=SubscriptableIterableRelationship,
+ child_relationship_param=i,
+ child_relationship_param2=index2,
+ )
+ if other.item is notpresent:
+ self._report_result('iterable_item_removed', change_level, local_tree=local_tree)
+ else:
+ # I was not able to make a test case for the following 2 lines since the cases end up
+ # getting resolved above in the hashes_added calcs. However I am leaving these 2 lines
+ # in case things change in future.
+ parents_ids_added = add_to_frozen_set(parents_ids, item_id) # pragma: no cover.
+ self._diff(change_level, parents_ids_added, local_tree=local_tree) # pragma: no cover.
+
+ items_intersect = t2_hashes.intersection(t1_hashes)
+
+ for hash_value in items_intersect:
+ t1_indexes = t1_hashtable[hash_value].indexes
+ t2_indexes = t2_hashtable[hash_value].indexes
+ t1_indexes_len = len(t1_indexes)
+ t2_indexes_len = len(t2_indexes)
+ if t1_indexes_len != t2_indexes_len: # this is a repetition change!
+ # create "change" entry, keep current level untouched to handle further changes
+ repetition_change_level = level.branch_deeper(
+ t1_hashtable[hash_value].item,
+ t2_hashtable[hash_value].item, # nb: those are equal!
+ child_relationship_class=SubscriptableIterableRelationship,
+ child_relationship_param=t1_hashtable[hash_value]
+ .indexes[0])
+ repetition_change_level.additional['repetition'] = RemapDict(
+ old_repeat=t1_indexes_len,
+ new_repeat=t2_indexes_len,
+ old_indexes=t1_indexes,
+ new_indexes=t2_indexes)
+ self._report_result('repetition_change',
+ repetition_change_level, local_tree=local_tree)
+
+ else:
+ for hash_value in hashes_added:
+ if self._count_diff() is StopIteration:
+ return
+ other = get_other_pair(hash_value)
+ item_id = id(other.item)
+ index = t2_hashtable[hash_value].indexes[0] if other.item is notpresent else other.indexes[0]
+ index2 = t2_hashtable[hash_value].indexes[0]
+ change_level = level.branch_deeper(
+ other.item,
+ t2_hashtable[hash_value].item,
+ child_relationship_class=SubscriptableIterableRelationship,
+ child_relationship_param=index,
+ child_relationship_param2=index2,
+ )
+ if other.item is notpresent:
+ self._report_result('iterable_item_added', change_level, local_tree=local_tree)
+ else:
+ parents_ids_added = add_to_frozen_set(parents_ids, item_id)
+ self._diff(change_level, parents_ids_added, local_tree=local_tree)
+
+ for hash_value in hashes_removed:
+ if self._count_diff() is StopIteration:
+ return # pragma: no cover. This is already covered for addition.
+ other = get_other_pair(hash_value, in_t1=False)
+ item_id = id(other.item)
+ index = t1_hashtable[hash_value].indexes[0]
+ index2 = t1_hashtable[hash_value].indexes[0] if other.item is notpresent else other.indexes[0]
+ change_level = level.branch_deeper(
+ t1_hashtable[hash_value].item,
+ other.item,
+ child_relationship_class=SubscriptableIterableRelationship,
+ child_relationship_param=index,
+ child_relationship_param2=index2,
+ )
+ if other.item is notpresent:
+ self._report_result('iterable_item_removed', change_level, local_tree=local_tree)
+ else:
+ # Just like the case when report_repetition = True, these lines never run currently.
+ # However they will stay here in case things change in future.
+ parents_ids_added = add_to_frozen_set(parents_ids, item_id) # pragma: no cover.
+ self._diff(change_level, parents_ids_added, local_tree=local_tree) # pragma: no cover.
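+ # Illustrative sketch (hedged; not part of this module): with ignore_order=True,
+ # repetition counts only surface when report_repetition=True, e.g. something like:
+ # >>> from deepdiff import DeepDiff
+ # >>> DeepDiff([1, 1, 2], [1, 2, 2], ignore_order=True, report_repetition=True)
+ # {'repetition_change': {'root[0]': {'old_repeat': 2, 'new_repeat': 1, ...},
+ # 'root[2]': {'old_repeat': 1, 'new_repeat': 2, ...}}}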
+
+ def _diff_booleans(self, level, local_tree=None):
+ if level.t1 != level.t2:
+ self._report_result('values_changed', level, local_tree=local_tree)
+
+ def _diff_numbers(self, level, local_tree=None, report_type_change=True):
+ """Diff Numbers"""
+ if report_type_change:
+ t1_type = "number" if self.ignore_numeric_type_changes else level.t1.__class__.__name__
+ t2_type = "number" if self.ignore_numeric_type_changes else level.t2.__class__.__name__
+ else:
+ t1_type = t2_type = ''
+
+ if self.use_log_scale:
+ if not logarithmic_similarity(level.t1, level.t2, threshold=self.log_scale_similarity_threshold):
+ self._report_result('values_changed', level, local_tree=local_tree)
+ elif self.math_epsilon is not None:
+ if not is_close(level.t1, level.t2, abs_tol=self.math_epsilon):
+ self._report_result('values_changed', level, local_tree=local_tree)
+ elif self.significant_digits is None:
+ if level.t1 != level.t2:
+ self._report_result('values_changed', level, local_tree=local_tree)
+ else:
+ # Bernhard10: String formatting is used for the comparison in order to be consistent
+ # with use cases where data is read from files that were previously written from
+ # Python, and with the on-screen representation of numbers.
+ # Other options would be abs(t1-t2) < 10**-self.significant_digits
+ # or math.isclose (Python 3.5+).
+ # Note that abs(3.25-3.251) = 0.0009999999999998899 < 0.001
+ # Note also that "{:.3f}".format(1.1135) = 1.113, but "{:.3f}".format(1.11351) = 1.114
+ # For Decimals, format rounds 2.5 to 2 and 3.5 to 4 (i.e. to the closest even number).
+ t1_s = self.number_to_string(level.t1,
+ significant_digits=self.significant_digits,
+ number_format_notation=self.number_format_notation)
+ t2_s = self.number_to_string(level.t2,
+ significant_digits=self.significant_digits,
+ number_format_notation=self.number_format_notation)
+
+ t1_s = KEY_TO_VAL_STR.format(t1_type, t1_s)
+ t2_s = KEY_TO_VAL_STR.format(t2_type, t2_s)
+ if t1_s != t2_s:
+ self._report_result('values_changed', level, local_tree=local_tree)
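+ # Illustrative sketch (hedged; not part of this module): with significant_digits=2,
+ # both numbers below format to "3.14", so no 'values_changed' is reported:
+ # >>> from deepdiff import DeepDiff
+ # >>> DeepDiff(3.14159, 3.14259, significant_digits=2)
+ # {}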
+
+ def _diff_ipranges(self, level, local_tree=None):
+ """Diff IP ranges"""
+ if str(level.t1) != str(level.t2):
+ self._report_result('values_changed', level, local_tree=local_tree)
+
+ def _diff_datetime(self, level, local_tree=None):
+ """Diff DateTimes"""
+ level.t1 = datetime_normalize(self.truncate_datetime, level.t1, default_timezone=self.default_timezone)
+ level.t2 = datetime_normalize(self.truncate_datetime, level.t2, default_timezone=self.default_timezone)
+
+ if level.t1 != level.t2:
+ self._report_result('values_changed', level, local_tree=local_tree)
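+ # Illustrative sketch (hedged; not part of this module): with truncate_datetime
+ # set to 'minute', seconds are dropped before comparing, so these compare equal:
+ # >>> import datetime
+ # >>> from deepdiff import DeepDiff
+ # >>> d1 = datetime.datetime(2024, 1, 1, 10, 30, 15)
+ # >>> d2 = datetime.datetime(2024, 1, 1, 10, 30, 59)
+ # >>> DeepDiff(d1, d2, truncate_datetime='minute')
+ # {}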
+
+ def _diff_time(self, level, local_tree=None):
+ """Diff DateTimes"""
+ if self.truncate_datetime:
+ level.t1 = datetime_normalize(self.truncate_datetime, level.t1, default_timezone=self.default_timezone)
+ level.t2 = datetime_normalize(self.truncate_datetime, level.t2, default_timezone=self.default_timezone)
+
+ if level.t1 != level.t2:
+ self._report_result('values_changed', level, local_tree=local_tree)
+
+ def _diff_uuids(self, level, local_tree=None):
+ """Diff UUIDs"""
+ if level.t1.int != level.t2.int:
+ self._report_result('values_changed', level, local_tree=local_tree)
+
+ def _diff_numpy_array(self, level, parents_ids=frozenset(), local_tree=None):
+ """Diff numpy arrays"""
+ if level.path() not in self._numpy_paths:
+ self._numpy_paths[level.path()] = get_type(level.t2).__name__
+ if np is None:
+ # This line should never run. If it is ever reached, the type check detected a
+ # numpy array, which means the numpy module must be available, so np cannot be None.
+ raise ImportError(CANT_FIND_NUMPY_MSG) # pragma: no cover
+
+ if (self.ignore_order_func and not self.ignore_order_func(level)) or not self.ignore_order:
+ # fast checks
+ if self.significant_digits is None:
+ if np.array_equal(level.t1, level.t2, equal_nan=self.ignore_nan_inequality):
+ return # all good
+ else:
+ try:
+ np.testing.assert_almost_equal(level.t1, level.t2, decimal=self.significant_digits)
+ except TypeError:
+ np.array_equal(level.t1, level.t2, equal_nan=self.ignore_nan_inequality)
+ except AssertionError:
+ pass # do detailed checking below
+ else:
+ return # all good
+
+ # compare array meta-data
+ _original_type = level.t1.dtype
+ if level.t1.shape != level.t2.shape:
+ # Arrays are converted to Python lists so that certain DeepDiff features can be
+ # applied to them more easily. They are converted back to Numpy at their final dimension.
+ level.t1 = level.t1.tolist()
+ level.t2 = level.t2.tolist()
+ self._diff_iterable(level, parents_ids, _original_type=_original_type, local_tree=local_tree)
+ else:
+ # metadata same -- the difference is in the content
+ shape = level.t1.shape
+ dimensions = len(shape)
+ if dimensions == 1:
+ self._diff_iterable(level, parents_ids, _original_type=_original_type, local_tree=local_tree)
+ elif (self.ignore_order_func and self.ignore_order_func(level)) or self.ignore_order:
+ # Arrays are converted to Python lists so that certain DeepDiff features can be
+ # applied to them more easily. They are converted back to Numpy at their final dimension.
+ level.t1 = level.t1.tolist()
+ level.t2 = level.t2.tolist()
+ self._diff_iterable_with_deephash(level, parents_ids, _original_type=_original_type, local_tree=local_tree)
+ else:
+ for (t1_path, t1_row), (t2_path, t2_row) in zip(
+ get_numpy_ndarray_rows(level.t1, shape),
+ get_numpy_ndarray_rows(level.t2, shape)):
+
+ new_level = level.branch_deeper(
+ t1_row,
+ t2_row,
+ child_relationship_class=NumpyArrayRelationship,
+ child_relationship_param=t1_path,
+ child_relationship_param2=t2_path,
+ )
+
+ self._diff_iterable_in_order(new_level, parents_ids, _original_type=_original_type, local_tree=local_tree)
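+ # Illustrative sketch (hedged; not part of this module): same-shape arrays are
+ # compared element-wise and changes are reported by index path, roughly:
+ # >>> import numpy as np
+ # >>> from deepdiff import DeepDiff
+ # >>> DeepDiff(np.array([1, 2, 3]), np.array([1, 2, 4]))
+ # {'values_changed': {'root[2]': {'new_value': 4, 'old_value': 2}}}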
+
+ def _diff_types(self, level, local_tree=None):
+ """Diff types"""
+ level.report_type = 'type_changes'
+ self._report_result('type_changes', level, local_tree=local_tree)
+
+ def _count_diff(self):
+ if (self.max_diffs is not None and self._stats[DIFF_COUNT] > self.max_diffs):
+ if not self._stats[MAX_DIFF_LIMIT_REACHED]:
+ self._stats[MAX_DIFF_LIMIT_REACHED] = True
+ logger.warning(MAX_DIFFS_REACHED_MSG.format(self.max_diffs))
+ return StopIteration
+ self._stats[DIFF_COUNT] += 1
+ if self.cache_size and self.cache_tuning_sample_size:
+ self._auto_tune_cache()
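+ # Illustrative sketch (hedged; not part of this module): with max_diffs set,
+ # diffing stops early once the counter exceeds it, a warning is logged
+ # (see MAX_DIFFS_REACHED_MSG), and partial results are returned:
+ # >>> from deepdiff import DeepDiff
+ # >>> DeepDiff(list(range(100)), list(range(100, 200)), max_diffs=10) # truncated diff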
+
+ def _auto_tune_cache(self):
+ take_sample = (self._stats[DIFF_COUNT] % self.cache_tuning_sample_size == 0)
+ if self.cache_tuning_sample_size:
+ if self._stats[DISTANCE_CACHE_ENABLED]:
+ if take_sample:
+ self._auto_off_cache()
+ # Turn on the cache once in a while
+ elif self._stats[DIFF_COUNT] % self._shared_parameters[_ENABLE_CACHE_EVERY_X_DIFF] == 0:
+ self.progress_logger('Re-enabling the distance and level caches.')
+ # Decrease the frequency of re-enabling the cache
+ self._shared_parameters[_ENABLE_CACHE_EVERY_X_DIFF] *= 10
+ self._stats[DISTANCE_CACHE_ENABLED] = True
+ if take_sample:
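+ # Snapshot the current counters: each PREVIOUS_* key is the literal string
+ # 'PREVIOUS <counter name>', so key[9:] strips the 9-character 'PREVIOUS '
+ # prefix to look up the live counter it mirrors.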
+ for key in (PREVIOUS_DIFF_COUNT, PREVIOUS_DISTANCE_CACHE_HIT_COUNT):
+ self._stats[key] = self._stats[key[9:]]
+
+ def _auto_off_cache(self):
+ """
+ Auto adjust the cache based on the usage
+ """
+ if self._stats[DISTANCE_CACHE_ENABLED]:
+ angle = (self._stats[DISTANCE_CACHE_HIT_COUNT] - self._stats['PREVIOUS {}'.format(DISTANCE_CACHE_HIT_COUNT)]) / (self._stats[DIFF_COUNT] - self._stats[PREVIOUS_DIFF_COUNT])
+ if angle < self.CACHE_AUTO_ADJUST_THRESHOLD:
+ self._stats[DISTANCE_CACHE_ENABLED] = False
+ self.progress_logger('Due to minimal cache hits, {} is disabled.'.format('distance cache'))
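+ # Illustrative arithmetic (hedged): "angle" here is the cache hit rate since the
+ # last sample; e.g. 3 new hits over 100 new diffs gives 0.03, and anything below
+ # CACHE_AUTO_ADJUST_THRESHOLD turns the distance cache off.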
+
+ def _use_custom_operator(self, level):
+ """
+ For each level we check all custom operators.
+ If any one of them was a match for the level, we run the diff of the operator.
+ If the operator returned True, the operator must have decided these objects should not
+ be compared anymore. It might have already reported their results.
+ In that case the report will appear in the final results of this diff.
+ Otherwise basically the 2 objects in the level are being omitted from the results.
+ """
+
+ for operator in self.custom_operators:
+ if operator.match(level):
+ prevent_default = operator.give_up_diffing(level=level, diff_instance=self)
+ if prevent_default:
+ return True
+
+ return False
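+ # Illustrative sketch (hedged; assumed user code, not part of this module): a
+ # custom operator that short-circuits diffing for any pair of floats:
+ # >>> from deepdiff import DeepDiff
+ # >>> from deepdiff.operator import BaseOperator
+ # >>> class IgnoreFloats(BaseOperator):
+ # ... def match(self, level):
+ # ... return isinstance(level.t1, float) and isinstance(level.t2, float)
+ # ... def give_up_diffing(self, level, diff_instance):
+ # ... return True # prevent the default diff; report nothing
+ # >>> DeepDiff({'x': 1.0}, {'x': 2.0}, custom_operators=[IgnoreFloats()])
+ # {}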
+
+ def _diff(self, level, parents_ids=frozenset(), _original_type=None, local_tree=None):
+ """
+ The main diff method
+
+ **parameters**
+
+ level: the tree level or tree node
+ parents_ids: the ids of all the parent objects in the tree above the current node.
+ _original_type: the original type of the objects, if it differed from what is currently in level.t1 and level.t2
+ """
+ if self._count_diff() is StopIteration:
+ return
+
+ if self._use_custom_operator(level):
+ return
+
+ if level.t1 is level.t2:
+ return
+
+ if self._skip_this(level):
+ return
+
+ report_type_change = True
+ if get_type(level.t1) != get_type(level.t2):
+ for type_group in self.ignore_type_in_groups:
+ if self.type_check_func(level.t1, type_group) and self.type_check_func(level.t2, type_group):
+ report_type_change = False
+ break
+ if self.use_enum_value and isinstance(level.t1, Enum):
+ level.t1 = level.t1.value
+ report_type_change = False
+ if self.use_enum_value and isinstance(level.t2, Enum):
+ level.t2 = level.t2.value
+ report_type_change = False
+ if report_type_change:
+ self._diff_types(level, local_tree=local_tree)
+ return
+ # This is an edge case where t1=None or t2=None and None is in the ignore type group.
+ if level.t1 is None or level.t2 is None:
+ self._report_result('values_changed', level, local_tree=local_tree)
+ return
+
+ if self.ignore_nan_inequality and isinstance(level.t1, (float, np_floating)) and str(level.t1) == str(level.t2) == 'nan':
+ return
+
+ if isinstance(level.t1, booleans):
+ self._diff_booleans(level, local_tree=local_tree)
+
+ elif isinstance(level.t1, strings):
+ self._diff_str(level, local_tree=local_tree)
+
+ elif isinstance(level.t1, datetime.datetime):
+ self._diff_datetime(level, local_tree=local_tree)
+
+ elif isinstance(level.t1, ipranges):
+ self._diff_ipranges(level, local_tree=local_tree)
+
+ elif isinstance(level.t1, (datetime.date, datetime.timedelta, datetime.time)):
+ self._diff_time(level, local_tree=local_tree)
+
+ elif isinstance(level.t1, uuids):
+ self._diff_uuids(level, local_tree=local_tree)
+
+ elif isinstance(level.t1, numbers):
+ self._diff_numbers(level, local_tree=local_tree, report_type_change=report_type_change)
+
+ elif isinstance(level.t1, Mapping):
+ self._diff_dict(level, parents_ids, local_tree=local_tree)
+
+ elif isinstance(level.t1, tuple):
+ self._diff_tuple(level, parents_ids, local_tree=local_tree)
+
+ elif isinstance(level.t1, (set, frozenset, SetOrdered)):
+ self._diff_set(level, local_tree=local_tree)
+
+ elif isinstance(level.t1, np_ndarray):
+ self._diff_numpy_array(level, parents_ids, local_tree=local_tree)
+
+ elif isinstance(level.t1, PydanticBaseModel):
+ self._diff_obj(level, parents_ids, local_tree=local_tree, is_pydantic_object=True)
+
+ elif isinstance(level.t1, Iterable):
+ self._diff_iterable(level, parents_ids, _original_type=_original_type, local_tree=local_tree)
+
+ elif isinstance(level.t1, Enum):
+ self._diff_enum(level, parents_ids, local_tree=local_tree)
+
+ else:
+ self._diff_obj(level, parents_ids)
+
+ def _get_view_results(self, view):
+ """
+ Get the results based on the view
+ """
+ result = self.tree
+ if not self.report_repetition: # and self.is_root:
+ result.mutual_add_removes_to_become_value_changes()
+ if view == TREE_VIEW:
+ pass
+ elif view == TEXT_VIEW:
+ result = TextResult(tree_results=self.tree, verbose_level=self.verbose_level)
+ result.remove_empty_keys()
+ elif view == DELTA_VIEW:
+ result = self._to_delta_dict(report_repetition_required=False)
+ else:
+ raise ValueError(INVALID_VIEW_MSG.format(view))
+ return result
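+ # Illustrative sketch (hedged; not part of this module): the same diff rendered
+ # through different views:
+ # >>> DeepDiff(t1, t2) # text view (the default)
+ # >>> DeepDiff(t1, t2, view='tree') # tree view: values are DiffLevel nodes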
+
+ @staticmethod
+ def _get_key_for_group_by(row, group_by, item_name):
+ try:
+ return row.pop(group_by)
+ except KeyError:
+ logger.error("Unable to group {} by {}. The key is missing in {}".format(item_name, group_by, row))
+ raise
+
+ def _group_iterable_to_dict(self, item, group_by, item_name):
+ """
+ Convert a list of dictionaries into a dictionary of dictionaries
+ where the key is the value of the group_by key in each dictionary.
+ """
+ group_by_level2 = None
+ if isinstance(group_by, (list, tuple)):
+ group_by_level1 = group_by[0]
+ if len(group_by) > 1:
+ group_by_level2 = group_by[1]
+ else:
+ group_by_level1 = group_by
+ if isinstance(item, Iterable) and not isinstance(item, Mapping):
+ result = {}
+ item_copy = deepcopy(item)
+ for row in item_copy:
+ if isinstance(row, Mapping):
+ key1 = self._get_key_for_group_by(row, group_by_level1, item_name)
+ if group_by_level2:
+ key2 = self._get_key_for_group_by(row, group_by_level2, item_name)
+ if key1 not in result:
+ result[key1] = {}
+ if self.group_by_sort_key:
+ if key2 not in result[key1]:
+ result[key1][key2] = []
+ result_key1_key2 = result[key1][key2]
+ if row not in result_key1_key2:
+ result_key1_key2.append(row)
+ else:
+ result[key1][key2] = row
+ else:
+ if self.group_by_sort_key:
+ if key1 not in result:
+ result[key1] = []
+ if row not in result[key1]:
+ result[key1].append(row)
+ else:
+ result[key1] = row
+ else:
+ msg = "Unable to group {} by {} since the item {} is not a dictionary.".format(item_name, group_by_level1, row)
+ logger.error(msg)
+ raise ValueError(msg)
+ if self.group_by_sort_key:
+ if group_by_level2:
+ for key1, row1 in result.items():
+ for key2, row in row1.items():
+ row.sort(key=self.group_by_sort_key)
+ else:
+ for key, row in result.items():
+ row.sort(key=self.group_by_sort_key)
+ return result
+ msg = "Unable to group {} by {}".format(item_name, group_by)
+ logger.error(msg)
+ raise ValueError(msg)
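+ # Illustrative sketch (hedged; not part of this module): group_by re-keys each
+ # list of dicts by the given column, so rows match by key rather than position:
+ # >>> from deepdiff import DeepDiff
+ # >>> t1 = [{'id': 1, 'name': 'a'}, {'id': 2, 'name': 'b'}]
+ # >>> t2 = [{'id': 1, 'name': 'a'}, {'id': 2, 'name': 'c'}]
+ # >>> DeepDiff(t1, t2, group_by='id')
+ # {'values_changed': {"root[2]['name']": {'new_value': 'c', 'old_value': 'b'}}}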
+
+ def get_stats(self):
+ """
+ Get some stats on internals of the DeepDiff run.
+ """
+ return self._stats
+
+ @property
+ def affected_paths(self):
+ """
+ Get the list of paths that were affected,
+ whether their values were changed or they were added or removed.
+
+ Example
+ >>> t1 = {1: 1, 2: 2, 3: [3], 4: 4}
+ >>> t2 = {1: 1, 2: 4, 3: [3, 4], 5: 5, 6: 6}
+ >>> ddiff = DeepDiff(t1, t2)
+ >>> pprint(ddiff, indent=4)
+ { 'dictionary_item_added': [root[5], root[6]],
+ 'dictionary_item_removed': [root[4]],
+ 'iterable_item_added': {'root[3][1]': 4},
+ 'values_changed': {'root[2]': {'new_value': 4, 'old_value': 2}}}
+ >>> ddiff.affected_paths
+ SetOrdered(['root[3][1]', 'root[4]', 'root[5]', 'root[6]', 'root[2]'])
+ >>> ddiff.affected_root_keys
+ SetOrdered([3, 4, 5, 6, 2])
+
+ """
+ result = SetOrdered()
+ for key in REPORT_KEYS:
+ value = self.get(key)
+ if value:
+ if isinstance(value, SetOrdered):
+ result |= value
+ else:
+ result |= SetOrdered(value.keys())
+ return result
+
+ @property
+ def affected_root_keys(self):
+ """
+ Get the list of root keys that were affected,
+ whether their values were changed or they were added or removed.
+
+ Example
+ >>> t1 = {1: 1, 2: 2, 3: [3], 4: 4}
+ >>> t2 = {1: 1, 2: 4, 3: [3, 4], 5: 5, 6: 6}
+ >>> ddiff = DeepDiff(t1, t2)
+ >>> pprint(ddiff, indent=4)
+ { 'dictionary_item_added': [root[5], root[6]],
+ 'dictionary_item_removed': [root[4]],
+ 'iterable_item_added': {'root[3][1]': 4},
+ 'values_changed': {'root[2]': {'new_value': 4, 'old_value': 2}}}
+ >>> ddiff.affected_paths
+ SetOrdered(['root[3][1]', 'root[4]', 'root[5]', 'root[6]', 'root[2]'])
+ >>> ddiff.affected_root_keys
+ SetOrdered([3, 4, 5, 6, 2])
+ """
+ result = SetOrdered()
+ for key in REPORT_KEYS:
+ value = self.tree.get(key)
+ if value:
+ if isinstance(value, SetOrdered):
+ values_list = value
+ else:
+ values_list = value.keys()
+ for item in values_list:
+ root_key = item.get_root_key()
+ if root_key is not notpresent:
+ result.add(root_key)
+ return result
+
+
+if __name__ == "__main__": # pragma: no cover
+ import doctest
+ doctest.testmod()