import copy
import logging
from typing import List, Dict, IO, Callable, Set, Union, Optional
from functools import partial, cmp_to_key
from collections.abc import Mapping
from copy import deepcopy
from deepdiff import DeepDiff
from deepdiff.serialization import pickle_load, pickle_dump
from deepdiff.helper import (
strings, numbers,
np_ndarray, np_array_factory, numpy_dtypes, get_doc,
not_found, numpy_dtype_string_to_type, dict_,
Opcode, FlatDeltaRow, UnkownValueCode, FlatDataAction,
OPCODE_TAG_TO_FLAT_DATA_ACTION,
FLAT_DATA_ACTION_TO_OPCODE_TAG,
SetOrdered,
)
from deepdiff.path import (
_path_to_elements, _get_nested_obj, _get_nested_obj_and_force,
GET, GETATTR, parse_path, stringify_path,
)
from deepdiff.anyset import AnySet
from deepdiff.summarize import summarize
logger = logging.getLogger(__name__)
VERIFICATION_MSG = 'Expected the old value for {} to be {} but it is {}. Error found on: {}. You may want to set force=True, especially if this delta is created by passing flat_rows_list or flat_dict_list'
ELEM_NOT_FOUND_TO_ADD_MSG = 'Key or index of {} is not found for {} for setting operation.'
TYPE_CHANGE_FAIL_MSG = 'Unable to do the type change for {} from to type {} due to {}'
VERIFY_BIDIRECTIONAL_MSG = ('You have applied the delta to an object that has '
'different values than the original object the delta was made from.')
FAIL_TO_REMOVE_ITEM_IGNORE_ORDER_MSG = 'Failed to remove index[{}] on {}. It was expected to be {} but got {}'
DELTA_NUMPY_OPERATOR_OVERRIDE_MSG = (
'A numpy ndarray is most likely being added to a delta. '
'Due to Numpy override the + operator, you can only do: delta + ndarray '
'and NOT ndarray + delta')
BINIARY_MODE_NEEDED_MSG = "Please open the file in the binary mode and pass to Delta by passing 'b' in open(..., 'b'): {}"
DELTA_AT_LEAST_ONE_ARG_NEEDED = 'At least one of the diff, delta_path or delta_file arguments need to be passed.'
INVALID_ACTION_WHEN_CALLING_GET_ELEM = 'invalid action of {} when calling _get_elem_and_compare_to_old_value'
INVALID_ACTION_WHEN_CALLING_SIMPLE_SET_ELEM = 'invalid action of {} when calling _simple_set_elem_value'
INVALID_ACTION_WHEN_CALLING_SIMPLE_DELETE_ELEM = 'invalid action of {} when calling _simple_set_elem_value'
UNABLE_TO_GET_ITEM_MSG = 'Unable to get the item at {}: {}'
UNABLE_TO_GET_PATH_MSG = 'Unable to get the item at {}'
INDEXES_NOT_FOUND_WHEN_IGNORE_ORDER = 'Delta added to an incompatible object. Unable to add the following items at the specific indexes. {}'
NUMPY_TO_LIST = 'NUMPY_TO_LIST'
NOT_VALID_NUMPY_TYPE = "{} is not a valid numpy type."
doc = get_doc('delta.rst')
class DeltaError(ValueError):
"""
Delta specific errors
"""
pass
class DeltaNumpyOperatorOverrideError(ValueError):
"""
Delta Numpy Operator Override Error
"""
pass
class Delta:
__doc__ = doc
def __init__(
self,
diff: Union[DeepDiff, Mapping, str, bytes, None]=None,
delta_path: Optional[str]=None,
delta_file: Optional[IO]=None,
delta_diff: Optional[dict]=None,
flat_dict_list: Optional[List[Dict]]=None,
flat_rows_list: Optional[List[FlatDeltaRow]]=None,
deserializer: Callable=pickle_load,
log_errors: bool=True,
mutate: bool=False,
raise_errors: bool=False,
safe_to_import: Optional[Set[str]]=None,
serializer: Callable=pickle_dump,
verify_symmetry: Optional[bool]=None,
bidirectional: bool=False,
always_include_values: bool=False,
iterable_compare_func_was_used: Optional[bool]=None,
force: bool=False,
):
# for pickle deserializer:
if hasattr(deserializer, '__code__') and 'safe_to_import' in set(deserializer.__code__.co_varnames):
_deserializer = deserializer
else:
def _deserializer(obj, safe_to_import=None):
result = deserializer(obj)
if result.get('_iterable_opcodes'):
_iterable_opcodes = {}
for path, op_codes in result['_iterable_opcodes'].items():
_iterable_opcodes[path] = []
for op_code in op_codes:
_iterable_opcodes[path].append(
Opcode(
**op_code
)
)
result['_iterable_opcodes'] = _iterable_opcodes
return result
self._reversed_diff = None
if verify_symmetry is not None:
logger.warning(
"DeepDiff Deprecation: use bidirectional instead of verify_symmetry parameter."
)
bidirectional = verify_symmetry
self.bidirectional = bidirectional
if bidirectional:
self.always_include_values = True # We need to include the values in bidirectional deltas
else:
self.always_include_values = always_include_values
if diff is not None:
if isinstance(diff, DeepDiff):
self.diff = diff._to_delta_dict(directed=not bidirectional, always_include_values=self.always_include_values)
elif isinstance(diff, Mapping):
self.diff = diff
elif isinstance(diff, strings):
self.diff = _deserializer(diff, safe_to_import=safe_to_import)
elif delta_path:
with open(delta_path, 'rb') as the_file:
content = the_file.read()
self.diff = _deserializer(content, safe_to_import=safe_to_import)
elif delta_diff:
self.diff = delta_diff
elif delta_file:
try:
content = delta_file.read()
except UnicodeDecodeError as e:
raise ValueError(BINIARY_MODE_NEEDED_MSG.format(e)) from None
self.diff = _deserializer(content, safe_to_import=safe_to_import)
elif flat_dict_list:
# Use copy to preserve original value of flat_dict_list in calling module
self.diff = self._from_flat_dicts(copy.deepcopy(flat_dict_list))
elif flat_rows_list:
self.diff = self._from_flat_rows(copy.deepcopy(flat_rows_list))
else:
raise ValueError(DELTA_AT_LEAST_ONE_ARG_NEEDED)
self.mutate = mutate
self.raise_errors = raise_errors
self.log_errors = log_errors
self._numpy_paths = self.diff.get('_numpy_paths', False)
# When we create the delta from a list of flat dictionaries, details such as iterable_compare_func_was_used get lost.
# That's why we allow iterable_compare_func_was_used to be explicitly set.
self._iterable_compare_func_was_used = self.diff.get('_iterable_compare_func_was_used', iterable_compare_func_was_used)
self.serializer = serializer
self.deserializer = deserializer
self.force = force
if force:
self.get_nested_obj = _get_nested_obj_and_force
else:
self.get_nested_obj = _get_nested_obj
self.reset()
def __repr__(self):
return "<Delta: {}>".format(summarize(self.diff, max_length=100))
def reset(self):
self.post_process_paths_to_convert = dict_()
def __add__(self, other):
if isinstance(other, numbers) and self._numpy_paths: # type: ignore
raise DeltaNumpyOperatorOverrideError(DELTA_NUMPY_OPERATOR_OVERRIDE_MSG)
if self.mutate:
self.root = other
else:
self.root = deepcopy(other)
self._do_pre_process()
self._do_values_changed()
self._do_set_item_added()
self._do_set_item_removed()
self._do_type_changes()
# NOTE: the remove iterable action needs to happen BEFORE
# all the other iterables to match the reverse of order of operations in DeepDiff
self._do_iterable_opcodes()
self._do_iterable_item_removed()
self._do_iterable_item_added()
self._do_ignore_order()
self._do_dictionary_item_added()
self._do_dictionary_item_removed()
self._do_attribute_added()
self._do_attribute_removed()
self._do_post_process()
other = self.root
# removing the reference to other
del self.root
self.reset()
return other
__radd__ = __add__
def __rsub__(self, other):
if self._reversed_diff is None:
self._reversed_diff = self._get_reverse_diff()
self.diff, self._reversed_diff = self._reversed_diff, self.diff
result = self.__add__(other)
self.diff, self._reversed_diff = self._reversed_diff, self.diff
return result
def _raise_or_log(self, msg, level='error'):
if self.log_errors:
getattr(logger, level)(msg)
if self.raise_errors:
raise DeltaError(msg)
def _do_verify_changes(self, path, expected_old_value, current_old_value):
if self.bidirectional and expected_old_value != current_old_value:
if isinstance(path, str):
path_str = path
else:
path_str = stringify_path(path, root_element=('', GETATTR))
self._raise_or_log(VERIFICATION_MSG.format(
path_str, expected_old_value, current_old_value, VERIFY_BIDIRECTIONAL_MSG))
def _get_elem_and_compare_to_old_value(
self,
obj,
path_for_err_reporting,
expected_old_value,
elem=None,
action=None,
forced_old_value=None,
next_element=None,
):
# if forced_old_value is not None:
try:
if action == GET:
current_old_value = obj[elem]
elif action == GETATTR:
current_old_value = getattr(obj, elem) # type: ignore
else:
raise DeltaError(INVALID_ACTION_WHEN_CALLING_GET_ELEM.format(action))
except (KeyError, IndexError, AttributeError, TypeError) as e:
if self.force:
if forced_old_value is None:
if next_element is None or isinstance(next_element, str):
_forced_old_value = {}
else:
_forced_old_value = []
else:
_forced_old_value = forced_old_value
if action == GET:
if isinstance(obj, list):
if isinstance(elem, int) and elem < len(obj):
obj[elem] = _forced_old_value
else:
obj.append(_forced_old_value)
else:
obj[elem] = _forced_old_value
elif action == GETATTR:
setattr(obj, elem, _forced_old_value) # type: ignore
return _forced_old_value
current_old_value = not_found
if isinstance(path_for_err_reporting, (list, tuple)):
path_for_err_reporting = '.'.join([i[0] for i in path_for_err_reporting])
if self.bidirectional:
self._raise_or_log(VERIFICATION_MSG.format(
path_for_err_reporting,
expected_old_value, current_old_value, e))
else:
self._raise_or_log(UNABLE_TO_GET_PATH_MSG.format(
path_for_err_reporting))
return current_old_value
def _simple_set_elem_value(self, obj, path_for_err_reporting, elem=None, value=None, action=None):
"""
Set the element value directly on an object
"""
try:
if action == GET:
try:
obj[elem] = value
except IndexError:
if elem == len(obj):
obj.append(value)
else:
self._raise_or_log(ELEM_NOT_FOUND_TO_ADD_MSG.format(elem, path_for_err_reporting))
elif action == GETATTR:
setattr(obj, elem, value) # type: ignore
else:
raise DeltaError(INVALID_ACTION_WHEN_CALLING_SIMPLE_SET_ELEM.format(action))
except (KeyError, IndexError, AttributeError, TypeError) as e:
self._raise_or_log('Failed to set {} due to {}'.format(path_for_err_reporting, e))
def _coerce_obj(self, parent, obj, path, parent_to_obj_elem,
parent_to_obj_action, elements, to_type, from_type):
"""
Coerce obj and mark it in post_process_paths_to_convert for later to be converted back.
Also reassign it to its parent to replace the old object.
"""
self.post_process_paths_to_convert[elements[:-1]] = {'old_type': to_type, 'new_type': from_type}
# If this function is going to ever be used to convert numpy arrays, uncomment these lines:
# if from_type is np_ndarray:
# obj = obj.tolist()
# else:
obj = to_type(obj)
if parent:
# Making sure that the object is re-instated inside the parent especially if it was immutable
# and we had to turn it into a mutable one. In such cases the object has a new id.
self._simple_set_elem_value(obj=parent, path_for_err_reporting=path, elem=parent_to_obj_elem,
value=obj, action=parent_to_obj_action)
return obj
def _set_new_value(self, parent, parent_to_obj_elem, parent_to_obj_action,
obj, elements, path, elem, action, new_value):
"""
Set the element value on an object and if necessary convert the object to the proper mutable type
"""
if isinstance(obj, tuple):
# convert this object back to a tuple later
obj = self._coerce_obj(
parent, obj, path, parent_to_obj_elem,
parent_to_obj_action, elements,
to_type=list, from_type=tuple)
if elem != 0 and self.force and isinstance(obj, list) and len(obj) == 0:
# it must have been a dictionary
obj = {}
self._simple_set_elem_value(obj=parent, path_for_err_reporting=path, elem=parent_to_obj_elem,
value=obj, action=parent_to_obj_action)
self._simple_set_elem_value(obj=obj, path_for_err_reporting=path, elem=elem,
value=new_value, action=action)
def _simple_delete_elem(self, obj, path_for_err_reporting, elem=None, action=None):
"""
Delete the element directly on an object
"""
try:
if action == GET:
del obj[elem]
elif action == GETATTR:
del obj.__dict__[elem]
else:
raise DeltaError(INVALID_ACTION_WHEN_CALLING_SIMPLE_DELETE_ELEM.format(action))
except (KeyError, IndexError, AttributeError) as e:
self._raise_or_log('Failed to set {} due to {}'.format(path_for_err_reporting, e))
def _del_elem(self, parent, parent_to_obj_elem, parent_to_obj_action,
obj, elements, path, elem, action):
"""
Delete the element value on an object and if necessary convert the object to the proper mutable type
"""
obj_is_new = False
if isinstance(obj, tuple):
# convert this object back to a tuple later
self.post_process_paths_to_convert[elements[:-1]] = {'old_type': list, 'new_type': tuple}
obj = list(obj)
obj_is_new = True
self._simple_delete_elem(obj=obj, path_for_err_reporting=path, elem=elem, action=action)
if obj_is_new and parent:
# Making sure that the object is re-instated inside the parent especially if it was immutable
# and we had to turn it into a mutable one. In such cases the object has a new id.
self._simple_set_elem_value(obj=parent, path_for_err_reporting=path, elem=parent_to_obj_elem,
value=obj, action=parent_to_obj_action)
def _do_iterable_item_added(self):
iterable_item_added = self.diff.get('iterable_item_added', {})
iterable_item_moved = self.diff.get('iterable_item_moved')
# First we need to create a placeholder for moved items.
# This will then get replaced below after we go through added items.
# Without this items can get double added because moved store the new_value and does not need item_added replayed
if iterable_item_moved:
added_dict = {v["new_path"]: None for k, v in iterable_item_moved.items()}
iterable_item_added.update(added_dict)
if iterable_item_added:
self._do_item_added(iterable_item_added, insert=True)
if iterable_item_moved:
added_dict = {v["new_path"]: v["value"] for k, v in iterable_item_moved.items()}
self._do_item_added(added_dict, insert=False)
def _do_dictionary_item_added(self):
dictionary_item_added = self.diff.get('dictionary_item_added')
if dictionary_item_added:
self._do_item_added(dictionary_item_added, sort=False)
def _do_attribute_added(self):
attribute_added = self.diff.get('attribute_added')
if attribute_added:
self._do_item_added(attribute_added)
@staticmethod
def _sort_key_for_item_added(path_and_value):
elements = _path_to_elements(path_and_value[0])
# Example elements: [(4.3, 'GET'), ('b', 'GETATTR'), ('a3', 'GET')]
# We only care about the values in the elements not how to get the values.
return [i[0] for i in elements]
@staticmethod
def _sort_comparison(left, right):
"""
We use sort comparison instead of _sort_key_for_item_added when we run into comparing element types that can not
be compared with each other, such as None to None. Or integer to string.
"""
# Example elements: [(4.3, 'GET'), ('b', 'GETATTR'), ('a3', 'GET')]
# We only care about the values in the elements not how to get the values.
left_path = [i[0] for i in _path_to_elements(left[0], root_element=None)]
right_path = [i[0] for i in _path_to_elements(right[0], root_element=None)]
try:
if left_path < right_path:
return -1
elif left_path > right_path:
return 1
else:
return 0
except TypeError:
if len(left_path) > len(right_path):
left_path = left_path[:len(right_path)]
elif len(right_path) > len(left_path):
right_path = right_path[:len(left_path)]
for l_elem, r_elem in zip(left_path, right_path):
if type(l_elem) != type(r_elem) or type(l_elem) in None:
l_elem = str(l_elem)
r_elem = str(r_elem)
try:
if l_elem < r_elem:
return -1
elif l_elem > r_elem:
return 1
except TypeError:
continue
return 0
def _do_item_added(self, items, sort=True, insert=False):
if sort:
# sorting items by their path so that the items with smaller index
# are applied first (unless `sort` is `False` so that order of
# added items is retained, e.g. for dicts).
try:
items = sorted(items.items(), key=self._sort_key_for_item_added)
except TypeError:
items = sorted(items.items(), key=cmp_to_key(self._sort_comparison))
else:
items = items.items()
for path, new_value in items:
elem_and_details = self._get_elements_and_details(path)
if elem_and_details:
elements, parent, parent_to_obj_elem, parent_to_obj_action, obj, elem, action = elem_and_details
else:
continue # pragma: no cover. Due to cPython peephole optimizer, this line doesn't get covered. https://github.com/nedbat/coveragepy/issues/198
# Insert is only true for iterables, make sure it is a valid index.
if(insert and elem < len(obj)): # type: ignore
obj.insert(elem, None) # type: ignore
self._set_new_value(parent, parent_to_obj_elem, parent_to_obj_action,
obj, elements, path, elem, action, new_value)
def _do_values_changed(self):
values_changed = self.diff.get('values_changed')
if values_changed:
self._do_values_or_type_changed(values_changed)
def _do_type_changes(self):
type_changes = self.diff.get('type_changes')
if type_changes:
self._do_values_or_type_changed(type_changes, is_type_change=True)
def _do_post_process(self):
if self.post_process_paths_to_convert:
# Example: We had converted some object to be mutable and now we are converting them back to be immutable.
# We don't need to check the change because it is not really a change that was part of the original diff.
self._do_values_or_type_changed(self.post_process_paths_to_convert, is_type_change=True, verify_changes=False)
def _do_pre_process(self):
if self._numpy_paths and ('iterable_item_added' in self.diff or 'iterable_item_removed' in self.diff):
preprocess_paths = dict_()
for path, type_ in self._numpy_paths.items(): # type: ignore
preprocess_paths[path] = {'old_type': np_ndarray, 'new_type': list}
try:
type_ = numpy_dtype_string_to_type(type_)
except Exception as e:
self._raise_or_log(NOT_VALID_NUMPY_TYPE.format(e))
continue # pragma: no cover. Due to cPython peephole optimizer, this line doesn't get covered. https://github.com/nedbat/coveragepy/issues/198
self.post_process_paths_to_convert[path] = {'old_type': list, 'new_type': type_}
if preprocess_paths:
self._do_values_or_type_changed(preprocess_paths, is_type_change=True)
def _get_elements_and_details(self, path):
try:
elements = _path_to_elements(path)
if len(elements) > 1:
elements_subset = elements[:-2]
if len(elements_subset) != len(elements):
next_element = elements[-2][0]
next2_element = elements[-1][0]
else:
next_element = None
parent = self.get_nested_obj(obj=self, elements=elements_subset, next_element=next_element)
parent_to_obj_elem, parent_to_obj_action = elements[-2]
obj = self._get_elem_and_compare_to_old_value(
obj=parent, path_for_err_reporting=path, expected_old_value=None,
elem=parent_to_obj_elem, action=parent_to_obj_action, next_element=next2_element) # type: ignore
else:
# parent = self
# obj = self.root
# parent_to_obj_elem = 'root'
# parent_to_obj_action = GETATTR
parent = parent_to_obj_elem = parent_to_obj_action = None
obj = self
# obj = self.get_nested_obj(obj=self, elements=elements[:-1])
elem, action = elements[-1] # type: ignore
except Exception as e:
self._raise_or_log(UNABLE_TO_GET_ITEM_MSG.format(path, e))
return None
else:
if obj is not_found:
return None
return elements, parent, parent_to_obj_elem, parent_to_obj_action, obj, elem, action
def _do_values_or_type_changed(self, changes, is_type_change=False, verify_changes=True):
for path, value in changes.items():
elem_and_details = self._get_elements_and_details(path)
if elem_and_details:
elements, parent, parent_to_obj_elem, parent_to_obj_action, obj, elem, action = elem_and_details
else:
continue # pragma: no cover. Due to cPython peephole optimizer, this line doesn't get covered. https://github.com/nedbat/coveragepy/issues/198
expected_old_value = value.get('old_value', not_found)
current_old_value = self._get_elem_and_compare_to_old_value(
obj=obj, path_for_err_reporting=path, expected_old_value=expected_old_value, elem=elem, action=action)
if current_old_value is not_found:
continue # pragma: no cover. I have not been able to write a test for this case. But we should still check for it.
# With type change if we could have originally converted the type from old_value
# to new_value just by applying the class of the new_value, then we might not include the new_value
# in the delta dictionary. That is defined in Model.DeltaResult._from_tree_type_changes
if is_type_change and 'new_value' not in value:
try:
new_type = value['new_type']
# in case of Numpy we pass the ndarray plus the dtype in a tuple
if new_type in numpy_dtypes:
new_value = np_array_factory(current_old_value, new_type)
else:
new_value = new_type(current_old_value)
except Exception as e:
self._raise_or_log(TYPE_CHANGE_FAIL_MSG.format(obj[elem], value.get('new_type', 'unknown'), e)) # type: ignore
continue
else:
new_value = value['new_value']
self._set_new_value(parent, parent_to_obj_elem, parent_to_obj_action,
obj, elements, path, elem, action, new_value)
if verify_changes:
self._do_verify_changes(path, expected_old_value, current_old_value)
def _do_item_removed(self, items):
"""
Handle removing items.
"""
# Sorting the iterable_item_removed in reverse order based on the paths.
# So that we delete a bigger index before a smaller index
try:
sorted_item = sorted(items.items(), key=self._sort_key_for_item_added, reverse=True)
except TypeError:
sorted_item = sorted(items.items(), key=cmp_to_key(self._sort_comparison), reverse=True)
for path, expected_old_value in sorted_item:
elem_and_details = self._get_elements_and_details(path)
if elem_and_details:
elements, parent, parent_to_obj_elem, parent_to_obj_action, obj, elem, action = elem_and_details
else:
continue # pragma: no cover. Due to cPython peephole optimizer, this line doesn't get covered. https://github.com/nedbat/coveragepy/issues/198
look_for_expected_old_value = False
current_old_value = not_found
try:
if action == GET:
current_old_value = obj[elem] # type: ignore
elif action == GETATTR:
current_old_value = getattr(obj, elem)
look_for_expected_old_value = current_old_value != expected_old_value
except (KeyError, IndexError, AttributeError, TypeError):
look_for_expected_old_value = True
if look_for_expected_old_value and isinstance(obj, list) and not self._iterable_compare_func_was_used:
# It may return None if it doesn't find it
elem = self._find_closest_iterable_element_for_index(obj, elem, expected_old_value)
if elem is not None:
current_old_value = expected_old_value
if current_old_value is not_found or elem is None:
continue
self._del_elem(parent, parent_to_obj_elem, parent_to_obj_action,
obj, elements, path, elem, action)
self._do_verify_changes(path, expected_old_value, current_old_value)
def _find_closest_iterable_element_for_index(self, obj, elem, expected_old_value):
closest_elem = None
closest_distance = float('inf')
for index, value in enumerate(obj):
dist = abs(index - elem)
if dist > closest_distance:
break
if value == expected_old_value and dist < closest_distance:
closest_elem = index
closest_distance = dist
return closest_elem
def _do_iterable_opcodes(self):
_iterable_opcodes = self.diff.get('_iterable_opcodes', {})
if _iterable_opcodes:
for path, opcodes in _iterable_opcodes.items():
transformed = []
# elements = _path_to_elements(path)
elem_and_details = self._get_elements_and_details(path)
if elem_and_details:
elements, parent, parent_to_obj_elem, parent_to_obj_action, obj, elem, action = elem_and_details
if parent is None:
parent = self
obj = self.root
parent_to_obj_elem = 'root'
parent_to_obj_action = GETATTR
else:
continue # pragma: no cover. Due to cPython peephole optimizer, this line doesn't get covered. https://github.com/nedbat/coveragepy/issues/198
# import pytest; pytest.set_trace()
obj = self.get_nested_obj(obj=self, elements=elements)
is_obj_tuple = isinstance(obj, tuple)
for opcode in opcodes:
if opcode.tag == 'replace':
# Replace items in list a[i1:i2] with b[j1:j2]
transformed.extend(opcode.new_values)
elif opcode.tag == 'delete':
# Delete items from list a[i1:i2], so we do nothing here
continue
elif opcode.tag == 'insert':
# Insert items from list b[j1:j2] into the new list
transformed.extend(opcode.new_values)
elif opcode.tag == 'equal':
# Items are the same in both lists, so we add them to the result
transformed.extend(obj[opcode.t1_from_index:opcode.t1_to_index]) # type: ignore
if is_obj_tuple:
obj = tuple(obj) # type: ignore
# Making sure that the object is re-instated inside the parent especially if it was immutable
# and we had to turn it into a mutable one. In such cases the object has a new id.
self._simple_set_elem_value(obj=parent, path_for_err_reporting=path, elem=parent_to_obj_elem,
value=obj, action=parent_to_obj_action)
else:
obj[:] = transformed # type: ignore
# obj = self.get_nested_obj(obj=self, elements=elements)
# for
def _do_iterable_item_removed(self):
iterable_item_removed = self.diff.get('iterable_item_removed', {})
iterable_item_moved = self.diff.get('iterable_item_moved')
if iterable_item_moved:
# These will get added back during items_added
removed_dict = {k: v["value"] for k, v in iterable_item_moved.items()}
iterable_item_removed.update(removed_dict)
if iterable_item_removed:
self._do_item_removed(iterable_item_removed)
def _do_dictionary_item_removed(self):
dictionary_item_removed = self.diff.get('dictionary_item_removed')
if dictionary_item_removed:
self._do_item_removed(dictionary_item_removed)
def _do_attribute_removed(self):
attribute_removed = self.diff.get('attribute_removed')
if attribute_removed:
self._do_item_removed(attribute_removed)
def _do_set_item_added(self):
items = self.diff.get('set_item_added')
if items:
self._do_set_or_frozenset_item(items, func='union')
def _do_set_item_removed(self):
items = self.diff.get('set_item_removed')
if items:
self._do_set_or_frozenset_item(items, func='difference')
def _do_set_or_frozenset_item(self, items, func):
for path, value in items.items():
elements = _path_to_elements(path)
parent = self.get_nested_obj(obj=self, elements=elements[:-1])
elem, action = elements[-1]
obj = self._get_elem_and_compare_to_old_value(
parent, path_for_err_reporting=path, expected_old_value=None, elem=elem, action=action, forced_old_value=set())
new_value = getattr(obj, func)(value)
self._simple_set_elem_value(parent, path_for_err_reporting=path, elem=elem, value=new_value, action=action)
def _do_ignore_order_get_old(self, obj, remove_indexes_per_path, fixed_indexes_values, path_for_err_reporting):
"""
A generator that gets the old values in an iterable when the order was supposed to be ignored.
"""
old_obj_index = -1
max_len = len(obj) - 1
while old_obj_index < max_len:
old_obj_index += 1
current_old_obj = obj[old_obj_index]
if current_old_obj in fixed_indexes_values:
continue
if old_obj_index in remove_indexes_per_path:
expected_obj_to_delete = remove_indexes_per_path.pop(old_obj_index)
if current_old_obj == expected_obj_to_delete:
continue
else:
self._raise_or_log(FAIL_TO_REMOVE_ITEM_IGNORE_ORDER_MSG.format(
old_obj_index, path_for_err_reporting, expected_obj_to_delete, current_old_obj))
yield current_old_obj
def _do_ignore_order(self):
"""
't1': [5, 1, 1, 1, 6],
't2': [7, 1, 1, 1, 8],
'iterable_items_added_at_indexes': {
'root': {
0: 7,
4: 8
}
},
'iterable_items_removed_at_indexes': {
'root': {
4: 6,
0: 5
}
}
"""
fixed_indexes = self.diff.get('iterable_items_added_at_indexes', dict_())
remove_indexes = self.diff.get('iterable_items_removed_at_indexes', dict_())
paths = SetOrdered(fixed_indexes.keys()) | SetOrdered(remove_indexes.keys())
for path in paths: # type: ignore
# In the case of ignore_order reports, we are pointing to the container object.
# Thus we add a [0] to the elements so we can get the required objects and discard what we don't need.
elem_and_details = self._get_elements_and_details("{}[0]".format(path))
if elem_and_details:
_, parent, parent_to_obj_elem, parent_to_obj_action, obj, _, _ = elem_and_details
else:
continue # pragma: no cover. Due to cPython peephole optimizer, this line doesn't get covered. https://github.com/nedbat/coveragepy/issues/198
# copying both these dictionaries since we don't want to mutate them.
fixed_indexes_per_path = fixed_indexes.get(path, dict_()).copy()
remove_indexes_per_path = remove_indexes.get(path, dict_()).copy()
fixed_indexes_values = AnySet(fixed_indexes_per_path.values())
new_obj = []
# Numpy's NdArray does not like the bool function.
if isinstance(obj, np_ndarray):
there_are_old_items = obj.size > 0
else:
there_are_old_items = bool(obj)
old_item_gen = self._do_ignore_order_get_old(
obj, remove_indexes_per_path, fixed_indexes_values, path_for_err_reporting=path)
while there_are_old_items or fixed_indexes_per_path:
new_obj_index = len(new_obj)
if new_obj_index in fixed_indexes_per_path:
new_item = fixed_indexes_per_path.pop(new_obj_index)
new_obj.append(new_item)
elif there_are_old_items:
try:
new_item = next(old_item_gen)
except StopIteration:
there_are_old_items = False
else:
new_obj.append(new_item)
else:
# pop a random item from the fixed_indexes_per_path dictionary
self._raise_or_log(INDEXES_NOT_FOUND_WHEN_IGNORE_ORDER.format(fixed_indexes_per_path))
new_item = fixed_indexes_per_path.pop(next(iter(fixed_indexes_per_path)))
new_obj.append(new_item)
if isinstance(obj, tuple):
new_obj = tuple(new_obj)
# Making sure that the object is re-instated inside the parent especially if it was immutable
# and we had to turn it into a mutable one. In such cases the object has a new id.
self._simple_set_elem_value(obj=parent, path_for_err_reporting=path, elem=parent_to_obj_elem,
value=new_obj, action=parent_to_obj_action)
def _get_reverse_diff(self):
if not self.bidirectional:
raise ValueError('Please recreate the delta with bidirectional=True')
SIMPLE_ACTION_TO_REVERSE = {
'iterable_item_added': 'iterable_item_removed',
'iterable_items_added_at_indexes': 'iterable_items_removed_at_indexes',
'attribute_added': 'attribute_removed',
'set_item_added': 'set_item_removed',
'dictionary_item_added': 'dictionary_item_removed',
}
# Adding the reverse of the dictionary
for key in list(SIMPLE_ACTION_TO_REVERSE.keys()):
SIMPLE_ACTION_TO_REVERSE[SIMPLE_ACTION_TO_REVERSE[key]] = key
r_diff = {}
for action, info in self.diff.items():
reverse_action = SIMPLE_ACTION_TO_REVERSE.get(action)
if reverse_action:
r_diff[reverse_action] = info
elif action == 'values_changed':
r_diff[action] = {}
for path, path_info in info.items():
reverse_path = path_info['new_path'] if path_info.get('new_path') else path
r_diff[action][reverse_path] = {
'new_value': path_info['old_value'], 'old_value': path_info['new_value']
}
elif action == 'type_changes':
r_diff[action] = {}
for path, path_info in info.items():
reverse_path = path_info['new_path'] if path_info.get('new_path') else path
r_diff[action][reverse_path] = {
'old_type': path_info['new_type'], 'new_type': path_info['old_type'],
}
if 'new_value' in path_info:
r_diff[action][reverse_path]['old_value'] = path_info['new_value']
if 'old_value' in path_info:
r_diff[action][reverse_path]['new_value'] = path_info['old_value']
elif action == 'iterable_item_moved':
r_diff[action] = {}
for path, path_info in info.items():
old_path = path_info['new_path']
r_diff[action][old_path] = {
'new_path': path, 'value': path_info['value'],
}
elif action == '_iterable_opcodes':
r_diff[action] = {}
for path, op_codes in info.items():
r_diff[action][path] = []
for op_code in op_codes:
tag = op_code.tag
tag = {'delete': 'insert', 'insert': 'delete'}.get(tag, tag)
new_op_code = Opcode(
tag=tag,
t1_from_index=op_code.t2_from_index,
t1_to_index=op_code.t2_to_index,
t2_from_index=op_code.t1_from_index,
t2_to_index=op_code.t1_to_index,
new_values=op_code.old_values,
old_values=op_code.new_values,
)
r_diff[action][path].append(new_op_code)
return r_diff
def dump(self, file):
"""
Dump into file object
"""
# Small optimization: Our internal pickle serializer can just take a file object
# and directly write to it. However if a user defined serializer is passed
# we want to make it compatible with the expectation that self.serializer(self.diff)
# will give the user the serialization and then it can be written to
# a file object when using the dump(file) function.
param_names_of_serializer = set(self.serializer.__code__.co_varnames)
if 'file_obj' in param_names_of_serializer:
self.serializer(self.diff, file_obj=file)
else:
file.write(self.dumps())
def dumps(self):
"""
Return the serialized representation of the object as a bytes object, instead of writing it to a file.
"""
return self.serializer(self.diff)
def to_dict(self):
return dict(self.diff)
def _flatten_iterable_opcodes(self, _parse_path):
"""
Converts op_codes to FlatDeltaRows
"""
result = []
for path, op_codes in self.diff['_iterable_opcodes'].items():
for op_code in op_codes:
result.append(
FlatDeltaRow(
path=_parse_path(path),
action=OPCODE_TAG_TO_FLAT_DATA_ACTION[op_code.tag],
value=op_code.new_values,
old_value=op_code.old_values,
type=type(op_code.new_values),
old_type=type(op_code.old_values),
new_path=None,
t1_from_index=op_code.t1_from_index,
t1_to_index=op_code.t1_to_index,
t2_from_index=op_code.t2_from_index,
t2_to_index=op_code.t2_to_index,
)
)
return result
@staticmethod
def _get_flat_row(action, info, _parse_path, keys_and_funcs, report_type_changes=True):
for path, details in info.items():
row = {'path': _parse_path(path), 'action': action}
for key, new_key, func in keys_and_funcs:
if key in details:
if func:
row[new_key] = func(details[key])
else:
row[new_key] = details[key]
if report_type_changes:
if 'value' in row and 'type' not in row:
row['type'] = type(row['value'])
if 'old_value' in row and 'old_type' not in row:
row['old_type'] = type(row['old_value'])
yield FlatDeltaRow(**row)
@staticmethod
def _from_flat_rows(flat_rows_list: List[FlatDeltaRow]):
flat_dict_list = (i._asdict() for i in flat_rows_list)
return Delta._from_flat_dicts(flat_dict_list)
@staticmethod
def _from_flat_dicts(flat_dict_list):
"""
Create the delta's diff object from the flat_dict_list
"""
result = {}
FLATTENING_NEW_ACTION_MAP = {
'unordered_iterable_item_added': 'iterable_items_added_at_indexes',
'unordered_iterable_item_removed': 'iterable_items_removed_at_indexes',
}
for flat_dict in flat_dict_list:
index = None
action = flat_dict.get("action")
path = flat_dict.get("path")
value = flat_dict.get('value')
new_path = flat_dict.get('new_path')
old_value = flat_dict.get('old_value', UnkownValueCode)
if not action:
raise ValueError("Flat dict need to include the 'action'.")
if path is None:
raise ValueError("Flat dict need to include the 'path'.")
if action in FLATTENING_NEW_ACTION_MAP:
action = FLATTENING_NEW_ACTION_MAP[action]
index = path.pop()
if action in {
FlatDataAction.attribute_added,
FlatDataAction.attribute_removed,
}:
root_element = ('root', GETATTR)
else:
root_element = ('root', GET)
if isinstance(path, str):
path_str = path
else:
path_str = stringify_path(path, root_element=root_element) # We need the string path
if new_path and new_path != path:
new_path = stringify_path(new_path, root_element=root_element)
else:
new_path = None
if action not in result:
result[action] = {}
if action in {
'iterable_items_added_at_indexes',
'iterable_items_removed_at_indexes',
}:
if path_str not in result[action]:
result[action][path_str] = {}
result[action][path_str][index] = value
elif action in {
FlatDataAction.set_item_added,
FlatDataAction.set_item_removed
}:
if path_str not in result[action]:
result[action][path_str] = set()
result[action][path_str].add(value)
elif action in {
FlatDataAction.dictionary_item_added,
FlatDataAction.dictionary_item_removed,
FlatDataAction.attribute_removed,
FlatDataAction.attribute_added,
FlatDataAction.iterable_item_added,
FlatDataAction.iterable_item_removed,
}:
result[action][path_str] = value
elif action == 'values_changed':
if old_value == UnkownValueCode:
result[action][path_str] = {'new_value': value}
else:
result[action][path_str] = {'new_value': value, 'old_value': old_value}
elif action == 'type_changes':
type_ = flat_dict.get('type', UnkownValueCode)
old_type = flat_dict.get('old_type', UnkownValueCode)
result[action][path_str] = {'new_value': value}
for elem, elem_value in [
('new_type', type_),
('old_type', old_type),
('old_value', old_value),
]:
if elem_value != UnkownValueCode:
result[action][path_str][elem] = elem_value
elif action == FlatDataAction.iterable_item_moved:
result[action][path_str] = {'value': value}
elif action in {
FlatDataAction.iterable_items_inserted,
FlatDataAction.iterable_items_deleted,
FlatDataAction.iterable_items_replaced,
FlatDataAction.iterable_items_equal,
}:
if '_iterable_opcodes' not in result:
result['_iterable_opcodes'] = {}
if path_str not in result['_iterable_opcodes']:
result['_iterable_opcodes'][path_str] = []
result['_iterable_opcodes'][path_str].append(
Opcode(
tag=FLAT_DATA_ACTION_TO_OPCODE_TAG[action], # type: ignore
t1_from_index=flat_dict.get('t1_from_index'),
t1_to_index=flat_dict.get('t1_to_index'),
t2_from_index=flat_dict.get('t2_from_index'),
t2_to_index=flat_dict.get('t2_to_index'),
new_values=flat_dict.get('value'),
old_values=flat_dict.get('old_value'),
)
)
if new_path:
result[action][path_str]['new_path'] = new_path
return result
def to_flat_dicts(self, include_action_in_path=False, report_type_changes=True) -> List[FlatDeltaRow]:
"""
Returns a flat list of actions that is easily machine readable.
For example:
{'iterable_item_added': {'root[3]': 5, 'root[2]': 3}}
Becomes:
[
{'path': [3], 'value': 5, 'action': 'iterable_item_added'},
{'path': [2], 'value': 3, 'action': 'iterable_item_added'},
]
**Parameters**
include_action_in_path : Boolean, default=False
When False, we translate DeepDiff's paths like root[3].attribute1 into a [3, 'attribute1'].
When True, we include the action to retrieve the item in the path: [(3, 'GET'), ('attribute1', 'GETATTR')]
Note that the "action" here is the different than the action reported by to_flat_dicts. The action here is just about the "path" output.
report_type_changes : Boolean, default=True
If False, we don't report the type change. Instead we report the value change.
Example:
t1 = {"a": None}
t2 = {"a": 1}
dump = Delta(DeepDiff(t1, t2)).dumps()
delta = Delta(dump)
assert t2 == delta + t1
flat_result = delta.to_flat_dicts()
flat_expected = [{'path': ['a'], 'action': 'type_changes', 'value': 1, 'new_type': int, 'old_type': type(None)}]
assert flat_expected == flat_result
flat_result2 = delta.to_flat_dicts(report_type_changes=False)
flat_expected2 = [{'path': ['a'], 'action': 'values_changed', 'value': 1}]
**List of actions**
Here are the list of actions that the flat dictionary can return.
iterable_item_added
iterable_item_removed
iterable_item_moved
values_changed
type_changes
set_item_added
set_item_removed
dictionary_item_added
dictionary_item_removed
attribute_added
attribute_removed
"""
return [
i._asdict() for i in self.to_flat_rows(include_action_in_path=False, report_type_changes=True)
] # type: ignore
def to_flat_rows(self, include_action_in_path=False, report_type_changes=True) -> List[FlatDeltaRow]:
"""
Just like to_flat_dicts but returns FlatDeltaRow Named Tuples
"""
result = []
if include_action_in_path:
_parse_path = partial(parse_path, include_actions=True)
else:
_parse_path = parse_path
if report_type_changes:
keys_and_funcs = [
('value', 'value', None),
('new_value', 'value', None),
('old_value', 'old_value', None),
('new_type', 'type', None),
('old_type', 'old_type', None),
('new_path', 'new_path', _parse_path),
]
else:
if not self.always_include_values:
raise ValueError(
"When converting to flat dictionaries, if report_type_changes=False and there are type changes, "
"you must set the always_include_values=True at the delta object creation. Otherwise there is nothing to include."
)
keys_and_funcs = [
('value', 'value', None),
('new_value', 'value', None),
('old_value', 'old_value', None),
('new_path', 'new_path', _parse_path),
]
FLATTENING_NEW_ACTION_MAP = {
'iterable_items_added_at_indexes': 'unordered_iterable_item_added',
'iterable_items_removed_at_indexes': 'unordered_iterable_item_removed',
}
for action, info in self.diff.items():
if action == '_iterable_opcodes':
result.extend(self._flatten_iterable_opcodes(_parse_path=_parse_path))
continue
if action.startswith('_'):
continue
if action in FLATTENING_NEW_ACTION_MAP:
new_action = FLATTENING_NEW_ACTION_MAP[action]
for path, index_to_value in info.items():
path = _parse_path(path)
for index, value in index_to_value.items():
path2 = path.copy()
if include_action_in_path:
path2.append((index, 'GET')) # type: ignore
else:
path2.append(index)
if report_type_changes:
row = FlatDeltaRow(path=path2, value=value, action=new_action, type=type(value)) # type: ignore
else:
row = FlatDeltaRow(path=path2, value=value, action=new_action) # type: ignore
result.append(row)
elif action in {'set_item_added', 'set_item_removed'}:
for path, values in info.items():
path = _parse_path(path)
for value in values:
if report_type_changes:
row = FlatDeltaRow(path=path, value=value, action=action, type=type(value))
else:
row = FlatDeltaRow(path=path, value=value, action=action)
result.append(row)
elif action == 'dictionary_item_added':
for path, value in info.items():
path = _parse_path(path)
if isinstance(value, dict) and len(value) == 1:
new_key = next(iter(value))
path.append(new_key)
value = value[new_key]
elif isinstance(value, (list, tuple)) and len(value) == 1:
value = value[0]
path.append(0) # type: ignore
action = 'iterable_item_added'
elif isinstance(value, set) and len(value) == 1:
value = value.pop()
action = 'set_item_added'
if report_type_changes:
row = FlatDeltaRow(path=path, value=value, action=action, type=type(value)) # type: ignore
else:
row = FlatDeltaRow(path=path, value=value, action=action) # type: ignore
result.append(row)
elif action in {
'dictionary_item_removed', 'iterable_item_added',
'iterable_item_removed', 'attribute_removed', 'attribute_added'
}:
for path, value in info.items():
path = _parse_path(path)
if report_type_changes:
row = FlatDeltaRow(path=path, value=value, action=action, type=type(value))
else:
row = FlatDeltaRow(path=path, value=value, action=action)
result.append(row)
elif action == 'type_changes':
if not report_type_changes:
action = 'values_changed'
for row in self._get_flat_row(
action=action,
info=info,
_parse_path=_parse_path,
keys_and_funcs=keys_and_funcs,
report_type_changes=report_type_changes,
):
result.append(row)
else:
for row in self._get_flat_row(
action=action,
info=info,
_parse_path=_parse_path,
keys_and_funcs=keys_and_funcs,
report_type_changes=report_type_changes,
):
result.append(row)
return result
if __name__ == "__main__": # pragma: no cover
import doctest
doctest.testmod()