
Commit b70d68c: Merge pull request #525 from seperman/dev (8.4.0)
Parents: 6e1ae67 + bba1732
21 files changed: +781, -291 lines

README.md (+7)

```diff
@@ -23,6 +23,13 @@ Tested on Python 3.8+ and PyPy3.
 
 Please check the [ChangeLog](CHANGELOG.md) file for the detailed information.
 
+DeepDiff 8-4-0
+
+- Adding BaseOperatorPlus base class for custom operators
+- default_timezone can be passed now to set your default timezone to something other than UTC.
+- New summarization algorithm that produces valid json
+- Better type hint support
+
 DeepDiff 8-3-0
 
 - Fixed some static typing issues
```
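The first changelog bullet introduces the BaseOperatorPlus base class for custom operators. As a rough sketch of how such an operator could be wired up (the `deepdiff.operator` import path and the `match`/`give_up_diffing` signatures are assumptions; only the `normalize_value_for_hashing` hook is confirmed by the DeepHash changes later in this commit):

```python
# Hypothetical sketch of a custom operator built on the new BaseOperatorPlus
# base class from the changelog. The import path and the match / give_up_diffing
# signatures are assumptions; only normalize_value_for_hashing is confirmed by
# this commit's DeepHash changes.
from typing import Any

from deepdiff import DeepDiff
from deepdiff.operator import BaseOperatorPlus  # assumed location


class CaseInsensitiveOperator(BaseOperatorPlus):
    def match(self, level) -> bool:
        # Only handle string-to-string comparisons.
        return isinstance(level.t1, str) and isinstance(level.t2, str)

    def give_up_diffing(self, level, diff_instance) -> bool:
        # Treat strings that match case-insensitively as equal and stop diffing them.
        return level.t1.lower() == level.t2.lower()

    def normalize_value_for_hashing(self, parent: Any, obj: Any) -> Any:
        # Called by DeepHash so ignore_order=True hashes both sides the same way.
        return obj.lower() if isinstance(obj, str) else obj


diff = DeepDiff(["Hello"], ["HELLO"], ignore_order=True,
                custom_operators=[CaseInsensitiveOperator()])
print(diff)  # expected to be empty under these assumptions
```

Returning a normalized value from `normalize_value_for_hashing` is what lets `ignore_order=True` treat "Hello" and "HELLO" as the same member when building hash tables.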

deepdiff/base.py (+11, -1)

```diff
@@ -1,11 +1,21 @@
+from typing import Protocol, Any
 from deepdiff.helper import strings, numbers, SetOrdered
 
 
 DEFAULT_SIGNIFICANT_DIGITS_WHEN_IGNORE_NUMERIC_TYPES = 12
 TYPE_STABILIZATION_MSG = 'Unable to stabilize the Numpy array {} due to {}. Please set ignore_order=False.'
 
 
-class Base:
+class BaseProtocol(Protocol):
+    t1: Any
+    t2: Any
+    cutoff_distance_for_pairs: float
+    use_log_scale: bool
+    log_scale_similarity_threshold: float
+    view: str
+
+
+class Base(BaseProtocol):
     numbers = numbers
     strings = strings
 
```
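BaseProtocol is a `typing.Protocol` that spells out the attributes mixins expect the concrete diff object to carry (t1, t2, cutoff_distance_for_pairs, and so on), so static type checkers can validate those accesses instead of treating them as unknown. A minimal, self-contained sketch of the same pattern, with illustrative names that are not part of DeepDiff:

```python
# Minimal illustration of the Protocol-as-mixin-contract pattern used above.
# The class names here are illustrative, not part of DeepDiff.
from typing import Protocol, Any


class HasOperands(Protocol):
    # Attributes the mixin expects the host class to provide.
    t1: Any
    t2: Any
    use_log_scale: bool


class ReportingMixin(HasOperands):
    def describe(self) -> str:
        # Type checkers accept self.t1 / self.t2 because the Protocol declares them.
        scale = "log" if self.use_log_scale else "linear"
        return f"comparing {self.t1!r} with {self.t2!r} on a {scale} scale"


class Comparator(ReportingMixin):
    def __init__(self, t1: Any, t2: Any) -> None:
        self.t1 = t1
        self.t2 = t2
        self.use_log_scale = False


print(Comparator(1, 2).describe())
```

At runtime essentially nothing changes: subclassing a Protocol with a regular class, as `Base(BaseProtocol)` does here, mainly adds the declared annotations for the benefit of type checkers.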

deepdiff/deephash.py (+33, -21)

```diff
@@ -1,7 +1,8 @@
 #!/usr/bin/env python
-import inspect
+import pytz
 import logging
 import datetime
+from typing import Union, Optional, Any, List
 from collections.abc import Iterable, MutableMapping
 from collections import defaultdict
 from hashlib import sha1, sha256
@@ -14,7 +15,6 @@
     number_to_string, datetime_normalize, KEY_TO_VAL_STR,
     get_truncate_datetime, dict_, add_root_to_paths, PydanticBaseModel)
 
-from deepdiff.summarize import summarize
 from deepdiff.base import Base
 
 try:
@@ -141,30 +141,32 @@ class DeepHash(Base):
     def __init__(self,
                  obj,
                  *,
-                 hashes=None,
-                 exclude_types=None,
+                 apply_hash=True,
+                 custom_operators: Optional[List[Any]] =None,
+                 default_timezone:Union[datetime.timezone, datetime.timezone, pytz.tzinfo.BaseTzInfo]=datetime.timezone.utc,
+                 encodings=None,
+                 exclude_obj_callback=None,
                  exclude_paths=None,
-                 include_paths=None,
                  exclude_regex_paths=None,
+                 exclude_types=None,
                  hasher=None,
+                 hashes=None,
+                 ignore_encoding_errors=False,
+                 ignore_iterable_order=True,
+                 ignore_numeric_type_changes=False,
+                 ignore_private_variables=True,
                  ignore_repetition=True,
-                 significant_digits=None,
-                 truncate_datetime=None,
-                 number_format_notation="f",
-                 apply_hash=True,
-                 ignore_type_in_groups=None,
+                 ignore_string_case=False,
                  ignore_string_type_changes=False,
-                 ignore_numeric_type_changes=False,
+                 ignore_type_in_groups=None,
                  ignore_type_subclasses=False,
-                 ignore_string_case=False,
-                 use_enum_value=False,
-                 exclude_obj_callback=None,
+                 include_paths=None,
+                 number_format_notation="f",
                  number_to_string_func=None,
-                 ignore_private_variables=True,
                  parent="root",
-                 encodings=None,
-                 ignore_encoding_errors=False,
-                 ignore_iterable_order=True,
+                 significant_digits=None,
+                 truncate_datetime=None,
+                 use_enum_value=False,
                  **kwargs):
         if kwargs:
             raise ValueError(
@@ -173,7 +175,7 @@ def __init__(self,
                  "exclude_paths, include_paths, exclude_regex_paths, hasher, ignore_repetition, "
                  "number_format_notation, apply_hash, ignore_type_in_groups, ignore_string_type_changes, "
                  "ignore_numeric_type_changes, ignore_type_subclasses, ignore_string_case "
-                 "number_to_string_func, ignore_private_variables, parent, use_enum_value "
+                 "number_to_string_func, ignore_private_variables, parent, use_enum_value, default_timezone "
                  "encodings, ignore_encoding_errors") % ', '.join(kwargs.keys()))
         if isinstance(hashes, MutableMapping):
             self.hashes = hashes
@@ -190,7 +192,7 @@ def __init__(self,
         self.hasher = default_hasher if hasher is None else hasher
         self.hashes[UNPROCESSED_KEY] = []
         self.use_enum_value = use_enum_value
-
+        self.default_timezone = default_timezone
         self.significant_digits = self.get_significant_digits(significant_digits, ignore_numeric_type_changes)
         self.truncate_datetime = get_truncate_datetime(truncate_datetime)
         self.number_format_notation = number_format_notation
@@ -214,6 +216,7 @@ def __init__(self,
         self.encodings = encodings
         self.ignore_encoding_errors = ignore_encoding_errors
         self.ignore_iterable_order = ignore_iterable_order
+        self.custom_operators = custom_operators
 
         self._hash(obj, parent=parent, parents_ids=frozenset({get_id(obj)}))
 
@@ -317,6 +320,7 @@ def __repr__(self):
         """
         Hide the counts since it will be confusing to see them when they are hidden everywhere else.
         """
+        from deepdiff.summarize import summarize
         return summarize(self._get_objects_to_hashes_dict(extract_index=0), max_length=500)
 
     def __str__(self):
@@ -349,6 +353,7 @@ def _prep_obj(self, obj, parent, parents_ids=EMPTY_FROZENSET, is_namedtuple=Fals
         if hasattr(obj, "__slots__"):
             obj_to_dict_strategies.append(lambda o: {i: getattr(o, i) for i in o.__slots__})
         else:
+            import inspect
             obj_to_dict_strategies.append(lambda o: dict(inspect.getmembers(o, lambda m: not inspect.isroutine(m))))
 
         for get_dict in obj_to_dict_strategies:
@@ -478,7 +483,7 @@ def _prep_number(self, obj):
 
     def _prep_datetime(self, obj):
         type_ = 'datetime'
-        obj = datetime_normalize(self.truncate_datetime, obj)
+        obj = datetime_normalize(self.truncate_datetime, obj, default_timezone=self.default_timezone)
         return KEY_TO_VAL_STR.format(type_, obj)
 
     def _prep_date(self, obj):
@@ -501,6 +506,13 @@ def _prep_tuple(self, obj, parent, parents_ids):
     def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET):
         """The main hash method"""
         counts = 1
+        if self.custom_operators is not None:
+            for operator in self.custom_operators:
+                func = getattr(operator, 'normalize_value_for_hashing', None)
+                if func is None:
+                    raise NotImplementedError(f"{operator.__class__.__name__} needs to define a normalize_value_for_hashing method to be compatible with ignore_order=True or iterable_compare_func.".format(operator))
+                else:
+                    obj = func(parent, obj)
 
         if isinstance(obj, booleanTypes):
             obj = self._prep_bool(obj)
```
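The new preamble in `_hash` means DeepHash itself now accepts `custom_operators` and routes every value through each operator's `normalize_value_for_hashing` before hashing, raising NotImplementedError when the method is missing. A hedged sketch of that flow; the operator class and sample values below are made up for illustration:

```python
# Hypothetical operator used directly with DeepHash. The only requirement the
# new _hash() preamble imposes is a normalize_value_for_hashing(parent, obj)
# method, so a plain class with that method is enough for this sketch.
from deepdiff import DeepHash


class RoundFloatsForHashing:
    """Normalize floats before hashing so 1.0001 and 1.0002 hash identically."""

    def normalize_value_for_hashing(self, parent, obj):
        if isinstance(obj, float):
            return round(obj, 2)
        return obj


t1 = [1.0001, "a"]
t2 = [1.0002, "a"]
h1 = DeepHash(t1, custom_operators=[RoundFloatsForHashing()])
h2 = DeepHash(t2, custom_operators=[RoundFloatsForHashing()])
print(h1[t1] == h2[t2])  # expected True under these assumptions


class NoNormalizer:
    pass

# With the new check, an operator missing the method now raises
# NotImplementedError instead of being silently ignored:
# DeepHash([1], custom_operators=[NoNormalizer()])
```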

deepdiff/diff.py (+44, -34)

```diff
@@ -5,14 +5,15 @@
 # You might need to run it many times since dictionaries come in different orders
 # every time you run the docstrings.
 # However the docstring expects it in a specific order in order to pass!
+import pytz
 import difflib
 import logging
 import types
 import datetime
 from enum import Enum
 from copy import deepcopy
 from math import isclose as is_close
-from typing import List, Dict, Callable, Union, Any, Pattern, Tuple, Optional
+from typing import List, Dict, Callable, Union, Any, Pattern, Tuple, Optional, Set, FrozenSet
 from collections.abc import Mapping, Iterable, Sequence
 from collections import defaultdict
 from inspect import getmembers
@@ -110,6 +111,8 @@ def _report_progress(_stats, progress_logger, duration):
     'ignore_private_variables',
     'encodings',
     'ignore_encoding_errors',
+    'default_timezone',
+    'custom_operators',
 )
 
 
@@ -128,10 +131,11 @@ def __init__(self,
                  custom_operators: Optional[List[Any]] =None,
                  cutoff_distance_for_pairs: float=CUTOFF_DISTANCE_FOR_PAIRS_DEFAULT,
                  cutoff_intersection_for_pairs: float=CUTOFF_INTERSECTION_FOR_PAIRS_DEFAULT,
+                 default_timezone:Union[datetime.timezone, datetime.timezone, pytz.tzinfo.BaseTzInfo]=datetime.timezone.utc,
                  encodings: Optional[List[str]]=None,
                  exclude_obj_callback: Optional[Callable]=None,
                  exclude_obj_callback_strict: Optional[Callable]=None,
-                 exclude_paths: Union[str, List[str], None]=None,
+                 exclude_paths: Union[str, List[str], Set[str], FrozenSet[str], None]=None,
                  exclude_regex_paths: Union[str, List[str], Pattern[str], List[Pattern[str]], None]=None,
                  exclude_types: Optional[List[Any]]=None,
                  get_deep_distance: bool=False,
@@ -154,6 +158,8 @@ def __init__(self,
                  include_paths: Union[str, List[str], None]=None,
                  iterable_compare_func: Optional[Callable]=None,
                  log_frequency_in_sec: int=0,
+                 log_scale_similarity_threshold: float=0.1,
+                 log_stacktrace: bool=False,
                  math_epsilon: Optional[float]=None,
                  max_diffs: Optional[int]=None,
                  max_passes: int=10000000,
@@ -162,11 +168,10 @@ def __init__(self,
                  progress_logger: Callable=logger.info,
                  report_repetition: bool=False,
                  significant_digits: Optional[int]=None,
-                 use_log_scale: bool=False,
-                 log_scale_similarity_threshold: float=0.1,
                  threshold_to_diff_deeper: float = 0.33,
                  truncate_datetime: Optional[str]=None,
                  use_enum_value: bool=False,
+                 use_log_scale: bool=False,
                  verbose_level: int=1,
                  view: str=TEXT_VIEW,
                  zip_ordered_iterables: bool=False,
@@ -183,8 +188,8 @@ def __init__(self,
                 "ignore_private_variables, ignore_nan_inequality, number_to_string_func, verbose_level, "
                 "view, hasher, hashes, max_passes, max_diffs, zip_ordered_iterables, "
                 "cutoff_distance_for_pairs, cutoff_intersection_for_pairs, log_frequency_in_sec, cache_size, "
-                "cache_tuning_sample_size, get_deep_distance, group_by, group_by_sort_key, cache_purge_level, "
-                "math_epsilon, iterable_compare_func, use_enum_value, _original_type, threshold_to_diff_deeper, "
+                "cache_tuning_sample_size, get_deep_distance, group_by, group_by_sort_key, cache_purge_level, log_stacktrace,"
+                "math_epsilon, iterable_compare_func, use_enum_value, _original_type, threshold_to_diff_deeper, default_timezone "
                 "ignore_order_func, custom_operators, encodings, ignore_encoding_errors, use_log_scale, log_scale_similarity_threshold "
                 "_parameters and _shared_parameters.") % ', '.join(kwargs.keys()))
 
@@ -205,6 +210,8 @@ def __init__(self,
         self.use_enum_value = use_enum_value
         self.log_scale_similarity_threshold = log_scale_similarity_threshold
         self.use_log_scale = use_log_scale
+        self.default_timezone = default_timezone
+        self.log_stacktrace = log_stacktrace
         self.threshold_to_diff_deeper = threshold_to_diff_deeper
         self.ignore_string_type_changes = ignore_string_type_changes
         self.ignore_type_in_groups = self.get_ignore_types_in_groups(
@@ -272,6 +279,10 @@ def _group_by_sort_key(x):
         self.cache_size = cache_size
         _parameters = self.__dict__.copy()
         _parameters['group_by'] = None  # overwriting since these parameters will be passed on to other passes.
+        if log_stacktrace:
+            self.log_err = logger.exception
+        else:
+            self.log_err = logger.error
 
         # Non-Root
         if _shared_parameters:
@@ -732,7 +743,7 @@ def _compare_in_order(
         self, level,
         t1_from_index=None, t1_to_index=None,
         t2_from_index=None, t2_to_index=None
-    ):
+    ) -> List[Tuple[Tuple[int, int], Tuple[Any, Any]]]:
         """
         Default compare if `iterable_compare_func` is not provided.
         This will compare in sequence order.
@@ -752,7 +763,7 @@ def _get_matching_pairs(
         self, level,
         t1_from_index=None, t1_to_index=None,
         t2_from_index=None, t2_to_index=None
-    ):
+    ) -> List[Tuple[Tuple[int, int], Tuple[Any, Any]]]:
         """
         Given a level get matching pairs. This returns list of two tuples in the form:
         [
@@ -1084,44 +1095,43 @@ def _create_hashtable(self, level, t):
                 # It only includes the ones needed when comparing iterables.
                 # The self.hashes dictionary gets shared between different runs of DeepHash
                 # So that any object that is already calculated to have a hash is not re-calculated.
-                deep_hash = DeepHash(item,
-                                     hashes=self.hashes,
-                                     parent=parent,
-                                     apply_hash=True,
-                                     **self.deephash_parameters,
-                                     )
+                deep_hash = DeepHash(
+                    item,
+                    hashes=self.hashes,
+                    parent=parent,
+                    apply_hash=True,
+                    **self.deephash_parameters,
+                )
             except UnicodeDecodeError as err:
                 err.reason = f"Can not produce a hash for {level.path()}: {err.reason}"
                 raise
-            except Exception as e:  # pragma: no cover
-                logger.error("Can not produce a hash for %s."
-                             "Not counting this object.\n %s" %
-                             (level.path(), e))
+            except NotImplementedError:
+                raise
+            # except Exception as e:  # pragma: no cover
+            #     logger.error("Can not produce a hash for %s."
+            #                  "Not counting this object.\n %s" %
+            #                  (level.path(), e))
             else:
                 try:
                     item_hash = deep_hash[item]
                 except KeyError:
                     pass
                 else:
                     if item_hash is unprocessed:  # pragma: no cover
-                        logger.warning("Item %s was not processed while hashing "
+                        self.log_err("Item %s was not processed while hashing "
                                        "thus not counting this object." %
                                        level.path())
                     else:
                         self._add_hash(hashes=local_hashes, item_hash=item_hash, item=item, i=i)
 
         # Also we hash the iterables themselves too so that we can later create cache keys from those hashes.
-        try:
-            DeepHash(
-                obj,
-                hashes=self.hashes,
-                parent=level.path(),
-                apply_hash=True,
-                **self.deephash_parameters,
-            )
-        except Exception as e:  # pragma: no cover
-            logger.error("Can not produce a hash for iterable %s. %s" %
-                         (level.path(), e))
+        DeepHash(
+            obj,
+            hashes=self.hashes,
+            parent=level.path(),
+            apply_hash=True,
+            **self.deephash_parameters,
+        )
         return local_hashes
 
     @staticmethod
@@ -1490,17 +1500,17 @@ def _diff_numbers(self, level, local_tree=None, report_type_change=True):
 
     def _diff_datetime(self, level, local_tree=None):
         """Diff DateTimes"""
-        level.t1 = datetime_normalize(self.truncate_datetime, level.t1)
-        level.t2 = datetime_normalize(self.truncate_datetime, level.t2)
+        level.t1 = datetime_normalize(self.truncate_datetime, level.t1, default_timezone=self.default_timezone)
+        level.t2 = datetime_normalize(self.truncate_datetime, level.t2, default_timezone=self.default_timezone)
 
         if level.t1 != level.t2:
             self._report_result('values_changed', level, local_tree=local_tree)
 
     def _diff_time(self, level, local_tree=None):
         """Diff DateTimes"""
         if self.truncate_datetime:
-            level.t1 = datetime_normalize(self.truncate_datetime, level.t1)
-            level.t2 = datetime_normalize(self.truncate_datetime, level.t2)
+            level.t1 = datetime_normalize(self.truncate_datetime, level.t1, default_timezone=self.default_timezone)
+            level.t2 = datetime_normalize(self.truncate_datetime, level.t2, default_timezone=self.default_timezone)
 
         if level.t1 != level.t2:
             self._report_result('values_changed', level, local_tree=local_tree)
```
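On the DeepDiff side, the signature now takes default_timezone (threaded into datetime_normalize in _diff_datetime and _diff_time above), log_stacktrace (switching error logging to logger.exception), and set or frozenset values for exclude_paths. A usage sketch follows; the sample values are illustrative and the handling of naive datetimes under default_timezone is an assumption based on the changelog wording:

```python
# Usage sketch of the new DeepDiff keyword arguments shown in this diff:
# default_timezone, log_stacktrace, and set-valued exclude_paths.
import datetime

import pytz
from deepdiff import DeepDiff

t1 = {"updated": datetime.datetime(2025, 1, 1, 12, 0)}  # naive datetime
t2 = {"updated": datetime.datetime(2025, 1, 1, 12, 0,
                                   tzinfo=datetime.timezone.utc)}  # aware, UTC

diff = DeepDiff(
    t1,
    t2,
    default_timezone=pytz.timezone("UTC"),  # or datetime.timezone.utc; used when normalizing datetimes
    exclude_paths={"root['ignored']"},      # sets/frozensets are now accepted, not just str/list
    log_stacktrace=True,                    # hashing errors are logged via logger.exception (full traceback)
)
print(diff)
```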

deepdiff/distance.py (+4, -1)

```diff
@@ -1,5 +1,6 @@
 import math
 import datetime
+from deepdiff.base import BaseProtocol
 from deepdiff.deephash import DeepHash
 from deepdiff.helper import (
     DELTA_VIEW, numbers, strings, add_to_frozen_set, not_found, only_numbers, np, np_float64, time_to_seconds,
@@ -11,7 +12,9 @@
 DISTANCE_CALCS_NEEDS_CACHE = "Distance calculation can not happen once the cache is purged. Try with _cache='keep'"
 
 
-class DistanceMixin:
+
+
+class DistanceMixin(BaseProtocol):
 
     def _get_rough_distance(self):
         """
```
