Skip to content

Commit b6afc83

Browse files
authored
Merge pull request #137 from seperman/dev
Adding ignore_type_subclasses and ignore_string_case
2 parents 88f7fed + 266ff03 commit b6afc83

17 files changed

+352
-112
lines changed

README.md

+23-8
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# DeepDiff v 4.0.2
1+
# DeepDiff v 4.0.4
22

33
<!-- ![Downloads](https://img.shields.io/pypi/dm/deepdiff.svg?style=flat) -->
44
![Python Versions](https://img.shields.io/pypi/pyversions/deepdiff.svg?style=flat)
@@ -23,11 +23,11 @@ Tested on Python 3.4, 3.5, 3.6, 3.7, Pypy3
2323

2424
### Install from PyPi:
2525

26-
`pip install deepdiff`
26+
`pip install deepdiff`
2727

2828
DeepDiff prefers to use Murmur3 for hashing. However you have to manually install Murmur3 by running:
2929

30-
`pip install mmh3`
30+
`pip install 'deepdiff[murmur]'`
3131

3232
Otherwise DeepDiff will be using SHA256 for hashing which is a cryptographic hash and is considerably slower.
3333

@@ -48,7 +48,9 @@ DeepDiff gets the difference of 2 objects.
4848
> - Please take a look at the [DeepDiff docs](deepdiff/diff_doc.rst)
4949
> - The full documentation can be found on <https://deepdiff.readthedocs.io>
5050
51-
## Examples
51+
## A few Examples
52+
53+
> Note: This is just a brief overview of what DeepDiff can do. Please visit <https://deepdiff.readthedocs.io> for full documentation.
5254
5355
### List difference ignoring order or duplicates
5456

@@ -157,8 +159,7 @@ Digits **after** the decimal point. Internally it uses "{:.Xf}".format(Your Numb
157159
'new_value': 3.0,
158160
'old_type': <class 'int'>,
159161
'old_value': 3}}}
160-
>>> ddiff = DeepDiff(t1, t2, ignore_type_in_groups=True)
161-
>>> pprint(ddiff, indent=2)
162+
>>> DeepDiff(t1, t2, ignore_type_in_groups=[(int, float)])
162163
{}
163164
```
164165

@@ -408,12 +409,16 @@ On MacOS Mojave some user experience difficulty when installing Murmur3.
408409

409410
The problem can be solved by running:
410411

411-
`xcode-select --install`
412+
`xcode-select --install`
412413

413-
And then running `pip install mmh3`
414+
And then running
415+
416+
`pip install mmh3`
414417

415418
# ChangeLog
416419

420+
- v4-0-4: Adding ignore_string_case and ignore_type_subclasses
421+
- v4-0-3: Adding versionbump tool for release
417422
- v4-0-2: Fixing installation issue where rst files are missing.
418423
- v4-0-1: Fixing installation Tarball missing requirements.txt. DeepDiff v4+ should not show up as pip installable for Py2. Making Murmur3 installation optional.
419424
- v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. Fixing a bug where classes which inherit from classes with slots didn't have all of their slots compared. Renaming ContentHash to DeepHash. Adding exclude by path and regex path to DeepHash. Adding ignore_type_in_groups. Adding match_string to DeepSearch. Adding Timedelta object diffing.
@@ -448,6 +453,16 @@ And then running `pip install mmh3`
448453
- v0-5-6: Adding slots support
449454
- v0-5-5: Adding loop detection
450455

456+
# Releases
457+
458+
We use bump2version to bump and tag releases.
459+
460+
```bash
461+
git checkout master && git pull
462+
bumpversion {patch|minor|major}
463+
git push && git push --tags
464+
```
465+
451466
# Contribute
452467

453468
1. Please make your PR against the dev branch

deepdiff/__init__.py

+12-11
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1-
"""This module offers the DeepDiff, DeepSearch, grep and DeepHash classes."""
2-
# flake8: noqa
3-
__version__ = '4.0.2'
4-
import logging
5-
6-
if __name__ == '__main__':
7-
logging.basicConfig(format='%(asctime)s %(levelname)8s %(message)s')
8-
9-
from .diff import DeepDiff
10-
from .search import DeepSearch, grep
11-
from .deephash import DeepHash
1+
"""This module offers the DeepDiff, DeepSearch, grep and DeepHash classes."""
2+
# flake8: noqa
3+
__version__ = '4.0.4'
4+
import logging
5+
6+
if __name__ == '__main__':
7+
logging.basicConfig(format='%(asctime)s %(levelname)8s %(message)s')
8+
9+
10+
from .diff import DeepDiff
11+
from .search import DeepSearch, grep
12+
from .deephash import DeepHash

deepdiff/base.py

+15-4
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,30 @@ def get_significant_digits(self, significant_digits, ignore_numeric_type_changes
1919

2020
def get_ignore_types_in_groups(self, ignore_type_in_groups,
2121
ignore_string_type_changes,
22-
ignore_numeric_type_changes):
22+
ignore_numeric_type_changes,
23+
ignore_type_subclasses):
2324
if ignore_type_in_groups:
2425
if isinstance(ignore_type_in_groups[0], type):
25-
ignore_type_in_groups = [OrderedSet(ignore_type_in_groups)]
26-
else:
27-
ignore_type_in_groups = list(map(OrderedSet, ignore_type_in_groups))
26+
ignore_type_in_groups = [ignore_type_in_groups]
2827
else:
2928
ignore_type_in_groups = []
3029

30+
result = []
31+
for item_group in ignore_type_in_groups:
32+
new_item_group = OrderedSet()
33+
for item in item_group:
34+
item = type(item) if item is None or not isinstance(item, type) else item
35+
new_item_group.add(item)
36+
result.append(new_item_group)
37+
ignore_type_in_groups = result
38+
3139
if ignore_string_type_changes and self.strings not in ignore_type_in_groups:
3240
ignore_type_in_groups.append(OrderedSet(self.strings))
3341

3442
if ignore_numeric_type_changes and self.numbers not in ignore_type_in_groups:
3543
ignore_type_in_groups.append(OrderedSet(self.numbers))
3644

45+
if ignore_type_subclasses:
46+
ignore_type_in_groups = list(map(tuple, ignore_type_in_groups))
47+
3748
return ignore_type_in_groups

deepdiff/deephash.py

+22-13
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
#!/usr/bin/env python
22
# -*- coding: utf-8 -*-
3-
import os
43
import logging
54
from collections import Iterable
65
from collections import MutableMapping
@@ -9,16 +8,15 @@
98
from hashlib import sha1, sha256
109

1110
from deepdiff.helper import (strings, numbers, unprocessed, not_hashed, add_to_frozen_set,
12-
convert_item_or_items_into_set_else_none, current_dir,
11+
convert_item_or_items_into_set_else_none, get_doc,
1312
convert_item_or_items_into_compiled_regexes_else_none,
14-
get_id)
13+
get_id, type_is_subclass_of_type_group, type_in_type_group)
1514
from deepdiff.base import Base
1615
logger = logging.getLogger(__name__)
1716

1817
try:
1918
import mmh3
2019
except ImportError:
21-
logger.warning('Can not find Murmur3 hashing installed. Switching to SHA256 as the default hash. Refer to https://github.com/seperman/deepdiff#murmur3 for more info.')
2220
mmh3 = False
2321

2422
UNPROCESSED = 'unprocessed'
@@ -34,7 +32,7 @@
3432
ZERO_DECIMAL_CHARACTERS = set("-0.")
3533

3634

37-
def prepare_string_for_hashing(obj, ignore_string_type_changes=False):
35+
def prepare_string_for_hashing(obj, ignore_string_type_changes=False, ignore_string_case=False):
3836
"""
3937
Clean type conversions
4038
"""
@@ -43,11 +41,12 @@ def prepare_string_for_hashing(obj, ignore_string_type_changes=False):
4341
obj = obj.decode('utf-8')
4442
if not ignore_string_type_changes:
4543
obj = KEY_TO_VAL_STR.format(original_type, obj)
44+
if ignore_string_case:
45+
obj = obj.lower()
4646
return obj
4747

4848

49-
with open(os.path.join(current_dir, 'deephash_doc.rst'), 'r') as doc_file:
50-
doc = doc_file.read()
49+
doc = get_doc('deephash_doc.rst')
5150

5251

5352
class DeepHash(dict, Base):
@@ -67,14 +66,16 @@ def __init__(self,
6766
ignore_type_in_groups=None,
6867
ignore_string_type_changes=False,
6968
ignore_numeric_type_changes=False,
69+
ignore_type_subclasses=False,
70+
ignore_string_case=False,
7071
**kwargs):
7172
if kwargs:
7273
raise ValueError(
7374
("The following parameter(s) are not valid: %s\n"
7475
"The valid parameters are obj, hashes, exclude_types,"
7576
"exclude_paths, exclude_regex_paths, hasher, ignore_repetition,"
7677
"significant_digits, apply_hash, ignore_type_in_groups, ignore_string_type_changes,"
77-
"ignore_numeric_type_changes") % ', '.join(kwargs.keys()))
78+
"ignore_numeric_type_changes, ignore_type_subclasses, ignore_string_case") % ', '.join(kwargs.keys()))
7879
self.obj = obj
7980
exclude_types = set() if exclude_types is None else set(exclude_types)
8081
self.exclude_types_tuple = tuple(exclude_types) # we need tuple for checking isinstance
@@ -89,14 +90,18 @@ def __init__(self,
8990

9091
self.significant_digits = self.get_significant_digits(significant_digits, ignore_numeric_type_changes)
9192
self.ignore_type_in_groups = self.get_ignore_types_in_groups(
92-
ignore_type_in_groups,
93-
ignore_string_type_changes, ignore_numeric_type_changes)
93+
ignore_type_in_groups=ignore_type_in_groups,
94+
ignore_string_type_changes=ignore_string_type_changes,
95+
ignore_numeric_type_changes=ignore_numeric_type_changes,
96+
ignore_type_subclasses=ignore_type_subclasses)
9497
self.ignore_string_type_changes = ignore_string_type_changes
9598
self.ignore_numeric_type_changes = ignore_numeric_type_changes
99+
self.ignore_string_case = ignore_string_case
96100
# makes the hash return constant size result if true
97101
# the only time it should be set to False is when
98102
# testing the individual hash functions for different types of objects.
99103
self.apply_hash = apply_hash
104+
self.type_check_func = type_is_subclass_of_type_group if ignore_type_subclasses else type_in_type_group
100105

101106
self._hash(obj, parent="root", parents_ids=frozenset({get_id(obj)}))
102107

@@ -218,7 +223,7 @@ def _prep_dict(self, obj, parent, parents_ids=EMPTY_FROZENSET, print_as_attribut
218223
type_ = original_type or type(obj)
219224
type_str = type_.__name__
220225
for type_group in self.ignore_type_in_groups:
221-
if type_ in type_group:
226+
if self.type_check_func(type_, type_group):
222227
type_str = ','.join(map(lambda x: x.__name__, type_group))
223228
break
224229
else:
@@ -303,7 +308,9 @@ def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET):
303308
result = 'NONE'
304309

305310
elif isinstance(obj, strings):
306-
result = prepare_string_for_hashing(obj, ignore_string_type_changes=self.ignore_string_type_changes)
311+
result = prepare_string_for_hashing(
312+
obj, ignore_string_type_changes=self.ignore_string_type_changes,
313+
ignore_string_case=self.ignore_string_case)
307314

308315
elif isinstance(obj, numbers):
309316
result = self._prep_number(obj)
@@ -333,7 +340,9 @@ def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET):
333340
if isinstance(obj, strings):
334341
result_cleaned = result
335342
else:
336-
result_cleaned = prepare_string_for_hashing(result, ignore_string_type_changes=self.ignore_string_type_changes)
343+
result_cleaned = prepare_string_for_hashing(
344+
result, ignore_string_type_changes=self.ignore_string_type_changes,
345+
ignore_string_case=self.ignore_string_case)
337346
result = self.hasher(result_cleaned)
338347

339348
# It is important to keep the hash of all objects.

deepdiff/deephash_doc.rst

+67-14
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ At the core of it, DeepHash is a deterministic serialization of your object into
1010
can be passed to a hash function. By default it uses Murmur 3 128 bit hash function which is a
1111
fast, non-cryptographic hashing function. You have the option to pass any another hashing function to be used instead.
1212

13-
If it can't find Murmur3 package (mmh3) installed, it uses Python's built-in SHA256 for hashing which is considerably slower than Murmur3. So it is advised that you install Murmur3 by running `pip install mmh3`
13+
If it can't find the Murmur3 package (mmh3) installed, it uses Python's built-in SHA256 for hashing which is considerably slower than Murmur3. So it is advised that you install Murmur3 by running `pip install 'deepdiff[murmur]'`
1414

1515
**Import**
1616
>>> from deepdiff import DeepHash
@@ -138,6 +138,15 @@ ignore_type_in_groups example with custom objects:
138138
>>> d1[burrito] == d2[taco]
139139
True
140140

141+
142+
ignore_type_subclasses
143+
Use ignore_type_subclasses=True so when ignoring type (class), the subclasses of that class are ignored too.
144+
145+
146+
ignore_string_case
147+
Whether to be case-sensitive or not when comparing strings. By setting ignore_string_case=True, strings will be compared case-insensitively.
148+
149+
141150
**Returns**
142151
A dictionary of {item: item hash}.
143152
If your object is nested, it will build hashes of all the objects it contains too.
@@ -161,43 +170,87 @@ But with DeepHash:
161170
>>> DeepHash(obj)
162171
{1: 234041559348429806012597903916437026784, 2: 148655924348182454950690728321917595655, 'a': 119173504597196970070553896747624927922, 'b': 4994827227437929991738076607196210252, '!>*id4488569408': 32452838416412500686422093274247968754}
163172

164-
So what is exactly the hash of obj in this case?
165-
DeepHash is calculating the hash of the obj and any other object that obj contains.
166-
The output of DeepHash is a dictionary of object IDs to their hashes.
167-
In order to get the hash of obj itself, you need to use the object (or the id of object) to get its hash:
173+
So what is exactly the hash of obj in this case?
174+
DeepHash is calculating the hash of the obj and any other object that obj contains.
175+
The output of DeepHash is a dictionary of object IDs to their hashes.
176+
In order to get the hash of obj itself, you need to use the object (or the id of object) to get its hash:
168177
>>> hashes = DeepHash(obj)
169178
>>> hashes[obj]
170179
34150898645750099477987229399128149852
171180

172-
Which you can write as:
181+
Which you can write as:
173182
>>> hashes = DeepHash(obj)[obj]
174183

175-
At first it might seem weird why DeepHash(obj)[obj] but remember that DeepHash(obj) is a dictionary of hashes of all other objects that obj contains too.
184+
At first DeepHash(obj)[obj] might seem weird, but remember that DeepHash(obj) is a dictionary of hashes of all other objects that obj contains too.
176185

177-
The result hash is 34150898645750099477987229399128149852 which is generated by
178-
Murmur 3 128bit hashing algorithm. If you prefer to use another hashing algorithm, you can pass it using the hasher parameter. Read more about Murmur3 here: https://en.wikipedia.org/wiki/MurmurHash
186+
The result hash is 34150898645750099477987229399128149852 which is generated by
187+
Murmur 3 128bit hashing algorithm. If you prefer to use another hashing algorithm, you can pass it using the hasher parameter. Read more about Murmur3 here: https://en.wikipedia.org/wiki/MurmurHash
179188

180-
If you do a deep copy of obj, it should still give you the same hash:
189+
If you do a deep copy of obj, it should still give you the same hash:
181190
>>> from copy import deepcopy
182191
>>> obj2 = deepcopy(obj)
183192
>>> DeepHash(obj2)[obj2]
184193
34150898645750099477987229399128149852
185194

186-
Note that by default DeepHash will include string type differences. So if your strings were bytes:
195+
Note that by default DeepHash will include string type differences. So if your strings were bytes:
187196
>>> obj3 = {1: 2, b'a': b'b'}
188197
>>> DeepHash(obj3)[obj3]
189198
64067525765846024488103933101621212760
190199

191-
But if you want the same hash if string types are different, set ignore_string_type_changes to True:
200+
But if you want the same hash if string types are different, set ignore_string_type_changes to True:
192201
>>> DeepHash(obj3, ignore_string_type_changes=True)[obj3]
193202
34150898645750099477987229399128149852
194203

195-
ignore_numeric_type_changes is by default False too.
204+
ignore_numeric_type_changes is by default False too.
196205
>>> obj1 = {4:10}
197206
>>> obj2 = {4.0: Decimal(10.0)}
198207
>>> DeepHash(obj1)[4] == DeepHash(obj2)[4.0]
199208
False
200209

201-
But by setting it to True, we can get the same hash.
210+
But by setting it to True, we can get the same hash.
202211
>>> DeepHash(obj1, ignore_numeric_type_changes=True)[4] == DeepHash(obj2, ignore_numeric_type_changes=True)[4.0]
203212
True
213+
214+
215+
ignore_type_subclasses
216+
Use ignore_type_subclasses=True so when ignoring type (class), the subclasses of that class are ignored too.
217+
218+
>>> from deepdiff import DeepHash
219+
>>>
220+
>>> class ClassB:
221+
... def __init__(self, x):
222+
... self.x = x
223+
... def __repr__(self):
224+
... return "obj b"
225+
...
226+
>>>
227+
>>> class ClassC(ClassB):
228+
... def __repr__(self):
229+
... return "obj c"
230+
...
231+
>>> obj_b = ClassB(1)
232+
>>> obj_c = ClassC(1)
233+
>>>
234+
>>> # Since these 2 objects are from 2 different classes, the hashes are different by default.
235+
... # ignore_type_in_groups is set to [(ClassB, )] which means to ignore any type conversion between
236+
... # objects of classB and itself which does not make sense but it illustrates a better point when
237+
... # ignore_type_subclasses is set to be True.
238+
... hashes_b = DeepHash(obj_b, ignore_type_in_groups=[(ClassB, )])
239+
>>> hashes_c = DeepHash(obj_c, ignore_type_in_groups=[(ClassB, )])
240+
>>> hashes_b[obj_b] != hashes_c[obj_c]
241+
True
242+
>>>
243+
>>> # Hashes of these 2 objects will be the same when ignore_type_subclasses is set to True
244+
... hashes_b = DeepHash(obj_b, ignore_type_in_groups=[(ClassB, )], ignore_type_subclasses=True)
245+
>>> hashes_c = DeepHash(obj_c, ignore_type_in_groups=[(ClassB, )], ignore_type_subclasses=True)
246+
>>> hashes_b[obj_b] == hashes_c[obj_c]
247+
True
248+
249+
ignore_string_case
250+
Whether to be case-sensitive or not when comparing strings. By setting ignore_string_case=True, strings will be compared case-insensitively.
251+
252+
>>> from deepdiff import DeepHash
253+
>>> DeepHash('hello')['hello'] == DeepHash('heLLO')['heLLO']
254+
False
255+
>>> DeepHash('hello', ignore_string_case=True)['hello'] == DeepHash('heLLO', ignore_string_case=True)['heLLO']
256+
True

0 commit comments

Comments
 (0)