Skip to content

Commit 7a9cd27

Browse files
svsgoogle authored and copybara-github committed
Add __hash__ to DataSliceManagerView.
This makes DataSliceManagerView instances hashable, allowing them to be used in sets and as dictionary keys. The hash is based on the underlying data slice manager and the path from the root.

PiperOrigin-RevId: 864834042
Change-Id: I2ad93d76b50777914596bd976dfb21452a38be3a
1 parent 5a8b0dd commit 7a9cd27

File tree

2 files changed

+70
-0
lines changed

2 files changed

+70
-0
lines changed

py/koladata/ext/persisted_data/data_slice_manager_view.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -68,17 +68,31 @@ def __init__(
6868
path_from_root: The view path. It must be a valid path for the manager,
6969
i.e. manager.exists(path_from_root) must be True.
7070
"""
71+
# Conceptually, this class is a frozen dataclass with two attributes:
72+
# _data_slice_manager and _path_from_root. However, since we override
73+
# __getattr__() and __setattr__(), we cannot use a frozen dataclass, and we
74+
# use __dict__ here to avoid triggering __setattr__().
7175
self.__dict__['_data_slice_manager'] = manager
7276
self.__dict__['_path_from_root'] = path_from_root
7377
self._check_path_from_root_is_valid()
7478

7579
def __eq__(self, other: Any) -> bool:
80+
# A frozen dataclass would give this behavior for free. As mentioned above,
81+
# we cannot use a frozen dataclass, so we implement __eq__ manually here.
7682
return (
7783
type(self) is type(other)
7884
and self._data_slice_manager == other._data_slice_manager
7985
and self._path_from_root == other._path_from_root
8086
)
8187

88+
def __hash__(self) -> int:
89+
# A frozen dataclass would give this behavior for free. As mentioned above,
90+
# we cannot use a frozen dataclass, so we implement __hash__ manually here
91+
# to be able to use sets and dicts with DataSliceManagerView instances.
92+
# Implementing __eq__ removes the default hash behavior, so we need to
93+
# implement __hash__ explicitly.
94+
return hash((self._data_slice_manager, self._path_from_root))
95+
8296
def __repr__(self) -> str:
8397
return (
8498
f'DataSliceManagerView({repr(self._data_slice_manager)},'

py/koladata/ext/persisted_data/data_slice_manager_view_test.py

Lines changed: 56 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1873,6 +1873,62 @@ def test_get_with_populate_arguments(self):
18731873
ids_equality=True,
18741874
)
18751875

1876+
def test_views_are_hashable(self):
1877+
persistence_dir = self.create_tempdir().full_path
1878+
manager = pidsm.PersistedIncrementalDataSliceManager.create_new(
1879+
persistence_dir
1880+
)
1881+
root = DataSliceManagerView(manager)
1882+
root.query = (
1883+
kd.list([
1884+
kd.named_schema('query').new(query_id=0, text='How tall is Obama'),
1885+
kd.named_schema('query').new(
1886+
query_id=1, text='How high is the Eiffel tower'
1887+
),
1888+
]),
1889+
'Added queries with query_id and text',
1890+
)
1891+
query = root.query[:]
1892+
query.doc = (
1893+
kd.list([
1894+
kd.named_schema('doc').new(doc_id=0, title='Barack Obama'),
1895+
kd.named_schema('doc').new(doc_id=1, title='Michelle Obama'),
1896+
kd.named_schema('doc').new(doc_id=2, title='George W. Bush'),
1897+
]),
1898+
'Added docs with doc_id and title',
1899+
)
1900+
doc = query.doc[:]
1901+
1902+
self.assertEqual(hash(root), hash(root))
1903+
self.assertEqual(hash(root), hash(root.get_root()))
1904+
self.assertEqual(hash(root.query[:]), hash(query))
1905+
self.assertEqual(hash(root.query[:]), hash(root.query[:]))
1906+
self.assertEqual(hash(root.query[:].text), hash(query.text))
1907+
self.assertEqual(hash(root.query[:].text), hash(root.query[:].text))
1908+
self.assertEqual(hash(root.query[:].doc[:].title), hash(doc.title))
1909+
1910+
my_view_set = {root, query, doc, doc.title, root.query[:].doc[:]}
1911+
self.assertLen(my_view_set, 4)
1912+
1913+
another_manager = (
1914+
pidsm.PersistedIncrementalDataSliceManager.create_from_dir(
1915+
persistence_dir
1916+
)
1917+
)
1918+
another_root = DataSliceManagerView(another_manager)
1919+
1920+
self.assertNotEqual(hash(another_root), hash(root))
1921+
self.assertNotEqual(hash(another_root), hash(root.get_root()))
1922+
self.assertNotEqual(hash(another_root.query[:]), hash(query))
1923+
self.assertNotEqual(hash(another_root.query[:]), hash(root.query[:]))
1924+
self.assertNotEqual(hash(another_root.query[:].text), hash(query.text))
1925+
self.assertNotEqual(
1926+
hash(another_root.query[:].text), hash(root.query[:].text)
1927+
)
1928+
self.assertNotEqual(
1929+
hash(another_root.query[:].doc[:].title), hash(doc.title)
1930+
)
1931+
18761932

18771933
if __name__ == '__main__':
18781934
absltest.main()

0 commit comments

Comments
 (0)