Skip to content

Commit 3712d9d

Browse files
svsgoogle authored and copybara-github committed
Add get_schema_at to SchemaHelperMixin and use it in DataSliceManagerView.
As a result, view.get_schema() does not have to re-analyze the whole schema of the manager anymore.

PiperOrigin-RevId: 859109377
Change-Id: I740ca79477b29bc06bbb10c33159fa17a42bf7bf
1 parent 32fc79b commit 3712d9d

File tree

6 files changed

+94
-10
lines changed

6 files changed

+94
-10
lines changed

py/koladata/ext/persisted_data/bare_root_initial_data_manager_test.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -333,6 +333,25 @@ def test_copy(self):
333333
self.assertIs(copy._root_item, manager._root_item)
334334
self.assertIs(copy._schema_helper, manager._schema_helper)
335335

336+
def test_get_schema_at(self):
337+
root_schema = kd.named_schema('root_schema')
338+
manager = BareRootInitialDataManager.create_new(root_schema.new())
339+
kd.testing.assert_equivalent(
340+
manager.get_schema_at(
341+
data_slice_path_lib.DataSlicePath.parse_from_string('')
342+
),
343+
root_schema,
344+
ids_equality=True,
345+
)
346+
347+
manager = BareRootInitialDataManager.create_new()
348+
kd.testing.assert_equivalent(
349+
manager.get_schema_at(
350+
data_slice_path_lib.DataSlicePath.parse_from_string('')
351+
),
352+
kd.schema.new_schema(),
353+
)
354+
336355

337356
if __name__ == '__main__':
338357
absltest.main()

py/koladata/ext/persisted_data/data_slice_manager_interface.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,12 @@ def get_schema(self) -> kd.types.DataSlice:
2727
"""Returns the schema of the entire DataSlice managed by this manager."""
2828
raise NotImplementedError(type(self))
2929

30+
def get_schema_at(
31+
self, path: data_slice_path_lib.DataSlicePath
32+
) -> kd.types.SchemaItem:
33+
"""Returns the schema of the DataSlice at the given path."""
34+
raise NotImplementedError(type(self))
35+
3036
def generate_paths(
3137
self, *, max_depth: int
3238
) -> Generator[data_slice_path_lib.DataSlicePath, None, None]:

py/koladata/ext/persisted_data/data_slice_manager_view.py

Lines changed: 1 addition & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -93,14 +93,7 @@ def get_schema(self) -> kd.types.SchemaItem:
9393
The view path must be valid, i.e. self.is_view_valid() must be True.
9494
"""
9595
self._check_path_from_root_is_valid()
96-
schema_helper = schema_helper_lib.SchemaHelper(
97-
self._data_slice_manager.get_schema()
98-
)
99-
return schema_helper.get_subschema_at(
100-
schema_helper.get_schema_node_name_for_data_slice_path(
101-
self._path_from_root
102-
)
103-
)
96+
return self._data_slice_manager.get_schema_at(self._path_from_root)
10497

10598
def get_data_slice(
10699
self, *, with_ancestors: bool = False, with_descendants: bool = False

py/koladata/ext/persisted_data/initial_data_manager_interface.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,12 @@ def get_schema(self) -> kd.types.SchemaItem:
6969
"""
7070
raise NotImplementedError(type(self))
7171

72+
def get_schema_at(
73+
self, path: data_slice_path_lib.DataSlicePath
74+
) -> kd.types.SchemaItem:
75+
"""Returns the schema of the DataSlice at the given path."""
76+
raise NotImplementedError(type(self))
77+
7278
def get_all_schema_node_names(self) -> AbstractSet[str]:
7379
"""Returns all the schema node names of self.get_schema().
7480

py/koladata/ext/persisted_data/persisted_incremental_data_slice_manager_test.py

Lines changed: 51 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4620,6 +4620,57 @@ def test_description_of_initial_revision(self):
46204620
['Some bespoke description'],
46214621
)
46224622

4623+
def test_get_schema_at(self):
4624+
persistence_dir = os.path.join(
4625+
self.create_tempdir().full_path, 'persisted_dataslice'
4626+
)
4627+
manager = PersistedIncrementalDataSliceManager.create_new(persistence_dir)
4628+
query_schema = kd.named_schema('query')
4629+
manager.update(
4630+
at_path=parse_dsp(''),
4631+
attr_name='query',
4632+
attr_value=kd.list([
4633+
query_schema.new(query_id='q1'),
4634+
query_schema.new(query_id='q2'),
4635+
]),
4636+
)
4637+
doc_schema = kd.named_schema('doc')
4638+
manager.update(
4639+
at_path=parse_dsp('.query[:]'),
4640+
attr_name='doc',
4641+
attr_value=kd.slice([
4642+
doc_schema.new(doc_id=kd.slice([0, 1, 2, 3])).implode(),
4643+
doc_schema.new(doc_id=kd.slice([4, 5, 6])).implode(),
4644+
]),
4645+
)
4646+
4647+
full_schema = manager.get_schema()
4648+
kd.testing.assert_equivalent(
4649+
manager.get_schema_at(parse_dsp('')),
4650+
full_schema,
4651+
ids_equality=True,
4652+
)
4653+
kd.testing.assert_equivalent(
4654+
manager.get_schema_at(parse_dsp('.query[:]')),
4655+
full_schema.query.get_item_schema(),
4656+
ids_equality=True,
4657+
)
4658+
kd.testing.assert_equivalent(
4659+
manager.get_schema_at(parse_dsp('.query[:].query_id')),
4660+
kd.STRING,
4661+
ids_equality=True,
4662+
)
4663+
kd.testing.assert_equivalent(
4664+
manager.get_schema_at(parse_dsp('.query[:].doc[:]')),
4665+
full_schema.query.get_item_schema().doc.get_item_schema(),
4666+
ids_equality=True,
4667+
)
4668+
kd.testing.assert_equivalent(
4669+
manager.get_schema_at(parse_dsp('.query[:].doc[:].doc_id')),
4670+
kd.INT32,
4671+
ids_equality=True,
4672+
)
4673+
46234674

46244675
if __name__ == '__main__':
46254676
absltest.main()

py/koladata/ext/persisted_data/schema_helper_mixin.py

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818

1919
from koladata import kd
2020
from koladata.ext.persisted_data import data_slice_path as data_slice_path_lib
21-
from koladata.ext.persisted_data import schema_helper
21+
from koladata.ext.persisted_data import schema_helper as schema_helper_lib
2222

2323

2424
class SchemaHelperMixin:
@@ -31,7 +31,7 @@ class SchemaHelperMixin:
3131
get_data_slice_at(), exists() and get_schema() methods.
3232
"""
3333

34-
def _get_schema_helper(self) -> schema_helper.SchemaHelper:
34+
def _get_schema_helper(self) -> schema_helper_lib.SchemaHelper:
3535
raise NotImplementedError(type(self))
3636

3737
def get_data_slice(
@@ -46,6 +46,15 @@ def get_data_slice(
4646
def get_schema(self) -> kd.types.SchemaItem:
4747
return self._get_schema_helper().get_schema()
4848

49+
def get_schema_at(
50+
self, path: data_slice_path_lib.DataSlicePath
51+
) -> kd.types.SchemaItem:
52+
"""Returns the schema of the DataSlice at the given path."""
53+
schema_helper = self._get_schema_helper()
54+
return schema_helper.get_subschema_at(
55+
schema_helper.get_schema_node_name_for_data_slice_path(path)
56+
)
57+
4958
def exists(self, path: data_slice_path_lib.DataSlicePath) -> bool:
5059
return self._get_schema_helper().exists(path)
5160

0 commit comments

Comments
 (0)