Skip to content

Commit 0ed901a

Browse files
svsgooglecopybara-github
authored andcommitted
Persisted incremental data: add a global_cache for DataBags and DataSlices.
PiperOrigin-RevId: 871821248 Change-Id: I138c1a9d7199598b4bf9037d28307bca188f77a0
1 parent e66344b commit 0ed901a

File tree

3 files changed

+147
-0
lines changed

3 files changed

+147
-0
lines changed

py/koladata/ext/persisted_data/BUILD

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,30 @@ py_library(
448448
deps = ["//py:python_path"],
449449
)
450450

451+
py_library(
452+
name = "global_cache_lib",
453+
srcs = ["global_cache_lib.py"],
454+
deps = [
455+
":lru_size_tracking_cache",
456+
"//py:python_path",
457+
"//py/koladata:kd",
458+
"@com_google_absl_py//absl/flags",
459+
],
460+
)
461+
462+
py_test(
463+
name = "global_cache_lib_test",
464+
srcs = ["global_cache_lib_test.py"],
465+
deps = [
466+
":global_cache_lib",
467+
":lru_size_tracking_cache",
468+
"//py:python_path",
469+
"@com_google_absl_py//absl/flags",
470+
"@com_google_absl_py//absl/testing:absltest",
471+
"@com_google_absl_py//absl/testing:flagsaver",
472+
],
473+
)
474+
451475
py_test(
452476
name = "lru_size_tracking_cache_test",
453477
srcs = ["lru_size_tracking_cache_test.py"],
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Global cache for persisted incremental data."""
16+
17+
from absl import flags
18+
from koladata import kd
19+
from koladata.ext.persisted_data import lru_size_tracking_cache as _lru_size_tracking_cache
20+
21+
_DEFAULT_MAX_SIZE_MB = 1024 * 10 # 10 GiB
22+
_mb_to_bytes = lambda mb: mb * 1024 * 1024
23+
24+
KD_EXT_PERSISTED_DATA_GLOBAL_CACHE_MAX_SIZE_MB = flags.DEFINE_integer(
25+
'kd_ext_persisted_data_global_cache_max_size_mb',
26+
_DEFAULT_MAX_SIZE_MB,
27+
'The maximum size of the kd_ext.persisted_data global cache in megabytes.',
28+
)
29+
30+
31+
def _kd_ext_persisted_data_global_cache_max_size_mb_validator(
32+
value: int,
33+
) -> bool:
34+
if value < 0:
35+
return False
36+
get_global_cache().set_max_total_bytes_of_entries_in_cache(
37+
_mb_to_bytes(value)
38+
)
39+
return True
40+
41+
42+
flags.register_validator(
43+
'kd_ext_persisted_data_global_cache_max_size_mb',
44+
_kd_ext_persisted_data_global_cache_max_size_mb_validator,
45+
message='Must be a non-negative number.',
46+
flag_values=flags.FLAGS,
47+
)
48+
49+
50+
CACHE_VALUE_TYPE = kd.types.DataBag | kd.types.DataSlice
51+
52+
Cache = _lru_size_tracking_cache.LruSizeTrackingCache
53+
CacheEntryMetadata = _lru_size_tracking_cache.CacheEntryMetadata
54+
55+
_CACHE: Cache[str, CACHE_VALUE_TYPE] = Cache(
56+
max_total_bytes_of_entries_in_cache=_mb_to_bytes(_DEFAULT_MAX_SIZE_MB)
57+
)
58+
59+
60+
def get_global_cache() -> Cache[str, CACHE_VALUE_TYPE]:
61+
"""Returns the global cache."""
62+
return _CACHE
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from absl import flags
16+
from absl.testing import absltest
17+
from absl.testing import flagsaver
18+
from koladata.ext.persisted_data import global_cache_lib
19+
from koladata.ext.persisted_data import lru_size_tracking_cache
20+
21+
22+
class GlobalCacheLibTest(absltest.TestCase):
23+
24+
def test_get_global_cache_return_type(self):
25+
cache = global_cache_lib.get_global_cache()
26+
self.assertIsInstance(cache, lru_size_tracking_cache.LruSizeTrackingCache)
27+
28+
def test_get_global_cache_repeated_calls_return_same_object(self):
29+
result1 = global_cache_lib.get_global_cache()
30+
result2 = global_cache_lib.get_global_cache()
31+
32+
self.assertIs(result1, result2)
33+
34+
def test_global_cache_default_max_size(self):
35+
self.assertEqual(
36+
global_cache_lib.get_global_cache().get_max_total_bytes_of_entries_in_cache(),
37+
global_cache_lib._DEFAULT_MAX_SIZE_MB * 1024 * 1024,
38+
)
39+
40+
@flagsaver.flagsaver()
41+
def test_kd_ext_persisted_data_global_cache_max_size_mb_flag_validation(self):
42+
with self.assertRaises(flags._exceptions.IllegalFlagValueError):
43+
flags.FLAGS.kd_ext_persisted_data_global_cache_max_size_mb = -1
44+
45+
@flagsaver.flagsaver()
46+
def test_kd_ext_persisted_data_global_cache_max_size_mb_flag_updates_cache(
47+
self,
48+
):
49+
self.assertNotEqual(
50+
global_cache_lib.get_global_cache().get_max_total_bytes_of_entries_in_cache(),
51+
42 * 1024 * 1024,
52+
)
53+
flags.FLAGS.kd_ext_persisted_data_global_cache_max_size_mb = 42
54+
self.assertEqual(
55+
global_cache_lib.get_global_cache().get_max_total_bytes_of_entries_in_cache(),
56+
42 * 1024 * 1024,
57+
)
58+
59+
60+
if __name__ == '__main__':
61+
absltest.main()

0 commit comments

Comments
 (0)