Skip to content

Commit 54610e0

Browse files
petrmikheevcopybara-github
authored andcommitted
Forbid mutable original DataBag and autofreeze mutable argument DataBags in "kd.core.enriched" and "kd.core.updated".
PiperOrigin-RevId: 859044745 Change-Id: I57e654ad711833447a3489161dc1234be6f9f063
1 parent afae0b0 commit 54610e0

35 files changed

+174
-146
lines changed

docs/api_reference.md

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -708,7 +708,9 @@ Aliases:
708708
<pre class="no-copy"><code class="lang-text no-auto-prettify">Returns a copy of a DataSlice with a additional fallback DataBag(s).
709709

710710
Values in the original DataBag of `ds` take precedence over the ones in
711-
`*bag`.
711+
`*bag`. The original DataBag (if present) must be immutable. If the original
712+
DataBag is mutable, either freeze `ds` first, or add updates inplace using
713+
mutable API.
712714

713715
The DataBag attached to the result is a new immutable DataBag that falls back
714716
to the DataBag of `ds` if present and then to `*bag`.
@@ -1188,7 +1190,9 @@ Aliases:
11881190
<pre class="no-copy"><code class="lang-text no-auto-prettify">Returns a copy of a DataSlice with DataBag(s) of updates applied.
11891191

11901192
Values in `*bag` take precedence over the ones in the original DataBag of
1191-
`ds`.
1193+
`ds`. The original DataBag (if present) must be immutable. If the original
1194+
DataBag is mutable, either freeze `ds` first, or add updates inplace using
1195+
mutable API.
11921196

11931197
The DataBag attached to the result is a new immutable DataBag that falls back
11941198
to the DataBag of `ds` if present and then to `*bag`.
@@ -12832,7 +12836,9 @@ Args:
1283212836
<pre class="no-copy"><code class="lang-text no-auto-prettify">Returns a copy of a DataSlice with a additional fallback DataBag(s).
1283312837

1283412838
Values in the original DataBag of `ds` take precedence over the ones in
12835-
`*bag`.
12839+
`*bag`. The original DataBag (if present) must be immutable. If the original
12840+
DataBag is mutable, either freeze `ds` first, or add updates inplace using
12841+
mutable API.
1283612842

1283712843
The DataBag attached to the result is a new immutable DataBag that falls back
1283812844
to the DataBag of `ds` if present and then to `*bag`.
@@ -13646,7 +13652,9 @@ Args:
1364613652
<pre class="no-copy"><code class="lang-text no-auto-prettify">Returns a copy of a DataSlice with DataBag(s) of updates applied.
1364713653

1364813654
Values in `*bag` take precedence over the ones in the original DataBag of
13649-
`ds`.
13655+
`ds`. The original DataBag (if present) must be immutable. If the original
13656+
DataBag is mutable, either freeze `ds` first, or add updates inplace using
13657+
mutable API.
1365013658

1365113659
The DataBag attached to the result is a new immutable DataBag that falls back
1365213660
to the DataBag of `ds` if present and then to `*bag`.

koladata/operators/core.cc

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include <cstddef>
1919
#include <cstdint>
2020
#include <memory>
21+
#include <string>
2122
#include <utility>
2223
#include <vector>
2324

@@ -366,27 +367,39 @@ class EnrichedOrUpdatedOperator final : public arolla::QExprOperator {
366367
absl::StatusOr<std::unique_ptr<arolla::BoundOperator>> DoBind(
367368
absl::Span<const arolla::TypedSlot> input_slots,
368369
arolla::TypedSlot output_slot) const override {
370+
absl::string_view op_name =
371+
is_enriched_operator_ ? "kd.core.enriched" : "kd.core.updated";
369372
return MakeBoundOperator(
370-
is_enriched_operator_ ? "kd.core.enriched" : "kd.core.updated",
373+
std::string(op_name),
371374
[input_slots = std::vector<arolla::TypedSlot>(input_slots.begin(),
372375
input_slots.end()),
373376
output_slot = output_slot.UnsafeToSlot<DataSlice>(),
374-
is_enriched_operator = is_enriched_operator_](
377+
is_enriched_operator = is_enriched_operator_, op_name](
375378
arolla::EvaluationContext* ctx, arolla::FramePtr frame) {
376379
const DataSlice& ds =
377380
frame.Get(input_slots[0].UnsafeToSlot<DataSlice>());
381+
if (ds.GetBag() != nullptr && ds.GetBag()->IsMutable()) {
382+
return absl::InvalidArgumentError(
383+
absl::StrCat(op_name,
384+
" requires the original DataBag to be immutable; "
385+
"either freeze it, or use mutable API"));
386+
}
378387
std::vector<DataBagPtr> db_list;
379388
db_list.reserve(input_slots.size());
380389
if (is_enriched_operator) {
381390
db_list.push_back(ds.GetBag());
382391
for (size_t i = 1; i < input_slots.size(); ++i) {
392+
const auto& bag =
393+
frame.Get(input_slots[i].UnsafeToSlot<DataBagPtr>());
383394
db_list.push_back(
384-
frame.Get(input_slots[i].UnsafeToSlot<DataBagPtr>()));
395+
bag != nullptr && bag->IsMutable() ? bag->Freeze() : bag);
385396
}
386397
} else {
387398
for (size_t i = input_slots.size() - 1; i >= 1; --i) {
399+
const auto& bag =
400+
frame.Get(input_slots[i].UnsafeToSlot<DataBagPtr>());
388401
db_list.push_back(
389-
frame.Get(input_slots[i].UnsafeToSlot<DataBagPtr>()));
402+
bag != nullptr && bag->IsMutable() ? bag->Freeze() : bag);
390403
}
391404
db_list.push_back(ds.GetBag());
392405
}

py/koladata/benchmarks/set_get_attr_benchmarks.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ def set_get_multiple_attrs_entity_via_fallback(state):
291291
update_ds = ds.with_bag(kd.mutable_bag())
292292
arg_name = 'abc' if state.range(1) else f'abc{i}'
293293
update_ds.set_attr(arg_name, val)
294-
updates.append((arg_name, update_ds))
294+
updates.append((arg_name, update_ds.freeze_bag()))
295295
try:
296296
_ = ds.missing # To initialize all lazy initializers and reduce variance.
297297
except AttributeError:
@@ -410,7 +410,7 @@ def set_get_multiple_attrs_10000_entity_via_fallback(state):
410410
update_ds = ds.with_bag(kd.mutable_bag())
411411
arg_name = 'abc' if state.range(1) else f'abc{i}'
412412
update_ds.set_attr(arg_name, val)
413-
updates.append((arg_name, update_ds))
413+
updates.append((arg_name, update_ds.freeze_bag()))
414414
try:
415415
_ = ds.missing # To initialize all lazy initializers and reduce variance.
416416
except AttributeError:

py/koladata/ext/persisted_data/schema_helper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -625,7 +625,7 @@ class SchemaHelper:
625625

626626
def __init__(self, schema: kd.types.SchemaItem):
627627
_check_is_schema_item(schema)
628-
self._schema = schema
628+
self._schema = schema.freeze_bag()
629629
artifacts = _analyze_schema(self._schema)
630630
self._parent_to_child_graph = artifacts.schema_graph
631631
self._schema_node_name_to_schema = artifacts.schema_node_name_to_schema

py/koladata/functions/tests/set_attr_test.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -165,16 +165,12 @@ def test_merging(self):
165165
testing.assert_equal(x.xyz.a, ds([5, 4]).with_bag(db))
166166

167167
def test_merging_with_fallbacks(self):
168-
db = kd.mutable_bag()
169-
x = db.obj()
170-
db2 = kd.mutable_bag()
171-
y = db2.new(bar='foo')
172-
db3 = kd.mutable_bag()
173-
y.with_bag(db3).set_attr('bar', 2)
174-
y.with_bag(db3).set_attr('baz', 5)
175-
x.foo = y.enriched(db3)
176-
testing.assert_equal(x.foo.bar, ds('foo').with_bag(db))
177-
testing.assert_equal(x.foo.baz, ds(5).with_bag(db))
168+
x = kd.mutable_bag().obj()
169+
y = kd.new(bar='foo')
170+
z = y.with_bag(kd.bag()).with_attrs(bar=2, baz=5)
171+
x.foo = y.enriched(z.get_bag())
172+
testing.assert_equal(x.foo.bar.no_bag(), ds('foo'))
173+
testing.assert_equal(x.foo.baz.no_bag(), ds(5))
178174

179175
def test_assignment_rhs_error(self):
180176
x = kd.mutable_bag().obj(x=ds([1, 2]))

py/koladata/operators/core.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,9 @@ def updated(ds, *bag): # pylint: disable=unused-argument
349349
"""Returns a copy of a DataSlice with DataBag(s) of updates applied.
350350
351351
Values in `*bag` take precedence over the ones in the original DataBag of
352-
`ds`.
352+
`ds`. The original DataBag (if present) must be immutable. If the original
353+
DataBag is mutable, either freeze `ds` first, or add updates inplace using
354+
mutable API.
353355
354356
The DataBag attached to the result is a new immutable DataBag that falls back
355357
to the DataBag of `ds` if present and then to `*bag`.
@@ -1270,7 +1272,9 @@ def enriched(ds, *bag): # pylint: disable=unused-argument
12701272
"""Returns a copy of a DataSlice with a additional fallback DataBag(s).
12711273
12721274
Values in the original DataBag of `ds` take precedence over the ones in
1273-
`*bag`.
1275+
`*bag`. The original DataBag (if present) must be immutable. If the original
1276+
DataBag is mutable, either freeze `ds` first, or add updates inplace using
1277+
mutable API.
12741278
12751279
The DataBag attached to the result is a new immutable DataBag that falls back
12761280
to the DataBag of `ds` if present and then to `*bag`.

py/koladata/operators/tests/core_attr_test.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,15 +85,13 @@ def test_attr_update_implicit_casting(self):
8585

8686
def test_attr_update_with_ds_attr(self):
8787
with self.subTest('object'):
88-
db = bag()
89-
ds1 = kd.stack(db.obj(x=1), db.obj(y=2))
88+
ds1 = kd.stack(kd.obj(x=1), kd.obj(y=2))
9089
db = kd.core.attr(ds1, ds(['x', 'y']), 42)
9190
testing.assert_equal(ds1.updated(db).maybe('x').no_bag(), ds([42, None]))
9291
testing.assert_equal(ds1.updated(db).maybe('y').no_bag(), ds([None, 42]))
9392

9493
with self.subTest('entity'):
95-
db = bag()
96-
ds1 = db.new(x=ds([1, 3]), y=ds([2, 4]))
94+
ds1 = kd.new(x=ds([1, 3]), y=ds([2, 4]))
9795
db = kd.core.attr(ds1, ds(['x', 'y']), 42)
9896
testing.assert_equal(ds1.updated(db).x.no_bag(), ds([42, 3]))
9997
testing.assert_equal(ds1.updated(db).y.no_bag(), ds([2, 42]))

py/koladata/operators/tests/core_attrs_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ def test_multi_attr_overwrite(self):
8383
)
8484

8585
def test_multi_attr_overwrite_object_schema(self):
86-
o = bag().obj(x=1, y=10)
86+
o = kd.obj(x=1, y=10)
8787
db2 = kd.core.attrs(o, x='2', a=1, b='p', c=bag().list([1, 2]))
8888
self.assertNotEqual(o.get_bag().fingerprint, db2.fingerprint)
8989

py/koladata/operators/tests/core_clone_test.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ def test_entity(self, pass_schema):
116116
)
117117
def test_clone_only_reachable(self, pass_schema):
118118
a_slice = bag().new(b=ds([1, None, 2]), c=ds(['foo', 'bar', 'baz']))
119-
o = bag().new(a=a_slice)
119+
o = kd.new(a=a_slice)
120120
fb = bag().new(a=a_slice.no_bag(), c=ds([1, None, 2]))
121121
o = o.enriched(fb.get_bag())
122122
if pass_schema:
@@ -155,9 +155,9 @@ def test_uu(self, noise_positioned_in_front, pass_schema):
155155
fb_noise = bag()
156156
noise = fb_noise.obj(a=[1, 2, 3])
157157
if noise_positioned_in_front:
158-
o_fb = o.with_bag(noise.enriched(db).get_bag())
158+
o_fb = o.with_bag(noise.freeze_bag().enriched(db).get_bag())
159159
else:
160-
o_fb = o.enriched(fb_noise)
160+
o_fb = o.freeze_bag().enriched(fb_noise)
161161

162162
if pass_schema:
163163
result = kd.core.clone(o_fb, schema=o_fb.get_schema())

py/koladata/operators/tests/core_deep_clone_test.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -153,11 +153,9 @@ def test_named_schema(self, pass_schema):
153153

154154
@parameterized.parameters(True, False)
155155
def test_clone_only_reachable(self, pass_schema):
156-
db = data_bag.DataBag.empty_mutable()
157-
fb = data_bag.DataBag.empty_mutable()
158-
a_slice = db.new(b=ds([1, None, 2]), c=ds(['foo', 'bar', 'baz']))
159-
_ = fb.new(a=a_slice.no_bag(), c=ds([1, None, 2]))
160-
o = db.new(a=a_slice)
156+
a_slice = kd.new(b=ds([1, None, 2]), c=ds(['foo', 'bar', 'baz']))
157+
fb = kd.new(a=a_slice.no_bag(), c=ds([1, None, 2])).get_bag()
158+
o = kd.new(a=a_slice)
161159
merged_bag = o.enriched(fb).get_bag().merge_fallbacks()
162160
o = o.with_bag(merged_bag)
163161
if pass_schema:
@@ -257,8 +255,7 @@ def test_metadata_implicit_schema(self):
257255
kd.core.with_metadata(a.get_obj_schema(), attrs='xy')
258256

259257
def test_metadata_chains(self):
260-
db = bag()
261-
ds_root = db.new(a=db.obj(x=1, y=2), b=db.obj(foo='bar'))
258+
ds_root = kd.new(a=kd.obj(x=1, y=2), b=kd.obj(foo='bar'))
262259
ds_a = ds_root.a
263260
for i in range(10):
264261
schema = kd.core.with_metadata(ds_a.get_obj_schema(), data=f'a_depth_{i}')

0 commit comments

Comments
 (0)