Skip to content

Commit 3e5f089

Browse files
apronchenkovcopybara-github
authored andcommitted
Change from_py and map_py to always assign a bag when schema=OBJECT is requested
Motivation: * Removed an edge case in the behaviour of `kd.from_py(..., from_dim=1)` where the result would unexpectedly be assigned a bag when the input list was empty. This edge case was encountered while optimising `map_py_on_present`. * Removed discrepancy: - `kd.map_py(lambda x: x, [], schema=kd.OBJECT).x` ok - `kd.map_py(lambda x: x, [None], schema=kd.OBJECT).x` ok\ while - `kd.map_py(lambda x: x, []).x` ok - `kd.map_py(lambda x: x, [None]).x` failure PiperOrigin-RevId: 714118231 Change-Id: I57aab375f6185a31cc33a9f03a899cca336ee902
1 parent c869090 commit 3e5f089

File tree

6 files changed

+23
-15
lines changed

6 files changed

+23
-15
lines changed

py/koladata/functions/tests/from_py_test.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -197,8 +197,9 @@ def test_primitive(self):
197197
self.assertIsNone(item.get_bag())
198198

199199
item = fns.from_py(42, schema=schema_constants.OBJECT)
200-
testing.assert_equal(item, ds(42, schema_constants.OBJECT))
201-
self.assertIsNone(item.get_bag())
200+
testing.assert_equal(item.no_bag(), ds(42, schema_constants.OBJECT))
201+
self.assertIsNotNone(item.get_bag())
202+
self.assertFalse(item.get_bag().is_mutable())
202203

203204
def test_primitive_casting_error(self):
204205
with self.assertRaisesRegex(
@@ -225,23 +226,27 @@ def test_primitives_common_schema(self):
225226

226227
def test_primitives_object(self):
227228
res = fns.from_py([1, 3.14], from_dim=1, schema=schema_constants.OBJECT)
228-
testing.assert_equal(res, ds([1, 3.14], schema_constants.OBJECT))
229-
self.assertIsNone(res.get_bag())
229+
testing.assert_equal(res.no_bag(), ds([1, 3.14], schema_constants.OBJECT))
230+
self.assertIsNotNone(res.get_bag())
231+
self.assertFalse(res.get_bag().is_mutable())
230232

231233
def test_empty_object(self):
232234
res = fns.from_py(None, schema=schema_constants.OBJECT)
233235
testing.assert_equal(res.no_bag(), ds(None, schema_constants.OBJECT))
234236
self.assertIsNotNone(res.get_bag())
237+
self.assertFalse(res.get_bag().is_mutable())
235238

236239
res = fns.from_py([], from_dim=1, schema=schema_constants.OBJECT)
237240
testing.assert_equal(res.no_bag(), ds([], schema_constants.OBJECT))
238241
self.assertIsNotNone(res.get_bag())
242+
self.assertFalse(res.get_bag().is_mutable())
239243

240244
res = fns.from_py([None, None], from_dim=1, schema=schema_constants.OBJECT)
241245
testing.assert_equal(
242246
res.no_bag(), ds([None, None], schema_constants.OBJECT)
243247
)
244248
self.assertIsNotNone(res.get_bag())
249+
self.assertFalse(res.get_bag().is_mutable())
245250

246251
def test_list_from_dim(self):
247252
input_list = [[1, 2.0], [3, 4]]
@@ -306,7 +311,9 @@ def test_list_from_dim_with_schema(self):
306311

307312
l3 = fns.from_py(input_list, schema=schema_constants.OBJECT, from_dim=2)
308313
self.assertEqual(l3.get_ndim(), 2)
309-
testing.assert_equal(l3, ds([[1, 2.0], [3, 4]], schema_constants.OBJECT))
314+
testing.assert_equal(
315+
l3.no_bag(), ds([[1, 2.0], [3, 4]], schema_constants.OBJECT)
316+
)
310317

311318
def test_dict_from_dim(self):
312319
input_dict = [{ds('a'): [1, 2], 'b': [42]}, {ds('c'): [3, 4], 'd': [34]}]

py/koladata/operators/tests/py_map_py_on_cond_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ def test_empty_input(self):
9090

9191
res = expr_eval.eval(kde.py.map_py_on_cond(yes_fn, None, cond, x=val))
9292
testing.assert_equal(res.no_bag(), ds([], schema_constants.OBJECT))
93-
self.assertIsNotNone(res.get_bag())
93+
self.assertIsNone(res.get_bag())
9494

9595
res = expr_eval.eval(
9696
kde.py.map_py_on_cond(

py/koladata/operators/tests/py_map_py_on_present_test.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def test_rank_2(self):
5353
testing.assert_equal(r.no_bag(), ds([[4.5, 5.5], [], []]))
5454

5555
def test_empty_input(self):
56-
yes_fn = lambda x: x + 1 if x is not None else 0
56+
yes_fn = lambda x: x + 1
5757

5858
val = ds([])
5959
res = expr_eval.eval(
@@ -64,7 +64,7 @@ def test_empty_input(self):
6464

6565
res = expr_eval.eval(kde.py.map_py_on_present(yes_fn, x=val))
6666
testing.assert_equal(res.no_bag(), ds([], schema_constants.OBJECT))
67-
self.assertIsNotNone(res.get_bag())
67+
self.assertIsNone(res.get_bag())
6868

6969
res = expr_eval.eval(
7070
kde.py.map_py_on_present(yes_fn, x=val, schema=schema_constants.OBJECT)

py/koladata/operators/tests/py_map_py_on_selected_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def test_empty_input(self):
7777

7878
res = expr_eval.eval(kde.py.map_py_on_selected(fn, cond, x=val))
7979
testing.assert_equal(res.no_bag(), ds([], schema_constants.OBJECT))
80-
self.assertIsNotNone(res.get_bag())
80+
self.assertIsNone(res.get_bag())
8181

8282
res = expr_eval.eval(
8383
kde.py.map_py_on_selected(

py/koladata/operators/tests/py_map_py_test.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ def my_fn(x):
300300
kde.py.map_py(my_fn, val, include_missing=include_missing)
301301
)
302302
testing.assert_equal(res.no_bag(), ds([[]], schema_constants.OBJECT))
303-
self.assertIsNotNone(res.get_bag())
303+
self.assertIsNone(res.get_bag())
304304

305305
with self.subTest('schema=FLOAT32'):
306306
res = expr_eval.eval(
@@ -324,7 +324,7 @@ def my_fn(x):
324324
)
325325
)
326326
testing.assert_equal(res.no_bag(), ds([[]], schema_constants.OBJECT))
327-
self.assertIsNotNone(res.get_bag())
327+
self.assertFalse(res.get_bag().is_mutable())
328328

329329
@parameterized.parameters(False, True)
330330
def test_map_py_all_missing_input(self, include_missing):
@@ -364,7 +364,7 @@ def my_fn(x):
364364
)
365365
)
366366
testing.assert_equal(res.no_bag(), ds([[None]], schema_constants.OBJECT))
367-
self.assertIsNotNone(res.get_bag())
367+
self.assertFalse(res.get_bag().is_mutable())
368368

369369
def test_map_py_scalar_input(self):
370370
def add_one(x):

py/koladata/types/boxing.cc

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1530,10 +1530,11 @@ absl::StatusOr<DataSlice> GenericFromPyObject(
15301530
DCHECK(res_db == nullptr || res_db->IsMutable());
15311531
if (res_slice.GetBag() == nullptr) {
15321532
ASSIGN_OR_RETURN(res_db, adoption_queue.GetCommonOrMergedDb());
1533-
// Attach an empty DataBag if the result is empty and has OBJECT schema.
1534-
if (res_db == nullptr && res_slice.present_count() == 0 &&
1535-
res_slice.GetSchemaImpl() == schema::kObject) {
1533+
// If the result has no associated DataBag but an OBJECT schema was
1534+
// requested, attach an empty DataBag.
1535+
if (res_db == nullptr && schema && schema->item() == schema::kObject) {
15361536
res_db = DataBag::Empty();
1537+
res_db->UnsafeMakeImmutable();
15371538
}
15381539
return res_slice.WithBag(std::move(res_db));
15391540
}

0 commit comments

Comments
 (0)