Skip to content

Commit 32fc79b

Browse files
atorerocopybara-github
authored andcommitted
Use FromPy in kd.new.
PiperOrigin-RevId: 859109353 Change-Id: I3c1241ed572bc56bcce7ddd54c2ccc85ccaba0d7
1 parent 54610e0 commit 32fc79b

File tree

6 files changed

+69
-69
lines changed

6 files changed

+69
-69
lines changed

py/koladata/base/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ cc_library(
214214
"//koladata:data_slice",
215215
"//koladata:data_slice_qtype",
216216
"//koladata:object_factories",
217+
"//py/koladata/base/py_conversions:from_py",
217218
"@com_google_absl//absl/log:check",
218219
"@com_google_absl//absl/status",
219220
"@com_google_absl//absl/status:statusor",

py/koladata/base/boxing.cc

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1352,26 +1352,4 @@ class UniversalConverter {
13521352

13531353
} // namespace
13541354

1355-
absl::StatusOr<DataSlice> EntitiesFromPyObject(PyObject* py_obj,
1356-
const DataBagPtr& db,
1357-
AdoptionQueue& adoption_queue) {
1358-
return EntitiesFromPyObject(py_obj, /*schema=*/std::nullopt,
1359-
/*itemid=*/std::nullopt, db, adoption_queue);
1360-
}
1361-
1362-
absl::StatusOr<DataSlice> EntitiesFromPyObject(
1363-
PyObject* py_obj, const std::optional<DataSlice>& schema,
1364-
const std::optional<DataSlice>& itemid, const DataBagPtr& db,
1365-
AdoptionQueue& adoption_queue) {
1366-
// NOTE: UniversalConverter does not allow converting multi-dimensional
1367-
// DataSlices, so we are processing it before invoking the UniversalConverter.
1368-
if (arolla::python::IsPyQValueInstance(py_obj) && !itemid.has_value()) {
1369-
ASSIGN_OR_RETURN(auto res,
1370-
DataSliceFromPyValue(py_obj, adoption_queue, schema));
1371-
return EntityCreator::ConvertWithoutAdopt(db, res);
1372-
}
1373-
return UniversalConverter<EntityCreator>(db, adoption_queue)
1374-
.Convert(py_obj, schema, /*from_dim=*/0, itemid);
1375-
}
1376-
13771355
} // namespace koladata::python

py/koladata/base/boxing.h

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
#include "absl/status/statusor.h"
2727
#include "absl/strings/string_view.h"
2828
#include "koladata/adoption_utils.h"
29-
#include "koladata/data_bag.h"
3029
#include "koladata/data_slice.h"
3130
#include "koladata/internal/data_item.h"
3231

@@ -86,25 +85,6 @@ inline absl::StatusOr<DataSlice> DataSliceFromPyValueNoAdoption(
8685
absl::StatusOr<DataSlice> DataItemFromPyValue(
8786
PyObject* py_obj, const std::optional<DataSlice>& schema = std::nullopt);
8887

89-
// Converts Python objects into DataSlices and converts them into appropriate
90-
// Koda Entities. The conversion is deep, such that all nested structures (e.g.
91-
// dicts) are also Entities (including Koda Lists and Dicts) or primitive
92-
// DataSlices.
93-
//
94-
// `db` and `adoption_queue` are side outputs. `db` is used to create Koda
95-
// objects found under `py_obj`, while `adoption_queue` is used to collect
96-
// DataBag(s) of DataSlice(s) from nested `py_obj`.
97-
absl::StatusOr<DataSlice> EntitiesFromPyObject(PyObject* py_obj,
98-
const DataBagPtr& db,
99-
AdoptionQueue& adoption_queue);
100-
101-
// Same as above, but allows specifying the schemas of Lists / Dicts. When
102-
// schema == OBJECT, the behavior is the same as `ObjectFromPyObject`.
103-
absl::StatusOr<DataSlice> EntitiesFromPyObject(
104-
PyObject* py_obj, const std::optional<DataSlice>& schema,
105-
const std::optional<DataSlice>& itemid, const DataBagPtr& db,
106-
AdoptionQueue& adoption_queue);
107-
10888
// Returns an Error from provided status with incompatible schema information
10989
// during narrow casting.
11090
absl::Status CreateIncompatibleSchemaErrorFromStatus(

py/koladata/base/py_utils.cc

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "koladata/object_factories.h"
3434
#include "py/arolla/abc/py_qvalue.h"
3535
#include "py/koladata/base/boxing.h"
36+
#include "py/koladata/base/py_conversions/from_py.h"
3637
#include "arolla/util/status_macros_backport.h"
3738

3839
namespace koladata::python {
@@ -76,7 +77,25 @@ absl::StatusOr<DataSlice> AssignmentRhsFromPyValue(
7677
" the slice, or kd.dict_like() to create multiple"
7778
" dictionary instances");
7879
}
79-
return EntitiesFromPyObject(rhs, db, adoption_queue);
80+
81+
if (arolla::python::IsPyQValueInstance(rhs)) {
82+
const auto& typed_value = arolla::python::UnsafeUnwrapPyQValue(rhs);
83+
if (typed_value.GetType() == arolla::GetQType<DataSlice>()) {
84+
return typed_value.UnsafeAs<DataSlice>();
85+
} else {
86+
return absl::InvalidArgumentError(absl::StrFormat(
87+
"only DataSlice QValues are supported as objects; got: %s.",
88+
typed_value.GetType()->name()));
89+
}
90+
}
91+
92+
ASSIGN_OR_RETURN(DataSlice res,
93+
FromPy(rhs, std::nullopt, /*from_dim=*/0,
94+
/*dict_as_obj=*/false, /*itemid=*/std::nullopt));
95+
96+
adoption_queue.Add(res);
97+
RETURN_IF_ERROR(adoption_queue.AdoptInto(*db));
98+
return res.WithBag(db);
8099
}
81100
ASSIGN_OR_RETURN(auto res, DataSliceFromPyValue(rhs, adoption_queue));
82101
if (res.GetShape().rank() > 0) {

py/koladata/functions/tests/new_test.py

Lines changed: 42 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -203,12 +203,9 @@ def test_schema_arg_dict_schema_error(self):
203203
list_schema = kde.list_schema(item_schema=schema_constants.FLOAT32).eval()
204204
with self.assertRaisesRegex(
205205
ValueError,
206-
re.escape(
207-
'Python Dict can be converted to either Entity or Dict, got schema:'
208-
' DataItem(LIST[FLOAT32]'
209-
),
206+
re.escape('schema mismatch: expected list/tuple'),
210207
):
211-
fns.new({'a': [1, 2, 3], 'b': [4, 5]}, schema=list_schema)
208+
_ = fns.new({'a': [1, 2, 3], 'b': [4, 5]}, schema=list_schema)
212209

213210
def test_schema_arg_schema_with_fallback(self):
214211
schema = kde.schema.new_schema(a=schema_constants.INT32).eval()
@@ -514,12 +511,14 @@ def test_universal_converter_list_of_complex(self):
514511
)
515512

516513
def test_universal_converter_list_of_different_primitive_lists(self):
517-
with self.assertRaisesRegex(ValueError, 'cannot find a common schema'):
518-
fns.new([[1, 2], [3.14]])
519-
fns.new(
514+
x = fns.new([[1, 2], [3.14]])
515+
y = fns.new(
520516
[[1, 2], [3.14]],
521-
schema=kde.list_schema(kde.list_schema(schema_constants.FLOAT32)).eval()
517+
schema=kde.list_schema(
518+
kde.list_schema(schema_constants.FLOAT32)
519+
).eval(),
522520
)
521+
testing.assert_equivalent(x, y)
523522

524523
def test_universal_converter_container_contains_multi_dim_data_slice(self):
525524
with self.assertRaisesRegex(
@@ -596,12 +595,12 @@ def test_universal_converter_entity(self):
596595
with self.subTest('item'):
597596
entity = fns.new(a=42, b='abc')
598597
new_entity = fns.new(entity)
599-
testing.assert_not_equal(entity.get_bag(), new_entity.get_bag())
598+
testing.assert_equal(entity.get_bag(), new_entity.get_bag())
600599
testing.assert_equivalent(new_entity, entity)
601600
with self.subTest('slice'):
602601
entity = fns.new(a=ds([1, 2]), b='abc')
603602
new_entity = fns.new(entity)
604-
testing.assert_not_equal(entity.get_bag(), new_entity.get_bag())
603+
testing.assert_equal(entity.get_bag(), new_entity.get_bag())
605604
testing.assert_equivalent(new_entity, entity)
606605

607606
def test_universal_converter_adopt_bag_data(self):
@@ -624,19 +623,29 @@ def test_universal_converter_with_cross_ref_schema_conflict(self):
624623
d2 = {'b': 37}
625624
d1['d'] = d2
626625
d = {'d1': d1, 'd2': d2}
627-
with self.assertRaisesRegex(ValueError, 'cannot find a common schema'):
626+
with self.assertRaisesRegex(
627+
ValueError,
628+
'could not parse list of primitives / data items: object with'
629+
' unsupported type: dict',
630+
):
628631
fns.new(d)
629632

630633
def test_universal_converter_with_itemid(self):
631634
itemid = kde.uuid_for_list('new').eval()
632635
res = fns.new([{'a': 42, 'b': 17}, {'c': 12}], itemid=itemid)
633-
child_itemid = kde.uuid_for_dict(
634-
'__from_py_child__', parent=itemid,
635-
list_item_index=ds([0, 1], schema_constants.INT64)
636+
child_itemid = kde.uuid(
637+
'__from_py_child__',
638+
parent=itemid,
639+
list_item_index=ds([0, 1], schema_constants.INT64),
640+
).eval()
641+
child_dict_item_id = kde.uuid_for_dict(
642+
'',
643+
base_itemid=child_itemid,
636644
).eval()
645+
637646
testing.assert_equal(res.no_bag().get_itemid(), itemid)
638647
testing.assert_dicts_keys_equal(res[:], ds([['a', 'b'], ['c']]))
639-
testing.assert_equal(res[:].no_bag().get_itemid(), child_itemid)
648+
testing.assert_equal(res[:].no_bag().get_itemid(), child_dict_item_id)
640649

641650
with self.assertRaisesRegex(
642651
ValueError, 'itemid argument to list creation, requires List ItemIds'
@@ -650,12 +659,18 @@ def test_universal_converter_with_itemid(self):
650659

651660
def test_universal_converter_recursive_object_error(self):
652661
d = {'a': 42}
653-
d['self'] = d
654-
with self.assertRaisesRegex(ValueError, 'recursive .* cannot be converted'):
662+
d['a'] = d
663+
with self.assertRaisesRegex(
664+
ValueError,
665+
'recursive Python object cannot be converted',
666+
):
655667
fns.new(d)
656668
# Deeper recursion:
657669
d2 = {'a': {'b': d}}
658-
with self.assertRaisesRegex(ValueError, 'recursive .* cannot be converted'):
670+
with self.assertRaisesRegex(
671+
ValueError,
672+
'recursive Python object cannot be converted',
673+
):
659674
fns.new(d2)
660675
# Longer cycle:
661676
d = {'a': 42}
@@ -665,12 +680,17 @@ def test_universal_converter_recursive_object_error(self):
665680
level_1_d = d
666681
d = {'top': d}
667682
bottom_d['cycle'] = level_1_d
668-
with self.assertRaisesRegex(ValueError, 'recursive .* cannot be converted'):
683+
with self.assertRaisesRegex(
684+
ValueError,
685+
'recursive Python object cannot be converted',
686+
):
669687
fns.new(d2)
670688
# Cycle in list:
671-
l = [[1, 2], 3]
689+
l = [[1, 2], (3)]
672690
l[0].append(l)
673-
with self.assertRaisesRegex(ValueError, 'recursive .* cannot be converted'):
691+
with self.assertRaisesRegex(
692+
ValueError, 'schema mismatch: expected list/tuple'
693+
):
674694
fns.new(l)
675695

676696
def test_universal_converter_with_attrs(self):

py/koladata/types/data_bag.cc

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -195,10 +195,12 @@ struct EntityCreatorHelper {
195195
PyObject* py_obj, const std::optional<DataSlice>& schema_arg,
196196
const std::optional<DataSlice>& itemid, const DataBagPtr& db,
197197
AdoptionQueue& adoption_queue) {
198-
ASSIGN_OR_RETURN(
199-
DataSlice res,
200-
EntitiesFromPyObject(py_obj, schema_arg, itemid, db, adoption_queue));
201-
return res.WithBag(db);
198+
if (arolla::python::IsPyQValueInstance(py_obj) && !itemid.has_value()) {
199+
return DataSliceFromPyValue(py_obj, adoption_queue, schema_arg);
200+
}
201+
202+
return FromPy(py_obj, schema_arg, /*from_dim=*/0,
203+
/*dict_as_obj=*/false, itemid);
202204
}
203205
};
204206

0 commit comments

Comments
 (0)