Skip to content

Commit 88d4540

Browse files
timofey-stepanov copybara-github
authored and committed
ExpectHaveCommonSchema
So far I have used a one-line error message, in line with the other `Expect*` functions. If we want to switch to multiline error messages, we should probably do so consistently across all of these functions. PiperOrigin-RevId: 713654374 Change-Id: I5bd9e9b8703a4de3bf9e208bc742fcab3e4e2e2a
1 parent ca68448 commit 88d4540

File tree

10 files changed

+96
-40
lines changed

10 files changed

+96
-40
lines changed

koladata/operators/comparison.cc

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -103,13 +103,8 @@ absl::StatusOr<DataSlice> GreaterEqual(const DataSlice& x, const DataSlice& y) {
103103
absl::StatusOr<DataSlice> Equal(const DataSlice& x, const DataSlice& y) {
104104
// NOTE: Casting is handled internally by EqualOp. The schema compatibility is
105105
// still verified to ensure that e.g. ITEMID and OBJECT are not compared.
106-
RETURN_IF_ERROR(
107-
schema::CommonSchema(x.GetSchemaImpl(), y.GetSchemaImpl()).status())
108-
.With([&](const absl::Status& status) {
109-
return AssembleErrorMessage(status,
110-
{.db = DataBag::ImmutableEmptyWithFallbacks(
111-
{x.GetBag(), y.GetBag()})});
112-
});
106+
RETURN_IF_ERROR(ExpectHaveCommonSchema({"x", "y"}, x, y))
107+
.With(OpError("kd.comparison.equal"));
113108
return DataSliceOp<internal::EqualOp>()(
114109
x, y, internal::DataItem(schema::kMask), nullptr);
115110
}

koladata/operators/masking.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
#include "koladata/internal/op_utils/presence_or.h"
3333
#include "koladata/internal/op_utils/utils.h"
3434
#include "koladata/operators/arolla_bridge.h"
35-
#include "koladata/repr_utils.h"
3635
#include "koladata/schema_utils.h"
3736
#include "arolla/util/status_macros_backport.h"
3837

@@ -52,9 +51,10 @@ inline absl::StatusOr<DataSlice> ApplyMask(const DataSlice& obj,
5251
// kde.masking.coalesce.
5352
inline absl::StatusOr<DataSlice> Coalesce(const DataSlice& x,
5453
const DataSlice& y) {
54+
RETURN_IF_ERROR(ExpectHaveCommonSchema({"x", "y"}, x, y))
55+
.With(OpError("kd.masking.coalesce"));
5556
auto res_db = DataBag::CommonDataBag({x.GetBag(), y.GetBag()});
56-
ASSIGN_OR_RETURN(auto aligned_slices, AlignSchemas({x, y}),
57-
AssembleErrorMessage(_, {.db = res_db}));
57+
ASSIGN_OR_RETURN(auto aligned_slices, AlignSchemas({x, y}));
5858
return DataSliceOp<internal::PresenceOrOp>()(
5959
aligned_slices.slices[0], aligned_slices.slices[1],
6060
aligned_slices.common_schema, std::move(res_db));

koladata/schema_utils.cc

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,23 @@ absl::Status ExpectConsistentStringOrBytesImpl(
220220

221221
} // namespace schema_utils_internal
222222

223+
absl::Status ExpectHaveCommonSchema(
224+
absl::Span<const absl::string_view> arg_names, const DataSlice& lhs,
225+
const DataSlice& rhs) {
226+
if (arg_names.size() != 2) {
227+
return absl::InternalError("arg_names must have exactly 2 elements");
228+
}
229+
if (schema::CommonSchema(lhs.GetSchemaImpl(), rhs.GetSchemaImpl()).ok()) {
230+
return absl::OkStatus();
231+
}
232+
return absl::InvalidArgumentError(
233+
absl::StrFormat("arguments `%s` and `%s` must contain values castable to "
234+
"a common type, got %s and %s",
235+
arg_names[0], arg_names[1],
236+
schema_utils_internal::DescribeSliceSchema(lhs),
237+
schema_utils_internal::DescribeSliceSchema(rhs)));
238+
}
239+
223240
absl::Status ExpectHaveCommonPrimitiveSchema(
224241
absl::Span<const absl::string_view> arg_names, const DataSlice& lhs,
225242
const DataSlice& rhs) {

koladata/schema_utils.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,11 @@ inline absl::Status ExpectConsistentStringOrBytes(absl::string_view arg_name,
9090
{&arg});
9191
}
9292

93+
// Returns OK if the DataSlices contain values castable to a common type.
94+
absl::Status ExpectHaveCommonSchema(
95+
absl::Span<const absl::string_view> arg_names, const DataSlice& lhs,
96+
const DataSlice& rhs);
97+
9398
// Returns OK if the DataSlices contain values castable to a common primitive
9499
// type.
95100
// NOTE: arg_names must have exactly 2 elements.

koladata/schema_utils_test.cc

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,25 @@ TEST(SchemaUtilsTest, ExpectConsistentStringOrBytes) {
481481
"slice of OBJECT with items of types BYTES, STRING"));
482482
}
483483

484+
TEST(SchemaUtilsTest, ExpectHaveCommonSchema) {
485+
auto empty_and_unknown = test::DataItem(std::nullopt, schema::kObject);
486+
auto integer = test::DataSlice<int>({1, 2, std::nullopt});
487+
auto floating = test::DataSlice<float>({1, 2, std::nullopt});
488+
auto bytes = test::DataSlice<std::string>({"a", "b", std::nullopt});
489+
auto bytes_any =
490+
test::DataSlice<std::string>({"a", "b", std::nullopt}, schema::kAny);
491+
auto schema = test::DataItem(std::nullopt, schema::kSchema);
492+
493+
EXPECT_THAT(ExpectHaveCommonSchema({"foo", "bar"}, bytes, empty_and_unknown),
494+
IsOk());
495+
EXPECT_THAT(ExpectHaveCommonSchema({"foo", "bar"}, bytes, bytes_any), IsOk());
496+
EXPECT_THAT(ExpectHaveCommonSchema({"foo", "bar"}, integer, bytes), IsOk());
497+
EXPECT_THAT(ExpectHaveCommonSchema({"foo", "bar"}, integer, schema),
498+
StatusIs(absl::StatusCode::kInvalidArgument,
499+
"arguments `foo` and `bar` must contain values castable "
500+
"to a common type, got INT32 and SCHEMA"));
501+
}
502+
484503
TEST(SchemaUtilsTest, ExpectHaveCommonPrimitiveSchema) {
485504
auto empty_and_unknown = test::DataItem(std::nullopt, schema::kObject);
486505
auto integer = test::DataSlice<int>({1, 2, std::nullopt});

py/koladata/operators/tests/comparison_equal_test.py

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
"""Tests for kde.comparison.equal."""
15+
import re
1616

1717
from absl.testing import absltest
1818
from absl.testing import parameterized
@@ -115,32 +115,39 @@ def test_qtype_signatures(self):
115115
def test_raises_on_incompatible_schemas(self):
116116
with self.assertRaisesRegex(
117117
exceptions.KodaError,
118-
r"""cannot find a common schema for provided schemas
119-
120-
the common schema\(s\) INT32: INT32
121-
the first conflicting schema [0-9a-f]{32}:0: SCHEMA\(\)""",
118+
re.escape(
119+
'kd.comparison.equal: arguments `x` and `y` must contain values'
120+
' castable to a common type, got SCHEMA() and INT32'
121+
),
122122
):
123123
expr_eval.eval(kde.comparison.equal(ENTITY_1, ds(1)))
124124

125125
db = data_bag.DataBag.empty()
126126
with self.assertRaisesRegex(
127127
exceptions.KodaError,
128-
r"""cannot find a common schema for provided schemas
129-
130-
the common schema\(s\) [0-9a-f]{32}:0: SCHEMA\(x=INT32\)
131-
the first conflicting schema [0-9a-f]{32}:0: SCHEMA\(\)""",
128+
re.escape(
129+
'kd.comparison.equal: arguments `x` and `y` must contain values'
130+
' castable to a common type, got SCHEMA(x=INT32) and SCHEMA()'
131+
),
132132
):
133133
expr_eval.eval(kde.comparison.equal(db.new(x=1), db.new()))
134134

135135
with self.assertRaisesRegex(
136136
exceptions.KodaError,
137-
'cannot find a common schema for provided schemas',
137+
re.escape(
138+
'kd.comparison.equal: arguments `x` and `y` must contain values'
139+
' castable to a common type, got SCHEMA(x=INT32) and OBJECT with an'
140+
' item of type ITEMID'
141+
),
138142
):
139143
expr_eval.eval(kde.comparison.equal(db.new(x=1), db.obj()))
140144

141145
with self.assertRaisesRegex(
142146
exceptions.KodaError,
143-
'cannot find a common schema for provided schemas',
147+
re.escape(
148+
'kd.comparison.equal: arguments `x` and `y` must contain values'
149+
' castable to a common type, got SCHEMA(x=INT32) and ITEMID'
150+
),
144151
):
145152
expr_eval.eval(
146153
kde.comparison.equal(

py/koladata/operators/tests/comparison_full_equal_test.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import re
16+
1517
from absl.testing import absltest
1618
from absl.testing import parameterized
1719
from arolla import arolla
@@ -109,32 +111,39 @@ def test_qtype_signatures(self):
109111
def test_raises_on_incompatible_schemas(self):
110112
with self.assertRaisesRegex(
111113
exceptions.KodaError,
112-
r"""cannot find a common schema for provided schemas
113-
114-
the common schema\(s\) INT32: INT32
115-
the first conflicting schema [0-9a-f]{32}:0: SCHEMA\(\)""",
114+
re.escape(
115+
'kd.comparison.equal: arguments `x` and `y` must contain values'
116+
' castable to a common type, got SCHEMA() and INT32'
117+
),
116118
):
117119
expr_eval.eval(kde.comparison.full_equal(ENTITY_1, ds(1)))
118120

119121
db = data_bag.DataBag.empty()
120122
with self.assertRaisesRegex(
121123
exceptions.KodaError,
122-
r"""cannot find a common schema for provided schemas
123-
124-
the common schema\(s\) [0-9a-f]{32}:0: SCHEMA\(x=INT32\)
125-
the first conflicting schema [0-9a-f]{32}:0: SCHEMA\(\)""",
124+
re.escape(
125+
'kd.comparison.equal: arguments `x` and `y` must contain values'
126+
' castable to a common type, got SCHEMA(x=INT32) and SCHEMA()'
127+
),
126128
):
127129
expr_eval.eval(kde.comparison.full_equal(db.new(x=1), db.new()))
128130

129131
with self.assertRaisesRegex(
130132
exceptions.KodaError,
131-
'cannot find a common schema for provided schemas',
133+
re.escape(
134+
'kd.comparison.equal: arguments `x` and `y` must contain values'
135+
' castable to a common type, got SCHEMA(x=INT32) and OBJECT with an'
136+
' item of type ITEMID'
137+
),
132138
):
133139
expr_eval.eval(kde.comparison.full_equal(db.new(x=1), db.obj()))
134140

135141
with self.assertRaisesRegex(
136142
exceptions.KodaError,
137-
'cannot find a common schema for provided schemas',
143+
re.escape(
144+
'kd.comparison.equal: arguments `x` and `y` must contain values'
145+
' castable to a common type, got SCHEMA(x=INT32) and ITEMID'
146+
),
138147
):
139148
expr_eval.eval(
140149
kde.comparison.full_equal(

py/koladata/operators/tests/masking_coalesce_test.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -143,10 +143,8 @@ def test_incompatible_schema_error(self):
143143
y = data_bag.DataBag.empty().new()
144144
with self.assertRaisesRegex(
145145
exceptions.KodaError,
146-
r"""cannot find a common schema for provided schemas
147-
148-
the common schema\(s\) INT32: INT32
149-
the first conflicting schema [0-9a-f]{32}:0: SCHEMA\(\)""",
146+
'kd.masking.coalesce: arguments `x` and `y` must contain values'
147+
' castable to a common type, got INT32 and SCHEMA()',
150148
):
151149
expr_eval.eval(kde.masking.coalesce(x, y))
152150

py/koladata/operators/tests/masking_cond_test.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import re
16+
1517
from absl.testing import absltest
1618
from absl.testing import parameterized
1719
from arolla import arolla
@@ -184,7 +186,11 @@ def test_incompatible_schema_error(self):
184186
x = ds([1, None])
185187
y = data_bag.DataBag.empty().new()
186188
with self.assertRaisesRegex(
187-
exceptions.KodaError, 'cannot find a common schema for provided schemas'
189+
exceptions.KodaError,
190+
re.escape(
191+
'kd.masking.coalesce: arguments `x` and `y` must contain values'
192+
' castable to a common type, got INT32 and SCHEMA()'
193+
),
188194
):
189195
expr_eval.eval(kde.masking.cond(ds(arolla.present()), x, y))
190196

py/koladata/operators/tests/masking_disjoint_coalesce_test.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -151,10 +151,10 @@ def test_incompatible_schema_error(self):
151151
y = data_bag.DataBag.empty().new() & ds(arolla.missing())
152152
with self.assertRaisesRegex(
153153
exceptions.KodaError,
154-
r"""cannot find a common schema for provided schemas
155-
156-
the common schema\(s\) INT32: INT32
157-
the first conflicting schema [0-9a-f]{32}:0: SCHEMA\(\)""",
154+
re.escape(
155+
'kd.masking.coalesce: arguments `x` and `y` must contain values'
156+
' castable to a common type, got INT32 and SCHEMA()'
157+
),
158158
):
159159
expr_eval.eval(kde.masking.disjoint_coalesce(x, y))
160160

0 commit comments

Comments
 (0)