Skip to content
Open
4 changes: 4 additions & 0 deletions onnxruntime/core/providers/cpu/ml/label_encoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@ std::vector<T> GetAttribute(const OpKernelInfo& info, const std::string& name, c
} else {
ORT_ENFORCE(result.IsOK(), "LabelEncoder is missing attribute ", tensor_name, " or ", name);
}
ORT_ENFORCE(!utils::HasExternalData(attr_tensor_proto),
"Tensor attribute ", tensor_name, " with external data is not supported.");
SafeInt<int64_t> element_count(1);
for (auto dim : attr_tensor_proto.dims()) {
element_count *= dim;
Expand All @@ -135,6 +137,8 @@ T GetDefault(const OpKernelInfo& info, const std::string& attr_name, const T& ba
ONNX_NAMESPACE::TensorProto attr_tensor_proto;
auto result = info.GetAttr("default_tensor", &attr_tensor_proto);
if (result.IsOK() && utils::HasDataType(attr_tensor_proto)) {
ORT_ENFORCE(!utils::HasExternalData(attr_tensor_proto),
"Tensor attribute default_tensor with external data is not supported.");
T default_value;
result = utils::UnpackTensor<T>(attr_tensor_proto, std::filesystem::path(), &default_value, 1);
ORT_ENFORCE(result.IsOK(), "LabelEncoder could not unpack default tensor ", attr_name);
Expand Down
3 changes: 3 additions & 0 deletions onnxruntime/core/providers/cpu/ml/tree_ensemble_helper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ Status GetAnyVectorAttrsOrDefault(const OpKernelInfo& info, const std::string& n
ONNX_NAMESPACE::TensorProto proto;
auto result = info.GetAttr(name, &proto);

ORT_RETURN_IF(utils::HasExternalData(proto),
Comment thread
yuslepukhin marked this conversation as resolved.
Outdated
"Tensor attribute ", name, " with external data is not supported.");

SafeInt<int64_t> n_elements(1);
for (auto dim : proto.dims()) {
n_elements *= dim;
Expand Down
162 changes: 162 additions & 0 deletions onnxruntime/test/providers/cpu/ml/label_encoder_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -756,5 +756,167 @@ TEST(LabelEncoder, EmptyInputOpset4) {
test.Run();
}

// External data in tensor attributes is not supported. The kernel must reject such attributes
// during construction. These tests verify the rejection.
// In no-exceptions builds, ORT_ENFORCE calls abort() so these tests cannot run.
#if !defined(ORT_NO_EXCEPTIONS)
Comment thread
yuslepukhin marked this conversation as resolved.
Outdated

// A LabelEncoder (opset 4) whose keys_tensor attribute is flagged as externally stored must
// fail with an "external data is not supported" error.
TEST(LabelEncoder, RejectsExternalDataInKeysTensorOpset4) {
  using TensorProto = ONNX_NAMESPACE::TensorProto;

  OpTester tester("LabelEncoder", 4, onnxruntime::kMLDomain);

  // keys_tensor: declares two INT64 elements, but carries an external "location" entry
  // instead of inline data.
  TensorProto external_keys;
  external_keys.set_name("keys_tensor");
  external_keys.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
  external_keys.add_dims(2);
  external_keys.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL);
  auto& keys_location = *external_keys.add_external_data();
  keys_location.set_key("location");
  keys_location.set_value("some_file.bin");
  tester.AddAttribute("keys_tensor", external_keys);

  // values_tensor: ordinary inline INT64 data {10, 20}.
  TensorProto inline_values;
  inline_values.set_name("values_tensor");
  inline_values.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
  inline_values.add_dims(2);
  inline_values.add_int64_data(10);
  inline_values.add_int64_data(20);
  tester.AddAttribute("values_tensor", inline_values);

  // default_tensor: ordinary inline single-element INT64 tensor holding 0.
  TensorProto inline_default;
  inline_default.set_name("default_tensor");
  inline_default.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
  inline_default.add_dims(1);
  inline_default.add_int64_data(0);
  tester.AddAttribute("default_tensor", inline_default);

  tester.AddInput<int64_t>("X", {1, 2}, {1, 2});
  tester.AddOutput<int64_t>("Y", {1, 2}, {10, 20});

  // The CUDA EP is excluded: per the original note, it takes a different code path that
  // does not hit this check.
  tester.Run(OpTester::ExpectResult::kExpectFailure,
             "external data is not supported",
             {kCudaExecutionProvider});
}

// A LabelEncoder (opset 4) whose default_tensor attribute is flagged as externally stored must
// fail with an "external data is not supported" error.
TEST(LabelEncoder, RejectsExternalDataInDefaultTensorOpset4) {
  using TensorProto = ONNX_NAMESPACE::TensorProto;

  OpTester tester("LabelEncoder", 4, onnxruntime::kMLDomain);

  // Keys and values are supplied through the plain int64 list attributes.
  tester.AddAttribute("keys_int64s", std::vector<int64_t>{1, 2});
  tester.AddAttribute("values_int64s", std::vector<int64_t>{10, 20});

  // default_tensor: one INT64 element declared, but carrying an external "location" entry
  // instead of inline data.
  TensorProto external_default;
  external_default.set_name("default_tensor");
  external_default.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
  external_default.add_dims(1);
  external_default.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL);
  auto& default_location = *external_default.add_external_data();
  default_location.set_key("location");
  default_location.set_value("some_file.bin");
  tester.AddAttribute("default_tensor", external_default);

  tester.AddInput<int64_t>("X", {1, 2}, {1, 3});
  tester.AddOutput<int64_t>("Y", {1, 2}, {10, 0});

  // The CUDA EP is excluded: per the original note, it takes a different code path that
  // does not hit this check.
  tester.Run(OpTester::ExpectResult::kExpectFailure,
             "external data is not supported",
             {kCudaExecutionProvider});
}

// A LabelEncoder (opset 4) whose values_tensor attribute is flagged as externally stored must
// fail with an "external data is not supported" error.
TEST(LabelEncoder, RejectsExternalDataInValuesTensorOpset4) {
  using TensorProto = ONNX_NAMESPACE::TensorProto;

  OpTester tester("LabelEncoder", 4, onnxruntime::kMLDomain);

  // keys_tensor: ordinary inline INT64 data {1, 2}.
  TensorProto inline_keys;
  inline_keys.set_name("keys_tensor");
  inline_keys.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
  inline_keys.add_dims(2);
  inline_keys.add_int64_data(1);
  inline_keys.add_int64_data(2);
  tester.AddAttribute("keys_tensor", inline_keys);

  // values_tensor: declares two INT64 elements, but carries an external "location" entry
  // instead of inline data.
  TensorProto external_values;
  external_values.set_name("values_tensor");
  external_values.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
  external_values.add_dims(2);
  external_values.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL);
  auto& values_location = *external_values.add_external_data();
  values_location.set_key("location");
  values_location.set_value("some_file.bin");
  tester.AddAttribute("values_tensor", external_values);

  // default_tensor: ordinary inline single-element INT64 tensor holding 0.
  TensorProto inline_default;
  inline_default.set_name("default_tensor");
  inline_default.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
  inline_default.add_dims(1);
  inline_default.add_int64_data(0);
  tester.AddAttribute("default_tensor", inline_default);

  tester.AddInput<int64_t>("X", {1, 2}, {1, 2});
  tester.AddOutput<int64_t>("Y", {1, 2}, {10, 20});

  // The CUDA EP is excluded: per the original note, it takes a different code path that
  // does not hit this check.
  tester.Run(OpTester::ExpectResult::kExpectFailure,
             "external data is not supported",
             {kCudaExecutionProvider});
}

#endif // !defined(ORT_NO_EXCEPTIONS)
Comment thread
yuslepukhin marked this conversation as resolved.

// Duplicate keys: the expected output encodes first-occurrence-wins semantics
// (key 1 maps to 10 from its first entry, not to 99 from the later duplicate).
TEST(LabelEncoder, DuplicateKeysFirstWinsOpset4) {
  using TensorProto = ONNX_NAMESPACE::TensorProto;

  std::vector<std::int64_t> shape{1, 3};

  // Key 1 appears twice; its two entries carry the values 10 and 99 respectively.
  std::vector<int64_t> keys{1, 2, 1};
  std::vector<int64_t> mapped_values{10, 20, 99};

  // Input 3 is not a key, so the expected result for it is the default value 42.
  std::vector<int64_t> x_data{1, 2, 3};
  std::vector<int64_t> y_expected{10, 20, 42};

  OpTester tester("LabelEncoder", 4, onnxruntime::kMLDomain);

  tester.AddAttribute("keys_int64s", keys);
  tester.AddAttribute("values_int64s", mapped_values);

  // default_tensor: inline single-element INT64 tensor holding 42.
  TensorProto fallback;
  fallback.set_name("default_tensor");
  fallback.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
  fallback.add_dims(1);
  fallback.add_int64_data(42);
  tester.AddAttribute("default_tensor", fallback);

  tester.AddInput<int64_t>("X", shape, x_data);
  tester.AddOutput<int64_t>("Y", shape, y_expected);

  tester.Run();
}

// Scalar (zero-rank) default_tensor: a single element with no dims entries. The unmapped
// input 99 is expected to produce the default value -7.
TEST(LabelEncoder, ScalarDefaultTensorOpset4) {
  using TensorProto = ONNX_NAMESPACE::TensorProto;

  std::vector<std::int64_t> shape{1, 3};

  std::vector<int64_t> x_data{1, 2, 99};
  std::vector<int64_t> y_expected{10, 20, -7};

  OpTester tester("LabelEncoder", 4, onnxruntime::kMLDomain);

  tester.AddAttribute("keys_int64s", std::vector<int64_t>{1, 2});
  tester.AddAttribute("values_int64s", std::vector<int64_t>{10, 20});

  // add_dims() is deliberately never called, leaving the tensor zero-rank (a scalar)
  // with exactly one inline element.
  TensorProto scalar_default;
  scalar_default.set_name("default_tensor");
  scalar_default.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
  scalar_default.add_int64_data(-7);
  tester.AddAttribute("default_tensor", scalar_default);

  tester.AddInput<int64_t>("X", shape, x_data);
  tester.AddOutput<int64_t>("Y", shape, y_expected);

  tester.Run();
}

} // namespace test
} // namespace onnxruntime
54 changes: 54 additions & 0 deletions onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -452,5 +452,59 @@ TEST(MLOpTest, TreeEnsembleIssue25400) {
test.Run();
}

// External data in tensor attributes is not supported. In no-exceptions builds, the enforcement
// calls abort() so these tests cannot run.
#if !defined(ORT_NO_EXCEPTIONS)
Comment thread
yuslepukhin marked this conversation as resolved.
Outdated

// A TreeEnsemble (opset 5) whose nodes_splits tensor attribute is flagged as externally stored
// must fail with an "external data is not supported" error.
TEST(MLOpTest, TreeEnsembleRejectsExternalDataInTensorAttribute) {
  using TensorProto = ONNX_NAMESPACE::TensorProto;

  OpTester tester("TreeEnsemble", 5, onnxruntime::kMLDomain);

  // nodes_splits: declares three FLOAT elements, but carries an external "location" entry
  // instead of inline data.
  TensorProto external_splits;
  external_splits.set_name("nodes_splits");
  external_splits.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
  external_splits.add_dims(3);
  external_splits.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL);
  auto& splits_location = *external_splits.add_external_data();
  splits_location.set_key("location");
  splits_location.set_value("some_file.bin");
  tester.AddAttribute("nodes_splits", external_splits);

  // The remaining attributes carry ordinary inline data.
  // leaf_weights: inline FLOAT data {1.0, 2.0}.
  TensorProto inline_leaf_weights;
  inline_leaf_weights.set_name("leaf_weights");
  inline_leaf_weights.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
  inline_leaf_weights.add_dims(2);
  inline_leaf_weights.add_float_data(1.0f);
  inline_leaf_weights.add_float_data(2.0f);
  tester.AddAttribute("leaf_weights", inline_leaf_weights);

  // nodes_modes: a single UINT8 element whose value is placed in the int32_data field.
  TensorProto inline_modes;
  inline_modes.set_name("nodes_modes");
  inline_modes.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_UINT8);
  inline_modes.add_dims(1);
  inline_modes.add_int32_data(0);
  tester.AddAttribute("nodes_modes", inline_modes);

  tester.AddAttribute("aggregate_function", static_cast<int64_t>(1));
  tester.AddAttribute("leaf_targetids", std::vector<int64_t>{0, 0});
  tester.AddAttribute("n_targets", static_cast<int64_t>(1));
  tester.AddAttribute("nodes_falseleafs", std::vector<int64_t>{1});
  tester.AddAttribute("nodes_falsenodeids", std::vector<int64_t>{1});
  tester.AddAttribute("nodes_featureids", std::vector<int64_t>{0});
  tester.AddAttribute("nodes_trueleafs", std::vector<int64_t>{1});
  tester.AddAttribute("nodes_truenodeids", std::vector<int64_t>{0});
  tester.AddAttribute("post_transform", static_cast<int64_t>(0));
  tester.AddAttribute("tree_roots", std::vector<int64_t>{0});

  std::vector<float> features = {1.f};
  tester.AddInput<float>("X", {1, 1}, features);
  tester.AddOutput<float>("Y", {1, 1}, {0.f});

  tester.Run(OpTester::ExpectResult::kExpectFailure, "external data is not supported");
}

#endif // !defined(ORT_NO_EXCEPTIONS)
Comment thread
yuslepukhin marked this conversation as resolved.

} // namespace test
} // namespace onnxruntime
47 changes: 47 additions & 0 deletions onnxruntime/test/providers/cpu/ml/treeregressor_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1081,5 +1081,52 @@ TEST(MLOpTest, TreeEnsembleRegressorBaseValuesWrongSize) {
test.Run(OpTester::ExpectResult::kExpectFailure, "base_values should have 0 or 2 values.");
}

// External data in tensor attributes is not supported. The kernel must reject such attributes
// during construction. In no-exceptions builds, ORT_ENFORCE/ORT_THROW_IF_ERROR call abort(),
// so these tests cannot run.
#if !defined(ORT_NO_EXCEPTIONS)

// A TreeEnsembleRegressor (opset 3) whose nodes_values_as_tensor attribute is flagged as
// externally stored must fail with an "external data is not supported" error.
TEST(MLOpTest, TreeEnsembleRegressorRejectsExternalDataInTensorAttribute) {
  using TensorProto = ONNX_NAMESPACE::TensorProto;

  OpTester tester("TreeEnsembleRegressor", 3, onnxruntime::kMLDomain);

  // Tree structure: one BRANCH_LEQ node (id 0) followed by two LEAF nodes (ids 1 and 2),
  // all belonging to tree 0.
  std::vector<int64_t> true_node_ids = {1, 0, 0};
  std::vector<int64_t> false_node_ids = {2, 0, 0};
  std::vector<int64_t> tree_ids = {0, 0, 0};
  std::vector<int64_t> node_ids = {0, 1, 2};
  std::vector<int64_t> feature_ids = {0, 0, 0};
  std::vector<std::string> node_modes = {"BRANCH_LEQ", "LEAF", "LEAF"};

  tester.AddAttribute("nodes_truenodeids", true_node_ids);
  tester.AddAttribute("nodes_falsenodeids", false_node_ids);
  tester.AddAttribute("nodes_treeids", tree_ids);
  tester.AddAttribute("nodes_nodeids", node_ids);
  tester.AddAttribute("nodes_featureids", feature_ids);
  tester.AddAttribute("nodes_modes", node_modes);
  tester.AddAttribute("target_treeids", std::vector<int64_t>{0, 0});
  tester.AddAttribute("target_nodeids", std::vector<int64_t>{1, 2});
  tester.AddAttribute("target_ids", std::vector<int64_t>{0, 0});
  tester.AddAttribute("target_weights", std::vector<float>{1.f, 2.f});
  tester.AddAttribute("n_targets", static_cast<int64_t>(1));

  // nodes_values is intentionally not set; nodes_values_as_tensor is supplied instead. It
  // declares three FLOAT elements but carries an external "location" entry rather than
  // inline data.
  TensorProto external_node_values;
  external_node_values.set_name("nodes_values_as_tensor");
  external_node_values.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
  external_node_values.add_dims(3);
  external_node_values.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL);
  auto& node_values_location = *external_node_values.add_external_data();
  node_values_location.set_key("location");
  node_values_location.set_value("some_file.bin");
  tester.AddAttribute("nodes_values_as_tensor", external_node_values);

  std::vector<float> features = {1.f};
  tester.AddInput<float>("X", {1, 1}, features);
  tester.AddOutput<float>("Y", {1, 1}, {0.f});

  tester.Run(OpTester::ExpectResult::kExpectFailure, "external data is not supported");
}

#endif // !defined(ORT_NO_EXCEPTIONS)
Comment thread
yuslepukhin marked this conversation as resolved.

} // namespace test
} // namespace onnxruntime
Loading