Update example kernel EP to get the OrtEp from OrtKernelInfo and add a unit test

adrianlizarraga · adrianlizarraga · commit 52453f2e932a · 2025-12-26T14:49:11.000-08:00
diff --git a/onnxruntime/test/autoep/library/example_plugin_ep_kernel_registry/ep.cc b/onnxruntime/test/autoep/library/example_plugin_ep_kernel_registry/ep.cc
@@ -14,12 +14,13 @@
 #include "ep_factory.h"
 #include "../plugin_ep_utils.h"
 
-ExampleKernelEp::ExampleKernelEp(ExampleKernelEpFactory& factory, const OrtLogger& logger)
+ExampleKernelEp::ExampleKernelEp(ExampleKernelEpFactory& factory, const Config& config, const OrtLogger& logger)
     : OrtEp{},  // explicitly call the struct ctor to ensure all optional values are default initialized
       factory_{factory},
       ort_api_{factory.GetOrtApi()},
       ep_api_{factory.GetEpApi()},
       name_{factory.GetEpName()},
+      config_{config},
       logger_{logger} {
   ort_version_supported = ORT_API_VERSION;  // set to the ORT version we were compiled with.
 
diff --git a/onnxruntime/test/autoep/library/example_plugin_ep_kernel_registry/ep.h b/onnxruntime/test/autoep/library/example_plugin_ep_kernel_registry/ep.h
@@ -14,11 +14,16 @@ class ExampleKernelEpFactory;
 /// </summary>
 class ExampleKernelEp : public OrtEp {
  public:
-  ExampleKernelEp(ExampleKernelEpFactory& factory, const OrtLogger& logger);
+  struct Config {  // EP-level settings; built in CreateEpImpl and exposed to kernels via GetConfig().
+    bool enable_prepack_weight_sharing = false;  // Off unless the matching session config entry is "1".
+  };
+
+  ExampleKernelEp(ExampleKernelEpFactory& factory, const Config& config, const OrtLogger& logger);
   ~ExampleKernelEp();
 
   const OrtApi& GetOrtApi() const { return ort_api_; }
   const OrtEpApi& GetEpApi() const { return ep_api_; }
+  const Config& GetConfig() const { return config_; }
 
  private:
   static const char* ORT_API_CALL GetNameImpl(const OrtEp* this_ptr) noexcept;
@@ -34,5 +39,6 @@ class ExampleKernelEp : public OrtEp {
   const OrtApi& ort_api_;
   const OrtEpApi& ep_api_;
   std::string name_;
+  Config config_;
   const OrtLogger& logger_;
 };
diff --git a/onnxruntime/test/autoep/library/example_plugin_ep_kernel_registry/ep_factory.cc b/onnxruntime/test/autoep/library/example_plugin_ep_kernel_registry/ep_factory.cc
@@ -176,7 +176,7 @@ OrtStatus* ORT_API_CALL ExampleKernelEpFactory::CreateEpImpl(OrtEpFactory* this_
                                                              const OrtHardwareDevice* const* /*devices*/,
                                                              const OrtKeyValuePairs* const* /*ep_metadata*/,
                                                              size_t num_devices,
-                                                             const OrtSessionOptions* /*session_options*/,
+                                                             const OrtSessionOptions* session_options,
                                                              const OrtLogger* logger,
                                                              OrtEp** ep) noexcept {
   auto* factory = static_cast<ExampleKernelEpFactory*>(this_ptr);
@@ -187,7 +187,14 @@ OrtStatus* ORT_API_CALL ExampleKernelEpFactory::CreateEpImpl(OrtEpFactory* this_
                                           "ExampleKernelEpFactory only supports selection for one device.");
   }
 
-  auto actual_ep = std::make_unique<ExampleKernelEp>(*factory, *logger);
+  std::string enable_prepack_weight_sharing;
+  RETURN_IF_ERROR(GetSessionConfigEntryOrDefault(*session_options, "ep.examplekernelep.enable_prepack_weight_sharing",
+                                                 "0", enable_prepack_weight_sharing));
+
+  ExampleKernelEp::Config config = {};
+  config.enable_prepack_weight_sharing = enable_prepack_weight_sharing == "1";
+
+  auto actual_ep = std::make_unique<ExampleKernelEp>(*factory, config, *logger);
   *ep = actual_ep.release();
 
   return nullptr;
diff --git a/onnxruntime/test/autoep/library/example_plugin_ep_kernel_registry/kernels/mul.cc b/onnxruntime/test/autoep/library/example_plugin_ep_kernel_registry/kernels/mul.cc
@@ -5,6 +5,7 @@
 #include <sstream>
 #include "mul.h"
 #include "utils.h"
+#include "../ep.h"
 
 // Defines a kernel creation function for version 14 of Mul.
 ONNX_OPERATOR_KERNEL_EX(
@@ -34,7 +35,7 @@ OrtStatus* Mul::Create(const OrtKernelInfo* info, void* state,
                        /*out*/ std::unique_ptr<Mul>& result) noexcept {
   EXCEPTION_TO_RETURNED_STATUS_BEGIN
   // Note: can do basic validation or preprocessing via the OrtKernelInfo APIs.
-  result = std::make_unique<Mul>(info, state, PrivateTag{});
+  result = std::make_unique<Mul>(Ort::ConstKernelInfo(info), state, PrivateTag{});
   return nullptr;
   EXCEPTION_TO_RETURNED_STATUS_END
 }
@@ -48,7 +49,6 @@ void ORT_API_CALL Mul::ReleaseImpl(OrtKernelImpl* this_ptr) noexcept {
 OrtStatus* ORT_API_CALL Mul::ComputeImpl(OrtKernelImpl* this_ptr, OrtKernelContext* kernel_ctx) noexcept {
   EXCEPTION_TO_RETURNED_STATUS_BEGIN
   Mul* mul_kernel = static_cast<Mul*>(this_ptr);
-  static_cast<void>(mul_kernel->info_);  // NOTE: Unused in this example.
 
   Ort::KernelContext kernel_context(kernel_ctx);
 
@@ -128,9 +128,11 @@ OrtStatus* ORT_API_CALL Mul::PrePackWeightImpl(OrtKernelImpl* this_ptr, const Or
 
   RETURN_IF_ERROR(CopyTensor(*mul_kernel->data_transfer_impl_, original_weight, packed_weight.GetUnowned()));
 
-  const bool sharing_allowed = prepacked_weight_cache != nullptr;
+  const ExampleKernelEp* ep = static_cast<const ExampleKernelEp*>(mul_kernel->info_.GetEp());
+  const bool ep_sharing_enabled = ep->GetConfig().enable_prepack_weight_sharing;
+  const bool ort_sharing_allowed = prepacked_weight_cache != nullptr;
 
-  if (sharing_allowed) {
+  if (ort_sharing_allowed && ep_sharing_enabled) {
     std::array<void*, 1> buffer_data_ptrs = {weight_info.owned_data.get()};
     std::array<size_t, 1> buffer_data_sizes = {weight_info.num_bytes};
 
diff --git a/onnxruntime/test/autoep/library/example_plugin_ep_kernel_registry/kernels/mul.h b/onnxruntime/test/autoep/library/example_plugin_ep_kernel_registry/kernels/mul.h
@@ -41,7 +41,7 @@ class Mul : public OrtKernelImpl {
                                                               size_t num_buffers, int input_index) noexcept;
 
  private:
-  const OrtKernelInfo* info_;
+  Ort::ConstKernelInfo info_;
   OrtDataTransferImpl* data_transfer_impl_;  // Custom state passed from OrtEp
   std::optional<PackedWeightInfo> packed_weight_1_info_ = std::nullopt;
 };
diff --git a/onnxruntime/test/autoep/test_execution.cc b/onnxruntime/test/autoep/test_execution.cc
@@ -245,42 +245,48 @@ TEST(OrtEpLibrary, KernelPluginEp_Inference) {
                                                          example_kernel_ep));
   Ort::ConstEpDevice plugin_ep_device(example_kernel_ep.get());
 
-  // Create session with example kernel-based plugin EP
-  Ort::SessionOptions session_options;
-  session_options.AddConfigEntry(kOrtSessionOptionsDisableCPUEPFallback, "1");  // Fail if any node assigned to CPU EP.
+  auto run_model_with_ep_options = [&](const std::unordered_map<std::string, std::string>& ep_options) {
+    // Create session with example kernel-based plugin EP
+    Ort::SessionOptions session_options;
+    session_options.AddConfigEntry(kOrtSessionOptionsDisableCPUEPFallback, "1");  // Fail if any node assigned to CPU EP.
+    session_options.AppendExecutionProvider_V2(*ort_env, {plugin_ep_device}, ep_options);
 
-  std::unordered_map<std::string, std::string> ep_options;
-  session_options.AppendExecutionProvider_V2(*ort_env, {plugin_ep_device}, ep_options);
+    // This model has Squeeze, Mul, and Relu nodes. The example plugin EP supports all nodes using registered kernels.
+    Ort::Session session(*ort_env, ORT_TSTR("testdata/squeeze_mul_relu.onnx"), session_options);
 
-  // This model has Squeeze, Mul, and Relu nodes. The example plugin EP supports all nodes using registered kernels.
-  Ort::Session session(*ort_env, ORT_TSTR("testdata/squeeze_mul_relu.onnx"), session_options);
+    // Create inputs
+    Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
+    std::array<int64_t, 3> a_shape = {3, 1, 2};
+    std::array<int64_t, 2> b_shape = {3, 2};
 
-  // Create inputs
-  Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
-  std::array<int64_t, 3> a_shape = {3, 1, 2};
-  std::array<int64_t, 2> b_shape = {3, 2};
+    std::array<float, 6> a_data = {1.f, -2.f, 3.f, 4.f, -5.f, 6.f};
+    std::array<float, 6> b_data = {2.f, 3.f, 4.f, -5.f, 6.f, 7.f};
 
-  std::array<float, 6> a_data = {1.f, -2.f, 3.f, 4.f, -5.f, 6.f};
-  std::array<float, 6> b_data = {2.f, 3.f, 4.f, -5.f, 6.f, 7.f};
+    std::vector<Ort::Value> ort_inputs{};
+    ort_inputs.emplace_back(
+        Ort::Value::CreateTensor<float>(memory_info, a_data.data(), a_data.size(), a_shape.data(), a_shape.size()));
+    ort_inputs.emplace_back(
+        Ort::Value::CreateTensor<float>(memory_info, b_data.data(), b_data.size(), b_shape.data(), b_shape.size()));
 
-  std::vector<Ort::Value> ort_inputs{};
-  ort_inputs.emplace_back(
-      Ort::Value::CreateTensor<float>(memory_info, a_data.data(), a_data.size(), a_shape.data(), a_shape.size()));
-  ort_inputs.emplace_back(
-      Ort::Value::CreateTensor<float>(memory_info, b_data.data(), b_data.size(), b_shape.data(), b_shape.size()));
+    std::array ort_input_names{"A", "B"};
 
-  std::array ort_input_names{"A", "B"};
+    // Run session and get outputs
+    std::array output_names{"C"};
+    std::vector<Ort::Value> ort_outputs = session.Run(Ort::RunOptions{nullptr}, ort_input_names.data(), ort_inputs.data(),
+                                                      ort_inputs.size(), output_names.data(), output_names.size());
 
-  // Run session and get outputs
-  std::array output_names{"C"};
-  std::vector<Ort::Value> ort_outputs = session.Run(Ort::RunOptions{nullptr}, ort_input_names.data(), ort_inputs.data(),
-                                                    ort_inputs.size(), output_names.data(), output_names.size());
+    // Check expected output values
+    Ort::Value& ort_output = ort_outputs[0];
+    const float* output_data = ort_output.GetTensorData<float>();
+    gsl::span<const float> output_span(output_data, 6);
+    EXPECT_THAT(output_span, ::testing::ElementsAre(4, 0, 24, 0, 0, 84));
+  };
 
-  // Check expected output values
-  Ort::Value& ort_output = ort_outputs[0];
-  const float* output_data = ort_output.GetTensorData<float>();
-  gsl::span<const float> output_span(output_data, 6);
-  EXPECT_THAT(output_span, ::testing::ElementsAre(4, 0, 24, 0, 0, 84));
+  run_model_with_ep_options({});
+
+  // Enable sharing of pre-packed weights.
+  // This also tests that the kernel implementation can retrieve the OrtEp and read its configuration.
+  run_model_with_ep_options({{"enable_prepack_weight_sharing", "1"}});
 }
 }  // namespace test
 }  // namespace onnxruntime