Add Shape and Reshape kernels to example kernel EP

adrianlizarraga · adrianlizarraga · commit 96fe212e4b4a · 2026-01-05T05:04:37.000-08:00
diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake
@@ -2108,6 +2108,10 @@ if (onnxruntime_BUILD_SHARED_LIB AND
           "${TEST_SRC_DIR}/autoep/library/example_plugin_ep_kernel_registry/ep_kernel_registration.h"
           "${TEST_SRC_DIR}/autoep/library/example_plugin_ep_kernel_registry/ep_kernel_registration.cc"
           "${TEST_SRC_DIR}/autoep/library/example_plugin_ep_kernel_registry/kernels/utils.h"
+          "${TEST_SRC_DIR}/autoep/library/example_plugin_ep_kernel_registry/kernels/shape.h"
+          "${TEST_SRC_DIR}/autoep/library/example_plugin_ep_kernel_registry/kernels/shape.cc"
+          "${TEST_SRC_DIR}/autoep/library/example_plugin_ep_kernel_registry/kernels/reshape.h"
+          "${TEST_SRC_DIR}/autoep/library/example_plugin_ep_kernel_registry/kernels/reshape.cc"
           "${TEST_SRC_DIR}/autoep/library/example_plugin_ep_kernel_registry/kernels/squeeze.h"
           "${TEST_SRC_DIR}/autoep/library/example_plugin_ep_kernel_registry/kernels/squeeze.cc"
           "${TEST_SRC_DIR}/autoep/library/example_plugin_ep_kernel_registry/kernels/relu.h"
diff --git a/onnxruntime/test/autoep/library/example_plugin_ep_kernel_registry/ep_kernel_registration.cc b/onnxruntime/test/autoep/library/example_plugin_ep_kernel_registry/ep_kernel_registration.cc
@@ -19,6 +19,18 @@ static const BuildKernelCreateInfoFn build_kernel_create_info_funcs[] = {
     BuildKernelCreateInfo<class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kOnnxDomain, 21, 22, Squeeze)>,
     BuildKernelCreateInfo<class ONNX_OPERATOR_KERNEL_CLASS_NAME(kOnnxDomain, 23, Squeeze)>,
     BuildKernelCreateInfo<class ONNX_OPERATOR_KERNEL_CLASS_NAME(kOnnxDomain, 24, Squeeze)>,
+
+    // Support Shape 21, 23, and 24.
+    // Note: end versions are inclusive.
+    BuildKernelCreateInfo<class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kOnnxDomain, 21, 22, Shape)>,
+    BuildKernelCreateInfo<class ONNX_OPERATOR_KERNEL_CLASS_NAME(kOnnxDomain, 23, Shape)>,
+    BuildKernelCreateInfo<class ONNX_OPERATOR_KERNEL_CLASS_NAME(kOnnxDomain, 24, Shape)>,
+
+    // Support Reshape 21, 23, and 24.
+    // Note: end versions are inclusive.
+    BuildKernelCreateInfo<class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kOnnxDomain, 21, 22, Reshape)>,
+    BuildKernelCreateInfo<class ONNX_OPERATOR_KERNEL_CLASS_NAME(kOnnxDomain, 23, Reshape)>,
+    BuildKernelCreateInfo<class ONNX_OPERATOR_KERNEL_CLASS_NAME(kOnnxDomain, 24, Reshape)>,
 };
 
 size_t GetNumKernels() {
diff --git a/onnxruntime/test/autoep/library/example_plugin_ep_kernel_registry/kernels/reshape.cc b/onnxruntime/test/autoep/library/example_plugin_ep_kernel_registry/kernels/reshape.cc
@@ -0,0 +1,149 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "reshape.h"
+
+#include <gsl/span>
+#include <vector>
+#include "utils.h"
+
+// ONNX Reshape versions 21-22 (the end version is inclusive).
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(
+    Reshape,
+    kOnnxDomain,
+    /*start_version*/ 21, /*end_version (inclusive)*/ 22,
+    (Ort::KernelDefBuilder()
+         .AddTypeConstraint("T", GetTensorType(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT))
+         .AddTypeConstraint("shape", GetTensorType(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64))
+         .AddInputOutputAlias(0, 0)            // Output 0 aliases input 0.
+         .SetInputMemType(1, OrtMemTypeCPU)),  // Requests CPU memory for input 1 (the requested shape).
+    Reshape)
+
+// ONNX Reshape version 23 (same builder settings as versions 21-22).
+ONNX_OPERATOR_KERNEL_EX(
+    Reshape,
+    kOnnxDomain,
+    /*version*/ 23,  // Equivalent to start_version: 23, end_version: 23
+    (Ort::KernelDefBuilder()
+         .AddTypeConstraint("T", GetTensorType(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT))
+         .AddTypeConstraint("shape", GetTensorType(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64))
+         .AddInputOutputAlias(0, 0)            // Output 0 aliases input 0.
+         .SetInputMemType(1, OrtMemTypeCPU)),  // Requests CPU memory for input 1 (the requested shape).
+    Reshape)
+
+// ONNX Reshape version 24 (same builder settings as versions 21-22).
+ONNX_OPERATOR_KERNEL_EX(
+    Reshape,
+    kOnnxDomain,
+    /*version*/ 24,  // Equivalent to start_version: 24, end_version: 24
+    (Ort::KernelDefBuilder()
+         .AddTypeConstraint("T", GetTensorType(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT))
+         .AddTypeConstraint("shape", GetTensorType(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64))
+         .AddInputOutputAlias(0, 0)            // Output 0 aliases input 0.
+         .SetInputMemType(1, OrtMemTypeCPU)),  // Requests CPU memory for input 1 (the requested shape).
+    Reshape)
+
+Reshape::Reshape(const OrtKernelInfo* info, void* state, bool allow_zero, PrivateTag)
+    : OrtKernelImpl{},  // Value-initialize the base struct so every callback pointer starts out null.
+      info_(info),
+      data_transfer_impl_(static_cast<OrtDataTransferImpl*>(state)),
+      allow_zero_(allow_zero) {
+  this->Release = &Reshape::ReleaseImpl;  // Hook up the callbacks this kernel implements.
+  this->Compute = &Reshape::ComputeImpl;
+  this->ort_version_supported = ORT_API_VERSION;
+}
+
+/*static*/  // Factory: reads the "allowzero" attribute, builds the kernel, and returns nullptr on success.
+OrtStatus* Reshape::Create(const OrtKernelInfo* info, void* state, /*out*/ std::unique_ptr<Reshape>& kernel) noexcept {
+  EXCEPTION_TO_RETURNED_STATUS_BEGIN
+  Ort::ConstKernelInfo attrs(info);
+  // Only an attribute value of exactly 1 enables the allow-zero behavior.
+  const int64_t allowzero_attr = attrs.GetAttribute<int64_t>("allowzero");
+  kernel = std::make_unique<Reshape>(info, state, /*allow_zero*/ allowzero_attr == 1, PrivateTag{});
+  return nullptr;
+  EXCEPTION_TO_RETURNED_STATUS_END
+}
+
+// Resolves `requested_shape` in place (0 = copy the input dim unless allow_zero; one -1 = inferred).
+// Implementation is based on ReshapeHelper in onnxruntime/core/providers/cpu/tensor/reshape_helper.h
+static OrtStatus* GetRequestedShape(gsl::span<const int64_t> input_shape, bool allow_zero,
+                                    /*out*/ std::vector<int64_t>& requested_shape) {
+  EXCEPTION_TO_RETURNED_STATUS_BEGIN
+  const OrtApi& ort_api = Ort::GetApi();
+
+  int64_t input_elem_count = 1;  // Product of all input dimensions.
+  for (size_t d = 0; d < input_shape.size(); ++d) {
+    input_elem_count *= input_shape[d];
+  }
+  RETURN_IF(input_elem_count == -1, ort_api, "Input tensor must not have dynamic (-1) dimensions.");
+
+  constexpr int64_t kNoInferredDim = -1;
+  int64_t inferred_dim_idx = kNoInferredDim;
+  int64_t known_elem_count = 1;  // Product of every requested dimension other than the -1 entry.
+
+  for (size_t i = 0; i < requested_shape.size(); ++i) {
+    int64_t& dim = requested_shape[i];
+    RETURN_IF(dim < -1, ort_api, "A dimension cannot be less than -1");
+    if (dim == -1) {
+      RETURN_IF(inferred_dim_idx != kNoInferredDim, ort_api, "At most one dimension can be -1");
+      inferred_dim_idx = static_cast<int64_t>(i);
+      continue;
+    }
+    if (dim == 0 && !allow_zero) {  // A zero means "keep the input's dimension at this index".
+      RETURN_IF(i >= input_shape.size(), ort_api,
+                "The dimension with value zero exceeds the dimension size of the input");
+      dim = input_shape[i];
+    }
+    known_elem_count *= dim;
+  }
+
+  if (inferred_dim_idx == kNoInferredDim) {
+    // Fully specified shape: the element counts must agree exactly.
+    RETURN_IF(input_elem_count != known_elem_count, ort_api,
+              "The input cannot be reshaped to the requested shape");
+    return nullptr;
+  }
+
+  // Infer the -1 dimension; the other dimensions must divide the input's element count evenly.
+  RETURN_IF(known_elem_count == 0 || (input_elem_count % known_elem_count) != 0, ort_api,
+            "The input cannot be reshaped to the requested shape");
+  requested_shape[inferred_dim_idx] = input_elem_count / known_elem_count;
+  return nullptr;
+  EXCEPTION_TO_RETURNED_STATUS_END
+}
+
+/*static*/  // Compute callback: gives input 0 the shape described by the values stored in input 1.
+OrtStatus* ORT_API_CALL Reshape::ComputeImpl(OrtKernelImpl* this_ptr, OrtKernelContext* kernel_ctx) noexcept {
+  EXCEPTION_TO_RETURNED_STATUS_BEGIN
+  Reshape* reshape_kernel = static_cast<Reshape*>(this_ptr);
+  static_cast<void>(reshape_kernel->info_);  // NOTE: Unused in this example.
+  Ort::KernelContext kernel_context(kernel_ctx);
+
+  // Input[0] has the data to reshape.
+  Ort::ConstValue input = kernel_context.GetInput(0);
+  auto type_shape_info = input.GetTensorTypeAndShapeInfo();
+  std::vector<int64_t> input_shape = type_shape_info.GetShape();
+
+  // Input[1] has the requested shape: its values are the target dimensions, so it must itself be 1-D.
+  Ort::ConstValue shape_input = kernel_context.GetInput(1);
+  gsl::span<const int64_t> shape_input_data;
+  std::vector<int64_t> shape_input_dims;
+  RETURN_IF_ERROR(GetValueDataAndShape(shape_input, shape_input_data, shape_input_dims));
+  RETURN_IF(shape_input_dims.size() != 1, Ort::GetApi(), "A shape tensor must have one dimension");
+
+  // Resolve 0 / -1 entries starting from the shape tensor's contents, not from its own dimensions.
+  std::vector<int64_t> final_shape(shape_input_data.begin(), shape_input_data.end());
+  RETURN_IF_ERROR(GetRequestedShape(input_shape, reshape_kernel->allow_zero_, final_shape));
+  Ort::UnownedValue output = kernel_context.GetOutput(0, final_shape);
+
+  // This kernel aliases the input and output, so a copy is not really necessary.
+  // CopyTensor() will not do a copy if the source and destination buffers are the same.
+  RETURN_IF_ERROR(CopyTensor(*reshape_kernel->data_transfer_impl_, input, output));
+  return nullptr;
+  EXCEPTION_TO_RETURNED_STATUS_END
+}
+
+/*static*/  // Release callback: destroys the kernel instance behind `this_ptr`.
+void ORT_API_CALL Reshape::ReleaseImpl(OrtKernelImpl* this_ptr) noexcept {
+  std::unique_ptr<Reshape> owned_kernel{static_cast<Reshape*>(this_ptr)};  // Deleted when this scope ends.
+}
diff --git a/onnxruntime/test/autoep/library/example_plugin_ep_kernel_registry/kernels/reshape.h b/onnxruntime/test/autoep/library/example_plugin_ep_kernel_registry/kernels/reshape.h
@@ -0,0 +1,24 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "../../plugin_ep_utils.h"
+
+class Reshape : public OrtKernelImpl {  // Example Reshape kernel; its static callbacks fill the base struct.
+ private:
+  struct PrivateTag {};  // Private tag type: only code inside the class can name it.
+
+ public:
+  static OrtStatus* Create(const OrtKernelInfo* info, void* state, /*out*/ std::unique_ptr<Reshape>& kernel) noexcept;
+  Reshape(const OrtKernelInfo* info, void* state, bool allow_zero, PrivateTag);  // Construct via Create().
+
+  // Static functions assigned to the OrtKernelImpl fields (Compute and Release) by the constructor:
+  static OrtStatus* ORT_API_CALL ComputeImpl(OrtKernelImpl* this_ptr, OrtKernelContext* kernel_ctx) noexcept;
+  static void ORT_API_CALL ReleaseImpl(OrtKernelImpl* this_ptr) noexcept;
+
+ private:
+  const OrtKernelInfo* info_;                // Stored at construction; unused in this example.
+  OrtDataTransferImpl* data_transfer_impl_;  // Custom state passed from OrtEp
+  bool allow_zero_;                          // True when the "allowzero" attribute equals 1.
+};
diff --git a/onnxruntime/test/autoep/library/example_plugin_ep_kernel_registry/kernels/shape.cc b/onnxruntime/test/autoep/library/example_plugin_ep_kernel_registry/kernels/shape.cc
@@ -0,0 +1,97 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "shape.h"
+
+#include <vector>
+#include "utils.h"
+
+// ONNX Shape versions 21-22 (the end version is inclusive).
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(
+    Shape,
+    kOnnxDomain,
+    /*start_version*/ 21, /*end_version (inclusive)*/ 22,
+    (Ort::KernelDefBuilder()
+         .AddTypeConstraint("T", GetTensorType(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT))
+         .AddTypeConstraint("T1", GetTensorType(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64))
+         .SetOutputMemType(0, OrtMemTypeCPU)),  // Requests CPU memory for output 0 (the shape values).
+    Shape)
+
+// ONNX Shape version 23 (same builder settings as versions 21-22).
+ONNX_OPERATOR_KERNEL_EX(
+    Shape,
+    kOnnxDomain,
+    /*version*/ 23,  // Equivalent to start_version: 23, end_version: 23
+    (Ort::KernelDefBuilder()
+         .AddTypeConstraint("T", GetTensorType(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT))
+         .AddTypeConstraint("T1", GetTensorType(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64))
+         .SetOutputMemType(0, OrtMemTypeCPU)),  // Requests CPU memory for output 0 (the shape values).
+    Shape)
+
+// ONNX Shape version 24 (same builder settings as versions 21-22).
+ONNX_OPERATOR_KERNEL_EX(
+    Shape,
+    kOnnxDomain,
+    /*version*/ 24,  // Equivalent to start_version: 24, end_version: 24
+    (Ort::KernelDefBuilder()
+         .AddTypeConstraint("T", GetTensorType(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT))
+         .AddTypeConstraint("T1", GetTensorType(ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64))
+         .SetOutputMemType(0, OrtMemTypeCPU)),  // Requests CPU memory for output 0 (the shape values).
+    Shape)
+
+Shape::Shape(const OrtKernelInfo* info, void* state, PrivateTag)
+    : OrtKernelImpl{},  // Value-initialize the base struct so every callback pointer starts out null.
+      info_(info),
+      data_transfer_impl_(static_cast<OrtDataTransferImpl*>(state)) {
+  this->Release = &Shape::ReleaseImpl;  // Hook up the callbacks this kernel implements.
+  this->Compute = &Shape::ComputeImpl;
+  this->ort_version_supported = ORT_API_VERSION;
+}
+
+/*static*/  // Factory: rejects nodes that slice the shape (non-zero "start" or an explicit "end").
+OrtStatus* Shape::Create(const OrtKernelInfo* info, void* state, /*out*/ std::unique_ptr<Shape>& kernel) noexcept {
+  EXCEPTION_TO_RETURNED_STATUS_BEGIN
+  const OrtApi& ort_api = Ort::GetApi();
+  Ort::ConstKernelInfo kernel_info(info);
+  const int64_t start_attr = kernel_info.GetAttribute<int64_t>("start");
+
+  // "end" is probed through the C API: an OK status is treated as "the attribute is present".
+  int64_t end_attr = 0;
+  Ort::Status end_lookup{ort_api.KernelInfoGetAttribute_int64(info, "end", &end_attr)};
+  // This example kernel does not support shape slicing.
+  RETURN_IF(start_attr != 0 || end_lookup.IsOK(), ort_api,
+            "Example Shape kernel does not support non-default start/end attributes");
+  kernel = std::make_unique<Shape>(info, state, PrivateTag{});
+  return nullptr;
+  EXCEPTION_TO_RETURNED_STATUS_END
+}
+
+/*static*/  // Compute callback: writes the dimensions of input 0 into a 1-D int64 output.
+OrtStatus* ORT_API_CALL Shape::ComputeImpl(OrtKernelImpl* this_ptr, OrtKernelContext* kernel_ctx) noexcept {
+  EXCEPTION_TO_RETURNED_STATUS_BEGIN
+  const Shape* self = static_cast<Shape*>(this_ptr);
+  static_cast<void>(self->info_);                // NOTE: Unused in this example.
+  static_cast<void>(self->data_transfer_impl_);  // NOTE: Unused in this example.
+
+  Ort::KernelContext ctx(kernel_ctx);
+  Ort::ConstValue src = ctx.GetInput(0);
+  const std::vector<int64_t> dims = src.GetTensorTypeAndShapeInfo().GetShape();
+
+  // The output holds one element per input dimension.
+  const std::vector<int64_t> result_shape{static_cast<int64_t>(dims.size())};
+  Ort::UnownedValue result = ctx.GetOutput(0, result_shape);
+
+  // Copy each dimension into the output buffer.
+  int64_t* dst = result.GetTensorMutableData<int64_t>();
+  for (const int64_t dim : dims) {
+    *dst++ = dim;
+  }
+
+  return nullptr;
+  EXCEPTION_TO_RETURNED_STATUS_END
+}
+
+/*static*/  // Release callback: destroys the kernel instance behind `this_ptr`.
+void ORT_API_CALL Shape::ReleaseImpl(OrtKernelImpl* this_ptr) noexcept {
+  std::unique_ptr<Shape> owned_kernel{static_cast<Shape*>(this_ptr)};  // Deleted when this scope ends.
+}
diff --git a/onnxruntime/test/autoep/library/example_plugin_ep_kernel_registry/kernels/shape.h b/onnxruntime/test/autoep/library/example_plugin_ep_kernel_registry/kernels/shape.h
@@ -0,0 +1,23 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "../../plugin_ep_utils.h"
+
+class Shape : public OrtKernelImpl {  // Example Shape kernel; its static callbacks fill the base struct.
+ private:
+  struct PrivateTag {};  // Private tag type: only code inside the class can name it.
+
+ public:
+  static OrtStatus* Create(const OrtKernelInfo* info, void* state, /*out*/ std::unique_ptr<Shape>& kernel) noexcept;
+  Shape(const OrtKernelInfo* info, void* state, PrivateTag);  // Construct via Create().
+
+  // Static functions assigned to the OrtKernelImpl fields (Compute and Release) by the constructor:
+  static OrtStatus* ORT_API_CALL ComputeImpl(OrtKernelImpl* this_ptr, OrtKernelContext* kernel_ctx) noexcept;
+  static void ORT_API_CALL ReleaseImpl(OrtKernelImpl* this_ptr) noexcept;
+
+ private:
+  const OrtKernelInfo* info_;                // Stored at construction; unused in this example.
+  OrtDataTransferImpl* data_transfer_impl_;  // Custom state passed from OrtEp; unused in this example.
+};