Skip to content

Commit 3968ec3

Browse files
Update example BinaryOp class to support Sub instead of Add
1 parent ebe58ed commit 3968ec3

File tree

7 files changed

+115
-19
lines changed

7 files changed

+115
-19
lines changed

onnxruntime/test/autoep/library/example_plugin_ep_kernel_registry/ep.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,10 @@ OrtStatus* ORT_API_CALL ExampleKernelEp::GetCapabilityImpl(OrtEp* this_ptr, cons
6868

6969
if (op_type == "Relu" || op_type == "Squeeze") {
7070
candidate_nodes.push_back(node);
71-
} else if (op_type == "Mul" || op_type == "Add") {
71+
} else if (op_type == "Mul" || op_type == "Sub") {
7272
std::vector<Ort::ConstValueInfo> inputs = node.GetInputs();
7373

74-
// Note: ONNX shape inference should ensure Mul/Add has two inputs.
74+
// Note: ONNX shape inference should ensure Mul/Sub has two inputs.
7575
std::optional<std::vector<int64_t>> input_0_shape = GetTensorShape(inputs[0]);
7676
std::optional<std::vector<int64_t>> input_1_shape = GetTensorShape(inputs[1]);
7777

onnxruntime/test/autoep/library/example_plugin_ep_kernel_registry/ep_kernel_registration.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ static const BuildKernelCreateInfoFn build_kernel_create_info_funcs[] = {
1111
// Mul version 14
1212
BuildKernelCreateInfo<class ONNX_OPERATOR_KERNEL_CLASS_NAME(kOnnxDomain, 14, Mul)>,
1313

14-
// Add version 14
15-
BuildKernelCreateInfo<class ONNX_OPERATOR_KERNEL_CLASS_NAME(kOnnxDomain, 14, Add)>,
14+
// Sub version 14
15+
BuildKernelCreateInfo<class ONNX_OPERATOR_KERNEL_CLASS_NAME(kOnnxDomain, 14, Sub)>,
1616

1717
// Relu version 14
1818
BuildKernelCreateInfo<class ONNX_OPERATOR_KERNEL_CLASS_NAME(kOnnxDomain, 14, Relu)>,

onnxruntime/test/autoep/library/example_plugin_ep_kernel_registry/kernels/binary_op.cc

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@ ONNX_OPERATOR_KERNEL_EX(
1717
.AddTypeConstraint("T", GetTensorType(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT))),
1818
BinaryOp)
1919

20-
// Defines a kernel creation function for version 14 of Add.
20+
// Defines a kernel creation function for version 14 of Sub.
2121
ONNX_OPERATOR_KERNEL_EX(
22-
Add,
22+
Sub,
2323
kOnnxDomain,
2424
/*version*/ 14, // Equivalent to start_version: 14, end_version: 14 (inclusive)
2525
(Ort::KernelDefBuilder()
@@ -35,7 +35,7 @@ BinaryOp::BinaryOp(Ort::ConstKernelInfo info, void* state, PrivateTag)
3535
Release = ReleaseImpl;
3636

3737
// Optional functions that are only needed to pre-pack weights. This BinaryOp kernel pre-packs
38-
// input[1] weights as an example (not typically done by an actual implementations of Mul/Add).
38+
// input[1] weights as an example (not typically done by an actual implementation of Mul/Sub).
3939
PrePackWeight = PrePackWeightImpl;
4040
SetSharedPrePackedWeight = SetSharedPrePackedWeightImpl;
4141
}
@@ -47,11 +47,11 @@ OrtStatus* BinaryOp::Create(const OrtKernelInfo* info, void* state,
4747
Ort::ConstKernelInfo kernel_info(info);
4848

4949
// Note: can do basic validation or preprocessing via the OrtKernelInfo APIs.
50-
// Here, we check that this BinaryOp class is only instantiated for an onnx Mul or Add operator.
50+
// Here, we check that this BinaryOp class is only instantiated for an onnx Mul or Sub operator.
5151
std::string op_domain = kernel_info.GetOperatorDomain();
5252
std::string op_type = kernel_info.GetOperatorType();
5353

54-
if ((!op_domain.empty() && op_domain != "ai.onnx") || (op_type != "Add" && op_type != "Mul")) {
54+
if ((!op_domain.empty() && op_domain != "ai.onnx") || (op_type != "Sub" && op_type != "Mul")) {
5555
std::ostringstream oss;
5656
oss << "ExampleKernelEp's BinaryOp class does not support operator with domain '" << op_domain << "' and "
5757
<< " type '" << op_type << "'.";
@@ -110,9 +110,9 @@ OrtStatus* ORT_API_CALL BinaryOp::ComputeImpl(OrtKernelImpl* this_ptr, OrtKernel
110110
float* output_data = output.GetTensorMutableData<float>();
111111

112112
std::string op_type = binary_op_kernel->info_.GetOperatorType();
113-
if (op_type == "Add") {
113+
if (op_type == "Sub") {
114114
for (size_t i = 0; i < input0.size(); ++i) {
115-
output_data[i] = input0[i] + input1[i];
115+
output_data[i] = input0[i] - input1[i];
116116
}
117117
} else {
118118
assert(op_type == "Mul"); // Checked by BinaryOp::Create

onnxruntime/test/autoep/library/example_plugin_ep_kernel_registry/kernels/binary_op.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
/// <summary>
1111
/// An OrtKernelImpl class for binary element-wise operations.
12-
/// Only Add and Mul are supported currently.
12+
/// Only Sub and Mul are supported currently.
1313
/// </summary>
1414
class BinaryOp : public OrtKernelImpl {
1515
private:

onnxruntime/test/autoep/test_execution.cc

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,40 @@ void RunSqueezeMulReluModel(const Ort::SessionOptions& session_options) {
7878
EXPECT_THAT(output_span, ::testing::ElementsAre(4, 0, 24, 0, 0, 84));
7979
}
8080

81-
void RunAddMulAddModel(const Ort::SessionOptions& session_options) {
82-
// This model has Add -> Mul -> Add. The example plugin EP only supports Mul.
81+
// Runs testdata/sub_mul_sub.onnx, which computes (A - B) * B - A with a
// Sub -> Mul -> Sub chain. The example plugin EP supports all three ops.
void RunSubMulSubModel(const Ort::SessionOptions& session_options) {
  Ort::Session session(*ort_env, ORT_TSTR("testdata/sub_mul_sub.onnx"), session_options);

  // Build the two float input tensors over caller-owned buffers.
  Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
  std::vector<int64_t> shape{3, 2};

  std::vector<float> a_data{1, 2, 3, 4, 5, 6};
  std::vector<float> b_data{2, 3, 4, 5, 6, 7};

  std::vector<Ort::Value> ort_inputs;
  for (auto* data : {&a_data, &b_data}) {
    ort_inputs.emplace_back(
        Ort::Value::CreateTensor<float>(memory_info, data->data(), data->size(), shape.data(), shape.size()));
  }

  std::array ort_input_names{"A", "B"};
  std::array output_names{"C"};

  // Run the session and fetch the single output tensor.
  std::vector<Ort::Value> ort_outputs = session.Run(Ort::RunOptions{nullptr}, ort_input_names.data(),
                                                    ort_inputs.data(), ort_inputs.size(),
                                                    output_names.data(), output_names.size());

  // Validate against (A - B) * B - A computed by hand for the inputs above.
  const float* output_data = ort_outputs[0].GetTensorData<float>();
  gsl::span<const float> output_span(output_data, 6);
  EXPECT_THAT(output_span, ::testing::ElementsAre(-3, -5, -7, -9, -11, -13));
}
112+
113+
void RunPartiallySupportedModelWithPluginEp(const Ort::SessionOptions& session_options) {
114+
// This model has Add -> Mul -> Add. The example plugin EP supports Mul but not Add.
83115
Ort::Session session(*ort_env, ORT_TSTR("testdata/add_mul_add.onnx"), session_options);
84116

85117
// Create inputs
@@ -150,7 +182,7 @@ TEST(OrtEpLibrary, PluginEp_AppendV2_PartiallySupportedModelInference) {
150182
std::unordered_map<std::string, std::string> ep_options;
151183
session_options.AppendExecutionProvider_V2(*ort_env, {plugin_ep_device}, ep_options);
152184

153-
RunAddMulAddModel(session_options);
185+
RunPartiallySupportedModelWithPluginEp(session_options);
154186
}
155187

156188
// Generate an EPContext model with a plugin EP.
@@ -298,26 +330,26 @@ TEST(OrtEpLibrary, KernelPluginEp_Inference) {
298330
ASSERT_NO_FATAL_FAILURE(RunSqueezeMulReluModel(session_options));
299331
}
300332

301-
// Run model with add, mul, add.
333+
// Run model with sub, mul, sub.
302334
// No sharing of pre-packed weights.
303335
{
304336
Ort::SessionOptions session_options;
305337
std::unordered_map<std::string, std::string> ep_options;
306338

307339
session_options.AddConfigEntry(kOrtSessionOptionsDisableCPUEPFallback, "1"); // Fail if any node assigned to CPU EP
308340
session_options.AppendExecutionProvider_V2(*ort_env, {plugin_ep_device}, ep_options);
309-
ASSERT_NO_FATAL_FAILURE(RunAddMulAddModel(session_options));
341+
ASSERT_NO_FATAL_FAILURE(RunSubMulSubModel(session_options));
310342
}
311343

312-
// Run model with add, mul, add.
344+
// Run model with sub, mul, sub.
313345
// Enable sharing of pre-packed weights.
314346
{
315347
std::unordered_map<std::string, std::string> ep_options = {{"enable_prepack_weight_sharing", "1"}};
316348
Ort::SessionOptions session_options;
317349

318350
session_options.AddConfigEntry(kOrtSessionOptionsDisableCPUEPFallback, "1"); // Fail if any node assigned to CPU EP
319351
session_options.AppendExecutionProvider_V2(*ort_env, {plugin_ep_device}, ep_options);
320-
ASSERT_NO_FATAL_FAILURE(RunAddMulAddModel(session_options));
352+
ASSERT_NO_FATAL_FAILURE(RunSubMulSubModel(session_options));
321353
}
322354
}
323355
} // namespace test
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
 :�
2+

3+
A
4+
B
5+
sub_outputsub_0"Sub
6+
'
7+
8+
sub_output
9+
B
10+
mul_outputmul_0"Mul
11+

12+
13+
mul_output
14+
ACsub_1"Sub
15+
Main_graphZ
16+
A
17+

18+

19+
Z
20+
B
21+

22+

23+
b
24+
C
25+

26+

27+
B
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
"""Generates testdata/sub_mul_sub.onnx for the example plugin EP tests.

The model is a three-node chain computing (A - B) * B - A:
    Sub(A, B) -> Mul(sub_output, B) -> Sub(mul_output, A) -> C
with float inputs A, B and output C of shape [3, 2].
"""
from onnx import TensorProto, checker, helper, save

# (A - B) * B - A
graph_proto = helper.make_graph(
    nodes=[
        helper.make_node(
            "Sub",
            inputs=["A", "B"],
            outputs=["sub_output"],
            name="sub_0",
        ),
        helper.make_node(
            "Mul",
            inputs=["sub_output", "B"],
            outputs=["mul_output"],
            name="mul_0",
        ),
        helper.make_node(
            "Sub",
            inputs=["mul_output", "A"],
            outputs=["C"],
            name="sub_1",
        ),
    ],
    name="Main_graph",
    inputs=[
        helper.make_tensor_value_info("A", TensorProto.FLOAT, [3, 2]),
        helper.make_tensor_value_info("B", TensorProto.FLOAT, [3, 2]),
    ],
    outputs=[
        helper.make_tensor_value_info("C", TensorProto.FLOAT, [3, 2]),
    ],
)

# Pin the default-domain opset to 14: the example plugin EP registers its
# Sub/Mul kernels for opset version 14 only (start_version == end_version == 14),
# so regenerating this model with a newer onnx package's default opset would
# leave the nodes unmatched by the EP's kernels.
model = helper.make_model(graph_proto, opset_imports=[helper.make_opsetid("", 14)])
checker.check_model(model, True)  # full_check=True also runs shape inference
save(model, "sub_mul_sub.onnx")

0 commit comments

Comments
 (0)