
Commit ebcf2eb

[CoreML] Update Conv and Softmax ops (#24594)
### Description

Moved the dimension-limit check because it appears to apply only to conv operations: texture memory is typically used for conv operations on the GPU because it has slow writes but fast reads. A ChromaDB model had a Slice operation with an input > 16384, and the operation worked fine after the dim check was moved. Also added extra checks for Softmax on MLProgram that allow more Softmax nodes to be assigned to CoreML.
1 parent 5cae8d2 commit ebcf2eb

File tree: 6 files changed, +98 −18 lines changed


onnxruntime/core/providers/coreml/builders/helper.cc

Lines changed: 17 additions & 11 deletions
@@ -74,17 +74,6 @@ bool IsInputSupported(const Node& node, const NodeArg& input, const OpBuilderInp
   }
 
   for (const auto dim : shape) {
-    // For some undocumented reason, Apple CoreML framework will fail loading the model if the model
-    // input has dimension > 16384
-    // See this issue, https://github.com/apple/coremltools/issues/1003
-    // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf has maximum texture widths which may be the
-    // root cause.
-    if (dim > 16384) {
-      LOGS(logger, WARNING) << "CoreML does not support input dim > 16384. Input:" << input_name
-                            << ", shape: " << Shape2String(shape);
-      return false;
-    }
-
     if (dim == 0 && !allow_empty_input) {
       LOGS(logger, WARNING) << "CoreML does not support shapes with dimension values of 0. Input:" << input_name
                             << ", shape: " << Shape2String(shape);
@@ -173,5 +162,22 @@ bool HasNeuralEngine() {
   return has_neural_engine;
 }
 
+bool CheckShapeForConvMemoryLimit(gsl::span<const int64_t> shape, const logging::Logger& logger) {
+  // For some undocumented reason, Apple CoreML framework will fail loading the model if the model
+  // input has dimension > 16384
+  // See this issue, https://github.com/apple/coremltools/issues/1003
+  // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf has maximum texture widths which may be the
+  // root cause.
+  // Only seems to apply to convolution networks -- limit comes from the size of the texture memory
+  for (auto dim : shape) {
+    if (dim > 16384) {
+      LOGS(logger, VERBOSE) << "Input shape: " << Shape2String(shape)
+                            << " exceeds CoreML convolution memory limit of 16384";
+      return false;
+    }
+  }
+  return true;
+}
+
 }  // namespace coreml
 }  // namespace onnxruntime
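To make the helper's behavior concrete, below is a minimal standalone sketch (an illustration, not ORT code; the real `CheckShapeForConvMemoryLimit` above also reports through the ORT logger). Note that the limit is per dimension rather than on the total element count, consistent with a texture-width limit:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Simplified illustration of the per-dimension rule enforced by
// CheckShapeForConvMemoryLimit. 16384 matches the maximum texture width in
// Apple's Metal feature-set tables, the suspected root cause.
constexpr int64_t kCoreMLConvDimLimit = 16384;

bool WithinConvMemoryLimit(const std::vector<int64_t>& shape) {
  for (int64_t dim : shape) {
    if (dim > kCoreMLConvDimLimit) return false;
  }
  return true;
}

int main() {
  std::cout << WithinConvMemoryLimit({1, 3, 16384, 16384}) << "\n";  // 1: at the limit, allowed
  std::cout << WithinConvMemoryLimit({1, 3, 16385}) << "\n";         // 0: over the limit, rejected
}
```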

onnxruntime/core/providers/coreml/builders/helper.h

Lines changed: 5 additions & 0 deletions
@@ -48,5 +48,10 @@ bool CheckIsConstantInitializer(const NodeArg& node_arg, const GraphViewer& grap
 // This is to detect if the current system has Apple Neural Engine
 bool HasNeuralEngine();
 
+// See this issue, https://github.com/apple/coremltools/issues/1003
+// https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf has maximum texture widths which may be the
+// root cause.
+bool CheckShapeForConvMemoryLimit(gsl::span<const int64_t> shape, const logging::Logger& logger);
+
 }  // namespace coreml
 }  // namespace onnxruntime

onnxruntime/core/providers/coreml/builders/impl/conv_op_builder.cc

Lines changed: 26 additions & 0 deletions
@@ -236,6 +236,32 @@ bool ConvOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputPara
   // use the weight for the shape as it should always be known
   const auto* weight_shape = input_defs[1]->Shape();
   int64_t num_dims = weight_shape ? weight_shape->dim_size() : -1;
+  const auto& output = *node.OutputDefs()[0];
+
+  std::vector<int64_t> weight_shape_vec;
+  std::vector<int64_t> x_shape_vec;
+  std::vector<int64_t> output_shape_vec;
+
+  if (!GetShape(*input_defs[1], weight_shape_vec, logger)) {
+    LOGS(logger, VERBOSE) << "Unable to get the shape of 'W' input, which is necessary to check for valid convolutions.";
+    return false;
+  }
+
+  if (!GetShape(*input_defs[0], x_shape_vec, logger)) {
+    LOGS(logger, VERBOSE) << "Unable to get the shape of 'X' input, which is necessary to check for valid convolutions.";
+    return false;
+  }
+
+  if (!GetShape(output, output_shape_vec, logger)) {
+    LOGS(logger, VERBOSE) << "Unable to get the shape of the output, which is necessary to check for valid convolutions.";
+    return false;
+  }
+
+  if (!CheckShapeForConvMemoryLimit(weight_shape_vec, logger) ||
+      !CheckShapeForConvMemoryLimit(x_shape_vec, logger) ||
+      !CheckShapeForConvMemoryLimit(output_shape_vec, logger)) {
+    return false;
+  }
 
   // ONNX spec requires N and C as first 2 dims
   if (num_dims != 3 && num_dims != 4) {
onnxruntime/core/providers/coreml/builders/impl/convtranspose_op_builder.cc

Lines changed: 4 additions & 0 deletions
@@ -137,6 +137,10 @@ bool ConvTransposeOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilder
     return false;
   }
 
+  if (!CheckShapeForConvMemoryLimit(weight_shape, logger) || !CheckShapeForConvMemoryLimit(input_shape, logger)) {
+    return false;
+  }
+
   int64_t num_spatial_dims = narrow<int64_t>(weight_shape.size()) - 2;
 
   NodeAttrHelper helper(node);

onnxruntime/core/providers/coreml/builders/impl/softmax_op_builder.cc

Lines changed: 16 additions & 7 deletions
@@ -30,12 +30,19 @@ Status SoftmaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
   const auto& output_name = node.OutputDefs()[0]->Name();
 
   std::vector<int64_t> data_shape;
-  ORT_RETURN_IF_NOT(GetStaticShape(*node.InputDefs()[0], data_shape, logger), "Failed to get input shape.");
 
   NodeAttrHelper helper(node);
   int32_t axis_default_value = (node.SinceVersion() < 13) ? 1 : -1;
   const auto axis = helper.Get("axis", axis_default_value);
-  auto axis_nonnegative = HandleNegativeAxis(axis, data_shape.size());
+  int64_t axis_nonnegative = axis;
+
+  if (node.SinceVersion() < 13) {
+    ORT_RETURN_IF_NOT(GetStaticShape(*node.InputDefs()[0], data_shape, logger), "Failed to get input shape.");
+    axis_nonnegative = HandleNegativeAxis(axis, data_shape.size());
+  } else {
+    ORT_RETURN_IF_NOT(GetShape(*node.InputDefs()[0], data_shape, logger),
+                      "Softmax input must have shape information.");
+  }
 
   // CoreML's softmax match onnx's softmax behavior since opset 13.
   // For opset < 13, we need to reshape to 2D and set axis to -1 to simulate onnx softmax behavior.
@@ -125,16 +132,18 @@ Status SoftmaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
   return Status::OK();
 }
 
-bool SoftmaxOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& /*input_params*/,
+bool SoftmaxOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
                                          const logging::Logger& logger) const {
   const auto& input_defs = node.InputDefs();
   std::vector<int64_t> input_shape;
-  if (!GetStaticShape(*input_defs[0], input_shape, logger))
+
+  if (!GetShape(*input_defs[0], input_shape, logger)) {
+    LOGS(logger, VERBOSE) << "Softmax input [" << input_defs[0]->Name() << "] must have shape information.";
     return false;
+  }
 
-  const TensorShape shape(input_shape);
-  if (shape.Size() == 0) {
-    LOGS(logger, VERBOSE) << "Empty input data is not supported.";
+  if (!IsStaticShape(input_shape) && node.SinceVersion() < 13) {
+    LOGS(logger, VERBOSE) << "Softmax input must have static shape for ONNX opset < 13";
     return false;
   }
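The opset split mirrors the comment in the diff: CoreML's softmax matches ONNX softmax from opset 13 onward, while for opset < 13 the builder has to reshape the input to 2D and run softmax on axis -1, which is why a static shape is required only in the older case. A small illustration (not ORT code) of that 2D coercion:

```cpp
#include <cstdint>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

// Opset < 13 ONNX Softmax semantics: coerce the input into a 2D matrix of
// size (d_0 * ... * d_{axis-1}, d_axis * ... * d_{n-1}) and apply softmax on
// the last axis. Computing these products is why the builder needs a static
// shape for opset < 13.
std::vector<int64_t> CoerceTo2D(const std::vector<int64_t>& shape, size_t axis) {
  const int64_t outer = std::accumulate(shape.begin(), shape.begin() + axis,
                                        int64_t{1}, std::multiplies<int64_t>());
  const int64_t inner = std::accumulate(shape.begin() + axis, shape.end(),
                                        int64_t{1}, std::multiplies<int64_t>());
  return {outer, inner};
}

int main() {
  const auto d = CoerceTo2D({2, 3, 4}, 1);
  std::cout << d[0] << " x " << d[1] << "\n";  // 2 x 12; softmax runs over the 12-wide axis
}
```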
onnxruntime/test/providers/cpu/tensor/tensor_op_test.cc

Lines changed: 30 additions & 0 deletions
@@ -91,6 +91,36 @@ TEST(TensorOpTest, Reshape_WithOutAllowZero) {
   test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
 }
 
+TEST(TensorOpTest, Reshape_WithOutAllowZeroToDiffRank) {
+  OpTester test("Reshape", 14);
+
+  test.AddInput<float>("data", {2, 3, 12}, std::vector<float>(72, 1.0f));
+  test.AddInput<int64_t>("shape", {4}, {2, 3, 3, 4}, true);
+  test.AddAttribute<int64_t>("allowzero", 0);
+  test.AddOutput<float>("reshaped", {2, 3, 3, 4}, std::vector<float>(72, 1.0f));
+  test.Run();
+}
+
+TEST(TensorOpTest, Reshape_WithOutAllowZeroToDiffRankOneZero) {
+  OpTester test("Reshape", 14);
+
+  test.AddInput<float>("data", {2, 3, 12}, std::vector<float>(72, 1.0f));
+  test.AddInput<int64_t>("shape", {4}, {0, 3, 3, 4}, true);
+  test.AddAttribute<int64_t>("allowzero", 0);
+  test.AddOutput<float>("reshaped", {2, 3, 3, 4}, std::vector<float>(72, 1.0f));
+  test.Run();
+}
+
+TEST(TensorOpTest, Reshape_WithOutAllowZeroToDiffRankTwoZeroes) {
+  OpTester test("Reshape", 14);
+
+  test.AddInput<float>("data", {2, 3, 12}, std::vector<float>(72, 1.0f));
+  test.AddInput<int64_t>("shape", {4}, {0, 0, 3, 4}, true);
+  test.AddAttribute<int64_t>("allowzero", 0);
+  test.AddOutput<float>("reshaped", {2, 3, 3, 4}, std::vector<float>(72, 1.0f));
+  test.Run();
+}
+
 TEST(TensorOpTest, Reshape_WithAllowZero) {
   // TODO: Unskip when fixed #41968513
   if (DefaultDmlExecutionProvider().get() != nullptr) {