Skip to content

Commit 8ffc33c

Browse files
authored
Fix WebGPU Conv auto_pad=SAME_UPPER padding calculation (#27249)
The WebGPU Conv and ConvTranspose operators produced incorrect results when using auto_pad=SAME_UPPER with strides > 1. Root cause: the head padding values were being unnecessarily recalculated after InferPadsAndOutputShape() had already computed the correct values, and the recalculation formula could produce incorrect results. Fix: use pads[0] and pads[1] directly, since they already contain the correct head padding values computed upstream. This matches the behavior of the TypeScript implementation. Fixes #26734
1 parent b440277 commit 8ffc33c

File tree

3 files changed

+51
-8
lines changed

3 files changed

+51
-8
lines changed

onnxruntime/core/providers/webgpu/nn/conv.cc

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -119,10 +119,11 @@ Status Conv<is_channels_last, is_fused>::ComputeInternal(ComputeContext& context
119119
const auto output_height = output_shape_vector[is_channels_last ? 1 : 2];
120120
const auto output_width = output_shape_vector[is_channels_last ? 2 : 3];
121121

122-
uint32_t auto_pad_adjust = conv_attrs_.auto_pad == AutoPadType::SAME_LOWER ? 1 : 0;
123-
auto pad0 = conv_attrs_.auto_pad == AutoPadType::NOTSET ? pads[0] : (pads[0] + pads[2] + auto_pad_adjust) / 2;
124-
auto pad1 = conv_attrs_.auto_pad == AutoPadType::NOTSET ? pads[1] : (pads[1] + pads[3] + auto_pad_adjust) / 2;
125-
std::vector<uint32_t> updated_pads{pad0, pad1};
122+
// pads[0] and pads[1] already contain the correct head (beginning) padding values
123+
// computed by InferPadsAndOutputShape() which handles auto_pad correctly.
124+
// For SAME_UPPER: head gets less padding (pad_needed / 2)
125+
// For SAME_LOWER: head gets more padding ((pad_needed + 1) / 2)
126+
std::vector<uint32_t> updated_pads{pads[0], pads[1]};
126127

127128
if (CanApplyIm2ColMatMulProgram(context,
128129
is_channels_last,

onnxruntime/core/providers/webgpu/nn/conv_transpose.cc

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,12 +92,11 @@ Status ConvTranspose<is_channels_last>::ComputeInternal(ComputeContext& context)
9292
inputs.push_back(bias);
9393
input_output_shapes.push_back(bias->Shape());
9494
}
95-
uint32_t auto_pad_adjust = conv_transpose_attrs_.auto_pad == AutoPadType::SAME_LOWER ? 1 : 0;
96-
auto pad0 = conv_transpose_attrs_.auto_pad == AutoPadType::NOTSET ? pads[0] : (pads[0] + pads[2] + auto_pad_adjust) / 2;
97-
auto pad1 = conv_transpose_attrs_.auto_pad == AutoPadType::NOTSET ? pads[1] : (pads[1] + pads[3] + auto_pad_adjust) / 2;
95+
// pads[0] and pads[1] already contain the correct head (beginning) padding values
96+
// computed by ComputePadsAndOutputShape() which handles auto_pad correctly.
9897
Tensor* output = context.Output(0, computed_output_shape);
9998
input_output_shapes.push_back(output_shape);
100-
auto program = CreateConvTranspose2DProgram(inputs, {pad0, pad1}, strides, dilations, output, is_channels_last, input_output_shapes, static_cast<uint32_t>(conv_transpose_attrs_.group));
99+
auto program = CreateConvTranspose2DProgram(inputs, {pads[0], pads[1]}, strides, dilations, output, is_channels_last, input_output_shapes, static_cast<uint32_t>(conv_transpose_attrs_.group));
101100
return context.RunProgram(program);
102101
}
103102

onnxruntime/test/providers/cpu/nn/conv_op_test.cc

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -502,6 +502,49 @@ TEST(ConvTest, Conv2D_AutoPad1) {
502502
TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true);
503503
}
504504

505+
// Regression test for issue #26734: SAME_UPPER with stride > 1
506+
// Tests asymmetric padding calculation that was incorrect in WebGPU EP
507+
TEST(ConvTest, Conv2D_AutoPad_SAME_UPPER_Stride2) {
508+
ConvOpAndTestAttributes attrs = {
509+
"SAME_UPPER", // auto_pad
510+
vector<int64_t>{1, 1}, // dilations
511+
1, // group
512+
vector<int64_t>{3, 3}, // kernel_shape
513+
{}, // pads
514+
vector<int64_t>{2, 2}, // strides > 1 triggers asymmetric padding
515+
{} // excluded EPs
516+
};
517+
518+
// 1x1x4x4 input
519+
vector<float> X = {1.0f, 2.0f, 3.0f, 4.0f,
520+
5.0f, 6.0f, 7.0f, 8.0f,
521+
9.0f, 10.0f, 11.0f, 12.0f,
522+
13.0f, 14.0f, 15.0f, 16.0f};
523+
vector<int64_t> X_shape = {1, 1, 4, 4};
524+
525+
// 3x3 kernel of all 1s for easy verification
526+
vector<float> W = {1.0f, 1.0f, 1.0f,
527+
1.0f, 1.0f, 1.0f,
528+
1.0f, 1.0f, 1.0f};
529+
vector<int64_t> W_shape = {1, 1, 3, 3};
530+
531+
// Output: 2x2 (ceil(4/2) = 2)
532+
// SAME_UPPER with total_pad=1: pad_head=0, pad_tail=1
533+
vector<int64_t> Y_shape = {1, 1, 2, 2};
534+
535+
// Expected values:
536+
// (0,0): 1+2+3+5+6+7+9+10+11 = 54
537+
// (0,1): 3+4+0+7+8+0+11+12+0 = 45
538+
// (1,0): 9+10+11+13+14+15+0+0+0 = 72
539+
// (1,1): 11+12+0+15+16+0+0+0+0 = 54
540+
auto expected_vals = {54.0f, 45.0f, 72.0f, 54.0f};
541+
542+
TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape);
543+
544+
// NNAPI/CoreML EP requires weight to be an initializer
545+
TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true);
546+
}
547+
505548
TEST(ConvTest, Conv2D_AutoPad2) {
506549
ConvOpAndTestAttributes attrs = {
507550
"SAME_LOWER", // auto_pad

0 commit comments

Comments (0)