From 10a383b132422d0e099ee9365aa90fbe9f56885c Mon Sep 17 00:00:00 2001 From: Alaska Tengli Date: Thu, 12 Feb 2026 10:38:07 -0500 Subject: [PATCH 1/4] Guard AveragePool ceil_mode clamp to fix perf regression #27190 --- .vscode/settings.json | 122 +++++++++++++----- onnxruntime/core/providers/cpu/nn/pool.cc | 6 +- .../core/providers/cpu/nn/pool_functors.h | 27 +++- 3 files changed, 111 insertions(+), 44 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 9074103775bc0..c16a895597321 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,36 +1,88 @@ { - // Always remove trailing whitespaces - "files.trimTrailingWhitespace": true, - "files.insertFinalNewline": true, - "files.trimFinalNewlines": true, - "editor.rulers": [ - 120 - ], - "[python]": { - "editor.tabSize": 4, - }, - "cpplint.lineLength": 120, - "cpplint.filters": [ - "-build/include_subdir", - "-runtime/references" - ], - "C_Cpp.autoAddFileAssociations": false, - - // Exclude build directories and non-essential folders from C++ parsing - "C_Cpp.files.exclude": { - "**/build/**": true, - "**/build_*/**": true, - "**/cmake/external/**": true, - "**/node_modules/**": true, - "**/.git/**": true - }, - - // Exclude from search but keep in explorer - "search.exclude": { - "**/build/**": true, - "**/build_*/**": true, - "**/cmake/external/**": true, - "**/node_modules/**": true, - "**/.git/**": true - } -} + "files.trimTrailingWhitespace": true, + "files.insertFinalNewline": true, + "files.trimFinalNewlines": true, + "editor.rulers": [ + 120 + ], + "[python]": { + "editor.tabSize": 4 + }, + "cpplint.lineLength": 120, + "cpplint.filters": [ + "-build/include_subdir", + "-runtime/references" + ], + "C_Cpp.autoAddFileAssociations": false, + "C_Cpp.files.exclude": { + "**/build/**": true, + "**/build_*/**": true, + "**/cmake/external/**": true, + "**/node_modules/**": true, + "**/.git/**": true + }, + "search.exclude": { + "**/build/**": true, + "**/build_*/**": true, + "**/cmake/external/**": true, + "**/node_modules/**": true, + "**/.git/**": true + }, + "C_Cpp_Runner.cCompilerPath": "clang", + "C_Cpp_Runner.cppCompilerPath": "clang++", + "C_Cpp_Runner.debuggerPath": "lldb", + "C_Cpp_Runner.cStandard": "", + "C_Cpp_Runner.cppStandard": "", + "C_Cpp_Runner.msvcBatchPath": "", + "C_Cpp_Runner.useMsvc": false, + "C_Cpp_Runner.warnings": [ + "-Wall", + "-Wextra", + "-Wpedantic", + "-Wshadow", + "-Wformat=2", + "-Wcast-align", + "-Wconversion", + "-Wsign-conversion", + "-Wnull-dereference" + ], + "C_Cpp_Runner.msvcWarnings": [ + "/W4", + "/permissive-", + "/w14242", + "/w14287", + "/w14296", + "/w14311", + "/w14826", + "/w44062", + "/w44242", + "/w14905", + "/w14906", + "/w14263", + "/w44265", + "/w14928" + ], + "C_Cpp_Runner.enableWarnings": true, + "C_Cpp_Runner.warningsAsError": false, + "C_Cpp_Runner.compilerArgs": [], + "C_Cpp_Runner.linkerArgs": [], + "C_Cpp_Runner.includePaths": [], + "C_Cpp_Runner.includeSearch": [ + "*", + "**/*" + ], + "C_Cpp_Runner.excludeSearch": [ + "**/build", + "**/build/**", + "**/.*", + "**/.*/**", + "**/.vscode", + "**/.vscode/**" + ], + "C_Cpp_Runner.useAddressSanitizer": false, + "C_Cpp_Runner.useUndefinedSanitizer": false, + "C_Cpp_Runner.useLeakSanitizer": false, + "C_Cpp_Runner.showCompilationTime": false, + "C_Cpp_Runner.useLinkTimeOptimization": false, + "C_Cpp_Runner.msvcSecureNoWarnings": false +} \ No newline at end of file diff --git a/onnxruntime/core/providers/cpu/nn/pool.cc b/onnxruntime/core/providers/cpu/nn/pool.cc index d6b9ed693432b..f38b63c0a9663 100644 --- a/onnxruntime/core/providers/cpu/nn/pool.cc +++ b/onnxruntime/core/providers/cpu/nn/pool.cc @@ -289,7 +289,7 @@ Status AveragePoolV19::Compute(OpKernelContext* context) const { RunLoop>(tp, onnxruntime::narrow(total_channels), {X_data, Y_data, x_step, y_step, dilation_h, pooled_height, stride_h(), - height, kernel_shape, pads, pool_attrs_.count_include_pad, p_}); + height, kernel_shape, pads, pool_attrs_.count_include_pad, p_, pool_attrs_.ceil_mode}); break; } @@ -301,7 +301,7 @@ Status AveragePoolV19::Compute(OpKernelContext* context) const { RunLoop>( tp, onnxruntime::narrow(total_channels), {X_data, Y_data, x_step, y_step, dilation_h, dilation_w, pooled_height, pooled_width, stride_h(), - stride_w(), height, width, kernel_shape, pads, pool_attrs_.count_include_pad, p_}); + stride_w(), height, width, kernel_shape, pads, pool_attrs_.count_include_pad, p_, pool_attrs_.ceil_mode}); break; } case 3: { @@ -314,7 +314,7 @@ Status AveragePoolV19::Compute(OpKernelContext* context) const { {X_data, Y_data, x_step, y_step, dilation_h, dilation_w, dilation_d, pooled_height, pooled_width, pooled_depth, stride_h(), stride_w(), stride_d(), height, - width, depth, kernel_shape, pads, pool_attrs_.count_include_pad, p_}); + width, depth, kernel_shape, pads, pool_attrs_.count_include_pad, p_, pool_attrs_.ceil_mode}); break; } default: diff --git a/onnxruntime/core/providers/cpu/nn/pool_functors.h b/onnxruntime/core/providers/cpu/nn/pool_functors.h index 476a9a0338969..fa3155eb70b1f 100644 --- a/onnxruntime/core/providers/cpu/nn/pool_functors.h +++ b/onnxruntime/core/providers/cpu/nn/pool_functors.h @@ -390,6 +390,7 @@ struct AveragePool1DTask final { gsl::span pads; bool count_include_pad; int64_t p; + int64_t ceil_mode; TensorOpCost Cost() { double loop_count = static_cast(pooled_height * kernel_shape[0]); return TensorOpCost{loop_count, loop_count, loop_count}; @@ -406,7 +407,9 @@ struct AveragePool1DTask final { for (int64_t ph = 0; ph < pooled_height; ++ph) { int64_t hstart = ph * stride_h - pads[0]; int64_t hend = hstart + kernel_shape[0] * dilation_h; - hend = std::min(hend, height + pads[1]); + if (ceil_mode) { + hend = std::min(hend, height + pads[1]); + } y_d[ph] = 0; int total_elements = 0; for (int64_t h = hstart; h < hend; h += dilation_h) { @@ -444,6 +447,7 @@ struct AveragePool2DTask final { gsl::span pads; bool count_include_pad; int64_t p; + int64_t ceil_mode; TensorOpCost Cost() { double loop_count = static_cast(pooled_height * pooled_width * kernel_shape[0] * kernel_shape[1]); @@ -462,11 +466,15 @@ struct AveragePool2DTask final { for (int64_t ph = 0; ph < pooled_height; ++ph) { int64_t hstart = ph * stride_h - pads[0]; int64_t hend = hstart + kernel_shape[0] * dilation_h; - hend = std::min(hend, height + pads[1]); + if (ceil_mode) { + hend = std::min(hend, height + pads[1]); + } for (int64_t pw = 0; pw < pooled_width; ++pw) { int64_t wstart = pw * stride_w - pads[1]; int64_t wend = wstart + kernel_shape[1] * dilation_w; - wend = std::min(wend, width + pads[3]); + if (ceil_mode) { + wend = std::min(wend, width + pads[3]); + } const int64_t pool_index = ph * pooled_width + pw; y_d[pool_index] = 0; int total_elements = 0; @@ -515,6 +523,7 @@ struct AveragePool3DTask { gsl::span pads; bool count_include_pad; int64_t p; + int64_t ceil_mode; void operator()(std::ptrdiff_t begin, std::ptrdiff_t end) const { for (std::ptrdiff_t c = begin; c < end; ++c) { @@ -535,15 +544,21 @@ struct AveragePool3DTask { for (int64_t ph = 0; ph < pooled_height; ++ph) { int64_t hstart = ph * stride_h - pads[0]; int64_t hend = hstart + kernel_shape[0] * dilation_h; - hend = std::min(hend, height + pads[1]); + if (ceil_mode) { + hend = std::min(hend, height + pads[1]); + } for (int64_t pw = 0; pw < pooled_width; ++pw) { int64_t wstart = pw * stride_w - pads[1]; int64_t wend = wstart + kernel_shape[1] * dilation_w; - wend = std::min(wend, width + pads[3]); + if (ceil_mode) { + wend = std::min(wend, width + pads[3]); + } for (int64_t pd = 0; pd < pooled_depth; ++pd) { int64_t dstart = pd * stride_d - pads[2]; int64_t dend = dstart + kernel_shape[2] * dilation_d; - dend = std::min(dend, depth + pads[5]); + if (ceil_mode) { + dend = std::min(dend, depth + pads[5]); + } const int64_t pool_index = ph * pooled_width * pooled_depth + pw * pooled_depth + pd; y_d[pool_index] = 0; int total_elements = 0; From 2e66c837e29e201d5f93e19fe1cf90968eb7d4f0 Mon Sep 17 00:00:00 2001 From: Alaska Tengli Date: Thu, 12 Feb 2026 14:21:13 -0500 Subject: [PATCH 2/4] Removed unrelated vscode settings change --- .vscode/settings.json | 58 +------------------------------------------ 1 file changed, 1 insertion(+), 57 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index c16a895597321..9ddd8f061d2f6 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -28,61 +28,5 @@ "**/node_modules/**": true, "**/.git/**": true }, - "C_Cpp_Runner.cCompilerPath": "clang", - "C_Cpp_Runner.cppCompilerPath": "clang++", - "C_Cpp_Runner.debuggerPath": "lldb", - "C_Cpp_Runner.cStandard": "", - "C_Cpp_Runner.cppStandard": "", - "C_Cpp_Runner.msvcBatchPath": "", - "C_Cpp_Runner.useMsvc": false, - "C_Cpp_Runner.warnings": [ - "-Wall", - "-Wextra", - "-Wpedantic", - "-Wshadow", - "-Wformat=2", - "-Wcast-align", - "-Wconversion", - "-Wsign-conversion", - "-Wnull-dereference" - ], - "C_Cpp_Runner.msvcWarnings": [ - "/W4", - "/permissive-", - "/w14242", - "/w14287", - "/w14296", - "/w14311", - "/w14826", - "/w44062", - "/w44242", - "/w14905", - "/w14906", - "/w14263", - "/w44265", - "/w14928" - ], - "C_Cpp_Runner.enableWarnings": true, - "C_Cpp_Runner.warningsAsError": false, - "C_Cpp_Runner.compilerArgs": [], - "C_Cpp_Runner.linkerArgs": [], - "C_Cpp_Runner.includePaths": [], - "C_Cpp_Runner.includeSearch": [ - "*", - "**/*" - ], - "C_Cpp_Runner.excludeSearch": [ - "**/build", - "**/build/**", - "**/.*", - "**/.*/**", - "**/.vscode", - "**/.vscode/**" - ], - "C_Cpp_Runner.useAddressSanitizer": false, - "C_Cpp_Runner.useUndefinedSanitizer": false, - "C_Cpp_Runner.useLeakSanitizer": false, - "C_Cpp_Runner.showCompilationTime": false, - "C_Cpp_Runner.useLinkTimeOptimization": false, - "C_Cpp_Runner.msvcSecureNoWarnings": false + "C_Cpp_Runner.msvcBatchPath": "" } \ No newline at end of file From 07a9512c7424f03425093b54d980d9f933053570 Mon Sep 17 00:00:00 2001 From: Alaska Tengli Date: Thu, 12 Feb 2026 14:52:03 -0500 Subject: [PATCH 3/4] Removed unrelated vscode settings --- .vscode/settings.json | 32 -------------------------------- 1 file changed, 32 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 9ddd8f061d2f6..0000000000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "files.trimTrailingWhitespace": true, - "files.insertFinalNewline": true, - "files.trimFinalNewlines": true, - "editor.rulers": [ - 120 - ], - "[python]": { - "editor.tabSize": 4 - }, - "cpplint.lineLength": 120, - "cpplint.filters": [ - "-build/include_subdir", - "-runtime/references" - ], - "C_Cpp.autoAddFileAssociations": false, - "C_Cpp.files.exclude": { - "**/build/**": true, - "**/build_*/**": true, - "**/cmake/external/**": true, - "**/node_modules/**": true, - "**/.git/**": true - }, - "search.exclude": { - "**/build/**": true, - "**/build_*/**": true, - "**/cmake/external/**": true, - "**/node_modules/**": true, - "**/.git/**": true - }, - "C_Cpp_Runner.msvcBatchPath": "" -} \ No newline at end of file From 401f384e0fad03efaa528356bd90eb0a99423081 Mon Sep 17 00:00:00 2001 From: Alaska Tengli Date: Fri, 13 Feb 2026 22:30:51 -0500 Subject: [PATCH 4/4] Restore .vscode/settings.json --- .vscode/settings.json | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000000000..9074103775bc0 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,36 @@ +{ + // Always remove trailing whitespaces + "files.trimTrailingWhitespace": true, + "files.insertFinalNewline": true, + "files.trimFinalNewlines": true, + "editor.rulers": [ + 120 + ], + "[python]": { + "editor.tabSize": 4, + }, + "cpplint.lineLength": 120, + "cpplint.filters": [ + "-build/include_subdir", + "-runtime/references" + ], + "C_Cpp.autoAddFileAssociations": false, + + // Exclude build directories and non-essential folders from C++ parsing + "C_Cpp.files.exclude": { + "**/build/**": true, + "**/build_*/**": true, + "**/cmake/external/**": true, + "**/node_modules/**": true, + "**/.git/**": true + }, + + // Exclude from search but keep in explorer + "search.exclude": { + "**/build/**": true, + "**/build_*/**": true, + "**/cmake/external/**": true, + "**/node_modules/**": true, + "**/.git/**": true + } +}