Skip to content

Commit 10a383b

Browse files
committed
Guard AveragePool ceil_mode clamp to fix perf regression #27190
1 parent 1a71a5f commit 10a383b

File tree

3 files changed: +111 -44 lines changed

.vscode/settings.json

Lines changed: 87 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,88 @@
11
{
2-
// Always remove trailing whitespaces
3-
"files.trimTrailingWhitespace": true,
4-
"files.insertFinalNewline": true,
5-
"files.trimFinalNewlines": true,
6-
"editor.rulers": [
7-
120
8-
],
9-
"[python]": {
10-
"editor.tabSize": 4,
11-
},
12-
"cpplint.lineLength": 120,
13-
"cpplint.filters": [
14-
"-build/include_subdir",
15-
"-runtime/references"
16-
],
17-
"C_Cpp.autoAddFileAssociations": false,
18-
19-
// Exclude build directories and non-essential folders from C++ parsing
20-
"C_Cpp.files.exclude": {
21-
"**/build/**": true,
22-
"**/build_*/**": true,
23-
"**/cmake/external/**": true,
24-
"**/node_modules/**": true,
25-
"**/.git/**": true
26-
},
27-
28-
// Exclude from search but keep in explorer
29-
"search.exclude": {
30-
"**/build/**": true,
31-
"**/build_*/**": true,
32-
"**/cmake/external/**": true,
33-
"**/node_modules/**": true,
34-
"**/.git/**": true
35-
}
36-
}
2+
"files.trimTrailingWhitespace": true,
3+
"files.insertFinalNewline": true,
4+
"files.trimFinalNewlines": true,
5+
"editor.rulers": [
6+
120
7+
],
8+
"[python]": {
9+
"editor.tabSize": 4
10+
},
11+
"cpplint.lineLength": 120,
12+
"cpplint.filters": [
13+
"-build/include_subdir",
14+
"-runtime/references"
15+
],
16+
"C_Cpp.autoAddFileAssociations": false,
17+
"C_Cpp.files.exclude": {
18+
"**/build/**": true,
19+
"**/build_*/**": true,
20+
"**/cmake/external/**": true,
21+
"**/node_modules/**": true,
22+
"**/.git/**": true
23+
},
24+
"search.exclude": {
25+
"**/build/**": true,
26+
"**/build_*/**": true,
27+
"**/cmake/external/**": true,
28+
"**/node_modules/**": true,
29+
"**/.git/**": true
30+
},
31+
"C_Cpp_Runner.cCompilerPath": "clang",
32+
"C_Cpp_Runner.cppCompilerPath": "clang++",
33+
"C_Cpp_Runner.debuggerPath": "lldb",
34+
"C_Cpp_Runner.cStandard": "",
35+
"C_Cpp_Runner.cppStandard": "",
36+
"C_Cpp_Runner.msvcBatchPath": "",
37+
"C_Cpp_Runner.useMsvc": false,
38+
"C_Cpp_Runner.warnings": [
39+
"-Wall",
40+
"-Wextra",
41+
"-Wpedantic",
42+
"-Wshadow",
43+
"-Wformat=2",
44+
"-Wcast-align",
45+
"-Wconversion",
46+
"-Wsign-conversion",
47+
"-Wnull-dereference"
48+
],
49+
"C_Cpp_Runner.msvcWarnings": [
50+
"/W4",
51+
"/permissive-",
52+
"/w14242",
53+
"/w14287",
54+
"/w14296",
55+
"/w14311",
56+
"/w14826",
57+
"/w44062",
58+
"/w44242",
59+
"/w14905",
60+
"/w14906",
61+
"/w14263",
62+
"/w44265",
63+
"/w14928"
64+
],
65+
"C_Cpp_Runner.enableWarnings": true,
66+
"C_Cpp_Runner.warningsAsError": false,
67+
"C_Cpp_Runner.compilerArgs": [],
68+
"C_Cpp_Runner.linkerArgs": [],
69+
"C_Cpp_Runner.includePaths": [],
70+
"C_Cpp_Runner.includeSearch": [
71+
"*",
72+
"**/*"
73+
],
74+
"C_Cpp_Runner.excludeSearch": [
75+
"**/build",
76+
"**/build/**",
77+
"**/.*",
78+
"**/.*/**",
79+
"**/.vscode",
80+
"**/.vscode/**"
81+
],
82+
"C_Cpp_Runner.useAddressSanitizer": false,
83+
"C_Cpp_Runner.useUndefinedSanitizer": false,
84+
"C_Cpp_Runner.useLeakSanitizer": false,
85+
"C_Cpp_Runner.showCompilationTime": false,
86+
"C_Cpp_Runner.useLinkTimeOptimization": false,
87+
"C_Cpp_Runner.msvcSecureNoWarnings": false
88+
}

onnxruntime/core/providers/cpu/nn/pool.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ Status AveragePoolV19<T>::Compute(OpKernelContext* context) const {
289289

290290
RunLoop<AveragePool1DTask<T>>(tp, onnxruntime::narrow<size_t>(total_channels),
291291
{X_data, Y_data, x_step, y_step, dilation_h, pooled_height, stride_h(),
292-
height, kernel_shape, pads, pool_attrs_.count_include_pad, p_});
292+
height, kernel_shape, pads, pool_attrs_.count_include_pad, p_, pool_attrs_.ceil_mode});
293293
break;
294294
}
295295

@@ -301,7 +301,7 @@ Status AveragePoolV19<T>::Compute(OpKernelContext* context) const {
301301
RunLoop<AveragePool2DTask<T>>(
302302
tp, onnxruntime::narrow<size_t>(total_channels),
303303
{X_data, Y_data, x_step, y_step, dilation_h, dilation_w, pooled_height, pooled_width, stride_h(),
304-
stride_w(), height, width, kernel_shape, pads, pool_attrs_.count_include_pad, p_});
304+
stride_w(), height, width, kernel_shape, pads, pool_attrs_.count_include_pad, p_, pool_attrs_.ceil_mode});
305305
break;
306306
}
307307
case 3: {
@@ -314,7 +314,7 @@ Status AveragePoolV19<T>::Compute(OpKernelContext* context) const {
314314
{X_data, Y_data, x_step, y_step,
315315
dilation_h, dilation_w, dilation_d, pooled_height, pooled_width,
316316
pooled_depth, stride_h(), stride_w(), stride_d(), height,
317-
width, depth, kernel_shape, pads, pool_attrs_.count_include_pad, p_});
317+
width, depth, kernel_shape, pads, pool_attrs_.count_include_pad, p_, pool_attrs_.ceil_mode});
318318
break;
319319
}
320320
default:

onnxruntime/core/providers/cpu/nn/pool_functors.h

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,7 @@ struct AveragePool1DTask final {
390390
gsl::span<const int64_t> pads;
391391
bool count_include_pad;
392392
int64_t p;
393+
int64_t ceil_mode;
393394
TensorOpCost Cost() {
394395
double loop_count = static_cast<double>(pooled_height * kernel_shape[0]);
395396
return TensorOpCost{loop_count, loop_count, loop_count};
@@ -406,7 +407,9 @@ struct AveragePool1DTask final {
406407
for (int64_t ph = 0; ph < pooled_height; ++ph) {
407408
int64_t hstart = ph * stride_h - pads[0];
408409
int64_t hend = hstart + kernel_shape[0] * dilation_h;
409-
hend = std::min(hend, height + pads[1]);
410+
if (ceil_mode) {
411+
hend = std::min(hend, height + pads[1]);
412+
}
410413
y_d[ph] = 0;
411414
int total_elements = 0;
412415
for (int64_t h = hstart; h < hend; h += dilation_h) {
@@ -444,6 +447,7 @@ struct AveragePool2DTask final {
444447
gsl::span<const int64_t> pads;
445448
bool count_include_pad;
446449
int64_t p;
450+
int64_t ceil_mode;
447451

448452
TensorOpCost Cost() {
449453
double loop_count = static_cast<double>(pooled_height * pooled_width * kernel_shape[0] * kernel_shape[1]);
@@ -462,11 +466,15 @@ struct AveragePool2DTask final {
462466
for (int64_t ph = 0; ph < pooled_height; ++ph) {
463467
int64_t hstart = ph * stride_h - pads[0];
464468
int64_t hend = hstart + kernel_shape[0] * dilation_h;
465-
hend = std::min(hend, height + pads[1]);
469+
if (ceil_mode) {
470+
hend = std::min(hend, height + pads[1]);
471+
}
466472
for (int64_t pw = 0; pw < pooled_width; ++pw) {
467473
int64_t wstart = pw * stride_w - pads[1];
468474
int64_t wend = wstart + kernel_shape[1] * dilation_w;
469-
wend = std::min(wend, width + pads[3]);
475+
if (ceil_mode) {
476+
wend = std::min(wend, width + pads[3]);
477+
}
470478
const int64_t pool_index = ph * pooled_width + pw;
471479
y_d[pool_index] = 0;
472480
int total_elements = 0;
@@ -515,6 +523,7 @@ struct AveragePool3DTask {
515523
gsl::span<const int64_t> pads;
516524
bool count_include_pad;
517525
int64_t p;
526+
int64_t ceil_mode;
518527

519528
void operator()(std::ptrdiff_t begin, std::ptrdiff_t end) const {
520529
for (std::ptrdiff_t c = begin; c < end; ++c) {
@@ -535,15 +544,21 @@ struct AveragePool3DTask {
535544
for (int64_t ph = 0; ph < pooled_height; ++ph) {
536545
int64_t hstart = ph * stride_h - pads[0];
537546
int64_t hend = hstart + kernel_shape[0] * dilation_h;
538-
hend = std::min(hend, height + pads[1]);
547+
if (ceil_mode) {
548+
hend = std::min(hend, height + pads[1]);
549+
}
539550
for (int64_t pw = 0; pw < pooled_width; ++pw) {
540551
int64_t wstart = pw * stride_w - pads[1];
541552
int64_t wend = wstart + kernel_shape[1] * dilation_w;
542-
wend = std::min(wend, width + pads[3]);
553+
if (ceil_mode) {
554+
wend = std::min(wend, width + pads[3]);
555+
}
543556
for (int64_t pd = 0; pd < pooled_depth; ++pd) {
544557
int64_t dstart = pd * stride_d - pads[2];
545558
int64_t dend = dstart + kernel_shape[2] * dilation_d;
546-
dend = std::min(dend, depth + pads[5]);
559+
if (ceil_mode) {
560+
dend = std::min(dend, depth + pads[5]);
561+
}
547562
const int64_t pool_index = ph * pooled_width * pooled_depth + pw * pooled_depth + pd;
548563
y_d[pool_index] = 0;
549564
int total_elements = 0;

0 commit comments

Comments
 (0)