
Commit 4b0cee3 (parent 4883ec5)
fix: Pad/AveragePool fusion (microsoft#23190)
### Description

Fusing Pad and AveragePool requires the fused AveragePool to use `count_include_pad=1`. If the AveragePool already sets some padding of its own and has `count_include_pad=0`, the fusion cannot happen. This PR adds a condition so the fusion is performed only when those attributes allow it; when fusion occurs, `count_include_pad` is always set to `1`.

### Motivation and Context

Fixes microsoft#22177 (mislabelled as a performance issue, but there is an actual bug in the implementation). The bug was introduced in microsoft#21556.
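To see why the `count_include_pad` check is needed, here is a minimal NumPy sketch (not part of the PR; `avgpool2d` is a hypothetical helper) comparing the unfused Pad followed by AveragePool against the fused node. With `count_include_pad=0`, windows touching the pool's own padding divide by fewer elements, so folding the Pad into the pool's `pads` attribute would change the output; with `count_include_pad=1`, the results match exactly.

```python
import numpy as np

def avgpool2d(x, kernel, pads, count_include_pad):
    """Hypothetical stride-1 AveragePool over a 2-D array (spatial dims only)."""
    top, left, bottom, right = pads
    padded = np.pad(x, ((top, bottom), (left, right)))
    kh, kw = kernel
    out = np.empty((padded.shape[0] - kh + 1, padded.shape[1] - kw + 1))
    for i in range(out.shape[0]):
        for j in range(out.shape[1]):
            window = padded[i:i + kh, j:j + kw]
            if count_include_pad:
                divisor = kh * kw
            else:
                # count only window positions that overlap the unpadded input
                rows = min(i + kh, top + x.shape[0]) - max(i, top)
                cols = min(j + kw, left + x.shape[1]) - max(j, left)
                divisor = rows * cols
            out[i, j] = window.sum() / divisor
    return out

x = np.arange(1.0, 10.0).reshape(3, 3)

# Unfused graph: explicit Pad (1 pixel of zeros all around), pool with no pads.
reference = avgpool2d(np.pad(x, 1), (3, 3), (0, 0, 0, 0), count_include_pad=0)

# Fused: the Pad is absorbed into the pool's pads attribute.
assert np.allclose(reference, avgpool2d(x, (3, 3), (1, 1, 1, 1), count_include_pad=1))
assert not np.allclose(reference, avgpool2d(x, (3, 3), (1, 1, 1, 1), count_include_pad=0))
```

The corner window, for example, covers {1, 2, 4, 5} plus five padded zeros: with `count_include_pad=1` the result is 12/9 ≈ 1.333 (matching the expected test output below), while `count_include_pad=0` would give 12/4 = 3.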

File tree: 6 files changed (+217, -2 lines)

onnxruntime/core/optimizer/pad_fusion.cc (+27, -2)

```diff
@@ -10,7 +10,7 @@ namespace onnxruntime {
 
 bool VerifyNotCastChild(const Node& child_node) {
   if (!graph_utils::IsSupportedOptypeVersionAndDomain(child_node, "Conv", {1, 11}) &&
-      !graph_utils::IsSupportedOptypeVersionAndDomain(child_node, "AveragePool", {1, 7, 10, 11, 19}) &&
+      !graph_utils::IsSupportedOptypeVersionAndDomain(child_node, "AveragePool", {7, 10, 11, 19}) &&
       !graph_utils::IsSupportedOptypeVersionAndDomain(child_node, "MaxPool", {1, 8, 10, 11, 12})) {
     return false;
   }
@@ -31,11 +31,32 @@ bool VerifyNotCastChild(const Node& child_node) {
     return false;
   }
 
+  if (child_node.OpType() == "AveragePool") {
+    // in case there's already padding and count_include_pad is 0, fusion can't be performed
+    auto has_pad = false;
+    if (child_node.GetAttributes().find("pads") != child_node.GetAttributes().end()) {
+      auto const& pads_values = child_node.GetAttributes().at("pads").ints();
+      if (!pads_values.empty()) {
+        has_pad = std::any_of(pads_values.begin(), pads_values.end(), [](int64_t value) { return value != 0; });
+      }
+    }
+    if (has_pad && child_node.GetAttributes().find("count_include_pad") != child_node.GetAttributes().end()) {
+      if (child_node.GetAttributes().at("count_include_pad").i() == 0) {
+        return false;
+      }
+    }
+  }
+
   return true;
 }
 
 void UpdatePaddingAttribute(Node& child_node, const std::vector<int64_t>& pads_values, const uint32_t pads_size) {
-  if (child_node.GetAttributes().find("pads") == child_node.GetAttributes().end()) {
+  auto reset_pads = true;
+  if (child_node.GetAttributes().find("pads") != child_node.GetAttributes().end()) {
+    /* pads can be empty, overwrite pads attribute in this case */
+    reset_pads = child_node.GetAttributes().at("pads").ints().empty();
+  }
+  if (reset_pads) {
     std::vector<int64_t> pads(pads_size - 4, 0);
     child_node.AddAttribute("pads", pads);
   }
@@ -49,6 +70,10 @@ void UpdatePaddingAttribute(Node& child_node, const std::vector<int64_t>& pads_v
     uint32_t mirrored_pad_index = pads_index + (pads_size / 2);
     child_pads->Set(mirrored_child_index, child_pads->Get(mirrored_child_index) + pads_values[mirrored_pad_index]);
   }
+
+  if (child_node.OpType() == "AveragePool") {
+    child_node.AddAttribute("count_include_pad", static_cast<int64_t>(1));
+  }
 }
 
 /*
  * Before:
```

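Two notes on the change above. First, AveragePool opset 1 is dropped from the fusion targets, evidently because `count_include_pad` only exists from AveragePool opset 7 onward and the fusion now always sets it. Second, the merge performed by `UpdatePaddingAttribute` copies the Pad node's spatial pads onto the pool's `pads` attribute, skipping the batch and channel dimensions. A small Python sketch of that arithmetic (the function name is mine, not the PR's):

```python
def merge_pad_into_pool_pads(pad_pads, pool_pads=None):
    """Hypothetical sketch of UpdatePaddingAttribute's pads arithmetic.

    pad_pads:  ONNX Pad attribute, [x1_begin, x2_begin, ..., x1_end, x2_end]
               over all input dims (batch, channel, spatial...).
    pool_pads: pooling pads over the spatial dims only,
               [d1_begin, d2_begin, ..., d1_end, d2_end].
    """
    rank = len(pad_pads) // 2
    spatial = rank - 2                       # fusion skips batch and channel dims
    merged = list(pool_pads) if pool_pads else [0] * (2 * spatial)
    for d in range(spatial):
        merged[d] += pad_pads[2 + d]                   # begin side of spatial dim d
        merged[d + spatial] += pad_pads[rank + 2 + d]  # mirrored end side
    return merged

# The test models pad H and W by 1 on each side; folding that into an
# AveragePool that already has pads=[1, 1, 0, 0] yields [2, 2, 1, 1]:
assert merge_pad_into_pool_pads([0, 0, 1, 1, 0, 0, 1, 1], [1, 1, 0, 0]) == [2, 2, 1, 1]
assert merge_pad_into_pool_pads([0, 0, 1, 1, 0, 0, 1, 1]) == [1, 1, 1, 1]
```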
onnxruntime/test/optimizer/graph_transform_test.cc (+122)

```diff
@@ -1622,6 +1622,128 @@ TEST_F(GraphTransformationTests, FusePadWithMaxPoolOpsetLessThan11) {
   }
 }
 
+TEST_F(GraphTransformationTests, FusePadWithAvgPool) {
+  constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/fuse-pad-avgpool.onnx";
+
+  std::shared_ptr<Model> p_model;
+  ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger_));
+  Graph& graph = p_model->MainGraph();
+
+  std::vector<int64_t> expected_pads;
+  GraphViewer graphViewer(graph);
+  for (auto& node_index : graphViewer.GetNodesInTopologicalOrder()) {
+    auto& node = *graph.GetNode(node_index);
+    if (node.OpType() == "Pad") {
+      auto const& pads_proto = node.GetAttributes().at("pads").ints();
+      gsl::span<const int64_t> pads_values = gsl::make_span(pads_proto.data(), pads_proto.size());
+      expected_pads.resize(pads_values.size() - 4);
+      for (uint32_t pads_index = 2, index = 0; pads_index < pads_values.size() / 2; pads_index++, index++) {
+        expected_pads[index] = pads_values[pads_index];
+        expected_pads[index + (expected_pads.size() / 2)] = pads_values[pads_index + (pads_values.size() / 2)];
+      }
+    }
+  }
+
+  onnxruntime::GraphTransformerManager graph_transformation_mgr{5};
+  auto rule_transformer_L1 = std::make_unique<RuleBasedGraphTransformer>("RuleTransformerL1");
+  ASSERT_STATUS_OK(rule_transformer_L1->Register(std::make_unique<PadFusion>()));
+  ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::move(rule_transformer_L1), TransformerLevel::Level1));
+
+  ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, *logger_));
+
+  std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
+  ASSERT_EQ(op_to_count["Pad"], 0);
+  ASSERT_EQ(op_to_count["AveragePool"], 1);
+
+  for (auto& node : graph.Nodes()) {
+    if (node.OpType() == "AveragePool") {
+      auto const& child_pads = node.GetAttributes().at("pads").ints();
+      auto const& count_include_pad = node.GetAttributes().at("count_include_pad");
+      ASSERT_NE(count_include_pad.i(), 0) << "fusion should ensure count_include_pad!=0";
+      ASSERT_EQ(child_pads.size(), static_cast<int32_t>(expected_pads.size()))
+          << "fusion should produce the same size of pads integer as the AvgPool node";
+      for (uint32_t index = 0; index < expected_pads.size(); index++) {
+        ASSERT_EQ(expected_pads[index], child_pads.Get(index))
+            << "fusion does not produce correct padding value";
+      }
+    }
+  }
+}
+
+TEST_F(GraphTransformationTests, FusePadWithAvgPoolWithPad) {
+  constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/fuse-pad-avgpool_with_pad.onnx";
+
+  std::shared_ptr<Model> p_model;
+  ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger_));
+  Graph& graph = p_model->MainGraph();
+
+  std::vector<int64_t> expected_pads;
+  GraphViewer graphViewer(graph);
+  for (auto& node_index : graphViewer.GetNodesInTopologicalOrder()) {
+    auto& node = *graph.GetNode(node_index);
+    if (node.OpType() == "Pad") {
+      auto const& pads_proto = node.GetAttributes().at("pads").ints();
+      gsl::span<const int64_t> pads_values = gsl::make_span(pads_proto.data(), pads_proto.size());
+      expected_pads.resize(pads_values.size() - 4);
+
+      for (uint32_t pads_index = 2, index = 0; pads_index < pads_values.size() / 2; pads_index++, index++) {
+        expected_pads[index] = pads_values[pads_index];
+        expected_pads[index + (expected_pads.size() / 2)] = pads_values[pads_index + (pads_values.size() / 2)];
+      }
+    } else if (node.OpType() == "AveragePool") {
+      auto const& child_pads = node.GetAttributes().at("pads").ints();
+      for (uint32_t index = 0; index < expected_pads.size(); index++) {
+        expected_pads[index] += child_pads.Get(index);
+      }
+    }
+  }
+
+  onnxruntime::GraphTransformerManager graph_transformation_mgr{5};
+  auto rule_transformer_L1 = std::make_unique<RuleBasedGraphTransformer>("RuleTransformerL1");
+  ASSERT_STATUS_OK(rule_transformer_L1->Register(std::make_unique<PadFusion>()));
+  ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::move(rule_transformer_L1), TransformerLevel::Level1));
+
+  ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, *logger_));
+
+  std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
+  ASSERT_EQ(op_to_count["Pad"], 0);
+  ASSERT_EQ(op_to_count["AveragePool"], 1);
+
+  for (auto& node : graph.Nodes()) {
+    if (node.OpType() == "AveragePool") {
+      auto const& child_pads = node.GetAttributes().at("pads").ints();
+      auto const& count_include_pad = node.GetAttributes().at("count_include_pad");
+      ASSERT_NE(count_include_pad.i(), 0) << "fusion should ensure count_include_pad!=0";
+      ASSERT_EQ(child_pads.size(), static_cast<int32_t>(expected_pads.size()))
+          << "fusion should produce the same size of pads integer as the AvgPool node";
+      for (uint32_t index = 0; index < expected_pads.size(); index++) {
+        ASSERT_EQ(expected_pads[index], child_pads.Get(index))
+            << "fusion does not produce correct padding value";
+      }
+    }
+  }
+}
+
+// should not fuse
+TEST_F(GraphTransformationTests, FusePadWithAvgPoolWithPadNoInclude) {
+  constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/fuse-pad-avgpool_with_pad-nofuse.onnx";
+
+  std::shared_ptr<Model> p_model;
+  ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger_));
+  Graph& graph = p_model->MainGraph();
+
+  onnxruntime::GraphTransformerManager graph_transformation_mgr{5};
+  auto rule_transformer_L1 = std::make_unique<RuleBasedGraphTransformer>("RuleTransformerL1");
+  ASSERT_STATUS_OK(rule_transformer_L1->Register(std::make_unique<PadFusion>()));
+  ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::move(rule_transformer_L1), TransformerLevel::Level1));
+
+  ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, *logger_));
+
+  std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
+  ASSERT_EQ(op_to_count["Pad"], 1);
+  ASSERT_EQ(op_to_count["AveragePool"], 1);
+}
+
 TEST_F(GraphTransformationTests, FuseMatmulBNWithInBetweenNodes) {
   constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/fuse-matmul-bn-with-reshape.onnx";
 
```

New file (+68): Python script that generates the three fusion test models (filename not shown in this view)

```python
from pathlib import Path

import numpy as np
import onnx

HERE = Path(__file__).parent.resolve(strict=True)
TEST = False

if TEST:
    import onnxruntime


def generate_fuse_pad_avgpool():
    parameters = {
        "fuse-pad-avgpool": (
            {},
            [[1.333333, 2.333333, 1.777778], [3.0, 5.0, 3.666667], [2.666667, 4.333333, 3.111111]],
        ),
        "fuse-pad-avgpool_with_pad": (
            {"pads": [1, 1, 0, 0], "count_include_pad": 1},
            [
                [0.111111, 0.333333, 0.666667, 0.555556],
                [0.555556, 1.333333, 2.333333, 1.777778],
                [1.333333, 3.0, 5.0, 3.666667],
                [1.222222, 2.666667, 4.333333, 3.111111],
            ],
        ),
        "fuse-pad-avgpool_with_pad-nofuse": (
            {"pads": [1, 1, 0, 0]},
            [
                [0.25, 0.5, 1.0, 0.833333],
                [0.833333, 1.333333, 2.333333, 1.777778],
                [2.0, 3.0, 5.0, 3.666667],
                [1.833333, 2.666667, 4.333333, 3.111111],
            ],
        ),
    }
    for name in parameters:
        model_path = HERE / f"{name}.onnx"
        input_ = onnx.helper.make_tensor_value_info("input", onnx.TensorProto.FLOAT, (1, 1, 3, 3))
        pad = onnx.helper.make_node("Pad", ["input"], ["tp"], mode="constant", pads=[0, 0, 1, 1, 0, 0, 1, 1])
        pool = onnx.helper.make_node("AveragePool", ["tp"], ["output"], kernel_shape=[3, 3], **parameters[name][0])
        nodes = [pad, pool]
        output_shape = (1, 1, 3, 3) if name == "fuse-pad-avgpool" else (1, 1, 4, 4)
        output_ = onnx.helper.make_tensor_value_info("output", onnx.TensorProto.FLOAT, output_shape)
        graph = onnx.helper.make_graph(nodes, name, [input_], [output_])
        model = onnx.helper.make_model(graph, opset_imports=[onnx.helper.make_opsetid("", 7)])
        onnx.checker.check_model(model)
        onnx.save_model(model, model_path)
        if TEST:
            input_array = np.array([[[[1, 2, 3], [4, 5, 6], [7, 8, 9]]]], dtype=np.float32)
            expected = np.array(parameters[name][1], dtype=np.float32)
            session_options = onnxruntime.SessionOptions()
            session_options.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL
            session_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL
            session = onnxruntime.InferenceSession(model_path, session_options)
            out = session.run(["output"], {"input": input_array})
            actual = out[0].squeeze()
            np.testing.assert_allclose(actual, expected, rtol=1e-5, atol=0.0)
            session_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
            session = onnxruntime.InferenceSession(model_path, session_options)
            out = session.run(["output"], {"input": input_array})
            actual = out[0].squeeze()
            np.testing.assert_allclose(actual, expected, rtol=1e-5, atol=0.0)


if __name__ == "__main__":
    generate_fuse_pad_avgpool()
```
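As a quick sanity check outside the unit tests, one can also ask onnxruntime to serialize the optimized graph and confirm the Pad node is gone. A minimal sketch (not part of the PR; it assumes the generator above has been run in the working directory, and the output filename is arbitrary):

```python
import onnx
import onnxruntime

so = onnxruntime.SessionOptions()
so.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_BASIC
so.optimized_model_filepath = "fuse-pad-avgpool.optimized.onnx"  # dump graph after basic (L1) optimizations
onnxruntime.InferenceSession("fuse-pad-avgpool.onnx", so)

# After PadFusion runs, the Pad node should be folded into the AveragePool.
ops = [node.op_type for node in onnx.load(so.optimized_model_filepath).graph.node]
assert "Pad" not in ops and ops.count("AveragePool") == 1
```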
fusion/fuse-pad-avgpool.onnx (binary file not shown)
fusion/fuse-pad-avgpool_with_pad.onnx (binary file not shown)
fusion/fuse-pad-avgpool_with_pad-nofuse.onnx (binary file not shown)
