diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index d454df3393f2b..2cc8531dc3d79 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -85,7 +85,7 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { std::unique_ptr selector_no_16bit_and_positive_scale = std::make_unique(false, true, false, providers); qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_no_int16_and_positive_scale_name, - {{"MaxPool", {12}}, + {{"MaxPool", {12, 22}}, {"ReduceMax", {}}, {"ReduceMin", {}}}, std::move(selector_no_16bit_and_positive_scale), diff --git a/onnxruntime/test/optimizer/qdq_transformer_test.cc b/onnxruntime/test/optimizer/qdq_transformer_test.cc index 37da2d4247e34..7f9c5ac21994f 100644 --- a/onnxruntime/test/optimizer/qdq_transformer_test.cc +++ b/onnxruntime/test/optimizer/qdq_transformer_test.cc @@ -989,6 +989,57 @@ TEST(QDQTransformerTests, ReshapeDropQDQ) { RunReshapeDropQDQTestCase({1, 3, 2, 2}, {1, 12}, false, 21); // Use int16 ONNX QDQ ops } +// Runs a test case that checks if Q/DQ nodes are dropped from DQ -> MaxPool -> Q. 
+template <typename QuantType>
+static void RunMaxPoolDropQDQTestCase(bool use_contrib_qdq = false,
+                                      int opset = 12) {
+  auto build_test_case = [use_contrib_qdq](ModelTestBuilder& builder) {  // builds DQ -> MaxPool -> Q
+    constexpr QuantType qmin = std::numeric_limits<QuantType>::min();
+    constexpr QuantType qmax = std::numeric_limits<QuantType>::max();
+
+    const std::vector<int64_t> input_shape = {1, 17, 17, 3};
+    auto* input_arg = builder.MakeInput<QuantType>(input_shape, qmin, qmax);
+    auto* output_arg = builder.MakeOutput();
+    QuantType zero_point = 1 + (qmax + qmin) / 2;  // one above the truncated midpoint of QuantType's range
+
+    // add DequantizeLinear (positive scale of 0.003)
+    auto* input_arg_dq = builder.MakeIntermediate();
+    builder.AddDequantizeLinearNode<QuantType>(input_arg, .003f, zero_point, input_arg_dq, use_contrib_qdq);
+
+    // add MaxPool (2x2 kernel, auto_pad = VALID)
+    auto* maxpool_output = builder.MakeIntermediate();
+    Node& maxpool_node = builder.AddNode("MaxPool", {input_arg_dq}, {maxpool_output});
+    maxpool_node.AddAttribute("auto_pad", "VALID");
+    maxpool_node.AddAttribute("kernel_shape", std::vector<int64_t>({2, 2}));
+
+    // add QuantizeLinear (same scale and zero point as the DequantizeLinear above)
+    builder.AddQuantizeLinearNode<QuantType>(maxpool_output, .003f, zero_point, output_arg, use_contrib_qdq);
+  };
+
+  auto check_graph = [use_contrib_qdq](InferenceSessionWrapper& session) {  // expects only MaxPool to remain
+    auto op_to_count = CountOpsInGraph(session.GetGraph());
+    const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq);
+    EXPECT_EQ(op_to_count["MaxPool"], 1);
+    EXPECT_EQ(op_to_count[qdq_keys.quantize_linear], 0);
+    EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], 0);
+  };
+
+  TransformerTester(build_test_case, check_graph, TransformerLevel::Level1, TransformerLevel::Level2, opset);
+}
+
+// Checks that Q/DQ nodes are dropped from DQ -> MaxPool -> Q. Uses 8-bit Q/DQ ops.
+TEST(QDQTransformerTests, MaxPoolDropQDQ) {
+  // Opset 12 (the helper's default opset), signed and unsigned 8-bit quantization types
+  RunMaxPoolDropQDQTestCase<int8_t>();
+  RunMaxPoolDropQDQTestCase<int8_t>(true);  // Use com.microsoft QDQ ops
+  RunMaxPoolDropQDQTestCase<uint8_t>();
+  RunMaxPoolDropQDQTestCase<uint8_t>(true);  // Use com.microsoft QDQ ops
+
+  // Opset 22 (ONNX-domain QDQ ops only)
+  RunMaxPoolDropQDQTestCase<int8_t>(false, 22);
+  RunMaxPoolDropQDQTestCase<uint8_t>(false, 22);
+}
+
 // Runs a test case that checks if Q/DQ nodes are *not* dropped from DQ -> MaxPool -> Q if the quantization scale is
 // negative.
 template