
Commit daf9565

Quant tool: Consistent get_qdq_config and get_qnn_qdq_config behavior (#23856)
1 parent 0a6b05f commit daf9565
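
For readers skimming the diff, here is a minimal sketch of how the two new `get_qdq_config()` keyword arguments introduced below might be used. The model path, input name, and toy data reader are hypothetical placeholders, not part of this commit:

import numpy as np
from onnxruntime.quantization.calibrate import CalibrationDataReader
from onnxruntime.quantization.quantize import get_qdq_config, quantize

class RandomDataReader(CalibrationDataReader):
    """Feeds a few random calibration samples; assumes one input named 'input_0'."""

    def __init__(self, num_samples: int = 4):
        self._data = iter(
            [{"input_0": np.random.rand(1, 8, 8).astype(np.float32)} for _ in range(num_samples)]
        )

    def get_next(self):
        return next(self._data, None)  # None signals end of calibration data

qdq_config = get_qdq_config(
    "model.onnx",                                    # hypothetical model path
    RandomDataReader(),
    op_types_to_quantize=["Conv", "MatMul"],         # new arg: quantize only these op types
    calibration_providers=["CPUExecutionProvider"],  # new arg: EPs for the calibration session
)
quantize("model.onnx", "model.qdq.onnx", qdq_config)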

3 files changed: +81 -13 lines changed

onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py

Lines changed: 3 additions & 3 deletions
@@ -204,9 +204,9 @@ def get_qnn_qdq_config(
         calibrate_method=calibrate_method,
         activation_type=activation_type,
         weight_type=weight_type,
-        op_types_to_quantize=op_types_to_quantize
-        if op_types_to_quantize
-        else list(op_types.difference(OP_TYPES_TO_EXCLUDE)),
+        op_types_to_quantize=(
+            op_types_to_quantize if op_types_to_quantize else list(op_types.difference(OP_TYPES_TO_EXCLUDE))
+        ),
         nodes_to_exclude=nodes_to_exclude,
         per_channel=per_channel,
         use_external_data_format=(model_has_external_data or model.ByteSize() >= MODEL_SIZE_THRESHOLD),
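
The hunk above is a formatting-only change (parenthesized conditional expression); the semantics are unchanged: an explicit `op_types_to_quantize` list is used as-is, otherwise every op type found in the model minus `OP_TYPES_TO_EXCLUDE` is quantized. A hedged call sketch, where the model path and data reader are placeholders:

from onnxruntime.quantization import QuantType
from onnxruntime.quantization.execution_providers.qnn import get_qnn_qdq_config

qnn_config = get_qnn_qdq_config(
    "model.onnx",                   # hypothetical model path
    data_reader,                    # any CalibrationDataReader instance
    activation_type=QuantType.QUInt16,
    weight_type=QuantType.QUInt8,
    op_types_to_quantize=None,      # None => all op types except OP_TYPES_TO_EXCLUDE
)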

onnxruntime/python/tools/quantization/quantize.py

Lines changed: 22 additions & 10 deletions
@@ -240,6 +240,8 @@ def get_qdq_config(
     keep_removable_activations: bool = False,
     min_real_range: float | None = None,
     tensor_quant_overrides: dict[str, list[dict[str, Any]]] | None = None,
+    calibration_providers: list[str] | None = None,
+    op_types_to_quantize: list[str] | None = None,
     nodes_to_exclude: list[str] | Callable[[onnx.ModelProto, onnx.NodeProto], bool] | None = None,
     extra_options: dict | None = None,
 ) -> StaticQuantConfig:
@@ -294,6 +296,10 @@ def get_qdq_config(
             'convert["recv_nodes"] = Set : Set of node names that consume the converted activation,
                                           other nodes get the original type. If not specified,
                                           assume all consumer nodes get the converted type.
+        calibration_providers: Execution providers to run the session during calibration. Default is None, which uses
+            ["CPUExecutionProvider"].
+        op_types_to_quantize: List of operator types to quantize. If None, all operators other than Cast, DequantizeLinear,
+            and QuantizeLinear are quantized.
         nodes_to_exclude: List of node names to exclude from quantization. Alternatively, can provide a function that
             accepts an onnx.ModelProto and onnx.NodeProto as arguments and returns True if the given onnx.NodeProto
             should be excluded from quantization.
@@ -324,17 +330,20 @@ def get_qdq_config(
         if onnx.external_data_helper.uses_external_data(initializer):
             model_has_external_data = True
 
-    final_nodes_to_exclude = []
-    if nodes_to_exclude is not None and isinstance(nodes_to_exclude, list):
-        final_nodes_to_exclude.extend(nodes_to_exclude)
+    op_types_to_quantize_set = set(op_types_to_quantize) if op_types_to_quantize else None
+    nodes_to_exclude_set = set(nodes_to_exclude) if isinstance(nodes_to_exclude, list) else set()
 
     # Iterate through nodes to get all operator types in the model and
     # call user's function to filter out nodes from quantization.
     for node in model.graph.node:
-        op_types.add(node.op_type)
-        if nodes_to_exclude is not None and callable(nodes_to_exclude):
-            if nodes_to_exclude(model, node):
-                final_nodes_to_exclude.append(node.name)
+        if op_types_to_quantize_set and node.op_type not in op_types_to_quantize_set:
+            continue
+        if node.name in nodes_to_exclude_set:
+            continue
+        if callable(nodes_to_exclude) and nodes_to_exclude(model, node):
+            nodes_to_exclude_set.add(node.name)
+        else:
+            op_types.add(node.op_type)
 
     final_extra_options = {
         "MinimumRealRange": min_real_range,
@@ -378,11 +387,14 @@ def get_qdq_config(
         quant_format=QuantFormat.QDQ,
         activation_type=activation_type,
         weight_type=weight_type,
-        op_types_to_quantize=list(op_types.difference(op_types_to_exclude)),
-        nodes_to_exclude=final_nodes_to_exclude,
+        op_types_to_quantize=(
+            op_types_to_quantize if op_types_to_quantize else list(op_types.difference(op_types_to_exclude))
+        ),
+        nodes_to_exclude=list(nodes_to_exclude_set),
         per_channel=per_channel,
         reduce_range=reduce_range,
         use_external_data_format=(model_has_external_data or model.ByteSize() >= MODEL_SIZE_THRESHOLD),
+        calibration_providers=calibration_providers,
         extra_options=final_extra_options,
     )
 
@@ -442,7 +454,7 @@ def check_static_quant_arguments(quant_format: QuantFormat, activation_type: Qua
     if activation_type != QuantType.QFLOAT8E4M3FN and weight_type == QuantType.QFLOAT8E4M3FN:
         raise ValueError(
             f"ONNXRuntime quantization doesn't support data format: activation_type={activation_type} "
-            f"!=QuantType.QFLOAT8E4M3FN, weight_type=QuantType.QFLOAT8E4M3FN."
+            "!=QuantType.QFLOAT8E4M3FN, weight_type=QuantType.QFLOAT8E4M3FN."
         )
 
     if activation_type == QuantType.QFLOAT8E4M3FN and weight_type != QuantType.QFLOAT8E4M3FN:
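
The reworked loop also changes the filtering order: nodes ruled out by `op_types_to_quantize` or by an explicit name list never reach a callable `nodes_to_exclude`, and only nodes that remain quantizable contribute to the collected `op_types` set. A hedged sketch of a callable filter; the model path, data reader, and node-naming convention are hypothetical:

import onnx
from onnxruntime.quantization.quantize import get_qdq_config

def exclude_residual_adds(model: onnx.ModelProto, node: onnx.NodeProto) -> bool:
    # Consulted only for nodes that survive the op-type filter below.
    return node.op_type == "Add" and node.name.startswith("residual_")

qdq_config = get_qdq_config(
    "model.onnx",                           # hypothetical model path
    data_reader,                            # any CalibrationDataReader instance
    op_types_to_quantize=["Add", "Conv"],   # other op types are skipped before the callable runs
    nodes_to_exclude=exclude_residual_adds,
)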

onnxruntime/test/python/quantization/test_get_qdq_config.py

Lines changed: 56 additions & 0 deletions
@@ -156,6 +156,62 @@ def should_exclude_node_(model: onnx.ModelProto, node: onnx.NodeProto) -> bool:
         self.assertTrue(bool(expected_excluded_nodes))
         self.assertEqual(set(qdq_config.nodes_to_exclude), expected_excluded_nodes)
 
+    def test_op_types_to_quantize(self):
+        """
+        Test that get_qdq_config() returns a config that sets the op_types_to_quantize arg.
+        """
+        shape = [1, 8, 8]
+        tensor_type = onnx.TensorProto.FLOAT
+        np_dtype = onnx.helper.tensor_dtype_to_np_dtype(tensor_type)
+        weight = onnx.numpy_helper.from_array(np.ones(shape, dtype=np_dtype), "weight")
+        float_model = self.build_add_model(shape, tensor_type, weight)
+
+        input_data_list = [
+            {"input_0": np.ones(shape, dtype=np_dtype) * np.array(-2, dtype=np_dtype)},
+            {"input_0": np.ones(shape, dtype=np_dtype) * np.array(2, dtype=np_dtype)},
+        ]
+        data_reader = TestDataFeeds(input_data_list)
+
+        # No op_types_to_quantize arg means all ops are quantized.
+        qdq_config = get_qdq_config(float_model, data_reader, op_types_to_quantize=None)
+        self.assertEqual(set(qdq_config.op_types_to_quantize), {"Add"})
+
+        # Specify custom op_types_to_quantize arg.
+        qdq_config = get_qdq_config(float_model, data_reader, op_types_to_quantize=["Mul"])
+        self.assertEqual(set(qdq_config.op_types_to_quantize), {"Mul"})
+
+        # Exclude op_type indirectly by specifying nodes_to_exclude arg.
+        qdq_config = get_qdq_config(
+            float_model,
+            data_reader,
+            nodes_to_exclude=[node.name for node in float_model.graph.node if node.op_type == "Add"],
+        )
+        self.assertEqual(set(qdq_config.op_types_to_quantize), set())
+
+    def test_calibration_providers(self):
+        """
+        Test that get_qdq_config() returns a config that sets the calibration providers arg.
+        """
+        shape = [1, 8, 8]
+        tensor_type = onnx.TensorProto.FLOAT
+        np_dtype = onnx.helper.tensor_dtype_to_np_dtype(tensor_type)
+        weight = onnx.numpy_helper.from_array(np.ones(shape, dtype=np_dtype), "weight")
+        float_model = self.build_add_model(shape, tensor_type, weight)
+
+        input_data_list = [
+            {"input_0": np.ones(shape, dtype=np_dtype) * np.array(-2, dtype=np_dtype)},
+            {"input_0": np.ones(shape, dtype=np_dtype) * np.array(2, dtype=np_dtype)},
+        ]
+        data_reader = TestDataFeeds(input_data_list)
+
+        qdq_config = get_qdq_config(
+            float_model,
+            data_reader,
+            calibration_providers=["CPUExecutionProvider"],
+        )
+        self.assertEqual(qdq_config.calibration_providers, ["CPUExecutionProvider"])
+
     def test_external_data(self):
         """
         Test that get_qdq_config() returns a config that enables external data
