Commit 3d87295

Merge pull request #224 from fastmachinelearning/feature/extract_conv_quant_bias
Extract quantized biases for Conv/ConvTranspose
2 parents 752850f + d1dc557

File tree

2 files changed: +334 -2 lines

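
As a usage sketch (not part of this commit; the model file name below is hypothetical), the extended transformation is applied like any other QONNX graph transformation:

    from qonnx.core.modelwrapper import ModelWrapper
    from qonnx.transformation.extract_conv_bias import ExtractBiasFromConv

    # hypothetical input: a model whose Conv/ConvTranspose bias input is
    # produced by a standalone Quant/IntQuant/BipolarQuant node
    model = ModelWrapper("model_with_quant_bias.onnx")
    model = model.transform(ExtractBiasFromConv())

    # the bias is now applied by a separate Add node behind the Conv node
    assert len(model.get_nodes_by_op_type("Add")) > 0
    model.save("model_bias_extracted.onnx")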

src/qonnx/transformation/extract_conv_bias.py

Lines changed: 21 additions & 2 deletions
@@ -49,8 +49,16 @@ def apply(self, model):
                     # Extract bias
                     bias = model.get_initializer(n.input[2])
                     if bias is None:
-                        warnings.warn(f"Could not extract bias from node {n}")
-                        continue
+                        # check whether the bias is quantized: the initializer is
+                        # then empty, but the input comes from a Quant node
+                        producer = model.find_producer(n.input[2])
+                        # only continue with the extraction if the producer is a
+                        # Quant node without predecessors
+                        if not (
+                            producer.op_type in ["Quant", "IntQuant", "BipolarQuant"]
+                            and not model.find_direct_predecessors(producer)
+                        ):
+                            warnings.warn(f"Could not extract bias from node {n}")
+                            continue
 
                     # Insert bias as Add node behind the Conv node
                     out_shape = model.get_tensor_shape(n.output[0])
@@ -62,6 +70,17 @@ def apply(self, model):
                         add_shape[1] = bias_shape[0]
                     if bias is not None:
                         model.set_initializer(n.input[2], bias.reshape(add_shape))
+                    else:
+                        # if the bias comes from a Quant node, reshape the Quant
+                        # node's parameters instead
+                        quant_param = model.get_initializer(producer.input[0])
+                        model.set_initializer(producer.input[0], quant_param.reshape(add_shape))
+                        quant_scale = model.get_initializer(producer.input[1])
+                        if quant_scale.shape != (1,):
+                            model.set_initializer(producer.input[1], quant_scale.reshape(add_shape))
+                        quant_zpt = model.get_initializer(producer.input[2])
+                        if quant_zpt.shape != (1,):
+                            model.set_initializer(producer.input[2], quant_zpt.reshape(add_shape))
+                        model.set_tensor_shape(producer.output[0], add_shape)
 
                     act_add_tensor = helper.make_tensor_value_info(
                         model.make_new_valueinfo_name(),
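
To see why the Quant parameters need reshaping, here is a minimal NumPy sketch (shapes are illustrative, assuming the usual NCHW layout) mirroring the add_shape logic above: a per-channel bias of shape (C,) must become (1, C, 1, 1) so the inserted Add broadcasts along the channel axis.

    import numpy as np

    out_shape = (1, 64, 111, 111)      # example Conv output shape (NCHW)
    bias = np.random.randn(64).astype(np.float32)

    add_shape = [1] * len(out_shape)   # [1, 1, 1, 1]
    add_shape[1] = bias.shape[0]       # [1, 64, 1, 1] -> per-channel broadcast

    conv_out = np.zeros(out_shape, dtype=np.float32)
    assert (conv_out + bias.reshape(add_shape)).shape == out_shape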
Lines changed: 313 additions & 0 deletions
@@ -0,0 +1,313 @@
# Copyright (c) 2025 Advanced Micro Devices, Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of qonnx nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import pytest

import numpy as np
import onnx.helper as oh
from onnx import TensorProto

import qonnx.core.onnx_exec as oxe
from qonnx.core.datatype import DataType
from qonnx.core.modelwrapper import ModelWrapper
from qonnx.transformation.extract_conv_bias import ExtractBiasFromConv
from qonnx.transformation.infer_shapes import InferShapes
from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model

# Helper function to generate valid parameter combinations, with an option to include 'dw'
def generate_params(include_dw=True):
    params = []
    biases = ["float", None, "int_quant", "bp_quant"]
    scales = ["per_tensor", "per_channel"]
    zero_points = ["per_tensor", "per_channel"]

    dw_options = [True, False] if include_dw else [None]

    for dw in dw_options:
        for bias in biases:
            if bias in ["float", None]:
                # Ignore scale and zero_point for this bias
                params.append((dw, bias, None, None) if include_dw else (bias, None, None))
            else:
                # Include all combinations of scale and zero_point for other biases
                for scale in scales:
                    for zero_point in zero_points:
                        if include_dw:
                            params.append((dw, bias, scale, zero_point))
                        else:
                            params.append((bias, scale, zero_point))
    return params

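
# For reference, a sketch of what this parametrization yields:
# generate_params(include_dw=False) produces 10 tuples:
#   ("float", None, None), (None, None, None), and every combination of
#   ("int_quant" | "bp_quant") with ("per_tensor" | "per_channel") scale
#   and ("per_tensor" | "per_channel") zero-point.
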
69+
@pytest.mark.parametrize("dw, bias, scale, zero_point", generate_params(include_dw=True))
70+
def test_extract_conv_bias(dw, bias, scale, zero_point):
71+
ishape = (1, 32, 111, 111)
72+
if dw is True:
73+
group = ishape[1]
74+
out_channels = ishape[1]
75+
kernel_size = 3
76+
padding = 1
77+
stride = 1
78+
w_shape = (32, 1, 3, 3)
79+
80+
else:
81+
group = 1
82+
out_channels = 64
83+
kernel_size = 1
84+
padding = 0
85+
stride = 1
86+
w_shape = (64, 32, 1, 1)
87+
88+
wdt = idt = odt = DataType["FLOAT32"]
89+
90+
# set up onnx model
91+
inp = oh.make_tensor_value_info("inp", TensorProto.FLOAT, ishape)
92+
outp = oh.make_tensor_value_info("outp", TensorProto.FLOAT, [ishape[0], out_channels, ishape[2], ishape[3]])
93+
94+
W = oh.make_tensor_value_info("W", TensorProto.FLOAT, w_shape)
95+
96+
if bias is not None:
97+
bias_shape = (out_channels,)
98+
if scale is not None and "per_channel" in scale:
99+
scale_shape = (out_channels,)
100+
elif scale is not None and "per_tensor" in scale:
101+
scale_shape = (1,)
102+
if scale is not None and "per_channel" in zero_point:
103+
zpt_shape = (out_channels,)
104+
elif scale is not None and "per_tensor" in zero_point:
105+
zpt_shape = (1,)
        B = oh.make_tensor_value_info("B", TensorProto.FLOAT, bias_shape)

    cnv_node = oh.make_node(
        "Conv",
        inputs=["inp", "W"] if not bias else ["inp", "W", "B"],
        outputs=["outp"],
        kernel_shape=[kernel_size, kernel_size],
        pads=[padding, padding, padding, padding],
        strides=[stride, stride],
        group=group,
    )
    nodes = [cnv_node]
    value_info = [W] if not bias else [W, B]
    # if the bias isn't quantized, we can directly wire up the Conv layer;
    # otherwise an additional Quant node needs to be inserted
    if bias not in ["float", None]:
        if "bp" in bias:
            optype = "BipolarQuant"
        elif "int" in bias:
            optype = "IntQuant"
        # inputs to Quant node
        param0 = oh.make_tensor_value_info("param0", TensorProto.FLOAT, bias_shape)
        param1 = oh.make_tensor_value_info("param1", TensorProto.FLOAT, scale_shape)
        param2 = oh.make_tensor_value_info("param2", TensorProto.FLOAT, zpt_shape)
        value_info.append(param0)
        value_info.append(param1)
        value_info.append(param2)
        if "int" in bias:
            param3 = oh.make_tensor_value_info("param3", TensorProto.FLOAT, [1])
            value_info.append(param3)
        quant_node = oh.make_node(
            optype,
            domain="qonnx.custom_op.general",
            inputs=["param0", "param1", "param2", "param3"] if "int" in bias else ["param0", "param1", "param2"],
            outputs=["B"],
            narrow=0,
            rounding_mode="ROUND",
            signed=1,
        )
        nodes.append(quant_node)
    graph = oh.make_graph(
        nodes=nodes,
        name="cnv_graph",
        inputs=[inp],
        outputs=[outp],
        value_info=value_info,
    )

    model = qonnx_make_model(graph, producer_name="test-cnv-model")
    model = ModelWrapper(model)
    model.set_tensor_datatype("inp", idt)
    model.set_tensor_datatype("outp", odt)
    model.set_tensor_datatype("W", wdt)

    w_tensor = gen_finn_dt_tensor(wdt, w_shape)

    if bias is not None:
        b_tensor = gen_finn_dt_tensor(DataType["FLOAT32"], bias_shape)
        # set B tensor directly or set first input of quant node
        if bias != "float":
            model.set_initializer("param0", b_tensor)
            scale = gen_finn_dt_tensor(DataType["FLOAT32"], scale_shape)
            model.set_initializer("param1", scale)
            zpt = gen_finn_dt_tensor(DataType["FLOAT32"], zpt_shape)
            model.set_initializer("param2", zpt)
            if "int" in bias:
                model.set_initializer("param3", 8 * np.ones(1))
        else:
            model.set_initializer("B", b_tensor)

    model.set_initializer("W", w_tensor)
    model = model.transform(InferShapes())

    input_tensor = gen_finn_dt_tensor(idt, ishape)
    output_dict = oxe.execute_onnx(model, {model.graph.input[0].name: input_tensor})
    expected = output_dict[model.graph.output[0].name]

    model = model.transform(ExtractBiasFromConv())

    if bias is not None:
        assert len(model.get_nodes_by_op_type("Add")) > 0, "Bias wasn't extracted into add node"

    output_dict = oxe.execute_onnx(model, {model.graph.input[0].name: input_tensor})
    produced = output_dict[model.graph.output[0].name]

    # check that produced and expected outputs match closely (fp calculation)
    assert np.isclose(produced, expected, atol=1e-3).all()


@pytest.mark.parametrize("bias, scale, zero_point", generate_params(include_dw=False))
196+
def test_extract_conv_transpose_bias(bias, scale, zero_point):
197+
ishape = (1, 32, 111, 111)
198+
group = 1
199+
out_channels = 64
200+
kernel_size = 1
201+
padding = 0
202+
stride = 1
203+
w_shape = (32, 64, 1, 1)
204+
205+
wdt = idt = odt = DataType["FLOAT32"]
206+
207+
# Set up ONNX model
208+
inp = oh.make_tensor_value_info("inp", TensorProto.FLOAT, ishape)
209+
outp_shape = (ishape[0], out_channels, ishape[2], ishape[3])
210+
outp = oh.make_tensor_value_info("outp", TensorProto.FLOAT, outp_shape)
211+
212+
W = oh.make_tensor_value_info("W", TensorProto.FLOAT, w_shape)
213+
214+
if bias is not None:
215+
bias_shape = (out_channels,)
216+
if scale is not None and "per_channel" in scale:
217+
scale_shape = (out_channels,)
218+
elif scale is not None and "per_tensor" in scale:
219+
scale_shape = (1,)
220+
        if zero_point is not None and "per_channel" in zero_point:
            zpt_shape = (out_channels,)
        elif zero_point is not None and "per_tensor" in zero_point:
            zpt_shape = (1,)

        B = oh.make_tensor_value_info("B", TensorProto.FLOAT, bias_shape)

    cnv_node = oh.make_node(
        "ConvTranspose",
        inputs=["inp", "W"] if not bias else ["inp", "W", "B"],
        outputs=["outp"],
        kernel_shape=[kernel_size, kernel_size],
        pads=[padding, padding, padding, padding],
        strides=[stride, stride],
        group=group,
    )
    nodes = [cnv_node]
    value_info = [W] if not bias else [W, B]

    # If the bias isn't quantized, we can directly wire up the ConvTranspose layer;
    # otherwise, an additional Quant node needs to be inserted
    if bias not in ["float", None]:
        if "bp" in bias:
            optype = "BipolarQuant"
        elif "int" in bias:
            optype = "IntQuant"
        # Inputs to Quant node
        param0 = oh.make_tensor_value_info("param0", TensorProto.FLOAT, bias_shape)
        param1 = oh.make_tensor_value_info("param1", TensorProto.FLOAT, scale_shape)
        param2 = oh.make_tensor_value_info("param2", TensorProto.FLOAT, zpt_shape)
        value_info.append(param0)
        value_info.append(param1)
        value_info.append(param2)
        if "int" in bias:
            param3 = oh.make_tensor_value_info("param3", TensorProto.FLOAT, [1])
            value_info.append(param3)
        quant_node = oh.make_node(
            optype,
            domain="qonnx.custom_op.general",
            inputs=["param0", "param1", "param2", "param3"] if "int" in bias else ["param0", "param1", "param2"],
            outputs=["B"],
            narrow=0,
            rounding_mode="ROUND",
            signed=1,
        )
        nodes.append(quant_node)

    graph = oh.make_graph(
        nodes=nodes,
        name="cnv_transpose_graph",
        inputs=[inp],
        outputs=[outp],
        value_info=value_info,
    )

    model = qonnx_make_model(graph, producer_name="test-cnv-transpose-model")
    model = ModelWrapper(model)
    model.set_tensor_datatype("inp", idt)
    model.set_tensor_datatype("outp", odt)
    model.set_tensor_datatype("W", wdt)

    w_tensor = gen_finn_dt_tensor(wdt, w_shape)

    if bias is not None:
        b_tensor = gen_finn_dt_tensor(DataType["FLOAT32"], bias_shape)
        # Set B tensor directly or set first input of quant node
        if bias != "float":
            model.set_initializer("param0", b_tensor)
            scale = gen_finn_dt_tensor(DataType["FLOAT32"], scale_shape)
            model.set_initializer("param1", scale)
            zpt = gen_finn_dt_tensor(DataType["FLOAT32"], zpt_shape)
            model.set_initializer("param2", zpt)
            if "int" in bias:
                model.set_initializer("param3", 8 * np.ones(1))
        else:
            model.set_initializer("B", b_tensor)

    model.set_initializer("W", w_tensor)
    model = model.transform(InferShapes())

    input_tensor = gen_finn_dt_tensor(idt, ishape)
    output_dict = oxe.execute_onnx(model, {model.graph.input[0].name: input_tensor})
    expected = output_dict[model.graph.output[0].name]

    model = model.transform(ExtractBiasFromConv())

    if bias is not None:
        assert len(model.get_nodes_by_op_type("Add")) > 0, "Bias wasn't extracted into add node"

    output_dict = oxe.execute_onnx(model, {model.graph.input[0].name: input_tensor})
    produced = output_dict[model.graph.output[0].name]

    # Check that produced and expected outputs match closely (fp calculation)
    assert np.isclose(produced, expected, atol=1e-3).all()
