Make quantized activation handlers data layout aware #8

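In short: when converting Quant/BipolarQuant nodes to MultiThreshold, the handlers previously hard-coded the channel dimension at index 1. They now read the input tensor's data layout annotation (e.g. "NHWC"), guess a layout from the tensor rank when no annotation exists, and only then fall back to axis 1 with a warning. A minimal standalone sketch of that lookup logic, mirroring the diff below (function name and example shapes are chosen here for illustration):

```python
import warnings

def resolve_channel_dim(layout, shape):
    """Resolve the index of the channel dimension from a layout string
    such as "NCHW" or "NHWC", guessing from the tensor rank if needed."""
    # If there is no layout annotation, guess based on rank of the tensor
    if layout is None and len(shape) < 5:
        rank_to_layout = {0: None, 1: "C", 2: "NC", 3: "NWC", 4: "NCHW"}
        layout = rank_to_layout[len(shape)]
    # If there is a layout annotation, use it to find the channel axis
    if layout is not None and "C" in layout:
        return layout.index("C")
    # Fall back to the previous default assumption: channels at axis 1
    warnings.warn("No layout annotation: assuming channel dimension at index 1")
    return 1

print(resolve_channel_dim("NHWC", (1, 32, 32, 3)))  # -> 3
print(resolve_channel_dim(None, (1, 3, 32, 32)))    # -> 1 (guessed "NCHW")
```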
11 changes: 5 additions & 6 deletions src/finn/builder/build_dataflow_config.py
@@ -35,7 +35,7 @@
from typing import Any, List, Optional

from finn.transformation.fpgadataflow.vitis_build import VitisOptStrategy
-from finn.util.basic import alveo_default_platform, alveo_part_map, pynq_part_map
+from finn.util.basic import alveo_default_platform, part_map


class AutoFIFOSizingMethod(str, Enum):
@@ -370,11 +370,10 @@ def _resolve_driver_platform(self):
def _resolve_fpga_part(self):
if self.fpga_part is None:
# lookup from part map if not specified
-            if self.shell_flow_type == ShellFlowType.VIVADO_ZYNQ:
-                return pynq_part_map[self.board]
-            elif self.shell_flow_type == ShellFlowType.VITIS_ALVEO:
-                return alveo_part_map[self.board]
-            else:
+            try:
+                fpga_part = part_map[self.board]
+                return fpga_part
+            except KeyError:
                raise Exception("Couldn't resolve fpga_part for " + self.board)
else:
# return as-is when explicitly specified
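Since the lookup no longer branches on the shell flow, any board in the merged part_map resolves the same way, including boards that fit neither the Zynq nor the Alveo branch. A quick sketch of the new resolution path (assuming a FINN install; the helper function is illustrative, and the V80 part string comes from the src/finn/util/basic.py change below):

```python
from finn.util.basic import part_map

def resolve_fpga_part(board: str) -> str:
    # Mirrors the try/except added to _resolve_fpga_part above
    try:
        return part_map[board]
    except KeyError:
        raise Exception("Couldn't resolve fpga_part for " + board)

print(resolve_fpga_part("V80"))  # -> "xcv80-lsva4737-2MHP-e-s"
```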
103 changes: 92 additions & 11 deletions src/finn/transformation/qonnx/qonnx_activation_handlers.py
@@ -25,8 +25,8 @@
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import numpy as np
+import warnings
from abc import ABC, abstractmethod
from onnx import TensorProto, helper
from qonnx.core.modelwrapper import ModelWrapper
@@ -70,7 +70,7 @@ def _check_compatibility(self):
@abstractmethod
def _calculate_act_bias(self):
"""Calculate the activation bias,
-        which is introduced as an Add node behind the MultiTrheshold node.
+        which is introduced as an Add node behind the MultiThreshold node.
"""
raise NotImplementedError()

@@ -82,7 +82,7 @@ def _calculate_thresholds(self):
@abstractmethod
def _calculate_act_scale(self):
"""Calculate the activation scale,
-        which is indroduced as a Mul node behind the Add node
+        which is introduced as a Mul node behind the Add node
for the activation bias.
"""
raise NotImplementedError()
@@ -139,6 +139,8 @@ def replace_quant_node(self):
graph.value_info.append(thresh_tensor)
model.set_initializer(thresh_tensor.name, thresholds)

+        data_layout = model.get_tensor_layout(n.input[0])
+
# Insert MultiThreshold node
outp_trans_node = helper.make_node(
"MultiThreshold",
@@ -154,10 +154,15 @@
mt_node = graph.node[running_node_index - 1]
mt_inst = getCustomOp(mt_node)

+        # Inherit the data layout from the input tensor if available
+        if data_layout is not None:
+            # Convert list to string representation of the data layout
+            mt_inst.set_nodeattr("data_layout", "".join(data_layout))
+
# Set scale and bias
# If these values are scalar then they can be set as attributes
# of the MultiThreshold node, if not they get inserted as adder and mul nodes
-        # behind the MultiTrheshold nodes.
+        # behind the MultiThreshold nodes.
bias_scalar = adder_bias.shape == (1,) or len(adder_bias.shape) == 0
scale_scalar = mul_scale.shape == (1,) or len(mul_scale.shape) == 0
if scale_scalar and bias_scalar and self._q_node.op_type == "BipolarQuant":
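The new data_layout node attribute is simply the layout annotation joined into a string. A tiny illustration of what set_nodeattr receives (the list-form annotation value is assumed here; qonnx layout annotations are list-like):

```python
# A channels-last annotation as a list of axis letters (assumed example)
data_layout = ["N", "H", "W", "C"]

# Inherit the data layout from the input tensor if available
if data_layout is not None:
    # Convert list to string representation of the data layout
    print("".join(data_layout))  # -> "NHWC"
```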
@@ -355,7 +362,7 @@ def _calculate_thresholds(self):
act_node = self._model.find_direct_predecessors(self._q_node)
act_node = act_node[0]
if act_node.op_type == "Relu":
-            # Calculate thersholds, see: https://github.com/Xilinx/brevitas/blob/
+            # Calculate thresholds, see: https://github.com/Xilinx/brevitas/blob/
# a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/export/
# onnx/finn/handler/act.py#L21
num_distinct_values = 2**bit_width
@@ -395,8 +402,46 @@
else:
thresholds[c][t] = step / selu_scale

+        # Get the shape of the input (should also be the output) tensor
+        # Note: Querying the input is safer, as we do not want to
+        # propagate shapes backwards by accident.
+        shape = self._model.get_tensor_shape(self._q_node.input[0])  # noqa
+        # First try to consider the tensor layout of the input for
+        # determining the number of output channels
+        layout = self._model.get_tensor_layout(self._q_node.input[0])
+        # If there is no layout annotation, guess based on rank of the
+        # tensor
+        # TODO: No support for Rank >= 5
+        if layout is None and len(shape) < 5:
+            # Maps tensor rank to layout annotation
+            rank_to_layout = {0: None, 1: "C", 2: "NC", 3: "NWC", 4: "NCHW"}
+            # Lookup the layout required by this input shape
+            layout = rank_to_layout[len(shape)]
+        # If there is a layout annotation, use this to determine the index
+        # of the channel dimension
+        if layout is not None and "C" in layout:  # noqa: Duplicate
+            # Lookup the index in list
+            cdim = layout.index("C")
+        # If no layout has been annotated or there is no channel dimension, fall
+        # back to the previous default assumption
+        else:
+            # Assume the channels to be in axis 1
+            cdim = 1
+            # Issue a warning to the user, so they are aware of this
+            warnings.warn(
+                f"No layout annotations for {self._q_node.input[0]}:"
+                f" Assuming channel dimension at index {cdim}"
+            )
+
        # ToDo: The index 1 needs to be changed to -1 for the channels last format
-        num_output_channels = self._model.get_tensor_shape(self._q_node.output[0])[1]
+        num_output_channels = self._model.get_tensor_shape(self._q_node.output[0])[cdim]
+
+        assert (
+            thresholds.shape[0] == 1 or thresholds.shape[0] == num_output_channels
+        ), """Quant node cannot be converted to MultiThreshold because only
+        per tensor or per channel quantization is supported."""
+
        final_shape = (num_output_channels, num_thresholds)
        if thresholds.shape != final_shape:
            thresholds = np.broadcast_to(thresholds, final_shape)
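Putting this hunk together: the layout annotation (or rank-based guess) picks the channel axis, the output shape at that axis gives the channel count, and per-tensor thresholds are then broadcast per channel. A worked numpy example under an assumed channels-last activation (all concrete values invented for illustration):

```python
import numpy as np

# Assumed "NHWC"-annotated activation tensor
layout, shape = "NHWC", (1, 8, 8, 16)
num_thresholds = 15  # e.g. a 4-bit unsigned activation: 2**4 - 1 steps

cdim = layout.index("C")           # -> 3 instead of the old hard-coded 1
num_output_channels = shape[cdim]  # -> 16

# Per-tensor quantization yields a single threshold row ...
thresholds = np.linspace(0.5, 14.5, num_thresholds).reshape(1, -1)
assert thresholds.shape[0] == 1 or thresholds.shape[0] == num_output_channels

# ... which is broadcast to one identical row per channel
final_shape = (num_output_channels, num_thresholds)
if thresholds.shape != final_shape:
    thresholds = np.broadcast_to(thresholds, final_shape)
print(thresholds.shape)  # -> (16, 15)
```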
@@ -417,12 +462,12 @@ def _remove_activation_node(self, multi_threshold_node):
act_node = self._model.find_direct_predecessors(self._q_node)
if act_node is None:
raise RuntimeError(
"For handling of Relu activations a predecesor to " "the Quant node must exist."
"For handling of Relu activations a predecessor to " "the Quant node must exist."
)
act_node = act_node[0]
if act_node.op_type not in self.valid_predecessor_op_types():
raise RuntimeError(
"The predecesor of the Quant node must be Relu or Selu for handling "
"The predecessor of the Quant node must be Relu or Selu for handling "
"of activations."
)

@@ -509,7 +554,7 @@ def _calculate_thresholds(self):
else:
raise RuntimeError("Got an unexpected quantizer node type")

-        # Calculate thersholds, see: https://github.com/Xilinx/brevitas/
+        # Calculate thresholds, see: https://github.com/Xilinx/brevitas/
# blob/a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/
# export/onnx/finn/handler/act.py#L76
if bit_width == 1.0:
@@ -537,13 +582,49 @@
for t in range(num_thresholds):
thresholds[c][t] = min_threshold[c] + step[c] * t

-        # currently only per tensor or per channel quantization is supported
-        num_output_channels = self._model.get_tensor_shape(self._q_node.output[0])[1]
+        # Get the shape of the input (should also be the output) tensor
+        # Note: Querying the input is safer, as we do not want to
+        # propagate shapes backwards by accident.
+        shape = self._model.get_tensor_shape(self._q_node.input[0])
+        # First try to consider the tensor layout of the input for
+        # determining the number of output channels
+        layout = self._model.get_tensor_layout(self._q_node.input[0])  # noqa
+        # If there is no layout annotation, guess based on rank of the
+        # tensor
+        # TODO: No support for Rank >= 5
+        if layout is None and len(shape) < 5:
+            # Maps tensor rank to layout annotation
+            rank_to_layout = {0: None, 1: "C", 2: "NC", 3: "NWC", 4: "NCHW"}
+            # Lookup the layout required by this input shape
+            layout = rank_to_layout[len(shape)]
+        # If there is a layout annotation, use this to determine the index
+        # of the channel dimension
+        if layout is not None and "C" in layout:  # noqa: Duplicate
+            # Lookup the index in list
+            cdim = layout.index("C")
+        # If no layout has been annotated or there is no channel dimension,
+        # fall back to the previous default assumption
+        else:
+            # Assume the channels to be in axis 1
+            cdim = 1
+            # Issue a warning to the user, so they are aware of this
+            warnings.warn(
+                f"No layout annotations for {self._q_node.input[0]}:"
+                f" Assuming channel dimension at index {cdim}"
+            )
+
+        # ToDo: The index 1 needs to be changed to -1 for the channels last format
+        num_output_channels = self._model.get_tensor_shape(self._q_node.output[0])[cdim]
+
+        assert (
+            thresholds.shape[0] == 1 or thresholds.shape[0] == num_output_channels
+        ), """Quant node cannot be converted to MultiThreshold because only
+        per tensor or per channel quantization is supported."""

final_shape = (num_output_channels, num_thresholds)
if thresholds.shape != final_shape:
thresholds = np.broadcast_to(thresholds, final_shape)

return thresholds

def _calculate_act_scale(self):
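For the identity/general Quant case, the loop at the top of the last hunk builds a uniform per-channel threshold ladder, thresholds[c][t] = min_threshold[c] + step[c] * t, before the new channel-count check and broadcast run. A self-contained numeric sketch of that ladder (all values invented):

```python
import numpy as np

num_channels, num_thresholds = 2, 3
min_threshold = np.array([-1.0, -2.0])  # per-channel lower threshold
step = np.array([0.5, 1.0])             # per-channel step size

# thresholds[c][t] = min_threshold[c] + step[c] * t, per the loop above
thresholds = np.zeros((num_channels, num_thresholds))
for c in range(num_channels):
    for t in range(num_thresholds):
        thresholds[c][t] = min_threshold[c] + step[c] * t

print(thresholds)  # [[-1.  -0.5  0. ]
                   #  [-2.  -1.   0. ]]
```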
1 change: 1 addition & 0 deletions src/finn/util/basic.py
@@ -81,6 +81,7 @@
part_map = {**pynq_part_map, **alveo_part_map}
part_map["VEK280"] = "xcve2802-vsvh1760-2MP-e-S"
part_map["VCK190"] = "xcvc1902-vsva2197-2MP-e-S"
part_map["V80"] = "xcv80-lsva4737-2MHP-e-s"


def get_rtlsim_trace_depth():
1 change: 0 additions & 1 deletion tests/fpgadataflow/test_fifosizing.py
@@ -70,7 +70,6 @@ def test_fifosizing_linear(method, topology):
synth_clk_period_ns=10.0,
board="Pynq-Z1",
rtlsim_batch_size=100 if topology == "tfc" else 2,
-        shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ,
generate_outputs=[
build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
build_cfg.DataflowOutputType.STITCHED_IP,