From 44e66e5a1fea66010c05371fbe15379c99f7c0cb Mon Sep 17 00:00:00 2001 From: ollycassidy13 Date: Tue, 28 Apr 2026 17:31:31 +0100 Subject: [PATCH 1/5] AddCLSToken initial commit --- .../finn.custom_op.fpgadataflow.rst | 8 + .../finn.custom_op.fpgadataflow.rtl.rst | 8 + finn-rtllib/addclstoken/hdl/addclstoken.sv | 150 +++++++++ .../addclstoken/hdl/addclstoken_template.v | 81 +++++ src/finn/builder/build_dataflow_steps.py | 2 + src/finn/custom_op/fpgadataflow/__init__.py | 2 + .../custom_op/fpgadataflow/addclstoken.py | 171 ++++++++++ .../custom_op/fpgadataflow/rtl/__init__.py | 2 + .../fpgadataflow/rtl/addclstoken_rtl.py | 211 ++++++++++++ .../fpgadataflow/convert_to_hw_layers.py | 79 +++++ .../fpgadataflow/specialize_layers.py | 1 + src/finn/util/vivado.py | 44 ++- .../test_fpgadataflow_addclstoken.py | 299 ++++++++++++++++++ 13 files changed, 1050 insertions(+), 8 deletions(-) create mode 100644 finn-rtllib/addclstoken/hdl/addclstoken.sv create mode 100644 finn-rtllib/addclstoken/hdl/addclstoken_template.v create mode 100644 src/finn/custom_op/fpgadataflow/addclstoken.py create mode 100644 src/finn/custom_op/fpgadataflow/rtl/addclstoken_rtl.py create mode 100644 tests/fpgadataflow/test_fpgadataflow_addclstoken.py diff --git a/docs/finn/source_code/finn.custom_op.fpgadataflow.rst b/docs/finn/source_code/finn.custom_op.fpgadataflow.rst index 25aafc324e..0688664bfe 100644 --- a/docs/finn/source_code/finn.custom_op.fpgadataflow.rst +++ b/docs/finn/source_code/finn.custom_op.fpgadataflow.rst @@ -39,6 +39,14 @@ RTLBackend :undoc-members: :show-inheritance: +finn.custom\_op.fpgadataflow.addclstoken +----------------------------------------- + +.. 
automodule:: finn.custom_op.fpgadataflow.addclstoken + :members: + :undoc-members: + :show-inheritance: + finn.custom\_op.fpgadataflow.addstreams ---------------------------------------- diff --git a/docs/finn/source_code/finn.custom_op.fpgadataflow.rtl.rst b/docs/finn/source_code/finn.custom_op.fpgadataflow.rtl.rst index 346eddb073..859a789f2f 100644 --- a/docs/finn/source_code/finn.custom_op.fpgadataflow.rtl.rst +++ b/docs/finn/source_code/finn.custom_op.fpgadataflow.rtl.rst @@ -5,6 +5,14 @@ Custom Op - fpgadataflow.rtl RTL Custom Op Nodes =================== +finn.custom\_op.fpgadataflow.rtl.addclstoken\_rtl +-------------------------------------------------- + +.. automodule:: finn.custom_op.fpgadataflow.rtl.addclstoken_rtl + :members: + :undoc-members: + :show-inheritance: + finn.custom\_op.fpgadataflow.convolutioninputgenerator\_rtl ------------------------------------------------------------ diff --git a/finn-rtllib/addclstoken/hdl/addclstoken.sv b/finn-rtllib/addclstoken/hdl/addclstoken.sv new file mode 100644 index 0000000000..768b2a9a06 --- /dev/null +++ b/finn-rtllib/addclstoken/hdl/addclstoken.sv @@ -0,0 +1,150 @@ +/****************************************************************************** + * Copyright (C) 2026, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
// Stream formatter that emits, per frame and in this order:
//   1. EMIT_CLS:    FOLDS_PER_TOKEN folds sliced out of the cls_data bus,
//   2. PASSTHROUGH: NUM_TOKENS * FOLDS_PER_TOKEN folds copied from the input stream,
//   3. EMIT_PAD:    PAD_TOKENS * FOLDS_PER_TOKEN all-zero folds (phase is skipped
//                   entirely when PAD_TOKENS == 0),
// and then starts over with EMIT_CLS.
//
// Parameters:
//   NUM_TOKENS   - number of input tokens forwarded per frame
//   NUM_CHANNELS - elements per token
//   SIMD         - elements per stream transaction ("fold"). FOLDS_PER_TOKEN is an
//                  integer division; nothing in this module checks that SIMD
//                  divides NUM_CHANNELS.
//   ELEM_WIDTH   - bits per element
//   PAD_TOKENS   - number of zero tokens appended after the input tokens
//
// Ports:
//   irdy/ivld/idat - input stream (ready/valid handshake)
//   ordy/ovld/odat - output stream (ready/valid handshake)
//   cls_data       - the complete CLS token; fold k is read from
//                    bits [k*FOLD_WIDTH +: FOLD_WIDTH]
//   rst            - synchronous, active-high reset
module addclstoken #(
    parameter int unsigned NUM_TOKENS = 196,
    parameter int unsigned NUM_CHANNELS = 192,
    parameter int unsigned SIMD = 1,
    parameter int unsigned ELEM_WIDTH = 8,
    parameter int unsigned PAD_TOKENS = 0
)(
    input logic clk,
    input logic rst,

    output logic irdy,
    input logic ivld,
    input logic [SIMD*ELEM_WIDTH-1:0] idat,

    input logic ordy,
    output logic ovld,
    output logic [SIMD*ELEM_WIDTH-1:0] odat,

    input logic [NUM_CHANNELS*ELEM_WIDTH-1:0] cls_data
);

    localparam int unsigned FOLD_WIDTH = SIMD * ELEM_WIDTH;
    localparam int unsigned FOLDS_PER_TOKEN = NUM_CHANNELS / SIMD;
    localparam int unsigned TOTAL_INPUT_FOLDS = NUM_TOKENS * FOLDS_PER_TOKEN;
    localparam int unsigned TOTAL_PAD_FOLDS = PAD_TOKENS * FOLDS_PER_TOKEN;
    // Largest fold count of the three phases: max(TOTAL_INPUT_FOLDS,
    // FOLDS_PER_TOKEN, TOTAL_PAD_FOLDS). It sizes the single shared counter.
    localparam int unsigned MAX_PHASE_FOLDS =
        (TOTAL_INPUT_FOLDS > FOLDS_PER_TOKEN) ?
            ((TOTAL_INPUT_FOLDS > TOTAL_PAD_FOLDS) ?
                TOTAL_INPUT_FOLDS : TOTAL_PAD_FOLDS) :
            ((FOLDS_PER_TOKEN > TOTAL_PAD_FOLDS) ?
                FOLDS_PER_TOKEN : TOTAL_PAD_FOLDS);
    // The counter runs 0 .. MAX_PHASE_FOLDS-1; keep at least one bit.
    localparam int unsigned CNT_WIDTH = (MAX_PHASE_FOLDS <= 1) ? 1 : $clog2(MAX_PHASE_FOLDS);

    typedef enum logic [1:0] {
        EMIT_CLS,
        PASSTHROUGH,
        EMIT_PAD
    } state_t;

    state_t state;
    state_t next_state;
    logic [CNT_WIDTH-1:0] fold_cnt;      // folds already transferred in the current phase
    logic fold_cnt_last;                 // fold_cnt points at the last fold of the phase
    logic out_transfer;                  // an output beat is accepted in this cycle

    logic [CNT_WIDTH-1:0] cls_fold_cnt;
    logic [FOLD_WIDTH-1:0] cls_fold;

    // fold_cnt is shared by all phases and can exceed FOLDS_PER_TOKEN-1 outside
    // EMIT_CLS; the select index falls back to 0 then, so the part-select below
    // always stays within cls_data.
    assign cls_fold_cnt = (int'(fold_cnt) < FOLDS_PER_TOKEN) ? fold_cnt : '0;
    assign cls_fold = cls_data[cls_fold_cnt * FOLD_WIDTH +: FOLD_WIDTH];
    assign out_transfer = ovld & ordy;

    // End-of-phase detection: each state compares against its own fold count.
    // The EMIT_PAD comparison is only relevant when PAD_TOKENS != 0, because the
    // state is never entered otherwise (see the next-state logic).
    always_comb begin
        unique case (state)
            EMIT_CLS: fold_cnt_last = (int'(fold_cnt) == FOLDS_PER_TOKEN - 1);
            PASSTHROUGH: fold_cnt_last = (int'(fold_cnt) == TOTAL_INPUT_FOLDS - 1);
            EMIT_PAD: fold_cnt_last = (int'(fold_cnt) == TOTAL_PAD_FOLDS - 1);
            default: fold_cnt_last = 1'b1;
        endcase
    end

    // Output and handshake multiplexing. Defaults: input stalled, output invalid,
    // data zero.
    always_comb begin
        irdy = 1'b0;
        ovld = 1'b0;
        odat = '0;

        unique case (state)
            EMIT_CLS: begin
                // Constant data: always valid, input side stays stalled.
                ovld = 1'b1;
                odat = cls_fold;
            end
            PASSTHROUGH: begin
                // Combinational pass-through of the input stream handshake and data.
                irdy = ordy;
                ovld = ivld;
                odat = idat;
            end
            EMIT_PAD: begin
                // odat keeps its default of all zeros.
                ovld = 1'b1;
            end
            default: begin
            end
        endcase
    end

    // Next-state logic: advance only when the last fold of a phase is transferred.
    always_comb begin
        next_state = state;
        if (out_transfer && fold_cnt_last) begin
            unique case (state)
                EMIT_CLS: begin
                    next_state = PASSTHROUGH;
                end
                PASSTHROUGH: begin
                    next_state = (PAD_TOKENS == 0) ? EMIT_CLS : EMIT_PAD;
                end
                EMIT_PAD: begin
                    next_state = EMIT_CLS;
                end
                default: begin
                    next_state = EMIT_CLS;
                end
            endcase
        end
    end

    // State and fold counter registers; they only change on an accepted output
    // beat (or on reset). The counter restarts at 0 at every phase change.
    always_ff @(posedge clk) begin
        if (rst) begin
            state <= EMIT_CLS;
            fold_cnt <= '0;
        end else if (out_transfer) begin
            if (fold_cnt_last) begin
                state <= next_state;
                fold_cnt <= '0;
            end else begin
                fold_cnt <= fold_cnt + 1'b1;
            end
        end
    end

endmodule
// Verilog wrapper template around the `addclstoken` core.
//
// Every $NAME$ token ($TOP_MODULE_NAME$, $FOLD_WIDTH$, $CLS_WIDTH$, $CLS_DATA$,
// $NUM_TOKENS$, $NUM_CHANNELS$, $SIMD$, $ELEM_WIDTH$, $PAD_TOKENS$) is a
// placeholder that has to be replaced by a concrete value before this file is
// valid Verilog.
//
// The wrapper
//   - exposes the streams as in0_V_* / out_V_* with TREADY/TVALID/TDATA signals,
//   - widens TDATA to AXI_WIDTH, i.e. FOLD_WIDTH rounded up to a multiple of 8,
//   - inverts the active-low ap_rst_n before passing it to the core's rst port,
//   - feeds the CLS token to the core as the constant CLS_DATA.
module $TOP_MODULE_NAME$ #(
    parameter FOLD_WIDTH = $FOLD_WIDTH$,
    parameter AXI_WIDTH = ((FOLD_WIDTH + 7) / 8) * 8
)(
    (* X_INTERFACE_INFO = "xilinx.com:signal:clock:1.0 ap_clk CLK" *)
    (* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V, ASSOCIATED_RESET ap_rst_n" *)
    input ap_clk,
    (* X_INTERFACE_PARAMETER = "POLARITY ACTIVE_LOW" *)
    input ap_rst_n,

    output in0_V_TREADY,
    input in0_V_TVALID,
    input [AXI_WIDTH-1:0] in0_V_TDATA,

    input out_V_TREADY,
    output out_V_TVALID,
    output [AXI_WIDTH-1:0] out_V_TDATA
);

    // Packed CLS token constant, $CLS_WIDTH$ bits wide.
    localparam [$CLS_WIDTH$-1:0] CLS_DATA = $CLS_DATA$;

    wire [FOLD_WIDTH-1:0] core_out;

    // The core drives the low FOLD_WIDTH bits of TDATA ...
    assign out_V_TDATA[FOLD_WIDTH-1:0] = core_out;

    // ... and any remaining byte-alignment bits are tied to zero.
    generate
        if (AXI_WIDTH > FOLD_WIDTH) begin : gen_pad_tdata
            assign out_V_TDATA[AXI_WIDTH-1:FOLD_WIDTH] = {(AXI_WIDTH-FOLD_WIDTH){1'b0}};
        end
    endgenerate

    addclstoken #(
        .NUM_TOKENS($NUM_TOKENS$),
        .NUM_CHANNELS($NUM_CHANNELS$),
        .SIMD($SIMD$),
        .ELEM_WIDTH($ELEM_WIDTH$),
        .PAD_TOKENS($PAD_TOKENS$)
    ) impl (
        .clk(ap_clk),
        .rst(!ap_rst_n),
        .irdy(in0_V_TREADY),
        .ivld(in0_V_TVALID),
        // Only the low FOLD_WIDTH bits of the input TDATA are used.
        .idat(in0_V_TDATA[FOLD_WIDTH-1:0]),
        .ordy(out_V_TREADY),
        .ovld(out_V_TVALID),
        .odat(core_out),
        .cls_data(CLS_DATA)
    );

endmodule
--- a/src/finn/builder/build_dataflow_steps.py +++ b/src/finn/builder/build_dataflow_steps.py @@ -348,6 +348,8 @@ def step_convert_to_hw(model: ModelWrapper, cfg: DataflowBuildConfig): model = model.transform(to_hw.InferQuantizedMatrixVectorActivation()) # TopK to LabelSelect model = model.transform(to_hw.InferLabelSelectLayer()) + # sequence CLS token insertion + model = model.transform(to_hw.InferAddCLSTokenLayer()) # input quantization (if any) as standalone threshold model = model.transform(to_hw.InferThresholdingLayer()) # needed for convolutions -- TODO always exec? diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py index aed2ab7fe1..c6e8dd1dcc 100644 --- a/src/finn/custom_op/fpgadataflow/__init__.py +++ b/src/finn/custom_op/fpgadataflow/__init__.py @@ -27,6 +27,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +from finn.custom_op.fpgadataflow.addclstoken import AddCLSToken from finn.custom_op.fpgadataflow.addstreams import AddStreams from finn.custom_op.fpgadataflow.channelwise_op import ChannelwiseOp from finn.custom_op.fpgadataflow.concat import StreamingConcat @@ -66,6 +67,7 @@ custom_op["StreamingDataflowPartition"] = StreamingDataflowPartition custom_op["AddStreams"] = AddStreams +custom_op["AddCLSToken"] = AddCLSToken custom_op["ChannelwiseOp"] = ChannelwiseOp custom_op["ConvolutionInputGenerator"] = ConvolutionInputGenerator custom_op["DownSampler"] = DownSampler diff --git a/src/finn/custom_op/fpgadataflow/addclstoken.py b/src/finn/custom_op/fpgadataflow/addclstoken.py new file mode 100644 index 0000000000..35eae4bb29 --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/addclstoken.py @@ -0,0 +1,171 @@ +# Copyright (C) 2026, Advanced Micro Devices, Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import numpy as np
import warnings
from qonnx.core.datatype import DataType

from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp


class AddCLSToken(HWCustomOp):
    """Hardware custom op that puts one CLS token in front of a token sequence.

    Input 0 carries the patch tokens with shape (1, NumTokens, NumChannels) and
    input 1 the CLS token with shape (1, 1, NumChannels). The output has shape
    (1, NumTokens + 1 + PadTokens, NumChannels): CLS token first, then the patch
    tokens, then PadTokens all-zero tokens.
    """

    def __init__(self, onnx_node, **kwargs):
        super().__init__(onnx_node, **kwargs)

    def get_nodeattr_types(self):
        """Return the inherited node attributes extended by the ones of this op."""
        attrs = super().get_nodeattr_types()
        # sequence geometry
        attrs["NumTokens"] = ("i", True, 0)
        attrs["NumChannels"] = ("i", True, 0)
        attrs["PadTokens"] = ("i", False, 0)
        # channel-wise parallelism (elements per stream word)
        attrs["SIMD"] = ("i", False, 1)
        # FINN datatypes; an empty outputDataType falls back to the input type
        attrs["inputDataType"] = ("s", True, "")
        attrs["outputDataType"] = ("s", False, "")
        return attrs

    def get_normal_input_shape(self, ind=0):
        """Return the unfolded shape of input `ind` (0: patch tokens, 1: CLS token)."""
        channels = self.get_nodeattr("NumChannels")
        if ind not in (0, 1):
            raise Exception("AddCLSToken only has two inputs")
        tokens = self.get_nodeattr("NumTokens") if ind == 0 else 1
        return (1, tokens, channels)

    def get_folded_input_shape(self, ind=0):
        """Return the shape of input `ind` with the channel axis split into (folds, SIMD)."""
        shape = self.get_normal_input_shape(ind)
        simd = self.get_nodeattr("SIMD")
        channels = shape[-1]
        assert channels % simd == 0, "SIMD must divide NumChannels"
        return (*shape[:-1], channels // simd, simd)

    def get_normal_output_shape(self, ind=0):
        """Return the unfolded output shape; the token axis grows by 1 + PadTokens."""
        tokens_in = self.get_nodeattr("NumTokens")
        channels = self.get_nodeattr("NumChannels")
        padding = self.get_nodeattr("PadTokens")
        tokens_out = tokens_in + 1 + padding
        return (1, tokens_out, channels)

    def get_folded_output_shape(self, ind=0):
        """Return the output shape with the channel axis split into (folds, SIMD)."""
        shape = self.get_normal_output_shape(ind)
        simd = self.get_nodeattr("SIMD")
        channels = shape[-1]
        assert channels % simd == 0, "SIMD must divide NumChannels"
        return (*shape[:-1], channels // simd, simd)

    def make_shape_compatible_op(self, model):
        """Check both input shapes against the node attributes and return a
        constant-shape stand-in op with this node's output shape."""
        node_inputs = self.onnx_node.input

        patch_expected = self.get_normal_input_shape(0)
        patch_actual = tuple(model.get_tensor_shape(node_inputs[0]))
        assert patch_actual == patch_expected, "Unexpected input shape for patch tokens."

        cls_expected = self.get_normal_input_shape(1)
        cls_actual = tuple(model.get_tensor_shape(node_inputs[1]))
        assert cls_actual == cls_expected, "Unexpected input shape for CLS token."

        out_shape = self.get_normal_output_shape()
        return super().make_const_shape_op(out_shape)

    def infer_node_datatype(self, model):
        """Adopt the datatype of the patch-token tensor for this node.

        The datatype annotated on input 0 wins over the inputDataType attribute
        (a warning is issued when they differ). The CLS token tensor must carry
        the same datatype, or gets it assigned if it has none. The output tensor
        and both datatype attributes are set to that type.
        """
        node = self.onnx_node
        has_attr = self.get_nodeattr("inputDataType") != ""
        declared = self.get_input_datatype() if has_attr else None

        actual = model.get_tensor_datatype(node.input[0])
        if actual is None:
            actual = declared
        if actual is None:
            raise Exception("AddCLSToken input datatype is not set")

        if declared is not None and declared != actual:
            warnings.warn(
                "inputDataType changing for %s: %s -> %s" % (node.name, str(declared), str(actual))
            )
        self.set_nodeattr("inputDataType", actual.name)

        cls_dt = model.get_tensor_datatype(node.input[1])
        if cls_dt is not None:
            assert cls_dt == actual, "CLS token datatype must match input datatype."
        else:
            model.set_tensor_datatype(node.input[1], actual)

        self.set_nodeattr("outputDataType", actual.name)
        model.set_tensor_datatype(node.output[0], actual)

    def verify_node(self):
        """No node verification is implemented."""
        pass

    def get_input_datatype(self, ind=0):
        """Return the FINN DataType named by inputDataType (same for every input)."""
        name = self.get_nodeattr("inputDataType")
        return DataType[name]

    def get_output_datatype(self, ind=0):
        """Return the FINN DataType of the output; defaults to the input datatype
        when outputDataType is empty."""
        name = self.get_nodeattr("outputDataType")
        return DataType[name] if name != "" else self.get_input_datatype(ind)

    def get_instream_width(self, ind=0):
        """Return the stream width in bits of input `ind`; only input 0 is a
        stream, every other index reports 0."""
        if ind == 0:
            return self.get_input_datatype().bitwidth() * self.get_nodeattr("SIMD")
        return 0

    def get_outstream_width(self, ind=0):
        """Return the output stream width in bits (element width times SIMD)."""
        elem_bits = self.get_output_datatype().bitwidth()
        return elem_bits * self.get_nodeattr("SIMD")

    def get_number_output_values(self):
        """Return the number of output stream words for one frame."""
        folded = self.get_folded_output_shape()
        return int(np.prod(folded[:-1]))

    def get_exp_cycles(self):
        """Return the expected cycle count: one cycle per output stream word."""
        folded = self.get_folded_output_shape()
        return int(np.prod(folded[:-1]))

    def execute_node(self, context, graph):
        """Reference execution: concatenate CLS token, patch tokens and optional
        zero padding along axis 1 and store the float32 result in `context`."""
        node = self.onnx_node
        cls_token = context[node.input[1]]
        patches = context[node.input[0]]

        sequence = np.concatenate([cls_token, patches], axis=1)
        n_pad = self.get_nodeattr("PadTokens")
        if n_pad > 0:
            zeros = np.zeros((1, n_pad, self.get_nodeattr("NumChannels")), dtype=sequence.dtype)
            sequence = np.concatenate([sequence, zeros], axis=1)

        out_shape = self.get_normal_output_shape()
        context[node.output[0]] = np.asarray(sequence, dtype=np.float32).reshape(out_shape)

    def bram_estimation(self):
        """The estimate assumes no BRAM."""
        return 0

    def lut_estimation(self):
        """Return the LUT estimate: a constant 128 plus one LUT per channel."""
        return int(128 + self.get_nodeattr("NumChannels"))

    def get_op_and_param_counts(self):
        """Report the CLS token as NumChannels parameters."""
        return {"param_cls_token": int(self.get_nodeattr("NumChannels"))}
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import numpy as np
import os
import shutil
from qonnx.core.datatype import DataType

from finn.custom_op.fpgadataflow.addclstoken import AddCLSToken
from finn.custom_op.fpgadataflow.rtlbackend import RTLBackend
from finn.util.basic import get_rtlsim_trace_depth, make_build_dir
from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy

# PyVerilator is optional at import time; prepare_rtlsim() raises an ImportError
# if it is requested without the package being installed.
try:
    from pyverilator import PyVerilator
except ModuleNotFoundError:
    PyVerilator = None


def _rtlsrc_dir():
    """Return the directory holding the AddCLSToken HDL sources.

    The path is built from the FINN_ROOT environment variable; a KeyError is
    raised if that variable is not set.
    """
    return os.environ["FINN_ROOT"] + "/finn-rtllib/addclstoken/hdl"


class AddCLSToken_rtl(AddCLSToken, RTLBackend):
    """RTL implementation of AddCLSToken."""

    def __init__(self, onnx_node, **kwargs):
        super().__init__(onnx_node, **kwargs)

    def get_nodeattr_types(self):
        """Return the union of the AddCLSToken and RTLBackend node attributes.

        RTLBackend entries are applied last, so they win on a key clash.
        """
        my_attrs = {}
        my_attrs.update(AddCLSToken.get_nodeattr_types(self))
        my_attrs.update(RTLBackend.get_nodeattr_types(self))
        return my_attrs

    def _pack_value(self, value, dtype):
        """Encode one element as an unsigned integer of dtype.bitwidth() bits.

        BIPOLAR values are mapped with (value + 1) // 2, i.e. -1 -> 0 and +1 -> 1.
        Fixed-point values are first divided by the datatype's scale factor.
        Negative integers are wrapped by adding 2**bitwidth (two's complement),
        and the result is masked to the bit width.
        """
        bitwidth = dtype.bitwidth()
        if dtype == DataType["BIPOLAR"]:
            int_value = int((value + 1) // 2)
        else:
            if dtype.is_fixed_point():
                value = value / dtype.scale_factor()
            int_value = int(value)
            if int_value < 0:
                int_value += 1 << bitwidth
        return int_value & ((1 << bitwidth) - 1)

    def _pack_cls_token(self, model):
        """Pack the constant CLS token into a sized Verilog hex literal.

        The token is read from the initializer of input 1. Element i of the
        flattened token is placed at bit offset i * bitwidth, so element 0 ends
        up in the least significant bits.

        Returns:
            A string of the form "<total bits>'h<hex digits>".

        Raises:
            Exception: if input 1 has no initializer.
            AssertionError: if the token shape does not match the node attributes
                or a value is not allowed by the input datatype.
        """
        dtype = self.get_input_datatype()
        bitwidth = dtype.bitwidth()
        num_channels = self.get_nodeattr("NumChannels")
        cls_token = model.get_initializer(self.onnx_node.input[1])
        if cls_token is None:
            raise Exception("AddCLSToken RTL generation requires a constant CLS token input.")

        cls_token = np.asarray(cls_token, dtype=np.float32)
        assert cls_token.shape == self.get_normal_input_shape(
            1
        ), "CLS token shape does not match AddCLSToken attributes."
        assert np.vectorize(dtype.allowed)(cls_token).all(), (
            "CLS token values cannot be represented with %s" % dtype.name
        )
        packed = 0
        for i, value in enumerate(cls_token.flatten()):
            packed |= self._pack_value(value, dtype) << (i * bitwidth)
        return "%d'h%x" % (num_channels * bitwidth, packed)

    def generate_hdl(self, model, fpgapart, clk):
        """Instantiate the Verilog wrapper template for this node.

        Reads addclstoken_template.v, replaces every "$KEY$" placeholder with the
        value from code_gen_dict, writes the result as <top module name>.v into
        code_gen_dir_ipgen and copies addclstoken.sv next to it. Also sets the
        gen_top_module, ipgen_path and ip_path node attributes.

        `fpgapart` and `clk` are not used by this implementation.
        """
        simd = self.get_nodeattr("SIMD")
        num_channels = self.get_nodeattr("NumChannels")
        assert num_channels % simd == 0, "SIMD must divide NumChannels"

        rtlsrc = _rtlsrc_dir()
        template_path = rtlsrc + "/addclstoken_template.v"
        with open(template_path, "r") as f:
            template = f.read()

        topname = self.get_verilog_top_module_name()
        self.set_nodeattr("gen_top_module", topname)

        elem_width = self.get_input_datatype().bitwidth()
        fold_width = elem_width * simd
        # keys correspond to the $KEY$ placeholders replaced in the template
        code_gen_dict = {
            "TOP_MODULE_NAME": topname,
            "NUM_TOKENS": self.get_nodeattr("NumTokens"),
            "NUM_CHANNELS": num_channels,
            "SIMD": simd,
            "ELEM_WIDTH": elem_width,
            "PAD_TOKENS": self.get_nodeattr("PadTokens"),
            "FOLD_WIDTH": fold_width,
            "CLS_WIDTH": num_channels * elem_width,
            "CLS_DATA": self._pack_cls_token(model),
        }

        for key, value in code_gen_dict.items():
            template = template.replace("$%s$" % key, str(value))

        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
        with open(os.path.join(code_gen_dir, topname + ".v"), "w") as f:
            f.write(template)
        shutil.copy(rtlsrc + "/addclstoken.sv", code_gen_dir)

        self.set_nodeattr("ipgen_path", code_gen_dir)
        self.set_nodeattr("ip_path", code_gen_dir)

    def prepare_rtlsim(self):
        """Build a PyVerilator simulation of the generated wrapper and core.

        The name of the built library is stored in the rtlsim_so node attribute.

        Returns:
            The PyVerilator simulation object.

        Raises:
            ImportError: if PyVerilator could not be imported.
        """
        if PyVerilator is None:
            raise ImportError("Installation of PyVerilator is required.")

        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
        verilog_files = [
            "addclstoken.sv",
            self.get_nodeattr("gen_top_module") + ".v",
        ]
        sim = PyVerilator.build(
            verilog_files,
            build_dir=make_build_dir("pyverilator_" + self.onnx_node.name + "_"),
            verilog_path=[code_gen_dir],
            trace_depth=get_rtlsim_trace_depth(),
            top_module_name=self.get_nodeattr("gen_top_module"),
        )
        self.set_nodeattr("rtlsim_so", sim.lib._name)
        return sim

    def code_generation_ipi(self):
        """Return the Tcl commands that add both HDL files to the project and
        create a block-design module cell named after this node."""
        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
        sourcefiles = [
            "addclstoken.sv",
            self.get_nodeattr("gen_top_module") + ".v",
        ]
        sourcefiles = [os.path.join(code_gen_dir, f) for f in sourcefiles]

        cmd = []
        for f in sourcefiles:
            cmd += ["add_files -norecurse %s" % f]
        cmd += [
            "create_bd_cell -type module -reference %s %s"
            % (self.get_nodeattr("gen_top_module"), self.onnx_node.name)
        ]
        return cmd

    def execute_node(self, context, graph):
        """Execute the node according to its exec_mode attribute.

        "cppsim" delegates to the numpy implementation in AddCLSToken. "rtlsim"
        streams input 0 through the RTL simulation and writes the float32 result
        into `context`. Any other mode raises an Exception.
        """
        mode = self.get_nodeattr("exec_mode")
        if mode == "cppsim":
            AddCLSToken.execute_node(self, context, graph)
        elif mode == "rtlsim":
            node = self.onnx_node
            code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
            exp_ishape = self.get_normal_input_shape(0)
            exp_oshape = self.get_normal_output_shape()

            inp = context[node.input[0]]
            assert str(inp.dtype) == "float32", "Input datatype is not float32"
            assert inp.shape == exp_ishape, "Input shape does not match expected shape."

            # the input is handed to the packing helper via an .npy file in folded layout
            folded_ishape = self.get_folded_input_shape(0)
            np.save(os.path.join(code_gen_dir, "input_0.npy"), inp.reshape(folded_ishape).copy())

            # NOTE(review): get_rtlsim/reset_rtlsim/toggle_clk/rtlsim are not defined
            # in this file (presumably inherited) -- the call order is kept as is.
            sim = self.get_rtlsim()
            export_idt = self.get_input_datatype()
            rtlsim_inp = npy_to_rtlsim_input(
                os.path.join(code_gen_dir, "input_0.npy"),
                export_idt,
                self.get_instream_width(),
            )
            self.reset_rtlsim(sim)
            self.toggle_clk(sim)
            rtlsim_output = self.rtlsim(sim, rtlsim_inp)

            odt = self.get_output_datatype()
            out_npy = rtlsim_output_to_npy(
                rtlsim_output,
                os.path.join(code_gen_dir, "output.npy"),
                odt,
                self.get_folded_output_shape(),
                self.get_outstream_width(),
                odt.bitwidth(),
            )
            context[node.output[0]] = np.asarray(out_npy, dtype=np.float32).reshape(exp_oshape)
        else:
            raise Exception(
                """Invalid value for attribute exec_mode! Is currently set to: {}
            has to be set to one of the following values ("cppsim", "rtlsim")""".format(
                    mode
                )
            )


# --- addition to src/finn/transformation/fpgadataflow/convert_to_hw_layers.py ---
# (Transformation, get_by_name, helper, InferShapes and InferDataTypes come from
# that module's own imports, which are not part of this excerpt.)


class InferAddCLSTokenLayer(Transformation):
    """Convert Concat([cls_token, patches], axis=1) into AddCLSToken.

    A Concat node is converted only if all of the following hold:
      - it has an axis attribute and exactly two inputs,
      - input 0 (CLS token) has an initializer and input 1 (patches) has none,
      - both shapes are fully known, the patch tensor has rank 3 and the
        (normalized) concat axis is 1,
      - the CLS shape is (1, 1, C) and the patch shape is (1, N, C),
      - an annotated output shape, if any, equals [1, N + 1, C],
      - the patch datatype is an integer type and the CLS datatype is either
        unset (it is then set to the patch type) or identical.
    Everything else is left untouched.
    """

    def apply(self, model):
        """Run the conversion once over the graph.

        Returns:
            (model, graph_modified); shape and datatype inference are re-run on
            the model when at least one node was replaced.
        """
        graph = model.graph
        node_ind = 0
        graph_modified = False
        # note: graph.node is modified (insert/remove) while it is iterated
        for node in graph.node:
            node_ind += 1
            if node.op_type != "Concat":
                continue

            axis = get_by_name(node.attribute, "axis")
            if axis is None or len(node.input) != 2:
                continue

            cls_name = node.input[0]
            patch_name = node.input[1]
            cls_init = model.get_initializer(cls_name)
            if cls_init is None or model.get_initializer(patch_name) is not None:
                continue

            cls_shape = model.get_tensor_shape(cls_name)
            if cls_shape is None:
                # fall back to the shape of the constant itself
                cls_shape = list(cls_init.shape)
            patch_shape = model.get_tensor_shape(patch_name)
            if cls_shape is None or patch_shape is None:
                continue
            if any(x is None for x in list(cls_shape) + list(patch_shape)):
                continue

            rank = len(patch_shape)
            # normalize a negative axis to its non-negative equivalent
            concat_axis = axis.i if axis.i >= 0 else axis.i + rank
            if rank != 3 or concat_axis != 1:
                continue

            if len(cls_shape) != 3 or cls_shape[0] != 1 or cls_shape[1] != 1:
                continue
            if patch_shape[0] != 1 or cls_shape[2] != patch_shape[2]:
                continue

            out_shape = model.get_tensor_shape(node.output[0])
            exp_oshape = [1, patch_shape[1] + 1, patch_shape[2]]
            if out_shape is not None and list(out_shape) != exp_oshape:
                continue

            idt = model.get_tensor_datatype(patch_name)
            if idt is None or not idt.is_integer():
                continue
            cls_dt = model.get_tensor_datatype(cls_name)
            if cls_dt is None:
                model.set_tensor_datatype(cls_name, idt)
            elif cls_dt != idt:
                continue

            # AddCLSToken takes the patches as input 0 and the CLS token as
            # input 1, i.e. the reverse of the Concat input order
            new_node = helper.make_node(
                "AddCLSToken",
                [patch_name, cls_name],
                node.output,
                domain="finn.custom_op.fpgadataflow",
                backend="fpgadataflow",
                name="AddCLSToken_" + node.name,
                NumTokens=int(patch_shape[1]),
                NumChannels=int(patch_shape[2]),
                PadTokens=0,
                SIMD=1,
                inputDataType=idt.name,
                outputDataType=idt.name,
            )
            graph.node.insert(node_ind, new_node)
            graph.node.remove(node)
            graph_modified = True

        if graph_modified:
            model = model.transform(InferShapes())
            model = model.transform(InferDataTypes())
        return (model, graph_modified)
import os +import re from finn.util.basic import launch_process_helper, which +def _extract_util_from_report(vivado_proj_folder, row_name): + """Extract the Used column for a row in Vivado's utilization report.""" + + log_path = os.path.join(vivado_proj_folder, "vivado.log") + if not os.path.isfile(log_path): + return None + + row_pattern = re.compile(r"^\|\s*%s\s*\|\s*([0-9.]+)\s*\|" % re.escape(row_name)) + with open(log_path, "r") as f: + for line in f: + match = row_pattern.match(line) + if match is not None: + return float(match.group(1)) + return None + + def out_of_context_synth( verilog_dir, top_name, @@ -48,16 +65,17 @@ def out_of_context_synth( raise Exception("vivado is not in PATH, ensure settings64.sh is sourced.") omx_path = os.environ["OHMYXILINX"] script = "vivadocompile.sh" - # vivadocompile.sh - call_omx = "zsh %s/%s %s %s %s %f" % ( - omx_path, - script, + # vivadocompile.sh + # + call_omx = [ + "zsh", + os.path.join(omx_path, script), top_name, + "", clk_name, fpga_part, - float(clk_period_ns), - ) - call_omx = call_omx.split() + "%f" % float(clk_period_ns), + ] launch_process_helper(call_omx, proc_env=os.environ.copy(), cwd=verilog_dir) vivado_proj_folder = "%s/results_%s" % (verilog_dir, top_name) @@ -67,13 +85,23 @@ def out_of_context_synth( res_data = myfile.read().split("\n") ret = {} ret["vivado_proj_folder"] = vivado_proj_folder + util_report_rows = { + "DSP": "DSPs", + } for res_line in res_data: res_fields = res_line.split("=") print(res_fields) try: ret[res_fields[0]] = float(res_fields[1]) except ValueError: - ret[res_fields[0]] = 0 + util_value = None + if res_fields[0] in util_report_rows: + util_value = _extract_util_from_report( + vivado_proj_folder, util_report_rows[res_fields[0]] + ) + if util_value is None: + raise + ret[res_fields[0]] = util_value except IndexError: ret[res_fields[0]] = 0 if ret["WNS"] == 0: diff --git a/tests/fpgadataflow/test_fpgadataflow_addclstoken.py b/tests/fpgadataflow/test_fpgadataflow_addclstoken.py 
new file mode 100644 index 0000000000..07caadc99f --- /dev/null +++ b/tests/fpgadataflow/test_fpgadataflow_addclstoken.py @@ -0,0 +1,299 @@ +# Copyright (C) 2026, Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import pytest + +import numpy as np +import os +from functools import partial +from onnx import TensorProto, helper, numpy_helper +from pathlib import Path +from qonnx.core.datatype import DataType +from qonnx.core.modelwrapper import ModelWrapper +from qonnx.custom_op.registry import getCustomOp +from qonnx.transformation.general import GiveUniqueNodeNames + +from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer +from finn.analysis.fpgadataflow.res_estimation import ( + res_estimation, + res_estimation_complete, +) +from finn.core.onnx_exec import execute_onnx +from finn.transformation.fpgadataflow.convert_to_hw_layers import InferAddCLSTokenLayer +from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP +from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP +from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO +from finn.transformation.fpgadataflow.prepare_ip import PrepareIP +from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim +from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode +from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers +from finn.transformation.fpgadataflow.synth_ooc import SynthOutOfContext + +FPGA_PART = "xc7z020clg400-1" +CLK_NS = 10 + + +def _make_graph(nodes, output_shape, cls_values, finn_dtype=DataType["INT8"]): + patch_shape = [1, 3, 4] + patches = helper.make_tensor_value_info("patches", TensorProto.FLOAT, patch_shape) + output = helper.make_tensor_value_info("out", TensorProto.FLOAT, output_shape) + cls_init = numpy_helper.from_array(cls_values.astype(np.float32), name="cls") + graph = helper.make_graph(nodes, "addclstoken_test", [patches], [output], [cls_init]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 11)]) + model = ModelWrapper(model) + for tensor_name in ["patches", "cls", "out"]: + model.set_tensor_datatype(tensor_name, finn_dtype) + return model + + 
+def _make_concat_model(): + cls_values = np.asarray([[[1, -2, 3, -4]]], dtype=np.float32) + concat = helper.make_node( + "Concat", + ["cls", "patches"], + ["out"], + axis=1, + name="concat_cls", + ) + model = _make_graph([concat], [1, 4, 4], cls_values) + return model, cls_values + + +def _make_addclstoken_model( + pad_tokens=0, + simd=1, + finn_dtype=DataType["INT8"], + cls_values=None, +): + if cls_values is None: + cls_values = np.asarray([[[1, -2, 3, -4]]], dtype=np.float32) + addcls = helper.make_node( + "AddCLSToken", + ["patches", "cls"], + ["out"], + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + name="AddCLSToken_0", + NumTokens=3, + NumChannels=4, + PadTokens=pad_tokens, + SIMD=simd, + inputDataType=finn_dtype.name, + outputDataType=finn_dtype.name, + ) + model = _make_graph([addcls], [1, 4 + pad_tokens, 4], cls_values, finn_dtype) + return model, cls_values + + +def _prepare_addclstoken_stitched_ip_model(simd=1, pad_tokens=0): + model, cls_values = _make_addclstoken_model(pad_tokens=pad_tokens, simd=simd) + model = model.transform(SpecializeLayers(FPGA_PART)) + model = model.transform(InsertFIFO(create_shallow_fifos=True)) + model = model.transform(SpecializeLayers(FPGA_PART)) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareIP(FPGA_PART, CLK_NS)) + model = model.transform(HLSSynthIP()) + model = model.transform(CreateStitchedIP(FPGA_PART, CLK_NS, vitis=False)) + return model, cls_values + + +@pytest.mark.fpgadataflow +def test_convert_concat_to_addclstoken(): + model, cls_values = _make_concat_model() + patches = np.arange(12, dtype=np.float32).reshape(1, 3, 4) + expected = np.concatenate([cls_values, patches], axis=1) + + ret = execute_onnx(model, {"patches": patches}) + assert (ret["out"] == expected).all() + + model = model.transform(InferAddCLSTokenLayer()) + node = model.graph.node[0] + assert node.op_type == "AddCLSToken" + assert node.domain == "finn.custom_op.fpgadataflow" + assert 
list(node.input) == ["patches", "cls"] + + inst = getCustomOp(node) + assert inst.get_normal_output_shape() == (1, 4, 4) + assert inst.get_exp_cycles() == 16 + + ret = execute_onnx(model, {"patches": patches}) + assert (ret["out"] == expected).all() + + model = model.transform(SpecializeLayers("xc7z020clg400-1")) + assert model.graph.node[0].op_type == "AddCLSToken_rtl" + assert model.graph.node[0].domain == "finn.custom_op.fpgadataflow.rtl" + assert model.graph.node[0].name == "AddCLSToken_concat_cls" + + +@pytest.mark.fpgadataflow +def test_addclstoken_python_execution_with_padding(): + model, cls_values = _make_addclstoken_model(pad_tokens=2) + patches = np.arange(12, dtype=np.float32).reshape(1, 3, 4) + expected = np.concatenate( + [cls_values, patches, np.zeros((1, 2, 4), dtype=np.float32)], + axis=1, + ) + + ret = execute_onnx(model, {"patches": patches}) + assert (ret["out"] == expected).all() + + +@pytest.mark.fpgadataflow +@pytest.mark.parametrize( + "finn_dtype,cls_values,expected_cls_data", + [ + (DataType["INT8"], np.asarray([[[1, -2, 3, -4]]], dtype=np.float32), "32'hfc03fe01"), + (DataType["UINT4"], np.asarray([[[1, 2, 3, 4]]], dtype=np.float32), "16'h4321"), + (DataType["BIPOLAR"], np.asarray([[[1, -1, 1, -1]]], dtype=np.float32), "4'h5"), + ], +) +def test_addclstoken_rtl_codegen(tmp_path, monkeypatch, finn_dtype, cls_values, expected_cls_data): + if "FINN_ROOT" not in os.environ: + monkeypatch.setenv("FINN_ROOT", str(Path(__file__).resolve().parents[2])) + + model, _ = _make_addclstoken_model( + pad_tokens=1, + simd=2, + finn_dtype=finn_dtype, + cls_values=cls_values, + ) + model = model.transform(SpecializeLayers("xc7z020clg400-1")) + + node = model.graph.node[0] + inst = getCustomOp(node) + inst.set_nodeattr("code_gen_dir_ipgen", str(tmp_path)) + inst.code_generation_ipgen(model, "xc7z020clg400-1", 10) + + topname = inst.get_nodeattr("gen_top_module") + assert topname == "AddCLSToken_0" + wrapper = tmp_path / (topname + ".v") + core = tmp_path / 
"addclstoken.sv" + assert wrapper.is_file() + assert core.is_file() + wrapper_text = wrapper.read_text() + assert "parameter FOLD_WIDTH = %d" % (2 * finn_dtype.bitwidth()) in wrapper_text + assert ".SIMD(2)" in wrapper_text + assert ".PAD_TOKENS(1)" in wrapper_text + assert "CLS_DATA = %s" % expected_cls_data in wrapper_text + assert "= '0" not in wrapper_text + + ipi_cmds = inst.code_generation_ipi() + assert any("addclstoken.sv" in cmd for cmd in ipi_cmds) + assert any("create_bd_cell" in cmd and topname in cmd for cmd in ipi_cmds) + + +@pytest.mark.fpgadataflow +def test_addclstoken_resource_estimation(): + model, _ = _make_addclstoken_model(pad_tokens=1, simd=2) + model = model.transform(SpecializeLayers(FPGA_PART)) + model = model.transform(GiveUniqueNodeNames()) + + expected = { + "BRAM_18K": 0, + "BRAM_efficiency": 1, + "LUT": 132, + "URAM": 0, + "URAM_efficiency": 1, + "DSP": 0, + } + resources = model.analysis(partial(res_estimation, fpgapart=FPGA_PART)) + assert len(resources) == 1 + assert list(resources.values())[0] == expected + + complete_resources = model.analysis(partial(res_estimation_complete, fpgapart=FPGA_PART)) + assert len(complete_resources) == 1 + assert list(complete_resources.values())[0] == [expected] + + +@pytest.mark.fpgadataflow +@pytest.mark.vivado +@pytest.mark.slow +@pytest.mark.parametrize("simd,pad_tokens", [(1, 0), (2, 1)]) +def test_addclstoken_rtlsim(simd, pad_tokens): + model, cls_values = _make_addclstoken_model(pad_tokens=pad_tokens, simd=simd) + patches = np.arange(12, dtype=np.float32).reshape(1, 3, 4) + expected_values = [cls_values, patches] + if pad_tokens > 0: + expected_values.append(np.zeros((1, pad_tokens, 4), dtype=np.float32)) + expected = np.concatenate(expected_values, axis=1) + + model = model.transform(SpecializeLayers(FPGA_PART)) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareIP(FPGA_PART, CLK_NS)) + model = model.transform(SetExecMode("rtlsim")) + model = 
model.transform(PrepareRTLSim()) + + ret = execute_onnx(model, {"patches": patches}) + assert (ret["out"] == expected).all() + + node = model.get_nodes_by_op_type("AddCLSToken_rtl")[0] + inst = getCustomOp(node) + cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") + exp_cycles_dict = model.analysis(exp_cycles_per_layer) + exp_cycles = exp_cycles_dict[node.name] + assert np.isclose(exp_cycles, cycles_rtlsim, atol=10) + assert exp_cycles != 0 + + +@pytest.mark.fpgadataflow +@pytest.mark.vivado +@pytest.mark.slow +@pytest.mark.parametrize("simd,pad_tokens", [(1, 0), (2, 1)]) +def test_addclstoken_stitched_ip_rtlsim(simd, pad_tokens): + model, cls_values = _prepare_addclstoken_stitched_ip_model( + simd=simd, + pad_tokens=pad_tokens, + ) + patches = np.arange(12, dtype=np.float32).reshape(1, 3, 4) + expected_values = [cls_values, patches] + if pad_tokens > 0: + expected_values.append(np.zeros((1, pad_tokens, 4), dtype=np.float32)) + expected = np.concatenate(expected_values, axis=1) + + model.set_metadata_prop("exec_mode", "rtlsim") + model.set_metadata_prop("extra_verilator_args", str(["-Wno-TIMESCALEMOD"])) + + ret = execute_onnx(model, {"patches": patches}) + assert (ret["out"] == expected).all() + + +@pytest.mark.fpgadataflow +@pytest.mark.vivado +@pytest.mark.slow +def test_addclstoken_stitched_ip_synth_ooc(): + model, _ = _prepare_addclstoken_stitched_ip_model(simd=2, pad_tokens=1) + model = model.transform(SynthOutOfContext(FPGA_PART, CLK_NS)) + ret = model.get_metadata_prop("res_total_ooc_synth") + assert ret is not None + ret = eval(ret) + + assert ret["LUT"] > 0 + assert ret["FF"] > 0 + assert ret["DSP"] == 0 + assert ret["BRAM"] == 0 + assert ret["WNS"] >= 0 From a3eac3899098d61f2a6ec08019f468ed7c2d8a7a Mon Sep 17 00:00:00 2001 From: ollycassidy13 Date: Wed, 29 Apr 2026 09:57:21 +0100 Subject: [PATCH 2/5] header --- finn-rtllib/addclstoken/hdl/addclstoken.sv | 37 ++++++++-------------- 1 file changed, 13 insertions(+), 24 deletions(-) diff --git 
a/finn-rtllib/addclstoken/hdl/addclstoken.sv b/finn-rtllib/addclstoken/hdl/addclstoken.sv index 768b2a9a06..d5bbdc2188 100644 --- a/finn-rtllib/addclstoken/hdl/addclstoken.sv +++ b/finn-rtllib/addclstoken/hdl/addclstoken.sv @@ -1,33 +1,22 @@ -/****************************************************************************** +/**************************************************************************** * Copyright (C) 2026, Advanced Micro Devices, Inc. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: + * SPDX-License-Identifier: BSD-3-Clause * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. + * @brief Insert a constant class token into a folded token stream. + * @author Oliver Cassidy * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. + * @description + * Prepends a learned class token, supplied through cls_data, to each + * input sequence of patch tokens. The class token and patch tokens are + * transferred as SIMD-wide folds of ELEM_WIDTH-bit elements. * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - *****************************************************************************/ + * Per sequence, the output stream is: + * 1. NUM_CHANNELS/SIMD folds from cls_data + * 2. NUM_TOKENS pass-through input tokens + * 3. PAD_TOKENS zero-valued tokens, when padding is enabled + ***************************************************************************/ module addclstoken #( parameter int unsigned NUM_TOKENS = 196, From 598b572c9e5f6593a2e4e9d7629f02c9a24abe83 Mon Sep 17 00:00:00 2001 From: ollycassidy13 Date: Wed, 29 Apr 2026 20:52:23 +0100 Subject: [PATCH 3/5] select token initial commit --- .../finn.custom_op.fpgadataflow.rst | 8 + .../finn.custom_op.fpgadataflow.rtl.rst | 8 + finn-rtllib/selecttoken/hdl/select_token.sv | 82 ++++++ .../selecttoken/hdl/select_token_template.v | 78 +++++ src/finn/builder/build_dataflow_steps.py | 1 + src/finn/custom_op/fpgadataflow/__init__.py | 4 +- .../custom_op/fpgadataflow/rtl/__init__.py | 2 + .../fpgadataflow/rtl/selecttoken_rtl.py | 133 +++++++++ .../custom_op/fpgadataflow/selecttoken.py | 155 ++++++++++ .../fpgadataflow/convert_to_hw_layers.py | 82 +++++- .../test_fpgadataflow_selecttoken.py | 267 ++++++++++++++++++ 11 files changed, 818 insertions(+), 2 deletions(-) create mode 100644 finn-rtllib/selecttoken/hdl/select_token.sv create mode 100644 finn-rtllib/selecttoken/hdl/select_token_template.v create mode 100644 src/finn/custom_op/fpgadataflow/rtl/selecttoken_rtl.py create mode 100644 
src/finn/custom_op/fpgadataflow/selecttoken.py create mode 100644 tests/fpgadataflow/test_fpgadataflow_selecttoken.py diff --git a/docs/finn/source_code/finn.custom_op.fpgadataflow.rst b/docs/finn/source_code/finn.custom_op.fpgadataflow.rst index 26a2073e4a..6cefa2f15d 100644 --- a/docs/finn/source_code/finn.custom_op.fpgadataflow.rst +++ b/docs/finn/source_code/finn.custom_op.fpgadataflow.rst @@ -119,6 +119,14 @@ finn.custom\_op.fpgadataflow.labelselect :undoc-members: :show-inheritance: +finn.custom\_op.fpgadataflow.selecttoken +----------------------------------------- + +.. automodule:: finn.custom_op.fpgadataflow.selecttoken + :members: + :undoc-members: + :show-inheritance: + finn.custom\_op.fpgadataflow.lookup ----------------------------------------------- diff --git a/docs/finn/source_code/finn.custom_op.fpgadataflow.rtl.rst b/docs/finn/source_code/finn.custom_op.fpgadataflow.rtl.rst index 859a789f2f..26834ec610 100644 --- a/docs/finn/source_code/finn.custom_op.fpgadataflow.rtl.rst +++ b/docs/finn/source_code/finn.custom_op.fpgadataflow.rtl.rst @@ -45,6 +45,14 @@ finn.custom\_op.fpgadataflow.streamingdatawidthconverter\_rtl :undoc-members: :show-inheritance: +finn.custom\_op.fpgadataflow.selecttoken\_rtl +--------------------------------------------------------------- + +.. automodule:: finn.custom_op.fpgadataflow.rtl.selecttoken_rtl + :members: + :undoc-members: + :show-inheritance: + finn.custom\_op.fpgadataflow.streamingfifo\_rtl ------------------------------------------------- diff --git a/finn-rtllib/selecttoken/hdl/select_token.sv b/finn-rtllib/selecttoken/hdl/select_token.sv new file mode 100644 index 0000000000..fb4c3df800 --- /dev/null +++ b/finn-rtllib/selecttoken/hdl/select_token.sv @@ -0,0 +1,82 @@ +/**************************************************************************** + * Copyright (C) 2026, Advanced Micro Devices, Inc. + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + * + * @brief Select one token from a folded token stream. + * @author Oliver Cassidy + * + * @description + * Consumes NUM_TOKENS token vectors fold-by-fold. Folds belonging to + * TOKEN_INDEX are forwarded to the output stream; all other folds are + * consumed and discarded. + ***************************************************************************/ + +module select_token #( + parameter int unsigned NUM_TOKENS = 197, + parameter int unsigned NUM_CHANNELS = 192, + parameter int unsigned SIMD = 1, + parameter int unsigned ELEM_WIDTH = 8, + parameter int unsigned TOKEN_INDEX = 0 +)( + input logic clk, + input logic rst, + + output logic irdy, + input logic ivld, + input logic [SIMD*ELEM_WIDTH-1:0] idat, + + input logic ordy, + output logic ovld, + output logic [SIMD*ELEM_WIDTH-1:0] odat +); + + localparam int unsigned FOLDS_PER_TOKEN = NUM_CHANNELS / SIMD; + localparam int unsigned TOKEN_CNT_WIDTH = (NUM_TOKENS <= 1) ? 1 : $clog2(NUM_TOKENS); + localparam int unsigned FOLD_CNT_WIDTH = + (FOLDS_PER_TOKEN <= 1) ? 
1 : $clog2(FOLDS_PER_TOKEN); + + logic [TOKEN_CNT_WIDTH-1:0] token_cnt; + logic [FOLD_CNT_WIDTH-1:0] fold_cnt; + logic is_selected; + logic in_transfer; + logic fold_cnt_last; + logic token_cnt_last; + + assign is_selected = (int'(token_cnt) == TOKEN_INDEX); + assign in_transfer = irdy & ivld; + assign fold_cnt_last = (int'(fold_cnt) == FOLDS_PER_TOKEN - 1); + assign token_cnt_last = (int'(token_cnt) == NUM_TOKENS - 1); + + always_comb begin + irdy = 1'b1; + ovld = 1'b0; + odat = '0; + + if (is_selected) begin + irdy = ordy; + ovld = ivld; + odat = idat; + end + end + + always_ff @(posedge clk) begin + if (rst) begin + token_cnt <= '0; + fold_cnt <= '0; + end else if (in_transfer) begin + if (fold_cnt_last) begin + fold_cnt <= '0; + if (token_cnt_last) begin + token_cnt <= '0; + end else begin + token_cnt <= token_cnt + 1'b1; + end + end else begin + fold_cnt <= fold_cnt + 1'b1; + end + end + end + +endmodule diff --git a/finn-rtllib/selecttoken/hdl/select_token_template.v b/finn-rtllib/selecttoken/hdl/select_token_template.v new file mode 100644 index 0000000000..566fa63ac5 --- /dev/null +++ b/finn-rtllib/selecttoken/hdl/select_token_template.v @@ -0,0 +1,78 @@ +/****************************************************************************** + * Copyright (C) 2026, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + *****************************************************************************/ + +module $TOP_MODULE_NAME$ #( + parameter FOLD_WIDTH = $FOLD_WIDTH$, + parameter AXI_WIDTH = ((FOLD_WIDTH + 7) / 8) * 8 +)( + (* X_INTERFACE_INFO = "xilinx.com:signal:clock:1.0 ap_clk CLK" *) + (* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out0_V, ASSOCIATED_RESET ap_rst_n" *) + input ap_clk, + (* X_INTERFACE_PARAMETER = "POLARITY ACTIVE_LOW" *) + input ap_rst_n, + + output in0_V_TREADY, + input in0_V_TVALID, + input [AXI_WIDTH-1:0] in0_V_TDATA, + + input out0_V_TREADY, + output out0_V_TVALID, + output [AXI_WIDTH-1:0] out0_V_TDATA +); + + wire [FOLD_WIDTH-1:0] core_out; + + assign out0_V_TDATA[FOLD_WIDTH-1:0] = core_out; + + generate + if (AXI_WIDTH > FOLD_WIDTH) begin : gen_pad_tdata + assign out0_V_TDATA[AXI_WIDTH-1:FOLD_WIDTH] = {(AXI_WIDTH-FOLD_WIDTH){1'b0}}; + end + endgenerate + + select_token #( + .NUM_TOKENS($NUM_TOKENS$), + .NUM_CHANNELS($NUM_CHANNELS$), + .SIMD($SIMD$), + 
.ELEM_WIDTH($ELEM_WIDTH$), + .TOKEN_INDEX($TOKEN_INDEX$) + ) impl ( + .clk(ap_clk), + .rst(!ap_rst_n), + .irdy(in0_V_TREADY), + .ivld(in0_V_TVALID), + .idat(in0_V_TDATA[FOLD_WIDTH-1:0]), + .ordy(out0_V_TREADY), + .ovld(out0_V_TVALID), + .odat(core_out) + ); + +endmodule diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py index ca15a01c07..e84b997b2e 100644 --- a/src/finn/builder/build_dataflow_steps.py +++ b/src/finn/builder/build_dataflow_steps.py @@ -539,6 +539,7 @@ def apply_if_relevant(model, op_types, transform, desc=""): ) # Lookup layers + model = apply_if_relevant(model, ["Gather"], to_hw.InferSelectTokenLayer(), "token selection") model = apply_if_relevant(model, ["Gather"], to_hw.InferLookupLayer(), "lookup layers") # Activation functions diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py index c00a1d5054..4dc93e7dd6 100644 --- a/src/finn/custom_op/fpgadataflow/__init__.py +++ b/src/finn/custom_op/fpgadataflow/__init__.py @@ -71,6 +71,7 @@ def register_custom_op(cls): from finn.custom_op.fpgadataflow.outer_shuffle import OuterShuffle from finn.custom_op.fpgadataflow.pool import Pool from finn.custom_op.fpgadataflow.requant import Requant +from finn.custom_op.fpgadataflow.selecttoken import SelectToken from finn.custom_op.fpgadataflow.shuffle import Shuffle from finn.custom_op.fpgadataflow.split import StreamingSplit from finn.custom_op.fpgadataflow.streamingdataflowpartition import ( @@ -105,10 +106,11 @@ def register_custom_op(cls): custom_op["Lookup"] = Lookup custom_op["OuterShuffle"] = OuterShuffle custom_op["Pool"] = Pool +custom_op["Requant"] = Requant +custom_op["SelectToken"] = SelectToken custom_op["Shuffle"] = Shuffle custom_op["StreamingConcat"] = StreamingConcat custom_op["StreamingSplit"] = StreamingSplit custom_op["StreamingDataWidthConverter"] = StreamingDataWidthConverter custom_op["UpsampleNearestNeighbour"] = UpsampleNearestNeighbour 
custom_op["HWSoftmax"] = HWSoftmax -custom_op["Requant"] = Requant diff --git a/src/finn/custom_op/fpgadataflow/rtl/__init__.py b/src/finn/custom_op/fpgadataflow/rtl/__init__.py index fd3df3fbb7..10deceb9c3 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/__init__.py +++ b/src/finn/custom_op/fpgadataflow/rtl/__init__.py @@ -41,6 +41,7 @@ from finn.custom_op.fpgadataflow.rtl.layernorm_rtl import LayerNorm_rtl from finn.custom_op.fpgadataflow.rtl.matrixvectoractivation_rtl import MVAU_rtl from finn.custom_op.fpgadataflow.rtl.requant_rtl import Requant_rtl +from finn.custom_op.fpgadataflow.rtl.selecttoken_rtl import SelectToken_rtl from finn.custom_op.fpgadataflow.rtl.streamingdatawidthconverter_rtl import ( StreamingDataWidthConverter_rtl, ) @@ -62,6 +63,7 @@ custom_op["StreamingDataWidthConverter_rtl"] = StreamingDataWidthConverter_rtl custom_op["StreamingFIFO_rtl"] = StreamingFIFO_rtl custom_op["MVAU_rtl"] = MVAU_rtl +custom_op["SelectToken_rtl"] = SelectToken_rtl custom_op["VVAU_rtl"] = VVAU_rtl custom_op["Thresholding_rtl"] = Thresholding_rtl custom_op["InnerShuffle_rtl"] = InnerShuffle_rtl diff --git a/src/finn/custom_op/fpgadataflow/rtl/selecttoken_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/selecttoken_rtl.py new file mode 100644 index 0000000000..c429f6e4ed --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/rtl/selecttoken_rtl.py @@ -0,0 +1,133 @@ +# Copyright (C) 2026, Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import os +import shutil + +from finn.custom_op.fpgadataflow.rtlbackend import RTLBackend +from finn.custom_op.fpgadataflow.selecttoken import SelectToken + + +def _rtlsrc_dir(): + return os.environ["FINN_ROOT"] + "/finn-rtllib/selecttoken/hdl" + + +class SelectToken_rtl(SelectToken, RTLBackend): + """RTL implementation of SelectToken.""" + + def __init__(self, onnx_node, **kwargs): + super().__init__(onnx_node, **kwargs) + + def get_nodeattr_types(self): + my_attrs = {} + my_attrs.update(SelectToken.get_nodeattr_types(self)) + my_attrs.update(RTLBackend.get_nodeattr_types(self)) + return my_attrs + + def generate_hdl(self, model, fpgapart, clk): + simd = self.get_nodeattr("SIMD") + num_channels = self.get_nodeattr("NumChannels") + token_index = self.get_nodeattr("TokenIndex") + num_tokens = self.get_nodeattr("NumTokens") + if token_index < 0: + token_index += num_tokens + assert num_channels % simd == 0, "SIMD must divide NumChannels" + assert 0 <= token_index < num_tokens, "TokenIndex must select an 
existing token" + + rtlsrc = _rtlsrc_dir() + template_path = rtlsrc + "/select_token_template.v" + with open(template_path, "r") as f: + template = f.read() + + topname = self.get_verilog_top_module_name() + self.set_nodeattr("gen_top_module", topname) + + elem_width = self.get_input_datatype().bitwidth() + fold_width = elem_width * simd + code_gen_dict = { + "TOP_MODULE_NAME": topname, + "NUM_TOKENS": num_tokens, + "NUM_CHANNELS": num_channels, + "SIMD": simd, + "ELEM_WIDTH": elem_width, + "TOKEN_INDEX": token_index, + "FOLD_WIDTH": fold_width, + } + + for key, value in code_gen_dict.items(): + template = template.replace("$%s$" % key, str(value)) + + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + with open(os.path.join(code_gen_dir, topname + ".v"), "w") as f: + f.write(template) + shutil.copy(rtlsrc + "/select_token.sv", code_gen_dir) + + self.set_nodeattr("ipgen_path", code_gen_dir) + self.set_nodeattr("ip_path", code_gen_dir) + + def get_rtl_file_list(self, abspath=False): + if abspath: + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + "/" + rtllib_dir = _rtlsrc_dir() + "/" + else: + code_gen_dir = "" + rtllib_dir = "" + + verilog_files = [ + rtllib_dir + "select_token.sv", + code_gen_dir + self.get_nodeattr("gen_top_module") + ".v", + ] + return verilog_files + + def code_generation_ipi(self): + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + sourcefiles = self.get_rtl_file_list() + sourcefiles = [os.path.join(code_gen_dir, f) for f in sourcefiles] + + cmd = [] + for f in sourcefiles: + cmd += ["add_files -norecurse %s" % f] + cmd += [ + "create_bd_cell -type module -reference %s %s" + % (self.get_nodeattr("gen_top_module"), self.onnx_node.name) + ] + return cmd + + def execute_node(self, context, graph): + mode = self.get_nodeattr("exec_mode") + if mode == "cppsim": + SelectToken.execute_node(self, context, graph) + elif mode == "rtlsim": + RTLBackend.execute_node(self, context, graph) + else: + raise Exception( + """Invalid value 
for attribute exec_mode! Is currently set to: {} + has to be set to one of the following values ("cppsim", "rtlsim")""".format( + mode + ) + ) diff --git a/src/finn/custom_op/fpgadataflow/selecttoken.py b/src/finn/custom_op/fpgadataflow/selecttoken.py new file mode 100644 index 0000000000..8139fbfbc8 --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/selecttoken.py @@ -0,0 +1,155 @@ +# Copyright (C) 2026, Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import numpy as np +import warnings +from qonnx.core.datatype import DataType + +from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp + + +class SelectToken(HWCustomOp): + """Select one token vector from a sequence of token vectors.""" + + def __init__(self, onnx_node, **kwargs): + super().__init__(onnx_node, **kwargs) + + def get_nodeattr_types(self): + my_attrs = super().get_nodeattr_types() + my_attrs.update( + { + "NumTokens": ("i", True, 0), + "NumChannels": ("i", True, 0), + "TokenIndex": ("i", True, 0), + "SIMD": ("i", False, 1), + "inputDataType": ("s", True, ""), + "outputDataType": ("s", False, ""), + } + ) + return my_attrs + + def get_normal_input_shape(self, ind=0): + if ind != 0: + raise Exception("SelectToken only has one input") + return (1, self.get_nodeattr("NumTokens"), self.get_nodeattr("NumChannels")) + + def get_folded_input_shape(self, ind=0): + normal_shape = self.get_normal_input_shape(ind) + simd = self.get_nodeattr("SIMD") + num_channels = normal_shape[-1] + assert num_channels % simd == 0, "SIMD must divide NumChannels" + return normal_shape[:-1] + (num_channels // simd, simd) + + def get_normal_output_shape(self, ind=0): + return (1, self.get_nodeattr("NumChannels")) + + def get_folded_output_shape(self, ind=0): + normal_shape = self.get_normal_output_shape(ind) + simd = self.get_nodeattr("SIMD") + num_channels = normal_shape[-1] + assert num_channels % simd == 0, "SIMD must divide NumChannels" + return normal_shape[:-1] + (num_channels // simd, simd) + + def make_shape_compatible_op(self, model): + exp_ishape = self.get_normal_input_shape() + ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0])) + assert ishape == exp_ishape, "Unexpected input shape for token sequence." 
+ return super().make_const_shape_op(self.get_normal_output_shape()) + + def infer_node_datatype(self, model): + node = self.onnx_node + attr_idt = None + if self.get_nodeattr("inputDataType") != "": + attr_idt = self.get_input_datatype() + + idt = model.get_tensor_datatype(node.input[0]) + if idt is None: + idt = attr_idt + if idt is None: + raise Exception("SelectToken input datatype is not set") + + if attr_idt is not None and attr_idt != idt: + warnings.warn( + "inputDataType changing for %s: %s -> %s" % (node.name, str(attr_idt), str(idt)) + ) + self.set_nodeattr("inputDataType", idt.name) + + attr_odt = self.get_nodeattr("outputDataType") + if attr_odt != "" and DataType[attr_odt] != idt: + warnings.warn( + "outputDataType changing for %s: %s -> %s" + % (node.name, str(DataType[attr_odt]), str(idt)) + ) + self.set_nodeattr("outputDataType", idt.name) + model.set_tensor_datatype(node.output[0], idt) + + def verify_node(self): + pass + + def get_input_datatype(self, ind=0): + return DataType[self.get_nodeattr("inputDataType")] + + def get_output_datatype(self, ind=0): + odt = self.get_nodeattr("outputDataType") + if odt == "": + return self.get_input_datatype(ind) + return DataType[odt] + + def get_instream_width(self, ind=0): + if ind != 0: + return 0 + return self.get_input_datatype().bitwidth() * self.get_nodeattr("SIMD") + + def get_outstream_width(self, ind=0): + return self.get_output_datatype().bitwidth() * self.get_nodeattr("SIMD") + + def get_number_output_values(self): + return int(np.prod(self.get_folded_output_shape()[:-1])) + + def get_exp_cycles(self): + return int(np.prod(self.get_folded_input_shape()[:-1])) + + def execute_node(self, context, graph): + node = self.onnx_node + inp = context[node.input[0]] + token_index = self.get_nodeattr("TokenIndex") + num_tokens = self.get_nodeattr("NumTokens") + if token_index < 0: + token_index += num_tokens + assert 0 <= token_index < num_tokens, "TokenIndex must select an existing token." 
+ + result = inp[:, token_index, :] + context[node.output[0]] = np.asarray(result, dtype=np.float32).reshape( + self.get_normal_output_shape() + ) + + def bram_estimation(self): + return 0 + + def lut_estimation(self): + return 200 diff --git a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py index 994905c9c6..2c73c88702 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py @@ -1265,9 +1265,89 @@ def apply(self, model): return (model, graph_modified) +class InferSelectTokenLayer(Transformation): + """Convert scalar Gather(input, token_index, axis=1) into SelectToken.""" + + def apply(self, model): + graph = model.graph + node_ind = 0 + graph_modified = False + for node in graph.node: + node_ind += 1 + if node.op_type != "Gather": + continue + + axis = get_by_name(node.attribute, "axis") + if axis is None or len(node.input) != 2: + continue + + seq_name = node.input[0] + idx_name = node.input[1] + idx_init = model.get_initializer(idx_name) + if idx_init is None or idx_init.size != 1: + continue + if model.get_initializer(seq_name) is not None: + continue + + seq_shape = model.get_tensor_shape(seq_name) + if seq_shape is None or any(x is None for x in seq_shape): + continue + + rank = len(seq_shape) + gather_axis = axis.i if axis.i >= 0 else axis.i + rank + if rank != 3 or gather_axis != 1: + continue + + token_index = int(idx_init.flatten()[0]) + num_tokens = int(seq_shape[1]) + if token_index < 0: + token_index += num_tokens + if token_index < 0 or token_index >= num_tokens: + continue + + out_shape = model.get_tensor_shape(node.output[0]) + exp_oshape = [int(seq_shape[0]), int(seq_shape[2])] + if out_shape is not None and list(out_shape) != exp_oshape: + continue + if seq_shape[0] != 1: + continue + + idt = model.get_tensor_datatype(seq_name) + if idt is None or not idt.is_integer(): + continue + odt = 
model.get_tensor_datatype(node.output[0]) + if odt is None: + odt = idt + elif odt != idt: + continue + + new_node = helper.make_node( + "SelectToken", + [seq_name], + node.output, + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + name="SelectToken_" + node.name, + NumTokens=num_tokens, + NumChannels=int(seq_shape[2]), + TokenIndex=token_index, + SIMD=1, + inputDataType=idt.name, + outputDataType=odt.name, + ) + graph.node.insert(node_ind, new_node) + graph.node.remove(node) + graph_modified = True + + if graph_modified: + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + return (model, graph_modified) + + class InferSplitLayer(Transformation): """Convert suitable Split nodes (operating on last/-1 axis) - into StreamingConcat HW layers.""" + into StreamingSplit HW layers.""" def apply(self, model): graph = model.graph diff --git a/tests/fpgadataflow/test_fpgadataflow_selecttoken.py b/tests/fpgadataflow/test_fpgadataflow_selecttoken.py new file mode 100644 index 0000000000..47709a7520 --- /dev/null +++ b/tests/fpgadataflow/test_fpgadataflow_selecttoken.py @@ -0,0 +1,267 @@ +# Copyright (C) 2026, Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import pytest + +import numpy as np +from functools import partial +from onnx import TensorProto, helper, numpy_helper +from qonnx.core.datatype import DataType +from qonnx.core.modelwrapper import ModelWrapper +from qonnx.custom_op.registry import getCustomOp +from qonnx.transformation.general import GiveUniqueNodeNames + +from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer +from finn.analysis.fpgadataflow.res_estimation import ( + res_estimation, + res_estimation_complete, +) +from finn.core.onnx_exec import execute_onnx +from finn.transformation.fpgadataflow.convert_to_hw_layers import InferSelectTokenLayer +from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP +from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP +from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO +from finn.transformation.fpgadataflow.prepare_ip import PrepareIP +from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim +from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode +from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers +from 
finn.transformation.fpgadataflow.synth_ooc import SynthOutOfContext + +FPGA_PART = "xc7z020clg400-1" +CLK_NS = 10 + + +def _make_graph(nodes, output_shape, idx_values=None, finn_dtype=DataType["INT8"]): + tokens_shape = [1, 4, 4] + tokens = helper.make_tensor_value_info("tokens", TensorProto.FLOAT, tokens_shape) + output = helper.make_tensor_value_info("out", TensorProto.FLOAT, output_shape) + initializers = [] + if idx_values is not None: + initializers.append(numpy_helper.from_array(idx_values, name="idx")) + graph = helper.make_graph(nodes, "selecttoken_test", [tokens], [output], initializers) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 11)]) + model = ModelWrapper(model) + for tensor_name in ["tokens", "out"]: + model.set_tensor_datatype(tensor_name, finn_dtype) + return model + + +def _make_gather_model(token_index=0): + idx_values = np.asarray(token_index, dtype=np.int64) + gather = helper.make_node( + "Gather", + ["tokens", "idx"], + ["out"], + axis=1, + name="gather_token", + ) + return _make_graph([gather], [1, 4], idx_values) + + +def _make_selecttoken_model(token_index=0, simd=1, finn_dtype=DataType["INT8"]): + select = helper.make_node( + "SelectToken", + ["tokens"], + ["out"], + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + name="SelectToken_0", + NumTokens=4, + NumChannels=4, + TokenIndex=token_index, + SIMD=simd, + inputDataType=finn_dtype.name, + outputDataType=finn_dtype.name, + ) + return _make_graph([select], [1, 4], None, finn_dtype) + + +def _prepare_selecttoken_stitched_ip_model(simd=1, token_index=0): + model = _make_selecttoken_model(token_index=token_index, simd=simd) + model = model.transform(SpecializeLayers(FPGA_PART)) + model = model.transform(InsertFIFO(create_shallow_fifos=True)) + model = model.transform(SpecializeLayers(FPGA_PART)) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareIP(FPGA_PART, CLK_NS)) + model = model.transform(HLSSynthIP()) + model = 
model.transform(CreateStitchedIP(FPGA_PART, CLK_NS, vitis=False)) + return model + + +@pytest.mark.fpgadataflow +def test_convert_gather_to_selecttoken(): + model = _make_gather_model(token_index=2) + tokens = np.arange(16, dtype=np.float32).reshape(1, 4, 4) + expected = tokens[:, 2, :] + + ret = execute_onnx(model, {"tokens": tokens}) + assert (ret["out"] == expected).all() + + model = model.transform(InferSelectTokenLayer()) + node = model.graph.node[0] + assert node.op_type == "SelectToken" + assert node.domain == "finn.custom_op.fpgadataflow" + assert list(node.input) == ["tokens"] + + inst = getCustomOp(node) + assert inst.get_normal_output_shape() == (1, 4) + assert inst.get_exp_cycles() == 16 + assert inst.get_nodeattr("TokenIndex") == 2 + + ret = execute_onnx(model, {"tokens": tokens}) + assert (ret["out"] == expected).all() + + model = model.transform(SpecializeLayers(FPGA_PART)) + assert model.graph.node[0].op_type == "SelectToken_rtl" + assert model.graph.node[0].domain == "finn.custom_op.fpgadataflow.rtl" + assert model.graph.node[0].name == "SelectToken_gather_token" + + +@pytest.mark.fpgadataflow +@pytest.mark.parametrize("token_index", [0, 1, 3]) +def test_selecttoken_python_execution(token_index): + model = _make_selecttoken_model(token_index=token_index) + tokens = np.arange(16, dtype=np.float32).reshape(1, 4, 4) + expected = tokens[:, token_index, :] + + ret = execute_onnx(model, {"tokens": tokens}) + assert (ret["out"] == expected).all() + + +@pytest.mark.fpgadataflow +@pytest.mark.parametrize( + "finn_dtype,fold_width", + [(DataType["INT8"], 16), (DataType["UINT4"], 8), (DataType["BIPOLAR"], 2)], +) +def test_selecttoken_rtl_codegen(tmp_path, finn_dtype, fold_width): + model = _make_selecttoken_model(token_index=3, simd=2, finn_dtype=finn_dtype) + model = model.transform(SpecializeLayers(FPGA_PART)) + + node = model.graph.node[0] + inst = getCustomOp(node) + inst.set_nodeattr("code_gen_dir_ipgen", str(tmp_path)) + 
inst.code_generation_ipgen(model, FPGA_PART, CLK_NS) + + topname = inst.get_nodeattr("gen_top_module") + assert topname == "SelectToken_0" + wrapper = tmp_path / (topname + ".v") + core = tmp_path / "select_token.sv" + assert wrapper.is_file() + assert core.is_file() + wrapper_text = wrapper.read_text() + assert "parameter FOLD_WIDTH = %d" % fold_width in wrapper_text + assert ".SIMD(2)" in wrapper_text + assert ".TOKEN_INDEX(3)" in wrapper_text + assert "select_token #(" in wrapper_text + assert "out0_V_TVALID" in wrapper_text + + ipi_cmds = inst.code_generation_ipi() + assert any("select_token.sv" in cmd for cmd in ipi_cmds) + assert any("create_bd_cell" in cmd and topname in cmd for cmd in ipi_cmds) + + +@pytest.mark.fpgadataflow +def test_selecttoken_resource_estimation(): + model = _make_selecttoken_model(token_index=1, simd=2) + model = model.transform(SpecializeLayers(FPGA_PART)) + model = model.transform(GiveUniqueNodeNames()) + + expected = { + "BRAM_18K": 0, + "BRAM_efficiency": 1, + "LUT": 200, + "URAM": 0, + "URAM_efficiency": 1, + "DSP": 0, + } + resources = model.analysis(partial(res_estimation, fpgapart=FPGA_PART)) + assert len(resources) == 1 + assert list(resources.values())[0] == expected + + complete_resources = model.analysis(partial(res_estimation_complete, fpgapart=FPGA_PART)) + assert len(complete_resources) == 1 + assert list(complete_resources.values())[0] == [expected] + + +@pytest.mark.fpgadataflow +@pytest.mark.vivado +@pytest.mark.slow +@pytest.mark.parametrize("simd,token_index", [(1, 0), (2, 3)]) +def test_selecttoken_rtlsim(simd, token_index): + model = _make_selecttoken_model(token_index=token_index, simd=simd) + tokens = np.arange(16, dtype=np.float32).reshape(1, 4, 4) + expected = tokens[:, token_index, :] + + model = model.transform(SpecializeLayers(FPGA_PART)) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareIP(FPGA_PART, CLK_NS)) + model = model.transform(SetExecMode("rtlsim")) + model = 
model.transform(PrepareRTLSim()) + + ret = execute_onnx(model, {"tokens": tokens}) + assert (ret["out"] == expected).all() + + node = model.get_nodes_by_op_type("SelectToken_rtl")[0] + inst = getCustomOp(node) + cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") + exp_cycles_dict = model.analysis(exp_cycles_per_layer) + exp_cycles = exp_cycles_dict[node.name] + assert np.isclose(exp_cycles, cycles_rtlsim, atol=10) + assert exp_cycles != 0 + + +@pytest.mark.fpgadataflow +@pytest.mark.vivado +@pytest.mark.slow +@pytest.mark.parametrize("simd,token_index", [(1, 0), (2, 3)]) +def test_selecttoken_stitched_ip_rtlsim(simd, token_index): + model = _prepare_selecttoken_stitched_ip_model(simd=simd, token_index=token_index) + tokens = np.arange(16, dtype=np.float32).reshape(1, 4, 4) + expected = tokens[:, token_index, :] + + model.set_metadata_prop("exec_mode", "rtlsim") + + ret = execute_onnx(model, {"tokens": tokens}) + assert (ret["out"] == expected).all() + + +@pytest.mark.fpgadataflow +@pytest.mark.vivado +@pytest.mark.slow +def test_selecttoken_stitched_ip_synth_ooc(): + model = _prepare_selecttoken_stitched_ip_model(simd=2, token_index=1) + model = model.transform(SynthOutOfContext(FPGA_PART, CLK_NS)) + ret = model.get_metadata_prop("res_total_ooc_synth") + assert ret is not None + ret = eval(ret) + + assert ret["LUT"] > 0 + assert ret["FF"] > 0 + assert ret["DSP"] == 0 + assert ret["BRAM"] == 0 + assert ret["WNS"] >= 0 From 658149983145fbbe53ccc8de6bf6c7846806d4eb Mon Sep 17 00:00:00 2001 From: ollycassidy13 Date: Thu, 30 Apr 2026 16:58:07 +0100 Subject: [PATCH 4/5] Address AddCLSToken review comments --- docs/finn/components/rtl-swg.rst | 2 +- docs/finn/developers.rst | 2 +- docs/finn/source_code/finn.builder.rst | 2 +- docs/finn/source_code/finn.core.rst | 2 +- docs/finn/source_code/finn.rst | 2 +- .../fpgadataflow/rtl/addclstoken_rtl.py | 4 ++++ src/finn/custom_op/fpgadataflow/rtlbackend.py | 10 +++++++--- .../fpgadataflow/specialize_layers.py | 1 - 
.../test_fpgadataflow_addclstoken.py | 19 ++++++++++++------- 9 files changed, 28 insertions(+), 16 deletions(-) diff --git a/docs/finn/components/rtl-swg.rst b/docs/finn/components/rtl-swg.rst index e8db1d2fa7..8d48dc9d5a 100644 --- a/docs/finn/components/rtl-swg.rst +++ b/docs/finn/components/rtl-swg.rst @@ -96,7 +96,7 @@ Dynamic Mode The "default" style also supports a dynamic mode, which provides an interface to change feature map dimensions, stride, or dilation at run-time. See `this pull request `_ for more information. Folding -------- +======= The RTL SWG is supported by the basic automatic folding algorithm in FINN (:py:mod:`finn.transformation.fpgadataflow.set_folding.SetFolding`). Consider the following implications: diff --git a/docs/finn/developers.rst b/docs/finn/developers.rst index 985b86b279..a265c699c9 100644 --- a/docs/finn/developers.rst +++ b/docs/finn/developers.rst @@ -99,7 +99,7 @@ computer, and you should be able to launch the various .tcl scripts or .xpr proj Docker container as well. Linting -------- +======= We use a pre-commit hook to auto-format Python code and check for issues. See https://pre-commit.com/ for installation. 
Once you have pre-commit, you can install diff --git a/docs/finn/source_code/finn.builder.rst b/docs/finn/source_code/finn.builder.rst index e4dc810e81..caadf3f91f 100644 --- a/docs/finn/source_code/finn.builder.rst +++ b/docs/finn/source_code/finn.builder.rst @@ -3,7 +3,7 @@ Builder ******* Modules -~~~~~~~ +======= finn.builder.build\_dataflow ---------------------------- diff --git a/docs/finn/source_code/finn.core.rst b/docs/finn/source_code/finn.core.rst index 4f16b3ac74..28cb47eaf7 100644 --- a/docs/finn/source_code/finn.core.rst +++ b/docs/finn/source_code/finn.core.rst @@ -3,7 +3,7 @@ Core **** Modules -~~~~~~~ +======= qonnx.core.data\_layout ------------------------- diff --git a/docs/finn/source_code/finn.rst b/docs/finn/source_code/finn.rst index f67dd0fe9c..5547a46623 100644 --- a/docs/finn/source_code/finn.rst +++ b/docs/finn/source_code/finn.rst @@ -6,7 +6,7 @@ The FINN sources are divided into different modules. They are listed below. .. note:: **Some of these functions and modules are located in the `qonnx` repository.** Modules -~~~~~~~ +======= .. 
toctree:: :maxdepth: 1 diff --git a/src/finn/custom_op/fpgadataflow/rtl/addclstoken_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/addclstoken_rtl.py index 7b3f810cad..8ca3daec88 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/addclstoken_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/addclstoken_rtl.py @@ -135,6 +135,10 @@ def get_rtl_file_list(self, abspath=False): ] return verilog_files + def get_rtlsim_input_indices(self): + """Only patch tokens are streamed; CLS token data is embedded in generated RTL.""" + return [0] + def code_generation_ipi(self): code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") sourcefiles = self.get_rtl_file_list() diff --git a/src/finn/custom_op/fpgadataflow/rtlbackend.py b/src/finn/custom_op/fpgadataflow/rtlbackend.py index 642523f2db..2b8db0310e 100644 --- a/src/finn/custom_op/fpgadataflow/rtlbackend.py +++ b/src/finn/custom_op/fpgadataflow/rtlbackend.py @@ -85,6 +85,10 @@ def code_generation_ipi(self): def code_generation_ipgen(self, model, fpgapart, clk): self.generate_hdl(model, fpgapart, clk) + def get_rtlsim_input_indices(self): + """Return ONNX input indices that are driven as RTLSim input streams.""" + return range(len(self.onnx_node.input)) + def execute_node(self, context, graph): mode = self.get_nodeattr("exec_mode") code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") @@ -92,10 +96,10 @@ def execute_node(self, context, graph): if mode == "rtlsim": node = self.onnx_node inputs = {} - for i, inp in enumerate(node.input): + for i in self.get_rtlsim_input_indices(): + inp = node.input[i] nbits = self.get_instream_width(i) - if nbits == 0: - continue + assert nbits > 0, "RTLSim input stream %d has zero width." 
% i exp_ishape = tuple(self.get_normal_input_shape(i)) folded_ishape = self.get_folded_input_shape(i) inp_val = context[inp] diff --git a/src/finn/transformation/fpgadataflow/specialize_layers.py b/src/finn/transformation/fpgadataflow/specialize_layers.py index b2a8629789..dcd2472e0a 100644 --- a/src/finn/transformation/fpgadataflow/specialize_layers.py +++ b/src/finn/transformation/fpgadataflow/specialize_layers.py @@ -389,7 +389,6 @@ def apply(self, model): node.input, node.output, domain="finn.custom_op.fpgadataflow." + impl_style, - name=node.name, ) # add all attributes for attribute in node.attribute: diff --git a/tests/fpgadataflow/test_fpgadataflow_addclstoken.py b/tests/fpgadataflow/test_fpgadataflow_addclstoken.py index 766d783271..7e57c3ef0e 100644 --- a/tests/fpgadataflow/test_fpgadataflow_addclstoken.py +++ b/tests/fpgadataflow/test_fpgadataflow_addclstoken.py @@ -120,13 +120,17 @@ def _prepare_addclstoken_stitched_ip_model(simd=1, pad_tokens=0): return model, cls_values +def _make_input_dict(model, patches): + return {model.graph.input[0].name: patches} + + @pytest.mark.fpgadataflow def test_convert_concat_to_addclstoken(): model, cls_values = _make_concat_model() patches = np.arange(12, dtype=np.float32).reshape(1, 3, 4) expected = np.concatenate([cls_values, patches], axis=1) - ret = execute_onnx(model, {"patches": patches}) + ret = execute_onnx(model, _make_input_dict(model, patches)) assert (ret["out"] == expected).all() model = model.transform(InferAddCLSTokenLayer()) @@ -139,13 +143,13 @@ def test_convert_concat_to_addclstoken(): assert inst.get_normal_output_shape() == (1, 4, 4) assert inst.get_exp_cycles() == 16 - ret = execute_onnx(model, {"patches": patches}) + ret = execute_onnx(model, _make_input_dict(model, patches)) assert (ret["out"] == expected).all() model = model.transform(SpecializeLayers(FPGA_PART)) + model = model.transform(GiveUniqueNodeNames()) assert model.graph.node[0].op_type == "AddCLSToken_rtl" assert 
model.graph.node[0].domain == "finn.custom_op.fpgadataflow.rtl" - assert model.graph.node[0].name == "AddCLSToken_concat_cls" @pytest.mark.fpgadataflow @@ -157,7 +161,7 @@ def test_addclstoken_python_execution_with_padding(): axis=1, ) - ret = execute_onnx(model, {"patches": patches}) + ret = execute_onnx(model, _make_input_dict(model, patches)) assert (ret["out"] == expected).all() @@ -178,6 +182,7 @@ def test_addclstoken_rtl_codegen(tmp_path, finn_dtype, cls_values, expected_cls_ cls_values=cls_values, ) model = model.transform(SpecializeLayers(FPGA_PART)) + model = model.transform(GiveUniqueNodeNames()) node = model.graph.node[0] inst = getCustomOp(node) @@ -185,7 +190,7 @@ def test_addclstoken_rtl_codegen(tmp_path, finn_dtype, cls_values, expected_cls_ inst.code_generation_ipgen(model, FPGA_PART, CLK_NS) topname = inst.get_nodeattr("gen_top_module") - assert topname == "AddCLSToken_0" + assert topname == node.name wrapper = tmp_path / (topname + ".v") core = tmp_path / "addclstoken.sv" assert wrapper.is_file() @@ -244,7 +249,7 @@ def test_addclstoken_rtlsim(simd, pad_tokens): model = model.transform(SetExecMode("rtlsim")) model = model.transform(PrepareRTLSim()) - ret = execute_onnx(model, {"patches": patches}) + ret = execute_onnx(model, _make_input_dict(model, patches)) assert (ret["out"] == expected).all() node = model.get_nodes_by_op_type("AddCLSToken_rtl")[0] @@ -273,7 +278,7 @@ def test_addclstoken_stitched_ip_rtlsim(simd, pad_tokens): model.set_metadata_prop("exec_mode", "rtlsim") - ret = execute_onnx(model, {"patches": patches}) + ret = execute_onnx(model, _make_input_dict(model, patches)) assert (ret["out"] == expected).all() From ae8f3e2b072e45a9e1002d6d2f9ded792c6c23d7 Mon Sep 17 00:00:00 2001 From: ollycassidy13 Date: Fri, 1 May 2026 08:44:53 +0100 Subject: [PATCH 5/5] Address SelectToken follow-ups after AddCLSToken merge --- .../test_fpgadataflow_selecttoken.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git 
a/tests/fpgadataflow/test_fpgadataflow_selecttoken.py b/tests/fpgadataflow/test_fpgadataflow_selecttoken.py index 47709a7520..29c6323ac8 100644 --- a/tests/fpgadataflow/test_fpgadataflow_selecttoken.py +++ b/tests/fpgadataflow/test_fpgadataflow_selecttoken.py @@ -113,13 +113,17 @@ def _prepare_selecttoken_stitched_ip_model(simd=1, token_index=0): return model +def _make_input_dict(model, tokens): + return {model.graph.input[0].name: tokens} + + @pytest.mark.fpgadataflow def test_convert_gather_to_selecttoken(): model = _make_gather_model(token_index=2) tokens = np.arange(16, dtype=np.float32).reshape(1, 4, 4) expected = tokens[:, 2, :] - ret = execute_onnx(model, {"tokens": tokens}) + ret = execute_onnx(model, _make_input_dict(model, tokens)) assert (ret["out"] == expected).all() model = model.transform(InferSelectTokenLayer()) @@ -133,13 +137,13 @@ def test_convert_gather_to_selecttoken(): assert inst.get_exp_cycles() == 16 assert inst.get_nodeattr("TokenIndex") == 2 - ret = execute_onnx(model, {"tokens": tokens}) + ret = execute_onnx(model, _make_input_dict(model, tokens)) assert (ret["out"] == expected).all() model = model.transform(SpecializeLayers(FPGA_PART)) + model = model.transform(GiveUniqueNodeNames()) assert model.graph.node[0].op_type == "SelectToken_rtl" assert model.graph.node[0].domain == "finn.custom_op.fpgadataflow.rtl" - assert model.graph.node[0].name == "SelectToken_gather_token" @pytest.mark.fpgadataflow @@ -149,7 +153,7 @@ def test_selecttoken_python_execution(token_index): tokens = np.arange(16, dtype=np.float32).reshape(1, 4, 4) expected = tokens[:, token_index, :] - ret = execute_onnx(model, {"tokens": tokens}) + ret = execute_onnx(model, _make_input_dict(model, tokens)) assert (ret["out"] == expected).all() @@ -161,6 +165,7 @@ def test_selecttoken_python_execution(token_index): def test_selecttoken_rtl_codegen(tmp_path, finn_dtype, fold_width): model = _make_selecttoken_model(token_index=3, simd=2, finn_dtype=finn_dtype) model = 
model.transform(SpecializeLayers(FPGA_PART)) + model = model.transform(GiveUniqueNodeNames()) node = model.graph.node[0] inst = getCustomOp(node) @@ -168,7 +173,7 @@ def test_selecttoken_rtl_codegen(tmp_path, finn_dtype, fold_width): inst.code_generation_ipgen(model, FPGA_PART, CLK_NS) topname = inst.get_nodeattr("gen_top_module") - assert topname == "SelectToken_0" + assert topname == node.name wrapper = tmp_path / (topname + ".v") core = tmp_path / "select_token.sv" assert wrapper.is_file() @@ -223,7 +228,7 @@ def test_selecttoken_rtlsim(simd, token_index): model = model.transform(SetExecMode("rtlsim")) model = model.transform(PrepareRTLSim()) - ret = execute_onnx(model, {"tokens": tokens}) + ret = execute_onnx(model, _make_input_dict(model, tokens)) assert (ret["out"] == expected).all() node = model.get_nodes_by_op_type("SelectToken_rtl")[0] @@ -246,7 +251,7 @@ def test_selecttoken_stitched_ip_rtlsim(simd, token_index): model.set_metadata_prop("exec_mode", "rtlsim") - ret = execute_onnx(model, {"tokens": tokens}) + ret = execute_onnx(model, _make_input_dict(model, tokens)) assert (ret["out"] == expected).all()