Skip to content

Commit 8d5295c

Browse files
committed
Merge branch 'dev' into custom/transformer
2 parents dd7952e + 46bbe99 commit 8d5295c

File tree

7 files changed

+512
-17
lines changed

7 files changed

+512
-17
lines changed

finn-rtllib/mvu/mvu_vvu_8sx9_dsp58.sv

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -110,17 +110,17 @@ module mvu_vvu_8sx9_dsp58 #(
110110
assign vld = L[0];
111111

112112
//-------------------- Shift register for ZERO flag --------------------\\
113-
logic Z [0:MAX_PIPELINE_STAGES-2] = '{default:0}; // We need MAX_PIPELINE_STAGES-1 pipeline stages (note: INMODE is buffered inside DSP fabric)
114-
115-
if (MAX_PIPELINE_STAGES > 1) begin : genZreg
113+
// We need MAX_PIPELINE_STAGES-1 delay stages (INMODE is registered once more inside DSP)
114+
uwire [MAX_PIPELINE_STAGES-1:0] inmode_zero;
115+
assign inmode_zero[0] = zero;
116+
if(MAX_PIPELINE_STAGES > 1) begin : genZReg
117+
logic [MAX_PIPELINE_STAGES-1:1] Z = '1;
116118
always_ff @(posedge clk) begin
117-
if (rst) Z <= '{default: 0};
118-
else if(en) begin
119-
Z[0] <= zero;
120-
if (MAX_PIPELINE_STAGES > 2) Z[1:MAX_PIPELINE_STAGES-2] <= Z[0:MAX_PIPELINE_STAGES-3];
121-
end
119+
if(rst) Z <= '1;
120+
else if(en) Z <= inmode_zero[MAX_PIPELINE_STAGES-2:0];
122121
end
123-
end;
122+
assign inmode_zero[MAX_PIPELINE_STAGES-1:1] = Z;
123+
end : genZReg
124124

125125
//-------------------- Buffer for input activations --------------------\\
126126
localparam int unsigned PAD_BITS_ACT = 9 - ACTIVATION_WIDTH;
@@ -131,10 +131,10 @@ module mvu_vvu_8sx9_dsp58 #(
131131
localparam int LANES_OCCUPIED = i == CHAINLEN-1 ? SIMD - 3*i : 3;
132132

133133
if (EXTERNAL_PREGS > 0) begin : genExternalPregAct
134-
logic [0:EXTERNAL_PREGS-1][LANES_OCCUPIED-1:0][ACTIVATION_WIDTH-1:0] A = '{ default : 0};
134+
(* EXTRACT_SHREG = "true" *)
135+
logic [0:EXTERNAL_PREGS-1][LANES_OCCUPIED-1:0][ACTIVATION_WIDTH-1:0] A = '{ default : 'x };
135136
always_ff @(posedge clk) begin
136-
if (rst) A <= '{default: 0};
137-
else if(en) begin
137+
if(en) begin
138138
A[EXTERNAL_PREGS-1] <=
139139
// synthesis translate_off
140140
zero ? '1 :
@@ -177,10 +177,10 @@ module mvu_vvu_8sx9_dsp58 #(
177177
localparam int LANES_OCCUPIED = j == CHAINLEN-1 ? SIMD - 3*j : 3;
178178

179179
if (EXTERNAL_PREGS > 0) begin : genExternalPregWeight
180-
logic [0:PE-1][0:EXTERNAL_PREGS-1][LANES_OCCUPIED-1:0][WEIGHT_WIDTH-1:0] B = '{ default : 0};
180+
(* EXTRACT_SHREG = "true" *)
181+
logic [0:PE-1][0:EXTERNAL_PREGS-1][LANES_OCCUPIED-1:0][WEIGHT_WIDTH-1:0] B = '{ default : 'x };
181182
always_ff @(posedge clk) begin
182-
if (rst) B <= '{default: 0};
183-
else if (en) begin
183+
if(en) begin
184184
B[i][EXTERNAL_PREGS-1] <=
185185
// synthesis translate_off
186186
zero ? '1 :
@@ -253,7 +253,7 @@ module mvu_vvu_8sx9_dsp58 #(
253253
logic InmodeZero = 0;
254254
always_ff @(posedge clk) begin
255255
if (rst) InmodeZero <= 0;
256-
else if (en) InmodeZero <= ( TOTAL_PREGS > 0 ? Z[TOTAL_PREGS-1] : zero );
256+
else if (en) InmodeZero <= inmode_zero[TOTAL_PREGS];
257257
end
258258
always_ff @(posedge clk) begin
259259
if (rst) Mreg <= 0;
@@ -401,7 +401,7 @@ module mvu_vvu_8sx9_dsp58 #(
401401
.INMODE({
402402
INTERNAL_PREGS==2 ? 1'b0 : 1'b1,
403403
2'b00,
404-
TOTAL_PREGS > 0 ? Z[TOTAL_PREGS-1] : zero,
404+
inmode_zero[TOTAL_PREGS],
405405
INTERNAL_PREGS==2 ? 1'b0 : 1'b1
406406
}), // 5-bit input: INMODE control
407407
.NEGATE('0), // 3-bit input: Negates the input of the multiplier

src/finn/custom_op/fpgadataflow/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
from finn.custom_op.fpgadataflow.convolutioninputgenerator import (
3838
ConvolutionInputGenerator,
3939
)
40+
from finn.custom_op.fpgadataflow.crop import Crop
4041
from finn.custom_op.fpgadataflow.duplicatestreams import DuplicateStreams
4142

4243
# Also import ElementwiseBinary variants
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
###################################################################################
2+
# Copyright (C) 2025, Advanced Micro Devices, Inc.
3+
# All rights reserved.
4+
#
5+
# SPDX-License-Identifier: BSD-3-Clause
6+
#
7+
# Copyright for portions of this file is held by AMD and Microsoft under
8+
# MIT license as part of project Brainsmith.
9+
# All other copyright is held by AMD and is provided under BSD-3-Clause license.
10+
#
11+
###################################################################################
12+
13+
import numpy as np
14+
import warnings
15+
from qonnx.core.datatype import DataType
16+
17+
from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp
18+
19+
20+
class Crop(HWCustomOp):
    """Abstraction layer for Crop layers.

    Removes ``CropNorth``/``CropSouth`` rows and ``CropWest``/``CropEast``
    columns from the border of a feature map whose innermost dimension is the
    channel dimension. A 1-D feature map is described by an ``ImgDim`` whose
    height entry is 0.
    """

    def __init__(self, onnx_node, **kwargs):
        super().__init__(onnx_node, **kwargs)

    def get_nodeattr_types(self):
        """Return this op's attribute table merged with the base class's table."""
        my_attrs = {
            "DataType": ("s", True, ""),
            "ImgDim": ("ints", True, []),  # [h, w]
            "NumChannels": ("i", True, 0),
            # The crop amounts are scalar ("i") attributes, so their declared
            # default is a scalar as well rather than an empty list.
            "CropNorth": ("i", True, 0),
            "CropSouth": ("i", True, 0),
            "CropWest": ("i", True, 0),
            "CropEast": ("i", True, 0),
            "SIMD": ("i", False, 1),
            "numInputVectors": ("ints", False, []),
        }
        my_attrs.update(super().get_nodeattr_types())
        return my_attrs

    def get_normal_input_shape(self, ind=0):
        """Return the unfolded input shape as a list.

        The shape is ``numInputVectors + [h, w] + [ch]``; the height is dropped
        when it is 0 (1-D case) and the leading dimensions are dropped when
        ``numInputVectors`` is ``[0]``.
        """
        num_vec = self.get_nodeattr("numInputVectors")
        h, w = self.get_nodeattr("ImgDim")
        img_dim = [w] if h == 0 else [h, w]
        ch = self.get_nodeattr("NumChannels")
        return num_vec + img_dim + [ch] if num_vec != [0] else img_dim + [ch]

    def get_normal_output_shape(self, ind=0):
        """Return the unfolded output shape (input shape minus the crops) as a list."""
        num_vec = self.get_nodeattr("numInputVectors")
        height, width = self.get_nodeattr("ImgDim")
        ch = self.get_nodeattr("NumChannels")
        crop_north = self.get_nodeattr("CropNorth")
        crop_east = self.get_nodeattr("CropEast")
        crop_west = self.get_nodeattr("CropWest")
        crop_south = self.get_nodeattr("CropSouth")
        owidth = width - (crop_west + crop_east)
        # Decide 1-D vs. 2-D from the *input* height, exactly as
        # get_normal_input_shape does, so both shapes always have the same rank.
        if height == 0:
            o_img_dim = [owidth]
        else:
            oheight = height - (crop_north + crop_south)
            o_img_dim = [oheight, owidth]
        return num_vec + o_img_dim + [ch] if num_vec != [0] else o_img_dim + [ch]

    def execute_node(self, context, graph):
        """Crop the input tensor found in ``context`` and store the result there.

        2-D inputs are cropped along the width only, 3-D inputs along height
        and width, 4-D inputs along height and width behind one leading
        dimension.

        Raises:
            Exception: if the input tensor is not 2-D, 3-D or 4-D.
        """
        node = self.onnx_node
        h, w = self.get_nodeattr("ImgDim")
        crop_north = self.get_nodeattr("CropNorth")
        crop_east = self.get_nodeattr("CropEast")
        crop_west = self.get_nodeattr("CropWest")
        crop_south = self.get_nodeattr("CropSouth")
        inp = context[node.input[0]]
        # NOTE(review): a 3-D input is always interpreted as (h, w, ch); a 1-D
        # image with one leading vector dimension would also be 3-D - confirm
        # that this layout never reaches this op.
        if len(inp.shape) == 3:
            cropped_slice = inp[crop_north : h - crop_south, crop_west : w - crop_east, :]
        elif len(inp.shape) == 2:
            cropped_slice = inp[crop_west : w - crop_east, :]
        elif len(inp.shape) == 4:
            cropped_slice = inp[:, crop_north : h - crop_south, crop_west : w - crop_east, :]
        else:
            raise Exception("Crop execute node currently only supports 2D - 4D input tensors.")
        # Sanity check: the slice must agree with the statically derived shape.
        assert cropped_slice.shape == tuple(self.get_normal_output_shape())
        context[node.output[0]] = cropped_slice

    def get_input_datatype(self, ind=0):
        """Return the DataType named by the ``DataType`` attribute."""
        return DataType[self.get_nodeattr("DataType")]

    def infer_node_datatype(self, model):
        """Adopt the datatype of the input tensor and propagate it to the output.

        A warning is issued when the incoming datatype differs from the one
        currently stored in the ``DataType`` attribute.
        """
        node = self.onnx_node
        dt = model.get_tensor_datatype(node.input[0])
        if dt != self.get_input_datatype():
            warn_str = (
                f"data_type changing for {node.name}: {str(self.get_input_datatype())} -> {str(dt)}"
            )
            warnings.warn(warn_str)
        self.set_nodeattr("DataType", dt.name)
        # Cropping does not alter values, so the output carries the same
        # datatype; annotate the output tensor so downstream nodes see it.
        model.set_tensor_datatype(node.output[0], dt)

    def get_instream_width(self, ind=0):
        """Return the input stream width in bits (element bitwidth times SIMD)."""
        ibits = self.get_input_datatype().bitwidth()
        simd = self.get_nodeattr("SIMD")
        return ibits * simd

    def get_outstream_width(self, ind=0):
        """Return the output stream width in bits (element bitwidth times SIMD)."""
        obits = self.get_output_datatype().bitwidth()
        simd = self.get_nodeattr("SIMD")
        return obits * simd

    def get_output_datatype(self, ind=0):
        """Return the DataType named by the ``DataType`` attribute."""
        return DataType[self.get_nodeattr("DataType")]

    def get_folded_output_shape(self, ind=0):
        """Return the output shape with the innermost dimension split into (fold, SIMD)."""
        normal_oshape = list(self.get_normal_output_shape())
        simd = self.get_nodeattr("SIMD")
        assert normal_oshape[-1] % simd == 0, "Innermost dimension must be divisible by SIMD"
        # Exact integer division; avoids the float round trip of int(a / b).
        fold = normal_oshape[-1] // simd
        folded_oshape = normal_oshape[:-1] + [fold, simd]
        return tuple(folded_oshape)

    def get_folded_input_shape(self, ind=0):
        """Return the input shape with the innermost dimension split into (fold, SIMD)."""
        normal_ishape = list(self.get_normal_input_shape())
        simd = self.get_nodeattr("SIMD")
        assert normal_ishape[-1] % simd == 0, "Innermost dimension must be divisible by SIMD"
        # Exact integer division; avoids the float round trip of int(a / b).
        fold = normal_ishape[-1] // simd
        folded_ishape = normal_ishape[:-1] + [fold, simd]
        return tuple(folded_ishape)

    def get_exp_cycles(self):
        """Return the expected cycle count as an int.

        The count is one cycle per SIMD-wide word of the input feature map,
        multiplied by the product of ``numInputVectors`` unless that attribute
        is ``[0]``.
        """
        simd = self.get_nodeattr("SIMD")
        num_vec = self.get_nodeattr("numInputVectors")
        height, width = self.get_nodeattr("ImgDim")
        ch = self.get_nodeattr("NumChannels")
        if height == 0:
            # pretend that height is 1 for code generation
            height = 1

        cycles = height * width * (ch // simd)
        if num_vec != [0]:
            # np.prod of an empty list is the float 1.0; the int() below keeps
            # the result an integer in that case too.
            cycles = np.prod(num_vec) * cycles
        return int(cycles)

src/finn/custom_op/fpgadataflow/hls/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
from finn.custom_op.fpgadataflow.hls.channelwise_op_hls import ChannelwiseOp_hls
3636
from finn.custom_op.fpgadataflow.hls.checksum_hls import CheckSum_hls
3737
from finn.custom_op.fpgadataflow.hls.concat_hls import StreamingConcat_hls
38+
from finn.custom_op.fpgadataflow.hls.crop_hls import Crop_hls
3839
from finn.custom_op.fpgadataflow.hls.duplicatestreams_hls import DuplicateStreams_hls
3940

4041
# Also import ElementwiseBinary variants
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
###################################################################################
2+
# Copyright (C) 2025, Advanced Micro Devices, Inc.
3+
# All rights reserved.
4+
#
5+
# SPDX-License-Identifier: BSD-3-Clause
6+
#
7+
# Copyright for portions of this file is held by AMD and Microsoft under
8+
# MIT license as part of project Brainsmith.
9+
# All other copyright is held by AMD and is provided under BSD-3-Clause license.
10+
#
11+
###################################################################################
12+
13+
from finn.custom_op.fpgadataflow.crop import Crop
14+
from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend
15+
16+
17+
class Crop_hls(Crop, HLSBackend):
    """HLS backend of the Crop layer.

    Fills the entries of ``code_gen_dict`` with the C++ fragments that
    instantiate the ``crop`` template from ``crop.hpp``.
    """

    def __init__(self, onnx_node, **kwargs):
        super().__init__(onnx_node, **kwargs)

    def get_nodeattr_types(self):
        """Return the union of the Crop and HLSBackend attribute tables."""
        # With ``|`` the right-hand (HLSBackend) entry wins on a key clash.
        return Crop.get_nodeattr_types(self) | HLSBackend.get_nodeattr_types(self)

    def global_includes(self):
        """Set the ``$GLOBALS$`` entry to the include of ``crop.hpp``."""
        self.code_gen_dict["$GLOBALS$"] = [
            '#include "crop.hpp"',
        ]

    def defines(self, var):
        """Set the ``$DEFINES$`` entry to the compile-time constants of this node.

        Emits SIMD, image height/width, channel fold (``NumChannels // SIMD``),
        the four crop amounts and the stream element type ``TV``.
        ``var`` is not used by this implementation.
        """
        simd = self.get_nodeattr("SIMD")
        dtype = self.get_input_datatype()
        height, width = self.get_nodeattr("ImgDim")
        if height == 0:
            # pretend that height is 1 for code generation
            height = 1
        ch = self.get_nodeattr("NumChannels")
        self.code_gen_dict["$DEFINES$"] = [
            f"""
constexpr unsigned SIMD = {simd};
constexpr unsigned H = {height};
constexpr unsigned W = {width};
constexpr unsigned CF = {ch // simd};
constexpr unsigned CROP_N = {self.get_nodeattr("CropNorth")};
constexpr unsigned CROP_E = {self.get_nodeattr("CropEast")};
constexpr unsigned CROP_S = {self.get_nodeattr("CropSouth")};
constexpr unsigned CROP_W = {self.get_nodeattr("CropWest")};
using TV = hls::vector<{dtype.get_hls_datatype_str()}, SIMD>;
"""
        ]

    def docompute(self):
        """Set the ``$DOCOMPUTE$`` entry.

        The generated body moves ``in0_V`` into a local stream, runs ``crop``
        on it and moves the result stream to ``out0_V``.
        """
        self.code_gen_dict["$DOCOMPUTE$"] = [
            """
hls::stream<TV> src0;
hls::stream<TV> dst0;
#pragma HLS stream variable=src0 depth=2
#pragma HLS stream variable=dst0 depth=2

move(in0_V, src0);
crop< H, W, CF, CROP_N, CROP_E, CROP_S, CROP_W, TV>(src0, dst0);
move(dst0, out0_V);
"""
        ]

    def blackboxfunction(self):
        """Set the ``$BLACKBOXFUNCTION$`` entry to the top-level function signature.

        The function is named after the ONNX node and takes one input and one
        output stream of ``TV``.
        """
        self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
            f"""
void {self.onnx_node.name} (
hls::stream<TV> &in0_V,
hls::stream<TV> &out0_V
)
"""
        ]

    def pragmas(self):
        """Set the ``$PRAGMAS$`` entry to the interface and dataflow pragmas."""
        self.code_gen_dict["$PRAGMAS$"] = [
            """
#pragma HLS interface AXIS port=in0_V
#pragma HLS interface AXIS port=out0_V
#pragma HLS aggregate variable=in0_V compact=bit
#pragma HLS aggregate variable=out0_V compact=bit

#pragma HLS interface ap_ctrl_none port=return
#pragma HLS dataflow disable_start_propagation
"""
        ]

    def execute_node(self, context, graph):
        """Execute the node through ``HLSBackend.execute_node``."""
        # Called explicitly on HLSBackend so that Crop.execute_node, which
        # comes first in the MRO, is bypassed.
        HLSBackend.execute_node(self, context, graph)

0 commit comments

Comments
 (0)