Open
Description
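@nandor and I have stumbled across an interesting discrepancy between the MFC (the MLIR-based FIRRTL compiler, `firtool`) and the SFC (the Scala FIRRTL compiler, `firrtl`). When the Verilog that each one generates for the `PipelinedMultiplier` module from Rocket Chip is run through Vivado, the MFC output uses noticeably more DSP slices (and more LUTs and FFs) than the SFC output.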
Utilization discrepancy:
| | LUTs | FFs | DSP48 Blocks |
|---|---|---|---|
| MFC | 434 | 122 | 31 |
| SFC | 227 | 105 | 16 |
Input FIRRTL:
; input.fir
; Reproducer: a two-stage pipelined multiplier. Requests are registered, the
; low two bits of fn select operand signedness and which product slice is
; returned, and the selected 64-bit result is registered again.
circuit PipelinedMultiplier :
module PipelinedMultiplier :
input clock : Clock
input reset : UInt<1>
output io : { flip req : { valid : UInt<1>, bits : { fn : UInt<4>, dw : UInt<1>, in1 : UInt<64>, in2 : UInt<64>, tag : UInt<5>}}, resp : { valid : UInt<1>, bits : { data : UInt<64>, tag : UInt<5>}}}
; Input pipeline stage: the valid bit is reset to 0; the request bits are
; captured only while io.req.valid is set.
reg inPipe_valid : UInt<1>, clock with :
reset => (reset, UInt<1>("h0"))
inPipe_valid <= io.req.valid
reg inPipe_bits : { fn : UInt<4>, dw : UInt<1>, in1 : UInt<64>, in2 : UInt<64>, tag : UInt<5>}, clock with :
reset => (UInt<1>("h0"), inPipe_bits)
when io.req.valid :
inPipe_bits <= io.req.bits
wire in : { valid : UInt<1>, bits : { fn : UInt<4>, dw : UInt<1>, in1 : UInt<64>, in2 : UInt<64>, tag : UInt<5>}}
in.valid <= inPipe_valid
in.bits <= inPipe_bits
; Decode: c is driven from in.bits.fn (see the connect below) and d is its
; complement. The and-reduce / or-reduce terms build the 3-bit value e, whose
; bits 2, 1 and 0 become cmdHi, lhsSigned and rhsSigned respectively.
wire c : UInt<2>
node d = not(c)
wire e : UInt<3>
node a_0 = bits(d, 1, 1)
node _e_T = andr(a_0)
node a_0_1 = bits(c, 0, 0)
node _e_T_1 = andr(a_0_1)
node a_0_2 = bits(d, 0, 0)
node a_1 = bits(c, 1, 1)
node _e_T_2 = cat(a_0_2, a_1)
node _e_T_3 = andr(_e_T_2)
node _b_T = orr(_e_T)
node _b_T_1 = cat(_e_T, _e_T_3)
node _b_T_2 = orr(_b_T_1)
node _b_T_3 = cat(_e_T_1, _e_T_3)
node _b_T_4 = orr(_b_T_3)
node b_hi = cat(_b_T_4, _b_T_2)
node b = cat(b_hi, _b_T)
; f reassembles b bit by bit in the same order, so f equals b.
node _f_T = bits(b, 0, 0)
node _f_T_1 = bits(b, 1, 1)
node _f_T_2 = bits(b, 2, 2)
node f_hi = cat(_f_T_2, _f_T_1)
node f = cat(f_hi, _f_T)
e <= f
c <= in.bits.fn
node _T = bits(e, 2, 2)
node _T_1 = bits(e, 1, 1)
node _T_2 = bits(e, 0, 0)
node cmdHi = bits(_T, 0, 0)
node lhsSigned = bits(_T_1, 0, 0)
node rhsSigned = bits(_T_2, 0, 0)
; cmdHalf is set when dw is 0.
node _cmdHalf_T = eq(in.bits.dw, UInt<1>("h0"))
node cmdHalf = and(UInt<1>("h1"), _cmdHalf_T)
; Each 64-bit operand gets one extra top bit: its own bit 63 when the operand
; is flagged as signed, otherwise 0. The 65-bit values are then reinterpreted
; as SInt and multiplied.
node _lhs_T = bits(in.bits.in1, 63, 63)
node _lhs_T_1 = and(lhsSigned, _lhs_T)
node _lhs_T_2 = cat(_lhs_T_1, in.bits.in1)
node lhs = asSInt(_lhs_T_2)
node _rhs_T = bits(in.bits.in2, 63, 63)
node _rhs_T_1 = and(rhsSigned, _rhs_T)
node _rhs_T_2 = cat(_rhs_T_1, in.bits.in2)
node rhs = asSInt(_rhs_T_2)
node prod = mul(lhs, rhs)
; Result select: product bits 127..64 when cmdHi; otherwise, when cmdHalf, the
; low 32 bits extended to 64 bits with copies of bit 31; otherwise bits 63..0.
node _muxed_T = bits(prod, 127, 64)
node _muxed_T_1 = bits(prod, 31, 0)
node _muxed_T_2 = bits(_muxed_T_1, 31, 31)
node _muxed_T_3 = bits(_muxed_T_2, 0, 0)
node _muxed_T_4 = mux(_muxed_T_3, UInt<32>("hffffffff"), UInt<32>("h0"))
node _muxed_T_5 = cat(_muxed_T_4, _muxed_T_1)
node _muxed_T_6 = bits(prod, 63, 0)
node _muxed_T_7 = mux(cmdHalf, _muxed_T_5, _muxed_T_6)
node muxed = mux(cmdHi, _muxed_T, _muxed_T_7)
; Response pipeline stage: carries valid and the request bits forward; only
; valid and tag are connected to io.resp below.
reg respPipe_valid : UInt<1>, clock with :
reset => (reset, UInt<1>("h0"))
respPipe_valid <= in.valid
reg respPipe_bits : { fn : UInt<4>, dw : UInt<1>, in1 : UInt<64>, in2 : UInt<64>, tag : UInt<5>}, clock with :
reset => (UInt<1>("h0"), respPipe_bits)
when in.valid :
respPipe_bits <= in.bits
wire resp : { valid : UInt<1>, bits : { fn : UInt<4>, dw : UInt<1>, in1 : UInt<64>, in2 : UInt<64>, tag : UInt<5>}}
resp.valid <= respPipe_valid
resp.bits <= respPipe_bits
io.resp.valid <= resp.valid
io.resp.bits.tag <= resp.bits.tag
; Registered result: io_resp_bits_data_b captures muxed while in.valid is set
; and drives io.resp.bits.data. io_resp_bits_data_v is assigned but its value
; is not connected to any output.
reg io_resp_bits_data_v : UInt<1>, clock with :
reset => (reset, UInt<1>("h0"))
io_resp_bits_data_v <= in.valid
reg io_resp_bits_data_b : UInt<64>, clock with :
reset => (UInt<1>("h0"), io_resp_bits_data_b)
when in.valid :
io_resp_bits_data_b <= muxed
wire io_resp_bits_data_out : { valid : UInt<1>, bits : UInt<64>}
io_resp_bits_data_out.valid <= io_resp_bits_data_v
io_resp_bits_data_out.bits <= io_resp_bits_data_b
io.resp.bits.data <= io_resp_bits_data_out.bits
Steps to reproduce:
#!/bin/bash
# Reproduces the comparison: compile input.fir with both compilers, synthesize
# both outputs with Vivado, and print the PipelinedMultiplier utilization rows.
# -x echoes each command as it runs; -e aborts on the first failing command.
set -xe
# MFC: firtool writes SystemVerilog to output.mfc.sv.
firtool input.fir -o output.mfc.sv
# SFC: firrtl writes Verilog to output.sfc.v.
firrtl -i input.fir -o output.sfc.v
# Run the Vivado script non-interactively; it reads both generated files and
# writes vivado.mfc.rpt and vivado.sfc.rpt.
vivado -mode batch -source vivado.tcl
# Print the PipelinedMultiplier row from each utilization report.
grep "^| PipelinedMultiplier" vivado.mfc.rpt vivado.sfc.rpt
Vivado synthesis script (`vivado.tcl`). The part below is the AWS F1 Xilinx part, but you probably don't need that specific one -- almost any part should work:
# MFC
# Synthesize the firtool output in a fresh in-memory project and write a
# hierarchical utilization report to vivado.mfc.rpt.
create_project -in_memory -part xcvu9p-flgb2104-2-i
# -sv: the MFC output is read as SystemVerilog.
read_verilog -sv output.mfc.sv
synth_design -top PipelinedMultiplier
report_utilization -hierarchical -file vivado.mfc.rpt
close_project
# SFC
# Same flow and same part for the firrtl output (read as plain Verilog), so
# the two reports differ only in the input file.
create_project -in_memory -part xcvu9p-flgb2104-2-i
read_verilog output.sfc.v
synth_design -top PipelinedMultiplier
report_utilization -hierarchical -file vivado.sfc.rpt
close_project
Resulting Verilog for Reference
MFC Output
// Generated by CIRCT firtool-1.27.0
// Preprocessor prelude emitted ahead of the module. Each section supplies a
// default for a macro only when it is not already defined: RANDOMIZE, RANDOM,
// INIT_RANDOM, RANDOMIZE_DELAY and INIT_RANDOM_PROLOG_.
// Standard header to adapt well known macros to our needs.
`ifndef RANDOMIZE
`ifdef RANDOMIZE_REG_INIT
`define RANDOMIZE
`endif // RANDOMIZE_REG_INIT
`endif // not def RANDOMIZE
// RANDOM may be set to an expression that produces a 32-bit random unsigned value.
`ifndef RANDOM
`define RANDOM $random
`endif // not def RANDOM
// Users can define INIT_RANDOM as general code that gets injected into the
// initializer block for modules with registers.
`ifndef INIT_RANDOM
`define INIT_RANDOM
`endif // not def INIT_RANDOM
// If using random initialization, you can also define RANDOMIZE_DELAY to
// customize the delay used, otherwise 0.002 is used.
`ifndef RANDOMIZE_DELAY
`define RANDOMIZE_DELAY 0.002
`endif // not def RANDOMIZE_DELAY
// Define INIT_RANDOM_PROLOG_ for use in our modules below.
// Under Verilator the prolog is INIT_RANDOM alone; otherwise it is followed
// by a #RANDOMIZE_DELAY delay. Without RANDOMIZE the prolog is empty.
`ifndef INIT_RANDOM_PROLOG_
`ifdef RANDOMIZE
`ifdef VERILATOR
`define INIT_RANDOM_PROLOG_ `INIT_RANDOM
`else // VERILATOR
`define INIT_RANDOM_PROLOG_ `INIT_RANDOM #`RANDOMIZE_DELAY begin end
`endif // VERILATOR
`else // RANDOMIZE
`define INIT_RANDOM_PROLOG_
`endif // RANDOMIZE
`endif // not def INIT_RANDOM_PROLOG_
// PipelinedMultiplier as emitted by firtool (MFC). Requests are registered
// into inPipe_*, the product is computed inside the clocked block, and the
// selected 64-bit slice is registered into io_resp_bits_data_b.
module PipelinedMultiplier(
input clock,
reset,
io_req_valid,
input [3:0] io_req_bits_fn,
input io_req_bits_dw,
input [63:0] io_req_bits_in1,
io_req_bits_in2,
input [4:0] io_req_bits_tag,
output io_resp_valid,
output [63:0] io_resp_bits_data,
output [4:0] io_resp_bits_tag);
// Stage 1 registers: the captured request.
reg inPipe_valid;
reg [3:0] inPipe_bits_fn;
reg inPipe_bits_dw;
reg [63:0] inPipe_bits_in1;
reg [63:0] inPipe_bits_in2;
reg [4:0] inPipe_bits_tag;
// Stage 2 registers: response valid, tag and data.
reg respPipe_valid;
reg [4:0] respPipe_bits_tag;
reg [63:0] io_resp_bits_data_b;
always @(posedge clock) begin
// Only the two valid bits are reset; the data registers are not.
if (reset) begin
inPipe_valid <= 1'h0;
respPipe_valid <= 1'h0;
end
else begin
inPipe_valid <= io_req_valid;
respPipe_valid <= inPipe_valid;
end
// Capture the request fields while io_req_valid is set.
if (io_req_valid) begin
inPipe_bits_fn <= io_req_bits_fn;
inPipe_bits_dw <= io_req_bits_dw;
inPipe_bits_in1 <= io_req_bits_in1;
inPipe_bits_in2 <= io_req_bits_in2;
inPipe_bits_tag <= io_req_bits_tag;
end
if (inPipe_valid) begin
automatic logic [1:0] d;
automatic logic [1:0] _e_T_2;
automatic logic [127:0] _GEN;
// d is the complement of fn[1:0]; _e_T_2 pairs d[0] with fn[1].
d = ~(inPipe_bits_fn[1:0]);
_e_T_2 = {d[0], inPipe_bits_fn[1]};
// Both operands are widened to 128 bits by replicating a conditional sign
// bit 64 times, so the multiply is written as a 128x128-bit product. The
// SFC output in this report instead multiplies two 65-bit $signed values.
// NOTE(review): presumably this wider multiply is what drives the higher
// DSP48 count reported for the MFC output -- confirm against synthesis.
_GEN =
{{64{(|{d[1], &_e_T_2}) & inPipe_bits_in1[63]}}, inPipe_bits_in1}
* {{64{d[1] & inPipe_bits_in2[63]}}, inPipe_bits_in2};
respPipe_bits_tag <= inPipe_bits_tag;
// Result select: upper 64 bits, else (dw set) lower 64 bits, else the low
// 32 bits extended with copies of bit 31.
if (|{inPipe_bits_fn[0], &_e_T_2})
io_resp_bits_data_b <= _GEN[127:64];
else if (inPipe_bits_dw)
io_resp_bits_data_b <= _GEN[63:0];
else
io_resp_bits_data_b <= {{32{_GEN[31]}}, _GEN[31:0]};
end
end // always @(posedge)
// Simulation-only register randomization; excluded when SYNTHESIS is defined.
`ifndef SYNTHESIS
`ifdef FIRRTL_BEFORE_INITIAL
`FIRRTL_BEFORE_INITIAL
`endif // FIRRTL_BEFORE_INITIAL
initial begin
automatic logic [31:0] _RANDOM_0;
automatic logic [31:0] _RANDOM_1;
automatic logic [31:0] _RANDOM_2;
automatic logic [31:0] _RANDOM_3;
automatic logic [31:0] _RANDOM_4;
automatic logic [31:0] _RANDOM_5;
automatic logic [31:0] _RANDOM_6;
automatic logic [31:0] _RANDOM_7;
automatic logic [31:0] _RANDOM_8;
automatic logic [31:0] _RANDOM_9;
automatic logic [31:0] _RANDOM_10;
`ifdef INIT_RANDOM_PROLOG_
`INIT_RANDOM_PROLOG_
`endif // INIT_RANDOM_PROLOG_
`ifdef RANDOMIZE_REG_INIT
_RANDOM_0 = `RANDOM;
_RANDOM_1 = `RANDOM;
_RANDOM_2 = `RANDOM;
_RANDOM_3 = `RANDOM;
_RANDOM_4 = `RANDOM;
_RANDOM_5 = `RANDOM;
_RANDOM_6 = `RANDOM;
_RANDOM_7 = `RANDOM;
_RANDOM_8 = `RANDOM;
_RANDOM_9 = `RANDOM;
_RANDOM_10 = `RANDOM;
// Each register takes its initial value from a slice of the random words.
inPipe_valid = _RANDOM_0[0];
inPipe_bits_fn = _RANDOM_0[4:1];
inPipe_bits_dw = _RANDOM_0[5];
inPipe_bits_in1 = {_RANDOM_0[31:6], _RANDOM_1, _RANDOM_2[5:0]};
inPipe_bits_in2 = {_RANDOM_2[31:6], _RANDOM_3, _RANDOM_4[5:0]};
inPipe_bits_tag = _RANDOM_4[10:6];
respPipe_valid = _RANDOM_4[11];
respPipe_bits_tag = _RANDOM_8[21:17];
io_resp_bits_data_b = {_RANDOM_8[31:23], _RANDOM_9, _RANDOM_10[22:0]};
`endif // RANDOMIZE_REG_INIT
end // initial
`ifdef FIRRTL_AFTER_INITIAL
`FIRRTL_AFTER_INITIAL
`endif // FIRRTL_AFTER_INITIAL
`endif // not def SYNTHESIS
// Outputs are driven directly from the stage 2 registers.
assign io_resp_valid = respPipe_valid;
assign io_resp_bits_data = io_resp_bits_data_b;
assign io_resp_bits_tag = respPipe_bits_tag;
endmodule
SFC Output
// PipelinedMultiplier as emitted by firrtl (SFC). Requests are registered into
// inPipe_*, the decode and product are continuous assignments, and the
// selected 64-bit slice is registered into io_resp_bits_data_b.
module PipelinedMultiplier(
input clock,
input reset,
input io_req_valid,
input [3:0] io_req_bits_fn,
input io_req_bits_dw,
input [63:0] io_req_bits_in1,
input [63:0] io_req_bits_in2,
input [4:0] io_req_bits_tag,
output io_resp_valid,
output [63:0] io_resp_bits_data,
output [4:0] io_resp_bits_tag
);
// Scratch words used only by the register randomization block below.
`ifdef RANDOMIZE_REG_INIT
reg [31:0] _RAND_0;
reg [31:0] _RAND_1;
reg [31:0] _RAND_2;
reg [63:0] _RAND_3;
reg [63:0] _RAND_4;
reg [31:0] _RAND_5;
reg [31:0] _RAND_6;
reg [31:0] _RAND_7;
reg [63:0] _RAND_8;
`endif // RANDOMIZE_REG_INIT
// Stage 1 registers: the captured request.
reg inPipe_valid;
reg [3:0] inPipe_bits_fn;
reg inPipe_bits_dw;
reg [63:0] inPipe_bits_in1;
reg [63:0] inPipe_bits_in2;
reg [4:0] inPipe_bits_tag;
// Decode of fn[1:0] (c) and its complement (d) into the 3-bit value b/f,
// whose bits 2, 1 and 0 are cmdHi, lhsSigned and rhsSigned.
wire [1:0] c = inPipe_bits_fn[1:0];
wire [1:0] d = ~c;
wire a_0 = d[1];
wire a_0_1 = c[0];
wire a_0_2 = d[0];
wire a_1 = c[1];
wire [1:0] _e_T_2 = {a_0_2,a_1};
wire [1:0] _b_T_1 = {&a_0,&_e_T_2};
wire [1:0] _b_T_3 = {&a_0_1,&_e_T_2};
wire [2:0] b = {|_b_T_3,|_b_T_1,|(&a_0)};
wire [2:0] f = {b[2],b[1],b[0]};
wire cmdHi = f[2];
wire lhsSigned = f[1];
wire rhsSigned = f[0];
wire cmdHalf = ~inPipe_bits_dw;
// Each operand is extended by a single bit (bit 63 when flagged signed, else
// 0), giving a 65x65-bit signed multiply. Only prod[127:0] is used below.
wire _lhs_T_1 = lhsSigned & inPipe_bits_in1[63];
wire [64:0] lhs = {_lhs_T_1,inPipe_bits_in1};
wire _rhs_T_1 = rhsSigned & inPipe_bits_in2[63];
wire [64:0] rhs = {_rhs_T_1,inPipe_bits_in2};
wire [129:0] prod = $signed(lhs) * $signed(rhs);
// Low 32 bits of the product extended to 64 bits with copies of bit 31.
wire [31:0] _muxed_T_4 = prod[31] ? 32'hffffffff : 32'h0;
wire [63:0] _muxed_T_5 = {_muxed_T_4,prod[31:0]};
// Stage 2 registers: response valid, tag and data.
reg respPipe_valid;
reg [4:0] respPipe_bits_tag;
reg [63:0] io_resp_bits_data_b;
// Outputs are driven directly from the stage 2 registers.
assign io_resp_valid = respPipe_valid;
assign io_resp_bits_data = io_resp_bits_data_b;
assign io_resp_bits_tag = respPipe_bits_tag;
always @(posedge clock) begin
// Only the two valid bits are reset; the data registers are not.
if (reset) begin
inPipe_valid <= 1'h0;
end else begin
inPipe_valid <= io_req_valid;
end
// One enable block per request field, all gated by io_req_valid.
if (io_req_valid) begin
inPipe_bits_fn <= io_req_bits_fn;
end
if (io_req_valid) begin
inPipe_bits_dw <= io_req_bits_dw;
end
if (io_req_valid) begin
inPipe_bits_in1 <= io_req_bits_in1;
end
if (io_req_valid) begin
inPipe_bits_in2 <= io_req_bits_in2;
end
if (io_req_valid) begin
inPipe_bits_tag <= io_req_bits_tag;
end
if (reset) begin
respPipe_valid <= 1'h0;
end else begin
respPipe_valid <= inPipe_valid;
end
if (inPipe_valid) begin
respPipe_bits_tag <= inPipe_bits_tag;
end
// Result select: upper 64 bits when cmdHi, else the extended low 32 bits
// when cmdHalf, else the lower 64 bits.
if (inPipe_valid) begin
if (cmdHi) begin
io_resp_bits_data_b <= prod[127:64];
end else if (cmdHalf) begin
io_resp_bits_data_b <= _muxed_T_5;
end else begin
io_resp_bits_data_b <= prod[63:0];
end
end
end
// Register and memory initialization
// Any of the four RANDOMIZE_* switches below turns on RANDOMIZE.
`ifdef RANDOMIZE_GARBAGE_ASSIGN
`define RANDOMIZE
`endif
`ifdef RANDOMIZE_INVALID_ASSIGN
`define RANDOMIZE
`endif
`ifdef RANDOMIZE_REG_INIT
`define RANDOMIZE
`endif
`ifdef RANDOMIZE_MEM_INIT
`define RANDOMIZE
`endif
`ifndef RANDOM
`define RANDOM $random
`endif
`ifdef RANDOMIZE_MEM_INIT
integer initvar;
`endif
// Simulation-only register randomization; excluded when SYNTHESIS is defined.
`ifndef SYNTHESIS
`ifdef FIRRTL_BEFORE_INITIAL
`FIRRTL_BEFORE_INITIAL
`endif
initial begin
`ifdef RANDOMIZE
`ifdef INIT_RANDOM
`INIT_RANDOM
`endif
// Outside Verilator, wait RANDOMIZE_DELAY (or 0.002 when it is not defined)
// before assigning the random values.
`ifndef VERILATOR
`ifdef RANDOMIZE_DELAY
#`RANDOMIZE_DELAY begin end
`else
#0.002 begin end
`endif
`endif
`ifdef RANDOMIZE_REG_INIT
// Each register gets its own random word and takes the low bits it needs.
_RAND_0 = {1{`RANDOM}};
inPipe_valid = _RAND_0[0:0];
_RAND_1 = {1{`RANDOM}};
inPipe_bits_fn = _RAND_1[3:0];
_RAND_2 = {1{`RANDOM}};
inPipe_bits_dw = _RAND_2[0:0];
_RAND_3 = {2{`RANDOM}};
inPipe_bits_in1 = _RAND_3[63:0];
_RAND_4 = {2{`RANDOM}};
inPipe_bits_in2 = _RAND_4[63:0];
_RAND_5 = {1{`RANDOM}};
inPipe_bits_tag = _RAND_5[4:0];
_RAND_6 = {1{`RANDOM}};
respPipe_valid = _RAND_6[0:0];
_RAND_7 = {1{`RANDOM}};
respPipe_bits_tag = _RAND_7[4:0];
_RAND_8 = {2{`RANDOM}};
io_resp_bits_data_b = _RAND_8[63:0];
`endif // RANDOMIZE_REG_INIT
`endif // RANDOMIZE
end // initial
`ifdef FIRRTL_AFTER_INITIAL
`FIRRTL_AFTER_INITIAL
`endif
`endif // SYNTHESIS
endmodule
Activity