Skip to content

[FIRRTL] Excessive DSP slice use on Xilinx FPGAs #4549

Open
@fabianschuiki

Description

@fabianschuiki

@nandor and I stumbled across an interesting discrepancy between MFC and SFC: when the Verilog that each compiler emits for the PipelinedMultiplier module from Rocket Chip is synthesized with Vivado, the MFC output uses roughly twice as many DSP slices as the SFC output.

Utilization discrepancy:

    | LUTs | FFs | DSP48 Blocks |
MFC |  434 | 122 |           31 |
SFC |  227 | 105 |           16 |

Input FIRRTL:

; input.fir
; Reproducer for the DSP-usage discrepancy: a multiplier with one register
; stage in front of the multiply and one register stage behind it.
circuit PipelinedMultiplier :
  module PipelinedMultiplier :
    input clock : Clock
    input reset : UInt<1>
    output io : { flip req : { valid : UInt<1>, bits : { fn : UInt<4>, dw : UInt<1>, in1 : UInt<64>, in2 : UInt<64>, tag : UInt<5>}}, resp : { valid : UInt<1>, bits : { data : UInt<64>, tag : UInt<5>}}}

    ; Input stage. inPipe_valid resets to 0 and follows io.req.valid every
    ; cycle. inPipe_bits uses a constant-0 reset condition (so it is never
    ; reset) and only captures io.req.bits while io.req.valid is high.
    reg inPipe_valid : UInt<1>, clock with :
      reset => (reset, UInt<1>("h0"))
    inPipe_valid <= io.req.valid
    reg inPipe_bits : { fn : UInt<4>, dw : UInt<1>, in1 : UInt<64>, in2 : UInt<64>, tag : UInt<5>}, clock with :
      reset => (UInt<1>("h0"), inPipe_bits)
    when io.req.valid :
      inPipe_bits <= io.req.bits
    ; `in` is the registered request that everything below operates on.
    wire in : { valid : UInt<1>, bits : { fn : UInt<4>, dw : UInt<1>, in1 : UInt<64>, in2 : UInt<64>, tag : UInt<5>}}
    in.valid <= inPipe_valid
    in.bits <= inPipe_bits
    ; Function decode. c is 2 bits wide and is driven from the 4-bit fn further
    ; down (both Verilog outputs in this report use fn[1:0] for it); d = ~c.
    ; The nodes below reduce to:
    ;   b[0] = d[1]
    ;   b[1] = d[1] | (d[0] & c[1])
    ;   b[2] = c[0] | (d[0] & c[1])
    wire c : UInt<2>
    node d = not(c)
    wire e : UInt<3>
    node a_0 = bits(d, 1, 1)
    node _e_T = andr(a_0)
    node a_0_1 = bits(c, 0, 0)
    node _e_T_1 = andr(a_0_1)
    node a_0_2 = bits(d, 0, 0)
    node a_1 = bits(c, 1, 1)
    node _e_T_2 = cat(a_0_2, a_1)
    node _e_T_3 = andr(_e_T_2)
    node _b_T = orr(_e_T)
    node _b_T_1 = cat(_e_T, _e_T_3)
    node _b_T_2 = orr(_b_T_1)
    node _b_T_3 = cat(_e_T_1, _e_T_3)
    node _b_T_4 = orr(_b_T_3)
    node b_hi = cat(_b_T_4, _b_T_2)
    node b = cat(b_hi, _b_T)
    ; f reassembles b bit by bit in the same order, so e == b.
    node _f_T = bits(b, 0, 0)
    node _f_T_1 = bits(b, 1, 1)
    node _f_T_2 = bits(b, 2, 2)
    node f_hi = cat(_f_T_2, _f_T_1)
    node f = cat(f_hi, _f_T)
    e <= f
    c <= in.bits.fn
    ; Control bits taken from e: bit 2 = cmdHi, bit 1 = lhsSigned,
    ; bit 0 = rhsSigned.
    node _T = bits(e, 2, 2)
    node _T_1 = bits(e, 1, 1)
    node _T_2 = bits(e, 0, 0)
    node cmdHi = bits(_T, 0, 0)
    node lhsSigned = bits(_T_1, 0, 0)
    node rhsSigned = bits(_T_2, 0, 0)
    ; cmdHalf is set when dw == 0.
    node _cmdHalf_T = eq(in.bits.dw, UInt<1>("h0"))
    node cmdHalf = and(UInt<1>("h1"), _cmdHalf_T)
    ; Each operand gets one extra top bit (its MSB if it is treated as signed,
    ; otherwise 0) and is then reinterpreted as a signed value.
    node _lhs_T = bits(in.bits.in1, 63, 63)
    node _lhs_T_1 = and(lhsSigned, _lhs_T)
    node _lhs_T_2 = cat(_lhs_T_1, in.bits.in1)
    node lhs = asSInt(_lhs_T_2)
    node _rhs_T = bits(in.bits.in2, 63, 63)
    node _rhs_T_1 = and(rhsSigned, _rhs_T)
    node _rhs_T_2 = cat(_rhs_T_1, in.bits.in2)
    node rhs = asSInt(_rhs_T_2)
    ; The multiply under investigation.
    node prod = mul(lhs, rhs)
    ; Result select: cmdHi -> prod[127:64]; else cmdHalf -> prod[31:0]
    ; sign-extended to 64 bits; else prod[63:0].
    node _muxed_T = bits(prod, 127, 64)
    node _muxed_T_1 = bits(prod, 31, 0)
    node _muxed_T_2 = bits(_muxed_T_1, 31, 31)
    node _muxed_T_3 = bits(_muxed_T_2, 0, 0)
    node _muxed_T_4 = mux(_muxed_T_3, UInt<32>("hffffffff"), UInt<32>("h0"))
    node _muxed_T_5 = cat(_muxed_T_4, _muxed_T_1)
    node _muxed_T_6 = bits(prod, 63, 0)
    node _muxed_T_7 = mux(cmdHalf, _muxed_T_5, _muxed_T_6)
    node muxed = mux(cmdHi, _muxed_T, _muxed_T_7)
    ; Response stage: same register pattern as the input stage, fed from `in`.
    ; Only resp.valid and resp.bits.tag are connected to io below.
    reg respPipe_valid : UInt<1>, clock with :
      reset => (reset, UInt<1>("h0"))
    respPipe_valid <= in.valid
    reg respPipe_bits : { fn : UInt<4>, dw : UInt<1>, in1 : UInt<64>, in2 : UInt<64>, tag : UInt<5>}, clock with :
      reset => (UInt<1>("h0"), respPipe_bits)
    when in.valid :
      respPipe_bits <= in.bits
    wire resp : { valid : UInt<1>, bits : { fn : UInt<4>, dw : UInt<1>, in1 : UInt<64>, in2 : UInt<64>, tag : UInt<5>}}
    resp.valid <= respPipe_valid
    resp.bits <= respPipe_bits
    io.resp.valid <= resp.valid
    io.resp.bits.tag <= resp.bits.tag
    ; Registered result data. Only the .bits half of io_resp_bits_data_out
    ; reaches io; its .valid half is not read anywhere in this module.
    reg io_resp_bits_data_v : UInt<1>, clock with :
      reset => (reset, UInt<1>("h0"))
    io_resp_bits_data_v <= in.valid
    reg io_resp_bits_data_b : UInt<64>, clock with :
      reset => (UInt<1>("h0"), io_resp_bits_data_b)
    when in.valid :
      io_resp_bits_data_b <= muxed
    wire io_resp_bits_data_out : { valid : UInt<1>, bits : UInt<64>}
    io_resp_bits_data_out.valid <= io_resp_bits_data_v
    io_resp_bits_data_out.bits <= io_resp_bits_data_b
    io.resp.bits.data <= io_resp_bits_data_out.bits

Steps to reproduce:

#!/bin/bash
# Reproduce the discrepancy: compile input.fir with both FIRRTL compilers,
# synthesize both results with Vivado, then print the PipelinedMultiplier
# row from each utilization report.
set -o errexit -o xtrace

# Compile the same input with each compiler.
firtool input.fir -o output.mfc.sv
firrtl -i input.fir -o output.sfc.v

# Run the batch synthesis script over both outputs.
vivado -mode batch -source vivado.tcl

# Compare the top-level utilization rows side by side.
grep "^| PipelinedMultiplier" vivado.mfc.rpt vivado.sfc.rpt

Vivado synthesis script, `vivado.tcl` (the specific AWS F1 Xilinx part used here is probably not required -- almost any part should work):

# Synthesize both compiler outputs with an identical in-memory flow so that
# the two utilization reports are directly comparable.
# Each row: flow name, read_verilog arguments, report file.
set flows {
    {MFC {-sv output.mfc.sv} vivado.mfc.rpt}
    {SFC {output.sfc.v}      vivado.sfc.rpt}
}

foreach flow $flows {
    lassign $flow flow_name read_args report_file
    create_project -in_memory -part xcvu9p-flgb2104-2-i
    read_verilog {*}$read_args
    synth_design -top PipelinedMultiplier
    report_utilization -hierarchical -file $report_file
    close_project
}

Resulting Verilog for Reference

MFC Output

// Generated by CIRCT firtool-1.27.0
// Standard header to adapt well known macros to our needs.
// RANDOMIZE is turned on automatically when RANDOMIZE_REG_INIT is defined.
`ifndef RANDOMIZE
  `ifdef RANDOMIZE_REG_INIT
    `define RANDOMIZE
  `endif // RANDOMIZE_REG_INIT
`endif // not def RANDOMIZE

// RANDOM may be set to an expression that produces a 32-bit random unsigned value.
`ifndef RANDOM
  `define RANDOM $random
`endif // not def RANDOM

// Users can define INIT_RANDOM as general code that gets injected into the
// initializer block for modules with registers.
`ifndef INIT_RANDOM
  `define INIT_RANDOM
`endif // not def INIT_RANDOM

// If using random initialization, you can also define RANDOMIZE_DELAY to
// customize the delay used, otherwise 0.002 is used.
`ifndef RANDOMIZE_DELAY
  `define RANDOMIZE_DELAY 0.002
`endif // not def RANDOMIZE_DELAY

// Define INIT_RANDOM_PROLOG_ for use in our modules below.
// With RANDOMIZE defined it expands to INIT_RANDOM, followed by a
// RANDOMIZE_DELAY wait unless VERILATOR is defined; without RANDOMIZE it
// expands to nothing.
`ifndef INIT_RANDOM_PROLOG_
  `ifdef RANDOMIZE
    `ifdef VERILATOR
      `define INIT_RANDOM_PROLOG_ `INIT_RANDOM
    `else  // VERILATOR
      `define INIT_RANDOM_PROLOG_ `INIT_RANDOM #`RANDOMIZE_DELAY begin end
    `endif // VERILATOR
  `else  // RANDOMIZE
    `define INIT_RANDOM_PROLOG_
  `endif // RANDOMIZE
`endif // not def INIT_RANDOM_PROLOG_

// PipelinedMultiplier as emitted by firtool: request registers, a multiply
// computed inside the clocked block, and a registered 64-bit result.
module PipelinedMultiplier(
  input         clock,
                reset,
                io_req_valid,
  input  [3:0]  io_req_bits_fn,
  input         io_req_bits_dw,
  input  [63:0] io_req_bits_in1,
                io_req_bits_in2,
  input  [4:0]  io_req_bits_tag,
  output        io_resp_valid,
  output [63:0] io_resp_bits_data,
  output [4:0]  io_resp_bits_tag);

  // Input-stage registers (inPipe_*) and response-stage registers.
  reg        inPipe_valid;
  reg [3:0]  inPipe_bits_fn;
  reg        inPipe_bits_dw;
  reg [63:0] inPipe_bits_in1;
  reg [63:0] inPipe_bits_in2;
  reg [4:0]  inPipe_bits_tag;
  reg        respPipe_valid;
  reg [4:0]  respPipe_bits_tag;
  reg [63:0] io_resp_bits_data_b;
  always @(posedge clock) begin
    // Only the two valid bits have a (synchronous) reset.
    if (reset) begin
      inPipe_valid <= 1'h0;
      respPipe_valid <= 1'h0;
    end
    else begin
      inPipe_valid <= io_req_valid;
      respPipe_valid <= inPipe_valid;
    end
    // Capture the request payload only while a request is presented.
    if (io_req_valid) begin
      inPipe_bits_fn <= io_req_bits_fn;
      inPipe_bits_dw <= io_req_bits_dw;
      inPipe_bits_in1 <= io_req_bits_in1;
      inPipe_bits_in2 <= io_req_bits_in2;
      inPipe_bits_tag <= io_req_bits_tag;
    end
    // Multiply and result select, evaluated only for a valid latched request.
    if (inPipe_valid) begin
      automatic logic [1:0]   d;
      automatic logic [1:0]   _e_T_2;
      automatic logic [127:0] _GEN;
      // d is the inverse of fn[1:0]; _e_T_2 is {~fn[0], fn[1]}.
      d = ~(inPipe_bits_fn[1:0]);
      _e_T_2 = {d[0], inPipe_bits_fn[1]};
      // Both operands are widened to 128 bits by replicating a conditional
      // sign bit 64 times, and the product is kept at 128 bits.
      // NOTE(review): the SFC output in this report instead multiplies two
      // 65-bit $signed operands; this difference in multiplier width is
      // presumably what drives the higher DSP count -- confirm.
      _GEN =
        {{64{(|{d[1], &_e_T_2}) & inPipe_bits_in1[63]}}, inPipe_bits_in1}
        * {{64{d[1] & inPipe_bits_in2[63]}}, inPipe_bits_in2};
      respPipe_bits_tag <= inPipe_bits_tag;
      // Result select: upper 64 bits of the product, else the lower 64 bits
      // when dw is set, else the low 32 bits sign-extended to 64.
      if (|{inPipe_bits_fn[0], &_e_T_2})
        io_resp_bits_data_b <= _GEN[127:64];
      else if (inPipe_bits_dw)
        io_resp_bits_data_b <= _GEN[63:0];
      else
        io_resp_bits_data_b <= {{32{_GEN[31]}}, _GEN[31:0]};
    end
  end // always @(posedge)
  // Simulation-only random initialization of every register.
  `ifndef SYNTHESIS
    `ifdef FIRRTL_BEFORE_INITIAL
      `FIRRTL_BEFORE_INITIAL
    `endif // FIRRTL_BEFORE_INITIAL
    initial begin
      automatic logic [31:0] _RANDOM_0;
      automatic logic [31:0] _RANDOM_1;
      automatic logic [31:0] _RANDOM_2;
      automatic logic [31:0] _RANDOM_3;
      automatic logic [31:0] _RANDOM_4;
      automatic logic [31:0] _RANDOM_5;
      automatic logic [31:0] _RANDOM_6;
      automatic logic [31:0] _RANDOM_7;
      automatic logic [31:0] _RANDOM_8;
      automatic logic [31:0] _RANDOM_9;
      automatic logic [31:0] _RANDOM_10;
      `ifdef INIT_RANDOM_PROLOG_
        `INIT_RANDOM_PROLOG_
      `endif // INIT_RANDOM_PROLOG_
      `ifdef RANDOMIZE_REG_INIT
        _RANDOM_0 = `RANDOM;
        _RANDOM_1 = `RANDOM;
        _RANDOM_2 = `RANDOM;
        _RANDOM_3 = `RANDOM;
        _RANDOM_4 = `RANDOM;
        _RANDOM_5 = `RANDOM;
        _RANDOM_6 = `RANDOM;
        _RANDOM_7 = `RANDOM;
        _RANDOM_8 = `RANDOM;
        _RANDOM_9 = `RANDOM;
        _RANDOM_10 = `RANDOM;
        // Registers are carved out of the concatenated random words.
        // NOTE(review): the unused bits between _RANDOM_4[11] and
        // _RANDOM_8[17], and _RANDOM_8[22], presumably correspond to source
        // registers that have no counterpart in this module -- confirm.
        inPipe_valid = _RANDOM_0[0];
        inPipe_bits_fn = _RANDOM_0[4:1];
        inPipe_bits_dw = _RANDOM_0[5];
        inPipe_bits_in1 = {_RANDOM_0[31:6], _RANDOM_1, _RANDOM_2[5:0]};
        inPipe_bits_in2 = {_RANDOM_2[31:6], _RANDOM_3, _RANDOM_4[5:0]};
        inPipe_bits_tag = _RANDOM_4[10:6];
        respPipe_valid = _RANDOM_4[11];
        respPipe_bits_tag = _RANDOM_8[21:17];
        io_resp_bits_data_b = {_RANDOM_8[31:23], _RANDOM_9, _RANDOM_10[22:0]};
      `endif // RANDOMIZE_REG_INIT
    end // initial
    `ifdef FIRRTL_AFTER_INITIAL
      `FIRRTL_AFTER_INITIAL
    `endif // FIRRTL_AFTER_INITIAL
  `endif // not def SYNTHESIS
  // Outputs are driven directly from the response-stage registers.
  assign io_resp_valid = respPipe_valid;
  assign io_resp_bits_data = io_resp_bits_data_b;
  assign io_resp_bits_tag = respPipe_bits_tag;
endmodule

SFC Output

// PipelinedMultiplier as emitted by the firrtl tool: request registers, a
// combinational 65-bit signed multiply, and a registered 64-bit result.
module PipelinedMultiplier(
  input         clock,
  input         reset,
  input         io_req_valid,
  input  [3:0]  io_req_bits_fn,
  input         io_req_bits_dw,
  input  [63:0] io_req_bits_in1,
  input  [63:0] io_req_bits_in2,
  input  [4:0]  io_req_bits_tag,
  output        io_resp_valid,
  output [63:0] io_resp_bits_data,
  output [4:0]  io_resp_bits_tag
);
// Scratch words used only by the random register initialization below.
`ifdef RANDOMIZE_REG_INIT
  reg [31:0] _RAND_0;
  reg [31:0] _RAND_1;
  reg [31:0] _RAND_2;
  reg [63:0] _RAND_3;
  reg [63:0] _RAND_4;
  reg [31:0] _RAND_5;
  reg [31:0] _RAND_6;
  reg [31:0] _RAND_7;
  reg [63:0] _RAND_8;
`endif // RANDOMIZE_REG_INIT
  // Input-stage registers.
  reg  inPipe_valid;
  reg [3:0] inPipe_bits_fn;
  reg  inPipe_bits_dw;
  reg [63:0] inPipe_bits_in1;
  reg [63:0] inPipe_bits_in2;
  reg [4:0] inPipe_bits_tag;
  // Function decode from fn[1:0]: f[2] = cmdHi, f[1] = lhsSigned,
  // f[0] = rhsSigned.
  wire [1:0] c = inPipe_bits_fn[1:0];
  wire [1:0] d = ~c;
  wire  a_0 = d[1];
  wire  a_0_1 = c[0];
  wire  a_0_2 = d[0];
  wire  a_1 = c[1];
  wire [1:0] _e_T_2 = {a_0_2,a_1};
  wire [1:0] _b_T_1 = {&a_0,&_e_T_2};
  wire [1:0] _b_T_3 = {&a_0_1,&_e_T_2};
  wire [2:0] b = {|_b_T_3,|_b_T_1,|(&a_0)};
  wire [2:0] f = {b[2],b[1],b[0]};
  wire  cmdHi = f[2];
  wire  lhsSigned = f[1];
  wire  rhsSigned = f[0];
  wire  cmdHalf = ~inPipe_bits_dw;
  // Operands are widened by a single bit: the extra MSB is the operand's own
  // MSB when it is treated as signed, otherwise 0.
  wire  _lhs_T_1 = lhsSigned & inPipe_bits_in1[63];
  wire [64:0] lhs = {_lhs_T_1,inPipe_bits_in1};
  wire  _rhs_T_1 = rhsSigned & inPipe_bits_in2[63];
  wire [64:0] rhs = {_rhs_T_1,inPipe_bits_in2};
  // 65-bit by 65-bit signed multiply into a 130-bit product.
  wire [129:0] prod = $signed(lhs) * $signed(rhs);
  // Low 32 bits of the product sign-extended to 64 bits (used when cmdHalf).
  wire [31:0] _muxed_T_4 = prod[31] ? 32'hffffffff : 32'h0;
  wire [63:0] _muxed_T_5 = {_muxed_T_4,prod[31:0]};
  // Response-stage registers; the outputs are driven directly from them.
  reg  respPipe_valid;
  reg [4:0] respPipe_bits_tag;
  reg [63:0] io_resp_bits_data_b;
  assign io_resp_valid = respPipe_valid;
  assign io_resp_bits_data = io_resp_bits_data_b;
  assign io_resp_bits_tag = respPipe_bits_tag;
  // All registers update on the rising clock edge; only the two valid bits
  // have a (synchronous) reset.
  always @(posedge clock) begin
    if (reset) begin
      inPipe_valid <= 1'h0;
    end else begin
      inPipe_valid <= io_req_valid;
    end
    // Request payload is captured only while a request is presented.
    if (io_req_valid) begin
      inPipe_bits_fn <= io_req_bits_fn;
    end
    if (io_req_valid) begin
      inPipe_bits_dw <= io_req_bits_dw;
    end
    if (io_req_valid) begin
      inPipe_bits_in1 <= io_req_bits_in1;
    end
    if (io_req_valid) begin
      inPipe_bits_in2 <= io_req_bits_in2;
    end
    if (io_req_valid) begin
      inPipe_bits_tag <= io_req_bits_tag;
    end
    if (reset) begin
      respPipe_valid <= 1'h0;
    end else begin
      respPipe_valid <= inPipe_valid;
    end
    if (inPipe_valid) begin
      respPipe_bits_tag <= inPipe_bits_tag;
    end
    // Result select: upper 64 bits of the product, else the sign-extended
    // low 32 bits when cmdHalf, else the lower 64 bits.
    if (inPipe_valid) begin
      if (cmdHi) begin
        io_resp_bits_data_b <= prod[127:64];
      end else if (cmdHalf) begin
        io_resp_bits_data_b <= _muxed_T_5;
      end else begin
        io_resp_bits_data_b <= prod[63:0];
      end
    end
  end
// Register and memory initialization
// Any of the RANDOMIZE_* options below turns on RANDOMIZE.
`ifdef RANDOMIZE_GARBAGE_ASSIGN
`define RANDOMIZE
`endif
`ifdef RANDOMIZE_INVALID_ASSIGN
`define RANDOMIZE
`endif
`ifdef RANDOMIZE_REG_INIT
`define RANDOMIZE
`endif
`ifdef RANDOMIZE_MEM_INIT
`define RANDOMIZE
`endif
`ifndef RANDOM
`define RANDOM $random
`endif
`ifdef RANDOMIZE_MEM_INIT
  integer initvar;
`endif
// Simulation-only block: excluded when SYNTHESIS is defined.
`ifndef SYNTHESIS
`ifdef FIRRTL_BEFORE_INITIAL
`FIRRTL_BEFORE_INITIAL
`endif
initial begin
  `ifdef RANDOMIZE
    `ifdef INIT_RANDOM
      `INIT_RANDOM
    `endif
    // Outside Verilator, wait RANDOMIZE_DELAY (default 0.002) before
    // randomizing.
    `ifndef VERILATOR
      `ifdef RANDOMIZE_DELAY
        #`RANDOMIZE_DELAY begin end
      `else
        #0.002 begin end
      `endif
    `endif
// Each register is initialized from its own _RAND_n scratch word.
`ifdef RANDOMIZE_REG_INIT
  _RAND_0 = {1{`RANDOM}};
  inPipe_valid = _RAND_0[0:0];
  _RAND_1 = {1{`RANDOM}};
  inPipe_bits_fn = _RAND_1[3:0];
  _RAND_2 = {1{`RANDOM}};
  inPipe_bits_dw = _RAND_2[0:0];
  _RAND_3 = {2{`RANDOM}};
  inPipe_bits_in1 = _RAND_3[63:0];
  _RAND_4 = {2{`RANDOM}};
  inPipe_bits_in2 = _RAND_4[63:0];
  _RAND_5 = {1{`RANDOM}};
  inPipe_bits_tag = _RAND_5[4:0];
  _RAND_6 = {1{`RANDOM}};
  respPipe_valid = _RAND_6[0:0];
  _RAND_7 = {1{`RANDOM}};
  respPipe_bits_tag = _RAND_7[4:0];
  _RAND_8 = {2{`RANDOM}};
  io_resp_bits_data_b = _RAND_8[63:0];
`endif // RANDOMIZE_REG_INIT
  `endif // RANDOMIZE
end // initial
`ifdef FIRRTL_AFTER_INITIAL
`FIRRTL_AFTER_INITIAL
`endif
`endif // SYNTHESIS
endmodule

Activity

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Metadata

Metadata

Assignees

No one assigned

    Labels

    ExportVerilog; FIRRTL (Involving the `firrtl` dialect); HW (Involving the `hw` dialect)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions