/****************************************************************************
 * Copyright Advanced Micro Devices, Inc.
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * @brief Two-input elementwise stream operation (generalized).
 *  Supports float/float, int/float, float/int, and int/int paths.
 * @author Thomas B. Preußer <thomas.preusser@amd.com>
 * @author Shane Fleming <shane.fleming@amd.com>
 ***************************************************************************/

// Elementwise two-operand stream operator.
//
// Joins the `a` and `b` input streams under valid/ready handshaking, feeds
// PE parallel lanes of a compute core that has no stall input, and collects
// the results in an output queue. Back-pressure is handled by credits: the
// number of operations that have been issued but not yet handed over on the
// output is bounded by CREDIT, which is also the ELASTICITY of the queue.
module eltwise #(
	parameter     OP,                    // ADD(a+b), SUB(a-b), SBR(b-a), MUL(a*b)
	int unsigned  PE               = 1,  // number of parallel lanes per stream transaction
	shortreal     B_SCALE          = 1.0,  // forwarded to binopf; must be 1.0 on the int/int path
	bit           FORCE_BEHAVIORAL = 0,    // forwarded to binopf

	// Operand type selection: 1 = float32, 0 = integer
	bit  A_FLOAT = 1,
	bit  B_FLOAT = 1,

	// Integer operand format (only relevant for an operand that is not float)
	int unsigned  A_WIDTH  = 32,
	bit           A_SIGNED = 0,
	int unsigned  B_WIDTH  = 32,
	bit           B_SIGNED = 0,

	// Derived port geometry (localparams: cannot be overridden)
	localparam int unsigned  A_DAT_W   = A_FLOAT? 32 : A_WIDTH,
	localparam int unsigned  B_DAT_W   = B_FLOAT? 32 : B_WIDTH,
	localparam bit           BOTH_INT  = !A_FLOAT && !B_FLOAT,
	localparam bit           IS_MUL    = (OP == "MUL"),
	localparam int unsigned  INT_WIDTH = BOTH_INT? A_WIDTH : 0,
	// Result is fp32 unless both operands are integer; then the width grows
	// to 2*W for a product and to W+1 for the other operations.
	localparam int unsigned  O_WIDTH   =
		!BOTH_INT? 32 : IS_MUL? 2*INT_WIDTH : INT_WIDTH + 1
)(
	// Global control
	input	logic  clk,
	input	logic  rst,

	// Operand stream A
	input	logic [PE-1:0][A_DAT_W-1:0]  adat,
	input	logic  avld,
	output	logic  ardy,

	// Operand stream B
	input	logic [PE-1:0][B_DAT_W-1:0]  bdat,
	input	logic  bvld,
	output	logic  brdy,

	// Result stream
	output	logic [PE-1:0][O_WIDTH-1:0]  odat,
	output	logic  ovld,
	input	logic  ordy
);

	//=== Derived Parameters ===============================================
	localparam bit  BOTH_FLOAT = A_FLOAT && B_FLOAT;
	localparam bit  HAVE_SCALE = (B_SCALE != 1.0);

	// NOTE(review): these are presumably the pipeline depths of binopi and
	// binopf for the chosen configuration - confirm against those modules.
	localparam int unsigned  BINOPI_LATENCY = IS_MUL? 3 : 1;
	localparam int unsigned  BINOPF_LATENCY = HAVE_SCALE? 4 : (2 + IS_MUL);

	// Exactly one integer operand: one extra register stage behind the converter.
	localparam int unsigned  CONV_LATENCY = (A_FLOAT != B_FLOAT)? 1 : 0;

	localparam int unsigned  LATENCY =
		BOTH_INT? BINOPI_LATENCY : (BINOPF_LATENCY + CONV_LATENCY);

	// Issue credits and output queue elasticity.
	// TODO(review): confirm the rationale for the margin of 3 on top of LATENCY.
	localparam int unsigned  CREDIT = LATENCY + 3;

	//=== Parameter Validation =============================================
	// All restrictions concern the integer-integer path only.
	initial begin
		if(BOTH_INT) begin
			if(HAVE_SCALE) begin
				$error("%m: B_SCALE=%f not supported for integer-integer path", B_SCALE);
				$finish;
			end
			if(A_SIGNED != B_SIGNED) begin
				$error("%m: A_SIGNED must match B_SIGNED for integer-integer path");
				$finish;
			end
			if(A_WIDTH != B_WIDTH) begin
				$error("%m: A_WIDTH must match B_WIDTH for integer-integer path");
				$finish;
			end
		end
	end

	//=== Input Sidestep Registers =========================================
	// Each operand stream has a one-entry side buffer. While its `rdy` flag
	// is set, the input port is passed through directly and shadowed into
	// `val`. The flag is cleared when a valid input arrives in a cycle
	// without an issue, so that `val` holds the operand until the next `take`.
	uwire  take;	// an operand pair is issued into the compute pipeline

	typedef logic [PE-1:0][A_DAT_W-1:0]  a_vec_t;
	typedef logic [PE-1:0][B_DAT_W-1:0]  b_vec_t;
	typedef logic [PE-1:0][O_WIDTH-1:0]  o_vec_t;

	typedef struct {
		a_vec_t  val;
		logic    rdy;
	} abuf_t;
	typedef struct {
		b_vec_t  val;
		logic    rdy;
	} bbuf_t;

	abuf_t  A = '{ val: 'x, rdy: '1 };
	always_ff @(posedge clk) begin
		if(rst)  A <= '{ val: 'x, rdy: '1 };
		else begin
			if(A.rdy)  A.val <= adat;
			A.rdy <= take || (A.rdy && !avld);
		end
	end

	bbuf_t  B = '{ val: 'x, rdy: '1 };
	always_ff @(posedge clk) begin
		if(rst)  B <= '{ val: 'x, rdy: '1 };
		else begin
			if(B.rdy)  B.val <= bdat;
			B.rdy <= take || (B.rdy && !bvld);
		end
	end

	assign	ardy = A.rdy;
	assign	brdy = B.rdy;

	// Effective operands: live input while ready, buffered value otherwise.
	uwire a_vec_t  a = A.rdy? adat : A.val;
	uwire b_vec_t  b = B.rdy? bdat : B.val;

	//=== Credit-based Operation Issue =====================================
	// Credit starts at -CREDIT, counts up on every issue and down on every
	// completed output handshake. Its sign bit being set means that credit
	// is still available.
	logic signed [$clog2(CREDIT):0]  Credit = -CREDIT;
	uwire  give = ovld && ordy;

	uwire  a_avail      = avld || !ardy;	// offered on the port or held in the side buffer
	uwire  b_avail      = bvld || !brdy;
	uwire  credit_avail = Credit[$left(Credit)];
	assign	take = a_avail && b_avail && credit_avail;

	always_ff @(posedge clk) begin
		if(rst)  Credit <= -CREDIT;
		else     Credit <= Credit + ((give != take)? (give? -1 : 1) : 0);
	end

	//=== Converter Valid Alignment =======================================
	// `take` delayed by one cycle: the valid that accompanies the operands
	// registered behind the int-to-float converter on the mixed-type paths.
	logic  Take = 1'b0;
	always_ff @(posedge clk)  Take <= !rst && take;

	//=== Free-running Compute Pipeline ====================================
	uwire o_vec_t   r;
	uwire [PE-1:0]  rvld_vec;
	uwire           rvld;

	for(genvar i = 0; i < PE; i++) begin : genPE

		case({A_FLOAT, B_FLOAT})

		// float (a) / float (b): operands go straight into the float core
		2'b11: begin : genFF
			binopf #(.OP(OP), .B_SCALE(B_SCALE), .FORCE_BEHAVIORAL(FORCE_BEHAVIORAL)) core (
				.clk(clk), .rst(rst),
				.a(a[i]), .avld(take),
				.b(b[i]), .bload('1),
				.r(r[i]), .rvld(rvld_vec[i])
			);
		end : genFF

		// int (a) / float (b): convert a, delay b by the same register stage
		2'b01: begin : genIF
			uwire [31:0]  a_fp;
			int_to_fp32 #(.WIDTH(A_WIDTH), .SIGNED(A_SIGNED)) conv (
				.ival(a[i]), .fval(a_fp)
			);

			logic [31:0]  AFp = '0;
			logic [31:0]  Bd  = '0;
			always_ff @(posedge clk) begin
				if(rst) begin
					AFp <= '0;
					Bd  <= '0;
				end
				else begin
					AFp <= a_fp;
					Bd  <= b[i];
				end
			end

			binopf #(.OP(OP), .B_SCALE(B_SCALE), .FORCE_BEHAVIORAL(FORCE_BEHAVIORAL)) core (
				.clk(clk), .rst(rst),
				.a(AFp), .avld(Take),
				.b(Bd),  .bload('1),
				.r(r[i]), .rvld(rvld_vec[i])
			);
		end : genIF

		// float (a) / int (b): convert b, delay a by the same register stage
		2'b10: begin : genFI
			uwire [31:0]  b_fp;
			int_to_fp32 #(.WIDTH(B_WIDTH), .SIGNED(B_SIGNED)) conv (
				.ival(b[i]), .fval(b_fp)
			);

			logic [31:0]  BFp = '0;
			logic [31:0]  Ad  = '0;
			always_ff @(posedge clk) begin
				if(rst) begin
					BFp <= '0;
					Ad  <= '0;
				end
				else begin
					BFp <= b_fp;
					Ad  <= a[i];
				end
			end

			binopf #(.OP(OP), .B_SCALE(B_SCALE), .FORCE_BEHAVIORAL(FORCE_BEHAVIORAL)) core (
				.clk(clk), .rst(rst),
				.a(Ad),  .avld(Take),
				.b(BFp), .bload('1),
				.r(r[i]), .rvld(rvld_vec[i])
			);
		end : genFI

		// int (a) / int (b): integer core, format taken from the A parameters
		default: begin : genII
			binopi #(.OP(OP), .WIDTH(INT_WIDTH), .SIGNED(A_SIGNED)) core (
				.clk(clk), .rst(rst),
				.a(a[i]), .avld(take),
				.b(b[i]), .bload('1),
				.r(r[i]), .rvld(rvld_vec[i])
			);
		end : genII

		endcase

	end : genPE

	// Lane 0 represents the common result valid; all lanes must agree with it.
	assign	rvld = rvld_vec[0];
	always_ff @(posedge clk) begin
		assert(rvld_vec == {PE{rvld}}) else begin
			$error("%m: Inconsistent output valid indications.");
			$stop;
		end
	end

	//=== Credit-backing Elastic Output Queue ==============================
	uwire  rrdy;
	queue #(.DATA_WIDTH($bits(o_vec_t)), .ELASTICITY(CREDIT)) obuf (
		.clk(clk), .rst(rst),
		.idat(r), .ivld(rvld), .irdy(rrdy),
		.odat(odat), .ovld(ovld), .ordy(ordy)
	);

	// The pipeline cannot be stalled, so the queue must accept every result.
	always_ff @(posedge clk) begin
		assert(!rvld || rrdy) else begin
			$error("%m: Result queue overrun.");
			$stop;
		end
	end

endmodule : eltwise
0 commit comments