5656 * enabling both the write path and the read path to be active simultaneously.
5757****************************************************************************/
5858
59- // A memory bank in the inner_shuffle design: one write port and one read port on a shared clock, with a registered read output. Pattern was kept as simple
60- // as possible to help with Vivado BRAM inference.
61- module mem_bank # (
62- int unsigned WIDTH , // bits per stored word (width of d_in and d_out)
63- int unsigned DEPTH , // number of words; both address ports are $clog2(DEPTH) bits wide
64- parameter RAM_STYLE = " auto" // value applied as the ram_style attribute on Mem below
65- )(
66- input logic clk, // single clock shared by the write and the read port
67-
68- input logic [WIDTH - 1 : 0 ] d_in, // write data
69- input logic [$clog2(DEPTH )- 1 : 0 ] wr_addr, // write address
70- input logic wr_en, // when high at a rising clk edge, d_in is stored at wr_addr
71-
72- output logic [WIDTH - 1 : 0 ] d_out, // registered read data; keeps its previous value while rd_en is low
73- input logic [$clog2(DEPTH )- 1 : 0 ] rd_addr, // read address
74- input logic rd_en // when high at a rising clk edge, Mem[rd_addr] is captured into d_out
75- );
76-
77- (* ram_style= RAM_STYLE * )
78- logic [WIDTH - 1 : 0 ] Mem [DEPTH - 1 : 0 ]; // The Mem for this bank: DEPTH words of WIDTH bits, no reset or initial contents given here
79- always_ff @ (posedge clk) begin
80- if (wr_en) Mem[wr_addr] <= d_in; // synchronous write, independent of the read below
81- if (rd_en) d_out <= Mem[rd_addr]; // synchronous read; with non-blocking assignments a same-cycle read of the address being written captures the old word
82- end
83-
84- endmodule : mem_bank
85-
86-
8759// ----------------------------------------
8860// Parallel Transpose Unit (InnerShuffle)
8961// ----------------------------------------
@@ -124,23 +96,37 @@ module inner_shuffle #(
12496 localparam int unsigned PAGE_OFFSET = I * J / SIMD ;
12597 uwire wr_en = irdy && ivld;
12698 uwire [$clog2 (BANK_DEPTH )- 1 : 0 ] wr_addr;
127- uwire rd_en;
12899 uwire [BITS - 1 : 0 ] d_in [SIMD - 1 : 0 ];
129100 uwire [BITS - 1 : 0 ] d_out [SIMD - 1 : 0 ];
101+ uwire rd_req_en;
102+ uwire [SIMD - 1 : 0 ] rd_req_rdy;
103+ uwire rd_req_rdy_all = & rd_req_rdy;
130104 logic [$clog2 (BANK_DEPTH )- 1 : 0 ] raddr [SIMD - 1 : 0 ];
105+ uwire rd_dat_vld [SIMD - 1 : 0 ];
106+ uwire rd_dat_rdy;
107+ assign rd_dat_rdy = osb_rdy && rd_pattern_sb_ovld;
108+ uwire rd_pattern_sb_irdy;
109+ uwire rd_pattern_sb_ovld;
131110 for (genvar i = 0 ; i< SIMD ; i++ ) begin : gen_mem_banks
132- mem_bank # (
111+ elasticmem # (
133112 .WIDTH (BITS ),
134113 .DEPTH (BANK_DEPTH ),
135114 .RAM_STYLE (RAM_STYLE )
136- ) mem_bank_inst (
137- .clk,
138- .wr_en,
139- .wr_addr,
140- .d_in (d_in[i]),
141- .rd_en,
115+ ) mem_banks_inst (
116+ .clk (clk),
117+ .rst (rst),
118+
119+ .wr_data (d_in[i]),
120+ .wr_addr (wr_addr),
121+ .wr_en (wr_en),
122+
142123 .rd_addr (raddr[i]),
143- .d_out (d_out[i])
124+ .rd_req_vld (rd_req_en),
125+ .rd_req_rdy (rd_req_rdy[i]),
126+
127+ .rd_dat (d_out[i]),
128+ .rd_dat_vld (rd_dat_vld[i]),
129+ .rd_dat_rdy (rd_dat_rdy)
144130 );
145131 end : gen_mem_banks
146132
@@ -221,6 +207,7 @@ module inner_shuffle #(
221207
222208 localparam int unsigned RD_ROT_PERIOD = I / SIMD ; // (I % SIMD == 0) is a constraint
223209 typedef logic [$clog2(SIMD )- 1 : 0 ] rotidx_vec_t [SIMD - 1 : 0 ];
210+ typedef logic [SIMD * $clog2(SIMD )- 1 : 0 ] packed_rotidx_vec_t ;
224211 typedef logic [$clog2(BANK_DEPTH )- 1 : 0 ] bank_addr_t [SIMD - 1 : 0 ];
225212
226213 // --------------------------------------------------------------------------
@@ -279,7 +266,7 @@ module inner_shuffle #(
279266 uwire [$clog2 (BANK_DEPTH )- 1 : 0 ] page_rd_offset;
280267 uwire osb_rdy; // output skid buffer ready signal
281268 uwire rd_guard = ! CurrentPageRd && ! WrJobsDone[0 ] && ! WrJobsDone[1 ];
282- uwire rd_inc = osb_rdy & ! rd_guard ;
269+ uwire rd_inc = rd_req_en && rd_req_rdy_all ;
283270
284271 // Counts reads across the columns
285272 logic [$clog2 (I )- 1 : 0 ] RdICnt = 0 ; // 0, ..., I - 1
@@ -349,9 +336,7 @@ module inner_shuffle #(
349336 // --------------------------------------------------------------------------
350337 // Page management
351338
352- logic OsbVld = 0 ; // output skidbuffer valid signal
353- logic OsbVld_D = 0 ; // output skidbuffer valid signal
354- assign rd_en = osb_rdy;
339+ assign rd_req_en = ! rd_guard && rd_pattern_sb_irdy; // We can read once the guard is not up
355340
356341 always_ff @ (posedge clk) begin
357342 if (rst) begin
@@ -363,37 +348,26 @@ module inner_shuffle #(
363348 if (wr_addr == 2 * PAGE_OFFSET - 1 ) WrJobsDone[1 ] <= 1 ;
364349
365350 // Clear the relevant job once it is read
366- if (page_boundary && (osb_rdy && OsbVld )) begin
351+ if (page_boundary && (rd_req_en && rd_req_rdy_all )) begin
367352 WrJobsDone[CurrentPageRd] <= 0 ;
368353 CurrentPageRd <= ! CurrentPageRd;
369354 end
370355 end
371356 end
372357
373- assign page_rd_offset = CurrentPageRd ? 0 : PAGE_OFFSET ;
358+ assign page_rd_offset = CurrentPageRd ? 0 : PAGE_OFFSET ;
374359 assign irdy = ! WrJobsDone[0 ] || ! WrJobsDone[1 ];
375360
376361 // Forward the current RD_PATTERN row onto the next pipeline stage
377362 rotidx_vec_t RdPat = RD_INIT_PAT ;
378- rotidx_vec_t RdPat_D = RD_INIT_PAT ; // The fowarded rotation pattern
379- always_ff @ (posedge clk) begin : rd_pattern_col_forwarding
380- if (rst) begin
381- OsbVld <= 0 ;
382- RdPat_D <= RD_INIT_PAT ;
383- end
384- else begin
385- OsbVld <= ! rd_guard;
386- OsbVld_D <= OsbVld;
387- if (rd_inc) RdPat_D <= RdPat;
388- if (OsbVld & rd_guard & ! osb_rdy) OsbVld <= 1 ;
389- end
390- end : rd_pattern_col_forwarding
363+ packed_rotidx_vec_t rd_pat_forwarded_packed;
364+ rotidx_vec_t rd_pat_forwarded;
391365
392366 // Structural remapping using the output of the memory banks
393367 // and the Read rotation from the previous clock cycle that was
394368 // used to generate the read addresses.
395369 uwire [SIMD - 1 : 0 ][BITS - 1 : 0 ] remapped_data; // remapped output
396- for (genvar i= 0 ; i< SIMD ; i++ ) assign remapped_data[i] = d_out[RdPat_D [i]];
370+ for (genvar i= 0 ; i< SIMD ; i++ ) assign remapped_data[i] = d_out[rd_pat_forwarded [i]];
397371
398372 // the next permutation of the rd pattern
399373 rotidx_vec_t rd_pat_next;
@@ -433,11 +407,32 @@ module inner_shuffle #(
433407 end
434408 end
435409 end : rd_pattern_assignment
410+
411+
412+ // =======================================================================
413+ skid # (
414+ .DATA_WIDTH ($bits (packed_rotidx_vec_t))
415+ )
416+ rd_pattern_skid (
417+ .clk (clk),
418+ .rst (rst),
419+
420+ .idat (packed_rotidx_vec_t ' (RdPat)),
421+ .ivld (rd_req_en),
422+ .irdy (rd_pattern_sb_irdy),
423+
424+ .odat (rd_pat_forwarded_packed),
425+ .ovld (rd_pattern_sb_ovld),
426+ .ordy (osb_rdy && rd_dat_vld[0 ])
427+ );
428+
429+ assign rd_pat_forwarded = rotidx_vec_t ' (rd_pat_forwarded_packed);
436430 // --------------------------------------------------------------------------
437431
438432 // =======================================================================
439433 // Output SkidBuffer -- Used to decouple control signals for timing
440434 // improvements
435+ uwire osb_vld = rd_pattern_sb_ovld && rd_dat_vld[0 ];
441436 skid # (
442437 .DATA_WIDTH (SIMD * BITS )
443438 )
@@ -446,7 +441,7 @@ module inner_shuffle #(
446441 .rst (rst),
447442
448443 .idat (remapped_data),
449- .ivld (OsbVld ),
444+ .ivld (osb_vld ),
450445 .irdy (osb_rdy),
451446
452447 .odat (odat),
0 commit comments