3232 * @author Thomas B. Preußer <thomas.preusser@amd.com>
3333 *
3434 * @description
35- * Produces the N-bit count of those among 2^N-1 thresholds that are not
35+ * Produces the count of those among N thresholds that are not
3636 * larger than the corresponding input:
3737 * y = Σ(T_i <= x)
3838 * The result is computed by binary search. The runtime-configurable
39- * thresholds must be written in ascending order:
39+ * thresholds must be sorted in ascending order:
4040 * i < j => T_i < T_j
4141 * The design supports channel folding allowing each input to be processed
4242 * with respect to a selectable set of thresholds. The corresponding
4343 * threshold configuration relies on a channel address prefix. Inputs are
4444 * accompanied by a channel selector.
4545 *
4646 * Parameter Layout as seen on AXI-Lite (row by row):
47- * | Base \ Offs | 0 1 2 ... 2^N-2 2^N-1
48- * ---------+--------------------------------+ ------------------------------------
49- * Chnl #0 | 0 | T_0 T_1 T_2 ... T_{2^N-2 } 'x
50- * Chnl #1 | 2^N | T_0 T_1 T_2 ... T_{2^N-2 } 'x
51- * Chnl #c | ((c/PE)*$clog2(PE) + c%PE)*2^N | T_0 T_1 T_2 ... T_{2^N-2 } 'x
47+ * | Base \ Offs | 0 1 2 ... N-1 ...
48+ * ---------+----------------------------------------+ ---------------------------------
49+ * Chnl #0 | 0 | T_0 T_1 T_2 ... T_{N-1 } 'x
50+ * Chnl #1 | 2^$clog2(N) | T_0 T_1 T_2 ... T_{N-1 } 'x
51+ * Chnl #c | ((c/PE)*2^$clog2(PE) + c%PE)*2^$clog2(N) | T_0 T_1 T_2 ... T_{N-1 } 'x
5252 *
5353 *****************************************************************************/
5454module thresholding # (
55- int unsigned N , // output precision
5655 int unsigned K , // input/threshold precision
56+ int unsigned N , // number of thresholds
5757 int unsigned C , // number of channels
5858 int unsigned PE , // parallel processing elements
5959
6060 bit SIGNED = 1 , // signed inputs
6161 bit FPARG = 0 , // floating-point inputs: [sign] | exponent | mantissa
62- int BIAS = 0 , // offsetting the output [0, 2^N-1 ] -> [BIAS, 2^N-1 + BIAS]
62+ int BIAS = 0 , // offsetting the output [0, N ] -> [BIAS, N+ BIAS]
6363
6464 // Initial Thresholds
6565 parameter THRESHOLDS_PATH = " " ,
@@ -72,8 +72,8 @@ module thresholding #(
7272
7373 localparam int unsigned CF = C/ PE , // Channel fold
7474 localparam int unsigned O_BITS = BIAS >= 0 ?
75- /* unsigned */ $clog2(2 ** N + BIAS ) :
76- /* signed */ 1 + $clog2(- BIAS >= 2 ** ( N - 1 ) ? - BIAS : 2 ** N + BIAS )
75+ /* unsigned */ $clog2(N + BIAS + 1 ) :
76+ /* signed */ 1 + $clog2(- BIAS >= N + BIAS + 1 ? - BIAS : N + BIAS + 1 )
7777)(
7878 // Global Control
7979 input logic clk,
@@ -82,7 +82,7 @@ module thresholding #(
8282 // Threshold Configuration
8383 input logic cfg_en,
8484 input logic cfg_we,
85- input logic [$clog2(CF )+ $clog2(PE )+ N - 1 : 0 ] cfg_a,
85+ input logic [$clog2(CF )+ $clog2(PE )+ $clog2( N ) - 1 : 0 ] cfg_a,
8686 input logic [K - 1 : 0 ] cfg_d,
8787 output logic cfg_rack,
8888 output logic [K - 1 : 0 ] cfg_q,
@@ -115,22 +115,25 @@ module thresholding #(
115115 CFG = 2'b1x // Config op (pointer-preserving)
116116 } op_e ;
117117
118+ // -----------------------------------------------------------------------
119+ // Pipeline Feed
120+ // - M := $clog2(N+1) pipeline stages
121+ // - configuration always takes precedence
122+ // - number of pending thresholding ops capped to MAX_PENDING, i.e. M+3 for DEEP_PIPELINE = 0,
123+ // across pipeline and output FIFO: pipe:M + A:1 + B:1 + 1
124+ localparam int unsigned M = $clog2 (N + 1 );
125+ localparam int unsigned MAX_PENDING = (DEEP_PIPELINE + 1 )* M + 3 ;
126+
118127 // Pipeline Link Type
119- typedef logic [$clog2(CF )+ N - 1 : 0 ] ptr_t ;
128+ typedef logic [$clog2(CF )+ M - 1 : 0 ] ptr_t ;
120129 typedef logic [K - 1 : 0 ] val_t ;
121130 typedef struct packed {
122131 op_e op;
123132 ptr_t ptr; // WR/RB: address; TH: result
124133 val_t val; // WR/RB: threshold value; TH: input value
125134 } pipe_t ;
126135
127- // -----------------------------------------------------------------------
128- // Pipeline Feed
129- // - configuration always takes precedence
130- // - number of pending thresholding ops capped to N+3
131- // across pipeline and output FIFO: pipe:N + A:1 + B:1 + 1
132- localparam int unsigned MAX_PENDING = (DEEP_PIPELINE + 1 )* N + 3 ;
133- pipe_t pipe[PE ][N + 1 ];
136+ pipe_t pipe[PE ][M + 1 ];
134137 if (1 ) begin : blkFeed
135138
136139 // Thresholding Input Guard ensuring Output FIFO is never overrun
@@ -148,20 +151,20 @@ module thresholding #(
148151 // PE Configuration Address Decoding
149152 logic cfg_sel[PE ];
150153 logic cfg_oob;
151- logic [N - 1 : 0 ] cfg_ofs;
154+ logic [$clog2 ( N ) - 1 : 0 ] cfg_ofs;
152155 if (PE == 1 ) begin
153156 assign cfg_sel[0 ] = 1 ;
154157 assign cfg_oob = 0 ;
155- assign cfg_ofs = cfg_a[0 + : N ];
158+ assign cfg_ofs = cfg_a[0 + : $clog2 ( N ) ];
156159 end
157160 else begin
158- uwire [$clog2 (PE )- 1 : 0 ] cfg_pe = cfg_a[N + : $clog2 (PE )];
161+ uwire [$clog2 (PE )- 1 : 0 ] cfg_pe = cfg_a[$clog2 ( N ) + : $clog2 (PE )];
159162 always_comb begin
160163 foreach (cfg_sel[pe]) begin
161164 cfg_sel[pe] = USE_CONFIG && cfg_en && (cfg_pe == pe);
162165 end
163166 cfg_oob = (cfg_pe >= PE );
164- cfg_ofs = cfg_a[0 + : N ];
167+ cfg_ofs = cfg_a[0 + : $clog2 ( N ) ];
165168 if (cfg_oob && ! cfg_we) begin
166169 // Map readbacks from padded rows (non-existent PEs) to padded highest threshold index of first PE
167170 cfg_sel[0 ] = 1 ;
@@ -171,7 +174,7 @@ module thresholding #(
171174 end
172175
173176 uwire ptr_t iptr;
174- assign iptr[0 + : N ] = cfg_ofs;
177+ assign iptr[0 + : M ] = cfg_ofs; // Zero-extend Expand for N = 2^k
175178 if (CF > 1 ) begin
176179 // Channel Fold Rotation
177180 logic [$clog2 (CF )- 1 : 0 ] CnlCnt = 0 ;
@@ -187,7 +190,7 @@ module thresholding #(
187190 end
188191 end
189192
190- assign iptr[N + : $clog2 (CF )] = USE_CONFIG && cfg_en? cfg_a[N + $clog2 (PE )+ : $clog2 (CF )] : CnlCnt;
193+ assign iptr[M + : $clog2 (CF )] = USE_CONFIG && cfg_en? cfg_a[$clog2 ( N ) + $clog2 (PE )+ : $clog2 (CF )] : CnlCnt;
191194 end
192195
193196 for (genvar pe = 0 ; pe < PE ; pe++ ) begin
@@ -205,9 +208,9 @@ module thresholding #(
205208
206209 // -----------------------------------------------------------------------
207210 // Free-Running Thresholding Pipeline
208- for (genvar stage = 0 ; stage < N ; stage++ ) begin : genStages
211+ for (genvar stage = 0 ; stage < M ; stage++ ) begin : genStages
209212
210- localparam int unsigned SN = N - 1 - stage;
213+ localparam int unsigned SN = M - 1 - stage;
211214 for (genvar pe = 0 ; pe < PE ; pe++ ) begin : genPE
212215 uwire pipe_t p = pipe[pe][stage];
213216 uwire cs = (p.ptr[SN : 0 ] == 2 ** SN - 1 );
@@ -222,7 +225,7 @@ module thresholding #(
222225 // If BRAM trigger defined, force distributed memory below if Vivado may be tempted to use BRAM nonetheless.
223226 DEPTH_TRIGGER_BRAM && (DEPTH >= 64 )? " distributed" : " auto" ;
224227
225- (* DONT_TOUCH = " true" , RAM_STYLE = RAM_STYLE * )
228+ (* DONT_TOUCH = " true" , RAM_STYLE = RAM_STYLE * )
226229 val_t Threshs[DEPTH ];
227230 if (THRESHOLDS_PATH != " " ) begin
228231 initial $readmemh ($sformatf (" %s threshs_%0d _%0d .dat" , THRESHOLDS_PATH , pe, stage), Threshs);
@@ -236,7 +239,7 @@ module thresholding #(
236239 end
237240 end
238241 else begin
239- uwire [$clog2 (CF )+ stage- 1 : 0 ] addr = p.ptr[$clog2 (CF )+ N - 1 : SN + 1 ];
242+ uwire [$clog2 (CF )+ stage- 1 : 0 ] addr = p.ptr[$clog2 (CF )+ M - 1 : SN + 1 ];
240243 always @ (posedge clk) begin
241244 if (we) Threshs[addr] <= p.val;
242245 end
@@ -247,7 +250,7 @@ module thresholding #(
247250 assign Thresh = Threshs[0 ];
248251 end
249252 else begin
250- uwire [$clog2 (CF )+ stage- 1 : 0 ] addr = p.ptr[$clog2 (CF )+ N - 1 : SN + 1 ];
253+ uwire [$clog2 (CF )+ stage- 1 : 0 ] addr = p.ptr[$clog2 (CF )+ M - 1 : SN + 1 ];
251254 always_ff @ (posedge clk) begin
252255 Thresh <= Threshs[addr];
253256 end
@@ -256,19 +259,30 @@ module thresholding #(
256259 end : blkThresh
257260
258261 // Pipeline State
262+ localparam int unsigned SCOPE_REDUCE = (2 ** (M - stage- 1 ) + 2 ** M - 1 - N ) >> (M - stage);
259263 pipe_t P = '{ op : NOP , default : 'x } ;
260- logic Reval = 0 ;
264+ logic Reval = 'x ; // Replace value by readback
265+ logic Scope = 'x ; // Comparison in scope of specified threshold count
261266 always_ff @ (posedge clk) begin
262267 if (rst) begin
263268 P <= '{ op : NOP , default : 'x } ;
264- Reval <= 0 ;
269+ Reval <= 'x ;
270+ Scope <= 'x ;
265271 end
266272 else begin
267273 P <= p;
268274 Reval <= (p.op == ? RB ) && cs;
275+ Scope <= (SCOPE_REDUCE == 0 )? 1 : p.ptr[M - 1 : SN + 1 ] < 2 ** stage - SCOPE_REDUCE ;
269276 end
270277 end
271278
279+ always_ff @ (posedge clk) begin
280+ assert ((P .op != ? TH ) || (Scope !== 1'bx )) else begin
281+ $error (" %m : [%0d .%0d ] Broken Scope." , pe, stage);
282+ end
283+ end
284+
285+ // Mask comparisons beyond specified threshold count
272286 logic cmp;
273287 if (! SIGNED ) assign cmp = $unsigned (Thresh) <= $unsigned (P .val);
274288 else if (! FPARG ) assign cmp = $signed (Thresh) <= $signed (P .val);
@@ -290,7 +304,7 @@ module thresholding #(
290304 pipe_t pp;
291305 always_comb begin
292306 pp = P ;
293- if (P .op != ? CFG ) pp.ptr[SN ] = cmp;
307+ if (P .op != ? CFG ) pp.ptr[SN ] = Scope && cmp;
294308 if (Reval) pp.val = Thresh;
295309 end
296310
@@ -301,7 +315,12 @@ module thresholding #(
301315 pipe_t Pf = '{ op : NOP , default : 'x } ;
302316 always_ff @ (posedge clk) begin
303317 if (rst) Pf <= '{ op : NOP , default : 'x } ;
304- else Pf <= pp;
318+ else begin
319+ assert ((pp.op != ? TH ) || (^ pp.ptr[$left (ptr_t): SN ] !== 1'bx )) else begin
320+ $error (" %m : [%0d .%0d ] Broken ptr[$left:%0d ]." , pe, stage, SN );
321+ end
322+ Pf <= pp;
323+ end
305324 end
306325 assign pf = Pf;
307326 end
@@ -317,34 +336,34 @@ module thresholding #(
317336 cfg_rack = 0 ;
318337 cfg_q = 0 ;
319338 foreach (pipe[pe]) begin
320- automatic pipe_t p = pipe[pe][N ];
339+ automatic pipe_t p = pipe[pe][M ];
321340 cfg_rack | = p.op == ? RB ;
322341 cfg_q | = p.val;
323342 end
324343 end
325344
326345 // -----------------------------------------------------------------------
327346 // Stream Output through FIFO
328- // - Depth of N + Output Reg to allow pipe to drain entirely under backpressure
347+ // - Depth of M + Output Reg to allow pipe to drain entirely under backpressure
329348 // - Typically mapped to an SRL shift register
330349 if (1 ) begin : blkStreamOutput
331350 localparam int unsigned A_DEPTH = MAX_PENDING - 1 ;
332- logic [PE - 1 : 0 ][N - 1 : 0 ] ADat[A_DEPTH ];
351+ logic [PE - 1 : 0 ][M - 1 : 0 ] ADat[A_DEPTH ];
333352 logic signed [$clog2 (A_DEPTH ): 0 ] APtr = '1 ; // -1, 0, 1, ..., A_DEPTH-1
334353 uwire avld = ! APtr[$left (APtr)];
335354
336- logic [PE - 1 : 0 ][N - 1 : 0 ] BDat = 'x ;
355+ logic [PE - 1 : 0 ][M - 1 : 0 ] BDat = 'x ;
337356 logic BVld = 0 ;
338357
339- uwire aload = pipe[0 ][N ].op == ? TH ;
358+ uwire aload = pipe[0 ][M ].op == ? TH ;
340359 uwire bload = ! BVld || ordy;
341360
342361 always_ff @ (posedge clk) begin
343362 if (aload) begin
344363 assert (APtr < $signed (A_DEPTH - 1 )) else begin
345364 $error (" Overrun after failing stream guard." );
346365 end
347- foreach (pipe[pe]) ADat[0 ][pe] <= pipe[pe][N ].ptr;
366+ foreach (pipe[pe]) ADat[0 ][pe] <= pipe[pe][M ].ptr;
348367 for (int unsigned i = 1 ; i < A_DEPTH ; i++ ) ADat[i] <= ADat[i- 1 ];
349368 end
350369 end
0 commit comments