|
60 | 60 | constexpr unsigned ILEN = @ILEN@; // Input words per IFM |
61 | 61 | constexpr unsigned OLEN = @OLEN@; // Output words per OFM |
62 | 62 | constexpr unsigned KO = @KO@; // Subwords within OFM transaction word |
| 63 | + constexpr unsigned AVG_N = @AVG_N@; // Max frames in averaging window |
63 | 64 | using TI = @TI@; // IFM transaction word |
64 | 65 | using TO = @TO@; // OFM transaction word |
65 | 66 |
|
|
133 | 134 | unsigned ILEN, |
134 | 135 | unsigned OLEN, |
135 | 136 | unsigned KO, |
| 137 | + unsigned AVG_N, |
136 | 138 | typename TI, |
137 | 139 | typename TO |
138 | 140 | > |
|
141 | 143 | hls::stream<TO> &finnox, |
142 | 144 | ap_uint<32> cfg, // [0] - 0:hold, 1:lfsr; [31:1] - minimum interval (cycles) between IFM starts |
143 | 145 | ap_uint<32> seed, // [31:16] - LFSR seed (only upper 16 bits used) |
| 146 | + ap_uint<32> avg_n, // [31:0] - averaging window size in frames (1..AVG_N); 0 or any value > AVG_N selects AVG_N |
144 | 147 | ap_uint<32> &status, // [0] - timestamp overflow; [1] - timestamp underflow |
145 | 148 | ap_uint<32> &latency, |
146 | 149 | ap_uint<32> &interval, |
147 | 150 | ap_uint<32> &checksum, |
148 | | - ap_uint<32> &min_latency |
| 151 | + ap_uint<32> &min_latency, |
| 152 | + ap_uint<32> &avg_latency, |
| 153 | + ap_uint<32> &avg_interval |
149 | 154 | ) { |
150 | 155 | #pragma HLS pipeline II=1 style=flp |
151 | 156 |
|
|
219 | 224 | #pragma HLS reset variable=last_interval |
220 | 225 | #pragma HLS reset variable=cur_min_latency |
221 | 226 |
|
| 227 | + // Sliding-Window Averaging State |
| 228 | + static ap_uint<clog2nz(AVG_N)> avg_head = 0; // write pointer in circular buffer |
| 229 | + static ap_uint<clog2nz(AVG_N+1)> avg_fill = 0; // number of valid entries (0..AVG_N) |
| 230 | + static clock_t lat_buf[AVG_N]; |
| 231 | + static clock_t int_buf[AVG_N]; |
| 232 | + static ap_uint<64> lat_sum = 0; |
| 233 | + static ap_uint<64> int_sum = 0; |
| 234 | + static clock_t last_avg_latency = 0; |
| 235 | + static clock_t last_avg_interval = 0; |
| 236 | + static ap_uint<32> prev_avg_n = 0; |
| 237 | + #pragma HLS reset variable=avg_head |
| 238 | + #pragma HLS reset variable=avg_fill |
| 239 | + #pragma HLS reset variable=lat_buf off |
| 240 | + #pragma HLS reset variable=int_buf off |
| 241 | + #pragma HLS reset variable=lat_sum |
| 242 | + #pragma HLS reset variable=int_sum |
| 243 | + #pragma HLS reset variable=last_avg_latency |
| 244 | + #pragma HLS reset variable=last_avg_interval |
| 245 | + #pragma HLS reset variable=prev_avg_n |
| 246 | + |
222 | 247 | static ap_uint<8> pkts = 0; |
223 | 248 | #pragma HLS reset variable=pkts |
224 | 249 | static ap_uint< 2> coeff[3]; |
|
264 | 289 | last_interval = cnt_clk - ts1; // completion - previous completion |
265 | 290 | cur_min_latency = std::min(cur_min_latency, last_latency); |
266 | 291 | ts1 = cnt_clk; // mark completion ^ |
| 292 | + |
| 293 | + // Sliding-window average update |
| 294 | + // TODO: II=1 but depth is ~70 cycles, can we optimize this? |
| 295 | + ap_uint<32> win = (avg_n == 0 || avg_n > AVG_N) ? ap_uint<32>(AVG_N) : avg_n; |
| 296 | + if(prev_avg_n != win) { |
| 297 | + avg_head = 0; |
| 298 | + avg_fill = 0; |
| 299 | + lat_sum = 0; |
| 300 | + int_sum = 0; |
| 301 | + prev_avg_n = win; |
| 302 | + } |
| 303 | + clock_t old_lat = lat_buf[avg_head]; |
| 304 | + clock_t old_int = int_buf[avg_head]; |
| 305 | + lat_buf[avg_head] = last_latency; |
| 306 | + int_buf[avg_head] = last_interval; |
| 307 | + if(avg_fill < win) { |
| 308 | + lat_sum += last_latency; |
| 309 | + int_sum += last_interval; |
| 310 | + avg_fill++; |
| 311 | + } else { |
| 312 | + lat_sum = lat_sum + last_latency - old_lat; |
| 313 | + int_sum = int_sum + last_interval - old_int; |
| 314 | + } |
| 315 | + avg_head++; |
| 316 | + if(avg_head >= ap_uint<clog2nz(AVG_N)+1>(win)) avg_head = 0; |
| 317 | + last_avg_latency = lat_sum / avg_fill; |
| 318 | + last_avg_interval = int_sum / avg_fill; |
267 | 319 | } |
268 | 320 | ocnt = 0; |
269 | 321 |
|
|
279 | 331 | latency = last_latency; |
280 | 332 | interval = last_interval; |
281 | 333 | checksum = last_checksum; |
282 | | - min_latency = cur_min_latency; |
| 334 | + min_latency = cur_min_latency; |
| 335 | + avg_latency = last_avg_latency; |
| 336 | + avg_interval = last_avg_interval; |
283 | 337 |
|
284 | 338 | } // instrument() |
285 | 339 |
|
|
288 | 342 | hls::stream<TO> &finnox, |
289 | 343 | ap_uint<32> cfg, |
290 | 344 | ap_uint<32> seed, |
| 345 | + ap_uint<32> avg_n, |
291 | 346 | ap_uint<32> &status, |
292 | 347 | ap_uint<32> &latency, |
293 | 348 | ap_uint<32> &interval, |
294 | 349 | ap_uint<32> &checksum, |
295 | | - ap_uint<32> &min_latency |
| 350 | + ap_uint<32> &min_latency, |
| 351 | + ap_uint<32> &avg_latency, |
| 352 | + ap_uint<32> &avg_interval |
296 | 353 | ) { |
297 | 354 | #pragma HLS interface axis port=finnix |
298 | 355 | #pragma HLS interface axis port=finnox |
299 | 356 | #pragma HLS interface s_axilite bundle=ctrl port=cfg |
300 | 357 | #pragma HLS interface s_axilite bundle=ctrl port=seed |
| 358 | + #pragma HLS interface s_axilite bundle=ctrl port=avg_n |
301 | 359 | #pragma HLS interface s_axilite bundle=ctrl port=status |
302 | 360 | #pragma HLS interface s_axilite bundle=ctrl port=latency |
303 | 361 | #pragma HLS interface s_axilite bundle=ctrl port=interval |
304 | 362 | #pragma HLS interface s_axilite bundle=ctrl port=checksum |
305 | 363 | #pragma HLS interface s_axilite bundle=ctrl port=min_latency |
| 364 | + #pragma HLS interface s_axilite bundle=ctrl port=avg_latency |
| 365 | + #pragma HLS interface s_axilite bundle=ctrl port=avg_interval |
306 | 366 | #pragma HLS interface ap_ctrl_none port=return |
307 | 367 |
|
308 | 368 | #pragma HLS dataflow disable_start_propagation |
|
315 | 375 | move(finnox, finnox0); |
316 | 376 |
|
317 | 377 | // Main |
318 | | - instrument<PENDING, ILEN, OLEN, KO>(finnix0, finnox0, cfg, seed, status, latency, interval, checksum, min_latency); |
| 378 | + instrument<PENDING, ILEN, OLEN, KO, AVG_N>(finnix0, finnox0, cfg, seed, avg_n, status, latency, interval, checksum, min_latency, avg_latency, avg_interval); |
319 | 379 |
|
320 | 380 | // FIFO -> AXI-Stream |
321 | 381 | move(finnix0, finnix); |
|
0 commit comments