@@ -40,12 +40,13 @@ typedef enum {POOL_NONE, POOL_MAX, POOL_AVG} Pool_t;
4040
4141#include "config_fw.h"
4242
43- #define f32 O_TYPE
43+ #define f32__ O_TYPE
4444#define X_BITS (1 << X_BITS_L2)
4545#define X_WORDS_PER_BYTE (8 / X_BITS)
4646#define X_BITS_MASK ((1 << X_BITS) -1)
4747#ifdef SIM
4848 #define XDEBUG
49+ void usleep (int x ) {}
4950#endif
5051
5152typedef struct {
@@ -61,12 +62,14 @@ typedef struct {
6162 i8 out_buffers [N_OUT_BUF ][O_BYTES_MAX ];
6263
6364#ifdef XDEBUG
64- i8 debug_tiled [O_WORDS_MAX ];
65- i32 debug_nhwc [NHWC_WORDS ];
65+ int8_t debug_tiled [O_WORDS_MAX ];
66+ int32_t debug_nhwc [NHWC_WORDS ];
6667#endif
67- i8 add_buffers [N_ADD_BUF ][NHWC_WORDS ]; // should be last, since N_ADD_BUF can be empty
68+ int8_t add_buffers [N_ADD_BUF ][NHWC_WORDS ]; // should be last, since N_ADD_BUF can be empty
6869} Memory_st ;
6970
71+ #include "fb_fw_wrap.h"
72+
7073#define A_START 0x0
7174#define A_DONE_READ 0x1 // 2
7275#define A_DONE_WRITE 0x3 // 2
@@ -78,48 +81,57 @@ typedef struct {
7881#define A_X_DONE 0xB
7982#define A_O_DONE 0xC
8083
81- #ifdef __cplusplus
82- #define EXT_C "C"
83- #define restrict __restrict__
84- #else
85- #define EXT_C
86- #endif
84+ int32_t * p_config = (int32_t * )CONFIG_BASEADDR ;
8785
86+ extern EXT_C void model_setup (Memory_st * restrict mp ) {
8887#ifdef SIM
89- #define sim_fprintf fprintf
90- #include <stdbool.h>
88+ FILE * fp ;
89+ char f_path [1000 ];
90+ sprintf (f_path , "%s/wbx.bin" , DATA_DIR );
91+ fp = fopen (f_path , "rb" );
92+ debug_printf ("DEBUG: Reading from file %s \n" , f_path );
93+ if (!fp ) debug_printf ("ERROR! File not found: %s \n" , f_path );
94+ int bytes = fread (mp -> w , 1 , WB_BYTES + X_BYTES , fp );
95+ fclose (fp );
96+ #endif
97+ flush_cache (mp -> w , WB_BYTES + X_BYTES ); // force transfer to DDR, starting addr & length
9198
92- Memory_st mem_phy ;
93- extern EXT_C u32 get_config (void * , u32 );
94- extern EXT_C void set_config (void * , u32 , u32 );
95- static inline void flush_cache (void * addr , uint32_t bytes ) {} // Do nothing
9699
97- #else
98- #define sim_fprintf (...)
100+ // Write registers in controller
101+ fb_write_reg32 (p_config + A_START , 0 ); // Start
102+ fb_write_reg32 (p_config + A_DONE_READ + 0 , 1 ); // Done read mp->ocm bank 0
103+ fb_write_reg32 (p_config + A_DONE_READ + 1 , 1 ); // Done read mp->ocm bank 1
104+ fb_write_reg32 (p_config + A_DONE_WRITE + 0 , 0 ); // Done write mp->ocm bank 0
105+ fb_write_reg32 (p_config + A_DONE_WRITE + 1 , 0 ); // Done write mp->ocm bank 1
106+ fb_write_reg32 (p_config + A_OCM_BASE + 0 , fb_addr_64to32 (mem_phy .ocm [0 ])); // Base addr mp->ocm bank 0
107+ fb_write_reg32 (p_config + A_OCM_BASE + 1 , fb_addr_64to32 (mem_phy .ocm [1 ])); // Base addr mp->ocm bank 1
108+ fb_write_reg32 (p_config + A_WEIGHTS_BASE , fb_addr_64to32 (mem_phy .w )); // Base addr weights
109+ fb_write_reg32 (p_config + A_BUNDLE_DONE , 1 ); // Bundle done writing (pixel dma waits for this)
110+ fb_write_reg32 (p_config + A_N_BUNDLES_1 , N_BUNDLES ); // Number of bundles
111+ fb_write_reg32 (p_config + A_W_DONE , 0 ); // Weights done
112+ fb_write_reg32 (p_config + A_X_DONE , 0 ); // Bundle done
113+ fb_write_reg32 (p_config + A_O_DONE , 0 ); // Output done
99114
100- // #ifdef RISCV
101- // Memory_st mem_phy;
102- // #else
103- #define mem_phy (*(Memory_st* restrict)MEM_BASEADDR)
104- // #endif
115+ // Write into BRAM the config for controller
116+ i32 parameters [8 * N_BUNDLES ];
117+ for (int var = 0 ; var < N_BUNDLES ; var ++ ){
118+ parameters [8 * var ] = (var == 0 ) ? fb_addr_64to32 (mem_phy .x ) : fb_addr_64to32 (mem_phy .out_buffers [bundles [var ].in_buffer_idx ]); // x_base address
119+ parameters [8 * var + 1 ] = bundles [var ].x_bpt_p0 ; // x_bpt0
120+ parameters [8 * var + 2 ] = bundles [var ].x_bpt ; // x_bpt
121+ parameters [8 * var + 3 ] = bundles [var ].w_bpt_p0 ; // w_bpt0
122+ parameters [8 * var + 4 ] = bundles [var ].w_bpt ; // w_bpt
105123
106- inline volatile u32 get_config (void * config_base , u32 offset ){
107- return * (volatile u32 * )(config_base + offset * 4 );
124+ assert_printf (bundles [var ].p , < , 1 <<16 , "" , "P should be less than 2**16 for bundle:%x" , var );
125+ assert_printf (bundles [var ].t , < , 1 <<16 , "" , "T should be less than 2**16 for bundle:%x" , var );
126+ parameters [8 * var + 5 ] = (bundles [var ].t << 16 ) + bundles [var ].p ; // max p
127+ uint64_t h = bundles [var ].header ;
128+ parameters [8 * var + 6 ] = (uint32_t )(h & 0xFFFFFFFFu );
129+ parameters [8 * var + 7 ] = (uint32_t )(h >> 32 );
108130 }
109-
110- inline void set_config (void * config_base , u32 offset , u32 data ){
111- * (volatile u32 * restrict)(config_base + offset * 4 ) = data ;
131+ for (int var = 0 ; var < 8 * N_BUNDLES ; var ++ ){
132+ fb_write_reg32 (p_config + 16 + var , parameters [var ]);
112133 }
113- #endif
114-
115- #ifdef XDEBUG
116- #define debug_printf printf
117- #define assert_printf (v1 , op , v2 , optional_debug_info ,...) ((v1 op v2) || (debug_printf("ASSERT FAILED: \n CONDITION: "), debug_printf("( " #v1 " " #op " " #v2 " )"), debug_printf(", VALUES: ( %d %s %d ), ", v1, #op, v2), debug_printf("DEBUG_INFO: " optional_debug_info), debug_printf(" " __VA_ARGS__), debug_printf("\n\n"), assert(v1 op v2), 0))
118- #else
119- #define assert_printf (...)
120- // #define debug_printf(...)
121- #endif
122-
134+ }
123135
124136// Helper functions
125137
@@ -245,7 +257,7 @@ static inline void tile_write( i32 out_val, i8 *restrict p_out_buffer, i32 ib, B
245257
246258}
247259
248- extern EXT_C u8 model_run (Memory_st * restrict mp , void * p_config ) {
260+ extern EXT_C void run (Memory_st * restrict mp ) {
249261
250262 static Bundle_t * restrict pb = & bundles [0 ];
251263 static i32 it_bias = 0 , w_last , o_bpt ;
@@ -259,25 +271,11 @@ extern EXT_C u8 model_run(Memory_st *restrict mp, void *p_config) {
259271
260272 static i8 ocm_bank = 1 ; // We flip the bank at the beginning of loop. starting from bank 0
261273
262- /**
263- * ---------- WAIT FOR S2MM DMA DONE ----------
264- *
265- * When running on hardware, we wait for DMA's interrupt at "DMA_WAIT"
266- * But Verilator cannot pass simulation time when "waiting"
267- * Therefore,
268- * During simulation, this function gets called again and again
269- * On first call, values are set and returned before processing.
270- * On subsequent calls, function skips to DMA_WAIT, and starts processing
271- * This mimics the behavior of waiting for DMA's interrupt
272- */
273- #ifdef SIM
274- static char is_first_call = 1 ;
275- if (is_first_call ) is_first_call = 0 ;
276- else goto DMA_WAIT ;
277- #endif
274+ debug_printf ("Starting model_setup()\n" );
275+ model_setup (mp );
278276
279- debug_printf ("Starting model_run() \n" );
280- set_config (p_config , A_START , 1 );
277+ debug_printf ("model_setup done \n" );
278+ fb_write_reg32 (p_config + A_START , 1 );
281279
282280 for (ib = 0 ; ib < N_BUNDLES ; ib ++ ) {
283281
@@ -298,24 +296,20 @@ extern EXT_C u8 model_run(Memory_st *restrict mp, void *p_config) {
298296 o_bpt = PE_ROWS * pb -> coe * w_last * sizeof (Y_TYPE );
299297
300298#ifdef SIM
301- DMA_WAIT :
302- // if sim return, so SV can pass time, and call again, which will jump to DMA_WAIT again
303- if (!get_config (p_config , A_DONE_WRITE + ocm_bank ))
304- return 1 ;
305-
306299 char f_path_raw [1000 ], f_path_sum [1000 ]; // make sure full f_path_raw is shorter than 1000
307300 sprintf (f_path_raw , "%s/%0d_%0d_%0d_y_raw_sim.txt" , DATA_DIR , ib , ip , it );
308301 sprintf (f_path_sum , "%s/%0d_y_sum_sim.txt" , DATA_DIR , ib );
309302 FILE * fp_raw = fopen (f_path_raw , "a" );
310303 FILE * fp_sum = fopen (f_path_sum , "a" );
311- #else
312- while (!get_config (p_config , A_DONE_WRITE + ocm_bank )){
313- // in FPGA, wait for write done
304+ #endif
305+
306+ while (!fb_read_reg32 (p_config + A_DONE_WRITE + ocm_bank ))
307+ {
308+ // wait
314309 };
315310 flush_cache (& (mp -> ocm [ocm_bank ]), o_bpt );
316311 usleep (0 );
317- #endif
318- set_config (p_config , A_DONE_WRITE + ocm_bank , 0 );
312+ fb_write_reg32 (p_config + A_DONE_WRITE + ocm_bank , 0 );
319313
320314 i32 sram_addr = 0 ;
321315 for (i32 icoe = 0 ; icoe < pb -> coe ; icoe ++ ) {
@@ -401,14 +395,14 @@ extern EXT_C u8 model_run(Memory_st *restrict mp, void *p_config) {
401395 if (pb -> is_softmax ) {
402396 assert_printf (ib , != , N_BUNDLES , "Softmax is only allowed for the last bundle." , DEBUG_INFO );
403397
404- f32 val = (f32 )out_val ;
405- val = val / (f32 )(1 << pb -> softmax_frac );
406- val = val - ((f32 )pb -> softmax_max_i )/(1 << 17 );
407- val = (f32 )exp (val );
398+ f32__ val = (f32__ )out_val ;
399+ val = val / (f32__ )(1 << pb -> softmax_frac );
400+ val = val - ((f32__ )pb -> softmax_max_i )/(1 << 17 );
401+ val = (f32__ )exp (val );
408402 mp -> y [iy_nhwc ] = val ;
409403
410404 if (i_yc == pb -> co - 1 ) {
411- f32 sum = 0 ;
405+ f32__ sum = 0 ;
412406 i32 iy_nhwc ;
413407 for (int i = 0 ; i < pb -> co ; i ++ ){
414408 iy_nhwc = flatten_nhwc (i_yn ,i_yh ,i_yw ,i , yn ,yh ,yw ,yc , "Before softmax sum" , DEBUG_INFO );
@@ -499,7 +493,7 @@ extern EXT_C u8 model_run(Memory_st *restrict mp, void *p_config) {
499493 fclose (fp_sum );
500494 fclose (fp_raw );
501495#endif
502- set_config (p_config , A_DONE_READ + ocm_bank , 1 );
496+ fb_write_reg32 (p_config + A_DONE_READ + ocm_bank , 1 );
503497 debug_printf ("%d-------- iw_kw2 %d done \n" , ib , iw_kw2 );
504498 } // iw_kw2
505499 debug_printf ("%d-------- il %d done\n" , ib , il );
@@ -537,101 +531,11 @@ extern EXT_C u8 model_run(Memory_st *restrict mp, void *p_config) {
537531 }
538532#endif
539533 flush_cache (p_out_buffer , pb -> o_bytes );
540- set_config (p_config , A_BUNDLE_DONE , 1 );
534+ fb_write_reg32 (p_config + A_BUNDLE_DONE , 1 );
541535 } // ib
542- debug_printf ("done all bundles!!\n" );
543- #ifdef SIM
544- is_first_call = 1 ;
545- #endif
546- return 0 ;
547- }
548-
549-
550- // Rest of the helper functions used in simulation.
551- #ifdef SIM
552-
553- extern EXT_C u32 addr_64to32 (void * restrict addr ){
554- u64 offset = (u64 )addr - (u64 )& mem_phy ;
555- return (u32 )offset + 0x20000000 ;
556- // return (u32)((uintptr_t)addr);
557- }
558-
559- extern EXT_C u64 sim_addr_32to64 (u32 addr ){
560- return (u64 )addr - (u64 )0x20000000 + (u64 )& mem_phy ;
561- }
562-
563- extern EXT_C u8 get_byte_a32 (u32 addr_32 ){
564- u64 addr = sim_addr_32to64 (addr_32 );
565- u8 val = * (u8 * restrict)addr ;
566- //debug_printf("get_byte_a32: addr32:0x%x, addr64:0x%lx, val:0x%x\n", addr_32, addr, val);
567- return val ;
568- }
569-
570- extern EXT_C void set_byte_a32 (u32 addr_32 , u8 data ){
571- u64 addr = sim_addr_32to64 (addr_32 );
572- * (u8 * restrict)addr = data ;
573- }
574-
575- extern EXT_C void * get_mp (){
576- return & mem_phy ;
536+ debug_printf ("done all bundles!!\n" );
577537}
578- #else
579-
580- u32 addr_64to32 (void * addr ){
581- return (u32 )addr ;
582- }
583-
584- #endif
585-
586- extern EXT_C void model_setup (Memory_st * restrict mp , void * p_config ) {
587-
588- #ifdef SIM
589- FILE * fp ;
590- char f_path [1000 ];
591- sprintf (f_path , "%s/wbx.bin" , DATA_DIR );
592- fp = fopen (f_path , "rb" );
593- debug_printf ("DEBUG: Reading from file %s \n" , f_path );
594- if (!fp ) debug_printf ("ERROR! File not found: %s \n" , f_path );
595- int bytes = fread (mp -> w , 1 , WB_BYTES + X_BYTES , fp );
596- fclose (fp );
597- #endif
598- flush_cache (mp -> w , WB_BYTES + X_BYTES ); // force transfer to DDR, starting addr & length
599538
600- // Write registers in controller
601- set_config (p_config , A_START , 0 ); // Start
602- set_config (p_config , A_DONE_READ + 0 , 1 ); // Done read mp->ocm bank 0
603- set_config (p_config , A_DONE_READ + 1 , 1 ); // Done read mp->ocm bank 1
604- set_config (p_config , A_DONE_WRITE + 0 , 0 ); // Done write mp->ocm bank 0
605- set_config (p_config , A_DONE_WRITE + 1 , 0 ); // Done write mp->ocm bank 1
606- set_config (p_config , A_OCM_BASE + 0 , addr_64to32 (mem_phy .ocm [0 ])); // Base addr mp->ocm bank 0
607- set_config (p_config , A_OCM_BASE + 1 , addr_64to32 (mem_phy .ocm [1 ])); // Base addr mp->ocm bank 1
608- set_config (p_config , A_WEIGHTS_BASE , addr_64to32 (mem_phy .w )); // Base adddr weights
609- set_config (p_config , A_BUNDLE_DONE , 1 ); // Bundle done writing (pixel dma waits for this)
610- set_config (p_config , A_N_BUNDLES_1 , N_BUNDLES ); // Number of bundles
611- set_config (p_config , A_W_DONE , 0 ); // Weigths done
612- set_config (p_config , A_X_DONE , 0 ); // Bundle done
613- set_config (p_config , A_O_DONE , 0 ); // Output done
614-
615- // Write into BRAM the config for controller
616- i32 parameters [8 * N_BUNDLES ];
617- for (int var = 0 ; var < N_BUNDLES ; var ++ ){
618- parameters [8 * var ] = (var == 0 ) ? addr_64to32 (mem_phy .x ) : addr_64to32 (mem_phy .out_buffers [bundles [var ].in_buffer_idx ]); // x_base address
619- parameters [8 * var + 1 ] = bundles [var ].x_bpt_p0 ; // x_bpt0
620- parameters [8 * var + 2 ] = bundles [var ].x_bpt ; // x_bpt
621- parameters [8 * var + 3 ] = bundles [var ].w_bpt_p0 ; // w_bpt0
622- parameters [8 * var + 4 ] = bundles [var ].w_bpt ; // w_bpt
623-
624- assert_printf (bundles [var ].p , < , 1 <<16 , "" , "P should be less than 2**16 for bundle:%x" , var );
625- assert_printf (bundles [var ].t , < , 1 <<16 , "" , "T should be less than 2**16 for bundle:%x" , var );
626- parameters [8 * var + 5 ] = (bundles [var ].t << 16 ) + bundles [var ].p ; // max p
627- uint64_t h = bundles [var ].header ;
628- parameters [8 * var + 6 ] = (uint32_t )(h & 0xFFFFFFFFu );
629- parameters [8 * var + 7 ] = (uint32_t )(h >> 32 );
630- }
631- for (int var = 0 ; var < 8 * N_BUNDLES ; var ++ ){
632- set_config (p_config , 16 + var , parameters [var ]);
633- }
634- }
635539
636540extern EXT_C void print_output (Memory_st * restrict mp ) {
637541 flush_cache (mp -> y , sizeof (mp -> y ));
0 commit comments