Skip to content

Commit 6b940f9

Browse files
committed
Merge master (firebridge) with ibex-soc
2 parents a3fe898 + 78ec91a commit 6b940f9

38 files changed

+1241
-962
lines changed

.github/workflows/verify.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ jobs:
148148
- name: Pull CI image
149149
run: docker pull "${GHCR_IMAGE}:${IMAGE_TAG}"
150150

151-
- name: Ibex SoC regression + output check
151+
- name: Ibex SoC + output check
152152
run: |
153153
docker run --rm \
154154
-v "$PWD":/work \
@@ -157,5 +157,5 @@ jobs:
157157
bash -c '
158158
fusesoc library add sa_ip "$(pwd -P)" || true
159159
mkdir -p run/work
160-
make TEST=ibex_test smoke_test iclean ibuild irun verify_ibex
160+
make smoke_ibex
161161
'

.gitignore

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
# Generated files
2+
run/work*/config_fw.h
3+
run/work*/config_hw.svh
4+
run/work*/config_hw.tcl
5+
run/work*/config_tb.svh
6+
run/work*/hardware.json
7+
run/work*/vivado_flow.tcl
8+
run/work*/sources.txt
9+
10+
# Other files
111
old/
212
__pycache__
313

@@ -83,4 +93,4 @@ docs/source/**
8393

8494
# VSCode
8595
settings.json
86-
.vscode
96+
.vscode

Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ smoke_test:
1010
verify_ibex:
1111
cd ibex-soc && python check_output.py
1212

13+
smoke_ibex:
14+
make TEST=ibex_test smoke_test iclean ibuild irun verify_ibex
15+
1316
# Docker
1417

1518
USR := $(shell id -un)

deepsocflow/c/deepsocflow_xilinx.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "xil_io.h"
77
#include "xil_sleeptimer.h"
88
#include "xil_mmu.h"
9+
#include "sleep.h"
910

1011
#include <assert.h>
1112
#include <limits.h>
@@ -39,11 +40,12 @@ static inline void hardware_cleanup(){
3940
cleanup_platform();
4041
}
4142

42-
static inline void model_run_timed(void *mp, void *p_config, int n){
43+
static inline void model_run_timed(void *mp, int n){
4344
XTime time_start, time_end;
45+
run(mp);
4446
XTime_GetTime(&time_start);
4547
for (int i=0; i<n; i++)
46-
model_run(mp, p_config);
48+
run(mp);
4749
XTime_GetTime(&time_end);
4850
printf("Done inference! time taken: %.5f ms \n", 1000.0*(float)(time_end-time_start)/COUNTS_PER_SECOND/n);
4951
}

deepsocflow/c/runtime.h

Lines changed: 69 additions & 165 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,13 @@ typedef enum {POOL_NONE, POOL_MAX, POOL_AVG} Pool_t;
4040

4141
#include "config_fw.h"
4242

43-
#define f32 O_TYPE
43+
#define f32__ O_TYPE
4444
#define X_BITS (1 << X_BITS_L2)
4545
#define X_WORDS_PER_BYTE (8 / X_BITS)
4646
#define X_BITS_MASK ((1 << X_BITS) -1)
4747
#ifdef SIM
4848
#define XDEBUG
49+
void usleep(int x) {}
4950
#endif
5051

5152
typedef struct {
@@ -61,12 +62,14 @@ typedef struct {
6162
i8 out_buffers [N_OUT_BUF ][O_BYTES_MAX ];
6263

6364
#ifdef XDEBUG
64-
i8 debug_tiled [O_WORDS_MAX ];
65-
i32 debug_nhwc [NHWC_WORDS ];
65+
int8_t debug_tiled [O_WORDS_MAX ];
66+
int32_t debug_nhwc [NHWC_WORDS ];
6667
#endif
67-
i8 add_buffers [N_ADD_BUF ][NHWC_WORDS ]; // should be last, since N_ADD_BUF can be empty
68+
int8_t add_buffers [N_ADD_BUF ][NHWC_WORDS ]; // should be last, since N_ADD_BUF can be empty
6869
} Memory_st;
6970

71+
#include "fb_fw_wrap.h"
72+
7073
#define A_START 0x0
7174
#define A_DONE_READ 0x1 // 2
7275
#define A_DONE_WRITE 0x3 // 2
@@ -78,48 +81,57 @@ typedef struct {
7881
#define A_X_DONE 0xB
7982
#define A_O_DONE 0xC
8083

81-
#ifdef __cplusplus
82-
#define EXT_C "C"
83-
#define restrict __restrict__
84-
#else
85-
#define EXT_C
86-
#endif
84+
int32_t *p_config = (int32_t *)CONFIG_BASEADDR;
8785

86+
extern EXT_C void model_setup(Memory_st *restrict mp) {
8887
#ifdef SIM
89-
#define sim_fprintf fprintf
90-
#include <stdbool.h>
88+
FILE *fp;
89+
char f_path [1000];
90+
sprintf(f_path, "%s/wbx.bin", DATA_DIR);
91+
fp = fopen(f_path, "rb");
92+
debug_printf("DEBUG: Reading from file %s \n", f_path);
93+
if(!fp) debug_printf("ERROR! File not found: %s \n", f_path);
94+
int bytes = fread(mp->w, 1, WB_BYTES+X_BYTES, fp);
95+
fclose(fp);
96+
#endif
97+
flush_cache(mp->w, WB_BYTES+X_BYTES); // force transfer to DDR, starting addr & length
9198

92-
Memory_st mem_phy;
93-
extern EXT_C u32 get_config(void*, u32);
94-
extern EXT_C void set_config(void*, u32, u32);
95-
static inline void flush_cache(void *addr, uint32_t bytes) {} // Do nothing
9699

97-
#else
98-
#define sim_fprintf(...)
100+
// Write registers in controller
101+
fb_write_reg32(p_config + A_START , 0); // Start
102+
fb_write_reg32(p_config + A_DONE_READ +0, 1); // Done read mp->ocm bank 0
103+
fb_write_reg32(p_config + A_DONE_READ +1, 1); // Done read mp->ocm bank 1
104+
fb_write_reg32(p_config + A_DONE_WRITE+0, 0); // Done write mp->ocm bank 0
105+
fb_write_reg32(p_config + A_DONE_WRITE+1, 0); // Done write mp->ocm bank 1
106+
fb_write_reg32(p_config + A_OCM_BASE +0, fb_addr_64to32(mem_phy.ocm[0])); // Base addr mp->ocm bank 0
107+
fb_write_reg32(p_config + A_OCM_BASE +1, fb_addr_64to32(mem_phy.ocm[1])); // Base addr mp->ocm bank 1
108+
fb_write_reg32(p_config + A_WEIGHTS_BASE, fb_addr_64to32(mem_phy.w)); // Base addr weights
109+
fb_write_reg32(p_config + A_BUNDLE_DONE , 1); // Bundle done writing (pixel dma waits for this)
110+
fb_write_reg32(p_config + A_N_BUNDLES_1 , N_BUNDLES); // Number of bundles
111+
fb_write_reg32(p_config + A_W_DONE , 0); // Weights done
112+
fb_write_reg32(p_config + A_X_DONE , 0); // Bundle done
113+
fb_write_reg32(p_config + A_O_DONE , 0); // Output done
99114

100-
// #ifdef RISCV
101-
// Memory_st mem_phy;
102-
// #else
103-
#define mem_phy (*(Memory_st* restrict)MEM_BASEADDR)
104-
// #endif
115+
// Write into BRAM the config for controller
116+
i32 parameters[8*N_BUNDLES];
117+
for (int var = 0; var < N_BUNDLES; var++){
118+
parameters[8*var] = (var == 0) ? fb_addr_64to32(mem_phy.x) : fb_addr_64to32(mem_phy.out_buffers[bundles[var].in_buffer_idx]); // x_base address
119+
parameters[8*var+1] = bundles[var].x_bpt_p0; // x_bpt0
120+
parameters[8*var+2] = bundles[var].x_bpt; // x_bpt
121+
parameters[8*var+3] = bundles[var].w_bpt_p0; // w_bpt0
122+
parameters[8*var+4] = bundles[var].w_bpt; // w_bpt
105123

106-
inline volatile u32 get_config(void *config_base, u32 offset){
107-
return *(volatile u32 *)(config_base + offset*4);
124+
assert_printf(bundles[var].p, <, 1<<16, "", "P should be less than 2**16 for bundle:%x", var);
125+
assert_printf(bundles[var].t, <, 1<<16, "", "T should be less than 2**16 for bundle:%x", var);
126+
parameters[8*var+5] = (bundles[var].t << 16) + bundles[var].p; // max p
127+
uint64_t h = bundles[var].header;
128+
parameters[8*var + 6] = (uint32_t)(h & 0xFFFFFFFFu);
129+
parameters[8*var + 7] = (uint32_t)(h >> 32);
108130
}
109-
110-
inline void set_config(void *config_base, u32 offset, u32 data){
111-
*(volatile u32 *restrict)(config_base + offset*4) = data;
131+
for (int var = 0; var < 8*N_BUNDLES; var++){
132+
fb_write_reg32(p_config + 16+var, parameters[var]);
112133
}
113-
#endif
114-
115-
#ifdef XDEBUG
116-
#define debug_printf printf
117-
#define assert_printf(v1, op, v2, optional_debug_info,...) ((v1 op v2) || (debug_printf("ASSERT FAILED: \n CONDITION: "), debug_printf("( " #v1 " " #op " " #v2 " )"), debug_printf(", VALUES: ( %d %s %d ), ", v1, #op, v2), debug_printf("DEBUG_INFO: " optional_debug_info), debug_printf(" " __VA_ARGS__), debug_printf("\n\n"), assert(v1 op v2), 0))
118-
#else
119-
#define assert_printf(...)
120-
// #define debug_printf(...)
121-
#endif
122-
134+
}
123135

124136
// Helper functions
125137

@@ -245,7 +257,7 @@ static inline void tile_write( i32 out_val, i8 *restrict p_out_buffer, i32 ib, B
245257

246258
}
247259

248-
extern EXT_C u8 model_run(Memory_st *restrict mp, void *p_config) {
260+
extern EXT_C void run(Memory_st *restrict mp) {
249261

250262
static Bundle_t *restrict pb = &bundles[0];
251263
static i32 it_bias=0, w_last, o_bpt;
@@ -259,25 +271,11 @@ extern EXT_C u8 model_run(Memory_st *restrict mp, void *p_config) {
259271

260272
static i8 ocm_bank = 1; // We flip the bank at the beginning of the loop, starting from bank 0
261273

262-
/**
263-
* ---------- WAIT FOR S2MM DMA DONE ----------
264-
*
265-
* When running on hardware, we wait for DMA's interrupt at "DMA_WAIT"
266-
* But Verilator cannot pass simulation time when "waiting"
267-
* Therefore,
268-
* During simulation, this function gets called again and again
269-
* On first call, values are set and returned before processing.
270-
* On subsequent calls, function skips to DMA_WAIT, and starts processing
271-
* This mimics the behavior of waiting for DMA's interrupt
272-
*/
273-
#ifdef SIM
274-
static char is_first_call = 1;
275-
if (is_first_call) is_first_call = 0;
276-
else goto DMA_WAIT;
277-
#endif
274+
debug_printf("Starting model_setup()\n");
275+
model_setup(mp);
278276

279-
debug_printf("Starting model_run()\n");
280-
set_config(p_config, A_START, 1);
277+
debug_printf("model_setup done\n");
278+
fb_write_reg32(p_config + A_START, 1);
281279

282280
for (ib = 0; ib < N_BUNDLES; ib++) {
283281

@@ -298,24 +296,20 @@ extern EXT_C u8 model_run(Memory_st *restrict mp, void *p_config) {
298296
o_bpt = PE_ROWS * pb->coe * w_last * sizeof(Y_TYPE);
299297

300298
#ifdef SIM
301-
DMA_WAIT:
302-
// if sim return, so SV can pass time, and call again, which will jump to DMA_WAIT again
303-
if (!get_config(p_config, A_DONE_WRITE + ocm_bank))
304-
return 1;
305-
306299
char f_path_raw [1000], f_path_sum [1000]; // make sure full f_path_raw is shorter than 1000
307300
sprintf(f_path_raw, "%s/%0d_%0d_%0d_y_raw_sim.txt", DATA_DIR, ib, ip, it);
308301
sprintf(f_path_sum, "%s/%0d_y_sum_sim.txt", DATA_DIR, ib);
309302
FILE *fp_raw = fopen(f_path_raw, "a");
310303
FILE *fp_sum = fopen(f_path_sum, "a");
311-
#else
312-
while (!get_config(p_config, A_DONE_WRITE + ocm_bank)){
313-
// in FPGA, wait for write done
304+
#endif
305+
306+
while (!fb_read_reg32(p_config + A_DONE_WRITE + ocm_bank))
307+
{
308+
// wait
314309
};
315310
flush_cache(&(mp->ocm[ocm_bank]), o_bpt);
316311
usleep(0);
317-
#endif
318-
set_config(p_config, A_DONE_WRITE + ocm_bank, 0);
312+
fb_write_reg32(p_config + A_DONE_WRITE + ocm_bank, 0);
319313

320314
i32 sram_addr=0;
321315
for (i32 icoe=0; icoe < pb->coe; icoe++) {
@@ -401,14 +395,14 @@ extern EXT_C u8 model_run(Memory_st *restrict mp, void *p_config) {
401395
if (pb->is_softmax) {
402396
assert_printf (ib , !=, N_BUNDLES, "Softmax is only allowed for the last bundle.", DEBUG_INFO);
403397

404-
f32 val = (f32)out_val;
405-
val = val / (f32)(1 << pb->softmax_frac);
406-
val = val - ((f32)pb->softmax_max_i)/(1 << 17);
407-
val = (f32)exp(val);
398+
f32__ val = (f32__)out_val;
399+
val = val / (f32__)(1 << pb->softmax_frac);
400+
val = val - ((f32__)pb->softmax_max_i)/(1 << 17);
401+
val = (f32__)exp(val);
408402
mp->y[iy_nhwc] = val;
409403

410404
if (i_yc == pb->co-1) {
411-
f32 sum = 0;
405+
f32__ sum = 0;
412406
i32 iy_nhwc;
413407
for (int i=0; i<pb->co; i++){
414408
iy_nhwc = flatten_nhwc(i_yn,i_yh,i_yw,i, yn,yh,yw,yc, "Before softmax sum", DEBUG_INFO);
@@ -499,7 +493,7 @@ extern EXT_C u8 model_run(Memory_st *restrict mp, void *p_config) {
499493
fclose(fp_sum);
500494
fclose(fp_raw);
501495
#endif
502-
set_config(p_config, A_DONE_READ + ocm_bank, 1);
496+
fb_write_reg32(p_config + A_DONE_READ + ocm_bank, 1);
503497
debug_printf("%d-------- iw_kw2 %d done \n", ib, iw_kw2);
504498
} // iw_kw2
505499
debug_printf("%d-------- il %d done\n", ib, il);
@@ -537,101 +531,11 @@ extern EXT_C u8 model_run(Memory_st *restrict mp, void *p_config) {
537531
}
538532
#endif
539533
flush_cache(p_out_buffer, pb->o_bytes);
540-
set_config(p_config, A_BUNDLE_DONE, 1);
534+
fb_write_reg32(p_config + A_BUNDLE_DONE, 1);
541535
} // ib
542-
debug_printf("done all bundles!!\n");
543-
#ifdef SIM
544-
is_first_call = 1;
545-
#endif
546-
return 0;
547-
}
548-
549-
550-
// Rest of the helper functions used in simulation.
551-
#ifdef SIM
552-
553-
extern EXT_C u32 addr_64to32(void* restrict addr){
554-
u64 offset = (u64)addr - (u64)&mem_phy;
555-
return (u32)offset + 0x20000000;
556-
// return (u32)((uintptr_t)addr);
557-
}
558-
559-
extern EXT_C u64 sim_addr_32to64(u32 addr){
560-
return (u64)addr - (u64)0x20000000 + (u64)&mem_phy;
561-
}
562-
563-
extern EXT_C u8 get_byte_a32 (u32 addr_32){
564-
u64 addr = sim_addr_32to64(addr_32);
565-
u8 val = *(u8*restrict)addr;
566-
//debug_printf("get_byte_a32: addr32:0x%x, addr64:0x%lx, val:0x%x\n", addr_32, addr, val);
567-
return val;
568-
}
569-
570-
extern EXT_C void set_byte_a32 (u32 addr_32, u8 data){
571-
u64 addr = sim_addr_32to64(addr_32);
572-
*(u8*restrict)addr = data;
573-
}
574-
575-
extern EXT_C void *get_mp(){
576-
return &mem_phy;
536+
debug_printf("done all bundles!!\n");
577537
}
578-
#else
579-
580-
u32 addr_64to32 (void* addr){
581-
return (u32)addr;
582-
}
583-
584-
#endif
585-
586-
extern EXT_C void model_setup(Memory_st *restrict mp, void *p_config) {
587-
588-
#ifdef SIM
589-
FILE *fp;
590-
char f_path [1000];
591-
sprintf(f_path, "%s/wbx.bin", DATA_DIR);
592-
fp = fopen(f_path, "rb");
593-
debug_printf("DEBUG: Reading from file %s \n", f_path);
594-
if(!fp) debug_printf("ERROR! File not found: %s \n", f_path);
595-
int bytes = fread(mp->w, 1, WB_BYTES+X_BYTES, fp);
596-
fclose(fp);
597-
#endif
598-
flush_cache(mp->w, WB_BYTES+X_BYTES); // force transfer to DDR, starting addr & length
599538

600-
// Write registers in controller
601-
set_config(p_config, A_START , 0); // Start
602-
set_config(p_config, A_DONE_READ +0, 1); // Done read mp->ocm bank 0
603-
set_config(p_config, A_DONE_READ +1, 1); // Done read mp->ocm bank 1
604-
set_config(p_config, A_DONE_WRITE+0, 0); // Done write mp->ocm bank 0
605-
set_config(p_config, A_DONE_WRITE+1, 0); // Done write mp->ocm bank 1
606-
set_config(p_config, A_OCM_BASE +0, addr_64to32(mem_phy.ocm[0])); // Base addr mp->ocm bank 0
607-
set_config(p_config, A_OCM_BASE +1, addr_64to32(mem_phy.ocm[1])); // Base addr mp->ocm bank 1
608-
set_config(p_config, A_WEIGHTS_BASE, addr_64to32(mem_phy.w)); // Base addr weights
609-
set_config(p_config, A_BUNDLE_DONE , 1); // Bundle done writing (pixel dma waits for this)
610-
set_config(p_config, A_N_BUNDLES_1 , N_BUNDLES); // Number of bundles
611-
set_config(p_config, A_W_DONE , 0); // Weights done
612-
set_config(p_config, A_X_DONE , 0); // Bundle done
613-
set_config(p_config, A_O_DONE , 0); // Output done
614-
615-
// Write into BRAM the config for controller
616-
i32 parameters[8*N_BUNDLES];
617-
for (int var = 0; var < N_BUNDLES; var++){
618-
parameters[8*var] = (var == 0) ? addr_64to32(mem_phy.x) : addr_64to32(mem_phy.out_buffers[bundles[var].in_buffer_idx]); // x_base address
619-
parameters[8*var+1] = bundles[var].x_bpt_p0; // x_bpt0
620-
parameters[8*var+2] = bundles[var].x_bpt; // x_bpt
621-
parameters[8*var+3] = bundles[var].w_bpt_p0; // w_bpt0
622-
parameters[8*var+4] = bundles[var].w_bpt; // w_bpt
623-
624-
assert_printf(bundles[var].p, <, 1<<16, "", "P should be less than 2**16 for bundle:%x", var);
625-
assert_printf(bundles[var].t, <, 1<<16, "", "T should be less than 2**16 for bundle:%x", var);
626-
parameters[8*var+5] = (bundles[var].t << 16) + bundles[var].p; // max p
627-
uint64_t h = bundles[var].header;
628-
parameters[8*var + 6] = (uint32_t)(h & 0xFFFFFFFFu);
629-
parameters[8*var + 7] = (uint32_t)(h >> 32);
630-
}
631-
for (int var = 0; var < 8*N_BUNDLES; var++){
632-
set_config(p_config, 16+var, parameters[var]);
633-
}
634-
}
635539

636540
extern EXT_C void print_output (Memory_st *restrict mp) {
637541
flush_cache(mp->y, sizeof(mp->y));

0 commit comments

Comments
 (0)