Skip to content

Commit 526241a

Browse files
committed
Firmware: Add UART loader and SDRAM instruction fetch
Add a UART loader firmware (uart_loader) that allows uploading and running programs over UART without reflashing the FPGA. The host-side script (scripts/uart_load.py) implements a simple binary protocol: Load, Go, Dump, and Ping commands. To support executing uploaded code, extend top.v so that instruction fetches with bit 31 set are routed through the SDRAM arbiter instead of the LUT-ROM. This allows any program loaded into SDRAM (0x8000_0000+) to be executed directly. Add shared firmware/link_ram.ld and firmware/start_ram.S for building RAM-targeted binaries with the stack placed safely in SDRAM. Update hello_world with a `make bin BIN_ADDR=0x80000000` target and a README documenting both the bitstream-bake and UART-loader workflows. Made-with: Cursor
1 parent 13e7715 commit 526241a

File tree

11 files changed

+684
-14
lines changed

11 files changed

+684
-14
lines changed

boards/tangnano20k/Makefile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ all: $(BOARD).fs
2121
imem.hex imem_rom.vh imem_data_rom.vh: FORCE
2222
$(MAKE) -C $(PROG_DIR)
2323
cp $(PROG_DIR)/imem.hex .
24-
cp $(PROG_DIR)/imem_rom.vh .
24+
# Rename the assigned variable so top.v can use imem_lut_rdata as an
25+
# intermediate and override it for the SDRAM instruction-fetch path.
26+
sed 's/imem_rdata/imem_lut_rdata/g' $(PROG_DIR)/imem_rom.vh > imem_rom.vh
2527
cp $(PROG_DIR)/imem_data_rom.vh .
2628

2729
# ── FPGA bitstream ────────────────────────────────────────────────────────────

boards/tangnano20k/top.v

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -118,14 +118,32 @@ module top #(
118118

119119
wire o_trap;
120120

121-
// ── Instruction memory: combinatorial LUT-ROM ─────────────────────────────
122-
// A pure combinatorial case ROM synthesises reliably to LUTs.
123-
// $readmemh-based BRAM init is unreliable with the open-source Gowin flow.
121+
// ── Instruction memory ────────────────────────────────────────────────────
122+
// Addresses with bit 31 = 0 are served by the combinatorial LUT-ROM.
123+
// Addresses with bit 31 = 1 are fetched from SDRAM via the shared data-bus
124+
// arbiter — this enables uart_loader to upload and execute code in SDRAM
125+
// (0x8000_0000 – 0x81FF_FFFF) without reflashing the FPGA.
124126
wire [9:0] imem_idx = imem_addr[11:2];
127+
wire imem_from_sdram = imem_addr[31] & imem_valid;
125128

129+
// LUT-ROM: combinatorial case ROM (renamed to imem_lut_rdata by the Makefile
130+
// sed pass so the SDRAM mux below can override without a multiple-driver error).
131+
reg [31:0] imem_lut_rdata;
126132
always @(*) begin
127133
`include "imem_rom.vh"
128-
imem_ready = imem_valid;
134+
end
135+
136+
// Final IMEM output mux. bus_rdata / bus_rready are defined further below
137+
// (after the SDRAM arbiter section); Verilog combinatorial always blocks may
138+
// reference signals declared later in the file.
139+
always @(*) begin
140+
if (imem_from_sdram) begin
141+
imem_rdata = bus_rdata;
142+
imem_ready = ptw_valid ? 1'b0 : bus_rready; // PTW has bus priority: never ack a fetch with PTE data
143+
end else begin
144+
imem_rdata = imem_lut_rdata;
145+
imem_ready = imem_valid;
146+
end
129147
end
130148

131149
// ── Data BRAM ─────────────────────────────────────────────────────────────
@@ -136,11 +154,16 @@ module top #(
136154

137155
wire dmem_wsel = dmem_wvalid && (dmem_waddr[19:16] == 4'b0001);
138156

139-
// Unified read address: PTW read takes priority when ptw_valid is asserted.
140-
// The CPU is stalled (dmem_rvalid=0) during PTW walks, so no conflict arises.
141-
// bus_raddr_full is declared later (after SDRAM section), alias it here for BRAM.
142-
wire [31:0] bus_raddr = ptw_valid ? ptw_addr : dmem_raddr;
143-
wire bus_rvalid = ptw_valid ? 1'b1 : dmem_rvalid;
157+
// Unified read address mux — priority: PTW > IMEM-from-SDRAM > DMEM.
158+
// PTW: CPU is stalled (dmem_rvalid=0, imem_valid still set but we ignore it).
159+
// IMEM-from-SDRAM: CPU is stalled at fetch; dmem_rvalid=0 during fetch stall.
160+
// DMEM: normal data read.
161+
wire [31:0] bus_raddr = ptw_valid ? ptw_addr :
162+
imem_from_sdram ? imem_addr :
163+
dmem_raddr;
164+
wire bus_rvalid = ptw_valid ? 1'b1 :
165+
imem_from_sdram ? 1'b1 :
166+
dmem_rvalid;
144167

145168
wire [9:0] dmem_raddr_idx = bus_raddr[11:2];
146169
wire [9:0] dmem_waddr_idx = dmem_waddr[11:2];
@@ -719,12 +742,13 @@ module top #(
719742
end
720743
end
721744

722-
// CPU DMEM read port: forward from shared bus when not ptw.
745+
// CPU DMEM read port: forward from shared bus when not ptw or imem-sdram.
723746
always @(*) begin
724747
dmem_rdata = bus_rdata;
725-
dmem_rready = ptw_valid ? 1'b0 : bus_rready;
748+
dmem_rready = (ptw_valid || imem_from_sdram) ? 1'b0 : bus_rready;
726749
end
727750

751+
728752
// PTW read port: combinatorial pass-through from shared bus.
729753
assign ptw_rdata = bus_rdata;
730754
assign ptw_ready = ptw_valid ? bus_rready : 1'b0;

firmware/hello_world/Makefile

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,13 @@ LDFLAGS := -T link.ld -march=rv32i -mabi=ilp32 -nostdlib \
1010

1111
SRCS := start.S hello_world.c
1212

13+
# Load address used when building a flat .bin for uart_loader.
14+
# Default: DMEM base. Override: make bin BIN_ADDR=0x80000000
15+
BIN_ADDR ?= 0x00010000
16+
1317
AWK_HEXTODEC := function hextodec(h, i,v,c){v=0;h=toupper(h);for(i=1;i<=length(h);i++){c=index("0123456789ABCDEF",substr(h,i,1))-1;v=v*16+c};return v} {gsub(/\r/,"",$$0)}
1418

15-
.PHONY: all clean dis
19+
.PHONY: all clean dis bin
1620

1721
all: imem.hex imem_init.vh imem_rom.vh imem_data_rom.vh
1822

@@ -40,6 +44,16 @@ imem_rom.vh: imem.hex
4044
hello_world.elf: $(SRCS) link.ld
4145
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(SRCS)
4246

47+
# Flat binary linked at BIN_ADDR for uart_loader.
48+
# Uses start_ram.S (stack in SDRAM) instead of start.S (stack in DMEM).
49+
bin: hello_world.bin
50+
hello_world.bin: hello_world.c ../start_ram.S ../link_ram.ld
51+
$(CC) $(CFLAGS) -T ../link_ram.ld -Wl,--defsym,RAM_BASE=$(BIN_ADDR) \
52+
-march=rv32im_zicsr -mabi=ilp32 -nostdlib -nostartfiles -Wl,--no-relax \
53+
-o hello_world_ram.elf ../start_ram.S hello_world.c
54+
$(OBJCOPY) -O binary hello_world_ram.elf $@
55+
@ls -la $@
56+
4357
dis: hello_world.elf
4458
$(OBJDUMP) -d $<
4559

@@ -51,4 +65,5 @@ imem_data_rom.vh: imem.hex
5165
$< > $@
5266

5367
clean:
54-
rm -f hello_world.elf imem.hex imem_init.vh imem_rom.vh imem_data_rom.vh
68+
rm -f hello_world.elf hello_world_ram.elf hello_world.bin \
69+
imem.hex imem_init.vh imem_rom.vh imem_data_rom.vh

firmware/hello_world/README.md

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
# hello_world
2+
3+
Repeatedly prints `Hello, world!` over UART TX every ~0.5 s.
4+
5+
## Prerequisites
6+
7+
- `riscv64-elf-gcc` toolchain
8+
- OSS CAD Suite (`yosys`, `nextpnr-himbaechel`, `gowin_pack`, `openFPGALoader`)
9+
- `pyserial` (`pip3 install pyserial`)
10+
- Tang Nano 20K connected over USB
11+
12+
## Method 1 — Bake into FPGA bitstream (IMEM LUT-ROM)
13+
14+
This is the traditional path. The firmware is compiled into the FPGA bitstream
15+
itself and runs immediately on power-up.
16+
17+
```bash
18+
# Build and flash (synthesises the full SoC with hello_world in the ROM)
19+
make -C boards/tangnano20k FW=hello_world flash-sram
20+
21+
# Connect to see output
22+
picocom -b 115200 /dev/tty.usbserial-XXXXXXXX
23+
```
24+
25+
## Method 2 — Upload via UART loader (no reflash needed)
26+
27+
The UART loader firmware sits in the IMEM LUT-ROM and accepts programs over
28+
UART. Programs are linked to run from SDRAM (`0x8000_0000`) where the CPU can
29+
both write and fetch instructions.
30+
31+
### Step 1 — Flash the UART loader (one time only)
32+
33+
```bash
34+
make -C boards/tangnano20k FW=uart_loader flash-sram
35+
```
36+
37+
### Step 2 — Build the SDRAM binary
38+
39+
```bash
40+
make -C firmware/hello_world bin BIN_ADDR=0x80000000
41+
# produces firmware/hello_world/hello_world.bin (140 bytes)
42+
```
43+
44+
### Step 3 — Upload and run
45+
46+
```bash
47+
python3 scripts/uart_load.py -p /dev/tty.usbserial-XXXXXXXX run \
48+
firmware/hello_world/hello_world.bin 0x80000000
49+
```
50+
51+
The script uploads the binary, jumps to it, and streams the program's output
52+
directly to your terminal. Press `Ctrl+C` to disconnect.
53+
54+
Expected output:
55+
56+
```
57+
Connected to /dev/tty.usbserial-XXXXXXXX at 115200 baud
58+
Loading 140 bytes to 0x80000000 (csum=0x9A)...
59+
OK
60+
Jumping to 0x80000000...
61+
--- program output (Ctrl+C to exit) ---
62+
Hello, world!
63+
Hello, world!
64+
Hello, world!
65+
...
66+
```
67+
68+
## Notes
69+
70+
- The `BIN_ADDR` default is `0x00010000` (DMEM), but DMEM cannot be used for
71+
instruction fetch — always use `BIN_ADDR=0x80000000` (SDRAM) with the UART
72+
loader.
73+
- The stack is placed at `0x801F_FFFC` (top of the first 2 MiB of SDRAM) by
74+
`firmware/start_ram.S`, safely above the 140-byte program image.
75+
- To disassemble the ROM-linked build (`hello_world.elf`): `make -C firmware/hello_world dis`. The SDRAM build's ELF is `hello_world_ram.elf`.

firmware/link_ram.ld

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/* link_ram.ld — position-independent RAM link script for uart_loader targets.
2+
*
3+
* Links the entire program (text + rodata + data + bss) into one contiguous
4+
* region starting at RAM_BASE. RAM_BASE must be provided on the linker
5+
* command line with --defsym RAM_BASE=0x....
6+
*
7+
* Usage:
8+
* $(CC) ... -T ../link_ram.ld -Wl,--defsym,RAM_BASE=0x80000000 ...
9+
*
10+
* Typical targets:
11+
* DMEM : RAM_BASE = 0x00010000 (4 KiB; LENGTH below stays 32M, so DMEM overflow is not caught)
12+
* SDRAM : RAM_BASE = 0x80000000 (32 MiB)
13+
*/
14+
15+
ENTRY(_start)
16+
17+
MEMORY {
18+
RAM (rwx) : ORIGIN = RAM_BASE, LENGTH = 32M
19+
}
20+
21+
SECTIONS {
22+
.text : {
23+
*(.text.start)
24+
*(.text*)
25+
*(.rodata*)
26+
} > RAM
27+
28+
.data : { *(.data*) } > RAM
29+
30+
.bss : {
31+
__bss_start = .;
32+
*(.bss*)
33+
*(.sbss*)
34+
*(COMMON)
35+
__bss_end = .;
36+
} > RAM
37+
38+
/DISCARD/ : { *(.comment) *(.eh_frame) }
39+
}

firmware/start_ram.S

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/* start_ram.S — CRT0 for uart_loader-uploaded programs.
2+
*
3+
* Used when a firmware binary is loaded into RAM (DMEM or SDRAM) via the
4+
* UART loader rather than baked into the IMEM LUT-ROM.
5+
*
6+
* Stack is placed at the top of the first 2 MiB of SDRAM (0x801F_FFFC).
7+
* This is safe for both DMEM-loaded programs (stack is far above the code)
8+
* and SDRAM-loaded programs (stack is within SDRAM, above the typical load
9+
* address of 0x8000_0000).
10+
*
11+
* .bss is zeroed in-place (it follows .data in the same RAM region).
12+
*/
13+
14+
.section .text.start
15+
.global _start
16+
_start:
17+
/* Stack at top of first 2 MiB of SDRAM — safe for any load address */
18+
li sp, 0x801FFFFC
19+
20+
/* Zero .bss */
21+
la a0, __bss_start
22+
la a1, __bss_end
23+
1: bgeu a0, a1, 2f /* unsigned: these are addresses, not signed ints */
24+
sw zero, 0(a0)
25+
addi a0, a0, 4
26+
j 1b
27+
2:
28+
call main
29+
30+
/* main() returned — spin forever */
31+
halt:
32+
j halt

firmware/uart_loader/Makefile

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
CROSS ?= riscv64-elf
2+
CC := $(CROSS)-gcc
3+
OBJCOPY := $(CROSS)-objcopy
4+
OBJDUMP := $(CROSS)-objdump
5+
6+
CFLAGS := -march=rv32im_zicsr -mabi=ilp32 -Os -ffreestanding -nostdlib \
7+
-nostartfiles -Wall -Wextra
8+
LDFLAGS := -T link.ld -march=rv32im_zicsr -mabi=ilp32 -nostdlib \
9+
-nostartfiles -Wl,--no-relax
10+
11+
SRCS := start.S uart_loader.c
12+
13+
AWK_HEXTODEC := function hextodec(h, i,v,c){v=0;h=toupper(h);for(i=1;i<=length(h);i++){c=index("0123456789ABCDEF",substr(h,i,1))-1;v=v*16+c};return v} {gsub(/\r/,"",$$0)}
14+
15+
.PHONY: all clean dis
16+
17+
all: imem.hex imem_init.vh imem_rom.vh imem_data_rom.vh
18+
19+
imem.hex: uart_loader.elf
20+
$(OBJCOPY) -O verilog --verilog-data-width=4 \
21+
--only-section=.text --only-section=.rodata \
22+
$< $@
23+
24+
imem_init.vh: imem.hex
25+
awk '$(AWK_HEXTODEC) \
26+
/^@/{addr=hextodec(substr($$0,2)); next} \
27+
{for(i=1;i<=NF;i++) printf " imem[%d] = 32'\''h%s;\n", addr++, $$i}' \
28+
$< > $@
29+
30+
imem_rom.vh: imem.hex
31+
@printf ' case (imem_idx)\n' > $@
32+
awk '$(AWK_HEXTODEC) \
33+
/^@/{addr=hextodec(substr($$0,2)); next} \
34+
{for(i=1;i<=NF;i++){gsub(/\r/,"",$$i); printf " 10'\''d%d: imem_rdata = 32'\''h%s;\n", addr++, toupper($$i)}}' \
35+
$< >> $@
36+
@printf ' default: imem_rdata = 32'\''h00000013;\n endcase\n' >> $@
37+
38+
imem_data_rom.vh: imem.hex
39+
awk '$(AWK_HEXTODEC) \
40+
/^@/{addr=hextodec(substr($$0,2)); next} \
41+
{for(i=1;i<=NF;i++){gsub(/\r/,"",$$i); printf " 10'\''d%d: dmem_imem_rdata = 32'\''h%s;\n", addr++, toupper($$i)}}' \
42+
$< > $@
43+
44+
uart_loader.elf: $(SRCS) link.ld
45+
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(SRCS)
46+
47+
dis: uart_loader.elf
48+
$(OBJDUMP) -d $<
49+
50+
clean:
51+
rm -f uart_loader.elf imem.hex imem_init.vh imem_rom.vh imem_data_rom.vh

firmware/uart_loader/link.ld

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/* Linker script for NyanSoC firmware running on Tang Nano 20K.
2+
*
3+
* Memory map:
4+
* IMEM: 0x0000_0000 - 0x0000_0FFF (4 KiB, combinatorial LUT-ROM)
5+
* DMEM: 0x0001_0000 - 0x0001_0FFF (4 KiB, BRAM, read/write)
6+
*/
7+
8+
ENTRY(_start)
9+
10+
MEMORY {
11+
IMEM (rx) : ORIGIN = 0x00000000, LENGTH = 4K
12+
DMEM (rw) : ORIGIN = 0x00010000, LENGTH = 4K
13+
}
14+
15+
SECTIONS {
16+
.text : {
17+
*(.text.start)
18+
*(.text*)
19+
*(.rodata*)
20+
} > IMEM
21+
22+
.data : { *(.data*) } > DMEM
23+
24+
.bss : {
25+
__bss_start = .;
26+
*(.bss*)
27+
*(.sbss*)
28+
*(COMMON)
29+
__bss_end = .;
30+
} > DMEM
31+
32+
/DISCARD/ : { *(.comment) *(.eh_frame) }
33+
}

firmware/uart_loader/start.S

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/* start.S — CRT0 for uart_loader.
2+
*
3+
* Sets up stack in DMEM, installs a minimal trap handler, zeroes .bss,
4+
* calls main(). The loader never enables interrupts so the trap handler
5+
* is just a safety net.
6+
*/
7+
8+
.section .text.start
9+
.global _start
10+
_start:
11+
li sp, 0x00010FFC
12+
13+
la t0, trap_handler
14+
csrw mtvec, t0
15+
16+
/* Zero .bss */
17+
la a0, __bss_start
18+
la a1, __bss_end
19+
1: bge a0, a1, 2f
20+
sw zero, 0(a0)
21+
addi a0, a0, 4
22+
j 1b
23+
2:
24+
call main
25+
26+
halt:
27+
j halt
28+
29+
.balign 4
30+
trap_handler:
31+
mret

0 commit comments

Comments
 (0)