Skip to content

Commit d963adf

Browse files
SmephiteNavaneeth-KunhiPurayil
authored and committed
hw: Add SystemRDL for Spatz cluster generation
Note: This only adds the RDL files, but does not yet generate the register files.
1 parent a5094d9 commit d963adf

File tree

4 files changed

+339
-0
lines changed

4 files changed

+339
-0
lines changed
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
## Copyright 2025 ETH Zurich and University of Bologna.
2+
## Solderpad Hardware License, Version 0.51, see LICENSE for details.
3+
## SPDX-License-Identifier: SHL-0.51
4+
// Copyright 2025 ETH Zurich and University of Bologna.
5+
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
6+
// SPDX-License-Identifier: SHL-0.51
7+
8+
${disclaimer}
9+
10+
<%!
11+
import math
12+
13+
def next_power_of_2(n):
    """Return the smallest power of 2 greater than or equal to n.

    Args:
        n: Non-negative number. ``0`` maps to ``1``.

    Returns:
        ``1`` when ``n == 0``; otherwise ``2**k`` for the smallest integer
        ``k`` such that ``2**k >= n``.

    Raises:
        ValueError: If ``n`` is negative (raised by ``math.log2``).
    """
    if n == 0:
        return 1
    if isinstance(n, int) and n > 0:
        # Exact integer arithmetic: math.log2() converts large ints to a
        # float and can land one power of two too low (e.g. 2**53 + 1).
        return 1 << (n - 1).bit_length()
    # Non-integer (or negative) input keeps the floating-point path.
    return 2 ** math.ceil(math.log2(n))
16+
%>
17+
18+
`ifndef __${cfg['name'].upper()}_RDL__
19+
`define __${cfg['name'].upper()}_RDL__
20+
21+
`include "spatz_cluster_peripheral_reg.rdl"
22+
23+
addrmap ${cfg['name']} {
24+
25+
default regwidth = ${cfg['dma_data_width']};
26+
mem tcdm {
27+
mementries = ${hex(int(cfg['tcdm']['size'] * 1024 * 8 / cfg['dma_data_width']))};
28+
memwidth = ${cfg['dma_data_width']};
29+
};
30+
31+
mem bootrom {
32+
mementries = ${hex(int(4 * 1024 * 8 / cfg['dma_data_width']))};
33+
memwidth = ${cfg['dma_data_width']};
34+
};
35+
36+
37+
38+
external tcdm tcdm ;
39+
external bootrom bootrom @ ${hex(next_power_of_2(cfg['tcdm']['size']) * 1024)};
40+
spatz_cluster_peripheral_reg peripheral_reg @ ${hex((next_power_of_2(cfg['tcdm']['size'])+4) * 1024)};
41+
42+
};
43+
44+
`endif // __${cfg['name'].upper()}_RDL__
Lines changed: 281 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,281 @@
1+
// Copyright 2025 ETH Zurich and University of Bologna.
2+
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
3+
// SPDX-License-Identifier: SHL-0.51
4+
5+
`ifndef __SPATZ_CLUSTER_PERIPHERAL_REG_RDL__
6+
`define __SPATZ_CLUSTER_PERIPHERAL_REG_RDL__
7+
8+
addrmap spatz_cluster_peripheral_reg #(
9+
longint unsigned NumPerfCounters = 2
10+
) {
11+
default regwidth = 64;
12+
13+
reg perf_cnt_en {
14+
field {
15+
name = "enable";
16+
desc = "Enable a particular performance counter to start tracking.";
17+
hw = r;
18+
sw = rw;
19+
} enable[0:0] = 1;
20+
};
21+
22+
enum perf_metric {
23+
cycle = 0 {
24+
desc = "Cycle counter. Counts up as long as the cluster is powered.";
25+
};
26+
tcdm_accessed = 1 {
27+
desc = "Increased whenever the TCDM is accessed. Each individual access is tracked,
28+
so if `n` cores access the TCDM, `n` will be added. Accesses are tracked at the TCDM,
29+
so it doesn't matter whether the cores or for example the SSR hardware accesses
30+
the TCDM. _This is a cluster-global signal._";
31+
};
32+
tcdm_congested = 2 {
33+
desc = "Incremented whenever an access towards the TCDM is made but the arbitration
34+
logic didn't grant the access (due to congestion). It's strictly less than tcdm_accessed.
35+
_This is a cluster-global signal._";
36+
};
37+
issue_fpu = 3 {
38+
desc = "Operations performed in the FPU. Includes both operations initiated by the
39+
sequencer and by the core. When the Xfrep extension is available, this counter is
40+
equivalent to issue_fpu_seq (see description of issue_fpu_seq). If the Xfrep extension
41+
is not supported, then it is equivalent to issue_core_to_fpu. _This is a hart-local signal._";
42+
};
43+
issue_fpu_seq = 4 {
44+
desc = "Incremented whenever the FPU Sequencer issues an FPU instruction.
45+
Might not be available if the hardware doesn't support FREP.
46+
Note that all FP instructions offloaded by the core to the FPU are routed
47+
through the sequencer (although not necessarily buffered) and thus are also counted.
48+
The instructions issued independently by the FPU sequencer could thus be
49+
calculated as issue_fpu_seq_proper = issue_fpu_seq - issue_core_to_fpu.
50+
_This is a hart-local signal._";
51+
};
52+
issue_core_to_fpu = 5 {
53+
desc = "Incremented whenever the core issues an FPU instruction.
54+
_This is a hart-local signal._";
55+
};
56+
retired_instr = 6 {
57+
desc = "Instructions retired by the core, both offloaded and not. Does not
58+
count instructions issued independently by the FPU sequencer.
59+
_This is a hart-local signal._";
60+
};
61+
retired_load = 7 {
62+
desc = "Load instructions retired by the core. _This is a hart-local signal._";
63+
};
64+
retired_i = 8 {
65+
desc = "Base instructions retired by the core. _This is a hart-local signal._";
66+
};
67+
retired_acc = 9 {
68+
desc = "Offloaded instructions retired by the core. _This is a hart-local signal._";
69+
};
70+
dma_aw_stall = 10 {
71+
desc = "Incremented whenever aw_valid = 1 but aw_ready = 0.
72+
_This is a DMA-local signal_";
73+
};
74+
dma_ar_stall = 11 {
75+
desc = "Incremented whenever ar_valid = 1 but ar_ready = 0.
76+
_This is a DMA-local signal_";
77+
};
78+
dma_r_stall = 12 {
79+
desc = "Incremented whenever r_ready = 1 but r_valid = 0.
80+
_This is a DMA-local signal_";
81+
};
82+
dma_w_stall = 13 {
83+
desc = "Incremented whenever w_valid = 1 but w_ready = 0.
84+
_This is a DMA-local signal_";
85+
};
86+
dma_buf_w_stall = 14 {
87+
desc = "Incremented whenever w_ready = 1 but w_valid = 0.
88+
_This is a DMA-local signal_";
89+
};
90+
dma_buf_r_stall = 15 {
91+
desc = "Incremented whenever r_valid = 1 but r_ready = 0.
92+
_This is a DMA-local signal_";
93+
};
94+
dma_aw_done = 16 {
95+
desc = "Incremented whenever AW handshake occurs.
96+
_This is a DMA-local signal_";
97+
};
98+
dma_aw_bw = 17 {
99+
desc = "Whenever AW handshake occurs, the counter is incremented
100+
by the number of bytes transferred for this transaction
101+
_This is a DMA-local signal_";
102+
};
103+
dma_ar_done = 18 {
104+
desc = "Incremented whenever AR handshake occurs.
105+
_This is a DMA-local signal_";
106+
};
107+
dma_ar_bw = 19 {
108+
desc = "Whenever AR handshake occurs, the counter is incremented
109+
by the number of bytes transferred for this transaction
110+
_This is a DMA-local signal_";
111+
};
112+
dma_r_done = 20 {
113+
desc = "Incremented whenever R handshake occurs.
114+
_This is a DMA-local signal_";
115+
};
116+
dma_r_bw = 21 {
117+
desc = "Whenever R handshake occurs, the counter is incremented
118+
by the number of bytes transferred in this cycle
119+
_This is a DMA-local signal_";
120+
};
121+
dma_w_done = 22 {
122+
desc = "Incremented whenever W handshake occurs.
123+
_This is a DMA-local signal_";
124+
};
125+
dma_w_bw = 23 {
126+
desc = "Whenever W handshake occurs, the counter is incremented
127+
by the number of bytes transferred in this cycle
128+
_This is a DMA-local signal_";
129+
};
130+
dma_b_done = 24 {
131+
desc = "Incremented whenever B handshake occurs.
132+
_This is a DMA-local signal_";
133+
};
134+
dma_busy = 25 {
135+
desc = "Incremented whenever DMA is busy.
136+
_This is a DMA-local signal_";
137+
};
138+
icache_miss = 26 {
139+
desc = "Incremented for instruction cache misses.
140+
_This is a hart-local signal_";
141+
};
142+
icache_hit = 27 {
143+
desc = "Incremented for instruction cache hits.
144+
_This is a hart-local signal_";
145+
};
146+
icache_prefetch = 28 {
147+
desc = "Incremented for instruction cache prefetches.
148+
_This is a hart-local signal_";
149+
};
150+
icache_double_hit = 29 {
151+
desc = "Incremented for instruction cache double hit.
152+
_This is a hart-local signal_";
153+
};
154+
icache_stall = 30 {
155+
desc = "Incremented for instruction cache stalls.
156+
_This is a hart-local signal_";
157+
};
158+
};
159+
160+
reg perf_cnt_sel {
161+
desc = "Select the metric that is tracked for each performance counter.";
162+
field {
163+
name = "hart";
164+
desc = "Select from which hart in the cluster, starting from `0`,
165+
the event should be counted. For each performance counter
166+
the cores can be selected individually. If a hart greater
167+
than the cluster's total hart size is selected the selection
168+
will wrap and the hart corresponding to `hart_select % total_harts_in_cluster`
169+
will be selected.";
170+
hw = rw;
171+
sw = rw;
172+
} hart[9:0];
173+
};
174+
175+
reg perf_cnt {
176+
desc = "Performance counter. Set corresponding perf_cnt_sel register depending on what
177+
performance metric and hart you would like to track.";
178+
field {
179+
desc = "Performance counter";
180+
hw = rw;
181+
sw = rw;
182+
onwrite = wclr;
183+
} perf_counter[47:0];
184+
};
185+
186+
reg cl_clint_set {
187+
desc = "Set bits in the cluster-local CLINT. Writing a 1 at location i sets the cluster-local interrupt
188+
of hart i, where i is relative to the first hart in the cluster, ignoring the cluster base hart ID.";
189+
field {
190+
desc = "Set cluster-local interrupt of hart i";
191+
sw = w;
192+
hw = r;
193+
} cl_clint_set[31:0];
194+
};
195+
196+
reg cl_clint_clear {
197+
desc = "Clear bits in the cluster-local CLINT. Writing a 1 at location i clears the cluster-local interrupt
198+
of hart i, where i is relative to the first hart in the cluster, ignoring the cluster base hart ID.";
199+
field {
200+
desc = "Clear cluster-local interrupt of hart i";
201+
sw = w;
202+
hw = r;
203+
} cl_clint_clear[31:0];
204+
};
205+
206+
reg hw_barrier {
207+
desc = "Hardware barrier register. Loads to this register will block until all cores have
208+
performed the load. At this stage we know that they reached the same point in the control flow,
209+
i.e., the cores are synchronized.";
210+
field {
211+
desc = "Hardware barrier register.";
212+
sw = r;
213+
hw = rw;
214+
} hw_barrier[31:0];
215+
};
216+
217+
reg icache_prefetch_enable {
218+
desc = "Controls prefetching of the instruction cache.";
219+
field {
220+
desc = "Enable instruction prefetching.";
221+
hw = r;
222+
sw = w;
223+
} icache_prefetch_enable[0:0] = 1;
224+
};
225+
226+
reg spatz_status {
227+
desc = "Sets the status of the Spatz cluster.";
228+
field {
229+
desc = "Indicates the cluster is computing a kernel.";
230+
sw = w;
231+
hw = r;
232+
} hw_barrier[0:0] = 0;
233+
};
234+
235+
reg cluster_boot_control {
236+
desc = "Controls the cluster boot process";
237+
field {
238+
desc = "Post-bootstrapping entry point.";
239+
sw = rw;
240+
hw = r;
241+
} boot_cluster[31:0] = 0;
242+
};
243+
244+
reg cluster_eoc_exit {
245+
desc = "End of computation and exit status register";
246+
field {
247+
desc = "Indicates the end of computation and exit status.";
248+
sw = rw;
249+
hw = r;
250+
} eoc_exit[31:0] = 0;
251+
};
252+
253+
// Required for PeakRDL to define the `perf_metric` enum in
254+
// the generated RTL file, as this seems not to be done for
255+
// registers defined to be `external`.
256+
reg unused {
257+
desc = "Unused register added to print enum";
258+
field {
259+
sw=r; hw=na;
260+
encode = perf_metric;
261+
} f[4:0] = 0;
262+
};
263+
264+
regfile perf_regs {
265+
perf_cnt_en perf_cnt_en[NumPerfCounters];
266+
external perf_cnt_sel perf_cnt_sel[NumPerfCounters];
267+
external perf_cnt perf_cnt[NumPerfCounters];
268+
};
269+
270+
perf_regs perf_regs;
271+
external cl_clint_set cl_clint_set;
272+
external cl_clint_clear cl_clint_clear;
273+
external hw_barrier hw_barrier;
274+
icache_prefetch_enable icache_prefetch_enable;
275+
spatz_status spatz_status;
276+
cluster_boot_control cluster_boot_control;
277+
cluster_eoc_exit cluster_eoc_exit;
278+
unused unused;
279+
};
280+
281+
`endif // __SPATZ_CLUSTER_PERIPHERAL_REG_RDL__

util/clustergen.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ def main():
5454
with open(outdir / "spatz_cluster_wrapper.sv", "w") as f:
5555
f.write(cluster_tb.render_wrapper())
5656

57+
with open(outdir / "spatz_cluster.rdl", "w") as f:
58+
f.write(cluster_tb.render_rdl())
59+
5760
with open(outdir / "link.ld", "w") as f:
5861
f.write(cluster_tb.render_linker_script())
5962

util/clustergen/cluster.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ class SnitchCluster(Generator):
157157
files = {
158158
"spatzpkg": "src/spatz_pkg.sv.tpl",
159159
"wrapper": "src/spatz_cluster_wrapper.sv.tpl",
160+
"rdl": "src/spatz_cluster.rdl.tpl",
160161
"testbench": "tb/testbench.sv.tpl",
161162
}
162163

@@ -184,6 +185,13 @@ def l1_region(self):
184185
"""Return L1 Region as tuple. Base and length."""
185186
return (self.cfg["cluster_base_addr"], self.cfg["tcdm"]["size"])
186187

188+
def render_rdl(self):
    """Render the SystemRDL description of the cluster.

    Returns:
        The text produced by the ``rdl`` template, rendered with the
        cluster configuration, the ``to_sv_hex`` helper and the disclaimer
        exposed to the template.
    """
    template = self.templates.get_template(self.files["rdl"])
    context = {
        "cfg": self.cfg,
        "to_sv_hex": to_sv_hex,
        "disclaimer": self.DISCLAIMER,
    }
    return template.render_unicode(**context)
194+
187195
def render_wrapper(self):
188196
"""Render the cluster wrapper"""
189197
cfg_template = self.templates.get_template(self.files["wrapper"])
@@ -397,6 +405,9 @@ def __init__(self, cfg):
397405
# Store Snitch cluster config in separate variable
398406
self.cluster = SnitchCluster(cfg["cluster"], pma_cfg)
399407

408+
def render_rdl(self):
    """Render the cluster RDL by delegating to the wrapped cluster generator."""
    rendered_rdl = self.cluster.render_rdl()
    return rendered_rdl
410+
400411
def render_wrapper(self):
401412
return self.cluster.render_wrapper()
402413

0 commit comments

Comments
 (0)