Skip to content

Commit dc0788b

Browse files
Added a custom plugin to execute any code after each instruction
Added extensions to implement: 1. Mask agnostic behavior of filling with 1s - xspikema1s 2. Tail agnostic behavior of filling with 1s - xspiketa1s
1 parent fd604be commit dc0788b

File tree

9 files changed

+704
-2
lines changed

9 files changed

+704
-2
lines changed

ci-tests/spike-ld.ld

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
/* Linker script for the bare-metal Spike test: execution starts at _entry. */
ENTRY (_entry)
2+
3+
SECTIONS {
4+
/* Place the image starting at address 0x80000000. */
. = 0x80000000;
5+
/* All input .text sections first, followed by all input .data sections. */
.text : { *(.text) }
6+
.data : { *(.data) }
7+
}
8+

ci-tests/test-spike

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ riscv64-linux-gnu-gcc -static -O2 -o hello $CI/hello.c
2626
riscv64-linux-gnu-gcc -static -O2 -o dummy-slliuw $CI/dummy-slliuw.c
2727
riscv64-linux-gnu-gcc -static -O2 -o customcsr $CI/customcsr.c
2828
riscv64-linux-gnu-gcc -static -O2 -o atomics $CI/atomics.c
29+
riscv64-linux-gnu-gcc -static -nostdlib -o test-spiketama1s-ext -O0 -march=rv64gcv -mabi=lp64d -T $CI/spike-ld.ld $CI/test-spiketama1s-ext.S
2930

3031
# run snippy-based tests
3132
wget https://github.com/syntacore/snippy/releases/download/snippy-2.1/snippy-x86_64-linux.tar.xz
@@ -46,6 +47,45 @@ g++ -std=c++2a -I$INSTALL/include -L$INSTALL/lib $CI/testlib.cc -lriscv -o /dev/
4647
# run tests
4748
time $INSTALL/bin/spike --isa=rv64gc $BUILD/pk/pk hello | grep "Hello, world! Pi is approximately 3.141588."
4849
$INSTALL/bin/spike --log-commits --isa=rv64gc $BUILD/pk/pk atomics 2> /dev/null | grep "First atomic counter is 1000, second is 100"
50+
$INSTALL/bin/spike -l --log-commits --isa=rv64gcv_xspikema1s_xspiketa1s test-spiketama1s-ext &> spike-all-fill1s.log
51+
grep "v2 0x00000000ffffffffffffffff00000002" spike-all-fill1s.log
52+
grep "v3 0xffffffff000000000000000000000003" spike-all-fill1s.log
53+
grep "v4 0xffffffffffffffffffffffffff000000" spike-all-fill1s.log
54+
grep "v5 0xffffffffffffffffffffffffffffc000" spike-all-fill1s.log
55+
! grep "v6 0x" spike-all-fill1s.log
56+
grep "v7 0xffffffffffffffff00000000ffff0000" spike-all-fill1s.log
57+
grep "v8 0xffffffffffffffffffffffffffff0000" spike-all-fill1s.log
58+
grep "v9 0xffffffffffffffffffffffffff410113" spike-all-fill1s.log
59+
grep "v10 0xffffffffffff000002b1048700000113" spike-all-fill1s.log
60+
grep "v11 0xffffffffffffffffffffffffffffffff" spike-all-fill1s.log
61+
grep "v12 0xffffffff02b10487000000000c410113" spike-all-fill1s.log
62+
grep "v13 0xffffffffffffffffffffffffffffffff" spike-all-fill1s.log
63+
grep "v14 0x0c4101130c410113000000000c410113" spike-all-fill1s.log
64+
grep "v15 0xffffffffffffffff000000000c410113" spike-all-fill1s.log
65+
grep "v16 0xffffffffffffffffffff000000000000" spike-all-fill1s.log
66+
grep "v17 0xffffffffffffffffffffffffffffffff" spike-all-fill1s.log
67+
grep "v18 0xffffffffffffffffffffffffffffffff" spike-all-fill1s.log
68+
grep "v19 0xffffffffffffffffffffffffffffffff" spike-all-fill1s.log
69+
grep "v20 0xffffffff000000000206205700000113" spike-all-fill1s.log
70+
grep "v21 0xffffffff0000000055076e0000000c41" spike-all-fill1s.log
71+
grep "v22 0xffffffff000000000001b05700000487" spike-all-fill1s.log
72+
grep "v24 0xffffffffffffffffffffffffff000000" spike-all-fill1s.log
73+
grep "v25 0xffffffffffffffffffffffffff000000" spike-all-fill1s.log
74+
grep "v26 0xffff000000000000f557b05700000113" spike-all-fill1s.log
75+
grep "v27 0xffff000000000000cc81020600000c41" spike-all-fill1s.log
76+
grep "v28 0xffff0000000000000213550700000487" spike-all-fill1s.log
77+
grep "v29 0xffff00000000000000200001000002b1" spike-all-fill1s.log
78+
grep "v30 0xffffffffffffffff000000000c410113" spike-all-fill1s.log
79+
grep "v31 0xffffffffffffffff0000000002b10487" spike-all-fill1s.log
80+
$INSTALL/bin/spike -l --log-commits --isa=rv64gcv_xspikema1s test-spiketama1s-ext &> spike-mask-fill1s.log
81+
grep "v2 0x00000000ffffffffffffffff00000002" spike-mask-fill1s.log
82+
grep "v3 0x00000000000000000000000000000003" spike-mask-fill1s.log
83+
$INSTALL/bin/spike -l --log-commits --isa=rv64gcv_xspiketa1s test-spiketama1s-ext &> spike-tail-fill1s.log
84+
grep "v2 0x00000000000000000000000000000002" spike-tail-fill1s.log
85+
grep "v3 0xffffffff000000000000000000000003" spike-tail-fill1s.log
86+
$INSTALL/bin/spike -l --log-commits --isa=rv64gcv test-spiketama1s-ext &> spike-undisturbed.log
87+
grep "v2 0x00000000000000000000000000000002" spike-undisturbed.log
88+
grep "v3 0x00000000000000000000000000000003" spike-undisturbed.log
4989
LD_LIBRARY_PATH=$INSTALL/lib ./test-libriscv $BUILD/pk/pk hello | grep "Hello, world! Pi is approximately 3.141588."
5090
LD_LIBRARY_PATH=$INSTALL/lib ./test-customext $BUILD/pk/pk dummy-slliuw | grep "Executed successfully"
5191
LD_LIBRARY_PATH=$INSTALL/lib ./test-custom-csr $BUILD/pk/pk customcsr | grep "Executed successfully"

ci-tests/test-spiketama1s-ext.S

Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
.option norvc
2+
3+
.global _entry
4+
.global fromhost
5+
.global tohost
6+
7+
.text
8+
_entry:
9+
la t0, exception_handler
10+
csrw mtvec, t0
11+
csrr t1, mstatus
12+
// Setting bit number 9 (mstatus.VS)
13+
li t2, 1
14+
slli t2, t2, 9
15+
or t1, t1, t2
16+
// Setting bit number 13 (mstatus.FS)
17+
li t3, 1
18+
slli t3, t3, 13
19+
or t1, t1, t3
20+
csrw mstatus, t1
21+
22+
//----------------------------MASK AGNOSTIC TEST-------------------------------
23+
// Tail undisturbed, mask agnostic with VL = 3
24+
vsetivli a0, 3, e32, m1, tu, ma
25+
vmclr.m v0
26+
// Mask v0 - 0x00000000000000010000000100000001
27+
vadd.vi v0, v0, 1
28+
// Fourth element should be zero because of tu
29+
// If the mask fill-1s extension (xspikema1s) is enabled:
30+
// Second and third elements should be 0xffffffff because of 0001 mask
31+
// v2 0x00000000ffffffffffffffff00000002
32+
// If the mask fill-1s extension (xspikema1s) is disabled:
33+
// Second and third elements should be zero because of 0001 mask
34+
// v2 0x00000000000000000000000000000002
35+
vadd.vi v2, v2, 2, v0.t
36+
37+
//----------------------------TAIL AGNOSTIC TEST-------------------------------
38+
vsetivli a0, 3, e32, m1, ta, mu
39+
// Second and third elements should be zero because of 0001 mask with mu
40+
// If the tail fill-1s extension (xspiketa1s) is enabled:
41+
// Fourth element should be 0xffffffff because of ta
42+
// v3 0xffffffff000000000000000000000003
43+
// If the tail fill-1s extension (xspiketa1s) is disabled:
44+
// Fourth element should be zero because of ta
45+
// v3 0x00000000000000000000000000000003
46+
vadd.vi v3, v3, 3, v0.t
47+
48+
//-----------------------------VCOMPRESS.VM TEST-------------------------------
49+
// Tail agnostic, mask undisturbed with VL = 8
50+
vsetivli a0, 8, e8, m1, ta, mu
51+
vmclr.m v0
52+
vadd.vi v0, v0, 13
53+
// Mask v0 - 0b...00001101 => 3 set bits (of the first 8) => 3 elements
54+
// packed, remaining elements - tail: v4 0xffffffffffffffffffffffffff000000
55+
vcompress.vm v4, v1, v0
56+
57+
//-----------------------------MASK LOGICAL TEST-------------------------------
58+
// Tail undisturbed, mask agnostic with VL = 14
59+
vsetivli a0, 14, e8, m1, tu, ma
60+
// Mask destination => unmasked, tail-agnostic
61+
// v5 0xffffffffffffffffffffffffffffc000
62+
vmand.mm v5, v5, v5
63+
64+
//--------------------------SCALAR DESTINATION TEST----------------------------
65+
vsetivli a0, 8, e8, m1, ta, ma
66+
// t1 = x6, check that v6 hasn't been changed
67+
vcpop.m t1, v1, v0.t
68+
69+
//-------------------------------VIOTA.M TEST----------------------------------
70+
// Tail agnostic, mask agnostic with VL = 5
71+
vsetivli a0, 5, e16, m1, ta, ma
72+
vmclr.m v0
73+
vadd.vi v0, v0, 13
74+
// Mask v0 - 0b...00001101
75+
// packed, remaining elements - tail: v7 0xffffffffffffffff00000000ffff0000
76+
viota.m v7, v1, v0.t
77+
78+
//--------------------------------VMV.S.X TEST---------------------------------
79+
// Only first element updated, remaining elements - tail:
80+
// v8 0xffffffffffffffffffffffffffff0000
81+
vmv.s.x v8, sp
82+
83+
//---------------------------------VLM.V TEST----------------------------------
84+
// Tail undisturbed, mask agnostic with VL = 31
85+
vsetivli a0, 31, e32, m8, tu, ma
86+
// For all loads, we will use this address of the executable section (so that
87+
// the bytes are unambiguous) placed in the sp.
88+
li sp, 0x00000000800000c4
89+
// Destination register is always written with a tail-agnostic policy.
90+
// evl = ceil(vl / 8) = 3, eew = 8 => tail starts from third element
91+
// v9 0xffffffffffffffffffffffffff410113
92+
vlm.v v9, 0(sp)
93+
94+
//--------------------------VECTOR UNIT-STRIDE TEST----------------------------
95+
// Tail agnostic, mask agnostic with VL = 5
96+
vsetivli a0, 5, e8, m1, ta, ma
97+
vmclr.m v0
98+
vadd.vi v0, v0, 13
99+
// Mask v0 - 0b...00001101
100+
// Masked vector loads do not update inactive elements.
101+
// eew = 16, sew = 8 ignored.
102+
// v10 0xffffffffffff000002b1048700000113
103+
// v11 0xffffffffffffffffffffffffffffffff
104+
vle16.v v10, 0(sp), v0.t
105+
106+
//----------------------------VECTOR STRIDED TEST------------------------------
107+
// Tail agnostic, mask agnostic with VL = 3
108+
vsetivli a0, 3, e16, m1, ta, ma
109+
// Mask v0 - 0b...00001101
110+
li tp, 2
111+
// Masked vector loads do not update inactive elements.
112+
// eew = 32, sew = 16 ignored.
113+
// v12 0xffffffff02b10487000000000c410113
114+
// v13 0xffffffffffffffffffffffffffffffff
115+
vlse32.v v12, 0(sp), tp, v0.t
116+
117+
//----------------------------VECTOR INDEXED TEST------------------------------
118+
// Tail agnostic, mask agnostic with VL = 6
119+
vsetivli a0, 6, e32, m2, ta, ma
120+
// Mask v0 - 0b...00011101
121+
li tp, 29
122+
vmv.s.x v0, tp
123+
// Masked vector loads do not update inactive elements.
124+
// eew = sew = 32
125+
// v14 0x0c4101130c410113000000000c410113
126+
// v15 0xffffffffffffffff000000000c410113
127+
vloxei8.v v14, 0(sp), v1, v0.t
128+
129+
//------------------VECTOR UNIT-STRIDE FAULT-ONLY-FIRST TEST-------------------
130+
// Tail agnostic, mask agnostic with VL = 31
131+
vsetivli a0, 31, e8, m2, ta, ma
132+
// Mask v0 - 0b...001111
133+
li tp, 15
134+
// For all fault-only-first loads, we will use this address, because a few
135+
// bytes after it (0x00000000ffffffff), a trap happens.
136+
vmv.s.x v0, tp
137+
li tp, 0x00000000fffffffa
138+
// Load instructions may overwrite active destination vector register group
139+
// elements past the element index at which the trap is reported.
140+
// Here element with index 3 raised an exception => vl reduced to 3.
141+
// eew = 16, sew = 8 ignored.
142+
// v16 0xffffffffffffffffffff000000000000
143+
// v17 0xffffffffffffffffffffffffffffffff
144+
// v18 0xffffffffffffffffffffffffffffffff
145+
// v19 0xffffffffffffffffffffffffffffffff
146+
vle16ff.v v16, (tp), v0.t
147+
148+
//----------------------VECTOR UNIT-STRIDE SEGMENT TEST------------------------
149+
// Tail agnostic, mask agnostic with VL = 6
150+
vsetivli a0, 6, e32, m2, ta, ma
151+
// Mask v0 - 0b...0001101
152+
li t0, 13
153+
vmv.s.x v0, t0
154+
// Masked vector loads do not update inactive elements.
155+
// nf = 3
156+
// v20 0xffffffff000000000206205700000113
157+
// v21 0xffffffff0000000055076e0000000c41
158+
// v22 0xffffffff000000000001b05700000487
159+
vlseg3e16.v v20, 0(sp), v0.t
160+
161+
//-------------VECTOR UNIT-STRIDE SEGMENT FAULT-ONLY-FIRST TEST----------------
162+
// Tail agnostic, mask agnostic with VL = 14
163+
vsetivli a0, 14, e8, m1, ta, ma
164+
// nf = 2, vl reduced to 3.
165+
// v24 0xffffffffffffffffffffffffff000000
166+
// v25 0xffffffffffffffffffffffffff000000
167+
vlseg2e8ff.v v24, (tp), v0.t
168+
169+
//------------------------VECTOR STRIDED SEGMENT TEST--------------------------
170+
// Tail agnostic, mask agnostic with VL = 7
171+
vsetivli a0, 7, e32, m2, ta, ma
172+
// Mask v0 - 0b...0001101
173+
// Masked vector loads do not update inactive elements.
174+
li t1, 8
175+
// nf = 4
176+
// v26 0xffff000000000000f557b05700000113
177+
// v27 0xffff000000000000cc81020600000c41
178+
// v28 0xffff0000000000000213550700000487
179+
// v29 0xffff00000000000000200001000002b1
180+
vlsseg4e16.v v26, 0(sp), t1, v0.t
181+
182+
//------------------------VECTOR INDEXED SEGMENT TEST--------------------------
183+
// Tail agnostic, mask agnostic with VL = 2
184+
vsetivli a0, 2, e32, m1, ta, ma
185+
// Mask v0 - 0b...00001
186+
vmclr.m v0
187+
vadd.vi v0, v0, 1
188+
// Masked vector loads do not update inactive elements.
189+
// v30 0xffffffffffffffff000000000c410113
190+
// v31 0xffffffffffffffff0000000002b10487
191+
vluxseg2ei32.v v30, 0(sp), v1, v0.t
192+
193+
//---------------------------------END TESTING---------------------------------
194+
la t0, exit
195+
jalr t0
196+
197+
exception_handler:
198+
csrr x10, mcause
199+
// In case of breakpoint (Interrupt = 0, Exception code = 3) we finalize.
200+
// Otherwise it's not the expected behavior and we go into an infinite loop.
201+
li x11, 3
202+
beq x10, x11, exit
203+
j infinite_loop
204+
205+
// Stores the value 1 to the tohost location (presumably the HTIF "test
// finished" signal that makes Spike stop -- confirm against the HTIF
// convention). There is no jump after the store, so execution falls through
// into infinite_loop below.
exit:
206+
li ra, 1
207+
la sp, tohost
208+
sd ra, 0(sp)
209+
210+
infinite_loop:
211+
j infinite_loop
212+
213+
.balign 64
214+
tohost:
215+
.8byte 0x0
216+
.balign 64
217+
fromhost:
218+
.8byte 0x0

customext/agnostic_macros.h

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
#ifndef _RISCV_AGNOSTIC_MACROS_H
2+
#define _RISCV_AGNOSTIC_MACROS_H
3+
4+
#include "decode_macros.h"
5+
#include "insn_macros.h"
6+
7+
#include <cassert>
8+
9+
// Declares the locals shared by the AGNOSTIC_* loop macros:
//   vl, vstart, sew          - read from P.VU
//   rd_num_start, rd_num     - both initialised to insn.rd()
// Requires `P` and `insn` to be in scope where the macro is expanded.
#define AGNOSTIC_RVV_PARAMS \
10+
reg_t UNUSED vl = P.VU.vl->read(); \
11+
reg_t UNUSED vstart = P.VU.vstart->read(); \
12+
reg_t UNUSED sew = P.VU.vsew; \
13+
reg_t rd_num_start = insn.rd(); \
14+
reg_t rd_num = rd_num_start;
15+
16+
// Declares two locals for element width `x`:
//   vd       - reference to element `i` of vector register `rd_num`, obtained
//              from P.VU.elt<>() with the signed element type for `x`
//   all_ones - the value -1 converted to type_usew_t<x>::type (all bits set,
//              assuming that type is unsigned as its name suggests)
// Requires `P`, `rd_num` and `i` to be in scope at the expansion site.
#define AGNOSTIC_VD_PARAM(x) \
17+
type_sew_t<x>::type &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i, true); \
18+
type_usew_t<x>::type all_ones = -1;
19+
20+
// Runs BODY once with `vd` and `all_ones` (see AGNOSTIC_VD_PARAM) declared for
// the element width that matches the local `sew`: e8, e16, e32 or e64.
// If `sew` matches none of these, nothing is executed.
#define AGNOSTIC_SEW_SWITCH(BODY) \
21+
do { \
22+
if (sew == e8) { \
23+
AGNOSTIC_VD_PARAM(e8); \
24+
BODY; \
25+
} else if (sew == e16) { \
26+
AGNOSTIC_VD_PARAM(e16); \
27+
BODY; \
28+
} else if (sew == e32) { \
29+
AGNOSTIC_VD_PARAM(e32); \
30+
BODY; \
31+
} else if (sew == e64) { \
32+
AGNOSTIC_VD_PARAM(e64); \
33+
BODY; \
34+
} \
35+
} while (0)
36+
37+
// Runs BODY for every element index i in [TAIL_FROM, TAIL_TO) of the
// destination register insn.rd(), with `vd`/`all_ones` typed for the current
// SEW. TAIL_FROM and TAIL_TO are evaluated after AGNOSTIC_RVV_PARAMS, so they
// may refer to the locals it declares (e.g. vstart, vl).
#define AGNOSTIC_LOOP_TAIL(BODY, TAIL_FROM, TAIL_TO) \
38+
do { \
39+
AGNOSTIC_RVV_PARAMS; \
40+
for (reg_t i = (TAIL_FROM); i < (TAIL_TO); ++i) { \
41+
AGNOSTIC_SEW_SWITCH(BODY); \
42+
} \
43+
} while (0)
44+
45+
// Runs BODY over the element indices [vstart, vl) of the destination register.
#define AGNOSTIC_LOOP_BODY(BODY) AGNOSTIC_LOOP_TAIL(BODY, vstart, vl)
46+
47+
// Sets mask element i of the destination register insn.rd() to VALUE for
// every i in [TAIL_FROM, TAIL_TO), via P.VU.set_mask_elt(). TAIL_FROM,
// TAIL_TO and VALUE may refer to the locals declared by AGNOSTIC_RVV_PARAMS.
#define AGNOSTIC_LOOP_MASK_TAIL(VALUE, TAIL_FROM, TAIL_TO) \
48+
do { \
49+
AGNOSTIC_RVV_PARAMS; \
50+
for (reg_t i = (TAIL_FROM); i < (TAIL_TO); ++i) \
51+
P.VU.set_mask_elt(rd_num, i, (VALUE)); \
52+
} while (0)
53+
54+
// Load variant of AGNOSTIC_LOOP_TAIL: the element width is taken from EEW
// instead of vsew (the local `sew` is overwritten), and BODY is run for
// element indices [TAIL_FROM, TAIL_TO) of each of the NF consecutive
// registers rd, rd + 1, ..., rd + NF - 1. `rd_num` names the register being
// processed in the current iteration.
#define AGNOSTIC_LOOP_LOAD_TAIL(BODY, EEW, NF, TAIL_FROM, TAIL_TO) \
55+
do { \
56+
AGNOSTIC_RVV_PARAMS; \
57+
sew = (EEW); \
58+
for (; rd_num < rd_num_start + (NF); ++rd_num) { \
59+
for (reg_t i = (TAIL_FROM); i < (TAIL_TO); ++i) { \
60+
AGNOSTIC_SEW_SWITCH(BODY); \
61+
} \
62+
} \
63+
} while (0)
64+
65+
// Returns true when insn's major opcode is 0b0100111, the opcode shared by
// vector stores and floating-point stores. It does not tell the two apart.
inline bool is_rvv_or_fp_store(insn_t insn) {
66+
// rvv-spec-1.0: Vector Load/Store Instruction Encoding: unit-stride,
67+
// strided, indexed stores.
68+
// FP stores also have this opcode.
69+
return insn.opcode() == 0b0100111;
70+
}
71+
72+
// Returns true when insn's major opcode is 0b0000111, the opcode shared by
// vector loads and floating-point loads. It does not tell the two apart.
inline bool is_rvv_or_fp_load(insn_t insn) {
72+
// rvv-spec-1.0: Vector Load/Store Instruction Encoding: unit-stride,
73+
// strided, indexed loads.
74+
// FP loads also have this opcode.
75+
return insn.opcode() == 0b0000111;
76+
}
78+
79+
// Returns true when insn is classified as a vector instruction:
//   - it has the load opcode and matches none of FLH/FLW/FLD/FLQ, or
//   - it has the store opcode and matches none of FSH/FSW/FSD/FSQ, or
//   - its major opcode is 0b1010111 or 0b1110111.
// NOTE(review): `insn` is used below, so [[maybe_unused]] looks unnecessary.
inline bool is_rvv([[maybe_unused]] insn_t insn) {
80+
const auto &opcode = insn.opcode();
81+
const auto &bits = insn.bits();
82+
if (is_rvv_or_fp_load(insn)) {
83+
// Shared load opcode: treat as vector only if it is not a scalar FP load.
return (bits & MASK_FLD) != MATCH_FLD && (bits & MASK_FLH) != MATCH_FLH &&
84+
(bits & MASK_FLQ) != MATCH_FLQ && (bits & MASK_FLW) != MATCH_FLW;
85+
}
86+
if (is_rvv_or_fp_store(insn)) {
87+
// Shared store opcode: treat as vector only if it is not a scalar FP store.
return (bits & MASK_FSD) != MATCH_FSD && (bits & MASK_FSH) != MATCH_FSH &&
88+
(bits & MASK_FSQ) != MATCH_FSQ && (bits & MASK_FSW) != MATCH_FSW;
89+
}
90+
// 0b1010111 is the OP-V major opcode; 0b1110111 presumably covers further
// vector instructions on a second major opcode -- TODO confirm.
return opcode == 0b1010111 || opcode == 0b1110111;
91+
}
92+
93+
// Returns true when insn matches one of vsetvl, vsetvli, vsetivli, vcpop.m,
// vfirst.m, vmv.x.s or vfmv.f.s -- going by the function name, the vector
// instructions whose destination is a scalar register rather than a vector
// register. Precondition (checked with assert): is_rvv(insn) holds.
inline bool is_rvv_scalar_dest(insn_t insn) {
94+
assert(is_rvv(insn));
95+
const auto &bits = insn.bits();
96+
return (bits & MASK_VSETVL) == MATCH_VSETVL ||
97+
(bits & MASK_VSETVLI) == MATCH_VSETVLI ||
98+
(bits & MASK_VSETIVLI) == MATCH_VSETIVLI ||
99+
(bits & MASK_VCPOP_M) == MATCH_VCPOP_M ||
100+
(bits & MASK_VFIRST_M) == MATCH_VFIRST_M ||
101+
(bits & MASK_VMV_X_S) == MATCH_VMV_X_S ||
102+
(bits & MASK_VFMV_F_S) == MATCH_VFMV_F_S;
103+
}
104+
105+
#endif

customext/customext.mk.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,7 @@ customext_subproject_deps = \
77
customext_srcs = \
88
dummy_rocc.cc \
99
cflush.cc \
10+
spikema1s.cc \
11+
spiketa1s.cc \
1012

1113
customext_install_shared_lib = yes

0 commit comments

Comments
 (0)