Commit 2a3a6d0

support emulation of misaligned vector loads/stores

1 parent b190bef

File tree: 4 files changed, +155 −0

  machine/emulation.h
  machine/machine.mk.in
  machine/misaligned_ldst.c
  machine/misaligned_vec_ldst.c

4 files changed

+155
-0
lines changed

machine/emulation.h (1 addition, 0 deletions)

@@ -17,6 +17,7 @@ void redirect_trap(uintptr_t epc, uintptr_t mstatus, uintptr_t badaddr);
 DECLARE_EMULATION_FUNC(truly_illegal_insn);
 DECLARE_EMULATION_FUNC(emulate_rvc_0);
 DECLARE_EMULATION_FUNC(emulate_rvc_2);
+DECLARE_EMULATION_FUNC(misaligned_vec_ldst);

 #define SH_RD 7
 #define SH_RS1 15
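
For context (not part of this diff): DECLARE_EMULATION_FUNC in this header expands to the shared emulation-handler prototype, which is why the new handler is later invoked with the register file, trap cause, faulting PC, mstatus, and instruction word. Roughly, as it appears in the surrounding tree rather than in this commit:

// Shared prototype for trap-time instruction emulation handlers (sketch).
#define DECLARE_EMULATION_FUNC(name) \
  void name(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc, \
            uintptr_t mstatus, insn_t insn)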

machine/machine.mk.in (1 addition, 0 deletions)

@@ -34,6 +34,7 @@ machine_c_srcs = \
   uart_litex.c \
   finisher.c \
   misaligned_ldst.c \
+  misaligned_vec_ldst.c \
   flush_icache.c \

 machine_asm_srcs = \

machine/misaligned_ldst.c (14 additions, 0 deletions)

@@ -44,6 +44,13 @@ void misaligned_load_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc)
     len = 2, shift = 8*(sizeof(uintptr_t) - len);
   else if ((insn & MASK_LHU) == MATCH_LHU)
     len = 2;
+#ifdef __riscv_vector
+  else if ((insn & (MASK_VLE8_V & 0x707f)) == (MATCH_VLE8_V & 0x707f)
+           || (insn & (MASK_VLE16_V & 0x707f)) == (MATCH_VLE16_V & 0x707f)
+           || (insn & (MASK_VLE32_V & 0x707f)) == (MATCH_VLE32_V & 0x707f)
+           || (insn & (MASK_VLE64_V & 0x707f)) == (MATCH_VLE64_V & 0x707f))
+    return misaligned_vec_ldst(regs, mcause, mepc, mstatus, insn);
+#endif
 #ifdef __riscv_compressed
 # if __riscv_xlen >= 64
   else if ((insn & MASK_C_LD) == MATCH_C_LD)

@@ -119,6 +126,13 @@ void misaligned_store_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc)
 #endif
   else if ((insn & MASK_SH) == MATCH_SH)
     len = 2;
+#ifdef __riscv_vector
+  else if ((insn & (MASK_VSE8_V & 0x707f)) == (MATCH_VSE8_V & 0x707f)
+           || (insn & (MASK_VSE16_V & 0x707f)) == (MATCH_VSE16_V & 0x707f)
+           || (insn & (MASK_VSE32_V & 0x707f)) == (MATCH_VSE32_V & 0x707f)
+           || (insn & (MASK_VSE64_V & 0x707f)) == (MATCH_VSE64_V & 0x707f))
+    return misaligned_vec_ldst(regs, mcause, mepc, mstatus, insn);
+#endif
 #ifdef __riscv_compressed
 # if __riscv_xlen >= 64
   else if ((insn & MASK_C_SD) == MATCH_C_SD)
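
A note on the repeated & 0x707f above: the generated MASK_VLE*_V/MASK_VSE*_V constants constrain more of the encoding than just the opcode and width (at minimum the addressing mode and lumop/sumop fields), so comparing against the full mask would only match the plain unit-stride forms. Masking both the mask and the match value down to 0x707f keeps just the major opcode (bits 6:0) and the width field (bits 14:12), so one comparison per element width also routes the strided, indexed, whole-register, and fault-only-first variants to misaligned_vec_ldst, which does the finer decoding. A standalone sketch of the same test, with the constants written out by hand (illustrative, not taken from the patch):

#include <stdint.h>

// Sketch only: 0x07 is the LOAD-FP major opcode and 0x27 is STORE-FP; vector
// loads and stores use the width encodings 000 (8-bit), 101 (16), 110 (32),
// and 111 (64), while the scalar FP widths 001..100 fall through elsewhere.
static int is_vector_ldst(uint32_t insn)
{
  switch (insn & 0x707f) {
    case 0x0007: case 0x5007: case 0x6007: case 0x7007:  // vector loads, EEW 8/16/32/64
    case 0x0027: case 0x5027: case 0x6027: case 0x7027:  // vector stores, EEW 8/16/32/64
      return 1;
    default:
      return 0;
  }
}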

machine/misaligned_vec_ldst.c (new file: 139 additions, 0 deletions)

// See LICENSE for license details.

#include "emulation.h"
#include "fp_emulation.h"
#include "unprivileged_memory.h"
#include "mtrap.h"
#include "config.h"
#include "pk.h"

#ifdef __riscv_vector

static inline void set_vreg(uintptr_t vlenb, uintptr_t which, uintptr_t pos, uintptr_t size, const uint8_t* bytes)
{
  pos += (which % 8) * vlenb;
  bytes -= pos;

  asm volatile ("vsetvli x0, %0, e8, m8, tu, ma" :: "r" (pos + size));
  write_csr(vstart, pos);

  switch (which / 8) {
    case 0: asm volatile ("vle8.v v0, (%0)" :: "r" (bytes) : "memory"); break;
    case 1: asm volatile ("vle8.v v8, (%0)" :: "r" (bytes) : "memory"); break;
    case 2: asm volatile ("vle8.v v16, (%0)" :: "r" (bytes) : "memory"); break;
    case 3: asm volatile ("vle8.v v24, (%0)" :: "r" (bytes) : "memory"); break;
    default: break;
  }
}

static inline void get_vreg(uintptr_t vlenb, uintptr_t which, uintptr_t pos, uintptr_t size, uint8_t* bytes)
{
  pos += (which % 8) * vlenb;
  bytes -= pos;

  asm volatile ("vsetvli x0, %0, e8, m8, tu, ma" :: "r" (pos + size));
  write_csr(vstart, pos);

  switch (which / 8) {
    case 0: asm volatile ("vse8.v v0, (%0)" :: "r" (bytes) : "memory"); break;
    case 1: asm volatile ("vse8.v v8, (%0)" :: "r" (bytes) : "memory"); break;
    case 2: asm volatile ("vse8.v v16, (%0)" :: "r" (bytes) : "memory"); break;
    case 3: asm volatile ("vse8.v v24, (%0)" :: "r" (bytes) : "memory"); break;
    default: break;
  }
}

static inline void vsetvl(uintptr_t vl, uintptr_t vtype)
{
  asm volatile ("vsetvl x0, %0, %1" :: "r" (vl), "r" (vtype));
}

#define VLEN_MAX 4096

DECLARE_EMULATION_FUNC(misaligned_vec_ldst)
{
  uintptr_t vl = read_csr(vl);
  uintptr_t vtype = read_csr(vtype);
  uintptr_t vlenb = read_csr(vlenb);
  uintptr_t vstart = read_csr(vstart);

  _Bool masked = ((insn >> 25) & 1) == 0;
  _Bool unit = ((insn >> 26) & 3) == 0;
  _Bool strided = ((insn >> 26) & 3) == 2;
  _Bool indexed = !strided && !unit;
  _Bool mew = (insn >> 28) & 1;
  _Bool lumop_simple = ((insn >> 20) & 0x1f) == 0;
  _Bool lumop_whole = ((insn >> 20) & 0x1f) == 8;
  _Bool lumop_fof = ((insn >> 20) & 0x1f) == 16;
  _Bool load = ((insn >> 5) & 1) == 0;
  _Bool illegal = mew || (unit && !(lumop_simple || lumop_whole || (load && lumop_fof)));
  _Bool fof = unit && lumop_fof;
  _Bool whole_reg = unit && lumop_whole;
  uintptr_t vd = (insn >> 7) & 0x1f;
  uintptr_t vs2 = (insn >> 20) & 0x1f;
  uintptr_t vsew = (vtype >> 3) & 3;
  uintptr_t vlmul = vtype & 7;
  uintptr_t view = (insn >> 12) & 3;
  uintptr_t veew = indexed ? vsew : view;
  uintptr_t len = 1 << veew;
  uintptr_t nf0 = 1 + ((insn >> 29) & 7);
  uintptr_t nf = whole_reg ? 1 : nf0;
  uintptr_t evl = whole_reg ? (nf0 * vlenb) >> veew : vl;
  uintptr_t base = GET_RS1(insn, regs);
  uintptr_t stride = strided ? GET_RS2(insn, regs) : nf * len;
  uintptr_t vemul = whole_reg ? 0 : (vlmul + veew - vsew) & 7;
  uintptr_t emul = 1 << ((vemul & 4) ? 0 : vemul);

  if (illegal || vlenb > VLEN_MAX / 8)
    return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);

  uint8_t mask[VLEN_MAX / 8];
  if (masked)
    get_vreg(vlenb, 0, 0, vlenb, mask);

  do {
    if (!masked || ((mask[vstart / 8] >> (vstart % 8)) & 1)) {
      // compute element address
      uintptr_t addr = base + vstart * stride;
      if (indexed) {
        uintptr_t offset = 0;
        get_vreg(vlenb, vs2, vstart << view, 1 << view, (uint8_t *)&offset);
        addr = base + offset;
      }

      uint8_t bytes[8 /* max segments */ * sizeof(uint64_t)];

      if (!load) {
        // obtain store data from regfile
        for (uintptr_t seg = 0; seg < nf; seg++)
          get_vreg(vlenb, vd + seg * emul, vstart * len, len, &bytes[seg * len]);
      }

      // restore clobbered vl/vtype/vstart in case we trap
      vsetvl(vl, vtype);
      write_csr(vstart, vstart);

      if (load) {
        // obtain load data from memory
        for (uintptr_t seg = 0; seg < nf; seg++)
          for (uintptr_t i = 0; i < len; i++)
            bytes[seg * len + i] = load_uint8_t((void *)(addr + seg * len + i), mepc);

        // write load data to regfile
        for (uintptr_t seg = 0; seg < nf; seg++)
          set_vreg(vlenb, vd + seg * emul, vstart * len, len, &bytes[seg * len]);
      } else {
        // write store data to memory
        for (uintptr_t seg = 0; seg < nf; seg++)
          for (uintptr_t i = 0; i < len; i++)
            store_uint8_t((void *)(addr + seg * len + i), bytes[seg * len + i], mepc);
      }
    }
  } while (++vstart < evl && !fof);

  // restore clobbered vl/vtype; vstart=0; advance pc
  vsetvl(fof ? 1 : vl, vtype);
  write_csr(mepc, mepc + 4);
}

#endif
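
A quick way to exercise the new path (an illustrative sketch, not part of the commit; it assumes an RVV 1.0 toolchain with the riscv_vector.h intrinsics and a hart whose vector unit reports misaligned addresses as misaligned load/store traps rather than handling them in hardware): the deliberately misaligned pointers below make vle32.v/vse32.v trap into misaligned_load_trap/misaligned_store_trap, which now forward the instruction to misaligned_vec_ldst instead of treating it as fatal.

// Hypothetical test program; build with something like: gcc -O2 -march=rv64gcv vec_misaligned.c
#include <riscv_vector.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
  uint8_t src[64], dst[64];
  for (int i = 0; i < 64; i++)
    src[i] = (uint8_t)i;

  // src + 1 and dst + 1 are not 4-byte aligned, so the vle32.v/vse32.v below
  // may take misaligned traps that the M-mode handler added by this commit
  // emulates byte by byte before resuming the program.
  size_t vl = __riscv_vsetvl_e32m1(8);
  vuint32m1_t v = __riscv_vle32_v_u32m1((const uint32_t *)(src + 1), vl);
  __riscv_vse32_v_u32m1((uint32_t *)(dst + 1), v, vl);

  return memcmp(dst + 1, src + 1, vl * sizeof(uint32_t)) == 0 ? 0 : 1;
}

Internally the handler stays element-at-a-time: get_vreg/set_vreg pick out bytes [pos, pos+size) of the v0/v8/v16/v24 register group by setting vl to pos+size and vstart to pos under a tail-undisturbed e8 vsetvli, biasing the buffer pointer back by pos so the selected element lands at bytes[0]; the guest's vl, vtype, and vstart are restored before each potentially trapping access and again before mepc is advanced.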
