// See LICENSE for license details.

#include "emulation.h"
#include "fp_emulation.h"
#include "unprivileged_memory.h"
#include "mtrap.h"
#include "config.h"
#include "pk.h"

#ifdef __riscv_vector

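// Emulate a vector load or store that trapped because an element address was
// misaligned.  The handler decodes the instruction, then performs the access
// one element index at a time (covering all segment fields for that index):
// data is staged through a byte buffer using unprivileged byte loads/stores,
// and moved in and out of the vector register file with vle8/vse8 under a
// temporary vl/vtype/vstart setting.  On completion, vl and vtype are
// restored, vstart is zeroed, and mepc is advanced past the instruction.
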
// Copy `size` bytes from `bytes` into vector register `which`, starting at
// byte offset `pos` within that register.
static inline void set_vreg(uintptr_t vlenb, uintptr_t which, uintptr_t pos, uintptr_t size, const uint8_t* bytes)
{
  // With LMUL=8, the accessible groups start at v0/v8/v16/v24, so fold the
  // register's position within its group into the byte offset, and bias the
  // pointer so that element `pos` of the vle8 is read from bytes[0].
  pos += (which % 8) * vlenb;
  bytes -= pos;

  // Tail-undisturbed with vstart=pos, so only bytes [pos, pos+size) change.
  asm volatile ("vsetvli x0, %0, e8, m8, tu, ma" :: "r" (pos + size));
  write_csr(vstart, pos);

  switch (which / 8) {
    case 0: asm volatile ("vle8.v v0, (%0)" :: "r" (bytes) : "memory"); break;
    case 1: asm volatile ("vle8.v v8, (%0)" :: "r" (bytes) : "memory"); break;
    case 2: asm volatile ("vle8.v v16, (%0)" :: "r" (bytes) : "memory"); break;
    case 3: asm volatile ("vle8.v v24, (%0)" :: "r" (bytes) : "memory"); break;
    default: break;
  }
}

// Copy `size` bytes out of vector register `which`, starting at byte offset
// `pos` within that register, into `bytes`.  Mirrors set_vreg, but uses a
// vse8 store instead of a vle8 load.
static inline void get_vreg(uintptr_t vlenb, uintptr_t which, uintptr_t pos, uintptr_t size, uint8_t* bytes)
{
  pos += (which % 8) * vlenb;
  bytes -= pos;

  asm volatile ("vsetvli x0, %0, e8, m8, tu, ma" :: "r" (pos + size));
  write_csr(vstart, pos);

  switch (which / 8) {
    case 0: asm volatile ("vse8.v v0, (%0)" :: "r" (bytes) : "memory"); break;
    case 1: asm volatile ("vse8.v v8, (%0)" :: "r" (bytes) : "memory"); break;
    case 2: asm volatile ("vse8.v v16, (%0)" :: "r" (bytes) : "memory"); break;
    case 3: asm volatile ("vse8.v v24, (%0)" :: "r" (bytes) : "memory"); break;
    default: break;
  }
}

// Reinstall a previously saved vl/vtype pair.
static inline void vsetvl(uintptr_t vl, uintptr_t vtype)
{
  asm volatile ("vsetvl x0, %0, %1" :: "r" (vl), "r" (vtype));
}

// Largest VLEN (in bits) this handler supports; it bounds the on-stack copy
// of the mask register (VLEN_MAX/8 bytes).
#define VLEN_MAX 4096

DECLARE_EMULATION_FUNC(misaligned_vec_ldst)
{
  uintptr_t vl = read_csr(vl);
  uintptr_t vtype = read_csr(vtype);
  uintptr_t vlenb = read_csr(vlenb);
  uintptr_t vstart = read_csr(vstart);

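  // Decode the trapping instruction.  vm (bit 25) selects masking; mop
  // (bits 27:26) selects unit-stride, strided, or indexed addressing;
  // mew (bit 28) and nf (bits 31:29) give the wide-EEW flag and the number
  // of segment fields; the lumop/sumop field (bits 24:20) distinguishes
  // plain, whole-register, and fault-only-first unit-stride forms.  Indexed
  // accesses use SEW for the data EEW and the instruction's width field for
  // the index EEW.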
  _Bool masked = ((insn >> 25) & 1) == 0;
  _Bool unit = ((insn >> 26) & 3) == 0;
  _Bool strided = ((insn >> 26) & 3) == 2;
  _Bool indexed = !strided && !unit;
  _Bool mew = (insn >> 28) & 1;
  _Bool lumop_simple = ((insn >> 20) & 0x1f) == 0;
  _Bool lumop_whole = ((insn >> 20) & 0x1f) == 8;
  _Bool lumop_fof = ((insn >> 20) & 0x1f) == 16;
  _Bool load = ((insn >> 5) & 1) == 0;
  _Bool illegal = mew || (unit && !(lumop_simple || lumop_whole || (load && lumop_fof)));
  _Bool fof = unit && lumop_fof;
  _Bool whole_reg = unit && lumop_whole;
  uintptr_t vd = (insn >> 7) & 0x1f;
  uintptr_t vs2 = (insn >> 20) & 0x1f;
  uintptr_t vsew = (vtype >> 3) & 3;
  uintptr_t vlmul = vtype & 7;
  uintptr_t view = (insn >> 12) & 3;
  uintptr_t veew = indexed ? vsew : view;
  uintptr_t len = 1 << veew;
  uintptr_t nf0 = 1 + ((insn >> 29) & 7);
  uintptr_t nf = whole_reg ? 1 : nf0;
  uintptr_t evl = whole_reg ? (nf0 * vlenb) >> veew : vl;
  uintptr_t base = GET_RS1(insn, regs);
  uintptr_t stride = strided ? GET_RS2(insn, regs) : nf * len;
  uintptr_t vemul = whole_reg ? 0 : (vlmul + veew - vsew) & 7;
  uintptr_t emul = 1 << ((vemul & 4) ? 0 : vemul);

  // Punt encodings we don't emulate (or VLENs we can't buffer) to the
  // normal illegal-instruction path.
  if (illegal || vlenb > VLEN_MAX / 8)
    return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);

  // Snapshot the mask register (v0); bit i governs element i.
  uint8_t mask[VLEN_MAX / 8];
  if (masked)
    get_vreg(vlenb, 0, 0, vlenb, mask);

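  // Emulate one element index (all of its segment fields) per iteration,
  // starting at the element index the hardware left in vstart.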
  do {
    if (!masked || ((mask[vstart / 8] >> (vstart % 8)) & 1)) {
      // compute element address
      uintptr_t addr = base + vstart * stride;
      if (indexed) {
        uintptr_t offset = 0;
        get_vreg(vlenb, vs2, vstart << view, 1 << view, (uint8_t *)&offset);
        addr = base + offset;
      }

      uint8_t bytes[8 /* max segments */ * sizeof(uint64_t)];

      if (!load) {
        // obtain store data from regfile
        for (uintptr_t seg = 0; seg < nf; seg++)
          get_vreg(vlenb, vd + seg * emul, vstart * len, len, &bytes[seg * len]);
      }

      // restore clobbered vl/vtype/vstart in case we trap
      vsetvl(vl, vtype);
      write_csr(vstart, vstart);

      if (load) {
        // obtain load data from memory
        for (uintptr_t seg = 0; seg < nf; seg++)
          for (uintptr_t i = 0; i < len; i++)
            bytes[seg * len + i] = load_uint8_t((void *)(addr + seg * len + i), mepc);

        // write load data to regfile
        for (uintptr_t seg = 0; seg < nf; seg++)
          set_vreg(vlenb, vd + seg * emul, vstart * len, len, &bytes[seg * len]);
      } else {
        // write store data to memory
        for (uintptr_t seg = 0; seg < nf; seg++)
          for (uintptr_t i = 0; i < len; i++)
            store_uint8_t((void *)(addr + seg * len + i), bytes[seg * len + i], mepc);
      }
    }
  } while (++vstart < evl && !fof);

  // Restore clobbered vl/vtype (fault-only-first loads complete only the
  // faulting element, so vl is trimmed to 1); the vsetvl also resets vstart
  // to 0.  Then advance past the emulated instruction.
  vsetvl(fof ? 1 : vl, vtype);
  write_csr(mepc, mepc + 4);
}

#endif