diff --git a/ci-tests/spike-ld.ld b/ci-tests/spike-ld.ld
new file mode 100644
index 0000000000..9da5cb2df0
--- /dev/null
+++ b/ci-tests/spike-ld.ld
@@ -0,0 +1,8 @@
+ENTRY (_entry)
+
+SECTIONS {
+  . = 0x80000000;
+  .text : { *(.text) }
+  .data : { *(.data) }
+}
+
diff --git a/ci-tests/test-spike b/ci-tests/test-spike
index ebec4c6343..97355a0bcf 100755
--- a/ci-tests/test-spike
+++ b/ci-tests/test-spike
@@ -26,6 +26,7 @@ riscv64-linux-gnu-gcc -static -O2 -o hello $CI/hello.c
 riscv64-linux-gnu-gcc -static -O2 -o dummy-slliuw $CI/dummy-slliuw.c
 riscv64-linux-gnu-gcc -static -O2 -o customcsr $CI/customcsr.c
 riscv64-linux-gnu-gcc -static -O2 -o atomics $CI/atomics.c
+riscv64-linux-gnu-gcc -static -nostdlib -o test-spiketama1s-ext -O0 -march=rv64gcv -mabi=lp64d -T $CI/spike-ld.ld $CI/test-spiketama1s-ext.S
 
 # run snippy-based tests
 wget https://github.com/syntacore/snippy/releases/download/snippy-2.1/snippy-x86_64-linux.tar.xz
@@ -46,6 +47,45 @@ g++ -std=c++2a -I$INSTALL/include -L$INSTALL/lib $CI/testlib.cc -lriscv -o /dev/
 # run tests
 time $INSTALL/bin/spike --isa=rv64gc $BUILD/pk/pk hello | grep "Hello, world! Pi is approximately 3.141588."
 $INSTALL/bin/spike --log-commits --isa=rv64gc $BUILD/pk/pk atomics 2> /dev/null | grep "First atomic counter is 1000, second is 100"
+$INSTALL/bin/spike -l --log-commits --isa=rv64gcv_xspikema1s_xspiketa1s test-spiketama1s-ext &> spike-all-fill1s.log
+grep "v2 0x00000000ffffffffffffffff00000002" spike-all-fill1s.log
+grep "v3 0xffffffff000000000000000000000003" spike-all-fill1s.log
+grep "v4 0xffffffffffffffffffffffffff000000" spike-all-fill1s.log
+grep "v5 0xffffffffffffffffffffffffffffc000" spike-all-fill1s.log
+! grep "v6 0x" spike-all-fill1s.log
+grep "v7 0xffffffffffffffff00000000ffff0000" spike-all-fill1s.log
+grep "v8 0xffffffffffffffffffffffffffff0000" spike-all-fill1s.log
+grep "v9 0xffffffffffffffffffffffff0c410113" spike-all-fill1s.log
+grep "v10 0xffffffffffff000002b1048700000113" spike-all-fill1s.log
+grep "v11 0xffffffffffffffffffffffffffffffff" spike-all-fill1s.log
+grep "v12 0xffffffff02b10487000000000c410113" spike-all-fill1s.log
+grep "v13 0xffffffffffffffffffffffffffffffff" spike-all-fill1s.log
+grep "v14 0x0c4101130c410113000000000c410113" spike-all-fill1s.log
+grep "v15 0xffffffffffffffff000000000c410113" spike-all-fill1s.log
+grep "v16 0xffffffffffffffffffff000000000000" spike-all-fill1s.log
+grep "v17 0xffffffffffffffffffffffffffffffff" spike-all-fill1s.log
+grep "v18 0xffffffffffffffffffffffffffffffff" spike-all-fill1s.log
+grep "v19 0xffffffffffffffffffffffffffffffff" spike-all-fill1s.log
+grep "v20 0xffffffff000000000206205700000113" spike-all-fill1s.log
+grep "v21 0xffffffff0000000055076e0000000c41" spike-all-fill1s.log
+grep "v22 0xffffffff000000000001b05700000487" spike-all-fill1s.log
+grep "v24 0xffffffffffffffffffffffffff000000" spike-all-fill1s.log
+grep "v25 0xffffffffffffffffffffffffff000000" spike-all-fill1s.log
+grep "v26 0xffff000000000000f557b05700000113" spike-all-fill1s.log
+grep "v27 0xffff000000000000cc81020600000c41" spike-all-fill1s.log
+grep "v28 0xffff0000000000000213550700000487" spike-all-fill1s.log
+grep "v29 0xffff00000000000000200001000002b1" spike-all-fill1s.log
+grep "v30 0xffffffffffffffff000000000c410113" spike-all-fill1s.log
+grep "v31 0xffffffffffffffff0000000002b10487" spike-all-fill1s.log
+$INSTALL/bin/spike -l --log-commits --isa=rv64gcv_xspikema1s test-spiketama1s-ext &> spike-mask-fill1s.log
+grep "v2 0x00000000ffffffffffffffff00000002" spike-mask-fill1s.log
+grep "v3 0x00000000000000000000000000000003" spike-mask-fill1s.log
+$INSTALL/bin/spike -l --log-commits --isa=rv64gcv_xspiketa1s test-spiketama1s-ext &> spike-tail-fill1s.log
+grep "v2 0x00000000000000000000000000000002" spike-tail-fill1s.log
+grep "v3 0xffffffff000000000000000000000003" spike-tail-fill1s.log
+$INSTALL/bin/spike -l --log-commits --isa=rv64gcv test-spiketama1s-ext &> spike-undisturbed.log
+grep "v2 0x00000000000000000000000000000002" spike-undisturbed.log
+grep "v3 0x00000000000000000000000000000003" spike-undisturbed.log
 LD_LIBRARY_PATH=$INSTALL/lib ./test-libriscv $BUILD/pk/pk hello | grep "Hello, world! Pi is approximately 3.141588."
 LD_LIBRARY_PATH=$INSTALL/lib ./test-customext $BUILD/pk/pk dummy-slliuw | grep "Executed successfully"
 LD_LIBRARY_PATH=$INSTALL/lib ./test-custom-csr $BUILD/pk/pk customcsr | grep "Executed successfully"
diff --git a/ci-tests/test-spiketama1s-ext.S b/ci-tests/test-spiketama1s-ext.S
new file mode 100644
index 0000000000..5aa77a6b88
--- /dev/null
+++ b/ci-tests/test-spiketama1s-ext.S
@@ -0,0 +1,218 @@
+.option norvc
+
+.global _entry
+.global fromhost
+.global tohost
+
+.text
+_entry:
+  la t0, exception_handler
+  csrw mtvec, t0
+  csrr t1, mstatus
+  // Setting bit number 9 (mstatus.VS)
+  li t2, 1
+  slli t2, t2, 9
+  or t1, t1, t2
+  // Setting bit number 13 (mstatus.FS)
+  li t3, 1
+  slli t3, t3, 13
+  or t1, t1, t3
+  csrw mstatus, t1
+
+//----------------------------MASK AGNOSTIC TEST-------------------------------
+  // Tail undisturbed, mask agnostic with VL = 3
+  vsetivli a0, 3, e32, m1, tu, ma
+  vmclr.m v0
+  // Mask v0 - 0x00000000000000010000000100000001
+  vadd.vi v0, v0, 1
+  // Fourth element should be zero because of tu
+  // If mask fill 1s extension (xspikema1s) enable:
+  // Second and third elements should be 0xffffffff because of 0001 mask
+  // v2 0x00000000ffffffffffffffff00000002
+  // If mask fill 1s extension (xspikema1s) disable:
+  // Second and third elements should be zero because of 0001 mask
+  // v2 0x00000000000000000000000000000002
+  vadd.vi v2, v2, 2, v0.t
+
+//----------------------------TAIL AGNOSTIC TEST-------------------------------
+  vsetivli a0, 3, e32, m1, ta, mu
+  // Second and third elements should be zero because of 0001 mask with mu
+  // If tail fill 1s extension (xspiketa1s) enable:
+  // Fourth element should be 0xffffffff because of ta
+  // v3 0xffffffff000000000000000000000003
+  // If tail fill 1s extension (xspiketa1s) disable:
+  // Fourth element should be zero because of ta
+  // v3 0x00000000000000000000000000000003
+  vadd.vi v3, v3, 3, v0.t
+
+//-----------------------------VCOMPRESS.VM TEST-------------------------------
+  // Tail agnostic, mask undisturbed with VL = 8
+  vsetivli a0, 8, e8, m1, ta, mu
+  vmclr.m v0
+  vadd.vi v0, v0, 13
+  // Mask v0 - 0b...00001101 => 3 setting bits (of the first 8) => 3 elements
+  // packed, remaining elements - tail: v4 0xffffffffffffffffffffffffff000000
+  vcompress.vm v4, v1, v0
+
+//-----------------------------MASK LOGICAL TEST-------------------------------
+  // Tail undisturbed, mask agnostic with VL = 14
+  vsetivli a0, 14, e8, m1, tu, ma
+  // Mask destination => unmasked, tail-agnostic
+  // v5 0xffffffffffffffffffffffffffffc000
+  vmand.mm v5, v5, v5
+
+//--------------------------SCALAR DESTINATION TEST----------------------------
+  vsetivli a0, 8, e8, m1, ta, ma
+  // t1 = x6, check that v6 hasn't been changed
+  vcpop.m t1, v1, v0.t
+
+//-------------------------------VIOTA.M TEST----------------------------------
+  // Tail agnostic, mask agnostic with VL = 5
+  vsetivli a0, 5, e16, m1, ta, ma
+  vmclr.m v0
+  vadd.vi v0, v0, 13
+  // Mask v0 - 0b...00001101
+  // packed, remaining elements - tail: v7 0xffffffffffffffff00000000ffff0000
+  viota.m v7, v1, v0.t
+
+//--------------------------------VMV.S.X TEST---------------------------------
+  // Only first element updated, remaining elements - tail:
+  // v8 0xffffffffffffffffffffffffffff0000
+  vmv.s.x v8, sp
+
+//---------------------------------VLM.V TEST----------------------------------
+  // Tail undisturbed, mask agnostic with VL = 31
+  vsetivli a0, 31, e32, m8, tu, ma
+  // For all loads, we will use this address of the executable section (so that
+  // the bytes are unambiguous) placed in the sp.
+  li sp, 0x00000000800000c4
+  // Destination register is always written with a tail-agnostic policy.
+  // evl = ceil(vl / 8) = 4, eew = 8 => tail starts from the fifth byte
+  // v9 0xffffffffffffffffffffffff0c410113
+  vlm.v v9, 0(sp)
+
+//--------------------------VECTOR UNIT-STRIDE TEST----------------------------
+  // Tail agnostic, mask agnostic with VL = 5
+  vsetivli a0, 5, e8, m1, ta, ma
+  vmclr.m v0
+  vadd.vi v0, v0, 13
+  // Mask v0 - 0b...00001101
+  // Masked vector loads do not update inactive elements.
+  // eew = 16, sew = 8 ignored.
+  // v10 0xffffffffffff000002b1048700000113
+  // v11 0xffffffffffffffffffffffffffffffff
+  vle16.v v10, 0(sp), v0.t
+
+//----------------------------VECTOR STRIDED TEST------------------------------
+  // Tail agnostic, mask agnostic with VL = 3
+  vsetivli a0, 3, e16, m1, ta, ma
+  // Mask v0 - 0b...00001101
+  li tp, 2
+  // Masked vector loads do not update inactive elements.
+  // eew = 32, sew = 16 ignored.
+  // v12 0xffffffff02b10487000000000c410113
+  // v13 0xffffffffffffffffffffffffffffffff
+  vlse32.v v12, 0(sp), tp, v0.t
+
+//----------------------------VECTOR INDEXED TEST------------------------------
+  // Tail agnostic, mask agnostic with VL = 6
+  vsetivli a0, 6, e32, m2, ta, ma
+  // Mask v0 - 0b...00011101
+  li tp, 29
+  vmv.s.x v0, tp
+  // Masked vector loads do not update inactive elements.
+  // eew = sew = 32
+  // v14 0x0c4101130c410113000000000c410113
+  // v15 0xffffffffffffffff000000000c410113
+  vloxei8.v v14, 0(sp), v1, v0.t
+
+//------------------VECTOR UNIT-STRIDE FAULT-ONLY-FIRST TEST-------------------
+  // Tail agnostic, mask agnostic with VL = 31
+  vsetivli a0, 31, e8, m2, ta, ma
+  // Mask v0 - 0b...001111
+  li tp, 15
+  // For all fault-only-first loads, we will use this address, because a few
+  // bytes after it (0x00000000ffffffff), a trap happens.
+  vmv.s.x v0, tp
+  li tp, 0x00000000fffffffa
+  // Load instructions may overwrite active destination vector register group
+  // elements past the element index at which the trap is reported.
+  // Here element with index 3 raised an exception => vl reduced to 3.
+  // eew = 16, sew = 8 ignored.
+  // v16 0xffffffffffffffffffff000000000000
+  // v17 0xffffffffffffffffffffffffffffffff
+  // v18 0xffffffffffffffffffffffffffffffff
+  // v19 0xffffffffffffffffffffffffffffffff
+  vle16ff.v v16, (tp), v0.t
+
+//----------------------VECTOR UNIT-STRIDE SEGMENT TEST------------------------
+  // Tail agnostic, mask agnostic with VL = 6
+  vsetivli a0, 6, e32, m2, ta, ma
+  // Mask v0 - 0b...0001101
+  li t0, 13
+  vmv.s.x v0, t0
+  // Masked vector loads do not update inactive elements.
+  // nf = 3
+  // v20 0xffffffff000000000206205700000113
+  // v21 0xffffffff0000000055076e0000000c41
+  // v22 0xffffffff000000000001b05700000487
+  vlseg3e16.v v20, 0(sp), v0.t
+
+//-------------VECTOR UNIT-STRIDE SEGMENT FAULT-ONLY-FIRST TEST----------------
+  // Tail agnostic, mask agnostic with VL = 14
+  vsetivli a0, 14, e8, m1, ta, ma
+  // nf = 2, vl reduced to 3.
+  // v24 0xffffffffffffffffffffffffff000000
+  // v25 0xffffffffffffffffffffffffff000000
+  vlseg2e8ff.v v24, (tp), v0.t
+
+//------------------------VECTOR STRIDED SEGMENT TEST--------------------------
+  // Tail agnostic, mask agnostic with VL = 7
+  vsetivli a0, 7, e32, m2, ta, ma
+  // Mask v0 - 0b...0001101
+  // Masked vector loads do not update inactive elements.
+  li t1, 8
+  // nf = 4
+  // v26 0xffff000000000000f557b05700000113
+  // v27 0xffff000000000000cc81020600000c41
+  // v28 0xffff0000000000000213550700000487
+  // v29 0xffff00000000000000200001000002b1
+  vlsseg4e16.v v26, 0(sp), t1, v0.t
+
+//------------------------VECTOR INDEXED SEGMENT TEST--------------------------
+  // Tail agnostic, mask agnostic with VL = 2
+  vsetivli a0, 2, e32, m1, ta, ma
+  // Mask v0 - 0b...00001
+  vmclr.m v0
+  vadd.vi v0, v0, 1
+  // Masked vector loads do not update inactive elements.
+  // v30 0xffffffffffffffff000000000c410113
+  // v31 0xffffffffffffffff0000000002b10487
+  vluxseg2ei32.v v30, 0(sp), v1, v0.t
+
+//---------------------------------END TESTING---------------------------------
+  la t0, exit
+  jalr t0
+
+exception_handler:
+  csrr x10, mcause
+  // In case of breakpoint (Interrupt = 0, Exception code = 3) we finalize.
+  // Otherwise it's not the expected behavior and we go into an infinite loop.
+  li x11, 3
+  beq x10, x11, exit
+  j infinite_loop
+
+exit:
+  li ra, 1
+  la sp, tohost
+  sd ra, 0(sp)
+
+infinite_loop:
+  j infinite_loop
+
+.balign 64
+tohost:
+.8byte 0x0
+.balign 64
+fromhost:
+.8byte 0x0
diff --git a/customext/agnostic_macros.h b/customext/agnostic_macros.h
new file mode 100644
index 0000000000..715634e03e
--- /dev/null
+++ b/customext/agnostic_macros.h
@@ -0,0 +1,132 @@
+#ifndef _RISCV_AGNOSTIC_MACROS_H
+#define _RISCV_AGNOSTIC_MACROS_H
+
+#include "decode_macros.h"
+#include "insn_macros.h"
+
+#include <cassert>
+
+// Shared helpers for the spikema1s/spiketa1s extensions. The macros expand
+// to code that uses P and insn, so the expansion site needs a processor_t *p
+// and an insn_t insn in scope.
+
+// Vector state shared by the fill loops below.
+#define AGNOSTIC_RVV_PARAMS                                                    \
+  reg_t UNUSED vl = P.VU.vl->read();                                           \
+  reg_t UNUSED vstart = P.VU.vstart->read();                                   \
+  reg_t UNUSED sew = P.VU.vsew;                                                \
+  reg_t rd_num_start = insn.rd();                                              \
+  reg_t rd_num = rd_num_start;
+
+// Binds vd to destination element i of width x; all_ones is the fill value.
+#define AGNOSTIC_VD_PARAM(x)                                                   \
+  type_sew_t<x>::type &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i, true);    \
+  type_usew_t<x>::type all_ones = -1;
+
+// Runs BODY with vd/all_ones typed for the current value of sew.
+#define AGNOSTIC_SEW_SWITCH(BODY)                                              \
+  do {                                                                         \
+    if (sew == e8) {                                                           \
+      AGNOSTIC_VD_PARAM(e8);                                                   \
+      BODY;                                                                    \
+    } else if (sew == e16) {                                                   \
+      AGNOSTIC_VD_PARAM(e16);                                                  \
+      BODY;                                                                    \
+    } else if (sew == e32) {                                                   \
+      AGNOSTIC_VD_PARAM(e32);                                                  \
+      BODY;                                                                    \
+    } else if (sew == e64) {                                                   \
+      AGNOSTIC_VD_PARAM(e64);                                                  \
+      BODY;                                                                    \
+    }                                                                          \
+  } while (0)
+
+// Runs BODY for each destination element index in [TAIL_FROM, TAIL_TO).
+#define AGNOSTIC_LOOP_TAIL(BODY, TAIL_FROM, TAIL_TO)                           \
+  do {                                                                         \
+    AGNOSTIC_RVV_PARAMS;                                                       \
+    for (reg_t i = (TAIL_FROM); i < (TAIL_TO); ++i) {                          \
+      AGNOSTIC_SEW_SWITCH(BODY);                                               \
+    }                                                                          \
+  } while (0)
+
+// Runs BODY for each destination element index in [vstart, vl).
+#define AGNOSTIC_LOOP_BODY(BODY) AGNOSTIC_LOOP_TAIL(BODY, vstart, vl)
+
+// Sets destination mask bits [TAIL_FROM, TAIL_TO) to VALUE.
+#define AGNOSTIC_LOOP_MASK_TAIL(VALUE, TAIL_FROM, TAIL_TO)                     \
+  do {                                                                         \
+    AGNOSTIC_RVV_PARAMS;                                                       \
+    for (reg_t i = (TAIL_FROM); i < (TAIL_TO); ++i)                            \
+      P.VU.set_mask_elt(rd_num, i, (VALUE));                                   \
+  } while (0)
+
+// Runs BODY for elements [TAIL_FROM, TAIL_TO) of each of the NF fields of a
+// vector load with EEW-bit data elements.
+// rvv-spec-1.0: Vector Load/Store Segment Instructions: each field is held in
+// its own register group of EMUL registers, so field k starts at vd + k * EMUL.
+// EMUL is derived from TAIL_TO here (at least one register per field).
+#define AGNOSTIC_LOOP_LOAD_TAIL(BODY, EEW, NF, TAIL_FROM, TAIL_TO)             \
+  do {                                                                         \
+    AGNOSTIC_RVV_PARAMS;                                                       \
+    sew = (EEW);                                                               \
+    const reg_t agn_elts_per_reg = P.VU.VLEN / sew;                            \
+    reg_t agn_regs_per_field =                                                 \
+        ((TAIL_TO) + agn_elts_per_reg - 1) / agn_elts_per_reg;                 \
+    if (agn_regs_per_field == 0)                                               \
+      agn_regs_per_field = 1;                                                  \
+    for (reg_t agn_field = 0; agn_field < (NF); ++agn_field) {                 \
+      rd_num = rd_num_start + agn_field * agn_regs_per_field;                  \
+      for (reg_t i = (TAIL_FROM); i < (TAIL_TO); ++i) {                        \
+        AGNOSTIC_SEW_SWITCH(BODY);                                             \
+      }                                                                        \
+    }                                                                          \
+  } while (0)
+
+// True for the STORE-FP major opcode, shared by FP and vector stores.
+inline bool is_rvv_or_fp_store(insn_t insn) {
+  // rvv-spec-1.0: Vector Load/Store Instruction Encoding: unit-stride,
+  // strided, indexed stores.
+  // FP stores also have this opcode.
+  return insn.opcode() == 0b0100111;
+}
+
+// True for the LOAD-FP major opcode, shared by FP and vector loads.
+inline bool is_rvv_or_fp_load(insn_t insn) {
+  // rvv-spec-1.0: Vector Load/Store Instruction Encoding: unit-stride,
+  // strided, indexed loads.
+  // FP loads also have this opcode.
+  return insn.opcode() == 0b0000111;
+}
+
+// True for vector loads/stores (the scalar FP loads/stores that share their
+// opcodes are excluded) and for major opcodes 0b1010111 and 0b1110111.
+inline bool is_rvv(insn_t insn) {
+  const auto &opcode = insn.opcode();
+  const auto &bits = insn.bits();
+  if (is_rvv_or_fp_load(insn)) {
+    return (bits & MASK_FLD) != MATCH_FLD && (bits & MASK_FLH) != MATCH_FLH &&
+           (bits & MASK_FLQ) != MATCH_FLQ && (bits & MASK_FLW) != MATCH_FLW;
+  }
+  if (is_rvv_or_fp_store(insn)) {
+    return (bits & MASK_FSD) != MATCH_FSD && (bits & MASK_FSH) != MATCH_FSH &&
+           (bits & MASK_FSQ) != MATCH_FSQ && (bits & MASK_FSW) != MATCH_FSW;
+  }
+  return opcode == 0b1010111 || opcode == 0b1110111;
+}
+
+// True for V instructions that write no vector register: vset{i}vl{i} and
+// the instructions with an x/f scalar destination.
+inline bool is_rvv_scalar_dest(insn_t insn) {
+  assert(is_rvv(insn));
+  const auto &bits = insn.bits();
+  return (bits & MASK_VSETVL) == MATCH_VSETVL ||
+         (bits & MASK_VSETVLI) == MATCH_VSETVLI ||
+         (bits & MASK_VSETIVLI) == MATCH_VSETIVLI ||
+         (bits & MASK_VCPOP_M) == MATCH_VCPOP_M ||
+         (bits & MASK_VFIRST_M) == MATCH_VFIRST_M ||
+         (bits & MASK_VMV_X_S) == MATCH_VMV_X_S ||
+         (bits & MASK_VFMV_F_S) == MATCH_VFMV_F_S;
+}
+
+#endif
diff --git a/customext/customext.mk.in b/customext/customext.mk.in
index a14e771c2f..927fa01bae 100644
--- a/customext/customext.mk.in
+++ b/customext/customext.mk.in
@@ -7,5 +7,7 @@ customext_subproject_deps = \
 customext_srcs = \
 	dummy_rocc.cc \
 	cflush.cc \
+	spikema1s.cc \
+	spiketa1s.cc \
 
 customext_install_shared_lib = yes
diff --git a/customext/spikema1s.cc b/customext/spikema1s.cc
new file mode 100644
index 0000000000..3e7673663c
--- /dev/null
+++ b/customext/spikema1s.cc
@@ -0,0 +1,94 @@
+#include "agnostic_macros.h"
+#include "extension.h"
+#include "processor.h"
+
+#include <algorithm>
+
+// Returns true for instructions that are encoded with vm=0 but read v0 as a
+// data operand instead of an execution mask.
+// rvv-spec-1.0: vadc/vmadc/vsbc/vmsbc take the carry-in/borrow-in from v0, and
+// vmerge/vfmerge select between their sources by v0; every body element is
+// written, so there are no inactive elements to fill.
+static bool uses_v0_as_data_operand(insn_t insn) {
+  const auto &bits = insn.bits();
+  return (bits & MASK_VADC_VVM) == MATCH_VADC_VVM ||
+         (bits & MASK_VADC_VXM) == MATCH_VADC_VXM ||
+         (bits & MASK_VADC_VIM) == MATCH_VADC_VIM ||
+         (bits & MASK_VMADC_VVM) == MATCH_VMADC_VVM ||
+         (bits & MASK_VMADC_VXM) == MATCH_VMADC_VXM ||
+         (bits & MASK_VMADC_VIM) == MATCH_VMADC_VIM ||
+         (bits & MASK_VSBC_VVM) == MATCH_VSBC_VVM ||
+         (bits & MASK_VSBC_VXM) == MATCH_VSBC_VXM ||
+         (bits & MASK_VMSBC_VVM) == MATCH_VMSBC_VVM ||
+         (bits & MASK_VMSBC_VXM) == MATCH_VMSBC_VXM ||
+         (bits & MASK_VMERGE_VVM) == MATCH_VMERGE_VVM ||
+         (bits & MASK_VMERGE_VXM) == MATCH_VMERGE_VXM ||
+         (bits & MASK_VMERGE_VIM) == MATCH_VMERGE_VIM ||
+         (bits & MASK_VFMERGE_VFM) == MATCH_VFMERGE_VFM;
+}
+
+// Instruction post-process hook of the spikema1s extension: after a masked
+// vector instruction has executed with vtype.vma=1, overwrites the inactive
+// (masked-off) body elements of the destination with all 1s.
+// Always returns pc unchanged.
+// NOTE(review): destination elements are addressed as SEW-wide elements of
+// vd; masked instructions with a different destination layout (mask-producing
+// compares, widening/narrowing ops, reductions) look unhandled - confirm.
+static reg_t mask_agnostic_fill1s(processor_t *p, insn_t insn, reg_t pc) {
+  // rvv-spec-1.0: Vector Loads and Stores: Masked vector loads do not update
+  // inactive elements in the destination vector register group, unless masked
+  // agnostic is specified (vtype.vma=1). Masked vector stores only update
+  // active memory elements.
+  if (is_rvv_or_fp_store(insn) || is_rvv_or_fp_load(insn))
+    return pc;
+  // Belonging to the V extension is checked after loads and stores, because
+  // their opcodes do not match the opcodes of V extension.
+  if (!is_rvv(insn) || is_rvv_scalar_dest(insn))
+    return pc;
+  // When vm==1 then instruction unmasked, therefore, the agnostic policy does
+  // not apply to any elements.
+  if (insn.v_vm())
+    return pc;
+  // vm==0 does not denote masking for instructions that use v0 as data.
+  if (uses_v0_as_data_operand(insn))
+    return pc;
+  // When vma==0 then undisturbed policy is in effect.
+  if (p->VU.vma == 0)
+    return pc;
+
+  AGNOSTIC_LOOP_BODY({ // mask agnostic - fill 1s
+    if (!p->VU.mask_elt(0, i))
+      vd = all_ones;
+  });
+  return pc;
+}
+
+// Extension "spikema1s": adds no instructions or disassemblers; it only
+// registers mask_agnostic_fill1s as an instruction post-process hook.
+struct mask_agnostic_fill1s_t : public extension_t {
+  const char *name() const override { return "spikema1s"; }
+
+  mask_agnostic_fill1s_t() {}
+
+  std::vector<insn_desc_t> get_instructions(const processor_t &) override {
+    return {};
+  }
+
+  std::vector<disasm_insn_t *> get_disasms(const processor_t *) override {
+    return {};
+  }
+
+  // Registers the hook only if it is not already in the list.
+  void reset(processor_t &p) override {
+    auto &insn_postprocesses = p.get_state()->insn_postprocesses;
+    auto mask_func = std::find(insn_postprocesses.begin(),
+                               insn_postprocesses.end(), &mask_agnostic_fill1s);
+    if (mask_func == insn_postprocesses.end())
+      insn_postprocesses.push_back(&mask_agnostic_fill1s);
+  }
+};
+
+REGISTER_EXTENSION(spikema1s, []() {
+  static mask_agnostic_fill1s_t ext;
+  return &ext;
+})
diff --git a/customext/spiketa1s.cc b/customext/spiketa1s.cc
new file mode 100644
index 0000000000..234eb61d36
--- /dev/null
+++ b/customext/spiketa1s.cc
@@ -0,0 +1,288 @@
+#include "agnostic_macros.h"
+#include "extension.h"
+#include "processor.h"
+
+#include <algorithm>
+#include <numeric>
+#include <vector>
+
+// Returns true if the instruction writes a mask value to its destination
+// register; such destinations are always tail-agnostic and their tail is
+// filled at mask-bit granularity.
+static bool is_mask_dest(insn_t insn) {
+  const auto &bits = insn.bits();
+  return (bits & MASK_VMAND_MM) == MATCH_VMAND_MM ||
+         (bits & MASK_VMNAND_MM) == MATCH_VMNAND_MM ||
+         (bits & MASK_VMANDN_MM) == MATCH_VMANDN_MM ||
+         (bits & MASK_VMXOR_MM) == MATCH_VMXOR_MM ||
+         (bits & MASK_VMOR_MM) == MATCH_VMOR_MM ||
+         (bits & MASK_VMNOR_MM) == MATCH_VMNOR_MM ||
+         (bits & MASK_VMORN_MM) == MATCH_VMORN_MM ||
+         (bits & MASK_VMXNOR_MM) == MATCH_VMXNOR_MM ||
+         (bits & MASK_VLM_V) == MATCH_VLM_V ||
+         // rvv-spec-1.0: vmsbf.m, vmsif.m, vmsof.m: The tail elements in the
+         // destination mask register are updated under a tail-agnostic policy.
+         (bits & MASK_VMSBF_M) == MATCH_VMSBF_M ||
+         (bits & MASK_VMSIF_M) == MATCH_VMSIF_M ||
+         (bits & MASK_VMSOF_M) == MATCH_VMSOF_M ||
+         // rvv-spec-1.0: Vector Integer Add-with-Carry / Subtract-with-Borrow
+         // Instructions: Because these instructions produce a mask value, they
+         // always operate with a tail-agnostic policy.
+         // Only vmadc/vmsbc produce a mask; vadc/vsbc write an ordinary vector
+         // result and therefore follow vtype.vta like any other instruction.
+         (bits & MASK_VMADC_VVM) == MATCH_VMADC_VVM ||
+         (bits & MASK_VMADC_VXM) == MATCH_VMADC_VXM ||
+         (bits & MASK_VMADC_VIM) == MATCH_VMADC_VIM ||
+         (bits & MASK_VMADC_VV) == MATCH_VMADC_VV ||
+         (bits & MASK_VMADC_VX) == MATCH_VMADC_VX ||
+         (bits & MASK_VMADC_VI) == MATCH_VMADC_VI ||
+         (bits & MASK_VMSBC_VVM) == MATCH_VMSBC_VVM ||
+         (bits & MASK_VMSBC_VXM) == MATCH_VMSBC_VXM ||
+         (bits & MASK_VMSBC_VV) == MATCH_VMSBC_VV ||
+         (bits & MASK_VMSBC_VX) == MATCH_VMSBC_VX ||
+         // rvv-spec-1.0: Vector Integer Compare Instructions: The destination
+         // mask vector is always held in a single vector register, with a
+         // layout of elements as described in Section Mask Register Layout.
+         (bits & MASK_VMSEQ_VV) == MATCH_VMSEQ_VV ||
+         (bits & MASK_VMSEQ_VX) == MATCH_VMSEQ_VX ||
+         (bits & MASK_VMSEQ_VI) == MATCH_VMSEQ_VI ||
+         (bits & MASK_VMSNE_VV) == MATCH_VMSNE_VV ||
+         (bits & MASK_VMSNE_VX) == MATCH_VMSNE_VX ||
+         (bits & MASK_VMSNE_VI) == MATCH_VMSNE_VI ||
+         (bits & MASK_VMSLTU_VV) == MATCH_VMSLTU_VV ||
+         (bits & MASK_VMSLTU_VX) == MATCH_VMSLTU_VX ||
+         (bits & MASK_VMSLT_VV) == MATCH_VMSLT_VV ||
+         (bits & MASK_VMSLT_VX) == MATCH_VMSLT_VX ||
+         (bits & MASK_VMSLEU_VV) == MATCH_VMSLEU_VV ||
+         (bits & MASK_VMSLEU_VX) == MATCH_VMSLEU_VX ||
+         (bits & MASK_VMSLEU_VI) == MATCH_VMSLEU_VI ||
+         (bits & MASK_VMSLE_VV) == MATCH_VMSLE_VV ||
+         (bits & MASK_VMSLE_VX) == MATCH_VMSLE_VX ||
+         (bits & MASK_VMSLE_VI) == MATCH_VMSLE_VI ||
+         (bits & MASK_VMSGTU_VX) == MATCH_VMSGTU_VX ||
+         (bits & MASK_VMSGTU_VI) == MATCH_VMSGTU_VI ||
+         (bits & MASK_VMSGT_VX) == MATCH_VMSGT_VX ||
+         (bits & MASK_VMSGT_VI) == MATCH_VMSGT_VI ||
+         // rvv-spec-1.0: Vector Floating-Point Compare Instructions: Compares
+         // write mask registers, and so always operate under a tail-agnostic
+         // policy.
+         (bits & MASK_VMFEQ_VV) == MATCH_VMFEQ_VV ||
+         (bits & MASK_VMFEQ_VF) == MATCH_VMFEQ_VF ||
+         (bits & MASK_VMFNE_VV) == MATCH_VMFNE_VV ||
+         (bits & MASK_VMFNE_VF) == MATCH_VMFNE_VF ||
+         (bits & MASK_VMFLT_VV) == MATCH_VMFLT_VV ||
+         (bits & MASK_VMFLT_VF) == MATCH_VMFLT_VF ||
+         (bits & MASK_VMFLE_VV) == MATCH_VMFLE_VV ||
+         (bits & MASK_VMFLE_VF) == MATCH_VMFLE_VF ||
+         (bits & MASK_VMFGT_VF) == MATCH_VMFGT_VF ||
+         (bits & MASK_VMFGE_VF) == MATCH_VMFGE_VF;
+}
+
+// Returns true for vmv.s.x / vfmv.s.f, which write only element 0 of vd.
+static bool is_single_dest(insn_t insn) {
+  const auto &bits = insn.bits();
+  return (bits & MASK_VMV_S_X) == MATCH_VMV_S_X ||
+         (bits & MASK_VFMV_S_F) == MATCH_VFMV_S_F;
+}
+
+// Returns the effective vector length of the instruction, i.e. the index of
+// the first tail element counted in the instruction's destination elements.
+static reg_t get_instr_evl(const processor_t *p, insn_t insn) {
+  // There is no accounting for whole register instructions here, as they do not
+  // have a tail.
+  reg_t vl = p->VU.vl->read();
+  // rvv-spec-1.0: vlm.v: These operate similarly to unmasked byte loads or
+  // stores (EEW=8), except that the effective vector length is
+  // evl=ceil(vl/8). Rounding down would treat the last loaded byte as tail.
+  if ((insn.bits() & MASK_VLM_V) == MATCH_VLM_V)
+    return (vl + 7) / 8;
+  return vl;
+}
+
+// Returns the width in bits of the data elements written by a vector load.
+static reg_t get_data_eew(const processor_t *p, insn_t insn) {
+  assert(is_rvv_or_fp_load(insn));
+  const auto &bits = insn.bits();
+  // rvv-spec-1.0: vector indexed loads and stores use the EEW/EMUL encoded in
+  // the instruction for the index values and the SEW/LMUL encoded in vtype for
+  // the data values
+  if ((bits & MASK_VLOXEI16_V) == MATCH_VLOXEI16_V ||
+      (bits & MASK_VLOXEI32_V) == MATCH_VLOXEI32_V ||
+      (bits & MASK_VLOXEI64_V) == MATCH_VLOXEI64_V ||
+      (bits & MASK_VLOXEI8_V) == MATCH_VLOXEI8_V ||
+      (bits & MASK_VLUXEI16_V) == MATCH_VLUXEI16_V ||
+      (bits & MASK_VLUXEI32_V) == MATCH_VLUXEI32_V ||
+      (bits & MASK_VLUXEI64_V) == MATCH_VLUXEI64_V ||
+      (bits & MASK_VLUXEI8_V) == MATCH_VLUXEI8_V)
+    return p->VU.vsew;
+  // rvv-spec-1.0: Vector unit-stride and constant-stride use the EEW/EMUL
+  // encoded in the instruction for the data values
+  switch (insn.v_width()) {
+  case 0b000:
+    return e8;
+  case 0b101:
+    return e16;
+  case 0b110:
+    return e32;
+  case 0b111:
+    return e64;
+  default:
+    // Not expected for vector loads; keep a defined result when asserts are
+    // compiled out instead of falling off the end of the function.
+    assert(0 && "unexpected vector load width encoding");
+    return p->VU.vsew;
+  }
+}
+
+// Returns true for whole-register moves and loads, which have no tail.
+static bool is_whole_reg(insn_t insn) {
+  const auto &bits = insn.bits();
+  return (bits & MASK_VMV1R_V) == MATCH_VMV1R_V ||
+         (bits & MASK_VMV2R_V) == MATCH_VMV2R_V ||
+         (bits & MASK_VMV4R_V) == MATCH_VMV4R_V ||
+         (bits & MASK_VMV8R_V) == MATCH_VMV8R_V ||
+         (bits & MASK_VL1RE8_V) == MATCH_VL1RE8_V ||
+         (bits & MASK_VL2RE8_V) == MATCH_VL2RE8_V ||
+         (bits & MASK_VL4RE8_V) == MATCH_VL4RE8_V ||
+         (bits & MASK_VL8RE8_V) == MATCH_VL8RE8_V ||
+         (bits & MASK_VL1RE16_V) == MATCH_VL1RE16_V ||
+         (bits & MASK_VL2RE16_V) == MATCH_VL2RE16_V ||
+         (bits & MASK_VL4RE16_V) == MATCH_VL4RE16_V ||
+         (bits & MASK_VL8RE16_V) == MATCH_VL8RE16_V ||
+         (bits & MASK_VL1RE32_V) == MATCH_VL1RE32_V ||
+         (bits & MASK_VL2RE32_V) == MATCH_VL2RE32_V ||
+         (bits & MASK_VL4RE32_V) == MATCH_VL4RE32_V ||
+         (bits & MASK_VL8RE32_V) == MATCH_VL8RE32_V ||
+         (bits & MASK_VL1RE64_V) == MATCH_VL1RE64_V ||
+         (bits & MASK_VL2RE64_V) == MATCH_VL2RE64_V ||
+         (bits & MASK_VL4RE64_V) == MATCH_VL4RE64_V ||
+         (bits & MASK_VL8RE64_V) == MATCH_VL8RE64_V;
+}
+
+// Instruction post-process hook of the spiketa1s extension: after a vector
+// instruction has executed, overwrites the destination's tail elements with
+// all 1s wherever the tail-agnostic policy applies. Always returns pc
+// unchanged.
+// NOTE(review): outside of loads the tail is located using SEW-wide elements
+// and vl; widening/narrowing ops and reductions look unhandled - confirm.
+static reg_t tail_agnostic_fill1s(processor_t *p, insn_t insn, reg_t pc) {
+  // rvv-spec-1.0: Vector Loads and Stores: Masked vector stores only update
+  // active memory elements.
+  if (is_rvv_or_fp_store(insn))
+    return pc;
+  // Belonging to the V extension is checked after loads and stores, because
+  // their opcodes do not match the opcodes of V extension.
+  if (!is_rvv(insn) || is_rvv_scalar_dest(insn))
+    return pc;
+  // Whole register instructions don't have a tail.
+  if (is_whole_reg(insn))
+    return pc;
+  // If the instruction has evl, then - rvv-spec-1.0: The usual property that
+  // no elements are written if vstart ≥ vl does not apply to these
+  // instructions. Instead, no elements are written if vstart ≥ evl.
+  reg_t evl = get_instr_evl(p, insn);
+  // rvv-spec-1.0: When vstart ≥ vl, there are no body elements, and no
+  // elements are updated in any destination vector register group, including
+  // that no tail elements are updated with agnostic values. As a consequence,
+  // when vl=0, no elements, including agnostic elements, are updated in the
+  // destination vector register group regardless of vstart.
+  // FIXME: Vstart is used here, which was reset to 0 as a result of executing
+  // this instruction, we need to get the old value of Vstart here.
+  if (p->VU.vstart->read() >= evl || evl == 0)
+    return pc;
+
+  reg_t tail_from = evl;
+  reg_t vl_one_reg = p->VU.VLEN / p->VU.vsew;
+  reg_t tail_to = std::max(p->VU.vlmax, vl_one_reg);
+  if (is_rvv_or_fp_load(insn)) {
+    const reg_t nf = insn.v_nf() + 1;
+    const reg_t eew = get_data_eew(p, insn);
+    if ((insn.bits() & MASK_VLM_V) == MATCH_VLM_V) {
+      // vlm.v writes a mask, so its tail is agnostic regardless of vta; the
+      // fill covers the bytes from evl up to the end of the single register.
+      AGNOSTIC_LOOP_LOAD_TAIL(
+          { // tail agnostic - fill 1s
+            vd = all_ones;
+          },
+          eew, nf, tail_from, p->VU.VLEN / eew);
+      return pc;
+    }
+    // All other loads follow vtype.vta: when vta==0 the tail is undisturbed.
+    if (p->VU.vta == 0)
+      return pc;
+    vl_one_reg = p->VU.VLEN / eew;
+    tail_to = std::max(p->VU.vlmax, vl_one_reg);
+    AGNOSTIC_LOOP_LOAD_TAIL(
+        { // tail agnostic - fill 1s
+          vd = all_ones;
+        },
+        eew, nf, tail_from, tail_to);
+    return pc;
+  }
+  // rvv-spec-1.0: mask destination tail elements are always treated as
+  // tail-agnostic, regardless of the setting of vta.
+  if (is_mask_dest(insn)) {
+    AGNOSTIC_LOOP_MASK_TAIL(
+        // tail agnostic - fill 1s
+        /* value */ 1, p->VU.vl->read(), p->VU.VLEN);
+    return pc;
+  }
+  // When vta==0 then undisturbed policy is in effect.
+  if (p->VU.vta == 0)
+    return pc;
+
+  // rvv-spec-1.0: vmv.s.x, vfmv.s.f: The instructions ignore LMUL and vector
+  // register groups; The other elements in the destination vector register
+  // ( 0 < index < VLEN/SEW) are treated as tail elements using the current tail
+  // agnostic/undisturbed policy.
+  if (is_single_dest(insn)) {
+    tail_from = 1;
+    tail_to = vl_one_reg;
+  }
+  // Special way to select the tail elements of the vcompress.vm.
+  else if ((insn.bits() & MASK_VCOMPRESS_VM) == MATCH_VCOMPRESS_VM) {
+    // It is safe to read vs1 after instruction execution, since rvv-spec-1.0:
+    // vcompress.vm: The destination vector register group cannot overlap the
+    // source vector register group or the source mask register.
+    std::vector<reg_t> idxes(evl);
+    std::iota(idxes.begin(), idxes.end(), /*value*/ 0);
+    tail_from = std::count_if(idxes.begin(), idxes.end(), [&](reg_t i) {
+      return p->VU.mask_elt(insn.rs1(), i);
+    });
+  }
+
+  AGNOSTIC_LOOP_TAIL(
+      // tail agnostic - fill 1s
+      { vd = all_ones; }, tail_from, tail_to);
+  return pc;
+}
+
+// Extension "spiketa1s": adds no instructions or disassemblers; it only
+// registers tail_agnostic_fill1s as an instruction post-process hook.
+struct tail_agnostic_fill1s_t : public extension_t {
+  const char *name() const override { return "spiketa1s"; }
+
+  tail_agnostic_fill1s_t() {}
+
+  std::vector<insn_desc_t> get_instructions(const processor_t &) override {
+    return {};
+  }
+
+  std::vector<disasm_insn_t *> get_disasms(const processor_t *) override {
+    return {};
+  }
+
+  // Registers the hook only if it is not already in the list.
+  void reset(processor_t &p) override {
+    auto &insn_postprocesses = p.get_state()->insn_postprocesses;
+    auto tail_func = std::find(insn_postprocesses.begin(),
+                               insn_postprocesses.end(), &tail_agnostic_fill1s);
+    if (tail_func == insn_postprocesses.end())
+      insn_postprocesses.push_back(&tail_agnostic_fill1s);
+  }
+};
+
+REGISTER_EXTENSION(spiketa1s, []() {
+  static tail_agnostic_fill1s_t ext;
+  return &ext;
+})
diff --git a/riscv/insn_template.cc b/riscv/insn_template.cc
index 12d564b8b1..3ffd01a5ff 100644
--- a/riscv/insn_template.cc
+++ b/riscv/insn_template.cc
@@ -9,8 +9,10 @@
   reg_t npc = sext_xlen(pc + insn_length(OPCODE)); \
   if (!p->extension_enabled(EXT_ZCA)) assume(insn_length(OPCODE) % 4 == 0)
 
-#define EPILOGUE \
-  trace_opcode(p, OPCODE, insn); \
+#define EPILOGUE                                                               \
+  for (auto postproc : P.get_state()->insn_postprocesses)                      \
+    postproc(p, insn, pc);                                                     \
+  trace_opcode(p, OPCODE, insn);                                               \
   return npc
 
 reg_t fast_rv32i_NAME(processor_t* p, insn_t insn, reg_t pc)
diff --git a/riscv/processor.h b/riscv/processor.h
index d0c5b3f9de..d192fca71e 100644
--- a/riscv/processor.h
+++ b/riscv/processor.h
@@ -209,6 +209,11 @@ struct state_t
   elp_t elp;
 
   bool critical_error;
+  // This member is needed to execute the user extension code after each
+  // instruction. Extensions that need this functionality
+  // (mask_agnostic_fill1s_t, tail_agnostic_fill1s_t) should add pointer to the
+  // function in reset method.
+  std::vector<insn_func_t> insn_postprocesses;
 
  private:
   void csr_init(processor_t* const proc, reg_t max_isa);