Skip to content

Commit 15c8ef7

Browse files
drommslijepc
authored and committed
Add Zvfbfa, Zvfofp8min, Zvfofp4min support
1 parent dac31c1 commit 15c8ef7

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

104 files changed

+1863
-106
lines changed

disasm/disasm.cc

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1973,6 +1973,16 @@ void disassembler_t::add_instructions(const isa_parser_t* isa, bool strict)
19731973
#undef DISASM_VFUNARY0_INSN
19741974
}
19751975

1976+
if (ext_enabled(EXT_ZVFOFP4MIN)) {
1977+
DEFINE_VECTOR_V(vfext_vf2);
1978+
}
1979+
1980+
if (ext_enabled(EXT_ZVFOFP8MIN)) {
1981+
DEFINE_VECTOR_V(vfncvt_f_f_q);
1982+
DEFINE_VECTOR_V(vfncvt_sat_f_f_q);
1983+
DEFINE_VECTOR_V(vfncvtbf16_sat_f_f_w);
1984+
}
1985+
19761986
if (ext_enabled(EXT_ZVFBFMIN)) {
19771987
DEFINE_VECTOR_V(vfncvtbf16_f_f_w);
19781988
DEFINE_VECTOR_V(vfwcvtbf16_f_f_v);

disasm/isa_parser.cc

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -130,6 +130,12 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
130130
// Zvfh implies Zfhmin
131131
extension_table[EXT_ZFHMIN] = true;
132132
}
133+
} else if (ext_str == "zvfbfa") {
134+
extension_table[EXT_ZVFBFA] = true;
135+
} else if (ext_str == "zvfofp4min") {
136+
extension_table[EXT_ZVFOFP4MIN] = true;
137+
} else if (ext_str == "zvfofp8min") {
138+
extension_table[EXT_ZVFOFP8MIN] = true;
133139
} else if (ext_str == "zicsr") {
134140
// Spike necessarily has Zicsr, because
135141
// Zicsr is implied by the privileged architecture
@@ -481,10 +487,22 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
481487
bad_isa_string(str, "'Zvfbfmin' extension requires 'Zve32f' extension");
482488
}
483489

490+
// Reject an ISA string that enables Zvfbfa unless has_any_vector() and get_zvf()
// both hold and Zfbfmin is enabled. The diagnostic uses the same capitalisation
// and "requires ... extensions" wording as the neighbouring checks.
if (extension_table[EXT_ZVFBFA] && (!has_any_vector() || !extension_table[EXT_ZFBFMIN] || !get_zvf())) {
491+
bad_isa_string(str, "'Zvfbfa' extension requires 'Zfbfmin' and at least 'Zve32f' extensions");
492+
}
493+
484494
if (extension_table[EXT_ZVFBFWMA] && (!extension_table[EXT_ZFBFMIN] || !extension_table[EXT_ZVFBFMIN])) {
485495
bad_isa_string(str, "'Zvfbfwma' extension requires 'Zfbfmin' and 'Zvfbfmin' extensions");
486496
}
487497

498+
if (extension_table[EXT_ZVFOFP4MIN] && (!has_any_vector() || !get_zvf())) {
499+
bad_isa_string(str, "'Zvfofp4min' extension requires either 'V' or 'Zve32f' extension");
500+
}
501+
502+
if (extension_table[EXT_ZVFOFP8MIN] && (!has_any_vector() || !get_zvf())) {
503+
bad_isa_string(str, "'Zvfofp8min' extension requires either 'V' or 'Zve32f' extension");
504+
}
505+
488506
if (extension_table[EXT_ZFINX] && extension_table['F']) {
489507
bad_isa_string(str, ("Zfinx/Zdinx/Zhinx{min} extensions conflict with 'F/D/Q/Zfh{min}' extensions"));
490508
}

riscv/decode_macros.h

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -267,15 +267,21 @@ inline bfloat16_t bf16(freg_t r) { return bf16(unboxBF16(r)); }
267267
inline float32_t f32(freg_t r) { return f32(unboxF32(r)); }
268268
inline float64_t f64(freg_t r) { return f64(unboxF64(r)); }
269269
inline float128_t f128(freg_t r) { return r; }
270+
// Overloads that take an extra `altfmt` selector alongside the FP register.
// 16-bit: a non-zero altfmt reads the register through bf16(r), otherwise through f16(r).
inline float16_t f16(freg_t r, reg_t altfmt) { return altfmt ? bf16(r) : f16(r); }
271+
// 32-bit: altfmt is accepted for a uniform signature but ignored; forwards to f32(r).
inline float32_t f32(freg_t r, UNUSED reg_t altfmt) { return f32(r); }
272+
// 64-bit: altfmt is accepted for a uniform signature but ignored; forwards to f64(r).
inline float64_t f64(freg_t r, UNUSED reg_t altfmt) { return f64(r); }
270273
inline freg_t freg(float16_t f) { return { ((uint64_t)-1 << 16) | f.v, (uint64_t)-1 }; }
271274
inline freg_t freg(float32_t f) { return { ((uint64_t)-1 << 32) | f.v, (uint64_t)-1 }; }
272275
inline freg_t freg(float64_t f) { return { f.v, (uint64_t)-1 }; }
273276
inline freg_t freg(float128_t f) { return f; }
274277
#define F16_SIGN ((uint16_t)1 << 15)
278+
#define BF16_SIGN F16_SIGN
275279
#define F32_SIGN ((uint32_t)1 << 31)
276280
#define F64_SIGN ((uint64_t)1 << 63)
277281
#define fsgnj16(a, b, n, x) \
278282
f16((f16(a).v & ~F16_SIGN) | ((((x) ? f16(a).v : (n) ? F16_SIGN : 0) ^ f16(b).v) & F16_SIGN))
283+
// bfloat16 counterpart of fsgnj16, built from the bf16() accessors and BF16_SIGN.
// The result keeps every non-sign bit of `a`; its sign bit is
//   x != 0          -> sign(a) XOR sign(b)
//   x == 0, n != 0  -> inverted sign(b)
//   x == 0, n == 0  -> sign(b)
#define bfsgnj16(a, b, n, x) \
284+
bf16((bf16(a).v & ~BF16_SIGN) | ((((x) ? bf16(a).v : (n) ? BF16_SIGN : 0) ^ bf16(b).v) & BF16_SIGN))
279285
#define fsgnj32(a, b, n, x) \
280286
f32((f32(a).v & ~F32_SIGN) | ((((x) ? f32(a).v : (n) ? F32_SIGN : 0) ^ f32(b).v) & F32_SIGN))
281287
#define fsgnj64(a, b, n, x) \

riscv/encoding.h

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1770,6 +1770,10 @@
17701770
#define MASK_VFMV_V_F 0xfff0707f
17711771
#define MATCH_VFNCVT_F_F_W 0x480a1057
17721772
#define MASK_VFNCVT_F_F_W 0xfc0ff07f
1773+
#define MATCH_VFNCVT_F_F_Q 0x480c9057
1774+
#define MASK_VFNCVT_F_F_Q 0xfc0ff07f
1775+
#define MATCH_VFNCVT_SAT_F_F_Q 0x480d9057
1776+
#define MASK_VFNCVT_SAT_F_F_Q 0xfc0ff07f
17731777
#define MATCH_VFNCVT_F_X_W 0x48099057
17741778
#define MASK_VFNCVT_F_X_W 0xfc0ff07f
17751779
#define MATCH_VFNCVT_F_XU_W 0x48091057
@@ -3716,6 +3720,8 @@ DECLARE_INSN(vfmv_f_s, MATCH_VFMV_F_S, MASK_VFMV_F_S)
37163720
DECLARE_INSN(vfmv_s_f, MATCH_VFMV_S_F, MASK_VFMV_S_F)
37173721
DECLARE_INSN(vfmv_v_f, MATCH_VFMV_V_F, MASK_VFMV_V_F)
37183722
DECLARE_INSN(vfncvt_f_f_w, MATCH_VFNCVT_F_F_W, MASK_VFNCVT_F_F_W)
3723+
DECLARE_INSN(vfncvt_f_f_q, MATCH_VFNCVT_F_F_Q, MASK_VFNCVT_F_F_Q)
3724+
DECLARE_INSN(vfncvt_sat_f_f_q, MATCH_VFNCVT_SAT_F_F_Q, MASK_VFNCVT_SAT_F_F_Q)
37193725
DECLARE_INSN(vfncvt_f_x_w, MATCH_VFNCVT_F_X_W, MASK_VFNCVT_F_X_W)
37203726
DECLARE_INSN(vfncvt_f_xu_w, MATCH_VFNCVT_F_XU_W, MASK_VFNCVT_F_XU_W)
37213727
DECLARE_INSN(vfncvt_rod_f_f_w, MATCH_VFNCVT_ROD_F_F_W, MASK_VFNCVT_ROD_F_F_W)

riscv/insns/vfadd_vf.h

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,9 @@
11
// vfadd.vf vd, vs2, rs1
2+
require_zvfbfa
3+
24
VI_VFP_VF_LOOP
35
({
4-
vd = f16_add(rs1, vs2);
6+
vd = VFP_OP_16(add, vs2, rs1);
57
},
68
{
79
vd = f32_add(rs1, vs2);

riscv/insns/vfadd_vv.h

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,9 @@
11
// vfadd.vv vd, vs2, vs1
2+
require_zvfbfa
3+
24
VI_VFP_VV_LOOP
35
({
4-
vd = f16_add(vs1, vs2);
6+
vd = VFP_OP_16(add, vs2, vs1);
57
},
68
{
79
vd = f32_add(vs1, vs2);

riscv/insns/vfclass_v.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
// vfclass.v vd, vs2, vm
22
VI_VFP_V_LOOP
33
({
4-
vd = f16(f16_classify(vs2));
4+
vd = P.VU.altfmt ? bf16(bf16_classify(vs2)) : f16(f16_classify(vs2));
55
},
66
{
77
vd = f32(f32_classify(vs2));

riscv/insns/vfdiv_vf.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,6 @@
11
// vfdiv.vf vd, vs2, rs1
2+
VI_NON_ALTFMT_INSN
3+
24
VI_VFP_VF_LOOP
35
({
46
vd = f16_div(vs2, rs1);

riscv/insns/vfdiv_vv.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,6 @@
11
// vfdiv.vv vd, vs2, vs1
2+
VI_NON_ALTFMT_INSN
3+
24
VI_VFP_VV_LOOP
35
({
46
vd = f16_div(vs2, vs1);

riscv/insns/vfext_vf2.h

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,13 @@
1+
// Widens packed 4-bit source elements into 8-bit destination elements.
//
// Lookup table indexed by the low three bits of a 4-bit source element; each
// entry supplies the low seven bits of the 8-bit result.
// NOTE(review): going by the name, these look like the FP4 (E2M1) magnitudes
// 0, 0.5, 1, 1.5, 2, 3, 4, 6 re-encoded as FP8 E4M3 — confirm against the OCP specs.
static const uint8_t ofp4_to_e4m3_lower[8] = {
2+
0x00, 0x30, 0x38, 0x3c, 0x40, 0x44, 0x48, 0x4c
3+
};
4+
5+
// Only valid with Zvfofp4min enabled and with altfmt clear.
require_extension(EXT_ZVFOFP4MIN);
6+
require(!P.VU.altfmt);
7+
// NOTE(review): VI_VF_EXT is defined elsewhere; presumably it iterates over the
// elements and provides i, rs2_num and rd_num to the body — verify.
VI_VF_EXT(2,
8+
{
9+
// Two 4-bit elements share one source byte, so element i lives in byte i / 2.
uint_fast8_t packed_ofp4_reg = P.VU.elt<uint8_t>(rs2_num, i / 2);
10+
// Even i selects the low nibble, odd i the high nibble.
uint_fast8_t data = ((packed_ofp4_reg >> ((i & 1UL)*4)) & 0xF);
11+
// Low three bits go through the table; bit 3 of the nibble is moved to bit 7 of the result.
P.VU.elt<uint8_t>(rd_num, i, true) = (ofp4_to_e4m3_lower[data& 0x7] | ((data & 0x8) << 4));
12+
}
13+
)

0 commit comments

Comments
 (0)