Skip to content

Commit 2f4c653

Browse files
committed
Add extend-add-pairwise instructions x64
1 parent 5737558 commit 2f4c653

File tree

9 files changed

+147
-10
lines changed

9 files changed

+147
-10
lines changed

build.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -192,10 +192,8 @@ fn x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool {
192192

193193
match (testsuite, testname) {
194194
("simd", "simd_conversions") => return true, // unknown operator or unexpected token: tests/spec_testsuite/proposals/simd/simd_conversions.wast:724:6
195-
("simd", "simd_i16x8_extadd_pairwise_i8x16") => return true,
196195
("simd", "simd_i16x8_extmul_i8x16") => return true,
197196
("simd", "simd_i16x8_q15mulr_sat_s") => return true,
198-
("simd", "simd_i32x4_extadd_pairwise_i16x8") => return true,
199197
("simd", "simd_i32x4_extmul_i16x8") => return true,
200198
("simd", "simd_i32x4_trunc_sat_f64x2") => return true,
201199
("simd", "simd_i64x2_extmul_i32x4") => return true,
@@ -232,10 +230,10 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
232230
// These are new instructions that are not really implemented in any backend.
233231
("simd", "simd_i8x16_arith2")
234232
| ("simd", "simd_conversions")
235-
| ("simd", "simd_i16x8_extadd_pairwise_i8x16")
233+
// | ("simd", "simd_i16x8_extadd_pairwise_i8x16")
236234
| ("simd", "simd_i16x8_extmul_i8x16")
237235
| ("simd", "simd_i16x8_q15mulr_sat_s")
238-
| ("simd", "simd_i32x4_extadd_pairwise_i16x8")
236+
//| ("simd", "simd_i32x4_extadd_pairwise_i16x8")
239237
| ("simd", "simd_i32x4_extmul_i16x8")
240238
| ("simd", "simd_i32x4_trunc_sat_f64x2")
241239
| ("simd", "simd_i64x2_extmul_i32x4") => return true,

cranelift/codegen/meta/src/shared/instructions.rs

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4068,7 +4068,34 @@ pub(crate) fn define(
40684068
Inst::new(
40694069
"uwiden_high",
40704070
r#"
4071-
Widen the high lanes of `x` using unsigned extension.
4071+
Widen the high lanes of `x` using unsigned extension.
4072+
This will double the lane width and halve the number of lanes.
4073+
"#,
4074+
&formats.unary,
4075+
)
4076+
.operands_in(vec![x])
4077+
.operands_out(vec![a]),
4078+
);
4079+
4080+
ig.push(
4081+
Inst::new(
4082+
"extended_pairwise_add_signed",
4083+
r#"
4084+
Lane-wise extended pairwise addition: sign-extend each lane of `x` to twice its width, then add each pair of adjacent lanes.
4085+
4086+
This will double the lane width and halve the number of lanes.
4087+
"#,
4088+
&formats.unary,
4089+
)
4090+
.operands_in(vec![x])
4091+
.operands_out(vec![a]),
4092+
);
4093+
4094+
ig.push(
4095+
Inst::new(
4096+
"extended_pairwise_add_unsigned",
4097+
r#"
4098+
Lane-wise extended pairwise addition: zero-extend each lane of `x` to twice its width, then add each pair of adjacent lanes.
40724099
40734100
This will double the lane width and halve the number of lanes.
40744101
"#,

cranelift/codegen/src/isa/aarch64/lower_inst.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3359,6 +3359,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
33593359
Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
33603360
Opcode::FvpromoteLow => unimplemented!("FvpromoteLow"),
33613361
Opcode::Fvdemote => unimplemented!("Fvdemote"),
3362+
Opcode::ExtendedPairwiseAddSigned => unimplemented!("ExtendedPairwiseAddSigned"),
3363+
Opcode::ExtendedPairwiseAddUnsigned => unimplemented!("ExtendedPairwiseAddUnsigned"),
33623364
}
33633365

33643366
Ok(())

cranelift/codegen/src/isa/s390x/lower.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2550,7 +2550,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
25502550
| Opcode::UwidenHigh
25512551
| Opcode::WideningPairwiseDotProductS
25522552
| Opcode::FvpromoteLow
2553-
| Opcode::Fvdemote => {
2553+
| Opcode::Fvdemote
2554+
| Opcode::ExtendedPairwiseAddSigned
2555+
| Opcode::ExtendedPairwiseAddUnsigned => {
25542556
// TODO
25552557
panic!("Vector ops not implemented.");
25562558
}

cranelift/codegen/src/isa/x64/inst/args.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,7 @@ pub enum SseOpcode {
567567
Pinsrb,
568568
Pinsrw,
569569
Pinsrd,
570+
Pmaddubsw,
570571
Pmaddwd,
571572
Pmaxsb,
572573
Pmaxsw,
@@ -734,6 +735,7 @@ impl SseOpcode {
734735
| SseOpcode::Pcmpgtd
735736
| SseOpcode::Pextrw
736737
| SseOpcode::Pinsrw
738+
| SseOpcode::Pmaddubsw
737739
| SseOpcode::Pmaddwd
738740
| SseOpcode::Pmaxsw
739741
| SseOpcode::Pmaxub
@@ -925,6 +927,7 @@ impl fmt::Debug for SseOpcode {
925927
SseOpcode::Pinsrb => "pinsrb",
926928
SseOpcode::Pinsrw => "pinsrw",
927929
SseOpcode::Pinsrd => "pinsrd",
930+
SseOpcode::Pmaddubsw => "pmaddubsw",
928931
SseOpcode::Pmaddwd => "pmaddwd",
929932
SseOpcode::Pmaxsb => "pmaxsb",
930933
SseOpcode::Pmaxsw => "pmaxsw",

cranelift/codegen/src/isa/x64/inst/emit.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1483,6 +1483,7 @@ pub(crate) fn emit(
14831483
SseOpcode::Paddsw => (LegacyPrefixes::_66, 0x0FED, 2),
14841484
SseOpcode::Paddusb => (LegacyPrefixes::_66, 0x0FDC, 2),
14851485
SseOpcode::Paddusw => (LegacyPrefixes::_66, 0x0FDD, 2),
1486+
SseOpcode::Pmaddubsw => (LegacyPrefixes::_66, 0x0F3804, 3),
14861487
SseOpcode::Pand => (LegacyPrefixes::_66, 0x0FDB, 2),
14871488
SseOpcode::Pandn => (LegacyPrefixes::_66, 0x0FDF, 2),
14881489
SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),

cranelift/codegen/src/isa/x64/lower.rs

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4472,6 +4472,96 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
44724472
}
44734473
}
44744474
}
4475+
Opcode::ExtendedPairwiseAddSigned | Opcode::ExtendedPairwiseAddUnsigned => {
4476+
4477+
let input_ty = ctx.input_ty(insn, 0);
4478+
let output_ty = ctx.output_ty(insn, 0);
4479+
let src = put_input_in_reg(ctx, inputs[0]);
4480+
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
4481+
4482+
match op {
4483+
Opcode::ExtendedPairwiseAddSigned => match (input_ty, output_ty) {
4484+
(types::I8X16, types::I16X8) => {
4485+
static MUL_CONST: [u8; 16] = [0x01; 16];
4486+
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
4487+
let mul_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
4488+
ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I8X16));
4489+
ctx.emit(Inst::xmm_mov(
4490+
SseOpcode::Movdqa,
4491+
RegMem::reg(mul_const_reg.to_reg()),
4492+
dst,
4493+
));
4494+
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddubsw, RegMem::reg(src), dst));
4495+
eprintln!("Match A!! {:?} {:?} {:?}", op, input_ty, output_ty);
4496+
},
4497+
(types::I16X8, types::I32X4) => {
4498+
static MUL_CONST: [u8; 16] = [0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00];
4499+
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
4500+
let mul_const_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
4501+
ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I16X8));
4502+
ctx.emit(Inst::xmm_mov(
4503+
SseOpcode::Movdqa,
4504+
RegMem::reg(src),
4505+
dst,
4506+
));
4507+
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddwd, RegMem::reg(mul_const_reg.to_reg()), dst));
4508+
eprintln!("Match B!! {:?} {:?} {:?}", op, input_ty, output_ty);
4509+
4510+
},
4511+
_ => unreachable!("Type pattern should not be possible."),
4512+
},
4513+
Opcode::ExtendedPairwiseAddUnsigned => match (input_ty, output_ty) {
4514+
(types::I8X16, types::I16X8) => {
4515+
static MUL_CONST: [u8; 16] = [0x01; 16];
4516+
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
4517+
let mul_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
4518+
ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I8X16));
4519+
ctx.emit(Inst::xmm_mov(
4520+
SseOpcode::Movdqa,
4521+
RegMem::reg(src),
4522+
dst,
4523+
));
4524+
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddubsw, RegMem::reg(mul_const_reg.to_reg()), dst));
4525+
println!("Match C!! {:?} {:?} {:?}", op, input_ty, output_ty);
4526+
},
4527+
(types::I16X8, types::I32X4) => {
4528+
//static PXOR_CONST: [u8; 16] = [0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00];
4529+
static PXOR_CONST: [u8; 16] = [0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80];
4530+
let pxor_const = ctx.use_constant(VCodeConstantData::WellKnown(&PXOR_CONST));
4531+
let pxor_const_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
4532+
ctx.emit(Inst::xmm_load_const(pxor_const, pxor_const_reg, types::I16X8));
4533+
ctx.emit(Inst::xmm_mov(
4534+
SseOpcode::Movdqa,
4535+
RegMem::reg(src),
4536+
dst,
4537+
));
4538+
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::reg(pxor_const_reg.to_reg()), dst));
4539+
4540+
4541+
//static MADD_CONST: [u8; 16] = [0x01; 16];
4542+
static MADD_CONST: [u8; 16] = [0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00];
4543+
let madd_const = ctx.use_constant(VCodeConstantData::WellKnown(&MADD_CONST));
4544+
let madd_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
4545+
ctx.emit(Inst::xmm_load_const(madd_const, madd_const_reg, types::I16X8));
4546+
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddwd, RegMem::reg(madd_const_reg.to_reg()), dst));
4547+
4548+
4549+
//static ADDD_CONST2: [u8; 16] = [0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00];
4550+
static ADDD_CONST2: [u8; 16] = [0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00];
4551+
let addd_const2 = ctx.use_constant(VCodeConstantData::WellKnown(&ADDD_CONST2));
4552+
let addd_const2_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
4553+
ctx.emit(Inst::xmm_load_const(addd_const2, addd_const2_reg, types::I16X8));
4554+
ctx.emit(Inst::xmm_rm_r(SseOpcode::Paddd, RegMem::reg(addd_const2_reg.to_reg()), dst));
4555+
4556+
4557+
eprintln!("Match D!! {:?} {:?} {:?}", op, input_ty, output_ty);
4558+
4559+
},
4560+
_ => unreachable!("Type pattern should not be possible."),
4561+
},
4562+
_ => unreachable!("Opcode should not be possible."),
4563+
}
4564+
}
44754565
Opcode::UwidenHigh | Opcode::UwidenLow | Opcode::SwidenHigh | Opcode::SwidenLow => {
44764566
let input_ty = ctx.input_ty(insn, 0);
44774567
let output_ty = ctx.output_ty(insn, 0);

cranelift/interpreter/src/step.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,8 @@ where
574574
Opcode::AtomicStore => unimplemented!("AtomicStore"),
575575
Opcode::Fence => unimplemented!("Fence"),
576576
Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"),
577+
Opcode::ExtendedPairwiseAddSigned => unimplemented!("ExtendedPairwiseAddSigned"),
578+
Opcode::ExtendedPairwiseAddUnsigned => unimplemented!("ExtendedPairwiseAddUnsigned"),
577579

578580
// TODO: these instructions should be removed once the new backend makes these obsolete
579581
// (see https://github.com/bytecodealliance/wasmtime/issues/1936); additionally, the

cranelift/wasm/src/code_translator.rs

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1859,6 +1859,22 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
18591859
let a = pop1_with_bitcast(state, I32X4, builder);
18601860
state.push1(builder.ins().uwiden_high(a))
18611861
}
1862+
Operator::I16x8ExtAddPairwiseI8x16S => {
1863+
let a = pop1_with_bitcast(state, I8X16, builder);
1864+
state.push1(builder.ins().extended_pairwise_add_signed(a))
1865+
}
1866+
Operator::I32x4ExtAddPairwiseI16x8S => {
1867+
let a = pop1_with_bitcast(state, I16X8, builder);
1868+
state.push1(builder.ins().extended_pairwise_add_signed(a))
1869+
}
1870+
Operator::I16x8ExtAddPairwiseI8x16U => {
1871+
let a = pop1_with_bitcast(state, I8X16, builder);
1872+
state.push1(builder.ins().extended_pairwise_add_unsigned(a))
1873+
}
1874+
Operator::I32x4ExtAddPairwiseI16x8U => {
1875+
let a = pop1_with_bitcast(state, I16X8, builder);
1876+
state.push1(builder.ins().extended_pairwise_add_unsigned(a))
1877+
}
18621878
Operator::F32x4Ceil | Operator::F64x2Ceil => {
18631879
// This is something of a misuse of `type_of`, because that produces the return type
18641880
// of `op`. In this case we want the arg type, but we know it's the same as the
@@ -1899,10 +1915,6 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
18991915
| Operator::I64x2ExtMulHighI32x4S
19001916
| Operator::I64x2ExtMulLowI32x4U
19011917
| Operator::I64x2ExtMulHighI32x4U
1902-
| Operator::I16x8ExtAddPairwiseI8x16S
1903-
| Operator::I16x8ExtAddPairwiseI8x16U
1904-
| Operator::I32x4ExtAddPairwiseI16x8S
1905-
| Operator::I32x4ExtAddPairwiseI16x8U
19061918
| Operator::F64x2ConvertLowI32x4U
19071919
| Operator::I32x4TruncSatF64x2SZero
19081920
| Operator::I32x4TruncSatF64x2UZero => {

0 commit comments

Comments
 (0)