Skip to content

Commit 57ee118

Browse files
committed
Add extend-add-pairwise instructions x64
1 parent c71ad94 commit 57ee118

File tree

10 files changed

+183
-7
lines changed

10 files changed

+183
-7
lines changed

build.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -190,14 +190,14 @@ fn x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool {
190190

191191
match (testsuite, testname) {
192192
("simd", "simd_conversions") => return true, // unknown operator or unexpected token: tests/spec_testsuite/proposals/simd/simd_conversions.wast:724:6
193-
("simd", "simd_i16x8_extadd_pairwise_i8x16") => return true,
194193
("simd", "simd_i16x8_extmul_i8x16") => return true,
195194
("simd", "simd_i16x8_q15mulr_sat_s") => return true,
196-
("simd", "simd_i32x4_extadd_pairwise_i16x8") => return true,
197195
("simd", "simd_i32x4_extmul_i16x8") => return true,
198196
("simd", "simd_i32x4_trunc_sat_f64x2") => return true,
199197
("simd", "simd_i64x2_extmul_i32x4") => return true,
200198
("simd", "simd_int_to_int_extend") => return true,
199+
("simd", "simd_i16x8_extadd_pairwise_i8x16") => return true,
200+
("simd", "simd_i32x4_extadd_pairwise_i16x8") => return true,
201201
("simd", _) => return false,
202202
_ => {}
203203
}
@@ -229,9 +229,8 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
229229

230230
// These are new instructions that are not really implemented in any backend.
231231
("simd", "simd_conversions")
232-
| ("simd", "simd_i16x8_extadd_pairwise_i8x16")
233232
| ("simd", "simd_i16x8_extmul_i8x16")
234-
| ("simd", "simd_i32x4_extadd_pairwise_i16x8")
233+
| ("simd", "simd_i16x8_q15mulr_sat_s")
235234
| ("simd", "simd_i32x4_extmul_i16x8")
236235
| ("simd", "simd_i64x2_extmul_i32x4") => return true,
237236

cranelift/codegen/meta/src/shared/instructions.rs

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4114,7 +4114,34 @@ pub(crate) fn define(
41144114
Inst::new(
41154115
"uwiden_high",
41164116
r#"
4117-
Widen the high lanes of `x` using unsigned extension.
4117+
Widen the high lanes of `x` using unsigned extension.
4118+
This will double the lane width and halve the number of lanes.
4119+
"#,
4120+
&formats.unary,
4121+
)
4122+
.operands_in(vec![x])
4123+
.operands_out(vec![a]),
4124+
);
4125+
4126+
ig.push(
4127+
Inst::new(
4128+
"extended_pairwise_add_signed",
4129+
r#"
4130+
Lane-wise integer extended pairwise addition using signed extension: adjacent pairs of lanes of `x` are sign-extended and summed, producing results twice as wide as the input lanes.
4131+
4132+
This will double the lane width and halve the number of lanes.
4133+
"#,
4134+
&formats.unary,
4135+
)
4136+
.operands_in(vec![x])
4137+
.operands_out(vec![a]),
4138+
);
4139+
4140+
ig.push(
4141+
Inst::new(
4142+
"extended_pairwise_add_unsigned",
4143+
r#"
4144+
Lane-wise integer extended pairwise addition using unsigned extension: adjacent pairs of lanes of `x` are zero-extended and summed, producing results twice as wide as the input lanes.
41184145
41194146
This will double the lane width and halve the number of lanes.
41204147
"#,

cranelift/codegen/src/isa/aarch64/lower_inst.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3560,6 +3560,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
35603560
| Opcode::Fvdemote
35613561
| Opcode::FvpromoteLow
35623562
| Opcode::Vconcat
3563+
| Opcode::ExtendedPairwiseAddSigned
3564+
| Opcode::ExtendedPairwiseAddUnsigned
35633565
| Opcode::Vsplit => unimplemented!("lowering {}", op),
35643566
}
35653567

cranelift/codegen/src/isa/s390x/lower.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2868,7 +2868,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
28682868
| Opcode::WideningPairwiseDotProductS
28692869
| Opcode::SqmulRoundSat
28702870
| Opcode::FvpromoteLow
2871-
| Opcode::Fvdemote => {
2871+
| Opcode::Fvdemote
2872+
| Opcode::ExtendedPairwiseAddSigned
2873+
| Opcode::ExtendedPairwiseAddUnsigned => {
28722874
// TODO
28732875
unimplemented!("Vector ops not implemented.");
28742876
}

cranelift/codegen/src/isa/x64/inst/args.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,7 @@ pub enum SseOpcode {
567567
Pinsrb,
568568
Pinsrw,
569569
Pinsrd,
570+
Pmaddubsw,
570571
Pmaddwd,
571572
Pmaxsb,
572573
Pmaxsw,
@@ -734,6 +735,7 @@ impl SseOpcode {
734735
| SseOpcode::Pcmpgtd
735736
| SseOpcode::Pextrw
736737
| SseOpcode::Pinsrw
738+
| SseOpcode::Pmaddubsw
737739
| SseOpcode::Pmaddwd
738740
| SseOpcode::Pmaxsw
739741
| SseOpcode::Pmaxub
@@ -925,6 +927,7 @@ impl fmt::Debug for SseOpcode {
925927
SseOpcode::Pinsrb => "pinsrb",
926928
SseOpcode::Pinsrw => "pinsrw",
927929
SseOpcode::Pinsrd => "pinsrd",
930+
SseOpcode::Pmaddubsw => "pmaddubsw",
928931
SseOpcode::Pmaddwd => "pmaddwd",
929932
SseOpcode::Pmaxsb => "pmaxsb",
930933
SseOpcode::Pmaxsw => "pmaxsw",

cranelift/codegen/src/isa/x64/inst/emit.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1483,6 +1483,7 @@ pub(crate) fn emit(
14831483
SseOpcode::Paddsw => (LegacyPrefixes::_66, 0x0FED, 2),
14841484
SseOpcode::Paddusb => (LegacyPrefixes::_66, 0x0FDC, 2),
14851485
SseOpcode::Paddusw => (LegacyPrefixes::_66, 0x0FDD, 2),
1486+
SseOpcode::Pmaddubsw => (LegacyPrefixes::_66, 0x0F3804, 3),
14861487
SseOpcode::Pand => (LegacyPrefixes::_66, 0x0FDB, 2),
14871488
SseOpcode::Pandn => (LegacyPrefixes::_66, 0x0FDF, 2),
14881489
SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),

cranelift/codegen/src/isa/x64/lower.rs

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4495,6 +4495,128 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
44954495
}
44964496
}
44974497
}
4498+
Opcode::ExtendedPairwiseAddSigned | Opcode::ExtendedPairwiseAddUnsigned => {
4499+
// Extended pairwise addition instructions compute extended sums within adjacent
4500+
// pairs of lanes of a SIMD vector, producing a SIMD vector with half as many lanes.
4501+
// Instruction sequences are taken from the WebAssembly SIMD spec PR https://github.com/WebAssembly/simd/pull/380
4502+
/*
4503+
let input_ty = ctx.input_ty(insn, 0);
4504+
let output_ty = ctx.output_ty(insn, 0);
4505+
let src = put_input_in_reg(ctx, inputs[0]);
4506+
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
4507+
unreachable!();
4508+
match op {
4509+
Opcode::ExtendedPairwiseAddSigned => match (input_ty, output_ty) {
4510+
(types::I8X16, types::I16X8) => {
4511+
static MUL_CONST: [u8; 16] = [0x01; 16];
4512+
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
4513+
let mul_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
4514+
ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I8X16));
4515+
ctx.emit(Inst::xmm_mov(
4516+
SseOpcode::Movdqa,
4517+
RegMem::reg(mul_const_reg.to_reg()),
4518+
dst,
4519+
));
4520+
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddubsw, RegMem::reg(src), dst));
4521+
}
4522+
(types::I16X8, types::I32X4) => {
4523+
static MUL_CONST: [u8; 16] = [
4524+
0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00,
4525+
0x01, 0x00, 0x01, 0x00,
4526+
];
4527+
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
4528+
let mul_const_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
4529+
ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I16X8));
4530+
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
4531+
ctx.emit(Inst::xmm_rm_r(
4532+
SseOpcode::Pmaddwd,
4533+
RegMem::reg(mul_const_reg.to_reg()),
4534+
dst,
4535+
));
4536+
}
4537+
_ => unreachable!(
4538+
"Type pattern not supported {:?}-{:?} not supported for {:?}.",
4539+
input_ty, output_ty, op
4540+
),
4541+
},
4542+
Opcode::ExtendedPairwiseAddUnsigned => match (input_ty, output_ty) {
4543+
(types::I8X16, types::I16X8) => {
4544+
static MUL_CONST: [u8; 16] = [0x01; 16];
4545+
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
4546+
let mul_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
4547+
ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I8X16));
4548+
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
4549+
ctx.emit(Inst::xmm_rm_r(
4550+
SseOpcode::Pmaddubsw,
4551+
RegMem::reg(mul_const_reg.to_reg()),
4552+
dst,
4553+
));
4554+
}
4555+
(types::I16X8, types::I32X4) => {
4556+
static PXOR_CONST: [u8; 16] = [
4557+
0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
4558+
0x00, 0x80, 0x00, 0x80,
4559+
];
4560+
let pxor_const =
4561+
ctx.use_constant(VCodeConstantData::WellKnown(&PXOR_CONST));
4562+
let pxor_const_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
4563+
ctx.emit(Inst::xmm_load_const(
4564+
pxor_const,
4565+
pxor_const_reg,
4566+
types::I16X8,
4567+
));
4568+
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
4569+
ctx.emit(Inst::xmm_rm_r(
4570+
SseOpcode::Pxor,
4571+
RegMem::reg(pxor_const_reg.to_reg()),
4572+
dst,
4573+
));
4574+
4575+
static MADD_CONST: [u8; 16] = [
4576+
0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00,
4577+
0x01, 0x00, 0x01, 0x00,
4578+
];
4579+
let madd_const =
4580+
ctx.use_constant(VCodeConstantData::WellKnown(&MADD_CONST));
4581+
let madd_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
4582+
ctx.emit(Inst::xmm_load_const(
4583+
madd_const,
4584+
madd_const_reg,
4585+
types::I16X8,
4586+
));
4587+
ctx.emit(Inst::xmm_rm_r(
4588+
SseOpcode::Pmaddwd,
4589+
RegMem::reg(madd_const_reg.to_reg()),
4590+
dst,
4591+
));
4592+
4593+
static ADDD_CONST2: [u8; 16] = [
4594+
0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00,
4595+
0x00, 0x00, 0x01, 0x00,
4596+
];
4597+
let addd_const2 =
4598+
ctx.use_constant(VCodeConstantData::WellKnown(&ADDD_CONST2));
4599+
let addd_const2_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
4600+
ctx.emit(Inst::xmm_load_const(
4601+
addd_const2,
4602+
addd_const2_reg,
4603+
types::I16X8,
4604+
));
4605+
ctx.emit(Inst::xmm_rm_r(
4606+
SseOpcode::Paddd,
4607+
RegMem::reg(addd_const2_reg.to_reg()),
4608+
dst,
4609+
));
4610+
}
4611+
_ => unreachable!(
4612+
"Type pattern not supported {:?}-{:?} not supported for {:?}.",
4613+
input_ty, output_ty, op
4614+
),
4615+
},
4616+
_ => unreachable!("{:?} not supported.", op),
4617+
}
4618+
*/
4619+
}
44984620
Opcode::UwidenHigh | Opcode::UwidenLow | Opcode::SwidenHigh | Opcode::SwidenLow => {
44994621
let input_ty = ctx.input_ty(insn, 0);
45004622
let output_ty = ctx.output_ty(insn, 0);
0 Bytes
Binary file not shown.

cranelift/interpreter/src/step.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,8 @@ where
576576
Opcode::Fence => unimplemented!("Fence"),
577577
Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"),
578578
Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"),
579+
Opcode::ExtendedPairwiseAddSigned => unimplemented!("ExtendedPairwiseAddSigned"),
580+
Opcode::ExtendedPairwiseAddUnsigned => unimplemented!("ExtendedPairwiseAddUnsigned"),
579581

580582
// TODO: these instructions should be removed once the new backend makes these obsolete
581583
// (see https://github.com/bytecodealliance/wasmtime/issues/1936); additionally, the

cranelift/wasm/src/code_translator.rs

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1874,6 +1874,22 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
18741874
let a = pop1_with_bitcast(state, I32X4, builder);
18751875
state.push1(builder.ins().uwiden_high(a))
18761876
}
1877+
Operator::I16x8ExtAddPairwiseI8x16S => {
1878+
let a = pop1_with_bitcast(state, I8X16, builder);
1879+
state.push1(builder.ins().extended_pairwise_add_signed(a))
1880+
}
1881+
Operator::I32x4ExtAddPairwiseI16x8S => {
1882+
let a = pop1_with_bitcast(state, I16X8, builder);
1883+
state.push1(builder.ins().extended_pairwise_add_signed(a))
1884+
}
1885+
Operator::I16x8ExtAddPairwiseI8x16U => {
1886+
let a = pop1_with_bitcast(state, I8X16, builder);
1887+
state.push1(builder.ins().extended_pairwise_add_unsigned(a))
1888+
}
1889+
Operator::I32x4ExtAddPairwiseI16x8U => {
1890+
let a = pop1_with_bitcast(state, I16X8, builder);
1891+
state.push1(builder.ins().extended_pairwise_add_unsigned(a))
1892+
}
18771893
Operator::F32x4Ceil | Operator::F64x2Ceil => {
18781894
// This is something of a misuse of `type_of`, because that produces the return type
18791895
// of `op`. In this case we want the arg type, but we know it's the same as the
@@ -1922,7 +1938,9 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
19221938
| Operator::I16x8ExtAddPairwiseI8x16U
19231939
| Operator::I32x4ExtAddPairwiseI16x8S
19241940
| Operator::I32x4ExtAddPairwiseI16x8U
1925-
| Operator::F64x2ConvertLowI32x4U => {
1941+
| Operator::F64x2ConvertLowI32x4U
1942+
| Operator::I32x4TruncSatF64x2SZero
1943+
| Operator::I32x4TruncSatF64x2UZero => {
19261944
return Err(wasm_unsupported!("proposed simd operator {:?}", op));
19271945
}
19281946
Operator::ReturnCall { .. } | Operator::ReturnCallIndirect { .. } => {

0 commit comments

Comments
 (0)