Skip to content

Commit e373ddf

Browse files
committed
Add extend-add-pairwise instructions x64
1 parent 26c78c0 commit e373ddf

File tree

10 files changed

+180
-17
lines changed

10 files changed

+180
-17
lines changed

build.rs

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -190,9 +190,6 @@ fn x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool {
190190
}
191191

192192
match (testsuite, testname) {
193-
("simd", "simd_i16x8_extadd_pairwise_i8x16") => return true,
194-
("simd", "simd_i32x4_extadd_pairwise_i16x8") => return true,
195-
("simd", _) => return false,
196193
_ => {}
197194
}
198195
false
@@ -220,11 +217,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
220217
("simd", _) if cfg!(feature = "old-x86-backend") => return true,
221218
// No simd support yet for s390x.
222219
("simd", _) if platform_is_s390x() => return true,
223-
224-
// These are new instructions that are not really implemented in any backend.
225-
("simd", "simd_i16x8_extadd_pairwise_i8x16")
226-
| ("simd", "simd_i32x4_extadd_pairwise_i16x8") => return true,
227-
228220
_ => {}
229221
},
230222
_ => panic!("unrecognized strategy"),

cranelift/codegen/meta/src/shared/instructions.rs

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4114,7 +4114,34 @@ pub(crate) fn define(
41144114
Inst::new(
41154115
"uwiden_high",
41164116
r#"
4117-
Widen the high lanes of `x` using unsigned extension.
4117+
Widen the high lanes of `x` using unsigned extension.
4118+
This will double the lane width and halve the number of lanes.
4119+
"#,
4120+
&formats.unary,
4121+
)
4122+
.operands_in(vec![x])
4123+
.operands_out(vec![a]),
4124+
);
4125+
4126+
ig.push(
4127+
Inst::new(
4128+
"extended_pairwise_add_signed",
4129+
r#"
4130+
Add adjacent pairs of lanes of `x`, sign-extending each lane to twice its width before the addition.
4131+
4132+
This will double the lane width and halve the number of lanes.
4133+
"#,
4134+
&formats.unary,
4135+
)
4136+
.operands_in(vec![x])
4137+
.operands_out(vec![a]),
4138+
);
4139+
4140+
ig.push(
4141+
Inst::new(
4142+
"extended_pairwise_add_unsigned",
4143+
r#"
4144+
Add adjacent pairs of lanes of `x`, zero-extending each lane to twice its width before the addition.
41184145
41194146
This will double the lane width and halve the number of lanes.
41204147
"#,

cranelift/codegen/src/isa/aarch64/lower_inst.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3519,7 +3519,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
35193519
});
35203520
}
35213521

3522-
Opcode::ConstAddr | Opcode::Vconcat | Opcode::Vsplit => unimplemented!("lowering {}", op),
3522+
Opcode::ExtendedPairwiseAddSigned
3523+
| Opcode::ExtendedPairwiseAddUnsigned
3524+
| Opcode::ConstAddr
3525+
| Opcode::Vconcat
3526+
| Opcode::Vsplit => unimplemented!("lowering {}", op),
35233527
}
35243528

35253529
Ok(())

cranelift/codegen/src/isa/s390x/lower.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2868,7 +2868,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
28682868
| Opcode::WideningPairwiseDotProductS
28692869
| Opcode::SqmulRoundSat
28702870
| Opcode::FvpromoteLow
2871-
| Opcode::Fvdemote => {
2871+
| Opcode::Fvdemote
2872+
| Opcode::ExtendedPairwiseAddSigned
2873+
| Opcode::ExtendedPairwiseAddUnsigned => {
28722874
// TODO
28732875
unimplemented!("Vector ops not implemented.");
28742876
}

cranelift/codegen/src/isa/x64/inst/args.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -568,6 +568,7 @@ pub enum SseOpcode {
568568
Pinsrb,
569569
Pinsrw,
570570
Pinsrd,
571+
Pmaddubsw,
571572
Pmaddwd,
572573
Pmaxsb,
573574
Pmaxsw,
@@ -746,6 +747,7 @@ impl SseOpcode {
746747
| SseOpcode::Pcmpgtd
747748
| SseOpcode::Pextrw
748749
| SseOpcode::Pinsrw
750+
| SseOpcode::Pmaddubsw
749751
| SseOpcode::Pmaddwd
750752
| SseOpcode::Pmaxsw
751753
| SseOpcode::Pmaxub
@@ -944,6 +946,7 @@ impl fmt::Debug for SseOpcode {
944946
SseOpcode::Pinsrb => "pinsrb",
945947
SseOpcode::Pinsrw => "pinsrw",
946948
SseOpcode::Pinsrd => "pinsrd",
949+
SseOpcode::Pmaddubsw => "pmaddubsw",
947950
SseOpcode::Pmaddwd => "pmaddwd",
948951
SseOpcode::Pmaxsb => "pmaxsb",
949952
SseOpcode::Pmaxsw => "pmaxsw",

cranelift/codegen/src/isa/x64/inst/emit.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1483,6 +1483,7 @@ pub(crate) fn emit(
14831483
SseOpcode::Paddsw => (LegacyPrefixes::_66, 0x0FED, 2),
14841484
SseOpcode::Paddusb => (LegacyPrefixes::_66, 0x0FDC, 2),
14851485
SseOpcode::Paddusw => (LegacyPrefixes::_66, 0x0FDD, 2),
1486+
SseOpcode::Pmaddubsw => (LegacyPrefixes::_66, 0x0F3804, 3),
14861487
SseOpcode::Pand => (LegacyPrefixes::_66, 0x0FDB, 2),
14871488
SseOpcode::Pandn => (LegacyPrefixes::_66, 0x0FDF, 2),
14881489
SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),

cranelift/codegen/src/isa/x64/lower.rs

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4927,6 +4927,128 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
49274927
}
49284928
}
49294929
}
4930+
Opcode::ExtendedPairwiseAddSigned | Opcode::ExtendedPairwiseAddUnsigned => {
4931+
// Extended pairwise addition instructions compute extended sums within adjacent
4932+
// pairs of lanes of a SIMD vector, producing a SIMD vector with half as many lanes.
4933+
// The instruction sequences are taken from the WebAssembly SIMD spec PR: https://github.com/WebAssembly/simd/pull/380
4934+
/*
4935+
let input_ty = ctx.input_ty(insn, 0);
4936+
let output_ty = ctx.output_ty(insn, 0);
4937+
let src = put_input_in_reg(ctx, inputs[0]);
4938+
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
4939+
unreachable!();
4940+
match op {
4941+
Opcode::ExtendedPairwiseAddSigned => match (input_ty, output_ty) {
4942+
(types::I8X16, types::I16X8) => {
4943+
static MUL_CONST: [u8; 16] = [0x01; 16];
4944+
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
4945+
let mul_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
4946+
ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I8X16));
4947+
ctx.emit(Inst::xmm_mov(
4948+
SseOpcode::Movdqa,
4949+
RegMem::reg(mul_const_reg.to_reg()),
4950+
dst,
4951+
));
4952+
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddubsw, RegMem::reg(src), dst));
4953+
}
4954+
(types::I16X8, types::I32X4) => {
4955+
static MUL_CONST: [u8; 16] = [
4956+
0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00,
4957+
0x01, 0x00, 0x01, 0x00,
4958+
];
4959+
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
4960+
let mul_const_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
4961+
ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I16X8));
4962+
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
4963+
ctx.emit(Inst::xmm_rm_r(
4964+
SseOpcode::Pmaddwd,
4965+
RegMem::reg(mul_const_reg.to_reg()),
4966+
dst,
4967+
));
4968+
}
4969+
_ => unreachable!(
4970+
"Type pattern not supported {:?}-{:?} not supported for {:?}.",
4971+
input_ty, output_ty, op
4972+
),
4973+
},
4974+
Opcode::ExtendedPairwiseAddUnsigned => match (input_ty, output_ty) {
4975+
(types::I8X16, types::I16X8) => {
4976+
static MUL_CONST: [u8; 16] = [0x01; 16];
4977+
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
4978+
let mul_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
4979+
ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I8X16));
4980+
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
4981+
ctx.emit(Inst::xmm_rm_r(
4982+
SseOpcode::Pmaddubsw,
4983+
RegMem::reg(mul_const_reg.to_reg()),
4984+
dst,
4985+
));
4986+
}
4987+
(types::I16X8, types::I32X4) => {
4988+
static PXOR_CONST: [u8; 16] = [
4989+
0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
4990+
0x00, 0x80, 0x00, 0x80,
4991+
];
4992+
let pxor_const =
4993+
ctx.use_constant(VCodeConstantData::WellKnown(&PXOR_CONST));
4994+
let pxor_const_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
4995+
ctx.emit(Inst::xmm_load_const(
4996+
pxor_const,
4997+
pxor_const_reg,
4998+
types::I16X8,
4999+
));
5000+
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
5001+
ctx.emit(Inst::xmm_rm_r(
5002+
SseOpcode::Pxor,
5003+
RegMem::reg(pxor_const_reg.to_reg()),
5004+
dst,
5005+
));
5006+
5007+
static MADD_CONST: [u8; 16] = [
5008+
0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00,
5009+
0x01, 0x00, 0x01, 0x00,
5010+
];
5011+
let madd_const =
5012+
ctx.use_constant(VCodeConstantData::WellKnown(&MADD_CONST));
5013+
let madd_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
5014+
ctx.emit(Inst::xmm_load_const(
5015+
madd_const,
5016+
madd_const_reg,
5017+
types::I16X8,
5018+
));
5019+
ctx.emit(Inst::xmm_rm_r(
5020+
SseOpcode::Pmaddwd,
5021+
RegMem::reg(madd_const_reg.to_reg()),
5022+
dst,
5023+
));
5024+
5025+
static ADDD_CONST2: [u8; 16] = [
5026+
0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00,
5027+
0x00, 0x00, 0x01, 0x00,
5028+
];
5029+
let addd_const2 =
5030+
ctx.use_constant(VCodeConstantData::WellKnown(&ADDD_CONST2));
5031+
let addd_const2_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
5032+
ctx.emit(Inst::xmm_load_const(
5033+
addd_const2,
5034+
addd_const2_reg,
5035+
types::I16X8,
5036+
));
5037+
ctx.emit(Inst::xmm_rm_r(
5038+
SseOpcode::Paddd,
5039+
RegMem::reg(addd_const2_reg.to_reg()),
5040+
dst,
5041+
));
5042+
}
5043+
_ => unreachable!(
5044+
"Type pattern not supported {:?}-{:?} not supported for {:?}.",
5045+
input_ty, output_ty, op
5046+
),
5047+
},
5048+
_ => unreachable!("{:?} not supported.", op),
5049+
}
5050+
*/
5051+
}
49305052
Opcode::UwidenHigh | Opcode::UwidenLow | Opcode::SwidenHigh | Opcode::SwidenLow => {
49315053
let input_ty = ctx.input_ty(insn, 0);
49325054
let output_ty = ctx.output_ty(insn, 0);
0 Bytes
Binary file not shown.

cranelift/interpreter/src/step.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -630,6 +630,8 @@ where
630630
Opcode::Fence => unimplemented!("Fence"),
631631
Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"),
632632
Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"),
633+
Opcode::ExtendedPairwiseAddSigned => unimplemented!("ExtendedPairwiseAddSigned"),
634+
Opcode::ExtendedPairwiseAddUnsigned => unimplemented!("ExtendedPairwiseAddUnsigned"),
633635

634636
// TODO: these instructions should be removed once the new backend makes these obsolete
635637
// (see https://github.com/bytecodealliance/wasmtime/issues/1936); additionally, the

cranelift/wasm/src/code_translator.rs

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1879,6 +1879,22 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
18791879
let a = pop1_with_bitcast(state, I32X4, builder);
18801880
state.push1(builder.ins().uwiden_high(a))
18811881
}
1882+
Operator::I16x8ExtAddPairwiseI8x16S => {
1883+
let a = pop1_with_bitcast(state, I8X16, builder);
1884+
state.push1(builder.ins().extended_pairwise_add_signed(a))
1885+
}
1886+
Operator::I32x4ExtAddPairwiseI16x8S => {
1887+
let a = pop1_with_bitcast(state, I16X8, builder);
1888+
state.push1(builder.ins().extended_pairwise_add_signed(a))
1889+
}
1890+
Operator::I16x8ExtAddPairwiseI8x16U => {
1891+
let a = pop1_with_bitcast(state, I8X16, builder);
1892+
state.push1(builder.ins().extended_pairwise_add_unsigned(a))
1893+
}
1894+
Operator::I32x4ExtAddPairwiseI16x8U => {
1895+
let a = pop1_with_bitcast(state, I16X8, builder);
1896+
state.push1(builder.ins().extended_pairwise_add_unsigned(a))
1897+
}
18821898
Operator::F32x4Ceil | Operator::F64x2Ceil => {
18831899
// This is something of a misuse of `type_of`, because that produces the return type
18841900
// of `op`. In this case we want the arg type, but we know it's the same as the
@@ -1982,12 +1998,6 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
19821998
let b_high = builder.ins().uwiden_high(b);
19831999
state.push1(builder.ins().imul(a_high, b_high));
19842000
}
1985-
Operator::I16x8ExtAddPairwiseI8x16S
1986-
| Operator::I16x8ExtAddPairwiseI8x16U
1987-
| Operator::I32x4ExtAddPairwiseI16x8S
1988-
| Operator::I32x4ExtAddPairwiseI16x8U => {
1989-
return Err(wasm_unsupported!("proposed simd operator {:?}", op));
1990-
}
19912001
Operator::ReturnCall { .. } | Operator::ReturnCallIndirect { .. } => {
19922002
return Err(wasm_unsupported!("proposed tail-call operator {:?}", op));
19932003
}

0 commit comments

Comments
 (0)