Skip to content

Commit e519fca

Browse files
committed
Refactor and turn on lowering for extend-add-pairwise
1 parent e373ddf commit e519fca

File tree

8 files changed

+107
-80
lines changed

8 files changed

+107
-80
lines changed

build.rs

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -156,10 +156,8 @@ fn write_testsuite_tests(
156156
let testname = extract_name(path);
157157

158158
writeln!(out, "#[test]")?;
159-
if x64_should_panic(testsuite, &testname, strategy) {
160-
writeln!(out, r#"#[should_panic]"#)?;
161159
// Ignore when using QEMU for running tests (limited memory).
162-
} else if ignore(testsuite, &testname, strategy) || (pooling && platform_is_emulated()) {
160+
if ignore(testsuite, &testname, strategy) || (pooling && platform_is_emulated()) {
163161
writeln!(out, "#[ignore]")?;
164162
}
165163

@@ -182,19 +180,6 @@ fn write_testsuite_tests(
182180
Ok(())
183181
}
184182

185-
/// For x64 backend features that are not supported yet, mark tests as panicking, so
186-
/// they stop "passing" once the features are properly implemented.
187-
fn x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool {
188-
if !platform_is_x64() || strategy != "Cranelift" {
189-
return false;
190-
}
191-
192-
match (testsuite, testname) {
193-
_ => {}
194-
}
195-
false
196-
}
197-
198183
/// Ignore tests that aren't supported yet.
199184
fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
200185
match strategy {
@@ -217,6 +202,13 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
217202
("simd", _) if cfg!(feature = "old-x86-backend") => return true,
218203
// No simd support yet for s390x.
219204
("simd", _) if platform_is_s390x() => return true,
205+
// These are new instructions that are only known to be supported for x64.
206+
("simd", "simd_i16x8_extadd_pairwise_i8x16")
207+
| ("simd", "simd_i32x4_extadd_pairwise_i16x8")
208+
if !platform_is_x64() =>
209+
{
210+
return true
211+
}
220212
_ => {}
221213
},
222214
_ => panic!("unrecognized strategy"),

cranelift/codegen/meta/src/shared/instructions.rs

Lines changed: 15 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4114,40 +4114,34 @@ pub(crate) fn define(
41144114
Inst::new(
41154115
"uwiden_high",
41164116
r#"
4117-
Lane-wise integer extended pairwise addition producing extended results
4118-
(twice wider results than the input)
4119-
"#,
4120-
&formats.unary,
4121-
)
4122-
.operands_in(vec![x])
4123-
.operands_out(vec![a]),
4124-
);
4125-
4126-
ig.push(
4127-
Inst::new(
4128-
"extended_pairwise_add_signed",
4129-
r#"
4130-
Widen the high lanes of `x` using signed extension.
4117+
Widen the high lanes of `x` using unsigned extension.
41314118
4132-
This will double the lane width and halve the number of lanes.
4119+
This will double the lane width and halve the number of lanes.
41334120
"#,
41344121
&formats.unary,
41354122
)
41364123
.operands_in(vec![x])
41374124
.operands_out(vec![a]),
41384125
);
41394126

4127+
let x = &Operand::new("x", I8or16or32xN);
4128+
let y = &Operand::new("y", I8or16or32xN);
4129+
let a = &Operand::new("a", I8or16or32xN);
4130+
41404131
ig.push(
41414132
Inst::new(
4142-
"extended_pairwise_add_unsigned",
4133+
"iadd_pairwise",
41434134
r#"
4144-
Widen the high lanes of `x` extending with zeros.
4145-
4146-
This will double the lane width and halve the number of lanes.
4135+
Does lane-wise integer pairwise addition on two operands, putting the
4136+
combined results into a single vector result. Here a pair refers to adjacent
4137+
lanes in a vector, i.e. lane i*2 + lane (i*2+1) for each i in 0..num_lanes/2. The first operand
4138+
pairwise add results will make up the low half of the resulting vector while
4139+
the second operand pairwise add results will make up the upper half of the
4140+
resulting vector.
41474141
"#,
4148-
&formats.unary,
4142+
&formats.binary,
41494143
)
4150-
.operands_in(vec![x])
4144+
.operands_in(vec![x, y])
41514145
.operands_out(vec![a]),
41524146
);
41534147

cranelift/codegen/src/isa/aarch64/lower_inst.rs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3519,11 +3519,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
35193519
});
35203520
}
35213521

3522-
Opcode::ExtendedPairwiseAddSigned
3523-
| Opcode::ExtendedPairwiseAddUnsigned
3524-
| Opcode::ConstAddr
3525-
| Opcode::Vconcat
3526-
| Opcode::Vsplit => unimplemented!("lowering {}", op),
3522+
Opcode::IaddPairwise | Opcode::ConstAddr | Opcode::Vconcat | Opcode::Vsplit => {
3523+
unimplemented!("lowering {}", op)
3524+
}
35273525
}
35283526

35293527
Ok(())

cranelift/codegen/src/isa/s390x/lower.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2869,8 +2869,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
28692869
| Opcode::SqmulRoundSat
28702870
| Opcode::FvpromoteLow
28712871
| Opcode::Fvdemote
2872-
| Opcode::ExtendedPairwiseAddSigned
2873-
| Opcode::ExtendedPairwiseAddUnsigned => {
2872+
| Opcode::IaddPairwise => {
28742873
// TODO
28752874
unimplemented!("Vector ops not implemented.");
28762875
}

cranelift/codegen/src/isa/x64/lower.rs

Lines changed: 67 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4927,18 +4927,33 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
49274927
}
49284928
}
49294929
}
4930-
Opcode::ExtendedPairwiseAddSigned | Opcode::ExtendedPairwiseAddUnsigned => {
4931-
// Extended pairwise addition instructions compute extended sums within adjacent
4932-
// pairs of lanes of a SIMD vector, producing a SIMD vector with half as many lanes.
4933-
// Instruction sequences taken from instruction SPEC PR https://github.com/WebAssembly/simd/pull/380
4934-
/*
4935-
let input_ty = ctx.input_ty(insn, 0);
4936-
let output_ty = ctx.output_ty(insn, 0);
4937-
let src = put_input_in_reg(ctx, inputs[0]);
4938-
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
4939-
unreachable!();
4940-
match op {
4941-
Opcode::ExtendedPairwiseAddSigned => match (input_ty, output_ty) {
4930+
Opcode::IaddPairwise => {
4931+
if let (Some(swiden_low), Some(swiden_high)) = (
4932+
matches_input(ctx, inputs[0], Opcode::SwidenLow),
4933+
matches_input(ctx, inputs[1], Opcode::SwidenHigh),
4934+
) {
4935+
let swiden_input = &[
4936+
InsnInput {
4937+
insn: swiden_low,
4938+
input: 0,
4939+
},
4940+
InsnInput {
4941+
insn: swiden_high,
4942+
input: 0,
4943+
},
4944+
];
4945+
4946+
let input_ty = ctx.input_ty(swiden_low, 0);
4947+
let output_ty = ctx.output_ty(insn, 0);
4948+
let src0 = put_input_in_reg(ctx, swiden_input[0]);
4949+
let src1 = put_input_in_reg(ctx, swiden_input[1]);
4950+
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
4951+
if src0 != src1 {
4952+
unimplemented!(
4953+
"iadd_pairwise not implemented for general case with different inputs"
4954+
);
4955+
}
4956+
match (input_ty, output_ty) {
49424957
(types::I8X16, types::I16X8) => {
49434958
static MUL_CONST: [u8; 16] = [0x01; 16];
49444959
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
@@ -4949,7 +4964,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
49494964
RegMem::reg(mul_const_reg.to_reg()),
49504965
dst,
49514966
));
4952-
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddubsw, RegMem::reg(src), dst));
4967+
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddubsw, RegMem::reg(src0), dst));
49534968
}
49544969
(types::I16X8, types::I32X4) => {
49554970
static MUL_CONST: [u8; 16] = [
@@ -4959,25 +4974,49 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
49594974
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
49604975
let mul_const_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
49614976
ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I16X8));
4962-
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
4977+
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst));
49634978
ctx.emit(Inst::xmm_rm_r(
49644979
SseOpcode::Pmaddwd,
49654980
RegMem::reg(mul_const_reg.to_reg()),
49664981
dst,
49674982
));
49684983
}
4969-
_ => unreachable!(
4970-
"Type pattern not supported {:?}-{:?} not supported for {:?}.",
4971-
input_ty, output_ty, op
4972-
),
4973-
},
4974-
Opcode::ExtendedPairwiseAddUnsigned => match (input_ty, output_ty) {
4984+
_ => {
4985+
unimplemented!("Type not supported for {:?}", op);
4986+
}
4987+
}
4988+
} else if let (Some(uwiden_low), Some(uwiden_high)) = (
4989+
matches_input(ctx, inputs[0], Opcode::UwidenLow),
4990+
matches_input(ctx, inputs[1], Opcode::UwidenHigh),
4991+
) {
4992+
let uwiden_input = &[
4993+
InsnInput {
4994+
insn: uwiden_low,
4995+
input: 0,
4996+
},
4997+
InsnInput {
4998+
insn: uwiden_high,
4999+
input: 0,
5000+
},
5001+
];
5002+
5003+
let input_ty = ctx.input_ty(uwiden_low, 0);
5004+
let output_ty = ctx.output_ty(insn, 0);
5005+
let src0 = put_input_in_reg(ctx, uwiden_input[0]);
5006+
let src1 = put_input_in_reg(ctx, uwiden_input[1]);
5007+
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
5008+
if src0 != src1 {
5009+
unimplemented!(
5010+
"iadd_pairwise not implemented for general case with different inputs"
5011+
);
5012+
}
5013+
match (input_ty, output_ty) {
49755014
(types::I8X16, types::I16X8) => {
49765015
static MUL_CONST: [u8; 16] = [0x01; 16];
49775016
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
49785017
let mul_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
49795018
ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I8X16));
4980-
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
5019+
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst));
49815020
ctx.emit(Inst::xmm_rm_r(
49825021
SseOpcode::Pmaddubsw,
49835022
RegMem::reg(mul_const_reg.to_reg()),
@@ -4997,7 +5036,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
49975036
pxor_const_reg,
49985037
types::I16X8,
49995038
));
5000-
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
5039+
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst));
50015040
ctx.emit(Inst::xmm_rm_r(
50025041
SseOpcode::Pxor,
50035042
RegMem::reg(pxor_const_reg.to_reg()),
@@ -5021,7 +5060,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
50215060
RegMem::reg(madd_const_reg.to_reg()),
50225061
dst,
50235062
));
5024-
50255063
static ADDD_CONST2: [u8; 16] = [
50265064
0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00,
50275065
0x00, 0x00, 0x01, 0x00,
@@ -5040,14 +5078,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
50405078
dst,
50415079
));
50425080
}
5043-
_ => unreachable!(
5044-
"Type pattern not supported {:?}-{:?} not supported for {:?}.",
5045-
input_ty, output_ty, op
5046-
),
5047-
},
5048-
_ => unreachable!("{:?} not supported.", op),
5081+
_ => {
5082+
unimplemented!("Type not supported for {:?}", op);
5083+
}
5084+
}
5085+
} else {
5086+
unimplemented!("Operands not supported for {:?}", op);
50495087
}
5050-
*/
50515088
}
50525089
Opcode::UwidenHigh | Opcode::UwidenLow | Opcode::SwidenHigh | Opcode::SwidenLow => {
50535090
let input_ty = ctx.input_ty(insn, 0);
0 Bytes
Binary file not shown.

cranelift/interpreter/src/step.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -630,8 +630,7 @@ where
630630
Opcode::Fence => unimplemented!("Fence"),
631631
Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"),
632632
Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"),
633-
Opcode::ExtendedPairwiseAddSigned => unimplemented!("ExtendedPairwiseAddSigned"),
634-
Opcode::ExtendedPairwiseAddUnsigned => unimplemented!("ExtendedPairwiseAddUnsigned"),
633+
Opcode::IaddPairwise => unimplemented!("IaddPairwise"),
635634

636635
// TODO: these instructions should be removed once the new backend makes these obsolete
637636
// (see https://github.com/bytecodealliance/wasmtime/issues/1936); additionally, the

cranelift/wasm/src/code_translator.rs

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1881,19 +1881,27 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
18811881
}
18821882
Operator::I16x8ExtAddPairwiseI8x16S => {
18831883
let a = pop1_with_bitcast(state, I8X16, builder);
1884-
state.push1(builder.ins().extended_pairwise_add_signed(a))
1884+
let widen_low = builder.ins().swiden_low(a);
1885+
let widen_high = builder.ins().swiden_high(a);
1886+
state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
18851887
}
18861888
Operator::I32x4ExtAddPairwiseI16x8S => {
18871889
let a = pop1_with_bitcast(state, I16X8, builder);
1888-
state.push1(builder.ins().extended_pairwise_add_signed(a))
1890+
let widen_low = builder.ins().swiden_low(a);
1891+
let widen_high = builder.ins().swiden_high(a);
1892+
state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
18891893
}
18901894
Operator::I16x8ExtAddPairwiseI8x16U => {
18911895
let a = pop1_with_bitcast(state, I8X16, builder);
1892-
state.push1(builder.ins().extended_pairwise_add_unsigned(a))
1896+
let widen_low = builder.ins().uwiden_low(a);
1897+
let widen_high = builder.ins().uwiden_high(a);
1898+
state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
18931899
}
18941900
Operator::I32x4ExtAddPairwiseI16x8U => {
18951901
let a = pop1_with_bitcast(state, I16X8, builder);
1896-
state.push1(builder.ins().extended_pairwise_add_unsigned(a))
1902+
let widen_low = builder.ins().uwiden_low(a);
1903+
let widen_high = builder.ins().uwiden_high(a);
1904+
state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
18971905
}
18981906
Operator::F32x4Ceil | Operator::F64x2Ceil => {
18991907
// This is something of a misuse of `type_of`, because that produces the return type

0 commit comments

Comments
 (0)