diff --git a/cranelift/assembler-x64/meta/src/instructions.rs b/cranelift/assembler-x64/meta/src/instructions.rs index 9c677fed3646..ddd811b4edcf 100644 --- a/cranelift/assembler-x64/meta/src/instructions.rs +++ b/cranelift/assembler-x64/meta/src/instructions.rs @@ -14,6 +14,7 @@ mod or; mod shift; mod sqrt; mod sub; +mod unpack; mod xor; use crate::dsl::Inst; @@ -36,5 +37,6 @@ pub fn list() -> Vec { all.extend(sqrt::list()); all.extend(sub::list()); all.extend(xor::list()); + all.extend(unpack::list()); all } diff --git a/cranelift/assembler-x64/meta/src/instructions/unpack.rs b/cranelift/assembler-x64/meta/src/instructions/unpack.rs new file mode 100644 index 000000000000..3bc08a0a1fb6 --- /dev/null +++ b/cranelift/assembler-x64/meta/src/instructions/unpack.rs @@ -0,0 +1,20 @@ +use crate::dsl::{Feature::*, Inst, Location::*}; +use crate::dsl::{fmt, inst, r, rex, rw}; + +#[rustfmt::skip] // Keeps instructions on a single line. +pub fn list() -> Vec { + vec![ + // Vector instructions. + inst("unpcklps", fmt("A", [rw(xmm1), r(xmm_m128)]), rex([0xF, 0x14]).r(), _64b | compat | sse), + inst("unpcklpd", fmt("A", [rw(xmm1), r(xmm_m128)]), rex([0x66, 0x0F, 0x14]).r(), _64b | compat | sse2), + inst("unpckhps", fmt("A", [rw(xmm1), r(xmm_m128)]), rex([0xF, 0x15]).r(), _64b | compat | sse), + inst("punpckhbw", fmt("A", [rw(xmm1), r(xmm_m128)]), rex([0x66, 0x0F, 0x68]).r(), _64b | compat | sse2), + inst("punpckhwd", fmt("A", [rw(xmm1), r(xmm_m128)]), rex([0x66, 0x0F, 0x69]).r(), _64b | compat | sse2), + inst("punpckhdq", fmt("A", [rw(xmm1), r(xmm_m128)]), rex([0x66, 0x0F, 0x6A]).r(), _64b | compat | sse2), + inst("punpcklwd", fmt("A", [rw(xmm1), r(xmm_m128)]), rex([0x66, 0x0F, 0x61]).r(), _64b | compat | sse2), + inst("punpcklqdq", fmt("A", [rw(xmm1), r(xmm_m128)]), rex([0x66, 0x0F, 0x6C]).r(), _64b | compat | sse2), + inst("punpcklbw", fmt("A", [rw(xmm1), r(xmm_m128)]), rex([0x66, 0x0F, 0x60]).r(), _64b | compat | sse2), + inst("punpckldq", fmt("A", [rw(xmm1), r(xmm_m128)]), rex([0x66, 0x0F, 0x62]).r(), _64b | compat | sse2), + inst("punpckhqdq", fmt("A", [rw(xmm1), r(xmm_m128)]), rex([0x66, 0x0F, 0x6D]).r(), _64b | compat | sse2), + ] +} diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 5cc0f94ebb02..a0556013172d 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -816,10 +816,6 @@ Pshufb Pshufd Ptest - Punpckhbw - Punpckhwd - Punpcklbw - Punpcklwd Rcpss Roundps Roundpd @@ -829,13 +825,6 @@ Shufps Ucomiss Ucomisd - Unpcklps - Unpcklpd - Unpckhps - Punpckhdq - Punpckldq - Punpckhqdq - Punpcklqdq Pshuflw Pshufhw Pblendw @@ -3445,7 +3434,7 @@ ;; Helper for creating `punpckhwd` instructions. (decl x64_punpckhwd (Xmm XmmMem) Xmm) (rule 0 (x64_punpckhwd src1 src2) - (xmm_rm_r (SseOpcode.Punpckhwd) src1 src2)) + (x64_punpckhwd_a src1 src2)) (rule 1 (x64_punpckhwd src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vpunpckhwd) src1 src2)) @@ -3453,7 +3442,7 @@ ;; Helper for creating `punpcklwd` instructions. (decl x64_punpcklwd (Xmm XmmMem) Xmm) (rule 0 (x64_punpcklwd src1 src2) - (xmm_rm_r (SseOpcode.Punpcklwd) src1 src2)) + (x64_punpcklwd_a src1 src2)) (rule 1 (x64_punpcklwd src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vpunpcklwd) src1 src2)) @@ -3461,7 +3450,7 @@ ;; Helper for creating `punpckldq` instructions. (decl x64_punpckldq (Xmm XmmMem) Xmm) (rule 0 (x64_punpckldq src1 src2) - (xmm_rm_r (SseOpcode.Punpckldq) src1 src2)) + (x64_punpckldq_a src1 src2)) (rule 1 (x64_punpckldq src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vpunpckldq) src1 src2)) @@ -3469,7 +3458,7 @@ ;; Helper for creating `punpckhdq` instructions. (decl x64_punpckhdq (Xmm XmmMem) Xmm) (rule 0 (x64_punpckhdq src1 src2) - (xmm_rm_r (SseOpcode.Punpckhdq) src1 src2)) + (x64_punpckhdq_a src1 src2)) (rule 1 (x64_punpckhdq src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vpunpckhdq) src1 src2)) @@ -3477,7 +3466,7 @@ ;; Helper for creating `punpcklqdq` instructions. (decl x64_punpcklqdq (Xmm XmmMem) Xmm) (rule 0 (x64_punpcklqdq src1 src2) - (xmm_rm_r (SseOpcode.Punpcklqdq) src1 src2)) + (x64_punpcklqdq_a src1 src2)) (rule 1 (x64_punpcklqdq src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vpunpcklqdq) src1 src2)) @@ -3485,7 +3474,7 @@ ;; Helper for creating `punpckhqdq` instructions. (decl x64_punpckhqdq (Xmm XmmMem) Xmm) (rule 0 (x64_punpckhqdq src1 src2) - (xmm_rm_r (SseOpcode.Punpckhqdq) src1 src2)) + (x64_punpckhqdq_a src1 src2)) (rule 1 (x64_punpckhqdq src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vpunpckhqdq) src1 src2)) @@ -3493,7 +3482,7 @@ ;; Helper for creating `unpcklps` instructions. (decl x64_unpcklps (Xmm XmmMem) Xmm) (rule 0 (x64_unpcklps src1 src2) - (xmm_rm_r (SseOpcode.Unpcklps) src1 src2)) + (x64_unpcklps_a src1 src2)) (rule 1 (x64_unpcklps src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vunpcklps) src1 src2)) @@ -3501,7 +3490,7 @@ ;; Helper for creating `unpcklpd` instructions. (decl x64_unpcklpd (Xmm XmmMem) Xmm) (rule 0 (x64_unpcklpd src1 src2) - (xmm_rm_r (SseOpcode.Unpcklpd) src1 src2)) + (x64_unpcklpd_a src1 src2)) (rule 1 (x64_unpcklpd src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vunpcklpd) src1 src2)) @@ -3509,7 +3498,7 @@ ;; Helper for creating `unpckhps` instructions. (decl x64_unpckhps (Xmm XmmMem) Xmm) (rule 0 (x64_unpckhps src1 src2) - (xmm_rm_r (SseOpcode.Unpckhps) src1 src2)) + (x64_unpckhps_a src1 src2)) (rule 1 (x64_unpckhps src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vunpckhps) src1 src2)) @@ -3804,7 +3793,7 @@ ;; Helper for creating `punpcklbw` instructions. (decl x64_punpcklbw (Xmm XmmMem) Xmm) (rule 0 (x64_punpcklbw src1 src2) - (xmm_rm_r (SseOpcode.Punpcklbw) src1 src2)) + (x64_punpcklbw_a src1 src2)) (rule 1 (x64_punpcklbw src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vpunpcklbw) src1 src2)) @@ -3812,7 +3801,7 @@ ;; Helper for creating `punpckhbw` instructions. (decl x64_punpckhbw (Xmm XmmMem) Xmm) (rule 0 (x64_punpckhbw src1 src2) - (xmm_rm_r (SseOpcode.Punpckhbw) src1 src2)) + (x64_punpckhbw_a src1 src2)) (rule 1 (x64_punpckhbw src1 src2) (if-let true (use_avx)) (xmm_rmir_vex (AvxOpcode.Vpunpckhbw) src1 src2)) diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 3543020394e2..9d7ebbd5e2dd 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -886,10 +886,6 @@ pub enum SseOpcode { Pshufb, Pshufd, Ptest, - Punpckhbw, - Punpckhwd, - Punpcklbw, - Punpcklwd, Rcpss, Roundps, Roundpd, @@ -899,13 +895,6 @@ pub enum SseOpcode { Shufps, Ucomiss, Ucomisd, - Unpcklps, - Unpcklpd, - Unpckhps, - Punpckhdq, - Punpckldq, - Punpckhqdq, - Punpcklqdq, Pshuflw, Pshufhw, Pblendw, @@ -929,9 +918,7 @@ impl SseOpcode { | SseOpcode::Rcpss | SseOpcode::Rsqrtss | SseOpcode::Shufps - | SseOpcode::Ucomiss - | SseOpcode::Unpcklps - | SseOpcode::Unpckhps => SSE, + | SseOpcode::Ucomiss => SSE, SseOpcode::Cmppd | SseOpcode::Cmpsd @@ -960,18 +947,9 @@ impl SseOpcode { | SseOpcode::Pmullw | SseOpcode::Pmuludq | SseOpcode::Pshufd - | SseOpcode::Punpckhbw - | SseOpcode::Punpckhwd - | SseOpcode::Punpcklbw - | SseOpcode::Punpcklwd | SseOpcode::Ucomisd - | SseOpcode::Punpckldq - | SseOpcode::Punpckhdq - | SseOpcode::Punpcklqdq - | SseOpcode::Punpckhqdq | SseOpcode::Pshuflw - | SseOpcode::Pshufhw - | SseOpcode::Unpcklpd => SSE2, + | SseOpcode::Pshufhw => SSE2, SseOpcode::Pabsb | SseOpcode::Pabsw @@ -1109,10 +1087,6 @@ impl fmt::Debug for SseOpcode { SseOpcode::Pshufb => "pshufb", SseOpcode::Pshufd => "pshufd", SseOpcode::Ptest => "ptest", - SseOpcode::Punpckhbw => "punpckhbw", - SseOpcode::Punpckhwd => "punpckhwd", - SseOpcode::Punpcklbw => "punpcklbw", - SseOpcode::Punpcklwd => "punpcklwd", SseOpcode::Rcpss => "rcpss", SseOpcode::Roundps => "roundps", SseOpcode::Roundpd => "roundpd", @@ -1122,17 +1096,10 @@ impl fmt::Debug for SseOpcode { SseOpcode::Shufps => "shufps", SseOpcode::Ucomiss => "ucomiss", SseOpcode::Ucomisd => "ucomisd", - SseOpcode::Unpcklps => "unpcklps", - SseOpcode::Unpckhps => "unpckhps", - SseOpcode::Punpckldq => "punpckldq", - SseOpcode::Punpckhdq => "punpckhdq", - SseOpcode::Punpcklqdq => "punpcklqdq", - SseOpcode::Punpckhqdq => "punpckhqdq", SseOpcode::Pshuflw => "pshuflw", SseOpcode::Pshufhw => "pshufhw", SseOpcode::Pblendw => "pblendw", SseOpcode::Movddup => "movddup", - SseOpcode::Unpcklpd => "unpcklpd", }; write!(fmt, "{name}") } diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 315e30727fcf..efebb1980d6e 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1779,18 +1779,7 @@ pub(crate) fn emit( SseOpcode::Pmullw => (LegacyPrefixes::_66, 0x0FD5, 2), SseOpcode::Pmuludq => (LegacyPrefixes::_66, 0x0FF4, 2), SseOpcode::Pshufb => (LegacyPrefixes::_66, 0x0F3800, 3), - SseOpcode::Punpckhbw => (LegacyPrefixes::_66, 0x0F68, 2), - SseOpcode::Punpckhwd => (LegacyPrefixes::_66, 0x0F69, 2), - SseOpcode::Punpcklbw => (LegacyPrefixes::_66, 0x0F60, 2), - SseOpcode::Punpcklwd => (LegacyPrefixes::_66, 0x0F61, 2), - SseOpcode::Punpckldq => (LegacyPrefixes::_66, 0x0F62, 2), - SseOpcode::Punpcklqdq => (LegacyPrefixes::_66, 0x0F6C, 2), - SseOpcode::Punpckhdq => (LegacyPrefixes::_66, 0x0F6A, 2), - SseOpcode::Punpckhqdq => (LegacyPrefixes::_66, 0x0F6D, 2), - SseOpcode::Unpcklps => (LegacyPrefixes::None, 0x0F14, 2), - SseOpcode::Unpckhps => (LegacyPrefixes::None, 0x0F15, 2), SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F10, 2), - SseOpcode::Unpcklpd => (LegacyPrefixes::_66, 0x0F14, 2), _ => unimplemented!("Opcode {:?} not implemented", op), }; diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 47c246403816..a5cd6a15412b 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -2791,36 +2791,6 @@ fn test_x64_emit() { "packuswb %xmm4, %xmm9, %xmm4", )); - insns.push(( - Inst::xmm_rm_r(SseOpcode::Punpckhbw, RegMem::reg(xmm3), w_xmm2), - "660F68D3", - "punpckhbw %xmm2, %xmm3, %xmm2", - )); - - insns.push(( - Inst::xmm_rm_r(SseOpcode::Punpckhwd, RegMem::reg(xmm13), w_xmm2), - "66410F69D5", - "punpckhwd %xmm2, %xmm13, %xmm2", - )); - - insns.push(( - Inst::xmm_rm_r(SseOpcode::Punpcklbw, RegMem::reg(xmm1), w_xmm8), - "66440F60C1", - "punpcklbw %xmm8, %xmm1, %xmm8", - )); - - insns.push(( - Inst::xmm_rm_r(SseOpcode::Punpcklwd, RegMem::reg(xmm11), w_xmm8), - "66450F61C3", - "punpcklwd %xmm8, %xmm11, %xmm8", - )); - - insns.push(( - Inst::xmm_rm_r(SseOpcode::Unpcklps, RegMem::reg(xmm11), w_xmm2), - "410F14D3", - "unpcklps %xmm2, %xmm11, %xmm2", - )); - // ======================================================== // XMM_RM_R: Integer Conversion diff --git a/cranelift/filetests/filetests/isa/x64/bitcast.clif b/cranelift/filetests/filetests/isa/x64/bitcast.clif index 7d7747ee4aed..a9f8fe52999a 100644 --- a/cranelift/filetests/filetests/isa/x64/bitcast.clif +++ b/cranelift/filetests/filetests/isa/x64/bitcast.clif @@ -194,7 +194,7 @@ block0(v0: i128): ; block0: ; movq %rdi, %xmm0 ; movq %rsi, %xmm5 -; punpcklqdq %xmm0, %xmm5, %xmm0 +; punpcklqdq %xmm5, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -252,7 +252,7 @@ block0(v0: i128): ; block0: ; movq %rdi, %xmm0 ; movq %rsi, %xmm5 -; punpcklqdq %xmm0, %xmm5, %xmm0 +; punpcklqdq %xmm5, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/fcvt.clif b/cranelift/filetests/filetests/isa/x64/fcvt.clif index 2b3143df6258..e711555f8407 100644 --- a/cranelift/filetests/filetests/isa/x64/fcvt.clif +++ b/cranelift/filetests/filetests/isa/x64/fcvt.clif @@ -338,7 +338,7 @@ block0(v0: i32x4): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; unpcklps %xmm0, const(0), %xmm0 +; unpcklps (%rip), %xmm0 ; subpd (%rip), %xmm0 ; movq %rbp, %rsp ; popq %rbp @@ -1215,7 +1215,7 @@ block0(v0: i64x2): ; pshufd $238, %xmm6, %xmm2 ; movq %xmm2, %rcx ; cvtsi2sdq %rcx, %xmm1 -; unpcklpd %xmm0, %xmm1, %xmm0 +; unpcklpd %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/shuffle.clif b/cranelift/filetests/filetests/isa/x64/shuffle.clif index 7bc1fce08d9f..82c3546a9fe4 100644 --- a/cranelift/filetests/filetests/isa/x64/shuffle.clif +++ b/cranelift/filetests/filetests/isa/x64/shuffle.clif @@ -11,7 +11,7 @@ block0(v0: i8x16, v1: i8x16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; punpcklbw %xmm0, %xmm1, %xmm0 +; punpcklbw %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -36,7 +36,7 @@ block0(v0: i8x16, v1: i8x16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; punpckhbw %xmm0, %xmm1, %xmm0 +; punpckhbw %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -64,7 +64,7 @@ block0(v0: i16x8, v1: i16x8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; punpcklwd %xmm0, %xmm1, %xmm0 +; punpcklwd %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -92,7 +92,7 @@ block0(v0: i16x8, v1: i16x8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; punpckhwd %xmm0, %xmm1, %xmm0 +; punpckhwd %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -232,7 +232,7 @@ block0(v0: i32x4, v1: i32x4): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; punpckldq %xmm0, %xmm1, %xmm0 +; punpckldq %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -260,7 +260,7 @@ block0(v0: i32x4, v1: i32x4): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; punpckhdq %xmm0, %xmm1, %xmm0 +; punpckhdq %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -288,7 +288,7 @@ block0(v0: i64x2, v1: i64x2): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; punpcklqdq %xmm0, %xmm1, %xmm0 +; punpcklqdq %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -316,7 +316,7 @@ block0(v0: i64x2, v1: i64x2): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; punpckhqdq %xmm0, %xmm1, %xmm0 +; punpckhqdq %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif index c746ea267497..568cb65a51fb 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif @@ -603,8 +603,8 @@ block0(v0: i32): ; movdqu const(0), %xmm1 ; andq $0x7, %rdi ; movdqa %xmm1, %xmm0 -; punpcklbw %xmm0, %xmm1, %xmm0 -; punpckhbw %xmm1, %xmm1, %xmm1 +; punpcklbw %xmm1, %xmm0 +; punpckhbw %xmm1, %xmm1 ; addl $0x8, %edi ; movd %edi, %xmm3 ; psraw %xmm3, %xmm0 @@ -653,8 +653,8 @@ block0(v0: i8x16, v1: i32): ; movq %rsp, %rbp ; block0: ; movdqa %xmm0, %xmm6 -; punpcklbw %xmm6, %xmm0, %xmm6 -; punpckhbw %xmm0, %xmm0, %xmm0 +; punpcklbw %xmm0, %xmm6 +; punpckhbw %xmm0, %xmm0 ; movdqa %xmm0, %xmm4 ; movdqa %xmm6, %xmm0 ; psraw $0xb, %xmm0 @@ -749,7 +749,7 @@ block0(v0: i64x2): ; pshufd $237, %xmm2, %xmm4 ; psrlq $0x1, %xmm0 ; pshufd $232, %xmm0, %xmm0 -; punpckldq %xmm0, %xmm4, %xmm0 +; punpckldq %xmm4, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -784,7 +784,7 @@ block0(v0: i64x2): ; psrad $0x1f, %xmm0 ; pshufd $237, %xmm0, %xmm6 ; movdqa %xmm5, %xmm0 -; punpckldq %xmm0, %xmm6, %xmm0 +; punpckldq %xmm6, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -819,7 +819,7 @@ block0(v0: i64x2): ; pshufd $237, %xmm2, %xmm4 ; psrad $0x16, %xmm0 ; pshufd $237, %xmm0, %xmm0 -; punpckldq %xmm0, %xmm4, %xmm0 +; punpckldq %xmm4, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -855,7 +855,7 @@ block0(v0: i64x2): ; pshufd $237, %xmm2, %xmm4 ; psrad $0x4, %xmm0 ; pshufd $237, %xmm0, %xmm0 -; punpckldq %xmm0, %xmm4, %xmm0 +; punpckldq %xmm4, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/simd-widen-mul.clif b/cranelift/filetests/filetests/isa/x64/simd-widen-mul.clif index cc6aea11e202..e9c1ce6508b4 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-widen-mul.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-widen-mul.clif @@ -53,7 +53,7 @@ block0(v0: i16x8, v1: i16x8): ; pmulhw %xmm1, %xmm0 ; movdqa %xmm0, %xmm2 ; movdqa %xmm5, %xmm0 -; punpckhwd %xmm0, %xmm2, %xmm0 +; punpckhwd %xmm2, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -152,7 +152,7 @@ block0(v0: i16x8, v1: i16x8): ; pmulhw %xmm1, %xmm0 ; movdqa %xmm0, %xmm2 ; movdqa %xmm5, %xmm0 -; punpcklwd %xmm0, %xmm2, %xmm0 +; punpcklwd %xmm2, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -217,10 +217,10 @@ block0(v0: i8x16, v1: i8x16): ; block0: ; uninit %xmm2 ; pxor %xmm2, %xmm2 -; punpckhbw %xmm0, %xmm2, %xmm0 +; punpckhbw %xmm2, %xmm0 ; uninit %xmm2 ; pxor %xmm2, %xmm2 -; punpckhbw %xmm1, %xmm2, %xmm1 +; punpckhbw %xmm2, %xmm1 ; pmullw %xmm1, %xmm0 ; movq %rbp, %rsp ; popq %rbp @@ -257,7 +257,7 @@ block0(v0: i16x8, v1: i16x8): ; pmulhuw %xmm1, %xmm0 ; movdqa %xmm0, %xmm2 ; movdqa %xmm5, %xmm0 -; punpckhwd %xmm0, %xmm2, %xmm0 +; punpckhwd %xmm2, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -356,7 +356,7 @@ block0(v0: i16x8, v1: i16x8): ; pmulhuw %xmm1, %xmm0 ; movdqa %xmm0, %xmm2 ; movdqa %xmm5, %xmm0 -; punpcklwd %xmm0, %xmm2, %xmm0 +; punpcklwd %xmm2, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/widen-high-bug.clif b/cranelift/filetests/filetests/isa/x64/widen-high-bug.clif index 1c8a52625654..8e2e734c606a 100644 --- a/cranelift/filetests/filetests/isa/x64/widen-high-bug.clif +++ b/cranelift/filetests/filetests/isa/x64/widen-high-bug.clif @@ -15,7 +15,7 @@ block0(v0: i64, v2: i8x16): ; movdqu 80(%rdi), %xmm0 ; uninit %xmm4 ; pxor %xmm4, %xmm4 -; punpckhbw %xmm0, %xmm4, %xmm0 +; punpckhbw %xmm4, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/widening.clif b/cranelift/filetests/filetests/isa/x64/widening.clif index 6c239a7e213d..60549a617e65 100644 --- a/cranelift/filetests/filetests/isa/x64/widening.clif +++ b/cranelift/filetests/filetests/isa/x64/widening.clif @@ -244,7 +244,7 @@ block0(v0: i8x16): ; block0: ; uninit %xmm3 ; pxor %xmm3, %xmm3 -; punpckhbw %xmm0, %xmm3, %xmm0 +; punpckhbw %xmm3, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -272,7 +272,7 @@ block0(v0: i16x8): ; block0: ; uninit %xmm3 ; pxor %xmm3, %xmm3 -; punpckhwd %xmm0, %xmm3, %xmm0 +; punpckhwd %xmm3, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -300,7 +300,7 @@ block0(v0: i32x4): ; block0: ; uninit %xmm3 ; xorps %xmm3, %xmm3 -; unpckhps %xmm0, %xmm3, %xmm0 +; unpckhps %xmm3, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret