diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs index b2eb08953ad3..61561b5c6b5f 100644 --- a/crates/wast-util/src/lib.rs +++ b/crates/wast-util/src/lib.rs @@ -428,7 +428,6 @@ impl WastTest { "misc_testsuite/simd/issue_3327_bnot_lowering.wast", "spec_testsuite/simd_bit_shift.wast", "spec_testsuite/simd_boolean.wast", - "spec_testsuite/simd_const.wast", "spec_testsuite/simd_conversions.wast", "spec_testsuite/simd_f32x4.wast", "spec_testsuite/simd_f32x4_arith.wast", @@ -499,6 +498,7 @@ impl WastTest { "misc_testsuite/simd/unaligned-load.wast", "multi-memory/simd_memory-multi.wast", "misc_testsuite/simd/issue4807.wast", + "spec_testsuite/simd_const.wast", ]; if unsupported.iter().any(|part| self.path.ends_with(part)) { diff --git a/tests/disas/winch/x64/i16x8/add/add.wat b/tests/disas/winch/x64/i16x8/add/add.wat new file mode 100644 index 000000000000..e2ac03e2fd69 --- /dev/null +++ b/tests/disas/winch/x64/i16x8/add/add.wat @@ -0,0 +1,45 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx" ] + +(module + (memory 1 1) + (func (result v128) + (i16x8.add + (v128.const i64x2 42 42) + (v128.const i64x2 1337 1337) + ))) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x4a +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movdqu 0x1c(%rip), %xmm0 +;; movdqu 0x24(%rip), %xmm1 +;; vpaddw %xmm0, %xmm1, %xmm1 +;; movdqa %xmm1, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 4a: ud2 +;; 4c: addb %al, (%rax) +;; 4e: addb %al, (%rax) +;; 50: cmpl %eax, (%rip) +;; 56: addb %al, (%rax) +;; 58: cmpl %eax, (%rip) +;; 5e: addb %al, (%rax) +;; 60: subb (%rax), %al +;; 62: addb %al, (%rax) +;; 64: addb %al, (%rax) +;; 66: addb %al, (%rax) +;; 68: subb (%rax), %al +;; 6a: addb %al, (%rax) +;; 6c: addb %al, (%rax) +;; 6e: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i16x8/add/add_sat_s.wat b/tests/disas/winch/x64/i16x8/add/add_sat_s.wat new file mode 100644 index 000000000000..0f5076182406 --- /dev/null +++ b/tests/disas/winch/x64/i16x8/add/add_sat_s.wat @@ -0,0 +1,45 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx" ] + +(module + (memory 1 1) + (func (result v128) + (i16x8.add_sat_s + (v128.const i64x2 42 42) + (v128.const i64x2 1337 1337) + ))) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x4a +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movdqu 0x1c(%rip), %xmm0 +;; movdqu 0x24(%rip), %xmm1 +;; vpaddsw %xmm0, %xmm1, %xmm1 +;; movdqa %xmm1, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 4a: ud2 +;; 4c: addb %al, (%rax) +;; 4e: addb %al, (%rax) +;; 50: cmpl %eax, (%rip) +;; 56: addb %al, (%rax) +;; 58: cmpl %eax, (%rip) +;; 5e: addb %al, (%rax) +;; 60: subb (%rax), %al +;; 62: addb %al, (%rax) +;; 64: addb %al, (%rax) +;; 66: addb %al, (%rax) +;; 68: subb (%rax), %al +;; 6a: addb %al, (%rax) +;; 6c: addb %al, (%rax) +;; 6e: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i16x8/add/add_sat_u.wat b/tests/disas/winch/x64/i16x8/add/add_sat_u.wat new file mode 100644 index 000000000000..91044110f38c --- /dev/null +++ b/tests/disas/winch/x64/i16x8/add/add_sat_u.wat @@ -0,0 +1,45 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx" ] + +(module + (memory 1 1) + (func (result v128) + (i16x8.add_sat_u + (v128.const i64x2 42 42) + (v128.const i64x2 1337 1337) + ))) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x4a +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movdqu 0x1c(%rip), %xmm0 +;; movdqu 0x24(%rip), %xmm1 +;; vpaddusw %xmm0, %xmm1, %xmm1 +;; movdqa %xmm1, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 4a: ud2 +;; 4c: addb %al, (%rax) +;; 4e: addb %al, (%rax) +;; 50: cmpl %eax, (%rip) +;; 56: addb %al, (%rax) +;; 58: cmpl %eax, (%rip) +;; 5e: addb %al, (%rax) +;; 60: subb (%rax), %al +;; 62: addb %al, (%rax) +;; 64: addb %al, (%rax) +;; 66: addb %al, (%rax) +;; 68: subb (%rax), %al +;; 6a: addb %al, (%rax) +;; 6c: addb %al, (%rax) +;; 6e: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i16x8_extract_lane_s/const_avx.wat b/tests/disas/winch/x64/i16x8/extract_lane_s/const_avx.wat similarity index 100% rename from tests/disas/winch/x64/i16x8_extract_lane_s/const_avx.wat rename to tests/disas/winch/x64/i16x8/extract_lane_s/const_avx.wat diff --git a/tests/disas/winch/x64/i16x8_extract_lane_u/const.wat b/tests/disas/winch/x64/i16x8/extract_lane_u/const.wat similarity index 100% rename from tests/disas/winch/x64/i16x8_extract_lane_u/const.wat rename to tests/disas/winch/x64/i16x8/extract_lane_u/const.wat diff --git a/tests/disas/winch/x64/i16x8/mul/mul.wat b/tests/disas/winch/x64/i16x8/mul/mul.wat new file mode 100644 index 000000000000..2b2a0193b2b4 --- /dev/null +++ b/tests/disas/winch/x64/i16x8/mul/mul.wat @@ -0,0 +1,45 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx" ] + +(module + (memory 1 1) + (func (result v128) + (i16x8.mul + (v128.const i64x2 42 42) + (v128.const i64x2 1337 1337) + ))) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x4a +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movdqu 0x1c(%rip), %xmm0 +;; movdqu 0x24(%rip), %xmm1 +;; vpmullw %xmm0, %xmm1, %xmm1 +;; movdqa %xmm1, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 4a: ud2 +;; 4c: addb %al, (%rax) +;; 4e: addb %al, (%rax) +;; 50: cmpl %eax, (%rip) +;; 56: addb %al, (%rax) +;; 58: cmpl %eax, (%rip) +;; 5e: addb %al, (%rax) +;; 60: subb (%rax), %al +;; 62: addb %al, (%rax) +;; 64: addb %al, (%rax) +;; 66: addb %al, (%rax) +;; 68: subb (%rax), %al +;; 6a: addb %al, (%rax) +;; 6c: addb %al, (%rax) +;; 6e: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i16x8_replace_lane/const_avx.wat b/tests/disas/winch/x64/i16x8/replace_lane/const_avx.wat similarity index 100% rename from tests/disas/winch/x64/i16x8_replace_lane/const_avx.wat rename to tests/disas/winch/x64/i16x8/replace_lane/const_avx.wat diff --git a/tests/disas/winch/x64/i16x8_replace_lane/param_avx.wat b/tests/disas/winch/x64/i16x8/replace_lane/param_avx.wat similarity index 100% rename from tests/disas/winch/x64/i16x8_replace_lane/param_avx.wat rename to tests/disas/winch/x64/i16x8/replace_lane/param_avx.wat diff --git a/tests/disas/winch/x64/i16x8_splat/const_avx2.wat b/tests/disas/winch/x64/i16x8/splat/const_avx2.wat similarity index 100% rename from tests/disas/winch/x64/i16x8_splat/const_avx2.wat rename to tests/disas/winch/x64/i16x8/splat/const_avx2.wat diff --git a/tests/disas/winch/x64/i16x8_splat/param_avx2.wat b/tests/disas/winch/x64/i16x8/splat/param_avx2.wat similarity index 100% rename from tests/disas/winch/x64/i16x8_splat/param_avx2.wat rename to 
tests/disas/winch/x64/i16x8/splat/param_avx2.wat diff --git a/tests/disas/winch/x64/i16x8/sub/sub.wat b/tests/disas/winch/x64/i16x8/sub/sub.wat new file mode 100644 index 000000000000..b028d315fba6 --- /dev/null +++ b/tests/disas/winch/x64/i16x8/sub/sub.wat @@ -0,0 +1,45 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! flags = [ "-Ccranelift-has-avx" ] + +(module + (memory 1 1) + (func (result v128) + (i16x8.sub + (v128.const i64x2 42 42) + (v128.const i64x2 1337 1337) + ))) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x4a +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movdqu 0x1c(%rip), %xmm0 +;; movdqu 0x24(%rip), %xmm1 +;; vpsubw %xmm0, %xmm1, %xmm1 +;; movdqa %xmm1, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 4a: ud2 +;; 4c: addb %al, (%rax) +;; 4e: addb %al, (%rax) +;; 50: cmpl %eax, (%rip) +;; 56: addb %al, (%rax) +;; 58: cmpl %eax, (%rip) +;; 5e: addb %al, (%rax) +;; 60: subb (%rax), %al +;; 62: addb %al, (%rax) +;; 64: addb %al, (%rax) +;; 66: addb %al, (%rax) +;; 68: subb (%rax), %al +;; 6a: addb %al, (%rax) +;; 6c: addb %al, (%rax) +;; 6e: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i16x8/sub/sub_sat_s.wat b/tests/disas/winch/x64/i16x8/sub/sub_sat_s.wat new file mode 100644 index 000000000000..c2a60bb6a66d --- /dev/null +++ b/tests/disas/winch/x64/i16x8/sub/sub_sat_s.wat @@ -0,0 +1,45 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx" ] + +(module + (memory 1 1) + (func (result v128) + (i16x8.sub_sat_s + (v128.const i64x2 42 42) + (v128.const i64x2 1337 1337) + ))) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x4a +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movdqu 0x1c(%rip), %xmm0 +;; movdqu 0x24(%rip), %xmm1 +;; vpsubsw %xmm0, %xmm1, %xmm1 +;; movdqa %xmm1, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 4a: ud2 +;; 4c: addb %al, (%rax) +;; 4e: addb %al, (%rax) +;; 50: cmpl %eax, (%rip) +;; 56: addb %al, (%rax) +;; 58: cmpl %eax, (%rip) +;; 5e: addb %al, (%rax) +;; 60: subb (%rax), %al +;; 62: addb %al, (%rax) +;; 64: addb %al, (%rax) +;; 66: addb %al, (%rax) +;; 68: subb (%rax), %al +;; 6a: addb %al, (%rax) +;; 6c: addb %al, (%rax) +;; 6e: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i16x8/sub/sub_sat_u.wat b/tests/disas/winch/x64/i16x8/sub/sub_sat_u.wat new file mode 100644 index 000000000000..9826ba671b6e --- /dev/null +++ b/tests/disas/winch/x64/i16x8/sub/sub_sat_u.wat @@ -0,0 +1,45 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx" ] + +(module + (memory 1 1) + (func (result v128) + (i16x8.sub_sat_u + (v128.const i64x2 42 42) + (v128.const i64x2 1337 1337) + ))) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x4a +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movdqu 0x1c(%rip), %xmm0 +;; movdqu 0x24(%rip), %xmm1 +;; vpsubusw %xmm0, %xmm1, %xmm1 +;; movdqa %xmm1, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 4a: ud2 +;; 4c: addb %al, (%rax) +;; 4e: addb %al, (%rax) +;; 50: cmpl %eax, (%rip) +;; 56: addb %al, (%rax) +;; 58: cmpl %eax, (%rip) +;; 5e: addb %al, (%rax) +;; 60: subb (%rax), %al +;; 62: addb %al, (%rax) +;; 64: addb %al, (%rax) +;; 66: addb %al, (%rax) +;; 68: subb (%rax), %al +;; 6a: addb %al, (%rax) +;; 6c: addb %al, (%rax) +;; 6e: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i32x4/add/add.wat b/tests/disas/winch/x64/i32x4/add/add.wat new file mode 100644 index 000000000000..4143e0eae2d4 --- /dev/null +++ b/tests/disas/winch/x64/i32x4/add/add.wat @@ -0,0 +1,45 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx" ] + +(module + (memory 1 1) + (func (result v128) + (i32x4.add + (v128.const i64x2 42 42) + (v128.const i64x2 1337 1337) + ))) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x4a +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movdqu 0x1c(%rip), %xmm0 +;; movdqu 0x24(%rip), %xmm1 +;; vpaddd %xmm0, %xmm1, %xmm1 +;; movdqa %xmm1, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 4a: ud2 +;; 4c: addb %al, (%rax) +;; 4e: addb %al, (%rax) +;; 50: cmpl %eax, (%rip) +;; 56: addb %al, (%rax) +;; 58: cmpl %eax, (%rip) +;; 5e: addb %al, (%rax) +;; 60: subb (%rax), %al +;; 62: addb %al, (%rax) +;; 64: addb %al, (%rax) +;; 66: addb %al, (%rax) +;; 68: subb (%rax), %al +;; 6a: addb %al, (%rax) +;; 6c: addb %al, (%rax) +;; 6e: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i32x4_extract_lane/const_avx.wat b/tests/disas/winch/x64/i32x4/extract_lane/const_avx.wat similarity index 100% rename from tests/disas/winch/x64/i32x4_extract_lane/const_avx.wat rename to tests/disas/winch/x64/i32x4/extract_lane/const_avx.wat diff --git a/tests/disas/winch/x64/i32x4/mul/mul.wat b/tests/disas/winch/x64/i32x4/mul/mul.wat new file mode 100644 index 000000000000..bbdacb23e55d --- /dev/null +++ b/tests/disas/winch/x64/i32x4/mul/mul.wat @@ -0,0 +1,45 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx" ] + +(module + (memory 1 1) + (func (result v128) + (i32x4.mul + (v128.const i64x2 42 42) + (v128.const i64x2 1337 1337) + ))) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x4b +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movdqu 0x1c(%rip), %xmm0 +;; movdqu 0x24(%rip), %xmm1 +;; vpmulld %xmm0, %xmm1, %xmm1 +;; movdqa %xmm1, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 4b: ud2 +;; 4d: addb %al, (%rax) +;; 4f: addb %bh, (%rcx) +;; 51: addl $0, %eax +;; 56: addb %al, (%rax) +;; 58: cmpl %eax, (%rip) +;; 5e: addb %al, (%rax) +;; 60: subb (%rax), %al +;; 62: addb %al, (%rax) +;; 64: addb %al, (%rax) +;; 66: addb %al, (%rax) +;; 68: subb (%rax), %al +;; 6a: addb %al, (%rax) +;; 6c: addb %al, (%rax) +;; 6e: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i32x4_replace_lane/const_avx.wat b/tests/disas/winch/x64/i32x4/replace_lane/const_avx.wat similarity index 100% rename from tests/disas/winch/x64/i32x4_replace_lane/const_avx.wat rename to tests/disas/winch/x64/i32x4/replace_lane/const_avx.wat diff --git a/tests/disas/winch/x64/i32x4_replace_lane/param_avx.wat b/tests/disas/winch/x64/i32x4/replace_lane/param_avx.wat similarity index 100% rename from tests/disas/winch/x64/i32x4_replace_lane/param_avx.wat rename to tests/disas/winch/x64/i32x4/replace_lane/param_avx.wat diff --git a/tests/disas/winch/x64/i32x4_splat/const_avx2.wat b/tests/disas/winch/x64/i32x4/splat/const_avx2.wat similarity index 100% rename from tests/disas/winch/x64/i32x4_splat/const_avx2.wat rename to tests/disas/winch/x64/i32x4/splat/const_avx2.wat diff --git a/tests/disas/winch/x64/i32x4_splat/param_avx2.wat b/tests/disas/winch/x64/i32x4/splat/param_avx2.wat similarity index 100% rename from tests/disas/winch/x64/i32x4_splat/param_avx2.wat rename to tests/disas/winch/x64/i32x4/splat/param_avx2.wat 
diff --git a/tests/disas/winch/x64/i32x4/sub/sub.wat b/tests/disas/winch/x64/i32x4/sub/sub.wat new file mode 100644 index 000000000000..72e77c7d94d3 --- /dev/null +++ b/tests/disas/winch/x64/i32x4/sub/sub.wat @@ -0,0 +1,45 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! flags = [ "-Ccranelift-has-avx" ] + +(module + (memory 1 1) + (func (result v128) + (i32x4.sub + (v128.const i64x2 42 42) + (v128.const i64x2 1337 1337) + ))) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x4a +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movdqu 0x1c(%rip), %xmm0 +;; movdqu 0x24(%rip), %xmm1 +;; vpsubd %xmm0, %xmm1, %xmm1 +;; movdqa %xmm1, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 4a: ud2 +;; 4c: addb %al, (%rax) +;; 4e: addb %al, (%rax) +;; 50: cmpl %eax, (%rip) +;; 56: addb %al, (%rax) +;; 58: cmpl %eax, (%rip) +;; 5e: addb %al, (%rax) +;; 60: subb (%rax), %al +;; 62: addb %al, (%rax) +;; 64: addb %al, (%rax) +;; 66: addb %al, (%rax) +;; 68: subb (%rax), %al +;; 6a: addb %al, (%rax) +;; 6c: addb %al, (%rax) +;; 6e: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i64x2/add/add.wat b/tests/disas/winch/x64/i64x2/add/add.wat new file mode 100644 index 000000000000..2014f6992ae5 --- /dev/null +++ b/tests/disas/winch/x64/i64x2/add/add.wat @@ -0,0 +1,37 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx" ] + +(module + (memory 1 1) + (func (result v128) + (i64x2.add + (i64x2.splat (i64.const 10)) + (i64x2.splat (i64.const 10)) + ))) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x48 +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; vpshufd $0x44, 0x1b(%rip), %xmm0 +;; vpshufd $0x44, 0x12(%rip), %xmm1 +;; vpaddq %xmm1, %xmm0, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 48: ud2 +;; 4a: addb %al, (%rax) +;; 4c: addb %al, (%rax) +;; 4e: addb %al, (%rax) +;; 50: orb (%rax), %al +;; 52: addb %al, (%rax) +;; 54: addb %al, (%rax) +;; 56: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i64x2_extract_lane/const.wat b/tests/disas/winch/x64/i64x2/extract_lane/const.wat similarity index 100% rename from tests/disas/winch/x64/i64x2_extract_lane/const.wat rename to tests/disas/winch/x64/i64x2/extract_lane/const.wat diff --git a/tests/disas/winch/x64/i64x2/mul/mul.wat b/tests/disas/winch/x64/i64x2/mul/mul.wat new file mode 100644 index 000000000000..8d86f137cbb4 --- /dev/null +++ b/tests/disas/winch/x64/i64x2/mul/mul.wat @@ -0,0 +1,36 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx512vl", "-Ccranelift-has-avx", "-Ccranelift-has-avx512dq", ] + +(module + (memory 1 1) + (func (result v128) + (i64x2.mul + (i64x2.splat (i64.const 10)) + (i64x2.splat (i64.const 10)) + ))) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x4a +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; vpshufd $0x44, 0x1b(%rip), %xmm0 +;; vpshufd $0x44, 0x12(%rip), %xmm1 +;; vpmullq %xmm1, %xmm0, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 4a: ud2 +;; 4c: addb %al, (%rax) +;; 4e: addb %al, (%rax) +;; 50: orb (%rax), %al +;; 52: addb %al, (%rax) +;; 54: addb %al, (%rax) +;; 56: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i64x2/mul/mul_fallback.wat b/tests/disas/winch/x64/i64x2/mul/mul_fallback.wat new file mode 100644 index 000000000000..d344433ce2a2 --- /dev/null +++ b/tests/disas/winch/x64/i64x2/mul/mul_fallback.wat @@ -0,0 +1,40 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx" ] + +(module + (memory 1 1) + (func (param v128 v128) (result v128) + (i64x2.mul + (local.get 0) + (local.get 1) + ))) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x30, %r11 +;; cmpq %rsp, %r11 +;; ja 0x72 +;; 1c: movq %rdi, %r14 +;; subq $0x30, %rsp +;; movq %rdi, 0x28(%rsp) +;; movq %rsi, 0x20(%rsp) +;; movdqu %xmm0, 0x10(%rsp) +;; movdqu %xmm1, (%rsp) +;; movdqu (%rsp), %xmm0 +;; movdqu 0x10(%rsp), %xmm1 +;; vpsrlq $0x20, %xmm1, %xmm15 +;; vpmuldq %xmm0, %xmm15, %xmm2 +;; vpsrlq $0x20, %xmm0, %xmm15 +;; vpmuludq %xmm1, %xmm15, %xmm15 +;; vpaddq %xmm2, %xmm15, %xmm15 +;; vpsllq $0x20, %xmm15, %xmm15 +;; vpmuludq %xmm0, %xmm1, %xmm2 +;; vpaddq %xmm2, %xmm15, %xmm1 +;; movdqa %xmm1, %xmm0 +;; addq $0x30, %rsp +;; popq %rbp +;; retq +;; 72: ud2 diff --git a/tests/disas/winch/x64/i64x2_replace_lane/const_avx.wat b/tests/disas/winch/x64/i64x2/replace_lane/const_avx.wat similarity index 100% rename from tests/disas/winch/x64/i64x2_replace_lane/const_avx.wat rename to tests/disas/winch/x64/i64x2/replace_lane/const_avx.wat diff --git a/tests/disas/winch/x64/i64x2_replace_lane/param_avx.wat b/tests/disas/winch/x64/i64x2/replace_lane/param_avx.wat similarity index 100% rename from tests/disas/winch/x64/i64x2_replace_lane/param_avx.wat rename to tests/disas/winch/x64/i64x2/replace_lane/param_avx.wat diff --git a/tests/disas/winch/x64/i64x2_splat/const_avx.wat b/tests/disas/winch/x64/i64x2/splat/const_avx.wat similarity index 100% rename from tests/disas/winch/x64/i64x2_splat/const_avx.wat rename to tests/disas/winch/x64/i64x2/splat/const_avx.wat diff --git a/tests/disas/winch/x64/i64x2_splat/param_avx.wat b/tests/disas/winch/x64/i64x2/splat/param_avx.wat similarity index 100% rename from tests/disas/winch/x64/i64x2_splat/param_avx.wat rename to tests/disas/winch/x64/i64x2/splat/param_avx.wat diff --git a/tests/disas/winch/x64/i64x2/sub/sub.wat 
b/tests/disas/winch/x64/i64x2/sub/sub.wat new file mode 100644 index 000000000000..d1d47143ce98 --- /dev/null +++ b/tests/disas/winch/x64/i64x2/sub/sub.wat @@ -0,0 +1,37 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! flags = [ "-Ccranelift-has-avx" ] + +(module + (memory 1 1) + (func (result v128) + (i64x2.sub + (i64x2.splat (i64.const 10)) + (i64x2.splat (i64.const 10)) + ))) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x48 +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; vpshufd $0x44, 0x1b(%rip), %xmm0 +;; vpshufd $0x44, 0x12(%rip), %xmm1 +;; vpsubq %xmm1, %xmm0, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 48: ud2 +;; 4a: addb %al, (%rax) +;; 4c: addb %al, (%rax) +;; 4e: addb %al, (%rax) +;; 50: orb (%rax), %al +;; 52: addb %al, (%rax) +;; 54: addb %al, (%rax) +;; 56: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i8x16/add/add.wat b/tests/disas/winch/x64/i8x16/add/add.wat new file mode 100644 index 000000000000..dff418defad0 --- /dev/null +++ b/tests/disas/winch/x64/i8x16/add/add.wat @@ -0,0 +1,45 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx" ] + +(module + (memory 1 1) + (func (result v128) + (i8x16.add + (v128.const i64x2 42 42) + (v128.const i64x2 1337 1337) + ))) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x4a +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movdqu 0x1c(%rip), %xmm0 +;; movdqu 0x24(%rip), %xmm1 +;; vpaddb %xmm0, %xmm1, %xmm1 +;; movdqa %xmm1, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 4a: ud2 +;; 4c: addb %al, (%rax) +;; 4e: addb %al, (%rax) +;; 50: cmpl %eax, (%rip) +;; 56: addb %al, (%rax) +;; 58: cmpl %eax, (%rip) +;; 5e: addb %al, (%rax) +;; 60: subb (%rax), %al +;; 62: addb %al, (%rax) +;; 64: addb %al, (%rax) +;; 66: addb %al, (%rax) +;; 68: subb (%rax), %al +;; 6a: addb %al, (%rax) +;; 6c: addb %al, (%rax) +;; 6e: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i8x16/add/add_sat_s.wat b/tests/disas/winch/x64/i8x16/add/add_sat_s.wat new file mode 100644 index 000000000000..c03ae6e3e33d --- /dev/null +++ b/tests/disas/winch/x64/i8x16/add/add_sat_s.wat @@ -0,0 +1,45 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx" ] + +(module + (memory 1 1) + (func (result v128) + (i8x16.add_sat_s + (v128.const i64x2 42 42) + (v128.const i64x2 1337 1337) + ))) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x4a +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movdqu 0x1c(%rip), %xmm0 +;; movdqu 0x24(%rip), %xmm1 +;; vpaddsb %xmm0, %xmm1, %xmm1 +;; movdqa %xmm1, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 4a: ud2 +;; 4c: addb %al, (%rax) +;; 4e: addb %al, (%rax) +;; 50: cmpl %eax, (%rip) +;; 56: addb %al, (%rax) +;; 58: cmpl %eax, (%rip) +;; 5e: addb %al, (%rax) +;; 60: subb (%rax), %al +;; 62: addb %al, (%rax) +;; 64: addb %al, (%rax) +;; 66: addb %al, (%rax) +;; 68: subb (%rax), %al +;; 6a: addb %al, (%rax) +;; 6c: addb %al, (%rax) +;; 6e: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i8x16/add/add_sat_u.wat b/tests/disas/winch/x64/i8x16/add/add_sat_u.wat new file mode 100644 index 000000000000..460b2ef6f692 --- /dev/null +++ b/tests/disas/winch/x64/i8x16/add/add_sat_u.wat @@ -0,0 +1,45 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx" ] + +(module + (memory 1 1) + (func (result v128) + (i8x16.add_sat_u + (v128.const i64x2 42 42) + (v128.const i64x2 1337 1337) + ))) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x4a +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movdqu 0x1c(%rip), %xmm0 +;; movdqu 0x24(%rip), %xmm1 +;; vpaddusb %xmm0, %xmm1, %xmm1 +;; movdqa %xmm1, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 4a: ud2 +;; 4c: addb %al, (%rax) +;; 4e: addb %al, (%rax) +;; 50: cmpl %eax, (%rip) +;; 56: addb %al, (%rax) +;; 58: cmpl %eax, (%rip) +;; 5e: addb %al, (%rax) +;; 60: subb (%rax), %al +;; 62: addb %al, (%rax) +;; 64: addb %al, (%rax) +;; 66: addb %al, (%rax) +;; 68: subb (%rax), %al +;; 6a: addb %al, (%rax) +;; 6c: addb %al, (%rax) +;; 6e: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i8x16_extract_lane_s/const_avx.wat b/tests/disas/winch/x64/i8x16/extract_lane_s/const_avx.wat similarity index 100% rename from tests/disas/winch/x64/i8x16_extract_lane_s/const_avx.wat rename to tests/disas/winch/x64/i8x16/extract_lane_s/const_avx.wat diff --git a/tests/disas/winch/x64/i8x16_extract_lane_u/const_avx.wat b/tests/disas/winch/x64/i8x16/extract_lane_u/const_avx.wat similarity index 100% rename from tests/disas/winch/x64/i8x16_extract_lane_u/const_avx.wat rename to tests/disas/winch/x64/i8x16/extract_lane_u/const_avx.wat diff --git a/tests/disas/winch/x64/i8x16_replace_lane/const_avx.wat b/tests/disas/winch/x64/i8x16/replace_lane/const_avx.wat similarity index 100% rename from tests/disas/winch/x64/i8x16_replace_lane/const_avx.wat rename to tests/disas/winch/x64/i8x16/replace_lane/const_avx.wat diff --git a/tests/disas/winch/x64/i8x16_replace_lane/param_avx.wat b/tests/disas/winch/x64/i8x16/replace_lane/param_avx.wat similarity index 100% rename from 
tests/disas/winch/x64/i8x16_replace_lane/param_avx.wat rename to tests/disas/winch/x64/i8x16/replace_lane/param_avx.wat diff --git a/tests/disas/winch/x64/i8x16_shuffle/const_avx.wat b/tests/disas/winch/x64/i8x16/shuffle/const_avx.wat similarity index 100% rename from tests/disas/winch/x64/i8x16_shuffle/const_avx.wat rename to tests/disas/winch/x64/i8x16/shuffle/const_avx.wat diff --git a/tests/disas/winch/x64/i8x16_splat/const_avx2.wat b/tests/disas/winch/x64/i8x16/splat/const_avx2.wat similarity index 100% rename from tests/disas/winch/x64/i8x16_splat/const_avx2.wat rename to tests/disas/winch/x64/i8x16/splat/const_avx2.wat diff --git a/tests/disas/winch/x64/i8x16_splat/param_avx2.wat b/tests/disas/winch/x64/i8x16/splat/param_avx2.wat similarity index 100% rename from tests/disas/winch/x64/i8x16_splat/param_avx2.wat rename to tests/disas/winch/x64/i8x16/splat/param_avx2.wat diff --git a/tests/disas/winch/x64/i8x16/sub/sub.wat b/tests/disas/winch/x64/i8x16/sub/sub.wat new file mode 100644 index 000000000000..11633deb12f9 --- /dev/null +++ b/tests/disas/winch/x64/i8x16/sub/sub.wat @@ -0,0 +1,45 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx" ] + +(module + (memory 1 1) + (func (result v128) + (i8x16.sub + (v128.const i64x2 42 42) + (v128.const i64x2 1337 1337) + ))) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x4a +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movdqu 0x1c(%rip), %xmm0 +;; movdqu 0x24(%rip), %xmm1 +;; vpsubb %xmm0, %xmm1, %xmm1 +;; movdqa %xmm1, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 4a: ud2 +;; 4c: addb %al, (%rax) +;; 4e: addb %al, (%rax) +;; 50: cmpl %eax, (%rip) +;; 56: addb %al, (%rax) +;; 58: cmpl %eax, (%rip) +;; 5e: addb %al, (%rax) +;; 60: subb (%rax), %al +;; 62: addb %al, (%rax) +;; 64: addb %al, (%rax) +;; 66: addb %al, (%rax) +;; 68: subb (%rax), %al +;; 6a: addb %al, (%rax) +;; 6c: addb %al, (%rax) +;; 6e: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i8x16/sub/sub_sat_s.wat b/tests/disas/winch/x64/i8x16/sub/sub_sat_s.wat new file mode 100644 index 000000000000..a580be69ce1a --- /dev/null +++ b/tests/disas/winch/x64/i8x16/sub/sub_sat_s.wat @@ -0,0 +1,45 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx" ] + +(module + (memory 1 1) + (func (result v128) + (i8x16.sub_sat_s + (v128.const i64x2 42 42) + (v128.const i64x2 1337 1337) + ))) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x4a +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movdqu 0x1c(%rip), %xmm0 +;; movdqu 0x24(%rip), %xmm1 +;; vpsubsb %xmm0, %xmm1, %xmm1 +;; movdqa %xmm1, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 4a: ud2 +;; 4c: addb %al, (%rax) +;; 4e: addb %al, (%rax) +;; 50: cmpl %eax, (%rip) +;; 56: addb %al, (%rax) +;; 58: cmpl %eax, (%rip) +;; 5e: addb %al, (%rax) +;; 60: subb (%rax), %al +;; 62: addb %al, (%rax) +;; 64: addb %al, (%rax) +;; 66: addb %al, (%rax) +;; 68: subb (%rax), %al +;; 6a: addb %al, (%rax) +;; 6c: addb %al, (%rax) +;; 6e: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i8x16/sub/sub_sat_u.wat b/tests/disas/winch/x64/i8x16/sub/sub_sat_u.wat new file mode 100644 index 000000000000..7e3197f95a9b --- /dev/null +++ b/tests/disas/winch/x64/i8x16/sub/sub_sat_u.wat @@ -0,0 +1,45 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx" ] + +(module + (memory 1 1) + (func (result v128) + (i8x16.sub_sat_u + (v128.const i64x2 42 42) + (v128.const i64x2 1337 1337) + ))) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x4a +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movdqu 0x1c(%rip), %xmm0 +;; movdqu 0x24(%rip), %xmm1 +;; vpsubusb %xmm0, %xmm1, %xmm1 +;; movdqa %xmm1, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 4a: ud2 +;; 4c: addb %al, (%rax) +;; 4e: addb %al, (%rax) +;; 50: cmpl %eax, (%rip) +;; 56: addb %al, (%rax) +;; 58: cmpl %eax, (%rip) +;; 5e: addb %al, (%rax) +;; 60: subb (%rax), %al +;; 62: addb %al, (%rax) +;; 64: addb %al, (%rax) +;; 66: addb %al, (%rax) +;; 68: subb (%rax), %al +;; 6a: addb %al, (%rax) +;; 6c: addb %al, (%rax) +;; 6e: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i8x16_swizzle/const_avx.wat b/tests/disas/winch/x64/i8x16/swizzle/const_avx.wat similarity index 100% rename from tests/disas/winch/x64/i8x16_swizzle/const_avx.wat rename to tests/disas/winch/x64/i8x16/swizzle/const_avx.wat diff --git a/winch/codegen/src/codegen/error.rs b/winch/codegen/src/codegen/error.rs index 344a514e1d0c..2a3d652aa940 100644 --- a/winch/codegen/src/codegen/error.rs +++ b/winch/codegen/src/codegen/error.rs @@ -26,6 +26,12 @@ pub(crate) enum CodeGenError { /// Unimplemented due to requiring AVX2. #[error("Instruction not implemented for CPUs without AVX2 support")] UnimplementedForNoAvx2, + /// Unimplemented due to requiring AVX512VL. + #[error("Instruction not implemented for CPUs without AVX512VL support")] + UnimplementedForNoAvx512VL, + /// Unimplemented due to requiring AVX512DQ. + #[error("Instruction not implemented for CPUs without AVX512DQ support")] + UnimplementedForNoAvx512DQ, /// Unsupported eager initialization of tables. 
#[error("Unsupported eager initialization of tables")] UnsupportedTableEagerInit, diff --git a/winch/codegen/src/isa/aarch64/masm.rs b/winch/codegen/src/isa/aarch64/masm.rs index 7957cf47ff40..fef5dccfef45 100644 --- a/winch/codegen/src/isa/aarch64/masm.rs +++ b/winch/codegen/src/isa/aarch64/masm.rs @@ -13,10 +13,10 @@ use crate::{ CallingConvention, }, masm::{ - CalleeKind, DivKind, Extend, ExtendKind, ExtractLaneKind, FloatCmpKind, Imm as I, - IntCmpKind, LoadKind, MacroAssembler as Masm, MulWideKind, OperandSize, RegImm, RemKind, - ReplaceLaneKind, RmwOp, RoundingMode, SPOffset, ShiftKind, SplatKind, StackSlot, StoreKind, - TrapCode, TruncKind, VectorCompareKind, VectorEqualityKind, Zero, TRUSTED_FLAGS, + CalleeKind, DivKind, Extend, ExtendKind, ExtractLaneKind, FloatCmpKind, HandleOverflowKind, + Imm as I, IntCmpKind, LoadKind, MacroAssembler as Masm, MulWideKind, OperandSize, RegImm, + RemKind, ReplaceLaneKind, RmwOp, RoundingMode, SPOffset, ShiftKind, SplatKind, StackSlot, + StoreKind, TrapCode, TruncKind, VectorCompareKind, VectorEqualityKind, Zero, TRUSTED_FLAGS, UNTRUSTED_FLAGS, }, stack::TypedReg, @@ -1101,6 +1101,36 @@ impl Masm for MacroAssembler { fn v128_any_true(&mut self, _src: Reg, _dst: WritableReg) -> Result<()> { Err(anyhow!(CodeGenError::unimplemented_masm_instruction())) } + + fn v128_add( + &mut self, + _lhs: Reg, + _rhs: Reg, + _dst: WritableReg, + _size: OperandSize, + _handle_overflow: HandleOverflowKind, + ) -> Result<()> { + Err(anyhow!(CodeGenError::unimplemented_masm_instruction())) + } + + fn v128_sub( + &mut self, + _lhs: Reg, + _rhs: Reg, + _dst: WritableReg, + _size: OperandSize, + _handle_overflow: HandleOverflowKind, + ) -> Result<()> { + Err(anyhow!(CodeGenError::unimplemented_masm_instruction())) + } + + fn v128_mul( + &mut self, + _context: &mut CodeGenContext, + _lane_width: OperandSize, + ) -> Result<()> { + Err(anyhow!(CodeGenError::unimplemented_masm_instruction())) + } } impl MacroAssembler { diff --git 
a/winch/codegen/src/isa/x64/asm.rs b/winch/codegen/src/isa/x64/asm.rs index 1249b0314c2b..ad08f9e8e7ac 100644 --- a/winch/codegen/src/isa/x64/asm.rs +++ b/winch/codegen/src/isa/x64/asm.rs @@ -18,10 +18,10 @@ use cranelift_codegen::{ unwind::UnwindInst, x64::{ args::{ - self, AluRmiROpcode, Amode, AvxOpcode, CmpOpcode, DivSignedness, ExtMode, - FenceKind, FromWritableReg, Gpr, GprMem, GprMemImm, Imm8Gpr, Imm8Reg, RegMem, - RegMemImm, ShiftKind as CraneliftShiftKind, SseOpcode, SyntheticAmode, WritableGpr, - WritableXmm, Xmm, XmmMem, XmmMemAligned, XmmMemImm, CC, + self, AluRmiROpcode, Amode, Avx512Opcode, AvxOpcode, CmpOpcode, DivSignedness, + ExtMode, FenceKind, FromWritableReg, Gpr, GprMem, GprMemImm, Imm8Gpr, Imm8Reg, + RegMem, RegMemImm, ShiftKind as CraneliftShiftKind, SseOpcode, SyntheticAmode, + WritableGpr, WritableXmm, Xmm, XmmMem, XmmMemAligned, XmmMemImm, CC, }, encoding::rex::{encode_modrm, RexFlags}, settings as x64_settings, AtomicRmwSeqOp, EmitInfo, EmitState, Inst, @@ -1880,7 +1880,13 @@ impl Assembler { } } - pub fn xmm_rmi_rvex(&mut self, op: AvxOpcode, src1: Reg, src2: Reg, dst: WritableReg) { + pub fn xmm_rmi_rvex( + &mut self, + op: AvxOpcode, + src1: Reg, + src2: impl Into, + dst: WritableReg, + ) { self.emit(Inst::XmmRmiRVex { op, src1: src1.into(), @@ -2075,6 +2081,23 @@ impl Assembler { }, }); } + + pub(crate) fn xmm_rm_rvex3( + &mut self, + op: Avx512Opcode, + src1: Reg, + src2: Reg, + dst: WritableReg, + ) { + self.emit(Inst::XmmRmREvex3 { + op, + // `src1` reuses `dst`, and is ignored in emission + src1: dst.to_reg().into(), + src2: src1.into(), + src3: src2.into(), + dst: dst.map(Into::into), + }); + } } /// Captures the region in a MachBuffer where an add-with-immediate instruction would be emitted, diff --git a/winch/codegen/src/isa/x64/masm.rs b/winch/codegen/src/isa/x64/masm.rs index 78f1811899db..460038b07ccf 100644 --- a/winch/codegen/src/isa/x64/masm.rs +++ b/winch/codegen/src/isa/x64/masm.rs @@ -7,10 +7,10 @@ use super::{ use 
anyhow::{anyhow, bail, Result}; use crate::masm::{ - DivKind, Extend, ExtendKind, ExtractLaneKind, FloatCmpKind, Imm as I, IntCmpKind, LaneSelector, - LoadKind, MacroAssembler as Masm, MulWideKind, OperandSize, RegImm, RemKind, ReplaceLaneKind, - RmwOp, RoundingMode, ShiftKind, SplatKind, StoreKind, TrapCode, TruncKind, VectorCompareKind, - VectorEqualityKind, Zero, TRUSTED_FLAGS, UNTRUSTED_FLAGS, + DivKind, Extend, ExtendKind, ExtractLaneKind, FloatCmpKind, HandleOverflowKind, Imm as I, + IntCmpKind, LaneSelector, LoadKind, MacroAssembler as Masm, MulWideKind, OperandSize, RegImm, + RemKind, ReplaceLaneKind, RmwOp, RoundingMode, ShiftKind, SplatKind, StoreKind, TrapCode, + TruncKind, VectorCompareKind, VectorEqualityKind, Zero, TRUSTED_FLAGS, UNTRUSTED_FLAGS, }; use crate::{ abi::{self, align_to, calculate_frame_adjustment, LocalSlot}, @@ -34,7 +34,7 @@ use cranelift_codegen::{ isa::{ unwind::UnwindInst, x64::{ - args::{AvxOpcode, FenceKind, CC}, + args::{Avx512Opcode, AvxOpcode, FenceKind, RegMemImm, XmmMemImm, CC}, settings as x64_settings, AtomicRmwSeqOp, }, }, @@ -1882,6 +1882,195 @@ impl Masm for MacroAssembler { self.asm.setcc(IntCmpKind::Ne, dst); Ok(()) } + + fn v128_add( + &mut self, + lhs: Reg, + rhs: Reg, + dst: WritableReg, + size: OperandSize, + handle_overflow_kind: HandleOverflowKind, + ) -> Result<()> { + self.ensure_has_avx()?; + + let op = match handle_overflow_kind { + HandleOverflowKind::None => match size { + OperandSize::S8 => AvxOpcode::Vpaddb, + OperandSize::S16 => AvxOpcode::Vpaddw, + OperandSize::S32 => AvxOpcode::Vpaddd, + OperandSize::S64 => AvxOpcode::Vpaddq, + OperandSize::S128 => bail!(CodeGenError::unexpected_operand_size()), + }, + HandleOverflowKind::SignedSaturating => match size { + OperandSize::S8 => AvxOpcode::Vpaddsb, + OperandSize::S16 => AvxOpcode::Vpaddsw, + _ => bail!(CodeGenError::unexpected_operand_size()), + }, + HandleOverflowKind::UnsignedSaturating => match size { + OperandSize::S8 => AvxOpcode::Vpaddusb, + 
OperandSize::S16 => AvxOpcode::Vpaddusw, + _ => bail!(CodeGenError::unexpected_operand_size()), + }, + }; + + self.asm.xmm_rmi_rvex(op, lhs, rhs, dst); + + Ok(()) + } + + fn v128_sub( + &mut self, + lhs: Reg, + rhs: Reg, + dst: WritableReg, + size: OperandSize, + handle_overflow_kind: HandleOverflowKind, + ) -> Result<()> { + self.ensure_has_avx()?; + + let op = match handle_overflow_kind { + HandleOverflowKind::None => match size { + OperandSize::S8 => AvxOpcode::Vpsubb, + OperandSize::S16 => AvxOpcode::Vpsubw, + OperandSize::S32 => AvxOpcode::Vpsubd, + OperandSize::S64 => AvxOpcode::Vpsubq, + OperandSize::S128 => bail!(CodeGenError::unexpected_operand_size()), + }, + HandleOverflowKind::SignedSaturating => match size { + OperandSize::S8 => AvxOpcode::Vpsubsb, + OperandSize::S16 => AvxOpcode::Vpsubsw, + _ => bail!(CodeGenError::unexpected_operand_size()), + }, + HandleOverflowKind::UnsignedSaturating => match size { + OperandSize::S8 => AvxOpcode::Vpsubusb, + OperandSize::S16 => AvxOpcode::Vpsubusw, + _ => bail!(CodeGenError::unexpected_operand_size()), + }, + }; + + self.asm.xmm_rmi_rvex(op, lhs, rhs, dst); + + Ok(()) + } + + fn v128_mul( + &mut self, + context: &mut CodeGenContext, + lane_width: OperandSize, + ) -> Result<()> { + self.ensure_has_avx()?; + + let rhs = context.pop_to_reg(self, None)?; + let lhs = context.pop_to_reg(self, None)?; + + let mul_avx = |this: &mut Self, op| { + this.asm + .xmm_rmi_rvex(op, lhs.reg, rhs.reg, writable!(lhs.reg)); + }; + + let mul_i64x2_avx512 = |this: &mut Self| { + this.asm + .xmm_rm_rvex3(Avx512Opcode::Vpmullq, lhs.reg, rhs.reg, writable!(lhs.reg)); + }; + + let mul_i64x2_fallback = + |this: &mut Self, context: &mut CodeGenContext| -> Result<()> { + // Standard AVX doesn't have an instruction for i64x2 multiplication, instead, we have to fallback + // to an instruction sequence using 32bits multiplication (taken from cranelift + // implementation, in `isa/x64/lower.isle`): + // + // > Otherwise, for i64x2 multiplication 
we describe a lane A as being composed of + // > a 32-bit upper half "Ah" and a 32-bit lower half "Al". The 32-bit long hand + // > multiplication can then be written as: + // + // > Ah Al + // > * Bh Bl + // > ----- + // > Al * Bl + // > + (Ah * Bl) << 32 + // > + (Al * Bh) << 32 + // + // > So for each lane we will compute: + // + // > A * B = (Al * Bl) + ((Ah * Bl) + (Al * Bh)) << 32 + // + // > Note, the algorithm will use `pmuludq` which operates directly on the lower + // > 32-bit (`Al` or `Bl`) of a lane and writes the result to the full 64-bits of + // > the lane of the destination. For this reason we don't need shifts to isolate + // > the lower 32-bits, however, we will need to use shifts to isolate the high + // > 32-bits when doing calculations, i.e., `Ah == A >> 32`. + + let tmp1 = regs::scratch_xmm(); + let tmp2 = context.any_fpr(this)?; + + // tmp1 = lhs_hi = (lhs >> 32) + this.asm.xmm_rmi_rvex( + AvxOpcode::Vpsrlq, + lhs.reg, + XmmMemImm::unwrap_new(RegMemImm::imm(32)), + writable!(tmp1), + ); + // tmp2 = lhs_hi * rhs_low = tmp1 * rhs + this.asm + .xmm_rmi_rvex(AvxOpcode::Vpmuldq, tmp1, rhs.reg, writable!(tmp2)); + + // tmp1 = rhs_hi = rhs >> 32 + this.asm.xmm_rmi_rvex( + AvxOpcode::Vpsrlq, + rhs.reg, + XmmMemImm::unwrap_new(RegMemImm::imm(32)), + writable!(tmp1), + ); + + // tmp1 = lhs_low * rhs_high = tmp1 * lhs + this.asm + .xmm_rmi_rvex(AvxOpcode::Vpmuludq, tmp1, lhs.reg, writable!(tmp1)); + + // tmp1 = ((lhs_hi * rhs_low) + (lhs_lo * rhs_hi)) = tmp1 + tmp2 + this.asm + .xmm_rmi_rvex(AvxOpcode::Vpaddq, tmp1, tmp2, writable!(tmp1)); + + // tmp1 = tmp1 << 32 + this.asm.xmm_rmi_rvex( + AvxOpcode::Vpsllq, + tmp1, + XmmMemImm::unwrap_new(RegMemImm::imm(32)), + writable!(tmp1), + ); + + // tmp2 = lhs_lo * rhs_lo + this.asm + .xmm_rmi_rvex(AvxOpcode::Vpmuludq, lhs.reg, rhs.reg, writable!(tmp2)); + + // finally, with `lhs` as destination: + // lhs = (lhs_low * rhs_low) + (((lhs_hi * rhs_low) + (lhs_lo * rhs_hi)) << 32) = tmp1 + tmp2 + this.asm +
.xmm_rmi_rvex(AvxOpcode::Vpaddq, tmp1, tmp2, writable!(lhs.reg)); + + context.free_reg(tmp2); + + Ok(()) + }; + + match lane_width { + OperandSize::S16 => mul_avx(self, AvxOpcode::Vpmullw), + OperandSize::S32 => mul_avx(self, AvxOpcode::Vpmulld), + // This is the fast path when AVX512 is available. + OperandSize::S64 + if self.ensure_has_avx512vl().is_ok() && self.ensure_has_avx512dq().is_ok() => + { + mul_i64x2_avx512(self) + } + // Otherwise, we emit AVX fallback sequence. + OperandSize::S64 => mul_i64x2_fallback(self, context)?, + _ => bail!(CodeGenError::unexpected_operand_size()), + } + + context.stack.push(lhs.into()); + context.free_reg(rhs); + + Ok(()) + } } impl MacroAssembler { @@ -1923,6 +2112,22 @@ impl MacroAssembler { Ok(()) } + fn ensure_has_avx512vl(&self) -> Result<()> { + anyhow::ensure!( + self.flags.has_avx512vl(), + CodeGenError::UnimplementedForNoAvx512VL + ); + Ok(()) + } + + fn ensure_has_avx512dq(&self) -> Result<()> { + anyhow::ensure!( + self.flags.has_avx512dq(), + CodeGenError::UnimplementedForNoAvx512DQ + ); + Ok(()) + } + fn increment_sp(&mut self, bytes: u32) { self.sp_offset += bytes; diff --git a/winch/codegen/src/masm.rs b/winch/codegen/src/masm.rs index c9ed5cebc0b3..55a70419dd9e 100644 --- a/winch/codegen/src/masm.rs +++ b/winch/codegen/src/masm.rs @@ -225,6 +225,16 @@ pub(crate) enum Extend { __Kind(T), } +/// How to handle overflow. +pub enum HandleOverflowKind { + /// Do nothing. + None, + /// Perform signed saturation. + SignedSaturating, + /// Perform unsigned saturation. + UnsignedSaturating, +} + impl From> for ExtendKind { fn from(value: Extend) -> Self { ExtendKind::Unsigned(value) @@ -1645,4 +1655,38 @@ pub(crate) trait MacroAssembler { /// If any bit in `src` is 1, set `dst` to 1, or 0 otherwise. fn v128_any_true(&mut self, src: Reg, dst: WritableReg) -> Result<()>; + + /// Perform a vector add between `lhs` and `rhs`, placing the result in `dst`, where each lane + /// is interpreted to be `lane_width` long.
+ /// + /// `handle_overflow` determines how overflow should be handled. + fn v128_add( + &mut self, + lhs: Reg, + rhs: Reg, + dst: WritableReg, + lane_width: OperandSize, + handle_overflow: HandleOverflowKind, + ) -> Result<()>; + + /// Perform a vector sub between `lhs` and `rhs`, placing the result in `dst`, where each lane + /// is interpreted to be `lane_width` long. + /// + /// `handle_overflow` determines how overflow should be handled. + fn v128_sub( + &mut self, + lhs: Reg, + rhs: Reg, + dst: WritableReg, + lane_width: OperandSize, + handle_overflow: HandleOverflowKind, + ) -> Result<()>; + + /// Perform a vector lane-wise mul between `lhs` and `rhs` (both popped from the value stack in `context`), pushing the result back onto the stack, where each lane + /// is interpreted to be `lane_width` long. + fn v128_mul( + &mut self, + context: &mut CodeGenContext, + lane_width: OperandSize, + ) -> Result<()>; } diff --git a/winch/codegen/src/visitor.rs b/winch/codegen/src/visitor.rs index e966bba27ab3..136437ba45e6 100644 --- a/winch/codegen/src/visitor.rs +++ b/winch/codegen/src/visitor.rs @@ -10,10 +10,10 @@ use crate::codegen::{ FnCall, }; use crate::masm::{ - DivKind, Extend, ExtractLaneKind, FloatCmpKind, IntCmpKind, LoadKind, MacroAssembler, - MemMoveDirection, MulWideKind, OperandSize, RegImm, RemKind, ReplaceLaneKind, RmwOp, - RoundingMode, SPOffset, ShiftKind, Signed, SplatKind, SplatLoadKind, StoreKind, TruncKind, - V128LoadExtendKind, VectorCompareKind, VectorEqualityKind, Zero, + DivKind, Extend, ExtractLaneKind, FloatCmpKind, HandleOverflowKind, IntCmpKind, LoadKind, + MacroAssembler, MemMoveDirection, MulWideKind, OperandSize, RegImm, RemKind, ReplaceLaneKind, + RmwOp, RoundingMode, SPOffset, ShiftKind, Signed, SplatKind, SplatLoadKind, StoreKind, + TruncKind, V128LoadExtendKind, VectorCompareKind, VectorEqualityKind, Zero, }; use crate::reg::{writable, Reg}; @@ -418,6 +418,25 @@ macro_rules! 
def_unsupported { (emit V128Store16Lane $($rest:tt)*) => {}; (emit V128Store32Lane $($rest:tt)*) => {}; (emit V128Store64Lane $($rest:tt)*) => {}; + (emit I8x16Add $($rest:tt)*) => {}; + (emit I16x8Add $($rest:tt)*) => {}; + (emit I32x4Add $($rest:tt)*) => {}; + (emit I64x2Add $($rest:tt)*) => {}; + (emit I8x16Sub $($rest:tt)*) => {}; + (emit I16x8Sub $($rest:tt)*) => {}; + (emit I32x4Sub $($rest:tt)*) => {}; + (emit I64x2Sub $($rest:tt)*) => {}; + (emit I16x8Mul $($rest:tt)*) => {}; + (emit I32x4Mul $($rest:tt)*) => {}; + (emit I64x2Mul $($rest:tt)*) => {}; + (emit I8x16AddSatS $($rest:tt)*) => {}; + (emit I16x8AddSatS $($rest:tt)*) => {}; + (emit I8x16AddSatU $($rest:tt)*) => {}; + (emit I16x8AddSatU $($rest:tt)*) => {}; + (emit I8x16SubSatS $($rest:tt)*) => {}; + (emit I16x8SubSatS $($rest:tt)*) => {}; + (emit I8x16SubSatU $($rest:tt)*) => {}; + (emit I16x8SubSatU $($rest:tt)*) => {}; (emit $unsupported:tt $($rest:tt)*) => {$($rest)*}; } @@ -3520,6 +3539,194 @@ where self.emit_wasm_store(&arg, StoreKind::vector_lane(lane, OperandSize::S64)) } + fn visit_i8x16_add(&mut self) -> Self::Output { + self.context + .binop(self.masm, OperandSize::S8, |masm, dst, src, size| { + masm.v128_add(dst, src, writable!(dst), size, HandleOverflowKind::None)?; + Ok(TypedReg::new(WasmValType::V128, dst)) + }) + } + + fn visit_i16x8_add(&mut self) -> Self::Output { + self.context + .binop(self.masm, OperandSize::S16, |masm, dst, src, size| { + masm.v128_add(dst, src, writable!(dst), size, HandleOverflowKind::None)?; + Ok(TypedReg::new(WasmValType::V128, dst)) + }) + } + + fn visit_i32x4_add(&mut self) -> Self::Output { + self.context + .binop(self.masm, OperandSize::S32, |masm, dst, src, size| { + masm.v128_add(dst, src, writable!(dst), size, HandleOverflowKind::None)?; + Ok(TypedReg::new(WasmValType::V128, dst)) + }) + } + + fn visit_i64x2_add(&mut self) -> Self::Output { + self.context + .binop(self.masm, OperandSize::S64, |masm, dst, src, size| { + masm.v128_add(dst, src, 
writable!(dst), size, HandleOverflowKind::None)?; + Ok(TypedReg::new(WasmValType::V128, dst)) + }) + } + + fn visit_i8x16_sub(&mut self) -> Self::Output { + self.context + .binop(self.masm, OperandSize::S8, |masm, dst, src, size| { + masm.v128_sub(dst, src, writable!(dst), size, HandleOverflowKind::None)?; + Ok(TypedReg::new(WasmValType::V128, dst)) + }) + } + + fn visit_i16x8_sub(&mut self) -> Self::Output { + self.context + .binop(self.masm, OperandSize::S16, |masm, dst, src, size| { + masm.v128_sub(dst, src, writable!(dst), size, HandleOverflowKind::None)?; + Ok(TypedReg::new(WasmValType::V128, dst)) + }) + } + + fn visit_i32x4_sub(&mut self) -> Self::Output { + self.context + .binop(self.masm, OperandSize::S32, |masm, dst, src, size| { + masm.v128_sub(dst, src, writable!(dst), size, HandleOverflowKind::None)?; + Ok(TypedReg::new(WasmValType::V128, dst)) + }) + } + + fn visit_i64x2_sub(&mut self) -> Self::Output { + self.context + .binop(self.masm, OperandSize::S64, |masm, dst, src, size| { + masm.v128_sub(dst, src, writable!(dst), size, HandleOverflowKind::None)?; + Ok(TypedReg::new(WasmValType::V128, dst)) + }) + } + + fn visit_i16x8_mul(&mut self) -> Self::Output { + self.masm.v128_mul(&mut self.context, OperandSize::S16) + } + + fn visit_i32x4_mul(&mut self) -> Self::Output { + self.masm.v128_mul(&mut self.context, OperandSize::S32) + } + + fn visit_i64x2_mul(&mut self) -> Self::Output { + self.masm.v128_mul(&mut self.context, OperandSize::S64) + } + + fn visit_i8x16_add_sat_s(&mut self) -> Self::Output { + self.context + .binop(self.masm, OperandSize::S8, |masm, dst, src, size| { + masm.v128_add( + dst, + src, + writable!(dst), + size, + HandleOverflowKind::SignedSaturating, + )?; + Ok(TypedReg::new(WasmValType::V128, dst)) + }) + } + + fn visit_i16x8_add_sat_s(&mut self) -> Self::Output { + self.context + .binop(self.masm, OperandSize::S16, |masm, dst, src, size| { + masm.v128_add( + dst, + src, + writable!(dst), + size, + 
HandleOverflowKind::SignedSaturating, + )?; + Ok(TypedReg::new(WasmValType::V128, dst)) + }) + } + + fn visit_i8x16_add_sat_u(&mut self) -> Self::Output { + self.context + .binop(self.masm, OperandSize::S8, |masm, dst, src, size| { + masm.v128_add( + dst, + src, + writable!(dst), + size, + HandleOverflowKind::UnsignedSaturating, + )?; + Ok(TypedReg::new(WasmValType::V128, dst)) + }) + } + + fn visit_i16x8_add_sat_u(&mut self) -> Self::Output { + self.context + .binop(self.masm, OperandSize::S16, |masm, dst, src, size| { + masm.v128_add( + dst, + src, + writable!(dst), + size, + HandleOverflowKind::UnsignedSaturating, + )?; + Ok(TypedReg::new(WasmValType::V128, dst)) + }) + } + + fn visit_i8x16_sub_sat_s(&mut self) -> Self::Output { + self.context + .binop(self.masm, OperandSize::S8, |masm, dst, src, size| { + masm.v128_sub( + dst, + src, + writable!(dst), + size, + HandleOverflowKind::SignedSaturating, + )?; + Ok(TypedReg::new(WasmValType::V128, dst)) + }) + } + + fn visit_i16x8_sub_sat_s(&mut self) -> Self::Output { + self.context + .binop(self.masm, OperandSize::S16, |masm, dst, src, size| { + masm.v128_sub( + dst, + src, + writable!(dst), + size, + HandleOverflowKind::SignedSaturating, + )?; + Ok(TypedReg::new(WasmValType::V128, dst)) + }) + } + + fn visit_i8x16_sub_sat_u(&mut self) -> Self::Output { + self.context + .binop(self.masm, OperandSize::S8, |masm, dst, src, size| { + masm.v128_sub( + dst, + src, + writable!(dst), + size, + HandleOverflowKind::UnsignedSaturating, + )?; + Ok(TypedReg::new(WasmValType::V128, dst)) + }) + } + + fn visit_i16x8_sub_sat_u(&mut self) -> Self::Output { + self.context + .binop(self.masm, OperandSize::S16, |masm, dst, src, size| { + masm.v128_sub( + dst, + src, + writable!(dst), + size, + HandleOverflowKind::UnsignedSaturating, + )?; + Ok(TypedReg::new(WasmValType::V128, dst)) + }) + } + wasmparser::for_each_visit_simd_operator!(def_unsupported); }