From 90f9ccd2b589925f9ecd496fdf6bf312db19f0d0 Mon Sep 17 00:00:00 2001 From: Peter Goodman Date: Wed, 4 Nov 2020 20:55:17 -0500 Subject: [PATCH] New x86 instructions (#457) * New x86 instructions * Add some isels * Fixes Issue #376 * Fixes Issue #433. Thanks @adahsuzixin for the semantics and tests * Fixes Issue #374 * Minor fix to the semantics for VINSERTF128, it should only look at the low bit of imm8 * Minor fixes for sparc isel naming --- include/remill/Arch/Runtime/Operators.h | 10 +- include/remill/BC/Compat/CTypes.h | 4 + lib/Arch/SPARC32/Extract.cpp | 20 +-- lib/Arch/SPARC32/Semantics/FOP.cpp | 2 +- lib/Arch/SPARC64/Extract.cpp | 20 +-- lib/Arch/SPARC64/Semantics/FOP.cpp | 8 +- lib/Arch/X86/Runtime/Instructions.cpp | 29 ++++ lib/Arch/X86/Semantics/AVX.cpp | 28 +++- lib/Arch/X86/Semantics/DATAXFER.cpp | 59 +++++--- lib/Arch/X86/Semantics/LOGICAL.cpp | 7 +- lib/Arch/X86/Semantics/MMX.cpp | 173 ++++++++++++++++++++++-- lib/Arch/X86/Semantics/SEMAPHORE.cpp | 38 +++--- lib/Arch/X86/Semantics/SSE.cpp | 155 +++++++++++++++++---- tests/AArch64/CMakeLists.txt | 10 +- tests/X86/AVX/VINSERTF128.S | 75 ++++++++++ tests/X86/CMakeLists.txt | 7 + tests/X86/DATAXFER/MOVHLPS.S | 37 +++++ tests/X86/SEMAPHORE/CMPXCHG.S | 64 +++++++-- tests/X86/SSE/UNPCKHPS.S | 60 ++++++++ tests/X86/Tests.S | 3 + 20 files changed, 690 insertions(+), 119 deletions(-) create mode 100644 tests/X86/AVX/VINSERTF128.S create mode 100644 tests/X86/DATAXFER/MOVHLPS.S create mode 100644 tests/X86/SSE/UNPCKHPS.S diff --git a/include/remill/Arch/Runtime/Operators.h b/include/remill/Arch/Runtime/Operators.h index 280f358ee..5dc4f3ec6 100644 --- a/include/remill/Arch/Runtime/Operators.h +++ b/include/remill/Arch/Runtime/Operators.h @@ -1457,11 +1457,15 @@ ALWAYS_INLINE static Memory *__remill_write_memory_128(Memory *mem, addr_t addr, return mem; } +// Issue #374: https://github.com/lifting-bits/remill/issues/374 +// +// The builtins may have defined or undefined behavior given a zero, depending +// on 
the target arch. #define MAKE_BUILTIN(name, size, input_size, builtin, disp) \ ALWAYS_INLINE static uint##size##_t name(uint##size##_t val) { \ - return static_cast( \ - builtin(static_cast(val))) - \ - static_cast(disp); \ + const auto in_val = static_cast(val); \ + return in_val ? (static_cast(builtin(in_val)) - \ + static_cast(disp)) : size; \ } MAKE_BUILTIN(CountLeadingZeros, 8, 32, __builtin_clz, 24) diff --git a/include/remill/BC/Compat/CTypes.h b/include/remill/BC/Compat/CTypes.h index 0a9657a2f..47c23b094 100644 --- a/include/remill/BC/Compat/CTypes.h +++ b/include/remill/BC/Compat/CTypes.h @@ -16,10 +16,14 @@ #pragma once +#if __has_include() #include + namespace llvm { // TODO(pag): This is a rather ugly hack; had some issues with anvill not // compiling on macOS due to these C types. struct LLVMOpaqueNamedMDNode; using LLVMNamedMDNodeRef = struct LLVMOpaqueNamedMDNode *; } // namespace llvm + +#endif diff --git a/lib/Arch/SPARC32/Extract.cpp b/lib/Arch/SPARC32/Extract.cpp index aae9f9a54..81e0722b0 100644 --- a/lib/Arch/SPARC32/Extract.cpp +++ b/lib/Arch/SPARC32/Extract.cpp @@ -1132,11 +1132,11 @@ DEFINE_FUNCTION(FMOVQ, SZERO, QWORD, QWORD) DEFINE_FUNCTION(FMULS, SWORD, SWORD, SWORD) DEFINE_FUNCTION(FMULD, DWORD, DWORD, DWORD) DEFINE_FUNCTION(FMULQ, QWORD, QWORD, QWORD) -DEFINE_FUNCTION(FsMULD, SWORD, SWORD, DWORD) -DEFINE_FUNCTION(FdMULQ, DWORD, DWORD, QWORD) -DEFINE_FUNCTION(FxTOs, SZERO, DWORD, SWORD) -DEFINE_FUNCTION(FxTOd, SZERO, DWORD, DWORD) -DEFINE_FUNCTION(FxTOq, SZERO, DWORD, QWORD) +DEFINE_FUNCTION(FSMULD, SWORD, SWORD, DWORD) +DEFINE_FUNCTION(FDMULQ, DWORD, DWORD, QWORD) +DEFINE_FUNCTION(FXTOS, SZERO, DWORD, SWORD) +DEFINE_FUNCTION(FXTOD, SZERO, DWORD, DWORD) +DEFINE_FUNCTION(FXTOQ, SZERO, DWORD, QWORD) DEFINE_FUNCTION(FITOS, SZERO, SWORD, SWORD) DEFINE_FUNCTION(FITOD, SZERO, SWORD, DWORD) DEFINE_FUNCTION(FITOQ, SZERO, SWORD, QWORD) @@ -1270,12 +1270,12 @@ static bool (*const kop10_op352Level[1u << 8])(Instruction &, uint32_t) = { 
[0b01100110] = nullptr, [0b01100111] = nullptr, [0b01101000] = nullptr, - [0b01101001] = TryDecodeFsMULD, + [0b01101001] = TryDecodeFSMULD, [0b01101010] = nullptr, [0b01101011] = nullptr, [0b01101100] = nullptr, [0b01101101] = nullptr, - [0b01101110] = TryDecodeFdMULQ, + [0b01101110] = TryDecodeFDMULQ, [0b01101111] = nullptr, [0b01110000] = nullptr, [0b01110001] = nullptr, @@ -1297,15 +1297,15 @@ static bool (*const kop10_op352Level[1u << 8])(Instruction &, uint32_t) = { [0b10000001] = TryDecodeFSTOX, [0b10000010] = TryDecodeFDTOX, [0b10000011] = TryDecodeFQTOX, - [0b10000100] = TryDecodeFxTOs, + [0b10000100] = TryDecodeFXTOS, [0b10000101] = nullptr, [0b10000110] = nullptr, [0b10000111] = nullptr, - [0b10001000] = TryDecodeFxTOd, + [0b10001000] = TryDecodeFXTOD, [0b10001001] = nullptr, [0b10001010] = nullptr, [0b10001011] = nullptr, - [0b10001100] = TryDecodeFxTOq, + [0b10001100] = TryDecodeFXTOQ, [0b10001101] = nullptr, [0b10001110] = nullptr, [0b10001111] = nullptr, diff --git a/lib/Arch/SPARC32/Semantics/FOP.cpp b/lib/Arch/SPARC32/Semantics/FOP.cpp index 135e75338..815264fcf 100644 --- a/lib/Arch/SPARC32/Semantics/FOP.cpp +++ b/lib/Arch/SPARC32/Semantics/FOP.cpp @@ -188,7 +188,7 @@ DEF_ISEL(FDIVD) = FDIVD; DEF_ISEL(FDIVQ) = FDIVD; DEF_ISEL(FSMULD) = FsMULD; -DEF_ISEL(FDMULQ) = FsMULD; +DEF_ISEL(FDMULQ) = FdMULQ; namespace { diff --git a/lib/Arch/SPARC64/Extract.cpp b/lib/Arch/SPARC64/Extract.cpp index 211da3641..f540506f4 100644 --- a/lib/Arch/SPARC64/Extract.cpp +++ b/lib/Arch/SPARC64/Extract.cpp @@ -1460,11 +1460,11 @@ DEFINE_FUNCTION(FMOVQ, SZERO, QWORD, QWORD) DEFINE_FUNCTION(FMULS, SWORD, SWORD, SWORD) DEFINE_FUNCTION(FMULD, DWORD, DWORD, DWORD) DEFINE_FUNCTION(FMULQ, QWORD, QWORD, QWORD) -DEFINE_FUNCTION(FsMULD, SWORD, SWORD, DWORD) -DEFINE_FUNCTION(FdMULQ, DWORD, DWORD, QWORD) -DEFINE_FUNCTION(FxTOs, SZERO, DWORD, SWORD) -DEFINE_FUNCTION(FxTOd, SZERO, DWORD, DWORD) -DEFINE_FUNCTION(FxTOq, SZERO, DWORD, QWORD) +DEFINE_FUNCTION(FSMULD, SWORD, SWORD, DWORD) 
+DEFINE_FUNCTION(FDMULQ, DWORD, DWORD, QWORD) +DEFINE_FUNCTION(FXTOS, SZERO, DWORD, SWORD) +DEFINE_FUNCTION(FXTOD, SZERO, DWORD, DWORD) +DEFINE_FUNCTION(FXTOQ, SZERO, DWORD, QWORD) DEFINE_FUNCTION(FITOS, SZERO, SWORD, SWORD) DEFINE_FUNCTION(FITOD, SZERO, SWORD, DWORD) DEFINE_FUNCTION(FITOQ, SZERO, SWORD, QWORD) @@ -1596,12 +1596,12 @@ static bool (*const kop10_op352Level[1u << 8])(Instruction &, uint32_t) = { [0b01100110] = nullptr, [0b01100111] = nullptr, [0b01101000] = nullptr, - [0b01101001] = TryDecodeFsMULD, + [0b01101001] = TryDecodeFSMULD, [0b01101010] = nullptr, [0b01101011] = nullptr, [0b01101100] = nullptr, [0b01101101] = nullptr, - [0b01101110] = TryDecodeFdMULQ, + [0b01101110] = TryDecodeFDMULQ, [0b01101111] = nullptr, [0b01110000] = nullptr, [0b01110001] = nullptr, @@ -1623,15 +1623,15 @@ static bool (*const kop10_op352Level[1u << 8])(Instruction &, uint32_t) = { [0b10000001] = TryDecodeFSTOX, [0b10000010] = TryDecodeFDTOX, [0b10000011] = TryDecodeFQTOX, - [0b10000100] = TryDecodeFxTOs, + [0b10000100] = TryDecodeFXTOS, [0b10000101] = nullptr, [0b10000110] = nullptr, [0b10000111] = nullptr, - [0b10001000] = TryDecodeFxTOd, + [0b10001000] = TryDecodeFXTOD, [0b10001001] = nullptr, [0b10001010] = nullptr, [0b10001011] = nullptr, - [0b10001100] = TryDecodeFxTOq, + [0b10001100] = TryDecodeFXTOQ, [0b10001101] = nullptr, [0b10001110] = nullptr, [0b10001111] = nullptr, diff --git a/lib/Arch/SPARC64/Semantics/FOP.cpp b/lib/Arch/SPARC64/Semantics/FOP.cpp index ea98f4955..390fa6f3c 100644 --- a/lib/Arch/SPARC64/Semantics/FOP.cpp +++ b/lib/Arch/SPARC64/Semantics/FOP.cpp @@ -318,7 +318,7 @@ DEF_ISEL(FDIVD) = FDIVD; DEF_ISEL(FMULS) = FMULS; DEF_ISEL(FMULD) = FMULD; -DEF_ISEL(FsMULD) = FsMULD; +DEF_ISEL(FSMULD) = FsMULD; namespace { @@ -447,9 +447,9 @@ DEF_ISEL(FDTOS) = FDTOS; DEF_ISEL(FITOS) = FITOS; DEF_ISEL(FITOD) = FITOD; -DEF_ISEL(FxTOs) = FXTOS; -DEF_ISEL(FxTOd) = FXTOD; -DEF_ISEL(FxTOq) = FXTOQ; +DEF_ISEL(FXTOS) = FXTOS; +DEF_ISEL(FXTOD) = FXTOD; 
+DEF_ISEL(FXTOQ) = FXTOQ; DEF_ISEL(FSQRTS) = FSQRTS; DEF_ISEL(FSQRTD) = FSQRTD; diff --git a/lib/Arch/X86/Runtime/Instructions.cpp b/lib/Arch/X86/Runtime/Instructions.cpp index 77a1f5368..ce9dae4b9 100644 --- a/lib/Arch/X86/Runtime/Instructions.cpp +++ b/lib/Arch/X86/Runtime/Instructions.cpp @@ -164,6 +164,35 @@ DEF_HELPER(PopFromStack)->T { Write(REG_XSP, new_xsp); return val; } + +DEF_HELPER(SquareRoot32, float32_t src_float) -> float32_t { + auto square_root = src_float; + + // Special cases for invalid square root operations. See Intel manual, Table E-10. + if (IsNaN(src_float)) { + // If src is SNaN, return the SNaN converted to a QNaN: + if (IsSignalingNaN(src_float)) { + nan32_t temp_nan = {src_float}; + temp_nan.is_quiet_nan = 1; // equivalent to a bitwise OR with 0x00400000 + square_root = temp_nan.f; + + // Else, src is a QNaN. Pass it directly to the result: + } else { + square_root = src_float; + } + } else { // a number, that is, not a NaN + // A negative operand (except -0.0) results in the QNaN indefinite value. 
+ if (IsNegative(src_float) && src_float != -0.0) { + uint32_t indef_qnan = 0xFFC00000U; + square_root = reinterpret_cast(indef_qnan); + } else { + square_root = std::sqrt(src_float); + } + } + + return square_root; +} + } // namespace // clang-format off diff --git a/lib/Arch/X86/Semantics/AVX.cpp b/lib/Arch/X86/Semantics/AVX.cpp index 78b926d19..4f1837b6b 100644 --- a/lib/Arch/X86/Semantics/AVX.cpp +++ b/lib/Arch/X86/Semantics/AVX.cpp @@ -27,9 +27,6 @@ DEF_SEM(DoVZEROUPPER) { } return memory; } -} // namespace - -namespace { template DEF_SEM(VPBROADCASTB, D dst, S1 src1) { @@ -45,8 +42,33 @@ DEF_SEM(VPBROADCASTB, D dst, S1 src1) { return memory; } +template +DEF_SEM(VINSERTF128, VV256W dst, V256 src1, S2 src2, I8 src3) { + auto dst_vec = UReadV128(src1); + auto src2_vec = UReadV128(src2); + auto src3_i8 = Read(src3); + auto i = static_cast(src3_i8 & 1u); + dst_vec = UInsertV128(dst_vec, i, UExtractV128(src2_vec, 0)); + UWriteV128(dst, dst_vec); + return memory; +} + +//template +//DEF_SEM(VINSERTF128, VV512W dst, V512 src1, S2 src2, I8 src3) { +// auto dst_vec = UReadV128(src1); +// auto src2_vec = UReadV128(src2); +// auto src3_i8 = Read(src3); +// std::size_t i = static_cast(src3_i8 & 3u); +// dst_vec = UInsertV128(dst_vec, i, UExtractV128(src2_vec, 0)); +// UWriteV128(dst, dst_vec); +// return memory; +//} + } // namespace +DEF_ISEL(VINSERTF128_YMMqq_YMMqq_MEMdq_IMMb) = VINSERTF128; +DEF_ISEL(VINSERTF128_YMMqq_YMMqq_XMMdq_IMMb) = VINSERTF128; + DEF_ISEL(VZEROUPPER) = DoVZEROUPPER; DEF_ISEL(VPBROADCASTB_YMMqq_XMMb) = VPBROADCASTB; diff --git a/lib/Arch/X86/Semantics/DATAXFER.cpp b/lib/Arch/X86/Semantics/DATAXFER.cpp index 17d270e95..72bd1d467 100644 --- a/lib/Arch/X86/Semantics/DATAXFER.cpp +++ b/lib/Arch/X86/Semantics/DATAXFER.cpp @@ -93,21 +93,23 @@ DEF_SEM(MOVLPS, D dst, S src) { return memory; } -template -DEF_SEM(MOVLHPS, D dst, S src) { - - /* DEST[63:0] (Unmodified) */ - /* DEST[127:64] ← SRC[63:0] */ - /* DEST[VLMAX-1:128] (Unmodified) */ - - 
float64v2_t temp_vec = {}; - temp_vec = FInsertV64(temp_vec, 0, FExtractV64(FReadV64(dst), 0)); - temp_vec = FInsertV64(temp_vec, 1, FExtractV64(FReadV64(src), 0)); - - FWriteV64(dst, temp_vec); +DEF_SEM(MOVLHPS, V128W dst, V128 src) { + auto res = FReadV32(dst); + auto src1 = FReadV32(src); + res = FInsertV32(res, 2, FExtractV32(src1, 0)); + res = FInsertV32(res, 3, FExtractV32(src1, 1)); + FWriteV32(dst, res); return memory; } +DEF_SEM(MOVHLPS, V128W dst, V128 src) { + auto res = FReadV32(dst); + auto src1 = FReadV32(src); + res = FInsertV32(res, 0, FExtractV32(src1, 2)); + res = FInsertV32(res, 1, FExtractV32(src1, 3)); + FWriteV32(dst, res); + return memory; +} template DEF_SEM(MOVLPD, D dst, S src) { @@ -136,14 +138,32 @@ DEF_SEM(VMOVLHPS, VV128W dst, V128 src1, V128 src2) { /* DEST[127:64] ← SRC2[63:0] */ /* DEST[VLMAX-1:128] ← 0 */ - float64v2_t temp_vec = {}; - temp_vec = FInsertV64(temp_vec, 0, FExtractV64(FReadV64(src1), 0)); - temp_vec = FInsertV64(temp_vec, 1, FExtractV64(FReadV64(src2), 0)); + auto src1_vec = FReadV32(src1); + auto src2_vec = FReadV32(src2); - FWriteV64(dst, temp_vec); + float32v4_t temp_vec = {}; + temp_vec = FInsertV32(temp_vec, 0, FExtractV32(src1_vec, 0)); + temp_vec = FInsertV32(temp_vec, 1, FExtractV32(src1_vec, 1)); + temp_vec = FInsertV32(temp_vec, 2, FExtractV32(src2_vec, 0)); + temp_vec = FInsertV32(temp_vec, 3, FExtractV32(src2_vec, 1)); + + FWriteV32(dst, temp_vec); return memory; } +DEF_SEM(VMOVHLPS, VV128W dst, V128 src1, V128 src2) { + auto src1_vec = FReadV32(src1); + auto src2_vec = FReadV32(src2); + + float32v4_t temp_vec = {}; + temp_vec = FInsertV32(temp_vec, 0, FExtractV32(src2_vec, 2)); + temp_vec = FInsertV32(temp_vec, 1, FExtractV32(src2_vec, 3)); + temp_vec = FInsertV32(temp_vec, 2, FExtractV32(src1_vec, 2)); + temp_vec = FInsertV32(temp_vec, 3, FExtractV32(src1_vec, 3)); + + FWriteV32(dst, temp_vec); + return memory; +} #endif // HAS_FEATURE_AVX @@ -447,8 +467,13 @@ DEF_ISEL(MOVLPS_XMMq_MEMq) = MOVLPS; 
IF_AVX(DEF_ISEL(VMOVLPS_MEMq_XMMq) = MOVLPS;) IF_AVX(DEF_ISEL(VMOVLPS_XMMdq_XMMdq_MEMq) = VMOVLPS;) -DEF_ISEL(MOVLHPS_XMMq_XMMq) = MOVLHPS; +DEF_ISEL(MOVHLPS_XMMq_XMMq) = MOVHLPS; +IF_AVX(DEF_ISEL(VMOVHLPS_XMMdq_XMMq_XMMq) = VMOVHLPS;) +IF_AVX(DEF_ISEL(VMOVHLPS_XMMdq_XMMdq_XMMdq) = VMOVHLPS;) + +DEF_ISEL(MOVLHPS_XMMq_XMMq) = MOVLHPS; IF_AVX(DEF_ISEL(VMOVLHPS_XMMdq_XMMq_XMMq) = VMOVLHPS;) +IF_AVX(DEF_ISEL(VMOVLHPS_XMMdq_XMMdq_XMMdq) = VMOVLHPS;) #if HAS_FEATURE_AVX # if HAS_FEATURE_AVX512 diff --git a/lib/Arch/X86/Semantics/LOGICAL.cpp b/lib/Arch/X86/Semantics/LOGICAL.cpp index c1583c95c..409890adf 100644 --- a/lib/Arch/X86/Semantics/LOGICAL.cpp +++ b/lib/Arch/X86/Semantics/LOGICAL.cpp @@ -232,8 +232,8 @@ IF_AVX(DEF_ISEL(VPXOR_YMMqq_YMMqq_YMMqq) = PXOR;) DEF_ISEL(XORPD_XMMpd_MEMpd) = PXOR_64; DEF_ISEL(XORPD_XMMpd_XMMpd) = PXOR_64; -DEF_ISEL(XORPD_XMMxuq_XMMxuq) = PXOR_64; DEF_ISEL(XORPD_XMMxuq_MEMxuq) = PXOR_64; +DEF_ISEL(XORPD_XMMxuq_XMMxuq) = PXOR_64; DEF_ISEL(XORPS_XMMps_MEMps) = PXOR; DEF_ISEL(XORPS_XMMps_XMMps) = PXOR; @@ -259,6 +259,8 @@ IF_AVX(DEF_ISEL(VPAND_YMMqq_YMMqq_YMMqq) = PAND;) DEF_ISEL(ANDPD_XMMpd_MEMpd) = PAND_64; DEF_ISEL(ANDPD_XMMpd_XMMpd) = PAND_64; + +DEF_ISEL(ANDPD_XMMxuq_MEMxuq) = PAND_64; DEF_ISEL(ANDPD_XMMxuq_XMMxuq) = PAND_64; DEF_ISEL(ANDPS_XMMps_MEMps) = PAND; @@ -285,6 +287,7 @@ IF_AVX(DEF_ISEL(VPANDN_YMMqq_YMMqq_YMMqq) = PANDN;) DEF_ISEL(ANDNPD_XMMpd_MEMpd) = PANDN_64; DEF_ISEL(ANDNPD_XMMpd_XMMpd) = PANDN_64; +DEF_ISEL(ANDNPD_XMMxuq_MEMxuq) = PANDN_64; DEF_ISEL(ANDNPD_XMMxuq_XMMxuq) = PANDN_64; DEF_ISEL(ANDNPS_XMMps_MEMps) = PANDN; @@ -311,10 +314,12 @@ IF_AVX(DEF_ISEL(VPOR_YMMqq_YMMqq_YMMqq) = POR;) DEF_ISEL(ORPD_XMMpd_MEMpd) = POR_64; DEF_ISEL(ORPD_XMMpd_XMMpd) = POR_64; +DEF_ISEL(ORPD_XMMxuq_MEMxuq) = POR_64; DEF_ISEL(ORPD_XMMxuq_XMMxuq) = POR_64; DEF_ISEL(ORPS_XMMps_MEMps) = POR; DEF_ISEL(ORPS_XMMps_XMMps) = POR; +DEF_ISEL(ORPS_XMMxud_MEMxud) = POR; DEF_ISEL(ORPS_XMMxud_XMMxud) = POR; IF_AVX(DEF_ISEL(VORPD_XMMdq_XMMdq_MEMdq) = 
POR_64;) IF_AVX(DEF_ISEL(VORPD_XMMdq_XMMdq_XMMdq) = POR_64;) diff --git a/lib/Arch/X86/Semantics/MMX.cpp b/lib/Arch/X86/Semantics/MMX.cpp index c6934fe51..362d668d2 100644 --- a/lib/Arch/X86/Semantics/MMX.cpp +++ b/lib/Arch/X86/Semantics/MMX.cpp @@ -1922,6 +1922,171 @@ DEF_ISEL(PINSRW_XMMdq_GPR32_IMMb) = PINSRW; IF_AVX(DEF_ISEL(VPINSRW_XMMdq_XMMdq_MEMw_IMMb) = PINSRW); IF_AVX(DEF_ISEL(VPINSRW_XMMdq_XMMdq_GPR32d_IMMb) = PINSRW); +namespace { + +template +DEF_SEM(PFMUL, D dst, S1 src_dst, S2 src) { + auto src1 = FReadV32(src_dst); + auto src2 = FReadV32(src); + FWriteV32(dst, FMulV32(src1, src2)); + return memory; +} + +template +DEF_SEM(PFADD, D dst, S1 src_dst, S2 src) { + auto src1 = FReadV32(src_dst); + auto src2 = FReadV32(src); + FWriteV32(dst, FAddV32(src1, src2)); + return memory; +} + +template +DEF_SEM(PFSUB, D dst, S1 src_dst, S2 src) { + auto src1 = FReadV32(src_dst); + auto src2 = FReadV32(src); + FWriteV32(dst, FSubV32(src1, src2)); + return memory; +} + +template +DEF_SEM(PFSUBR, D dst, S1 src_dst, S2 src) { + auto src1 = FReadV32(src); + auto src2 = FReadV32(src_dst); + FWriteV32(dst, FSubV32(src1, src2)); + return memory; +} + +template +DEF_SEM(PFMAX, D dst, S1 src_dst, S2 src) { + auto src1 = FReadV32(src_dst); + auto src2 = FReadV32(src); + auto out = src1; + _Pragma("unroll") + for (auto i = 0u; i < 2; ++i) { + auto s1_val = FExtractV32(src1, i); + auto s2_val = FExtractV32(src2, i); + if (!std::isunordered(s1_val, s2_val) && s2_val > s1_val) { + out = FInsertV32(out, i, s2_val); + } + } + FWriteV32(dst, out); + return memory; +} + +template +DEF_SEM(PFMIN, D dst, S1 src_dst, S2 src) { + auto src1 = FReadV32(src_dst); + auto src2 = FReadV32(src); + auto out = src1; + _Pragma("unroll") + for (auto i = 0u; i < 2; ++i) { + auto s1_val = FExtractV32(src1, i); + auto s2_val = FExtractV32(src2, i); + if (!std::isunordered(s1_val, s2_val) && s2_val < s1_val) { + out = FInsertV32(out, i, s2_val); + } + } + FWriteV32(dst, out); + return memory; +} + 
+template +DEF_SEM(PFCMPGT, D dst, S1 src_dst, S2 src) { + auto src1 = FReadV32(src_dst); + auto src2 = FReadV32(src); + uint32v2_t out = {}; + _Pragma("unroll") + for (auto i = 0u; i < 2; ++i) { + auto s1_val = FExtractV32(src1, i); + auto s2_val = FExtractV32(src2, i); + if (!std::isunordered(s1_val, s2_val) && s1_val > s2_val) { + out.elems[i] = ~0u; + } + } + UWriteV32(dst, out); + return memory; +} + +template +DEF_SEM(PFCMPGE, D dst, S1 src_dst, S2 src) { + auto src1 = FReadV32(src_dst); + auto src2 = FReadV32(src); + uint32v2_t out = {}; + _Pragma("unroll") + for (auto i = 0u; i < 2; ++i) { + auto s1_val = FExtractV32(src1, i); + auto s2_val = FExtractV32(src2, i); + if (!std::isunordered(s1_val, s2_val) && s1_val >= s2_val) { + out.elems[i] = ~0u; + } + } + UWriteV32(dst, out); + return memory; +} + +template +DEF_SEM(PFCMPEQ, D dst, S1 src_dst, S2 src) { + auto src1 = FReadV32(src_dst); + auto src2 = FReadV32(src); + uint32v2_t out = {}; + _Pragma("unroll") + for (auto i = 0u; i < 2; ++i) { + auto s1_val = FExtractV32(src1, i); + auto s2_val = FExtractV32(src2, i); + if (!std::isunordered(s1_val, s2_val) && s1_val == s2_val) { + out.elems[i] = ~0u; + } + } + UWriteV32(dst, out); + return memory; +} + +template +DEF_SEM(PFRSQRT, D dst, S1, S2 src) { + auto src2 = FReadV32(src); + auto out = FClearV32(FReadV32(dst)); + out = FInsertV32(out, 0, FDiv(1.0f, SquareRoot32(memory, state, FExtractV32(src2, 0)))); + out = FInsertV32(out, 1, FDiv(1.0f, SquareRoot32(memory, state, FExtractV32(src2, 1)))); + FWriteV32(dst, out); + return memory; +} + +template +DEF_SEM(PFACC, D dst, S1 src_dst, S2 src) { + auto src1 = FReadV32(src_dst); + auto src2 = FReadV32(src); + auto out = FClearV32(FReadV32(dst)); + out = FInsertV32(out, 0, FAdd(FExtractV32(src1, 0), FExtractV32(src1, 1))); + out = FInsertV32(out, 1, FAdd(FExtractV32(src2, 0), FExtractV32(src2, 1))); + FWriteV32(dst, out); + return memory; +} + +} // namespace + +DEF_ISEL(PFMUL_MMXq_MEMq) = PFMUL; 
+DEF_ISEL(PFMUL_MMXq_MMXq) = PFMUL; +DEF_ISEL(PFADD_MMXq_MEMq) = PFADD; +DEF_ISEL(PFADD_MMXq_MMXq) = PFADD; +DEF_ISEL(PFSUB_MMXq_MEMq) = PFSUB; +DEF_ISEL(PFSUB_MMXq_MMXq) = PFSUB; +DEF_ISEL(PFSUBR_MMXq_MEMq) = PFSUBR; +DEF_ISEL(PFSUBR_MMXq_MMXq) = PFSUBR; +DEF_ISEL(PFMAX_MMXq_MEMq) = PFMAX; +DEF_ISEL(PFMAX_MMXq_MMXq) = PFMAX; +DEF_ISEL(PFMIN_MMXq_MEMq) = PFMIN; +DEF_ISEL(PFMIN_MMXq_MMXq) = PFMIN; +DEF_ISEL(PFCMPGT_MMXq_MEMq) = PFCMPGT; +DEF_ISEL(PFCMPGT_MMXq_MMXq) = PFCMPGT; +DEF_ISEL(PFCMPGE_MMXq_MEMq) = PFCMPGE; +DEF_ISEL(PFCMPGE_MMXq_MMXq) = PFCMPGE; +DEF_ISEL(PFCMPEQ_MMXq_MEMq) = PFCMPEQ; +DEF_ISEL(PFCMPEQ_MMXq_MMXq) = PFCMPEQ; +DEF_ISEL(PFRSQRT_MMXq_MEMq) = PFRSQRT; +DEF_ISEL(PFRSQRT_MMXq_MMXq) = PFRSQRT; +DEF_ISEL(PFACC_MMXq_MEMq) = PFACC; +DEF_ISEL(PFACC_MMXq_MMXq) = PFACC; + /* 5547 VPINSRW VPINSRW_XMMu16_XMMu16_GPR32u16_IMM8_AVX512 AVX512 AVX512EVEX AVX512BW_128N ATTRIBUTES: 5548 VPINSRW VPINSRW_XMMu16_XMMu16_MEMu16_IMM8_AVX512 AVX512 AVX512EVEX AVX512BW_128N ATTRIBUTES: DISP8_GPR_READER_WORD @@ -1962,8 +2127,6 @@ DEF_ISEL(FEMMS) = DoEMMS; // 1584:353 PSUBD PSUBD_MMXq_MEMq MMX MMX PENTIUMMMX ATTRIBUTES: NOTSX // 1588:354 PSUBD PSUBD_MMXq_MMXq MMX MMX PENTIUMMMX ATTRIBUTES: NOTSX -// 1760:388 PHADDSW PHADDSW_MMXq_MEMq MMX SSSE3 SSSE3 ATTRIBUTES: NOTSX -// 1764:389 PHADDSW PHADDSW_MMXq_MMXq MMX SSSE3 SSSE3 ATTRIBUTES: NOTSX // 1925:425 PSADBW PSADBW_MMXq_MEMq MMX MMX PENTIUMMMX ATTRIBUTES: NOTSX // 1929:426 PSADBW PSADBW_MMXq_MMXq MMX MMX PENTIUMMMX ATTRIBUTES: NOTSX // 2197:482 PADDUSW PADDUSW_MMXq_MEMq MMX MMX PENTIUMMMX ATTRIBUTES: NOTSX @@ -2003,10 +2166,6 @@ DEF_ISEL(FEMMS) = DoEMMS; // 3862:843 PABSW PABSW_MMXq_MMXq MMX SSSE3 SSSE3 ATTRIBUTES: NOTSX // 4019:876 PMULHUW PMULHUW_MMXq_MEMq MMX MMX PENTIUMMMX ATTRIBUTES: NOTSX // 4023:877 PMULHUW PMULHUW_MMXq_MMXq MMX MMX PENTIUMMMX ATTRIBUTES: NOTSX -// 4095:895 PHADDD PHADDD_MMXq_MEMq MMX SSSE3 SSSE3 ATTRIBUTES: NOTSX -// 4099:896 PHADDD PHADDD_MMXq_MMXq MMX SSSE3 SSSE3 ATTRIBUTES: NOTSX -// 4111:899 POR 
POR_MMXq_MEMq LOGICAL MMX PENTIUMMMX ATTRIBUTES: NOTSX -// 4115:900 POR POR_MMXq_MMXq LOGICAL MMX PENTIUMMMX ATTRIBUTES: NOTSX // 4385:950 PSLLD PSLLD_MMXq_IMMb MMX MMX PENTIUMMMX ATTRIBUTES: NOTSX // 4393:952 PSLLD PSLLD_MMXq_MEMq MMX MMX PENTIUMMMX ATTRIBUTES: NOTSX // 4397:953 PSLLD PSLLD_MMXq_MMXq MMX MMX PENTIUMMMX ATTRIBUTES: NOTSX @@ -2020,8 +2179,6 @@ DEF_ISEL(FEMMS) = DoEMMS; // 4688:1018 PSUBUSB PSUBUSB_MMXq_MMXq MMX MMX PENTIUMMMX ATTRIBUTES: NOTSX // 4784:1038 PMOVMSKB PMOVMSKB_GPR32_MMXq MMX MMX SSE ATTRIBUTES: NOTSX -// 5054:1088 PANDN PANDN_MMXq_MEMq LOGICAL MMX PENTIUMMMX ATTRIBUTES: NOTSX -// 5058:1089 PANDN PANDN_MMXq_MMXq LOGICAL MMX PENTIUMMMX ATTRIBUTES: NOTSX // 5203:1120 PALIGNR PALIGNR_MMXq_MEMq_IMMb MMX SSSE3 SSSE3 ATTRIBUTES: NOTSX // 5208:1121 PALIGNR PALIGNR_MMXq_MMXq_IMMb MMX SSSE3 SSSE3 ATTRIBUTES: NOTSX // 5258:1131 PMULHW PMULHW_MMXq_MEMq MMX MMX PENTIUMMMX ATTRIBUTES: NOTSX diff --git a/lib/Arch/X86/Semantics/SEMAPHORE.cpp b/lib/Arch/X86/Semantics/SEMAPHORE.cpp index 5a0057e9b..67c3a8c11 100644 --- a/lib/Arch/X86/Semantics/SEMAPHORE.cpp +++ b/lib/Arch/X86/Semantics/SEMAPHORE.cpp @@ -16,24 +16,26 @@ namespace { -#define MAKE_CMPXCHG_XAX(xax) \ - template \ - DEF_SEM(CMPXCHG_##xax, D dst, S1 src1, S2 src2) { \ - auto desired_val = Read(src2); \ - auto check_val = Read(REG_##xax); \ - auto prev_value = check_val; \ - auto swap_flag = UCmpXchg(dst, check_val, desired_val); \ - auto sub_res = USub(prev_value, check_val); \ - WriteFlagsAddSub(state, prev_value, check_val, sub_res); \ - Write(FLAG_ZF, swap_flag); \ - WriteZExt(REG_##xax, check_val); \ - return memory; \ - } - -MAKE_CMPXCHG_XAX(AL) -MAKE_CMPXCHG_XAX(AX) -MAKE_CMPXCHG_XAX(EAX) -IF_64BIT(MAKE_CMPXCHG_XAX(RAX)) +#define MAKE_CMPXCHG_XAX(xax, xax_write, xax_read) \ + template \ + DEF_SEM(CMPXCHG_ ## xax, D dst, S1 src1, S2 src2) { \ + auto desired_val = Read(src2); \ + auto check_val = Read(REG_ ## xax_read); \ + auto prev_value = check_val; \ + auto swap_flag = UCmpXchg(dst, 
check_val, desired_val); \ + auto sub_res = USub(prev_value, check_val); \ + WriteFlagsAddSub(state, prev_value, check_val, sub_res); \ + Write(FLAG_ZF, swap_flag); \ + if (!swap_flag) { \ + WriteZExt(REG_ ## xax_write, check_val); \ + } \ + return memory; \ + } + +MAKE_CMPXCHG_XAX(AL, AL, AL) +MAKE_CMPXCHG_XAX(AX, AX, AX) +MAKE_CMPXCHG_XAX(EAX, XAX, EAX) +IF_64BIT(MAKE_CMPXCHG_XAX(RAX, RAX, RAX)) DEF_SEM(DoCMPXCHG8B_MEMq, M64W dst, M64 src1) { auto xdx = Read(REG_EDX); diff --git a/lib/Arch/X86/Semantics/SSE.cpp b/lib/Arch/X86/Semantics/SSE.cpp index ba2cb1feb..dfd457a2a 100644 --- a/lib/Arch/X86/Semantics/SSE.cpp +++ b/lib/Arch/X86/Semantics/SSE.cpp @@ -233,6 +233,7 @@ DEF_SEM(SHUFPS, D dst, S1 src1, S2 src2, I8 src3) { } // namespace DEF_ISEL(SHUFPS_XMMps_XMMps_IMMb) = SHUFPS; +DEF_ISEL(SHUFPS_XMMps_MEMps_IMMb) = SHUFPS; namespace { @@ -1529,6 +1530,24 @@ IF_AVX(DEF_ISEL(VUNPCKLPD_YMMqq_YMMqq_YMMqq) = UNPCKLPD;) namespace { +template +DEF_SEM(UNPCKHPS, D dst, S1 src1, S2 src2) { + + // Treating src1 as another vector of 32-bit DWORDs: + auto src1_vec = FReadV32(src1); + auto src2_vec = FReadV32(src2); + + auto res = FClearV32(FReadV32(dst)); + + res = FInsertV32(res, 0, FExtractV32(src1_vec, 2)); + res = FInsertV32(res, 1, FExtractV32(src2_vec, 2)); + res = FInsertV32(res, 2, FExtractV32(src1_vec, 3)); + res = FInsertV32(res, 3, FExtractV32(src2_vec, 3)); + + FWriteV32(dst, res); // SSE: Writes to XMM, AVX: Zero-extends XMM. 
+ return memory; +} + template DEF_SEM(UNPCKHPD, D dst, S1 src1, S2 src2) { @@ -1554,6 +1573,11 @@ DEF_ISEL(UNPCKHPD_XMMpd_XMMq) = UNPCKHPD; IF_AVX(DEF_ISEL(VUNPCKHPD_XMMdq_XMMdq_MEMdq) = UNPCKHPD;) IF_AVX(DEF_ISEL(VUNPCKHPD_XMMdq_XMMdq_XMMdq) = UNPCKHPD;) +DEF_ISEL(UNPCKHPS_XMMps_MEMdq) = UNPCKHPS; +DEF_ISEL(UNPCKHPS_XMMps_XMMdq) = UNPCKHPS; +IF_AVX(DEF_ISEL(VUNPCKHPS_XMMdq_XMMdq_MEMdq) = UNPCKHPS;) +IF_AVX(DEF_ISEL(VUNPCKHPS_XMMdq_XMMdq_XMMdq) = UNPCKHPS;) + /* 2440 VUNPCKHPD VUNPCKHPD_YMMqq_YMMqq_MEMqq AVX AVX AVX ATTRIBUTES: 2441 VUNPCKHPD VUNPCKHPD_YMMqq_YMMqq_YMMqq AVX AVX AVX ATTRIBUTES: @@ -1605,38 +1629,23 @@ IF_AVX(DEF_ISEL(VMOVDDUP_XMMdq_XMMdq) = MOVDDUP;) namespace { -DEF_HELPER(SquareRoot32, float32_t src_float)->float32_t { - auto square_root = src_float; - - // Special cases for invalid square root operations. See Intel manual, Table E-10. - if (IsNaN(src_float)) { - - // If src is SNaN, return the SNaN converted to a QNaN: - if (IsSignalingNaN(src_float)) { - nan32_t temp_nan = {src_float}; - temp_nan.is_quiet_nan = 1; // equivalent to a bitwise OR with 0x00400000 - square_root = temp_nan.f; - - // Else, src is a QNaN. Pass it directly to the result: - } else { - square_root = src_float; - } - } else { // a number, that is, not a NaN +template +DEF_SEM(SQRTSS, D dst, S1 src1) { + // Extract a "single-precision" (32-bit) float from [31:0] of src1 vector: + auto src_float = FExtractV32(FReadV32(src1), 0); - // A negative operand (except -0.0) results in the QNaN indefinite value. 
- if (IsNegative(src_float) && src_float != -0.0) { - uint32_t indef_qnan = 0xFFC00000U; - square_root = reinterpret_cast(indef_qnan); - } else { - square_root = std::sqrt(src_float); - } - } + // Store the square root result in dest[31:0]: + auto square_root = SquareRoot32(memory, state, src_float); + auto temp_vec = FReadV32(dst); // initialize a destination vector + temp_vec = FInsertV32(temp_vec, 0, square_root); - return square_root; + // Write out the result and return memory state: + FWriteV32(dst, temp_vec); // SSE: Writes to XMM, AVX: Zero-extends XMM. + return memory; } template -DEF_SEM(SQRTSS, D dst, S1 src1) { +DEF_SEM(RSQRTSS, D dst, S1 src1) { // Extract a "single-precision" (32-bit) float from [31:0] of src1 vector: auto src_float = FExtractV32(FReadV32(src1), 0); @@ -1644,7 +1653,7 @@ DEF_SEM(SQRTSS, D dst, S1 src1) { // Store the square root result in dest[32:0]: auto square_root = SquareRoot32(memory, state, src_float); auto temp_vec = FReadV32(dst); // initialize a destination vector - temp_vec = FInsertV32(temp_vec, 0, square_root); + temp_vec = FInsertV32(temp_vec, 0, FDiv(1.0f, square_root)); // Write out the result and return memory state: FWriteV32(dst, temp_vec); // SSE: Writes to XMM, AVX: Zero-extends XMM. @@ -1669,8 +1678,24 @@ DEF_SEM(VSQRTSS, D dst, S1 src1, S2 src2) { FWriteV32(dst, temp_vec); // SSE: Writes to XMM, AVX: Zero-extends XMM. return memory; } -#endif // HAS_FEATURE_AVX +template +DEF_SEM(VRSQRTSS, D dst, S1 src1, S2 src2) { + // Extract the single-precision float from [31:0] of the src2 vector: + auto src_float = FExtractV32(FReadV32(src2), 0); + + // Initialize dest vector, while also copying src1[127:32] -> dst[127:32]. 
+ auto temp_vec = FReadV32(src1); + + // Store the reciprocal square root result in dest[31:0]: + auto square_root = SquareRoot32(memory, state, src_float); + temp_vec = FInsertV32(temp_vec, 0, FDiv(1.0f, square_root)); + + // Write out the result and return memory state: + FWriteV32(dst, temp_vec); // SSE: Writes to XMM, AVX: Zero-extends XMM. + return memory; +} +#endif // HAS_FEATURE_AVX } // namespace DEF_ISEL(SQRTSS_XMMss_MEMss) = SQRTSS; @@ -1683,6 +1708,10 @@ IF_AVX(DEF_ISEL(VSQRTSS_XMMdq_XMMdq_XMMd) = VSQRTSS;) 4318 VSQRTSS VSQRTSS_XMMf32_MASKmskw_XMMf32_MEMf32_AVX512 AVX512 AVX512EVEX AVX512F_SCALAR ATTRIBUTES: DISP8_SCALAR MASKOP_EVEX MEMORY_FAULT_SUPPRESSION MXCSR SIMD_SCALAR */ +DEF_ISEL(RSQRTSS_XMMss_MEMss) = RSQRTSS; +DEF_ISEL(RSQRTSS_XMMss_XMMss) = RSQRTSS; +IF_AVX(DEF_ISEL(VRSQRTSS_XMMdq_XMMdq_MEMd) = VRSQRTSS;) +IF_AVX(DEF_ISEL(VRSQRTSS_XMMdq_XMMdq_XMMd) = VRSQRTSS;) namespace { @@ -1843,6 +1872,74 @@ IF_AVX(DEF_ISEL(VPACKUSWB_YMMqq_YMMqq_MEMqq) = IF_AVX(DEF_ISEL(VPACKUSWB_YMMqq_YMMqq_YMMqq) = PACKUSWB_AVX;) +namespace { + +template +DEF_SEM(HADDPS, D dst, S1 src1, S2 src2) { + auto lhs_vec = FReadV32(src1); + auto rhs_vec = FReadV32(src2); + auto dst_vec = FClearV32(FReadV32(dst)); + + // Compute the horizontal packing + auto vec_count = NumVectorElems(lhs_vec); + _Pragma("unroll") for (size_t index = 0; index < vec_count; index += 2) { + auto v1 = FExtractV32(lhs_vec, index); + auto v2 = FExtractV32(lhs_vec, index + 1); + auto i = UDiv(UInt32(index), UInt32(2)); + dst_vec = FInsertV32(dst_vec, i, FAdd(v1, v2)); + } + _Pragma("unroll") for (size_t index = 0; index < NumVectorElems(rhs_vec); + index += 2) { + auto v1 = FExtractV32(rhs_vec, index); + auto v2 = FExtractV32(rhs_vec, index + 1); + auto i = UDiv(UAdd(UInt32(index), UInt32(vec_count)), UInt32(2)); + dst_vec = FInsertV32(dst_vec, i, FAdd(v1, v2)); + } + FWriteV32(dst, dst_vec); + return memory; +} + +template +DEF_SEM(HADDPD, D dst, S1 src1, S2 src2) { + auto lhs_vec = FReadV64(src1); + auto rhs_vec 
= FReadV64(src2); + auto dst_vec = FClearV64(FReadV64(dst)); + + // Compute the horizontal packing + auto vec_count = NumVectorElems(lhs_vec); + _Pragma("unroll") for (size_t index = 0; index < vec_count; index += 2) { + auto v1 = FExtractV64(lhs_vec, index); + auto v2 = FExtractV64(lhs_vec, index + 1); + auto i = UDiv(UInt32(index), UInt32(2)); + dst_vec = FInsertV64(dst_vec, i, FAdd(v1, v2)); + } + _Pragma("unroll") for (size_t index = 0; index < NumVectorElems(rhs_vec); + index += 2) { + auto v1 = FExtractV64(rhs_vec, index); + auto v2 = FExtractV64(rhs_vec, index + 1); + auto i = UDiv(UAdd(UInt32(index), UInt32(vec_count)), UInt32(2)); + dst_vec = FInsertV64(dst_vec, i, FAdd(v1, v2)); + } + FWriteV64(dst, dst_vec); + return memory; +} + +} // namespace + +DEF_ISEL(HADDPS_XMMps_XMMps) = HADDPS; +DEF_ISEL(HADDPS_XMMps_MEMps) = HADDPS; +IF_AVX(DEF_ISEL(VHADDPS_XMMdq_XMMdq_XMMdq) = HADDPS;) +IF_AVX(DEF_ISEL(VHADDPS_XMMdq_XMMdq_MEMdq) = HADDPS;) +IF_AVX(DEF_ISEL(VHADDPS_YMMqq_YMMqq_YMMqq) = HADDPS;) +IF_AVX(DEF_ISEL(VHADDPS_YMMqq_YMMqq_MEMqq) = HADDPS;) + +DEF_ISEL(HADDPD_XMMpd_XMMpd) = HADDPD; +DEF_ISEL(HADDPD_XMMpd_MEMpd) = HADDPD; +IF_AVX(DEF_ISEL(VHADDPD_XMMdq_XMMdq_XMMdq) = HADDPD;) +IF_AVX(DEF_ISEL(VHADDPD_XMMdq_XMMdq_MEMdq) = HADDPD;) +IF_AVX(DEF_ISEL(VHADDPD_YMMqq_YMMqq_YMMqq) = HADDPD;) +IF_AVX(DEF_ISEL(VHADDPD_YMMqq_YMMqq_MEMqq) = HADDPD;) + /* 555 PACKSSDW PACKSSDW_MMXq_MEMq MMX MMX PENTIUMMMX ATTRIBUTES: HALF_WIDE_OUTPUT NOTSX 556 PACKSSDW PACKSSDW_MMXq_MMXq MMX MMX PENTIUMMMX ATTRIBUTES: HALF_WIDE_OUTPUT NOTSX diff --git a/tests/AArch64/CMakeLists.txt b/tests/AArch64/CMakeLists.txt index 0f2298025..1e51cb78b 100644 --- a/tests/AArch64/CMakeLists.txt +++ b/tests/AArch64/CMakeLists.txt @@ -34,10 +34,17 @@ set_target_properties(lift-aarch64-tests PROPERTIES ) target_compile_options(lift-aarch64-tests - PRIVATE ${X86_TEST_FLAGS} + PRIVATE ${AARCH64_TEST_FLAGS} -DIN_TEST_GENERATOR ) + +file(GLOB AARCH64_TEST_FILES + "${CMAKE_CURRENT_LIST_DIR}/*/*.S" +) + 
+set_target_properties(lift-aarch64-tests PROPERTIES OBJECT_DEPENDS "${AARCH64_TEST_FILES}") + target_link_libraries(lift-aarch64-tests PUBLIC remill ${PROJECT_LIBRARIES} ) target_include_directories(lift-aarch64-tests PUBLIC ${PROJECT_INCLUDEDIRECTORIES}) target_include_directories(lift-aarch64-tests PRIVATE ${CMAKE_SOURCE_DIR}) @@ -52,6 +59,7 @@ add_executable(run-aarch64-tests set_target_properties(run-aarch64-tests PROPERTIES POSITION_INDEPENDENT_CODE ON COMPILE_FLAGS "-fPIC -pie" + OBJECT_DEPENDS "${AARCH64_TEST_FILES}" ) add_custom_command( diff --git a/tests/X86/AVX/VINSERTF128.S b/tests/X86/AVX/VINSERTF128.S new file mode 100644 index 000000000..889bfb36c --- /dev/null +++ b/tests/X86/AVX/VINSERTF128.S @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2017 Trail of Bits, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#if HAS_FEATURE_AVX + +TEST_BEGIN_64(VINSERTF128_imm00, 3) +TEST_INPUTS( + 0x7ff8000012345678,0x0123456789abcdef,0xfedcba9876543210, + 0x7ff8000012345678,0x1111222233334444,0x5555666677778888 +) + push 0; + push ARG2_64; + push 0; + push ARG3_64; + vmovdqu ymm2, [rsp]; + movq xmm3, ARG1_64; + vinsertf128 ymm1, ymm2, xmm3, 0; +TEST_END_64 + +TEST_BEGIN_64(VINSERTF128_imm01, 3) +TEST_INPUTS( + 0x7ff8000012345678,0x0123456789abcdef,0xfedcba9876543210, + 0x7ff8000012345678,0x1111222233334444,0x5555666677778888 +) + push 0; + push ARG2_64; + push 0; + push ARG3_64; + vmovdqu ymm2, [rsp]; + movq xmm3, ARG1_64; + vinsertf128 ymm1, ymm2, xmm3, 1; +TEST_END_64 + +TEST_BEGIN_64(VINSERTF128_imm10, 3) +TEST_INPUTS( + 0x7ff8000012345678,0x0123456789abcdef,0xfedcba9876543210, + 0x7ff8000012345678,0x1111222233334444,0x5555666677778888 +) + push 0; + push ARG2_64; + push 0; + push ARG3_64; + vmovdqu ymm2, [rsp]; + movq xmm3, ARG1_64; + vinsertf128 ymm1, ymm2, xmm3, 2; +TEST_END_64 + +TEST_BEGIN_64(VINSERTF128_imm11, 3) +TEST_INPUTS( + 0x7ff8000012345678,0x0123456789abcdef,0xfedcba9876543210, + 0x7ff8000012345678,0x1111222233334444,0x5555666677778888 +) + push 0; + push ARG2_64; + push 0; + push ARG3_64; + vmovdqu ymm2, [rsp]; + movq xmm3, ARG1_64; + vinsertf128 ymm1, ymm2, xmm3, 3; +TEST_END_64 + +#endif // HAS_FEATURE_AVX diff --git a/tests/X86/CMakeLists.txt b/tests/X86/CMakeLists.txt index 7be11fa68..ad5ef5437 100644 --- a/tests/X86/CMakeLists.txt +++ b/tests/X86/CMakeLists.txt @@ -34,6 +34,12 @@ function(COMPILE_X86_TESTS name address_size has_avx has_avx512) target_compile_options(lift-${name}-tests PRIVATE ${X86_TEST_FLAGS} -DIN_TEST_GENERATOR ) + + file(GLOB X86_TEST_FILES + "${CMAKE_CURRENT_LIST_DIR}/*/*.S" + ) + + set_target_properties(lift-${name}-tests PROPERTIES OBJECT_DEPENDS "${X86_TEST_FILES}") target_link_libraries(lift-${name}-tests PUBLIC remill ${gtest_LIBRARIES}) target_include_directories(lift-${name}-tests PUBLIC ${gtest_INCLUDE_DIRS}) @@ -52,6 +58,7 
@@ function(COMPILE_X86_TESTS name address_size has_avx has_avx512) ) add_executable(run-${name}-tests EXCLUDE_FROM_ALL Run.cpp Tests.S tests_${name}.S) + set_source_files_properties(Tests.S PROPERTIES OBJECT_DEPENDS "${X86_TEST_FILES}") target_link_libraries(run-${name}-tests PUBLIC remill ${gtest_LIBRARIES}) target_include_directories(run-${name}-tests PUBLIC ${gtest_INCLUDE_DIRS}) diff --git a/tests/X86/DATAXFER/MOVHLPS.S b/tests/X86/DATAXFER/MOVHLPS.S new file mode 100644 index 000000000..772e2e197 --- /dev/null +++ b/tests/X86/DATAXFER/MOVHLPS.S @@ -0,0 +1,37 @@ + +/* + * Copyright (c) 2017 Trail of Bits, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* Runs the two-operand MOVHLPS register form over several register pairs; the sequence reuses the freshly written xmm0 as a source and ends with identical source and destination. The body never references the single test input. */ TEST_BEGIN_64(MOVHLPSv128v128, 1) +TEST_INPUTS(0) + movhlps xmm0, xmm1 + movhlps xmm1, xmm2 + movhlps xmm3, xmm4 + movhlps xmm4, xmm0 + movhlps xmm0, xmm0 +TEST_END_64 + +#if HAS_FEATURE_AVX +/* AVX three-operand counterpart: distinct registers first, then forms where both sources are the same register and where all three operands are xmm0. The body never references the single test input. */ TEST_BEGIN_64(VMOVHLPSvv128v128v128, 1) +TEST_INPUTS(0) + vmovhlps xmm0, xmm1, xmm2 + vmovhlps xmm1, xmm2, xmm3 + vmovhlps xmm2, xmm3, xmm4 + vmovhlps xmm4, xmm0, xmm1 + vmovhlps xmm0, xmm1, xmm1 + vmovhlps xmm0, xmm0, xmm0 +TEST_END_64 +#endif // HAS_FEATURE_AVX diff --git a/tests/X86/SEMAPHORE/CMPXCHG.S b/tests/X86/SEMAPHORE/CMPXCHG.S index c62bb509a..2a31d7bfe 100644 --- a/tests/X86/SEMAPHORE/CMPXCHG.S +++ b/tests/X86/SEMAPHORE/CMPXCHG.S @@ -23,7 +23,9 @@ TEST_INPUTS( 1, 0, 0, 1, 0, 1, 1, 1, 0, - 1, 1, 1) + 1, 1, 1, + 0xFFFFFFFF, 0, 0x41414141, + 0xFFFFFFFF, 0xFFFFFFFF, 0x41414141) mov eax, ARG1_32 mov ebx, ARG2_32 @@ -40,7 +42,9 @@ TEST_INPUTS( 1, 0, 0, 1, 0, 1, 1, 1, 0, - 1, 1, 1) + 1, 1, 1, + 0xFFFFFFFF, 0, 0x41414141, + 0xFFFFFFFF, 0xFFFFFFFF, 0x41414141) mov eax, ARG1_32 mov ebx, ARG2_32 @@ -56,7 +60,9 @@ TEST_INPUTS( 1, 0, 0, 1, 0, 1, 1, 1, 0, - 1, 1, 1) + 1, 1, 1, + 0xFFFFFFFF, 0, 0x41414141, + 0xFFFFFFFF, 0xFFFFFFFF, 0x41414141) mov eax, ARG1_32 mov ebx, ARG2_32 @@ -73,7 +79,9 @@ TEST_INPUTS( 1, 0, 0, 1, 0, 1, 1, 1, 0, - 1, 1, 1) + 1, 1, 1, + 0xFFFFFFFF, 0, 0x41414141, + 0xFFFFFFFF, 0xFFFFFFFF, 0x41414141) mov eax, ARG1_32 mov ebx, ARG2_32 @@ -89,7 +97,9 @@ TEST_INPUTS( 1, 0, 0, 1, 0, 1, 1, 1, 0, - 1, 1, 1) + 1, 1, 1, + 0xFFFFFFFF, 0, 0x41414141, + 0xFFFFFFFF, 0xFFFFFFFF, 0x41414141) mov eax, ARG1_32 mov ebx, ARG2_32 @@ -106,7 +116,9 @@ TEST_INPUTS( 1, 0, 0, 1, 0, 1, 1, 1, 0, - 1, 1, 1) + 1, 1, 1, + 0xFFFFFFFF, 0, 0x41414141, + 0xFFFFFFFF, 0xFFFFFFFF, 0x41414141) mov eax, ARG1_32 mov ebx, ARG2_32 @@ -122,10 +134,14 @@ TEST_INPUTS( 1, 0, 0, 1, 0, 1, 1, 1, 0, - 1, 1, 1) - - mov eax, ARG1_32 - mov ebx, ARG2_32 + 1, 1, 1, + 0xFFFFFFFF, 0, 0x41414141, + 0xFFFFFFFF, 0xFFFFFFFF, 0x41414141, + 0xFFFFFFFFFFFFFFFF, 0, 0x4141414141414141, + 0xFFFFFFFFFFFFFFFF, 
0xFFFFFFFFFFFFFFFF, 0x4141414141414141) + + mov rax, ARG1_64 + mov rbx, ARG2_64 mov QWORD PTR [rsp - 8], ARG3_64 cmpxchg QWORD PTR [rsp - 8], rbx TEST_END_MEM_64 @@ -139,10 +155,30 @@ TEST_INPUTS( 1, 0, 0, 1, 0, 1, 1, 1, 0, - 1, 1, 1) - - mov eax, ARG1_32 - mov ebx, ARG2_32 + 1, 1, 1, + 0xFFFFFFFF, 0, 0x41414141, + 0xFFFFFFFF, 0xFFFFFFFF, 0x41414141, + 0xFFFFFFFFFFFFFFFF, 0, 0x4141414141414141, + 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x4141414141414141) + + mov rax, ARG1_64 + mov rbx, ARG2_64 cmpxchg ARG3_64, rbx TEST_END_64 +/* Regression test named after issue #376: rax, rbx and rcx are loaded with full 64-bit inputs (all eight combinations of 0x4141414141414141 and all-ones), then the 32-bit register form cmpxchg ecx, ebx is executed. */ TEST_BEGIN_64(CMPXCHG_issue376, 3) +TEST_INPUTS( + 0x4141414141414141, 0x4141414141414141, 0x4141414141414141, + 0x4141414141414141, 0x4141414141414141, 0xFFFFFFFFFFFFFFFF, + 0x4141414141414141, 0xFFFFFFFFFFFFFFFF, 0x4141414141414141, + 0x4141414141414141, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, + 0xFFFFFFFFFFFFFFFF, 0x4141414141414141, 0x4141414141414141, + 0xFFFFFFFFFFFFFFFF, 0x4141414141414141, 0xFFFFFFFFFFFFFFFF, + 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x4141414141414141, + 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF) + + mov rax, ARG1_64 + mov rbx, ARG2_64 + mov rcx, ARG3_64 + cmpxchg ecx, ebx +TEST_END_64 diff --git a/tests/X86/SSE/UNPCKHPS.S b/tests/X86/SSE/UNPCKHPS.S new file mode 100644 index 000000000..893f09bac --- /dev/null +++ b/tests/X86/SSE/UNPCKHPS.S @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2017 Trail of Bits, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* (ARG1, ARG2) input pairs shared by every UNPCKHPS/VUNPCKHPS test in this file. */ #define UNPCKHPS_INPUTS_64 \ + 0x0000000000000000, 0x0000000000000000, /* sanity check */\ + 0xfedcba9876543210, 0x0000000000000000, /* arbitrarily chosen values */\ + 0xfedcba9876543210, 0xffffffffffffffff /* other arbitrarily chosen values */ + +/* Register-register form: ARG1 and ARG2 are each pushed and loaded from the stack top with movhps into xmm0 and xmm1, then unpckhps xmm0, xmm1 is executed. */ TEST_BEGIN_64(UNPCKHPSv128v128, 2) +TEST_INPUTS(UNPCKHPS_INPUTS_64) + push ARG1_64; + movhps xmm0, qword ptr [rsp]; + push ARG2_64; + movhps xmm1, qword ptr [rsp]; + unpckhps xmm0, xmm1; +TEST_END_64 + +/* Memory-source form: ARG1 goes into xmm0 via movhps, ARG2 is pushed twice, and the 128-bit operand is read at [rsp+8]. NOTE(review): that 16-byte window covers one pushed ARG2 followed by the earlier pushed ARG1, so the operand's upper qword is ARG1 rather than ARG2 -- confirm this is intended. */ TEST_BEGIN_64(UNPCKHPSv128m128, 2) +TEST_INPUTS(UNPCKHPS_INPUTS_64) + push ARG1_64; + movhps xmm0, qword ptr [rsp]; + push ARG2_64; + push ARG2_64; + unpckhps xmm0, xmmword ptr [rsp+8]; +TEST_END_64 + +#if HAS_FEATURE_AVX + +/* AVX memory-source form: ARG1 goes into xmm1 via movhps, ARG2 is pushed twice, and the 128-bit operand is read at [rsp+8]; the result is written to xmm0. NOTE(review): as in the SSE memory test, the operand's upper qword is the earlier pushed ARG1 -- confirm this is intended. */ TEST_BEGIN_64(VUNPCKHPSv128v128m128, 2) +TEST_INPUTS(UNPCKHPS_INPUTS_64) + push ARG1_64; + movhps xmm1, qword ptr [rsp]; + push ARG2_64; + push ARG2_64; + vunpckhps xmm0, xmm1, xmmword ptr [rsp+8]; +TEST_END_64 + +/* AVX register form: ARG1 and ARG2 are loaded via movhps into xmm1 and xmm2, and vunpckhps writes its result to the separate destination xmm0. */ TEST_BEGIN_64(VUNPCKHPSv128v128v128, 2) +TEST_INPUTS(UNPCKHPS_INPUTS_64) + push ARG1_64; + movhps xmm1, qword ptr [rsp]; + push ARG2_64; + movhps xmm2, qword ptr [rsp]; + vunpckhps xmm0, xmm1, xmm2; +TEST_END_64 + +#endif diff --git a/tests/X86/Tests.S b/tests/X86/Tests.S index 1a9d82f4e..7126f84e0 100644 --- a/tests/X86/Tests.S +++ b/tests/X86/Tests.S @@ -343,6 +343,7 @@ SYMBOL(__x86_test_table_begin): #include "tests/X86/DATAXFER/MOVD.S" #include "tests/X86/DATAXFER/MOVDQA.S" #include "tests/X86/DATAXFER/MOVDQU.S" +#include "tests/X86/DATAXFER/MOVHLPS.S" #include "tests/X86/DATAXFER/MOVHPD.S" #include "tests/X86/DATAXFER/MOVHPS.S" #include "tests/X86/DATAXFER/MOVLPD.S" @@ -359,6 +360,7 @@ SYMBOL(__x86_test_table_begin): /* Bring in the rest of the semantic tests. 
*/ +#include "tests/X86/AVX/VINSERTF128.S" #include "tests/X86/AVX/VZEROUPPER.S" #include "tests/X86/AVX/VPBROADCASTB.S" @@ -521,6 +523,7 @@ SYMBOL(__x86_test_table_begin): #include "tests/X86/SSE/UNPCKLPD.S" #include "tests/X86/SSE/UNPCKLPS.S" #include "tests/X86/SSE/UNPCKHPD.S" +#include "tests/X86/SSE/UNPCKHPS.S" #include "tests/X86/SSE/MOVDDUP.S" #include "tests/X86/SSE/SQRTSS.S" #include "tests/X86/SSE/SQRTSD.S"