diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 51875aadd7420c..6f72d0a9094df3 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -684,6 +684,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAES, W("EnableAES"), RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512VP2INTERSECT, W("EnableAVX512VP2INTERSECT"), 1, "Allows AVX512VP2INTERSECT and dependent hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXIFMA, W("EnableAVXIFMA"), 1, "Allows AVXIFMA and dependent hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNI, W("EnableAVXVNNI"), 1, "Allows AVXVNNI and dependent hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNIINT, W("EnableAVXVNNIINT"), 1, "Allows the VEX-encoded AVXVNNIINT8 and AVXVNNIINT16 hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableGFNI, W("EnableGFNI"), 1, "Allows GFNI and dependent hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSHA, W("EnableSHA"), 1, "Allows SHA and dependent hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableVAES, W("EnableVAES"), 1, "Allows VAES, VPCLMULQDQ, and dependent hardware intrinsics to be disabled") diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index 68826ed36392ed..9d2f2aa9c482f6 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -78,23 +78,25 @@ enum CORINFO_InstructionSet InstructionSet_VectorT128=26, InstructionSet_VectorT256=27, InstructionSet_VectorT512=28, - InstructionSet_X86Base_X64=29, - InstructionSet_SSE42_X64=30, - InstructionSet_AVX_X64=31, - InstructionSet_AVX2_X64=32, - InstructionSet_AVX512_X64=33, - InstructionSet_AVX512v2_X64=34, - InstructionSet_AVX512v3_X64=35, - InstructionSet_AVX10v1_X64=36, - InstructionSet_AVX10v2_X64=37, - InstructionSet_AES_X64=38, -
InstructionSet_AVX512VP2INTERSECT_X64=39, - InstructionSet_AVXIFMA_X64=40, - InstructionSet_AVXVNNI_X64=41, - InstructionSet_GFNI_X64=42, - InstructionSet_SHA_X64=43, - InstructionSet_WAITPKG_X64=44, - InstructionSet_X86Serialize_X64=45, + InstructionSet_AVXVNNIINT=29, + InstructionSet_AVXVNNIINT_V512=30, + InstructionSet_X86Base_X64=31, + InstructionSet_SSE42_X64=32, + InstructionSet_AVX_X64=33, + InstructionSet_AVX2_X64=34, + InstructionSet_AVX512_X64=35, + InstructionSet_AVX512v2_X64=36, + InstructionSet_AVX512v3_X64=37, + InstructionSet_AVX10v1_X64=38, + InstructionSet_AVX10v2_X64=39, + InstructionSet_AES_X64=40, + InstructionSet_AVX512VP2INTERSECT_X64=41, + InstructionSet_AVXIFMA_X64=42, + InstructionSet_AVXVNNI_X64=43, + InstructionSet_GFNI_X64=44, + InstructionSet_SHA_X64=45, + InstructionSet_WAITPKG_X64=46, + InstructionSet_X86Serialize_X64=47, #endif // TARGET_AMD64 #ifdef TARGET_X86 InstructionSet_X86Base=1, @@ -125,23 +127,25 @@ enum CORINFO_InstructionSet InstructionSet_VectorT128=26, InstructionSet_VectorT256=27, InstructionSet_VectorT512=28, - InstructionSet_X86Base_X64=29, - InstructionSet_SSE42_X64=30, - InstructionSet_AVX_X64=31, - InstructionSet_AVX2_X64=32, - InstructionSet_AVX512_X64=33, - InstructionSet_AVX512v2_X64=34, - InstructionSet_AVX512v3_X64=35, - InstructionSet_AVX10v1_X64=36, - InstructionSet_AVX10v2_X64=37, - InstructionSet_AES_X64=38, - InstructionSet_AVX512VP2INTERSECT_X64=39, - InstructionSet_AVXIFMA_X64=40, - InstructionSet_AVXVNNI_X64=41, - InstructionSet_GFNI_X64=42, - InstructionSet_SHA_X64=43, - InstructionSet_WAITPKG_X64=44, - InstructionSet_X86Serialize_X64=45, + InstructionSet_AVXVNNIINT=29, + InstructionSet_AVXVNNIINT_V512=30, + InstructionSet_X86Base_X64=31, + InstructionSet_SSE42_X64=32, + InstructionSet_AVX_X64=33, + InstructionSet_AVX2_X64=34, + InstructionSet_AVX512_X64=35, + InstructionSet_AVX512v2_X64=36, + InstructionSet_AVX512v3_X64=37, + InstructionSet_AVX10v1_X64=38, + InstructionSet_AVX10v2_X64=39, + 
InstructionSet_AES_X64=40, + InstructionSet_AVX512VP2INTERSECT_X64=41, + InstructionSet_AVXIFMA_X64=42, + InstructionSet_AVXVNNI_X64=43, + InstructionSet_GFNI_X64=44, + InstructionSet_SHA_X64=45, + InstructionSet_WAITPKG_X64=46, + InstructionSet_X86Serialize_X64=47, #endif // TARGET_X86 }; @@ -503,6 +507,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_WAITPKG); if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT_V512); if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_Vector128); if (resultflags.HasInstructionSet(InstructionSet_Vector256) && !resultflags.HasInstructionSet(InstructionSet_AVX)) @@ -565,6 +573,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_WAITPKG); if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2)) + 
resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT_V512); if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_Vector128); if (resultflags.HasInstructionSet(InstructionSet_Vector256) && !resultflags.HasInstructionSet(InstructionSet_AVX)) @@ -747,6 +759,10 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "VectorT256"; case InstructionSet_VectorT512 : return "VectorT512"; + case InstructionSet_AVXVNNIINT : + return "AVXVNNIINT"; + case InstructionSet_AVXVNNIINT_V512 : + return "AVXVNNIINT_V512"; #endif // TARGET_AMD64 #ifdef TARGET_X86 case InstructionSet_X86Base : @@ -805,6 +821,10 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "VectorT256"; case InstructionSet_VectorT512 : return "VectorT512"; + case InstructionSet_AVXVNNIINT : + return "AVXVNNIINT"; + case InstructionSet_AVXVNNIINT_V512 : + return "AVXVNNIINT_V512"; #endif // TARGET_X86 default: @@ -909,6 +929,10 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128; case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256; case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512; + case READYTORUN_INSTRUCTION_AvxVnniInt8: return InstructionSet_AVXVNNIINT; + case READYTORUN_INSTRUCTION_AvxVnniInt8_V512: return InstructionSet_AVXVNNIINT_V512; + case READYTORUN_INSTRUCTION_AvxVnniInt16: return InstructionSet_AVXVNNIINT; + case READYTORUN_INSTRUCTION_AvxVnniInt16_V512: return InstructionSet_AVXVNNIINT_V512; #endif // TARGET_AMD64 #ifdef TARGET_X86 case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base; @@ -974,6 +998,10 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_VectorT128: return 
InstructionSet_VectorT128; case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256; case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512; + case READYTORUN_INSTRUCTION_AvxVnniInt8: return InstructionSet_AVXVNNIINT; + case READYTORUN_INSTRUCTION_AvxVnniInt8_V512: return InstructionSet_AVXVNNIINT_V512; + case READYTORUN_INSTRUCTION_AvxVnniInt16: return InstructionSet_AVXVNNIINT; + case READYTORUN_INSTRUCTION_AvxVnniInt16_V512: return InstructionSet_AVXVNNIINT_V512; #endif // TARGET_X86 default: diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 785e4e2a854ab7..43c4c32332df8d 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -37,11 +37,11 @@ #include -constexpr GUID JITEEVersionIdentifier = { /* ce8cef5e-261f-469a-b599-9f3f3e8b2448 */ - 0xce8cef5e, - 0x261f, - 0x469a, - {0xb5, 0x99, 0x9f, 0x3f, 0x3e, 0x8b, 0x24, 0x48} +constexpr GUID JITEEVersionIdentifier = { /* 5c7eb9f1-a9cb-4a35-aea6-ae93d1f54c56 */ + 0x5c7eb9f1, + 0xa9cb, + 0x4a35, + {0xae, 0xa6, 0xae, 0x93, 0xd1, 0xf5, 0x4c, 0x56} }; #endif // JIT_EE_VERSIONING_GUID_H diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h index 01f92e168c6b39..ee9e5fdc443702 100644 --- a/src/coreclr/inc/readytoruninstructionset.h +++ b/src/coreclr/inc/readytoruninstructionset.h @@ -67,6 +67,10 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_Zba=57, READYTORUN_INSTRUCTION_Zbb=58, READYTORUN_INSTRUCTION_Sve2=59, + READYTORUN_INSTRUCTION_AvxVnniInt8=60, + READYTORUN_INSTRUCTION_AvxVnniInt8_V512=61, + READYTORUN_INSTRUCTION_AvxVnniInt16=62, + READYTORUN_INSTRUCTION_AvxVnniInt16_V512=63, READYTORUN_INSTRUCTION_Aes_V256=64, READYTORUN_INSTRUCTION_Aes_V512=65, READYTORUN_INSTRUCTION_AvxIfma=66, diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 36e35ba3ecc190..9a2305d3eefb1c 100644 --- a/src/coreclr/jit/compiler.h +++ 
b/src/coreclr/jit/compiler.h @@ -9604,6 +9604,13 @@ class Compiler return false; } +#ifdef FEATURE_HW_INTRINSICS + CORINFO_InstructionSet lookupInstructionSet(const char* className); + CORINFO_InstructionSet lookupIsa(const char* className, + const char* innerEnclosingClassName, + const char* outerEnclosingClassName); +#endif // FEATURE_HW_INTRINSICS + #ifdef DEBUG // Answer the question: Is a particular ISA supported? // Use this api when asking the question so that future diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 11ccf43e10cca0..b0a65624163f0e 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -90,6 +90,17 @@ bool emitter::IsApxOnlyInstruction(instruction ins) return (ins >= FIRST_APX_INSTRUCTION) && (ins <= LAST_APX_INSTRUCTION); } +bool emitter::IsAVXVNNIFamilyInstruction(instruction ins) +{ + return (ins >= FIRST_AVXVNNI_INSTRUCTION && ins <= LAST_AVXVNNI_INSTRUCTION) || IsAVXVNNIINTInstruction(ins); +} + +bool emitter::IsAVXVNNIINTInstruction(instruction ins) +{ + return (ins >= FIRST_AVXVNNIINT8_INSTRUCTION && ins <= LAST_AVXVNNIINT8_INSTRUCTION) || + (ins >= FIRST_AVXVNNIINT16_INSTRUCTION && ins <= LAST_AVXVNNIINT16_INSTRUCTION); +} + bool emitter::Is3OpRmwInstruction(instruction ins) { switch (ins) @@ -113,7 +124,7 @@ bool emitter::Is3OpRmwInstruction(instruction ins) default: { return ((ins >= FIRST_FMA_INSTRUCTION) && (ins <= LAST_FMA_INSTRUCTION)) || - ((ins >= FIRST_AVXVNNI_INSTRUCTION) && (ins <= LAST_AVXVNNI_INSTRUCTION)) || + (IsAVXVNNIFamilyInstruction(ins)) || ((ins >= FIRST_AVXIFMA_INSTRUCTION) && (ins <= LAST_AVXIFMA_INSTRUCTION)); } } @@ -278,6 +289,23 @@ bool emitter::IsVexEncodableInstruction(instruction ins) const return emitComp->compSupportsHWIntrinsic(InstructionSet_AVXVNNI); } + case INS_vpdpwsud: + case INS_vpdpwsuds: + case INS_vpdpwusd: + case INS_vpdpwusds: + case INS_vpdpwuud: + case INS_vpdpwuuds: + case INS_vpdpbssd: + case INS_vpdpbssds: + case INS_vpdpbsud: + 
case INS_vpdpbsuds: + case INS_vpdpbuud: + case INS_vpdpbuuds: + { + // Vex versions of AvxVnniInt8 + AvxVnniInt16 + return emitComp->compSupportsHWIntrinsic(InstructionSet_AVXVNNIINT); + } + case INS_vpmadd52huq: case INS_vpmadd52luq: { @@ -325,6 +353,23 @@ bool emitter::IsEvexEncodableInstruction(instruction ins) const return emitComp->compSupportsHWIntrinsic(InstructionSet_AES_V512); } + case INS_vpdpwsud: + case INS_vpdpwsuds: + case INS_vpdpwusd: + case INS_vpdpwusds: + case INS_vpdpwuud: + case INS_vpdpwuuds: + case INS_vpdpbssd: + case INS_vpdpbssds: + case INS_vpdpbsud: + case INS_vpdpbsuds: + case INS_vpdpbuud: + case INS_vpdpbuuds: + { + // Evex versions of AvxVnniInt8 + AvxVnniInt16 will be supported + return emitComp->compSupportsHWIntrinsic(InstructionSet_AVXVNNIINT_V512); + } + case INS_vpdpbusd: case INS_vpdpwssd: case INS_vpdpbusds: @@ -2928,7 +2973,9 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co if (sizePrefix == 0) { // no simd prefix for EVEX2 - AVX10.2 and above - assert(emitComp->compIsaSupportedDebugOnly(InstructionSet_AVX10v2)); + assert(emitComp->compIsaSupportedDebugOnly(InstructionSet_AVX10v2) || + emitComp->compIsaSupportedDebugOnly(InstructionSet_AVXVNNIINT) || + emitComp->compIsaSupportedDebugOnly(InstructionSet_AVXVNNIINT_V512)); } else if (isPrefix(sizePrefix)) { @@ -3139,7 +3186,14 @@ emitter::code_t emitter::emitExtractVexPrefix(instruction ins, code_t& code) con // check for a prefix in the 11 position BYTE sizePrefix = (code >> 16) & 0xFF; - if ((sizePrefix != 0) && isPrefix(sizePrefix)) + if (sizePrefix == 0) + { + // no simd prefix for Avx-Vnni-Int* ISAs subset of instructions + // INS_vpdpbuud[,s], INS_vpdpwuud[,s] + assert(emitComp->compIsaSupportedDebugOnly(InstructionSet_AVXVNNIINT) || + emitComp->compIsaSupportedDebugOnly(InstructionSet_AVXVNNIINT_V512)); + } + else if (isPrefix(sizePrefix)) { // 'pp' bits in byte2 of VEX prefix allows us to encode SIMD size prefixes as two bits // @@ 
-3209,23 +3263,27 @@ emitter::code_t emitter::emitExtractVexPrefix(instruction ins, code_t& code) con unreached(); } } + } + else + { + unreached(); + } - // Now the byte in the 22 position must be an escape byte 0F - leadingBytes = check; - assert(leadingBytes == 0x0F); + // Now the byte in the 22 position must be an escape byte 0F + leadingBytes = check; + assert(leadingBytes == 0x0F); - // Get rid of both sizePrefix and escape byte - code &= 0x0000FFFFLL; + // Get rid of both sizePrefix and escape byte + code &= 0x0000FFFFLL; - // Check the byte in the 33 position to see if it is 3A or 38. - // In such a case escape bytes must be 0x0F3A or 0x0F38 - check = code & 0xFF; + // Check the byte in the 33 position to see if it is 3A or 38. + // In such a case escape bytes must be 0x0F3A or 0x0F38 + check = code & 0xFF; - if ((check == 0x3A) || (check == 0x38)) - { - leadingBytes = (leadingBytes << 8) | check; - code &= 0x0000FF00LL; - } + if ((check == 0x3A) || (check == 0x38)) + { + leadingBytes = (leadingBytes << 8) | check; + code &= 0x0000FF00LL; } } else @@ -4378,7 +4436,7 @@ bool emitter::EncodedBySSE38orSSE3A(instruction ins) const #if defined(DEBUG) insCode = (insCode >> 16) & 0xFF; - assert((insCode == 0x66) || (insCode == 0xF2) || (insCode == 0xF3)); + assert((insCode == 0x00) || (insCode == 0x66) || (insCode == 0xF2) || (insCode == 0xF3)); #endif // DEBUG return true; @@ -18083,7 +18141,8 @@ ssize_t emitter::TryEvexCompressDisp8Byte(instrDesc* id, ssize_t dsp, bool* dspI { case INS_TT_FULL: { - assert(inputSize == 4 || inputSize == 8); + instruction ins = id->idIns(); + assert((inputSize == 4 || inputSize == 8) || IsAVXVNNIINTInstruction(ins)); if (HasEmbeddedBroadcast(id)) { // N = input size in bytes diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index b5ce73f6a48a2d..2074cb5da3ff70 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -121,6 +121,8 @@ static bool IsSSEOrAVXInstruction(instruction ins); 
static bool IsAVXOnlyInstruction(instruction ins); static bool IsAvx512OnlyInstruction(instruction ins); static bool IsKMOVInstruction(instruction ins); +static bool IsAVXVNNIFamilyInstruction(instruction ins); +static bool IsAVXVNNIINTInstruction(instruction ins); static bool Is3OpRmwInstruction(instruction ins); static bool IsBMIInstruction(instruction ins); static bool IsKInstruction(instruction ins); diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index fbb9b984bd4e06..d7be0beb08da26 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -930,6 +930,8 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { NI_Illegal, NI_Illegal }, // VectorT128 { NI_Illegal, NI_Illegal }, // VectorT256 { NI_Illegal, NI_Illegal }, // VectorT512 + { FIRST_NI_AVXVNNIINT, LAST_NI_AVXVNNIINT }, // AVXVNNIINT + { FIRST_NI_AVXVNNIINT_V512, LAST_NI_AVXVNNIINT_V512 }, // AVXVNNIINT_V512 { FIRST_NI_X86Base_X64, LAST_NI_X86Base_X64 }, // X86Base_X64 { FIRST_NI_SSE42_X64, LAST_NI_SSE42_X64 }, // SSE42_X64 @@ -1180,7 +1182,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, return NI_Illegal; } - CORINFO_InstructionSet isa = lookupIsa(className, innerEnclosingClassName, outerEnclosingClassName); + CORINFO_InstructionSet isa = comp->lookupIsa(className, innerEnclosingClassName, outerEnclosingClassName); if (isa == InstructionSet_ILLEGAL) { diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index 10bedd930b5aa5..6cacd4a1ae17f1 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -525,15 +525,12 @@ struct HWIntrinsicInfo static const HWIntrinsicInfo& lookup(NamedIntrinsic id); - static NamedIntrinsic lookupId(Compiler* comp, - CORINFO_SIG_INFO* sig, - const char* className, - const char* methodName, - const char* innerEnclosingClassName, - const char* outerEnclosingClassName); - static CORINFO_InstructionSet lookupIsa(const char* className, - const char* 
innerEnclosingClassName, - const char* outerEnclosingClassName); + static NamedIntrinsic lookupId(Compiler* comp, + CORINFO_SIG_INFO* sig, + const char* className, + const char* methodName, + const char* innerEnclosingClassName, + const char* outerEnclosingClassName); static unsigned lookupSimdSize(Compiler* comp, NamedIntrinsic id, CORINFO_SIG_INFO* sig); diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 8db572f6bbc54f..efa6c434d0e6e2 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -51,7 +51,7 @@ static CORINFO_InstructionSet Arm64VersionOfIsa(CORINFO_InstructionSet isa) // // Return Value: // The InstructionSet associated with className -static CORINFO_InstructionSet lookupInstructionSet(const char* className) +CORINFO_InstructionSet Compiler::lookupInstructionSet(const char* className) { assert(className != nullptr); @@ -136,9 +136,9 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className) // Return Value: // The InstructionSet associated with className and enclosingClassName // -CORINFO_InstructionSet HWIntrinsicInfo::lookupIsa(const char* className, - const char* innerEnclosingClassName, - const char* outerEnclosingClassName) +CORINFO_InstructionSet Compiler::lookupIsa(const char* className, + const char* innerEnclosingClassName, + const char* outerEnclosingClassName) { assert(className != nullptr); diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 1e7f79a6aa1334..6d255a1295315c 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -1018,6 +1018,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case InstructionSet_AVX512: case InstructionSet_AVX512_X64: case InstructionSet_AVX512v2: + case InstructionSet_AVXVNNIINT: + case InstructionSet_AVXVNNIINT_V512: { genAvxFamilyIntrinsic(node, instOptions); break; @@ -3485,6 +3487,176 
@@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } + case NI_AVXVNNIINT_MultiplyWideningAndAddSaturate: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSaturate: + { + GenTree* op2 = node->Op(2); + GenTree* op3 = node->Op(3); + + op1Reg = op1->GetRegNum(); + regNumber op2Reg = op2->GetRegNum(); + assert(targetReg != REG_NA); + assert(op1Reg != REG_NA); + assert(op2Reg != REG_NA); + + var_types op3Type = node->GetAuxiliaryType(); + switch (baseType) + { + case TYP_UBYTE: + { + ins = INS_vpdpbuuds; + break; + } + + case TYP_BYTE: + { + switch (op3Type) + { + case TYP_UBYTE: + { + ins = INS_vpdpbsuds; + break; + } + + case TYP_BYTE: + { + ins = INS_vpdpbssds; + break; + } + + default: + { + unreached(); + } + } + break; + } + + case TYP_SHORT: + { + ins = INS_vpdpwsuds; + break; + } + + case TYP_USHORT: + { + switch (op3Type) + { + case TYP_USHORT: + { + ins = INS_vpdpwuuds; + break; + } + + case TYP_SHORT: + { + ins = INS_vpdpwusds; + break; + } + + default: + { + unreached(); + } + } + break; + } + + default: + { + unreached(); + } + } + + genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, op1Reg, op2Reg, op3, instOptions); + break; + } + + case NI_AVXVNNIINT_MultiplyWideningAndAdd: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAdd: + { + GenTree* op2 = node->Op(2); + GenTree* op3 = node->Op(3); + + op1Reg = op1->GetRegNum(); + regNumber op2Reg = op2->GetRegNum(); + assert(targetReg != REG_NA); + assert(op1Reg != REG_NA); + assert(op2Reg != REG_NA); + + var_types op3Type = node->GetAuxiliaryType(); + switch (baseType) + { + case TYP_UBYTE: + { + ins = INS_vpdpbuud; + break; + } + + case TYP_BYTE: + { + switch (op3Type) + { + case TYP_UBYTE: + { + ins = INS_vpdpbsud; + break; + } + + case TYP_BYTE: + { + ins = INS_vpdpbssd; + break; + } + + default: + { + unreached(); + } + } + break; + } + + case TYP_SHORT: + { + ins = INS_vpdpwsud; + break; + } + + case TYP_USHORT: + { + switch (op3Type) + { + case TYP_USHORT: + { + ins = 
INS_vpdpwuud; + break; + } + + case TYP_SHORT: + { + ins = INS_vpdpwusd; + break; + } + + default: + { + unreached(); + } + } + break; + } + + default: + { + unreached(); + } + } + + genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, op1Reg, op2Reg, op3, instOptions); + break; + } + default: unreached(); break; diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index 39223ca652ecf1..bcde3fb59fad2e 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -1106,6 +1106,27 @@ HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAdd, HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_vpdpbusds, INS_vpdpwssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) #define LAST_NI_AVXVNNI NI_AVXVNNI_MultiplyWideningAndAddSaturate +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// 
AVXVNNIINT Intrinsics +#define FIRST_NI_AVXVNNIINT NI_AVXVNNIINT_MultiplyWideningAndAdd +HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +#define LAST_NI_AVXVNNIINT NI_AVXVNNIINT_MultiplyWideningAndAddSaturate + + +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// AVXVNNIINT_V512 Intrinsics +#define FIRST_NI_AVXVNNIINT_V512 NI_AVXVNNIINT_V512_MultiplyWideningAndAdd +HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAdd, -1, 3, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +#define LAST_NI_AVXVNNIINT_V512 NI_AVXVNNIINT_V512_MultiplyWideningAndAddSaturate + // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index ae753ee165cd30..7d4d4abd207b5f 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -44,6 +44,10 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa) return InstructionSet_AVXIFMA_X64; case InstructionSet_AVXVNNI: return InstructionSet_AVXVNNI_X64; + case InstructionSet_AVXVNNIINT: + return InstructionSet_AVXVNNIINT; + case InstructionSet_AVXVNNIINT_V512: + return InstructionSet_AVXVNNIINT_V512; case InstructionSet_GFNI: return InstructionSet_GFNI_X64; case InstructionSet_SHA: @@ -145,6 +149,12 @@ static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa) return InstructionSet_GFNI_V512; } + case 
InstructionSet_AVXVNNIINT: + case InstructionSet_AVXVNNIINT_V512: + { + return InstructionSet_AVXVNNIINT_V512; + } + default: { return InstructionSet_NONE; @@ -160,7 +170,7 @@ static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa) // // Return Value: // The InstructionSet associated with className -static CORINFO_InstructionSet lookupInstructionSet(const char* className) +CORINFO_InstructionSet Compiler::lookupInstructionSet(const char* className) { assert(className != nullptr); @@ -253,9 +263,26 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className) { return InstructionSet_AVXIFMA; } - else if (strcmp(className + 3, "Vnni") == 0) + else if (strncmp(className + 3, "Vnni", 4) == 0) { - return InstructionSet_AVXVNNI; + if (className[7] == '\0') + { + return InstructionSet_AVXVNNI; + } + else if (strncmp(className + 7, "Int", 3) == 0) + { + if ((strcmp(className + 10, "8") == 0) || (strcmp(className + 10, "16") == 0)) + { + if (compSupportsHWIntrinsic(InstructionSet_AVXVNNIINT)) + { + return InstructionSet_AVXVNNIINT; + } + else + { + return InstructionSet_AVXVNNIINT_V512; + } + } + } } } } @@ -386,7 +413,6 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className) return InstructionSet_X86Serialize; } } - return InstructionSet_ILLEGAL; } @@ -400,9 +426,9 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className) // // Return Value: // The InstructionSet associated with className and enclosingClassName -CORINFO_InstructionSet HWIntrinsicInfo::lookupIsa(const char* className, - const char* innerEnclosingClassName, - const char* outerEnclosingClassName) +CORINFO_InstructionSet Compiler::lookupIsa(const char* className, + const char* innerEnclosingClassName, + const char* outerEnclosingClassName) { assert(className != nullptr); @@ -5229,6 +5255,35 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_AVXVNNIINT_MultiplyWideningAndAdd: + case 
NI_AVXVNNIINT_MultiplyWideningAndAddSaturate: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAdd: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSaturate: + { + assert(sig->numArgs == 3); + + CORINFO_ARG_LIST_HANDLE argList = sig->args; + CORINFO_CLASS_HANDLE argClass; + var_types argType = TYP_UNKNOWN; + + CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList); + CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass))); + CorInfoType op3BaseJitType = getBaseJitTypeOfSIMDType(argClass); + GenTree* op3 = getArgForHWIntrinsic(argType, argClass); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); + op2 = getArgForHWIntrinsic(argType, argClass); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); + op1 = getArgForHWIntrinsic(argType, argClass); + + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); + retNode->AsHWIntrinsic()->SetAuxiliaryJitType(op3BaseJitType); + break; + } + case NI_AVX512_ExpandLoad: case NI_AVX512v3_ExpandLoad: { diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 7f0c53addd6b3b..d395369ffc1b07 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -636,6 +636,24 @@ INST3(vpdpwssd, "vpdpwssd", IUM_RW, BAD_CODE, BAD_CODE, INST3(vpdpwssds, "vpdpwssds", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x53), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Signed Word Integers with Saturation #define LAST_AVXVNNI_INSTRUCTION INS_vpdpwssds +#define FIRST_AVXVNNIINT16_INSTRUCTION INS_vpdpwsud +INST3(vpdpwsud, "vpdpwsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD2), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX |
INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwsuds, "vpdpwsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD3), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwusd, "vpdpwusd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD2), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwusds, "vpdpwusds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD3), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwuud, "vpdpwuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD2), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +INST3(vpdpwuuds, "vpdpwuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD3), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results +#define LAST_AVXVNNIINT16_INSTRUCTION INS_vpdpwuuds + +#define FIRST_AVXVNNIINT8_INSTRUCTION INS_vpdpbssd +INST3(vpdpbssd, "vpdpbssd", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x50), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 |
REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbssds, "vpdpbssds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x51), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbsud, "vpdpbsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x50), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbsuds, "vpdpbsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x51), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbuud, "vpdpbuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x50), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +INST3(vpdpbuuds, "vpdpbuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x51), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results +#define LAST_AVXVNNIINT8_INSTRUCTION INS_vpdpbuuds + #define FIRST_AVXIFMA_INSTRUCTION INS_vpmadd52huq // Instructions for AVXIFMA INST3(vpmadd52huq, "vpmadd52huq", IUM_RW,
BAD_CODE, BAD_CODE, SSE38(0xB5), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Multiply of Unsigned 52-Bit Unsigned Integers and Add High 52-Bit Products to 64-Bit Accumulators @@ -1112,18 +1130,6 @@ INST3(vminmaxss, "vminmaxss", IUM_WR, BAD_CODE, BAD_ INST3(vmovd_simd, "vmovd", IUM_WR, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX) // Move DWORD between xmm regs <-> memory/xmm regs INST3(vmovw_simd, "vmovw", IUM_WR, SSEFLTMAP(0x05, 0x7E), BAD_CODE, SSEFLTMAP(0x05, 0x6E), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_EVEX) // Move WORD between xmm regs <-> memory/xmm regs INST3(vmpsadbw, "vmpsadbw", IUM_WR, BAD_CODE, BAD_CODE, AVX3A(0x42), 4C, 2C, INS_TT_FULL_MEM, KMask_Base8 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Compute Multiple Packed Sums of Absolute Difference -INST3(vpdpbssd, "vpdpbssd", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x50), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbssds, "vpdpbssds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf2, 0x51), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbsud, "vpdpbsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0x50), 5C, 2X, INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbsuds, "vpdpbsuds", IUM_WR, BAD_CODE, 
BAD_CODE, PSSE38(0xf3, 0x51), 5C, 2X, INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbuud, "vpdpbuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x50), 5C, 2X, INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpbuuds, "vpdpbuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x51), 5C, 2X, INS_TT_FULL, Input_8Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results -INST3(vpdpwsud, "vpdpwsud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD2), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwsuds, "vpdpwsuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xf3, 0xD3), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwusd, "vpdpwusd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD2), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwusds, "vpdpwusds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xD3), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply 
individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwuud, "vpdpwuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD2), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results -INST3(vpdpwuuds, "vpdpwuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0xD3), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual words of first source operand with individual words of second source operand and add the results INST3(vucomxsd, "vucomxsd", IUM_RD, BAD_CODE, BAD_CODE, SSEFLT(0x2f), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Perform an unordered compare of double precision floating point values and set flags INST3(vucomxss, "vucomxss", IUM_RD, BAD_CODE, BAD_CODE, SSEDBL(0x2E), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Perform an unordered compare of single precision floating point values and set flags #define LAST_AVX512_INSTRUCTION INS_vucomxss diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 4c6a30de6bc50b..3c2f9c67c0f78f 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -409,6 +409,7 @@ RELEASE_CONFIG_INTEGER(EnableAES, "EnableAES", RELEASE_CONFIG_INTEGER(EnableAVX512VP2INTERSECT, "EnableAVX512VP2INTERSECT", 1) // Allows AVX512VP2INTERSECT and dependent hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableAVXIFMA, "EnableAVXIFMA", 1) // Allows AVXIFMA and dependent hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableAVXVNNI, "EnableAVXVNNI", 1) // Allows AVXVNNI 
and dependent hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableAVXVNNIINT, "EnableAVXVNNIINT", 1) // Allows VEX AVXVNNIINT+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableGFNI, "EnableGFNI", 1) // Allows GFNI and dependent hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableSHA, "EnableSHA", 1) // Allows SHA and dependent hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableVAES, "EnableVAES", 1) // Allows VAES, VPCLMULQDQ, and dependent hardware intrinsics to be disabled diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index e9b5d08e4c36b2..8a1ca074322921 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -10752,13 +10752,6 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_AVXVNNI_MultiplyWideningAndAdd: - case NI_AVXVNNI_MultiplyWideningAndAddSaturate: - { - TryMakeSrcContainedOrRegOptional(node, op3); - break; - } - case NI_AVX2_MultiplyNoFlags: case NI_AVX2_X64_MultiplyNoFlags: { @@ -10814,17 +10807,11 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_X86Base_DivRem: - case NI_X86Base_X64_DivRem: - { - // DIV only allows divisor (op3) in memory - TryMakeSrcContainedOrRegOptional(node, op3); - break; - } - default: { - unreached(); + assert((intrinsicId == NI_X86Base_DivRem) || (intrinsicId == NI_X86Base_X64_DivRem) || + (intrinsicId >= FIRST_NI_AVXVNNI && intrinsicId <= LAST_NI_AVXVNNIINT_V512)); + TryMakeSrcContainedOrRegOptional(node, op3); break; } } diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 4c418118d4bbf2..8fb7ae7cfb6016 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2761,6 +2761,10 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou case NI_AVXVNNI_MultiplyWideningAndAdd: case NI_AVXVNNI_MultiplyWideningAndAddSaturate: + case NI_AVXVNNIINT_MultiplyWideningAndAdd: 
+ case NI_AVXVNNIINT_MultiplyWideningAndAddSaturate: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAdd: + case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSaturate: { assert(numArgs == 3); diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index 649d665ef6fbfa..9c8da6eac983f3 100644 --- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -81,6 +81,7 @@ private static class XArchIntrinsicConstants public const int Vaes = (1 << 15); public const int WaitPkg = (1 << 16); public const int X86Serialize = (1 << 17); + public const int AvxVnniInt = (1 << 18); public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) { @@ -99,7 +100,12 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) if ((flags & Avx10v1) != 0) builder.AddSupportedInstructionSet("avx10v1"); if ((flags & Avx10v2) != 0) + { builder.AddSupportedInstructionSet("avx10v2"); + builder.AddSupportedInstructionSet("avxvnniint_v512"); + } + if ((flags & AvxVnniInt) != 0) + builder.AddSupportedInstructionSet("avxvnniint"); if ((flags & Apx) != 0) builder.AddSupportedInstructionSet("apx"); @@ -188,6 +194,8 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.X64_GFNI_X64 => Gfni, InstructionSet.X64_GFNI_V256 => (Gfni | Avx), InstructionSet.X64_GFNI_V512 => (Gfni | Avx512), + InstructionSet.X64_AVXVNNIINT => AvxVnniInt, + InstructionSet.X64_AVXVNNIINT_V512 => Avx10v2, InstructionSet.X64_SHA => Sha, InstructionSet.X64_SHA_X64 => Sha, diff --git a/src/coreclr/tools/Common/InstructionSetHelpers.cs b/src/coreclr/tools/Common/InstructionSetHelpers.cs index baef3c0d54c72f..0fb2dd0f5c8c81 100644 --- a/src/coreclr/tools/Common/InstructionSetHelpers.cs +++ b/src/coreclr/tools/Common/InstructionSetHelpers.cs @@ -203,6 +203,7 @@ public static InstructionSetSupport 
ConfigureInstructionSetSupport(string instru optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx2"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxifma"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnni"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnniint"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("aes_v256"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("gfni_v256"); @@ -222,6 +223,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx512v3"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v1"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v2"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnniint_v512"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx512vp2intersect"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("aes_v512"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("gfni_v512"); diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs index 47837ff594ce4d..30c9ceeefcab62 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs @@ -70,6 +70,10 @@ public enum ReadyToRunInstructionSet Zba=57, Zbb=58, Sve2=59, + AvxVnniInt8=60, + AvxVnniInt8_V512=61, + AvxVnniInt16=62, + AvxVnniInt16_V512=63, Aes_V256=64, Aes_V512=65, AvxIfma=66, diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index 8069076421aaa2..ef3c8b4d7e6b65 100644 --- 
a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -116,6 +116,8 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X64_VectorT128: return ReadyToRunInstructionSet.VectorT128; case InstructionSet.X64_VectorT256: return ReadyToRunInstructionSet.VectorT256; case InstructionSet.X64_VectorT512: return ReadyToRunInstructionSet.VectorT512; + case InstructionSet.X64_AVXVNNIINT: return ReadyToRunInstructionSet.AvxVnniInt8; + case InstructionSet.X64_AVXVNNIINT_V512: return ReadyToRunInstructionSet.AvxVnniInt8_V512; default: throw new Exception("Unknown instruction set"); } @@ -170,6 +172,8 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X86_VectorT128: return ReadyToRunInstructionSet.VectorT128; case InstructionSet.X86_VectorT256: return ReadyToRunInstructionSet.VectorT256; case InstructionSet.X86_VectorT512: return ReadyToRunInstructionSet.VectorT512; + case InstructionSet.X86_AVXVNNIINT: return ReadyToRunInstructionSet.AvxVnniInt8; + case InstructionSet.X86_AVXVNNIINT_V512: return ReadyToRunInstructionSet.AvxVnniInt8_V512; default: throw new Exception("Unknown instruction set"); } diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 3d92b216eb0012..0e3e406f989c8b 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -74,6 +74,8 @@ public enum InstructionSet X64_VectorT128 = InstructionSet_X64.VectorT128, X64_VectorT256 = InstructionSet_X64.VectorT256, X64_VectorT512 = InstructionSet_X64.VectorT512, + X64_AVXVNNIINT = InstructionSet_X64.AVXVNNIINT, + X64_AVXVNNIINT_V512 = InstructionSet_X64.AVXVNNIINT_V512, X64_X86Base_X64 = InstructionSet_X64.X86Base_X64, X64_SSE42_X64 = InstructionSet_X64.SSE42_X64, X64_AVX_X64 = 
InstructionSet_X64.AVX_X64, @@ -119,6 +121,8 @@ public enum InstructionSet X86_VectorT128 = InstructionSet_X86.VectorT128, X86_VectorT256 = InstructionSet_X86.VectorT256, X86_VectorT512 = InstructionSet_X86.VectorT512, + X86_AVXVNNIINT = InstructionSet_X86.AVXVNNIINT, + X86_AVXVNNIINT_V512 = InstructionSet_X86.AVXVNNIINT_V512, X86_X86Base_X64 = InstructionSet_X86.X86Base_X64, X86_SSE42_X64 = InstructionSet_X86.SSE42_X64, X86_AVX_X64 = InstructionSet_X86.AVX_X64, @@ -211,23 +215,25 @@ public enum InstructionSet_X64 VectorT128 = 26, VectorT256 = 27, VectorT512 = 28, - X86Base_X64 = 29, - SSE42_X64 = 30, - AVX_X64 = 31, - AVX2_X64 = 32, - AVX512_X64 = 33, - AVX512v2_X64 = 34, - AVX512v3_X64 = 35, - AVX10v1_X64 = 36, - AVX10v2_X64 = 37, - AES_X64 = 38, - AVX512VP2INTERSECT_X64 = 39, - AVXIFMA_X64 = 40, - AVXVNNI_X64 = 41, - GFNI_X64 = 42, - SHA_X64 = 43, - WAITPKG_X64 = 44, - X86Serialize_X64 = 45, + AVXVNNIINT = 29, + AVXVNNIINT_V512 = 30, + X86Base_X64 = 31, + SSE42_X64 = 32, + AVX_X64 = 33, + AVX2_X64 = 34, + AVX512_X64 = 35, + AVX512v2_X64 = 36, + AVX512v3_X64 = 37, + AVX10v1_X64 = 38, + AVX10v2_X64 = 39, + AES_X64 = 40, + AVX512VP2INTERSECT_X64 = 41, + AVXIFMA_X64 = 42, + AVXVNNI_X64 = 43, + GFNI_X64 = 44, + SHA_X64 = 45, + WAITPKG_X64 = 46, + X86Serialize_X64 = 47, } public enum InstructionSet_X86 @@ -262,23 +268,25 @@ public enum InstructionSet_X86 VectorT128 = 26, VectorT256 = 27, VectorT512 = 28, - X86Base_X64 = 29, - SSE42_X64 = 30, - AVX_X64 = 31, - AVX2_X64 = 32, - AVX512_X64 = 33, - AVX512v2_X64 = 34, - AVX512v3_X64 = 35, - AVX10v1_X64 = 36, - AVX10v2_X64 = 37, - AES_X64 = 38, - AVX512VP2INTERSECT_X64 = 39, - AVXIFMA_X64 = 40, - AVXVNNI_X64 = 41, - GFNI_X64 = 42, - SHA_X64 = 43, - WAITPKG_X64 = 44, - X86Serialize_X64 = 45, + AVXVNNIINT = 29, + AVXVNNIINT_V512 = 30, + X86Base_X64 = 31, + SSE42_X64 = 32, + AVX_X64 = 33, + AVX2_X64 = 34, + AVX512_X64 = 35, + AVX512v2_X64 = 36, + AVX512v3_X64 = 37, + AVX10v1_X64 = 38, + AVX10v2_X64 = 39, + AES_X64 = 40, + 
AVX512VP2INTERSECT_X64 = 41, + AVXIFMA_X64 = 42, + AVXVNNI_X64 = 43, + GFNI_X64 = 44, + SHA_X64 = 45, + WAITPKG_X64 = 46, + X86Serialize_X64 = 47, } public unsafe struct InstructionSetFlags : IEnumerable @@ -629,6 +637,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize)) resultflags.AddInstructionSet(InstructionSet.X64_X86Base); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX2); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT_V512)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2); if (resultflags.HasInstructionSet(InstructionSet.X64_Vector128)) resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_Vector256)) @@ -692,6 +704,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X86_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X86_X86Serialize)) resultflags.AddInstructionSet(InstructionSet.X86_X86Base); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNIINT)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX2); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNIINT_V512)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX10v2); if (resultflags.HasInstructionSet(InstructionSet.X86_Vector128)) resultflags.AddInstructionSet(InstructionSet.X86_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X86_Vector256)) @@ -863,6 +879,10 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_WAITPKG); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize); + if 
(resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_Vector128); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX)) @@ -926,6 +946,10 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X86_WAITPKG); if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base)) resultflags.AddInstructionSet(InstructionSet.X86_X86Serialize); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) + resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNIINT); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v2)) + resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNIINT_V512); if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base)) resultflags.AddInstructionSet(InstructionSet.X86_Vector128); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX)) @@ -1085,6 +1109,10 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("vectort128", "", InstructionSet.X64_VectorT128, true); yield return new InstructionSetInfo("vectort256", "", InstructionSet.X64_VectorT256, true); yield return new InstructionSetInfo("vectort512", "", InstructionSet.X64_VectorT512, true); + yield return new InstructionSetInfo("avxvnniint", "AvxVnniInt8", InstructionSet.X64_AVXVNNIINT, true); + yield return new InstructionSetInfo("avxvnniint_v512", "AvxVnniInt8_V512", InstructionSet.X64_AVXVNNIINT_V512, true); + yield return new InstructionSetInfo("avxvnniint", "AvxVnniInt16", InstructionSet.X64_AVXVNNIINT, true); + yield return new InstructionSetInfo("avxvnniint_v512", "AvxVnniInt16_V512", InstructionSet.X64_AVXVNNIINT_V512, true); break; case 
TargetArchitecture.X86: @@ -1154,6 +1182,10 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("vectort128", "", InstructionSet.X86_VectorT128, true); yield return new InstructionSetInfo("vectort256", "", InstructionSet.X86_VectorT256, true); yield return new InstructionSetInfo("vectort512", "", InstructionSet.X86_VectorT512, true); + yield return new InstructionSetInfo("avxvnniint", "AvxVnniInt8", InstructionSet.X86_AVXVNNIINT, true); + yield return new InstructionSetInfo("avxvnniint_v512", "AvxVnniInt8_V512", InstructionSet.X86_AVXVNNIINT_V512, true); + yield return new InstructionSetInfo("avxvnniint", "AvxVnniInt16", InstructionSet.X86_AVXVNNIINT, true); + yield return new InstructionSetInfo("avxvnniint_v512", "AvxVnniInt16_V512", InstructionSet.X86_AVXVNNIINT_V512, true); break; } } @@ -1739,6 +1771,18 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite else { return InstructionSet.X64_X86Serialize; } + case "AvxVnniInt8": + if (nestedTypeName == "V512") + { return InstructionSet.X64_AVXVNNIINT_V512; } + else + { return InstructionSet.X64_AVXVNNIINT; } + + case "AvxVnniInt16": + if (nestedTypeName == "V512") + { return InstructionSet.X64_AVXVNNIINT_V512; } + else + { return InstructionSet.X64_AVXVNNIINT; } + } break; @@ -1911,6 +1955,18 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite case "X86Serialize": { return InstructionSet.X86_X86Serialize; } + case "AvxVnniInt8": + if (nestedTypeName == "V512") + { return InstructionSet.X86_AVXVNNIINT_V512; } + else + { return InstructionSet.X86_AVXVNNIINT; } + + case "AvxVnniInt16": + if (nestedTypeName == "V512") + { return InstructionSet.X86_AVXVNNIINT_V512; } + else + { return InstructionSet.X86_AVXVNNIINT; } + } break; diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index 
5d067eb3311e1c..24c3f474ab14de 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -113,6 +113,12 @@ instructionset ,X86 , ,VectorT128 ,39 ,VectorT instructionset ,X86 , ,VectorT256 ,40 ,VectorT256 ,vectort256 instructionset ,X86 , ,VectorT512 ,41 ,VectorT512 ,vectort512 + +instructionset ,X86 ,AvxVnniInt8 , ,60 ,AVXVNNIINT ,avxvnniint +instructionset ,X86 ,AvxVnniInt8_V512 , ,61 ,AVXVNNIINT_V512 ,avxvnniint_v512 +instructionset ,X86 ,AvxVnniInt16 , ,62 ,AVXVNNIINT ,avxvnniint +instructionset ,X86 ,AvxVnniInt16_V512 , ,63 ,AVXVNNIINT_V512 ,avxvnniint_v512 + ; 64-bit Instruction Sets instructionset64bit,X86 ,X86Base @@ -179,6 +185,9 @@ implication ,X86 ,SHA ,X86Base implication ,X86 ,WAITPKG ,X86Base implication ,X86 ,X86Serialize ,X86Base +implication ,X86 ,AVXVNNIINT ,AVX2 +implication ,X86 ,AVXVNNIINT_V512 ,AVX10v2 + ; These synthetic ISAs need to appear after the core ISAs ; as they depend on the other implications being correct first ; otherwise they may not be disabled if the required isa is disabled diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index bff4f38957bcf6..d1a6a75e7d3efd 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1306,6 +1306,7 @@ void EEJitManager::SetCpuInfo() if (((cpuFeatures & XArchIntrinsicConstants_Avx10v2) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX10v2)) { CPUCompileFlags.Set(InstructionSet_AVX10v2); + CPUCompileFlags.Set(InstructionSet_AVXVNNIINT_V512); } #if defined(TARGET_AMD64) @@ -1355,6 +1356,11 @@ void EEJitManager::SetCpuInfo() CPUCompileFlags.Set(InstructionSet_SHA); } + if (((cpuFeatures & XArchIntrinsicConstants_AvxVnniInt) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVXVNNIINT)) + { + CPUCompileFlags.Set(InstructionSet_AVXVNNIINT); + } + if (((cpuFeatures & XArchIntrinsicConstants_WaitPkg) != 0) && 
CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableWAITPKG)) { CPUCompileFlags.Set(InstructionSet_WAITPKG); diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index 199b3c14fadb66..150b1d6a8e11cd 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -2679,6 +2679,8 @@ + + @@ -2713,6 +2715,8 @@ + + diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt16.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt16.PlatformNotSupported.cs new file mode 100644 index 00000000000000..c91bbe0f481dc6 --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt16.PlatformNotSupported.cs @@ -0,0 +1,99 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; + +namespace System.Runtime.Intrinsics.X86 +{ + /// Provides access to the x86 AVX-VNNI-INT16 hardware instructions via intrinsics. + [CLSCompliant(false)] + public abstract class AvxVnniInt16 : Avx2 + { + internal AvxVnniInt16() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static new bool IsSupported { [Intrinsic] get { return false; } } + + /// Provides access to the x86 AVX-VNNI-INT16 hardware instructions, that are only available to 64-bit processes, via intrinsics.
+ public new abstract class X64 : Avx2.X64 + { + internal X64() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static new bool IsSupported { [Intrinsic] get { return false; } } + } + + // VPDPWSUD xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + // VPDPWUSD xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + // VPDPWUUD xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + // VPDPWSUD ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + // VPDPWUSD ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + // VPDPWUUD ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + // VPDPWSUDS xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + // VPDPWUSDS xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + // VPDPWUUDS xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) { throw new 
PlatformNotSupportedException(); } + + // VPDPWSUDS ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + // VPDPWUSDS ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + // VPDPWUUDS ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + /// Provides access to the x86 AVX10.2/512 hardware instructions for AVX-VNNI-INT16 via intrinsics. + [Intrinsic] + public abstract class V512 + { + internal V512() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static bool IsSupported { [Intrinsic] get { return false; } } + + // VPDPWSUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAdd(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + + // VPDPWUSD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAdd(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + + // VPDPWUUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAdd(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + + // VPDPWSUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAddSaturate(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + + // VPDPWUSDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAddSaturate(Vector512 addend, Vector512 left, 
Vector512 right) { throw new PlatformNotSupportedException(); } + + // VPDPWUUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAddSaturate(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + } + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt16.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt16.cs new file mode 100644 index 00000000000000..8e9aa059a84d2f --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt16.cs @@ -0,0 +1,98 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; + +namespace System.Runtime.Intrinsics.X86 +{ + /// Provides access to the x86 AVX-VNNI-INT16 hardware instructions via intrinsics. + [Intrinsic] + [CLSCompliant(false)] + public abstract class AvxVnniInt16 : Avx2 + { + internal AvxVnniInt16() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static new bool IsSupported { get => IsSupported; } + + /// Provides access to the x86 AVX-VNNI-INT16 hardware instructions, that are only available to 64-bit processes, via intrinsics. + [Intrinsic] + public new abstract class X64 : Avx2.X64 + { + internal X64() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw .
+ public static new bool IsSupported { get => IsSupported; } + } + + // VPDPWSUD xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPWUSD xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPWUUD xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPWSUD ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPWUSD ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPWUUD ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPWSUDS xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // VPDPWUSDS xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // VPDPWUUDS xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // VPDPWSUDS ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // 
VPDPWUSDS ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // VPDPWUUDS ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + /// Provides access to the x86 AVX10.2/512 hardware instructions for AVX-VNNI-INT16 via intrinsics. + [Intrinsic] + public abstract class V512 + { + internal V512() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static bool IsSupported { get => IsSupported; } + + // VPDPWSUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAdd(Vector512 addend, Vector512 left, Vector512 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPWUSD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAdd(Vector512 addend, Vector512 left, Vector512 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPWUUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAdd(Vector512 addend, Vector512 left, Vector512 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPWSUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAddSaturate(Vector512 addend, Vector512 left, Vector512 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // VPDPWUSDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAddSaturate(Vector512 addend, Vector512 left, Vector512 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // VPDPWUUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAddSaturate(Vector512 addend, Vector512 left, 
Vector512 right) => MultiplyWideningAndAddSaturate(addend, left, right); + } + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt8.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt8.PlatformNotSupported.cs new file mode 100644 index 00000000000000..19f79db01415c2 --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt8.PlatformNotSupported.cs @@ -0,0 +1,97 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; + +namespace System.Runtime.Intrinsics.X86 +{ + /// Provides access to the x86 AVX-VNNI-INT8 hardware instructions via intrinsics. + [CLSCompliant(false)] + public abstract class AvxVnniInt8 : Avx2 + { + internal AvxVnniInt8() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static new bool IsSupported { [Intrinsic] get { return false; } } + + public new abstract class X64 : Avx2.X64 + { + internal X64() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . 
+ public static new bool IsSupported { [Intrinsic] get { return false; } } + } + + // VPDPBSSD xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + // VPDPBSUD xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + // VPDPBUUD xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + // VPDPBSSD ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + // VPDPBSUD ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + // VPDPBUUD ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + // VPDPBSSDS xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + // VPDPBSUDS xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + // VPDPBUUDS xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + // VPDPBSSDS ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + // VPDPBSUDS ymm1, ymm2, 
ymm3/m256 + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + // VPDPBUUDS ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + /// Provides access to the x86 AVX10.2/512 hardware instructions for AVX-VNNI-INT8 via intrinsics. + public abstract class V512 + { + internal V512() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static bool IsSupported { [Intrinsic] get { return false; } } + + // VPDPBSSD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAdd(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + + // VPDPBSUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAdd(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + + // VPDPBUUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAdd(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + + // VPDPBSSDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAddSaturate(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + + // VPDPBSUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAddSaturate(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + + // VPDPBUUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAddSaturate(Vector512 addend, Vector512 left, Vector512 right) { throw new PlatformNotSupportedException(); } + 
} + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt8.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt8.cs new file mode 100644 index 00000000000000..20668158f87dad --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt8.cs @@ -0,0 +1,99 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; + +namespace System.Runtime.Intrinsics.X86 +{ + /// Provides access to the x86 AVX-VNNI-INT8 hardware instructions via intrinsics. + [Intrinsic] + [CLSCompliant(false)] + public abstract class AvxVnniInt8 : Avx2 + { + internal AvxVnniInt8() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static new bool IsSupported { get => IsSupported; } + + /// Provides access to the x86 AVX-VNNI-INT8 hardware instructions, that are only available to 64-bit processes, via intrinsics. + [Intrinsic] + public new abstract class X64 : Avx2.X64 + { + internal X64() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . 
+ public static new bool IsSupported { get => IsSupported; } + } + + // VPDPBSSD xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPBSUD xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPBUUD xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPBSSD ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPBSUD ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPBUUD ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPBSSDS xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // VPDPBSUDS xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // VPDPBUUDS xmm1, xmm2, xmm3/m128 + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // VPDPBSSDS ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // 
VPDPBSUDS ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // VPDPBUUDS ymm1, ymm2, ymm3/m256 + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + /// Provides access to the x86 AVX10.2/512 hardware instructions for AVX-VNNI-INT8 via intrinsics. + [Intrinsic] + public abstract class V512 + { + internal V512() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static bool IsSupported { get => IsSupported; } + + // VPDPBSSD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAdd(Vector512 addend, Vector512 left, Vector512 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPBSUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAdd(Vector512 addend, Vector512 left, Vector512 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPBUUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAdd(Vector512 addend, Vector512 left, Vector512 right) => MultiplyWideningAndAdd(addend, left, right); + + // VPDPBSSDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAddSaturate(Vector512 addend, Vector512 left, Vector512 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // VPDPBSUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAddSaturate(Vector512 addend, Vector512 left, Vector512 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + // VPDPBUUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst + public static Vector512 MultiplyWideningAndAddSaturate(Vector512 addend, Vector512 left, 
Vector512 right) => MultiplyWideningAndAddSaturate(addend, left, right); + } + } +} diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index b275bb3f925b2a..a36182d8a1d0cd 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -8014,6 +8014,76 @@ internal X64() { } } } + [System.CLSCompliantAttribute(false)] + public abstract partial class AvxVnniInt8 : System.Runtime.Intrinsics.X86.Avx2 + { + internal AvxVnniInt8() { } + public static new bool IsSupported { get { throw null; } } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, 
System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public new abstract partial class X64 : System.Runtime.Intrinsics.X86.Avx2.X64 + { + internal X64() { } + public static new bool IsSupported { get { throw null; } } + } + public abstract partial class V512 + { + internal V512() { } + public static bool IsSupported { get { throw null; } } + public static System.Runtime.Intrinsics.Vector512 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector512 addend, System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 
MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector512 addend, System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector512 addend, System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector512 addend, System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector512 addend, System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector512 addend, System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + } + } + + [System.CLSCompliantAttribute(false)] + public abstract partial class AvxVnniInt16 : System.Runtime.Intrinsics.X86.Avx2 + { + internal AvxVnniInt16() { } + public static new bool IsSupported { get { throw null; } } + public static Vector128 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static Vector128 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static Vector128 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static Vector256 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256 addend, 
System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static Vector256 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static Vector256 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static Vector128 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static Vector128 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static Vector128 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static Vector256 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static Vector256 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static Vector256 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public new abstract partial class X64 : System.Runtime.Intrinsics.X86.Avx2.X64 + { + internal X64() { } + public static new bool IsSupported { get { throw null; } } + } + public abstract partial class V512 + { + internal V512() { } + public static bool IsSupported { get { throw null; } } + 
public static Vector512 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector512 addend, System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static Vector512 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector512 addend, System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static Vector512 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector512 addend, System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static Vector512 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector512 addend, System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static Vector512 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector512 addend, System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static Vector512 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector512 addend, System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + } + } + [System.CLSCompliantAttribute(false)] public abstract partial class Avx512BW : System.Runtime.Intrinsics.X86.Avx512F { diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index 8785e7f2389461..9122f8411c9b52 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -385,6 +385,12 @@ int minipal_getcpufeatures(void) result |= XArchIntrinsicConstants_AvxVnni; } + if (((cpuidInfo[CPUID_EDX] & (1 << 4)) != 0) && // AVX-VNNI-INT8 + ((cpuidInfo[CPUID_EDX] & (1 << 10)) != 0)) // AVX-VNNI-INT16 + { + result |= XArchIntrinsicConstants_AvxVnniInt; + } + if ((cpuidInfo[CPUID_EAX] & (1 << 23)) != 0) // AVX-IFMA { result |= XArchIntrinsicConstants_AvxIfma; diff --git a/src/native/minipal/cpufeatures.h b/src/native/minipal/cpufeatures.h index 
905fcc0f6fa7e5..92284d18899d7e 100644 --- a/src/native/minipal/cpufeatures.h +++ b/src/native/minipal/cpufeatures.h @@ -29,6 +29,7 @@ #define XArchIntrinsicConstants_Vaes (1 << 15) #define XArchIntrinsicConstants_WaitPkg (1 << 16) #define XArchIntrinsicConstants_X86Serialize (1 << 17) +#define XArchIntrinsicConstants_AvxVnniInt (1 << 18) #endif // HOST_X86 || HOST_AMD64 #if defined(HOST_ARM64) diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs index e2c655f3d171e2..30c92a29703334 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs @@ -1930,6 +1930,62 @@ ("ImmBinOpTest.template", new Dictionary { ["Isa"] = "Avx10v2.V512", ["LoadIsa"] = "Avx10v1.V512", ["Method"] = "MinMax", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["Imm"] = "15", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "result[0] != -1.0 * (((Math.Abs(left[0]) > Math.Abs(right[0])) ? left[0] : right[0]))", ["ValidateRemainingResults"] = "result[i] != -1.0 * (((Math.Abs(left[i]) > Math.Abs(right[i])) ? 
left[i] : right[i]))"}), }; +(string templateFileName, Dictionary templateData)[] AvxVnniInt8Inputs = new [] +{ + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != firstOp[0] + (int)secondOp[0]*(int)thirdOp[0] + (int)secondOp[1]*(int)thirdOp[1] + (int)secondOp[2]*(int)thirdOp[2] + (int)secondOp[3]*(int)thirdOp[3]", ["ValidateRemainingResults"] = "result[i] != firstOp[i] + (int)secondOp[4*i]*(int)thirdOp[4*i] + (int)secondOp[4*i + 1]*(int)thirdOp[4*i + 1] + (int)secondOp[4*i + 2]*(int)thirdOp[4*i + 2] + (int)secondOp[4*i + 3]*(int)thirdOp[4*i + 3]"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != firstOp[0] + (int)secondOp[0]*(int)thirdOp[0] + (int)secondOp[1]*(int)thirdOp[1] + (int)secondOp[2]*(int)thirdOp[2] + (int)secondOp[3]*(int)thirdOp[3]", ["ValidateRemainingResults"] = "result[i] != firstOp[i] + (int)secondOp[4*i]*(int)thirdOp[4*i] + (int)secondOp[4*i + 
1]*(int)thirdOp[4*i + 1] + (int)secondOp[4*i + 2]*(int)thirdOp[4*i + 2] + (int)secondOp[4*i + 3]*(int)thirdOp[4*i + 3]"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != (uint)(firstOp[0] + (uint)secondOp[0]*(uint)thirdOp[0] + (uint)secondOp[1]*(uint)thirdOp[1] + (uint)secondOp[2]*(uint)thirdOp[2] + (uint)secondOp[3]*(uint)thirdOp[3])", ["ValidateRemainingResults"] = "result[i] != (uint)(firstOp[i] + (uint)secondOp[4*i]*(uint)thirdOp[4*i] + (uint)secondOp[4*i + 1]*(uint)thirdOp[4*i + 1] + (uint)secondOp[4*i + 2]*(uint)thirdOp[4*i + 2] + (uint)secondOp[4*i + 3]*(uint)thirdOp[4*i + 3])"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != firstOp[0] + (int)secondOp[0]*(int)thirdOp[0] + (int)secondOp[1]*(int)thirdOp[1] + (int)secondOp[2]*(int)thirdOp[2] + (int)secondOp[3]*(int)thirdOp[3]", ["ValidateRemainingResults"] = "result[i] != firstOp[i] + 
(int)secondOp[4*i]*(int)thirdOp[4*i] + (int)secondOp[4*i + 1]*(int)thirdOp[4*i + 1] + (int)secondOp[4*i + 2]*(int)thirdOp[4*i + 2] + (int)secondOp[4*i + 3]*(int)thirdOp[4*i + 3]"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != firstOp[0] + (int)secondOp[0]*(int)thirdOp[0] + (int)secondOp[1]*(int)thirdOp[1] + (int)secondOp[2]*(int)thirdOp[2] + (int)secondOp[3]*(int)thirdOp[3]", ["ValidateRemainingResults"] = "result[i] != firstOp[i] + (int)secondOp[4*i]*(int)thirdOp[4*i] + (int)secondOp[4*i + 1]*(int)thirdOp[4*i + 1] + (int)secondOp[4*i + 2]*(int)thirdOp[4*i + 2] + (int)secondOp[4*i + 3]*(int)thirdOp[4*i + 3]"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != (uint)(firstOp[0] + (uint)secondOp[0]*(uint)thirdOp[0] + (uint)secondOp[1]*(uint)thirdOp[1] + (uint)secondOp[2]*(uint)thirdOp[2] + (uint)secondOp[3]*(uint)thirdOp[3])", ["ValidateRemainingResults"] = 
"result[i] != (uint)(firstOp[i] + (uint)secondOp[4*i]*(uint)thirdOp[4*i] + (uint)secondOp[4*i + 1]*(uint)thirdOp[4*i + 1] + (uint)secondOp[4*i + 2]*(uint)thirdOp[4*i + 2] + (uint)secondOp[4*i + 3]*(uint)thirdOp[4*i + 3])"}), + + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], (long)int.MinValue, (long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != 
Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], (long)int.MinValue, (long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], 0, (long)uint.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], 0, (long)uint.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector256", 
["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], (long)int.MinValue, (long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], (long)int.MinValue, (long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = 
"AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], 0, (long)uint.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], 0, (long)uint.MaxValue)"}), +}; + +(string templateFileName, Dictionary templateData)[] AvxVnniInt8_V512Inputs = new [] +{ + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != firstOp[0] + (int)secondOp[0]*(int)thirdOp[0] + (int)secondOp[1]*(int)thirdOp[1] + (int)secondOp[2]*(int)thirdOp[2] + (int)secondOp[3]*(int)thirdOp[3]", ["ValidateRemainingResults"] = "result[i] != firstOp[i] + 
(int)secondOp[4*i]*(int)thirdOp[4*i] + (int)secondOp[4*i + 1]*(int)thirdOp[4*i + 1] + (int)secondOp[4*i + 2]*(int)thirdOp[4*i + 2] + (int)secondOp[4*i + 3]*(int)thirdOp[4*i + 3]"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != firstOp[0] + (int)secondOp[0]*(int)thirdOp[0] + (int)secondOp[1]*(int)thirdOp[1] + (int)secondOp[2]*(int)thirdOp[2] + (int)secondOp[3]*(int)thirdOp[3]", ["ValidateRemainingResults"] = "result[i] != firstOp[i] + (int)secondOp[4*i]*(int)thirdOp[4*i] + (int)secondOp[4*i + 1]*(int)thirdOp[4*i + 1] + (int)secondOp[4*i + 2]*(int)thirdOp[4*i + 2] + (int)secondOp[4*i + 3]*(int)thirdOp[4*i + 3]"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != (uint)(firstOp[0] + (uint)secondOp[0]*(uint)thirdOp[0] + (uint)secondOp[1]*(uint)thirdOp[1] + (uint)secondOp[2]*(uint)thirdOp[2] + (uint)secondOp[3]*(uint)thirdOp[3])", 
["ValidateRemainingResults"] = "result[i] != (uint)(firstOp[i] + (uint)secondOp[4*i]*(uint)thirdOp[4*i] + (uint)secondOp[4*i + 1]*(uint)thirdOp[4*i + 1] + (uint)secondOp[4*i + 2]*(uint)thirdOp[4*i + 2] + (uint)secondOp[4*i + 3]*(uint)thirdOp[4*i + 3])"}), + + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], (long)int.MinValue, (long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = 
"TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], (long)int.MinValue, (long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], 0, (long)uint.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], 0, (long)uint.MaxValue)"}), +}; + +(string templateFileName, Dictionary templateData)[] AvxVnniInt16Inputs = new [] +{ + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector128", ["RetBaseType"] 
= "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != (int)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])", ["ValidateRemainingResults"] = "result[i] != (int)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != (int)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])", ["ValidateRemainingResults"] = "result[i] != (int)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = 
"TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != (uint)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])", ["ValidateRemainingResults"] = "result[i] != (uint)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != (int)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])", ["ValidateRemainingResults"] = "result[i] != (int)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != (int)((long)firstOp[0] + 
(long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])", ["ValidateRemainingResults"] = "result[i] != (int)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != (uint)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])", ["ValidateRemainingResults"] = "result[i] != (uint)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}), + + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + 
(long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], (long)int.MinValue, (long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], (long)int.MinValue, (long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], 0, (long)uint.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 
1]*(long)thirdOp[2*i + 1], 0, (long)uint.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], (long)int.MinValue, (long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], (long)int.MinValue, 
(long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], 0, (long)uint.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], 0, (long)uint.MaxValue)"}), +}; + +(string templateFileName, Dictionary templateData)[] AvxVnniInt16_V512Inputs = new [] +{ + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != (int)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])", ["ValidateRemainingResults"] = "result[i] != (int)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}), + ("SimpleTernOpTest.template", new 
Dictionary { ["Isa"] = "AvxVnniInt16.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != (int)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])", ["ValidateRemainingResults"] = "result[i] != (int)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != (uint)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])", ["ValidateRemainingResults"] = "result[i] != (uint)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}), + + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = 
"Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], (long)int.MinValue, (long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], (long)int.MinValue, (long)int.MaxValue)"}), + ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "AvxVnniInt16.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", 
["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], 0, (long)uint.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], 0, (long)uint.MaxValue)"}), +}; + (string templateFileName, Dictionary templateData)[] Avx512F_ScalarUpperInputs = new [] { ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "GetExponentScalar", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "result[0] != Avx512Verify.GetExponent(right[0])", ["ValidateRemainingResults"] = "result[i] != left[i]"}), @@ -4461,6 +4517,10 @@ bool isImmTemplate(string name) ProcessInputs("Gfni.V512", GfniV512Inputs); ProcessInputs("Avx10v2", Avx10v2Inputs); ProcessInputs("Avx10v2_V512", Avx10v2_V512Inputs); +ProcessInputs("AvxVnniInt8", AvxVnniInt8Inputs); +ProcessInputs("AvxVnniInt8_V512", AvxVnniInt8_V512Inputs); +ProcessInputs("AvxVnniInt16", AvxVnniInt16Inputs); +ProcessInputs("AvxVnniInt16_V512", AvxVnniInt16_V512Inputs); void ProcessInputs(string groupName, (string templateFileName, Dictionary templateData)[] inputs) @@ -4494,6 +4554,11 @@ void ProcessInput(StreamWriter testListFile, string groupName, (string 
templateF var suffix = ""; + if (input.templateFileName == "SimpleTernOpTest.template") + { + testName += $"{input.templateData["Op1VectorType"]}.{input.templateData["Op1BaseType"]}{input.templateData["Op2VectorType"]}.{input.templateData["Op2BaseType"]}.{input.templateData["Op3VectorType"]}.{input.templateData["Op3BaseType"]}"; + } + if (input.templateFileName == "SimpleUnOpConvTest.template" || input.templateFileName == "SimdScalarUnOpConvTest.template" ) { testName = $"{input.templateData["Method"]}.{input.templateData["Op1VectorType"]}{input.templateData["Op1BaseType"]}"; diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/_TernaryOpTestTemplate.template b/src/tests/JIT/HardwareIntrinsics/X86/Shared/_TernaryOpTestTemplate.template index cd9caf6b7072b5..1b22d0b096fd40 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/Shared/_TernaryOpTestTemplate.template +++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/_TernaryOpTestTemplate.template @@ -20,9 +20,9 @@ namespace JIT.HardwareIntrinsics.X86 public static partial class Program { [Fact] - public static void {Method}{RetBaseType}() + public static void {Method}{RetBaseType}{Op1VectorType}{Op1BaseType}{Op2VectorType}{Op2BaseType}{Op3VectorType}{Op3BaseType}() { - var test = new {TemplateName}TernaryOpTest__{Method}{RetBaseType}(); + var test = new {TemplateName}TernaryOpTest__{Method}{RetBaseType}{Op1VectorType}{Op1BaseType}{Op2VectorType}{Op2BaseType}{Op3VectorType}{Op3BaseType}(); if (test.IsSupported) { @@ -78,7 +78,7 @@ namespace JIT.HardwareIntrinsics.X86 } } - public sealed unsafe class {TemplateName}TernaryOpTest__{Method}{RetBaseType} + public sealed unsafe class {TemplateName}TernaryOpTest__{Method}{RetBaseType}{Op1VectorType}{Op1BaseType}{Op2VectorType}{Op2BaseType}{Op3VectorType}{Op3BaseType} { private struct TestStruct { @@ -100,7 +100,7 @@ namespace JIT.HardwareIntrinsics.X86 return testStruct; } - public void RunStructFldScenario({TemplateName}TernaryOpTest__{Method}{RetBaseType} testClass) + 
public void RunStructFldScenario({TemplateName}TernaryOpTest__{Method}{RetBaseType}{Op1VectorType}{Op1BaseType}{Op2VectorType}{Op2BaseType}{Op3VectorType}{Op3BaseType} testClass) { var result = {Isa}.{Method}(_fld1, _fld2, _fld3); @@ -126,7 +126,7 @@ namespace JIT.HardwareIntrinsics.X86 private SimpleTernaryOpTest__DataTable<{RetBaseType}, {Op1BaseType}, {Op2BaseType}, {Op3BaseType}> _dataTable; - public {TemplateName}TernaryOpTest__{Method}{RetBaseType}() + public {TemplateName}TernaryOpTest__{Method}{RetBaseType}{Op1VectorType}{Op1BaseType}{Op2VectorType}{Op2BaseType}{Op3VectorType}{Op3BaseType}() { Succeeded = true; diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16SampleTest.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16SampleTest.cs new file mode 100644 index 00000000000000..f79ba7132aee3a --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16SampleTest.cs @@ -0,0 +1,50 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+// + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; +using System.Runtime.Intrinsics; +using Xunit; + +namespace IntelHardwareIntrinsicTest._AvxVnniInt16 +{ + public partial class Program + { + const float EPS = Single.Epsilon * 5; + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static Vector128 getAbs128(Vector128 val) + { + return Avx10v2.Abs(val); + } + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static Vector256 getAbs256(Vector256 val) + { + return Avx10v2.Abs(val); + } + + [Fact] + public static unsafe void AvxVnniInt16SampleTest () + { + Console.WriteLine("Test executed"); + if (AvxVnniInt16.IsSupported) + { + Console.WriteLine("AvxVnniInt16 supported"); + } + else { + Console.WriteLine("AvxVnniInt16 not supported"); + } + if (AvxVnniInt16.V512.IsSupported) + { + Console.WriteLine("AvxVnniInt16_V512 supported"); + } + else { + Console.WriteLine("AvxVnniInt16_V512 not supported"); + } + } + } +} \ No newline at end of file diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_handwritten_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_handwritten_r.csproj new file mode 100644 index 00000000000000..b0e0c15535b643 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_handwritten_r.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt16_handwritten_r + true + + + Embedded + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_handwritten_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_handwritten_ro.csproj new file mode 100644 index 00000000000000..37af53d8b83004 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_handwritten_ro.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt16_handwritten_ro + true + + + Embedded + True + + + + + + diff --git 
a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_r.csproj new file mode 100644 index 00000000000000..a2dd5040f106b9 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_r.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt16_r + true + + + Embedded + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_ro.csproj new file mode 100644 index 00000000000000..47301f1344bf55 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_ro.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt16_ro + true + + + Embedded + True + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/Program.AvxVnniInt16.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/Program.AvxVnniInt16.cs new file mode 100644 index 00000000000000..7c9a1f9d2418d6 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/Program.AvxVnniInt16.cs @@ -0,0 +1,16 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; + +namespace JIT.HardwareIntrinsics.X86._AvxVnniInt16 +{ + public static partial class Program + { + static Program() + { + + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512SampleTest.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512SampleTest.cs new file mode 100644 index 00000000000000..a3faf347bcb7b8 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512SampleTest.cs @@ -0,0 +1,50 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+// + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; +using System.Runtime.Intrinsics; +using Xunit; + +namespace IntelHardwareIntrinsicTest._AvxVnniInt16_V512 +{ + public partial class Program + { + const float EPS = Single.Epsilon * 5; + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static Vector128 getAbs128(Vector128 val) + { + return Avx10v2.Abs(val); + } + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static Vector256 getAbs256(Vector256 val) + { + return Avx10v2.Abs(val); + } + + [Fact] + public static unsafe void AvxVnniInt16_V512SampleTest () + { + Console.WriteLine("Test executed"); + if (AvxVnniInt16.IsSupported) + { + Console.WriteLine("AvxVnniInt16 supported"); + } + else { + Console.WriteLine("AvxVnniInt16 not supported"); + } + if (AvxVnniInt16.V512.IsSupported) + { + Console.WriteLine("AvxVnniInt16_V512 supported"); + } + else { + Console.WriteLine("AvxVnniInt16_V512 not supported"); + } + } + } +} \ No newline at end of file diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_r.csproj new file mode 100644 index 00000000000000..913145750c6879 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_r.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt16_V512_handwritten_r + true + + + Embedded + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_ro.csproj new file mode 100644 index 00000000000000..d301293f0763ea --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_ro.csproj @@ -0,0 +1,14 @@ + + + 
X86_AvxVnniInt16_V512_handwritten_ro + true + + + Embedded + True + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_r.csproj new file mode 100644 index 00000000000000..05c40b2a2ec0a2 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_r.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt16_V512_r + true + + + Embedded + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_ro.csproj new file mode 100644 index 00000000000000..4b159ee2cdd3f0 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_ro.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt16_V512_ro + true + + + Embedded + True + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/Program.AvxVnniInt16_V512.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/Program.AvxVnniInt16_V512.cs new file mode 100644 index 00000000000000..52895c8b02fdf5 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/Program.AvxVnniInt16_V512.cs @@ -0,0 +1,16 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Collections.Generic; + +namespace JIT.HardwareIntrinsics.X86._AvxVnniInt16_V512 +{ + public static partial class Program + { + static Program() + { + + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8SampleTest.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8SampleTest.cs new file mode 100644 index 00000000000000..551b151f79e056 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8SampleTest.cs @@ -0,0 +1,56 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; +using System.Runtime.Intrinsics; +using Xunit; + +namespace IntelHardwareIntrinsicTest._AvxVnniInt8 +{ + public partial class Program + { + const float EPS = Single.Epsilon * 5; + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static Vector128 getAbs128(Vector128 val) + { + return Avx10v2.Abs(val); + } + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static Vector256 getAbs256(Vector256 val) + { + return Avx10v2.Abs(val); + } + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static Vector128 getMWA(Vector128 v1, Vector128 v2, Vector128 v3) + { + return AvxVnniInt8.MultiplyWideningAndAdd(v1, v2, v3); + } + + [Fact] + public static unsafe void AvxVnniInt8SampleTest () + { + Console.WriteLine("Test executed"); + if (AvxVnniInt8.IsSupported) + { + Console.WriteLine("AvxVnniInt8 supported"); + } + else { + Console.WriteLine("AvxVnniInt8 not supported"); + } + if (AvxVnniInt8.V512.IsSupported) + { + Console.WriteLine("AvxVnniInt8_V512 supported"); + } + else { + Console.WriteLine("AvxVnniInt8_V512 not supported"); + } + } + } +} \ No newline at end of file diff --git 
a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_handwritten_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_handwritten_r.csproj new file mode 100644 index 00000000000000..f5a1b8ec79165a --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_handwritten_r.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt8_handwritten_r + true + + + Embedded + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_handwritten_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_handwritten_ro.csproj new file mode 100644 index 00000000000000..19edfb7a1e4a3e --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_handwritten_ro.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt8_handwritten_ro + true + + + Embedded + True + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_r.csproj new file mode 100644 index 00000000000000..4860476c221996 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_r.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt8_r + true + + + Embedded + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_ro.csproj new file mode 100644 index 00000000000000..85c963490ba6e8 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_ro.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt8_ro + true + + + Embedded + True + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/Program.AvxVnniInt8.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/Program.AvxVnniInt8.cs new file mode 100644 index 00000000000000..e20f252e9e9cb4 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/Program.AvxVnniInt8.cs @@ -0,0 
+1,16 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; + +namespace JIT.HardwareIntrinsics.X86._AvxVnniInt8 +{ + public static partial class Program + { + static Program() + { + + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512SampleTest.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512SampleTest.cs new file mode 100644 index 00000000000000..ce0f11ec035755 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512SampleTest.cs @@ -0,0 +1,50 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; +using System.Runtime.Intrinsics; +using Xunit; + +namespace IntelHardwareIntrinsicTest._AvxVnniInt8_V512 +{ + public partial class Program + { + const float EPS = Single.Epsilon * 5; + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static Vector128 getAbs128(Vector128 val) + { + return Avx10v2.Abs(val); + } + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static Vector256 getAbs256(Vector256 val) + { + return Avx10v2.Abs(val); + } + + [Fact] + public static unsafe void AvxVnniInt8_V512SampleTest () + { + Console.WriteLine("Test executed"); + if (AvxVnniInt8.IsSupported) + { + Console.WriteLine("AvxVnniInt8 supported"); + } + else { + Console.WriteLine("AvxVnniInt8 not supported"); + } + if (AvxVnniInt8.V512.IsSupported) + { + Console.WriteLine("AvxVnniInt8_V512 supported"); + } + else { + Console.WriteLine("AvxVnniInt8_V512 not supported"); + } + } + } +} \ No newline at end of file diff --git 
a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_r.csproj new file mode 100644 index 00000000000000..9dad95ac6905a3 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_r.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt8_V512_handwritten_r + true + + + Embedded + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_ro.csproj new file mode 100644 index 00000000000000..1a9d7fa07349a6 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_ro.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt8_V512_handwritten_ro + true + + + Embedded + True + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_r.csproj new file mode 100644 index 00000000000000..bf04181f2eed27 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_r.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt8_V512_r + true + + + Embedded + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_ro.csproj new file mode 100644 index 00000000000000..44720c47ad4db3 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_ro.csproj @@ -0,0 +1,14 @@ + + + X86_AvxVnniInt8_V512_ro + true + + + Embedded + True + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/Program.AvxVnniInt8_V512.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/Program.AvxVnniInt8_V512.cs 
new file mode 100644 index 00000000000000..f1910642eed9ec --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/Program.AvxVnniInt8_V512.cs @@ -0,0 +1,16 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; + +namespace JIT.HardwareIntrinsics.X86._AvxVnniInt8_V512 +{ + public static partial class Program + { + static Program() + { + + } + } +} diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs index afbe2b6a18c21d..3083d9f6c583cf 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs @@ -79,9 +79,12 @@ static int Main() bool? ExpectedAvx512Fp16 = false; bool? ExpectedAvx10v1 = false; bool? ExpectedAvx10v1V512 = false; + bool? ExpectedAvx10v2 = false; bool? ExpectedAvx512Vp2intersect = false; bool? ExpectedAvxIfma = false; bool? ExpectedAvxVnni = false; + bool? ExpectedAvxVnniInt = false; + bool? ExpectedAvxVnniIntV512 = false; bool? ExpectedGfniV256 = false; bool? ExpectedGfniV512 = false; bool? ExpectedAesV256 = false; @@ -121,9 +124,12 @@ static int Main() bool? ExpectedAvx512Fp16 = false; bool? ExpectedAvx10v1 = false; bool? ExpectedAvx10v1V512 = false; + bool? ExpectedAvx10v2 = false; bool? ExpectedAvx512Vp2intersect = false; bool? ExpectedAvxIfma = false; bool? ExpectedAvxVnni = false; + bool? ExpectedAvxVnniInt = false; + bool? ExpectedAvxVnniIntV512 = false; bool? ExpectedGfniV256 = false; bool? ExpectedGfniV512 = false; bool? ExpectedAesV256 = false; @@ -148,6 +154,7 @@ static int Main() bool? ExpectedPclmulqdq = null; bool? ExpectedAvxIfma = null; bool? ExpectedAvxVnni = null; + bool? ExpectedAvxVnniInt = null; bool? ExpectedGfni = null; bool? ExpectedGfniV256 = null; bool? 
ExpectedSha = null; @@ -168,7 +175,9 @@ static int Main() bool? ExpectedAvx512Fp16 = false; bool? ExpectedAvx10v1 = false; bool? ExpectedAvx10v1V512 = false; + bool? ExpectedAvx10v2 = false; bool? ExpectedAvx512Vp2intersect = false; + bool? ExpectedAvxVnniIntV512 = false; bool? ExpectedGfniV512 = false; bool? ExpectedAesV512 = false; bool? ExpectedPclmulqdqV512 = false; @@ -208,9 +217,12 @@ static int Main() bool? ExpectedAvx512Fp16 = false; bool? ExpectedAvx10v1 = false; bool? ExpectedAvx10v1V512 = false; + bool? ExpectedAvx10v2 = false; bool? ExpectedAvx512Vp2intersect = false; bool? ExpectedAvxIfma = false; bool? ExpectedAvxVnni = false; + bool? ExpectedAvxVnniInt = false; + bool? ExpectedAvxVnniIntV512 = false; bool? ExpectedGfniV512 = false; bool? ExpectedAesV512 = false; bool? ExpectedPclmulqdqV512 = false; @@ -232,6 +244,7 @@ static int Main() bool? ExpectedPclmulqdq = null; bool? ExpectedAvxIfma = null; bool? ExpectedAvxVnni = null; + bool? ExpectedAvxVnniInt = null; bool? ExpectedGfni = null; bool? ExpectedGfniV256 = null; bool? ExpectedSha = null; @@ -252,7 +265,9 @@ static int Main() bool? ExpectedAvx512Fp16 = false; bool? ExpectedAvx10v1 = false; bool? ExpectedAvx10v1V512 = false; + bool? ExpectedAvx10v2 = false; bool? ExpectedAvx512Vp2intersect = false; + bool? ExpectedAvxVnniIntV512 = false; bool? ExpectedGfniV512 = false; bool? ExpectedAesV512 = false; bool? ExpectedPclmulqdqV512 = false; @@ -282,11 +297,14 @@ static int Main() bool? ExpectedAvx512Fp16 = null; bool? ExpectedAvx10v1 = null; bool? ExpectedAvx10v1V512 = null; + bool? ExpectedAvx10v2 = null; bool? ExpectedAes = null; bool? ExpectedPclmulqdq = null; bool? ExpectedAvx512Vp2intersect = null; bool? ExpectedAvxIfma = null; bool? ExpectedAvxVnni = null; + bool? ExpectedAvxVnniInt = null; + bool? ExpectedAvxVnniIntV512 = null; bool? ExpectedGfni = null; bool? ExpectedGfniV256 = null; bool? ExpectedGfniV512 = null; @@ -297,6 +315,50 @@ static int Main() bool? ExpectedPclmulqdqV512 = null; bool? 
ExpectedWaitPkg = null; bool? ExpectedX86Serialize = null; +#elif AVX10v2_INTRINSICS + bool? ExpectedSse3 = true; + bool? ExpectedSsse3 = true; + bool? ExpectedSse41 = true; + bool? ExpectedSse42 = true; + bool? ExpectedPopcnt = true; + bool? ExpectedAvx = true; + bool? ExpectedAvx2 = true; + bool? ExpectedBmi1 = true; + bool? ExpectedBmi2 = true; + bool? ExpectedF16c = true; + bool? ExpectedFma = true; + bool? ExpectedLzcnt = true; + bool? ExpectedAvx512F = true; + bool? ExpectedAvx512BW = true; + bool? ExpectedAvx512CD = true; + bool? ExpectedAvx512DQ = true; + bool? ExpectedAvx512Vbmi = true; + bool? ExpectedAvx512Bitalg = true; + bool? ExpectedAvx512Vbmi2 = true; + bool? ExpectedAvx512Vpopcntdq = true; + bool? ExpectedAvx512Bf16 = true; + bool? ExpectedAvx512Fp16 = true; + bool? ExpectedAvx10v1 = true; + bool? ExpectedAvx10v1V512 = true; + bool? ExpectedAvx10v2 = true; + bool? ExpectedAvxVnni = true; + bool? ExpectedAvxVnniIntV512 = true; + + bool? ExpectedAes = null; + bool? ExpectedPclmulqdq = null; + bool? ExpectedAvx512Vp2intersect = null; + bool? ExpectedAvxIfma = null; + bool? ExpectedGfni = null; + bool? ExpectedGfniV256 = null; + bool? ExpectedGfniV512 = null; + bool? ExpectedSha = null; + bool? ExpectedAesV256 = null; + bool? ExpectedAesV512 = null; + bool? ExpectedPclmulqdqV256 = null; + bool? ExpectedPclmulqdqV512 = null; + bool? ExpectedWaitPkg = null; + bool? ExpectedX86Serialize = null; + bool? ExpectedAvxVnniInt = null; #else #error Who dis? 
#endif @@ -366,6 +428,13 @@ static int Main() Check("Lzcnt", ExpectedLzcnt, &LzcntIsSupported, Lzcnt.IsSupported, () => Lzcnt.LeadingZeroCount(0) == 32); Check("Lzcnt.X64", ExpectedLzcnt, &LzcntX64IsSupported, Lzcnt.X64.IsSupported, () => Lzcnt.X64.LeadingZeroCount(0) == 64); + Check("AvxVnniInt", ExpectedAvxVnniInt, &AvxVnniIntIsSupported, AvxVnniInt8.IsSupported, () => AvxVnniInt8.MultiplyWideningAndAdd(Vector128.Zero, Vector128.Zero, Vector128.Zero).Equals(Vector128.Zero)); + + Check("AvxVnniIntV512", ExpectedAvxVnniIntV512, &AvxVnniIntV512IsSupported, AvxVnniInt16.V512.IsSupported, () => AvxVnniInt16.V512.MultiplyWideningAndAdd(Vector512.Zero, Vector512.Zero, Vector512.Zero).Equals(Vector512.Zero)); + + Check("Avx10v2", ExpectedAvx10v2, &Avx10v2IsSupported, Avx10v2.IsSupported, () => Avx10v2.MinMax(Vector128.Zero, Vector128.Zero, 0x00).Equals(Vector128.Zero)); + Check("Avx10v2.X64", ExpectedAvx10v2, &Avx10v2X64IsSupported, Avx10v2.X64.IsSupported, null); + Check("Avx512F", ExpectedAvx512F, &Avx512FIsSupported, Avx512F.IsSupported, () => Avx512F.Abs(Vector512.Zero).Equals(Vector512.Zero)); Check("Avx512F.VL", ExpectedAvx512F, &Avx512FVLIsSupported, Avx512F.VL.IsSupported, null); Check("Avx512F.X64", ExpectedAvx512F, &Avx512FX64IsSupported, Avx512F.X64.IsSupported, null); @@ -543,6 +612,8 @@ static int Main() static bool Avx10v1X64IsSupported() => Avx10v1.X64.IsSupported; static bool Avx10v1V512IsSupported() => Avx10v1.V512.IsSupported; static bool Avx10v1V512X64IsSupported() => Avx10v1.V512.X64.IsSupported; + static bool Avx10v2IsSupported() => Avx10v2.IsSupported; + static bool Avx10v2X64IsSupported() => Avx10v2.X64.IsSupported; static bool AesIsSupported() => Aes.IsSupported; static bool AesX64IsSupported() => Aes.X64.IsSupported; @@ -559,6 +630,8 @@ static int Main() static bool AvxVnniIsSupported() => AvxVnni.IsSupported; static bool AvxVnniX64IsSupported() => AvxVnni.X64.IsSupported; + static bool AvxVnniIntIsSupported() => AvxVnniInt8.IsSupported; + static 
bool AvxVnniIntV512IsSupported() => AvxVnniInt16.V512.IsSupported; static bool GfniIsSupported() => Gfni.IsSupported; static bool GfniV256IsSupported() => Gfni.V256.IsSupported; diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx10v2.csproj b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx10v2.csproj new file mode 100644 index 00000000000000..4e8381cac23f00 --- /dev/null +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx10v2.csproj @@ -0,0 +1,31 @@ + + + Exe + 0 + true + + true + + true + true + $(DefineConstants);AVX10v2_INTRINSICS;VECTORT512_INTRINSICS + true + false + + + + + + + + + + + + + +