diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h
index 51875aadd7420c..6f72d0a9094df3 100644
--- a/src/coreclr/inc/clrconfigvalues.h
+++ b/src/coreclr/inc/clrconfigvalues.h
@@ -684,6 +684,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAES,                    W("EnableAES"),
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512VP2INTERSECT,     W("EnableAVX512VP2INTERSECT"),  1, "Allows AVX512VP2INTERSECT and dependent hardware intrinsics to be disabled")
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXIFMA,                W("EnableAVXIFMA"),             1, "Allows AVXIFMA and dependent hardware intrinsics to be disabled")
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNI,                W("EnableAVXVNNI"),             1, "Allows AVXVNNI and dependent hardware intrinsics to be disabled")
+RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNIINT,             W("EnableAVXVNNIINT"),          1, "Allows the VEX-encoded AVXVNNIINT8 and AVXVNNIINT16 hardware intrinsics to be disabled")
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableGFNI,                   W("EnableGFNI"),                1, "Allows GFNI and dependent hardware intrinsics to be disabled")
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSHA,                    W("EnableSHA"),                 1, "Allows SHA and dependent hardware intrinsics to be disabled")
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableVAES,                   W("EnableVAES"),                1, "Allows VAES, VPCLMULQDQ, and dependent hardware intrinsics to be disabled")
diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h
index 68826ed36392ed..9d2f2aa9c482f6 100644
--- a/src/coreclr/inc/corinfoinstructionset.h
+++ b/src/coreclr/inc/corinfoinstructionset.h
@@ -78,23 +78,25 @@ enum CORINFO_InstructionSet
     InstructionSet_VectorT128=26,
     InstructionSet_VectorT256=27,
     InstructionSet_VectorT512=28,
-    InstructionSet_X86Base_X64=29,
-    InstructionSet_SSE42_X64=30,
-    InstructionSet_AVX_X64=31,
-    InstructionSet_AVX2_X64=32,
-    InstructionSet_AVX512_X64=33,
-    InstructionSet_AVX512v2_X64=34,
-    InstructionSet_AVX512v3_X64=35,
-    InstructionSet_AVX10v1_X64=36,
-    InstructionSet_AVX10v2_X64=37,
-    InstructionSet_AES_X64=38,
-    InstructionSet_AVX512VP2INTERSECT_X64=39,
-    InstructionSet_AVXIFMA_X64=40,
-    InstructionSet_AVXVNNI_X64=41,
-    InstructionSet_GFNI_X64=42,
-    InstructionSet_SHA_X64=43,
-    InstructionSet_WAITPKG_X64=44,
-    InstructionSet_X86Serialize_X64=45,
+    InstructionSet_AVXVNNIINT=29,
+    InstructionSet_AVXVNNIINT_V512=30,
+    InstructionSet_X86Base_X64=31,
+    InstructionSet_SSE42_X64=32,
+    InstructionSet_AVX_X64=33,
+    InstructionSet_AVX2_X64=34,
+    InstructionSet_AVX512_X64=35,
+    InstructionSet_AVX512v2_X64=36,
+    InstructionSet_AVX512v3_X64=37,
+    InstructionSet_AVX10v1_X64=38,
+    InstructionSet_AVX10v2_X64=39,
+    InstructionSet_AES_X64=40,
+    InstructionSet_AVX512VP2INTERSECT_X64=41,
+    InstructionSet_AVXIFMA_X64=42,
+    InstructionSet_AVXVNNI_X64=43,
+    InstructionSet_GFNI_X64=44,
+    InstructionSet_SHA_X64=45,
+    InstructionSet_WAITPKG_X64=46,
+    InstructionSet_X86Serialize_X64=47,
 #endif // TARGET_AMD64
 #ifdef TARGET_X86
     InstructionSet_X86Base=1,
@@ -125,23 +127,25 @@ enum CORINFO_InstructionSet
     InstructionSet_VectorT128=26,
     InstructionSet_VectorT256=27,
     InstructionSet_VectorT512=28,
-    InstructionSet_X86Base_X64=29,
-    InstructionSet_SSE42_X64=30,
-    InstructionSet_AVX_X64=31,
-    InstructionSet_AVX2_X64=32,
-    InstructionSet_AVX512_X64=33,
-    InstructionSet_AVX512v2_X64=34,
-    InstructionSet_AVX512v3_X64=35,
-    InstructionSet_AVX10v1_X64=36,
-    InstructionSet_AVX10v2_X64=37,
-    InstructionSet_AES_X64=38,
-    InstructionSet_AVX512VP2INTERSECT_X64=39,
-    InstructionSet_AVXIFMA_X64=40,
-    InstructionSet_AVXVNNI_X64=41,
-    InstructionSet_GFNI_X64=42,
-    InstructionSet_SHA_X64=43,
-    InstructionSet_WAITPKG_X64=44,
-    InstructionSet_X86Serialize_X64=45,
+    InstructionSet_AVXVNNIINT=29,
+    InstructionSet_AVXVNNIINT_V512=30,
+    InstructionSet_X86Base_X64=31,
+    InstructionSet_SSE42_X64=32,
+    InstructionSet_AVX_X64=33,
+    InstructionSet_AVX2_X64=34,
+    InstructionSet_AVX512_X64=35,
+    InstructionSet_AVX512v2_X64=36,
+    InstructionSet_AVX512v3_X64=37,
+    InstructionSet_AVX10v1_X64=38,
+    InstructionSet_AVX10v2_X64=39,
+    InstructionSet_AES_X64=40,
+    InstructionSet_AVX512VP2INTERSECT_X64=41,
+    InstructionSet_AVXIFMA_X64=42,
+    InstructionSet_AVXVNNI_X64=43,
+    InstructionSet_GFNI_X64=44,
+    InstructionSet_SHA_X64=45,
+    InstructionSet_WAITPKG_X64=46,
+    InstructionSet_X86Serialize_X64=47,
 #endif // TARGET_X86
 
 };
@@ -503,6 +507,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
             resultflags.RemoveInstructionSet(InstructionSet_WAITPKG);
         if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
             resultflags.RemoveInstructionSet(InstructionSet_X86Serialize);
+        if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
+            resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT);
+        if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2))
+            resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT_V512);
         if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
             resultflags.RemoveInstructionSet(InstructionSet_Vector128);
         if (resultflags.HasInstructionSet(InstructionSet_Vector256) && !resultflags.HasInstructionSet(InstructionSet_AVX))
@@ -565,6 +573,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
             resultflags.RemoveInstructionSet(InstructionSet_WAITPKG);
         if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
             resultflags.RemoveInstructionSet(InstructionSet_X86Serialize);
+        if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
+            resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT);
+        if (resultflags.HasInstructionSet(InstructionSet_AVXVNNIINT_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2))
+            resultflags.RemoveInstructionSet(InstructionSet_AVXVNNIINT_V512);
         if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
             resultflags.RemoveInstructionSet(InstructionSet_Vector128);
         if (resultflags.HasInstructionSet(InstructionSet_Vector256) && !resultflags.HasInstructionSet(InstructionSet_AVX))
@@ -747,6 +759,10 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
             return "VectorT256";
         case InstructionSet_VectorT512 :
             return "VectorT512";
+        case InstructionSet_AVXVNNIINT :
+            return "AVXVNNIINT";
+        case InstructionSet_AVXVNNIINT_V512 :
+            return "AVXVNNIINT_V512";
 #endif // TARGET_AMD64
 #ifdef TARGET_X86
         case InstructionSet_X86Base :
@@ -805,6 +821,10 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
             return "VectorT256";
         case InstructionSet_VectorT512 :
             return "VectorT512";
+        case InstructionSet_AVXVNNIINT :
+            return "AVXVNNIINT";
+        case InstructionSet_AVXVNNIINT_V512 :
+            return "AVXVNNIINT_V512";
 #endif // TARGET_X86
 
         default:
@@ -909,6 +929,10 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
         case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128;
         case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256;
         case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512;
+        case READYTORUN_INSTRUCTION_AvxVnniInt8: return InstructionSet_AVXVNNIINT;
+        case READYTORUN_INSTRUCTION_AvxVnniInt8_V512: return InstructionSet_AVXVNNIINT_V512;
+        case READYTORUN_INSTRUCTION_AvxVnniInt16: return InstructionSet_AVXVNNIINT;
+        case READYTORUN_INSTRUCTION_AvxVnniInt16_V512: return InstructionSet_AVXVNNIINT_V512;
 #endif // TARGET_AMD64
 #ifdef TARGET_X86
         case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base;
@@ -974,6 +998,10 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
         case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128;
         case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256;
         case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512;
+        case READYTORUN_INSTRUCTION_AvxVnniInt8: return InstructionSet_AVXVNNIINT;
+        case READYTORUN_INSTRUCTION_AvxVnniInt8_V512: return InstructionSet_AVXVNNIINT_V512;
+        case READYTORUN_INSTRUCTION_AvxVnniInt16: return InstructionSet_AVXVNNIINT;
+        case READYTORUN_INSTRUCTION_AvxVnniInt16_V512: return InstructionSet_AVXVNNIINT_V512;
 #endif // TARGET_X86
 
         default:
diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h
index 785e4e2a854ab7..43c4c32332df8d 100644
--- a/src/coreclr/inc/jiteeversionguid.h
+++ b/src/coreclr/inc/jiteeversionguid.h
@@ -37,11 +37,11 @@
 
 #include <minipal/guid.h>
 
-constexpr GUID JITEEVersionIdentifier = { /* ce8cef5e-261f-469a-b599-9f3f3e8b2448 */
-    0xce8cef5e,
-    0x261f,
-    0x469a,
-    {0xb5, 0x99, 0x9f, 0x3f, 0x3e, 0x8b, 0x24, 0x48}
+constexpr GUID JITEEVersionIdentifier = { /* 5c7eb9f1-a9cb-4a35-aea6-ae93d1f54c56 */
+    0x5c7eb9f1,
+    0xa9cb,
+    0x4a35,
+    {0xae, 0xa6, 0xae, 0x93, 0xd1, 0xf5, 0x4c, 0x56}
   };
 
 #endif // JIT_EE_VERSIONING_GUID_H
diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h
index 01f92e168c6b39..ee9e5fdc443702 100644
--- a/src/coreclr/inc/readytoruninstructionset.h
+++ b/src/coreclr/inc/readytoruninstructionset.h
@@ -67,6 +67,10 @@ enum ReadyToRunInstructionSet
     READYTORUN_INSTRUCTION_Zba=57,
     READYTORUN_INSTRUCTION_Zbb=58,
     READYTORUN_INSTRUCTION_Sve2=59,
+    READYTORUN_INSTRUCTION_AvxVnniInt8=60,
+    READYTORUN_INSTRUCTION_AvxVnniInt8_V512=61,
+    READYTORUN_INSTRUCTION_AvxVnniInt16=62,
+    READYTORUN_INSTRUCTION_AvxVnniInt16_V512=63,
     READYTORUN_INSTRUCTION_Aes_V256=64,
     READYTORUN_INSTRUCTION_Aes_V512=65,
     READYTORUN_INSTRUCTION_AvxIfma=66,
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 36e35ba3ecc190..9a2305d3eefb1c 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -9604,6 +9604,13 @@ class Compiler
         return false;
     }
 
+#ifdef FEATURE_HW_INTRINSICS
+    CORINFO_InstructionSet lookupInstructionSet(const char* className);
+    CORINFO_InstructionSet lookupIsa(const char* className,
+                                     const char* innerEnclosingClassName,
+                                     const char* outerEnclosingClassName);
+#endif // FEATURE_HW_INTRINSICS
+
 #ifdef DEBUG
     // Answer the question: Is a particular ISA supported?
     // Use this api when asking the question so that future
diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index 11ccf43e10cca0..b0a65624163f0e 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -90,6 +90,37 @@ bool emitter::IsApxOnlyInstruction(instruction ins)
     return (ins >= FIRST_APX_INSTRUCTION) && (ins <= LAST_APX_INSTRUCTION);
 }
 
+//------------------------------------------------------------------------
+// IsAVXVNNIFamilyInstruction: Is `ins` an AVX-VNNI or AVX-VNNI-INT instruction?
+//
+// Arguments:
+//    ins - The instruction to check
+//
+// Return Value:
+//    true if `ins` is within the AVXVNNI instruction range or satisfies
+//    IsAVXVNNIINTInstruction; otherwise, false
+//
+bool emitter::IsAVXVNNIFamilyInstruction(instruction ins)
+{
+    return ((ins >= FIRST_AVXVNNI_INSTRUCTION) && (ins <= LAST_AVXVNNI_INSTRUCTION)) || IsAVXVNNIINTInstruction(ins);
+}
+
+//------------------------------------------------------------------------
+// IsAVXVNNIINTInstruction: Is `ins` an AVX-VNNI-INT8 or AVX-VNNI-INT16 instruction?
+//
+// Arguments:
+//    ins - The instruction to check
+//
+// Return Value:
+//    true if `ins` is within the AVXVNNIINT8 or the AVXVNNIINT16 instruction
+//    range; otherwise, false
+//
+bool emitter::IsAVXVNNIINTInstruction(instruction ins)
+{
+    return ((ins >= FIRST_AVXVNNIINT8_INSTRUCTION) && (ins <= LAST_AVXVNNIINT8_INSTRUCTION)) ||
+           ((ins >= FIRST_AVXVNNIINT16_INSTRUCTION) && (ins <= LAST_AVXVNNIINT16_INSTRUCTION));
+}
+
 bool emitter::Is3OpRmwInstruction(instruction ins)
 {
     switch (ins)
@@ -113,7 +124,7 @@ bool emitter::Is3OpRmwInstruction(instruction ins)
         default:
         {
             return ((ins >= FIRST_FMA_INSTRUCTION) && (ins <= LAST_FMA_INSTRUCTION)) ||
-                   ((ins >= FIRST_AVXVNNI_INSTRUCTION) && (ins <= LAST_AVXVNNI_INSTRUCTION)) ||
+                   (IsAVXVNNIFamilyInstruction(ins)) ||
                    ((ins >= FIRST_AVXIFMA_INSTRUCTION) && (ins <= LAST_AVXIFMA_INSTRUCTION));
         }
     }
@@ -278,6 +289,23 @@ bool emitter::IsVexEncodableInstruction(instruction ins) const
             return emitComp->compSupportsHWIntrinsic(InstructionSet_AVXVNNI);
         }
 
+        case INS_vpdpwsud:
+        case INS_vpdpwsuds:
+        case INS_vpdpwusd:
+        case INS_vpdpwusds:
+        case INS_vpdpwuud:
+        case INS_vpdpwuuds:
+        case INS_vpdpbssd:
+        case INS_vpdpbssds:
+        case INS_vpdpbsud:
+        case INS_vpdpbsuds:
+        case INS_vpdpbuud:
+        case INS_vpdpbuuds:
+        {
+            // Vex versions of AvxVnniInt8 + AvxVnniInt16
+            return emitComp->compSupportsHWIntrinsic(InstructionSet_AVXVNNIINT);
+        }
+
         case INS_vpmadd52huq:
         case INS_vpmadd52luq:
         {
@@ -325,6 +353,23 @@ bool emitter::IsEvexEncodableInstruction(instruction ins) const
             return emitComp->compSupportsHWIntrinsic(InstructionSet_AES_V512);
         }
 
+        case INS_vpdpwsud:
+        case INS_vpdpwsuds:
+        case INS_vpdpwusd:
+        case INS_vpdpwusds:
+        case INS_vpdpwuud:
+        case INS_vpdpwuuds:
+        case INS_vpdpbssd:
+        case INS_vpdpbssds:
+        case INS_vpdpbsud:
+        case INS_vpdpbsuds:
+        case INS_vpdpbuud:
+        case INS_vpdpbuuds:
+        {
+            // Evex versions of AvxVnniInt8 + AvxVnniInt16
+            return emitComp->compSupportsHWIntrinsic(InstructionSet_AVXVNNIINT_V512);
+        }
+
         case INS_vpdpbusd:
         case INS_vpdpwssd:
         case INS_vpdpbusds:
@@ -2928,7 +2973,9 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co
         if (sizePrefix == 0)
         {
             // no simd prefix for EVEX2 - AVX10.2 and above
-            assert(emitComp->compIsaSupportedDebugOnly(InstructionSet_AVX10v2));
+            assert(emitComp->compIsaSupportedDebugOnly(InstructionSet_AVX10v2) ||
+                   emitComp->compIsaSupportedDebugOnly(InstructionSet_AVXVNNIINT) ||
+                   emitComp->compIsaSupportedDebugOnly(InstructionSet_AVXVNNIINT_V512));
         }
         else if (isPrefix(sizePrefix))
         {
@@ -3139,7 +3186,14 @@ emitter::code_t emitter::emitExtractVexPrefix(instruction ins, code_t& code) con
         // check for a prefix in the 11 position
         BYTE sizePrefix = (code >> 16) & 0xFF;
 
-        if ((sizePrefix != 0) && isPrefix(sizePrefix))
+        if (sizePrefix == 0)
+        {
+            // A subset of the Avx-Vnni-Int* instructions has no simd prefix:
+            // INS_vpdpbuud[s] and INS_vpdpwuud[s]
+            assert(emitComp->compIsaSupportedDebugOnly(InstructionSet_AVXVNNIINT) ||
+                   emitComp->compIsaSupportedDebugOnly(InstructionSet_AVXVNNIINT_V512));
+        }
+        else if (isPrefix(sizePrefix))
         {
             // 'pp' bits in byte2 of VEX prefix allows us to encode SIMD size prefixes as two bits
             //
@@ -3209,23 +3263,27 @@ emitter::code_t emitter::emitExtractVexPrefix(instruction ins, code_t& code) con
                     unreached();
                 }
             }
+        }
+        else
+        {
+            unreached();
+        }
 
-            // Now the byte in the 22 position must be an escape byte 0F
-            leadingBytes = check;
-            assert(leadingBytes == 0x0F);
+        // Now the byte in the 22 position must be an escape byte 0F
+        leadingBytes = check;
+        assert(leadingBytes == 0x0F);
 
-            // Get rid of both sizePrefix and escape byte
-            code &= 0x0000FFFFLL;
+        // Get rid of both sizePrefix and escape byte
+        code &= 0x0000FFFFLL;
 
-            // Check the byte in the 33 position to see if it is 3A or 38.
-            // In such a case escape bytes must be 0x0F3A or 0x0F38
-            check = code & 0xFF;
+        // Check the byte in the 33 position to see if it is 3A or 38.
+        // In such a case escape bytes must be 0x0F3A or 0x0F38
+        check = code & 0xFF;
 
-            if ((check == 0x3A) || (check == 0x38))
-            {
-                leadingBytes = (leadingBytes << 8) | check;
-                code &= 0x0000FF00LL;
-            }
+        if ((check == 0x3A) || (check == 0x38))
+        {
+            leadingBytes = (leadingBytes << 8) | check;
+            code &= 0x0000FF00LL;
         }
     }
     else
@@ -4378,7 +4436,7 @@ bool emitter::EncodedBySSE38orSSE3A(instruction ins) const
 
 #if defined(DEBUG)
     insCode = (insCode >> 16) & 0xFF;
-    assert((insCode == 0x66) || (insCode == 0xF2) || (insCode == 0xF3));
+    assert((insCode == 0x00) || (insCode == 0x66) || (insCode == 0xF2) || (insCode == 0xF3));
 #endif // DEBUG
 
     return true;
@@ -18083,7 +18141,8 @@ ssize_t emitter::TryEvexCompressDisp8Byte(instrDesc* id, ssize_t dsp, bool* dspI
     {
         case INS_TT_FULL:
         {
-            assert(inputSize == 4 || inputSize == 8);
+            // AVX-VNNI-INT instructions are the only INS_TT_FULL users allowed a different input size.
+            assert((inputSize == 4) || (inputSize == 8) || IsAVXVNNIINTInstruction(id->idIns()));
             if (HasEmbeddedBroadcast(id))
             {
                 // N = input size in bytes
diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h
index b5ce73f6a48a2d..2074cb5da3ff70 100644
--- a/src/coreclr/jit/emitxarch.h
+++ b/src/coreclr/jit/emitxarch.h
@@ -121,6 +121,8 @@ static bool IsSSEOrAVXInstruction(instruction ins);
 static bool IsAVXOnlyInstruction(instruction ins);
 static bool IsAvx512OnlyInstruction(instruction ins);
 static bool IsKMOVInstruction(instruction ins);
+static bool IsAVXVNNIFamilyInstruction(instruction ins);
+static bool IsAVXVNNIINTInstruction(instruction ins);
 static bool Is3OpRmwInstruction(instruction ins);
 static bool IsBMIInstruction(instruction ins);
 static bool IsKInstruction(instruction ins);
diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp
index fbb9b984bd4e06..d7be0beb08da26 100644
--- a/src/coreclr/jit/hwintrinsic.cpp
+++ b/src/coreclr/jit/hwintrinsic.cpp
@@ -930,6 +930,8 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = {
     { NI_Illegal, NI_Illegal },                                 //      VectorT128
     { NI_Illegal, NI_Illegal },                                 //      VectorT256
     { NI_Illegal, NI_Illegal },                                 //      VectorT512
+    { FIRST_NI_AVXVNNIINT, LAST_NI_AVXVNNIINT },                // AVXVNNIINT
+    { FIRST_NI_AVXVNNIINT_V512, LAST_NI_AVXVNNIINT_V512 },      // AVXVNNIINT_V512
 
     { FIRST_NI_X86Base_X64, LAST_NI_X86Base_X64 },              // X86Base_X64
     { FIRST_NI_SSE42_X64, LAST_NI_SSE42_X64 },                  // SSE42_X64
@@ -1180,7 +1182,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler*         comp,
         return NI_Illegal;
     }
 
-    CORINFO_InstructionSet isa = lookupIsa(className, innerEnclosingClassName, outerEnclosingClassName);
+    CORINFO_InstructionSet isa = comp->lookupIsa(className, innerEnclosingClassName, outerEnclosingClassName);
 
     if (isa == InstructionSet_ILLEGAL)
     {
diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h
index 10bedd930b5aa5..6cacd4a1ae17f1 100644
--- a/src/coreclr/jit/hwintrinsic.h
+++ b/src/coreclr/jit/hwintrinsic.h
@@ -525,15 +525,12 @@ struct HWIntrinsicInfo
 
     static const HWIntrinsicInfo& lookup(NamedIntrinsic id);
 
-    static NamedIntrinsic         lookupId(Compiler*         comp,
-                                           CORINFO_SIG_INFO* sig,
-                                           const char*       className,
-                                           const char*       methodName,
-                                           const char*       innerEnclosingClassName,
-                                           const char*       outerEnclosingClassName);
-    static CORINFO_InstructionSet lookupIsa(const char* className,
-                                            const char* innerEnclosingClassName,
-                                            const char* outerEnclosingClassName);
+    static NamedIntrinsic lookupId(Compiler*         comp,
+                                   CORINFO_SIG_INFO* sig,
+                                   const char*       className,
+                                   const char*       methodName,
+                                   const char*       innerEnclosingClassName,
+                                   const char*       outerEnclosingClassName);
 
     static unsigned lookupSimdSize(Compiler* comp, NamedIntrinsic id, CORINFO_SIG_INFO* sig);
 
diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp
index 8db572f6bbc54f..efa6c434d0e6e2 100644
--- a/src/coreclr/jit/hwintrinsicarm64.cpp
+++ b/src/coreclr/jit/hwintrinsicarm64.cpp
@@ -51,7 +51,7 @@ static CORINFO_InstructionSet Arm64VersionOfIsa(CORINFO_InstructionSet isa)
 //
 // Return Value:
 //    The InstructionSet associated with className
-static CORINFO_InstructionSet lookupInstructionSet(const char* className)
+CORINFO_InstructionSet Compiler::lookupInstructionSet(const char* className)
 {
     assert(className != nullptr);
 
@@ -136,9 +136,9 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className)
 // Return Value:
 //    The InstructionSet associated with className and enclosingClassName
 //
-CORINFO_InstructionSet HWIntrinsicInfo::lookupIsa(const char* className,
-                                                  const char* innerEnclosingClassName,
-                                                  const char* outerEnclosingClassName)
+CORINFO_InstructionSet Compiler::lookupIsa(const char* className,
+                                           const char* innerEnclosingClassName,
+                                           const char* outerEnclosingClassName)
 {
     assert(className != nullptr);
 
diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
index 1e7f79a6aa1334..6d255a1295315c 100644
--- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
+++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
@@ -1018,6 +1018,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
         case InstructionSet_AVX512:
         case InstructionSet_AVX512_X64:
         case InstructionSet_AVX512v2:
+        case InstructionSet_AVXVNNIINT:
+        case InstructionSet_AVXVNNIINT_V512:
         {
             genAvxFamilyIntrinsic(node, instOptions);
             break;
@@ -3485,6 +3487,176 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption
             break;
         }
 
+        case NI_AVXVNNIINT_MultiplyWideningAndAddSaturate:
+        case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSaturate:
+        {
+            GenTree* op2 = node->Op(2);
+            GenTree* op3 = node->Op(3);
+
+            op1Reg           = op1->GetRegNum();
+            regNumber op2Reg = op2->GetRegNum();
+            assert(targetReg != REG_NA);
+            assert(op1Reg != REG_NA);
+            assert(op2Reg != REG_NA);
+
+            var_types op3Type = node->GetAuxiliaryType();
+            switch (baseType)
+            {
+                case TYP_UBYTE:
+                {
+                    ins = INS_vpdpbuuds;
+                    break;
+                }
+
+                case TYP_BYTE:
+                {
+                    switch (op3Type)
+                    {
+                        case TYP_UBYTE:
+                        {
+                            ins = INS_vpdpbsuds;
+                            break;
+                        }
+
+                        case TYP_BYTE:
+                        {
+                            ins = INS_vpdpbssds;
+                            break;
+                        }
+
+                        default:
+                        {
+                            unreached();
+                        }
+                    }
+                    break;
+                }
+
+                case TYP_SHORT:
+                {
+                    ins = INS_vpdpwsuds;
+                    break;
+                }
+
+                case TYP_USHORT:
+                {
+                    switch (op3Type)
+                    {
+                        case TYP_USHORT:
+                        {
+                            ins = INS_vpdpwuuds;
+                            break;
+                        }
+
+                        case TYP_SHORT:
+                        {
+                            ins = INS_vpdpwusds;
+                            break;
+                        }
+
+                        default:
+                        {
+                            unreached();
+                        }
+                    }
+                    break;
+                }
+
+                default:
+                {
+                    unreached();
+                }
+            }
+
+            genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, op1Reg, op2Reg, op3, instOptions);
+            break;
+        }
+
+        case NI_AVXVNNIINT_MultiplyWideningAndAdd:
+        case NI_AVXVNNIINT_V512_MultiplyWideningAndAdd:
+        {
+            GenTree* op2 = node->Op(2);
+            GenTree* op3 = node->Op(3);
+
+            op1Reg           = op1->GetRegNum();
+            regNumber op2Reg = op2->GetRegNum();
+            assert(targetReg != REG_NA);
+            assert(op1Reg != REG_NA);
+            assert(op2Reg != REG_NA);
+
+            var_types op3Type = node->GetAuxiliaryType();
+            switch (baseType)
+            {
+                case TYP_UBYTE:
+                {
+                    ins = INS_vpdpbuud;
+                    break;
+                }
+
+                case TYP_BYTE:
+                {
+                    switch (op3Type)
+                    {
+                        case TYP_UBYTE:
+                        {
+                            ins = INS_vpdpbsud;
+                            break;
+                        }
+
+                        case TYP_BYTE:
+                        {
+                            ins = INS_vpdpbssd;
+                            break;
+                        }
+
+                        default:
+                        {
+                            unreached();
+                        }
+                    }
+                    break;
+                }
+
+                case TYP_SHORT:
+                {
+                    ins = INS_vpdpwsud;
+                    break;
+                }
+
+                case TYP_USHORT:
+                {
+                    switch (op3Type)
+                    {
+                        case TYP_USHORT:
+                        {
+                            ins = INS_vpdpwuud;
+                            break;
+                        }
+
+                        case TYP_SHORT:
+                        {
+                            ins = INS_vpdpwusd;
+                            break;
+                        }
+
+                        default:
+                        {
+                            unreached();
+                        }
+                    }
+                    break;
+                }
+
+                default:
+                {
+                    unreached();
+                }
+            }
+
+            genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, op1Reg, op2Reg, op3, instOptions);
+            break;
+        }
+
         default:
             unreached();
             break;
diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h
index 39223ca652ecf1..bcde3fb59fad2e 100644
--- a/src/coreclr/jit/hwintrinsiclistxarch.h
+++ b/src/coreclr/jit/hwintrinsiclistxarch.h
@@ -1106,6 +1106,27 @@ HARDWARE_INTRINSIC(AVXVNNI,         MultiplyWideningAndAdd,
 HARDWARE_INTRINSIC(AVXVNNI,         MultiplyWideningAndAddSaturate,             -1,              3,     {INS_invalid,           INS_vpdpbusds,           INS_vpdpwssds,         INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics)
 #define LAST_NI_AVXVNNI             NI_AVXVNNI_MultiplyWideningAndAddSaturate
 
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                 ISA              Function name                                                    SIMD size       NumArg                                                                                                                            Instructions                                                                                                                  Category                            Flags
+//                                                                                                                          {TYP_BYTE,            TYP_UBYTE,              TYP_SHORT,            TYP_USHORT,           TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//  AVXVNNIINT Intrinsics (no per-type instruction listed: these are HW_Flag_SpecialImport/HW_Flag_SpecialCodeGen entries)
+#define FIRST_NI_AVXVNNIINT        NI_AVXVNNIINT_MultiplyWideningAndAdd
+HARDWARE_INTRINSIC(AVXVNNIINT,     MultiplyWideningAndAdd,                                          -1,             3,     {INS_invalid,          INS_invalid,           INS_invalid,           INS_invalid,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,        INS_invalid},                 HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(AVXVNNIINT,     MultiplyWideningAndAddSaturate,                                  -1,             3,     {INS_invalid,          INS_invalid,           INS_invalid,           INS_invalid,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,        INS_invalid},                 HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport)
+#define LAST_NI_AVXVNNIINT         NI_AVXVNNIINT_MultiplyWideningAndAddSaturate
+
+
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                 ISA                  Function name                                                    SIMD size       NumArg                                                                                                                            Instructions                                                                                                                  Category                            Flags
+//                                                                                                                              {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//  AVXVNNIINT_V512 Intrinsics (no per-type instruction listed: these are HW_Flag_SpecialImport/HW_Flag_SpecialCodeGen entries)
+#define FIRST_NI_AVXVNNIINT_V512        NI_AVXVNNIINT_V512_MultiplyWideningAndAdd
+HARDWARE_INTRINSIC(AVXVNNIINT_V512,     MultiplyWideningAndAdd,                                          -1,             3,     {INS_invalid,          INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,        INS_invalid},                 HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(AVXVNNIINT_V512,     MultiplyWideningAndAddSaturate,                                  -1,             3,     {INS_invalid,          INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,        INS_invalid},                 HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport)
+#define LAST_NI_AVXVNNIINT_V512         NI_AVXVNNIINT_V512_MultiplyWideningAndAddSaturate
+
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //                 ISA              Function name                               SIMD size       NumArg                                                                                                                            Instructions                                                                                                                  Category                            Flags
 //                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp
index ae753ee165cd30..7d4d4abd207b5f 100644
--- a/src/coreclr/jit/hwintrinsicxarch.cpp
+++ b/src/coreclr/jit/hwintrinsicxarch.cpp
@@ -44,6 +44,10 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa)
             return InstructionSet_AVXIFMA_X64;
         case InstructionSet_AVXVNNI:
             return InstructionSet_AVXVNNI_X64;
+        case InstructionSet_AVXVNNIINT:
+            return InstructionSet_AVXVNNIINT;
+        case InstructionSet_AVXVNNIINT_V512:
+            return InstructionSet_AVXVNNIINT_V512;
         case InstructionSet_GFNI:
             return InstructionSet_GFNI_X64;
         case InstructionSet_SHA:
@@ -145,6 +149,12 @@ static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa)
             return InstructionSet_GFNI_V512;
         }
 
+        case InstructionSet_AVXVNNIINT:
+        case InstructionSet_AVXVNNIINT_V512:
+        {
+            return InstructionSet_AVXVNNIINT_V512;
+        }
+
         default:
         {
             return InstructionSet_NONE;
@@ -160,7 +170,7 @@ static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa)
 //
 // Return Value:
 //    The InstructionSet associated with className
-static CORINFO_InstructionSet lookupInstructionSet(const char* className)
+CORINFO_InstructionSet Compiler::lookupInstructionSet(const char* className)
 {
     assert(className != nullptr);
 
@@ -253,9 +263,26 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className)
             {
                 return InstructionSet_AVXIFMA;
             }
-            else if (strcmp(className + 3, "Vnni") == 0)
+            else if (strncmp(className + 3, "Vnni", 4) == 0)
             {
-                return InstructionSet_AVXVNNI;
+                if (className[7] == '\0')
+                {
+                    return InstructionSet_AVXVNNI;
+                }
+                else if (strncmp(className + 7, "Int", 3) == 0)
+                {
+                    if ((strcmp(className + 10, "8") == 0) || (strcmp(className + 10, "16") == 0))
+                    {
+                        if (compSupportsHWIntrinsic(InstructionSet_AVXVNNIINT))
+                        {
+                            return InstructionSet_AVXVNNIINT;
+                        }
+                        else
+                        {
+                            return InstructionSet_AVXVNNIINT_V512;
+                        }
+                    }
+                }
             }
         }
     }
@@ -386,7 +413,6 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className)
             return InstructionSet_X86Serialize;
         }
     }
-
     return InstructionSet_ILLEGAL;
 }
 
@@ -400,9 +426,9 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className)
 //
 // Return Value:
 //    The InstructionSet associated with className and enclosingClassName
-CORINFO_InstructionSet HWIntrinsicInfo::lookupIsa(const char* className,
-                                                  const char* innerEnclosingClassName,
-                                                  const char* outerEnclosingClassName)
+CORINFO_InstructionSet Compiler::lookupIsa(const char* className,
+                                           const char* innerEnclosingClassName,
+                                           const char* outerEnclosingClassName)
 {
     assert(className != nullptr);
 
@@ -5229,6 +5255,35 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
             break;
         }
 
+        case NI_AVXVNNIINT_MultiplyWideningAndAdd:
+        case NI_AVXVNNIINT_MultiplyWideningAndAddSaturate:
+        case NI_AVXVNNIINT_V512_MultiplyWideningAndAdd:
+        case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSaturate:
+        {
+            assert(sig->numArgs == 3);
+
+            CORINFO_ARG_LIST_HANDLE argList = sig->args;
+            CORINFO_CLASS_HANDLE    argClass;
+            var_types               argType = TYP_UNKNOWN;
+
+            CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList);
+            CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2);
+
+            argType                    = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass)));
+            CorInfoType op3BaseJitType = getBaseJitTypeOfSIMDType(argClass);
+            GenTree*    op3            = getArgForHWIntrinsic(argType, argClass);
+
+            argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
+            op2     = getArgForHWIntrinsic(argType, argClass);
+
+            argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
+            op1     = getArgForHWIntrinsic(argType, argClass);
+
+            retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize);
+            retNode->AsHWIntrinsic()->SetAuxiliaryJitType(op3BaseJitType);
+            break;
+        }
+
         case NI_AVX512_ExpandLoad:
         case NI_AVX512v3_ExpandLoad:
         {
diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h
index 7f0c53addd6b3b..d395369ffc1b07 100644
--- a/src/coreclr/jit/instrsxarch.h
+++ b/src/coreclr/jit/instrsxarch.h
@@ -636,6 +636,24 @@ INST3(vpdpwssd,         "vpdpwssd",         IUM_RW, BAD_CODE,     BAD_CODE,
 INST3(vpdpwssds,        "vpdpwssds",        IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0x53),                             5C,                2X,         INS_TT_FULL,                         Input_32Bit    | KMask_Base4     | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Multiply and Add Signed Word Integers with Saturation
 #define LAST_AVXVNNI_INSTRUCTION INS_vpdpwssds
 
+#define FIRST_AVXVNNIINT16_INSTRUCTION INS_vpdpwsud // AVX-VNNI-INT16: word (vpdpw*) dot-product instructions
+INST3(vpdpwsud,         "vpdpwsud",         IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0xf3, 0xD2),            5C,                2X,         INS_TT_FULL,                         Input_32Bit    | KMask_Base4     | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Multiply individual words of first source operand with individual words of second source operand and add the results
+INST3(vpdpwsuds,        "vpdpwsuds",        IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0xf3, 0xD3),            5C,                2X,         INS_TT_FULL,                         Input_32Bit    | KMask_Base4     | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Multiply individual words of first source operand with individual words of second source operand and add the results with saturation
+INST3(vpdpwusd,         "vpdpwusd",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0xD2),                   5C,                2X,         INS_TT_FULL,                         Input_32Bit    | KMask_Base4     | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Multiply individual words of first source operand with individual words of second source operand and add the results
+INST3(vpdpwusds,        "vpdpwusds",        IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0xD3),                   5C,                2X,         INS_TT_FULL,                         Input_32Bit    | KMask_Base4     | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Multiply individual words of first source operand with individual words of second source operand and add the results with saturation
+INST3(vpdpwuud,         "vpdpwuud",         IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0x00, 0xD2),            5C,                2X,         INS_TT_FULL,                         Input_32Bit    | KMask_Base4     | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Multiply individual words of first source operand with individual words of second source operand and add the results
+INST3(vpdpwuuds,        "vpdpwuuds",        IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0x00, 0xD3),            5C,                2X,         INS_TT_FULL,                         Input_32Bit    | KMask_Base4     | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Multiply individual words of first source operand with individual words of second source operand and add the results with saturation
+#define LAST_AVXVNNIINT16_INSTRUCTION INS_vpdpwuuds
+
+#define FIRST_AVXVNNIINT8_INSTRUCTION INS_vpdpbssd // AVX-VNNI-INT8: byte (vpdpb*) dot-product instructions
+INST3(vpdpbssd,         "vpdpbssd",         IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0xf2, 0x50),            5C,                2X,         INS_TT_FULL,                         Input_32Bit    | KMask_Base4     | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results
+INST3(vpdpbssds,        "vpdpbssds",        IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0xf2, 0x51),            5C,                2X,         INS_TT_FULL,                         Input_32Bit    | KMask_Base4     | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results with saturation
+INST3(vpdpbsud,         "vpdpbsud",         IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0xf3, 0x50),            5C,                2X,         INS_TT_FULL,                         Input_32Bit    | KMask_Base4     | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results
+INST3(vpdpbsuds,        "vpdpbsuds",        IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0xf3, 0x51),            5C,                2X,         INS_TT_FULL,                         Input_32Bit    | KMask_Base4     | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results with saturation
+INST3(vpdpbuud,         "vpdpbuud",         IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0x00, 0x50),            5C,                2X,         INS_TT_FULL,                         Input_32Bit    | KMask_Base4     | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results
+INST3(vpdpbuuds,        "vpdpbuuds",        IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0x00, 0x51),            5C,                2X,         INS_TT_FULL,                         Input_32Bit    | KMask_Base4     | REX_W0       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstSrcSrcAVXInstruction)                                                                                           // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results with saturation
+#define LAST_AVXVNNIINT8_INSTRUCTION INS_vpdpbuuds
+
 #define FIRST_AVXIFMA_INSTRUCTION INS_vpmadd52huq
 // Instructions for AVXIFMA
 INST3(vpmadd52huq,      "vpmadd52huq",      IUM_RW, BAD_CODE,     BAD_CODE,     SSE38(0xB5),                             ILLEGAL,           ILLEGAL,    INS_TT_FULL,                         Input_64Bit    | KMask_Base2     | REX_W1       | Encoding_VEX  | Encoding_EVEX  | INS_Flags_IsDstDstSrcAVXInstruction)                                                                                           // Packed Multiply of Unsigned 52-Bit Unsigned Integers and Add High 52-Bit Products to 64-Bit Accumulators
@@ -1112,18 +1130,6 @@ INST3(vminmaxss,        "vminmaxss",        IUM_WR, BAD_CODE,               BAD_
 INST3(vmovd_simd,       "vmovd",            IUM_WR, PCKDBL(0xD6),           BAD_CODE,     SSEFLT(0x7E),                  ILLEGAL,           ILLEGAL,    INS_TT_TUPLE1_SCALAR,                Input_32Bit                      | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Move DWORD between xmm regs <-> memory/xmm regs
 INST3(vmovw_simd,       "vmovw",            IUM_WR, SSEFLTMAP(0x05, 0x7E),  BAD_CODE,     SSEFLTMAP(0x05, 0x6E),         ILLEGAL,           ILLEGAL,    INS_TT_TUPLE1_SCALAR,                Input_16Bit                      | REX_W0                       | Encoding_EVEX)                                                                                                                                  // Move WORD  between xmm regs <-> memory/xmm regs
 INST3(vmpsadbw,         "vmpsadbw",         IUM_WR, BAD_CODE,               BAD_CODE,     AVX3A(0x42),                   4C,                2C,         INS_TT_FULL_MEM,                                      KMask_Base8     | REX_W0                       | Encoding_EVEX                                                    | INS_Flags_IsDstDstSrcAVXInstruction)                                         // Compute Multiple Packed Sums of Absolute Difference
-INST3(vpdpbssd,         "vpdpbssd",         IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0xf2, 0x50),            ILLEGAL,           ILLEGAL,    INS_TT_FULL,                         Input_8Bit     | KMask_Base4     | REX_W0                       | Encoding_EVEX                                                    | INS_Flags_IsDstSrcSrcAVXInstruction)                                         // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results
-INST3(vpdpbssds,        "vpdpbssds",        IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0xf2, 0x51),            ILLEGAL,           ILLEGAL,    INS_TT_FULL,                         Input_8Bit     | KMask_Base4     | REX_W0                       | Encoding_EVEX                                                    | INS_Flags_IsDstSrcSrcAVXInstruction)                                         // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results
-INST3(vpdpbsud,         "vpdpbsud",         IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0xf3, 0x50),            5C,                2X,         INS_TT_FULL,                         Input_8Bit     | KMask_Base4     | REX_W0                       | Encoding_EVEX                                                    | INS_Flags_IsDstSrcSrcAVXInstruction)                                         // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results
-INST3(vpdpbsuds,        "vpdpbsuds",        IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0xf3, 0x51),            5C,                2X,         INS_TT_FULL,                         Input_8Bit     | KMask_Base4     | REX_W0                       | Encoding_EVEX                                                    | INS_Flags_IsDstSrcSrcAVXInstruction)                                         // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results
-INST3(vpdpbuud,         "vpdpbuud",         IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0x00, 0x50),            5C,                2X,         INS_TT_FULL,                         Input_8Bit     | KMask_Base4     | REX_W0                       | Encoding_EVEX                                                    | INS_Flags_IsDstSrcSrcAVXInstruction)                                         // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results
-INST3(vpdpbuuds,        "vpdpbuuds",        IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0x00, 0x51),            5C,                2X,         INS_TT_FULL,                         Input_8Bit     | KMask_Base4     | REX_W0                       | Encoding_EVEX                                                    | INS_Flags_IsDstSrcSrcAVXInstruction)                                         // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results
-INST3(vpdpwsud,         "vpdpwsud",         IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0xf3, 0xD2),            5C,                2X,         INS_TT_FULL,                         Input_32Bit    | KMask_Base4     | REX_W0                       | Encoding_EVEX                                                    | INS_Flags_IsDstSrcSrcAVXInstruction)                                         // Multiply individual words of first source operand with individual words of second source operand and add the results
-INST3(vpdpwsuds,        "vpdpwsuds",        IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0xf3, 0xD3),            5C,                2X,         INS_TT_FULL,                         Input_32Bit    | KMask_Base4     | REX_W0                       | Encoding_EVEX                                                    | INS_Flags_IsDstSrcSrcAVXInstruction)                                         // Multiply individual words of first source operand with individual words of second source operand and add the results
-INST3(vpdpwusd,         "vpdpwusd",         IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0xD2),                   5C,                2X,         INS_TT_FULL,                         Input_32Bit    | KMask_Base4     | REX_W0                       | Encoding_EVEX                                                    | INS_Flags_IsDstSrcSrcAVXInstruction)                                         // Multiply individual words of first source operand with individual words of second source operand and add the results
-INST3(vpdpwusds,        "vpdpwusds",        IUM_WR, BAD_CODE,               BAD_CODE,     SSE38(0xD3),                   5C,                2X,         INS_TT_FULL,                         Input_32Bit    | KMask_Base4     | REX_W0                       | Encoding_EVEX                                                    | INS_Flags_IsDstSrcSrcAVXInstruction)                                         // Multiply individual words of first source operand with individual words of second source operand and add the results
-INST3(vpdpwuud,         "vpdpwuud",         IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0x00, 0xD2),            5C,                2X,         INS_TT_FULL,                         Input_32Bit    | KMask_Base4     | REX_W0                       | Encoding_EVEX                                                    | INS_Flags_IsDstSrcSrcAVXInstruction)                                         // Multiply individual words of first source operand with individual words of second source operand and add the results
-INST3(vpdpwuuds,        "vpdpwuuds",        IUM_WR, BAD_CODE,               BAD_CODE,     PSSE38(0x00, 0xD3),            5C,                2X,         INS_TT_FULL,                         Input_32Bit    | KMask_Base4     | REX_W0                       | Encoding_EVEX                                                    | INS_Flags_IsDstSrcSrcAVXInstruction)                                         // Multiply individual words of first source operand with individual words of second source operand and add the results
 INST3(vucomxsd,         "vucomxsd",         IUM_RD, BAD_CODE,               BAD_CODE,     SSEFLT(0x2f),                  3C,                1C,         INS_TT_TUPLE1_SCALAR,                Input_64Bit                      | REX_W1                       | Encoding_EVEX    | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF)                                                       // Perform an unordered compare of double precision floating point values and set flags
 INST3(vucomxss,         "vucomxss",         IUM_RD, BAD_CODE,               BAD_CODE,     SSEDBL(0x2E),                  3C,                1C,         INS_TT_TUPLE1_SCALAR,                Input_32Bit                      | REX_W0                       | Encoding_EVEX    | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF)                                                       // Perform an unordered compare of single precision floating point values and set flags
 #define LAST_AVX512_INSTRUCTION INS_vucomxss
diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h
index 4c6a30de6bc50b..3c2f9c67c0f78f 100644
--- a/src/coreclr/jit/jitconfigvalues.h
+++ b/src/coreclr/jit/jitconfigvalues.h
@@ -409,6 +409,7 @@ RELEASE_CONFIG_INTEGER(EnableAES,                   "EnableAES",
 RELEASE_CONFIG_INTEGER(EnableAVX512VP2INTERSECT,    "EnableAVX512VP2INTERSECT",  1) // Allows AVX512VP2INTERSECT and dependent hardware intrinsics to be disabled
 RELEASE_CONFIG_INTEGER(EnableAVXIFMA,               "EnableAVXIFMA",             1) // Allows AVXIFMA and dependent hardware intrinsics to be disabled
 RELEASE_CONFIG_INTEGER(EnableAVXVNNI,               "EnableAVXVNNI",             1) // Allows AVXVNNI and dependent hardware intrinsics to be disabled
+RELEASE_CONFIG_INTEGER(EnableAVXVNNIINT,            "EnableAVXVNNIINT",          1) // Allows the VEX-encoded AVXVNNIINT (AVX-VNNI-INT8 and AVX-VNNI-INT16) hardware intrinsics to be disabled
 RELEASE_CONFIG_INTEGER(EnableGFNI,                  "EnableGFNI",                1) // Allows GFNI and dependent hardware intrinsics to be disabled
 RELEASE_CONFIG_INTEGER(EnableSHA,                   "EnableSHA",                 1) // Allows SHA and dependent hardware intrinsics to be disabled
 RELEASE_CONFIG_INTEGER(EnableVAES,                  "EnableVAES",                1) // Allows VAES, VPCLMULQDQ, and dependent hardware intrinsics to be disabled
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index e9b5d08e4c36b2..8a1ca074322921 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -10752,13 +10752,6 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
                                 break;
                             }
 
-                            case NI_AVXVNNI_MultiplyWideningAndAdd:
-                            case NI_AVXVNNI_MultiplyWideningAndAddSaturate:
-                            {
-                                TryMakeSrcContainedOrRegOptional(node, op3);
-                                break;
-                            }
-
                             case NI_AVX2_MultiplyNoFlags:
                             case NI_AVX2_X64_MultiplyNoFlags:
                             {
@@ -10814,17 +10807,11 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
                                 break;
                             }
 
-                            case NI_X86Base_DivRem:
-                            case NI_X86Base_X64_DivRem:
-                            {
-                                // DIV only allows divisor (op3) in memory
-                                TryMakeSrcContainedOrRegOptional(node, op3);
-                                break;
-                            }
-
                             default:
                             {
-                                unreached();
+                                assert((intrinsicId == NI_X86Base_DivRem) || (intrinsicId == NI_X86Base_X64_DivRem) ||
+                                       (intrinsicId >= FIRST_NI_AVXVNNI && intrinsicId <= LAST_NI_AVXVNNIINT_V512));
+                                TryMakeSrcContainedOrRegOptional(node, op3);
                                 break;
                             }
                         }
diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp
index 4c418118d4bbf2..8fb7ae7cfb6016 100644
--- a/src/coreclr/jit/lsraxarch.cpp
+++ b/src/coreclr/jit/lsraxarch.cpp
@@ -2761,6 +2761,10 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
 
             case NI_AVXVNNI_MultiplyWideningAndAdd:
             case NI_AVXVNNI_MultiplyWideningAndAddSaturate:
+            case NI_AVXVNNIINT_MultiplyWideningAndAdd:
+            case NI_AVXVNNIINT_MultiplyWideningAndAddSaturate:
+            case NI_AVXVNNIINT_V512_MultiplyWideningAndAdd:
+            case NI_AVXVNNIINT_V512_MultiplyWideningAndAddSaturate:
             {
                 assert(numArgs == 3);
 
diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs
index 649d665ef6fbfa..9c8da6eac983f3 100644
--- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs
+++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs
@@ -81,6 +81,7 @@ private static class XArchIntrinsicConstants
             public const int Vaes = (1 << 15);
             public const int WaitPkg = (1 << 16);
             public const int X86Serialize = (1 << 17);
+            public const int AvxVnniInt = (1 << 18);
 
             public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags)
             {
@@ -99,7 +100,12 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags)
                 if ((flags & Avx10v1) != 0)
                     builder.AddSupportedInstructionSet("avx10v1");
                 if ((flags & Avx10v2) != 0)
+                {
                     builder.AddSupportedInstructionSet("avx10v2");
+                    builder.AddSupportedInstructionSet("avxvnniint_v512");
+                }
+                if ((flags & AvxVnniInt) != 0)
+                    builder.AddSupportedInstructionSet("avxvnniint");
                 if ((flags & Apx) != 0)
                     builder.AddSupportedInstructionSet("apx");
 
@@ -188,6 +194,8 @@ public static int FromInstructionSet(InstructionSet instructionSet)
                     InstructionSet.X64_GFNI_X64 => Gfni,
                     InstructionSet.X64_GFNI_V256 => (Gfni | Avx),
                     InstructionSet.X64_GFNI_V512 => (Gfni | Avx512),
+                    InstructionSet.X64_AVXVNNIINT => AvxVnniInt,
+                    InstructionSet.X64_AVXVNNIINT_V512 => Avx10v2,
 
                     InstructionSet.X64_SHA => Sha,
                     InstructionSet.X64_SHA_X64 => Sha,
diff --git a/src/coreclr/tools/Common/InstructionSetHelpers.cs b/src/coreclr/tools/Common/InstructionSetHelpers.cs
index baef3c0d54c72f..0fb2dd0f5c8c81 100644
--- a/src/coreclr/tools/Common/InstructionSetHelpers.cs
+++ b/src/coreclr/tools/Common/InstructionSetHelpers.cs
@@ -203,6 +203,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru
                     optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx2");
                     optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxifma");
                     optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnni");
+                    optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnniint");
                     optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("aes_v256");
                     optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("gfni_v256");
 
@@ -222,6 +223,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru
                     optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx512v3");
                     optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v1");
                     optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v2");
+                    optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnniint_v512");
                     optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx512vp2intersect");
                     optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("aes_v512");
                     optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("gfni_v512");
diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs
index 47837ff594ce4d..30c9ceeefcab62 100644
--- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs
+++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs
@@ -70,6 +70,10 @@ public enum ReadyToRunInstructionSet
         Zba=57,
         Zbb=58,
         Sve2=59,
+        AvxVnniInt8=60,
+        AvxVnniInt8_V512=61,
+        AvxVnniInt16=62,
+        AvxVnniInt16_V512=63,
         Aes_V256=64,
         Aes_V512=65,
         AvxIfma=66,
diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs
index 8069076421aaa2..ef3c8b4d7e6b65 100644
--- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs
+++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs
@@ -116,6 +116,8 @@ public static class ReadyToRunInstructionSetHelper
                             case InstructionSet.X64_VectorT128: return ReadyToRunInstructionSet.VectorT128;
                             case InstructionSet.X64_VectorT256: return ReadyToRunInstructionSet.VectorT256;
                             case InstructionSet.X64_VectorT512: return ReadyToRunInstructionSet.VectorT512;
+                            case InstructionSet.X64_AVXVNNIINT: return ReadyToRunInstructionSet.AvxVnniInt8;
+                            case InstructionSet.X64_AVXVNNIINT_V512: return ReadyToRunInstructionSet.AvxVnniInt8_V512;
 
                             default: throw new Exception("Unknown instruction set");
                         }
@@ -170,6 +172,8 @@ public static class ReadyToRunInstructionSetHelper
                             case InstructionSet.X86_VectorT128: return ReadyToRunInstructionSet.VectorT128;
                             case InstructionSet.X86_VectorT256: return ReadyToRunInstructionSet.VectorT256;
                             case InstructionSet.X86_VectorT512: return ReadyToRunInstructionSet.VectorT512;
+                            case InstructionSet.X86_AVXVNNIINT: return ReadyToRunInstructionSet.AvxVnniInt8;
+                            case InstructionSet.X86_AVXVNNIINT_V512: return ReadyToRunInstructionSet.AvxVnniInt8_V512;
 
                             default: throw new Exception("Unknown instruction set");
                         }
diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs
index 3d92b216eb0012..0e3e406f989c8b 100644
--- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs
+++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs
@@ -74,6 +74,8 @@ public enum InstructionSet
         X64_VectorT128 = InstructionSet_X64.VectorT128,
         X64_VectorT256 = InstructionSet_X64.VectorT256,
         X64_VectorT512 = InstructionSet_X64.VectorT512,
+        X64_AVXVNNIINT = InstructionSet_X64.AVXVNNIINT,
+        X64_AVXVNNIINT_V512 = InstructionSet_X64.AVXVNNIINT_V512,
         X64_X86Base_X64 = InstructionSet_X64.X86Base_X64,
         X64_SSE42_X64 = InstructionSet_X64.SSE42_X64,
         X64_AVX_X64 = InstructionSet_X64.AVX_X64,
@@ -119,6 +121,8 @@ public enum InstructionSet
         X86_VectorT128 = InstructionSet_X86.VectorT128,
         X86_VectorT256 = InstructionSet_X86.VectorT256,
         X86_VectorT512 = InstructionSet_X86.VectorT512,
+        X86_AVXVNNIINT = InstructionSet_X86.AVXVNNIINT,
+        X86_AVXVNNIINT_V512 = InstructionSet_X86.AVXVNNIINT_V512,
         X86_X86Base_X64 = InstructionSet_X86.X86Base_X64,
         X86_SSE42_X64 = InstructionSet_X86.SSE42_X64,
         X86_AVX_X64 = InstructionSet_X86.AVX_X64,
@@ -211,23 +215,25 @@ public enum InstructionSet_X64
         VectorT128 = 26,
         VectorT256 = 27,
         VectorT512 = 28,
-        X86Base_X64 = 29,
-        SSE42_X64 = 30,
-        AVX_X64 = 31,
-        AVX2_X64 = 32,
-        AVX512_X64 = 33,
-        AVX512v2_X64 = 34,
-        AVX512v3_X64 = 35,
-        AVX10v1_X64 = 36,
-        AVX10v2_X64 = 37,
-        AES_X64 = 38,
-        AVX512VP2INTERSECT_X64 = 39,
-        AVXIFMA_X64 = 40,
-        AVXVNNI_X64 = 41,
-        GFNI_X64 = 42,
-        SHA_X64 = 43,
-        WAITPKG_X64 = 44,
-        X86Serialize_X64 = 45,
+        AVXVNNIINT = 29,
+        AVXVNNIINT_V512 = 30,
+        X86Base_X64 = 31,
+        SSE42_X64 = 32,
+        AVX_X64 = 33,
+        AVX2_X64 = 34,
+        AVX512_X64 = 35,
+        AVX512v2_X64 = 36,
+        AVX512v3_X64 = 37,
+        AVX10v1_X64 = 38,
+        AVX10v2_X64 = 39,
+        AES_X64 = 40,
+        AVX512VP2INTERSECT_X64 = 41,
+        AVXIFMA_X64 = 42,
+        AVXVNNI_X64 = 43,
+        GFNI_X64 = 44,
+        SHA_X64 = 45,
+        WAITPKG_X64 = 46,
+        X86Serialize_X64 = 47,
     }
 
     public enum InstructionSet_X86
@@ -262,23 +268,25 @@ public enum InstructionSet_X86
         VectorT128 = 26,
         VectorT256 = 27,
         VectorT512 = 28,
-        X86Base_X64 = 29,
-        SSE42_X64 = 30,
-        AVX_X64 = 31,
-        AVX2_X64 = 32,
-        AVX512_X64 = 33,
-        AVX512v2_X64 = 34,
-        AVX512v3_X64 = 35,
-        AVX10v1_X64 = 36,
-        AVX10v2_X64 = 37,
-        AES_X64 = 38,
-        AVX512VP2INTERSECT_X64 = 39,
-        AVXIFMA_X64 = 40,
-        AVXVNNI_X64 = 41,
-        GFNI_X64 = 42,
-        SHA_X64 = 43,
-        WAITPKG_X64 = 44,
-        X86Serialize_X64 = 45,
+        AVXVNNIINT = 29,
+        AVXVNNIINT_V512 = 30,
+        X86Base_X64 = 31,
+        SSE42_X64 = 32,
+        AVX_X64 = 33,
+        AVX2_X64 = 34,
+        AVX512_X64 = 35,
+        AVX512v2_X64 = 36,
+        AVX512v3_X64 = 37,
+        AVX10v1_X64 = 38,
+        AVX10v2_X64 = 39,
+        AES_X64 = 40,
+        AVX512VP2INTERSECT_X64 = 41,
+        AVXIFMA_X64 = 42,
+        AVXVNNI_X64 = 43,
+        GFNI_X64 = 44,
+        SHA_X64 = 45,
+        WAITPKG_X64 = 46,
+        X86Serialize_X64 = 47,
     }
 
     public unsafe struct InstructionSetFlags : IEnumerable<InstructionSet>
@@ -629,6 +637,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                         resultflags.AddInstructionSet(InstructionSet.X64_X86Base);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize))
                         resultflags.AddInstructionSet(InstructionSet.X64_X86Base);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX2);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNIINT_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_Vector128))
                         resultflags.AddInstructionSet(InstructionSet.X64_X86Base);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_Vector256))
@@ -692,6 +704,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                         resultflags.AddInstructionSet(InstructionSet.X86_X86Base);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_X86Serialize))
                         resultflags.AddInstructionSet(InstructionSet.X86_X86Base);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNIINT))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX2);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNIINT_V512))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX10v2);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_Vector128))
                         resultflags.AddInstructionSet(InstructionSet.X86_X86Base);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_Vector256))
@@ -863,6 +879,10 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe
                         resultflags.AddInstructionSet(InstructionSet.X64_WAITPKG);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base))
                         resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNIINT_V512);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base))
                         resultflags.AddInstructionSet(InstructionSet.X64_Vector128);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX))
@@ -926,6 +946,10 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe
                         resultflags.AddInstructionSet(InstructionSet.X86_WAITPKG);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base))
                         resultflags.AddInstructionSet(InstructionSet.X86_X86Serialize);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNIINT);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v2))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNIINT_V512);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base))
                         resultflags.AddInstructionSet(InstructionSet.X86_Vector128);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_AVX))
@@ -1085,6 +1109,10 @@ public static IEnumerable<InstructionSetInfo> ArchitectureToValidInstructionSets
                     yield return new InstructionSetInfo("vectort128", "", InstructionSet.X64_VectorT128, true);
                     yield return new InstructionSetInfo("vectort256", "", InstructionSet.X64_VectorT256, true);
                     yield return new InstructionSetInfo("vectort512", "", InstructionSet.X64_VectorT512, true);
+                    yield return new InstructionSetInfo("avxvnniint", "AvxVnniInt8", InstructionSet.X64_AVXVNNIINT, true);
+                    yield return new InstructionSetInfo("avxvnniint_v512", "AvxVnniInt8_V512", InstructionSet.X64_AVXVNNIINT_V512, true);
+                    yield return new InstructionSetInfo("avxvnniint", "AvxVnniInt16", InstructionSet.X64_AVXVNNIINT, true);
+                    yield return new InstructionSetInfo("avxvnniint_v512", "AvxVnniInt16_V512", InstructionSet.X64_AVXVNNIINT_V512, true);
                     break;
 
                 case TargetArchitecture.X86:
@@ -1154,6 +1182,10 @@ public static IEnumerable<InstructionSetInfo> ArchitectureToValidInstructionSets
                     yield return new InstructionSetInfo("vectort128", "", InstructionSet.X86_VectorT128, true);
                     yield return new InstructionSetInfo("vectort256", "", InstructionSet.X86_VectorT256, true);
                     yield return new InstructionSetInfo("vectort512", "", InstructionSet.X86_VectorT512, true);
+                    yield return new InstructionSetInfo("avxvnniint", "AvxVnniInt8", InstructionSet.X86_AVXVNNIINT, true);
+                    yield return new InstructionSetInfo("avxvnniint_v512", "AvxVnniInt8_V512", InstructionSet.X86_AVXVNNIINT_V512, true);
+                    yield return new InstructionSetInfo("avxvnniint", "AvxVnniInt16", InstructionSet.X86_AVXVNNIINT, true);
+                    yield return new InstructionSetInfo("avxvnniint_v512", "AvxVnniInt16_V512", InstructionSet.X86_AVXVNNIINT_V512, true);
                     break;
             }
         }
@@ -1739,6 +1771,18 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite
                         else
                         { return InstructionSet.X64_X86Serialize; }
 
+                    case "AvxVnniInt8":
+                        if (nestedTypeName == "V512")
+                        { return InstructionSet.X64_AVXVNNIINT_V512; }
+                        else
+                        { return InstructionSet.X64_AVXVNNIINT; }
+
+                    case "AvxVnniInt16":
+                        if (nestedTypeName == "V512")
+                        { return InstructionSet.X64_AVXVNNIINT_V512; }
+                        else
+                        { return InstructionSet.X64_AVXVNNIINT; }
+
                 }
                 break;
 
@@ -1911,6 +1955,18 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite
                     case "X86Serialize":
                         { return InstructionSet.X86_X86Serialize; }
 
+                    case "AvxVnniInt8":
+                        if (nestedTypeName == "V512")
+                        { return InstructionSet.X86_AVXVNNIINT_V512; }
+                        else
+                        { return InstructionSet.X86_AVXVNNIINT; }
+
+                    case "AvxVnniInt16":
+                        if (nestedTypeName == "V512")
+                        { return InstructionSet.X86_AVXVNNIINT_V512; }
+                        else
+                        { return InstructionSet.X86_AVXVNNIINT; }
+
                 }
                 break;
 
diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt
index 5d067eb3311e1c..24c3f474ab14de 100644
--- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt
+++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt
@@ -113,6 +113,12 @@ instructionset     ,X86   ,                     ,VectorT128         ,39 ,VectorT
 instructionset     ,X86   ,                     ,VectorT256         ,40 ,VectorT256            ,vectort256
 instructionset     ,X86   ,                     ,VectorT512         ,41 ,VectorT512            ,vectort512
 
+
+instructionset     ,X86   ,AvxVnniInt8          ,                   ,60 ,AVXVNNIINT            ,avxvnniint
+instructionset     ,X86   ,AvxVnniInt8_V512     ,                   ,61 ,AVXVNNIINT_V512       ,avxvnniint_v512
+instructionset     ,X86   ,AvxVnniInt16         ,                   ,62 ,AVXVNNIINT            ,avxvnniint
+instructionset     ,X86   ,AvxVnniInt16_V512    ,                   ,63 ,AVXVNNIINT_V512       ,avxvnniint_v512
+
 ; 64-bit Instruction Sets
 
 instructionset64bit,X86   ,X86Base
@@ -179,6 +185,9 @@ implication        ,X86   ,SHA                  ,X86Base
 implication        ,X86   ,WAITPKG              ,X86Base
 implication        ,X86   ,X86Serialize         ,X86Base
 
+implication        ,X86   ,AVXVNNIINT           ,AVX2
+implication        ,X86   ,AVXVNNIINT_V512      ,AVX10v2
+
 ; These synthetic ISAs need to appear after the core ISAs
 ; as they depend on the other implications being correct first
 ; otherwise they may not be disabled if the required isa is disabled
diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp
index bff4f38957bcf6..d1a6a75e7d3efd 100644
--- a/src/coreclr/vm/codeman.cpp
+++ b/src/coreclr/vm/codeman.cpp
@@ -1306,6 +1306,7 @@ void EEJitManager::SetCpuInfo()
     if (((cpuFeatures & XArchIntrinsicConstants_Avx10v2) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX10v2))
     {
         CPUCompileFlags.Set(InstructionSet_AVX10v2);
+        CPUCompileFlags.Set(InstructionSet_AVXVNNIINT_V512);
     }
 
 #if defined(TARGET_AMD64)
@@ -1355,6 +1356,11 @@ void EEJitManager::SetCpuInfo()
         CPUCompileFlags.Set(InstructionSet_SHA);
     }
 
+    if (((cpuFeatures & XArchIntrinsicConstants_AvxVnniInt) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVXVNNIINT))
+    {
+        CPUCompileFlags.Set(InstructionSet_AVXVNNIINT);
+    }
+
     if (((cpuFeatures & XArchIntrinsicConstants_WaitPkg) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableWAITPKG))
     {
         CPUCompileFlags.Set(InstructionSet_WAITPKG);
diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
index 199b3c14fadb66..150b1d6a8e11cd 100644
--- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
+++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
@@ -2679,6 +2679,8 @@
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Avx2$(NotSupportedOnMono).cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Avx10v1$(NotSupportedOnMono).cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Avx10v2$(NotSupportedOnMono).cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\AvxVnniInt8$(NotSupportedOnMono).cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\AvxVnniInt16$(NotSupportedOnMono).cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Avx512BW$(NotSupportedOnMono).cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Avx512CD$(NotSupportedOnMono).cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Avx512DQ$(NotSupportedOnMono).cs" />
@@ -2713,6 +2715,8 @@
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Avx512F.PlatformNotSupported.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Avx10v1.PlatformNotSupported.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Avx10v2.PlatformNotSupported.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\AvxVnniInt8.PlatformNotSupported.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\AvxVnniInt16.PlatformNotSupported.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Avx512Vbmi.PlatformNotSupported.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Avx512Vbmi2.PlatformNotSupported.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\AvxVnni.PlatformNotSupported.cs" />
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt16.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt16.PlatformNotSupported.cs
new file mode 100644
index 00000000000000..c91bbe0f481dc6
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt16.PlatformNotSupported.cs
@@ -0,0 +1,99 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Diagnostics.CodeAnalysis;
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+
+namespace System.Runtime.Intrinsics.X86
+{
+    /// <summary>Provides access to the x86 AVX-VNNI-INT16 hardware instructions via intrinsics.</summary>
+    [CLSCompliant(false)]
+    public abstract class AvxVnniInt16 : Avx2
+    {
+        internal AvxVnniInt16() { }
+
+        /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary>
+        /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value>
+        /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks>
+        public static new bool IsSupported { [Intrinsic] get { return false; } }
+
+        /// <summary>Provides access to the x86 AVX-VNNI-INT16 hardware instructions, that are only available to 64-bit processes, via intrinsics.</summary>
+        public new abstract class X64 : Avx2.X64
+        {
+            internal X64() { }
+
+            /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary>
+            /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value>
+            /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks>
+            public static new bool IsSupported { [Intrinsic] get { return false; } }
+        }
+
+        // VPDPWSUD xmm1, xmm2, xmm3/m128
+        public static Vector128<int> MultiplyWideningAndAdd(Vector128<int> addend, Vector128<short> left, Vector128<ushort> right)  { throw new PlatformNotSupportedException(); }
+
+        // VPDPWUSD xmm1, xmm2, xmm3/m128
+        public static Vector128<int> MultiplyWideningAndAdd(Vector128<int> addend, Vector128<ushort> left, Vector128<short> right)  { throw new PlatformNotSupportedException(); }
+
+        // VPDPWUUD xmm1, xmm2, xmm3/m128
+        public static Vector128<uint> MultiplyWideningAndAdd(Vector128<uint> addend, Vector128<ushort> left, Vector128<ushort> right)  { throw new PlatformNotSupportedException(); }
+
+        // VPDPWSUD ymm1, ymm2, ymm3/m256
+        public static Vector256<int> MultiplyWideningAndAdd(Vector256<int> addend, Vector256<short> left, Vector256<ushort> right)  { throw new PlatformNotSupportedException(); }
+
+        // VPDPWUSD ymm1, ymm2, ymm3/m256
+        public static Vector256<int> MultiplyWideningAndAdd(Vector256<int> addend, Vector256<ushort> left, Vector256<short> right)  { throw new PlatformNotSupportedException(); }
+
+        // VPDPWUUD ymm1, ymm2, ymm3/m256
+        public static Vector256<uint> MultiplyWideningAndAdd(Vector256<uint> addend, Vector256<ushort> left, Vector256<ushort> right)  { throw new PlatformNotSupportedException(); }
+
+        // VPDPWSUDS xmm1, xmm2, xmm3/m128
+        public static Vector128<int> MultiplyWideningAndAddSaturate(Vector128<int> addend, Vector128<short> left, Vector128<ushort> right)  { throw new PlatformNotSupportedException(); }
+
+        // VPDPWUSDS xmm1, xmm2, xmm3/m128
+        public static Vector128<int> MultiplyWideningAndAddSaturate(Vector128<int> addend, Vector128<ushort> left, Vector128<short> right)  { throw new PlatformNotSupportedException(); }
+
+        // VPDPWUUDS xmm1, xmm2, xmm3/m128
+        public static Vector128<uint> MultiplyWideningAndAddSaturate(Vector128<uint> addend, Vector128<ushort> left, Vector128<ushort> right)  { throw new PlatformNotSupportedException(); }
+
+        // VPDPWSUDS ymm1, ymm2, ymm3/m256
+        public static Vector256<int> MultiplyWideningAndAddSaturate(Vector256<int> addend, Vector256<short> left, Vector256<ushort> right)  { throw new PlatformNotSupportedException(); }
+
+        // VPDPWUSDS ymm1, ymm2, ymm3/m256
+        public static Vector256<int> MultiplyWideningAndAddSaturate(Vector256<int> addend, Vector256<ushort> left, Vector256<short> right)  { throw new PlatformNotSupportedException(); }
+
+        // VPDPWUUDS ymm1, ymm2, ymm3/m256
+        public static Vector256<uint> MultiplyWideningAndAddSaturate(Vector256<uint> addend, Vector256<ushort> left, Vector256<ushort> right)  { throw new PlatformNotSupportedException(); }
+
+        /// <summary>Provides access to the x86 AVX10.2/512 hardware instructions for AVX-VNNI-INT16 via intrinsics.</summary>
+        [Intrinsic]
+        public abstract class V512
+        {
+            internal V512() { }
+
+            /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary>
+            /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value>
+            /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks>
+            public static bool IsSupported { [Intrinsic] get { return false; } }
+
+            // VPDPWSUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
+            public static Vector512<int> MultiplyWideningAndAdd(Vector512<int> addend, Vector512<short> left, Vector512<ushort> right)  { throw new PlatformNotSupportedException(); }
+
+            // VPDPWUSD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
+            public static Vector512<int> MultiplyWideningAndAdd(Vector512<int> addend, Vector512<ushort> left, Vector512<short> right)  { throw new PlatformNotSupportedException(); }
+
+            // VPDPWUUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
+            public static Vector512<uint> MultiplyWideningAndAdd(Vector512<uint> addend, Vector512<ushort> left, Vector512<ushort> right)  { throw new PlatformNotSupportedException(); }
+
+            // VPDPWSUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
+            public static Vector512<int> MultiplyWideningAndAddSaturate(Vector512<int> addend, Vector512<short> left, Vector512<ushort> right)  { throw new PlatformNotSupportedException(); }
+
+            // VPDPWUSDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
+            public static Vector512<int> MultiplyWideningAndAddSaturate(Vector512<int> addend, Vector512<ushort> left, Vector512<short> right)  { throw new PlatformNotSupportedException(); }
+
+            // VPDPWUUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
+            public static Vector512<uint> MultiplyWideningAndAddSaturate(Vector512<uint> addend, Vector512<ushort> left, Vector512<ushort> right)  { throw new PlatformNotSupportedException(); }
+        }
+    }
+}
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt16.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt16.cs
new file mode 100644
index 00000000000000..8e9aa059a84d2f
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt16.cs
@@ -0,0 +1,98 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+using System.Diagnostics.CodeAnalysis;
+using System.Runtime.CompilerServices;
+
+namespace System.Runtime.Intrinsics.X86
+{
+    /// <summary>Provides access to the x86 AVX-VNNI-INT16 hardware instructions via intrinsics.</summary>
+    [Intrinsic]
+    [CLSCompliant(false)]
+    public abstract class AvxVnniInt16 : Avx2
+    {
+        internal AvxVnniInt16() { }
+
+        /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary>
+        /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value>
+        /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks>
+        public static new bool IsSupported { get => IsSupported; }
+
+        /// <summary>Provides access to the x86 AVX-VNNI-INT16 hardware instructions, that are only available to 64-bit processes, via intrinsics.</summary>
+        [Intrinsic]
+        public new abstract class X64 : Avx2.X64
+        {
+            internal X64() { }
+
+            /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary>
+            /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value>
+            /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks>
+            public static new bool IsSupported { get => IsSupported; }
+        }
+
+        // VPDPWSUD xmm1, xmm2, xmm3/m128
+        public static Vector128<int> MultiplyWideningAndAdd(Vector128<int> addend, Vector128<short> left, Vector128<ushort> right) => MultiplyWideningAndAdd(addend, left, right);
+
+        // VPDPWUSD xmm1, xmm2, xmm3/m128
+        public static Vector128<int> MultiplyWideningAndAdd(Vector128<int> addend, Vector128<ushort> left, Vector128<short> right) => MultiplyWideningAndAdd(addend, left, right);
+
+        // VPDPWUUD xmm1, xmm2, xmm3/m128
+        public static Vector128<uint> MultiplyWideningAndAdd(Vector128<uint> addend, Vector128<ushort> left, Vector128<ushort> right) => MultiplyWideningAndAdd(addend, left, right);
+
+        // VPDPWSUD ymm1, ymm2, ymm3/m256
+        public static Vector256<int> MultiplyWideningAndAdd(Vector256<int> addend, Vector256<short> left, Vector256<ushort> right) => MultiplyWideningAndAdd(addend, left, right);
+
+        // VPDPWUSD ymm1, ymm2, ymm3/m256
+        public static Vector256<int> MultiplyWideningAndAdd(Vector256<int> addend, Vector256<ushort> left, Vector256<short> right) => MultiplyWideningAndAdd(addend, left, right);
+
+        // VPDPWUUD ymm1, ymm2, ymm3/m256
+        public static Vector256<uint> MultiplyWideningAndAdd(Vector256<uint> addend, Vector256<ushort> left, Vector256<ushort> right) => MultiplyWideningAndAdd(addend, left, right);
+
+        // VPDPWSUDS xmm1, xmm2, xmm3/m128
+        public static Vector128<int> MultiplyWideningAndAddSaturate(Vector128<int> addend, Vector128<short> left, Vector128<ushort> right) => MultiplyWideningAndAddSaturate(addend, left, right);
+
+        // VPDPWUSDS xmm1, xmm2, xmm3/m128
+        public static Vector128<int> MultiplyWideningAndAddSaturate(Vector128<int> addend, Vector128<ushort> left, Vector128<short> right) => MultiplyWideningAndAddSaturate(addend, left, right);
+
+        // VPDPWUUDS xmm1, xmm2, xmm3/m128
+        public static Vector128<uint> MultiplyWideningAndAddSaturate(Vector128<uint> addend, Vector128<ushort> left, Vector128<ushort> right) => MultiplyWideningAndAddSaturate(addend, left, right);
+
+        // VPDPWSUDS ymm1, ymm2, ymm3/m256
+        public static Vector256<int> MultiplyWideningAndAddSaturate(Vector256<int> addend, Vector256<short> left, Vector256<ushort> right) => MultiplyWideningAndAddSaturate(addend, left, right);
+
+        // VPDPWUSDS ymm1, ymm2, ymm3/m256
+        public static Vector256<int> MultiplyWideningAndAddSaturate(Vector256<int> addend, Vector256<ushort> left, Vector256<short> right) => MultiplyWideningAndAddSaturate(addend, left, right);
+
+        // VPDPWUUDS ymm1, ymm2, ymm3/m256
+        public static Vector256<uint> MultiplyWideningAndAddSaturate(Vector256<uint> addend, Vector256<ushort> left, Vector256<ushort> right) => MultiplyWideningAndAddSaturate(addend, left, right);
+
+        /// <summary>Provides access to the x86 AVX10.2/512 hardware instructions for AVX-VNNI-INT16 via intrinsics.</summary>
+        [Intrinsic]
+        public abstract class V512
+        {
+            internal V512() { }
+
+            /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary>
+            /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value>
+            /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks>
+            public static bool IsSupported { get => IsSupported; }
+
+            // VPDPWSUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
+            public static Vector512<int> MultiplyWideningAndAdd(Vector512<int> addend, Vector512<short> left, Vector512<ushort> right) => MultiplyWideningAndAdd(addend, left, right);
+
+            // VPDPWUSD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
+            public static Vector512<int> MultiplyWideningAndAdd(Vector512<int> addend, Vector512<ushort> left, Vector512<short> right) => MultiplyWideningAndAdd(addend, left, right);
+
+            // VPDPWUUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
+            public static Vector512<uint> MultiplyWideningAndAdd(Vector512<uint> addend, Vector512<ushort> left, Vector512<ushort> right) => MultiplyWideningAndAdd(addend, left, right);
+
+            // VPDPWSUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
+            public static Vector512<int> MultiplyWideningAndAddSaturate(Vector512<int> addend, Vector512<short> left, Vector512<ushort> right) => MultiplyWideningAndAddSaturate(addend, left, right);
+
+            // VPDPWUSDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
+            public static Vector512<int> MultiplyWideningAndAddSaturate(Vector512<int> addend, Vector512<ushort> left, Vector512<short> right) => MultiplyWideningAndAddSaturate(addend, left, right);
+
+            // VPDPWUUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
+            public static Vector512<uint> MultiplyWideningAndAddSaturate(Vector512<uint> addend, Vector512<ushort> left, Vector512<ushort> right) => MultiplyWideningAndAddSaturate(addend, left, right);
+        }
+    }
+}
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt8.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt8.PlatformNotSupported.cs
new file mode 100644
index 00000000000000..19f79db01415c2
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt8.PlatformNotSupported.cs
@@ -0,0 +1,97 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Diagnostics.CodeAnalysis;
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+
+namespace System.Runtime.Intrinsics.X86
+{
+    /// <summary>Provides access to the x86 AVX-VNNI-INT8 hardware instructions via intrinsics.</summary>
+    [CLSCompliant(false)]
+    public abstract class AvxVnniInt8 : Avx2
+    {
+        internal AvxVnniInt8() { }
+
+        /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary>
+        /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value>
+        /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks>
+        public static new bool IsSupported { [Intrinsic] get { return false; } }
+
+        public new abstract class X64 : Avx2.X64
+        {
+            internal X64() { }
+
+            /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary>
+            /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value>
+            /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks>
+            public static new bool IsSupported { [Intrinsic] get { return false; } }
+        }
+
+        // VPDPBSSD xmm1, xmm2, xmm3/m128
+        public static Vector128<int> MultiplyWideningAndAdd(Vector128<int> addend, Vector128<sbyte> left, Vector128<sbyte> right)  { throw new PlatformNotSupportedException(); }
+
+        // VPDPBSUD xmm1, xmm2, xmm3/m128
+        public static Vector128<int> MultiplyWideningAndAdd(Vector128<int> addend, Vector128<sbyte> left, Vector128<byte> right)  { throw new PlatformNotSupportedException(); }
+
+        // VPDPBUUD xmm1, xmm2, xmm3/m128
+        public static Vector128<uint> MultiplyWideningAndAdd(Vector128<uint> addend, Vector128<byte> left, Vector128<byte> right)  { throw new PlatformNotSupportedException(); }
+
+        // VPDPBSSD ymm1, ymm2, ymm3/m256
+        public static Vector256<int> MultiplyWideningAndAdd(Vector256<int> addend, Vector256<sbyte> left, Vector256<sbyte> right)  { throw new PlatformNotSupportedException(); }
+
+        // VPDPBSUD ymm1, ymm2, ymm3/m256
+        public static Vector256<int> MultiplyWideningAndAdd(Vector256<int> addend, Vector256<sbyte> left, Vector256<byte> right)  { throw new PlatformNotSupportedException(); }
+
+        // VPDPBUUD ymm1, ymm2, ymm3/m256
+        public static Vector256<uint> MultiplyWideningAndAdd(Vector256<uint> addend, Vector256<byte> left, Vector256<byte> right)  { throw new PlatformNotSupportedException(); }
+
+        // VPDPBSSDS xmm1, xmm2, xmm3/m128
+        public static Vector128<int> MultiplyWideningAndAddSaturate(Vector128<int> addend, Vector128<sbyte> left, Vector128<sbyte> right)  { throw new PlatformNotSupportedException(); }
+
+        // VPDPBSUDS xmm1, xmm2, xmm3/m128
+        public static Vector128<int> MultiplyWideningAndAddSaturate(Vector128<int> addend, Vector128<sbyte> left, Vector128<byte> right)  { throw new PlatformNotSupportedException(); }
+
+        // VPDPBUUDS xmm1, xmm2, xmm3/m128
+        public static Vector128<uint> MultiplyWideningAndAddSaturate(Vector128<uint> addend, Vector128<byte> left, Vector128<byte> right)  { throw new PlatformNotSupportedException(); }
+
+        // VPDPBSSDS ymm1, ymm2, ymm3/m256
+        public static Vector256<int> MultiplyWideningAndAddSaturate(Vector256<int> addend, Vector256<sbyte> left, Vector256<sbyte> right)  { throw new PlatformNotSupportedException(); }
+
+        // VPDPBSUDS ymm1, ymm2, ymm3/m256
+        public static Vector256<int> MultiplyWideningAndAddSaturate(Vector256<int> addend, Vector256<sbyte> left, Vector256<byte> right)  { throw new PlatformNotSupportedException(); }
+
+        // VPDPBUUDS ymm1, ymm2, ymm3/m256
+        public static Vector256<uint> MultiplyWideningAndAddSaturate(Vector256<uint> addend, Vector256<byte> left, Vector256<byte> right)  { throw new PlatformNotSupportedException(); }
+
+        /// <summary>Provides access to the x86 AVX10.2/512 hardware instructions for AVX-VNNI-INT8 via intrinsics.</summary>
+        public abstract class V512
+        {
+            internal V512() { }
+
+            /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary>
+            /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value>
+            /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks>
+            public static bool IsSupported { [Intrinsic] get { return false; } }
+
+            // VPDPBSSD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
+            public static Vector512<int> MultiplyWideningAndAdd(Vector512<int> addend, Vector512<sbyte> left, Vector512<sbyte> right)  { throw new PlatformNotSupportedException(); }
+
+            // VPDPBSUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
+            public static Vector512<int> MultiplyWideningAndAdd(Vector512<int> addend, Vector512<sbyte> left, Vector512<byte> right)  { throw new PlatformNotSupportedException(); }
+
+            // VPDPBUUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
+            public static Vector512<uint> MultiplyWideningAndAdd(Vector512<uint> addend, Vector512<byte> left, Vector512<byte> right)  { throw new PlatformNotSupportedException(); }
+
+            // VPDPBSSDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
+            public static Vector512<int> MultiplyWideningAndAddSaturate(Vector512<int> addend, Vector512<sbyte> left, Vector512<sbyte> right)  { throw new PlatformNotSupportedException(); }
+
+            // VPDPBSUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
+            public static Vector512<int> MultiplyWideningAndAddSaturate(Vector512<int> addend, Vector512<sbyte> left, Vector512<byte> right)  { throw new PlatformNotSupportedException(); }
+
+            // VPDPBUUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
+            public static Vector512<uint> MultiplyWideningAndAddSaturate(Vector512<uint> addend, Vector512<byte> left, Vector512<byte> right)  { throw new PlatformNotSupportedException(); }
+        }
+    }
+}
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt8.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt8.cs
new file mode 100644
index 00000000000000..20668158f87dad
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnniInt8.cs
@@ -0,0 +1,99 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics.CodeAnalysis;
+using System.Runtime.CompilerServices;
+
+namespace System.Runtime.Intrinsics.X86
+{
+    /// <summary>Provides access to the x86 AVX-VNNI-INT8 hardware instructions via intrinsics.</summary>
+    [Intrinsic]
+    [CLSCompliant(false)]
+    public abstract class AvxVnniInt8 : Avx2
+    {
+        internal AvxVnniInt8() { }
+
+        /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary>
+        /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value>
+        /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks>
+        public static new bool IsSupported { get => IsSupported; }
+
+        /// <summary>Provides access to the x86 AVX-VNNI-INT8 hardware instructions, that are only available to 64-bit processes, via intrinsics.</summary>
+        [Intrinsic]
+        public new abstract class X64 : Avx2.X64
+        {
+            internal X64() { }
+
+            /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary>
+            /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value>
+            /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks>
+            public static new bool IsSupported { get => IsSupported; }
+        }
+
+        // VPDPBSSD xmm1, xmm2, xmm3/m128
+        public static Vector128<int> MultiplyWideningAndAdd(Vector128<int> addend, Vector128<sbyte> left, Vector128<sbyte> right) => MultiplyWideningAndAdd(addend, left, right);
+
+        // VPDPBSUD xmm1, xmm2, xmm3/m128
+        public static Vector128<int> MultiplyWideningAndAdd(Vector128<int> addend, Vector128<sbyte> left, Vector128<byte> right) => MultiplyWideningAndAdd(addend, left, right);
+
+        // VPDPBUUD xmm1, xmm2, xmm3/m128
+        public static Vector128<uint> MultiplyWideningAndAdd(Vector128<uint> addend, Vector128<byte> left, Vector128<byte> right) => MultiplyWideningAndAdd(addend, left, right);
+
+        // VPDPBSSD ymm1, ymm2, ymm3/m256
+        public static Vector256<int> MultiplyWideningAndAdd(Vector256<int> addend, Vector256<sbyte> left, Vector256<sbyte> right) => MultiplyWideningAndAdd(addend, left, right);
+
+        // VPDPBSUD ymm1, ymm2, ymm3/m256
+        public static Vector256<int> MultiplyWideningAndAdd(Vector256<int> addend, Vector256<sbyte> left, Vector256<byte> right) => MultiplyWideningAndAdd(addend, left, right);
+
+        // VPDPBUUD ymm1, ymm2, ymm3/m256
+        public static Vector256<uint> MultiplyWideningAndAdd(Vector256<uint> addend, Vector256<byte> left, Vector256<byte> right) => MultiplyWideningAndAdd(addend, left, right);
+
+        // VPDPBSSDS xmm1, xmm2, xmm3/m128
+        public static Vector128<int> MultiplyWideningAndAddSaturate(Vector128<int> addend, Vector128<sbyte> left, Vector128<sbyte> right) => MultiplyWideningAndAddSaturate(addend, left, right);
+
+        // VPDPBSUDS xmm1, xmm2, xmm3/m128
+        public static Vector128<int> MultiplyWideningAndAddSaturate(Vector128<int> addend, Vector128<sbyte> left, Vector128<byte> right) => MultiplyWideningAndAddSaturate(addend, left, right);
+
+        // VPDPBUUDS xmm1, xmm2, xmm3/m128
+        public static Vector128<uint> MultiplyWideningAndAddSaturate(Vector128<uint> addend, Vector128<byte> left, Vector128<byte> right) => MultiplyWideningAndAddSaturate(addend, left, right);
+
+        // VPDPBSSDS ymm1, ymm2, ymm3/m256
+        public static Vector256<int> MultiplyWideningAndAddSaturate(Vector256<int> addend, Vector256<sbyte> left, Vector256<sbyte> right) => MultiplyWideningAndAddSaturate(addend, left, right);
+
+        // VPDPBSUDS ymm1, ymm2, ymm3/m256
+        public static Vector256<int> MultiplyWideningAndAddSaturate(Vector256<int> addend, Vector256<sbyte> left, Vector256<byte> right) => MultiplyWideningAndAddSaturate(addend, left, right);
+
+        // VPDPBUUDS ymm1, ymm2, ymm3/m256
+        public static Vector256<uint> MultiplyWideningAndAddSaturate(Vector256<uint> addend, Vector256<byte> left, Vector256<byte> right) => MultiplyWideningAndAddSaturate(addend, left, right);
+
+        /// <summary>Provides access to the x86 AVX10.2/512 hardware instructions for AVX-VNNI-INT8 via intrinsics.</summary>
+        [Intrinsic]
+        public abstract class V512
+        {
+            internal V512() { }
+
+            /// <summary>Gets a value that indicates whether the APIs in this class are supported.</summary>
+            /// <value><see langword="true" /> if the APIs are supported; otherwise, <see langword="false" />.</value>
+            /// <remarks>A value of <see langword="false" /> indicates that the APIs will throw <see cref="PlatformNotSupportedException" />.</remarks>
+            public static bool IsSupported { get => IsSupported; }
+
+            // VPDPBSSD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
+            public static Vector512<int> MultiplyWideningAndAdd(Vector512<int> addend, Vector512<sbyte> left, Vector512<sbyte> right) => MultiplyWideningAndAdd(addend, left, right);
+
+            // VPDPBSUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
+            public static Vector512<int> MultiplyWideningAndAdd(Vector512<int> addend, Vector512<sbyte> left, Vector512<byte> right) => MultiplyWideningAndAdd(addend, left, right);
+
+            // VPDPBUUD zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
+            public static Vector512<uint> MultiplyWideningAndAdd(Vector512<uint> addend, Vector512<byte> left, Vector512<byte> right) => MultiplyWideningAndAdd(addend, left, right);
+
+            // VPDPBSSDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
+            public static Vector512<int> MultiplyWideningAndAddSaturate(Vector512<int> addend, Vector512<sbyte> left, Vector512<sbyte> right) => MultiplyWideningAndAddSaturate(addend, left, right);
+
+            // VPDPBSUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
+            public static Vector512<int> MultiplyWideningAndAddSaturate(Vector512<int> addend, Vector512<sbyte> left, Vector512<byte> right) => MultiplyWideningAndAddSaturate(addend, left, right);
+
+            // VPDPBUUDS zmm1{k1}{z}, zmm2, zmm3/m512/m32bcst
+            public static Vector512<uint> MultiplyWideningAndAddSaturate(Vector512<uint> addend, Vector512<byte> left, Vector512<byte> right) => MultiplyWideningAndAddSaturate(addend, left, right);
+        }
+    }
+}
diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs
index b275bb3f925b2a..a36182d8a1d0cd 100644
--- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs
+++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs
@@ -8014,6 +8014,76 @@ internal X64() { }
         }
     }
 
+    [System.CLSCompliantAttribute(false)]
+    public abstract partial class AvxVnniInt8 : System.Runtime.Intrinsics.X86.Avx2
+    {
+        internal AvxVnniInt8() { }
+        public static new bool IsSupported { get { throw null; } }
+        public static System.Runtime.Intrinsics.Vector128<int> MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128<int> addend, System.Runtime.Intrinsics.Vector128<sbyte> left, System.Runtime.Intrinsics.Vector128<sbyte> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<int> MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128<int> addend, System.Runtime.Intrinsics.Vector128<sbyte> left, System.Runtime.Intrinsics.Vector128<byte> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<uint> MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128<uint> addend, System.Runtime.Intrinsics.Vector128<byte> left, System.Runtime.Intrinsics.Vector128<byte> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<int> MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256<int> addend, System.Runtime.Intrinsics.Vector256<sbyte> left, System.Runtime.Intrinsics.Vector256<sbyte> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<int> MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256<int> addend, System.Runtime.Intrinsics.Vector256<sbyte> left, System.Runtime.Intrinsics.Vector256<byte> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<uint> MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256<uint> addend, System.Runtime.Intrinsics.Vector256<byte> left, System.Runtime.Intrinsics.Vector256<byte> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<int> MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128<int> addend, System.Runtime.Intrinsics.Vector128<sbyte> left, System.Runtime.Intrinsics.Vector128<sbyte> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<int> MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128<int> addend, System.Runtime.Intrinsics.Vector128<sbyte> left, System.Runtime.Intrinsics.Vector128<byte> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<uint> MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128<uint> addend, System.Runtime.Intrinsics.Vector128<byte> left, System.Runtime.Intrinsics.Vector128<byte> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<int> MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256<int> addend, System.Runtime.Intrinsics.Vector256<sbyte> left, System.Runtime.Intrinsics.Vector256<sbyte> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<int> MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256<int> addend, System.Runtime.Intrinsics.Vector256<sbyte> left, System.Runtime.Intrinsics.Vector256<byte> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<uint> MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256<uint> addend, System.Runtime.Intrinsics.Vector256<byte> left, System.Runtime.Intrinsics.Vector256<byte> right) { throw null; }
+        public new abstract partial class X64 : System.Runtime.Intrinsics.X86.Avx2.X64
+        {
+            internal X64() { }
+            public static new bool IsSupported { get { throw null; } }
+        }
+        public abstract partial class V512
+        {
+            internal V512() { }
+            public static bool IsSupported { get { throw null; } }
+            public static System.Runtime.Intrinsics.Vector512<int> MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector512<int> addend, System.Runtime.Intrinsics.Vector512<sbyte> left, System.Runtime.Intrinsics.Vector512<sbyte> right) { throw null; }
+            public static System.Runtime.Intrinsics.Vector512<int> MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector512<int> addend, System.Runtime.Intrinsics.Vector512<sbyte> left, System.Runtime.Intrinsics.Vector512<byte> right) { throw null; }
+            public static System.Runtime.Intrinsics.Vector512<uint> MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector512<uint> addend, System.Runtime.Intrinsics.Vector512<byte> left, System.Runtime.Intrinsics.Vector512<byte> right) { throw null; }
+            public static System.Runtime.Intrinsics.Vector512<int> MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector512<int> addend, System.Runtime.Intrinsics.Vector512<sbyte> left, System.Runtime.Intrinsics.Vector512<sbyte> right) { throw null; }
+            public static System.Runtime.Intrinsics.Vector512<int> MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector512<int> addend, System.Runtime.Intrinsics.Vector512<sbyte> left, System.Runtime.Intrinsics.Vector512<byte> right) { throw null; }
+            public static System.Runtime.Intrinsics.Vector512<uint> MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector512<uint> addend, System.Runtime.Intrinsics.Vector512<byte> left, System.Runtime.Intrinsics.Vector512<byte> right) { throw null; }
+        }
+    }
+
+    [System.CLSCompliantAttribute(false)]
+    public abstract partial class AvxVnniInt16 : System.Runtime.Intrinsics.X86.Avx2
+    {
+        internal AvxVnniInt16() { }
+        public static new bool IsSupported { get { throw null; } }
+        public static System.Runtime.Intrinsics.Vector128<int> MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128<int> addend, System.Runtime.Intrinsics.Vector128<short> left, System.Runtime.Intrinsics.Vector128<ushort> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<int> MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128<int> addend, System.Runtime.Intrinsics.Vector128<ushort> left, System.Runtime.Intrinsics.Vector128<short> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<uint> MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128<uint> addend, System.Runtime.Intrinsics.Vector128<ushort> left, System.Runtime.Intrinsics.Vector128<ushort> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<int> MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256<int> addend, System.Runtime.Intrinsics.Vector256<short> left, System.Runtime.Intrinsics.Vector256<ushort> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<int> MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256<int> addend, System.Runtime.Intrinsics.Vector256<ushort> left, System.Runtime.Intrinsics.Vector256<short> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<uint> MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256<uint> addend, System.Runtime.Intrinsics.Vector256<ushort> left, System.Runtime.Intrinsics.Vector256<ushort> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<int> MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128<int> addend, System.Runtime.Intrinsics.Vector128<short> left, System.Runtime.Intrinsics.Vector128<ushort> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<int> MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128<int> addend, System.Runtime.Intrinsics.Vector128<ushort> left, System.Runtime.Intrinsics.Vector128<short> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<uint> MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128<uint> addend, System.Runtime.Intrinsics.Vector128<ushort> left, System.Runtime.Intrinsics.Vector128<ushort> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<int> MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256<int> addend, System.Runtime.Intrinsics.Vector256<short> left, System.Runtime.Intrinsics.Vector256<ushort> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<int> MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256<int> addend, System.Runtime.Intrinsics.Vector256<ushort> left, System.Runtime.Intrinsics.Vector256<short> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<uint> MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256<uint> addend, System.Runtime.Intrinsics.Vector256<ushort> left, System.Runtime.Intrinsics.Vector256<ushort> right) { throw null; }
+        public new abstract partial class X64 : System.Runtime.Intrinsics.X86.Avx2.X64
+        {
+            internal X64() { }
+            public static new bool IsSupported { get { throw null; } }
+        }
+        public abstract partial class V512
+        {
+            internal V512() { }
+            public static bool IsSupported { get { throw null; } }
+            public static System.Runtime.Intrinsics.Vector512<int> MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector512<int> addend, System.Runtime.Intrinsics.Vector512<short> left, System.Runtime.Intrinsics.Vector512<ushort> right) { throw null; }
+            public static System.Runtime.Intrinsics.Vector512<int> MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector512<int> addend, System.Runtime.Intrinsics.Vector512<ushort> left, System.Runtime.Intrinsics.Vector512<short> right) { throw null; }
+            public static System.Runtime.Intrinsics.Vector512<uint> MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector512<uint> addend, System.Runtime.Intrinsics.Vector512<ushort> left, System.Runtime.Intrinsics.Vector512<ushort> right) { throw null; }
+            public static System.Runtime.Intrinsics.Vector512<int> MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector512<int> addend, System.Runtime.Intrinsics.Vector512<short> left, System.Runtime.Intrinsics.Vector512<ushort> right) { throw null; }
+            public static System.Runtime.Intrinsics.Vector512<int> MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector512<int> addend, System.Runtime.Intrinsics.Vector512<ushort> left, System.Runtime.Intrinsics.Vector512<short> right) { throw null; }
+            public static System.Runtime.Intrinsics.Vector512<uint> MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector512<uint> addend, System.Runtime.Intrinsics.Vector512<ushort> left, System.Runtime.Intrinsics.Vector512<ushort> right) { throw null; }
+        }
+    }
+
     [System.CLSCompliantAttribute(false)]
     public abstract partial class Avx512BW : System.Runtime.Intrinsics.X86.Avx512F
     {
diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c
index 8785e7f2389461..9122f8411c9b52 100644
--- a/src/native/minipal/cpufeatures.c
+++ b/src/native/minipal/cpufeatures.c
@@ -385,6 +385,12 @@ int minipal_getcpufeatures(void)
                 result |= XArchIntrinsicConstants_AvxVnni;
             }
 
+            // Report AvxVnniInt only when EDX has both bit 4 (AVX-VNNI-INT8) and bit 10 (AVX-VNNI-INT16) set.
+            if ((cpuidInfo[CPUID_EDX] & ((1 << 4) | (1 << 10))) == ((1 << 4) | (1 << 10)))
+            {
+                result |= XArchIntrinsicConstants_AvxVnniInt;
+            }
+
             if ((cpuidInfo[CPUID_EAX] & (1 << 23)) != 0)                                                        // AVX-IFMA
             {
                 result |= XArchIntrinsicConstants_AvxIfma;
diff --git a/src/native/minipal/cpufeatures.h b/src/native/minipal/cpufeatures.h
index 905fcc0f6fa7e5..92284d18899d7e 100644
--- a/src/native/minipal/cpufeatures.h
+++ b/src/native/minipal/cpufeatures.h
@@ -29,6 +29,7 @@
 #define XArchIntrinsicConstants_Vaes (1 << 15)
 #define XArchIntrinsicConstants_WaitPkg (1 << 16)
 #define XArchIntrinsicConstants_X86Serialize (1 << 17)
+#define XArchIntrinsicConstants_AvxVnniInt (1 << 18) // set only when CPUID reports both AVX-VNNI-INT8 and AVX-VNNI-INT16
 #endif // HOST_X86 || HOST_AMD64
 
 #if defined(HOST_ARM64)
diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs
index e2c655f3d171e2..30c92a29703334 100644
--- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs
+++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs
@@ -1930,6 +1930,62 @@
     ("ImmBinOpTest.template",                    new Dictionary<string, string> { ["Isa"] = "Avx10v2.V512",            ["LoadIsa"] = "Avx10v1.V512", ["Method"] = "MinMax",                                                  ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single",                          ["Imm"] = "15",  ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",      ["ValidateFirstResult"] = "result[0] != -1.0 * (((Math.Abs(left[0]) > Math.Abs(right[0])) ? left[0] : right[0]))",                                                                                                                                                                          ["ValidateRemainingResults"] = "result[i] != -1.0 * (((Math.Abs(left[i]) > Math.Abs(right[i])) ? left[i] : right[i]))"}),
 };
 
+(string templateFileName, Dictionary<string, string> templateData)[] AvxVnniInt8Inputs = new [] // SimpleTernOpTest inputs for AvxVnniInt8: Vector128/Vector256 MultiplyWideningAndAdd, then MultiplyWideningAndAddSaturate
+{
+    ("SimpleTernOpTest.template", new Dictionary<string, string> { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != firstOp[0] + (int)secondOp[0]*(int)thirdOp[0] + (int)secondOp[1]*(int)thirdOp[1] + (int)secondOp[2]*(int)thirdOp[2] + (int)secondOp[3]*(int)thirdOp[3]", ["ValidateRemainingResults"] = "result[i] != firstOp[i] + (int)secondOp[4*i]*(int)thirdOp[4*i] + (int)secondOp[4*i + 1]*(int)thirdOp[4*i + 1] + (int)secondOp[4*i + 2]*(int)thirdOp[4*i + 2] + (int)secondOp[4*i + 3]*(int)thirdOp[4*i + 3]" }),
+    ("SimpleTernOpTest.template", new Dictionary<string, string> { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != firstOp[0] + (int)secondOp[0]*(int)thirdOp[0] + (int)secondOp[1]*(int)thirdOp[1] + (int)secondOp[2]*(int)thirdOp[2] + (int)secondOp[3]*(int)thirdOp[3]", ["ValidateRemainingResults"] = "result[i] != firstOp[i] + (int)secondOp[4*i]*(int)thirdOp[4*i] + (int)secondOp[4*i + 1]*(int)thirdOp[4*i + 1] + (int)secondOp[4*i + 2]*(int)thirdOp[4*i + 2] + (int)secondOp[4*i + 3]*(int)thirdOp[4*i + 3]" }),
+    ("SimpleTernOpTest.template", new Dictionary<string, string> { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != (uint)(firstOp[0] + (uint)secondOp[0]*(uint)thirdOp[0] + (uint)secondOp[1]*(uint)thirdOp[1] + (uint)secondOp[2]*(uint)thirdOp[2] + (uint)secondOp[3]*(uint)thirdOp[3])", ["ValidateRemainingResults"] = "result[i] != (uint)(firstOp[i] + (uint)secondOp[4*i]*(uint)thirdOp[4*i] + (uint)secondOp[4*i + 1]*(uint)thirdOp[4*i + 1] + (uint)secondOp[4*i + 2]*(uint)thirdOp[4*i + 2] + (uint)secondOp[4*i + 3]*(uint)thirdOp[4*i + 3])" }),
+    ("SimpleTernOpTest.template", new Dictionary<string, string> { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != firstOp[0] + (int)secondOp[0]*(int)thirdOp[0] + (int)secondOp[1]*(int)thirdOp[1] + (int)secondOp[2]*(int)thirdOp[2] + (int)secondOp[3]*(int)thirdOp[3]", ["ValidateRemainingResults"] = "result[i] != firstOp[i] + (int)secondOp[4*i]*(int)thirdOp[4*i] + (int)secondOp[4*i + 1]*(int)thirdOp[4*i + 1] + (int)secondOp[4*i + 2]*(int)thirdOp[4*i + 2] + (int)secondOp[4*i + 3]*(int)thirdOp[4*i + 3]" }),
+    ("SimpleTernOpTest.template", new Dictionary<string, string> { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != firstOp[0] + (int)secondOp[0]*(int)thirdOp[0] + (int)secondOp[1]*(int)thirdOp[1] + (int)secondOp[2]*(int)thirdOp[2] + (int)secondOp[3]*(int)thirdOp[3]", ["ValidateRemainingResults"] = "result[i] != firstOp[i] + (int)secondOp[4*i]*(int)thirdOp[4*i] + (int)secondOp[4*i + 1]*(int)thirdOp[4*i + 1] + (int)secondOp[4*i + 2]*(int)thirdOp[4*i + 2] + (int)secondOp[4*i + 3]*(int)thirdOp[4*i + 3]" }),
+    ("SimpleTernOpTest.template", new Dictionary<string, string> { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != (uint)(firstOp[0] + (uint)secondOp[0]*(uint)thirdOp[0] + (uint)secondOp[1]*(uint)thirdOp[1] + (uint)secondOp[2]*(uint)thirdOp[2] + (uint)secondOp[3]*(uint)thirdOp[3])", ["ValidateRemainingResults"] = "result[i] != (uint)(firstOp[i] + (uint)secondOp[4*i]*(uint)thirdOp[4*i] + (uint)secondOp[4*i + 1]*(uint)thirdOp[4*i + 1] + (uint)secondOp[4*i + 2]*(uint)thirdOp[4*i + 2] + (uint)secondOp[4*i + 3]*(uint)thirdOp[4*i + 3])" }),
+
+    ("SimpleTernOpTest.template", new Dictionary<string, string> { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], (long)int.MinValue, (long)int.MaxValue)" }),
+    ("SimpleTernOpTest.template", new Dictionary<string, string> { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], (long)int.MinValue, (long)int.MaxValue)" }),
+    ("SimpleTernOpTest.template", new Dictionary<string, string> { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], 0, (long)uint.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], 0, (long)uint.MaxValue)" }),
+    ("SimpleTernOpTest.template", new Dictionary<string, string> { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], (long)int.MinValue, (long)int.MaxValue)" }),
+    ("SimpleTernOpTest.template", new Dictionary<string, string> { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], (long)int.MinValue, (long)int.MaxValue)" }),
+    ("SimpleTernOpTest.template", new Dictionary<string, string> { ["Isa"] = "AvxVnniInt8", ["LoadIsa"] = "Avx2", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], 0, (long)uint.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], 0, (long)uint.MaxValue)" }),
+};
+
+(string templateFileName, Dictionary<string, string> templateData)[] AvxVnniInt8_V512Inputs = new [] // SimpleTernOpTest inputs for AvxVnniInt8.V512: Vector512 MultiplyWideningAndAdd, then MultiplyWideningAndAddSaturate
+{
+    ("SimpleTernOpTest.template", new Dictionary<string, string> { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != firstOp[0] + (int)secondOp[0]*(int)thirdOp[0] + (int)secondOp[1]*(int)thirdOp[1] + (int)secondOp[2]*(int)thirdOp[2] + (int)secondOp[3]*(int)thirdOp[3]", ["ValidateRemainingResults"] = "result[i] != firstOp[i] + (int)secondOp[4*i]*(int)thirdOp[4*i] + (int)secondOp[4*i + 1]*(int)thirdOp[4*i + 1] + (int)secondOp[4*i + 2]*(int)thirdOp[4*i + 2] + (int)secondOp[4*i + 3]*(int)thirdOp[4*i + 3]" }),
+    ("SimpleTernOpTest.template", new Dictionary<string, string> { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != firstOp[0] + (int)secondOp[0]*(int)thirdOp[0] + (int)secondOp[1]*(int)thirdOp[1] + (int)secondOp[2]*(int)thirdOp[2] + (int)secondOp[3]*(int)thirdOp[3]", ["ValidateRemainingResults"] = "result[i] != firstOp[i] + (int)secondOp[4*i]*(int)thirdOp[4*i] + (int)secondOp[4*i + 1]*(int)thirdOp[4*i + 1] + (int)secondOp[4*i + 2]*(int)thirdOp[4*i + 2] + (int)secondOp[4*i + 3]*(int)thirdOp[4*i + 3]" }),
+    ("SimpleTernOpTest.template", new Dictionary<string, string> { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAdd", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != (uint)(firstOp[0] + (uint)secondOp[0]*(uint)thirdOp[0] + (uint)secondOp[1]*(uint)thirdOp[1] + (uint)secondOp[2]*(uint)thirdOp[2] + (uint)secondOp[3]*(uint)thirdOp[3])", ["ValidateRemainingResults"] = "result[i] != (uint)(firstOp[i] + (uint)secondOp[4*i]*(uint)thirdOp[4*i] + (uint)secondOp[4*i + 1]*(uint)thirdOp[4*i + 1] + (uint)secondOp[4*i + 2]*(uint)thirdOp[4*i + 2] + (uint)secondOp[4*i + 3]*(uint)thirdOp[4*i + 3])" }),
+
+    ("SimpleTernOpTest.template", new Dictionary<string, string> { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], (long)int.MinValue, (long)int.MaxValue)" }),
+    ("SimpleTernOpTest.template", new Dictionary<string, string> { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], (long)int.MinValue, (long)int.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], (long)int.MinValue, (long)int.MaxValue)" }),
+    ("SimpleTernOpTest.template", new Dictionary<string, string> { ["Isa"] = "AvxVnniInt8.V512", ["LoadIsa"] = "Avx10v2.V512", ["Method"] = "MultiplyWideningAndAddSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1] + (long)secondOp[2]*(long)thirdOp[2] + (long)secondOp[3]*(long)thirdOp[3], 0, (long)uint.MaxValue)", ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[4*i]*(long)thirdOp[4*i] + (long)secondOp[4*i + 1]*(long)thirdOp[4*i + 1] + (long)secondOp[4*i + 2]*(long)thirdOp[4*i + 2] + (long)secondOp[4*i + 3]*(long)thirdOp[4*i + 3], 0, (long)uint.MaxValue)" }),
+};
+
+(string templateFileName, Dictionary<string, string> templateData)[] AvxVnniInt16Inputs = new []
+{
+    ("SimpleTernOpTest.template",      new Dictionary<string, string> { ["Isa"] = "AvxVnniInt16",            ["LoadIsa"] = "Avx2",                      ["Method"] = "MultiplyWideningAndAdd",                             ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16",  ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "UInt16",                                            ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",  ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",    ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()",     ["ValidateFirstResult"] = "result[0] != (int)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])",                                                       ["ValidateRemainingResults"] = "result[i] != (int)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}),
+    ("SimpleTernOpTest.template",      new Dictionary<string, string> { ["Isa"] = "AvxVnniInt16",            ["LoadIsa"] = "Avx2",                      ["Method"] = "MultiplyWideningAndAdd",                             ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16",  ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Int16",                                            ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",  ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",   ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()",      ["ValidateFirstResult"] = "result[0] != (int)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])",                                                       ["ValidateRemainingResults"] = "result[i] != (int)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}),
+    ("SimpleTernOpTest.template",      new Dictionary<string, string> { ["Isa"] = "AvxVnniInt16",            ["LoadIsa"] = "Avx2",                      ["Method"] = "MultiplyWideningAndAdd",                             ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16",   ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "UInt16",                                          ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",   ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()",     ["ValidateFirstResult"] = "result[0] != (uint)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])",                                                      ["ValidateRemainingResults"] = "result[i] != (uint)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}),
+    ("SimpleTernOpTest.template",      new Dictionary<string, string> { ["Isa"] = "AvxVnniInt16",            ["LoadIsa"] = "Avx2",                      ["Method"] = "MultiplyWideningAndAdd",                             ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32",  ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int16",  ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "UInt16",                                            ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",  ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",    ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()",     ["ValidateFirstResult"] = "result[0] != (int)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])",                                                       ["ValidateRemainingResults"] = "result[i] != (int)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}),
+    ("SimpleTernOpTest.template",      new Dictionary<string, string> { ["Isa"] = "AvxVnniInt16",            ["LoadIsa"] = "Avx2",                      ["Method"] = "MultiplyWideningAndAdd",                             ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32",  ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt16",  ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Int16",                                            ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",  ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",   ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()",      ["ValidateFirstResult"] = "result[0] != (int)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])",                                                       ["ValidateRemainingResults"] = "result[i] != (int)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}),
+    ("SimpleTernOpTest.template",      new Dictionary<string, string> { ["Isa"] = "AvxVnniInt16",            ["LoadIsa"] = "Avx2",                      ["Method"] = "MultiplyWideningAndAdd",                             ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt16",   ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "UInt16",                                          ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",   ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()",     ["ValidateFirstResult"] = "result[0] != (uint)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])",                                                      ["ValidateRemainingResults"] = "result[i] != (uint)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}),
+
+    ("SimpleTernOpTest.template",      new Dictionary<string, string> { ["Isa"] = "AvxVnniInt16",            ["LoadIsa"] = "Avx2",                      ["Method"] = "MultiplyWideningAndAddSaturate",                             ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16",  ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "UInt16",                                    ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",  ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",    ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()",     ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], (long)int.MinValue, (long)int.MaxValue)",          ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], (long)int.MinValue, (long)int.MaxValue)"}),
+    ("SimpleTernOpTest.template",      new Dictionary<string, string> { ["Isa"] = "AvxVnniInt16",            ["LoadIsa"] = "Avx2",                      ["Method"] = "MultiplyWideningAndAddSaturate",                             ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16",  ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Int16",                                    ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",  ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",   ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()",      ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], (long)int.MinValue, (long)int.MaxValue)",          ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], (long)int.MinValue, (long)int.MaxValue)"}),
+    ("SimpleTernOpTest.template",      new Dictionary<string, string> { ["Isa"] = "AvxVnniInt16",            ["LoadIsa"] = "Avx2",                      ["Method"] = "MultiplyWideningAndAddSaturate",                             ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16",   ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "UInt16",                                  ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",   ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()",     ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], 0, (long)uint.MaxValue)",                          ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], 0, (long)uint.MaxValue)"}),
+    ("SimpleTernOpTest.template",      new Dictionary<string, string> { ["Isa"] = "AvxVnniInt16",            ["LoadIsa"] = "Avx2",                      ["Method"] = "MultiplyWideningAndAddSaturate",                             ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32",  ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int16",  ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "UInt16",                                    ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",  ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",    ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()",     ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], (long)int.MinValue, (long)int.MaxValue)",          ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], (long)int.MinValue, (long)int.MaxValue)"}),
+    ("SimpleTernOpTest.template",      new Dictionary<string, string> { ["Isa"] = "AvxVnniInt16",            ["LoadIsa"] = "Avx2",                      ["Method"] = "MultiplyWideningAndAddSaturate",                             ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32",  ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt16",  ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Int16",                                    ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",  ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",   ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()",      ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], (long)int.MinValue, (long)int.MaxValue)",          ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], (long)int.MinValue, (long)int.MaxValue)"}),
+    ("SimpleTernOpTest.template",      new Dictionary<string, string> { ["Isa"] = "AvxVnniInt16",            ["LoadIsa"] = "Avx2",                      ["Method"] = "MultiplyWideningAndAddSaturate",                             ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt16",   ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "UInt16",                                  ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",   ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()",     ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], 0, (long)uint.MaxValue)",                          ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], 0, (long)uint.MaxValue)"}),
+};
+
+(string templateFileName, Dictionary<string, string> templateData)[] AvxVnniInt16_V512Inputs = new []
+{
+    ("SimpleTernOpTest.template",      new Dictionary<string, string> { ["Isa"] = "AvxVnniInt16.V512",            ["LoadIsa"] = "Avx10v2.V512",                      ["Method"] = "MultiplyWideningAndAdd",                             ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32",  ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int16",  ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "UInt16",                                    ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",  ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",    ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()",     ["ValidateFirstResult"] = "result[0] != (int)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])",                                                       ["ValidateRemainingResults"] = "result[i] != (int)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}),
+    ("SimpleTernOpTest.template",      new Dictionary<string, string> { ["Isa"] = "AvxVnniInt16.V512",            ["LoadIsa"] = "Avx10v2.V512",                      ["Method"] = "MultiplyWideningAndAdd",                             ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32",  ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16",  ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Int16",                                    ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",  ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",   ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()",      ["ValidateFirstResult"] = "result[0] != (int)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])",                                                       ["ValidateRemainingResults"] = "result[i] != (int)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}),
+    ("SimpleTernOpTest.template",      new Dictionary<string, string> { ["Isa"] = "AvxVnniInt16.V512",            ["LoadIsa"] = "Avx10v2.V512",                      ["Method"] = "MultiplyWideningAndAdd",                             ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16",   ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "UInt16",                                  ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",   ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()",     ["ValidateFirstResult"] = "result[0] != (uint)((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1])",                                                      ["ValidateRemainingResults"] = "result[i] != (uint)((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1])"}),
+    // Saturating variants: the expected value is the same widened sum, passed through Math.Clamp with the bounds of the result element type (int or uint).
+    ("SimpleTernOpTest.template",      new Dictionary<string, string> { ["Isa"] = "AvxVnniInt16.V512",            ["LoadIsa"] = "Avx10v2.V512",                      ["Method"] = "MultiplyWideningAndAddSaturate",                     ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32",  ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int16",  ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "UInt16",                                    ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",  ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",    ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()",     ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], (long)int.MinValue, (long)int.MaxValue)",          ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], (long)int.MinValue, (long)int.MaxValue)"}),
+    ("SimpleTernOpTest.template",      new Dictionary<string, string> { ["Isa"] = "AvxVnniInt16.V512",            ["LoadIsa"] = "Avx10v2.V512",                      ["Method"] = "MultiplyWideningAndAddSaturate",                     ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32",  ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16",  ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Int16",                                    ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",  ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",   ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()",      ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], (long)int.MinValue, (long)int.MaxValue)",          ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], (long)int.MinValue, (long)int.MaxValue)"}),
+    ("SimpleTernOpTest.template",      new Dictionary<string, string> { ["Isa"] = "AvxVnniInt16.V512",            ["LoadIsa"] = "Avx10v2.V512",                      ["Method"] = "MultiplyWideningAndAddSaturate",                     ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16",   ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "UInt16",                                  ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()",   ["NextValueOp3"] = "TestLibrary.Generator.GetUInt16()",     ["ValidateFirstResult"] = "result[0] != Math.Clamp((long)firstOp[0] + (long)secondOp[0]*(long)thirdOp[0] + (long)secondOp[1]*(long)thirdOp[1], 0, (long)uint.MaxValue)",                          ["ValidateRemainingResults"] = "result[i] != Math.Clamp((long)firstOp[i] + (long)secondOp[2*i]*(long)thirdOp[2*i] + (long)secondOp[2*i + 1]*(long)thirdOp[2*i + 1], 0, (long)uint.MaxValue)"}),
+};
+
 (string templateFileName, Dictionary<string, string> templateData)[] Avx512F_ScalarUpperInputs = new []
 {
     ("SimpleBinOpTest.template",       new Dictionary<string, string> { ["Isa"] = "Avx512F",            ["LoadIsa"] = "Avx512F", ["Method"] = "GetExponentScalar",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double",                                                                                                       ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                             ["ValidateFirstResult"] = "result[0] != Avx512Verify.GetExponent(right[0])",                                                                                                                                                                                                    ["ValidateRemainingResults"] = "result[i] != left[i]"}),
@@ -4461,6 +4517,10 @@ bool isImmTemplate(string name)
 ProcessInputs("Gfni.V512", GfniV512Inputs);
 ProcessInputs("Avx10v2", Avx10v2Inputs);
 ProcessInputs("Avx10v2_V512", Avx10v2_V512Inputs);
+ProcessInputs("AvxVnniInt8", AvxVnniInt8Inputs);
+ProcessInputs("AvxVnniInt8_V512", AvxVnniInt8_V512Inputs);
+ProcessInputs("AvxVnniInt16", AvxVnniInt16Inputs);
+ProcessInputs("AvxVnniInt16_V512", AvxVnniInt16_V512Inputs);
 
 
 void ProcessInputs(string groupName, (string templateFileName, Dictionary<string, string> templateData)[] inputs)
@@ -4494,6 +4554,11 @@ void ProcessInput(StreamWriter testListFile, string groupName, (string templateF
 
     var suffix = "";
 
+    if (input.templateFileName == "SimpleTernOpTest.template")
+    {
+        testName += $"{input.templateData["Op1VectorType"]}.{input.templateData["Op1BaseType"]}{input.templateData["Op2VectorType"]}.{input.templateData["Op2BaseType"]}.{input.templateData["Op3VectorType"]}.{input.templateData["Op3BaseType"]}";
+    }
+
     if (input.templateFileName == "SimpleUnOpConvTest.template" || input.templateFileName == "SimdScalarUnOpConvTest.template" )
     {
         testName = $"{input.templateData["Method"]}.{input.templateData["Op1VectorType"]}{input.templateData["Op1BaseType"]}";
diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/_TernaryOpTestTemplate.template b/src/tests/JIT/HardwareIntrinsics/X86/Shared/_TernaryOpTestTemplate.template
index cd9caf6b7072b5..1b22d0b096fd40 100644
--- a/src/tests/JIT/HardwareIntrinsics/X86/Shared/_TernaryOpTestTemplate.template
+++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/_TernaryOpTestTemplate.template
@@ -20,9 +20,9 @@ namespace JIT.HardwareIntrinsics.X86
     public static partial class Program
     {
         [Fact]
-        public static void {Method}{RetBaseType}()
+        public static void {Method}{RetBaseType}{Op1VectorType}{Op1BaseType}{Op2VectorType}{Op2BaseType}{Op3VectorType}{Op3BaseType}()
         {
-            var test = new {TemplateName}TernaryOpTest__{Method}{RetBaseType}();
+            var test = new {TemplateName}TernaryOpTest__{Method}{RetBaseType}{Op1VectorType}{Op1BaseType}{Op2VectorType}{Op2BaseType}{Op3VectorType}{Op3BaseType}();
 
             if (test.IsSupported)
             {
@@ -78,7 +78,7 @@ namespace JIT.HardwareIntrinsics.X86
         }
     }
 
-    public sealed unsafe class {TemplateName}TernaryOpTest__{Method}{RetBaseType}
+    public sealed unsafe class {TemplateName}TernaryOpTest__{Method}{RetBaseType}{Op1VectorType}{Op1BaseType}{Op2VectorType}{Op2BaseType}{Op3VectorType}{Op3BaseType}
     {
         private struct TestStruct
         {
@@ -100,7 +100,7 @@ namespace JIT.HardwareIntrinsics.X86
                 return testStruct;
             }
 
-            public void RunStructFldScenario({TemplateName}TernaryOpTest__{Method}{RetBaseType} testClass)
+            public void RunStructFldScenario({TemplateName}TernaryOpTest__{Method}{RetBaseType}{Op1VectorType}{Op1BaseType}{Op2VectorType}{Op2BaseType}{Op3VectorType}{Op3BaseType} testClass)
             {
                 var result = {Isa}.{Method}(_fld1, _fld2, _fld3);
 
@@ -126,7 +126,7 @@ namespace JIT.HardwareIntrinsics.X86
 
         private SimpleTernaryOpTest__DataTable<{RetBaseType}, {Op1BaseType}, {Op2BaseType}, {Op3BaseType}> _dataTable;
 
-        public {TemplateName}TernaryOpTest__{Method}{RetBaseType}()
+        public {TemplateName}TernaryOpTest__{Method}{RetBaseType}{Op1VectorType}{Op1BaseType}{Op2VectorType}{Op2BaseType}{Op3VectorType}{Op3BaseType}()
         {
             Succeeded = true;
 
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16SampleTest.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16SampleTest.cs
new file mode 100644
index 00000000000000..f79ba7132aee3a
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16SampleTest.cs
@@ -0,0 +1,50 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+//
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+using System.Runtime.Intrinsics;
+using Xunit;
+
+namespace IntelHardwareIntrinsicTest._AvxVnniInt16
+{
+    /// <summary>Sample test that reports whether <c>AvxVnniInt16</c> and <c>AvxVnniInt16.V512</c> are supported.</summary>
+    public partial class Program
+    {
+        const float EPS = Single.Epsilon * 5;
+
+        /// <summary>Returns <c>Avx10v2.Abs</c> of <paramref name="val"/>; flagged so that it is not inlined.</summary>
+        [MethodImplAttribute(MethodImplOptions.NoInlining)]
+        public static Vector128<ulong> getAbs128(Vector128<long> val) => Avx10v2.Abs(val);
+
+        /// <summary>Returns <c>Avx10v2.Abs</c> of <paramref name="val"/>; flagged so that it is not inlined.</summary>
+        [MethodImplAttribute(MethodImplOptions.NoInlining)]
+        public static Vector256<ulong> getAbs256(Vector256<long> val) => Avx10v2.Abs(val);
+
+        /// <summary>
+        /// Writes a line confirming the test ran, followed by one line reporting whether
+        /// <c>AvxVnniInt16</c> is supported and one line reporting whether
+        /// <c>AvxVnniInt16.V512</c> is supported. Nothing is asserted.
+        /// </summary>
+        [Fact]
+        public static unsafe void AvxVnniInt16SampleTest()
+        {
+            Console.WriteLine("Test executed");
+
+            // Status of the base class.
+            string baseStatus = AvxVnniInt16.IsSupported
+                ? "AvxVnniInt16 supported"
+                : "AvxVnniInt16 not supported";
+            Console.WriteLine(baseStatus);
+
+            // Status of the nested V512 class.
+            string v512Status = AvxVnniInt16.V512.IsSupported
+                ? "AvxVnniInt16_V512 supported"
+                : "AvxVnniInt16_V512 not supported";
+            Console.WriteLine(v512Status);
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_handwritten_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_handwritten_r.csproj
new file mode 100644
index 00000000000000..b0e0c15535b643
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_handwritten_r.csproj
@@ -0,0 +1,14 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <AssemblyName>X86_AvxVnniInt16_handwritten_r</AssemblyName>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <PropertyGroup>
+    <DebugType>Embedded</DebugType>
+    <Optimize />
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="..\..\X86\Shared\SimpleUnOpTest_DataTable.cs" />
+    <Compile Include="AvxVnniInt16SampleTest.cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_handwritten_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_handwritten_ro.csproj
new file mode 100644
index 00000000000000..37af53d8b83004
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_handwritten_ro.csproj
@@ -0,0 +1,14 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <AssemblyName>X86_AvxVnniInt16_handwritten_ro</AssemblyName>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <PropertyGroup>
+    <DebugType>Embedded</DebugType>
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="..\..\X86\Shared\SimpleUnOpTest_DataTable.cs" />
+    <Compile Include="AvxVnniInt16SampleTest.cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_r.csproj
new file mode 100644
index 00000000000000..a2dd5040f106b9
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_r.csproj
@@ -0,0 +1,14 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <AssemblyName>X86_AvxVnniInt16_r</AssemblyName>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <PropertyGroup>
+    <DebugType>Embedded</DebugType>
+    <Optimize />
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="Program.AvxVnniInt16.cs" />
+    <Compile Include="..\..\X86\Shared\Program.cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_ro.csproj
new file mode 100644
index 00000000000000..47301f1344bf55
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/AvxVnniInt16_ro.csproj
@@ -0,0 +1,14 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <AssemblyName>X86_AvxVnniInt16_ro</AssemblyName>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <PropertyGroup>
+    <DebugType>Embedded</DebugType>
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="Program.AvxVnniInt16.cs" />
+    <Compile Include="..\..\X86\Shared\Program.cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/Program.AvxVnniInt16.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/Program.AvxVnniInt16.cs
new file mode 100644
index 00000000000000..7c9a1f9d2418d6
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16/Program.AvxVnniInt16.cs
@@ -0,0 +1,16 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+
+namespace JIT.HardwareIntrinsics.X86._AvxVnniInt16
+{
+    /// <summary>One part of the partial <c>Program</c> class in this namespace; it declares only a static constructor.</summary>
+    public static partial class Program
+    {
+        // Explicit static constructor with an empty body:
+        // this part of the class initializes no static state.
+        static Program() { }
+    }
+}
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512SampleTest.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512SampleTest.cs
new file mode 100644
index 00000000000000..a3faf347bcb7b8
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512SampleTest.cs
@@ -0,0 +1,50 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+//
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+using System.Runtime.Intrinsics;
+using Xunit;
+
+namespace IntelHardwareIntrinsicTest._AvxVnniInt16_V512
+{
+    /// <summary>Sample test that reports whether <c>AvxVnniInt16</c> and <c>AvxVnniInt16.V512</c> are supported.</summary>
+    public partial class Program
+    {
+        const float EPS = Single.Epsilon * 5;
+
+        /// <summary>Returns <c>Avx10v2.Abs</c> of <paramref name="val"/>; flagged so that it is not inlined.</summary>
+        [MethodImplAttribute(MethodImplOptions.NoInlining)]
+        public static Vector128<ulong> getAbs128(Vector128<long> val) => Avx10v2.Abs(val);
+
+        /// <summary>Returns <c>Avx10v2.Abs</c> of <paramref name="val"/>; flagged so that it is not inlined.</summary>
+        [MethodImplAttribute(MethodImplOptions.NoInlining)]
+        public static Vector256<ulong> getAbs256(Vector256<long> val) => Avx10v2.Abs(val);
+
+        /// <summary>
+        /// Writes a line confirming the test ran, followed by one line reporting whether
+        /// <c>AvxVnniInt16</c> is supported and one line reporting whether
+        /// <c>AvxVnniInt16.V512</c> is supported. Nothing is asserted.
+        /// </summary>
+        [Fact]
+        public static unsafe void AvxVnniInt16_V512SampleTest()
+        {
+            Console.WriteLine("Test executed");
+
+            // Status of the base class.
+            string baseStatus = AvxVnniInt16.IsSupported
+                ? "AvxVnniInt16 supported"
+                : "AvxVnniInt16 not supported";
+            Console.WriteLine(baseStatus);
+
+            // Status of the nested V512 class.
+            string v512Status = AvxVnniInt16.V512.IsSupported
+                ? "AvxVnniInt16_V512 supported"
+                : "AvxVnniInt16_V512 not supported";
+            Console.WriteLine(v512Status);
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_r.csproj
new file mode 100644
index 00000000000000..913145750c6879
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_r.csproj
@@ -0,0 +1,14 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <AssemblyName>X86_AvxVnniInt16_V512_handwritten_r</AssemblyName>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <PropertyGroup>
+    <DebugType>Embedded</DebugType>
+    <Optimize />
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="..\..\X86\Shared\SimpleUnOpTest_DataTable.cs" />
+    <Compile Include="AvxVnniInt16_V512SampleTest.cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_ro.csproj
new file mode 100644
index 00000000000000..d301293f0763ea
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_handwritten_ro.csproj
@@ -0,0 +1,14 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <AssemblyName>X86_AvxVnniInt16_V512_handwritten_ro</AssemblyName>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <PropertyGroup>
+    <DebugType>Embedded</DebugType>
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="..\..\X86\Shared\SimpleUnOpTest_DataTable.cs" />
+    <Compile Include="AvxVnniInt16_V512SampleTest.cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_r.csproj
new file mode 100644
index 00000000000000..05c40b2a2ec0a2
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_r.csproj
@@ -0,0 +1,14 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <AssemblyName>X86_AvxVnniInt16_V512_r</AssemblyName>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <PropertyGroup>
+    <DebugType>Embedded</DebugType>
+    <Optimize />
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="Program.AvxVnniInt16_V512.cs" />
+    <Compile Include="..\..\X86\Shared\Program.cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_ro.csproj
new file mode 100644
index 00000000000000..4b159ee2cdd3f0
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/AvxVnniInt16_V512_ro.csproj
@@ -0,0 +1,14 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <AssemblyName>X86_AvxVnniInt16_V512_ro</AssemblyName>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <PropertyGroup>
+    <DebugType>Embedded</DebugType>
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="Program.AvxVnniInt16_V512.cs" />
+    <Compile Include="..\..\X86\Shared\Program.cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/Program.AvxVnniInt16_V512.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/Program.AvxVnniInt16_V512.cs
new file mode 100644
index 00000000000000..52895c8b02fdf5
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt16_V512/Program.AvxVnniInt16_V512.cs
@@ -0,0 +1,16 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+
+namespace JIT.HardwareIntrinsics.X86._AvxVnniInt16_V512
+{
+    /// <summary>One part of the partial <c>Program</c> class in this namespace; it declares only a static constructor.</summary>
+    public static partial class Program
+    {
+        // Explicit static constructor with an empty body:
+        // this part of the class initializes no static state.
+        static Program() { }
+    }
+}
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8SampleTest.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8SampleTest.cs
new file mode 100644
index 00000000000000..551b151f79e056
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8SampleTest.cs
@@ -0,0 +1,56 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+//
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+using System.Runtime.Intrinsics;
+using Xunit;
+
+namespace IntelHardwareIntrinsicTest._AvxVnniInt8
+{
+    /// <summary>
+    /// Sample test that reports whether <c>AvxVnniInt8</c> and <c>AvxVnniInt8.V512</c> are supported.
+    /// </summary>
+    public partial class Program
+    {
+        const float EPS = Single.Epsilon * 5;
+
+        /// <summary>Returns <c>Avx10v2.Abs</c> of <paramref name="val"/>; flagged so that it is not inlined.</summary>
+        [MethodImplAttribute(MethodImplOptions.NoInlining)]
+        public static Vector128<ulong> getAbs128(Vector128<long> val) => Avx10v2.Abs(val);
+
+        /// <summary>Returns <c>Avx10v2.Abs</c> of <paramref name="val"/>; flagged so that it is not inlined.</summary>
+        [MethodImplAttribute(MethodImplOptions.NoInlining)]
+        public static Vector256<ulong> getAbs256(Vector256<long> val) => Avx10v2.Abs(val);
+
+        /// <summary>Returns <c>AvxVnniInt8.MultiplyWideningAndAdd(v1, v2, v3)</c>; flagged so that it is not inlined.</summary>
+        [MethodImplAttribute(MethodImplOptions.NoInlining)]
+        public static Vector128<uint> getMWA(Vector128<uint> v1, Vector128<byte> v2, Vector128<byte> v3) => AvxVnniInt8.MultiplyWideningAndAdd(v1, v2, v3);
+
+        /// <summary>
+        /// Writes a line confirming the test ran, followed by one line reporting whether
+        /// <c>AvxVnniInt8</c> is supported and one line reporting whether
+        /// <c>AvxVnniInt8.V512</c> is supported. Nothing is asserted.
+        /// </summary>
+        [Fact]
+        public static unsafe void AvxVnniInt8SampleTest()
+        {
+            Console.WriteLine("Test executed");
+
+            // Status of the base class.
+            string baseStatus = AvxVnniInt8.IsSupported
+                ? "AvxVnniInt8 supported"
+                : "AvxVnniInt8 not supported";
+            Console.WriteLine(baseStatus);
+
+            // Status of the nested V512 class.
+            string v512Status = AvxVnniInt8.V512.IsSupported
+                ? "AvxVnniInt8_V512 supported"
+                : "AvxVnniInt8_V512 not supported";
+            Console.WriteLine(v512Status);
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_handwritten_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_handwritten_r.csproj
new file mode 100644
index 00000000000000..f5a1b8ec79165a
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_handwritten_r.csproj
@@ -0,0 +1,14 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <AssemblyName>X86_AvxVnniInt8_handwritten_r</AssemblyName>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <PropertyGroup>
+    <DebugType>Embedded</DebugType>
+    <Optimize />
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="..\..\X86\Shared\SimpleUnOpTest_DataTable.cs" />
+    <Compile Include="AvxVnniInt8SampleTest.cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_handwritten_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_handwritten_ro.csproj
new file mode 100644
index 00000000000000..19edfb7a1e4a3e
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_handwritten_ro.csproj
@@ -0,0 +1,14 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <!-- Compiles AvxVnniInt8SampleTest.cs plus the shared SimpleUnOpTest_DataTable.cs.
+       Optimize is True here; the _r variant of this project leaves it empty. -->
+  <PropertyGroup>
+    <AssemblyName>X86_AvxVnniInt8_handwritten_ro</AssemblyName>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+    <DebugType>Embedded</DebugType>
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="..\..\X86\Shared\SimpleUnOpTest_DataTable.cs" />
+    <Compile Include="AvxVnniInt8SampleTest.cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_r.csproj
new file mode 100644
index 00000000000000..4860476c221996
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_r.csproj
@@ -0,0 +1,14 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <!-- Compiles Program.AvxVnniInt8.cs together with the shared Program.cs.
+       Optimize is left empty here; the _ro variant of this project sets it to True. -->
+  <PropertyGroup>
+    <AssemblyName>X86_AvxVnniInt8_r</AssemblyName>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+    <DebugType>Embedded</DebugType>
+    <Optimize />
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="Program.AvxVnniInt8.cs" />
+    <Compile Include="..\..\X86\Shared\Program.cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_ro.csproj
new file mode 100644
index 00000000000000..85c963490ba6e8
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/AvxVnniInt8_ro.csproj
@@ -0,0 +1,14 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <!-- Compiles Program.AvxVnniInt8.cs together with the shared Program.cs.
+       Optimize is True here; the _r variant of this project leaves it empty. -->
+  <PropertyGroup>
+    <AssemblyName>X86_AvxVnniInt8_ro</AssemblyName>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+    <DebugType>Embedded</DebugType>
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="Program.AvxVnniInt8.cs" />
+    <Compile Include="..\..\X86\Shared\Program.cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/Program.AvxVnniInt8.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/Program.AvxVnniInt8.cs
new file mode 100644
index 00000000000000..e20f252e9e9cb4
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8/Program.AvxVnniInt8.cs
@@ -0,0 +1,16 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+
+namespace JIT.HardwareIntrinsics.X86._AvxVnniInt8
+{
+    // Partial Program class for the AvxVnniInt8 test project. This part
+    // contributes only the static constructor below.
+    public static partial class Program
+    {
+        // Empty static constructor: performs no initialization.
+        static Program() { }
+    }
+}
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512SampleTest.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512SampleTest.cs
new file mode 100644
index 00000000000000..ce0f11ec035755
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512SampleTest.cs
@@ -0,0 +1,50 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+//
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+using System.Runtime.Intrinsics;
+using Xunit;
+
+namespace IntelHardwareIntrinsicTest._AvxVnniInt8_V512
+{
+    // Sample test for the AvxVnniInt8.V512 API surface. It only reports ISA
+    // availability on the console and does not assert on any result.
+    public partial class Program
+    {
+        // NOTE(review): EPS and the two Abs wrappers below are not referenced by the
+        // test in this file - confirm whether they are still needed.
+        const float EPS = Single.Epsilon * 5;
+
+        // Non-inlined wrapper over Avx10v2.Abs for a 128-bit vector of long.
+        [MethodImplAttribute(MethodImplOptions.NoInlining)]
+        public static Vector128<ulong> getAbs128(Vector128<long> val) => Avx10v2.Abs(val);
+
+        // Non-inlined wrapper over Avx10v2.Abs for a 256-bit vector of long.
+        [MethodImplAttribute(MethodImplOptions.NoInlining)]
+        public static Vector256<ulong> getAbs256(Vector256<long> val) => Avx10v2.Abs(val);
+
+        // Prints "Test executed", then one line each for AvxVnniInt8 and
+        // AvxVnniInt8.V512 stating whether that ISA is supported.
+        [Fact]
+        public static unsafe void AvxVnniInt8_V512SampleTest()
+        {
+            Console.WriteLine("Test executed");
+
+            // Base ISA.
+            string baseStatus = AvxVnniInt8.IsSupported
+                ? "AvxVnniInt8 supported"
+                : "AvxVnniInt8 not supported";
+            Console.WriteLine(baseStatus);
+
+            // Nested V512 class.
+            string wideStatus = AvxVnniInt8.V512.IsSupported
+                ? "AvxVnniInt8_V512 supported"
+                : "AvxVnniInt8_V512 not supported";
+            Console.WriteLine(wideStatus);
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_r.csproj
new file mode 100644
index 00000000000000..9dad95ac6905a3
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_r.csproj
@@ -0,0 +1,14 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <!-- Compiles AvxVnniInt8_V512SampleTest.cs plus the shared SimpleUnOpTest_DataTable.cs.
+       Optimize is left empty here; the _ro variant of this project sets it to True. -->
+  <PropertyGroup>
+    <AssemblyName>X86_AvxVnniInt8_V512_handwritten_r</AssemblyName>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+    <DebugType>Embedded</DebugType>
+    <Optimize />
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="..\..\X86\Shared\SimpleUnOpTest_DataTable.cs" />
+    <Compile Include="AvxVnniInt8_V512SampleTest.cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_ro.csproj
new file mode 100644
index 00000000000000..1a9d7fa07349a6
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_handwritten_ro.csproj
@@ -0,0 +1,14 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <!-- Compiles AvxVnniInt8_V512SampleTest.cs plus the shared SimpleUnOpTest_DataTable.cs.
+       Optimize is True here; the _r variant of this project leaves it empty. -->
+  <PropertyGroup>
+    <AssemblyName>X86_AvxVnniInt8_V512_handwritten_ro</AssemblyName>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+    <DebugType>Embedded</DebugType>
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="..\..\X86\Shared\SimpleUnOpTest_DataTable.cs" />
+    <Compile Include="AvxVnniInt8_V512SampleTest.cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_r.csproj
new file mode 100644
index 00000000000000..bf04181f2eed27
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_r.csproj
@@ -0,0 +1,14 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <!-- Compiles Program.AvxVnniInt8_V512.cs together with the shared Program.cs.
+       Optimize is left empty here; the _ro variant of this project sets it to True. -->
+  <PropertyGroup>
+    <AssemblyName>X86_AvxVnniInt8_V512_r</AssemblyName>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+    <DebugType>Embedded</DebugType>
+    <Optimize />
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="Program.AvxVnniInt8_V512.cs" />
+    <Compile Include="..\..\X86\Shared\Program.cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_ro.csproj
new file mode 100644
index 00000000000000..44720c47ad4db3
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/AvxVnniInt8_V512_ro.csproj
@@ -0,0 +1,14 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <!-- Compiles Program.AvxVnniInt8_V512.cs together with the shared Program.cs.
+       Optimize is True here; the _r variant of this project leaves it empty. -->
+  <PropertyGroup>
+    <AssemblyName>X86_AvxVnniInt8_V512_ro</AssemblyName>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+    <DebugType>Embedded</DebugType>
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="Program.AvxVnniInt8_V512.cs" />
+    <Compile Include="..\..\X86\Shared\Program.cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/Program.AvxVnniInt8_V512.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/Program.AvxVnniInt8_V512.cs
new file mode 100644
index 00000000000000..f1910642eed9ec
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx/AvxVnniInt8_V512/Program.AvxVnniInt8_V512.cs
@@ -0,0 +1,16 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+
+namespace JIT.HardwareIntrinsics.X86._AvxVnniInt8_V512
+{
+    // Partial Program class for the AvxVnniInt8_V512 test project. This part
+    // contributes only the static constructor below.
+    public static partial class Program
+    {
+        // Empty static constructor: performs no initialization.
+        static Program() { }
+    }
+}
diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs
index afbe2b6a18c21d..3083d9f6c583cf 100644
--- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs
+++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs
@@ -79,9 +79,12 @@ static int Main()
         bool? ExpectedAvx512Fp16 = false;
         bool? ExpectedAvx10v1 = false;
         bool? ExpectedAvx10v1V512 = false;
+        bool? ExpectedAvx10v2 = false;
         bool? ExpectedAvx512Vp2intersect = false;
         bool? ExpectedAvxIfma = false;
         bool? ExpectedAvxVnni = false;
+        bool? ExpectedAvxVnniInt = false;
+        bool? ExpectedAvxVnniIntV512 = false;
         bool? ExpectedGfniV256 = false;
         bool? ExpectedGfniV512 = false;
         bool? ExpectedAesV256 = false;
@@ -121,9 +124,12 @@ static int Main()
         bool? ExpectedAvx512Fp16 = false;
         bool? ExpectedAvx10v1 = false;
         bool? ExpectedAvx10v1V512 = false;
+        bool? ExpectedAvx10v2 = false;
         bool? ExpectedAvx512Vp2intersect = false;
         bool? ExpectedAvxIfma = false;
         bool? ExpectedAvxVnni = false;
+        bool? ExpectedAvxVnniInt = false;
+        bool? ExpectedAvxVnniIntV512 = false;
         bool? ExpectedGfniV256 = false;
         bool? ExpectedGfniV512 = false;
         bool? ExpectedAesV256 = false;
@@ -148,6 +154,7 @@ static int Main()
         bool? ExpectedPclmulqdq = null;
         bool? ExpectedAvxIfma = null;
         bool? ExpectedAvxVnni = null;
+        bool? ExpectedAvxVnniInt = null;
         bool? ExpectedGfni = null;
         bool? ExpectedGfniV256 = null;
         bool? ExpectedSha = null;
@@ -168,7 +175,9 @@ static int Main()
         bool? ExpectedAvx512Fp16 = false;
         bool? ExpectedAvx10v1 = false;
         bool? ExpectedAvx10v1V512 = false;
+        bool? ExpectedAvx10v2 = false;
         bool? ExpectedAvx512Vp2intersect = false;
+        bool? ExpectedAvxVnniIntV512 = false;
         bool? ExpectedGfniV512 = false;
         bool? ExpectedAesV512 = false;
         bool? ExpectedPclmulqdqV512 = false;
@@ -208,9 +217,12 @@ static int Main()
         bool? ExpectedAvx512Fp16 = false;
         bool? ExpectedAvx10v1 = false;
         bool? ExpectedAvx10v1V512 = false;
+        bool? ExpectedAvx10v2 = false;
         bool? ExpectedAvx512Vp2intersect = false;
         bool? ExpectedAvxIfma = false;
         bool? ExpectedAvxVnni = false;
+        bool? ExpectedAvxVnniInt = false;
+        bool? ExpectedAvxVnniIntV512 = false;
         bool? ExpectedGfniV512 = false;
         bool? ExpectedAesV512 = false;
         bool? ExpectedPclmulqdqV512 = false;
@@ -232,6 +244,7 @@ static int Main()
         bool? ExpectedPclmulqdq = null;
         bool? ExpectedAvxIfma = null;
         bool? ExpectedAvxVnni = null;
+        bool? ExpectedAvxVnniInt = null;
         bool? ExpectedGfni = null;
         bool? ExpectedGfniV256 = null;
         bool? ExpectedSha = null;
@@ -252,7 +265,9 @@ static int Main()
         bool? ExpectedAvx512Fp16 = false;
         bool? ExpectedAvx10v1 = false;
         bool? ExpectedAvx10v1V512 = false;
+        bool? ExpectedAvx10v2 = false;
         bool? ExpectedAvx512Vp2intersect = false;
+        bool? ExpectedAvxVnniIntV512 = false;
         bool? ExpectedGfniV512 = false;
         bool? ExpectedAesV512 = false;
         bool? ExpectedPclmulqdqV512 = false;
@@ -282,11 +297,14 @@ static int Main()
         bool? ExpectedAvx512Fp16 = null;
         bool? ExpectedAvx10v1 = null;
         bool? ExpectedAvx10v1V512 = null;
+        bool? ExpectedAvx10v2 = null;
         bool? ExpectedAes = null;
         bool? ExpectedPclmulqdq = null;
         bool? ExpectedAvx512Vp2intersect = null;
         bool? ExpectedAvxIfma = null;
         bool? ExpectedAvxVnni = null;
+        bool? ExpectedAvxVnniInt = null;
+        bool? ExpectedAvxVnniIntV512 = null;
         bool? ExpectedGfni = null;
         bool? ExpectedGfniV256 = null;
         bool? ExpectedGfniV512 = null;
@@ -297,6 +315,50 @@ static int Main()
         bool? ExpectedPclmulqdqV512 = null;
         bool? ExpectedWaitPkg = null;
         bool? ExpectedX86Serialize = null;
+#elif AVX10v2_INTRINSICS
+        bool? ExpectedSse3 = true;
+        bool? ExpectedSsse3 = true;
+        bool? ExpectedSse41 = true;
+        bool? ExpectedSse42 = true;
+        bool? ExpectedPopcnt = true;
+        bool? ExpectedAvx = true;
+        bool? ExpectedAvx2 = true;
+        bool? ExpectedBmi1 = true;
+        bool? ExpectedBmi2 = true;
+        bool? ExpectedF16c = true;
+        bool? ExpectedFma = true;
+        bool? ExpectedLzcnt = true;
+        bool? ExpectedAvx512F = true;
+        bool? ExpectedAvx512BW = true;
+        bool? ExpectedAvx512CD = true;
+        bool? ExpectedAvx512DQ = true;
+        bool? ExpectedAvx512Vbmi = true;
+        bool? ExpectedAvx512Bitalg = true;
+        bool? ExpectedAvx512Vbmi2 = true;
+        bool? ExpectedAvx512Vpopcntdq = true;
+        bool? ExpectedAvx512Bf16 = true;
+        bool? ExpectedAvx512Fp16 = true;
+        bool? ExpectedAvx10v1 = true;
+        bool? ExpectedAvx10v1V512 = true;
+        bool? ExpectedAvx10v2 = true;
+        bool? ExpectedAvxVnni = true;
+        bool? ExpectedAvxVnniIntV512 = true;
+
+        bool? ExpectedAes = null;
+        bool? ExpectedPclmulqdq = null;
+        bool? ExpectedAvx512Vp2intersect = null;
+        bool? ExpectedAvxIfma = null;
+        bool? ExpectedGfni = null;
+        bool? ExpectedGfniV256 = null;
+        bool? ExpectedGfniV512 = null;
+        bool? ExpectedSha = null;
+        bool? ExpectedAesV256 = null;
+        bool? ExpectedAesV512 = null;
+        bool? ExpectedPclmulqdqV256 = null;
+        bool? ExpectedPclmulqdqV512 = null;
+        bool? ExpectedWaitPkg = null;
+        bool? ExpectedX86Serialize = null;
+        bool? ExpectedAvxVnniInt = null;
 #else
 #error Who dis?
 #endif
@@ -366,6 +428,13 @@ static int Main()
         Check("Lzcnt", ExpectedLzcnt, &LzcntIsSupported, Lzcnt.IsSupported, () => Lzcnt.LeadingZeroCount(0) == 32);
         Check("Lzcnt.X64", ExpectedLzcnt, &LzcntX64IsSupported, Lzcnt.X64.IsSupported, () => Lzcnt.X64.LeadingZeroCount(0) == 64);
 
+        Check("AvxVnniInt", ExpectedAvxVnniInt, &AvxVnniIntIsSupported, AvxVnniInt8.IsSupported, () => AvxVnniInt8.MultiplyWideningAndAdd(Vector128<int>.Zero, Vector128<sbyte>.Zero, Vector128<sbyte>.Zero).Equals(Vector128<int>.Zero));
+
+        Check("AvxVnniIntV512", ExpectedAvxVnniIntV512, &AvxVnniIntV512IsSupported, AvxVnniInt16.V512.IsSupported, () => AvxVnniInt16.V512.MultiplyWideningAndAdd(Vector512<int>.Zero, Vector512<short>.Zero, Vector512<ushort>.Zero).Equals(Vector512<int>.Zero));
+
+        Check("Avx10v2", ExpectedAvx10v2, &Avx10v2IsSupported, Avx10v2.IsSupported, () => Avx10v2.MinMax(Vector128<double>.Zero, Vector128<double>.Zero, 0x00).Equals(Vector128<double>.Zero));
+        Check("Avx10v2.X64", ExpectedAvx10v2, &Avx10v2X64IsSupported, Avx10v2.X64.IsSupported, null);
+
         Check("Avx512F", ExpectedAvx512F, &Avx512FIsSupported, Avx512F.IsSupported, () => Avx512F.Abs(Vector512<int>.Zero).Equals(Vector512<uint>.Zero));
         Check("Avx512F.VL", ExpectedAvx512F, &Avx512FVLIsSupported, Avx512F.VL.IsSupported, null);
         Check("Avx512F.X64", ExpectedAvx512F, &Avx512FX64IsSupported, Avx512F.X64.IsSupported, null);
@@ -543,6 +612,8 @@ static int Main()
     static bool Avx10v1X64IsSupported() => Avx10v1.X64.IsSupported;
     static bool Avx10v1V512IsSupported() => Avx10v1.V512.IsSupported;
     static bool Avx10v1V512X64IsSupported() => Avx10v1.V512.X64.IsSupported;
+    static bool Avx10v2IsSupported() => Avx10v2.IsSupported; // passed by address to Check("Avx10v2", ...)
+    static bool Avx10v2X64IsSupported() => Avx10v2.X64.IsSupported; // passed by address to Check("Avx10v2.X64", ...)
 
     static bool AesIsSupported() => Aes.IsSupported;
     static bool AesX64IsSupported() => Aes.X64.IsSupported;
@@ -559,6 +630,8 @@ static int Main()
 
     static bool AvxVnniIsSupported() => AvxVnni.IsSupported;
     static bool AvxVnniX64IsSupported() => AvxVnni.X64.IsSupported;
+    static bool AvxVnniIntIsSupported() => AvxVnniInt8.IsSupported; // passed by address to Check("AvxVnniInt", ...); probes the AvxVnniInt8 class
+    static bool AvxVnniIntV512IsSupported() => AvxVnniInt16.V512.IsSupported; // passed by address to Check("AvxVnniIntV512", ...); probes AvxVnniInt16.V512
 
     static bool GfniIsSupported() => Gfni.IsSupported;
     static bool GfniV256IsSupported() => Gfni.V256.IsSupported;
diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx10v2.csproj b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx10v2.csproj
new file mode 100644
index 00000000000000..4e8381cac23f00
--- /dev/null
+++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx10v2.csproj
@@ -0,0 +1,31 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <CLRTestPriority>0</CLRTestPriority>
+    <CLRTestTargetUnsupported Condition="'$(TargetArchitecture)' != 'x64'">true</CLRTestTargetUnsupported>
+    <!-- Sanitizers increase the binary size, so it ends up outside of our expected range. NOTE(review): the next line has no Condition, so it marks the test unsupported for every configuration; confirm that is intended. -->
+    <CLRTestTargetUnsupported>true</CLRTestTargetUnsupported>
+    <!-- Test infra issue on apple devices: https://github.com/dotnet/runtime/issues/89917 -->
+    <CLRTestTargetUnsupported Condition="'$(TargetsAppleMobile)' == 'true'">true</CLRTestTargetUnsupported>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+    <DefineConstants>$(DefineConstants);AVX10v2_INTRINSICS;VECTORT512_INTRINSICS</DefineConstants>
+    <RequiresProcessIsolation>true</RequiresProcessIsolation>
+    <ReferenceXUnitWrapperGenerator>false</ReferenceXUnitWrapperGenerator>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <IlcArg Include="--instruction-set:avx10v2,avx10v2_v512" />
+  </ItemGroup>
+  <!-- Bash pre-command: chain the existing Bash pre-commands, then skip the run with a success exit code (POSIX shell syntax, not cmd.exe). -->
+  <PropertyGroup>
+    <CLRTestBashPreCommands><![CDATA[
+$(CLRTestBashPreCommands)
+    echo No support for AVX10v2, test not applicable.
+    exit 0
+]]></CLRTestBashPreCommands>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <Compile Include="Program.cs" />
+  </ItemGroup>
+</Project>