Skip to content

Commit b2cf1ea

Browse files
committed
AVX-512 detection for AMD
1 parent 5fda461 commit b2cf1ea

File tree

4 files changed

+54
-0
lines changed

4 files changed

+54
-0
lines changed

include/private/dsp/arch/x86/cpuid.h

+10
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,16 @@
8383
#define X86_CPUID7_INTEL_ECX_AVX512VBMI (1 << 1)
8484

8585
#define X86_CPUID7_AMD_EBX_AVX2 (1 << 5)
86+
#define X86_CPUID7_AMD_EBX_AVX512F (1 << 16)
87+
#define X86_CPUID7_AMD_EBX_AVX512DQ (1 << 17)
88+
#define X86_CPUID7_AMD_EBX_AVX512IFMA (1 << 21)
89+
#define X86_CPUID7_AMD_EBX_AVX512PF (1 << 26)
90+
#define X86_CPUID7_AMD_EBX_AVX512ER (1 << 27)
91+
#define X86_CPUID7_AMD_EBX_AVX512CD (1 << 28)
92+
#define X86_CPUID7_AMD_EBX_AVX512BW (1 << 30)
93+
// Bit 31 of EBX: use an unsigned literal, since shifting a signed 1 into the sign bit overflows int
#define X86_CPUID7_AMD_EBX_AVX512VL (1u << 31)
94+
95+
#define X86_CPUID7_AMD_ECX_AVX512VBMI (1 << 1)
8696

8797
//-------------------------------------------------------------------------
8898
// Function 80000001

include/private/dsp/arch/x86/features.h

+1
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@
9090
FEAT_FAST_MOVS, // Processor implements optimized MOVS instruction
9191
FEAT_FAST_AVX, // Fast AVX implementation
9292
FEAT_FAST_FMA3, // Fast FMA3 implementation
93+
FEAT_FAST_AVX512, // Fast AVX-512 implementation
9394
FEAT_BELOW_ZEN3 // CPU has AMD architecture and is below Zen3
9495
};
9596

src/main/x86/avx512.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,11 @@
8484

8585
void dsp_init(const cpu_features_t *f)
8686
{
87+
// Enable AVX-512 only for CPUs that really support it well
88+
const bool favx512 = feature_check(f, FEAT_FAST_AVX512);
89+
if (!favx512)
90+
return;
91+
8792
const bool vl = (f->features & (CPU_OPTION_AVX512F | CPU_OPTION_AVX512VL)) ==
8893
(CPU_OPTION_AVX512F | CPU_OPTION_AVX512VL);
8994

src/main/x86/x86.cpp

+38
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,30 @@
299299
if (info.ebx & X86_CPUID7_AMD_EBX_AVX2)
300300
f->features |= CPU_OPTION_AVX2;
301301
}
302+
303+
// Additional check for AVX512 support
304+
if ((xcr0 & XCR_FLAGS_AVX512) == XCR_FLAGS_AVX512)
305+
{
306+
if (info.ebx & X86_CPUID7_AMD_EBX_AVX512F)
307+
f->features |= CPU_OPTION_AVX512F;
308+
if (info.ebx & X86_CPUID7_AMD_EBX_AVX512DQ)
309+
f->features |= CPU_OPTION_AVX512DQ;
310+
if (info.ebx & X86_CPUID7_AMD_EBX_AVX512IFMA)
311+
f->features |= CPU_OPTION_AVX512IFMA;
312+
if (info.ebx & X86_CPUID7_AMD_EBX_AVX512PF)
313+
f->features |= CPU_OPTION_AVX512PF;
314+
if (info.ebx & X86_CPUID7_AMD_EBX_AVX512ER)
315+
f->features |= CPU_OPTION_AVX512ER;
316+
if (info.ebx & X86_CPUID7_AMD_EBX_AVX512CD)
317+
f->features |= CPU_OPTION_AVX512CD;
318+
if (info.ebx & X86_CPUID7_AMD_EBX_AVX512BW)
319+
f->features |= CPU_OPTION_AVX512BW;
320+
if (info.ebx & X86_CPUID7_AMD_EBX_AVX512VL)
321+
f->features |= CPU_OPTION_AVX512VL;
322+
323+
if (info.ecx & X86_CPUID7_AMD_ECX_AVX512VBMI)
324+
f->features |= CPU_OPTION_AVX512VBMI;
325+
}
302326
}
303327

304328
// FUNCTION 0x80000001
@@ -542,6 +566,20 @@
542566
}
543567
break;
544568

569+
case FEAT_FAST_AVX512:
570+
if (f->vendor == CPU_VENDOR_INTEL) // Any Intel CPU seems to be good enough with AVX-512
571+
return true;
572+
// Only starting with the Zen 3/Zen 4 family is AMD's implementation of AVX-512 considered fast enough
573+
if ((f->vendor == CPU_VENDOR_AMD) || (f->vendor == CPU_VENDOR_HYGON))
574+
{
575+
if (f->family < AMD_FAMILY_ZEN_3_4)
576+
return false;
577+
if (f->family == AMD_FAMILY_DHYANA)
578+
return false;
579+
return true;
580+
}
581+
break;
582+
545583
case FEAT_BELOW_ZEN3: // Test that this is AMD and below Zen 3 architecture
546584
if ((f->vendor == CPU_VENDOR_AMD) || (f->vendor == CPU_VENDOR_HYGON))
547585
{

0 commit comments

Comments
 (0)