Skip to content

Commit a2fa319

Browse files
authored
Add AArch64 extension handling and SVE/SVE2 feature detection (#443)
* Add Arm extension handling in preparation for feature detection

  This commit continues to assume that only Neon is present, but adds the helper functions and call sites to match the existing x86 behaviour in preparation for adding feature detection logic in a later commit.

* Add SVE and SVE2 to Arm extensions enum

  Plus wire up Linux feature detection and amend init switches to just fall back to the Neon cases for now.

* Guard AArch64 SVE/SVE2 features by new CMake options

  Introduce new options VVENC_ENABLE_ARM_SIMD_SVE and VVENC_ENABLE_ARM_SIMD_SVE2 to control whether SVE and SVE2 are enabled, plus add #if guards to disable feature detection if the feature is not available. This commit does not include guarding which source files are actually built with SVE/SVE2 flags enabled since there are currently zero SVE/SVE2 source files.
1 parent 2f25b8a commit a2fa319

File tree

8 files changed

+194
-54
lines changed

8 files changed

+194
-54
lines changed

CMakeLists.txt

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,10 @@ if( VVENC_TARGET_ARCH STREQUAL "ARM" )
3535
endif()
3636

3737
# we enable x86 intrinsics for all target architectures, because they are implemented through simd-everywhere on non-x86
38-
set( VVENC_ENABLE_X86_SIMD TRUE CACHE BOOL "enable x86 intrinsics" )
39-
set( VVENC_ENABLE_ARM_SIMD ${VVENC_ARM_SIMD_DEFAULT} CACHE BOOL "enable ARM intrinsics" )
38+
set( VVENC_ENABLE_X86_SIMD TRUE CACHE BOOL "Enable x86 intrinsics" )
39+
set( VVENC_ENABLE_ARM_SIMD ${VVENC_ARM_SIMD_DEFAULT} CACHE BOOL "Enable Arm Neon intrinsics" )
40+
set( VVENC_ENABLE_ARM_SIMD_SVE FALSE CACHE BOOL "Enable Arm SVE intrinsics" )
41+
set( VVENC_ENABLE_ARM_SIMD_SVE2 FALSE CACHE BOOL "Enable Arm SVE2 intrinsics" )
4042

4143
check_problematic_compiler( VVENC_PROBLEMATIC_COMPILER "MSVC" 19.38 19.39 )
4244
if( VVENC_PROBLEMATIC_COMPILER )
@@ -72,12 +74,35 @@ if( VVENC_ENABLE_X86_SIMD )
7274
endif()
7375

7476
message( STATUS "x86 SIMD intrinsics enabled (using SIMDE for non-x86 targets)" )
75-
add_compile_definitions( TARGET_SIMD_X86 )
77+
add_compile_definitions( TARGET_SIMD_X86=1 )
7678
endif()
7779

78-
if( VVENC_ENABLE_ARM_SIMD )
79-
message( STATUS "ARM SIMD intrinsics enabled" )
80-
add_compile_definitions( TARGET_SIMD_ARM )
80+
if( VVENC_TARGET_ARCH STREQUAL "ARM" )
81+
if( VVENC_ENABLE_ARM_SIMD )
82+
message( STATUS "Arm Neon intrinsics enabled" )
83+
add_compile_definitions( TARGET_SIMD_ARM=1 )
84+
else()
85+
message( STATUS "Arm Neon intrinsics disabled, disabling Arm SVE/SVE2 intrinsics" )
86+
# If Neon is disabled make sure that SVE/SVE2 are also disabled.
87+
set( VVENC_ENABLE_ARM_SIMD_SVE FALSE )
88+
set( VVENC_ENABLE_ARM_SIMD_SVE2 FALSE )
89+
endif()
90+
91+
if( VVENC_ENABLE_ARM_SIMD_SVE )
92+
message( STATUS "Arm SVE intrinsics enabled" )
93+
add_compile_definitions( TARGET_SIMD_ARM_SVE=1 )
94+
else()
95+
message( STATUS "Arm SVE intrinsics disabled, disabling Arm SVE2 intrinsics" )
96+
# If SVE is disabled make sure that SVE2 is also disabled.
97+
set( VVENC_ENABLE_ARM_SIMD_SVE2 FALSE )
98+
endif()
99+
100+
if( VVENC_ENABLE_ARM_SIMD_SVE2 )
101+
message( STATUS "Arm SVE2 intrinsics enabled" )
102+
add_compile_definitions( TARGET_SIMD_ARM_SVE2=1 )
103+
else()
104+
message( STATUS "Arm SVE2 intrinsics disabled" )
105+
endif()
81106
endif()
82107

83108
if( NOT CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR )

source/Lib/CommonLib/CommonDef.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -728,12 +728,14 @@ namespace x86_simd
728728
// Arm SIMD extension levels (only declared when TARGET_SIMD_ARM is defined).
// The numeric order of the enumerators is significant: code elsewhere in this
// commit compares levels relationally (`request > max_supported`,
// `vext >= NEON`), so UNDEFINED < SCALAR < NEON < SVE < SVE2 must keep holding.
// In this diff view the first enum body ("-" lines) is the pre-commit version
// and the second ("+" lines) is the version that adds SVE and SVE2.
namespace arm_simd
729729
{
730730
#ifdef TARGET_SIMD_ARM
731-
typedef enum
732-
{
733-
UNDEFINED = -1,
734-
SCALAR = 0,
735-
NEON,
736-
} ARM_VEXT;
731+
typedef enum
732+
{
733+
UNDEFINED = -1,
734+
SCALAR = 0,
735+
NEON,
736+
SVE,
737+
SVE2,
738+
} ARM_VEXT;
737739
#endif // TARGET_SIMD_ARM
738740
} // namespace arm_simd
739741

source/Lib/CommonLib/arm/CommonDefARM.cpp

Lines changed: 105 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,25 +40,127 @@ POSSIBILITY OF SUCH DAMAGE.
4040
4141
------------------------------------------------------------------------------------------- */
4242

43-
/** \file CommonDefX86.cpp
44-
*/
43+
/** \file CommonDefARM.cpp
44+
*/
4545

4646
#include "CommonDefARM.h"
4747

48+
#if defined( __linux__ )
49+
#include <sys/auxv.h> // getauxval
50+
#endif
51+
4852
namespace vvenc
4953
{
5054
using namespace arm_simd;
5155

56+
// Lookup table pairing each Arm SIMD level with its textual name. It is scanned
// linearly by arm_vext_to_string() (level -> name) and string_to_arm_vext()
// (name -> level). UNDEFINED is paired with the empty string. The SVE and SVE2
// rows are only compiled in when TARGET_SIMD_ARM_SVE / TARGET_SIMD_ARM_SVE2
// evaluate to non-zero, so those names are rejected by the lookups otherwise.
const static std::vector<std::pair<ARM_VEXT, std::string>> vext_names{
57+
{ UNDEFINED, "" },
58+
{ SCALAR, "SCALAR" },
59+
{ NEON, "NEON" },
60+
#if TARGET_SIMD_ARM_SVE
61+
{ SVE, "SVE" },
62+
#endif
63+
#if TARGET_SIMD_ARM_SVE2
64+
{ SVE2, "SVE2" },
65+
#endif
66+
};
67+
68+
// Returns the name registered in vext_names for the given SIMD level.
// Invokes THROW when the level has no entry in the table.
const std::string& arm_vext_to_string( ARM_VEXT vext )
{
  for( auto entry = vext_names.begin(); entry != vext_names.end(); ++entry )
  {
    if( entry->first != vext )
    {
      continue;
    }
    return entry->second;
  }

  THROW( "Invalid SIMD extension value " << vext );
}
79+
80+
// Translates a SIMD level name into its ARM_VEXT value by scanning vext_names.
// An empty name yields UNDEFINED; a name with no table entry invokes THROW.
ARM_VEXT string_to_arm_vext( const std::string& ext_name )
{
  if( !ext_name.empty() )
  {
    for( auto entry = vext_names.begin(); entry != vext_names.end(); ++entry )
    {
      if( entry->second == ext_name )
      {
        return entry->first;
      }
    }

    THROW( "Invalid SIMD Mode string: \"" << ext_name << "\"" );
  }

  return UNDEFINED;
}
97+
98+
#if defined( __linux__ )
99+
100+
// Define hwcap values ourselves: building with an old auxv header where these
101+
// hwcap values are not defined should not prevent features from being enabled.
102+
#define AARCH64_HWCAP_SVE ( 1 << 22 )
103+
#define AARCH64_HWCAP2_SVE2 ( 1 << 1 )
104+
105+
static ARM_VEXT _get_arm_extensions()
106+
{
107+
// We assume Neon is always supported for relevant Arm processors.
108+
ARM_VEXT ext = NEON;
109+
110+
#if TARGET_SIMD_ARM_SVE
111+
unsigned long hwcap = getauxval( AT_HWCAP );
112+
#endif
113+
#if TARGET_SIMD_ARM_SVE2
114+
unsigned long hwcap2 = getauxval( AT_HWCAP2 );
115+
#endif
116+
117+
#if TARGET_SIMD_ARM_SVE
118+
if( hwcap & AARCH64_HWCAP_SVE )
119+
{
120+
ext = SVE;
121+
#if TARGET_SIMD_ARM_SVE2
122+
if( hwcap2 & AARCH64_HWCAP2_SVE2 )
123+
{
124+
ext = SVE2;
125+
}
126+
#endif
127+
}
128+
#endif
129+
130+
return ext;
131+
}
132+
133+
#else
134+
135+
static ARM_VEXT _get_arm_extensions()
136+
{
137+
// We assume Neon is always supported for relevant Arm processors.
138+
// No other extensions supported on non-Linux platforms for now.
139+
return NEON;
140+
}
141+
142+
#endif
143+
52144
// Returns the currently selected Arm SIMD level and optionally changes it.
//  - request == UNDEFINED: nothing is changed, the current level is returned.
//  - otherwise: the selected level becomes `request`, unless `request` compares
//    greater than the level reported by _get_arm_extensions(), in which case
//    THROW is invoked and the selection is left untouched.
// Both function-local statics are initialised on the first call, so the
// selection starts out as the detected maximum.
// In this diff view the "-" line is the pre-commit initialiser (hard-coded
// NEON) that the two "+" lines replace.
ARM_VEXT read_arm_extension_flags( ARM_VEXT request )
53145
{
54-
static ARM_VEXT ext_flags = NEON; // We assume NEON is always supported for relevant ARM processors
146+
static ARM_VEXT max_supported = _get_arm_extensions();
147+
static ARM_VEXT ext_flags = max_supported;
55148

56149
if( request != UNDEFINED )
57150
{
151+
if( request > max_supported )
152+
{
153+
THROW( "requested SIMD level (" << request << ") not supported by current CPU (max " << max_supported << ")." );
154+
}
58155
ext_flags = request;
59156
}
60157

61158
return ext_flags;
62159
};
63160

161+
const std::string& read_arm_extension_name()
162+
{
163+
return arm_vext_to_string( read_arm_extension_flags() );
164+
}
165+
64166
} // namespace

source/Lib/CommonLib/arm/CommonDefARM.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,11 @@ namespace vvenc
5858
{
5959
using namespace arm_simd;
6060

61-
ARM_VEXT read_arm_extension_flags( ARM_VEXT request = arm_simd::UNDEFINED );
62-
// std::string read_arm_extension_name();
61+
const std::string& arm_vext_to_string( ARM_VEXT vext );
62+
ARM_VEXT string_to_arm_vext( const std::string& ext_name );
63+
64+
ARM_VEXT read_arm_extension_flags( ARM_VEXT request = arm_simd::UNDEFINED );
65+
const std::string& read_arm_extension_name();
6366

6467
} // namespace
6568

source/Lib/CommonLib/arm/InitARM.cpp

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -70,13 +70,9 @@ namespace vvenc
7070
// Reads the selected Arm SIMD level and calls _initInterpolationFilterARM<NEON>()
// for it. After this commit ("+" line) every level comparing >= NEON, i.e.
// NEON, SVE and SVE2, takes the NEON path; the removed switch ("-" lines) only
// matched NEON exactly. Lower levels leave the object untouched.
void InterpolationFilter::initInterpolationFilterARM()
7171
{
7272
auto vext = read_arm_extension_flags();
73-
switch( vext )
73+
if( vext >= NEON )
7474
{
75-
case NEON:
7675
_initInterpolationFilterARM<NEON>();
77-
break;
78-
default:
79-
break;
8076
}
8177
}
8278
#endif
@@ -85,13 +81,9 @@ void InterpolationFilter::initInterpolationFilterARM()
8581
// Reads the selected Arm SIMD level and calls _initPelBufOpsARM<NEON>() for it.
// After this commit ("+" line) every level comparing >= NEON, i.e. NEON, SVE
// and SVE2, takes the NEON path; the removed switch ("-" lines) only matched
// NEON exactly. Lower levels leave the object untouched.
void PelBufferOps::initPelBufOpsARM()
8682
{
8783
auto vext = read_arm_extension_flags();
88-
switch( vext )
84+
if( vext >= NEON )
8985
{
90-
case NEON:
9186
_initPelBufOpsARM<NEON>();
92-
break;
93-
default:
94-
break;
9587
}
9688
}
9789
#endif
@@ -100,13 +92,9 @@ void PelBufferOps::initPelBufOpsARM()
10092
// Reads the selected Arm SIMD level and calls _initRdCostARM<NEON>() for it.
// After this commit ("+" line) every level comparing >= NEON, i.e. NEON, SVE
// and SVE2, takes the NEON path; the removed switch ("-" lines) only matched
// NEON exactly. Lower levels leave the object untouched.
void RdCost::initRdCostARM()
10193
{
10294
auto vext = read_arm_extension_flags();
103-
switch( vext )
95+
if( vext >= NEON )
10496
{
105-
case NEON:
10697
_initRdCostARM<NEON>();
107-
break;
108-
default:
109-
break;
11098
}
11199
}
112100
#endif
@@ -115,13 +103,9 @@ void RdCost::initRdCostARM()
115103
// Reads the selected Arm SIMD level and calls _initMCTF_ARM<NEON>() for it.
// After this commit ("+" line) every level comparing >= NEON, i.e. NEON, SVE
// and SVE2, takes the NEON path; the removed switch ("-" lines) only matched
// NEON exactly. Lower levels leave the object untouched.
void MCTF::initMCTF_ARM()
116104
{
117105
auto vext = read_arm_extension_flags();
118-
switch( vext )
106+
if( vext >= NEON )
119107
{
120-
case NEON:
121108
_initMCTF_ARM<NEON>();
122-
break;
123-
default:
124-
break;
125109
}
126110
}
127111
#endif // ENABLE_SIMD_OPT_MCTF

source/Lib/CommonLib/x86/CommonDefX86.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ const static std::map<X86_VEXT, std::string> vext_names{ { UNDEFINED, "" }, { SC
7979
# endif
8080
# endif // !REAL_TARGET_X86
8181

82-
const std::string& vext_to_string( X86_VEXT vext )
82+
const std::string& x86_vext_to_string( X86_VEXT vext )
8383
{
8484
try
8585
{
@@ -91,7 +91,7 @@ const std::string& vext_to_string( X86_VEXT vext )
9191
}
9292
}
9393

94-
X86_VEXT string_to_vext( const std::string& ext_name )
94+
X86_VEXT string_to_x86_vext( const std::string& ext_name )
9595
{
9696
if( ext_name.empty() )
9797
{
@@ -287,7 +287,7 @@ X86_VEXT read_x86_extension_flags( X86_VEXT request )
287287

288288
// Returns the textual name of the x86 SIMD level reported by
// read_x86_extension_flags() when called without an explicit request. The "+"
// line only switches the call to the renamed helper x86_vext_to_string().
const std::string& read_x86_extension_name()
289289
{
290-
return vext_to_string( read_x86_extension_flags() );
290+
return x86_vext_to_string( read_x86_extension_flags() );
291291
}
292292

293293
} // namespace vvenc

source/Lib/CommonLib/x86/CommonDefX86.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ namespace vvenc
8787

8888
using namespace x86_simd;
8989

90-
const std::string& vext_to_string( X86_VEXT vext );
91-
X86_VEXT string_to_vext( const std::string& ext_name );
90+
const std::string& x86_vext_to_string( X86_VEXT vext );
91+
X86_VEXT string_to_x86_vext( const std::string& ext_name );
9292

9393
X86_VEXT read_x86_extension_flags( X86_VEXT request = x86_simd::UNDEFINED );
9494
const std::string& read_x86_extension_name();

source/Lib/vvenc/vvencimpl.cpp

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -802,19 +802,37 @@ const char* VVEncImpl::setSIMDExtension( const char* simdId )
802802
try
803803
# endif // HANDLE_EXCEPTION
804804
{
805-
X86_VEXT request_ext = string_to_vext( simdReqStr );
805+
#if defined( REAL_TARGET_ARM )
806+
ARM_VEXT arm_ext = string_to_arm_vext( simdReqStr );
807+
X86_VEXT x86_ext = arm_ext == arm_simd::UNDEFINED ? x86_simd::UNDEFINED
808+
: arm_ext == arm_simd::SCALAR ? x86_simd::SCALAR
809+
: SIMD_EVERYWHERE_EXTENSION_LEVEL;
810+
try
811+
{
812+
read_x86_extension_flags( x86_ext );
813+
read_arm_extension_flags( arm_ext );
814+
}
815+
catch( Exception& )
816+
{
817+
// Not using the actual message from the exception here, because we need to insert the SIMD-level name instead of
818+
// the enum.
819+
THROW( "requested SIMD level (" << simdReqStr << ") not supported by current CPU (max "
820+
<< read_arm_extension_name() << ")." );
821+
}
822+
#else
823+
X86_VEXT request_ext = string_to_x86_vext( simdReqStr );
806824
try
807825
{
808826
read_x86_extension_flags( request_ext );
809-
#if defined( TARGET_SIMD_ARM )
810-
read_arm_extension_flags( request_ext == x86_simd::UNDEFINED ? arm_simd::UNDEFINED : request_ext != x86_simd::SCALAR ? arm_simd::NEON : arm_simd::SCALAR );
811-
#endif
812827
}
813828
catch( Exception& )
814829
{
815-
// not using the actual message from the exception here, because we need to insert the SIMD-level name instead of the enum
816-
THROW( "requested SIMD level (" << simdReqStr << ") not supported by current CPU (max " << read_x86_extension_name() << ")." );
830+
// Not using the actual message from the exception here, because we need to insert the SIMD-level name instead of
831+
// the enum.
832+
THROW( "requested SIMD level (" << simdReqStr << ") not supported by current CPU (max "
833+
<< read_x86_extension_name() << ")." );
817834
}
835+
#endif
818836

819837
#if ENABLE_SIMD_OPT_BUFFER
820838
#if defined( TARGET_SIMD_X86 )
@@ -825,10 +843,14 @@ const char* VVEncImpl::setSIMDExtension( const char* simdId )
825843
#endif
826844
#endif
827845
#if ENABLE_SIMD_TRAFO
828-
g_tCoeffOps.initTCoeffOpsX86();
846+
g_tCoeffOps.initTCoeffOpsX86();
829847
#endif
830848

849+
#if defined( REAL_TARGET_ARM )
850+
return read_arm_extension_name().c_str();
851+
#else
831852
return read_x86_extension_name().c_str();
853+
#endif
832854
}
833855
#if HANDLE_EXCEPTION
834856
catch( Exception& e )
@@ -863,13 +885,15 @@ std::string VVEncImpl::getCompileInfoString()
863885
std::string VVEncImpl::createEncoderInfoStr()
864886
{
865887
std::stringstream cssCap;
866-
#if defined( TARGET_SIMD_X86 )
867-
setSIMDExtension( nullptr ); // ensure SIMD-detection is finished
888+
#if defined( TARGET_SIMD_ARM )
889+
setSIMDExtension( nullptr ); // Ensure SIMD-detection is finished
890+
cssCap << getCompileInfoString() << "[SIMD=" << read_arm_extension_name() << "]";
891+
#elif defined( TARGET_SIMD_X86 )
892+
setSIMDExtension( nullptr ); // Ensure SIMD-detection is finished
868893
cssCap << getCompileInfoString() << "[SIMD=" << read_x86_extension_name() <<"]";
869-
#else // !TARGET_SIMD_X86
894+
#else // !TARGET_SIMD_X86 && !TARGET_SIMD_ARM
870895
cssCap << getCompileInfoString() << "[SIMD=SCALAR]";
871-
#endif // !TARGET_SIMD_X86
872-
896+
#endif
873897

874898
std::string cInfoStr;
875899
cInfoStr = "VVenC, the Fraunhofer H.266/VVC Encoder, version " VVENC_VERSION;

0 commit comments

Comments
 (0)