Skip to content

Commit 510dd14

Browse files
S390x simd implementation (#25757)
### Description

This change adds SIMD-optimized implementations of functions for s390x. The implementation is based on the similar functions for ppc64le.

#### Build System Integration (onnxruntime_mlas.cmake):

* Adds a new S390X flag to the CMake build system to detect the target architecture.
* Includes new source files specific to s390x (SgemmKernel.cpp, DgemmKernel.cpp, Quantize.cpp, qgemm_kernel_zvector.cpp, etc.).
* Sets the necessary compiler flags (-mvx, -mzvector, -march=z15) to enable the z/Vector extensions.

#### Platform Abstraction (mlasi.h, platform.cpp):

* Defines MLAS_TARGET_S390X and MLAS_ZVECTOR_INTRINSICS for conditional compilation.
* Integrates the new s390x kernels into the MLAS_PLATFORM dispatch table.
* platform.cpp now checks for z/Vector support at runtime using getauxval(AT_HWCAP) and HWCAP_S390_VXE, allowing it to fall back to the scalar implementations if hardware support is not present.

#### New Kernel Implementations:

* qgemm_kernel_zvector.cpp: Implements quantized integer matrix multiplication. This is the core of the performance improvement for quantized models.
* SgemmKernelZVECTOR.cpp / DgemmKernelZVECTOR.h: Implement single- and double-precision floating-point GEMM.
* QuantizeZVECTOR.cpp / Quantize.cpp: Implement the quantization and requantization kernels.
* FgemmKernelZVECTOR.h: A generic header providing templates and macros for both single- and double-precision GEMM, similar to the ppc64le implementation.

### Motivation and Context

This change improves the performance of ONNX Runtime on s390x.
1 parent e0569fd commit 510dd14

28 files changed

+4397
-35
lines changed

cmake/external/eigen.cmake

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,13 @@ set(EIGEN_BUILD_LAPACK OFF CACHE BOOL "" FORCE)
44
set(EIGEN_BUILD_PKGCONFIG OFF CACHE BOOL "" FORCE)
55
set(EIGEN_BUILD_CMAKE_PACKAGE ON CACHE BOOL "" FORCE)
66

7+
# Location of the s390x build patch shipped in this project's patches/eigen directory.
set(PATCH_EIGEN_S390X "${PROJECT_SOURCE_DIR}/patches/eigen/s390x-build.patch")

# Declare the Eigen3 dependency. After download, the patch above is applied to
# the fetched sources with the configured patch executable (-p1 strips the
# leading path component of the file names recorded in the patch).
# NOTE(review): in this snippet the patch step is not guarded by any
# architecture check, so it appears to run on every platform -- confirm intended.
onnxruntime_fetchcontent_declare(
  Eigen3
  URL ${DEP_URL_eigen}
  URL_HASH SHA1=${DEP_SHA1_eigen}
  PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PATCH_EIGEN_S390X}
  EXCLUDE_FROM_ALL
)
onnxruntime_fetchcontent_makeavailable(Eigen3)

cmake/onnxruntime_mlas.cmake

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,8 @@ else()
384384
set(X86_64 TRUE)
385385
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^loongarch64.*")
386386
set(LOONGARCH64 TRUE)
387+
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^s390x$")
388+
set(S390X TRUE)
387389
endif()
388390
endif()
389391

@@ -792,6 +794,24 @@ endif()
792794
set(MLAS_SOURCE_IS_NOT_SET 0)
793795
endif()
794796
endif()
797+
# s390x: select the z/Vector kernel sources as the MLAS platform sources.
# Only taken while MLAS_SOURCE_IS_NOT_SET is still true.
if(S390X AND MLAS_SOURCE_IS_NOT_SET)
  set(mlas_platform_srcs
    ${MLAS_SRC_DIR}/s390x/SgemmKernel.cpp
    ${MLAS_SRC_DIR}/s390x/SgemmKernelZVECTOR.cpp
    ${MLAS_SRC_DIR}/dgemm.cpp
    ${MLAS_SRC_DIR}/s390x/DgemmKernel.cpp
    ${MLAS_SRC_DIR}/s390x/Quantize.cpp
    ${MLAS_SRC_DIR}/s390x/QuantizeZVECTOR.cpp
    ${MLAS_SRC_DIR}/s390x/qgemm_kernel_zvector.cpp
  )

  # Both single-precision GEMM sources are compiled with SINGLE defined.
  set_source_files_properties(
    ${MLAS_SRC_DIR}/s390x/SgemmKernel.cpp
    ${MLAS_SRC_DIR}/s390x/SgemmKernelZVECTOR.cpp
    PROPERTIES COMPILE_FLAGS "-DSINGLE"
  )

  # Enable the z/Vector language extensions and target the z15 machine level.
  # NOTE(review): this appends to CMAKE_CXX_FLAGS in the current scope, so the
  # flags are not limited to the sources listed above -- confirm that is intended.
  string(APPEND CMAKE_CXX_FLAGS " -mvx -mzvector -march=z15")

  # In a single-architecture build, mark the platform sources as chosen so the
  # later fallback branches are skipped.
  if(NOT ONNXRUNTIME_MLAS_MULTI_ARCH)
    set(MLAS_SOURCE_IS_NOT_SET 0)
  endif()
endif()
795815
if(NOT ONNXRUNTIME_MLAS_MULTI_ARCH AND MLAS_SOURCE_IS_NOT_SET)
796816
file(GLOB_RECURSE mlas_platform_srcs
797817
"${MLAS_SRC_DIR}/scalar/*.cpp")

0 commit comments

Comments
 (0)