Skip to content

Commit 871ce93

Browse files
committed
MLAS QMX Integration
* Added CMake build option onnxruntime_USE_QMX_KLEIDIAI_COEXIST * Addressed Copilot code review comments
1 parent f5846fe commit 871ce93

File tree

9 files changed

+24
-28
lines changed

9 files changed

+24
-28
lines changed

cmake/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ option(onnxruntime_USE_SVE "Build with SVE support in MLAS" OFF)
9191
option(onnxruntime_USE_ARM_NEON_NCHWC "Build with ARM Neon NCHWc kernels in MLAS" OFF)
9292

9393
option(onnxruntime_USE_KLEIDIAI "Build with KleidiAI integration in MLAS" OFF)
94+
option(onnxruntime_USE_QMX_KLEIDIAI_COEXIST "Build with QMX and Arm KLEIDIAI libraries" OFF)
9495
option(onnxruntime_BUILD_UNIT_TESTS "Build ONNXRuntime unit tests" ON)
9596
option(onnxruntime_BUILD_CSHARP "Build C# library" OFF)
9697
option(onnxruntime_BUILD_OBJC "Build Objective-C library" OFF)

cmake/external/onnxruntime_external_deps.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -846,7 +846,7 @@ if(onnxruntime_USE_KLEIDIAI)
846846
onnxruntime_fetchcontent_declare(kleidiai URL ${DEP_URL_kleidiai} URL_HASH SHA1=${DEP_SHA1_kleidiai} EXCLUDE_FROM_ALL)
847847
onnxruntime_fetchcontent_makeavailable(kleidiai)
848848
# Fetch Qualcomm's kleidiai library
849-
if(ENABLE_KLEIDIAI_QMX_COEXIST)
849+
if(onnxruntime_USE_QMX_KLEIDIAI_COEXIST)
850850
onnxruntime_fetchcontent_declare(kleidiai-qmx URL ${DEP_URL_kleidiai-qmx} URL_HASH SHA1=${DEP_SHA1_kleidiai-qmx}
851851
EXCLUDE_FROM_ALL)
852852
onnxruntime_fetchcontent_makeavailable(kleidiai-qmx)

cmake/onnxruntime_mlas.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ function(setup_kleidiai)
284284
)
285285
target_link_libraries(onnxruntime_mlas PRIVATE kleidiai)
286286
list(APPEND onnxruntime_EXTERNAL_LIBRARIES kleidiai)
287-
if(ENABLE_KLEIDIAI_QMX_COEXIST)
287+
if(onnxruntime_USE_QMX_KLEIDIAI_COEXIST)
288288
target_link_libraries(onnxruntime_mlas PRIVATE kleidiai-qmx)
289289
target_compile_definitions(onnxruntime_mlas PRIVATE ENABLE_QMX_KERNELS=1)
290290
list(APPEND onnxruntime_EXTERNAL_LIBRARIES kleidiai-qmx)
@@ -308,7 +308,7 @@ function(setup_kleidiai)
308308
FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
309309
endif()
310310

311-
if(ENABLE_KLEIDIAI_QMX_COEXIST)
311+
if(onnxruntime_USE_QMX_KLEIDIAI_COEXIST)
312312
install(TARGETS kleidiai-qmx EXPORT ${PROJECT_NAME}Targets
313313
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
314314
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}

onnxruntime/core/mlas/lib/kai_ukernel_interface.cpp

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
#include "kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla.h"
2020
#include "kai/ukernels/matmul/matmul_clamp_f32_f32p_f32p/kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_sme2_mopa.h"
2121
#include "kai/ukernels/matmul/matmul_clamp_f32_f32p_f32p/kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1b_2vlx2vl_sme_mopa.h"
22-
#if(ENABLE_QMX_KERNELS)
22+
#if defined(ENABLE_QMX_KERNELS)
2323
#include "kai/ukernels/matmul/matmul_clamp_f32_f32p_f32p/kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_qmx_mopa.h"
2424
#endif // ENABLE_QMX_KERNELS
2525

@@ -125,7 +125,7 @@ const kai_matmul_clamp_f32_f32p_f32p_ukernel sgemm_gemm_sme2 =
125125
kai_get_dst_size_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_sme2_mopa,
126126
kai_run_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_sme2_mopa};
127127

128-
#if(ENABLE_QMX_KERNELS)
128+
#if defined(ENABLE_QMX_KERNELS)
129129
const kai_matmul_clamp_f32_f32p_f32p_ukernel sgemm_gemm_qmx =
130130
{kai_get_m_step_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_qmx_mopa,
131131
kai_get_n_step_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_qmx_mopa,
@@ -160,18 +160,17 @@ const kai_matmul_clamp_f32_f32p_f32p_ukernel& GetKleidiAISGemmUKernel() {
160160
if (MLAS_CPUIDINFO::GetCPUIDInfo().HasArm_SME2()) {
161161
return sgemm_gemm_sme2;
162162
} else {
163-
#if(ENABLE_QMX_KERNELS)
164-
if (ArmKleidiAI::vendor_name.compare("Qualcomm") == 0)
165-
{
166-
KLEIDIAI_KERNEL_LOG("SGEMM: Using QMX Kernel");
167-
return sgemm_gemm_qmx;
168-
169-
} else {
170-
return sgemm_gemm_sme;
171-
}
172-
#else
163+
#if defined(ENABLE_QMX_KERNELS)
164+
if (ArmKleidiAI::vendor_name.compare("Qualcomm") == 0)
165+
{
166+
KLEIDIAI_KERNEL_LOG("SGEMM: Using QMX Kernel");
167+
return sgemm_gemm_qmx;
168+
} else {
169+
return sgemm_gemm_sme;
170+
}
171+
#else
173172
return sgemm_gemm_sme;
174-
#endif // ENABLE_QMX_KERNELS
173+
#endif // ENABLE_QMX_KERNELS
175174
}
176175
}
177176

onnxruntime/core/mlas/lib/kleidiai/convolve_kleidiai.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
#include "kai/ukernels/matmul/imatmul_clamp_f32_f32p_f32p/kai_imatmul_clamp_f32_f32p2vlx1_f32p2vlx1b_2vlx2vl_sme2_mopa.h"
1717
#include "kai/ukernels/matmul/pack/kai_lhs_imatmul_pack_x32p2vlx1_x32p_sme.h"
1818
#include "kai/ukernels/matmul/pack/kai_rhs_imatmul_pack_kxn_x32p2vlx1b_x32_x32_sme.h"
19-
#if(ENABLE_QMX_KERNELS)
19+
#if defined(ENABLE_QMX_KERNELS)
2020
#include "kai/ukernels/matmul/imatmul_clamp_f32_f32p_f32p/kai_imatmul_clamp_f32_f32p2vlx1_f32p2vlx1b_2vlx2vl_qmx_mopa.h"
2121
#endif // ENABLE_QMX_KERNELS
2222

@@ -600,7 +600,7 @@ static void ConvolveSme(const size_t co, //channels out
600600
-std::numeric_limits<float>::max(), std::numeric_limits<float>::max()
601601
);
602602
} else {
603-
#if(ENABLE_QMX_KERNELS)
603+
#if defined(ENABLE_QMX_KERNELS)
604604
if (ArmKleidiAI::vendor_name.compare("Qualcomm") == 0)
605605
{
606606
KLEIDIAI_KERNEL_LOG("kai_run_imatmul_clamp_f32_f32p2vlx1_f32p2vlx1b_2vlx2vl_qmx_mopa" << " M=" << TileSizeM << " N=" << TileSizeN << " k_chunk_count=" << (d_kh * d_kw) << " k_chunk_length=" << ci);

onnxruntime/core/mlas/lib/kleidiai/qgemm_kleidiai.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
#include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme2_mopa.h"
1414
#include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme_mopa.h"
1515
#include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme2_dot.h"
16-
#if(ENABLE_QMX_KERNELS)
16+
#if defined(ENABLE_QMX_KERNELS)
1717
#include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_qmx_mopa.h"
1818
#endif // ENABLE_QMX_KERNELS
1919

@@ -125,7 +125,7 @@ ArmKleidiAI::MlasDynamicQGemmBatch(
125125
}
126126
else {
127127

128-
#if(ENABLE_QMX_KERNELS)
128+
#if defined(ENABLE_QMX_KERNELS)
129129
if (ArmKleidiAI::vendor_name.compare("Qualcomm") == 0)
130130
{
131131
KLEIDIAI_KERNEL_LOG("kai_run_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_qmx_mopa");

onnxruntime/core/mlas/lib/kleidiai/sgemm_kleidiai.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
#include "mlasi_kleidiai.h"
1717
#include "kai_ukernel_interface.h"
1818

19-
#if(ENABLE_QMX_KERNELS)
19+
#if defined(ENABLE_QMX_KERNELS)
2020
#include "kai/ukernels/matmul/matmul_clamp_f32_f32p_f32p/kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_qmx_mopa.h"
2121
#endif // ENABLE_QMX_KERNELS
2222

onnxruntime/core/mlas/lib/qgemm.cpp

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -206,13 +206,7 @@ MLASCALL
206206
MlasIsDynamicQGemmAvailable()
207207
{
208208
#if defined(USE_KLEIDIAI)
209-
if(ArmKleidiAI::UseSME2 || ArmKleidiAI::UseSME) {
210-
return true;
211-
}
212-
else {
213-
return false;
214-
}
215-
209+
return (ArmKleidiAI::UseSME2 || ArmKleidiAI::UseSME);
216210
#else
217211
return false;
218212
#endif

tools/ci_build/build.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -897,6 +897,8 @@ def generate_build_tree(
897897
if not args.no_kleidiai:
898898
cmake_args += ["-Donnxruntime_USE_KLEIDIAI=ON"]
899899
if args.use_qmx:
900+
cmake_args += ["-Donnxruntime_USE_QMX_KLEIDIAI_COEXIST=ON"]
901+
# QMX KleidiAI build option
900902
cmake_args += ["-DENABLE_KLEIDIAI_QMX_COEXIST=ON"]
901903

902904
if args.enable_arm_neon_nchwc:

0 commit comments

Comments
 (0)