@@ -45,6 +45,8 @@ onnxruntime_add_static_library(onnxruntime_mlas
4545 ${MLAS_SRC_DIR} /qdwconv_kernelsize.cpp
4646 ${MLAS_SRC_DIR} /qnbitgemm.h
4747 ${MLAS_SRC_DIR} /qnbitgemm.cpp
48+ ${MLAS_SRC_DIR} /qlutgemm.h
49+ ${MLAS_SRC_DIR} /qlutgemm.cpp
4850 ${MLAS_SRC_DIR} /sqnbitgemm_q8_block.h
4951 ${MLAS_SRC_DIR} /flashattn.cpp
5052 ${MLAS_SRC_DIR} /cast.cpp
@@ -113,6 +115,7 @@ function(setup_mlas_source_for_windows)
113115 ${MLAS_SRC_DIR} /eltwise_kernel_neon.cpp
114116 ${MLAS_SRC_DIR} /eltwise_kernel_neon_fp16.cpp
115117 ${MLAS_SRC_DIR} /sqnbitgemm_kernel_neon_int8_i8mm.cpp
118+ ${MLAS_SRC_DIR} /sconv_nchw_kernel_neon.cpp
116119 )
117120
118121 set (mlas_platform_preprocess_srcs
@@ -209,6 +212,8 @@ function(setup_mlas_source_for_windows)
209212 ${MLAS_SRC_DIR} /qgemm_kernel_sse.cpp
210213 ${MLAS_SRC_DIR} /qgemm_kernel_sse41.cpp
211214 ${MLAS_SRC_DIR} /intrinsics/avx512/quantize_avx512f.cpp
215+ ${MLAS_SRC_DIR} /sqnbitgemm_lut_kernel_avx2.h
216+ ${MLAS_SRC_DIR} /sqnbitgemm_lut_kernel_avx2.cpp
212217 ${MLAS_SRC_DIR} /sqnbitgemm_kernel_avx2.cpp
213218 ${MLAS_SRC_DIR} /sqnbitgemm_kernel_avx512.cpp
214219 ${MLAS_SRC_DIR} /sqnbitgemm_kernel_avx512vnni.cpp
@@ -284,6 +289,11 @@ function(setup_kleidiai)
284289 )
285290 target_link_libraries (onnxruntime_mlas PRIVATE kleidiai)
286291 list (APPEND onnxruntime_EXTERNAL_LIBRARIES kleidiai)
292+ if (onnxruntime_USE_QMX_KLEIDIAI_COEXIST)
293+ target_link_libraries (onnxruntime_mlas PRIVATE kleidiai-qmx)
294+ target_compile_definitions (onnxruntime_mlas PRIVATE ENABLE_QMX_KERNELS=1)
295+ list (APPEND onnxruntime_EXTERNAL_LIBRARIES kleidiai-qmx)
296+ endif ()
287297 set (onnxruntime_EXTERNAL_LIBRARIES ${onnxruntime_EXTERNAL_LIBRARIES} PARENT_SCOPE)
288298
289299 # If KLEIDIAI_DEBUG is enabled that implies both DEBUG and KERNEL messages.
@@ -302,13 +312,21 @@ function(setup_kleidiai)
302312 RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
303313 FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR} )
304314 endif ()
315+
316+ if (onnxruntime_USE_QMX_KLEIDIAI_COEXIST)
317+ install (TARGETS kleidiai-qmx EXPORT ${PROJECT_NAME} Targets
318+ ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
319+ LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
320+ RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
321+ FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR} )
322+ endif ()
305323endfunction ()
306324
307325function (setup_arm_neon_nchwc)
308326 target_sources (onnxruntime_mlas PRIVATE
309- ${MLAS_SRC_DIR} /sconv .h
310- ${MLAS_SRC_DIR} /sconv_kernel_neon .cpp
311- ${MLAS_SRC_DIR} /spool_kernel_neon .cpp
327+ ${MLAS_SRC_DIR} /sconv_nchwc_kernel_neon .h
328+ ${MLAS_SRC_DIR} /sconv_nchwc_kernel_neon .cpp
329+ ${MLAS_SRC_DIR} /spool_nchwc_kernel_neon .cpp
312330 )
313331 list (APPEND mlas_private_compile_definitions MLAS_USE_ARM_NEON_NCHWC)
314332 set (mlas_private_compile_definitions ${mlas_private_compile_definitions} PARENT_SCOPE)
@@ -460,6 +478,7 @@ else()
460478 ${MLAS_SRC_DIR} /eltwise_kernel_neon.h
461479 ${MLAS_SRC_DIR} /eltwise_kernel_neon.cpp
462480 ${MLAS_SRC_DIR} /sqnbitgemm_kernel_neon_int8_i8mm.cpp
481+ ${MLAS_SRC_DIR} /sconv_nchw_kernel_neon.cpp
463482 )
464483
465484 # Conditionally add the SVE implementation if compiler supports it
@@ -496,6 +515,7 @@ else()
496515 ${MLAS_SRC_DIR} /qgemm_kernel_smmla.cpp
497516 ${MLAS_SRC_DIR} /qgemm_kernel_ummla.cpp
498517 ${MLAS_SRC_DIR} /sbgemm_kernel_neon.cpp
518+ ${MLAS_SRC_DIR} /sbconv_kernel_neon.cpp
499519 ${MLAS_SRC_DIR} /cast_kernel_neon.cpp
500520 ${MLAS_SRC_DIR} /hqnbitgemm_kernel_neon_fp16.cpp
501521 ${MLAS_SRC_DIR} /rotary_embedding_kernel_neon_fp16.cpp
@@ -511,6 +531,7 @@ else()
511531 set_source_files_properties (${MLAS_SRC_DIR} /dwconv.cpp PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+fp16 " )
512532 set_source_files_properties (${MLAS_SRC_DIR} /pooling_fp16.cpp PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+fp16 " )
513533 set_source_files_properties (${MLAS_SRC_DIR} /sbgemm_kernel_neon.cpp PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+bf16 " )
534+ set_source_files_properties (${MLAS_SRC_DIR} /sbconv_kernel_neon.cpp PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+bf16 " )
514535 set_source_files_properties (${MLAS_SRC_DIR} /cast_kernel_neon.cpp PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+fp16 " )
515536 set_source_files_properties (${MLAS_SRC_DIR} /hqnbitgemm_kernel_neon_fp16.cpp PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+fp16 " )
516537 set_source_files_properties (${MLAS_SRC_DIR} /rotary_embedding_kernel_neon_fp16.cpp PROPERTIES COMPILE_FLAGS " -march=armv8.2-a+fp16 " )
@@ -693,6 +714,8 @@ else()
693714 ${MLAS_SRC_DIR} /intrinsics/avx2/qdwconv_avx2.cpp
694715 ${MLAS_SRC_DIR} /intrinsics/avx2/saturation_check_avx2.cpp
695716 ${MLAS_SRC_DIR} /sqnbitgemm_kernel_avx2.cpp
717+ ${MLAS_SRC_DIR} /sqnbitgemm_lut_kernel_avx2.h
718+ ${MLAS_SRC_DIR} /sqnbitgemm_lut_kernel_avx2.cpp
696719 ${MLAS_SRC_DIR} /rotary_embedding_kernel_avx2.h
697720 ${MLAS_SRC_DIR} /rotary_embedding_kernel_avx2.cpp
698721 ${MLAS_SRC_DIR} /rotary_embedding_kernel_avx2.cpp
0 commit comments