Skip to content

Commit 1377426

Browse files
authored
Allow MSVC to build KleidiAI in Windows on Arm environments. (#26995)
### Description Remove the limitations on using onnxruntime_USE_KLEIDIAI in a Windows on Arm environment. ### Motivation and Context Historically, the KleidiAI build had difficulties with the Microsoft compiler (MSVC) for Arm environments. As a result, a hard exclusion of the combination of onnxruntime_USE_KLEIDIAI and MSVC was added and subsequently consolidated into cmake/CMakeLists.txt by [this](2e8a45a) commit. The problems in KleidiAI were resolved in its v1.14.0 release, and v1.15.0 was introduced via [this](8fe4804) commit. This PR removes the limitation, allowing MSVC to be used to compile with onnxruntime_USE_KLEIDIAI enabled in a Windows on Arm environment. In addition, there were legacy restrictions in CMakeLists.txt relating to the DOTPROD and I8MM CPU features. Support for these features is already handled in the KleidiAI build, so the restrictions are removed as well. ### Verification Following the Windows build instructions [here](https://onnxruntime.ai/docs/build/inferencing.html#windows), KleidiAI and its associated logic in MLAS will be built when ARM64 is detected. **Note**: As is made clear in these build instructions, MSVC must include support for ARM64, and both Python and CMake must be native ARM64 builds. Signed-off-by: Colm Donelan <colm.donelan@arm.com>
1 parent 4b50b2f commit 1377426

File tree

6 files changed

+13
-32
lines changed

6 files changed

+13
-32
lines changed

cmake/CMakeLists.txt

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -539,25 +539,6 @@ if(onnxruntime_USE_KLEIDIAI)
539539
set(${is_supported_var} FALSE PARENT_SCOPE)
540540
return()
541541
endif()
542-
543-
# check for compiler support
544-
if(MSVC)
545-
# TODO detect on MSVC
546-
else()
547-
check_cxx_compiler_flag(-march=armv8.2-a+dotprod HAS_ARM64_DOTPROD)
548-
check_cxx_compiler_flag(-march=armv8.2-a+i8mm HAS_ARM64_I8MM)
549-
if(NOT HAS_ARM64_DOTPROD)
550-
message(WARNING "The compiler doesn't support dotprod instructions.")
551-
endif()
552-
if(NOT HAS_ARM64_I8MM)
553-
message(WARNING "The compiler doesn't support i8mm instructions.")
554-
endif()
555-
if(NOT HAS_ARM64_DOTPROD OR NOT HAS_ARM64_I8MM)
556-
set(${is_supported_var} FALSE PARENT_SCOPE)
557-
return()
558-
endif()
559-
endif()
560-
561542
set(${is_supported_var} TRUE PARENT_SCOPE)
562543
endfunction()
563544

onnxruntime/contrib_ops/cpu/quantization/dynamic_quantize_matmul.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ class DynamicQuantizeMatMul final : public MatMulIntegerToFloatBase {
163163

164164
Status Compute(OpKernelContext* context) const override;
165165

166-
#if defined(USE_KLEIDIAI) && !defined(_MSC_VER)
166+
#if defined(USE_KLEIDIAI)
167167
Status PrePack(const Tensor& tensor, int input_idx, AllocatorPtr alloc,
168168
/*out*/ bool& is_packed,
169169
/*out*/ PrePackedWeights* prepacked_weights) override {
@@ -307,7 +307,7 @@ class DynamicQuantizeMatMul final : public MatMulIntegerToFloatBase {
307307
private:
308308
// Indicates when MlasDynamicQGemmBatch() can be used
309309
bool can_use_dynamic_quant_mlas_{false};
310-
#if defined(USE_KLEIDIAI) && !defined(_MSC_VER)
310+
#if defined(USE_KLEIDIAI)
311311
// Indicates that the biases are a constant input and thus already quantized / packed
312312
bool dynamic_quant_mlas_bias_data_was_packed_{false};
313313
#endif
@@ -382,7 +382,7 @@ Status DynamicQuantizeMatMul::Compute(OpKernelContext* ctx) const {
382382
}
383383
// Guard against KleidiAI functions being called in non kleidi builds
384384
// TODO: migrate to a suitable override function call for kleidi dynamic qgemm function calls
385-
#if defined(USE_KLEIDIAI) && !defined(_MSC_VER)
385+
#if defined(USE_KLEIDIAI)
386386
else {
387387
MatMulComputeHelper helper;
388388
ORT_RETURN_IF_ERROR(helper.Compute(ctx->Input<Tensor>(IN_A)->Shape(),

onnxruntime/core/mlas/inc/mlas.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2116,7 +2116,7 @@ MlasFlashAttention(
21162116
MLAS_THREADPOOL* ThreadPool
21172117
);
21182118

2119-
#if defined(USE_KLEIDIAI) && !defined(_MSC_VER)
2119+
#if defined(USE_KLEIDIAI)
21202120
/**
21212121
* @brief Function to override the packing mechanism decision if kleidi ai is included
21222122
* @param enable enable kleidiai packing (allow or disallow depending on true/false)

onnxruntime/core/mlas/lib/platform.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ Module Name:
1919
#ifdef MLAS_USE_SVE
2020
#include "sve/mlasi_sve.h"
2121
#endif
22-
#if defined(USE_KLEIDIAI) && !defined(_MSC_VER)
22+
#if defined(USE_KLEIDIAI)
2323
#include "kleidiai/mlasi_kleidiai.h"
2424
#endif
2525

@@ -603,7 +603,7 @@ Return Value:
603603
this->ConvSymS8S8Dispatch = &MlasConvSymS8DispatchDot;
604604
}
605605

606-
#if defined(USE_KLEIDIAI) && !defined(_MSC_VER)
606+
#if defined(USE_KLEIDIAI)
607607
if(MLAS_CPUIDINFO::GetCPUIDInfo().HasArm_SME()){
608608
this->MlasGemmBatchOverride = ArmKleidiAI::MlasGemmBatch;
609609
this->MlasGemmPackBSizeOverride = ArmKleidiAI::MlasGemmPackBSize;

onnxruntime/core/mlas/lib/qgemm.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ Module Name:
1919
#include "qgemm.h"
2020

2121
// TODO: When overrides are implemented, remove this
22-
#if defined(USE_KLEIDIAI) && !defined(_MSC_VER)
22+
#if defined(USE_KLEIDIAI)
2323
#include "kleidiai/mlasi_kleidiai.h"
2424
#endif
2525

@@ -205,7 +205,7 @@ bool
205205
MLASCALL
206206
MlasIsDynamicQGemmAvailable()
207207
{
208-
#if defined(USE_KLEIDIAI) && !defined(_MSC_VER)
208+
#if defined(USE_KLEIDIAI)
209209
return ArmKleidiAI::UseSME2;
210210
#else
211211
return false;
@@ -222,7 +222,7 @@ MlasDynamicQGemmBatch (
222222
) {
223223
assert(MlasIsDynamicQGemmAvailable());
224224

225-
#if defined(USE_KLEIDIAI) && !defined(_MSC_VER)
225+
#if defined(USE_KLEIDIAI)
226226
//No fallback
227227
ArmKleidiAI::MlasDynamicQGemmBatch(Shape, DataParams, BatchN, ThreadPool);
228228
#endif
@@ -346,7 +346,7 @@ MlasDynamicQgemmPackBSize(
346346
assert(MlasIsDynamicQGemmAvailable());
347347

348348
size_t bytes = 0;
349-
#if defined(USE_KLEIDIAI) && !defined(_MSC_VER)
349+
#if defined(USE_KLEIDIAI)
350350
//No fallback available
351351
//TODO: Insert Override
352352
bytes = ArmKleidiAI::MlasDynamicQgemmPackBSize(N, K);
@@ -440,7 +440,7 @@ MlasDynamicQgemmPackB(
440440
{
441441
assert(MlasIsDynamicQGemmAvailable());
442442

443-
#if defined(USE_KLEIDIAI) && !defined(_MSC_VER)
443+
#if defined(USE_KLEIDIAI)
444444
//No fallback
445445
ArmKleidiAI::MlasDynamicQgemmPackB(N, K, B, Scales, Bias, PackedB);
446446
#endif

onnxruntime/core/mlas/lib/sgemm.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1670,7 +1670,7 @@ Return Value:
16701670
// Compute the number of bytes required to hold the packed buffer.
16711671
//
16721672
// KleidiAI or other override
1673-
#if defined(USE_KLEIDIAI) && !defined(_MSC_VER)
1673+
#if defined(USE_KLEIDIAI)
16741674
if (GetMlasPlatform().MlasGemmPackBSizeOverride != nullptr &&
16751675
// TODO: Remove once KAI supports transposing for A
16761676
TransA != CBLAS_TRANSPOSE::CblasTrans) {
@@ -1737,7 +1737,7 @@ Return Value:
17371737
17381738
--*/
17391739
{
1740-
#if defined(USE_KLEIDIAI) && !defined(_MSC_VER)
1740+
#if defined(USE_KLEIDIAI)
17411741
if (GetMlasPlatform().MlasGemmPackBOverride != nullptr &&
17421742
// TODO: Remove once KAI supports transposing for A
17431743
TransA != CBLAS_TRANSPOSE::CblasTrans &&

0 commit comments

Comments
 (0)