Skip to content

Internal change only. #8224

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 28 additions & 28 deletions build_params.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -36,115 +36,115 @@ def xnnpack_select_if(cond = None, val_true = [], val_false = []):

def xnnpack_configurable_defines():
return xnnpack_select_if(
":cpuinfo_enabled",
"//:cpuinfo_enabled",
["XNN_ENABLE_CPUINFO=1"],
["XNN_ENABLE_CPUINFO=0"],
) + xnnpack_select_if(
":memopt_enabled",
"//:memopt_enabled",
["XNN_ENABLE_MEMOPT=1"],
["XNN_ENABLE_MEMOPT=0"],
) + xnnpack_select_if(
":sparse_enabled",
"//:sparse_enabled",
["XNN_ENABLE_SPARSE=1"],
["XNN_ENABLE_SPARSE=0"],
) + xnnpack_select_if(
":assembly_enabled",
"//:assembly_enabled",
["XNN_ENABLE_ASSEMBLY=1"],
["XNN_ENABLE_ASSEMBLY=0"],
) + xnnpack_select_if(
":arm_fp16_scalar_enabled",
"//:arm_fp16_scalar_enabled",
["XNN_ENABLE_ARM_FP16_SCALAR=1"],
["XNN_ENABLE_ARM_FP16_SCALAR=0"],
) + xnnpack_select_if(
":arm_fp16_vector_enabled",
"//:arm_fp16_vector_enabled",
["XNN_ENABLE_ARM_FP16_VECTOR=1"],
["XNN_ENABLE_ARM_FP16_VECTOR=0"],
) + xnnpack_select_if(
":arm_bf16_enabled",
"//:arm_bf16_enabled",
["XNN_ENABLE_ARM_BF16=1"],
["XNN_ENABLE_ARM_BF16=0"],
) + xnnpack_select_if(
":arm_dotprod_enabled",
"//:arm_dotprod_enabled",
["XNN_ENABLE_ARM_DOTPROD=1"],
["XNN_ENABLE_ARM_DOTPROD=0"],
) + xnnpack_select_if(
":arm_i8mm_enabled",
"//:arm_i8mm_enabled",
["XNN_ENABLE_ARM_I8MM=1"],
["XNN_ENABLE_ARM_I8MM=0"],
) + xnnpack_select_if(
":riscv_fp16_vector_enabled",
"//:riscv_fp16_vector_enabled",
["XNN_ENABLE_RISCV_FP16_VECTOR=1"],
["XNN_ENABLE_RISCV_FP16_VECTOR=0"],
) + xnnpack_select_if(
":avx512amx_enabled",
"//:avx512amx_enabled",
["XNN_ENABLE_AVX512AMX=1"],
["XNN_ENABLE_AVX512AMX=0"],
) + xnnpack_select_if(
":avx512fp16_enabled",
"//:avx512fp16_enabled",
["XNN_ENABLE_AVX512FP16=1"],
["XNN_ENABLE_AVX512FP16=0"],
) + xnnpack_select_if(
":avx512bf16_enabled",
"//:avx512bf16_enabled",
["XNN_ENABLE_AVX512BF16=1"],
["XNN_ENABLE_AVX512BF16=0"],
) + xnnpack_select_if(
":avxvnni_enabled",
"//:avxvnni_enabled",
["XNN_ENABLE_AVXVNNI=1"],
["XNN_ENABLE_AVXVNNI=0"],
) + xnnpack_select_if(
":avxvnniint8_enabled",
"//:avxvnniint8_enabled",
["XNN_ENABLE_AVXVNNIINT8=1"],
["XNN_ENABLE_AVXVNNIINT8=0"],
) + xnnpack_select_if(
":avx512f_enabled",
"//:avx512f_enabled",
["XNN_ENABLE_AVX512F=1"],
["XNN_ENABLE_AVX512F=0"],
) + xnnpack_select_if(
":avx256skx_enabled",
"//:avx256skx_enabled",
["XNN_ENABLE_AVX256SKX=1"],
["XNN_ENABLE_AVX256SKX=0"],
) + xnnpack_select_if(
":avx256vnni_enabled",
"//:avx256vnni_enabled",
["XNN_ENABLE_AVX256VNNI=1"],
["XNN_ENABLE_AVX256VNNI=0"],
) + xnnpack_select_if(
":avx256vnnigfni_enabled",
"//:avx256vnnigfni_enabled",
["XNN_ENABLE_AVX256VNNIGFNI=1"],
["XNN_ENABLE_AVX256VNNIGFNI=0"],
) + xnnpack_select_if(
":avx512skx_enabled",
"//:avx512skx_enabled",
["XNN_ENABLE_AVX512SKX=1"],
["XNN_ENABLE_AVX512SKX=0"],
) + xnnpack_select_if(
":avx512vbmi_enabled",
"//:avx512vbmi_enabled",
["XNN_ENABLE_AVX512VBMI=1"],
["XNN_ENABLE_AVX512VBMI=0"],
) + xnnpack_select_if(
":avx512vnni_enabled",
"//:avx512vnni_enabled",
["XNN_ENABLE_AVX512VNNI=1"],
["XNN_ENABLE_AVX512VNNI=0"],
) + xnnpack_select_if(
":avx512vnnigfni_enabled",
"//:avx512vnnigfni_enabled",
["XNN_ENABLE_AVX512VNNIGFNI=1"],
["XNN_ENABLE_AVX512VNNIGFNI=0"],
) + xnnpack_select_if(
":hvx_enabled",
"//:hvx_enabled",
["XNN_ENABLE_HVX=1"],
["XNN_ENABLE_HVX=0"],
) + xnnpack_select_if(
":kleidiai_enabled",
"//:kleidiai_enabled",
["XNN_ENABLE_KLEIDIAI=1"],
["XNN_ENABLE_KLEIDIAI=0"],
) + xnnpack_select_if(
":arm_sme_enabled",
"//:arm_sme_enabled",
["XNN_ENABLE_ARM_SME=1"],
["XNN_ENABLE_ARM_SME=0"],
) + xnnpack_select_if(
":arm_sme2_enabled",
"//:arm_sme2_enabled",
["XNN_ENABLE_ARM_SME2=1"],
["XNN_ENABLE_ARM_SME2=0"],
) + xnnpack_select_if(
":wasm_revectorize_enabled",
"//:wasm_revectorize_enabled",
["XNN_ENABLE_WASM_REVECTORIZE=1"],
["XNN_ENABLE_WASM_REVECTORIZE=0"],
) + xnnpack_slinky_defines()
Expand Down
108 changes: 60 additions & 48 deletions src/configs/hardware-config.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#if XNN_ENABLE_CPUINFO
#include <cpuinfo.h>
Expand Down Expand Up @@ -68,58 +70,68 @@ static struct xnn_hardware_config hardware_config = {0};

XNN_INIT_ONCE_GUARD(hardware);

static void init_hardware_config(void) {
#if XNN_ARCH_ARM64 || XNN_ARCH_ARM
#if XNN_PLATFORM_WINDOWS
SYSTEM_INFO system_info;
GetSystemInfo(&system_info);
switch (system_info.wProcessorLevel) {
case 0x803: // Kryo 385 Silver
hardware_config.use_arm_neon_fp16_arith = true;
break;
default:
// Assume that Dot Product support implies FP16 support.
// ARM manuals don't guarantee that, but it holds in practice.
hardware_config.use_arm_neon_fp16_arith = !!IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE);
break;
}
hardware_config.use_arm_fp16_arith = hardware_config.use_arm_neon_fp16_arith;
// TODO(b/409244409): Remove before end of 2025/Q2.
#if XNN_ARCH_ARM64
int32_t xnn_enable_arm_sme2_default = XNN_ENABLE_ARM_SME2;
#endif // XNN_ARCH_ARM64

hardware_config.use_arm_neon_bf16 = false;
hardware_config.use_arm_neon_dot = !!IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE);
#else
hardware_config.use_arm_fp16_arith = cpuinfo_has_arm_fp16_arith();
hardware_config.use_arm_neon_fp16_arith = cpuinfo_has_arm_neon_fp16_arith();
hardware_config.use_arm_neon_bf16 = cpuinfo_has_arm_neon_bf16();
hardware_config.use_arm_neon_dot = cpuinfo_has_arm_neon_dot();
#endif
hardware_config.use_arm_vfpv3 = cpuinfo_has_arm_vfpv3();
hardware_config.use_arm_neon = cpuinfo_has_arm_neon();
hardware_config.use_arm_neon_fp16 = cpuinfo_has_arm_neon_fp16();
hardware_config.use_arm_neon_fma = cpuinfo_has_arm_neon_fma();
hardware_config.use_arm_neon_v8 = cpuinfo_has_arm_neon_v8();
#endif
static void init_hardware_config(void) {
#if XNN_ARCH_ARM64 || XNN_ARCH_ARM
#if XNN_PLATFORM_WINDOWS
SYSTEM_INFO system_info;
GetSystemInfo(&system_info);
switch (system_info.wProcessorLevel) {
case 0x803: // Kryo 385 Silver
hardware_config.use_arm_neon_fp16_arith = true;
break;
default:
// Assume that Dot Product support implies FP16 support.
// ARM manuals don't guarantee that, but it holds in practice.
hardware_config.use_arm_neon_fp16_arith =
!!IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE);
break;
}
hardware_config.use_arm_fp16_arith = hardware_config.use_arm_neon_fp16_arith;

hardware_config.use_arm_neon_bf16 = false;
hardware_config.use_arm_neon_dot =
!!IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE);
#else
hardware_config.use_arm_fp16_arith = cpuinfo_has_arm_fp16_arith();
hardware_config.use_arm_neon_fp16_arith = cpuinfo_has_arm_neon_fp16_arith();
hardware_config.use_arm_neon_bf16 = cpuinfo_has_arm_neon_bf16();
hardware_config.use_arm_neon_dot = cpuinfo_has_arm_neon_dot();
#endif
hardware_config.use_arm_vfpv3 = cpuinfo_has_arm_vfpv3();
hardware_config.use_arm_neon = cpuinfo_has_arm_neon();
hardware_config.use_arm_neon_fp16 = cpuinfo_has_arm_neon_fp16();
hardware_config.use_arm_neon_fma = cpuinfo_has_arm_neon_fma();
hardware_config.use_arm_neon_v8 = cpuinfo_has_arm_neon_v8();
#endif

#if XNN_ARCH_ARM
hardware_config.use_arm_v6 = cpuinfo_has_arm_v6();
hardware_config.use_arm_vfpv2 = cpuinfo_has_arm_vfpv2();
#endif
#if XNN_ARCH_ARM
hardware_config.use_arm_v6 = cpuinfo_has_arm_v6();
hardware_config.use_arm_vfpv2 = cpuinfo_has_arm_vfpv2();
#endif

#if XNN_ARCH_ARM64
hardware_config.use_arm_neon_i8mm = cpuinfo_has_arm_i8mm();
hardware_config.use_arm_sve = cpuinfo_has_arm_sve();
hardware_config.use_arm_sve2 = cpuinfo_has_arm_sve2();
hardware_config.use_arm_sme = cpuinfo_has_arm_sme();
hardware_config.use_arm_sme2 = cpuinfo_has_arm_sme2();
#endif
#if XNN_ARCH_ARM64
hardware_config.use_arm_neon_i8mm = cpuinfo_has_arm_i8mm();
hardware_config.use_arm_sve = cpuinfo_has_arm_sve();
hardware_config.use_arm_sve2 = cpuinfo_has_arm_sve2();
hardware_config.use_arm_sme = cpuinfo_has_arm_sme();
// TODO(b/409244409): Remove before end of 2025/Q2.
hardware_config.use_arm_sme2 =
xnn_enable_arm_sme2_default && cpuinfo_has_arm_sme2();
xnn_enable_arm_sme2_default = -1;
#endif

#if XNN_ARCH_X86 || XNN_ARCH_X86_64
hardware_config.use_x86_ssse3 = cpuinfo_has_x86_ssse3();
hardware_config.use_x86_sse4_1 = cpuinfo_has_x86_sse4_1();
hardware_config.use_x86_avx = cpuinfo_has_x86_avx();
hardware_config.use_x86_f16c = cpuinfo_has_x86_f16c();
hardware_config.use_x86_fma3 = cpuinfo_has_x86_fma3();
hardware_config.use_x86_avx2 = cpuinfo_has_x86_avx2();
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
hardware_config.use_x86_ssse3 = cpuinfo_has_x86_ssse3();
hardware_config.use_x86_sse4_1 = cpuinfo_has_x86_sse4_1();
hardware_config.use_x86_avx = cpuinfo_has_x86_avx();
hardware_config.use_x86_f16c = cpuinfo_has_x86_f16c();
hardware_config.use_x86_fma3 = cpuinfo_has_x86_fma3();
hardware_config.use_x86_avx2 = cpuinfo_has_x86_avx2();
#if XNN_ENABLE_AVX512F
hardware_config.use_x86_avx512f = cpuinfo_has_x86_avx512f();
#else
Expand Down
5 changes: 4 additions & 1 deletion src/xnnpack/hardware-config.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once
#ifndef THIRD_PARTY_XNNPACK_SRC_XNNPACK_HARDWARE_CONFIG_H_
#define THIRD_PARTY_XNNPACK_SRC_XNNPACK_HARDWARE_CONFIG_H_

#include <stdbool.h>
#include <stddef.h>
Expand Down Expand Up @@ -221,3 +222,5 @@ static inline bool xnn_is_f16_supported_natively(
#ifdef __cplusplus
} // extern "C"
#endif

#endif // THIRD_PARTY_XNNPACK_SRC_XNNPACK_HARDWARE_CONFIG_H_
Loading