@@ -28,7 +28,10 @@ if (is_clang) {
2828# can be read with GN too.
2929import (" gen/scalar_microkernels.bzl" )
3030
31+ import (" gen/aarch32_microkernels.bzl" )
3132import (" gen/aarch64_microkernels.bzl" )
33+ import (" gen/armsimd32_microkernels.bzl" )
34+ import (" gen/fp16arith_microkernels.bzl" )
3235import (" gen/neon_aarch64_microkernels.bzl" )
3336import (" gen/neon_microkernels.bzl" )
3437import (" gen/neonbf16_microkernels.bzl" )
@@ -193,6 +196,14 @@ config("xnnpack_private_config") {
193196 } else {
194197 defines += [ " XNN_ENABLE_ARM_SME2=0" ]
195198 }
199+ } else if (current_cpu == " arm" ) {
200+ defines += [
201+ " XNN_ENABLE_ARM_BF16=1" ,
202+ " XNN_ENABLE_ARM_DOTPROD=1" ,
203+ " XNN_ENABLE_ARM_FP16_SCALAR=1" ,
204+ " XNN_ENABLE_ARM_FP16_VECTOR=1" ,
205+ " XNN_ENABLE_ASSEMBLY=1" ,
206+ ]
196207 } else if (current_cpu == " x64" || current_cpu == " x86" ) {
197208 # x64 support baseline, which is SSSE3 and below to match Chromium.
198209 defines += [
@@ -572,6 +583,223 @@ if (current_cpu == "arm64") {
572583 }
573584}
574585
# Microkernel build rules for 32-bit ARM. Nothing in this block is declared
# unless current_cpu == "arm".
#
# The block has two halves:
#   1. One config per instruction-set variant. Each config carries only
#      compiler (cflags) or assembler (asmflags) options.
#   2. One xnnpack_source_set per variant, pairing a source list with the
#      matching config. Every source set depends on the same two targets,
#      ":microkernel_defs" and ":microkernel_headers".
#
# NOTE(review): the *_MICROKERNEL_SRCS lists are not defined in this block;
# presumably they come from the gen/*.bzl files imported at the top of the
# file -- confirm when adding a new variant.
#
# Flag conventions used throughout:
#   -marm            generate ARM (A32) rather than Thumb encodings.
#   -march / -mfpu   architecture level and FPU/SIMD unit for the variant.
#   -Xclang -target-feature -Xclang +<feature>
#                    hands <feature> directly to the clang frontend. Each
#                    config that uses this form repeats the extensions named
#                    in its own -march string.
if (current_cpu == "arm") {
  # Scalar ARM SIMD32 kernels: VFP only, unaligned loads/stores permitted.
  # No -march is given here, so the architecture level is whatever the rest
  # of the toolchain configuration selects.
  config("armsimd32_config") {
    cflags = [
      "-marm",
      "-mfpu=vfp",
      "-munaligned-access",
    ]
  }

  # Hand-written AArch32 assembly. Uses asmflags (not cflags) because these
  # options apply to assembly sources. The -march string enables both the
  # dotprod and fp16 extensions.
  config("aarch32_asm_config") {
    asmflags = [
      "-marm",
      "-march=armv8.2-a+dotprod+fp16",
      "-mfpu=neon-fp-armv8",
    ]
  }

  # Baseline ARMv7-A NEON.
  config("neon_config") {
    cflags = [
      "-marm",
      "-march=armv7-a",
      "-mfpu=neon",
    ]
  }

  # ARMv7-A NEON with the half-precision extension (-mfpu=neon-fp16).
  config("neonfp16_config") {
    cflags = [
      "-marm",
      "-march=armv7-a",
      "-mfpu=neon-fp16",
    ]
  }

  # ARMv7-A NEON with VFPv4 (-mfpu=neon-vfpv4).
  config("neonfma_config") {
    cflags = [
      "-marm",
      "-march=armv7-a",
      "-mfpu=neon-vfpv4",
    ]
  }

  # ARMv8-A NEON in AArch32 state.
  config("neonv8_config") {
    cflags = [
      "-marm",
      "-march=armv8-a",
      "-mfpu=neon-fp-armv8",
    ]
  }

  # ARMv8.2-A with fp16; also passes +fullfp16 to the clang frontend.
  config("fp16arith_config") {
    cflags = [
      "-marm",
      "-march=armv8.2-a+fp16",
      "-mfpu=neon-fp-armv8",
      "-Xclang",
      "-target-feature",
      "-Xclang",
      "+fullfp16",
    ]
  }

  # Same flag set as fp16arith_config; kept as a separate config so the NEON
  # fp16 source set has its own name to reference.
  config("neonfp16arith_config") {
    cflags = [
      "-marm",
      "-march=armv8.2-a+fp16",
      "-mfpu=neon-fp-armv8",
      "-Xclang",
      "-target-feature",
      "-Xclang",
      "+fullfp16",
    ]
  }

  # ARMv8.2-A with the dot-product extension.
  config("neon_dotprod_config") {
    cflags = [
      "-marm",
      "-march=armv8.2-a+dotprod",
      "-mfpu=neon-fp-armv8",
      "-Xclang",
      "-target-feature",
      "-Xclang",
      "+dotprod",
    ]
  }

  # ARMv8.2-A with both the dot-product and fp16 extensions.
  config("neon_dotprodfp16arith_config") {
    cflags = [
      "-marm",
      "-march=armv8.2-a+dotprod+fp16",
      "-mfpu=neon-fp-armv8",
      "-Xclang",
      "-target-feature",
      "-Xclang",
      "+dotprod",
      "-Xclang",
      "-target-feature",
      "-Xclang",
      "+fullfp16",
    ]
  }

  # ARMv8.2-A with the bf16 extension.
  #
  # +bf16 is passed as a target feature so that, like the sibling configs,
  # the -target-feature list names the extension requested in -march.
  # NOTE(review): +dotprod and +fullfp16 are kept exactly as they were so
  # that no previously accepted flag is dropped; they look carried over from
  # neon_dotprodfp16arith_config -- confirm whether the bf16 kernels need
  # them.
  config("neon_bf16_config") {
    cflags = [
      "-marm",
      "-march=armv8.2-a+bf16",
      "-mfpu=neon-fp-armv8",
      "-Xclang",
      "-target-feature",
      "-Xclang",
      "+bf16",
      "-Xclang",
      "-target-feature",
      "-Xclang",
      "+dotprod",
      "-Xclang",
      "-target-feature",
      "-Xclang",
      "+fullfp16",
    ]
  }

  # --- Source sets: one per variant, each built with the config above. ---

  xnnpack_source_set("armsimd32_microkernels") {
    deps = [
      ":microkernel_defs",
      ":microkernel_headers",
    ]
    sources = ALL_ARMSIMD32_MICROKERNEL_SRCS
    configs = [ ":armsimd32_config" ]
  }

  # The only source set here whose config supplies asmflags instead of cflags.
  xnnpack_source_set("aarch32_asm_microkernels") {
    deps = [
      ":microkernel_defs",
      ":microkernel_headers",
    ]
    sources = AARCH32_ASM_MICROKERNEL_SRCS
    configs = [ ":aarch32_asm_config" ]
  }

  xnnpack_source_set("neon_microkernels") {
    deps = [
      ":microkernel_defs",
      ":microkernel_headers",
    ]
    sources = ALL_NEON_MICROKERNEL_SRCS
    configs = [ ":neon_config" ]
  }

  xnnpack_source_set("neonfp16_microkernels") {
    deps = [
      ":microkernel_defs",
      ":microkernel_headers",
    ]
    sources = ALL_NEONFP16_MICROKERNEL_SRCS
    configs = [ ":neonfp16_config" ]
  }

  xnnpack_source_set("neonfma_microkernels") {
    deps = [
      ":microkernel_defs",
      ":microkernel_headers",
    ]
    sources = ALL_NEONFMA_MICROKERNEL_SRCS
    configs = [ ":neonfma_config" ]
  }

  xnnpack_source_set("neonv8_microkernels") {
    deps = [
      ":microkernel_defs",
      ":microkernel_headers",
    ]
    sources = ALL_NEONV8_MICROKERNEL_SRCS
    configs = [ ":neonv8_config" ]
  }

  xnnpack_source_set("fp16arith_microkernels") {
    deps = [
      ":microkernel_defs",
      ":microkernel_headers",
    ]
    sources = ALL_FP16ARITH_MICROKERNEL_SRCS
    configs = [ ":fp16arith_config" ]
  }

  xnnpack_source_set("neonfp16arith_microkernels") {
    deps = [
      ":microkernel_defs",
      ":microkernel_headers",
    ]
    sources = ALL_NEONFP16ARITH_MICROKERNEL_SRCS
    configs = [ ":neonfp16arith_config" ]
  }

  xnnpack_source_set("neon_dotprod_microkernels") {
    deps = [
      ":microkernel_defs",
      ":microkernel_headers",
    ]
    sources = ALL_NEONDOT_MICROKERNEL_SRCS
    configs = [ ":neon_dotprod_config" ]
  }

  xnnpack_source_set("neon_dotprodfp16arith_microkernels") {
    deps = [
      ":microkernel_defs",
      ":microkernel_headers",
    ]
    sources = ALL_NEONDOTFP16ARITH_MICROKERNEL_SRCS
    configs = [ ":neon_dotprodfp16arith_config" ]
  }

  xnnpack_source_set("neon_bf16_microkernels") {
    deps = [
      ":microkernel_defs",
      ":microkernel_headers",
    ]
    sources = ALL_NEONBF16_MICROKERNEL_SRCS
    configs = [ ":neon_bf16_config" ]
  }
}
802+
575803if (current_cpu == " x64" || current_cpu == " x86" ) {
576804 xnnpack_source_set (" avxvnni_microkernels" ) {
577805 deps = [
@@ -771,6 +999,20 @@ xnnpack_source_set("xnnpack") {
771999 if (xnnpack_enable_arm_sme2 ) {
7721000 deps += [ " :sme2_microkernels" ]
7731001 }
1002+ } else if (current_cpu == " arm" ) {
1003+ deps += [
1004+ " :aarch32_asm_microkernels" ,
1005+ " :armsimd32_microkernels" ,
1006+ " :fp16arith_microkernels" ,
1007+ " :neon_bf16_microkernels" ,
1008+ " :neon_dotprod_microkernels" ,
1009+ " :neon_dotprodfp16arith_microkernels" ,
1010+ " :neon_microkernels" ,
1011+ " :neonfma_microkernels" ,
1012+ " :neonfp16_microkernels" ,
1013+ " :neonfp16arith_microkernels" ,
1014+ " :neonv8_microkernels" ,
1015+ ]
7741016 } else if (current_cpu == " x64" || current_cpu == " x86" ) {
7751017 if (xnnpack_enable_avx ) {
7761018 deps += [ " :avx_microkernels" ]
0 commit comments