@@ -28,7 +28,10 @@ if (is_clang) {
2828# can be read with GN too.
2929import (" gen/scalar_microkernels.bzl" )
3030
31+ import (" gen/aarch32_microkernels.bzl" )
3132import (" gen/aarch64_microkernels.bzl" )
33+ import (" gen/armsimd32_microkernels.bzl" )
34+ import (" gen/fp16arith_microkernels.bzl" )
3235import (" gen/neon_aarch64_microkernels.bzl" )
3336import (" gen/neon_microkernels.bzl" )
3437import (" gen/neonbf16_microkernels.bzl" )
@@ -116,7 +119,8 @@ declare_args() {
116119 xnnpack_enable_arm_dotprod = false
117120}
118121
# Abort GN evaluation for any target_cpu outside the supported set. The
# added (+) lines extend the accepted values (arm64, x64, x86) with "arm";
# the message reports the offending target_cpu value.
119- assert (target_cpu == " arm64" || target_cpu == " x64" || target_cpu == " x86" ,
122+ assert (target_cpu == " arm64" || target_cpu == " x64" || target_cpu == " x86" ||
123+ target_cpu == " arm" ,
120124 " Unsupported target_cpu '${ target_cpu } '." )
121125
122126assert (! xnnpack_enable_arm_kleidiai || target_cpu == " arm64" ,
@@ -193,6 +197,14 @@ config("xnnpack_private_config") {
193197 } else {
194198 defines += [ " XNN_ENABLE_ARM_SME2=0" ]
195199 }
200+ } else if (current_cpu == " arm" ) {
201+ defines += [
202+ " XNN_ENABLE_ARM_BF16=1" ,
203+ " XNN_ENABLE_ARM_DOTPROD=1" ,
204+ " XNN_ENABLE_ARM_FP16_SCALAR=1" ,
205+ " XNN_ENABLE_ARM_FP16_VECTOR=1" ,
206+ " XNN_ENABLE_ASSEMBLY=1" ,
207+ ]
196208 } else if (current_cpu == " x64" || current_cpu == " x86" ) {
197209 # x64 support baseline, which is SSSE3 and below to match Chromium.
198210 defines += [
@@ -360,20 +372,6 @@ xnnpack_source_set("operators") {
360372 }
361373}
362374
363- xnnpack_source_set (" subgraph_matchers" ) {
364- sources = [
365- " test/subgraph/rewrites/subgraph_matcher.cc" ,
366- " test/subgraph/rewrites/subgraph_matcher.h" ,
367- ]
368-
369- public_deps = [
370- " :xnnpack_headers" ,
371- " //third_party/googletest:gtest" ,
372- ]
373-
374- testonly = true
375- }
376-
377375xnnpack_source_set (" subgraph" ) {
378376 deps = [
379377 " :microkernel_headers" ,
@@ -572,6 +570,223 @@ if (current_cpu == "arm64") {
572570 }
573571}
574572
573+ if (current_cpu == " arm" ) {
# C compiler flags used by :armsimd32_microkernels: ARM (not Thumb)
# instruction state, the VFP floating-point unit, and unaligned memory
# accesses permitted. No -march is given here, so the architecture level
# comes from whatever the toolchain defaults supply.
574+ config (" armsimd32_config" ) {
575+ cflags = [
576+ " -marm" ,
577+ " -mfpu=vfp" ,
578+ " -munaligned-access" ,
579+ ]
580+ }
581+
# Assembler flags (asmflags, not cflags) used by :aarch32_asm_microkernels:
# ARM state, ARMv8.2-A with the dot-product and fp16 extensions, and the
# ARMv8 NEON/FP unit.
582+ config (" aarch32_asm_config" ) {
583+ asmflags = [
584+ " -marm" ,
585+ " -march=armv8.2-a+dotprod+fp16" ,
586+ " -mfpu=neon-fp-armv8" ,
587+ ]
588+ }
589+
# C compiler flags used by :neon_microkernels: ARM state, ARMv7-A, with the
# baseline NEON unit selected via -mfpu=neon.
590+ config (" neon_config" ) {
591+ cflags = [
592+ " -marm" ,
593+ " -march=armv7-a" ,
594+ " -mfpu=neon" ,
595+ ]
596+ }
597+
# C compiler flags used by :neonfp16_microkernels: ARM state, ARMv7-A, with
# -mfpu=neon-fp16 (NEON plus the half-precision extension).
598+ config (" neonfp16_config" ) {
599+ cflags = [
600+ " -marm" ,
601+ " -march=armv7-a" ,
602+ " -mfpu=neon-fp16" ,
603+ ]
604+ }
605+
# C compiler flags used by :neonfma_microkernels: ARM state, ARMv7-A, with
# -mfpu=neon-vfpv4 (NEON combined with VFPv4).
606+ config (" neonfma_config" ) {
607+ cflags = [
608+ " -marm" ,
609+ " -march=armv7-a" ,
610+ " -mfpu=neon-vfpv4" ,
611+ ]
612+ }
613+
# C compiler flags used by :neonv8_microkernels: ARM state, ARMv8-A, with
# the ARMv8 NEON/FP unit (-mfpu=neon-fp-armv8).
614+ config (" neonv8_config" ) {
615+ cflags = [
616+ " -marm" ,
617+ " -march=armv8-a" ,
618+ " -mfpu=neon-fp-armv8" ,
619+ ]
620+ }
621+
# C compiler flags used by :fp16arith_microkernels: ARM state, ARMv8.2-A
# with the fp16 extension, ARMv8 NEON/FP unit. The trailing
# -Xclang/-target-feature pairs hand "+fullfp16" directly to the clang
# frontend in addition to the -march setting.
622+ config (" fp16arith_config" ) {
623+ cflags = [
624+ " -marm" ,
625+ " -march=armv8.2-a+fp16" ,
626+ " -mfpu=neon-fp-armv8" ,
627+ " -Xclang" ,
628+ " -target-feature" ,
629+ " -Xclang" ,
630+ " +fullfp16" ,
631+ ]
632+ }
633+
# C compiler flags used by :neonfp16arith_microkernels: ARM state,
# ARMv8.2-A with the fp16 extension, ARMv8 NEON/FP unit, plus "+fullfp16"
# forwarded to the clang frontend.
# NOTE(review): this flag list is identical to the one in fp16arith_config;
# the two configs differ only by name.
634+ config (" neonfp16arith_config" ) {
635+ cflags = [
636+ " -marm" ,
637+ " -march=armv8.2-a+fp16" ,
638+ " -mfpu=neon-fp-armv8" ,
639+ " -Xclang" ,
640+ " -target-feature" ,
641+ " -Xclang" ,
642+ " +fullfp16" ,
643+ ]
644+ }
645+
# C compiler flags used by :neon_dotprod_microkernels: ARM state,
# ARMv8.2-A with the dot-product extension, ARMv8 NEON/FP unit. The
# -Xclang/-target-feature pair additionally passes "+dotprod" straight to
# the clang frontend.
646+ config (" neon_dotprod_config" ) {
647+ cflags = [
648+ " -marm" ,
649+ " -march=armv8.2-a+dotprod" ,
650+ " -mfpu=neon-fp-armv8" ,
651+ " -Xclang" ,
652+ " -target-feature" ,
653+ " -Xclang" ,
654+ " +dotprod" ,
655+ ]
656+ }
657+
# C compiler flags used by :neon_dotprodfp16arith_microkernels: ARM state,
# ARMv8.2-A with both the dot-product and fp16 extensions, ARMv8 NEON/FP
# unit. Two -Xclang/-target-feature groups forward "+dotprod" and
# "+fullfp16" to the clang frontend.
658+ config (" neon_dotprodfp16arith_config" ) {
659+ cflags = [
660+ " -marm" ,
661+ " -march=armv8.2-a+dotprod+fp16" ,
662+ " -mfpu=neon-fp-armv8" ,
663+ " -Xclang" ,
664+ " -target-feature" ,
665+ " -Xclang" ,
666+ " +dotprod" ,
667+ " -Xclang" ,
668+ " -target-feature" ,
669+ " -Xclang" ,
670+ " +fullfp16" ,
671+ ]
672+ }
673+
# C compiler flags used by :neon_bf16_microkernels: ARM state, ARMv8.2-A
# with the bf16 extension requested through -march, ARMv8 NEON/FP unit.
# NOTE(review): the explicit -Xclang target features below are "+dotprod"
# and "+fullfp16" (the same pairs as neon_dotprodfp16arith_config), and no
# bf16 feature is forwarded this way. Confirm whether that is intentional
# or a copy-paste from the dot-product config.
674+ config (" neon_bf16_config" ) {
675+ cflags = [
676+ " -marm" ,
677+ " -march=armv8.2-a+bf16" ,
678+ " -mfpu=neon-fp-armv8" ,
679+ " -Xclang" ,
680+ " -target-feature" ,
681+ " -Xclang" ,
682+ " +dotprod" ,
683+ " -Xclang" ,
684+ " -target-feature" ,
685+ " -Xclang" ,
686+ " +fullfp16" ,
687+ ]
688+ }
689+
690+ xnnpack_source_set (" armsimd32_microkernels" ) {
691+ deps = [
692+ " :microkernel_defs" ,
693+ " :microkernel_headers" ,
694+ ]
695+ sources = ALL_ARMSIMD32_MICROKERNEL_SRCS
696+ configs = [ " :armsimd32_config" ]
697+ }
698+
# Source set that builds AARCH32_ASM_MICROKERNEL_SRCS with
# :aarch32_asm_config (assembler flags), depending on the shared
# microkernel defs and headers. The source list is defined outside this
# view; presumably in an imported gen/*.bzl file (TODO confirm).
699+ xnnpack_source_set (" aarch32_asm_microkernels" ) {
700+ deps = [
701+ " :microkernel_defs" ,
702+ " :microkernel_headers" ,
703+ ]
704+ sources = AARCH32_ASM_MICROKERNEL_SRCS
705+ configs = [ " :aarch32_asm_config" ]
706+ }
707+
# Source set that builds ALL_NEON_MICROKERNEL_SRCS with :neon_config,
# depending on the shared microkernel defs and headers. The source list is
# defined outside this view; presumably in an imported gen/*.bzl file
# (TODO confirm).
708+ xnnpack_source_set (" neon_microkernels" ) {
709+ deps = [
710+ " :microkernel_defs" ,
711+ " :microkernel_headers" ,
712+ ]
713+ sources = ALL_NEON_MICROKERNEL_SRCS
714+ configs = [ " :neon_config" ]
715+ }
716+
# Source set that builds ALL_NEONFP16_MICROKERNEL_SRCS with
# :neonfp16_config, depending on the shared microkernel defs and headers.
# The source list is defined outside this view; presumably in an imported
# gen/*.bzl file (TODO confirm).
717+ xnnpack_source_set (" neonfp16_microkernels" ) {
718+ deps = [
719+ " :microkernel_defs" ,
720+ " :microkernel_headers" ,
721+ ]
722+ sources = ALL_NEONFP16_MICROKERNEL_SRCS
723+ configs = [ " :neonfp16_config" ]
724+ }
725+
# Source set that builds ALL_NEONFMA_MICROKERNEL_SRCS with :neonfma_config,
# depending on the shared microkernel defs and headers. The source list is
# defined outside this view; presumably in an imported gen/*.bzl file
# (TODO confirm).
726+ xnnpack_source_set (" neonfma_microkernels" ) {
727+ deps = [
728+ " :microkernel_defs" ,
729+ " :microkernel_headers" ,
730+ ]
731+ sources = ALL_NEONFMA_MICROKERNEL_SRCS
732+ configs = [ " :neonfma_config" ]
733+ }
734+
# Source set that builds ALL_NEONV8_MICROKERNEL_SRCS with :neonv8_config,
# depending on the shared microkernel defs and headers. The source list is
# defined outside this view; presumably in an imported gen/*.bzl file
# (TODO confirm).
735+ xnnpack_source_set (" neonv8_microkernels" ) {
736+ deps = [
737+ " :microkernel_defs" ,
738+ " :microkernel_headers" ,
739+ ]
740+ sources = ALL_NEONV8_MICROKERNEL_SRCS
741+ configs = [ " :neonv8_config" ]
742+ }
743+
# Source set that builds ALL_FP16ARITH_MICROKERNEL_SRCS with
# :fp16arith_config, depending on the shared microkernel defs and headers.
# The source list is defined outside this view; presumably in an imported
# gen/*.bzl file (TODO confirm).
744+ xnnpack_source_set (" fp16arith_microkernels" ) {
745+ deps = [
746+ " :microkernel_defs" ,
747+ " :microkernel_headers" ,
748+ ]
749+ sources = ALL_FP16ARITH_MICROKERNEL_SRCS
750+ configs = [ " :fp16arith_config" ]
751+ }
752+
# Source set that builds ALL_NEONFP16ARITH_MICROKERNEL_SRCS with
# :neonfp16arith_config, depending on the shared microkernel defs and
# headers. The source list is defined outside this view; presumably in an
# imported gen/*.bzl file (TODO confirm).
753+ xnnpack_source_set (" neonfp16arith_microkernels" ) {
754+ deps = [
755+ " :microkernel_defs" ,
756+ " :microkernel_headers" ,
757+ ]
758+ sources = ALL_NEONFP16ARITH_MICROKERNEL_SRCS
759+ configs = [ " :neonfp16arith_config" ]
760+ }
761+
# Source set that builds ALL_NEONDOT_MICROKERNEL_SRCS with
# :neon_dotprod_config, depending on the shared microkernel defs and
# headers. The source list is defined outside this view; presumably in an
# imported gen/*.bzl file (TODO confirm).
762+ xnnpack_source_set (" neon_dotprod_microkernels" ) {
763+ deps = [
764+ " :microkernel_defs" ,
765+ " :microkernel_headers" ,
766+ ]
767+ sources = ALL_NEONDOT_MICROKERNEL_SRCS
768+ configs = [ " :neon_dotprod_config" ]
769+ }
770+
# Source set that builds ALL_NEONDOTFP16ARITH_MICROKERNEL_SRCS with
# :neon_dotprodfp16arith_config, depending on the shared microkernel defs
# and headers. The source list is defined outside this view; presumably in
# an imported gen/*.bzl file (TODO confirm).
771+ xnnpack_source_set (" neon_dotprodfp16arith_microkernels" ) {
772+ deps = [
773+ " :microkernel_defs" ,
774+ " :microkernel_headers" ,
775+ ]
776+ sources = ALL_NEONDOTFP16ARITH_MICROKERNEL_SRCS
777+ configs = [ " :neon_dotprodfp16arith_config" ]
778+ }
779+
# Source set that builds ALL_NEONBF16_MICROKERNEL_SRCS with
# :neon_bf16_config, depending on the shared microkernel defs and headers.
# The source list is defined outside this view; presumably in an imported
# gen/*.bzl file (TODO confirm).
780+ xnnpack_source_set (" neon_bf16_microkernels" ) {
781+ deps = [
782+ " :microkernel_defs" ,
783+ " :microkernel_headers" ,
784+ ]
785+ sources = ALL_NEONBF16_MICROKERNEL_SRCS
786+ configs = [ " :neon_bf16_config" ]
787+ }
788+ }
789+
575790if (current_cpu == " x64" || current_cpu == " x86" ) {
576791 xnnpack_source_set (" avxvnni_microkernels" ) {
577792 deps = [
@@ -771,6 +986,20 @@ xnnpack_source_set("xnnpack") {
771986 if (xnnpack_enable_arm_sme2 ) {
772987 deps += [ " :sme2_microkernels" ]
773988 }
989+ } else if (current_cpu == " arm" ) {
990+ deps += [
991+ " :aarch32_asm_microkernels" ,
992+ " :armsimd32_microkernels" ,
993+ " :fp16arith_microkernels" ,
994+ " :neon_bf16_microkernels" ,
995+ " :neon_dotprod_microkernels" ,
996+ " :neon_dotprodfp16arith_microkernels" ,
997+ " :neon_microkernels" ,
998+ " :neonfma_microkernels" ,
999+ " :neonfp16_microkernels" ,
1000+ " :neonfp16arith_microkernels" ,
1001+ " :neonv8_microkernels" ,
1002+ ]
7741003 } else if (current_cpu == " x64" || current_cpu == " x86" ) {
7751004 if (xnnpack_enable_avx ) {
7761005 deps += [ " :avx_microkernels" ]
0 commit comments