@@ -28,7 +28,10 @@ if (is_clang) {
2828# can be read with GN too.
2929import (" gen/scalar_microkernels.bzl" )
3030
31+ import (" gen/aarch32_microkernels.bzl" )
3132import (" gen/aarch64_microkernels.bzl" )
33+ import (" gen/armsimd32_microkernels.bzl" )
34+ import (" gen/fp16arith_microkernels.bzl" )
3235import (" gen/neon_aarch64_microkernels.bzl" )
3336import (" gen/neon_microkernels.bzl" )
3437import (" gen/neonbf16_microkernels.bzl" )
@@ -117,7 +120,8 @@ declare_args() {
117120 xnnpack_enable_arm_dotprod = false
118121}
119122
# Guard on the CPU this build is configured for: the assertion fails with the
# "Unsupported target_cpu" message below unless target_cpu is one of the listed
# values. The "-" line is the previous form (arm64, x64, x86); the "+" lines are
# its replacement, which additionally accepts "arm".
# NOTE(review): this checks target_cpu, while the CPU-specific sections of this
# file branch on current_cpu - confirm the two agree for every toolchain that
# loads this file.
120- assert (target_cpu == " arm64" || target_cpu == " x64" || target_cpu == " x86" ,
123+ assert (target_cpu == " arm64" || target_cpu == " x64" || target_cpu == " x86" ||
124+ target_cpu == " arm" ,
121125 " Unsupported target_cpu '${ target_cpu } '." )
122126
123127assert (! xnnpack_enable_arm_kleidiai || target_cpu == " arm64" ,
@@ -194,6 +198,14 @@ config("xnnpack_private_config") {
194198 } else {
195199 defines += [ " XNN_ENABLE_ARM_SME2=0" ]
196200 }
201+ } else if (current_cpu == " arm" ) {
202+ defines += [
203+ " XNN_ENABLE_ARM_BF16=1" ,
204+ " XNN_ENABLE_ARM_DOTPROD=1" ,
205+ " XNN_ENABLE_ARM_FP16_SCALAR=1" ,
206+ " XNN_ENABLE_ARM_FP16_VECTOR=1" ,
207+ " XNN_ENABLE_ASSEMBLY=1" ,
208+ ]
197209 } else if (current_cpu == " x64" || current_cpu == " x86" ) {
198210 # x64 support baseline, which is SSSE3 and below to match Chromium.
199211 defines += [
@@ -361,20 +373,6 @@ xnnpack_source_set("operators") {
361373 }
362374}
363375
364- xnnpack_source_set (" subgraph_matchers" ) {
365- sources = [
366- " test/subgraph/rewrites/subgraph_matcher.cc" ,
367- " test/subgraph/rewrites/subgraph_matcher.h" ,
368- ]
369-
370- public_deps = [
371- " :xnnpack_headers" ,
372- " //third_party/googletest:gtest" ,
373- ]
374-
375- testonly = true
376- }
377-
378376xnnpack_source_set (" subgraph" ) {
379377 deps = [
380378 " :microkernel_headers" ,
@@ -573,6 +571,223 @@ if (current_cpu == "arm64") {
573571 }
574572}
575573
# 32-bit ARM section. Everything below is only defined when current_cpu is
# "arm": first one config() per instruction-set variant, then one
# xnnpack_source_set() per variant that compiles a source list with exactly
# that config. Every config passes -marm.
# NOTE(review): none of the targets in this section are gated on the
# xnnpack_enable_arm_* build args; they exist whenever current_cpu is "arm".
# Confirm that is intended.
574+ if (current_cpu == " arm" ) {
# Flags for the armsimd32 kernels: VFP floating point plus unaligned access,
# with no explicit -march.
575+ config (" armsimd32_config" ) {
576+ cflags = [
577+ " -marm" ,
578+ " -mfpu=vfp" ,
579+ " -munaligned-access" ,
580+ ]
581+ }
582+
# The only config here that sets asmflags instead of cflags; it is consumed by
# the aarch32_asm_microkernels source set below.
583+ config (" aarch32_asm_config" ) {
584+ asmflags = [
585+ " -marm" ,
586+ " -march=armv8.2-a+dotprod+fp16" ,
587+ " -mfpu=neon-fp-armv8" ,
588+ ]
589+ }
590+
# The three armv7-a NEON configs that follow differ only in -mfpu:
# neon, neon-fp16, neon-vfpv4.
591+ config (" neon_config" ) {
592+ cflags = [
593+ " -marm" ,
594+ " -march=armv7-a" ,
595+ " -mfpu=neon" ,
596+ ]
597+ }
598+
599+ config (" neonfp16_config" ) {
600+ cflags = [
601+ " -marm" ,
602+ " -march=armv7-a" ,
603+ " -mfpu=neon-fp16" ,
604+ ]
605+ }
606+
607+ config (" neonfma_config" ) {
608+ cflags = [
609+ " -marm" ,
610+ " -march=armv7-a" ,
611+ " -mfpu=neon-vfpv4" ,
612+ ]
613+ }
614+
# From here on every config uses -mfpu=neon-fp-armv8 with an armv8 -march.
615+ config (" neonv8_config" ) {
616+ cflags = [
617+ " -marm" ,
618+ " -march=armv8-a" ,
619+ " -mfpu=neon-fp-armv8" ,
620+ ]
621+ }
622+
# armv8.2-a+fp16. Each "-Xclang -target-feature -Xclang +<name>" group hands
# "-target-feature +<name>" to the clang frontend, here requesting fullfp16.
# NOTE(review): fp16arith_config and neonfp16arith_config carry identical
# flags; confirm both are needed as separate configs.
623+ config (" fp16arith_config" ) {
624+ cflags = [
625+ " -marm" ,
626+ " -march=armv8.2-a+fp16" ,
627+ " -mfpu=neon-fp-armv8" ,
628+ " -Xclang" ,
629+ " -target-feature" ,
630+ " -Xclang" ,
631+ " +fullfp16" ,
632+ ]
633+ }
634+
635+ config (" neonfp16arith_config" ) {
636+ cflags = [
637+ " -marm" ,
638+ " -march=armv8.2-a+fp16" ,
639+ " -mfpu=neon-fp-armv8" ,
640+ " -Xclang" ,
641+ " -target-feature" ,
642+ " -Xclang" ,
643+ " +fullfp16" ,
644+ ]
645+ }
646+
# armv8.2-a+dotprod, with the dotprod target feature also requested explicitly.
647+ config (" neon_dotprod_config" ) {
648+ cflags = [
649+ " -marm" ,
650+ " -march=armv8.2-a+dotprod" ,
651+ " -mfpu=neon-fp-armv8" ,
652+ " -Xclang" ,
653+ " -target-feature" ,
654+ " -Xclang" ,
655+ " +dotprod" ,
656+ ]
657+ }
658+
# Combination of the two above: dotprod and fp16 in -march, and both
# target features requested.
659+ config (" neon_dotprodfp16arith_config" ) {
660+ cflags = [
661+ " -marm" ,
662+ " -march=armv8.2-a+dotprod+fp16" ,
663+ " -mfpu=neon-fp-armv8" ,
664+ " -Xclang" ,
665+ " -target-feature" ,
666+ " -Xclang" ,
667+ " +dotprod" ,
668+ " -Xclang" ,
669+ " -target-feature" ,
670+ " -Xclang" ,
671+ " +fullfp16" ,
672+ ]
673+ }
674+
# NOTE(review): -march asks for +bf16, but the target-feature groups are
# +dotprod and +fullfp16 - the same two as neon_dotprodfp16arith_config - and
# no bf16 target feature is requested. This looks like it may have been copied
# from the config above; confirm that -march alone is enough for the bf16
# kernels and whether the dotprod/fullfp16 features are wanted here.
675+ config (" neon_bf16_config" ) {
676+ cflags = [
677+ " -marm" ,
678+ " -march=armv8.2-a+bf16" ,
679+ " -mfpu=neon-fp-armv8" ,
680+ " -Xclang" ,
681+ " -target-feature" ,
682+ " -Xclang" ,
683+ " +dotprod" ,
684+ " -Xclang" ,
685+ " -target-feature" ,
686+ " -Xclang" ,
687+ " +fullfp16" ,
688+ ]
689+ }
690+
# Source sets. All eleven have the same shape: deps on :microkernel_defs and
# :microkernel_headers, a `sources` list taken from a single variable, and a
# `configs` list holding the one matching config defined above.
# NOTE(review): `configs` is assigned with "=" rather than "+="; how the
# xnnpack_source_set template combines this with any default configs is not
# visible in this excerpt - verify against the template definition.
# NOTE(review): the source-list variables are presumably provided by the
# imported gen/*.bzl files; the imports for the NEONFP16, NEONFMA, NEONV8,
# NEONFP16ARITH, NEONDOT and NEONDOTFP16ARITH lists are not visible in this
# excerpt - confirm each is imported.
691+ xnnpack_source_set (" armsimd32_microkernels" ) {
692+ deps = [
693+ " :microkernel_defs" ,
694+ " :microkernel_headers" ,
695+ ]
696+ sources = ALL_ARMSIMD32_MICROKERNEL_SRCS
697+ configs = [ " :armsimd32_config" ]
698+ }
699+
# Uses the asmflags-only config; the list name has no ALL_ prefix, unlike
# its siblings.
700+ xnnpack_source_set (" aarch32_asm_microkernels" ) {
701+ deps = [
702+ " :microkernel_defs" ,
703+ " :microkernel_headers" ,
704+ ]
705+ sources = AARCH32_ASM_MICROKERNEL_SRCS
706+ configs = [ " :aarch32_asm_config" ]
707+ }
708+
709+ xnnpack_source_set (" neon_microkernels" ) {
710+ deps = [
711+ " :microkernel_defs" ,
712+ " :microkernel_headers" ,
713+ ]
714+ sources = ALL_NEON_MICROKERNEL_SRCS
715+ configs = [ " :neon_config" ]
716+ }
717+
718+ xnnpack_source_set (" neonfp16_microkernels" ) {
719+ deps = [
720+ " :microkernel_defs" ,
721+ " :microkernel_headers" ,
722+ ]
723+ sources = ALL_NEONFP16_MICROKERNEL_SRCS
724+ configs = [ " :neonfp16_config" ]
725+ }
726+
727+ xnnpack_source_set (" neonfma_microkernels" ) {
728+ deps = [
729+ " :microkernel_defs" ,
730+ " :microkernel_headers" ,
731+ ]
732+ sources = ALL_NEONFMA_MICROKERNEL_SRCS
733+ configs = [ " :neonfma_config" ]
734+ }
735+
736+ xnnpack_source_set (" neonv8_microkernels" ) {
737+ deps = [
738+ " :microkernel_defs" ,
739+ " :microkernel_headers" ,
740+ ]
741+ sources = ALL_NEONV8_MICROKERNEL_SRCS
742+ configs = [ " :neonv8_config" ]
743+ }
744+
745+ xnnpack_source_set (" fp16arith_microkernels" ) {
746+ deps = [
747+ " :microkernel_defs" ,
748+ " :microkernel_headers" ,
749+ ]
750+ sources = ALL_FP16ARITH_MICROKERNEL_SRCS
751+ configs = [ " :fp16arith_config" ]
752+ }
753+
754+ xnnpack_source_set (" neonfp16arith_microkernels" ) {
755+ deps = [
756+ " :microkernel_defs" ,
757+ " :microkernel_headers" ,
758+ ]
759+ sources = ALL_NEONFP16ARITH_MICROKERNEL_SRCS
760+ configs = [ " :neonfp16arith_config" ]
761+ }
762+
763+ xnnpack_source_set (" neon_dotprod_microkernels" ) {
764+ deps = [
765+ " :microkernel_defs" ,
766+ " :microkernel_headers" ,
767+ ]
768+ sources = ALL_NEONDOT_MICROKERNEL_SRCS
769+ configs = [ " :neon_dotprod_config" ]
770+ }
771+
772+ xnnpack_source_set (" neon_dotprodfp16arith_microkernels" ) {
773+ deps = [
774+ " :microkernel_defs" ,
775+ " :microkernel_headers" ,
776+ ]
777+ sources = ALL_NEONDOTFP16ARITH_MICROKERNEL_SRCS
778+ configs = [ " :neon_dotprodfp16arith_config" ]
779+ }
780+
781+ xnnpack_source_set (" neon_bf16_microkernels" ) {
782+ deps = [
783+ " :microkernel_defs" ,
784+ " :microkernel_headers" ,
785+ ]
786+ sources = ALL_NEONBF16_MICROKERNEL_SRCS
787+ configs = [ " :neon_bf16_config" ]
788+ }
789+ }
790+
576791if (current_cpu == " x64" || current_cpu == " x86" ) {
577792 xnnpack_source_set (" avxvnni_microkernels" ) {
578793 deps = [
@@ -766,6 +981,20 @@ xnnpack_source_set("xnnpack") {
766981 if (xnnpack_enable_arm_sme2 ) {
767982 deps += [ " :sme2_microkernels" ]
768983 }
984+ } else if (current_cpu == " arm" ) {
985+ deps += [
986+ " :aarch32_asm_microkernels" ,
987+ " :armsimd32_microkernels" ,
988+ " :fp16arith_microkernels" ,
989+ " :neon_bf16_microkernels" ,
990+ " :neon_dotprod_microkernels" ,
991+ " :neon_dotprodfp16arith_microkernels" ,
992+ " :neon_microkernels" ,
993+ " :neonfma_microkernels" ,
994+ " :neonfp16_microkernels" ,
995+ " :neonfp16arith_microkernels" ,
996+ " :neonv8_microkernels" ,
997+ ]
769998 } else if (current_cpu == " x64" || current_cpu == " x86" ) {
770999 if (xnnpack_enable_avx ) {
7711000 deps += [ " :avx_microkernels" ]
0 commit comments