Skip to content

Commit add70ac

Browse files
author
Jenkins
committed
Compute Library v23.11
1 parent 874e0c7 commit add70ac

File tree

2,083 files changed

+93476
-67512
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

2,083 files changed

+93476
-67512
lines changed

Android.bp

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,6 @@ opencl_srcs = [
2828
"src/core/CL/cl_kernels/common/elementwise_operation_quantized.cl",
2929
"src/core/CL/cl_kernels/common/elementwise_unary.cl",
3030
"src/core/CL/cl_kernels/common/elementwise_unary_quantized.cl",
31-
"src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/fp_post_ops_act_eltwise_op_act.h",
32-
"src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl",
33-
"src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped.cl",
34-
"src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped_only_rhs.cl",
35-
"src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/fp_elementwise_op_helpers.h",
36-
"src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h",
3731
"src/core/CL/cl_kernels/common/fft.cl",
3832
"src/core/CL/cl_kernels/common/fft_digit_reverse.cl",
3933
"src/core/CL/cl_kernels/common/fft_scale.cl",
@@ -53,6 +47,7 @@ opencl_srcs = [
5347
"src/core/CL/cl_kernels/common/mat_mul.cl",
5448
"src/core/CL/cl_kernels/common/mat_mul_mmul.cl",
5549
"src/core/CL/cl_kernels/common/mat_mul_quantized.cl",
50+
"src/core/CL/cl_kernels/common/mat_mul_quantized_mmul.cl",
5651
"src/core/CL/cl_kernels/common/mean_stddev_normalization.cl",
5752
"src/core/CL/cl_kernels/common/memset.cl",
5853
"src/core/CL/cl_kernels/common/minmax_layer.cl",
@@ -73,7 +68,6 @@ opencl_srcs = [
7368
"src/core/CL/cl_kernels/common/select.cl",
7469
"src/core/CL/cl_kernels/common/slice_ops.cl",
7570
"src/core/CL/cl_kernels/common/softmax_layer.cl",
76-
"src/core/CL/cl_kernels/common/softmax_layer_quantized.cl",
7771
"src/core/CL/cl_kernels/common/stack_layer.cl",
7872
"src/core/CL/cl_kernels/common/tile.cl",
7973
"src/core/CL/cl_kernels/common/transpose.cl",
@@ -218,9 +212,12 @@ cc_library_static {
218212
"src/core/AccessWindowAutoPadding.cpp",
219213
"src/core/AccessWindowStatic.cpp",
220214
"src/core/AccessWindowTranspose.cpp",
215+
"src/core/CL/CLCommandBuffer.cpp",
216+
"src/core/CL/CLCompatCommandBuffer.cpp",
221217
"src/core/CL/CLCompileContext.cpp",
222218
"src/core/CL/CLHelpers.cpp",
223219
"src/core/CL/CLKernelLibrary.cpp",
220+
"src/core/CL/CLMutableCommandBuffer.cpp",
224221
"src/core/CL/CLUtils.cpp",
225222
"src/core/CL/DefaultLWSHeuristics.cpp",
226223
"src/core/CL/ICLKernel.cpp",
@@ -396,13 +393,15 @@ cc_library_static {
396393
"src/core/Validate.cpp",
397394
"src/core/Version.cpp",
398395
"src/core/helpers/SoftmaxHelpers.cpp",
396+
"src/core/helpers/Utils.cpp",
399397
"src/core/helpers/WindowHelpers.cpp",
400398
"src/core/utils/ActivationFunctionUtils.cpp",
401399
"src/core/utils/AssemblyUtils.cpp",
402400
"src/core/utils/DataLayoutUtils.cpp",
403401
"src/core/utils/DataTypeUtils.cpp",
404402
"src/core/utils/FormatUtils.cpp",
405403
"src/core/utils/InterpolationPolicyUtils.cpp",
404+
"src/core/utils/Math.cpp",
406405
"src/core/utils/ScaleUtils.cpp",
407406
"src/core/utils/StringUtils.cpp",
408407
"src/core/utils/helpers/fft.cpp",
@@ -485,28 +484,28 @@ cc_library_static {
485484
"src/cpu/kernels/boundingboxtransform/generic/neon/fp32.cpp",
486485
"src/cpu/kernels/boundingboxtransform/generic/neon/impl.cpp",
487486
"src/cpu/kernels/boundingboxtransform/generic/neon/qsymm16.cpp",
488-
"src/cpu/kernels/cast/generic/neon/bfloat16.cpp",
489487
"src/cpu/kernels/cast/generic/neon/fp16.cpp",
490488
"src/cpu/kernels/crop/generic/neon/fp16.cpp",
491489
"src/cpu/kernels/crop/generic/neon/fp32.cpp",
492-
"src/cpu/kernels/crop/generic/neon/impl.cpp",
493490
"src/cpu/kernels/crop/generic/neon/integer.cpp",
494491
"src/cpu/kernels/depthwiseconv2d/generic/neon/fp16.cpp",
495492
"src/cpu/kernels/depthwiseconv2d/generic/neon/fp32.cpp",
496493
"src/cpu/kernels/depthwiseconv2d/generic/neon/impl.cpp",
497494
"src/cpu/kernels/depthwiseconv2d/generic/neon/qasymm8.cpp",
498495
"src/cpu/kernels/depthwiseconv2d/generic/neon/qasymm8_signed.cpp",
499496
"src/cpu/kernels/directconv2d/nchw/all.cpp",
497+
"src/cpu/kernels/directconv2d/nchw/fp16.cpp",
498+
"src/cpu/kernels/directconv2d/nhwc/neon/fp16.cpp",
500499
"src/cpu/kernels/directconv2d/nhwc/neon/fp32.cpp",
501500
"src/cpu/kernels/directconv2d/nhwc/neon/impl.cpp",
501+
"src/cpu/kernels/directconv2d/nhwc/neon/qasymm8.cpp",
502502
"src/cpu/kernels/elementwise_binary/generic/neon/fp16.cpp",
503503
"src/cpu/kernels/elementwise_binary/generic/neon/fp32.cpp",
504504
"src/cpu/kernels/elementwise_binary/generic/neon/integer.cpp",
505505
"src/cpu/kernels/elementwise_binary/generic/neon/qasymm8.cpp",
506506
"src/cpu/kernels/elementwise_binary/generic/neon/qasymm8_signed.cpp",
507507
"src/cpu/kernels/elementwise_unary/generic/neon/fp16.cpp",
508508
"src/cpu/kernels/elementwise_unary/generic/neon/fp32.cpp",
509-
"src/cpu/kernels/elementwise_unary/generic/neon/impl.cpp",
510509
"src/cpu/kernels/elementwise_unary/generic/neon/integer.cpp",
511510
"src/cpu/kernels/elementwise_unary/generic/neon/q8.cpp",
512511
"src/cpu/kernels/elementwise_unary/generic/neon/qasymm8.cpp",
@@ -515,11 +514,9 @@ cc_library_static {
515514
"src/cpu/kernels/floor/neon/fp32.cpp",
516515
"src/cpu/kernels/fuse_batch_normalization/generic/fp16.cpp",
517516
"src/cpu/kernels/fuse_batch_normalization/generic/fp32.cpp",
518-
"src/cpu/kernels/fuse_batch_normalization/generic/impl.cpp",
519517
"src/cpu/kernels/fuse_batch_normalization/nchw/all.cpp",
520518
"src/cpu/kernels/fuse_batch_normalization/nhwc/neon/fp16.cpp",
521519
"src/cpu/kernels/fuse_batch_normalization/nhwc/neon/fp32.cpp",
522-
"src/cpu/kernels/fuse_batch_normalization/nhwc/neon/impl.cpp",
523520
"src/cpu/kernels/gemm_matrix_add/generic/neon/fp16.cpp",
524521
"src/cpu/kernels/gemm_matrix_add/generic/neon/fp32.cpp",
525522
"src/cpu/kernels/gemm_matrix_add/generic/neon/impl.cpp",
@@ -537,11 +534,9 @@ cc_library_static {
537534
"src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp",
538535
"src/cpu/kernels/l2normlayer/generic/neon/fp16.cpp",
539536
"src/cpu/kernels/l2normlayer/generic/neon/fp32.cpp",
540-
"src/cpu/kernels/l2normlayer/generic/neon/impl.cpp",
541537
"src/cpu/kernels/lut/generic/neon/u8.cpp",
542538
"src/cpu/kernels/maxunpool/generic/neon/fp16.cpp",
543539
"src/cpu/kernels/maxunpool/generic/neon/fp32.cpp",
544-
"src/cpu/kernels/maxunpool/generic/neon/impl.cpp",
545540
"src/cpu/kernels/maxunpool/generic/neon/qasymm8.cpp",
546541
"src/cpu/kernels/maxunpool/generic/neon/qasymm8_signed.cpp",
547542
"src/cpu/kernels/meanstddevnorm/generic/neon/fp16.cpp",
@@ -555,16 +550,13 @@ cc_library_static {
555550
"src/cpu/kernels/pool2d/neon/qasymm8_signed.cpp",
556551
"src/cpu/kernels/pool3d/neon/fp16.cpp",
557552
"src/cpu/kernels/pool3d/neon/fp32.cpp",
558-
"src/cpu/kernels/pool3d/neon/impl.cpp",
559553
"src/cpu/kernels/pool3d/neon/qasymm8.cpp",
560554
"src/cpu/kernels/pool3d/neon/qasymm8_signed.cpp",
561555
"src/cpu/kernels/range/generic/neon/fp16.cpp",
562556
"src/cpu/kernels/range/generic/neon/fp32.cpp",
563-
"src/cpu/kernels/range/generic/neon/impl.cpp",
564557
"src/cpu/kernels/range/generic/neon/integer.cpp",
565558
"src/cpu/kernels/roialign/generic/neon/fp16.cpp",
566559
"src/cpu/kernels/roialign/generic/neon/fp32.cpp",
567-
"src/cpu/kernels/roialign/generic/neon/impl.cpp",
568560
"src/cpu/kernels/roialign/generic/neon/qasymm8.cpp",
569561
"src/cpu/kernels/roialign/generic/neon/qasymm8_signed.cpp",
570562
"src/cpu/kernels/scale/neon/fp16.cpp",
@@ -573,13 +565,13 @@ cc_library_static {
573565
"src/cpu/kernels/scale/neon/qasymm8_signed.cpp",
574566
"src/cpu/kernels/select/generic/neon/fp16.cpp",
575567
"src/cpu/kernels/select/generic/neon/fp32.cpp",
576-
"src/cpu/kernels/select/generic/neon/impl.cpp",
577568
"src/cpu/kernels/select/generic/neon/integer.cpp",
578569
"src/cpu/kernels/softmax/generic/neon/fp16.cpp",
579570
"src/cpu/kernels/softmax/generic/neon/fp32.cpp",
580571
"src/cpu/kernels/softmax/generic/neon/impl.cpp",
581572
"src/cpu/kernels/softmax/generic/neon/qasymm8.cpp",
582573
"src/cpu/kernels/softmax/generic/neon/qasymm8_signed.cpp",
574+
"src/cpu/kernels/sub/neon/fp16.cpp",
583575
"src/cpu/kernels/sub/neon/qasymm8.cpp",
584576
"src/cpu/kernels/sub/neon/qasymm8_signed.cpp",
585577
"src/cpu/kernels/sub/neon/qsymm16.cpp",
@@ -628,6 +620,7 @@ cc_library_static {
628620
"src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp",
629621
"src/dynamic_fusion/sketch/attributes/Conv2dAttributes.cpp",
630622
"src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp",
623+
"src/dynamic_fusion/sketch/attributes/MatMulAttributes.cpp",
631624
"src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp",
632625
"src/dynamic_fusion/sketch/attributes/ReshapeAttributes.cpp",
633626
"src/dynamic_fusion/sketch/attributes/ResizeAttributes.cpp",
@@ -647,8 +640,12 @@ cc_library_static {
647640
"src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp",
648641
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp",
649642
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp",
643+
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDepthwiseConv2d.cpp",
650644
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp",
651645
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp",
646+
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.cpp",
647+
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp",
648+
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.cpp",
652649
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp",
653650
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp",
654651
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp",
@@ -657,6 +654,7 @@ cc_library_static {
657654
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp",
658655
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp",
659656
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp",
657+
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp",
660658
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp",
661659
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp",
662660
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp",
@@ -666,6 +664,7 @@ cc_library_static {
666664
"src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp",
667665
"src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp",
668666
"src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp",
667+
"src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp",
669668
"src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp",
670669
"src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp",
671670
"src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp",
@@ -729,6 +728,7 @@ cc_library_static {
729728
"src/gpu/cl/kernels/ClIndirectConv2dAddressPrecalculationKernel.cpp",
730729
"src/gpu/cl/kernels/ClIndirectConv2dKernel.cpp",
731730
"src/gpu/cl/kernels/ClMatMulLowpNativeKernel.cpp",
731+
"src/gpu/cl/kernels/ClMatMulLowpNativeMMULKernel.cpp",
732732
"src/gpu/cl/kernels/ClMatMulNativeKernel.cpp",
733733
"src/gpu/cl/kernels/ClMatMulNativeMMULKernel.cpp",
734734
"src/gpu/cl/kernels/ClMulKernel.cpp",
@@ -756,6 +756,7 @@ cc_library_static {
756756
"src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.cpp",
757757
"src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.cpp",
758758
"src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp",
759+
"src/gpu/cl/kernels/helpers/MatMulKernelHelpers.cpp",
759760
"src/gpu/cl/operators/ClActivation.cpp",
760761
"src/gpu/cl/operators/ClAdd.cpp",
761762
"src/gpu/cl/operators/ClCast.cpp",
@@ -1310,6 +1311,7 @@ cc_library_static {
13101311
"src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/a55.cpp",
13111312
"src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/generic.cpp",
13121313
"src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_bf16fp32_dot_16VL/generic.cpp",
1314+
"src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp16fp32fp16_dot_16VL/generic.cpp",
13131315
"src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp32_mla_16VL/generic.cpp",
13141316
"src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp32bf16fp32_dot_16VL/generic.cpp",
13151317
"src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_s8qa_dot_16VL/generic.cpp",

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
2828
list(APPEND CMAKE_MESSAGE_CONTEXT ArmCompute)
2929
project(
3030
ArmCompute
31-
VERSION 32.0.0
31+
VERSION 33.0.0
3232
DESCRIPTION
3333
"The Arm Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A CPU and Arm® Mali™ GPU architectures"
3434
LANGUAGES C CXX ASM)

0 commit comments

Comments
 (0)