@@ -28,12 +28,6 @@ opencl_srcs = [
2828 " src/core/CL/cl_kernels/common/elementwise_operation_quantized.cl" ,
2929 " src/core/CL/cl_kernels/common/elementwise_unary.cl" ,
3030 " src/core/CL/cl_kernels/common/elementwise_unary_quantized.cl" ,
31- " src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/fp_post_ops_act_eltwise_op_act.h" ,
32- " src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl" ,
33- " src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped.cl" ,
34- " src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped_only_rhs.cl" ,
35- " src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/fp_elementwise_op_helpers.h" ,
36- " src/core/CL/cl_kernels/common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h" ,
3731 " src/core/CL/cl_kernels/common/fft.cl" ,
3832 " src/core/CL/cl_kernels/common/fft_digit_reverse.cl" ,
3933 " src/core/CL/cl_kernels/common/fft_scale.cl" ,
@@ -53,6 +47,7 @@ opencl_srcs = [
5347 " src/core/CL/cl_kernels/common/mat_mul.cl" ,
5448 " src/core/CL/cl_kernels/common/mat_mul_mmul.cl" ,
5549 " src/core/CL/cl_kernels/common/mat_mul_quantized.cl" ,
50+ " src/core/CL/cl_kernels/common/mat_mul_quantized_mmul.cl" ,
5651 " src/core/CL/cl_kernels/common/mean_stddev_normalization.cl" ,
5752 " src/core/CL/cl_kernels/common/memset.cl" ,
5853 " src/core/CL/cl_kernels/common/minmax_layer.cl" ,
@@ -73,7 +68,6 @@ opencl_srcs = [
7368 " src/core/CL/cl_kernels/common/select.cl" ,
7469 " src/core/CL/cl_kernels/common/slice_ops.cl" ,
7570 " src/core/CL/cl_kernels/common/softmax_layer.cl" ,
76- " src/core/CL/cl_kernels/common/softmax_layer_quantized.cl" ,
7771 " src/core/CL/cl_kernels/common/stack_layer.cl" ,
7872 " src/core/CL/cl_kernels/common/tile.cl" ,
7973 " src/core/CL/cl_kernels/common/transpose.cl" ,
@@ -218,9 +212,12 @@ cc_library_static {
218212 " src/core/AccessWindowAutoPadding.cpp" ,
219213 " src/core/AccessWindowStatic.cpp" ,
220214 " src/core/AccessWindowTranspose.cpp" ,
215+ " src/core/CL/CLCommandBuffer.cpp" ,
216+ " src/core/CL/CLCompatCommandBuffer.cpp" ,
221217 " src/core/CL/CLCompileContext.cpp" ,
222218 " src/core/CL/CLHelpers.cpp" ,
223219 " src/core/CL/CLKernelLibrary.cpp" ,
220+ " src/core/CL/CLMutableCommandBuffer.cpp" ,
224221 " src/core/CL/CLUtils.cpp" ,
225222 " src/core/CL/DefaultLWSHeuristics.cpp" ,
226223 " src/core/CL/ICLKernel.cpp" ,
@@ -396,13 +393,15 @@ cc_library_static {
396393 " src/core/Validate.cpp" ,
397394 " src/core/Version.cpp" ,
398395 " src/core/helpers/SoftmaxHelpers.cpp" ,
396+ " src/core/helpers/Utils.cpp" ,
399397 " src/core/helpers/WindowHelpers.cpp" ,
400398 " src/core/utils/ActivationFunctionUtils.cpp" ,
401399 " src/core/utils/AssemblyUtils.cpp" ,
402400 " src/core/utils/DataLayoutUtils.cpp" ,
403401 " src/core/utils/DataTypeUtils.cpp" ,
404402 " src/core/utils/FormatUtils.cpp" ,
405403 " src/core/utils/InterpolationPolicyUtils.cpp" ,
404+ " src/core/utils/Math.cpp" ,
406405 " src/core/utils/ScaleUtils.cpp" ,
407406 " src/core/utils/StringUtils.cpp" ,
408407 " src/core/utils/helpers/fft.cpp" ,
@@ -485,28 +484,28 @@ cc_library_static {
485484 " src/cpu/kernels/boundingboxtransform/generic/neon/fp32.cpp" ,
486485 " src/cpu/kernels/boundingboxtransform/generic/neon/impl.cpp" ,
487486 " src/cpu/kernels/boundingboxtransform/generic/neon/qsymm16.cpp" ,
488- " src/cpu/kernels/cast/generic/neon/bfloat16.cpp" ,
489487 " src/cpu/kernels/cast/generic/neon/fp16.cpp" ,
490488 " src/cpu/kernels/crop/generic/neon/fp16.cpp" ,
491489 " src/cpu/kernels/crop/generic/neon/fp32.cpp" ,
492- " src/cpu/kernels/crop/generic/neon/impl.cpp" ,
493490 " src/cpu/kernels/crop/generic/neon/integer.cpp" ,
494491 " src/cpu/kernels/depthwiseconv2d/generic/neon/fp16.cpp" ,
495492 " src/cpu/kernels/depthwiseconv2d/generic/neon/fp32.cpp" ,
496493 " src/cpu/kernels/depthwiseconv2d/generic/neon/impl.cpp" ,
497494 " src/cpu/kernels/depthwiseconv2d/generic/neon/qasymm8.cpp" ,
498495 " src/cpu/kernels/depthwiseconv2d/generic/neon/qasymm8_signed.cpp" ,
499496 " src/cpu/kernels/directconv2d/nchw/all.cpp" ,
497+ " src/cpu/kernels/directconv2d/nchw/fp16.cpp" ,
498+ " src/cpu/kernels/directconv2d/nhwc/neon/fp16.cpp" ,
500499 " src/cpu/kernels/directconv2d/nhwc/neon/fp32.cpp" ,
501500 " src/cpu/kernels/directconv2d/nhwc/neon/impl.cpp" ,
501+ " src/cpu/kernels/directconv2d/nhwc/neon/qasymm8.cpp" ,
502502 " src/cpu/kernels/elementwise_binary/generic/neon/fp16.cpp" ,
503503 " src/cpu/kernels/elementwise_binary/generic/neon/fp32.cpp" ,
504504 " src/cpu/kernels/elementwise_binary/generic/neon/integer.cpp" ,
505505 " src/cpu/kernels/elementwise_binary/generic/neon/qasymm8.cpp" ,
506506 " src/cpu/kernels/elementwise_binary/generic/neon/qasymm8_signed.cpp" ,
507507 " src/cpu/kernels/elementwise_unary/generic/neon/fp16.cpp" ,
508508 " src/cpu/kernels/elementwise_unary/generic/neon/fp32.cpp" ,
509- " src/cpu/kernels/elementwise_unary/generic/neon/impl.cpp" ,
510509 " src/cpu/kernels/elementwise_unary/generic/neon/integer.cpp" ,
511510 " src/cpu/kernels/elementwise_unary/generic/neon/q8.cpp" ,
512511 " src/cpu/kernels/elementwise_unary/generic/neon/qasymm8.cpp" ,
@@ -515,11 +514,9 @@ cc_library_static {
515514 " src/cpu/kernels/floor/neon/fp32.cpp" ,
516515 " src/cpu/kernels/fuse_batch_normalization/generic/fp16.cpp" ,
517516 " src/cpu/kernels/fuse_batch_normalization/generic/fp32.cpp" ,
518- " src/cpu/kernels/fuse_batch_normalization/generic/impl.cpp" ,
519517 " src/cpu/kernels/fuse_batch_normalization/nchw/all.cpp" ,
520518 " src/cpu/kernels/fuse_batch_normalization/nhwc/neon/fp16.cpp" ,
521519 " src/cpu/kernels/fuse_batch_normalization/nhwc/neon/fp32.cpp" ,
522- " src/cpu/kernels/fuse_batch_normalization/nhwc/neon/impl.cpp" ,
523520 " src/cpu/kernels/gemm_matrix_add/generic/neon/fp16.cpp" ,
524521 " src/cpu/kernels/gemm_matrix_add/generic/neon/fp32.cpp" ,
525522 " src/cpu/kernels/gemm_matrix_add/generic/neon/impl.cpp" ,
@@ -537,11 +534,9 @@ cc_library_static {
537534 " src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp" ,
538535 " src/cpu/kernels/l2normlayer/generic/neon/fp16.cpp" ,
539536 " src/cpu/kernels/l2normlayer/generic/neon/fp32.cpp" ,
540- " src/cpu/kernels/l2normlayer/generic/neon/impl.cpp" ,
541537 " src/cpu/kernels/lut/generic/neon/u8.cpp" ,
542538 " src/cpu/kernels/maxunpool/generic/neon/fp16.cpp" ,
543539 " src/cpu/kernels/maxunpool/generic/neon/fp32.cpp" ,
544- " src/cpu/kernels/maxunpool/generic/neon/impl.cpp" ,
545540 " src/cpu/kernels/maxunpool/generic/neon/qasymm8.cpp" ,
546541 " src/cpu/kernels/maxunpool/generic/neon/qasymm8_signed.cpp" ,
547542 " src/cpu/kernels/meanstddevnorm/generic/neon/fp16.cpp" ,
@@ -555,16 +550,13 @@ cc_library_static {
555550 " src/cpu/kernels/pool2d/neon/qasymm8_signed.cpp" ,
556551 " src/cpu/kernels/pool3d/neon/fp16.cpp" ,
557552 " src/cpu/kernels/pool3d/neon/fp32.cpp" ,
558- " src/cpu/kernels/pool3d/neon/impl.cpp" ,
559553 " src/cpu/kernels/pool3d/neon/qasymm8.cpp" ,
560554 " src/cpu/kernels/pool3d/neon/qasymm8_signed.cpp" ,
561555 " src/cpu/kernels/range/generic/neon/fp16.cpp" ,
562556 " src/cpu/kernels/range/generic/neon/fp32.cpp" ,
563- " src/cpu/kernels/range/generic/neon/impl.cpp" ,
564557 " src/cpu/kernels/range/generic/neon/integer.cpp" ,
565558 " src/cpu/kernels/roialign/generic/neon/fp16.cpp" ,
566559 " src/cpu/kernels/roialign/generic/neon/fp32.cpp" ,
567- " src/cpu/kernels/roialign/generic/neon/impl.cpp" ,
568560 " src/cpu/kernels/roialign/generic/neon/qasymm8.cpp" ,
569561 " src/cpu/kernels/roialign/generic/neon/qasymm8_signed.cpp" ,
570562 " src/cpu/kernels/scale/neon/fp16.cpp" ,
@@ -573,13 +565,13 @@ cc_library_static {
573565 " src/cpu/kernels/scale/neon/qasymm8_signed.cpp" ,
574566 " src/cpu/kernels/select/generic/neon/fp16.cpp" ,
575567 " src/cpu/kernels/select/generic/neon/fp32.cpp" ,
576- " src/cpu/kernels/select/generic/neon/impl.cpp" ,
577568 " src/cpu/kernels/select/generic/neon/integer.cpp" ,
578569 " src/cpu/kernels/softmax/generic/neon/fp16.cpp" ,
579570 " src/cpu/kernels/softmax/generic/neon/fp32.cpp" ,
580571 " src/cpu/kernels/softmax/generic/neon/impl.cpp" ,
581572 " src/cpu/kernels/softmax/generic/neon/qasymm8.cpp" ,
582573 " src/cpu/kernels/softmax/generic/neon/qasymm8_signed.cpp" ,
574+ " src/cpu/kernels/sub/neon/fp16.cpp" ,
583575 " src/cpu/kernels/sub/neon/qasymm8.cpp" ,
584576 " src/cpu/kernels/sub/neon/qasymm8_signed.cpp" ,
585577 " src/cpu/kernels/sub/neon/qsymm16.cpp" ,
@@ -628,6 +620,7 @@ cc_library_static {
628620 " src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp" ,
629621 " src/dynamic_fusion/sketch/attributes/Conv2dAttributes.cpp" ,
630622 " src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp" ,
623+ " src/dynamic_fusion/sketch/attributes/MatMulAttributes.cpp" ,
631624 " src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp" ,
632625 " src/dynamic_fusion/sketch/attributes/ReshapeAttributes.cpp" ,
633626 " src/dynamic_fusion/sketch/attributes/ResizeAttributes.cpp" ,
@@ -647,8 +640,12 @@ cc_library_static {
647640 " src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp" ,
648641 " src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp" ,
649642 " src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp" ,
643+ " src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDepthwiseConv2d.cpp" ,
650644 " src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp" ,
651645 " src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp" ,
646+ " src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwMatMul.cpp" ,
647+ " src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp" ,
648+ " src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.cpp" ,
652649 " src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp" ,
653650 " src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp" ,
654651 " src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp" ,
@@ -657,6 +654,7 @@ cc_library_static {
657654 " src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp" ,
658655 " src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp" ,
659656 " src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp" ,
657+ " src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp" ,
660658 " src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp" ,
661659 " src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp" ,
662660 " src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp" ,
@@ -666,6 +664,7 @@ cc_library_static {
666664 " src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp" ,
667665 " src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp" ,
668666 " src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp" ,
667+ " src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp" ,
669668 " src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp" ,
670669 " src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp" ,
671670 " src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp" ,
@@ -729,6 +728,7 @@ cc_library_static {
729728 " src/gpu/cl/kernels/ClIndirectConv2dAddressPrecalculationKernel.cpp" ,
730729 " src/gpu/cl/kernels/ClIndirectConv2dKernel.cpp" ,
731730 " src/gpu/cl/kernels/ClMatMulLowpNativeKernel.cpp" ,
731+ " src/gpu/cl/kernels/ClMatMulLowpNativeMMULKernel.cpp" ,
732732 " src/gpu/cl/kernels/ClMatMulNativeKernel.cpp" ,
733733 " src/gpu/cl/kernels/ClMatMulNativeMMULKernel.cpp" ,
734734 " src/gpu/cl/kernels/ClMulKernel.cpp" ,
@@ -756,6 +756,7 @@ cc_library_static {
756756 " src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.cpp" ,
757757 " src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.cpp" ,
758758 " src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp" ,
759+ " src/gpu/cl/kernels/helpers/MatMulKernelHelpers.cpp" ,
759760 " src/gpu/cl/operators/ClActivation.cpp" ,
760761 " src/gpu/cl/operators/ClAdd.cpp" ,
761762 " src/gpu/cl/operators/ClCast.cpp" ,
@@ -1310,6 +1311,7 @@ cc_library_static {
13101311 " src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/a55.cpp" ,
13111312 " src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/generic.cpp" ,
13121313 " src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_bf16fp32_dot_16VL/generic.cpp" ,
1314+ " src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp16fp32fp16_dot_16VL/generic.cpp" ,
13131315 " src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp32_mla_16VL/generic.cpp" ,
13141316 " src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp32bf16fp32_dot_16VL/generic.cpp" ,
13151317 " src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_s8qa_dot_16VL/generic.cpp" ,
0 commit comments