Skip to content

Commit fee5c12

Browse files
committed
QS8 / QU8 PReLU microkernels
- Implementations for various ISAs:
  - x86 AVX2
  - Scalar ISA
- Unit tests

Signed-off-by: Ravi Kumar Soni <[email protected]>
Signed-off-by: Swami, Preksha <[email protected]>
1 parent a108468 commit fee5c12

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+3961
-2
lines changed

BUILD.bazel

+4
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,8 @@ MICROKERNEL_DEFS = [
176176
"src/qs8-vmul/qs8-vmul-minmax-rndnu.h",
177177
"src/qs8-vmulc/qs8-vmulc-minmax-fp32.h",
178178
"src/qs8-vmulc/qs8-vmulc-minmax-rndnu.h",
179+
"src/qs8-vprelu/qs8-vprelu.h",
180+
"src/qs8-vpreluc/qs8-vpreluc.h",
179181
"src/qu8-avgpool/qu8-avgpool-minmax.h",
180182
"src/qu8-dwconv/qu8-dwconv-minmax-multipass-fp32.h",
181183
"src/qu8-dwconv/qu8-dwconv-minmax-multipass-rndnu.h",
@@ -190,6 +192,8 @@ MICROKERNEL_DEFS = [
190192
"src/qu8-vmul/qu8-vmul-minmax-rndnu.h",
191193
"src/qu8-vmulc/qu8-vmulc-minmax-fp32.h",
192194
"src/qu8-vmulc/qu8-vmulc-minmax-rndnu.h",
195+
"src/qu8-vprelu/qu8-vprelu.h",
196+
"src/qu8-vpreluc/qu8-vpreluc.h",
193197
"src/s8-maxpool/s8-maxpool-minmax.h",
194198
"src/s8-vclamp/s8-vclamp.h",
195199
"src/u8-maxpool/u8-maxpool-minmax.h",

CMakeLists.txt

+5-1
Original file line numberDiff line numberDiff line change
@@ -1679,12 +1679,16 @@ IF(XNNPACK_BUILD_TESTS)
16791679
qs8-vaddc-minmax
16801680
qs8-vmul-minmax-fp32
16811681
qs8-vmulc-minmax-fp32
1682+
qs8-vprelu
1683+
qs8-vpreluc
16821684
qu8-vadd-minmax
16831685
qu8-vaddc-minmax
16841686
qu8-vmul-minmax-fp32
16851687
qu8-vmul-minmax-rndnu
16861688
qu8-vmulc-minmax-fp32
1687-
qu8-vmulc-minmax-rndnu)
1689+
qu8-vmulc-minmax-rndnu
1690+
qu8-vprelu
1691+
qu8-vpreluc)
16881692
FOREACH(TEST ${MICROKERNEL_VBINARY_UNIT_TESTS})
16891693
ADD_EXECUTABLE(${TEST}-test test/${TEST}.cc)
16901694
TARGET_INCLUDE_DIRECTORIES(${TEST}-test PRIVATE include src test)

cmake/gen/avx2_microkernels.cmake

+4
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ SET(PROD_AVX2_MICROKERNEL_SRCS
6262
src/qs8-vaddc/gen/qs8-vaddc-minmax-avx2-mul32-ld64-u16.c
6363
src/qs8-vcvt/gen/qs8-vcvt-avx2-u32.c
6464
src/qs8-vlrelu/gen/qs8-vlrelu-avx2-u32.c
65+
src/qs8-vprelu/gen/qs8-vprelu-avx2-u16.c
66+
src/qs8-vpreluc/gen/qs8-vpreluc-avx2-u16.c
6567
src/qu8-dwconv/gen/qu8-dwconv-9p16c-minmax-fp32-avx2-mul32.c
6668
src/qu8-dwconv/gen/qu8-dwconv-25p16c-minmax-fp32-avx2-mul32.c
6769
src/qu8-f32-vcvt/gen/qu8-f32-vcvt-avx2-u16.c
@@ -74,6 +76,8 @@ SET(PROD_AVX2_MICROKERNEL_SRCS
7476
src/qu8-vaddc/gen/qu8-vaddc-minmax-avx2-mul32-ld64-u16.c
7577
src/qu8-vcvt/gen/qu8-vcvt-avx2-u32.c
7678
src/qu8-vlrelu/gen/qu8-vlrelu-avx2-u32.c
79+
src/qu8-vprelu/gen/qu8-vprelu-avx2-u16.c
80+
src/qu8-vpreluc/gen/qu8-vpreluc-avx2-u16.c
7781
src/s8-vclamp/s8-vclamp-avx2-u128.c
7882
src/u8-vclamp/u8-vclamp-avx2-u128.c
7983
src/x8-lut/gen/x8-lut-avx2-u128.c

cmake/gen/scalar_microkernels.cmake

+16
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,8 @@ SET(PROD_SCALAR_MICROKERNEL_SRCS
186186
src/qs8-vlrelu/gen/qs8-vlrelu-scalar-select-u4.c
187187
src/qs8-vmul/gen/qs8-vmul-minmax-fp32-scalar-u4.c
188188
src/qs8-vmulc/gen/qs8-vmulc-minmax-fp32-scalar-u4.c
189+
src/qs8-vprelu/gen/qs8-vprelu-scalar-u8.c
190+
src/qs8-vpreluc/gen/qs8-vpreluc-scalar-u8.c
189191
src/qu8-avgpool/qu8-avgpool-9p8x-minmax-fp32-scalar-imagic-c1.c
190192
src/qu8-avgpool/qu8-avgpool-9x-minmax-fp32-scalar-imagic-c1.c
191193
src/qu8-dwconv/gen/qu8-dwconv-9p1c-minmax-fp32-scalar-fmagic.c
@@ -216,6 +218,8 @@ SET(PROD_SCALAR_MICROKERNEL_SRCS
216218
src/qu8-vlrelu/gen/qu8-vlrelu-scalar-select-u4.c
217219
src/qu8-vmul/gen/qu8-vmul-minmax-fp32-scalar-u4.c
218220
src/qu8-vmulc/gen/qu8-vmulc-minmax-fp32-scalar-u4.c
221+
src/qu8-vprelu/gen/qu8-vprelu-scalar-u8.c
222+
src/qu8-vpreluc/gen/qu8-vpreluc-scalar-u8.c
219223
src/s8-ibilinear/gen/s8-ibilinear-scalar-c1.c
220224
src/s8-maxpool/s8-maxpool-9p8x-minmax-scalar-c1.c
221225
src/s8-vclamp/s8-vclamp-scalar-u4.c
@@ -708,6 +712,12 @@ SET(NON_PROD_SCALAR_MICROKERNEL_SRCS
708712
src/qs8-vmul/gen/qs8-vmul-minmax-fp32-scalar-u2.c
709713
src/qs8-vmulc/gen/qs8-vmulc-minmax-fp32-scalar-u1.c
710714
src/qs8-vmulc/gen/qs8-vmulc-minmax-fp32-scalar-u2.c
715+
src/qs8-vprelu/gen/qs8-vprelu-scalar-u1.c
716+
src/qs8-vprelu/gen/qs8-vprelu-scalar-u2.c
717+
src/qs8-vprelu/gen/qs8-vprelu-scalar-u4.c
718+
src/qs8-vpreluc/gen/qs8-vpreluc-scalar-u1.c
719+
src/qs8-vpreluc/gen/qs8-vpreluc-scalar-u2.c
720+
src/qs8-vpreluc/gen/qs8-vpreluc-scalar-u4.c
711721
src/qu8-dwconv/gen/qu8-dwconv-5f5m5l1c1s1r-minmax-fp32-scalar-fmagic.c
712722
src/qu8-dwconv/gen/qu8-dwconv-5f5m5l1c1s1r-minmax-fp32-scalar-imagic.c
713723
src/qu8-dwconv/gen/qu8-dwconv-5f5m5l1c1s1r-minmax-fp32-scalar-lrintf.c
@@ -821,6 +831,12 @@ SET(NON_PROD_SCALAR_MICROKERNEL_SRCS
821831
src/qu8-vmul/gen/qu8-vmul-minmax-fp32-scalar-u2.c
822832
src/qu8-vmulc/gen/qu8-vmulc-minmax-fp32-scalar-u1.c
823833
src/qu8-vmulc/gen/qu8-vmulc-minmax-fp32-scalar-u2.c
834+
src/qu8-vprelu/gen/qu8-vprelu-scalar-u1.c
835+
src/qu8-vprelu/gen/qu8-vprelu-scalar-u2.c
836+
src/qu8-vprelu/gen/qu8-vprelu-scalar-u4.c
837+
src/qu8-vpreluc/gen/qu8-vpreluc-scalar-u1.c
838+
src/qu8-vpreluc/gen/qu8-vpreluc-scalar-u2.c
839+
src/qu8-vpreluc/gen/qu8-vpreluc-scalar-u4.c
824840
src/s8-ibilinear/gen/s8-ibilinear-scalar-c2.c
825841
src/s8-ibilinear/gen/s8-ibilinear-scalar-c4.c
826842
src/u8-ibilinear/gen/u8-ibilinear-scalar-c2.c

gen/avx2_microkernels.bzl

+4
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ PROD_AVX2_MICROKERNEL_SRCS = [
5858
"src/qs8-vaddc/gen/qs8-vaddc-minmax-avx2-mul32-ld64-u16.c",
5959
"src/qs8-vcvt/gen/qs8-vcvt-avx2-u32.c",
6060
"src/qs8-vlrelu/gen/qs8-vlrelu-avx2-u32.c",
61+
"src/qs8-vprelu/gen/qs8-vprelu-avx2-u16.c",
62+
"src/qs8-vpreluc/gen/qs8-vpreluc-avx2-u16.c",
6163
"src/qu8-dwconv/gen/qu8-dwconv-9p16c-minmax-fp32-avx2-mul32.c",
6264
"src/qu8-dwconv/gen/qu8-dwconv-25p16c-minmax-fp32-avx2-mul32.c",
6365
"src/qu8-f32-vcvt/gen/qu8-f32-vcvt-avx2-u16.c",
@@ -70,6 +72,8 @@ PROD_AVX2_MICROKERNEL_SRCS = [
7072
"src/qu8-vaddc/gen/qu8-vaddc-minmax-avx2-mul32-ld64-u16.c",
7173
"src/qu8-vcvt/gen/qu8-vcvt-avx2-u32.c",
7274
"src/qu8-vlrelu/gen/qu8-vlrelu-avx2-u32.c",
75+
"src/qu8-vprelu/gen/qu8-vprelu-avx2-u16.c",
76+
"src/qu8-vpreluc/gen/qu8-vpreluc-avx2-u16.c",
7377
"src/s8-vclamp/s8-vclamp-avx2-u128.c",
7478
"src/u8-vclamp/u8-vclamp-avx2-u128.c",
7579
"src/x8-lut/gen/x8-lut-avx2-u128.c",

gen/scalar_microkernels.bzl

+16
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,8 @@ PROD_SCALAR_MICROKERNEL_SRCS = [
182182
"src/qs8-vlrelu/gen/qs8-vlrelu-scalar-select-u4.c",
183183
"src/qs8-vmul/gen/qs8-vmul-minmax-fp32-scalar-u4.c",
184184
"src/qs8-vmulc/gen/qs8-vmulc-minmax-fp32-scalar-u4.c",
185+
"src/qs8-vprelu/gen/qs8-vprelu-scalar-u8.c",
186+
"src/qs8-vpreluc/gen/qs8-vpreluc-scalar-u8.c",
185187
"src/qu8-avgpool/qu8-avgpool-9p8x-minmax-fp32-scalar-imagic-c1.c",
186188
"src/qu8-avgpool/qu8-avgpool-9x-minmax-fp32-scalar-imagic-c1.c",
187189
"src/qu8-dwconv/gen/qu8-dwconv-9p1c-minmax-fp32-scalar-fmagic.c",
@@ -212,6 +214,8 @@ PROD_SCALAR_MICROKERNEL_SRCS = [
212214
"src/qu8-vlrelu/gen/qu8-vlrelu-scalar-select-u4.c",
213215
"src/qu8-vmul/gen/qu8-vmul-minmax-fp32-scalar-u4.c",
214216
"src/qu8-vmulc/gen/qu8-vmulc-minmax-fp32-scalar-u4.c",
217+
"src/qu8-vprelu/gen/qu8-vprelu-scalar-u8.c",
218+
"src/qu8-vpreluc/gen/qu8-vpreluc-scalar-u8.c",
215219
"src/s8-ibilinear/gen/s8-ibilinear-scalar-c1.c",
216220
"src/s8-maxpool/s8-maxpool-9p8x-minmax-scalar-c1.c",
217221
"src/s8-vclamp/s8-vclamp-scalar-u4.c",
@@ -705,6 +709,12 @@ NON_PROD_SCALAR_MICROKERNEL_SRCS = [
705709
"src/qs8-vmul/gen/qs8-vmul-minmax-fp32-scalar-u2.c",
706710
"src/qs8-vmulc/gen/qs8-vmulc-minmax-fp32-scalar-u1.c",
707711
"src/qs8-vmulc/gen/qs8-vmulc-minmax-fp32-scalar-u2.c",
712+
"src/qs8-vprelu/gen/qs8-vprelu-scalar-u1.c",
713+
"src/qs8-vprelu/gen/qs8-vprelu-scalar-u2.c",
714+
"src/qs8-vprelu/gen/qs8-vprelu-scalar-u4.c",
715+
"src/qs8-vpreluc/gen/qs8-vpreluc-scalar-u1.c",
716+
"src/qs8-vpreluc/gen/qs8-vpreluc-scalar-u2.c",
717+
"src/qs8-vpreluc/gen/qs8-vpreluc-scalar-u4.c",
708718
"src/qu8-dwconv/gen/qu8-dwconv-5f5m5l1c1s1r-minmax-fp32-scalar-fmagic.c",
709719
"src/qu8-dwconv/gen/qu8-dwconv-5f5m5l1c1s1r-minmax-fp32-scalar-imagic.c",
710720
"src/qu8-dwconv/gen/qu8-dwconv-5f5m5l1c1s1r-minmax-fp32-scalar-lrintf.c",
@@ -818,6 +828,12 @@ NON_PROD_SCALAR_MICROKERNEL_SRCS = [
818828
"src/qu8-vmul/gen/qu8-vmul-minmax-fp32-scalar-u2.c",
819829
"src/qu8-vmulc/gen/qu8-vmulc-minmax-fp32-scalar-u1.c",
820830
"src/qu8-vmulc/gen/qu8-vmulc-minmax-fp32-scalar-u2.c",
831+
"src/qu8-vprelu/gen/qu8-vprelu-scalar-u1.c",
832+
"src/qu8-vprelu/gen/qu8-vprelu-scalar-u2.c",
833+
"src/qu8-vprelu/gen/qu8-vprelu-scalar-u4.c",
834+
"src/qu8-vpreluc/gen/qu8-vpreluc-scalar-u1.c",
835+
"src/qu8-vpreluc/gen/qu8-vpreluc-scalar-u2.c",
836+
"src/qu8-vpreluc/gen/qu8-vpreluc-scalar-u4.c",
821837
"src/s8-ibilinear/gen/s8-ibilinear-scalar-c2.c",
822838
"src/s8-ibilinear/gen/s8-ibilinear-scalar-c4.c",
823839
"src/u8-ibilinear/gen/u8-ibilinear-scalar-c2.c",

scripts/generate-qs8-vprelu.sh

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
#
3+
# Redistribution and use in source and binary forms, with or without modification,
4+
# are permitted provided that the following conditions are met:
5+
#
6+
# 1. Redistributions of source code must retain the above copyright notice,
7+
# this list of conditions and the following disclaimer.
8+
# 2. Redistributions in binary form must reproduce the above copyright notice,
9+
# this list of conditions and the following disclaimer in the documentation
10+
# and/or other materials provided with the distribution.
11+
# 3. Neither the name of the copyright holder nor the names of its contributors
12+
# may be used to endorse or promote products derived from this software
13+
# without specific prior written permission.
14+
#
15+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
17+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
19+
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
20+
# OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21+
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22+
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23+
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
24+
# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25+
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
#
27+
#
28+
# SPDX-License-Identifier: BSD-3-Clause
29+
30+
#################################### Scalar ###################################
# QS8 scalar kernels, one per batch tile. Every xngen run is sent to the
# background; the `wait` at the bottom of the script joins all of them.
for tile in 1 2 4 8; do
  tools/xngen src/qs8-vprelu/scalar.c.in -D BATCH_TILE="${tile}" -D DATATYPE=QS8 -o "src/qs8-vprelu/gen/qs8-vprelu-scalar-u${tile}.c" &
done

# QU8 scalar kernels are produced from the same qs8-vprelu template, with
# DATATYPE=QU8 and output under src/qu8-vprelu/gen.
for tile in 1 2 4 8; do
  tools/xngen src/qs8-vprelu/scalar.c.in -D BATCH_TILE="${tile}" -D DATATYPE=QU8 -o "src/qu8-vprelu/gen/qu8-vprelu-scalar-u${tile}.c" &
done

#################################### AVX2 ###################################
# A single 16-element tile per datatype.
tools/xngen src/qs8-vprelu/avx2.c.in -D BATCH_TILE=16 -D AVX=1 -D DATATYPE=QS8 -o src/qs8-vprelu/gen/qs8-vprelu-avx2-u16.c &

tools/xngen src/qs8-vprelu/avx2.c.in -D BATCH_TILE=16 -D AVX=1 -D DATATYPE=QU8 -o src/qu8-vprelu/gen/qu8-vprelu-avx2-u16.c &

# Block until every background generator has finished.
wait
48+

scripts/generate-qs8-vpreluc.sh

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
#
3+
# Redistribution and use in source and binary forms, with or without modification,
4+
# are permitted provided that the following conditions are met:
5+
#
6+
# 1. Redistributions of source code must retain the above copyright notice,
7+
# this list of conditions and the following disclaimer.
8+
# 2. Redistributions in binary form must reproduce the above copyright notice,
9+
# this list of conditions and the following disclaimer in the documentation
10+
# and/or other materials provided with the distribution.
11+
# 3. Neither the name of the copyright holder nor the names of its contributors
12+
# may be used to endorse or promote products derived from this software
13+
# without specific prior written permission.
14+
#
15+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
17+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
19+
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
20+
# OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21+
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22+
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23+
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
24+
# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25+
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
#
27+
#
28+
# SPDX-License-Identifier: BSD-3-Clause
29+
30+
#################################### Scalar ###################################
# QS8 scalar kernels, one per batch tile. Every xngen run is sent to the
# background; the `wait` at the bottom of the script joins all of them.
for tile in 1 2 4 8; do
  tools/xngen src/qs8-vpreluc/scalar.c.in -D BATCH_TILE="${tile}" -D DATATYPE=QS8 -o "src/qs8-vpreluc/gen/qs8-vpreluc-scalar-u${tile}.c" &
done

# QU8 scalar kernels are produced from the same qs8-vpreluc template, with
# DATATYPE=QU8 and output under src/qu8-vpreluc/gen.
for tile in 1 2 4 8; do
  tools/xngen src/qs8-vpreluc/scalar.c.in -D BATCH_TILE="${tile}" -D DATATYPE=QU8 -o "src/qu8-vpreluc/gen/qu8-vpreluc-scalar-u${tile}.c" &
done

#################################### AVX2 ###################################
# A single 16-element tile per datatype.
tools/xngen src/qs8-vpreluc/avx2.c.in -D BATCH_TILE=16 -D AVX=1 -D DATATYPE=QS8 -o src/qs8-vpreluc/gen/qs8-vpreluc-avx2-u16.c &

tools/xngen src/qs8-vpreluc/avx2.c.in -D BATCH_TILE=16 -D AVX=1 -D DATATYPE=QU8 -o src/qu8-vpreluc/gen/qu8-vpreluc-avx2-u16.c &

# Block until every background generator has finished.
wait
48+

scripts/generate-tests.sh

+5
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,11 @@ tools/generate-vbinary-test.py --tester VBinaryMicrokernelTester --broadcast_b -
130130
tools/generate-vbinary-test.py --tester VBinaryMicrokernelTester --broadcast_b --ukernel qu8-vmulc-minmax-fp32 --output test/qu8-vmulc-minmax-fp32.cc &
131131
tools/generate-vbinary-test.py --tester VBinaryMicrokernelTester --broadcast_b --ukernel qu8-vmulc-minmax-rndnu --output test/qu8-vmulc-minmax-rndnu.cc &
132132

133+
tools/generate-vbinary-test.py --tester VBinaryMicrokernelTester --ukernel qs8-vprelu --output test/qs8-vprelu.cc &
134+
tools/generate-vbinary-test.py --tester VBinaryMicrokernelTester --ukernel qs8-vpreluc --output test/qs8-vpreluc.cc &
135+
tools/generate-vbinary-test.py --tester VBinaryMicrokernelTester --ukernel qu8-vprelu --output test/qu8-vprelu.cc &
136+
tools/generate-vbinary-test.py --tester VBinaryMicrokernelTester --ukernel qu8-vpreluc --output test/qu8-vpreluc.cc &
137+
133138
### Tests for VUnary micro-kernels
134139
tools/generate-vunary-test.py --ukernel f16-vclamp --output test/f16-vclamp.cc &
135140
tools/generate-vunary-test.py --ukernel f16-velu --output test/f16-velu.cc &

src/configs/binary-elementwise-config.c

+62-1
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,11 @@ static struct xnn_binary_elementwise_config f32_vsqrdiff_config = {0};
3434

3535
static struct xnn_binary_elementwise_config qs8_vadd_config = {0};
3636
static struct xnn_binary_elementwise_config qs8_vmul_config = {0};
37+
static struct xnn_binary_elementwise_config qs8_vprelu_config = {0};
3738

3839
static struct xnn_binary_elementwise_config qu8_vadd_config = {0};
3940
static struct xnn_binary_elementwise_config qu8_vmul_config = {0};
41+
static struct xnn_binary_elementwise_config qu8_vprelu_config = {0};
4042

4143
XNN_INIT_ONCE_GUARD(f16_vadd);
4244
XNN_INIT_ONCE_GUARD(f16_vdiv);
@@ -57,9 +59,10 @@ XNN_INIT_ONCE_GUARD(f32_vsub);
5759
XNN_INIT_ONCE_GUARD(f32_vsqrdiff);
5860
XNN_INIT_ONCE_GUARD(qs8_vadd);
5961
XNN_INIT_ONCE_GUARD(qs8_vmul);
62+
XNN_INIT_ONCE_GUARD(qs8_vprelu);
6063
XNN_INIT_ONCE_GUARD(qu8_vadd);
6164
XNN_INIT_ONCE_GUARD(qu8_vmul);
62-
65+
XNN_INIT_ONCE_GUARD(qu8_vprelu);
6366

6467
static void init_f16_vadd_config(void) {
6568
#if XNN_ARCH_ARM && XNN_ENABLE_ARM_FP16_VECTOR && XNN_ENABLE_ARM_FP16_SCALAR
@@ -1105,6 +1108,26 @@ static void init_qs8_vmul_config(void) {
11051108
#endif
11061109
}
11071110

1111+
static void init_qs8_vprelu_config(void) {
1112+
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1113+
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
1114+
assert(hardware_config != NULL);
1115+
if (hardware_config->use_x86_avx2) {
1116+
qs8_vprelu_config.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vprelu_ukernel__avx2_u16;
1117+
qs8_vprelu_config.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vpreluc_ukernel__avx2_u16;
1118+
qs8_vprelu_config.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vpreluc_ukernel__avx2_u16;
1119+
qs8_vprelu_config.init = (xnn_init_binary_params_fn) xnn_init_qs8_vprelu_scalar_params;
1120+
qs8_vprelu_config.element_tile = 16;
1121+
} else {
1122+
qs8_vprelu_config.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vprelu_ukernel__scalar_u8;
1123+
qs8_vprelu_config.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vpreluc_ukernel__scalar_u8;
1124+
qs8_vprelu_config.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vpreluc_ukernel__scalar_u8;
1125+
qs8_vprelu_config.init = (xnn_init_binary_params_fn) xnn_init_qs8_vprelu_scalar_params;
1126+
qs8_vprelu_config.element_tile = 8;
1127+
}
1128+
#endif
1129+
}
1130+
11081131
static void init_qu8_vadd_config(void) {
11091132
#if XNN_ARCH_ARM
11101133
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
@@ -1246,6 +1269,26 @@ static void init_qu8_vmul_config(void) {
12461269
#endif
12471270
}
12481271

1272+
static void init_qu8_vprelu_config(void) {
1273+
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1274+
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
1275+
assert(hardware_config != NULL);
1276+
if (hardware_config->use_x86_avx2) {
1277+
qu8_vprelu_config.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vprelu_ukernel__avx2_u16;
1278+
qu8_vprelu_config.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vpreluc_ukernel__avx2_u16;
1279+
qu8_vprelu_config.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vpreluc_ukernel__avx2_u16;
1280+
qu8_vprelu_config.init = (xnn_init_binary_params_fn) xnn_init_qu8_vprelu_scalar_params;
1281+
qu8_vprelu_config.element_tile = 16;
1282+
} else {
1283+
qu8_vprelu_config.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vprelu_ukernel__scalar_u8;
1284+
qu8_vprelu_config.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vpreluc_ukernel__scalar_u8;
1285+
qu8_vprelu_config.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vpreluc_ukernel__scalar_u8;
1286+
qu8_vprelu_config.init = (xnn_init_binary_params_fn) xnn_init_qu8_vprelu_scalar_params;
1287+
qu8_vprelu_config.element_tile = 8;
1288+
}
1289+
#endif
1290+
}
1291+
12491292
const struct xnn_binary_elementwise_config* xnn_init_f16_vadd_config() {
12501293
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
12511294
if (hardware_config == NULL || !xnn_is_f16_compatible_config(hardware_config)) {
@@ -1417,6 +1460,15 @@ const struct xnn_binary_elementwise_config* xnn_init_qs8_vmul_config() {
14171460
return &qs8_vmul_config;
14181461
}
14191462

1463+
const struct xnn_binary_elementwise_config* xnn_init_qs8_vprelu_config() {
1464+
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
1465+
if (hardware_config == NULL) {
1466+
return NULL;
1467+
}
1468+
XNN_INIT_ONCE(qs8_vprelu);
1469+
return &qs8_vprelu_config;
1470+
}
1471+
14201472
const struct xnn_binary_elementwise_config* xnn_init_qu8_vadd_config() {
14211473
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
14221474
if (hardware_config == NULL) {
@@ -1434,3 +1486,12 @@ const struct xnn_binary_elementwise_config* xnn_init_qu8_vmul_config() {
14341486
XNN_INIT_ONCE(qu8_vmul);
14351487
return &qu8_vmul_config;
14361488
}
1489+
1490+
const struct xnn_binary_elementwise_config* xnn_init_qu8_vprelu_config() {
1491+
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
1492+
if (hardware_config == NULL) {
1493+
return NULL;
1494+
}
1495+
XNN_INIT_ONCE(qu8_vprelu);
1496+
return &qu8_vprelu_config;
1497+
}

0 commit comments

Comments
 (0)