Skip to content

Commit 6544567

Browse files
committed
AVX-512 implementation of h_dotp, h_sqr_dotp, h_abs_dotp, updated performance tests
1 parent 847a2a5 commit 6544567

File tree

12 files changed

+873
-63
lines changed

12 files changed

+873
-63
lines changed

include/private/dsp/arch/x86/avx512/hmath.h

+1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#endif /* PRIVATE_DSP_ARCH_X86_AVX512_IMPL */
2828

2929

30+
#include <private/dsp/arch/x86/avx512/hmath/hdotp.h>
3031
#include <private/dsp/arch/x86/avx512/hmath/hsum.h>
3132

3233

include/private/dsp/arch/x86/avx512/hmath/hdotp.h

+354
Large diffs are not rendered by default.

src/main/x86/avx512.cpp

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
2-
* Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
3-
* (C) 2023 Vladimir Sadovnikov <[email protected]>
2+
* Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
3+
* (C) 2024 Vladimir Sadovnikov <[email protected]>
44
*
55
* This file is part of lsp-dsp-lib
66
* Created on: 24 мая 2023 г.
@@ -326,6 +326,10 @@
326326
CEXPORT1(vl, h_sum);
327327
CEXPORT1(vl, h_sqr_sum);
328328
CEXPORT1(vl, h_abs_sum);
329+
330+
CEXPORT1(vl, h_dotp);
331+
CEXPORT1(vl, h_sqr_dotp);
332+
CEXPORT1(vl, h_abs_dotp);
329333
}
330334
} /* namespace avx512 */
331335
} /* namespace lsp */

src/test/ptest/hmath/hdotp.cpp renamed to src/test/ptest/hmath/h_abs_dotp.cpp

+12-30
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
/*
2-
* Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
3-
* (C) 2020 Vladimir Sadovnikov <[email protected]>
2+
* Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
3+
* (C) 2024 Vladimir Sadovnikov <[email protected]>
44
*
55
* This file is part of lsp-dsp-lib
6-
* Created on: 31 мар. 2020 г.
6+
* Created on: 11 дек. 2024 г.
77
*
88
* lsp-dsp-lib is free software: you can redistribute it and/or modify
99
* it under the terms of the GNU Lesser General Public License as published by
@@ -25,56 +25,51 @@
2525
#include <lsp-plug.in/test-fw/helpers.h>
2626
#include <lsp-plug.in/test-fw/ptest.h>
2727

28-
#define MIN_RANK 8
28+
#define MIN_RANK 5
2929
#define MAX_RANK 16
3030

3131
namespace lsp
3232
{
3333
namespace generic
3434
{
35-
float h_dotp(const float *a, const float *b, size_t count);
36-
float h_sqr_dotp(const float *a, const float *b, size_t count);
3735
float h_abs_dotp(const float *a, const float *b, size_t count);
3836
}
3937

4038
IF_ARCH_X86(
4139
namespace sse
4240
{
43-
float h_dotp(const float *a, const float *b, size_t count);
44-
float h_sqr_dotp(const float *a, const float *b, size_t count);
4541
float h_abs_dotp(const float *a, const float *b, size_t count);
4642
}
4743

4844
namespace avx
4945
{
50-
float h_dotp(const float *a, const float *b, size_t count);
51-
float h_sqr_dotp(const float *a, const float *b, size_t count);
46+
float h_abs_dotp(const float *a, const float *b, size_t count);
47+
}
48+
49+
namespace avx512
50+
{
5251
float h_abs_dotp(const float *a, const float *b, size_t count);
5352
}
5453
)
5554

5655
IF_ARCH_ARM(
5756
namespace neon_d32
5857
{
59-
float h_dotp(const float *a, const float *b, size_t count);
60-
float h_sqr_dotp(const float *a, const float *b, size_t count);
6158
float h_abs_dotp(const float *a, const float *b, size_t count);
6259
}
6360
)
6461

6562
IF_ARCH_AARCH64(
6663
namespace asimd
6764
{
68-
float h_dotp(const float *a, const float *b, size_t count);
69-
float h_sqr_dotp(const float *a, const float *b, size_t count);
7065
float h_abs_dotp(const float *a, const float *b, size_t count);
7166
}
7267
)
7368

7469
typedef float (* h_dotp_t)(const float *a, const float *b, size_t count);
7570
}
7671

77-
PTEST_BEGIN("dsp.hmath", hdotp, 5, 10000)
72+
PTEST_BEGIN("dsp.hmath", h_abs_dotp, 5, 5000)
7873

7974
void call(const char *label, float *a, float *b, size_t count, h_dotp_t func)
8075
{
@@ -106,26 +101,13 @@ PTEST_BEGIN("dsp.hmath", hdotp, 5, 10000)
106101
{
107102
size_t count = 1 << i;
108103

109-
CALL(generic::h_dotp);
110-
IF_ARCH_X86(CALL(sse::h_dotp));
111-
IF_ARCH_X86(CALL(avx::h_dotp));
112-
IF_ARCH_ARM(CALL(neon_d32::h_dotp));
113-
IF_ARCH_AARCH64(CALL(asimd::h_dotp));
114-
PTEST_SEPARATOR;
115-
116-
CALL(generic::h_sqr_dotp);
117-
IF_ARCH_X86(CALL(sse::h_sqr_dotp));
118-
IF_ARCH_X86(CALL(avx::h_sqr_dotp));
119-
IF_ARCH_ARM(CALL(neon_d32::h_sqr_dotp));
120-
IF_ARCH_AARCH64(CALL(asimd::h_sqr_dotp));
121-
PTEST_SEPARATOR;
122-
123104
CALL(generic::h_abs_dotp);
124105
IF_ARCH_X86(CALL(sse::h_abs_dotp));
125106
IF_ARCH_X86(CALL(avx::h_abs_dotp));
107+
IF_ARCH_X86(CALL(avx512::h_abs_dotp));
126108
IF_ARCH_ARM(CALL(neon_d32::h_abs_dotp));
127109
IF_ARCH_AARCH64(CALL(asimd::h_abs_dotp));
128-
PTEST_SEPARATOR2;
110+
PTEST_SEPARATOR;
129111
}
130112

131113
free_aligned(data);

src/test/ptest/hmath/h_abs_sum.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
#include <lsp-plug.in/test-fw/helpers.h>
2525
#include <lsp-plug.in/common/alloc.h>
2626

27-
#define MIN_RANK 8
27+
#define MIN_RANK 5
2828
#define MAX_RANK 16
2929

3030
namespace lsp
@@ -68,7 +68,7 @@ namespace lsp
6868
typedef float (* h_sum_t)(const float *src, size_t count);
6969
}
7070

71-
PTEST_BEGIN("dsp.hmath", h_abs_sum, 5, 10000)
71+
PTEST_BEGIN("dsp.hmath", h_abs_sum, 5, 5000)
7272

7373
void call(const char *label, float *src, size_t count, h_sum_t func)
7474
{

src/test/ptest/hmath/h_dotp.cpp

+119
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
/*
2+
* Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
3+
* (C) 2024 Vladimir Sadovnikov <[email protected]>
4+
*
5+
* This file is part of lsp-dsp-lib
6+
* Created on: 31 мар. 2020 г.
7+
*
8+
* lsp-dsp-lib is free software: you can redistribute it and/or modify
9+
* it under the terms of the GNU Lesser General Public License as published by
10+
* the Free Software Foundation, either version 3 of the License, or
11+
* any later version.
12+
*
13+
* lsp-dsp-lib is distributed in the hope that it will be useful,
14+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
15+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16+
* GNU Lesser General Public License for more details.
17+
*
18+
* You should have received a copy of the GNU Lesser General Public License
19+
* along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
20+
*/
21+
22+
#include <lsp-plug.in/common/alloc.h>
23+
#include <lsp-plug.in/common/types.h>
24+
#include <lsp-plug.in/dsp/dsp.h>
25+
#include <lsp-plug.in/test-fw/helpers.h>
26+
#include <lsp-plug.in/test-fw/ptest.h>
27+
28+
#define MIN_RANK 5
29+
#define MAX_RANK 16
30+
31+
namespace lsp
32+
{
33+
namespace generic
34+
{
35+
float h_dotp(const float *a, const float *b, size_t count);
36+
}
37+
38+
IF_ARCH_X86(
39+
namespace sse
40+
{
41+
float h_dotp(const float *a, const float *b, size_t count);
42+
}
43+
44+
namespace avx
45+
{
46+
float h_dotp(const float *a, const float *b, size_t count);
47+
}
48+
49+
namespace avx512
50+
{
51+
float h_dotp(const float *a, const float *b, size_t count);
52+
}
53+
)
54+
55+
IF_ARCH_ARM(
56+
namespace neon_d32
57+
{
58+
float h_dotp(const float *a, const float *b, size_t count);
59+
}
60+
)
61+
62+
IF_ARCH_AARCH64(
63+
namespace asimd
64+
{
65+
float h_dotp(const float *a, const float *b, size_t count);
66+
}
67+
)
68+
69+
typedef float (* h_dotp_t)(const float *a, const float *b, size_t count);
70+
}
71+
72+
PTEST_BEGIN("dsp.hmath", h_dotp, 5, 5000)
73+
74+
void call(const char *label, float *a, float *b, size_t count, h_dotp_t func)
75+
{
76+
if (!PTEST_SUPPORTED(func))
77+
return;
78+
79+
char buf[80];
80+
snprintf(buf, sizeof(buf), "%s x %d", label, int(count));
81+
printf("Testing %s numbers...\n", buf);
82+
83+
PTEST_LOOP(buf,
84+
func(a, b, count);
85+
);
86+
}
87+
88+
PTEST_MAIN
89+
{
90+
size_t buf_size = 1 << MAX_RANK;
91+
uint8_t *data = NULL;
92+
float *a = alloc_aligned<float>(data, buf_size * 2, 64);
93+
float *b = &a[buf_size];
94+
95+
randomize_sign(a, buf_size * 2);
96+
97+
#define CALL(func) \
98+
call(#func, a, b, count, func)
99+
100+
for (size_t i=MIN_RANK; i <= MAX_RANK; ++i)
101+
{
102+
size_t count = 1 << i;
103+
104+
CALL(generic::h_dotp);
105+
IF_ARCH_X86(CALL(sse::h_dotp));
106+
IF_ARCH_X86(CALL(avx::h_dotp));
107+
IF_ARCH_X86(CALL(avx512::h_dotp));
108+
IF_ARCH_ARM(CALL(neon_d32::h_dotp));
109+
IF_ARCH_AARCH64(CALL(asimd::h_dotp));
110+
PTEST_SEPARATOR;
111+
}
112+
113+
free_aligned(data);
114+
}
115+
116+
PTEST_END
117+
118+
119+

src/test/ptest/hmath/h_sqr_dotp.cpp

+119
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
/*
2+
* Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
3+
* (C) 2024 Vladimir Sadovnikov <[email protected]>
4+
*
5+
* This file is part of lsp-dsp-lib
6+
* Created on: 11 дек. 2024 г.
7+
*
8+
* lsp-dsp-lib is free software: you can redistribute it and/or modify
9+
* it under the terms of the GNU Lesser General Public License as published by
10+
* the Free Software Foundation, either version 3 of the License, or
11+
* any later version.
12+
*
13+
* lsp-dsp-lib is distributed in the hope that it will be useful,
14+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
15+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16+
* GNU Lesser General Public License for more details.
17+
*
18+
* You should have received a copy of the GNU Lesser General Public License
19+
* along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
20+
*/
21+
22+
#include <lsp-plug.in/common/alloc.h>
23+
#include <lsp-plug.in/common/types.h>
24+
#include <lsp-plug.in/dsp/dsp.h>
25+
#include <lsp-plug.in/test-fw/helpers.h>
26+
#include <lsp-plug.in/test-fw/ptest.h>
27+
28+
#define MIN_RANK 5
29+
#define MAX_RANK 16
30+
31+
namespace lsp
32+
{
33+
namespace generic
34+
{
35+
float h_sqr_dotp(const float *a, const float *b, size_t count);
36+
}
37+
38+
IF_ARCH_X86(
39+
namespace sse
40+
{
41+
float h_sqr_dotp(const float *a, const float *b, size_t count);
42+
}
43+
44+
namespace avx
45+
{
46+
float h_sqr_dotp(const float *a, const float *b, size_t count);
47+
}
48+
49+
namespace avx512
50+
{
51+
float h_sqr_dotp(const float *a, const float *b, size_t count);
52+
}
53+
)
54+
55+
IF_ARCH_ARM(
56+
namespace neon_d32
57+
{
58+
float h_sqr_dotp(const float *a, const float *b, size_t count);
59+
}
60+
)
61+
62+
IF_ARCH_AARCH64(
63+
namespace asimd
64+
{
65+
float h_sqr_dotp(const float *a, const float *b, size_t count);
66+
}
67+
)
68+
69+
typedef float (* h_dotp_t)(const float *a, const float *b, size_t count);
70+
}
71+
72+
PTEST_BEGIN("dsp.hmath", h_sqr_dotp, 5, 5000)
73+
74+
void call(const char *label, float *a, float *b, size_t count, h_dotp_t func)
75+
{
76+
if (!PTEST_SUPPORTED(func))
77+
return;
78+
79+
char buf[80];
80+
snprintf(buf, sizeof(buf), "%s x %d", label, int(count));
81+
printf("Testing %s numbers...\n", buf);
82+
83+
PTEST_LOOP(buf,
84+
func(a, b, count);
85+
);
86+
}
87+
88+
PTEST_MAIN
89+
{
90+
size_t buf_size = 1 << MAX_RANK;
91+
uint8_t *data = NULL;
92+
float *a = alloc_aligned<float>(data, buf_size * 2, 64);
93+
float *b = &a[buf_size];
94+
95+
randomize_sign(a, buf_size * 2);
96+
97+
#define CALL(func) \
98+
call(#func, a, b, count, func)
99+
100+
for (size_t i=MIN_RANK; i <= MAX_RANK; ++i)
101+
{
102+
size_t count = 1 << i;
103+
104+
CALL(generic::h_sqr_dotp);
105+
IF_ARCH_X86(CALL(sse::h_sqr_dotp));
106+
IF_ARCH_X86(CALL(avx::h_sqr_dotp));
107+
IF_ARCH_X86(CALL(avx512::h_sqr_dotp));
108+
IF_ARCH_ARM(CALL(neon_d32::h_sqr_dotp));
109+
IF_ARCH_AARCH64(CALL(asimd::h_sqr_dotp));
110+
PTEST_SEPARATOR;
111+
}
112+
113+
free_aligned(data);
114+
}
115+
116+
PTEST_END
117+
118+
119+

0 commit comments

Comments
 (0)