Skip to content

Commit 8f999d7

Browse files
committed
more benchmarks
1 parent 7b6093c commit 8f999d7

File tree

2 files changed

+34
-0
lines changed

2 files changed

+34
-0
lines changed

src/aerobus.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4492,6 +4492,14 @@ namespace aerobus {
44924492
template<typename T>
44934493
static T cos(const T& x);
44944494

4495+
// works only in [-pi/4, pi/4]
4496+
// purpose is to allow vectorization
4497+
template<typename T>
4498+
static INLINED T fast_sin(const T& x) {
4499+
using poly = internal::sin_poly<T>::type;
4500+
return x * poly::eval(x*x);
4501+
}
4502+
44954503
template<typename T>
44964504
static T sin(const T& x) {
44974505
using upper_type = aerobus::internal::arithmetic_helpers<T>::upper_type;

src/benchmarks.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,27 @@ static void BM_aero_sin_12(benchmark::State &state) {
7272
free(out);
7373
}
7474

75+
76+
// Benchmark for aerobus::libm::fast_sin on float data.
// Allocates two 64-byte-aligned float buffers of state.range(0) elements,
// fills the input with rand(-0.01, 0.01), then on every benchmark iteration
// writes nine chained fast_sin evaluations of each input into the output.
// NOTE(review): `rand(lo, hi)` is a helper declared outside this view;
// presumably it draws a value between its two arguments -- confirm.
// NOTE(review): nine nested calls are visible here although the name ends in
// `_12`; confirm the intended chain depth against the sibling benchmarks.
static void BM_aero_fast_sin_12(benchmark::State &state) {
    const int64_t count = state.range(0);
    float *in = aerobus::aligned_malloc<float>(count, 64);
    float *out = aerobus::aligned_malloc<float>(count, 64);

    // Untimed setup: populate the input buffer in parallel.
    #pragma omp parallel for
    for (int64_t idx = 0; idx < count; ++idx) {
        in[idx] = rand(-0.01, 0.01);
    }

    // Timed region: one full parallel pass over the buffer per iteration.
    for (auto _ : state) {
        #pragma omp parallel for
        for (int64_t idx = 0; idx < count; ++idx) {
            out[idx] =
                aerobus::libm::fast_sin(
                aerobus::libm::fast_sin(
                aerobus::libm::fast_sin(
                aerobus::libm::fast_sin(
                aerobus::libm::fast_sin(
                aerobus::libm::fast_sin(
                aerobus::libm::fast_sin(
                aerobus::libm::fast_sin(
                aerobus::libm::fast_sin(in[idx])))))))));
        }
    }

    // NOTE(review): buffers come from aerobus::aligned_malloc but are released
    // with free(); confirm this pairing is valid on every target platform.
    free(in);
    free(out);
}
95+
7596
static void BM_std_sin_12(benchmark::State &state) {
7697
double *in = aerobus::aligned_malloc<double>(state.range(0), 64);
7798
double *out = aerobus::aligned_malloc<double>(state.range(0), 64);
@@ -208,12 +229,17 @@ static void BM_horner_double(benchmark::State &state) {
208229

209230
BENCHMARK(BM_std_cos_12)->Range(1 << 10, 1 << 24);
210231
BENCHMARK(BM_aero_cos_12)->Range(1 << 10, 1 << 24);
232+
211233
BENCHMARK(BM_std_sin_12)->Range(1 << 10, 1 << 24);
212234
BENCHMARK(BM_aero_sin_12)->Range(1 << 10, 1 << 24);
235+
BENCHMARK(BM_aero_fast_sin_12)->Range(1 << 10, 1 << 24);
236+
213237
BENCHMARK(BM_std_expm1_12)->Range(1 << 10, 1 << 24);
214238
BENCHMARK(BM_aero_expm1_12)->Range(1 << 10, 1 << 24);
239+
215240
BENCHMARK(BM_std_hermite)->Range(1 << 10, 1 << 24);
216241
BENCHMARK(BM_aero_hermite)->Range(1 << 10, 1 << 24);
242+
217243
BENCHMARK(BM_horner_double)->Range(1 << 10, 1 << 24);
218244
BENCHMARK(BM_compensated_horner_float)->Range(1 << 10, 1 << 24);
219245

0 commit comments

Comments
 (0)