Open
Description
Test code:
/// Scalar reference implementation of the element-wise mean of two vectors.
///
/// For every index i of `res`, stores (a[i] + b[i]) / 2 into res[i].
/// The element count is taken from `res`; `a` and `b` are read without bounds
/// checks, so both must hold at least res.size() elements.
///
/// @param a    first input vector
/// @param b    second input vector
/// @param res  output vector; its current size decides how many elements are written
void mean(const std::vector<double>& a, const std::vector<double>& b, std::vector<double>& res) {
    std::size_t idx = 0;
    for (double& out : res) {
        out = (a[idx] + b[idx]) / 2;
        ++idx;
    }
}
void meanAVXUnaligned(const std::vector<double>& a, const std::vector<double>& b, std::vector<double>& res) {
using b_type = xsimd::batch<double, xsimd::avx>;
std::size_t inc = b_type::size; // step.
std::size_t size = res.size();
std::size_t vec_size = size - (size % inc);
for (size_t i = 0; i < vec_size; i+= inc) {
b_type avec = b_type::load_unaligned(&a[i]); // load
b_type bvec = b_type::load_unaligned(&b[i]);
b_type resv = (avec + bvec) / 2; // compute
resv.store_unaligned(&res[i]); // store
}
// Remaining part that cannot be vectorize
for (std::size_t i = vec_size; i < size; ++i) {
res[i] = (a[i] + b[i]) / 2;
}
}
using vector_type = std::vector<double, xsimd::aligned_allocator<double>>;
void meanAVXAligned(const vector_type& a, const vector_type& b, vector_type& res) {
using b_type = xsimd::batch<double, xsimd::avx>;
std::size_t inc = b_type::size;
std::size_t size = res.size();
// size for which the vectorization is possible
std::size_t vec_size = size - size % inc;
for (std::size_t i = 0; i < vec_size; i += inc) {
b_type avec = b_type::load_aligned(&a[i]);
b_type bvec = b_type::load_aligned(&b[i]);
b_type rvec = (avec + bvec) / 2;
rvec.store_aligned(&res[i]);
}
// Remaining part that cannot be vectorize
for (std::size_t i = vec_size; i < size; ++i) {
res[i] = (a[i] + b[i]) / 2;
}
}
/// Benchmark for the element-wise mean on plain std::vector<double> storage.
///
/// The element count comes from state.range(0). Both inputs are filled with
/// doubles drawn uniformly from [0, 10000) using a generator seeded from
/// std::random_device, so the data differs from run to run. The output vector
/// is zero-initialised.
///
/// @tparam enable  false (default): time the scalar mean();
///                 true: time meanAVXUnaligned().
/// @param state    Google Benchmark state object driving the timed loop.
///
/// NOTE(review): if the scalar and SIMD timings come out close, check the
/// build flags -- an optimising compiler may already auto-vectorise the scalar
/// loop; confirm by inspecting the generated code.
template<bool enable = false>
void benchVectorMean(benchmark::State& state) {
    const std::size_t count = static_cast<std::size_t>(state.range(0));

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<> dis(0.0, 10000.0);

    // Allocate once up front instead of growing by repeated push_back.
    std::vector<double> a;
    std::vector<double> b;
    a.reserve(count);
    b.reserve(count);
    for (std::size_t i = 0; i < count; ++i) {
        a.push_back(dis(gen));
        b.push_back(dis(gen));
    }
    std::vector<double> c(count, 0.0); // init

    for (auto _ : state) {
        if constexpr (enable) {
            meanAVXUnaligned(a, b, c);
        } else {
            mean(a, b, c);
        }
        // Make the result observable so the optimizer cannot drop or hoist
        // the stores into c out of the timed loop.
        benchmark::DoNotOptimize(c.data());
        benchmark::ClobberMemory();
    }
}
// Register both instantiations of benchVectorMean with the same argument
// range (800 to 51200, range multiplier 8): the first uses the default
// template argument (enable = false, scalar mean), the second passes
// enable = true (meanAVXUnaligned).
BENCHMARK_TEMPLATE(benchVectorMean)->RangeMultiplier(8)->Range(800, 51200);
BENCHMARK_TEMPLATE(benchVectorMean, true)->RangeMultiplier(8)->Range(800, 51200);
/// Benchmark for meanAVXAligned() on vectors allocated through
/// xsimd::aligned_allocator.
///
/// The element count comes from state.range(0). Both inputs are filled with
/// doubles drawn uniformly from [0, 10000) using a generator seeded from
/// std::random_device, so the data differs from run to run. The output vector
/// is zero-initialised.
///
/// @param state  Google Benchmark state object driving the timed loop.
void benchVectorMeanAligned(benchmark::State& state) {
    using aligned_vector = std::vector<double, xsimd::aligned_allocator<double>>;

    const std::size_t count = static_cast<std::size_t>(state.range(0));

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<> dis(0.0, 10000.0);

    // Allocate once up front instead of growing by repeated push_back.
    aligned_vector a;
    aligned_vector b;
    a.reserve(count);
    b.reserve(count);
    for (std::size_t i = 0; i < count; ++i) {
        a.push_back(dis(gen));
        b.push_back(dis(gen));
    }
    aligned_vector c(count, 0.0); // init

    for (auto _ : state) {
        meanAVXAligned(a, b, c);
        // Make the result observable so the optimizer cannot drop or hoist
        // the stores into c out of the timed loop.
        benchmark::DoNotOptimize(c.data());
        benchmark::ClobberMemory();
    }
}
// Register the aligned-storage benchmark with the same argument range
// (800 to 51200, range multiplier 8) as the benchVectorMean registrations.
BENCHMARK(benchVectorMeanAligned)->RangeMultiplier(8)->Range(800, 51200);
Test result:
Did I do something wrong?
Metadata
Metadata
Assignees
Labels
No labels