Skip to content

Commit b258aa1

Browse files
committed
Added nbody benchmark: behaves as expected
1 parent 1d7e8b2 commit b258aa1

File tree

5 files changed

+140
-1
lines changed

5 files changed

+140
-1
lines changed

benchmark.h

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,4 +380,70 @@ void BM_CPUHardRW(benchmark::State &state, T t)
380380
state.counters["n_elem"] = n;
381381
}
382382

383+
/// Draws the next value from the C library generator `rand()` and scales it
/// by `RAND_MAX`, with both operands converted to single precision first.
///
/// @return A float in the closed range [0, 1].
inline float rand_float() {
    const float sample = static_cast<float>(rand());
    const float upper_bound = static_cast<float>(RAND_MAX);
    return sample / upper_bound;
}
386+
387+
template <typename T>
388+
void BM_nbody(benchmark::State &state, T t)
389+
{
390+
auto n = state.range(0);
391+
float dt = 0.01f;
392+
const float softening = 1e-9f;
393+
394+
std::vector<float> Fx(n, 0.0f);
395+
std::vector<float> Fy(n, 0.0f);
396+
std::vector<float> Fz(n, 0.0f);
397+
398+
// Inizializza le posizioni e velocità
399+
for (int i = 0; i < n; ++i) {
400+
MEMBER_ACCESS(t, x, i) = rand_float();
401+
MEMBER_ACCESS(t, y, i) = rand_float();
402+
MEMBER_ACCESS(t, z, i) = rand_float();
403+
MEMBER_ACCESS(t, vx, i) = rand_float();
404+
MEMBER_ACCESS(t, vy, i) = rand_float();
405+
MEMBER_ACCESS(t, vz, i) = rand_float();
406+
}
407+
408+
for (auto _ : state) {
409+
// Calcolo delle forze
410+
for (int i = 0; i < n; ++i) {
411+
Fx[i] = 0.0f;
412+
Fy[i] = 0.0f;
413+
Fz[i] = 0.0f;
414+
415+
for (int j = 0; j < n; ++j) {
416+
if (i != j) {
417+
float dx = MEMBER_ACCESS(t, x, j) - MEMBER_ACCESS(t, x, i);
418+
float dy = MEMBER_ACCESS(t, y, j) - MEMBER_ACCESS(t, y, i);
419+
float dz = MEMBER_ACCESS(t, z, j) - MEMBER_ACCESS(t, z, i);
420+
float distSqr = dx * dx + dy * dy + dz * dz + softening;
421+
float invDist = 1.0f / std::sqrt(distSqr);
422+
float invDist3 = invDist * invDist * invDist;
423+
424+
Fx[i] += dx * invDist3;
425+
Fy[i] += dy * invDist3;
426+
Fz[i] += dz * invDist3;
427+
}
428+
}
429+
430+
MEMBER_ACCESS(t, vx, i) += dt * Fx[i];
431+
MEMBER_ACCESS(t, vy, i) += dt * Fy[i];
432+
MEMBER_ACCESS(t, vz, i) += dt * Fz[i];
433+
}
434+
435+
// Integrazione posizioni
436+
for (int i = 0; i < n; ++i) {
437+
MEMBER_ACCESS(t, x, i) += MEMBER_ACCESS(t, vx, i) * dt;
438+
MEMBER_ACCESS(t, y, i) += MEMBER_ACCESS(t, vy, i) * dt;
439+
MEMBER_ACCESS(t, z, i) += MEMBER_ACCESS(t, vz, i) * dt;
440+
}
441+
}
442+
443+
state.counters["n_elem"] = n;
444+
state.counters["N^2_interactions"] = benchmark::Counter(
445+
static_cast<double>(n) * static_cast<double>(n),
446+
benchmark::Counter::kIsRate);
447+
}
448+
383449
#endif // BENCHMARK_H

soa_boost.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,17 @@ GENERATE_SOA_LAYOUT(BigSoALayout,
9797
using BigSoA = BigSoALayout<>;
9898
using BigSoAView = BigSoA::View;
9999

100+
// Structure-of-arrays layout for the n-body benchmark: six float columns
// named x, y, z, vx, vy, vz, declared through GENERATE_SOA_LAYOUT.
// SoANbody is the layout instantiated with its default template arguments and
// SoANbodyView is its nested View type.
GENERATE_SOA_LAYOUT(SoANbodyLayout,
101+
SOA_COLUMN(float, x),
102+
SOA_COLUMN(float, y),
103+
SOA_COLUMN(float, z),
104+
SOA_COLUMN(float, vx),
105+
SOA_COLUMN(float, vy),
106+
SOA_COLUMN(float, vz))
107+
108+
using SoANbody = SoANbodyLayout<>;
109+
using SoANbodyView = SoANbody::View;
110+
100111
int main(int argc, char** argv) {
101112
std::vector<void *> free_list;
102113

@@ -133,6 +144,14 @@ int main(int argc, char** argv) {
133144
free_list.push_back(buffer);
134145
}
135146

147+
for (auto n : N) {
148+
auto buffer = reinterpret_cast<std::byte *>(aligned_alloc(SoANbody::alignment, SoANbody::computeDataSize(n)));
149+
SoANbody nbodySoA(buffer, n);
150+
SoANbodyView nbodySoAView{nbodySoA};
151+
benchmark::RegisterBenchmark("BM_nbody", BM_nbody<SoANbodyView>, nbodySoAView)->Arg(n)->Unit(benchmark::kMillisecond);
152+
free_list.push_back(buffer);
153+
}
154+
136155
benchmark::Initialize(&argc, argv);
137156
benchmark::RunSpecifiedBenchmarks();
138157
benchmark::Shutdown();

soa_manual.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,31 @@ struct S64 {
226226
}
227227
};
228228

229+
struct Snbody {
230+
float* __restrict__ x, *__restrict__ y, *__restrict__ z;
231+
float* __restrict__ vx, *__restrict__ vy, *__restrict__ vz;
232+
233+
Snbody(std::byte* buf, size_t n) {
234+
size_t offset = 0;
235+
236+
x = reinterpret_cast<float* __restrict__>(buf);
237+
offset += align_size(n * sizeof(float));
238+
y = reinterpret_cast<float* __restrict__>(buf + offset);
239+
offset += align_size(n * sizeof(float));
240+
z = reinterpret_cast<float* __restrict__>(buf + offset);
241+
offset += align_size(n * sizeof(float));
242+
vx = reinterpret_cast<float* __restrict__>(buf + offset);
243+
offset += align_size(n * sizeof(float));
244+
vy = reinterpret_cast<float* __restrict__>(buf + offset);
245+
offset += align_size(n * sizeof(float));
246+
vz = reinterpret_cast<float* __restrict__>(buf + offset);
247+
}
248+
249+
static size_t size_bytes(size_t n) {
250+
return align_size(sizeof(float[n])) * 6;
251+
}
252+
};
253+
229254
int main(int argc, char** argv) {
230255
benchmark::Initialize(&argc, argv);
231256
std::vector<void *> free_list;
@@ -258,6 +283,13 @@ int main(int argc, char** argv) {
258283
free_list.push_back(buffer);
259284
}
260285

286+
for (auto n : N) {
287+
auto buffer = reinterpret_cast<std::byte * __restrict__>(std::aligned_alloc(Alignment, Snbody::size_bytes(n)));
288+
Snbody t64(buffer, n);
289+
benchmark::RegisterBenchmark("BM_nbody", BM_nbody<Snbody>, t64)->Arg(n)->Unit(benchmark::kMillisecond);
290+
free_list.push_back(buffer);
291+
}
292+
261293
benchmark::RunSpecifiedBenchmarks();
262294
benchmark::Shutdown();
263295

soa_wrapper.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,13 @@ struct S64 {
4141
F<Eigen::Matrix3d> x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63;
4242
};
4343

44+
/// Record used by the n-body benchmark: fields x, y, z, vx, vy, vz, each of
/// type `F<float>` for the wrapper template `F`.
template <template <class> class F>
struct Snbody {
    F<float> x, y, z, vx, vy, vz;

    /// Converts to the same record under another wrapper template `G` by
    /// initializing each field of the result from the matching field here.
    template <template <class> class G>
    operator Snbody<G>() {
        return {x, y, z, vx, vy, vz};
    }
};
50+
4451
int main(int argc, char** argv) {
4552
constexpr wrapper::layout L = wrapper::layout::soa;
4653

@@ -86,6 +93,16 @@ int main(int argc, char** argv) {
8693
benchmark::RegisterBenchmark("BM_CPUHardRW", BM_CPUHardRW<wrapper_type>, t_span)->Arg(n)->Unit(benchmark::kMillisecond);
8794
}
8895

96+
for (std::size_t n : N) {
97+
// n * 6 * sizeof(float);
98+
std::size_t bytes = n * factory::get_size_in_bytes<Snbody, L>();
99+
buffer_pointers.emplace_back(new std::byte[bytes]);
100+
auto tnbody = factory::buffer_wrapper<Snbody, L>(buffer_pointers.back(), bytes);
101+
using wrapper_type = wrapper::wrapper<Snbody, std::span, L>;
102+
wrapper_type t_span(tnbody);
103+
benchmark::RegisterBenchmark("BM_nbody", BM_nbody<wrapper_type>, t_span)->Arg(n)->Unit(benchmark::kMillisecond);
104+
}
105+
89106
benchmark::Initialize(&argc, argv);
90107
benchmark::RunSpecifiedBenchmarks();
91108
benchmark::Shutdown();

wrapper/helper.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ constexpr std::size_t CountMembers() {
3535
if constexpr (detail::is_aggregate_constructible_from_n<Argument, 2>::value) return 2;
3636
else if constexpr (detail::is_aggregate_constructible_from_n<Argument, 10>::value) return 10;
3737
else if constexpr (detail::is_aggregate_constructible_from_n<Argument, 64>::value) return 64;
38+
else if constexpr (detail::is_aggregate_constructible_from_n<Argument, 6>::value) return 6;
3839
else return 100; // Silence warnings about missing return value
3940
}
4041

@@ -65,7 +66,11 @@ template <
6566
m40, m41, m42, m43, m44, m45, m46, m47, m48, m49,
6667
m50, m51, m52, m53, m54, m55, m56, m57, m58, m59,
6768
m60, m61, m62, m63);
68-
} else return void(); // Silence warnings about missing return value
69+
} else if constexpr (M == 6) {
70+
auto& [m00, m01, m02, m03, m04, m05] = arg;
71+
return f(m00, m01, m02, m03, m04, m05);
72+
}
73+
else return void(); // Silence warnings about missing return value
6974
}
7075

7176
template <

0 commit comments

Comments
 (0)