I'm still busy refactoring, and I'm currently relying on Intel's oneAPI and TBB for multithreading.
But you can check the code here: https://github.com/ker2x/particle-life/tree/oneapi-dpl/particle_life/src , and perhaps backport the modifications to a normal compiler and normal libraries (or I'll do it myself some day, I guess).
It's not fully optimized yet, but here are the notable changes:
- Using a vertex buffer object (VBO) instead of brute-forcing calls to draw circles.
void Draw(colorGroup group)
{
ofSetColor(group.color);
vbo.setVertexData(group.pos.data(), group.pos.size(), GL_DYNAMIC_DRAW);
vbo.draw(GL_POINTS, 0, group.pos.size());
}
- Using SoA (structure of arrays) instead of AoS (array of structures). This allows better vectorization, and it was needed to use the VBO efficiently anyway.
/// All particles of one color, stored as parallel arrays (SoA layout):
/// element i of `pos`, `vx` and `vy` describes the same particle.
struct colorGroup {
    /// Particle positions; handed to the vbo as vertex data by Draw().
    std::vector<ofVec2f> pos;
    /// Per-particle velocity along x (added to pos[i].x on each update).
    std::vector<float> vx;
    /// Per-particle velocity along y (added to pos[i].y on each update).
    std::vector<float> vy;
    /// Color used when drawing this group.
    ofColor color;
};
/// Applies the influence of Group2 on Group1 and advances Group1 by one step.
///
/// For every particle of Group1 this:
///   1. sums (delta / distance) over all Group2 particles closer than `radius`;
///   2. pushes the particle away from the walls when `wallRepel` is positive;
///   3. updates the velocity from the summed force, viscosity and world gravity;
///   4. moves the particle by its new velocity.
/// When `boundsToggle` is set, all Group1 positions are finally clamped to
/// [0, boundWidth] x [0, boundHeight].
///
/// @param Group1        Group that is modified in place (`vx`, `vy`, `pos`).
/// @param Group2        Group that is only read; callers also pass the same
///                      object as Group1 (e.g. red/red).
/// @param G             Attraction setting; it is divided by -100 to obtain the
///                      coefficient applied to the summed force.
/// @param radius        Only Group2 particles strictly closer than this
///                      contribute to the force.
/// @param boundsToggle  Whether to clamp positions to the bounds afterwards.
void ofApp::interaction(colorGroup& Group1, const colorGroup& Group2,
                        const float G, const float radius, bool boundsToggle) const
{
    assert(Group1.pos.size() % 64 == 0);
    assert(Group2.pos.size() % 64 == 0);
    // The three arrays are indexed with the same i below, so they must agree.
    assert(Group1.vx.size() == Group1.pos.size());
    assert(Group1.vy.size() == Group1.pos.size());

    // Loop invariants, all kept in float so the hot loop never promotes to double.
    const float g = G / -100;  // attraction coefficient
    const float minDistance = std::numeric_limits<float>::epsilon();
    const float damping = 1.0F - viscosity;
    const float repel = wallRepel;
    const float maxX = static_cast<float>(boundWidth);
    const float maxY = static_cast<float>(boundHeight);
    constexpr float kWallStrength = 0.1F;

    const std::size_t count = Group1.pos.size();
    for (std::size_t i = 0; i < count; ++i)
    {
        auto& p = Group1.pos[i];
        float& vx = Group1.vx[i];
        float& vy = Group1.vy[i];

        float fx = 0.0F;  // force on x
        float fy = 0.0F;  // force on y
        for (const auto& other : Group2.pos)
        {
            const float distance = p.distance(other);
            if (distance < radius)
            {
                // Clamp the divisor to avoid dividing by zero.
                const float force = 1.0F / std::max(minDistance, distance);
                fx += (p.x - other.x) * force;
                fy += (p.y - other.y) * force;
            }
        }

        // Wall repel: push back proportionally to how far the particle has
        // entered the `repel`-wide margin along each wall.
        if (repel > 0.0F)
        {
            if (p.x < repel) vx += (repel - p.x) * kWallStrength;
            if (p.x > maxX - repel) vx += (maxX - repel - p.x) * kWallStrength;
            if (p.y < repel) vy += (repel - p.y) * kWallStrength;
            if (p.y > maxY - repel) vy += (maxY - repel - p.y) * kWallStrength;
        }

        // Viscosity & gravity
        vx = (vx + fx * g) * damping;
        vy = (vy + fy * g) * damping + worldGravity;

        // Update position
        p.x += vx;
        p.y += vy;
    }

    if (boundsToggle)
    {
        for (auto& p : Group1.pos)
        {
            p.x = std::min(std::max(p.x, 0.0F), maxX);
            p.y = std::min(std::max(p.y, 0.0F), maxY);
        }
    }
}
I still have some crap to clean up :)
- Using oneapi::tbb::parallel_invoke for parallelization.
// One task per ordered (target, source) color pair; each task passes that
// pair's own power slider and radius slider to interaction().
//
// NOTE(review): interaction() writes vx/vy/pos of its first argument, and
// several tasks here share the same first argument (and read groups that other
// tasks are writing). If these tasks really run concurrently that is a data
// race; confirm, and consider double-buffering positions or running the four
// tasks of one target group sequentially.
oneapi::tbb::parallel_invoke(
    [&] { interaction(red, red, powerSliderRR, vSliderRR, boundsToggle); },
    [&] { interaction(red, green, powerSliderRG, vSliderRG, boundsToggle); },
    [&] { interaction(red, blue, powerSliderRB, vSliderRB, boundsToggle); },
    [&] { interaction(red, white, powerSliderRW, vSliderRW, boundsToggle); },
    [&] { interaction(green, red, powerSliderGR, vSliderGR, boundsToggle); },
    [&] { interaction(green, green, powerSliderGG, vSliderGG, boundsToggle); },
    [&] { interaction(green, blue, powerSliderGB, vSliderGB, boundsToggle); },
    [&] { interaction(green, white, powerSliderGW, vSliderGW, boundsToggle); },
    [&] { interaction(blue, red, powerSliderBR, vSliderBR, boundsToggle); },
    [&] { interaction(blue, green, powerSliderBG, vSliderBG, boundsToggle); },
    [&] { interaction(blue, blue, powerSliderBB, vSliderBB, boundsToggle); },
    [&] { interaction(blue, white, powerSliderBW, vSliderBW, boundsToggle); },
    [&] { interaction(white, red, powerSliderWR, vSliderWR, boundsToggle); },
    [&] { interaction(white, green, powerSliderWG, vSliderWG, boundsToggle); },
    [&] { interaction(white, blue, powerSliderWB, vSliderWB, boundsToggle); },
    [&] { interaction(white, white, powerSliderWW, vSliderWW, boundsToggle); }
);
This is me slowly learning to use oneAPI and SYCL in order to offload all the parallel code to the GPU in the future (in a new project).
The biggest performance improvement comes from the use of SoA and the VBO.
I'm still busy refactoring, and I'm currently relying on Intel's oneAPI and TBB for multithreading.
But you can check the code here: https://github.com/ker2x/particle-life/tree/oneapi-dpl/particle_life/src , and perhaps backport the modifications to a normal compiler and normal libraries (or I'll do it myself some day, I guess).
It's not fully optimized yet, but here are the notable changes:
It should also make it easier to add more colors (I hope).
Major cleanup of the interaction code.
I still have some crap to clean up :)
This is me slowly learning to use oneAPI and SYCL in order to offload all the parallel code to the GPU in the future (in a new project).
The biggest performance improvement comes from the use of SoA and the VBO.