Skip to content

Commit d74f4fa

Browse files
RudolfWeeber authored and jngrad committed
Kokkos implementation of for_each_local_particle()
1 parent 6361c0d commit d74f4fa

File tree

4 files changed

+52
-11
lines changed

4 files changed

+52
-11
lines changed

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -503,6 +503,8 @@ if(ESPRESSO_BUILD_WITH_FFTW)
503503
endif()
504504

505505
if(ESPRESSO_BUILD_WITH_SHARED_MEMORY_PARALLELISM)
506+
find_package(OpenMP REQUIRED)
507+
506508
if(NOT EXISTS ${FETCHCONTENT_BASE_DIR}/kokkos-src)
507509
find_package(Kokkos 4.3 QUIET)
508510
endif()

src/core/CMakeLists.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,11 @@ install(TARGETS espresso_core
9090
target_link_libraries(
9191
espresso_core
9292
PRIVATE
93-
espresso::config espresso::utils::mpi espresso::shapes espresso::cpp_flags
93+
espresso::config
94+
espresso::utils::mpi
95+
espresso::shapes
96+
espresso::cpp_flags
97+
$<$<BOOL:${ESPRESSO_BUILD_WITH_SHARED_MEMORY_PARALLELISM}>:OpenMP::OpenMP_CXX>
9498
$<$<BOOL:${ESPRESSO_BUILD_WITH_SHARED_MEMORY_PARALLELISM}>:Kokkos::kokkos>
9599
$<$<BOOL:${ESPRESSO_BUILD_WITH_SHARED_MEMORY_PARALLELISM}>:Cabana::Core>
96100
PUBLIC espresso::utils MPI::MPI_CXX Random123 espresso::particle_observables

src/core/cell_system/CellStructure.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,10 @@
5151
#include <utility>
5252
#include <vector>
5353

54+
#ifdef SHARED_MEMORY_PARALLELISM
55+
#include <Kokkos_Core.hpp>
56+
#endif
57+
5458
/**
 * @brief Build a cell structure for the given box.
 * The decomposition member starts out as an @c AtomDecomposition
 * constructed from @p box.
 */
CellStructure::CellStructure(BoxGeometry const &box)
    : m_decomposition(std::make_unique<AtomDecomposition>(box)) {}
5660

@@ -327,3 +331,22 @@ void CellStructure::update_ghosts_and_resort_particle(unsigned data_parts) {
327331
ghosts_update(data_parts & ~resort_only_parts);
328332
}
329333
}
334+
335+
#ifdef SHARED_MEMORY_PARALLELISM
336+
/**
 * @brief Apply a kernel to every particle of the given cells in parallel.
 *
 * With more than one cell, the parallel loop runs over cells and the
 * particles of a cell are visited sequentially inside one loop iteration.
 * With exactly one cell, the parallel loop runs over the particles of that
 * cell instead. An empty cell list is a no-op.
 *
 * @param cells  Cells whose particles are visited.
 * @param f      Kernel invoked once per particle. It is called from inside a
 *               Kokkos parallel region and must therefore be thread-safe.
 */
void CellStructure::parallel_for_each_particle_impl(
    std::span<Cell *const> cells, ParticleUnaryOp &f) const {
  // The kernel is a host-side std::function that the lambdas capture by
  // reference, so the loops are pinned to the host execution space instead
  // of whatever the default execution space of the Kokkos build is.
  using HostExec = Kokkos::DefaultHostExecutionSpace;
  using HostRange = Kokkos::RangePolicy<HostExec>;

  if (cells.empty()) {
    return;
  }

  if (cells.size() > 1) {
    Kokkos::parallel_for( // loop over cells
        "for_each_local_particle", HostRange(0, cells.size()),
        [&](int cell_idx) {
          for (auto &p : cells[cell_idx]->particles())
            f(p);
        });
  } else {
    // single cell: evaluate the particle container once, not per iteration
    auto &&particles = cells[0]->particles();
    Kokkos::parallel_for( // loop over particles
        "for_each_local_particle", HostRange(0, particles.size()),
        [&](int part_idx) { f(*(particles.begin() + part_idx)); });
  }

  // Kokkos is allowed to return from parallel_for before the loop has
  // finished; the lambdas reference `cells` and `f`, so wait here before
  // those go out of scope and before callers read the results.
  HostExec().fence("CellStructure::parallel_for_each_particle_impl");
}
351+
352+
#endif

src/core/cell_system/CellStructure.hpp

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#include <algorithm>
4646
#include <cassert>
4747
#include <concepts>
48+
#include <functional>
4849
#include <iterator>
4950
#include <memory>
5051
#include <optional>
@@ -54,10 +55,7 @@
5455
#include <utility>
5556
#include <vector>
5657

57-
template <typename Callable>
58-
concept ParticleCallback = requires(Callable c, Particle &p) {
59-
{ c(p) } -> std::same_as<void>;
60-
};
58+
/** @brief Type-erased kernel taking one particle by mutable reference and returning nothing. */
using ParticleUnaryOp = std::function<void(Particle &)>;
6159

6260
namespace Cells {
6361
enum Resort : unsigned {
@@ -280,14 +278,26 @@ struct CellStructure : public System::Leaf<CellStructure> {
280278
ParticleRange ghost_particles() const {
  // range over the particles stored in the decomposition's ghost cells
  auto &&decomp = decomposition();
  return Cells::particles(decomp.ghost_cells());
}
281+
/** @brief whether to use parallel version of for_each_local_particle */
282+
bool use_parallel_for_each_local_particle() const {
  // compile-time switch: true only when built with shared-memory parallelism
#ifdef SHARED_MEMORY_PARALLELISM
  constexpr bool shared_memory_build = true;
#else
  constexpr bool shared_memory_build = false;
#endif
  return shared_memory_build;
}
283289

284290
/**
285291
* @brief Run a kernel on all local particles.
286292
* The kernel is assumed to be thread-safe.
287293
*/
288-
template <typename Kernel>
289-
requires ParticleCallback<Kernel>
290-
void for_each_local_particle(Kernel f) const {
294+
void for_each_local_particle(ParticleUnaryOp &&f) const {
295+
#ifdef SHARED_MEMORY_PARALLELISM
296+
if (use_parallel_for_each_local_particle()) {
297+
parallel_for_each_particle_impl(decomposition().local_cells(), f);
298+
return;
299+
}
300+
#endif
291301
for (auto &p : local_particles()) {
292302
f(p);
293303
}
@@ -297,14 +307,16 @@ struct CellStructure : public System::Leaf<CellStructure> {
297307
* @brief Run a kernel on all ghost particles.
298308
* The kernel is assumed to be thread-safe.
299309
*/
300-
template <typename Kernel>
301-
requires ParticleCallback<Kernel>
302-
void for_each_ghost_particle(Kernel f) const {
310+
void for_each_ghost_particle(ParticleUnaryOp &&f) const {
  // serial sweep: hand every ghost particle to the kernel in range order
  auto &&ghosts = ghost_particles();
  for (auto &ghost : ghosts) {
    f(ghost);
  }
}
307315

316+
#ifdef SHARED_MEMORY_PARALLELISM
317+
void parallel_for_each_particle_impl(std::span<Cell *const> cells,
318+
ParticleUnaryOp &f) const;
319+
#endif
308320
private:
309321
/** Cell system dependent function to find the right cell for a
310322
* particle.

0 commit comments

Comments
 (0)