Skip to content

Commit d3be0e0

Browse files
efaulhabersvchb
andauthored
Add full support for parallelization backends (#109)
* Add full support for parallelization backends * Fix tests * Update sph benchmark * Change version number to make tests pass * Fix tests * Fix benchmark tests * Fix remaining tests * Update src/neighborhood_search.jl Co-authored-by: Sven Berger <berger.sven@gmail.com> * Update src/neighborhood_search.jl Co-authored-by: Sven Berger <berger.sven@gmail.com> --------- Co-authored-by: Sven Berger <berger.sven@gmail.com>
1 parent 9f517d7 commit d3be0e0

17 files changed

Lines changed: 200 additions & 245 deletions

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "PointNeighbors"
22
uuid = "1c4d5385-0a27-49de-8e2c-43b175c8985c"
33
authors = ["Erik Faulhaber <erik.faulhaber@uni-koeln.de>"]
4-
version = "0.5"
4+
version = "0.4.9-dev"
55

66
[deps]
77
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"

benchmarks/count_neighbors.jl

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,20 @@ implementations are highlighted. On the other hand, this is the least realistic
1212
1313
For a computationally heavier benchmark, see [`benchmark_n_body`](@ref).
1414
"""
15-
function benchmark_count_neighbors(neighborhood_search, coordinates; parallel = true)
15+
function benchmark_count_neighbors(neighborhood_search, coordinates;
16+
parallelization_backend = default_backend(coordinates))
1617
n_neighbors = zeros(Int, size(coordinates, 2))
1718

18-
function count_neighbors!(n_neighbors, coordinates, neighborhood_search, parallel)
19+
function count_neighbors!(n_neighbors, coordinates, neighborhood_search,
20+
parallelization_backend)
1921
n_neighbors .= 0
2022

21-
foreach_point_neighbor(coordinates, coordinates, neighborhood_search,
22-
parallel = parallel) do i, _, _, _
23+
foreach_point_neighbor(coordinates, coordinates, neighborhood_search;
24+
parallelization_backend) do i, _, _, _
2325
n_neighbors[i] += 1
2426
end
2527
end
2628

2729
return @belapsed $count_neighbors!($n_neighbors, $coordinates,
28-
$neighborhood_search, $parallel)
30+
$neighborhood_search, $parallelization_backend)
2931
end

benchmarks/n_body.jl

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,24 +12,26 @@ This is a more realistic benchmark for particle-based simulations than
1212
However, due to the higher computational cost, differences between neighborhood search
1313
implementations are less pronounced.
1414
"""
15-
function benchmark_n_body(neighborhood_search, coordinates_; parallel = true)
15+
function benchmark_n_body(neighborhood_search, coordinates_;
16+
parallelization_backend = default_backend(coordinates_))
1617
# Passing a different backend like `CUDA.CUDABackend`
1718
# allows us to change the type of the array to run the benchmark on the GPU.
1819
# Passing `parallel = true` or `parallel = false` will not change anything here.
19-
coordinates = PointNeighbors.Adapt.adapt(parallel, coordinates_)
20-
nhs = PointNeighbors.Adapt.adapt(parallel, neighborhood_search)
20+
coordinates = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates_)
21+
nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search)
2122

2223
# This preserves the data type of `coordinates`, which makes it work for GPU types
2324
mass = 1e10 * (rand!(similar(coordinates, size(coordinates, 2))) .+ 1)
2425
G = 6.6743e-11
2526

2627
dv = similar(coordinates)
2728

28-
function compute_acceleration!(dv, coordinates, mass, G, neighborhood_search, parallel)
29+
function compute_acceleration!(dv, coordinates, mass, G, neighborhood_search,
30+
parallelization_backend)
2931
dv .= 0.0
3032

31-
foreach_point_neighbor(coordinates, coordinates, neighborhood_search,
32-
parallel = parallel) do i, j, pos_diff, distance
33+
foreach_point_neighbor(coordinates, coordinates, neighborhood_search;
34+
parallelization_backend) do i, j, pos_diff, distance
3335
# Only consider particles with a distance > 0
3436
distance < sqrt(eps()) && return
3537

@@ -43,5 +45,6 @@ function benchmark_n_body(neighborhood_search, coordinates_; parallel = true)
4345
return dv
4446
end
4547

46-
return @belapsed $compute_acceleration!($dv, $coordinates, $mass, $G, $nhs, $parallel)
48+
return @belapsed $compute_acceleration!($dv, $coordinates, $mass, $G, $nhs,
49+
$parallelization_backend)
4750
end

benchmarks/plot.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ include("benchmarks/benchmarks.jl")
3636
plot_benchmarks(benchmark_count_neighbors, (10, 10), 3)
3737
"""
3838
function plot_benchmarks(benchmark, n_points_per_dimension, iterations;
39-
parallel = true, title = "",
39+
parallelization_backend = PolyesterBackend(), title = "",
4040
seed = 1, perturbation_factor_position = 1.0)
4141
neighborhood_searches_names = ["TrivialNeighborhoodSearch";;
4242
"GridNeighborhoodSearch";;
@@ -69,7 +69,7 @@ function plot_benchmarks(benchmark, n_points_per_dimension, iterations;
6969
neighborhood_search = neighborhood_searches[i]
7070
initialize!(neighborhood_search, coordinates, coordinates)
7171

72-
time = benchmark(neighborhood_search, coordinates, parallel = parallel)
72+
time = benchmark(neighborhood_search, coordinates; parallelization_backend)
7373
times[iter, i] = time
7474
time_string = BenchmarkTools.prettytime(time * 1e9)
7575
println("$(neighborhood_searches_names[i])")

benchmarks/smoothed_particle_hydrodynamics.jl

Lines changed: 16 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ A benchmark of the right-hand side of a full real-life Weakly Compressible
99
Smoothed Particle Hydrodynamics (WCSPH) simulation with TrixiParticles.jl.
1010
This method is used to simulate an incompressible fluid.
1111
"""
12-
function benchmark_wcsph(neighborhood_search, coordinates; parallel = true)
12+
function benchmark_wcsph(neighborhood_search, coordinates;
13+
parallelization_backend = default_backend(coordinates))
1314
density = 1000.0
1415
fluid = InitialCondition(; coordinates, density, mass = 0.1)
1516

@@ -34,19 +35,12 @@ function benchmark_wcsph(neighborhood_search, coordinates; parallel = true)
3435
smoothing_length, viscosity = viscosity,
3536
density_diffusion = density_diffusion)
3637

37-
# Note that we cannot just disable parallelism in TrixiParticles.
38-
# But passing a different backend like `CUDA.CUDABackend`
39-
# allows us to change the type of the array to run the benchmark on the GPU.
40-
if parallel isa Bool
41-
system = fluid_system
42-
nhs = neighborhood_search
43-
else
44-
system = PointNeighbors.Adapt.adapt(parallel, fluid_system)
45-
nhs = PointNeighbors.Adapt.adapt(parallel, neighborhood_search)
46-
end
38+
system = PointNeighbors.Adapt.adapt(parallelization_backend, fluid_system)
39+
nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search)
4740

48-
v = PointNeighbors.Adapt.adapt(parallel, vcat(fluid.velocity, fluid.density'))
49-
u = PointNeighbors.Adapt.adapt(parallel, coordinates)
41+
v = PointNeighbors.Adapt.adapt(parallelization_backend,
42+
vcat(fluid.velocity, fluid.density'))
43+
u = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates)
5044
dv = zero(v)
5145

5246
# Initialize the system
@@ -61,7 +55,8 @@ end
6155
6256
Like [`benchmark_wcsph`](@ref), but using single precision floating point numbers.
6357
"""
64-
function benchmark_wcsph_fp32(neighborhood_search, coordinates_; parallel = true)
58+
function benchmark_wcsph_fp32(neighborhood_search, coordinates_;
59+
parallelization_backend = default_backend(coordinates_))
6560
coordinates = convert(Matrix{Float32}, coordinates_)
6661
density = 1000.0f0
6762
fluid = InitialCondition(; coordinates, density, mass = 0.1f0)
@@ -88,19 +83,12 @@ function benchmark_wcsph_fp32(neighborhood_search, coordinates_; parallel = true
8883
acceleration = (0.0f0, 0.0f0, 0.0f0),
8984
density_diffusion = density_diffusion)
9085

91-
# Note that we cannot just disable parallelism in TrixiParticles.
92-
# But passing a different backend like `CUDA.CUDABackend`
93-
# allows us to change the type of the array to run the benchmark on the GPU.
94-
if parallel isa Bool
95-
system = fluid_system
96-
nhs = neighborhood_search
97-
else
98-
system = PointNeighbors.Adapt.adapt(parallel, fluid_system)
99-
nhs = PointNeighbors.Adapt.adapt(parallel, neighborhood_search)
100-
end
86+
system = PointNeighbors.Adapt.adapt(parallelization_backend, fluid_system)
87+
nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search)
10188

102-
v = PointNeighbors.Adapt.adapt(parallel, vcat(fluid.velocity, fluid.density'))
103-
u = PointNeighbors.Adapt.adapt(parallel, coordinates)
89+
v = PointNeighbors.Adapt.adapt(parallelization_backend,
90+
vcat(fluid.velocity, fluid.density'))
91+
u = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates)
10492
dv = zero(v)
10593

10694
# Initialize the system
@@ -117,7 +105,8 @@ A benchmark of the right-hand side of a full real-life Total Lagrangian
117105
Smoothed Particle Hydrodynamics (TLSPH) simulation with TrixiParticles.jl.
118106
This method is used to simulate an elastic structure.
119107
"""
120-
function benchmark_tlsph(neighborhood_search, coordinates; parallel = true)
108+
function benchmark_tlsph(neighborhood_search, coordinates;
109+
parallelization_backend = default_backend(coordinates))
121110
material = (density = 1000.0, E = 1.4e6, nu = 0.4)
122111
solid = InitialCondition(; coordinates, density = material.density, mass = 0.1)
123112

benchmarks/update.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ include("../test/point_cloud.jl")
99
1010
Benchmark neighborhood search initialization with the given `coordinates`.
1111
"""
12-
function benchmark_initialize(neighborhood_search, coordinates; parallel = true)
12+
function benchmark_initialize(neighborhood_search, coordinates;
13+
parallelization_backend = default_backend(coordinates))
1314
return @belapsed $initialize!($neighborhood_search, $coordinates, $coordinates)
1415
end
1516

@@ -21,7 +22,8 @@ perturbed point clouds.
2122
2223
This is a good benchmark for incremental updates, since most particles stay in their cells.
2324
"""
24-
function benchmark_update_alternating(neighborhood_search, coordinates; parallel = true)
25+
function benchmark_update_alternating(neighborhood_search, coordinates;
26+
parallelization_backend = default_backend(coordinates))
2527
coordinates2 = copy(coordinates)
2628
# Perturb all coordinates with a perturbation factor of `0.015`.
2729
# This factor was tuned so that ~0.5% of the particles change their cell during an

src/PointNeighbors.jl

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ using LinearAlgebra: dot
1111
using Polyester: Polyester
1212
@reexport using StaticArrays: SVector
1313

14-
include("util.jl")
1514
include("vector_of_vectors.jl")
15+
include("util.jl")
1616
include("neighborhood_search.jl")
1717
include("nhs_trivial.jl")
1818
include("cell_lists/cell_lists.jl")
@@ -25,9 +25,10 @@ export TrivialNeighborhoodSearch, GridNeighborhoodSearch, PrecomputedNeighborhoo
2525
export DictionaryCellList, FullGridCellList
2626
export ParallelUpdate, SemiParallelUpdate, SerialIncrementalUpdate, SerialUpdate,
2727
ParallelIncrementalUpdate
28-
export requires_update, requires_resizing
28+
export requires_update
2929
export initialize!, update!, initialize_grid!, update_grid!
30-
export PolyesterBackend, ThreadsDynamicBackend, ThreadsStaticBackend
30+
export SerialBackend, PolyesterBackend, ThreadsDynamicBackend, ThreadsStaticBackend,
31+
default_backend
3132
export PeriodicBox, copy_neighborhood_search
3233

3334
end # module PointNeighbors

src/cell_lists/full_grid.jl

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ function Base.empty!(cell_list::FullGridCellList)
126126
(; cells) = cell_list
127127

128128
# `Base.empty!.(cells)`, but for all backends
129-
for i in eachindex(cells)
129+
@threaded default_backend(cells) for i in eachindex(cells)
130130
emptyat!(cells, i)
131131
end
132132

@@ -225,6 +225,22 @@ end
225225
@inline function check_cell_bounds(cell_list::FullGridCellList, cell::Tuple)
226226
(; linear_indices) = cell_list
227227

228+
# Make sure that points are not added to the outer padding layer, which is needed
229+
# to ensure that neighboring cells in all directions of all non-empty cells exist.
230+
if !all(cell[i] in 2:(size(linear_indices, i) - 1) for i in eachindex(cell))
231+
size_ = [2:(size(linear_indices, i) - 1) for i in eachindex(cell)]
232+
print_size_ = "[$(join(size_, ", "))]"
233+
error("particle coordinates are NaN or outside the domain bounds of the cell list\n" *
234+
"cell $cell is out of bounds for cell grid of size $print_size_")
235+
end
236+
end
237+
238+
# On GPUs, we can't throw a proper error message because string interpolation is not allowed
239+
@inline function check_cell_bounds(cell_list::FullGridCellList{<:DynamicVectorOfVectors{<:Any,
240+
<:AbstractGPUArray}},
241+
cell::Tuple)
242+
(; linear_indices) = cell_list
243+
228244
# Make sure that points are not added to the outer padding layer, which is needed
229245
# to ensure that neighboring cells in all directions of all non-empty cells exist.
230246
if !all(cell[i] in 2:(size(linear_indices, i) - 1) for i in eachindex(cell))

src/gpu.jl

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,3 @@ function Adapt.adapt_structure(to, nhs::GridNeighborhoodSearch)
3131
return GridNeighborhoodSearch(cell_list, search_radius, periodic_box, n_cells,
3232
cell_size, update_buffer, nhs.update_strategy)
3333
end
34-
35-
# This is useful to pass the backend directly to `@threaded`
36-
KernelAbstractions.get_backend(backend::KernelAbstractions.Backend) = backend

0 commit comments

Comments
 (0)