Skip to content

Commit b388fd6

Browse files
committed
Merge branch 'main' into ef/inactive-particles
2 parents a9c127a + 0a19569 commit b388fd6

17 files changed

Lines changed: 229 additions & 231 deletions

.github/workflows/SpellCheck.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,4 @@ jobs:
1010
- name: Checkout Actions Repository
1111
uses: actions/checkout@v4
1212
- name: Check spelling
13-
uses: crate-ci/typos@v1.31.1
13+
uses: crate-ci/typos@v1.31.2

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "PointNeighbors"
22
uuid = "1c4d5385-0a27-49de-8e2c-43b175c8985c"
33
authors = ["Erik Faulhaber <erik.faulhaber@uni-koeln.de>"]
4-
version = "0.4.12-pre"
4+
version = "0.4.9-dev"
55

66
[deps]
77
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"

benchmarks/count_neighbors.jl

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,20 @@ implementations are highlighted. On the other hand, this is the least realistic
1212
1313
For a computationally heavier benchmark, see [`benchmark_n_body`](@ref).
1414
"""
15-
function benchmark_count_neighbors(neighborhood_search, coordinates; parallel = true)
15+
function benchmark_count_neighbors(neighborhood_search, coordinates;
16+
parallelization_backend = default_backend(coordinates))
1617
n_neighbors = zeros(Int, size(coordinates, 2))
1718

18-
function count_neighbors!(n_neighbors, coordinates, neighborhood_search, parallel)
19+
function count_neighbors!(n_neighbors, coordinates, neighborhood_search,
20+
parallelization_backend)
1921
n_neighbors .= 0
2022

21-
foreach_point_neighbor(coordinates, coordinates, neighborhood_search,
22-
parallel = parallel) do i, _, _, _
23+
foreach_point_neighbor(coordinates, coordinates, neighborhood_search;
24+
parallelization_backend) do i, _, _, _
2325
n_neighbors[i] += 1
2426
end
2527
end
2628

2729
return @belapsed $count_neighbors!($n_neighbors, $coordinates,
28-
$neighborhood_search, $parallel)
30+
$neighborhood_search, $parallelization_backend)
2931
end

benchmarks/n_body.jl

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,24 +12,26 @@ This is a more realistic benchmark for particle-based simulations than
1212
However, due to the higher computational cost, differences between neighborhood search
1313
implementations are less pronounced.
1414
"""
15-
function benchmark_n_body(neighborhood_search, coordinates_; parallel = true)
15+
function benchmark_n_body(neighborhood_search, coordinates_;
16+
parallelization_backend = default_backend(coordinates_))
1617
# Passing a different backend like `CUDA.CUDABackend`
1718
# allows us to change the type of the array to run the benchmark on the GPU.
1819
# Passing `parallel = true` or `parallel = false` will not change anything here.
19-
coordinates = PointNeighbors.Adapt.adapt(parallel, coordinates_)
20-
nhs = PointNeighbors.Adapt.adapt(parallel, neighborhood_search)
20+
coordinates = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates_)
21+
nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search)
2122

2223
# This preserves the data type of `coordinates`, which makes it work for GPU types
2324
mass = 1e10 * (rand!(similar(coordinates, size(coordinates, 2))) .+ 1)
2425
G = 6.6743e-11
2526

2627
dv = similar(coordinates)
2728

28-
function compute_acceleration!(dv, coordinates, mass, G, neighborhood_search, parallel)
29+
function compute_acceleration!(dv, coordinates, mass, G, neighborhood_search,
30+
parallelization_backend)
2931
dv .= 0.0
3032

31-
foreach_point_neighbor(coordinates, coordinates, neighborhood_search,
32-
parallel = parallel) do i, j, pos_diff, distance
33+
foreach_point_neighbor(coordinates, coordinates, neighborhood_search;
34+
parallelization_backend) do i, j, pos_diff, distance
3335
# Only consider particles with a distance > 0
3436
distance < sqrt(eps()) && return
3537

@@ -43,5 +45,6 @@ function benchmark_n_body(neighborhood_search, coordinates_; parallel = true)
4345
return dv
4446
end
4547

46-
return @belapsed $compute_acceleration!($dv, $coordinates, $mass, $G, $nhs, $parallel)
48+
return @belapsed $compute_acceleration!($dv, $coordinates, $mass, $G, $nhs,
49+
$parallelization_backend)
4750
end

benchmarks/plot.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ include("benchmarks/benchmarks.jl")
3636
plot_benchmarks(benchmark_count_neighbors, (10, 10), 3)
3737
"""
3838
function plot_benchmarks(benchmark, n_points_per_dimension, iterations;
39-
parallel = true, title = "",
39+
parallelization_backend = PolyesterBackend(), title = "",
4040
seed = 1, perturbation_factor_position = 1.0)
4141
neighborhood_searches_names = ["TrivialNeighborhoodSearch";;
4242
"GridNeighborhoodSearch";;
@@ -69,7 +69,7 @@ function plot_benchmarks(benchmark, n_points_per_dimension, iterations;
6969
neighborhood_search = neighborhood_searches[i]
7070
initialize!(neighborhood_search, coordinates, coordinates)
7171

72-
time = benchmark(neighborhood_search, coordinates, parallel = parallel)
72+
time = benchmark(neighborhood_search, coordinates; parallelization_backend)
7373
times[iter, i] = time
7474
time_string = BenchmarkTools.prettytime(time * 1e9)
7575
println("$(neighborhood_searches_names[i])")

benchmarks/smoothed_particle_hydrodynamics.jl

Lines changed: 16 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ A benchmark of the right-hand side of a full real-life Weakly Compressible
99
Smoothed Particle Hydrodynamics (WCSPH) simulation with TrixiParticles.jl.
1010
This method is used to simulate an incompressible fluid.
1111
"""
12-
function benchmark_wcsph(neighborhood_search, coordinates; parallel = true)
12+
function benchmark_wcsph(neighborhood_search, coordinates;
13+
parallelization_backend = default_backend(coordinates))
1314
density = 1000.0
1415
fluid = InitialCondition(; coordinates, density, mass = 0.1)
1516

@@ -34,19 +35,12 @@ function benchmark_wcsph(neighborhood_search, coordinates; parallel = true)
3435
smoothing_length, viscosity = viscosity,
3536
density_diffusion = density_diffusion)
3637

37-
# Note that we cannot just disable parallelism in TrixiParticles.
38-
# But passing a different backend like `CUDA.CUDABackend`
39-
# allows us to change the type of the array to run the benchmark on the GPU.
40-
if parallel isa Bool
41-
system = fluid_system
42-
nhs = neighborhood_search
43-
else
44-
system = PointNeighbors.Adapt.adapt(parallel, fluid_system)
45-
nhs = PointNeighbors.Adapt.adapt(parallel, neighborhood_search)
46-
end
38+
system = PointNeighbors.Adapt.adapt(parallelization_backend, fluid_system)
39+
nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search)
4740

48-
v = PointNeighbors.Adapt.adapt(parallel, vcat(fluid.velocity, fluid.density'))
49-
u = PointNeighbors.Adapt.adapt(parallel, coordinates)
41+
v = PointNeighbors.Adapt.adapt(parallelization_backend,
42+
vcat(fluid.velocity, fluid.density'))
43+
u = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates)
5044
dv = zero(v)
5145

5246
# Initialize the system
@@ -61,7 +55,8 @@ end
6155
6256
Like [`benchmark_wcsph`](@ref), but using single precision floating point numbers.
6357
"""
64-
function benchmark_wcsph_fp32(neighborhood_search, coordinates_; parallel = true)
58+
function benchmark_wcsph_fp32(neighborhood_search, coordinates_;
59+
parallelization_backend = default_backend(coordinates_))
6560
coordinates = convert(Matrix{Float32}, coordinates_)
6661
density = 1000.0f0
6762
fluid = InitialCondition(; coordinates, density, mass = 0.1f0)
@@ -88,19 +83,12 @@ function benchmark_wcsph_fp32(neighborhood_search, coordinates_; parallel = true
8883
acceleration = (0.0f0, 0.0f0, 0.0f0),
8984
density_diffusion = density_diffusion)
9085

91-
# Note that we cannot just disable parallelism in TrixiParticles.
92-
# But passing a different backend like `CUDA.CUDABackend`
93-
# allows us to change the type of the array to run the benchmark on the GPU.
94-
if parallel isa Bool
95-
system = fluid_system
96-
nhs = neighborhood_search
97-
else
98-
system = PointNeighbors.Adapt.adapt(parallel, fluid_system)
99-
nhs = PointNeighbors.Adapt.adapt(parallel, neighborhood_search)
100-
end
86+
system = PointNeighbors.Adapt.adapt(parallelization_backend, fluid_system)
87+
nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search)
10188

102-
v = PointNeighbors.Adapt.adapt(parallel, vcat(fluid.velocity, fluid.density'))
103-
u = PointNeighbors.Adapt.adapt(parallel, coordinates)
89+
v = PointNeighbors.Adapt.adapt(parallelization_backend,
90+
vcat(fluid.velocity, fluid.density'))
91+
u = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates)
10492
dv = zero(v)
10593

10694
# Initialize the system
@@ -117,7 +105,8 @@ A benchmark of the right-hand side of a full real-life Total Lagrangian
117105
Smoothed Particle Hydrodynamics (TLSPH) simulation with TrixiParticles.jl.
118106
This method is used to simulate an elastic structure.
119107
"""
120-
function benchmark_tlsph(neighborhood_search, coordinates; parallel = true)
108+
function benchmark_tlsph(neighborhood_search, coordinates;
109+
parallelization_backend = default_backend(coordinates))
121110
material = (density = 1000.0, E = 1.4e6, nu = 0.4)
122111
solid = InitialCondition(; coordinates, density = material.density, mass = 0.1)
123112

benchmarks/update.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ include("../test/point_cloud.jl")
99
1010
Benchmark neighborhood search initialization with the given `coordinates`.
1111
"""
12-
function benchmark_initialize(neighborhood_search, coordinates; parallel = true)
12+
function benchmark_initialize(neighborhood_search, coordinates;
13+
parallelization_backend = default_backend(coordinates))
1314
return @belapsed $initialize!($neighborhood_search, $coordinates, $coordinates)
1415
end
1516

@@ -21,7 +22,8 @@ perturbed point clouds.
2122
2223
This is a good benchmark for incremental updates, since most particles stay in their cells.
2324
"""
24-
function benchmark_update_alternating(neighborhood_search, coordinates; parallel = true)
25+
function benchmark_update_alternating(neighborhood_search, coordinates;
26+
parallelization_backend = default_backend(coordinates))
2527
coordinates2 = copy(coordinates)
2628
# Perturb all coordinates with a perturbation factor of `0.015`.
2729
# This factor was tuned so that ~0.5% of the particles change their cell during an

src/PointNeighbors.jl

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ using LinearAlgebra: dot
1111
using Polyester: Polyester
1212
@reexport using StaticArrays: SVector
1313

14-
include("util.jl")
1514
include("vector_of_vectors.jl")
15+
include("util.jl")
1616
include("neighborhood_search.jl")
1717
include("nhs_trivial.jl")
1818
include("cell_lists/cell_lists.jl")
@@ -25,8 +25,10 @@ export TrivialNeighborhoodSearch, GridNeighborhoodSearch, PrecomputedNeighborhoo
2525
export DictionaryCellList, FullGridCellList
2626
export ParallelUpdate, SemiParallelUpdate, SerialIncrementalUpdate, SerialUpdate,
2727
ParallelIncrementalUpdate
28-
export requires_update, initialize!, update!, initialize_grid!, update_grid!
29-
export PolyesterBackend, ThreadsDynamicBackend, ThreadsStaticBackend
28+
export requires_update
29+
export initialize!, update!, initialize_grid!, update_grid!
30+
export SerialBackend, PolyesterBackend, ThreadsDynamicBackend, ThreadsStaticBackend,
31+
default_backend
3032
export PeriodicBox, copy_neighborhood_search
3133

3234
end # module PointNeighbors

src/cell_lists/full_grid.jl

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -126,19 +126,7 @@ function Base.empty!(cell_list::FullGridCellList)
126126
(; cells) = cell_list
127127

128128
# `Base.empty!.(cells)`, but for all backends
129-
@threaded cells for i in eachindex(cells)
130-
emptyat!(cells, i)
131-
end
132-
133-
return cell_list
134-
end
135-
136-
# TODO Let `@threaded` use `get_backend`
137-
function Base.empty!(cell_list::FullGridCellList{<:DynamicVectorOfVectors})
138-
(; cells) = cell_list
139-
140-
# `Base.empty!.(cells)`, but for all backends
141-
@threaded cells.backend for i in eachindex(cells)
129+
@threaded default_backend(cells) for i in eachindex(cells)
142130
emptyat!(cells, i)
143131
end
144132

@@ -234,6 +222,24 @@ function max_points_per_cell(cells)
234222
return 100
235223
end
236224

225+
@inline function check_cell_bounds(cell_list::FullGridCellList{<:DynamicVectorOfVectors{<:Any,
226+
<:Array}},
227+
cell::Tuple)
228+
(; linear_indices) = cell_list
229+
230+
# Make sure that points are not added to the outer padding layer, which is needed
231+
# to ensure that neighboring cells in all directions of all non-empty cells exist.
232+
if !all(cell[i] in 2:(size(linear_indices, i) - 1) for i in eachindex(cell))
233+
size_ = [2:(size(linear_indices, i) - 1) for i in eachindex(cell)]
234+
print_size_ = "[$(join(size_, ", "))]"
235+
error("particle coordinates are NaN or outside the domain bounds of the cell list\n" *
236+
"cell $cell is out of bounds for cell grid of size $print_size_")
237+
end
238+
end
239+
240+
# On GPUs, we can't throw a proper error message because string interpolation is not
241+
# allowed. Note that we cannot dispatch on `AbstractGPUArray`, as we are inside a kernel,
242+
# so the array types are something like `CuDeviceArray`, which is not an `AbstractGPUArray`.
237243
@inline function check_cell_bounds(cell_list::FullGridCellList, cell::Tuple)
238244
(; linear_indices) = cell_list
239245

src/gpu.jl

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,3 @@ function Adapt.adapt_structure(to, nhs::GridNeighborhoodSearch)
3131
return GridNeighborhoodSearch(cell_list, search_radius, periodic_box, n_cells,
3232
cell_size, update_buffer, nhs.update_strategy)
3333
end
34-
35-
# This is useful to pass the backend directly to `@threaded`
36-
@inline KernelAbstractions.get_backend(backend::KernelAbstractions.Backend) = backend
37-
38-
@inline function KernelAbstractions.get_backend(vov::DynamicVectorOfVectors)
39-
return KernelAbstractions.get_backend(vov.backend)
40-
end

0 commit comments

Comments
 (0)