Skip to content

Commit cb668fa

Browse files
authored
Merge branch 'main' into 0.6.4
2 parents cbcbd1e + bfc3628 commit cb668fa

File tree

10 files changed

+184
-56
lines changed

10 files changed

+184
-56
lines changed

benchmarks/n_body.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,11 @@ function benchmark_n_body(neighborhood_search, coordinates_;
1818
# Passing a different backend like `CUDA.CUDABackend`
1919
# allows us to change the type of the array to run the benchmark on the GPU.
2020
coordinates = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates_)
21-
nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search)
21+
22+
# Remove unnecessary data structures that are only used for initialization
23+
neighborhood_search_ = PointNeighbors.freeze_neighborhood_search(neighborhood_search)
24+
25+
nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search_)
2226

2327
# This preserves the data type of `coordinates`, which makes it work for GPU types
2428
mass = 1e10 * (rand!(similar(coordinates, size(coordinates, 2))) .+ 1)

benchmarks/run_benchmarks.jl

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ end
149149
Shortcut to call [`run_benchmark`](@ref) with all GPU-compatible neighborhood search
150150
implementations:
151151
- `GridNeighborhoodSearch` with `FullGridCellList`
152+
- `PrecomputedNeighborhoodSearch`
152153
153154
# Arguments
154155
- `benchmark`: The benchmark function. See [`benchmark_count_neighbors`](@ref),
@@ -174,13 +175,15 @@ function run_benchmark_gpu(benchmark, n_points_per_dimension, iterations; kwargs
174175

175176
min_corner = 0.0f0 .* n_points_per_dimension
176177
max_corner = Float32.(n_points_per_dimension ./ maximum(n_points_per_dimension))
177-
neighborhood_searches = [
178-
GridNeighborhoodSearch{NDIMS}(search_radius = 0.0f0,
179-
cell_list = FullGridCellList(; search_radius = 0.0f0,
180-
min_corner, max_corner))
181-
]
182-
183-
names = ["GridNeighborhoodSearch with FullGridCellList";;]
178+
neighborhood_searches = [GridNeighborhoodSearch{NDIMS}(search_radius = 0.0f0,
179+
cell_list = FullGridCellList(;
180+
search_radius = 0.0f0,
181+
min_corner,
182+
max_corner))
183+
PrecomputedNeighborhoodSearch{NDIMS}(search_radius = 0.0f0)]
184+
185+
names = ["GridNeighborhoodSearch with FullGridCellList";;
186+
"PrecomputedNeighborhoodSearch"]
184187

185188
run_benchmark(benchmark, n_points_per_dimension, iterations,
186189
neighborhood_searches; names, kwargs...)

benchmarks/smoothed_particle_hydrodynamics.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,11 @@ function __benchmark_wcsph_inner(neighborhood_search, initial_condition, state_e
8686
density_diffusion = density_diffusion)
8787

8888
system = PointNeighbors.Adapt.adapt(parallelization_backend, fluid_system)
89-
nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search)
89+
90+
# Remove unnecessary data structures that are only used for initialization
91+
neighborhood_search_ = PointNeighbors.freeze_neighborhood_search(neighborhood_search)
92+
93+
nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search_)
9094
semi = DummySemidiscretization(nhs, parallelization_backend, true)
9195

9296
v = PointNeighbors.Adapt.adapt(parallelization_backend,

src/cell_lists/full_grid.jl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ See [`copy_neighborhood_search`](@ref) for more details.
2121
- `backend = DynamicVectorOfVectors{Int32}`: Type of the data structure to store the actual
2222
cell lists. Can be
2323
- `Vector{Vector{Int32}}`: Scattered memory, but very memory-efficient.
24-
- `DynamicVectorOfVectors{Int32}`: Contiguous memory, optimizing cache-hits.
24+
- `DynamicVectorOfVectors{Int32}`: Contiguous memory, optimizing cache-hits
25+
and GPU-compatible.
2526
- `max_points_per_cell = 100`: Maximum number of points per cell. This will be used to
2627
allocate the `DynamicVectorOfVectors`. It is not used with
2728
the `Vector{Vector{Int32}}` backend.
@@ -186,7 +187,7 @@ function copy_cell_list(cell_list::FullGridCellList, search_radius, periodic_box
186187

187188
return FullGridCellList(; min_corner, max_corner, search_radius,
188189
backend = typeof(cell_list.cells),
189-
max_points_per_cell = max_points_per_cell(cell_list.cells))
190+
max_points_per_cell = max_inner_length(cell_list.cells, 100))
190191
end
191192

192193
@inline function check_cell_bounds(cell_list::FullGridCellList{<:DynamicVectorOfVectors{<:Any,

src/cell_lists/spatial_hashing.jl

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
"""
2-
SpatialHashingCellList{NDIMS}(; list_size,
2+
SpatialHashingCellList{NDIMS}(; list_size,
33
backend = DynamicVectorOfVectors{Int32},
44
max_points_per_cell = 100)
55
6-
A basic spatial hashing implementation. Similar to [`DictionaryCellList`](@ref), the domain is discretized into cells,
7-
and the particles in each cell are stored in a hash map. The hash is computed using the spatial location of each cell,
8-
as described by Ihmsen et al. (2011)(@cite Ihmsen2011). By using a hash map that stores entries only for non-empty cells,
9-
the domain is effectively infinite. The size of the hash map is recommended to be approximately twice the number of particles
6+
A basic spatial hashing implementation. Similar to [`DictionaryCellList`](@ref), the domain is discretized into cells,
7+
and the particles in each cell are stored in a hash map. The hash is computed using the spatial location of each cell,
8+
as described by Ihmsen et al. (2011)(@cite Ihmsen2011). By using a hash map that stores entries only for non-empty cells,
9+
the domain is effectively infinite. The size of the hash map is recommended to be approximately twice the number of particles
1010
to balance memory consumption against the likelihood of hash collisions.
1111
1212
# Arguments
@@ -128,7 +128,8 @@ function copy_cell_list(cell_list::SpatialHashingCellList, search_radius,
128128

129129
return SpatialHashingCellList{NDIMS}(list_size = list_size,
130130
backend = typeof(cell_list.cells),
131-
max_points_per_cell = max_points_per_cell(cell_list.cells))
131+
max_points_per_cell = max_inner_length(cell_list.cells,
132+
100))
132133
end
133134

134135
@inline function Base.getindex(cell_list::SpatialHashingCellList, cell::Tuple)

src/gpu.jl

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
# `Adapt.@adapt_structure` automatically generates the `adapt` function for our custom types.
1010
Adapt.@adapt_structure FullGridCellList
1111
Adapt.@adapt_structure DynamicVectorOfVectors
12+
Adapt.@adapt_structure GridNeighborhoodSearch
1213

1314
# `adapt(CuArray, ::SVector)::SVector`, but `adapt(Array, ::SVector)::Vector`.
1415
# We don't want to change the type of the `SVector` here.
@@ -22,14 +23,14 @@ function Adapt.adapt_structure(to::typeof(Array), range::UnitRange)
2223
return range
2324
end
2425

25-
function Adapt.adapt_structure(to, nhs::GridNeighborhoodSearch)
26-
(; search_radius, periodic_box, n_cells, cell_size) = nhs
26+
function Adapt.adapt_structure(to, nhs::PrecomputedNeighborhoodSearch)
27+
neighbor_lists = Adapt.adapt_structure(to, nhs.neighbor_lists)
28+
search_radius = Adapt.adapt_structure(to, nhs.search_radius)
29+
periodic_box = Adapt.adapt_structure(to, nhs.periodic_box)
30+
neighborhood_search = Adapt.adapt_structure(to, nhs.neighborhood_search)
2731

28-
cell_list = Adapt.adapt_structure(to, nhs.cell_list)
29-
update_buffer = Adapt.adapt_structure(to, nhs.update_buffer)
30-
31-
return GridNeighborhoodSearch(cell_list, search_radius, periodic_box, n_cells,
32-
cell_size, update_buffer, nhs.update_strategy)
32+
return PrecomputedNeighborhoodSearch{ndims(nhs)}(neighbor_lists, search_radius,
33+
periodic_box, neighborhood_search)
3334
end
3435

3536
function Adapt.adapt_structure(to, cell_list::SpatialHashingCellList{NDIMS}) where {NDIMS}

src/neighborhood_search.jl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,14 @@ GridNeighborhoodSearch{2, Float64, ...}(...)
109109
return nothing
110110
end
111111

112+
@inline function freeze_neighborhood_search(search::AbstractNeighborhoodSearch)
113+
# Indicate that the neighborhood search is static and will not be updated anymore.
114+
# Some implementations might use this to strip unnecessary data structures for updating.
115+
# Notably, this is used for the `PrecomputedNeighborhoodSearch` to strip a potentially
116+
# not GPU-compatible inner neighborhood search that is used only for initialization.
117+
return search
118+
end
119+
112120
"""
113121
PeriodicBox(; min_corner, max_corner)
114122

src/nhs_precomputed.jl

Lines changed: 119 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
@doc raw"""
22
PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0, n_points = 0,
3-
periodic_box = nothing, update_strategy = nothing)
3+
periodic_box = nothing, update_strategy = nothing,
4+
update_neighborhood_search = GridNeighborhoodSearch{NDIMS}(),
5+
backend = DynamicVectorOfVectors{Int32},
6+
max_neighbors = max_neighbors(NDIMS))
47
58
Neighborhood search with precomputed neighbor lists. A list of all neighbors is computed
69
for each point during initialization and update.
@@ -10,6 +13,9 @@ slower [`update!`](@ref).
1013
A [`GridNeighborhoodSearch`](@ref) is used internally to compute the neighbor lists during
1114
initialization and update.
1215
16+
When used on the GPU, use `freeze_neighborhood_search` after the initialization
17+
to strip the internal neighborhood search, which is not needed anymore.
18+
1319
# Arguments
1420
- `NDIMS`: Number of dimensions.
1521
@@ -22,71 +28,118 @@ initialization and update.
2228
[`PeriodicBox`](@ref).
2329
- `update_strategy`: Strategy to parallelize `update!` of the internally used
2430
`GridNeighborhoodSearch`. See [`GridNeighborhoodSearch`](@ref)
25-
for available options.
31+
for available options. This is only used for the default value
32+
of `update_neighborhood_search` below.
33+
- `update_neighborhood_search = GridNeighborhoodSearch{NDIMS}(; periodic_box, update_strategy)`:
34+
The neighborhood search used to compute the neighbor lists.
35+
By default, a [`GridNeighborhoodSearch`](@ref) is used.
36+
If the precomputed NHS is to be used on the GPU, make sure to
37+
either freeze it after initialization and never update it again,
38+
or pass a GPU-compatible neighborhood search here.
39+
- `backend = DynamicVectorOfVectors{Int32}`: Type of the data structure to store
40+
the neighbor lists. Can be
41+
- `Vector{Vector{Int32}}`: Scattered memory, but very memory-efficient.
42+
- `DynamicVectorOfVectors{Int32}`: Contiguous memory, optimizing cache-hits
43+
and GPU-compatible.
44+
- `max_neighbors`: Maximum number of neighbors per particle. This will be used to
45+
allocate the `DynamicVectorOfVectors`. It is not used with
46+
other backends. The default is 64 in 2D and 324 in 3D.
2647
"""
27-
struct PrecomputedNeighborhoodSearch{NDIMS, NHS, NL, PB} <: AbstractNeighborhoodSearch
28-
neighborhood_search :: NHS
48+
struct PrecomputedNeighborhoodSearch{NDIMS, NL, ELTYPE, PB, NHS} <:
49+
AbstractNeighborhoodSearch
2950
neighbor_lists :: NL
51+
search_radius :: ELTYPE
3052
periodic_box :: PB
53+
neighborhood_search :: NHS
3154

32-
function PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0, n_points = 0,
33-
periodic_box = nothing,
34-
update_strategy = nothing) where {NDIMS}
35-
nhs = GridNeighborhoodSearch{NDIMS}(; search_radius, n_points,
36-
periodic_box, update_strategy)
55+
function PrecomputedNeighborhoodSearch{NDIMS}(neighbor_lists, search_radius,
56+
periodic_box,
57+
update_neighborhood_search) where {NDIMS}
58+
return new{NDIMS, typeof(neighbor_lists),
59+
typeof(search_radius),
60+
typeof(periodic_box),
61+
typeof(update_neighborhood_search)}(neighbor_lists, search_radius,
62+
periodic_box,
63+
update_neighborhood_search)
64+
end
65+
end
3766

38-
neighbor_lists = Vector{Vector{Int}}()
67+
function PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0, n_points = 0,
68+
periodic_box = nothing,
69+
update_strategy = nothing,
70+
update_neighborhood_search = GridNeighborhoodSearch{NDIMS}(;
71+
search_radius,
72+
n_points,
73+
periodic_box,
74+
update_strategy),
75+
backend = DynamicVectorOfVectors{Int32},
76+
max_neighbors = max_neighbors(NDIMS)) where {NDIMS}
77+
neighbor_lists = construct_backend(backend, n_points, max_neighbors)
78+
79+
PrecomputedNeighborhoodSearch{NDIMS}(neighbor_lists, search_radius,
80+
periodic_box, update_neighborhood_search)
81+
end
3982

40-
new{NDIMS, typeof(nhs),
41-
typeof(neighbor_lists),
42-
typeof(periodic_box)}(nhs, neighbor_lists, periodic_box)
83+
# Default values for maximum neighbor count
84+
function max_neighbors(NDIMS)
85+
if NDIMS == 1
86+
return 32
87+
elseif NDIMS == 2
88+
return 64
89+
elseif NDIMS == 3
90+
return 320
4391
end
92+
93+
throw(ArgumentError("`NDIMS` must be 1, 2, or 3"))
4494
end
4595

4696
@inline Base.ndims(::PrecomputedNeighborhoodSearch{NDIMS}) where {NDIMS} = NDIMS
4797

4898
@inline requires_update(::PrecomputedNeighborhoodSearch) = (true, true)
4999

50-
@inline function search_radius(search::PrecomputedNeighborhoodSearch)
51-
return search_radius(search.neighborhood_search)
52-
end
53-
54100
function initialize!(search::PrecomputedNeighborhoodSearch,
55101
x::AbstractMatrix, y::AbstractMatrix;
56102
parallelization_backend = default_backend(x),
57103
eachindex_y = axes(y, 2))
58104
(; neighborhood_search, neighbor_lists) = search
59105

106+
if eachindex_y != axes(y, 2)
107+
error("this neighborhood search does not support inactive points")
108+
end
109+
60110
# Initialize grid NHS
61-
initialize!(neighborhood_search, x, y; eachindex_y, parallelization_backend)
111+
initialize!(neighborhood_search, x, y; parallelization_backend)
62112

63113
initialize_neighbor_lists!(neighbor_lists, neighborhood_search, x, y,
64-
parallelization_backend, eachindex_y)
114+
parallelization_backend)
115+
116+
return search
65117
end
66118

67-
# WARNING! Experimental feature:
68-
# By default, determine the parallelization backend from the type of `x`.
69-
# Optionally, pass a `KernelAbstractions.Backend` to run the KernelAbstractions.jl code
70-
# on this backend. This can be useful to run GPU kernels on the CPU by passing
71-
# `parallelization_backend = KernelAbstractions.CPU()`, even though `x isa Array`.
72119
function update!(search::PrecomputedNeighborhoodSearch,
73120
x::AbstractMatrix, y::AbstractMatrix;
74121
points_moving = (true, true), parallelization_backend = default_backend(x),
75122
eachindex_y = axes(y, 2))
76123
(; neighborhood_search, neighbor_lists) = search
77124

78-
# Update grid NHS
79-
update!(neighborhood_search, x, y; eachindex_y, points_moving, parallelization_backend)
125+
if eachindex_y != axes(y, 2)
126+
error("this neighborhood search does not support inactive points")
127+
end
128+
129+
# Update the internal neighborhood search
130+
update!(neighborhood_search, x, y; points_moving, parallelization_backend)
80131

81132
# Skip update if both point sets are static
82133
if any(points_moving)
83134
initialize_neighbor_lists!(neighbor_lists, neighborhood_search, x, y,
84-
parallelization_backend, eachindex_y)
135+
parallelization_backend)
85136
end
137+
138+
return search
86139
end
87140

88141
function initialize_neighbor_lists!(neighbor_lists, neighborhood_search, x, y,
89-
parallelization_backend, eachindex_y)
142+
parallelization_backend)
90143
# Initialize neighbor lists
91144
empty!(neighbor_lists)
92145
resize!(neighbor_lists, size(x, 2))
@@ -95,12 +148,28 @@ function initialize_neighbor_lists!(neighbor_lists, neighborhood_search, x, y,
95148
end
96149

97150
# Fill neighbor lists
98-
foreach_point_neighbor(x, y, neighborhood_search; parallelization_backend,
99-
points = eachindex_y) do point, neighbor, _, _
151+
foreach_point_neighbor(x, y, neighborhood_search;
152+
parallelization_backend) do point, neighbor, _, _
100153
push!(neighbor_lists[point], neighbor)
101154
end
102155
end
103156

157+
function initialize_neighbor_lists!(neighbor_lists::DynamicVectorOfVectors,
158+
neighborhood_search, x, y, parallelization_backend)
159+
resize!(neighbor_lists, size(x, 2))
160+
161+
# `Base.empty!.(neighbor_lists)`, but for all backends
162+
@threaded parallelization_backend for i in eachindex(neighbor_lists)
163+
emptyat!(neighbor_lists, i)
164+
end
165+
166+
# Fill neighbor lists
167+
foreach_point_neighbor(x, y, neighborhood_search;
168+
parallelization_backend) do point, neighbor, _, _
169+
pushat!(neighbor_lists, point, neighbor)
170+
end
171+
end
172+
104173
@inline function foreach_neighbor(f, neighbor_system_coords,
105174
neighborhood_search::PrecomputedNeighborhoodSearch,
106175
point, point_coords, search_radius)
@@ -135,8 +204,26 @@ end
135204

136205
function copy_neighborhood_search(nhs::PrecomputedNeighborhoodSearch,
137206
search_radius, n_points; eachpoint = 1:n_points)
138-
update_strategy_ = nhs.neighborhood_search.update_strategy
207+
update_neighborhood_search = copy_neighborhood_search(nhs.neighborhood_search,
208+
search_radius, n_points;
209+
eachpoint)
210+
211+
# For `Vector{Vector}` backend use `max_neighbors(NDIMS)` as fallback.
212+
# This should never be used because this backend doesn't require a `max_neighbors`.
213+
max_neighbors_ = max_inner_length(nhs.neighbor_lists, max_neighbors(ndims(nhs)))
139214
return PrecomputedNeighborhoodSearch{ndims(nhs)}(; search_radius, n_points,
140215
periodic_box = nhs.periodic_box,
141-
update_strategy = update_strategy_)
216+
update_neighborhood_search,
217+
backend = typeof(nhs.neighbor_lists),
218+
max_neighbors = max_neighbors_)
219+
end
220+
221+
@inline function freeze_neighborhood_search(search::PrecomputedNeighborhoodSearch)
222+
# Indicate that the neighborhood search is static and will not be updated anymore.
223+
# For the `PrecomputedNeighborhoodSearch`, strip the inner neighborhood search,
224+
# which is used only for initialization and updating.
225+
return PrecomputedNeighborhoodSearch{ndims(search)}(search.neighbor_lists,
226+
search.search_radius,
227+
search.periodic_box,
228+
nothing)
142229
end

0 commit comments

Comments
 (0)