-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathnhs_precomputed.jl
More file actions
142 lines (116 loc) · 6.29 KB
/
nhs_precomputed.jl
File metadata and controls
142 lines (116 loc) · 6.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
@doc raw"""
    PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0, n_points = 0,
                                         periodic_box = nothing, update_strategy = nothing)

Neighborhood search that keeps an explicit list of all neighbors for each point.
The lists are computed during initialization and recomputed during update.
Loops over neighbors are as fast as possible with this search, at the price of a much
slower [`update!`](@ref).

Internally, a [`GridNeighborhoodSearch`](@ref) is used to find the neighbors whenever
the lists are computed during initialization and update.

# Arguments
- `NDIMS`: Number of dimensions.

# Keywords
- `search_radius = 0.0`: The fixed search radius. The default of `0.0` is useful together
  with [`copy_neighborhood_search`](@ref).
- `n_points = 0`: Total number of points. The default of `0` is useful together
  with [`copy_neighborhood_search`](@ref).
- `periodic_box = nothing`: In order to use a (rectangular) periodic domain, pass a
  [`PeriodicBox`](@ref).
- `update_strategy = nothing`: Strategy to parallelize `update!` of the internally used
  `GridNeighborhoodSearch`. See [`GridNeighborhoodSearch`](@ref) for available options.
"""
struct PrecomputedNeighborhoodSearch{NDIMS, NHS, NL, PB} <: AbstractNeighborhoodSearch
    # Internal grid search from which the neighbor lists are built
    neighborhood_search :: NHS
    # One list of neighbor indices per point
    neighbor_lists      :: NL
    # The `periodic_box` keyword exactly as passed to the constructor
    periodic_box        :: PB

    function PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0, n_points = 0,
                                                  periodic_box = nothing,
                                                  update_strategy = nothing) where {NDIMS}
        # All keywords are forwarded unchanged to the internal grid search
        grid_search = GridNeighborhoodSearch{NDIMS}(; search_radius = search_radius,
                                                    n_points = n_points,
                                                    periodic_box = periodic_box,
                                                    update_strategy = update_strategy)

        # The lists start out empty; `initialize!` and `update!` fill them
        lists = Vector{Vector{Int}}()

        return new{NDIMS, typeof(grid_search), typeof(lists),
                   typeof(periodic_box)}(grid_search, lists, periodic_box)
    end
end
# The number of dimensions is encoded as the first type parameter of the search.
@inline function Base.ndims(::PrecomputedNeighborhoodSearch{NDIMS}) where {NDIMS}
    return NDIMS
end
# Both flags are `true` for this search.
# NOTE(review): presumably one flag per coordinate set (`x` and `y`), analogous to the
# `points_moving` tuple of `update!` — confirm against the generic `requires_update`.
@inline function requires_update(::PrecomputedNeighborhoodSearch)
    return (true, true)
end
# The search radius is not stored in this struct; it is queried from the internal search.
@inline function search_radius(search::PrecomputedNeighborhoodSearch)
    grid_search = search.neighborhood_search

    return search_radius(grid_search)
end
# Initialize the internal grid search for the coordinates `x` and `y`
# and then build the neighbor lists from it.
function initialize!(search::PrecomputedNeighborhoodSearch,
                     x::AbstractMatrix, y::AbstractMatrix;
                     parallelization_backend = default_backend(x),
                     eachindex_y = axes(y, 2))
    grid_search = search.neighborhood_search

    # The grid search has to be initialized first because the lists are built with it
    initialize!(grid_search, x, y; eachindex_y = eachindex_y,
                parallelization_backend = parallelization_backend)

    return initialize_neighbor_lists!(search.neighbor_lists, grid_search, x, y,
                                      parallelization_backend, eachindex_y)
end
# WARNING! Experimental feature:
# The parallelization backend is determined from the type of `x` by default.
# A `KernelAbstractions.Backend` can be passed instead to run the KernelAbstractions.jl
# code on that backend. For example, GPU kernels can be run on the CPU by passing
# `parallelization_backend = KernelAbstractions.CPU()`, even though `x isa Array`.
function update!(search::PrecomputedNeighborhoodSearch,
                 x::AbstractMatrix, y::AbstractMatrix;
                 points_moving = (true, true),
                 parallelization_backend = default_backend(x),
                 eachindex_y = axes(y, 2))
    grid_search = search.neighborhood_search

    # The internal grid search is always updated; it receives `points_moving` as well
    update!(grid_search, x, y; eachindex_y = eachindex_y, points_moving = points_moving,
            parallelization_backend = parallelization_backend)

    # The neighbor lists only have to be rebuilt if at least one point set is moving
    any(points_moving) || return nothing

    return initialize_neighbor_lists!(search.neighbor_lists, grid_search, x, y,
                                      parallelization_backend, eachindex_y)
end
# Discard all existing neighbor lists and rebuild them with one list per column of `x`.
# The neighbors are found by looping over all point-neighbor pairs
# with the passed `neighborhood_search`.
function initialize_neighbor_lists!(neighbor_lists, neighborhood_search, x, y,
                                    parallelization_backend, eachindex_y)
    # Start over with one fresh, empty list for each column of `x`
    empty!(neighbor_lists)
    for _ in 1:size(x, 2)
        push!(neighbor_lists, Int[])
    end

    # Append every neighbor that is found to the list of its point.
    # NOTE(review): the lists are sized by `size(x, 2)` and indexed by `point`, but `points`
    # receives `eachindex_y` — confirm that this is the intended index set.
    return foreach_point_neighbor(x, y, neighborhood_search;
                                  parallelization_backend = parallelization_backend,
                                  points = eachindex_y) do point, neighbor, _, _
        push!(neighbor_lists[point], neighbor)
    end
end
# Call `f(point, neighbor, pos_diff, distance)` for each entry of the precomputed neighbor
# list of `point`. Every stored neighbor is passed to `f`; `search_radius` is only
# forwarded to `compute_periodic_distance` and is not used to filter neighbors here.
@inline function foreach_neighbor(f, neighbor_system_coords,
                                  neighborhood_search::PrecomputedNeighborhoodSearch,
                                  point, point_coords, search_radius)
    periodic_box = neighborhood_search.periodic_box
    neighbor_lists = neighborhood_search.neighbor_lists

    point_neighbors = @inbounds neighbor_lists[point]
    for list_index in eachindex(point_neighbors)
        neighbor = @inbounds point_neighbors[list_index]

        # This `@inbounds` is not perfectly safe: `neighbor` comes from the neighbor list
        # and is therefore only guaranteed to be in bounds if the neighbor lists were
        # constructed correctly and have not been corrupted.
        # It is kept because it yields a ~20% speedup for TLSPH on GPUs (A4500).
        neighbor_coords = @inbounds extract_svector(neighbor_system_coords,
                                                    Val(ndims(neighborhood_search)),
                                                    neighbor)

        pos_diff = convert.(eltype(neighborhood_search), point_coords - neighbor_coords)
        distance2 = dot(pos_diff, pos_diff)

        # Both values are replaced by whatever `compute_periodic_distance` returns
        (pos_diff, distance2) = compute_periodic_distance(pos_diff, distance2,
                                                          search_radius, periodic_box)

        distance = sqrt(distance2)

        # Inline the call to avoid a loss of performance
        # compared to not using `foreach_point_neighbor`.
        @inline f(point, neighbor, pos_diff, distance)
    end
end
# Create a new `PrecomputedNeighborhoodSearch` with the passed `search_radius` and
# `n_points`, reusing the number of dimensions, the periodic box and the update strategy
# of `nhs`. The keyword `eachpoint` is accepted, but not used by this method.
function copy_neighborhood_search(nhs::PrecomputedNeighborhoodSearch,
                                  search_radius, n_points; eachpoint = 1:n_points)
    # The update strategy is stored in the internal grid search
    strategy = nhs.neighborhood_search.update_strategy
    periodic_box = nhs.periodic_box

    return PrecomputedNeighborhoodSearch{ndims(nhs)}(; search_radius = search_radius,
                                                     n_points = n_points,
                                                     periodic_box = periodic_box,
                                                     update_strategy = strategy)
end