Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "PointNeighbors"
uuid = "1c4d5385-0a27-49de-8e2c-43b175c8985c"
authors = ["Erik Faulhaber <erik.faulhaber@uni-koeln.de>"]
version = "0.5"
version = "0.4.9-dev"
Comment thread
svchb marked this conversation as resolved.

[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
Expand Down
12 changes: 7 additions & 5 deletions benchmarks/count_neighbors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,20 @@ implementations are highlighted. On the other hand, this is the least realistic

For a computationally heavier benchmark, see [`benchmark_n_body`](@ref).
"""
function benchmark_count_neighbors(neighborhood_search, coordinates; parallel = true)
function benchmark_count_neighbors(neighborhood_search, coordinates;
parallelization_backend = default_backend(coordinates))
n_neighbors = zeros(Int, size(coordinates, 2))

function count_neighbors!(n_neighbors, coordinates, neighborhood_search, parallel)
function count_neighbors!(n_neighbors, coordinates, neighborhood_search,
parallelization_backend)
n_neighbors .= 0

foreach_point_neighbor(coordinates, coordinates, neighborhood_search,
parallel = parallel) do i, _, _, _
foreach_point_neighbor(coordinates, coordinates, neighborhood_search;
parallelization_backend) do i, _, _, _
n_neighbors[i] += 1
end
end

return @belapsed $count_neighbors!($n_neighbors, $coordinates,
$neighborhood_search, $parallel)
$neighborhood_search, $parallelization_backend)
end
17 changes: 10 additions & 7 deletions benchmarks/n_body.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,26 @@ This is a more realistic benchmark for particle-based simulations than
However, due to the higher computational cost, differences between neighborhood search
implementations are less pronounced.
"""
function benchmark_n_body(neighborhood_search, coordinates_; parallel = true)
function benchmark_n_body(neighborhood_search, coordinates_;
parallelization_backend = default_backend(coordinates_))
# Passing a different backend like `CUDA.CUDABackend`
# allows us to change the type of the array to run the benchmark on the GPU.
# Passing `parallel = true` or `parallel = false` will not change anything here.
coordinates = PointNeighbors.Adapt.adapt(parallel, coordinates_)
nhs = PointNeighbors.Adapt.adapt(parallel, neighborhood_search)
coordinates = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates_)
nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search)

# This preserves the data type of `coordinates`, which makes it work for GPU types
mass = 1e10 * (rand!(similar(coordinates, size(coordinates, 2))) .+ 1)
G = 6.6743e-11

dv = similar(coordinates)

function compute_acceleration!(dv, coordinates, mass, G, neighborhood_search, parallel)
function compute_acceleration!(dv, coordinates, mass, G, neighborhood_search,
parallelization_backend)
dv .= 0.0

foreach_point_neighbor(coordinates, coordinates, neighborhood_search,
parallel = parallel) do i, j, pos_diff, distance
foreach_point_neighbor(coordinates, coordinates, neighborhood_search;
parallelization_backend) do i, j, pos_diff, distance
# Only consider particles with a distance > 0
distance < sqrt(eps()) && return

Expand All @@ -43,5 +45,6 @@ function benchmark_n_body(neighborhood_search, coordinates_; parallel = true)
return dv
end

return @belapsed $compute_acceleration!($dv, $coordinates, $mass, $G, $nhs, $parallel)
return @belapsed $compute_acceleration!($dv, $coordinates, $mass, $G, $nhs,
$parallelization_backend)
end
4 changes: 2 additions & 2 deletions benchmarks/plot.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ include("benchmarks/benchmarks.jl")
plot_benchmarks(benchmark_count_neighbors, (10, 10), 3)
"""
function plot_benchmarks(benchmark, n_points_per_dimension, iterations;
parallel = true, title = "",
parallelization_backend = PolyesterBackend(), title = "",
seed = 1, perturbation_factor_position = 1.0)
neighborhood_searches_names = ["TrivialNeighborhoodSearch";;
"GridNeighborhoodSearch";;
Expand Down Expand Up @@ -69,7 +69,7 @@ function plot_benchmarks(benchmark, n_points_per_dimension, iterations;
neighborhood_search = neighborhood_searches[i]
initialize!(neighborhood_search, coordinates, coordinates)

time = benchmark(neighborhood_search, coordinates, parallel = parallel)
time = benchmark(neighborhood_search, coordinates; parallelization_backend)
times[iter, i] = time
time_string = BenchmarkTools.prettytime(time * 1e9)
println("$(neighborhood_searches_names[i])")
Expand Down
43 changes: 16 additions & 27 deletions benchmarks/smoothed_particle_hydrodynamics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ A benchmark of the right-hand side of a full real-life Weakly Compressible
Smoothed Particle Hydrodynamics (WCSPH) simulation with TrixiParticles.jl.
This method is used to simulate an incompressible fluid.
"""
function benchmark_wcsph(neighborhood_search, coordinates; parallel = true)
function benchmark_wcsph(neighborhood_search, coordinates;
parallelization_backend = default_backend(coordinates))
density = 1000.0
fluid = InitialCondition(; coordinates, density, mass = 0.1)

Expand All @@ -34,19 +35,12 @@ function benchmark_wcsph(neighborhood_search, coordinates; parallel = true)
smoothing_length, viscosity = viscosity,
density_diffusion = density_diffusion)

# Note that we cannot just disable parallelism in TrixiParticles.
# But passing a different backend like `CUDA.CUDABackend`
# allows us to change the type of the array to run the benchmark on the GPU.
if parallel isa Bool
system = fluid_system
nhs = neighborhood_search
else
system = PointNeighbors.Adapt.adapt(parallel, fluid_system)
nhs = PointNeighbors.Adapt.adapt(parallel, neighborhood_search)
end
system = PointNeighbors.Adapt.adapt(parallelization_backend, fluid_system)
nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search)

v = PointNeighbors.Adapt.adapt(parallel, vcat(fluid.velocity, fluid.density'))
u = PointNeighbors.Adapt.adapt(parallel, coordinates)
v = PointNeighbors.Adapt.adapt(parallelization_backend,
vcat(fluid.velocity, fluid.density'))
u = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates)
dv = zero(v)

# Initialize the system
Expand All @@ -61,7 +55,8 @@ end

Like [`benchmark_wcsph`](@ref), but using single precision floating point numbers.
"""
function benchmark_wcsph_fp32(neighborhood_search, coordinates_; parallel = true)
function benchmark_wcsph_fp32(neighborhood_search, coordinates_;
parallelization_backend = default_backend(coordinates_))
coordinates = convert(Matrix{Float32}, coordinates_)
density = 1000.0f0
fluid = InitialCondition(; coordinates, density, mass = 0.1f0)
Expand All @@ -88,19 +83,12 @@ function benchmark_wcsph_fp32(neighborhood_search, coordinates_; parallel = true
acceleration = (0.0f0, 0.0f0, 0.0f0),
density_diffusion = density_diffusion)

# Note that we cannot just disable parallelism in TrixiParticles.
# But passing a different backend like `CUDA.CUDABackend`
# allows us to change the type of the array to run the benchmark on the GPU.
if parallel isa Bool
system = fluid_system
nhs = neighborhood_search
else
system = PointNeighbors.Adapt.adapt(parallel, fluid_system)
nhs = PointNeighbors.Adapt.adapt(parallel, neighborhood_search)
end
system = PointNeighbors.Adapt.adapt(parallelization_backend, fluid_system)
nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search)

v = PointNeighbors.Adapt.adapt(parallel, vcat(fluid.velocity, fluid.density'))
u = PointNeighbors.Adapt.adapt(parallel, coordinates)
v = PointNeighbors.Adapt.adapt(parallelization_backend,
vcat(fluid.velocity, fluid.density'))
u = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates)
dv = zero(v)

# Initialize the system
Expand All @@ -117,7 +105,8 @@ A benchmark of the right-hand side of a full real-life Total Lagrangian
Smoothed Particle Hydrodynamics (TLSPH) simulation with TrixiParticles.jl.
This method is used to simulate an elastic structure.
"""
function benchmark_tlsph(neighborhood_search, coordinates; parallel = true)
function benchmark_tlsph(neighborhood_search, coordinates;
parallelization_backend = default_backend(coordinates))
material = (density = 1000.0, E = 1.4e6, nu = 0.4)
solid = InitialCondition(; coordinates, density = material.density, mass = 0.1)

Expand Down
6 changes: 4 additions & 2 deletions benchmarks/update.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ include("../test/point_cloud.jl")

Benchmark neighborhood search initialization with the given `coordinates`.
"""
function benchmark_initialize(neighborhood_search, coordinates; parallel = true)
function benchmark_initialize(neighborhood_search, coordinates;
parallelization_backend = default_backend(coordinates))
return @belapsed $initialize!($neighborhood_search, $coordinates, $coordinates)
end

Expand All @@ -21,7 +22,8 @@ perturbed point clouds.

This is a good benchmark for incremental updates, since most particles stay in their cells.
"""
function benchmark_update_alternating(neighborhood_search, coordinates; parallel = true)
function benchmark_update_alternating(neighborhood_search, coordinates;
parallelization_backend = default_backend(coordinates))
coordinates2 = copy(coordinates)
# Perturb all coordinates with a perturbation factor of `0.015`.
# This factor was tuned so that ~0.5% of the particles change their cell during an
Expand Down
7 changes: 4 additions & 3 deletions src/PointNeighbors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ using LinearAlgebra: dot
using Polyester: Polyester
@reexport using StaticArrays: SVector

include("util.jl")
include("vector_of_vectors.jl")
include("util.jl")
include("neighborhood_search.jl")
include("nhs_trivial.jl")
include("cell_lists/cell_lists.jl")
Expand All @@ -25,9 +25,10 @@ export TrivialNeighborhoodSearch, GridNeighborhoodSearch, PrecomputedNeighborhoo
export DictionaryCellList, FullGridCellList
export ParallelUpdate, SemiParallelUpdate, SerialIncrementalUpdate, SerialUpdate,
ParallelIncrementalUpdate
export requires_update, requires_resizing
export requires_update
export initialize!, update!, initialize_grid!, update_grid!
export PolyesterBackend, ThreadsDynamicBackend, ThreadsStaticBackend
export SerialBackend, PolyesterBackend, ThreadsDynamicBackend, ThreadsStaticBackend,
default_backend
export PeriodicBox, copy_neighborhood_search

end # module PointNeighbors
18 changes: 17 additions & 1 deletion src/cell_lists/full_grid.jl
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ function Base.empty!(cell_list::FullGridCellList)
(; cells) = cell_list

# `Base.empty!.(cells)`, but for all backends
for i in eachindex(cells)
@threaded default_backend(cells) for i in eachindex(cells)
emptyat!(cells, i)
end

Expand Down Expand Up @@ -225,6 +225,22 @@ end
@inline function check_cell_bounds(cell_list::FullGridCellList, cell::Tuple)
(; linear_indices) = cell_list

# Make sure that points are not added to the outer padding layer, which is needed
# to ensure that neighboring cells in all directions of all non-empty cells exist.
if !all(cell[i] in 2:(size(linear_indices, i) - 1) for i in eachindex(cell))
size_ = [2:(size(linear_indices, i) - 1) for i in eachindex(cell)]
print_size_ = "[$(join(size_, ", "))]"
error("particle coordinates are NaN or outside the domain bounds of the cell list\n" *
"cell $cell is out of bounds for cell grid of size $print_size_")
end
end

# On GPUs, we can't throw a proper error message because string interpolation is not allowed
@inline function check_cell_bounds(cell_list::FullGridCellList{<:DynamicVectorOfVectors{<:Any,
<:AbstractGPUArray}},
cell::Tuple)
(; linear_indices) = cell_list

# Make sure that points are not added to the outer padding layer, which is needed
# to ensure that neighboring cells in all directions of all non-empty cells exist.
if !all(cell[i] in 2:(size(linear_indices, i) - 1) for i in eachindex(cell))
Expand Down
3 changes: 0 additions & 3 deletions src/gpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,3 @@ function Adapt.adapt_structure(to, nhs::GridNeighborhoodSearch)
return GridNeighborhoodSearch(cell_list, search_radius, periodic_box, n_cells,
cell_size, update_buffer, nhs.update_strategy)
end

# This is useful to pass the backend directly to `@threaded`
KernelAbstractions.get_backend(backend::KernelAbstractions.Backend) = backend
Loading
Loading