Skip to content
Draft
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "PointNeighbors"
uuid = "1c4d5385-0a27-49de-8e2c-43b175c8985c"
authors = ["Erik Faulhaber <erik.faulhaber@uni-koeln.de>"]
version = "0.6.6-dev"
version = "0.6.6"

[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
Expand Down
91 changes: 74 additions & 17 deletions benchmarks/run_benchmarks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,11 @@ run_benchmark(benchmark_count_neighbors, (10, 10), 3,
```
"""
function run_benchmark(benchmark, n_points_per_dimension, iterations, neighborhood_searches;
search_radius_factor = 3.0,
parallelization_backend = PolyesterBackend(),
names = ["Neighborhood search $i"
for i in 1:length(neighborhood_searches)]',
seed = 1, perturbation_factor_position = 1.0)
seed = 1, perturbation_factor_position = 1.0, shuffle = false)
# Multiply number of points in each iteration (roughly) by this factor
scaling_factor = 4
per_dimension_factor = scaling_factor^(1 / length(n_points_per_dimension))
Expand All @@ -64,24 +65,29 @@ function run_benchmark(benchmark, n_points_per_dimension, iterations, neighborho
times = zeros(iterations, length(neighborhood_searches))

for iter in 1:iterations
coordinates = point_cloud(sizes[iter]; seed, perturbation_factor_position)
coordinates_ = point_cloud(sizes[iter], search_radius_factor;
seed, perturbation_factor_position, shuffle)
coordinates = convert.(typeof(search_radius_factor), coordinates_)
domain_size = maximum(sizes[iter]) + 1

# Normalize domain size to 1
coordinates ./= domain_size

# Make this Float32 to make sure that Float32 benchmarks use Float32 exclusively
search_radius = 4.0f0 / domain_size
search_radius = search_radius_factor / domain_size
n_particles = size(coordinates, 2)

neighborhood_searches_copy = copy_neighborhood_search.(neighborhood_searches,
search_radius, n_particles)

for i in eachindex(neighborhood_searches_copy)
neighborhood_search = neighborhood_searches_copy[i]
PointNeighbors.initialize!(neighborhood_search, coordinates, coordinates)
neighborhood_search_ = neighborhood_searches_copy[i]
neighborhood_search = PointNeighbors.Adapt.adapt(parallelization_backend,
neighborhood_search_)
coords = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates)
PointNeighbors.initialize!(neighborhood_search, coords, coords)

time = benchmark(neighborhood_search, coordinates; parallelization_backend)
time = benchmark(neighborhood_search, coords; parallelization_backend)
times[iter, i] = time
time_string = BenchmarkTools.prettytime(time * 1e9)
time_string_per_particle = BenchmarkTools.prettytime(time * 1e9 / n_particles)
Expand Down Expand Up @@ -170,23 +176,74 @@ include("benchmarks/benchmarks.jl")
run_benchmark_gpu(benchmark_n_body, (10, 10), 3)
```
"""
function run_benchmark_gpu(benchmark, n_points_per_dimension, iterations; kwargs...)
function run_benchmark_gpu(benchmark, n_points_per_dimension, iterations;
parallelization_backend=PolyesterBackend(), kwargs...)
NDIMS = length(n_points_per_dimension)

min_corner = 0.0f0 .* n_points_per_dimension
max_corner = Float32.(n_points_per_dimension ./ maximum(n_points_per_dimension))
neighborhood_searches = [GridNeighborhoodSearch{NDIMS}(search_radius = 0.0f0,
cell_list = FullGridCellList(;
search_radius = 0.0f0,
min_corner,
max_corner))
PrecomputedNeighborhoodSearch{NDIMS}(search_radius = 0.0f0)]

names = ["GridNeighborhoodSearch with FullGridCellList";;
"PrecomputedNeighborhoodSearch"]
cell_list = FullGridCellList(; search_radius = 0.0f0, min_corner, max_corner)
grid_nhs = GridNeighborhoodSearch{NDIMS}(; search_radius = 0.0f0, cell_list,
update_strategy = ParallelUpdate())
transpose_backend = parallelization_backend isa PointNeighbors.KernelAbstractions.GPU
neighborhood_searches = [
grid_nhs
PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0f0,
update_neighborhood_search = grid_nhs,
transpose_backend)#, max_neighbors=128)
]

names = [
"GridNeighborhoodSearch with FullGridCellList";;
"PrecomputedNeighborhoodSearch"
]

run_benchmark(benchmark, n_points_per_dimension, iterations,
neighborhood_searches; names, kwargs...)
neighborhood_searches; names, parallelization_backend, kwargs...)
end

"""
run_benchmark_full_grid(benchmark, n_points_per_dimension, iterations; kwargs...)

Shortcut to call [`run_benchmark`](@ref) with a `GridNeighborhoodSearch` with a
`FullGridCellList`. This is the neighborhood search implementation that is used
in TrixiParticles.jl when performance is important.
Use this function to benchmark and profile TrixiParticles.jl kernels.

# Arguments
- `benchmark`: The benchmark function. See [`benchmark_count_neighbors`](@ref),
[`benchmark_n_body`](@ref), [`benchmark_wcsph`](@ref),
[`benchmark_wcsph_fp32`](@ref) and [`benchmark_tlsph`](@ref).
- `n_points_per_dimension`: Initial resolution as tuple. The product is the initial number
of points. For example, use `(100, 100)` for a 2D benchmark or
`(10, 10, 10)` for a 3D benchmark.
- `iterations`: Number of refinement iterations

# Keywords
See [`run_benchmark`](@ref) for a list of available keywords.

# Examples
```julia
include("benchmarks/benchmarks.jl")

run_benchmark_full_grid(benchmark_n_body, (10, 10), 3)
```
"""
function run_benchmark_full_grid(benchmark, n_points_per_dimension, iterations;
parallelization_backend=PolyesterBackend(), kwargs...)
NDIMS = length(n_points_per_dimension)

min_corner = 0.0f0 .* n_points_per_dimension
max_corner = Float32.(n_points_per_dimension ./ maximum(n_points_per_dimension))
cell_list = FullGridCellList(; search_radius = 0.0f0, min_corner, max_corner)
grid_nhs = GridNeighborhoodSearch{NDIMS}(; search_radius = 0.0f0, cell_list,
update_strategy = ParallelUpdate())
neighborhood_searches = [grid_nhs]

names = ["GridNeighborhoodSearch with FullGridCellList";;]

run_benchmark(benchmark, n_points_per_dimension, iterations,
neighborhood_searches; names, parallelization_backend, kwargs...)
end

"""
Expand Down
103 changes: 50 additions & 53 deletions benchmarks/smoothed_particle_hydrodynamics.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using PointNeighbors
using PointNeighbors.Adapt
using TrixiParticles
using BenchmarkTools

Expand Down Expand Up @@ -43,47 +44,30 @@ This method is used to simulate an incompressible fluid.
"""
function benchmark_wcsph(neighborhood_search, coordinates;
parallelization_backend = default_backend(coordinates))
density = 1000.0
particle_spacing = PointNeighbors.search_radius(neighborhood_search) / 3
fluid = InitialCondition(; coordinates, density, mass = 0.1, particle_spacing)

sound_speed = 10.0
state_equation = StateEquationCole(; sound_speed, reference_density = density,
exponent = 1)

viscosity = ArtificialViscosityMonaghan(alpha = 0.02, beta = 0.0)
density_diffusion = DensityDiffusionMolteniColagrossi(delta = 0.1)
# System initialization has to happen on the CPU
coordinates_cpu = PointNeighbors.Adapt.adapt(Array, coordinates)

__benchmark_wcsph_inner(neighborhood_search, fluid, state_equation,
viscosity, density_diffusion, parallelization_backend)
end

"""
benchmark_wcsph_fp32(neighborhood_search, coordinates;
parallelization_backend = default_backend(coordinates))

Like [`benchmark_wcsph`](@ref), but using single precision floating point numbers.
"""
function benchmark_wcsph_fp32(neighborhood_search, coordinates_;
parallelization_backend = default_backend(coordinates_))
coordinates = convert(Matrix{Float32}, coordinates_)
density = 1000.0f0
search_radius = PointNeighbors.search_radius(neighborhood_search)
ELTYPE = typeof(search_radius)
density = convert(ELTYPE, 1000.0)
particle_spacing = PointNeighbors.search_radius(neighborhood_search) / 3
fluid = InitialCondition(; coordinates, density, mass = 0.1f0, particle_spacing)
fluid = InitialCondition(; coordinates = coordinates_cpu, density,
mass = convert(ELTYPE, 0.1) * particle_spacing,
particle_spacing)

sound_speed = 10.0f0
# Make sure that the computed forces are not all zero
for i in eachindex(fluid.density)
fluid.density[i] += rand(eltype(fluid.density))
end

sound_speed = convert(ELTYPE, 10.0)
state_equation = StateEquationCole(; sound_speed, reference_density = density,
exponent = 1)

viscosity = ArtificialViscosityMonaghan(alpha = 0.02f0, beta = 0.0f0)
density_diffusion = DensityDiffusionMolteniColagrossi(delta = 0.1f0)

__benchmark_wcsph_inner(neighborhood_search, fluid, state_equation,
viscosity, density_diffusion, parallelization_backend)
end
viscosity = ArtificialViscosityMonaghan(alpha = convert(ELTYPE, 0.02),
beta = convert(ELTYPE, 0.0))
density_diffusion = DensityDiffusionMolteniColagrossi(delta = convert(ELTYPE, 0.1))

function __benchmark_wcsph_inner(neighborhood_search, initial_condition, state_equation,
viscosity, density_diffusion, parallelization_backend)
# Compact support == 2 * smoothing length for these kernels
smoothing_length = PointNeighbors.search_radius(neighborhood_search) / 2
if ndims(neighborhood_search) == 1
Expand All @@ -92,23 +76,20 @@ function __benchmark_wcsph_inner(neighborhood_search, initial_condition, state_e
smoothing_kernel = WendlandC2Kernel{ndims(neighborhood_search)}()
end

fluid_system = WeaklyCompressibleSPHSystem(initial_condition, ContinuityDensity(),
fluid_system = WeaklyCompressibleSPHSystem(fluid; density_calculator = ContinuityDensity(),
state_equation, smoothing_kernel,
smoothing_length, viscosity = viscosity,
density_diffusion = density_diffusion)
smoothing_length, viscosity, density_diffusion)

system = PointNeighbors.Adapt.adapt(parallelization_backend, fluid_system)
system = Adapt.adapt(parallelization_backend, fluid_system)

# Remove unnecessary data structures that are only used for initialization
neighborhood_search_ = PointNeighbors.freeze_neighborhood_search(neighborhood_search)
nhs = PointNeighbors.freeze_neighborhood_search(neighborhood_search)

nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search_)
semi = DummySemidiscretization(nhs, parallelization_backend, true)

v = PointNeighbors.Adapt.adapt(parallelization_backend,
vcat(initial_condition.velocity,
initial_condition.density'))
u = PointNeighbors.Adapt.adapt(parallelization_backend, initial_condition.coordinates)
v = Adapt.adapt(parallelization_backend,
vcat(fluid.velocity, fluid.density'))
u = Adapt.adapt(parallelization_backend, fluid.coordinates)
dv = zero(v)

# Initialize the system
Expand All @@ -128,8 +109,18 @@ This method is used to simulate an elastic structure.
"""
function benchmark_tlsph(neighborhood_search, coordinates;
parallelization_backend = default_backend(coordinates))
material = (density = 1000.0, E = 1.4e6, nu = 0.4)
solid = InitialCondition(; coordinates, density = material.density, mass = 0.1)
# System initialization has to happen on the CPU
coordinates_cpu = PointNeighbors.Adapt.adapt(Array, coordinates)

search_radius = PointNeighbors.search_radius(neighborhood_search)
ELTYPE = typeof(search_radius)
material = (density = convert(ELTYPE, 1000.0), E = convert(ELTYPE, 1.4e6),
nu = convert(ELTYPE, 0.4))

# The `particle_spacing` is only required for setting the type of the initial condition
solid = InitialCondition(; coordinates = coordinates_cpu,
density = material.density, mass = convert(ELTYPE, 0.1),
particle_spacing = search_radius)

# Compact support == 2 * smoothing length for these kernels
smoothing_length_ = PointNeighbors.search_radius(neighborhood_search) / 2
Expand All @@ -140,17 +131,23 @@ function benchmark_tlsph(neighborhood_search, coordinates;
smoothing_kernel = WendlandC2Kernel{ndims(neighborhood_search)}()
end

penalty_force = PenaltyForceGanzenmueller(alpha = convert(ELTYPE, 0.1))
solid_system = TotalLagrangianSPHSystem(solid, smoothing_kernel, smoothing_length,
material.E, material.nu)
semi = DummySemidiscretization(neighborhood_search, parallelization_backend, true)
material.E, material.nu; penalty_force)
system_ = Adapt.adapt(parallelization_backend, solid_system)

v = copy(solid.velocity)
u = copy(solid.coordinates)
# Remove unnecessary data structures that are only used for initialization
nhs = PointNeighbors.freeze_neighborhood_search(neighborhood_search)
system = TrixiParticles.@set system_.self_interaction_nhs = nhs

semi = DummySemidiscretization(nhs, parallelization_backend, true)

v = Adapt.adapt(parallelization_backend, copy(solid.velocity))
u = Adapt.adapt(parallelization_backend, copy(solid.coordinates))
dv = zero(v)

# Initialize the system
TrixiParticles.initialize!(solid_system, semi)
TrixiParticles.initialize!(system, semi)

return @belapsed TrixiParticles.interact!($dv, $v, $u, $v, $u,
$solid_system, $solid_system, $semi)
return @belapsed TrixiParticles.interact_structure_structure!($dv, $v, $system, $semi)
end
3 changes: 2 additions & 1 deletion src/PointNeighbors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ include("nhs_grid.jl")
include("nhs_precomputed.jl")
include("gpu.jl")

export foreach_point_neighbor, foreach_neighbor
export foreach_point_neighbor, foreach_point_neighbor_unsafe,
foreach_neighbor, foreach_neighbor_unsafe
export TrivialNeighborhoodSearch, GridNeighborhoodSearch, PrecomputedNeighborhoodSearch
export DictionaryCellList, FullGridCellList, SpatialHashingCellList
export DynamicVectorOfVectors
Expand Down
Loading