Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 74 additions & 17 deletions benchmarks/run_benchmarks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,11 @@ run_benchmark(benchmark_count_neighbors, (10, 10), 3,
```
"""
function run_benchmark(benchmark, n_points_per_dimension, iterations, neighborhood_searches;
search_radius_factor = 3.0,
parallelization_backend = PolyesterBackend(),
names = ["Neighborhood search $i"
for i in 1:length(neighborhood_searches)]',
seed = 1, perturbation_factor_position = 1.0)
seed = 1, perturbation_factor_position = 1.0, shuffle = false)
# Multiply number of points in each iteration (roughly) by this factor
scaling_factor = 4
per_dimension_factor = scaling_factor^(1 / length(n_points_per_dimension))
Expand All @@ -64,24 +65,29 @@ function run_benchmark(benchmark, n_points_per_dimension, iterations, neighborho
times = zeros(iterations, length(neighborhood_searches))

for iter in 1:iterations
coordinates = point_cloud(sizes[iter]; seed, perturbation_factor_position)
coordinates_ = point_cloud(sizes[iter], search_radius_factor;
seed, perturbation_factor_position, shuffle)
coordinates = convert.(typeof(search_radius_factor), coordinates_)
domain_size = maximum(sizes[iter]) + 1

# Normalize domain size to 1
coordinates ./= domain_size

# Make this Float32 to make sure that Float32 benchmarks use Float32 exclusively
search_radius = 4.0f0 / domain_size
search_radius = search_radius_factor / domain_size
n_particles = size(coordinates, 2)

neighborhood_searches_copy = copy_neighborhood_search.(neighborhood_searches,
search_radius, n_particles)

for i in eachindex(neighborhood_searches_copy)
neighborhood_search = neighborhood_searches_copy[i]
PointNeighbors.initialize!(neighborhood_search, coordinates, coordinates)
neighborhood_search_ = neighborhood_searches_copy[i]
neighborhood_search = PointNeighbors.Adapt.adapt(parallelization_backend,
neighborhood_search_)
coords = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates)
PointNeighbors.initialize!(neighborhood_search, coords, coords)

time = benchmark(neighborhood_search, coordinates; parallelization_backend)
time = benchmark(neighborhood_search, coords; parallelization_backend)
times[iter, i] = time
time_string = BenchmarkTools.prettytime(time * 1e9)
time_string_per_particle = BenchmarkTools.prettytime(time * 1e9 / n_particles)
Expand Down Expand Up @@ -170,23 +176,74 @@ include("benchmarks/benchmarks.jl")
run_benchmark_gpu(benchmark_n_body, (10, 10), 3)
```
"""
function run_benchmark_gpu(benchmark, n_points_per_dimension, iterations; kwargs...)
function run_benchmark_gpu(benchmark, n_points_per_dimension, iterations;
parallelization_backend=PolyesterBackend(), kwargs...)
NDIMS = length(n_points_per_dimension)

min_corner = 0.0f0 .* n_points_per_dimension
max_corner = Float32.(n_points_per_dimension ./ maximum(n_points_per_dimension))
neighborhood_searches = [GridNeighborhoodSearch{NDIMS}(search_radius = 0.0f0,
cell_list = FullGridCellList(;
search_radius = 0.0f0,
min_corner,
max_corner))
PrecomputedNeighborhoodSearch{NDIMS}(search_radius = 0.0f0)]

names = ["GridNeighborhoodSearch with FullGridCellList";;
"PrecomputedNeighborhoodSearch"]
cell_list = FullGridCellList(; search_radius = 0.0f0, min_corner, max_corner)
grid_nhs = GridNeighborhoodSearch{NDIMS}(; search_radius = 0.0f0, cell_list,
update_strategy = ParallelUpdate())
transpose_backend = parallelization_backend isa PointNeighbors.KernelAbstractions.GPU
neighborhood_searches = [
grid_nhs
PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0f0,
update_neighborhood_search = grid_nhs,
transpose_backend)#, max_neighbors=128)
]

names = [
"GridNeighborhoodSearch with FullGridCellList";;
"PrecomputedNeighborhoodSearch"
]

run_benchmark(benchmark, n_points_per_dimension, iterations,
neighborhood_searches; names, kwargs...)
neighborhood_searches; names, parallelization_backend, kwargs...)
end

"""
run_benchmark_full_grid(benchmark, n_points_per_dimension, iterations; kwargs...)

Shortcut to call [`run_benchmark`](@ref) with a `GridNeighborhoodSearch` with a
`FullGridCellList`. This is the neighborhood search implementation that is used
in TrixiParticles.jl when performance is important.
Use this function to benchmark and profile TrixiParticles.jl kernels.

# Arguments
- `benchmark`: The benchmark function. See [`benchmark_count_neighbors`](@ref),
[`benchmark_n_body`](@ref), [`benchmark_wcsph`](@ref),
[`benchmark_wcsph_fp32`](@ref) and [`benchmark_tlsph`](@ref).
- `n_points_per_dimension`: Initial resolution as tuple. The product is the initial number
of points. For example, use `(100, 100)` for a 2D benchmark or
`(10, 10, 10)` for a 3D benchmark.
- `iterations`: Number of refinement iterations

# Keywords
See [`run_benchmark`](@ref) for a list of available keywords.

# Examples
```julia
include("benchmarks/benchmarks.jl")

run_benchmark_full_grid(benchmark_n_body, (10, 10), 3)
```
"""
function run_benchmark_full_grid(benchmark, n_points_per_dimension, iterations;
parallelization_backend=PolyesterBackend(), kwargs...)
NDIMS = length(n_points_per_dimension)

min_corner = 0.0f0 .* n_points_per_dimension
max_corner = Float32.(n_points_per_dimension ./ maximum(n_points_per_dimension))
cell_list = FullGridCellList(; search_radius = 0.0f0, min_corner, max_corner)
grid_nhs = GridNeighborhoodSearch{NDIMS}(; search_radius = 0.0f0, cell_list,
update_strategy = ParallelUpdate())
neighborhood_searches = [grid_nhs]

names = ["GridNeighborhoodSearch with FullGridCellList";;]

run_benchmark(benchmark, n_points_per_dimension, iterations,
neighborhood_searches; names, parallelization_backend, kwargs...)
end

"""
Expand Down
94 changes: 44 additions & 50 deletions benchmarks/smoothed_particle_hydrodynamics.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using PointNeighbors
using PointNeighbors.Adapt
using TrixiParticles
using BenchmarkTools

Expand Down Expand Up @@ -43,47 +44,30 @@ This method is used to simulate an incompressible fluid.
"""
function benchmark_wcsph(neighborhood_search, coordinates;
parallelization_backend = default_backend(coordinates))
density = 1000.0
particle_spacing = PointNeighbors.search_radius(neighborhood_search) / 3
fluid = InitialCondition(; coordinates, density, mass = 0.1, particle_spacing)

sound_speed = 10.0
state_equation = StateEquationCole(; sound_speed, reference_density = density,
exponent = 1)

viscosity = ArtificialViscosityMonaghan(alpha = 0.02, beta = 0.0)
density_diffusion = DensityDiffusionMolteniColagrossi(delta = 0.1)
# System initialization has to happen on the CPU
coordinates_cpu = PointNeighbors.Adapt.adapt(Array, coordinates)

__benchmark_wcsph_inner(neighborhood_search, fluid, state_equation,
viscosity, density_diffusion, parallelization_backend)
end

"""
benchmark_wcsph_fp32(neighborhood_search, coordinates;
parallelization_backend = default_backend(coordinates))

Like [`benchmark_wcsph`](@ref), but using single precision floating point numbers.
"""
function benchmark_wcsph_fp32(neighborhood_search, coordinates_;
parallelization_backend = default_backend(coordinates_))
coordinates = convert(Matrix{Float32}, coordinates_)
density = 1000.0f0
search_radius = PointNeighbors.search_radius(neighborhood_search)
ELTYPE = typeof(search_radius)
density = convert(ELTYPE, 1000.0)
particle_spacing = PointNeighbors.search_radius(neighborhood_search) / 3
fluid = InitialCondition(; coordinates, density, mass = 0.1f0, particle_spacing)
fluid = InitialCondition(; coordinates = coordinates_cpu, density,
mass = convert(ELTYPE, 0.1) * particle_spacing,
particle_spacing)

sound_speed = 10.0f0
# Make sure that the computed forces are not all zero
for i in eachindex(fluid.density)
fluid.density[i] += rand(eltype(fluid.density))
end

sound_speed = convert(ELTYPE, 10.0)
state_equation = StateEquationCole(; sound_speed, reference_density = density,
exponent = 1)

viscosity = ArtificialViscosityMonaghan(alpha = 0.02f0, beta = 0.0f0)
density_diffusion = DensityDiffusionMolteniColagrossi(delta = 0.1f0)
viscosity = ArtificialViscosityMonaghan(alpha = convert(ELTYPE, 0.02),
beta = convert(ELTYPE, 0.0))
density_diffusion = DensityDiffusionMolteniColagrossi(delta = convert(ELTYPE, 0.1))

__benchmark_wcsph_inner(neighborhood_search, fluid, state_equation,
viscosity, density_diffusion, parallelization_backend)
end

function __benchmark_wcsph_inner(neighborhood_search, initial_condition, state_equation,
viscosity, density_diffusion, parallelization_backend)
# Compact support == 2 * smoothing length for these kernels
smoothing_length = PointNeighbors.search_radius(neighborhood_search) / 2
if ndims(neighborhood_search) == 1
Expand All @@ -92,23 +76,21 @@ function __benchmark_wcsph_inner(neighborhood_search, initial_condition, state_e
smoothing_kernel = WendlandC2Kernel{ndims(neighborhood_search)}()
end

fluid_system = WeaklyCompressibleSPHSystem(initial_condition, ContinuityDensity(),
fluid_system = WeaklyCompressibleSPHSystem(fluid, ContinuityDensity(),
state_equation, smoothing_kernel,
smoothing_length, viscosity = viscosity,
density_diffusion = density_diffusion)

system = PointNeighbors.Adapt.adapt(parallelization_backend, fluid_system)
system = Adapt.adapt(parallelization_backend, fluid_system)

# Remove unnecessary data structures that are only used for initialization
neighborhood_search_ = PointNeighbors.freeze_neighborhood_search(neighborhood_search)
nhs = PointNeighbors.freeze_neighborhood_search(neighborhood_search)

nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search_)
semi = DummySemidiscretization(nhs, parallelization_backend, true)

v = PointNeighbors.Adapt.adapt(parallelization_backend,
vcat(initial_condition.velocity,
initial_condition.density'))
u = PointNeighbors.Adapt.adapt(parallelization_backend, initial_condition.coordinates)
v = Adapt.adapt(parallelization_backend,
vcat(fluid.velocity, fluid.density'))
u = Adapt.adapt(parallelization_backend, fluid.coordinates)
dv = zero(v)

# Initialize the system
Expand All @@ -128,8 +110,15 @@ This method is used to simulate an elastic structure.
"""
function benchmark_tlsph(neighborhood_search, coordinates;
parallelization_backend = default_backend(coordinates))
material = (density = 1000.0, E = 1.4e6, nu = 0.4)
solid = InitialCondition(; coordinates, density = material.density, mass = 0.1)
# System initialization has to happen on the CPU
coordinates_cpu = PointNeighbors.Adapt.adapt(Array, coordinates)

search_radius = PointNeighbors.search_radius(neighborhood_search)
ELTYPE = typeof(search_radius)
material = (density = convert(ELTYPE, 1000.0), E = convert(ELTYPE, 1.4e6),
nu = convert(ELTYPE, 0.4))
solid = InitialCondition(; coordinates = coordinates_cpu,
density = material.density, mass = convert(ELTYPE, 0.1))

# Compact support == 2 * smoothing length for these kernels
smoothing_length_ = PointNeighbors.search_radius(neighborhood_search) / 2
Expand All @@ -142,15 +131,20 @@ function benchmark_tlsph(neighborhood_search, coordinates;

solid_system = TotalLagrangianSPHSystem(solid, smoothing_kernel, smoothing_length,
material.E, material.nu)
semi = DummySemidiscretization(neighborhood_search, parallelization_backend, true)
system_ = Adapt.adapt(parallelization_backend, solid_system)

v = copy(solid.velocity)
u = copy(solid.coordinates)
# Remove unnecessary data structures that are only used for initialization
nhs = PointNeighbors.freeze_neighborhood_search(neighborhood_search)
system = TrixiParticles.@set system_.self_interaction_nhs = nhs

semi = DummySemidiscretization(nhs, parallelization_backend, true)

v = Adapt.adapt(parallelization_backend, copy(solid.velocity))
u = Adapt.adapt(parallelization_backend, copy(solid.coordinates))
dv = zero(v)

# Initialize the system
TrixiParticles.initialize!(solid_system, semi)
TrixiParticles.initialize!(system, semi)

return @belapsed TrixiParticles.interact!($dv, $v, $u, $v, $u,
$solid_system, $solid_system, $semi)
return @belapsed TrixiParticles.interact_structure_structure2!($dv, $v, $system, $semi)
end
6 changes: 3 additions & 3 deletions test/neighborhood_search.jl
Original file line number Diff line number Diff line change
Expand Up @@ -142,14 +142,14 @@
"($(seed == 1 ? "`initialize!`" : "`update!`"))"
@testset verbose=true "$(name(cloud_size, seed)))" for cloud_size in cloud_sizes,
seed in seeds
coords = point_cloud(cloud_size, seed = seed)
search_radius = 2.5
coords = point_cloud(cloud_size, search_radius, seed = seed)
NDIMS = length(cloud_size)
n_points = size(coords, 2)
search_radius = 2.5

# Use different coordinates for `initialize!` and then `update!` with the
# correct coordinates to make sure that `update!` is working as well.
coords_initialize = point_cloud(cloud_size, seed = 1)
coords_initialize = point_cloud(cloud_size, search_radius, seed = 1)

# Compute expected neighbor lists by brute-force looping over all points
# as potential neighbors (`TrivialNeighborhoodSearch`).
Expand Down
31 changes: 28 additions & 3 deletions test/point_cloud.jl
Original file line number Diff line number Diff line change
@@ -1,17 +1,25 @@
using Random

# Generate a rectangular point cloud, optionally with a perturbation in the point positions
function point_cloud(n_points_per_dimension;
seed = 1, perturbation_factor_position = 1.0)
function point_cloud(n_points_per_dimension, search_radius;
seed = 1, perturbation_factor_position = 1.0,
sort = true, shuffle = false)
# Fixed seed to ensure reproducibility
Random.seed!(seed)

n_dims = length(n_points_per_dimension)
coordinates = Array{Float64}(undef, n_dims, prod(n_points_per_dimension))
cartesian_indices = CartesianIndices(n_points_per_dimension)

# Extra data structures for the sorting code below
cell_coords = Vector{SVector{n_dims, Int}}(undef, size(coordinates, 2))
cell_size = ntuple(dim -> search_radius, n_dims)

for i in axes(coordinates, 2)
coordinates[:, i] .= Tuple(cartesian_indices[i])
point_coords = SVector(Tuple(cartesian_indices[i]))
coordinates[:, i] .= point_coords
cell_coords[i] = PointNeighbors.cell_coords(point_coords, nothing, nothing,
cell_size) .+ 1
end

# A standard deviation of 0.05 in the particle coordinates
Expand All @@ -21,6 +29,23 @@ function point_cloud(n_points_per_dimension;
# The benchmark results are also consistent with the timer output of the simulation.
perturb!(coordinates, perturbation_factor_position * 0.05)

# Sort by linear cell index
if sort
if shuffle
throw(ArgumentError("cannot sort and shuffle at the same time"))
end

# Sort by Z index (with `using Morton`)
# permutation = sortperm(cell_coords, by = c -> cartesian2morton(c))

permutation = sortperm(cell_coords)
coordinates .= coordinates[:, permutation]
elseif shuffle
# Sort randomly
permutation = shuffle(axes(coordinates, 2))
coordinates .= coordinates[:, permutation]
end

return coordinates
end

Expand Down
Loading