Skip to content
8 changes: 8 additions & 0 deletions src/general/gpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,11 @@ end
# Forward `parallel_foreach` calls that receive a `Semidiscretization` to the
# parallelization backend stored in it, so each simulation picks its own
# CPU/GPU execution strategy.
@inline function PointNeighbors.parallel_foreach(f, iterator, semi::Semidiscretization)
    return PointNeighbors.parallel_foreach(f, iterator, semi.parallelization_backend)
end

# Allocate an uninitialized array of element type `ELTYPE` and dimensions `size`
# on a GPU-capable `KernelAbstractions` backend (e.g. `CUDABackend`).
function allocate(backend::KernelAbstractions.Backend, ELTYPE, size)
    allocated = KernelAbstractions.allocate(backend, ELTYPE, size)

    return allocated
end

# Fallback for CPU backends: allocate an uninitialized `Array` of element type
# `ELTYPE` with dimensions `size` (either an `Integer` or a `Dims` tuple).
#
# Simplified from `Array{ELTYPE, length(size)}(undef, size)`: the
# `Array{T}(undef, dims)` constructor already accepts both an integer length
# and a tuple of dimensions, so computing the rank by hand is unnecessary.
function allocate(backend, ELTYPE, size)
    return Array{ELTYPE}(undef, size)
end
125 changes: 96 additions & 29 deletions src/general/interpolation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,13 @@ function interpolate_line(start, end_, n_points, semi, ref_system, v_ode, u_ode;
# Convert to coordinate matrix
points_coords_ = collect(reinterpret(reshape, eltype(start_svector), points_coords))

return interpolate_points(points_coords_, semi, ref_system, v_ode, u_ode;
if semi.parallelization_backend isa KernelAbstractions.Backend
points_coords_new = Adapt.adapt(semi.parallelization_backend, points_coords_)
else
points_coords_new = points_coords_
end

return interpolate_points(points_coords_new, semi, ref_system, v_ode, u_ode;
smoothing_length=smoothing_length,
cut_off_bnd=cut_off_bnd, clip_negative_pressure)
end
Expand Down Expand Up @@ -512,12 +518,17 @@ end

n_points = size(point_coords, 2)
ELTYPE = eltype(point_coords)
computed_density = zeros(ELTYPE, n_points)
other_density = zeros(ELTYPE, n_points)
neighbor_count = zeros(Int, n_points)
shepard_coefficient = zeros(ELTYPE, n_points)
computed_density = allocate(semi.parallelization_backend, ELTYPE, n_points)
other_density = allocate(semi.parallelization_backend, ELTYPE, n_points)
shepard_coefficient = allocate(semi.parallelization_backend, ELTYPE, n_points)
neighbor_count = allocate(semi.parallelization_backend, Int, n_points)

cache = create_cache_interpolation(ref_system, n_points)
set_zero!(computed_density)
set_zero!(other_density)
set_zero!(shepard_coefficient)
set_zero!(neighbor_count)

cache = create_cache_interpolation(ref_system, n_points, semi)

ref_id = system_indices(ref_system, semi)
ref_smoothing_kernel = ref_system.smoothing_kernel
Expand Down Expand Up @@ -565,42 +576,52 @@ end
computed_density[point] = NaN
neighbor_count[point] = 0

foreach(cache) do field
if field isa AbstractVector
field[point] = NaN
else
field[:, point] .= NaN
end
foreach(Tuple(cache)) do field
Comment thread
LasNikas marked this conversation as resolved.
set_nan!(field, point)
end
else
# Normalize all quantities by the shepard coefficient
foreach(cache) do field
if field isa AbstractVector
field[point] /= shepard_coefficient[point]
else
field[:, point] ./= shepard_coefficient[point]
end
foreach(Tuple(cache)) do field
divide_by_shepard_coefficient!(field, shepard_coefficient, point)
end
end
end

return (; computed_density=computed_density, neighbor_count, point_coords, cache...)
cache_ = map(x -> Array(x), cache)

return (; computed_density=Array(computed_density), point_coords=Array(point_coords),
neighbor_count=Array(neighbor_count), cache_...)
Comment thread
LasNikas marked this conversation as resolved.
Outdated
end

@inline function create_cache_interpolation(ref_system::FluidSystem, n_points)
velocity = zeros(eltype(ref_system), ndims(ref_system), n_points)
pressure = zeros(eltype(ref_system), n_points)
density = zeros(eltype(ref_system), n_points)
# Allocate the interpolation output buffers for a fluid reference system on the
# same device (CPU or GPU) as the simulation data and zero-initialize them.
@inline function create_cache_interpolation(ref_system::FluidSystem, n_points, semi)
    backend = semi.parallelization_backend
    ELTYPE = eltype(ref_system)

    velocity = allocate(backend, ELTYPE, (ndims(ref_system), n_points))
    pressure = allocate(backend, ELTYPE, n_points)
    density = allocate(backend, ELTYPE, n_points)

    # `allocate` returns uninitialized memory, so zero every field explicitly
    foreach(set_zero!, (velocity, pressure, density))

    return (; velocity, pressure, density)
end

@inline function create_cache_interpolation(ref_system::SolidSystem, n_points)
velocity = zeros(eltype(ref_system), ndims(ref_system), n_points)
jacobian = zeros(eltype(ref_system), n_points)
von_mises_stress = zeros(eltype(ref_system), n_points)
cauchy_stress = zeros(eltype(ref_system), ndims(ref_system), ndims(ref_system),
n_points)
# Allocate the interpolation output buffers for a solid reference system on the
# same device (CPU or GPU) as the simulation data and zero-initialize them.
@inline function create_cache_interpolation(ref_system::SolidSystem, n_points, semi)
    backend = semi.parallelization_backend
    ELTYPE = eltype(ref_system)
    NDIMS = ndims(ref_system)

    velocity = allocate(backend, ELTYPE, (NDIMS, n_points))
    jacobian = allocate(backend, ELTYPE, n_points)
    von_mises_stress = allocate(backend, ELTYPE, n_points)
    cauchy_stress = allocate(backend, ELTYPE, (NDIMS, NDIMS, n_points))

    # `allocate` returns uninitialized memory, so zero every field explicitly
    foreach(set_zero!, (velocity, jacobian, von_mises_stress, cauchy_stress))

    return (; velocity, jacobian, von_mises_stress, cauchy_stress)
end
Expand Down Expand Up @@ -643,3 +664,49 @@ end

return cache
end

# Scalar quantity: invalidate the single entry belonging to `point` by writing
# `NaN` (converted to the element type on assignment). Returns `field`.
set_nan!(field::AbstractVector, point) = (field[point] = NaN; field)

# Vector-valued quantity (dims × n_points): invalidate the whole column
# belonging to `point` by writing `NaN` into every component.
# Fix: removed GitHub review-UI artifact lines ("Comment thread", etc.) that
# were embedded inside the function body and made it syntactically invalid.
function set_nan!(field::AbstractArray{T, 2}, point) where {T}
    for dim in axes(field, 1)
        field[dim, point] = NaN
    end

    return field
end

# Matrix-valued quantity (dims × dims × n_points): invalidate the whole slice
# belonging to `point` by writing `NaN` into every component.
function set_nan!(field::AbstractArray{T, 3}, point) where {T}
    point_slice = view(field, :, :, point)
    point_slice .= NaN

    return field
end

# Scalar quantity: normalize the entry belonging to `point` by its Shepard
# interpolation coefficient. Returns `field`.
function divide_by_shepard_coefficient!(field::AbstractVector, shepard_coefficient, point)
    field[point] = field[point] / shepard_coefficient[point]

    return field
end

# Vector-valued quantity (dims × n_points): normalize every component of the
# column belonging to `point` by its Shepard interpolation coefficient.
# Fix: removed GitHub review-UI artifact lines ("Comment thread", etc.) that
# were embedded inside the function body and made it syntactically invalid.
function divide_by_shepard_coefficient!(field::AbstractArray{T, 2}, shepard_coefficient,
                                        point) where {T}
    for dim in axes(field, 1)
        field[dim, point] /= shepard_coefficient[point]
    end

    return field
end

# Matrix-valued quantity (dims × dims × n_points): normalize every component of
# the slice belonging to `point` by its Shepard interpolation coefficient.
function divide_by_shepard_coefficient!(field::AbstractArray{T, 3}, shepard_coefficient,
                                        point) where {T}
    # The coefficient is the same for the whole slice; read it once
    coefficient = shepard_coefficient[point]

    for j in axes(field, 2), i in axes(field, 1)
        field[i, j, point] /= coefficient
    end

    return field
end
17 changes: 8 additions & 9 deletions src/general/semidiscretization.jl
Original file line number Diff line number Diff line change
Expand Up @@ -282,19 +282,18 @@ function semidiscretize(semi, tspan; reset_threads=true)
sizes_u = (u_nvariables(system) * n_moving_particles(system) for system in systems)
sizes_v = (v_nvariables(system) * n_moving_particles(system) for system in systems)

# Use either the specified backend, e.g., `CUDABackend` or `MetalBackend` or
# use CPU vectors for all CPU backends.
u0_ode_ = allocate(semi.parallelization_backend, ELTYPE, sum(sizes_u))
v0_ode_ = allocate(semi.parallelization_backend, ELTYPE, sum(sizes_v))

if semi.parallelization_backend isa KernelAbstractions.Backend
# Use the specified backend, e.g., `CUDABackend` or `MetalBackend`
u0_ode = KernelAbstractions.allocate(semi.parallelization_backend, ELTYPE,
sum(sizes_u))
v0_ode = KernelAbstractions.allocate(semi.parallelization_backend, ELTYPE,
sum(sizes_v))
u0_ode = u0_ode_
v0_ode = v0_ode_
else
# Use CPU vectors for all CPU backends.
# These are wrapped in `ThreadedBroadcastArray`s
# CPU vectors are wrapped in `ThreadedBroadcastArray`s
# to make broadcasting (which is done by OrdinaryDiffEq.jl) multithreaded.
# See https://github.com/trixi-framework/TrixiParticles.jl/pull/722 for more details.
u0_ode_ = Vector{ELTYPE}(undef, sum(sizes_u))
v0_ode_ = Vector{ELTYPE}(undef, sum(sizes_v))
u0_ode = ThreadedBroadcastArray(u0_ode_;
parallelization_backend=semi.parallelization_backend)
v0_ode = ThreadedBroadcastArray(v0_ode_;
Expand Down
69 changes: 69 additions & 0 deletions test/examples/gpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -392,3 +392,72 @@ end
end
end
end

# GPU smoke test for line interpolation: runs a short hydrostatic water column
# simulation on the configured parallelization backend and checks interpolated
# density/pressure values along a vertical line against reference values.
# Fix: removed GitHub review-UI artifact lines ("Comment thread",
# "LasNikas marked this conversation as resolved.", "Outdated") that were
# embedded inside the testset and made it syntactically invalid.
@testset verbose=true "Postprocessing $TRIXIPARTICLES_TEST_" begin
    @testset verbose=true "Interpolation" begin
        # Import variables into scope
        trixi_include_changeprecision(Float32, @__MODULE__,
                                      joinpath(examples_dir(), "fluid",
                                               "hydrostatic_water_column_2d.jl"),
                                      sol=nothing, ode=nothing)

        # Neighborhood search with `FullGridCellList` for GPU compatibility
        min_corner = minimum(tank.boundary.coordinates, dims=2)
        max_corner = maximum(tank.boundary.coordinates, dims=2)
        cell_list = FullGridCellList(; min_corner, max_corner, max_points_per_cell=500)
        semi_fullgrid = Semidiscretization(fluid_system, boundary_system,
                                           neighborhood_search=GridNeighborhoodSearch{2}(;
                                                                                         cell_list),
                                           parallelization_backend=Main.parallelization_backend)

        trixi_include_changeprecision(Float32, @__MODULE__,
                                      joinpath(examples_dir(),
                                               "fluid", "hydrostatic_water_column_2d.jl");
                                      semi=semi_fullgrid, tspan=(0.0f0, 0.1f0))

        # Adapt the semidiscretization to the parallelization backend so that
        # the interpolation works on device arrays
        semi_new = TrixiParticles.Adapt.adapt(semi_fullgrid.parallelization_backend,
                                              sol.prob.p)

        # Interpolation parameters
        position_x = tank_size[1] / 2
        n_interpolation_points = 10
        start_point = [position_x, -fluid_particle_spacing]
        end_point = [position_x, tank_size[2]]

        result = interpolate_line(start_point, end_point, n_interpolation_points,
                                  semi_new, semi_new.systems[1], sol; cut_off_bnd=false)

        @test isapprox(result.computed_density[1:(end - 1)], # Exclude last NaN
                       Float32[62.50176,
                               1053.805,
                               1061.2959,
                               1055.8348,
                               1043.9069,
                               1038.2051,
                               1033.1708,
                               1014.2249,
                               672.61566])

        @test isapprox(result.density[1:(end - 1)], # Exclude last NaN
                       Float32[1078.3738,
                               1070.8535,
                               1061.2003,
                               1052.4126,
                               1044.5074,
                               1037.0444,
                               1028.4813,
                               1014.7941,
                               1003.6117])

        @test isapprox(result.pressure[1:(end - 1)], # Exclude last NaN
                       Float32[9940.595,
                               8791.842,
                               7368.837,
                               6143.6562,
                               5093.711,
                               4143.313,
                               3106.1575,
                               1552.1078,
                               366.71414])
    end
end
Loading