Skip to content
8 changes: 8 additions & 0 deletions src/general/gpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,11 @@ end
# Forward `parallel_foreach` calls that receive a `Semidiscretization` to the
# parallelization backend stored in it, so each simulation picks its own
# CPU/GPU execution strategy.
@inline function PointNeighbors.parallel_foreach(f, iterator, semi::Semidiscretization)
    return PointNeighbors.parallel_foreach(f, iterator, semi.parallelization_backend)
end

# Allocate an uninitialized array of element type `ELTYPE` and dimensions `size`
# on a GPU-capable `KernelAbstractions` backend (e.g. `CUDABackend`).
function allocate(backend::KernelAbstractions.Backend, ELTYPE, size)
    allocated = KernelAbstractions.allocate(backend, ELTYPE, size)

    return allocated
end

# Fallback for CPU backends: allocate an uninitialized `Array` of element type
# `ELTYPE` with dimensions `size` (either an `Integer` or a `Dims` tuple).
#
# Simplified from `Array{ELTYPE, length(size)}(undef, size)`: the
# `Array{T}(undef, dims)` constructor already accepts both an integer length
# and a tuple of dimensions, so computing the rank by hand is unnecessary.
function allocate(backend, ELTYPE, size)
    return Array{ELTYPE}(undef, size)
end
125 changes: 96 additions & 29 deletions src/general/interpolation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,13 @@ function interpolate_line(start, end_, n_points, semi, ref_system, v_ode, u_ode;
# Convert to coordinate matrix
points_coords_ = collect(reinterpret(reshape, eltype(start_svector), points_coords))

return interpolate_points(points_coords_, semi, ref_system, v_ode, u_ode;
if semi.parallelization_backend isa KernelAbstractions.Backend
points_coords_new = Adapt.adapt(semi.parallelization_backend, points_coords_)
else
points_coords_new = points_coords_
end

return interpolate_points(points_coords_new, semi, ref_system, v_ode, u_ode;
smoothing_length=smoothing_length,
cut_off_bnd=cut_off_bnd, clip_negative_pressure)
end
Expand Down Expand Up @@ -512,12 +518,17 @@ end

n_points = size(point_coords, 2)
ELTYPE = eltype(point_coords)
computed_density = zeros(ELTYPE, n_points)
other_density = zeros(ELTYPE, n_points)
neighbor_count = zeros(Int, n_points)
shepard_coefficient = zeros(ELTYPE, n_points)
computed_density = allocate(semi.parallelization_backend, ELTYPE, n_points)
other_density = allocate(semi.parallelization_backend, ELTYPE, n_points)
shepard_coefficient = allocate(semi.parallelization_backend, ELTYPE, n_points)
neighbor_count = allocate(semi.parallelization_backend, Int, n_points)

cache = create_cache_interpolation(ref_system, n_points)
set_zero!(computed_density)
set_zero!(other_density)
set_zero!(shepard_coefficient)
set_zero!(neighbor_count)

cache = create_cache_interpolation(ref_system, n_points, semi)

ref_id = system_indices(ref_system, semi)
ref_smoothing_kernel = ref_system.smoothing_kernel
Expand Down Expand Up @@ -565,42 +576,52 @@ end
computed_density[point] = NaN
neighbor_count[point] = 0

foreach(cache) do field
if field isa AbstractVector
field[point] = NaN
else
field[:, point] .= NaN
end
foreach(Tuple(cache)) do field
Comment thread
LasNikas marked this conversation as resolved.
set_nan!(field, point)
end
else
# Normalize all quantities by the shepard coefficient
foreach(cache) do field
if field isa AbstractVector
field[point] /= shepard_coefficient[point]
else
field[:, point] ./= shepard_coefficient[point]
end
foreach(Tuple(cache)) do field
divide_by_shepard_coefficient!(field, shepard_coefficient, point)
end
end
end

return (; computed_density=computed_density, neighbor_count, point_coords, cache...)
cache_ = map(x -> Array(x), cache)

return (; computed_density=Array(computed_density), point_coords=Array(point_coords),
neighbor_count=Array(neighbor_count), cache_...)
Comment thread
LasNikas marked this conversation as resolved.
Outdated
end

@inline function create_cache_interpolation(ref_system::FluidSystem, n_points)
velocity = zeros(eltype(ref_system), ndims(ref_system), n_points)
pressure = zeros(eltype(ref_system), n_points)
density = zeros(eltype(ref_system), n_points)
# Allocate the interpolation output buffers for a fluid reference system on the
# same device (CPU or GPU) as the simulation data and zero-initialize them.
@inline function create_cache_interpolation(ref_system::FluidSystem, n_points, semi)
    backend = semi.parallelization_backend
    ELTYPE = eltype(ref_system)

    velocity = allocate(backend, ELTYPE, (ndims(ref_system), n_points))
    pressure = allocate(backend, ELTYPE, n_points)
    density = allocate(backend, ELTYPE, n_points)

    # `allocate` returns uninitialized memory, so zero every field explicitly
    foreach(set_zero!, (velocity, pressure, density))

    return (; velocity, pressure, density)
end

@inline function create_cache_interpolation(ref_system::SolidSystem, n_points)
velocity = zeros(eltype(ref_system), ndims(ref_system), n_points)
jacobian = zeros(eltype(ref_system), n_points)
von_mises_stress = zeros(eltype(ref_system), n_points)
cauchy_stress = zeros(eltype(ref_system), ndims(ref_system), ndims(ref_system),
n_points)
# Allocate the interpolation output buffers for a solid reference system on the
# same device (CPU or GPU) as the simulation data and zero-initialize them.
@inline function create_cache_interpolation(ref_system::SolidSystem, n_points, semi)
    backend = semi.parallelization_backend
    ELTYPE = eltype(ref_system)
    NDIMS = ndims(ref_system)

    velocity = allocate(backend, ELTYPE, (NDIMS, n_points))
    jacobian = allocate(backend, ELTYPE, n_points)
    von_mises_stress = allocate(backend, ELTYPE, n_points)
    cauchy_stress = allocate(backend, ELTYPE, (NDIMS, NDIMS, n_points))

    # `allocate` returns uninitialized memory, so zero every field explicitly
    foreach(set_zero!, (velocity, jacobian, von_mises_stress, cauchy_stress))

    return (; velocity, jacobian, von_mises_stress, cauchy_stress)
end
Expand Down Expand Up @@ -643,3 +664,49 @@ end

return cache
end

# Scalar quantity: invalidate the single entry belonging to `point` by writing
# `NaN` (converted to the element type on assignment). Returns `field`.
set_nan!(field::AbstractVector, point) = (field[point] = NaN; field)

# Vector-valued quantity (dims × n_points): invalidate the whole column
# belonging to `point` by writing `NaN` into every component.
# Fix: removed GitHub review-UI artifact lines ("Comment thread", etc.) that
# were embedded inside the function body and made it syntactically invalid.
function set_nan!(field::AbstractArray{T, 2}, point) where {T}
    for dim in axes(field, 1)
        field[dim, point] = NaN
    end

    return field
end

# Matrix-valued quantity (dims × dims × n_points): invalidate the whole slice
# belonging to `point` by writing `NaN` into every component.
function set_nan!(field::AbstractArray{T, 3}, point) where {T}
    point_slice = view(field, :, :, point)
    point_slice .= NaN

    return field
end

# Scalar quantity: normalize the entry belonging to `point` by its Shepard
# interpolation coefficient. Returns `field`.
function divide_by_shepard_coefficient!(field::AbstractVector, shepard_coefficient, point)
    field[point] = field[point] / shepard_coefficient[point]

    return field
end

# Vector-valued quantity (dims × n_points): normalize every component of the
# column belonging to `point` by its Shepard interpolation coefficient.
# Fix: removed GitHub review-UI artifact lines ("Comment thread", etc.) that
# were embedded inside the function body and made it syntactically invalid.
function divide_by_shepard_coefficient!(field::AbstractArray{T, 2}, shepard_coefficient,
                                        point) where {T}
    for dim in axes(field, 1)
        field[dim, point] /= shepard_coefficient[point]
    end

    return field
end

# Matrix-valued quantity (dims × dims × n_points): normalize every component of
# the slice belonging to `point` by its Shepard interpolation coefficient.
function divide_by_shepard_coefficient!(field::AbstractArray{T, 3}, shepard_coefficient,
                                        point) where {T}
    # The coefficient is the same for the whole slice; read it once
    coefficient = shepard_coefficient[point]

    for j in axes(field, 2), i in axes(field, 1)
        field[i, j, point] /= coefficient
    end

    return field
end
17 changes: 8 additions & 9 deletions src/general/semidiscretization.jl
Original file line number Diff line number Diff line change
Expand Up @@ -282,19 +282,18 @@ function semidiscretize(semi, tspan; reset_threads=true)
sizes_u = (u_nvariables(system) * n_moving_particles(system) for system in systems)
sizes_v = (v_nvariables(system) * n_moving_particles(system) for system in systems)

# Use either the specified backend, e.g., `CUDABackend` or `MetalBackend` or
# use CPU vectors for all CPU backends.
u0_ode_ = allocate(semi.parallelization_backend, ELTYPE, sum(sizes_u))
v0_ode_ = allocate(semi.parallelization_backend, ELTYPE, sum(sizes_v))

if semi.parallelization_backend isa KernelAbstractions.Backend
# Use the specified backend, e.g., `CUDABackend` or `MetalBackend`
u0_ode = KernelAbstractions.allocate(semi.parallelization_backend, ELTYPE,
sum(sizes_u))
v0_ode = KernelAbstractions.allocate(semi.parallelization_backend, ELTYPE,
sum(sizes_v))
u0_ode = u0_ode_
v0_ode = v0_ode_
else
# Use CPU vectors for all CPU backends.
# These are wrapped in `ThreadedBroadcastArray`s
# CPU vectors are wrapped in `ThreadedBroadcastArray`s
# to make broadcasting (which is done by OrdinaryDiffEq.jl) multithreaded.
# See https://github.com/trixi-framework/TrixiParticles.jl/pull/722 for more details.
u0_ode_ = Vector{ELTYPE}(undef, sum(sizes_u))
v0_ode_ = Vector{ELTYPE}(undef, sum(sizes_v))
u0_ode = ThreadedBroadcastArray(u0_ode_;
parallelization_backend=semi.parallelization_backend)
v0_ode = ThreadedBroadcastArray(v0_ode_;
Expand Down
69 changes: 69 additions & 0 deletions test/examples/gpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -392,3 +392,72 @@ end
end
end
end

# GPU smoke test for line interpolation: runs a short hydrostatic water column
# simulation on the configured parallelization backend and checks interpolated
# density/pressure values along a vertical line against reference values.
# Fix: removed GitHub review-UI artifact lines ("Comment thread",
# "LasNikas marked this conversation as resolved.", "Outdated") that were
# embedded inside the testset and made it syntactically invalid.
@testset verbose=true "Postprocessing $TRIXIPARTICLES_TEST_" begin
    @testset verbose=true "Interpolation" begin
        # Import variables into scope
        trixi_include_changeprecision(Float32, @__MODULE__,
                                      joinpath(examples_dir(), "fluid",
                                               "hydrostatic_water_column_2d.jl"),
                                      sol=nothing, ode=nothing)

        # Neighborhood search with `FullGridCellList` for GPU compatibility
        min_corner = minimum(tank.boundary.coordinates, dims=2)
        max_corner = maximum(tank.boundary.coordinates, dims=2)
        cell_list = FullGridCellList(; min_corner, max_corner, max_points_per_cell=500)
        semi_fullgrid = Semidiscretization(fluid_system, boundary_system,
                                           neighborhood_search=GridNeighborhoodSearch{2}(;
                                                                                         cell_list),
                                           parallelization_backend=Main.parallelization_backend)

        trixi_include_changeprecision(Float32, @__MODULE__,
                                      joinpath(examples_dir(),
                                               "fluid", "hydrostatic_water_column_2d.jl");
                                      semi=semi_fullgrid, tspan=(0.0f0, 0.1f0))

        # Adapt the semidiscretization to the parallelization backend so that
        # the interpolation works on device arrays
        semi_new = TrixiParticles.Adapt.adapt(semi_fullgrid.parallelization_backend,
                                              sol.prob.p)

        # Interpolation parameters
        position_x = tank_size[1] / 2
        n_interpolation_points = 10
        start_point = [position_x, -fluid_particle_spacing]
        end_point = [position_x, tank_size[2]]

        result = interpolate_line(start_point, end_point, n_interpolation_points,
                                  semi_new, semi_new.systems[1], sol; cut_off_bnd=false)

        @test isapprox(result.computed_density[1:(end - 1)], # Exclude last NaN
                       Float32[62.50176,
                               1053.805,
                               1061.2959,
                               1055.8348,
                               1043.9069,
                               1038.2051,
                               1033.1708,
                               1014.2249,
                               672.61566])

        @test isapprox(result.density[1:(end - 1)], # Exclude last NaN
                       Float32[1078.3738,
                               1070.8535,
                               1061.2003,
                               1052.4126,
                               1044.5074,
                               1037.0444,
                               1028.4813,
                               1014.7941,
                               1003.6117])

        @test isapprox(result.pressure[1:(end - 1)], # Exclude last NaN
                       Float32[9940.595,
                               8791.842,
                               7368.837,
                               6143.6562,
                               5093.711,
                               4143.313,
                               3106.1575,
                               1552.1078,
                               366.71414])
    end
end
Loading