trixi-framework
diff --git a/‎.github/workflows/SpellCheck.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/SpellCheck.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎Project.toml‎
Lines changed: 1 addition & 1 deletion b/‎Project.toml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎benchmarks/count_neighbors.jl‎
Lines changed: 7 additions & 5 deletions b/‎benchmarks/count_neighbors.jl‎
Lines changed: 7 additions & 5 deletions
diff --git a/‎benchmarks/n_body.jl‎
Lines changed: 10 additions & 7 deletions b/‎benchmarks/n_body.jl‎
Lines changed: 10 additions & 7 deletions
diff --git a/‎benchmarks/plot.jl‎
Lines changed: 2 additions & 2 deletions b/‎benchmarks/plot.jl‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎benchmarks/smoothed_particle_hydrodynamics.jl‎
Lines changed: 16 additions & 27 deletions b/‎benchmarks/smoothed_particle_hydrodynamics.jl‎
Lines changed: 16 additions & 27 deletions
diff --git a/‎benchmarks/update.jl‎
Lines changed: 4 additions & 2 deletions b/‎benchmarks/update.jl‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎src/PointNeighbors.jl‎
Lines changed: 5 additions & 3 deletions b/‎src/PointNeighbors.jl‎
Lines changed: 5 additions & 3 deletions
diff --git a/‎src/cell_lists/full_grid.jl‎
Lines changed: 19 additions & 13 deletions b/‎src/cell_lists/full_grid.jl‎
Lines changed: 19 additions & 13 deletions
diff --git a/‎src/gpu.jl‎
Lines changed: 0 additions & 7 deletions b/‎src/gpu.jl‎
Lines changed: 0 additions & 7 deletions
@@ -10,4 +10,4 @@ jobs:
       - name: Checkout Actions Repository
         uses: actions/checkout@v4
       - name: Check spelling
-        uses: crate-ci/typos@v1.31.1
+        uses: crate-ci/typos@v1.31.2
@@ -1,7 +1,7 @@
 name = "PointNeighbors"
 uuid = "1c4d5385-0a27-49de-8e2c-43b175c8985c"
 authors = ["Erik Faulhaber <erik.faulhaber@uni-koeln.de>"]
-version = "0.4.12-pre"
+version = "0.4.9-dev"
 
 [deps]
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 
@@ -12,18 +12,20 @@ implementations are highlighted. On the other hand, this is the least realistic
 
 For a computationally heavier benchmark, see [`benchmark_n_body`](@ref).
 """
-function benchmark_count_neighbors(neighborhood_search, coordinates; parallel = true)
+function benchmark_count_neighbors(neighborhood_search, coordinates;
+                                   parallelization_backend = default_backend(coordinates))
     n_neighbors = zeros(Int, size(coordinates, 2))
 
-    function count_neighbors!(n_neighbors, coordinates, neighborhood_search, parallel)
+    function count_neighbors!(n_neighbors, coordinates, neighborhood_search,
+                              parallelization_backend)
         n_neighbors .= 0
 
-        foreach_point_neighbor(coordinates, coordinates, neighborhood_search,
-                               parallel = parallel) do i, _, _, _
+        foreach_point_neighbor(coordinates, coordinates, neighborhood_search;
+                               parallelization_backend) do i, _, _, _
             n_neighbors[i] += 1
         end
     end
 
     return @belapsed $count_neighbors!($n_neighbors, $coordinates,
-                                       $neighborhood_search, $parallel)
+                                       $neighborhood_search, $parallelization_backend)
 end
@@ -12,24 +12,26 @@ This is a more realistic benchmark for particle-based simulations than
 However, due to the higher computational cost, differences between neighborhood search
 implementations are less pronounced.
 """
-function benchmark_n_body(neighborhood_search, coordinates_; parallel = true)
+function benchmark_n_body(neighborhood_search, coordinates_;
+                          parallelization_backend = default_backend(coordinates_))
     # Passing a different backend like `CUDA.CUDABackend`
     # allows us to change the type of the array to run the benchmark on the GPU.
     # Passing `parallel = true` or `parallel = false` will not change anything here.
-    coordinates = PointNeighbors.Adapt.adapt(parallel, coordinates_)
-    nhs = PointNeighbors.Adapt.adapt(parallel, neighborhood_search)
+    coordinates = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates_)
+    nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search)
 
     # This preserves the data type of `coordinates`, which makes it work for GPU types
     mass = 1e10 * (rand!(similar(coordinates, size(coordinates, 2))) .+ 1)
     G = 6.6743e-11
 
     dv = similar(coordinates)
 
-    function compute_acceleration!(dv, coordinates, mass, G, neighborhood_search, parallel)
+    function compute_acceleration!(dv, coordinates, mass, G, neighborhood_search,
+                                   parallelization_backend)
         dv .= 0.0
 
-        foreach_point_neighbor(coordinates, coordinates, neighborhood_search,
-                               parallel = parallel) do i, j, pos_diff, distance
+        foreach_point_neighbor(coordinates, coordinates, neighborhood_search;
+                               parallelization_backend) do i, j, pos_diff, distance
             # Only consider particles with a distance > 0
             distance < sqrt(eps()) && return
 
@@ -43,5 +45,6 @@ function benchmark_n_body(neighborhood_search, coordinates_; parallel = true)
         return dv
     end
 
-    return @belapsed $compute_acceleration!($dv, $coordinates, $mass, $G, $nhs, $parallel)
+    return @belapsed $compute_acceleration!($dv, $coordinates, $mass, $G, $nhs,
+                                            $parallelization_backend)
 end
@@ -36,7 +36,7 @@ include("benchmarks/benchmarks.jl")
 plot_benchmarks(benchmark_count_neighbors, (10, 10), 3)
 """
 function plot_benchmarks(benchmark, n_points_per_dimension, iterations;
-                         parallel = true, title = "",
+                         parallelization_backend = PolyesterBackend(), title = "",
                          seed = 1, perturbation_factor_position = 1.0)
     neighborhood_searches_names = ["TrivialNeighborhoodSearch";;
                                    "GridNeighborhoodSearch";;
@@ -69,7 +69,7 @@ function plot_benchmarks(benchmark, n_points_per_dimension, iterations;
             neighborhood_search = neighborhood_searches[i]
             initialize!(neighborhood_search, coordinates, coordinates)
 
-            time = benchmark(neighborhood_search, coordinates, parallel = parallel)
+            time = benchmark(neighborhood_search, coordinates; parallelization_backend)
             times[iter, i] = time
             time_string = BenchmarkTools.prettytime(time * 1e9)
             println("$(neighborhood_searches_names[i])")
 
@@ -9,7 +9,8 @@ A benchmark of the right-hand side of a full real-life Weakly Compressible
 Smoothed Particle Hydrodynamics (WCSPH) simulation with TrixiParticles.jl.
 This method is used to simulate an incompressible fluid.
 """
-function benchmark_wcsph(neighborhood_search, coordinates; parallel = true)
+function benchmark_wcsph(neighborhood_search, coordinates;
+                         parallelization_backend = default_backend(coordinates))
     density = 1000.0
     fluid = InitialCondition(; coordinates, density, mass = 0.1)
 
@@ -34,19 +35,12 @@ function benchmark_wcsph(neighborhood_search, coordinates; parallel = true)
                                                smoothing_length, viscosity = viscosity,
                                                density_diffusion = density_diffusion)
 
-    # Note that we cannot just disable parallelism in TrixiParticles.
-    # But passing a different backend like `CUDA.CUDABackend`
-    # allows us to change the type of the array to run the benchmark on the GPU.
-    if parallel isa Bool
-        system = fluid_system
-        nhs = neighborhood_search
-    else
-        system = PointNeighbors.Adapt.adapt(parallel, fluid_system)
-        nhs = PointNeighbors.Adapt.adapt(parallel, neighborhood_search)
-    end
+    system = PointNeighbors.Adapt.adapt(parallelization_backend, fluid_system)
+    nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search)
 
-    v = PointNeighbors.Adapt.adapt(parallel, vcat(fluid.velocity, fluid.density'))
-    u = PointNeighbors.Adapt.adapt(parallel, coordinates)
+    v = PointNeighbors.Adapt.adapt(parallelization_backend,
+                                   vcat(fluid.velocity, fluid.density'))
+    u = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates)
     dv = zero(v)
 
     # Initialize the system
@@ -61,7 +55,8 @@ end
 
 Like [`benchmark_wcsph`](@ref), but using single precision floating point numbers.
 """
-function benchmark_wcsph_fp32(neighborhood_search, coordinates_; parallel = true)
+function benchmark_wcsph_fp32(neighborhood_search, coordinates_;
+                              parallelization_backend = default_backend(coordinates_))
     coordinates = convert(Matrix{Float32}, coordinates_)
     density = 1000.0f0
     fluid = InitialCondition(; coordinates, density, mass = 0.1f0)
@@ -88,19 +83,12 @@ function benchmark_wcsph_fp32(neighborhood_search, coordinates_; parallel = true
                                                acceleration = (0.0f0, 0.0f0, 0.0f0),
                                                density_diffusion = density_diffusion)
 
-    # Note that we cannot just disable parallelism in TrixiParticles.
-    # But passing a different backend like `CUDA.CUDABackend`
-    # allows us to change the type of the array to run the benchmark on the GPU.
-    if parallel isa Bool
-        system = fluid_system
-        nhs = neighborhood_search
-    else
-        system = PointNeighbors.Adapt.adapt(parallel, fluid_system)
-        nhs = PointNeighbors.Adapt.adapt(parallel, neighborhood_search)
-    end
+    system = PointNeighbors.Adapt.adapt(parallelization_backend, fluid_system)
+    nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search)
 
-    v = PointNeighbors.Adapt.adapt(parallel, vcat(fluid.velocity, fluid.density'))
-    u = PointNeighbors.Adapt.adapt(parallel, coordinates)
+    v = PointNeighbors.Adapt.adapt(parallelization_backend,
+                                   vcat(fluid.velocity, fluid.density'))
+    u = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates)
     dv = zero(v)
 
     # Initialize the system
@@ -117,7 +105,8 @@ A benchmark of the right-hand side of a full real-life Total Lagrangian
 Smoothed Particle Hydrodynamics (TLSPH) simulation with TrixiParticles.jl.
 This method is used to simulate an elastic structure.
 """
-function benchmark_tlsph(neighborhood_search, coordinates; parallel = true)
+function benchmark_tlsph(neighborhood_search, coordinates;
+                         parallelization_backend = default_backend(coordinates))
     material = (density = 1000.0, E = 1.4e6, nu = 0.4)
     solid = InitialCondition(; coordinates, density = material.density, mass = 0.1)
 
 
@@ -9,7 +9,8 @@ include("../test/point_cloud.jl")
 
 Benchmark neighborhood search initialization with the given `coordinates`.
 """
-function benchmark_initialize(neighborhood_search, coordinates; parallel = true)
+function benchmark_initialize(neighborhood_search, coordinates;
+                              parallelization_backend = default_backend(coordinates))
     return @belapsed $initialize!($neighborhood_search, $coordinates, $coordinates)
 end
 
@@ -21,7 +22,8 @@ perturbed point clouds.
 
 This is a good benchmark for incremental updates, since most particles stay in their cells.
 """
-function benchmark_update_alternating(neighborhood_search, coordinates; parallel = true)
+function benchmark_update_alternating(neighborhood_search, coordinates;
+                                      parallelization_backend = default_backend(coordinates))
     coordinates2 = copy(coordinates)
     # Perturb all coordinates with a perturbation factor of `0.015`.
     # This factor was tuned so that ~0.5% of the particles change their cell during an
 
@@ -11,8 +11,8 @@ using LinearAlgebra: dot
 using Polyester: Polyester
 @reexport using StaticArrays: SVector
 
-include("util.jl")
 include("vector_of_vectors.jl")
+include("util.jl")
 include("neighborhood_search.jl")
 include("nhs_trivial.jl")
 include("cell_lists/cell_lists.jl")
@@ -25,8 +25,10 @@ export TrivialNeighborhoodSearch, GridNeighborhoodSearch, PrecomputedNeighborhoo
 export DictionaryCellList, FullGridCellList
 export ParallelUpdate, SemiParallelUpdate, SerialIncrementalUpdate, SerialUpdate,
        ParallelIncrementalUpdate
-export requires_update, initialize!, update!, initialize_grid!, update_grid!
-export PolyesterBackend, ThreadsDynamicBackend, ThreadsStaticBackend
+export requires_update
+export initialize!, update!, initialize_grid!, update_grid!
+export SerialBackend, PolyesterBackend, ThreadsDynamicBackend, ThreadsStaticBackend,
+       default_backend
 export PeriodicBox, copy_neighborhood_search
 
 end # module PointNeighbors
@@ -126,19 +126,7 @@ function Base.empty!(cell_list::FullGridCellList)
     (; cells) = cell_list
 
     # `Base.empty!.(cells)`, but for all backends
-    @threaded cells for i in eachindex(cells)
-        emptyat!(cells, i)
-    end
-
-    return cell_list
-end
-
-# TODO Let `@threaded` use `get_backend`
-function Base.empty!(cell_list::FullGridCellList{<:DynamicVectorOfVectors})
-    (; cells) = cell_list
-
-    # `Base.empty!.(cells)`, but for all backends
-    @threaded cells.backend for i in eachindex(cells)
+    @threaded default_backend(cells) for i in eachindex(cells)
         emptyat!(cells, i)
     end
 
@@ -234,6 +222,24 @@ function max_points_per_cell(cells)
     return 100
 end
 
+@inline function check_cell_bounds(cell_list::FullGridCellList{<:DynamicVectorOfVectors{<:Any,
+                                                                                        <:Array}},
+                                   cell::Tuple)
+    (; linear_indices) = cell_list
+
+    # Make sure that points are not added to the outer padding layer, which is needed
+    # to ensure that neighboring cells in all directions of all non-empty cells exist.
+    if !all(cell[i] in 2:(size(linear_indices, i) - 1) for i in eachindex(cell))
+        size_ = [2:(size(linear_indices, i) - 1) for i in eachindex(cell)]
+        print_size_ = "[$(join(size_, ", "))]"
+        error("particle coordinates are NaN or outside the domain bounds of the cell list\n" *
+              "cell $cell is out of bounds for cell grid of size $print_size_")
+    end
+end
+
+# On GPUs, we can't throw a proper error message because string interpolation is not
+# allowed. Note that we cannot dispatch on `AbstractGPUArray`, as we are inside a kernel,
+# so the array types are something like `CuDeviceArray`, which is not an `AbstractGPUArray`.
 @inline function check_cell_bounds(cell_list::FullGridCellList, cell::Tuple)
     (; linear_indices) = cell_list
 
 
@@ -31,10 +31,3 @@ function Adapt.adapt_structure(to, nhs::GridNeighborhoodSearch)
     return GridNeighborhoodSearch(cell_list, search_radius, periodic_box, n_cells,
                                   cell_size, update_buffer, nhs.update_strategy)
 end
-
-# This is useful to pass the backend directly to `@threaded`
-@inline KernelAbstractions.get_backend(backend::KernelAbstractions.Backend) = backend
-
-@inline function KernelAbstractions.get_backend(vov::DynamicVectorOfVectors)
-    return KernelAbstractions.get_backend(vov.backend)
-end