Skip to content

Commit 40bc128

Browse files
authored
Merge branch 'main' into ef/localmem-kernel
2 parents 80cf700 + b6d11e6 commit 40bc128

31 files changed

+943
-391
lines changed

.github/workflows/FormatCheck.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ jobs:
2525
#
2626
# julia -e 'using Pkg; Pkg.add(PackageSpec(name = "JuliaFormatter", version = "0.13.0"))'
2727
run: |
28-
julia -e 'using Pkg; Pkg.add(PackageSpec(name = "JuliaFormatter", version="1.0.62"))'
28+
julia -e 'using Pkg; Pkg.add(PackageSpec(name = "JuliaFormatter", version="2.1.2"))'
2929
julia -e 'using JuliaFormatter; format(".")'
3030
- name: Format check
3131
run: |

.github/workflows/SpellCheck.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,4 @@ jobs:
1010
- name: Checkout Actions Repository
1111
uses: actions/checkout@v4
1212
- name: Check spelling
13-
uses: crate-ci/typos@v1.28.1
13+
uses: crate-ci/typos@v1.31.2

.github/workflows/ci.yml

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -34,39 +34,37 @@ concurrency:
3434

3535
jobs:
3636
build:
37-
name: Run Tests (Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }})
38-
runs-on: ubuntu-latest
37+
name: Run Tests (Julia ${{ matrix.version }} - ${{ matrix.os }})
38+
runs-on: ${{ matrix.os }}
3939
strategy:
4040
# Don't cancel all running jobs when one job fails
4141
fail-fast: false
4242
matrix:
4343
version:
44-
- '1.9'
45-
- '1'
44+
- 'min' # the earliest supported version compatible with the project
45+
- '1' # the latest stable 1.x release
4646
os:
4747
- ubuntu-latest
48-
arch:
49-
- x64
50-
include:
51-
# Also run tests on Windows and macOS-ARM, but only with the latest Julia version
52-
- version: '1'
53-
os: windows-latest
54-
arch: x64
55-
- version: '1'
56-
os: macos-14
57-
arch: arm64
48+
- windows-latest
49+
- macos-14
5850

5951
steps:
6052
- name: Check out project
6153
uses: actions/checkout@v4
54+
6255
- name: Set up Julia
6356
uses: julia-actions/setup-julia@v2
6457
with:
6558
version: ${{ matrix.version }}
66-
- run: julia -e 'using InteractiveUtils; versioninfo(verbose=true)'
59+
60+
- name: Display Julia version
61+
run: julia -e 'using InteractiveUtils; versioninfo(verbose=true)'
62+
6763
- uses: julia-actions/cache@v2
64+
6865
- name: Build package
6966
uses: julia-actions/julia-buildpkg@v1
67+
7068
- name: Run unit tests
7169
uses: julia-actions/julia-runtest@v1
7270
with:
@@ -75,12 +73,14 @@ jobs:
7573
coverage: ${{ matrix.os == 'ubuntu-latest' && matrix.version == '1' }}
7674
env:
7775
POINTNEIGHBORS_TEST: unit
76+
7877
- name: Process coverage results
7978
# Only run coverage in one Job (Ubuntu and latest Julia version)
8079
if: matrix.os == 'ubuntu-latest' && matrix.version == '1'
8180
uses: julia-actions/julia-processcoverage@v1
8281
with:
8382
directories: src,test
83+
8484
- name: Upload coverage report to Codecov
8585
# Only run coverage in one Job (Ubuntu and latest Julia version)
8686
if: matrix.os == 'ubuntu-latest' && matrix.version == '1'
@@ -91,6 +91,7 @@ jobs:
9191
flags: unit
9292
env:
9393
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
94+
9495
- name: Run benchmark tests
9596
uses: julia-actions/julia-runtest@v1
9697
with:

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ coverage_report/
99
*.jl.*.cov
1010
.vscode/
1111
run
12+
out/*
1213

1314
.DS_Store
1415

Project.toml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "PointNeighbors"
22
uuid = "1c4d5385-0a27-49de-8e2c-43b175c8985c"
33
authors = ["Erik Faulhaber <erik.faulhaber@uni-koeln.de>"]
4-
version = "0.4.6-dev"
4+
version = "0.6.3"
55

66
[deps]
77
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
@@ -14,12 +14,12 @@ Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
1414
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
1515

1616
[compat]
17-
Adapt = "3, 4"
18-
Atomix = "0.1, 1"
19-
GPUArraysCore = "0.1, 0.2"
17+
Adapt = "4"
18+
Atomix = "1"
19+
GPUArraysCore = "0.2"
2020
KernelAbstractions = "0.9"
2121
LinearAlgebra = "1"
2222
Polyester = "0.7.5"
2323
Reexport = "1"
2424
StaticArrays = "1"
25-
julia = "1.9"
25+
julia = "1.10"

benchmarks/count_neighbors.jl

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,20 @@ implementations are highlighted. On the other hand, this is the least realistic
1212
1313
For a computationally heavier benchmark, see [`benchmark_n_body`](@ref).
1414
"""
15-
function benchmark_count_neighbors(neighborhood_search, coordinates; parallel = true)
15+
function benchmark_count_neighbors(neighborhood_search, coordinates;
16+
parallelization_backend = default_backend(coordinates))
1617
n_neighbors = zeros(Int, size(coordinates, 2))
1718

18-
function count_neighbors!(n_neighbors, coordinates, neighborhood_search, parallel)
19+
function count_neighbors!(n_neighbors, coordinates, neighborhood_search,
20+
parallelization_backend)
1921
n_neighbors .= 0
2022

21-
foreach_point_neighbor(coordinates, coordinates, neighborhood_search,
22-
parallel = parallel) do i, _, _, _
23+
foreach_point_neighbor(coordinates, coordinates, neighborhood_search;
24+
parallelization_backend) do i, _, _, _
2325
n_neighbors[i] += 1
2426
end
2527
end
2628

2729
return @belapsed $count_neighbors!($n_neighbors, $coordinates,
28-
$neighborhood_search, $parallel)
30+
$neighborhood_search, $parallelization_backend)
2931
end

benchmarks/n_body.jl

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,24 +12,26 @@ This is a more realistic benchmark for particle-based simulations than
1212
However, due to the higher computational cost, differences between neighborhood search
1313
implementations are less pronounced.
1414
"""
15-
function benchmark_n_body(neighborhood_search, coordinates_; parallel = true)
15+
function benchmark_n_body(neighborhood_search, coordinates_;
16+
parallelization_backend = default_backend(coordinates_))
1617
# Passing a different backend like `CUDA.CUDABackend`
1718
# allows us to change the type of the array to run the benchmark on the GPU.
1819
# Passing a CPU `parallelization_backend` (like `PolyesterBackend()`) will not change anything here.
19-
coordinates = PointNeighbors.Adapt.adapt(parallel, coordinates_)
20-
nhs = PointNeighbors.Adapt.adapt(parallel, neighborhood_search)
20+
coordinates = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates_)
21+
nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search)
2122

2223
# This preserves the data type of `coordinates`, which makes it work for GPU types
2324
mass = 1e10 * (rand!(similar(coordinates, size(coordinates, 2))) .+ 1)
2425
G = 6.6743e-11
2526

2627
dv = similar(coordinates)
2728

28-
function compute_acceleration!(dv, coordinates, mass, G, neighborhood_search, parallel)
29+
function compute_acceleration!(dv, coordinates, mass, G, neighborhood_search,
30+
parallelization_backend)
2931
dv .= 0.0
3032

31-
foreach_point_neighbor(coordinates, coordinates, neighborhood_search,
32-
parallel = parallel) do i, j, pos_diff, distance
33+
foreach_point_neighbor(coordinates, coordinates, neighborhood_search;
34+
parallelization_backend) do i, j, pos_diff, distance
3335
# Only consider particles with a distance > 0
3436
distance < sqrt(eps()) && return
3537

@@ -43,5 +45,6 @@ function benchmark_n_body(neighborhood_search, coordinates_; parallel = true)
4345
return dv
4446
end
4547

46-
return @belapsed $compute_acceleration!($dv, $coordinates, $mass, $G, $nhs, $parallel)
48+
return @belapsed $compute_acceleration!($dv, $coordinates, $mass, $G, $nhs,
49+
$parallelization_backend)
4750
end

benchmarks/plot.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ include("benchmarks/benchmarks.jl")
3636
plot_benchmarks(benchmark_count_neighbors, (10, 10), 3)
3737
"""
3838
function plot_benchmarks(benchmark, n_points_per_dimension, iterations;
39-
parallel = true, title = "",
39+
parallelization_backend = PolyesterBackend(), title = "",
4040
seed = 1, perturbation_factor_position = 1.0)
4141
neighborhood_searches_names = ["TrivialNeighborhoodSearch";;
4242
"GridNeighborhoodSearch";;
@@ -69,7 +69,7 @@ function plot_benchmarks(benchmark, n_points_per_dimension, iterations;
6969
neighborhood_search = neighborhood_searches[i]
7070
initialize!(neighborhood_search, coordinates, coordinates)
7171

72-
time = benchmark(neighborhood_search, coordinates, parallel = parallel)
72+
time = benchmark(neighborhood_search, coordinates; parallelization_backend)
7373
times[iter, i] = time
7474
time_string = BenchmarkTools.prettytime(time * 1e9)
7575
println("$(neighborhood_searches_names[i])")

benchmarks/smoothed_particle_hydrodynamics.jl

Lines changed: 55 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,38 @@ using PointNeighbors
22
using TrixiParticles
33
using BenchmarkTools
44

5+
# Create a dummy semidiscretization type to be able to use a specific neighborhood search
6+
struct DummySemidiscretization{N, P}
7+
neighborhood_search :: N
8+
parallelization_backend :: P
9+
end
10+
11+
@inline function PointNeighbors.parallel_foreach(f, iterator, semi::DummySemidiscretization)
12+
PointNeighbors.parallel_foreach(f, iterator, semi.parallelization_backend)
13+
end
14+
15+
@inline function TrixiParticles.get_neighborhood_search(_, _, semi::DummySemidiscretization)
16+
return semi.neighborhood_search
17+
end
18+
19+
@inline function TrixiParticles.get_neighborhood_search(_, semi::DummySemidiscretization)
20+
return semi.neighborhood_search
21+
end
22+
523
"""
624
benchmark_wcsph(neighborhood_search, coordinates; parallelization_backend = default_backend(coordinates))
725
826
A benchmark of the right-hand side of a full real-life Weakly Compressible
927
Smoothed Particle Hydrodynamics (WCSPH) simulation with TrixiParticles.jl.
1028
This method is used to simulate an incompressible fluid.
1129
"""
12-
function benchmark_wcsph(neighborhood_search, coordinates; parallel = true)
30+
function benchmark_wcsph(neighborhood_search, coordinates;
31+
parallelization_backend = default_backend(coordinates))
1332
density = 1000.0
1433
fluid = InitialCondition(; coordinates, density, mass = 0.1)
1534

16-
# Compact support == smoothing length for the Wendland kernel
17-
smoothing_length = PointNeighbors.search_radius(neighborhood_search)
35+
# Compact support == 2 * smoothing length for these kernels
36+
smoothing_length = PointNeighbors.search_radius(neighborhood_search) / 2
1837
if ndims(neighborhood_search) == 1
1938
smoothing_kernel = SchoenbergCubicSplineKernel{1}()
2039
else
@@ -34,40 +53,36 @@ function benchmark_wcsph(neighborhood_search, coordinates; parallel = true)
3453
smoothing_length, viscosity = viscosity,
3554
density_diffusion = density_diffusion)
3655

37-
# Note that we cannot just disable parallelism in TrixiParticles.
38-
# But passing a different backend like `CUDA.CUDABackend`
39-
# allows us to change the type of the array to run the benchmark on the GPU.
40-
if parallel isa Bool
41-
system = fluid_system
42-
nhs = neighborhood_search
43-
else
44-
system = PointNeighbors.Adapt.adapt(parallel, fluid_system)
45-
nhs = PointNeighbors.Adapt.adapt(parallel, neighborhood_search)
46-
end
56+
system = PointNeighbors.Adapt.adapt(parallelization_backend, fluid_system)
57+
nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search)
58+
semi = DummySemidiscretization(nhs, parallelization_backend)
4759

48-
v = PointNeighbors.Adapt.adapt(parallel, vcat(fluid.velocity, fluid.density'))
49-
u = PointNeighbors.Adapt.adapt(parallel, coordinates)
60+
v = PointNeighbors.Adapt.adapt(parallelization_backend,
61+
vcat(fluid.velocity, fluid.density'))
62+
u = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates)
5063
dv = zero(v)
5164

5265
# Initialize the system
53-
TrixiParticles.initialize!(system, nhs)
54-
TrixiParticles.compute_pressure!(system, v)
66+
TrixiParticles.initialize!(system, semi)
67+
TrixiParticles.compute_pressure!(system, v, semi)
5568

56-
return @belapsed TrixiParticles.interact!($dv, $v, $u, $v, $u, $nhs, $system, $system)
69+
return @belapsed TrixiParticles.interact!($dv, $v, $u, $v, $u, $system, $system, $semi)
5770
end
5871

5972
"""
6073
benchmark_wcsph_fp32(neighborhood_search, coordinates; parallelization_backend = default_backend(coordinates))
6174
6275
Like [`benchmark_wcsph`](@ref), but using single precision floating point numbers.
6376
"""
64-
function benchmark_wcsph_fp32(neighborhood_search, coordinates_; parallel = true)
77+
function benchmark_wcsph_fp32(neighborhood_search, coordinates_;
78+
parallelization_backend = default_backend(coordinates_))
6579
coordinates = convert(Matrix{Float32}, coordinates_)
6680
density = 1000.0f0
6781
fluid = InitialCondition(; coordinates, density, mass = 0.1f0)
6882

69-
# Compact support == smoothing length for the Wendland kernel
70-
smoothing_length = convert(Float32, PointNeighbors.search_radius(neighborhood_search))
83+
# Compact support == 2 * smoothing length for these kernels
84+
smoothing_length = convert(Float32,
85+
PointNeighbors.search_radius(neighborhood_search) / 2)
7186
if ndims(neighborhood_search) == 1
7287
smoothing_kernel = SchoenbergCubicSplineKernel{1}()
7388
else
@@ -85,29 +100,24 @@ function benchmark_wcsph_fp32(neighborhood_search, coordinates_; parallel = true
85100
fluid_system = WeaklyCompressibleSPHSystem(fluid, fluid_density_calculator,
86101
state_equation, smoothing_kernel,
87102
smoothing_length, viscosity = viscosity,
88-
acceleration = (0.0f0, 0.0f0, 0.0f0),
103+
acceleration = ntuple(_ -> 0.0f0,
104+
Val(ndims(neighborhood_search))),
89105
density_diffusion = density_diffusion)
90106

91-
# Note that we cannot just disable parallelism in TrixiParticles.
92-
# But passing a different backend like `CUDA.CUDABackend`
93-
# allows us to change the type of the array to run the benchmark on the GPU.
94-
if parallel isa Bool
95-
system = fluid_system
96-
nhs = neighborhood_search
97-
else
98-
system = PointNeighbors.Adapt.adapt(parallel, fluid_system)
99-
nhs = PointNeighbors.Adapt.adapt(parallel, neighborhood_search)
100-
end
107+
system = PointNeighbors.Adapt.adapt(parallelization_backend, fluid_system)
108+
nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search)
109+
semi = DummySemidiscretization(nhs, parallelization_backend)
101110

102-
v = PointNeighbors.Adapt.adapt(parallel, vcat(fluid.velocity, fluid.density'))
103-
u = PointNeighbors.Adapt.adapt(parallel, coordinates)
111+
v = PointNeighbors.Adapt.adapt(parallelization_backend,
112+
vcat(fluid.velocity, fluid.density'))
113+
u = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates)
104114
dv = zero(v)
105115

106116
# Initialize the system
107-
TrixiParticles.initialize!(system, nhs)
108-
TrixiParticles.compute_pressure!(system, v)
117+
TrixiParticles.initialize!(system, semi)
118+
TrixiParticles.compute_pressure!(system, v, semi)
109119

110-
return @belapsed TrixiParticles.interact!($dv, $v, $u, $v, $u, $nhs, $system, $system)
120+
return @belapsed TrixiParticles.interact!($dv, $v, $u, $v, $u, $system, $system, $semi)
111121
end
112122

113123
"""
@@ -117,12 +127,13 @@ A benchmark of the right-hand side of a full real-life Total Lagrangian
117127
Smoothed Particle Hydrodynamics (TLSPH) simulation with TrixiParticles.jl.
118128
This method is used to simulate an elastic structure.
119129
"""
120-
function benchmark_tlsph(neighborhood_search, coordinates; parallel = true)
130+
function benchmark_tlsph(neighborhood_search, coordinates;
131+
parallelization_backend = default_backend(coordinates))
121132
material = (density = 1000.0, E = 1.4e6, nu = 0.4)
122133
solid = InitialCondition(; coordinates, density = material.density, mass = 0.1)
123134

124-
# Compact support == smoothing length for the Wendland kernel
125-
smoothing_length = PointNeighbors.search_radius(neighborhood_search)
135+
# Compact support == 2 * smoothing length for these kernels
136+
smoothing_length = PointNeighbors.search_radius(neighborhood_search) / 2
126137
if ndims(neighborhood_search) == 1
127138
smoothing_kernel = SchoenbergCubicSplineKernel{1}()
128139
else
@@ -131,14 +142,15 @@ function benchmark_tlsph(neighborhood_search, coordinates; parallel = true)
131142

132143
solid_system = TotalLagrangianSPHSystem(solid, smoothing_kernel, smoothing_length,
133144
material.E, material.nu)
145+
semi = DummySemidiscretization(neighborhood_search, parallelization_backend)
134146

135147
v = copy(solid.velocity)
136148
u = copy(solid.coordinates)
137149
dv = zero(v)
138150

139151
# Initialize the system
140-
TrixiParticles.initialize!(solid_system, neighborhood_search)
152+
TrixiParticles.initialize!(solid_system, semi)
141153

142-
return @belapsed TrixiParticles.interact!($dv, $v, $u, $v, $u, $neighborhood_search,
143-
$solid_system, $solid_system)
154+
return @belapsed TrixiParticles.interact!($dv, $v, $u, $v, $u,
155+
$solid_system, $solid_system, $semi)
144156
end

0 commit comments

Comments
 (0)