Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
d50532f
add source term gpu kernel
MarcoArtiano May 17, 2026
453f04d
add dispatch for no source terms
MarcoArtiano May 17, 2026
5b7e0e8
add 1d and 3d source gpu kernels
MarcoArtiano May 17, 2026
158e481
specify signature
MarcoArtiano May 17, 2026
8f8b0ef
add tests
MarcoArtiano May 18, 2026
5add261
Update test/test_amdgpu_2d.jl
JoshuaLampert May 18, 2026
0a55992
Update test/test_amdgpu_2d.jl
JoshuaLampert May 18, 2026
244504d
refactoring
MarcoArtiano May 18, 2026
d897a73
format
MarcoArtiano May 18, 2026
74e140d
refactor also 3D kernels
MarcoArtiano May 18, 2026
7ecf686
Merge branch 'main' into ma/source_gpu
MarcoArtiano May 18, 2026
f1209fe
format
MarcoArtiano May 18, 2026
c978036
fix tests
MarcoArtiano May 18, 2026
5a5948f
fix tests
MarcoArtiano May 19, 2026
d66c664
fix elixirs
MarcoArtiano May 19, 2026
b9bbea5
fix tests
MarcoArtiano May 19, 2026
69f13e4
Merge branch 'main' into ma/source_gpu
MarcoArtiano May 19, 2026
55ded1f
fix tests
MarcoArtiano May 19, 2026
303cd35
Apply suggestions from code review
MarcoArtiano May 19, 2026
8733fdd
rename tests, delete 1D P4est
MarcoArtiano May 19, 2026
01c2764
add kernel abstraction tests
MarcoArtiano May 19, 2026
cf75145
add cuda tests
MarcoArtiano May 19, 2026
20e890d
fix tests
MarcoArtiano May 19, 2026
6d8c9d0
Merge branch 'main' into ma/source_gpu
ranocha May 19, 2026
031528d
fix allocations
MarcoArtiano May 19, 2026
6044e49
fix allocations 3D
MarcoArtiano May 19, 2026
e19fe5f
fix allocations
MarcoArtiano May 19, 2026
8fda9ee
Merge branch 'main' into ma/source_gpu
ranocha May 21, 2026
f3e650d
Apply suggestions from code review
MarcoArtiano May 21, 2026
b15439e
Update test/test_amdgpu_3d.jl
MarcoArtiano May 21, 2026
594eefd
Merge branch 'main' into ma/source_gpu
MarcoArtiano May 21, 2026
7f62e6e
Apply suggestions from code review
MarcoArtiano May 21, 2026
a743ff5
Merge branch 'main' into ma/source_gpu
MarcoArtiano May 21, 2026
d4f6ff9
add comments about storage_type and real_type choices
MarcoArtiano May 21, 2026
d66b690
Merge branch 'main' into ma/source_gpu
MarcoArtiano May 21, 2026
34215b6
Apply suggestions from code review
MarcoArtiano May 21, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions examples/p4est_2d_dgsem/elixir_euler_source_terms.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# The same setup as tree_2d_dgsem/elixir_euler_source_terms.jl
# to verify the P4estMesh implementation against TreeMesh

using OrdinaryDiffEqLowStorageRK
using Trixi

###############################################################################
# semidiscretization of the compressible Euler equations
gamma = 1.4
equations = CompressibleEulerEquations2D(gamma)

initial_condition = initial_condition_convergence_test

# Up to version 0.13.0, `max_abs_speed_naive` was used as the default wave speed estimate of
# `const flux_lax_friedrichs = FluxLaxFriedrichs(), i.e., `FluxLaxFriedrichs(max_abs_speed = max_abs_speed_naive)`.
# In the `StepsizeCallback`, though, the less diffusive `max_abs_speeds` is employed which is consistent with `max_abs_speed`.
# Thus, we exchanged in PR#2458 the default wave speed used in the LLF flux to `max_abs_speed`.
# To ensure that every example still runs we specify explicitly `FluxLaxFriedrichs(max_abs_speed_naive)`.
# We remark, however, that the now default `max_abs_speed` is in general recommended due to compliance with the
# `StepsizeCallback` (CFL-Condition) and less diffusion.
solver = DGSEM(polydeg = 3, surface_flux = FluxLaxFriedrichs(max_abs_speed_naive))

coordinates_min = (0.0, 0.0)
coordinates_max = (2.0, 2.0)

trees_per_dimension = (16, 16)
mesh = P4estMesh(trees_per_dimension,
polydeg = 3, initial_refinement_level = 0,
coordinates_min = coordinates_min, coordinates_max = coordinates_max,
periodicity = true)

semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver;
source_terms = source_terms_convergence_test,
boundary_conditions = boundary_condition_periodic)

###############################################################################
# ODE solvers, callbacks etc.

tspan = (0.0, 2.0)
# Create ODE problem with time span from 0.0 to 1.0
# Setting `real_type` allows to change the real number type, e.g., to `Float32`.
# This is particularly useful when changing the `storage_type` to a GPU array
# type such as `ROCArray` (AMD) or `CuArray` (NVIDIA CUDA).
ode = semidiscretize(semi, tspan; real_type = nothing, storage_type = nothing)
Comment thread
ranocha marked this conversation as resolved.
Comment thread
MarcoArtiano marked this conversation as resolved.

summary_callback = SummaryCallback()

analysis_interval = 100
analysis_callback = AnalysisCallback(semi, interval = analysis_interval)

alive_callback = AliveCallback(analysis_interval = analysis_interval)

save_solution = SaveSolutionCallback(interval = 100,
save_initial_solution = true,
save_final_solution = true,
solution_variables = cons2prim)

stepsize_callback = StepsizeCallback(cfl = 1.0)

callbacks = CallbackSet(summary_callback,
analysis_callback, alive_callback,
save_solution,
stepsize_callback)

###############################################################################
# run the simulation

sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false);
dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
ode_default_options()..., callback = callbacks);
72 changes: 72 additions & 0 deletions examples/p4est_3d_dgsem/elixir_euler_source_terms.jl
Comment thread
ranocha marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# The same setup as tree_3d_dgsem/elixir_euler_source_terms.jl
# to verify the StructuredMesh implementation against TreeMesh

using OrdinaryDiffEqLowStorageRK
using Trixi

###############################################################################
# semidiscretization of the compressible Euler equations
gamma = 1.4
equations = CompressibleEulerEquations3D(gamma)

initial_condition = initial_condition_convergence_test

# Up to version 0.13.0, `max_abs_speed_naive` was used as the default wave speed estimate of
# `const flux_lax_friedrichs = FluxLaxFriedrichs(), i.e., `FluxLaxFriedrichs(max_abs_speed = max_abs_speed_naive)`.
# In the `StepsizeCallback`, though, the less diffusive `max_abs_speeds` is employed which is consistent with `max_abs_speed`.
# Thus, we exchanged in PR#2458 the default wave speed used in the LLF flux to `max_abs_speed`.
# To ensure that every example still runs we specify explicitly `FluxLaxFriedrichs(max_abs_speed_naive)`.
# We remark, however, that the now default `max_abs_speed` is in general recommended due to compliance with the
# `StepsizeCallback` (CFL-Condition) and less diffusion.
solver = DGSEM(polydeg = 3, surface_flux = FluxLaxFriedrichs(max_abs_speed_naive),
volume_integral = VolumeIntegralWeakForm())

coordinates_min = (0.0, 0.0, 0.0)
coordinates_max = (2.0, 2.0, 2.0)

trees_per_dimension = (4, 4, 4)

mesh = P4estMesh(trees_per_dimension, polydeg = 3,
coordinates_min = coordinates_min, coordinates_max = coordinates_max,
initial_refinement_level = 1,
periodicity = true)

semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver;
source_terms = source_terms_convergence_test,
boundary_conditions = boundary_condition_periodic)

###############################################################################
# ODE solvers, callbacks etc.

tspan = (0.0, 5.0)
# Create ODE problem with time span from 0.0 to 1.0
# Setting `real_type` allows to change the real number type, e.g., to `Float32`.
# This is particularly useful when changing the `storage_type` to a GPU array
# type such as `ROCArray` (AMD) or `CuArray` (NVIDIA CUDA).
ode = semidiscretize(semi, tspan; real_type = nothing, storage_type = nothing)
Comment thread
MarcoArtiano marked this conversation as resolved.

summary_callback = SummaryCallback()

analysis_interval = 100
analysis_callback = AnalysisCallback(semi, interval = analysis_interval)

alive_callback = AliveCallback(analysis_interval = analysis_interval)

save_solution = SaveSolutionCallback(interval = 100,
save_initial_solution = true,
save_final_solution = true,
solution_variables = cons2prim)

stepsize_callback = StepsizeCallback(cfl = 0.6)

callbacks = CallbackSet(summary_callback,
analysis_callback, alive_callback,
save_solution,
stepsize_callback)

###############################################################################
# run the simulation

sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false);
dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
ode_default_options()..., callback = callbacks);
1 change: 1 addition & 0 deletions src/solvers/dgsem_p4est/dg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ include("dg_2d_gpu.jl")
include("dg_3d.jl")
include("dg_3d_parabolic.jl")
include("dg_parallel.jl")
include("dg_3d_gpu.jl")

# Subcell limiters
include("subcell_limiters.jl")
Expand Down
2 changes: 1 addition & 1 deletion src/solvers/dgsem_p4est/dg_2d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1245,7 +1245,7 @@ function rhs!(du, u, t, u_parent, semis,

# Calculate source terms
@trixi_timeit timer() "source terms" begin
calc_sources!(du, u, t, source_terms, equations, dg, cache)
calc_sources!(backend, du, u, t, source_terms, equations, dg, cache)
end

return nothing
Expand Down
29 changes: 29 additions & 0 deletions src/solvers/dgsem_p4est/dg_2d_gpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -118,4 +118,33 @@ end
apply_jacobian_per_quadrature_node!(du, MeshT, equations, dg, inverse_jacobian,
i, j, element)
end

@kernel function calc_sources_KAkernel!(du, u, t, source_terms,
node_coordinates,
equations::AbstractEquations{2}, dg, cache)
i, j, element = @index(Global, NTuple)
u_local = get_node_vars(u, equations, dg, i, j, element)
x_local = get_node_coords(node_coordinates, equations, dg, i, j, element)

du_local = source_terms(u_local, x_local, t, equations)

add_to_node_vars!(du, du_local, equations, dg, i, j, element)
end

function calc_sources!(backend::Backend, du, u, t, source_terms,
equations::AbstractEquations{2}, dg::DG, cache)
nelements(dg, cache) == 0 && return nothing
@unpack node_coordinates = cache.elements
kernel_cache = kernel_filter_cache(cache)
kernel! = calc_sources_KAkernel!(backend)
kernel!(du, u, t, source_terms, node_coordinates, equations, dg, kernel_cache,
ndrange = (nnodes(dg), nnodes(dg), nelements(dg, cache)))

return nothing
end

function calc_sources!(backend::Backend, du, u, t, source_terms::Nothing,
equations::AbstractEquations{2}, dg::DG, cache)
return nothing
end
end #muladd
76 changes: 0 additions & 76 deletions src/solvers/dgsem_p4est/dg_3d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -106,27 +106,6 @@ function prolong2interfaces!(backend::Nothing, cache, u,
return nothing
end

function prolong2interfaces!(backend::Backend, cache, u,
mesh::Union{P4estMesh{3}, T8codeMesh{3}},
equations, dg::DG)
@unpack interfaces = cache
@unpack neighbor_ids, node_indices = cache.interfaces
index_range = eachnode(dg)

kernel! = prolong2interfaces_KAkernel!(backend)
kernel!(interfaces.u, u, typeof(mesh), equations, neighbor_ids, node_indices,
index_range,
ndrange = ninterfaces(interfaces))
return nothing
end

@kernel function prolong2interfaces_KAkernel!(interface_u, u, MeshT, equations,
neighbor_ids, node_indices, index_range)
interface = @index(Global)
prolong2interfaces_per_interface!(interface_u, u, MeshT, equations, neighbor_ids,
node_indices, index_range, interface)
end

@inline function prolong2interfaces_per_interface!(u_interface, u,
::Type{<:Union{P4estMesh{3},
T8codeMesh{3}}},
Expand Down Expand Up @@ -223,38 +202,6 @@ function calc_interface_flux!(backend::Nothing, surface_flux_values,
return nothing
end

function calc_interface_flux!(backend::Backend, surface_flux_values,
mesh::Union{P4estMesh{3}, T8codeMesh{3}},
have_nonconservative_terms,
equations, surface_integral, dg::DG, cache)
@unpack neighbor_ids, node_indices = cache.interfaces
@unpack contravariant_vectors = cache.elements
index_range = eachnode(dg)

kernel! = calc_interface_flux_KAkernel!(backend)
kernel!(surface_flux_values, typeof(mesh), have_nonconservative_terms, equations,
surface_integral, typeof(dg), cache.interfaces.u,
neighbor_ids, node_indices, contravariant_vectors, index_range,
ndrange = ninterfaces(cache.interfaces))
return nothing
end

@kernel function calc_interface_flux_KAkernel!(surface_flux_values, MeshT,
have_nonconservative_terms, equations,
surface_integral, SolverT, u_interface,
neighbor_ids, node_indices,
contravariant_vectors, index_range)
interface = @index(Global)
calc_interface_flux_per_interface!(surface_flux_values,
MeshT,
have_nonconservative_terms,
equations, surface_integral, SolverT,
u_interface,
neighbor_ids, node_indices,
contravariant_vectors,
index_range, interface)
end

@inline function calc_interface_flux_per_interface!(surface_flux_values,
MeshT::Type{<:Union{P4estMesh{3},
T8codeMesh{3}}},
Expand Down Expand Up @@ -1026,29 +973,6 @@ function calc_surface_integral!(backend::Nothing, du, u,
return nothing
end

function calc_surface_integral!(backend::Backend, du, u,
mesh::Union{P4estMesh{3}, T8codeMesh{3}},
equations,
surface_integral::SurfaceIntegralWeakForm,
dg::DGSEM, cache)
@unpack inverse_weights = dg.basis
@unpack surface_flux_values = cache.elements

kernel! = calc_surface_integral_KAkernel!(backend)
kernel!(du, typeof(mesh), equations, surface_integral, dg, inverse_weights[1],
surface_flux_values, ndrange = nelements(cache.elements))
return nothing
end

@kernel function calc_surface_integral_KAkernel!(du, MeshT, equations,
surface_integral, dg, factor,
surface_flux_values)
element = @index(Global)
calc_surface_integral_per_element!(du, MeshT,
equations, surface_integral, dg, factor,
surface_flux_values, element)
end

@inline function calc_surface_integral_per_element!(du,
::Type{<:Union{P4estMesh{3},
T8codeMesh{3}}},
Expand Down
Loading
Loading