Skip to content
Merged
Show file tree
Hide file tree
Changes from 36 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
eeee0d2
add combined for mpi 3D
MarcoArtiano Jun 4, 2026
8d3a6a4
Update src/solvers/dgsem_p4est/dg_3d_parallel.jl
MarcoArtiano Jun 4, 2026
7cb76a3
Update test/test_mpi_p4est_3d.jl
MarcoArtiano Jun 4, 2026
42f0b4a
add nonconservative volume kernel and boundary flux
MarcoArtiano Jun 4, 2026
6e2e66a
add tests
MarcoArtiano Jun 4, 2026
e0f0466
move flux definitions outside
MarcoArtiano Jun 4, 2026
e949772
Merge branch 'ma/combined_noncons_mpi' of github.com:trixi-framework/…
MarcoArtiano Jun 4, 2026
639e967
fix tests
MarcoArtiano Jun 4, 2026
f5e429e
Update src/solvers/dgsem_p4est/dg_3d_parallel.jl
MarcoArtiano Jun 4, 2026
92b8a2e
create elixir
MarcoArtiano Jun 11, 2026
3344c55
git pull Merge branch 'ma/combined_noncons_mpi' of github.com:trixi-f…
MarcoArtiano Jun 11, 2026
6c4ee51
Merge branch 'main' into ma/combined_noncons_mpi
MarcoArtiano Jun 11, 2026
a4572ed
Update test/test_mpi_p4est_3d.jl
MarcoArtiano Jun 11, 2026
da3b66c
add comments and fix bc
MarcoArtiano Jun 11, 2026
78e5543
Apply suggestions from code review
MarcoArtiano Jun 11, 2026
d78e3e4
Apply suggestions from code review
MarcoArtiano Jun 11, 2026
8888ee5
add elixir and combined boundary flux
MarcoArtiano Jun 11, 2026
e329549
Merge branch 'ma/combined_noncons_mpi' into ma/noncons_gpu
MarcoArtiano Jun 11, 2026
6a63f24
Apply suggestions from code review
MarcoArtiano Jun 11, 2026
9742f3d
Update src/solvers/dgsem_p4est/dg_3d_gpu.jl
MarcoArtiano Jun 12, 2026
6981dc0
Merge branch 'main' into ma/noncons_gpu
MarcoArtiano Jun 12, 2026
c805e1f
fix tests
MarcoArtiano Jun 13, 2026
05ccb4b
Merge branch 'ma/noncons_gpu' of github.com:trixi-framework/Trixi.jl …
MarcoArtiano Jun 13, 2026
4d0b2ac
Merge branch 'main' into ma/noncons_gpu
MarcoArtiano Jun 13, 2026
f13a7e5
fix type glm speed callback
MarcoArtiano Jun 13, 2026
f2200a5
Merge branch 'ma/noncons_gpu' of github.com:trixi-framework/Trixi.jl …
MarcoArtiano Jun 13, 2026
4c977bb
remove show statements
MarcoArtiano Jun 13, 2026
ba7ebbc
Merge branch 'main' into ma/noncons_gpu
MarcoArtiano Jun 13, 2026
ac11959
Update src/callbacks_step/glm_speed.jl
MarcoArtiano Jun 13, 2026
1079063
Merge branch 'main' into ma/noncons_gpu
MarcoArtiano Jun 15, 2026
24d689a
Merge branch 'main' into ma/noncons_gpu
MarcoArtiano Jun 22, 2026
4bea090
remove callbacks from tests
MarcoArtiano Jun 22, 2026
49d5c4d
add analysis callback
MarcoArtiano Jun 22, 2026
9051023
increase gpu allowed allocations
MarcoArtiano Jun 22, 2026
89e51cf
override default intergrals
MarcoArtiano Jun 22, 2026
0f3e153
remove false true pattern for nonconservative systems
MarcoArtiano Jun 22, 2026
80dfd81
Update src/callbacks_step/glm_speed.jl
MarcoArtiano Jun 23, 2026
66110be
remove obsolete comment
MarcoArtiano Jun 23, 2026
57a1a54
Merge branch 'main' into ma/noncons_gpu
MarcoArtiano Jun 23, 2026
13e9818
restore save solution callback
MarcoArtiano Jun 23, 2026
3feed70
Merge branch 'main' into ma/noncons_gpu
ranocha Jun 24, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -233,18 +233,16 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver;
# ODE solvers, callbacks etc.

tspan = (0.0, 1.0)
ode = semidiscretize(semi, tspan)
ode = semidiscretize(semi, tspan; real_type = nothing, storage_type = nothing)

summary_callback = SummaryCallback()

analysis_interval = 100
analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
alive_callback = AliveCallback(analysis_interval = analysis_interval)

save_solution = SaveSolutionCallback(interval = 100,
save_initial_solution = true,
save_final_solution = true,
solution_variables = cons2prim)
analysis_callback = AnalysisCallback(semi, interval = analysis_interval,
analysis_integrals = (entropy,))

alive_callback = AliveCallback(analysis_interval = analysis_interval)

cfl = 1.0
stepsize_callback = StepsizeCallback(cfl = cfl)
Expand All @@ -253,7 +251,6 @@ glm_speed_callback = GlmSpeedCallback(glm_scale = 0.5, cfl = cfl)

callbacks = CallbackSet(summary_callback,
analysis_callback, alive_callback,
save_solution,
Comment thread
ranocha marked this conversation as resolved.
stepsize_callback,
glm_speed_callback)

Expand Down
6 changes: 3 additions & 3 deletions src/callbacks_step/glm_speed.jl
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ function Base.show(io::IO, ::MIME"text/plain",
end
end

function GlmSpeedCallback(; glm_scale = 0.5, cfl, semi_indices = Int[])
function GlmSpeedCallback(; glm_scale = 0.5f0, cfl, semi_indices = Int[])
@assert 0<=glm_scale<=1 "glm_scale must be between 0 and 1"

cfl_function = isa(cfl, Real) ? Returns(cfl) : cfl
Expand All @@ -86,9 +86,9 @@ function update_cleaning_speed!(semi, glm_speed_callback, dt, t)
@unpack glm_scale, cfl = glm_speed_callback

mesh, equations, solver, cache = mesh_equations_solver_cache(semi)

# compute time step for GLM linear advection equation with c_h=1 (redone due to the possible AMR)
c_h_deltat = calc_dt_for_cleaning_speed(cfl(t), mesh, equations, solver, cache)
c_h_deltat = calc_dt_for_cleaning_speed(cfl(t), mesh, equations, solver,
cache)
Comment thread
MarcoArtiano marked this conversation as resolved.
Outdated

# c_h is proportional to its own time step divided by the complete MHD time step
# We use @reset here since the equations are immutable (to work on GPUs etc.).
Expand Down
193 changes: 183 additions & 10 deletions src/solvers/dgsem_p4est/dg_3d_gpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
combine_conservative_and_nonconservative_fluxes(volume_integral.volume_flux,
equations),
dg,
volume_integral, Val(NNODES),
volume_integral.volume_flux, Val(NNODES),
derivative_split,
contravariant_vectors,
ndrange = (NNODES, NNODES, NNODES, nelements(dg, cache)))
Expand All @@ -33,7 +33,7 @@ end
have_nonconservative_terms::False,
combine_conservative_and_nonconservative_fluxes::False,
dg::DGSEM,
volume_integral,
volume_flux,
::Val{NNODES},
derivative_split,
contravariant_vectors,
Expand All @@ -42,8 +42,6 @@ end
# This can (hopefully) be optimized away due to constant propagation.
i, j, k, element = @index(Global, NTuple)

@unpack volume_flux = volume_integral

u_node = get_node_vars(u, equations, dg, i, j, k, element)

# pull the contravariant vectors in each coordinate direction
Expand Down Expand Up @@ -137,6 +135,117 @@ end
end
end

@kernel function flux_differencing_KAkernel!(du, u, equations,
MeshT::Type{<:Union{P4estMesh{3},
T8codeMesh{3}}},
have_nonconservative_terms::True,
combine_conservative_and_nonconservative_fluxes::True,
dg::DGSEM,
volume_flux,
::Val{NNODES},
derivative_split,
contravariant_vectors,
alpha = true) where {NNODES}
# `true * [some floating point value] == [exactly the same floating point value]`
# This can (hopefully) be optimized away due to constant propagation.
i, j, k, element = @index(Global, NTuple)

u_node = get_node_vars(u, equations, dg, i, j, k, element)

# pull the contravariant vectors in each coordinate direction
Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, k, element)
Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, k, element)
Ja3_node = get_contravariant_vector(3, contravariant_vectors, i, j, k, element)

# All diagonal entries of `derivative_split` are zero. Thus, we can skip
# the computation of the diagonal terms. In addition, we use the symmetry
# of the `volume_flux` to save half of the possible two-point flux
# computations.
#
# Instead of assigning thread i the partners i+1, …, N,
# we distribute the half-sweep cyclically: each thread visits
# half = div(N,2) partners at a fixed rotating offset.
# Every unordered pair is still covered exactly
# once, but now every thread performs the same number of loop iterations.
# When N is even (odd polynomial degree) the antipodal pair at
# offset half is shared by two threads, so its contribution is weighted by
# 1/2 to avoid double counting.
#
# See Section 4.1 (Eq. 6) of
# - Waterhouse, Waruszewski, Wilcox, Giraldo (2026)
# GPU Performance of an Entropy-Stable Discontinuous Galerkin Euler Solver
# with Non-Conservative Terms.
# arXiv (pre-print): https://arxiv.org/abs/2605.16684

half_nnodes = div(NNODES, 2)
even_nodes = iseven(NNODES)

KernelAbstractions.Extras.@unroll for offset in 1:half_nnodes
# weight the antipodal pair by 1/2 only when the number of nodes is even
weight = (even_nodes && offset == half_nnodes) ? 0.5f0 : 1.0f0

# first coordinate direction: rotate the partner index along `i`
ii = mod(i - 1 + offset, NNODES) + 1
u_node_ii = get_node_vars(u, equations, dg, ii, j, k, element)
# pull the contravariant vectors and compute the average
Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors,
ii, j, k, element)
Ja1_avg = 0.5f0 * (Ja1_node + Ja1_node_ii)
# compute the contravariant volume flux in the direction of the
# averaged contravariant vector
fluxtilde1_left, fluxtilde1_right = volume_flux(u_node, u_node_ii, Ja1_avg,
Comment thread
ranocha marked this conversation as resolved.
equations)
multiply_add_to_first_axis_atomic!(du,
weight * alpha * derivative_split[i, ii],
fluxtilde1_left,
i, j, k, element)
multiply_add_to_first_axis_atomic!(du,
weight * alpha * derivative_split[ii, i],
fluxtilde1_right,
ii, j, k, element)

# second coordinate direction: rotate the partner index along `j`
jj = mod(j - 1 + offset, NNODES) + 1
u_node_jj = get_node_vars(u, equations, dg, i, jj, k, element)
# pull the contravariant vectors and compute the average
Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors,
i, jj, k, element)
Ja2_avg = 0.5f0 * (Ja2_node + Ja2_node_jj)
# compute the contravariant volume flux in the direction of the
# averaged contravariant vector
fluxtilde2_left, fluxtilde2_right = volume_flux(u_node, u_node_jj, Ja2_avg,
equations)
multiply_add_to_first_axis_atomic!(du,
weight * alpha * derivative_split[j, jj],
fluxtilde2_left,
i, j, k, element)
multiply_add_to_first_axis_atomic!(du,
weight * alpha * derivative_split[jj, j],
fluxtilde2_right,
i, jj, k, element)

# third coordinate direction: rotate the partner index along `k`
kk = mod(k - 1 + offset, NNODES) + 1
u_node_kk = get_node_vars(u, equations, dg, i, j, kk, element)
# pull the contravariant vectors and compute the average
Ja3_node_kk = get_contravariant_vector(3, contravariant_vectors,
i, j, kk, element)
Ja3_avg = 0.5f0 * (Ja3_node + Ja3_node_kk)
# compute the contravariant volume flux in the direction of the
# averaged contravariant vector
fluxtilde3_left, fluxtilde3_right = volume_flux(u_node, u_node_kk, Ja3_avg,
equations)
multiply_add_to_first_axis_atomic!(du,
weight * alpha * derivative_split[k, kk],
fluxtilde3_left,
i, j, k, element)
multiply_add_to_first_axis_atomic!(du,
weight * alpha * derivative_split[kk, k],
fluxtilde3_right,
i, j, kk, element)
end
end

function prolong2interfaces!(backend::Backend, cache, u,
mesh::Union{P4estMesh{3}, T8codeMesh{3}},
equations, dg::DG)
Expand Down Expand Up @@ -372,30 +481,94 @@ end
surface_integral, dg, cache,
i_index, j_index, k_index, i_node_index,
j_node_index,
direction_index, element,
boundary, node_coordinates,
direction_index, element_index,
boundary_index, node_coordinates,
Comment thread
ranocha marked this conversation as resolved.
contravariant_vectors)
@unpack surface_flux = surface_integral

# Extract solution data from boundary container
u_inner = get_node_vars(u, equations, dg, i_node_index, j_node_index, boundary)
u_inner = get_node_vars(u, equations, dg, i_node_index, j_node_index,
boundary_index)

# Outward-pointing normal direction (not normalized)
normal_direction = get_normal_direction(direction_index, contravariant_vectors,
i_index, j_index, k_index, element)
i_index, j_index, k_index, element_index)

# Coordinates at boundary node
x = get_node_coords(node_coordinates, equations, dg,
i_index, j_index, k_index, element)
i_index, j_index, k_index, element_index)

flux_ = boundary_condition(u_inner, normal_direction, x, t, surface_flux, equations)

# Copy flux to element storage in the correct orientation
for v in eachvariable(equations)
surface_flux_values[v, i_node_index, j_node_index, direction_index, element] = flux_[v]
surface_flux_values[v, i_node_index, j_node_index, direction_index, element_index] = flux_[v]
end
end

@inline function calc_boundary_flux!(u, surface_flux_values, t, boundary_condition,
MeshT::Type{<:Union{P4estMesh{3},
T8codeMesh{3}}},
have_nonconservative_terms::True, equations,
surface_integral, dg, cache, i_index, j_index,
k_index, i_node_index, j_node_index,
direction_index,
element_index, boundary_index, node_coordinates,
contravariant_vectors)
calc_boundary_flux!(u, surface_flux_values, t, boundary_condition, MeshT,
have_nonconservative_terms,
combine_conservative_and_nonconservative_fluxes(surface_integral.surface_flux,
equations),
equations,
surface_integral, dg, cache,
i_index, j_index, k_index, i_node_index, j_node_index,
direction_index, element_index, boundary_index,
node_coordinates, contravariant_vectors)
return nothing
end

@inline function calc_boundary_flux!(u, surface_flux_values, t, boundary_condition,
MeshT::Type{<:Union{P4estMesh{3},
T8codeMesh{3}}},
have_nonconservative_terms::True,
combine_conservative_and_nonconservative_fluxes::True,
equations,
surface_integral, dg::DG, cache, i_index, j_index,
k_index, i_node_index, j_node_index,
direction_index,
element_index, boundary_index, node_coordinates,
contravariant_vectors)
Comment thread
ranocha marked this conversation as resolved.
@unpack surface_flux = surface_integral

# Extract solution data from boundary container
u_inner = get_node_vars(u, equations, dg, i_node_index, j_node_index,
boundary_index)

# Outward-pointing normal direction (not normalized)
normal_direction = get_normal_direction(direction_index, contravariant_vectors,
i_index, j_index, k_index, element_index)

# Coordinates at boundary node
x = get_node_coords(node_coordinates, equations, dg,
i_index, j_index, k_index, element_index)

# Call pointwise numerical flux functions for the conservative and nonconservative part
# in the normal direction on the boundary
flux = boundary_condition(u_inner, normal_direction, x, t,
surface_flux, equations)

# Copy flux to element storage in the correct orientation
for v in eachvariable(equations)
# Note the factor 0.5 necessary for the nonconservative fluxes based on
# the interpretation of global SBP operators coupled discontinuously via
# central fluxes/SATs
Comment thread
ranocha marked this conversation as resolved.
Outdated
Comment thread
MarcoArtiano marked this conversation as resolved.
Outdated
surface_flux_values[v, i_node_index, j_node_index,
direction_index, element_index] = flux[v]
end
Comment thread
ranocha marked this conversation as resolved.

return nothing
end

function calc_surface_integral!(backend::Backend, du, u,
mesh::Union{P4estMesh{3}, T8codeMesh{3}},
equations,
Expand Down
Loading
Loading