Skip to content
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
610c7e4
add first working version
MarcoArtiano Jun 18, 2026
8555250
add tests
MarcoArtiano Jun 18, 2026
eb860cf
format
MarcoArtiano Jun 18, 2026
ab3063a
minor fix
MarcoArtiano Jun 18, 2026
f3bc84d
nonconservative unstable kernel
MarcoArtiano Jun 19, 2026
6567042
format
MarcoArtiano Jun 19, 2026
3d06855
fix nonconservative kernel + tests
MarcoArtiano Jun 19, 2026
690bfd4
increase readability
MarcoArtiano Jun 19, 2026
8bba097
fix tests
MarcoArtiano Jun 19, 2026
8256ff1
rename normal to normal direction
MarcoArtiano Jun 19, 2026
160023b
explicit AbstractSIMD
MarcoArtiano Jun 19, 2026
61e96fa
Update src/solvers/dgsem_structured/dg_3d_turbo.jl
MarcoArtiano Jun 19, 2026
ac3a2d8
Update src/solvers/dgsem_structured/dg_3d_turbo.jl
MarcoArtiano Jun 19, 2026
80a9813
simplify flux args
MarcoArtiano Jun 19, 2026
4a0a3c1
add comments
MarcoArtiano Jun 20, 2026
2a8c436
remove nonconservative part
MarcoArtiano Jun 20, 2026
86ffed2
minor change
MarcoArtiano Jun 20, 2026
9e731e5
Merge branch 'main' into ma/generated_turbo
MarcoArtiano Jun 20, 2026
8c718d4
move fluxes to numerical fluxes
MarcoArtiano Jun 20, 2026
11e3551
change export position
MarcoArtiano Jun 20, 2026
d34c047
fix typo
MarcoArtiano Jun 20, 2026
93f0373
Update src/equations/numerical_fluxes.jl
MarcoArtiano Jun 20, 2026
72547ef
Merge branch 'main' into ma/generated_turbo
MarcoArtiano Jun 22, 2026
3659c1e
Apply suggestions from code review
MarcoArtiano Jun 23, 2026
4c9dedb
apply some code review suggestions
MarcoArtiano Jun 23, 2026
7a51c08
more robust tests
MarcoArtiano Jun 23, 2026
2a1f47a
remove inner constructor
MarcoArtiano Jun 23, 2026
c2fe52d
add news and minor changes
MarcoArtiano Jun 23, 2026
18c98d3
Merge branch 'main' into ma/generated_turbo
MarcoArtiano Jun 23, 2026
5eac91d
improve docstring
ranocha Jun 23, 2026
8446f34
fix tests
ranocha Jun 23, 2026
d70ceab
fix name
ranocha Jun 23, 2026
8a61bb7
Apply suggestions from code review
MarcoArtiano Jun 23, 2026
4967a12
add fallback tests, fix name
MarcoArtiano Jun 24, 2026
313e7b1
Merge branch 'main' into ma/generated_turbo
MarcoArtiano Jun 24, 2026
9d064d8
Apply suggestions from code review
MarcoArtiano Jun 24, 2026
cd37459
Apply suggestions from code review
MarcoArtiano Jun 24, 2026
59fa643
Update test/test_performance_specializations_3d.jl
MarcoArtiano Jun 24, 2026
1d45aeb
Update test/test_performance_specializations_3d.jl
MarcoArtiano Jun 24, 2026
47cc826
Apply suggestions from code review
MarcoArtiano Jun 25, 2026
d4b6d63
Merge branch 'main' into ma/generated_turbo
MarcoArtiano Jun 25, 2026
cc92253
Update test/test_performance_specializations_3d.jl
MarcoArtiano Jun 25, 2026
19885c9
Update examples/tree_1d_dgsem/elixir_euler_modified_sod.jl
MarcoArtiano Jun 26, 2026
050943e
rename volume flux turbo in flux turbo
MarcoArtiano Jun 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/Trixi.jl
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ export flux, flux_central, flux_lax_friedrichs, flux_hll, flux_hllc, flux_hlle,
FluxRotated,
flux_shima_etal_turbo, flux_ranocha_turbo,
FluxUpwind,
FluxTracerEquationsCentral
FluxTracerEquationsCentral, FluxVolumeTurbo

export splitting_steger_warming, splitting_vanleer_haenel,
splitting_coirier_vanleer, splitting_lax_friedrichs,
Expand Down
29 changes: 28 additions & 1 deletion src/auxiliary/math.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
using LoopVectorization: AbstractSIMD
Comment thread
MarcoArtiano marked this conversation as resolved.
Outdated
# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
# Since these FMAs can increase the performance of many numerical algorithms,
# we need to opt-in explicitly.
Expand Down Expand Up @@ -135,7 +136,7 @@ end
See also [`Trixi.set_log_type!`](@ref).
"""
@inline log(x::Real) = x < zero(x) ? oftype(x, NaN) : Base.log(x)

# Method for SIMD vector arguments (`AbstractSIMD` from LoopVectorization.jl).
# A scalar ternary cannot be used on a vector argument, so the selection is
# expressed with `ifelse`: where `x < 0` the result is `NaN`, elsewhere it is
# `Base.log(x)`.
@inline function log(x::AbstractSIMD)
    return ifelse(x < zero(x), oftype(x, NaN), Base.log(x))
end
# Specializations for the IEEE floating point types `Float64`, `Float32`, and `Float16`.
# Instead of going through `Base.log`, these call the corresponding LLVM intrinsic
# (`llvm.log.f64`, `llvm.log.f32`, `llvm.log.f16`) directly via `ccall` with the
# `llvmcall` calling convention.
# NOTE(review): presumably the intrinsics yield `NaN` instead of throwing for negative
# arguments, matching the generic `log(x::Real)` method above -- confirm.
@inline log(x::Float64) = ccall("llvm.log.f64", llvmcall, Float64, (Float64,), x)
@inline log(x::Float32) = ccall("llvm.log.f32", llvmcall, Float32, (Float32,), x)
@inline log(x::Float16) = ccall("llvm.log.f16", llvmcall, Float16, (Float16,), x)
Comment thread
MarcoArtiano marked this conversation as resolved.
Outdated
Expand Down Expand Up @@ -205,6 +206,19 @@ Given ε = 1.0e-4, we use the following algorithm.
end
end

# Logarithmic mean for SIMD vector arguments (`AbstractSIMD` from LoopVectorization.jl).
#
# With `f2 = f^2 = ((x - y) / (x + y))^2` (written in expanded form below), the result is
# - `(x + y) / (2 + 2/3 * f2 + 2/5 * f2^2 + 2/7 * f2^3)` where `f2 < 1.0e-4`,
# - `(y - x) / log(y / x)` otherwise.
#
# `ifelse` is an ordinary function, so both candidates are evaluated for the whole
# vector before the selection takes place. Values computed by the branch that is not
# selected (e.g., `0 / log(1)` for `x == y`) are discarded.
@inline function ln_mean(x::AbstractSIMD, y::AbstractSIMD)
    RealT = eltype(x)
    epsilon_f2 = convert(RealT, 1.0e-4)
    f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2
    return ifelse(f2 < epsilon_f2,
                  (x + y) / @evalpoly(f2,
                            convert(RealT, 2),
                            convert(RealT, 2 / 3),
                            convert(RealT, 2 / 5),
                            convert(RealT, 2 / 7)),
                  (y - x) / log(y / x))
end

"""
Trixi.inv_ln_mean(x::Real, y::Real)

Expand All @@ -231,6 +245,19 @@ multiplication.
end
end

# Inverse of the logarithmic mean for SIMD vector arguments (`AbstractSIMD` from
# LoopVectorization.jl).
#
# With `f2 = f^2 = ((x - y) / (x + y))^2` (written in expanded form below), the result is
# - `(2 + 2/3 * f2 + 2/5 * f2^2 + 2/7 * f2^3) / (x + y)` where `f2 < 1.0e-4`,
# - `log(y / x) / (y - x)` otherwise.
#
# `ifelse` is an ordinary function, so both candidates are evaluated for the whole
# vector before the selection takes place. Values computed by the branch that is not
# selected (e.g., `log(1) / 0` for `x == y`) are discarded.
@inline function inv_ln_mean(x::AbstractSIMD, y::AbstractSIMD)
    RealT = eltype(x)
    epsilon_f2 = convert(RealT, 1.0e-4)
    f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2
    return ifelse(f2 < epsilon_f2,
                  @evalpoly(f2,
                            convert(RealT, 2),
                            convert(RealT, 2 / 3),
                            convert(RealT, 2 / 5),
                            convert(RealT, 2 / 7)) / (x + y),
                  log(y / x) / (y - x))
end

# `Base.max` and `Base.min` perform additional checks for signed zeros and `NaN`s
# which are not present in comparable functions in Fortran/C++. For example,
# ```julia
Expand Down
55 changes: 55 additions & 0 deletions src/equations/numerical_fluxes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -578,4 +578,59 @@ end
end

Base.show(io::IO, f::FluxUpwind) = print(io, "FluxUpwind(", f.splitting, ")")

"""
FluxVolumeTurbo(volume_flux)

Specialize the volume flux to use the SIMD instructions via LoopVectorization.jl
"""
Comment thread
ranocha marked this conversation as resolved.
struct FluxVolumeTurbo{VolumeFlux}
    # The wrapped flux function; its type is the type parameter of the wrapper
    volume_flux::VolumeFlux

    # Declaring an inner constructor suppresses Julia's default constructors, so
    # only the fully parameterized form `FluxVolumeTurbo{VolumeFlux}(volume_flux)`
    # is defined by the type itself.
    FluxVolumeTurbo{VolumeFlux}(volume_flux) where {VolumeFlux} = new{VolumeFlux}(volume_flux)
end
Comment thread
ranocha marked this conversation as resolved.
Outdated

# Convenience constructor for conservative systems: the given `volume_flux` is
# first passed through `combined_turbo_flux`, and the returned flux is the one
# that gets wrapped (the type parameter is taken from that returned flux).
function FluxVolumeTurbo(volume_flux)
    flux_for_turbo = combined_turbo_flux(volume_flux)
    FluxType = typeof(flux_for_turbo)
    return FluxVolumeTurbo{FluxType}(flux_for_turbo)
end

# Fallback without any specialization: the turbo flux is simply the numerical flux
# in terms of conservative variables that was passed in, returned unchanged.
@inline function combined_turbo_flux(volume_flux)
    return volume_flux
end
Comment thread
ranocha marked this conversation as resolved.
Outdated

# By default the turbo flux has the same number of precomputed variables
# as the number of variables. The count is returned wrapped in a `Val`, and the
# `volume_flux` argument is not used by this fallback.
@inline function n_turbo_flux_aux_node_vars(volume_flux, equations)
    return Val(nvariables(equations))
end

# Map the conserved variables to the precomputed auxiliary variables that are used to
# speed up the evaluation of the numerical flux. Without a specialization this acts as
# cons2cons: all arguments except the last one (the `equations`, going by the argument
# name) are returned unchanged as a tuple.
@inline function cons2fluxauxiliary(volume_flux, conserved_and_equations...)
    return Base.front(conserved_and_equations)
end

# Entry point of the fallback turbo volume flux, used when no specialization is given.
# The `equations` are read from the last positional argument, and all arguments are
# forwarded to the method selected by `have_nonconservative_terms(equations)`.
@inline function volume_flux_turbo(volume_flux, aux_and_normals_and_equations...)
    nonconservative = have_nonconservative_terms(last(aux_and_normals_and_equations))
    return volume_flux_turbo(volume_flux, nonconservative,
                             aux_and_normals_and_equations...)
end

# Fallback for systems without nonconservative terms: reassemble the flat argument list
# into static vectors and call the plain `volume_flux`.
#
# Layout of `aux_and_normals_and_equations`, with `n = nvariables(equations)`:
#   1:n          values of the left state
#   (n + 1):2n   values of the right state
#   end-3:end-1  the three components of the normal direction
#   end          the `equations`
@inline function volume_flux_turbo(volume_flux, have_nonconservative_terms::False,
                                   aux_and_normals_and_equations...)
    equations = last(aux_and_normals_and_equations)
    n = nvariables(equations)

    normal_direction = SVector(aux_and_normals_and_equations[end - 3],
                               aux_and_normals_and_equations[end - 2],
                               aux_and_normals_and_equations[end - 1])

    u_ll = SVector(ntuple(Val(n)) do v
                       aux_and_normals_and_equations[v]
                   end)
    u_rr = SVector(ntuple(Val(n)) do v
                       aux_and_normals_and_equations[n + v]
                   end)

    return volume_flux(u_ll, u_rr, normal_direction, equations)
end
Comment thread
ranocha marked this conversation as resolved.
Outdated

# Opt in to LoopVectorization.jl: declare that `volume_flux_turbo` and
# `cons2fluxauxiliary` may be called with SIMD arguments, for any number of
# arguments (the `Val` parameter is left unconstrained).
function LoopVectorization.can_turbo(::typeof(volume_flux_turbo), ::Val)
    return true
end

function LoopVectorization.can_turbo(::typeof(cons2fluxauxiliary), ::Val)
    return true
end

# Compact text representation of the wrapper: prints `FluxVolumeTurbo(` followed by
# the wrapped flux and a closing parenthesis.
# The method must dispatch on `FluxVolumeTurbo`. Declaring it for `FluxUpwind` would
# overwrite the `show` method of `FluxUpwind` and access a `volume_flux` field there.
function Base.show(io::IO, f::FluxVolumeTurbo)
    return print(io, "FluxVolumeTurbo(", f.volume_flux, ")")
end
Comment thread
MarcoArtiano marked this conversation as resolved.
Outdated
end # @muladd
1 change: 1 addition & 0 deletions src/solvers/dgsem_structured/dg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -234,4 +234,5 @@ include("dg_2d_subcell_limiters.jl")
# Specialized implementations used to improve performance
include("dg_2d_compressible_euler.jl")
include("dg_3d_compressible_euler.jl")
include("dg_3d_turbo.jl")
end # @muladd
Loading
Loading