Skip to content

Add preference to disable LoopVectorization #2295

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 13 additions & 6 deletions src/Trixi.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ See also: [trixi-framework/Trixi.jl](https://github.com/trixi-framework/Trixi.jl
"""
module Trixi

using Preferences: @load_preference, set_preferences!
const _PREFERENCE_SQRT = @load_preference("sqrt", "sqrt_Trixi_NaN")
const _PREFERENCE_LOG = @load_preference("log", "log_Trixi_NaN")
const _PREFERENCE_POLYESTER = @load_preference("polyester", true)
const _PREFERENCE_LOOPVECTORIZATION = @load_preference("loop_vectorization", true)

# Include other packages that are used in Trixi.jl
# (standard library packages first, other packages next, all of them sorted alphabetically)

Expand Down Expand Up @@ -53,7 +59,13 @@ using FillArrays: Ones, Zeros
using ForwardDiff: ForwardDiff
using HDF5: HDF5, h5open, attributes, create_dataset, datatype, dataspace
using LinearMaps: LinearMap
using LoopVectorization: LoopVectorization, @turbo, indices
if _PREFERENCE_LOOPVECTORIZATION
using LoopVectorization: LoopVectorization, @turbo, indices
else
using LoopVectorization: LoopVectorization, indices
include("auxiliary/mock_turbo.jl")
end

using StaticArrayInterface: static_length # used by LoopVectorization
using MuladdMacro: @muladd
using Octavian: Octavian, matmul!
Expand Down Expand Up @@ -81,11 +93,6 @@ using SimpleUnPack: @pack!
using DataStructures: BinaryHeap, FasterForward, extract_all!

using UUIDs: UUID
using Preferences: @load_preference, set_preferences!

const _PREFERENCE_SQRT = @load_preference("sqrt", "sqrt_Trixi_NaN")
const _PREFERENCE_LOG = @load_preference("log", "log_Trixi_NaN")
const _PREFERENCE_POLYESTER = @load_preference("polyester", true)

# finite difference SBP operators
using SummationByPartsOperators: AbstractDerivativeOperator,
Expand Down
15 changes: 15 additions & 0 deletions src/auxiliary/math.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,21 @@ function set_polyester!(toggle::Bool; force = true)
@info "Please restart Julia and reload Trixi.jl for the `polyester` change to take effect"
end

"""
    Trixi.set_loop_vectorization!(toggle::Bool; force = true)

Enable (`toggle = true`) or disable (`toggle = false`) the use of
[LoopVectorization.jl](https://github.com/JuliaSIMD/LoopVectorization.jl) within Trixi.jl.
LoopVectorization.jl is enabled by default. Disabling it switches to an
implementation based on plain Julia, which can be useful for performance comparisons.

The choice is stored as the preference `"loop_vectorization"`; the keyword argument
`force` is passed on to `set_preferences!`. Julia needs to be restarted and Trixi.jl
reloaded for a change to take effect.

Note that this does not fully disable LoopVectorization.jl,
but only its internal use as part of Trixi.jl.
"""
function set_loop_vectorization!(toggle::Bool; force = true)
    # Store the requested setting under the key read by `@load_preference` at load time
    preference = "loop_vectorization" => toggle
    set_preferences!(TRIXI_UUID, preference; force = force)
    @info "Please restart Julia and reload Trixi.jl for the `loop_vectorization` change to take effect"
end

"""
Trixi.set_sqrt_type!(type; force = true)

Expand Down
37 changes: 37 additions & 0 deletions src/auxiliary/mock_turbo.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Mock replacement for `LoopVectorization.@turbo`.
# Copies some of the LoopVectorization functionality,
# but solely using Julia base functionality. It is equivalent to `@simd`
# at every loop level, combined with `@inbounds` for the whole loop nest.
#
# Only `for` loops are supported. All other macro arguments are ignored.
# If several `for` loops are passed, the last one is used.
# An `ArgumentError` is thrown at macro expansion time if no `for` loop is passed.
macro turbo(exprs...)
    # Find the outermost for loop
    loop = nothing
    for expr in exprs
        if Meta.isexpr(expr, :for)
            loop = expr
        end
    end
    # Validate explicitly instead of using `@assert`: assertions may be disabled,
    # in which case a call without a `for` loop would silently expand to an empty block.
    if loop === nothing
        throw(ArgumentError("the LoopVectorization-free fallback of `@turbo` " *
                            "requires a `for` loop expression"))
    end

    # We want to visit each nested for loop and insert a `Loopinfo` expression at every level.
    function insert_loopinfo!(expr)
        is_for = Meta.isexpr(expr, :for)
        # Only descend into constructs that may contain nested loops
        if is_for || Meta.isexpr(expr, :block) || Meta.isexpr(expr, :let)
            foreach(insert_loopinfo!, expr.args)
        end
        if is_for
            # We could insert additional LLVM loopinfo or `julia.ivdep`.
            # For now we just encourage vectorization.
            # `Expr(:loopinfo)` corresponds to https://llvm.org/docs/LangRef.html#llvm-loop with two additional nodes
            # `julia.simdloop` & `julia.ivdep`
            # x-ref: https://github.com/JuliaLang/julia/pull/31376
            push!(expr.args, Expr(:loopinfo, Symbol("julia.simdloop")))
        end
        return nothing
    end
    insert_loopinfo!(loop)

    # Disable bounds checking for the loop nest and restore the previous state afterwards
    wrapped = Expr(:block,
                   Expr(:inbounds, true),
                   loop,
                   Expr(:inbounds, :pop))
    # Escape everything so that the loop operates on the caller's variables
    return esc(wrapped)
end
3 changes: 3 additions & 0 deletions src/callbacks_step/summary.jl
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,9 @@ function initialize_summary_callback(cb::DiscreteCallback, u, t, integrator;
if !_PREFERENCE_POLYESTER
push!(setup, "Polyester" => "disabled")
end
if !_PREFERENCE_LOOPVECTORIZATION
push!(setup, "LoopVectorization" => "disabled")
end
if mpi_isparallel()
push!(setup,
"#MPI ranks" => mpi_nranks())
Expand Down
2 changes: 1 addition & 1 deletion src/solvers/dg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -672,7 +672,7 @@ end
nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)
end
# See comments on the DGSEM version above
if LoopVectorization.check_args(u_ode)
if _PREFERENCE_POLYESTER && LoopVectorization.check_args(u_ode)
# Here, we do not specialize on the number of nodes using `StaticInt` since
# - it will not be type stable (SBP operators just store it as a runtime value)
# - FD methods tend to use high node counts
Expand Down
Loading