Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "DFTK"
uuid = "acf6eb54-70d9-11e9-0013-234b7a5f5337"
authors = ["Michael F. Herbst <info@michael-herbst.com>", "Antoine Levitt <antoine.levitt@inria.fr>"]
version = "0.7.19"
authors = ["Michael F. Herbst <info@michael-herbst.com>", "Antoine Levitt <antoine.levitt@inria.fr>"]

[deps]
AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
Expand All @@ -13,12 +13,14 @@ Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
DftFunctionals = "6bd331d2-b28d-4fd3-880e-1a1c7f37947f"
DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
ExprTools = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527"
Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59"
IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77"
Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
Libxc = "66e17ffc-8502-11e9-23b5-c9248d0eb96d"
LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Expand Down Expand Up @@ -90,6 +92,7 @@ DftFunctionals = "0.3"
DifferentiationInterface = "0.6.39, 0.7"
DocStringExtensions = "0.9"
DoubleFloats = "1"
ExprTools = "0.1.10"
FFTW = "1.5"
FiniteDiff = "2"
FiniteDifferences = "0.12"
Expand All @@ -103,6 +106,7 @@ IterTools = "1"
JLD2 = "0.4, 0.5, 0.6"
JSON3 = "1"
KrylovKit = "0.8.3, 0.9, 0.10"
Libdl = "1"
Libxc = "0.3.17"
LineSearches = "7"
LinearAlgebra = "1"
Expand Down
27 changes: 27 additions & 0 deletions ext/DFTKAMDGPUExt.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
module DFTKAMDGPUExt
using AMDGPU
using PrecompileTools
import Libdl
using LinearAlgebra
import DFTK: CPU, GPU, precompilation_workflow
using DFTK
Expand All @@ -11,6 +12,32 @@ function DFTK.memory_usage(::GPU{<:AMDGPU.ROCArray})
merge(DFTK.memory_usage(CPU()), (; gpu=AMDGPU.memory_stats().live))
end

# Name of the rocTX shared library as located at load time; stays "" until
# `__init__` fills it in (and remains "" when the library cannot be found).
const libroctx = Ref{String}("")

# Module initialisation hook: look up the rocTX library and, when it is found,
# register a push/pop callback pair with DFTK under the name "ROC-TX".
function __init__()
    libroctx[] = Libdl.find_library("libroctx64")

    # An empty string means the library lookup did not succeed.
    if libroctx[] == ""
        @warn "libroctx64 is unavailable, ROCm instrumentation will be disabled."
        return nothing
    end

    # Forwards the range label to `roctxRangePushA`.
    function roctx_push(message::Cstring)
        ccall((:roctxRangePushA, libroctx[]), Cvoid, (Cstring,), message)
    end

    # Calls `roctxRangePop`, then synchronizes the device if requested.
    function roctx_pop(sync_device::Bool)
        ccall((:roctxRangePop, libroctx[]), Cvoid, ())
        if sync_device
            AMDGPU.synchronize()
        end
    end

    return DFTK.register_instrumentation_callback("ROC-TX", roctx_push, roctx_pop)
end

# Temporary workaround to not trigger https://github.com/JuliaGPU/AMDGPU.jl/issues/734
function LinearAlgebra.cholesky(A::Hermitian{T, <:AMDGPU.ROCArray}) where {T}
Acopy, info = AMDGPU.rocSOLVER.potrf!(A.uplo, copy(A.data))
Expand Down
1 change: 1 addition & 0 deletions src/DFTK.jl
Original file line number Diff line number Diff line change
Expand Up @@ -278,4 +278,5 @@ end
precompilation_workflow(lattice, atoms, positions, magnetic_moments)
end
end

end # module DFTK
102 changes: 100 additions & 2 deletions src/common/timer.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import ExprTools: splitdef, combinedef
using Preferences

# Control whether timings are enabled or not, by default yes
Expand All @@ -10,20 +11,54 @@ const timer = TimerOutput()
"""
Shortened version of the `@timeit` macro from `TimerOutputs`,
which writes to the DFTK timer.

Also wraps the code in [`push_range`](@ref)/[`pop_range`](@ref) calls for
instrumentation when running on the GPU.
"""
macro timing(args...)
length(args) >= 1 || error("@timing requires at least one argument: an expression to time")
length(args) <= 2 || error("@timing takes at most two arguments: a label and an expression")
@static if @load_preference("timer_enabled", "true") == "true"
# Copy of https://github.com/KristofferC/TimerOutputs.jl/blob/master/src/TimerOutput.jl#L174
# because macros calling macros does not work easily in Julia
blocks = TimerOutputs.timer_expr(__source__, __module__, false,
:($(DFTK.timer)), args...)
if blocks isa Expr
blocks
# This should be a function definition wrapped in esc.
@assert blocks.head == :escape
@assert length(blocks.args) == 1

# Split function definition
def = splitdef(blocks.args[1])
label = length(args) == 2 ? args[1] : string(def[:name])

@gensym val
def[:body] = quote
$(push_range)($(label))
$(Expr(
:tryfinally,
:($val = $(def[:body])),
:($(pop_range)()),
))
$val
end

esc(combinedef(def))
else
# This should be a standard expression, for which a label must have been provided.
@assert length(args) == 2
label = args[1]

Expr(:block,
blocks[1], # the timing setup
Expr(:tryfinally,
:($(esc(args[end]))), # the user expr
Expr(:block,
:(push_range($(esc(label)))),
Expr(:tryfinally,
:($(esc(args[end]))), # the user expr
:(pop_range()),
),
),
:($(blocks[2])) # the timing finally
)
)
Expand All @@ -33,7 +68,70 @@ macro timing(args...)
end
end

"""
Surround `expr` with a [`push_range`](@ref) call (using `label`) and a matching
[`pop_range`](@ref) call, for instrumentation when running on the GPU.

If the `timer_enabled` preference is not `"true"`, the macro expands to `expr`
alone and no instrumentation calls are emitted.
"""
macro instrument(label, expr)
    user_code = esc(expr)
    @static if @load_preference("timer_enabled", "true") == "true"
        open_range = :(push_range($(esc(label))))
        # The pop is placed in the finally slot so it also runs when the user code throws.
        guarded = Expr(:tryfinally, user_code, :(pop_range()))
        Expr(:block, open_range, guarded)
    else
        user_code
    end
end

"""
    set_timer_enabled!(state=true)

Store `string(state)` under the `timer_enabled` preference and log a reminder
that Julia must be restarted before the change is visible.
"""
function set_timer_enabled!(state=true)
    new_value = string(state)
    @set_preferences!("timer_enabled" => new_value)
    @info "timer_enabled preference changed. This is a permanent change, restart julia to see the effect."
end

# TODO: should probably use FunctionWrappers since closure cfunction won't work on arm...
"""
    InstrumentationCallback(name, push_range, pop_range)

A registered pair of instrumentation callbacks, kept together with the name
they were registered under.
The callbacks are held as C function pointers to avoid the overhead of
dynamic dispatch.
"""
struct InstrumentationCallback
    # Identifier given at registration time; deliberately left unconstrained.
    name::Any
    # C-callable with signature (message::Cstring,) -> Cvoid
    push_range::Base.CFunction
    # C-callable with signature (sync_device::Bool,) -> Cvoid
    pop_range::Base.CFunction
end

const instrumentation_callbacks = InstrumentationCallback[]

# Wrap `push_cb` and `pop_cb` as C-callable functions and append them, together
# with `name`, to the global `instrumentation_callbacks` list. Returns `nothing`.
function register_instrumentation_callback(name, push_cb, pop_cb)
    # push_cb is exposed as (Cstring,) -> Cvoid, pop_cb as (Bool,) -> Cvoid.
    c_push = @cfunction($push_cb, Cvoid, (Cstring,))
    c_pop = @cfunction($pop_cb, Cvoid, (Bool,))
    entry = InstrumentationCallback(name, c_push, c_pop)
    push!(instrumentation_callbacks, entry)
    return nothing
end

"""
Open a new range labelled `message` on every registered instrumentation callback.
Each call should be matched by a later [`pop_range`](@ref) call,
preferably placed in the `finally` part of a `try...finally` block.
"""
function push_range(message::String)
    for callback in instrumentation_callbacks
        # Raw pointer of the stored C-callable push hook.
        fptr = Base.unsafe_convert(Ptr{Cvoid}, callback.push_range)
        ccall(fptr, Cvoid, (Cstring,), message)
    end
end

"""
Close the current range on every registered instrumentation callback.
The callbacks are always invoked with `sync_device = false`.
"""
function pop_range()
    # TODO: config option to sync device here
    for callback in instrumentation_callbacks
        # Raw pointer of the stored C-callable pop hook.
        fptr = Base.unsafe_convert(Ptr{Cvoid}, callback.pop_range)
        ccall(fptr, Cvoid, (Bool,), false)
    end
end
4 changes: 2 additions & 2 deletions src/terms/Hamiltonian.jl
Original file line number Diff line number Diff line change
Expand Up @@ -147,14 +147,14 @@ end
to = TimerOutput() # Thread-local timer output
ψ_real = storage.ψ_reals

@timeit to "local" begin
@timeit to "local" @instrument "local" begin
ifft!(ψ_real, H.basis, H.kpoint, ψ[:, iband]; normalize=false)
ψ_real .*= potential
fft!(Hψ[:, iband], H.basis, H.kpoint, ψ_real; normalize=false) # overwrites ψ_real
end

if have_divAgrad
@timeit to "divAgrad" begin
@timeit to "divAgrad" @instrument "divAgrad" begin
apply!((; fourier=Hψ[:, iband], real=nothing),
H.divAgrad_op,
(; fourier=ψ[:, iband], real=nothing);
Expand Down
Loading