Skip to content

Commit 38d2c2e

Browse files
abussymfherbst
andauthored
Optimizing XC instantiation for GPUs (#1262)
Co-authored-by: Michael F. Herbst <info@michael-herbst.com>
1 parent cc20ea2 commit 38d2c2e

8 files changed

Lines changed: 190 additions & 139 deletions

File tree

src/density_methods.jl

Lines changed: 73 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -163,9 +163,9 @@ function atomic_density_superposition(basis::PlaneWaveBasis{T},
163163

164164
# Pre-allocation of large arrays for GPU efficiency
165165
Gs = G_vectors(basis)
166-
ρ = to_device(basis.architecture, zeros(Complex{T}, length(Gs)))
167-
ρ_tmp = similar(ρ)
168166
indices = to_device(basis.architecture, collect(1:length(Gs)))
167+
ρ = zeros_like(indices, Complex{T})
168+
ρ_tmp = similar(ρ)
169169

170170
for (igroup, group) in enumerate(basis.model.atom_groups)
171171
for iatom in group
@@ -184,8 +184,7 @@ end
184184
Returns the form factors at unique values of |G + q| (in Cartesian coordinates).
185185
Additionally, returns a mapping from any G index to the corresponding entry in the form_factors array.
186186
"""
187-
function atomic_density_form_factors(basis::PlaneWaveBasis{T},
188-
method::AtomicDensity ) where {T<:Real}
187+
function atomic_density_form_factors(basis::PlaneWaveBasis{T}, method::AtomicDensity) where {T<:Real}
189188
G_cart = to_cpu(G_vectors_cart(basis))
190189

191190
iG2ifnorm_cpu = zeros(Int, length(G_cart))
@@ -194,49 +193,94 @@ function atomic_density_form_factors(basis::PlaneWaveBasis{T},
194193
p = norm(G)
195194
iG2ifnorm_cpu[iG] = get!(norm_indices, p, length(norm_indices) + 1)
196195
end
196+
iG2ifnorm = to_device(basis.architecture, iG2ifnorm_cpu)
197197

198-
form_factors_cpu = zeros(T, length(norm_indices), length(basis.model.atom_groups))
199-
for (p, ifnorm) in norm_indices
200-
for (igroup, group) in enumerate(basis.model.atom_groups)
201-
element = basis.model.atoms[first(group)]
202-
form_factors_cpu[ifnorm, igroup] = atomic_density(element, p, method)
203-
end
198+
ni_pairs = collect(pairs(norm_indices))
199+
ps = to_device(basis.architecture, first.(ni_pairs))
200+
indices = to_device(basis.architecture, last.(ni_pairs))
201+
202+
form_factors = similar(ps, length(norm_indices), length(basis.model.atom_groups))
203+
for (igroup, group) in enumerate(basis.model.atom_groups)
204+
element = basis.model.atoms[first(group)]
205+
@inbounds form_factors[indices, igroup] .= atomic_density(element, ps, method)
204206
end
205207

206-
form_factors = to_device(basis.architecture, form_factors_cpu)
207-
iG2ifnorm = to_device(basis.architecture, iG2ifnorm_cpu)
208208
(; form_factors, iG2ifnorm)
209209
end
210210

211-
function atomic_density(element::Element, Gnorm::T,
212-
::ValenceDensityGaussian)::T where {T <: Real}
213-
gaussian_valence_charge_density_fourier(element, Gnorm)
214-
end
211+
#
212+
# Check for presence of certain densities in elements
213+
#
214+
215+
"""Check presence of model valence density (as an initial guess)."""
216+
has_valence_density(::Element) = false
217+
218+
"""Check presence of model core charge density (non-linear core correction)."""
219+
has_core_density(::Element) = false
220+
221+
"""Check presence of model core kinetic energy density (non-linear core correction for τ)."""
222+
has_core_kinetic_energy_density(::Element) = false
223+
224+
has_core_density(el::ElementPsp) = has_core_density(el.psp)
225+
has_core_kinetic_energy_density(el::ElementPsp) = has_core_kinetic_energy_density(el.psp)
226+
has_valence_density(el::ElementPsp) = has_valence_density(el.psp)
215227

216-
function atomic_density(element::Element, Gnorm::T,
217-
::ValenceDensityPseudo)::T where {T <: Real}
218-
eval_psp_valence_density_fourier(element.psp, Gnorm)
228+
#
229+
# Atomic density dispatches
230+
#
231+
232+
function atomic_density(element::Union{Element,<:ElementPsp}, Gnorm::Real, dens::AtomicDensity)
233+
first(atomic_density(element, [Gnorm], dens)) # Dispatch to vector version
219234
end
220235

221-
function atomic_density(element::Element, Gnorm::T,
222-
::ValenceDensityAuto)::T where {T <: Real}
223-
valence_charge_density_fourier(element, Gnorm)
236+
"""Gaussian valence charge density using Abinit's coefficient table, in Fourier space."""
237+
function atomic_density(element::Element, Gnorms::AbstractVector, ::ValenceDensityGaussian)
238+
charge = charge_ionic(element)
239+
decay_length = atom_decay_length(element)
240+
map(Gnorms) do Gnorm
241+
# Note: cannot pass element to GPU kernel, because not isbit
242+
charge * exp(-(Gnorm * decay_length)^2)
243+
end
244+
end
245+
function atomic_density(element::Element, Gnorms::AbstractVector, ::ValenceDensityAuto)
246+
if has_valence_density(element)
247+
atomic_density(element, Gnorms, ValenceDensityPseudo())
248+
else
249+
atomic_density(element, Gnorms, ValenceDensityGaussian())
250+
end
224251
end
225252

226-
function atomic_density(element::Element, Gnorm::T,
227-
::CoreDensity)::T where {T <: Real}
228-
has_core_density(element) ? core_charge_density_fourier(element, Gnorm) : zero(T)
253+
function atomic_density(element::Element, Gnorms::AbstractVector, ::CoreDensity)
254+
@assert !has_core_density(element)
255+
zeros_like(Gnorms)
256+
end
257+
function atomic_density(element::Element, Gnorms::AbstractVector, ::CoreKineticEnergyDensity)
258+
@assert !has_core_kinetic_energy_density(element)
259+
zeros_like(Gnorms)
229260
end
230261

231-
function atomic_density(element::Element, Gnorm::T,
232-
::CoreKineticEnergyDensity)::T where {T <: Real}
262+
function atomic_density(element::ElementPsp, Gnorms::AbstractVector, ::ValenceDensityPseudo)
263+
eval_psp_valence_density_fourier(element.psp, Gnorms)
264+
end
265+
function atomic_density(element::ElementPsp, Gnorms::AbstractVector, ::CoreDensity)
266+
if has_core_density(element)
267+
eval_psp_core_density_fourier(element.psp, Gnorms)
268+
else
269+
zeros_like(Gnorms)
270+
end
271+
end
272+
function atomic_density(element::ElementPsp, Gnorms::AbstractVector, ::CoreKineticEnergyDensity)
233273
if has_core_kinetic_energy_density(element)
234-
eval_psp_core_kinetic_energy_density_fourier(element.psp, Gnorm)
274+
eval_psp_core_kinetic_energy_density_fourier(element.psp, Gnorms)
235275
else
236-
zero(T)
276+
zeros_like(Gnorms)
237277
end
238278
end
239279

280+
#
281+
# Some data we need for the Gaussian densities
282+
#
283+
240284
# Get the lengthscale of the valence density for an atom with `n_elec_core` core
241285
# and `n_elec_valence` valence electrons.
242286
function atom_decay_length(n_elec_core, n_elec_valence)

src/elements.jl

Lines changed: 21 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,13 @@ using AtomsBase
77
# Very likely `species`, and `charge_ionic` need to be defined as well.
88
abstract type Element end
99

10+
"""Return the local potential for a passed coordinate or vector of coordinates in frequency space"""
11+
function local_potential_fourier end
12+
13+
"""Return the local potential for a passed coordinate or vector of coordinates in real space"""
14+
function local_potential_real end
15+
16+
1017
"""Return the chemical species corresponding to an element"""
1118
AtomsBase.species(::Element) = ChemicalSpecies(0) # dummy atom
1219

@@ -31,40 +38,23 @@ n_elec_core(el::Element) = charge_nuclear(el) - charge_ionic(el)
3138
"""Return the pseudopotential family for the element if this is known, else `nothing`"""
3239
pseudofamily(::Element) = nothing
3340

34-
"""Check presence of model core charge density (non-linear core correction)."""
35-
has_core_density(::Element) = false
36-
37-
"""Check presence of model core kinetic energy density (non-linear core correction for τ)."""
38-
has_core_kinetic_energy_density(::Element) = false
39-
4041
# The preceding functions are fallback implementations that should be altered as needed.
4142

4243
eval_psp_energy_correction(T, ::Element) = zero(T)
43-
eval_psp_energy_correction(psp::Element) = eval_psp_energy_correction(Float64, psp)
44+
eval_psp_energy_correction(el::Element) = eval_psp_energy_correction(Float64, el)
4445

4546
# Fall back to the Gaussian table for Elements without pseudopotentials
46-
function valence_charge_density_fourier(el::Element, p::T)::T where {T <: Real}
47-
gaussian_valence_charge_density_fourier(el, p)
48-
end
4947

50-
"""Gaussian valence charge density using Abinit's coefficient table, in Fourier space."""
51-
function gaussian_valence_charge_density_fourier(el::Element, p::T)::T where {T <: Real}
52-
charge_ionic(el) * exp(-(p * atom_decay_length(el))^2)
48+
# Vector to single-argument fallbacks in the Elements
49+
for fn in [:local_potential_fourier, :local_potential_real]
50+
@eval begin
51+
function DFTK.$fn(el::Element, arg::AbstractVector{<:Real})
52+
arch = architecture(arg)
53+
to_device(arch, map(p -> $fn(el, p), to_cpu(arg)))
54+
end
55+
end
5356
end
5457

55-
function core_charge_density_fourier(::Element, ::T)::T where {T <: Real}
56-
error("Abstract elements do not necesesarily provide core charge density.")
57-
end
58-
59-
function core_kinetic_energy_density_fourier(::Element, ::T)::T where {T <: Real}
60-
error("Abstract elements do not necesesarily provide core kinetic energy density.")
61-
end
62-
63-
# Generic vectorized version of local_potential_fourier, GPU-safe
64-
function local_potential_fourier(el::Element, ps::AbstractVector{T}) where {T <: Real}
65-
arch = architecture(ps)
66-
to_device(arch, map(p -> local_potential_fourier(el, p), to_cpu(ps)))
67-
end
6858

6959
Base.show(io::IO, el::Element) = print(io, "$(typeof(el))(:$(species(el)))")
7060

@@ -171,30 +161,13 @@ end
171161
AtomsBase.mass(el::ElementPsp) = el.mass
172162
AtomsBase.species(el::ElementPsp) = el.species
173163
charge_ionic(el::ElementPsp) = charge_ionic(el.psp)
174-
has_core_density(el::ElementPsp) = has_core_density(el.psp)
175-
has_core_kinetic_energy_density(el::ElementPsp) = has_core_kinetic_energy_density(el.psp)
176164
eval_psp_energy_correction(T, el::ElementPsp) = eval_psp_energy_correction(T, el.psp)
177165

178-
function local_potential_fourier(el::ElementPsp, p::T) where {T <: Real}
179-
eval_psp_local_fourier(el.psp, p)
180-
end
181-
# Vectorized version of the above
182-
function local_potential_fourier(el::ElementPsp, ps::AbstractVector{T}) where {T <: Real}
183-
eval_psp_local_fourier(el.psp, ps)
184-
end
166+
# Function forwarding for ElementPsp (for each case both versions are needed to resolve ambiguities)
167+
local_potential_fourier(el::ElementPsp, p::Real) = eval_psp_local_fourier(el.psp, p)
168+
local_potential_fourier(el::ElementPsp, p::AbstractVector{<:Real}) = eval_psp_local_fourier(el.psp, p)
185169
local_potential_real(el::ElementPsp, r::Real) = eval_psp_local_real(el.psp, r)
186-
187-
function valence_charge_density_fourier(el::ElementPsp, p::T) where {T <: Real}
188-
if has_valence_density(el.psp)
189-
eval_psp_valence_density_fourier(el.psp, p)
190-
else
191-
gaussian_valence_charge_density_fourier(el, p)
192-
end
193-
end
194-
function core_charge_density_fourier(el::ElementPsp, p::T) where {T <: Real}
195-
eval_psp_core_density_fourier(el.psp, p)
196-
end
197-
170+
local_potential_real(el::ElementPsp, r::AbstractVector{<:Real}) = eval_psp_local_real(el.psp, r)
198171

199172
#
200173
# ElementCohenBergstresser
@@ -264,7 +237,6 @@ function local_potential_fourier(el::ElementCohenBergstresser, p::T) where {T <:
264237
end
265238
# TODO Strictly speaking needs a eval_psp_energy_correction
266239

267-
268240
#
269241
# ElementGaussian
270242
#
@@ -289,7 +261,7 @@ function ElementGaussian(α, L; symbol=:X, mass=nothing)
289261
T = promote_type(typeof(α), typeof(L))
290262
ElementGaussian{T}(α, L, symbol, mass)
291263
end
292-
function local_potential_real(el::ElementGaussian, r)
264+
function local_potential_real(el::ElementGaussian, r::Real)
293265
-el.α / ((2π) * el.L) * exp(- (r / el.L)^2 / 2)
294266
end
295267
function local_potential_fourier(el::ElementGaussian, p::Real)

0 commit comments

Comments
 (0)