Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 132 additions & 17 deletions src/terms/nonlocal.jl
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,126 @@ function build_projection_coefficients(T::Type, psp::NormConservingPsp)
proj_coeffs
end

"""
Projector set of a single atom (independent of the atom's position),
and the structure factor for the atom position. Used inside NonlocalProjectors
such that the projector set can be reused for multiple atoms in the same atom group.
"""
struct AtomProjectors{VT <: AbstractVector, PT <: AbstractMatrix}
# nbasis
structure_factors::VT
# nbasis x nproj
projectors::PT
end

"""
Matrix-like type to represent the nonlocal projection vectors P without
allocating the full matrix.
This type extends AbstractMatrix, but it does not implement all
the required methods, only those that were shown to be needed.
In particular, random access to the matrix elements is not supported.
"""
struct NonlocalProjectors{T <: Number,
PT <: AtomProjectors,
} <: AbstractMatrix{T}
atoms::Vector{PT}

function NonlocalProjectors(atoms::Vector{PT}) where {PT}
# also serves as a check for length(atoms) > 0
at1 = first(atoms)
new{promote_type(eltype(at1.structure_factors),
eltype(at1.projectors)),
PT}(atoms)
end
end

function Base.size(P::NonlocalProjectors)
n = length(first(P.atoms).structure_factors)
m = sum(size(at.projectors, 2) for at in P.atoms)
(n, m)
end
function Base.Matrix(P::NonlocalProjectors{T}) where {T}
n, m = size(P)
out = Matrix{T}(undef, n, m)
iproj = 1
for at in P.atoms
for proj in eachcol(at.projectors)
out[:, iproj] .= at.structure_factors .* proj
iproj += 1
end
end
out
end

function Base.show(io::IO, P::NonlocalProjectors)
print(io, "DFTK.NonlocalProjectors{")
show(io, P.atoms)
print(io, "}")
end
function Base.show(io::IO, ::MIME"text/plain", P::NonlocalProjectors)
print(io, summary(P))
end

# Add a level of indirection here to avoid ambiguity with the mul! method provided by Julia.
LinearAlgebra.mul!(C::AbstractVector, A::Adjoint{<:Any, <:NonlocalProjectors},
ψk::AbstractVector) = _mul!(C, A, ψk)
LinearAlgebra.mul!(C::AbstractMatrix, A::Adjoint{<:Any, <:NonlocalProjectors},
ψk::AbstractMatrix) = _mul!(C, A, ψk)

LinearAlgebra.mul!(C::AbstractVector, A::NonlocalProjectors, B::AbstractVector,
α::Number, β::Number) = _mul!(C, A, B, α, β)
LinearAlgebra.mul!(C::AbstractMatrix, A::NonlocalProjectors, B::AbstractMatrix,
α::Number, β::Number) = _mul!(C, A, B, α, β)
Comment on lines +207 to +216

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this needed ? Surprises me a bit.


function _mul!(C::AbstractVecOrMat, A::Adjoint{<:Any, <:NonlocalProjectors},
ψk::AbstractVecOrMat)
if size(C, 1) != size(A, 1) || size(A, 2) != size(ψk, 1) || size(ψk, 2) != size(C, 2)
throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(ψk)), C has size $(size(C))"))
end

iproj = 1
for at in A.parent.atoms
for proj in eachcol(at.projectors)
Comment on lines +225 to +226

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

join these loops to avoid too deep nesting.

for iband in axes(ψk, 2)
C[iproj, iband] = conj(dot(@view(ψk[:, iband]),
Diagonal(at.structure_factors),
proj))
end
iproj += 1
end
end
C
end

function _mul!(C::AbstractArray, A::NonlocalProjectors{T}, B::AbstractArray{BT},
α::Number, β::Number) where {T, BT}
if size(C, 1) != size(A, 1) || size(A, 2) != size(B, 1) || size(B, 2) != size(C, 2)
throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(B)), C has size $(size(C))"))
end

C .*= β

maxproj = maximum(at -> size(at.projectors, 2), A.atoms)
# TODO: could store in a threadsafe cache to avoid repeated allocations
Pbuffer = Matrix{T}(undef, size(A, 1), maxproj)

iproj = 1
for at in A.atoms
nproj = size(at.projectors, 2)

Pwork = @view Pbuffer[:, 1:nproj]
Pwork .= at.structure_factors .* at.projectors
Bwork = if BT <: AbstractVector
@view(B[iproj:iproj+nproj-1])
else
@view(B[iproj:iproj+nproj-1, :])
end
Comment on lines +256 to +260

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this looks weird.

mul!(C, Pwork, Bwork, α, 1)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why not put β here ?


iproj += nproj
end
C
end

@doc raw"""
Build projection vectors for a atoms array generated by term_nonlocal
Expand Down Expand Up @@ -171,36 +291,31 @@ function build_projection_vectors(basis::PlaneWaveBasis{T}, kpt::Kpoint,
psps::AbstractVector{<: NormConservingPsp},
psp_positions) where {T}
unit_cell_volume = basis.model.unit_cell_volume
n_proj = count_n_proj(psps, psp_positions)
n_G = length(G_vectors(basis, kpt))
proj_vectors = zeros(Complex{eltype(psp_positions[1][1])}, n_G, n_proj)
G_plus_k = to_cpu(Gplusk_vectors(basis, kpt))

# Compute the columns of proj_vectors = 1/√Ω \hat proj_i(k+G)
# Since the proj_i are translates of each others, \hat proj_i(k+G) decouples as
# \hat proj_i(p) = ∫ proj(r-R) e^{-ip·r} dr = e^{-ip·R} \hat proj(p).
# The first term is the structure factor, the second the form factor.
offset = 0 # offset into proj_vectors
for (psp, positions) in zip(psps, psp_positions)
atom_projectors = reduce(vcat, map(zip(psps, psp_positions)) do (psp, positions)
# Compute position-independent form factors
G_plus_k_cart = to_cpu(Gplusk_vectors_cart(basis, kpt))
form_factors = build_projector_form_factors(psp, G_plus_k_cart)
psp_form_factors = build_projector_form_factors(psp, G_plus_k_cart)
psp_form_factors ./= sqrt(unit_cell_volume)
# Offload potential values to a device (like a GPU),
# and make sure to share this allocation for all atoms in the group
psp_form_factors = to_device(basis.architecture, psp_form_factors)

# Combine with structure factors
for r in positions
map(positions) do r
# k+G in this formula can also be G, this only changes an unimportant phase factor
structure_factors = map(p -> cis2pi(-dot(p, r)), G_plus_k)
@views for iproj = 1:count_n_proj(psp)
proj_vectors[:, offset+iproj] .=
structure_factors .* form_factors[:, iproj] ./ sqrt(unit_cell_volume)
end
offset += count_n_proj(psp)
structure_factors = to_device(basis.architecture, map(p -> cis2pi(-dot(p, r)), G_plus_k))
AtomProjectors(structure_factors, psp_form_factors)
end
end
@assert offset == n_proj
end)

# Offload potential values to a device (like a GPU)
to_device(basis.architecture, proj_vectors)
NonlocalProjectors(atom_projectors)
end

"""
Expand Down Expand Up @@ -282,7 +397,7 @@ function PDPψk(basis, positions, psp_groups, kpt, kpt_minus_q, ψk)
D = build_projection_coefficients(basis, psp_groups)
P = build_projection_vectors(basis, kpt, psp_groups, positions)
P_minus_q = build_projection_vectors(basis, kpt_minus_q, psp_groups, positions)
P * (D * P_minus_q' * ψk)
P * (D * (P_minus_q' * ψk))
end

function compute_dynmat_δH(::TermAtomicNonlocal, basis::PlaneWaveBasis{T}, ψ, occupation,
Expand Down
2 changes: 1 addition & 1 deletion src/terms/operators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ end
function apply!(Hψ, op::NonlocalOperator, ψ)
mul!(Hψ.fourier, op.P, (op.D * (op.P' * ψ.fourier)), 1, 1)
end
Matrix(op::NonlocalOperator) = op.P * op.D * op.P'
Matrix(op::NonlocalOperator) = op.P * op.D * Matrix(op.P)'

"""
Magnetic field operator A⋅(-i∇).
Expand Down
34 changes: 34 additions & 0 deletions test/nonlocal.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
@testitem "Test NonlocalProjectors" tags=[:psp] setup=[mPspUpf] begin
using DFTK
using LinearAlgebra

for (element, psp) in mPspUpf.upf_pseudos
lattice = 5 * I(3)
el = ElementPsp(element, psp)
atoms = [el, el, el]
positions = [zeros(3), 1/3 .* ones(3), 2/3 .* ones(3)]
model = model_DFT(lattice, atoms, positions; functionals=LDA())
basis = PlaneWaveBasis(model; Ecut=5, kgrid=[2, 2, 2])

n_bands = 7
ψ = [DFTK.random_orbitals(basis, kpt, n_bands) for kpt in basis.kpoints]
occ = [2.0 * ones(n_bands) for _ in basis.kpoints]
ρ = DFTK.compute_density(basis, ψ, occ)

energies, ham = DFTK.energy_hamiltonian(basis, ψ, occ; ρ)

for (hamblock, ψk) in zip(ham.blocks, ψ)
nonloc = hamblock.nonlocal_op
Hψk = zero(ψk)
DFTK.apply!((; fourier=Hψk), nonloc, (; fourier=ψk))

nonloc_dense = Matrix(nonloc)
Hψk_dense = nonloc_dense * ψk

Pψk = nonloc.P' * ψk
DPψk = nonloc.D * Pψk
@assert nonloc.P * DPψk ≈ Matrix(nonloc.P) * DPψk atol=1e-10
@assert Hψk ≈ Hψk_dense atol=1e-10
end
end
end
Loading