diff --git a/src/terms/nonlocal.jl b/src/terms/nonlocal.jl index 55f57cd0ba..4c90c7c7bf 100644 --- a/src/terms/nonlocal.jl +++ b/src/terms/nonlocal.jl @@ -144,6 +144,126 @@ function build_projection_coefficients(T::Type, psp::NormConservingPsp) proj_coeffs end +""" +Projector set of a single atom (independent of the atom's position), +and the structure factor for the atom position. Used inside NonlocalProjectors +such that the projector set can be reused for multiple atoms in the same atom group. +""" +struct AtomProjectors{VT <: AbstractVector, PT <: AbstractMatrix} + # nbasis + structure_factors::VT + # nbasis x nproj + projectors::PT +end + +""" +Matrix-like type to represent the nonlocal projection vectors P without +allocating the full matrix. +This type extends AbstractMatrix, but it does not implement all +the required methods, only those that were shown to be needed. +In particular, random access to the matrix elements is not supported. +""" +struct NonlocalProjectors{T <: Number, + PT <: AtomProjectors, + } <: AbstractMatrix{T} + atoms::Vector{PT} + + function NonlocalProjectors(atoms::Vector{PT}) where {PT} + # also serves as a check for length(atoms) > 0 + at1 = first(atoms) + new{promote_type(eltype(at1.structure_factors), + eltype(at1.projectors)), + PT}(atoms) + end +end + +function Base.size(P::NonlocalProjectors) + n = length(first(P.atoms).structure_factors) + m = sum(size(at.projectors, 2) for at in P.atoms) + (n, m) +end +function Base.Matrix(P::NonlocalProjectors{T}) where {T} + n, m = size(P) + out = Matrix{T}(undef, n, m) + iproj = 1 + for at in P.atoms + for proj in eachcol(at.projectors) + out[:, iproj] .= at.structure_factors .* proj + iproj += 1 + end + end + out +end + +function Base.show(io::IO, P::NonlocalProjectors) + print(io, "DFTK.NonlocalProjectors{") + show(io, P.atoms) + print(io, "}") +end +function Base.show(io::IO, ::MIME"text/plain", P::NonlocalProjectors) + print(io, summary(P)) +end + +# Add a level of indirection here to avoid ambiguity with the mul! method provided by Julia. +LinearAlgebra.mul!(C::AbstractVector, A::Adjoint{<:Any, <:NonlocalProjectors}, + ψk::AbstractVector) = _mul!(C, A, ψk) +LinearAlgebra.mul!(C::AbstractMatrix, A::Adjoint{<:Any, <:NonlocalProjectors}, + ψk::AbstractMatrix) = _mul!(C, A, ψk) + +LinearAlgebra.mul!(C::AbstractVector, A::NonlocalProjectors, B::AbstractVector, + α::Number, β::Number) = _mul!(C, A, B, α, β) +LinearAlgebra.mul!(C::AbstractMatrix, A::NonlocalProjectors, B::AbstractMatrix, + α::Number, β::Number) = _mul!(C, A, B, α, β) + +function _mul!(C::AbstractVecOrMat, A::Adjoint{<:Any, <:NonlocalProjectors}, + ψk::AbstractVecOrMat) + if size(C, 1) != size(A, 1) || size(A, 2) != size(ψk, 1) || size(ψk, 2) != size(C, 2) + throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(ψk)), C has size $(size(C))")) + end + + iproj = 1 + for at in A.parent.atoms + for proj in eachcol(at.projectors) + for iband in axes(ψk, 2) + C[iproj, iband] = conj(dot(@view(ψk[:, iband]), + Diagonal(at.structure_factors), + proj)) + end + iproj += 1 + end + end + C +end + +function _mul!(C::AbstractArray, A::NonlocalProjectors{T}, B::AbstractArray{BT}, + α::Number, β::Number) where {T, BT} + if size(C, 1) != size(A, 1) || size(A, 2) != size(B, 1) || size(B, 2) != size(C, 2) + throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(B)), C has size $(size(C))")) + end + + C .*= β + + maxproj = maximum(at -> size(at.projectors, 2), A.atoms) + # TODO: could store in a threadsafe cache to avoid repeated allocations + Pbuffer = Matrix{T}(undef, size(A, 1), maxproj) + + iproj = 1 + for at in A.atoms + nproj = size(at.projectors, 2) + + Pwork = @view Pbuffer[:, 1:nproj] + Pwork .= at.structure_factors .* at.projectors + Bwork = if BT <: AbstractVector + @view(B[iproj:iproj+nproj-1]) + else + @view(B[iproj:iproj+nproj-1, :]) + end + mul!(C, Pwork, Bwork, α, 1) + + iproj += nproj + end + C +end @doc raw""" Build projection vectors for a atoms array generated by term_nonlocal @@ -171,36 +291,31 @@ function build_projection_vectors(basis::PlaneWaveBasis{T}, kpt::Kpoint, psps::AbstractVector{<: NormConservingPsp}, psp_positions) where {T} unit_cell_volume = basis.model.unit_cell_volume - n_proj = count_n_proj(psps, psp_positions) n_G = length(G_vectors(basis, kpt)) - proj_vectors = zeros(Complex{eltype(psp_positions[1][1])}, n_G, n_proj) G_plus_k = to_cpu(Gplusk_vectors(basis, kpt)) # Compute the columns of proj_vectors = 1/√Ω \hat proj_i(k+G) # Since the proj_i are translates of each others, \hat proj_i(k+G) decouples as # \hat proj_i(p) = ∫ proj(r-R) e^{-ip·r} dr = e^{-ip·R} \hat proj(p). # The first term is the structure factor, the second the form factor. - offset = 0 # offset into proj_vectors - for (psp, positions) in zip(psps, psp_positions) + atom_projectors = reduce(vcat, map(zip(psps, psp_positions)) do (psp, positions) # Compute position-independent form factors G_plus_k_cart = to_cpu(Gplusk_vectors_cart(basis, kpt)) - form_factors = build_projector_form_factors(psp, G_plus_k_cart) + psp_form_factors = build_projector_form_factors(psp, G_plus_k_cart) + psp_form_factors ./= sqrt(unit_cell_volume) + # Offload potential values to a device (like a GPU), + # and make sure to share this allocation for all atoms in the group + psp_form_factors = to_device(basis.architecture, psp_form_factors) # Combine with structure factors - for r in positions + map(positions) do r # k+G in this formula can also be G, this only changes an unimportant phase factor - structure_factors = map(p -> cis2pi(-dot(p, r)), G_plus_k) - @views for iproj = 1:count_n_proj(psp) - proj_vectors[:, offset+iproj] .= - structure_factors .* form_factors[:, iproj] ./ sqrt(unit_cell_volume) - end - offset += count_n_proj(psp) + structure_factors = to_device(basis.architecture, map(p -> cis2pi(-dot(p, r)), G_plus_k)) + AtomProjectors(structure_factors, psp_form_factors) end - end - @assert offset == n_proj + end) - # Offload potential values to a device (like a GPU) - to_device(basis.architecture, proj_vectors) + NonlocalProjectors(atom_projectors) end """ @@ -282,7 +397,7 @@ function PDPψk(basis, positions, psp_groups, kpt, kpt_minus_q, ψk) D = build_projection_coefficients(basis, psp_groups) P = build_projection_vectors(basis, kpt, psp_groups, positions) P_minus_q = build_projection_vectors(basis, kpt_minus_q, psp_groups, positions) - P * (D * P_minus_q' * ψk) + P * (D * (P_minus_q' * ψk)) end function compute_dynmat_δH(::TermAtomicNonlocal, basis::PlaneWaveBasis{T}, ψ, occupation, diff --git a/src/terms/operators.jl b/src/terms/operators.jl index 40c8a523e6..bad43b413f 100644 --- a/src/terms/operators.jl +++ b/src/terms/operators.jl @@ -110,7 +110,7 @@ end function apply!(Hψ, op::NonlocalOperator, ψ) mul!(Hψ.fourier, op.P, (op.D * (op.P' * ψ.fourier)), 1, 1) end -Matrix(op::NonlocalOperator) = op.P * op.D * op.P' +Matrix(op::NonlocalOperator) = op.P * op.D * Matrix(op.P)' """ Magnetic field operator A⋅(-i∇). diff --git a/test/nonlocal.jl b/test/nonlocal.jl new file mode 100644 index 0000000000..bc55722eb5 --- /dev/null +++ b/test/nonlocal.jl @@ -0,0 +1,34 @@ +@testitem "Test NonlocalProjectors" tags=[:psp] setup=[mPspUpf] begin + using DFTK + using LinearAlgebra + + for (element, psp) in mPspUpf.upf_pseudos + lattice = 5 * I(3) + el = ElementPsp(element, psp) + atoms = [el, el, el] + positions = [zeros(3), 1/3 .* ones(3), 2/3 .* ones(3)] + model = model_DFT(lattice, atoms, positions; functionals=LDA()) + basis = PlaneWaveBasis(model; Ecut=5, kgrid=[2, 2, 2]) + + n_bands = 7 + ψ = [DFTK.random_orbitals(basis, kpt, n_bands) for kpt in basis.kpoints] + occ = [2.0 * ones(n_bands) for _ in basis.kpoints] + ρ = DFTK.compute_density(basis, ψ, occ) + + energies, ham = DFTK.energy_hamiltonian(basis, ψ, occ; ρ) + + for (hamblock, ψk) in zip(ham.blocks, ψ) + nonloc = hamblock.nonlocal_op + Hψk = zero(ψk) + DFTK.apply!((; fourier=Hψk), nonloc, (; fourier=ψk)) + + nonloc_dense = Matrix(nonloc) + Hψk_dense = nonloc_dense * ψk + + Pψk = nonloc.P' * ψk + DPψk = nonloc.D * Pψk + @assert nonloc.P * DPψk ≈ Matrix(nonloc.P) * DPψk atol=1e-10 + @assert Hψk ≈ Hψk_dense atol=1e-10 + end + end +end