From ebbcc6a773742bd1246f3d57fd76e4e36fc0fd3b Mon Sep 17 00:00:00 2001 From: Bruno Ploumhans <13494793+Technici4n@users.noreply.github.com> Date: Thu, 13 Mar 2025 14:42:00 +0100 Subject: [PATCH 01/16] WIP --- src/terms/nonlocal.jl | 63 ++++++++++++++++++++++++++++++++++++++++-- src/terms/operators.jl | 3 +- 2 files changed, 63 insertions(+), 3 deletions(-) diff --git a/src/terms/nonlocal.jl b/src/terms/nonlocal.jl index 7add0bf9e7..0be2901cc9 100644 --- a/src/terms/nonlocal.jl +++ b/src/terms/nonlocal.jl @@ -17,9 +17,9 @@ function (::AtomicNonlocal)(basis::PlaneWaveBasis{T}) where {T} isempty(psp_groups) && return TermNoop() ops = map(basis.kpoints) do kpt - P = build_projection_vectors(basis, kpt, psps, psp_positions) + (P1, P2) = build_projection_vectors(basis, kpt, psps, psp_positions) D = build_projection_coefficients(T, psps, psp_positions) - NonlocalOperator(basis, kpt, P, to_device(basis.architecture, D)) + NonlocalOperator(basis, kpt, P1, P2, to_device(basis.architecture, D)) end TermAtomicNonlocal(ops) end @@ -144,6 +144,65 @@ function build_projection_coefficients(T::Type, psp::NormConservingPsp) proj_coeffs end +@doc raw""" +Build projection vectors for a atoms array generated by term_nonlocal + +```math +\begin{aligned} +H_{\rm at} &= \sum_{ij} C_{ij} \ket{{\rm proj}_i} \bra{{\rm proj}_j} \\ +H_{\rm per} &= \sum_R \sum_{ij} C_{ij} \ket{{\rm proj}_i(x-R)} \bra{{\rm proj}_j(x-R)} +\end{aligned} +``` + +```math +\begin{aligned} +\braket{e_k(G') \middle| H_{\rm per}}{e_k(G)} + &= \ldots \\ + &= \frac{1}{Ω} \sum_{ij} C_{ij} \widehat{\rm proj}_i(k+G') \widehat{\rm proj}_j^*(k+G), +\end{aligned} +``` + +where ``\widehat{\rm proj}_i(p) = ∫_{ℝ^3} {\rm proj}_i(r) e^{-ip·r} dr``. + +We store ``\frac{1}{\sqrt Ω} \widehat{\rm proj}_i(k+G)`` in `proj_vectors`. +""" +function build_projection_form_factors(basis::PlaneWaveBasis{T}, kpt::Kpoint, + psps::AbstractVector{<: NormConservingPsp}, + psp_positions) where {T} + unit_cell_volume = basis.model.unit_cell_volume + n_proj = count_n_proj(psps, psp_positions) + G_plus_k = to_cpu(Gplusk_vectors(basis, kpt)) + form_factors = Vector(undef, n_proj) + structure_factors = Vector(undef, n_proj) + + # Compute the columns of proj_vectors = 1/√Ω \hat proj_i(k+G) + # Since the proj_i are translates of each others, \hat proj_i(k+G) decouples as + # \hat proj_i(p) = ∫ proj(r-R) e^{-ip·r} dr = e^{-ip·R} \hat proj(p). + # The first term is the structure factor, the second the form factor. + offset = 0 # offset into form_factors and structure_factors + for (psp, positions) in zip(psps, psp_positions) do + # Compute position-independent form factors + G_plus_k_cart = to_cpu(Gplusk_vectors_cart(basis, kpt)) + psp_form_factors = build_projector_form_factors(psp, G_plus_k_cart) + psp_form_factors ./= sqrt(unit_cell_volume) + # Make sure to reuse the allocation of the columns for all atoms in the group + form_factor_cols = [to_device(basis.architecture, Vector{eltype(col)}(col)) + for col in eachcol(psp_form_factors)] + + for r in positions + # k+G in this formula can also be G, this only changes an unimportant phase factor + # Once again make sure to reuse the allocation for all projectors + atom_structure_factors = to_device(basis.architecture, map(p -> cis2pi(-dot(p, r)), G_plus_k)) + for iproj = 1:count_n_proj(psp) + form_factors[offset] = form_factor_cols[iproj] + structure_factors[offset] = atom_structure_factors + offset += 1 + end + end + end + (form_factors, structure_factors) +end + @doc raw""" Build projection vectors for a atoms array generated by term_nonlocal diff --git a/src/terms/operators.jl b/src/terms/operators.jl index 40c8a523e6..5a6891761f 100644 --- a/src/terms/operators.jl +++ b/src/terms/operators.jl @@ -104,7 +104,8 @@ struct NonlocalOperator{T <: Real, PT, DT} <: RealFourierOperator basis::PlaneWaveBasis{T} kpoint::Kpoint{T} # not typed, can be anything that supports PDP'ψ - P::PT + P1::PT + P2::PT D::DT end function apply!(Hψ, op::NonlocalOperator, ψ) From 1c3b8a9cc398536f7d93d48a62699787510ea5b2 Mon Sep 17 00:00:00 2001 From: Bruno Ploumhans <13494793+Technici4n@users.noreply.github.com> Date: Wed, 30 Apr 2025 18:05:56 +0200 Subject: [PATCH 02/16] WIP --- src/DFTK.jl | 6 ++--- src/terms/nonlocal.jl | 53 ++++++++++++++++++++++++++---------------- src/terms/operators.jl | 3 +-- test/PspUpf.jl | 22 ++++++++++++++++++ 4 files changed, 59 insertions(+), 25 deletions(-) diff --git a/src/DFTK.jl b/src/DFTK.jl index ab698b2691..01c038fe86 100644 --- a/src/DFTK.jl +++ b/src/DFTK.jl @@ -266,8 +266,8 @@ end positions = [ones(3)/8, -ones(3)/8] magnetic_moments = [2, -2] - @compile_workload begin - precompilation_workflow(lattice, atoms, positions, magnetic_moments) - end + # @compile_workload begin + # precompilation_workflow(lattice, atoms, positions, magnetic_moments) + # end end end # module DFTK diff --git a/src/terms/nonlocal.jl b/src/terms/nonlocal.jl index 0be2901cc9..03d3255a0d 100644 --- a/src/terms/nonlocal.jl +++ b/src/terms/nonlocal.jl @@ -17,9 +17,9 @@ function (::AtomicNonlocal)(basis::PlaneWaveBasis{T}) where {T} isempty(psp_groups) && return TermNoop() ops = map(basis.kpoints) do kpt - (P1, P2) = build_projection_vectors(basis, kpt, psps, psp_positions) + P = build_projection_vectors_mem(basis, kpt, psps, psp_positions) D = build_projection_coefficients(T, psps, psp_positions) - NonlocalOperator(basis, kpt, P1, P2, to_device(basis.architecture, D)) + NonlocalOperator(basis, kpt, P, to_device(basis.architecture, D)) end TermAtomicNonlocal(ops) end @@ -28,6 +28,22 @@ struct TermAtomicNonlocal <: Term ops::Vector{NonlocalOperator} end +struct AtomProjectors{T <: Real, + VT <: AbstractVector{<: Complex{T}}, + PT <: AbstractMatrix{<: Complex{T}}} + # nbasis + structure_factors::VT + # nbasis x nproj + projectors::PT +end + +# TODO: implement AbstractMatrix? +struct NonlocalProjectors{T <: Real} + # nbasis + ψ_scratch::AbstractVector{<:Complex{T}} + projectors::Vector{<:AtomProjectors{T}} +end + @timing "ene_ops: nonlocal" function ene_ops(term::TermAtomicNonlocal, basis::PlaneWaveBasis{T}, ψ, occupation; kwargs...) where {T} @@ -166,41 +182,38 @@ where ``\widehat{\rm proj}_i(p) = ∫_{ℝ^3} {\rm proj}_i(r) e^{-ip·r} dr``. We store ``\frac{1}{\sqrt Ω} \widehat{\rm proj}_i(k+G)`` in `proj_vectors`. """ -function build_projection_form_factors(basis::PlaneWaveBasis{T}, kpt::Kpoint, +function build_projection_vectors_mem(basis::PlaneWaveBasis{T}, kpt::Kpoint, psps::AbstractVector{<: NormConservingPsp}, psp_positions) where {T} unit_cell_volume = basis.model.unit_cell_volume n_proj = count_n_proj(psps, psp_positions) + n_G = length(G_vectors(basis, kpt)) + proj_vectors = zeros(Complex{eltype(psp_positions[1][1])}, n_G, n_proj) G_plus_k = to_cpu(Gplusk_vectors(basis, kpt)) - form_factors = Vector(undef, n_proj) - structure_factors = Vector(undef, n_proj) # Compute the columns of proj_vectors = 1/√Ω \hat proj_i(k+G) # Since the proj_i are translates of each others, \hat proj_i(k+G) decouples as # \hat proj_i(p) = ∫ proj(r-R) e^{-ip·r} dr = e^{-ip·R} \hat proj(p). # The first term is the structure factor, the second the form factor. - offset = 0 # offset into form_factors and structure_factors - for (psp, positions) in zip(psps, psp_positions) do + atom_projectors = reduce(vcat, map(zip(psps, psp_positions)) do (psp, positions) # Compute position-independent form factors G_plus_k_cart = to_cpu(Gplusk_vectors_cart(basis, kpt)) psp_form_factors = build_projector_form_factors(psp, G_plus_k_cart) psp_form_factors ./= sqrt(unit_cell_volume) - # Make sure to reuse the allocation of the columns for all atoms in the group - form_factor_cols = [to_device(basis.architecture, Vector{eltype(col)}(col)) - for col in eachcol(psp_form_factors)] + # Offload potential values to a device (like a GPU), + # and make sure to share this allocation for all atoms in the group + psp_form_factors = to_device(basis.architecture, psp_form_factors) - for r in positions + # Combine with structure factors + map(positions) do r # k+G in this formula can also be G, this only changes an unimportant phase factor - # Once again make sure to reuse the allocation for all projectors - atom_structure_factors = to_device(basis.architecture, map(p -> cis2pi(-dot(p, r)), G_plus_k)) - for iproj = 1:count_n_proj(psp) - form_factors[offset] = form_factor_cols[iproj] - structure_factors[offset] = atom_structure_factors - offset += 1 - end + structure_factors = to_device(basis.architecture, map(p -> cis2pi(-dot(p, r)), G_plus_k)) + AtomProjectors(structure_factors, psp_form_factors) end - end - (form_factors, structure_factors) + end) + + # TODO: to_device + NonlocalProjectors(zeros(Complex{eltype(psp_positions[1][1])}, n_G), atom_projectors) end diff --git a/src/terms/operators.jl b/src/terms/operators.jl index 5a6891761f..40c8a523e6 100644 --- a/src/terms/operators.jl +++ b/src/terms/operators.jl @@ -104,8 +104,7 @@ struct NonlocalOperator{T <: Real, PT, DT} <: RealFourierOperator basis::PlaneWaveBasis{T} kpoint::Kpoint{T} # not typed, can be anything that supports PDP'ψ - P1::PT - P2::PT + P::PT D::DT end function apply!(Hψ, op::NonlocalOperator, ψ) diff --git a/test/PspUpf.jl b/test/PspUpf.jl index 9de8aeb40b..4262d6388c 100644 --- a/test/PspUpf.jl +++ b/test/PspUpf.jl @@ -250,3 +250,25 @@ end end end end + +@testitem "Test nonlocal term operations" tags=[:psp] setup=[mPspUpf] begin + using DFTK + using LinearAlgebra + + lattice = 5 * I(3) + positions = [zeros(3), 1/3 .* ones(3), 2/3 .* ones(3)] + for (element, psp) in mPspUpf.upf_pseudos + if sum(psp.r2_ρion) > 0 # Otherwise, it's all 0 in the UPF as a placeholder + el = ElementPsp(element, psp) + atoms = [el, el, el] + model = model_DFT(lattice, atoms, positions; functionals=LDA()) + basis = PlaneWaveBasis(model; Ecut=5, kgrid=[2, 2, 2]) + n_bands = 7 + ψ = [DFTK.random_orbitals(basis, kpt, n_bands) for kpt in basis.kpoints] + occ = [2.0 * ones(n_bands) for _ in basis.kpoints] + ρ = DFTK.compute_density(basis, ψ, occ) + + DFTK.energy(basis, ψ, occ) + end + end +end From 49c904b728665bcbe32b60ccb5859af6f0c7f042 Mon Sep 17 00:00:00 2001 From: Bruno Ploumhans <13494793+Technici4n@users.noreply.github.com> Date: Thu, 1 May 2025 15:38:48 +0200 Subject: [PATCH 03/16] WIP --- src/terms/nonlocal.jl | 61 ++++++++++++++++++++++++++++++++++++++++-- src/terms/operators.jl | 2 +- test/PspUpf.jl | 7 ++++- 3 files changed, 66 insertions(+), 4 deletions(-) diff --git a/src/terms/nonlocal.jl b/src/terms/nonlocal.jl index 03d3255a0d..a2959c26d7 100644 --- a/src/terms/nonlocal.jl +++ b/src/terms/nonlocal.jl @@ -38,12 +38,64 @@ struct AtomProjectors{T <: Real, end # TODO: implement AbstractMatrix? -struct NonlocalProjectors{T <: Real} +struct NonlocalProjectors{T <: Real} <: AbstractMatrix{Complex{T}} # nbasis ψ_scratch::AbstractVector{<:Complex{T}} projectors::Vector{<:AtomProjectors{T}} end +function Base.size(P::NonlocalProjectors) + n = length(P.ψ_scratch) + m = sum(size(p.projectors, 2) for p in P.projectors) + (n, m) +end +function Base.Matrix(P::NonlocalProjectors{T}) where {T} + n, m = size(P) + out = zeros(Complex{T}, n, m) + iproj = 1 + for p in P.projectors + for proj in eachcol(p.projectors) + out[:, iproj] .= p.structure_factors .* proj + iproj += 1 + end + end + out +end + +function LinearAlgebra.mul!(C::AbstractMatrix, A::Adjoint{TA, <:NonlocalProjectors}, ψk::AbstractMatrix) where {TA} + # TODO: check dimensions? + iproj = 1 + ψ_scratch = A.parent.ψ_scratch + for p in A.parent.projectors + for proj in eachcol(p.projectors) + # TODO: what allocates here? and does it use BLAS? + ψ_scratch .= p.structure_factors .* proj + C[iproj, :] .= dropdims(ψ_scratch' * ψk; dims=1) + iproj += 1 + end + end + C +end + +function LinearAlgebra.mul!(C::AbstractMatrix, A::NonlocalProjectors, B::AbstractMatrix, α::Number, β::Number) + # TODO: check dimensions? + C .*= β + + iproj = 1 + ψ_scratch = A.ψ_scratch + for p in A.projectors + for proj in eachcol(p.projectors) + # TODO: what allocates here? and does it use BLAS? + ψ_scratch .= p.structure_factors .* proj + for iband in size(B, 2) + C[:, iband] .+= ψ_scratch .* (α * B[iproj, iband]) + end + iproj += 1 + end + end + C +end + @timing "ene_ops: nonlocal" function ene_ops(term::TermAtomicNonlocal, basis::PlaneWaveBasis{T}, ψ, occupation; kwargs...) where {T} @@ -213,7 +265,12 @@ function build_projection_vectors_mem(basis::PlaneWaveBasis{T}, kpt::Kpoint, end) # TODO: to_device - NonlocalProjectors(zeros(Complex{eltype(psp_positions[1][1])}, n_G), atom_projectors) + ret = NonlocalProjectors(zeros(Complex{eltype(psp_positions[1][1])}, n_G), atom_projectors) + + # TODO: remove this + @assert Matrix(ret) ≈ build_projection_vectors(basis, kpt, psps, psp_positions) + + ret end diff --git a/src/terms/operators.jl b/src/terms/operators.jl index 40c8a523e6..bad43b413f 100644 --- a/src/terms/operators.jl +++ b/src/terms/operators.jl @@ -110,7 +110,7 @@ end function apply!(Hψ, op::NonlocalOperator, ψ) mul!(Hψ.fourier, op.P, (op.D * (op.P' * ψ.fourier)), 1, 1) end -Matrix(op::NonlocalOperator) = op.P * op.D * op.P' +Matrix(op::NonlocalOperator) = op.P * op.D * Matrix(op.P)' """ Magnetic field operator A⋅(-i∇). diff --git a/test/PspUpf.jl b/test/PspUpf.jl index 4262d6388c..2f09a96949 100644 --- a/test/PspUpf.jl +++ b/test/PspUpf.jl @@ -268,7 +268,12 @@ end occ = [2.0 * ones(n_bands) for _ in basis.kpoints] ρ = DFTK.compute_density(basis, ψ, occ) - DFTK.energy(basis, ψ, occ) + energies, ham = DFTK.energy_hamiltonian(basis, ψ, occ; ρ) + hamψ = ham * ψ + + hblock = ham.blocks[1] + nonloc = hblock.nonlocal_op + nonlocal_dense = Matrix(nonloc) end end end From 1bd11c62b707d7b9a2cf3d3f7935b4ca2400d353 Mon Sep 17 00:00:00 2001 From: Bruno Ploumhans <13494793+Technici4n@users.noreply.github.com> Date: Sun, 4 May 2025 22:48:47 +0200 Subject: [PATCH 04/16] Fix correctness and allocations --- src/terms/nonlocal.jl | 18 ++++++++++-------- test/PspUpf.jl | 12 ++++++++++++ 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/terms/nonlocal.jl b/src/terms/nonlocal.jl index a2959c26d7..55bb00063c 100644 --- a/src/terms/nonlocal.jl +++ b/src/terms/nonlocal.jl @@ -38,10 +38,13 @@ struct AtomProjectors{T <: Real, end # TODO: implement AbstractMatrix? -struct NonlocalProjectors{T <: Real} <: AbstractMatrix{Complex{T}} +struct NonlocalProjectors{T <: Real, + ST <: AbstractVector{Complex{T}}, + PT <: AtomProjectors{T} + } <: AbstractMatrix{Complex{T}} # nbasis - ψ_scratch::AbstractVector{<:Complex{T}} - projectors::Vector{<:AtomProjectors{T}} + ψ_scratch::ST + projectors::Vector{PT} end function Base.size(P::NonlocalProjectors) @@ -68,9 +71,8 @@ function LinearAlgebra.mul!(C::AbstractMatrix, A::Adjoint{TA, <:NonlocalProjecto ψ_scratch = A.parent.ψ_scratch for p in A.parent.projectors for proj in eachcol(p.projectors) - # TODO: what allocates here? and does it use BLAS? ψ_scratch .= p.structure_factors .* proj - C[iproj, :] .= dropdims(ψ_scratch' * ψk; dims=1) + @views mul!(C[iproj:iproj, :], ψ_scratch', ψk) iproj += 1 end end @@ -85,10 +87,10 @@ function LinearAlgebra.mul!(C::AbstractMatrix, A::NonlocalProjectors, B::Abstrac ψ_scratch = A.ψ_scratch for p in A.projectors for proj in eachcol(p.projectors) - # TODO: what allocates here? and does it use BLAS? + # TODO: does this use BLAS? ψ_scratch .= p.structure_factors .* proj - for iband in size(B, 2) - C[:, iband] .+= ψ_scratch .* (α * B[iproj, iband]) + for iband in axes(B, 2) + @views C[:, iband] .+= ψ_scratch .* (α * B[iproj, iband]) end iproj += 1 end diff --git a/test/PspUpf.jl b/test/PspUpf.jl index 2f09a96949..cab255384c 100644 --- a/test/PspUpf.jl +++ b/test/PspUpf.jl @@ -274,6 +274,18 @@ end hblock = ham.blocks[1] nonloc = hblock.nonlocal_op nonlocal_dense = Matrix(nonloc) + ψk = ψ[1] + + Hψk = zero(ψk) + DFTK.apply!((;fourier=Hψk), nonloc, (; fourier=ψk)) + Hψk_dense = nonlocal_dense * ψk + + Pψk = nonloc.P' * ψk + DPψk = nonloc.D * Pψk + @show norm(nonloc.P * DPψk) norm(Matrix(nonloc.P) * DPψk) + @assert @show(norm(nonloc.P * DPψk - Matrix(nonloc.P) * DPψk)) < 1e-10 + + @assert @show(norm(Hψk - Hψk_dense)) < 1e-10 end end end From 903bf9c885172419813edd0d59e9caae208c0205 Mon Sep 17 00:00:00 2001 From: Bruno Ploumhans <13494793+Technici4n@users.noreply.github.com> Date: Mon, 5 May 2025 19:25:13 +0200 Subject: [PATCH 05/16] Missing matvec mul overloads + cleanup --- src/terms/nonlocal.jl | 69 +++++++++++++++++++++++++++++-------------- 1 file changed, 47 insertions(+), 22 deletions(-) diff --git a/src/terms/nonlocal.jl b/src/terms/nonlocal.jl index 55bb00063c..0fdf4e1563 100644 --- a/src/terms/nonlocal.jl +++ b/src/terms/nonlocal.jl @@ -28,69 +28,94 @@ struct TermAtomicNonlocal <: Term ops::Vector{NonlocalOperator} end +""" +Projector set of a single atom (independent of the atom's position), +and the structure factor for the atom position. Used inside NonlocalProjectors +such that the projector set can be reused for multiple atoms in the same atom group. +""" struct AtomProjectors{T <: Real, - VT <: AbstractVector{<: Complex{T}}, - PT <: AbstractMatrix{<: Complex{T}}} + VT <: AbstractVector{Complex{T}}, + PT <: AbstractMatrix{Complex{T}}} # nbasis structure_factors::VT # nbasis x nproj projectors::PT end -# TODO: implement AbstractMatrix? +""" +Matrix-like type to represent the nonlocal projectors P without +allocating the full matrix. +This type extends AbstractMatrix, but it does not implement all +the required methods, only those that were shown to be needed. +In particular, random access to the matrix elements is not supported. +""" struct NonlocalProjectors{T <: Real, ST <: AbstractVector{Complex{T}}, PT <: AtomProjectors{T} } <: AbstractMatrix{Complex{T}} + # TODO: this is a real problem wrt. thread-safety, no? # nbasis - ψ_scratch::ST - projectors::Vector{PT} + proj_scratch::ST + atoms::Vector{PT} end function Base.size(P::NonlocalProjectors) - n = length(P.ψ_scratch) - m = sum(size(p.projectors, 2) for p in P.projectors) + n = length(P.proj_scratch) + m = sum(size(at.projectors, 2) for at in P.atoms) (n, m) end function Base.Matrix(P::NonlocalProjectors{T}) where {T} n, m = size(P) out = zeros(Complex{T}, n, m) iproj = 1 - for p in P.projectors - for proj in eachcol(p.projectors) - out[:, iproj] .= p.structure_factors .* proj + for at in P.atoms + for proj in eachcol(at.projectors) + out[:, iproj] .= at.structure_factors .* proj iproj += 1 end end out end -function LinearAlgebra.mul!(C::AbstractMatrix, A::Adjoint{TA, <:NonlocalProjectors}, ψk::AbstractMatrix) where {TA} +# Add a level of indirection here to avoid ambiguity with the mul! method provided by Julia. +LinearAlgebra.mul!(C::AbstractVector, A::Adjoint{<:Any, <:NonlocalProjectors}, + ψk::AbstractVector) = _mul!(C, A, ψk) +LinearAlgebra.mul!(C::AbstractMatrix, A::Adjoint{<:Any, <:NonlocalProjectors}, + ψk::AbstractMatrix) = _mul!(C, A, ψk) + +LinearAlgebra.mul!(C::AbstractVector, A::NonlocalProjectors, B::AbstractVector, + α::Number, β::Number) = _mul!(C, A, B, α, β) +LinearAlgebra.mul!(C::AbstractMatrix, A::NonlocalProjectors, B::AbstractMatrix, + α::Number, β::Number) = _mul!(C, A, B, α, β) + +function _mul!(C::AbstractVecOrMat, A::Adjoint{<:Any, <:NonlocalProjectors}, + ψk::AbstractVecOrMat) # TODO: check dimensions? iproj = 1 - ψ_scratch = A.parent.ψ_scratch - for p in A.parent.projectors - for proj in eachcol(p.projectors) - ψ_scratch .= p.structure_factors .* proj - @views mul!(C[iproj:iproj, :], ψ_scratch', ψk) + proj_scratch = A.parent.proj_scratch + for at in A.parent.atoms + for proj in eachcol(at.projectors) + proj_scratch .= at.structure_factors .* proj + @views mul!(C[iproj:iproj, :], proj_scratch', ψk) iproj += 1 end end C end -function LinearAlgebra.mul!(C::AbstractMatrix, A::NonlocalProjectors, B::AbstractMatrix, α::Number, β::Number) +function _mul!(C::AbstractArray, A::NonlocalProjectors, B::AbstractArray, + α::Number, β::Number) # TODO: check dimensions? C .*= β iproj = 1 - ψ_scratch = A.ψ_scratch - for p in A.projectors - for proj in eachcol(p.projectors) + proj_scratch = A.proj_scratch + for at in A.atoms + for proj in eachcol(at.projectors) # TODO: does this use BLAS? - ψ_scratch .= p.structure_factors .* proj + proj_scratch .= at.structure_factors .* proj for iband in axes(B, 2) - @views C[:, iband] .+= ψ_scratch .* (α * B[iproj, iband]) + @views C[:, iband] .+= proj_scratch .* (α * B[iproj, iband]) end iproj += 1 end From c2d6d66b72b008ef65d724caf168a644ff42d764 Mon Sep 17 00:00:00 2001 From: Bruno Ploumhans <13494793+Technici4n@users.noreply.github.com> Date: Mon, 5 May 2025 19:31:52 +0200 Subject: [PATCH 06/16] Add dimension checks --- src/terms/nonlocal.jl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/terms/nonlocal.jl b/src/terms/nonlocal.jl index 0fdf4e1563..6181f40ea8 100644 --- a/src/terms/nonlocal.jl +++ b/src/terms/nonlocal.jl @@ -90,7 +90,10 @@ LinearAlgebra.mul!(C::AbstractMatrix, A::NonlocalProjectors, B::AbstractMatrix, function _mul!(C::AbstractVecOrMat, A::Adjoint{<:Any, <:NonlocalProjectors}, ψk::AbstractVecOrMat) - # TODO: check dimensions? + if size(C, 1) != size(A, 1) || size(A, 2) != size(ψk, 1) || size(ψk, 2) != size(C, 2) + throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(ψk)), C has size $(size(C))")) + end + iproj = 1 proj_scratch = A.parent.proj_scratch for at in A.parent.atoms @@ -105,7 +108,10 @@ end function _mul!(C::AbstractArray, A::NonlocalProjectors, B::AbstractArray, α::Number, β::Number) - # TODO: check dimensions? + if size(C, 1) != size(A, 1) || size(A, 2) != size(B, 1) || size(B, 2) != size(C, 2) + throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(B)), C has size $(size(C))")) + end + C .*= β iproj = 1 From 3df5bb0b17c246ee14117ffac08cd12b15c4e156 Mon Sep 17 00:00:00 2001 From: Bruno Ploumhans <13494793+Technici4n@users.noreply.github.com> Date: Tue, 6 May 2025 14:09:52 +0200 Subject: [PATCH 07/16] Show norms in assertion failure --- src/terms/nonlocal.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/terms/nonlocal.jl b/src/terms/nonlocal.jl index 6181f40ea8..6da3f9497b 100644 --- a/src/terms/nonlocal.jl +++ b/src/terms/nonlocal.jl @@ -301,7 +301,11 @@ function build_projection_vectors_mem(basis::PlaneWaveBasis{T}, kpt::Kpoint, ret = NonlocalProjectors(zeros(Complex{eltype(psp_positions[1][1])}, n_G), atom_projectors) # TODO: remove this - @assert Matrix(ret) ≈ build_projection_vectors(basis, kpt, psps, psp_positions) + denseold = build_projection_vectors(basis, kpt, psps, psp_positions) + densenew = Matrix(ret) + if !(denseold ≈ densenew) + throw(AssertionError("Old and new projectors don't match. Norm of difference: $(norm(denseold - densenew)). Norm of old: $(norm(denseold)). Norm of new: $(norm(densenew)).")) + end ret end From 9146d614da2ecf6fe88adee3bebe5ecc04a4f814 Mon Sep 17 00:00:00 2001 From: Bruno Ploumhans <13494793+Technici4n@users.noreply.github.com> Date: Tue, 6 May 2025 15:56:31 +0200 Subject: [PATCH 08/16] Remove old build_projection_vectors implementation --- src/terms/nonlocal.jl | 77 +++---------------------------------------- 1 file changed, 4 insertions(+), 73 deletions(-) diff --git a/src/terms/nonlocal.jl b/src/terms/nonlocal.jl index 6da3f9497b..b6e253d8be 100644 --- a/src/terms/nonlocal.jl +++ b/src/terms/nonlocal.jl @@ -17,7 +17,7 @@ function (::AtomicNonlocal)(basis::PlaneWaveBasis{T}) where {T} isempty(psp_groups) && return TermNoop() ops = map(basis.kpoints) do kpt - P = build_projection_vectors_mem(basis, kpt, psps, psp_positions) + P = build_projection_vectors(basis, kpt, psps, psp_positions) D = build_projection_coefficients(T, psps, psp_positions) NonlocalOperator(basis, kpt, P, to_device(basis.architecture, D)) end @@ -267,13 +267,11 @@ where ``\widehat{\rm proj}_i(p) = ∫_{ℝ^3} {\rm proj}_i(r) e^{-ip·r} dr``. We store ``\frac{1}{\sqrt Ω} \widehat{\rm proj}_i(k+G)`` in `proj_vectors`. """ -function build_projection_vectors_mem(basis::PlaneWaveBasis{T}, kpt::Kpoint, +function build_projection_vectors(basis::PlaneWaveBasis{T}, kpt::Kpoint, psps::AbstractVector{<: NormConservingPsp}, psp_positions) where {T} unit_cell_volume = basis.model.unit_cell_volume - n_proj = count_n_proj(psps, psp_positions) n_G = length(G_vectors(basis, kpt)) - proj_vectors = zeros(Complex{eltype(psp_positions[1][1])}, n_G, n_proj) G_plus_k = to_cpu(Gplusk_vectors(basis, kpt)) # Compute the columns of proj_vectors = 1/√Ω \hat proj_i(k+G) @@ -298,75 +296,7 @@ function build_projection_vectors_mem(basis::PlaneWaveBasis{T}, kpt::Kpoint, end) # TODO: to_device - ret = NonlocalProjectors(zeros(Complex{eltype(psp_positions[1][1])}, n_G), atom_projectors) - - # TODO: remove this - denseold = build_projection_vectors(basis, kpt, psps, psp_positions) - densenew = Matrix(ret) - if !(denseold ≈ densenew) - throw(AssertionError("Old and new projectors don't match. Norm of difference: $(norm(denseold - densenew)). Norm of old: $(norm(denseold)). Norm of new: $(norm(densenew)).")) - end - - ret -end - - -@doc raw""" -Build projection vectors for a atoms array generated by term_nonlocal - -```math -\begin{aligned} -H_{\rm at} &= \sum_{ij} C_{ij} \ket{{\rm proj}_i} \bra{{\rm proj}_j} \\ -H_{\rm per} &= \sum_R \sum_{ij} C_{ij} \ket{{\rm proj}_i(x-R)} \bra{{\rm proj}_j(x-R)} -\end{aligned} -``` - -```math -\begin{aligned} -\braket{e_k(G') \middle| H_{\rm per}}{e_k(G)} - &= \ldots \\ - &= \frac{1}{Ω} \sum_{ij} C_{ij} \widehat{\rm proj}_i(k+G') \widehat{\rm proj}_j^*(k+G), -\end{aligned} -``` - -where ``\widehat{\rm proj}_i(p) = ∫_{ℝ^3} {\rm proj}_i(r) e^{-ip·r} dr``. - -We store ``\frac{1}{\sqrt Ω} \widehat{\rm proj}_i(k+G)`` in `proj_vectors`. -""" -function build_projection_vectors(basis::PlaneWaveBasis{T}, kpt::Kpoint, - psps::AbstractVector{<: NormConservingPsp}, - psp_positions) where {T} - unit_cell_volume = basis.model.unit_cell_volume - n_proj = count_n_proj(psps, psp_positions) - n_G = length(G_vectors(basis, kpt)) - proj_vectors = zeros(Complex{eltype(psp_positions[1][1])}, n_G, n_proj) - G_plus_k = to_cpu(Gplusk_vectors(basis, kpt)) - - # Compute the columns of proj_vectors = 1/√Ω \hat proj_i(k+G) - # Since the proj_i are translates of each others, \hat proj_i(k+G) decouples as - # \hat proj_i(p) = ∫ proj(r-R) e^{-ip·r} dr = e^{-ip·R} \hat proj(p). - # The first term is the structure factor, the second the form factor. - offset = 0 # offset into proj_vectors - for (psp, positions) in zip(psps, psp_positions) - # Compute position-independent form factors - G_plus_k_cart = to_cpu(Gplusk_vectors_cart(basis, kpt)) - form_factors = build_projector_form_factors(psp, G_plus_k_cart) - - # Combine with structure factors - for r in positions - # k+G in this formula can also be G, this only changes an unimportant phase factor - structure_factors = map(p -> cis2pi(-dot(p, r)), G_plus_k) - @views for iproj = 1:count_n_proj(psp) - proj_vectors[:, offset+iproj] .= - structure_factors .* form_factors[:, iproj] ./ sqrt(unit_cell_volume) - end - offset += count_n_proj(psp) - end - end - @assert offset == n_proj - - # Offload potential values to a device (like a GPU) - to_device(basis.architecture, proj_vectors) + NonlocalProjectors(zeros(Complex{eltype(psp_positions[1][1])}, n_G), atom_projectors) end """ @@ -448,6 +378,7 @@ function PDPψk(basis, positions, psp_groups, kpt, kpt_minus_q, ψk) D = build_projection_coefficients(basis, psp_groups) P = build_projection_vectors(basis, kpt, psp_groups, positions) P_minus_q = build_projection_vectors(basis, kpt_minus_q, psp_groups, positions) + # TODO: probably needs an extra parenthesis to first compute P'ψ P * (D * P_minus_q' * ψk) end From 4f18296455cb3757e9d64ee4b88eb72eccbdb0b5 Mon Sep 17 00:00:00 2001 From: Bruno Ploumhans <13494793+Technici4n@users.noreply.github.com> Date: Tue, 6 May 2025 22:01:50 +0200 Subject: [PATCH 09/16] Bit a bit more clever with types to fix phonon testcase --- src/terms/nonlocal.jl | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/terms/nonlocal.jl b/src/terms/nonlocal.jl index b6e253d8be..4cf0c50067 100644 --- a/src/terms/nonlocal.jl +++ b/src/terms/nonlocal.jl @@ -33,9 +33,7 @@ Projector set of a single atom (independent of the atom's position), and the structure factor for the atom position. Used inside NonlocalProjectors such that the projector set can be reused for multiple atoms in the same atom group. """ -struct AtomProjectors{T <: Real, - VT <: AbstractVector{Complex{T}}, - PT <: AbstractMatrix{Complex{T}}} +struct AtomProjectors{VT <: AbstractVector, PT <: AbstractMatrix} # nbasis structure_factors::VT # nbasis x nproj @@ -51,13 +49,19 @@ In particular, random access to the matrix elements is not supported. """ struct NonlocalProjectors{T <: Real, ST <: AbstractVector{Complex{T}}, - PT <: AtomProjectors{T} + PT <: AtomProjectors, } <: AbstractMatrix{Complex{T}} # TODO: this is a real problem wrt. thread-safety, no? # nbasis proj_scratch::ST atoms::Vector{PT} end +function NonlocalProjectors(atoms::Vector{<:AtomProjectors}) + at = first(atoms) + T = promote_type(eltype(at.structure_factors), eltype(at.projectors)) + proj_scratch = similar(at.structure_factors, T) + NonlocalProjectors(proj_scratch, atoms) +end function Base.size(P::NonlocalProjectors) n = length(P.proj_scratch) @@ -295,8 +299,7 @@ function build_projection_vectors(basis::PlaneWaveBasis{T}, kpt::Kpoint, end end) - # TODO: to_device - NonlocalProjectors(zeros(Complex{eltype(psp_positions[1][1])}, n_G), atom_projectors) + NonlocalProjectors(atom_projectors) end """ From 3555c317e94589e5c97e7fb815a4eeb77ed65723 Mon Sep 17 00:00:00 2001 From: Bruno Ploumhans <13494793+Technici4n@users.noreply.github.com> Date: Tue, 6 May 2025 22:04:44 +0200 Subject: [PATCH 10/16] Move NonlocalProjectors code closer to where it's used --- src/terms/nonlocal.jl | 210 +++++++++++++++++++++--------------------- 1 file changed, 105 insertions(+), 105 deletions(-) diff --git a/src/terms/nonlocal.jl b/src/terms/nonlocal.jl index 4cf0c50067..8de6aefdd6 100644 --- a/src/terms/nonlocal.jl +++ b/src/terms/nonlocal.jl @@ -28,111 +28,6 @@ struct TermAtomicNonlocal <: Term ops::Vector{NonlocalOperator} end -""" -Projector set of a single atom (independent of the atom's position), -and the structure factor for the atom position. Used inside NonlocalProjectors -such that the projector set can be reused for multiple atoms in the same atom group. -""" -struct AtomProjectors{VT <: AbstractVector, PT <: AbstractMatrix} - # nbasis - structure_factors::VT - # nbasis x nproj - projectors::PT -end - -""" -Matrix-like type to represent the nonlocal projectors P without -allocating the full matrix. -This type extends AbstractMatrix, but it does not implement all -the required methods, only those that were shown to be needed. -In particular, random access to the matrix elements is not supported. -""" -struct NonlocalProjectors{T <: Real, - ST <: AbstractVector{Complex{T}}, - PT <: AtomProjectors, - } <: AbstractMatrix{Complex{T}} - # TODO: this is a real problem wrt. thread-safety, no? - # nbasis - proj_scratch::ST - atoms::Vector{PT} -end -function NonlocalProjectors(atoms::Vector{<:AtomProjectors}) - at = first(atoms) - T = promote_type(eltype(at.structure_factors), eltype(at.projectors)) - proj_scratch = similar(at.structure_factors, T) - NonlocalProjectors(proj_scratch, atoms) -end - -function Base.size(P::NonlocalProjectors) - n = length(P.proj_scratch) - m = sum(size(at.projectors, 2) for at in P.atoms) - (n, m) -end -function Base.Matrix(P::NonlocalProjectors{T}) where {T} - n, m = size(P) - out = zeros(Complex{T}, n, m) - iproj = 1 - for at in P.atoms - for proj in eachcol(at.projectors) - out[:, iproj] .= at.structure_factors .* proj - iproj += 1 - end - end - out -end - -# Add a level of indirection here to avoid ambiguity with the mul! method provided by Julia. -LinearAlgebra.mul!(C::AbstractVector, A::Adjoint{<:Any, <:NonlocalProjectors}, - ψk::AbstractVector) = _mul!(C, A, ψk) -LinearAlgebra.mul!(C::AbstractMatrix, A::Adjoint{<:Any, <:NonlocalProjectors}, - ψk::AbstractMatrix) = _mul!(C, A, ψk) - -LinearAlgebra.mul!(C::AbstractVector, A::NonlocalProjectors, B::AbstractVector, - α::Number, β::Number) = _mul!(C, A, B, α, β) -LinearAlgebra.mul!(C::AbstractMatrix, A::NonlocalProjectors, B::AbstractMatrix, - α::Number, β::Number) = _mul!(C, A, B, α, β) - -function _mul!(C::AbstractVecOrMat, A::Adjoint{<:Any, <:NonlocalProjectors}, - ψk::AbstractVecOrMat) - if size(C, 1) != size(A, 1) || size(A, 2) != size(ψk, 1) || size(ψk, 2) != size(C, 2) - throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(ψk)), C has size $(size(C))")) - end - - iproj = 1 - proj_scratch = A.parent.proj_scratch - for at in A.parent.atoms - for proj in eachcol(at.projectors) - proj_scratch .= at.structure_factors .* proj - @views mul!(C[iproj:iproj, :], proj_scratch', ψk) - iproj += 1 - end - end - C -end - -function _mul!(C::AbstractArray, A::NonlocalProjectors, B::AbstractArray, - α::Number, β::Number) - if size(C, 1) != size(A, 1) || size(A, 2) != size(B, 1) || size(B, 2) != size(C, 2) - throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(B)), C has size $(size(C))")) - end - - C .*= β - - iproj = 1 - proj_scratch = A.proj_scratch - for at in A.atoms - for proj in eachcol(at.projectors) - # TODO: does this use BLAS? - proj_scratch .= at.structure_factors .* proj - for iband in axes(B, 2) - @views C[:, iband] .+= proj_scratch .* (α * B[iproj, iband]) - end - iproj += 1 - end - end - C -end - @timing "ene_ops: nonlocal" function ene_ops(term::TermAtomicNonlocal, basis::PlaneWaveBasis{T}, ψ, occupation; kwargs...) where {T} @@ -249,6 +144,111 @@ function build_projection_coefficients(T::Type, psp::NormConservingPsp) proj_coeffs end +""" +Projector set of a single atom (independent of the atom's position), +and the structure factor for the atom position. Used inside NonlocalProjectors +such that the projector set can be reused for multiple atoms in the same atom group. +""" +struct AtomProjectors{VT <: AbstractVector, PT <: AbstractMatrix} + # nbasis + structure_factors::VT + # nbasis x nproj + projectors::PT +end + +""" +Matrix-like type to represent the nonlocal projection vectors P without +allocating the full matrix. +This type extends AbstractMatrix, but it does not implement all +the required methods, only those that were shown to be needed. +In particular, random access to the matrix elements is not supported. +""" +struct NonlocalProjectors{T <: Real, + ST <: AbstractVector{Complex{T}}, + PT <: AtomProjectors, + } <: AbstractMatrix{Complex{T}} + # TODO: this is a real problem wrt. thread-safety, no? + # nbasis + proj_scratch::ST + atoms::Vector{PT} +end +function NonlocalProjectors(atoms::Vector{<:AtomProjectors}) + at = first(atoms) + T = promote_type(eltype(at.structure_factors), eltype(at.projectors)) + proj_scratch = similar(at.structure_factors, T) + NonlocalProjectors(proj_scratch, atoms) +end + +function Base.size(P::NonlocalProjectors) + n = length(P.proj_scratch) + m = sum(size(at.projectors, 2) for at in P.atoms) + (n, m) +end +function Base.Matrix(P::NonlocalProjectors{T}) where {T} + n, m = size(P) + out = zeros(Complex{T}, n, m) + iproj = 1 + for at in P.atoms + for proj in eachcol(at.projectors) + out[:, iproj] .= at.structure_factors .* proj + iproj += 1 + end + end + out +end + +# Add a level of indirection here to avoid ambiguity with the mul! method provided by Julia. +LinearAlgebra.mul!(C::AbstractVector, A::Adjoint{<:Any, <:NonlocalProjectors}, + ψk::AbstractVector) = _mul!(C, A, ψk) +LinearAlgebra.mul!(C::AbstractMatrix, A::Adjoint{<:Any, <:NonlocalProjectors}, + ψk::AbstractMatrix) = _mul!(C, A, ψk) + +LinearAlgebra.mul!(C::AbstractVector, A::NonlocalProjectors, B::AbstractVector, + α::Number, β::Number) = _mul!(C, A, B, α, β) +LinearAlgebra.mul!(C::AbstractMatrix, A::NonlocalProjectors, B::AbstractMatrix, + α::Number, β::Number) = _mul!(C, A, B, α, β) + +function _mul!(C::AbstractVecOrMat, A::Adjoint{<:Any, <:NonlocalProjectors}, + ψk::AbstractVecOrMat) + if size(C, 1) != size(A, 1) || size(A, 2) != size(ψk, 1) || size(ψk, 2) != size(C, 2) + throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(ψk)), C has size $(size(C))")) + end + + iproj = 1 + proj_scratch = A.parent.proj_scratch + for at in A.parent.atoms + for proj in eachcol(at.projectors) + proj_scratch .= at.structure_factors .* proj + @views mul!(C[iproj:iproj, :], proj_scratch', ψk) + iproj += 1 + end + end + C +end + +function _mul!(C::AbstractArray, A::NonlocalProjectors, B::AbstractArray, + α::Number, β::Number) + if size(C, 1) != size(A, 1) || size(A, 2) != size(B, 1) || size(B, 2) != size(C, 2) + throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(B)), C has size $(size(C))")) + end + + C .*= β + + iproj = 1 + proj_scratch = A.proj_scratch + for at in A.atoms + for proj in eachcol(at.projectors) + # TODO: does this use BLAS? + proj_scratch .= at.structure_factors .* proj + for iband in axes(B, 2) + @views C[:, iband] .+= proj_scratch .* (α * B[iproj, iband]) + end + iproj += 1 + end + end + C +end + @doc raw""" Build projection vectors for a atoms array generated by term_nonlocal From 138ea8dfa3304524c6b4aa0867ee95735a5a6b4d Mon Sep 17 00:00:00 2001 From: Bruno Ploumhans <13494793+Technici4n@users.noreply.github.com> Date: Tue, 13 May 2025 18:46:47 +0200 Subject: [PATCH 11/16] Add show overrides --- src/terms/nonlocal.jl | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/terms/nonlocal.jl b/src/terms/nonlocal.jl index 8de6aefdd6..63d1384367 100644 --- a/src/terms/nonlocal.jl +++ b/src/terms/nonlocal.jl @@ -197,6 +197,15 @@ function Base.Matrix(P::NonlocalProjectors{T}) where {T} out end +function Base.show(io::IO, P::NonlocalProjectors) + print(io, "DFTK.NonlocalProjectors{") + show(io, P.atoms) + print(io, "}") +end +function Base.show(io::IO, ::MIME"text/plain", P::NonlocalProjectors) + print(io, summary(P)) +end + # Add a level of indirection here to avoid ambiguity with the mul! method provided by Julia. LinearAlgebra.mul!(C::AbstractVector, A::Adjoint{<:Any, <:NonlocalProjectors}, ψk::AbstractVector) = _mul!(C, A, ψk) From 55608ba6f65ae5580b2c8b787689e2f3dd3d2f49 Mon Sep 17 00:00:00 2001 From: Bruno Ploumhans <13494793+Technici4n@users.noreply.github.com> Date: Thu, 10 Jul 2025 10:59:15 +0200 Subject: [PATCH 12/16] Remove scratch vector --- src/terms/nonlocal.jl | 58 ++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/src/terms/nonlocal.jl b/src/terms/nonlocal.jl index 1448dd4142..642cfe1b0e 100644 --- a/src/terms/nonlocal.jl +++ b/src/terms/nonlocal.jl @@ -163,30 +163,28 @@ This type extends AbstractMatrix, but it does not implement all the required methods, only those that were shown to be needed. In particular, random access to the matrix elements is not supported. """ -struct NonlocalProjectors{T <: Real, - ST <: AbstractVector{Complex{T}}, +struct NonlocalProjectors{T <: Number, PT <: AtomProjectors, - } <: AbstractMatrix{Complex{T}} - # TODO: this is a real problem wrt. thread-safety, no? - # nbasis - proj_scratch::ST + } <: AbstractMatrix{T} atoms::Vector{PT} -end -function NonlocalProjectors(atoms::Vector{<:AtomProjectors}) - at = first(atoms) - T = promote_type(eltype(at.structure_factors), eltype(at.projectors)) - proj_scratch = similar(at.structure_factors, T) - NonlocalProjectors(proj_scratch, atoms) + + function NonlocalProjectors(atoms::Vector{PT}) where {PT} + # also serves as a check for length(atoms) > 0 + at1 = first(atoms) + new{promote_type(eltype(at1.structure_factors), + eltype(at1.projectors)), + PT}(atoms) + end end function Base.size(P::NonlocalProjectors) - n = length(P.proj_scratch) + n = length(first(P.atoms).structure_factors) m = sum(size(at.projectors, 2) for at in P.atoms) (n, m) end function Base.Matrix(P::NonlocalProjectors{T}) where {T} n, m = size(P) - out = zeros(Complex{T}, n, m) + out = Matrix{T}(undef, n, m) iproj = 1 for at in P.atoms for proj in eachcol(at.projectors) @@ -224,36 +222,40 @@ function _mul!(C::AbstractVecOrMat, A::Adjoint{<:Any, <:NonlocalProjectors}, end iproj = 1 - proj_scratch = A.parent.proj_scratch for at in A.parent.atoms for proj in eachcol(at.projectors) - proj_scratch .= at.structure_factors .* proj - @views mul!(C[iproj:iproj, :], proj_scratch', ψk) + for iband in axes(ψk, 2) + C[iproj, iband] = conj(dot(@view(ψk[:, iband]), + Diagonal(at.structure_factors), + proj)) + end iproj += 1 end end C end -function _mul!(C::AbstractArray, A::NonlocalProjectors, B::AbstractArray, - α::Number, β::Number) +function _mul!(C::AbstractArray, A::NonlocalProjectors{T}, B::AbstractArray, + α::Number, β::Number) where {T} if size(C, 1) != size(A, 1) || size(A, 2) != size(B, 1) || size(B, 2) != size(C, 2) throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(B)), C has size $(size(C))")) end C .*= β + maxproj = maximum(at -> size(at.projectors, 2), A.atoms) + # TODO: store in Channel to avoid repeated allocations? + Pbuffer = Matrix{T}(undef, size(A, 1), maxproj) + iproj = 1 - proj_scratch = A.proj_scratch for at in A.atoms - for proj in eachcol(at.projectors) - # TODO: does this use BLAS? - proj_scratch .= at.structure_factors .* proj - for iband in axes(B, 2) - @views C[:, iband] .+= proj_scratch .* (α * B[iproj, iband]) - end - iproj += 1 - end + nproj = size(at.projectors, 2) + + Pwork = @view Pbuffer[:, 1:nproj] + Pwork .= at.structure_factors .* at.projectors + mul!(C, Pwork, @view(B[iproj:iproj+nproj-1, :]), α, 1) + + iproj += nproj end C end From 9d7e6b3b72f61a1aeb5c0301d4158104dee21068 Mon Sep 17 00:00:00 2001 From: Bruno Ploumhans <13494793+Technici4n@users.noreply.github.com> Date: Thu, 10 Jul 2025 11:00:07 +0200 Subject: [PATCH 13/16] Don't comment out precompile workflow --- src/DFTK.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/DFTK.jl b/src/DFTK.jl index 84d9fbe316..a03a4e7c49 100644 --- a/src/DFTK.jl +++ b/src/DFTK.jl @@ -270,8 +270,8 @@ end positions = [ones(3)/8, -ones(3)/8] magnetic_moments = [2, -2] - # @compile_workload begin - # precompilation_workflow(lattice, atoms, positions, magnetic_moments) - # end + @compile_workload begin + precompilation_workflow(lattice, atoms, positions, magnetic_moments) + end end end # module DFTK From c1279fa526a35de504c056a863b8f430808b9654 Mon Sep 17 00:00:00 2001 From: Bruno Ploumhans <13494793+Technici4n@users.noreply.github.com> Date: Thu, 10 Jul 2025 11:07:18 +0200 Subject: [PATCH 14/16] Add parenthesis around P * psi --- src/terms/nonlocal.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/terms/nonlocal.jl b/src/terms/nonlocal.jl index 642cfe1b0e..3c1c5c6002 100644 --- a/src/terms/nonlocal.jl +++ b/src/terms/nonlocal.jl @@ -392,8 +392,7 @@ function PDPψk(basis, positions, psp_groups, kpt, kpt_minus_q, ψk) D = build_projection_coefficients(basis, psp_groups) P = build_projection_vectors(basis, kpt, psp_groups, positions) P_minus_q = build_projection_vectors(basis, kpt_minus_q, psp_groups, positions) - # TODO: probably needs an extra parenthesis to first compute P'ψ - P * (D * P_minus_q' * ψk) + P * (D * (P_minus_q' * ψk)) end function compute_dynmat_δH(::TermAtomicNonlocal, basis::PlaneWaveBasis{T}, ψ, occupation, From 8c4510826aa4a11bb18188b4dd8bce7d9513fdd9 Mon Sep 17 00:00:00 2001 From: Bruno Ploumhans <13494793+Technici4n@users.noreply.github.com> Date: Thu, 10 Jul 2025 11:33:34 +0200 Subject: [PATCH 15/16] A bit of cleanup --- src/terms/nonlocal.jl | 2 +- test/PspUpf.jl | 39 --------------------------------------- test/nonlocal.jl | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 40 deletions(-) create mode 100644 test/nonlocal.jl diff --git a/src/terms/nonlocal.jl b/src/terms/nonlocal.jl index 3c1c5c6002..2b2b98fbb2 100644 --- a/src/terms/nonlocal.jl +++ b/src/terms/nonlocal.jl @@ -244,7 +244,7 @@ function _mul!(C::AbstractArray, A::NonlocalProjectors{T}, B::AbstractArray, C .*= β maxproj = maximum(at -> size(at.projectors, 2), A.atoms) - # TODO: store in Channel to avoid repeated allocations? + # TODO: could store in a threadsafe cache to avoid repeated allocations Pbuffer = Matrix{T}(undef, size(A, 1), maxproj) iproj = 1 diff --git a/test/PspUpf.jl b/test/PspUpf.jl index cab255384c..9de8aeb40b 100644 --- a/test/PspUpf.jl +++ b/test/PspUpf.jl @@ -250,42 +250,3 @@ end end end end - -@testitem "Test nonlocal term operations" tags=[:psp] setup=[mPspUpf] begin - using DFTK - using LinearAlgebra - - lattice = 5 * I(3) - positions = [zeros(3), 1/3 .* ones(3), 2/3 .* ones(3)] - for (element, psp) in mPspUpf.upf_pseudos - if sum(psp.r2_ρion) > 0 # Otherwise, it's all 0 in the UPF as a placeholder - el = ElementPsp(element, psp) - atoms = [el, el, el] - model = model_DFT(lattice, atoms, positions; functionals=LDA()) - basis = PlaneWaveBasis(model; Ecut=5, kgrid=[2, 2, 2]) - n_bands = 7 - ψ = [DFTK.random_orbitals(basis, kpt, n_bands) for kpt in basis.kpoints] - occ = [2.0 * ones(n_bands) for _ in basis.kpoints] - ρ = DFTK.compute_density(basis, ψ, occ) - - energies, ham = DFTK.energy_hamiltonian(basis, ψ, occ; ρ) - hamψ = ham * ψ - - hblock = ham.blocks[1] - nonloc = hblock.nonlocal_op - nonlocal_dense = Matrix(nonloc) - ψk = ψ[1] - - Hψk = zero(ψk) - DFTK.apply!((;fourier=Hψk), nonloc, (; fourier=ψk)) - Hψk_dense = nonlocal_dense * ψk - - Pψk = nonloc.P' * ψk - DPψk = nonloc.D * Pψk - @show norm(nonloc.P * DPψk) norm(Matrix(nonloc.P) * DPψk) - @assert @show(norm(nonloc.P * DPψk - Matrix(nonloc.P) * DPψk)) < 1e-10 - - @assert @show(norm(Hψk - Hψk_dense)) < 1e-10 - end - end -end diff --git a/test/nonlocal.jl b/test/nonlocal.jl new file mode 100644 index 0000000000..bc55722eb5 --- /dev/null +++ b/test/nonlocal.jl @@ -0,0 +1,34 @@ +@testitem "Test NonlocalProjectors" tags=[:psp] setup=[mPspUpf] begin + using DFTK + using LinearAlgebra + + for (element, psp) in mPspUpf.upf_pseudos + lattice = 5 * I(3) + el = ElementPsp(element, psp) + atoms = [el, el, el] + positions = [zeros(3), 1/3 .* ones(3), 2/3 .* ones(3)] + model = model_DFT(lattice, atoms, positions; functionals=LDA()) + basis = PlaneWaveBasis(model; Ecut=5, kgrid=[2, 2, 2]) + + n_bands = 7 + ψ = [DFTK.random_orbitals(basis, kpt, n_bands) for kpt in basis.kpoints] + occ = [2.0 * ones(n_bands) for _ in basis.kpoints] + ρ = DFTK.compute_density(basis, ψ, occ) + + energies, ham = DFTK.energy_hamiltonian(basis, ψ, occ; ρ) + + for (hamblock, ψk) in zip(ham.blocks, ψ) + nonloc = hamblock.nonlocal_op + Hψk = zero(ψk) + DFTK.apply!((; fourier=Hψk), nonloc, (; fourier=ψk)) + + nonloc_dense = Matrix(nonloc) + Hψk_dense = nonloc_dense * ψk + + Pψk = nonloc.P' * ψk + DPψk = nonloc.D * Pψk + @assert nonloc.P * DPψk ≈ Matrix(nonloc.P) * DPψk atol=1e-10 + @assert Hψk ≈ Hψk_dense atol=1e-10 + end + end +end From f8f7009ecd2d098cc6fe9bb7a5342cf7e5866932 Mon Sep 17 00:00:00 2001 From: Bruno Ploumhans <13494793+Technici4n@users.noreply.github.com> Date: Thu, 10 Jul 2025 13:46:47 +0200 Subject: [PATCH 16/16] Fix multiplication by vector --- src/terms/nonlocal.jl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/terms/nonlocal.jl b/src/terms/nonlocal.jl index 2b2b98fbb2..4c90c7c7bf 100644 --- a/src/terms/nonlocal.jl +++ b/src/terms/nonlocal.jl @@ -235,8 +235,8 @@ function _mul!(C::AbstractVecOrMat, A::Adjoint{<:Any, <:NonlocalProjectors}, C end -function _mul!(C::AbstractArray, A::NonlocalProjectors{T}, B::AbstractArray, - α::Number, β::Number) where {T} +function _mul!(C::AbstractArray, A::NonlocalProjectors{T}, B::AbstractArray{BT}, + α::Number, β::Number) where {T, BT} if size(C, 1) != size(A, 1) || size(A, 2) != size(B, 1) || size(B, 2) != size(C, 2) throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(B)), C has size $(size(C))")) end @@ -253,7 +253,12 @@ function _mul!(C::AbstractArray, A::NonlocalProjectors{T}, B::AbstractArray, Pwork = @view Pbuffer[:, 1:nproj] Pwork .= at.structure_factors .* at.projectors - mul!(C, Pwork, @view(B[iproj:iproj+nproj-1, :]), α, 1) + Bwork = if BT <: AbstractVector + @view(B[iproj:iproj+nproj-1]) + else + @view(B[iproj:iproj+nproj-1, :]) + end + mul!(C, Pwork, Bwork, α, 1) iproj += nproj end