-
Notifications
You must be signed in to change notification settings - Fork 108
Reduce RAM usage of nonlocal term #1088
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
ebbcc6a
1c3b8a9
49c904b
1bd11c6
903bf9c
c2d6d66
3df5bb0
9146d61
4f18296
3555c31
138ea8d
d788668
55608ba
9d7e6b3
c1279fa
8c45108
f8f7009
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -144,6 +144,126 @@ function build_projection_coefficients(T::Type, psp::NormConservingPsp) | |
| proj_coeffs | ||
| end | ||
|
|
||
| """ | ||
| Projector set of a single atom (independent of the atom's position), | ||
| and the structure factor for the atom position. Used inside NonlocalProjectors | ||
| such that the projector set can be reused for multiple atoms in the same atom group. | ||
| """ | ||
| struct AtomProjectors{VT <: AbstractVector, PT <: AbstractMatrix} | ||
| # nbasis | ||
| structure_factors::VT | ||
| # nbasis x nproj | ||
| projectors::PT | ||
| end | ||
|
|
||
| """ | ||
| Matrix-like type to represent the nonlocal projection vectors P without | ||
| allocating the full matrix. | ||
| This type extends AbstractMatrix, but it does not implement all | ||
| the required methods, only those that were shown to be needed. | ||
| In particular, random access to the matrix elements is not supported. | ||
| """ | ||
| struct NonlocalProjectors{T <: Number, | ||
| PT <: AtomProjectors, | ||
| } <: AbstractMatrix{T} | ||
| atoms::Vector{PT} | ||
|
|
||
| function NonlocalProjectors(atoms::Vector{PT}) where {PT} | ||
| # also serves as a check for length(atoms) > 0 | ||
| at1 = first(atoms) | ||
| new{promote_type(eltype(at1.structure_factors), | ||
| eltype(at1.projectors)), | ||
| PT}(atoms) | ||
| end | ||
| end | ||
|
|
||
| function Base.size(P::NonlocalProjectors) | ||
| n = length(first(P.atoms).structure_factors) | ||
| m = sum(size(at.projectors, 2) for at in P.atoms) | ||
| (n, m) | ||
| end | ||
| function Base.Matrix(P::NonlocalProjectors{T}) where {T} | ||
| n, m = size(P) | ||
| out = Matrix{T}(undef, n, m) | ||
| iproj = 1 | ||
| for at in P.atoms | ||
| for proj in eachcol(at.projectors) | ||
| out[:, iproj] .= at.structure_factors .* proj | ||
| iproj += 1 | ||
| end | ||
| end | ||
| out | ||
| end | ||
|
|
||
| function Base.show(io::IO, P::NonlocalProjectors) | ||
| print(io, "DFTK.NonlocalProjectors{") | ||
| show(io, P.atoms) | ||
| print(io, "}") | ||
| end | ||
| function Base.show(io::IO, ::MIME"text/plain", P::NonlocalProjectors) | ||
| print(io, summary(P)) | ||
| end | ||
|
|
||
| # Add a level of indirection here to avoid ambiguity with the mul! method provided by Julia. | ||
| LinearAlgebra.mul!(C::AbstractVector, A::Adjoint{<:Any, <:NonlocalProjectors}, | ||
| ψk::AbstractVector) = _mul!(C, A, ψk) | ||
| LinearAlgebra.mul!(C::AbstractMatrix, A::Adjoint{<:Any, <:NonlocalProjectors}, | ||
| ψk::AbstractMatrix) = _mul!(C, A, ψk) | ||
|
|
||
| LinearAlgebra.mul!(C::AbstractVector, A::NonlocalProjectors, B::AbstractVector, | ||
| α::Number, β::Number) = _mul!(C, A, B, α, β) | ||
| LinearAlgebra.mul!(C::AbstractMatrix, A::NonlocalProjectors, B::AbstractMatrix, | ||
| α::Number, β::Number) = _mul!(C, A, B, α, β) | ||
|
|
||
| function _mul!(C::AbstractVecOrMat, A::Adjoint{<:Any, <:NonlocalProjectors}, | ||
| ψk::AbstractVecOrMat) | ||
| if size(C, 1) != size(A, 1) || size(A, 2) != size(ψk, 1) || size(ψk, 2) != size(C, 2) | ||
| throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(ψk)), C has size $(size(C))")) | ||
| end | ||
|
|
||
| iproj = 1 | ||
| for at in A.parent.atoms | ||
| for proj in eachcol(at.projectors) | ||
|
Comment on lines
+225
to
+226
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. join these loops to avoid too deep nesting. |
||
| for iband in axes(ψk, 2) | ||
| C[iproj, iband] = conj(dot(@view(ψk[:, iband]), | ||
| Diagonal(at.structure_factors), | ||
| proj)) | ||
| end | ||
| iproj += 1 | ||
| end | ||
| end | ||
| C | ||
| end | ||
|
|
||
| function _mul!(C::AbstractArray, A::NonlocalProjectors{T}, B::AbstractArray{BT}, | ||
| α::Number, β::Number) where {T, BT} | ||
| if size(C, 1) != size(A, 1) || size(A, 2) != size(B, 1) || size(B, 2) != size(C, 2) | ||
| throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(B)), C has size $(size(C))")) | ||
| end | ||
|
|
||
| C .*= β | ||
|
|
||
| maxproj = maximum(at -> size(at.projectors, 2), A.atoms) | ||
| # TODO: could store in a threadsafe cache to avoid repeated allocations | ||
| Pbuffer = Matrix{T}(undef, size(A, 1), maxproj) | ||
|
|
||
| iproj = 1 | ||
| for at in A.atoms | ||
| nproj = size(at.projectors, 2) | ||
|
|
||
| Pwork = @view Pbuffer[:, 1:nproj] | ||
| Pwork .= at.structure_factors .* at.projectors | ||
| Bwork = if BT <: AbstractVector | ||
| @view(B[iproj:iproj+nproj-1]) | ||
| else | ||
| @view(B[iproj:iproj+nproj-1, :]) | ||
| end | ||
|
Comment on lines
+256
to
+260
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this looks weird. |
||
| mul!(C, Pwork, Bwork, α, 1) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why not put |
||
|
|
||
| iproj += nproj | ||
| end | ||
| C | ||
| end | ||
|
|
||
| @doc raw""" | ||
| Build projection vectors for a atoms array generated by term_nonlocal | ||
|
|
@@ -171,36 +291,31 @@ function build_projection_vectors(basis::PlaneWaveBasis{T}, kpt::Kpoint, | |
| psps::AbstractVector{<: NormConservingPsp}, | ||
| psp_positions) where {T} | ||
| unit_cell_volume = basis.model.unit_cell_volume | ||
| n_proj = count_n_proj(psps, psp_positions) | ||
| n_G = length(G_vectors(basis, kpt)) | ||
| proj_vectors = zeros(Complex{eltype(psp_positions[1][1])}, n_G, n_proj) | ||
| G_plus_k = to_cpu(Gplusk_vectors(basis, kpt)) | ||
|
|
||
| # Compute the columns of proj_vectors = 1/√Ω \hat proj_i(k+G) | ||
| # Since the proj_i are translates of each others, \hat proj_i(k+G) decouples as | ||
| # \hat proj_i(p) = ∫ proj(r-R) e^{-ip·r} dr = e^{-ip·R} \hat proj(p). | ||
| # The first term is the structure factor, the second the form factor. | ||
| offset = 0 # offset into proj_vectors | ||
| for (psp, positions) in zip(psps, psp_positions) | ||
| atom_projectors = reduce(vcat, map(zip(psps, psp_positions)) do (psp, positions) | ||
| # Compute position-independent form factors | ||
| G_plus_k_cart = to_cpu(Gplusk_vectors_cart(basis, kpt)) | ||
| form_factors = build_projector_form_factors(psp, G_plus_k_cart) | ||
| psp_form_factors = build_projector_form_factors(psp, G_plus_k_cart) | ||
| psp_form_factors ./= sqrt(unit_cell_volume) | ||
| # Offload potential values to a device (like a GPU), | ||
| # and make sure to share this allocation for all atoms in the group | ||
| psp_form_factors = to_device(basis.architecture, psp_form_factors) | ||
|
|
||
| # Combine with structure factors | ||
| for r in positions | ||
| map(positions) do r | ||
| # k+G in this formula can also be G, this only changes an unimportant phase factor | ||
| structure_factors = map(p -> cis2pi(-dot(p, r)), G_plus_k) | ||
| @views for iproj = 1:count_n_proj(psp) | ||
| proj_vectors[:, offset+iproj] .= | ||
| structure_factors .* form_factors[:, iproj] ./ sqrt(unit_cell_volume) | ||
| end | ||
| offset += count_n_proj(psp) | ||
| structure_factors = to_device(basis.architecture, map(p -> cis2pi(-dot(p, r)), G_plus_k)) | ||
| AtomProjectors(structure_factors, psp_form_factors) | ||
| end | ||
| end | ||
| @assert offset == n_proj | ||
| end) | ||
|
|
||
| # Offload potential values to a device (like a GPU) | ||
| to_device(basis.architecture, proj_vectors) | ||
| NonlocalProjectors(atom_projectors) | ||
| end | ||
|
|
||
| """ | ||
|
|
@@ -282,7 +397,7 @@ function PDPψk(basis, positions, psp_groups, kpt, kpt_minus_q, ψk) | |
| D = build_projection_coefficients(basis, psp_groups) | ||
| P = build_projection_vectors(basis, kpt, psp_groups, positions) | ||
| P_minus_q = build_projection_vectors(basis, kpt_minus_q, psp_groups, positions) | ||
| P * (D * P_minus_q' * ψk) | ||
| P * (D * (P_minus_q' * ψk)) | ||
| end | ||
|
|
||
| function compute_dynmat_δH(::TermAtomicNonlocal, basis::PlaneWaveBasis{T}, ψ, occupation, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| @testitem "Test NonlocalProjectors" tags=[:psp] setup=[mPspUpf] begin | ||
| using DFTK | ||
| using LinearAlgebra | ||
|
|
||
| for (element, psp) in mPspUpf.upf_pseudos | ||
| lattice = 5 * I(3) | ||
| el = ElementPsp(element, psp) | ||
| atoms = [el, el, el] | ||
| positions = [zeros(3), 1/3 .* ones(3), 2/3 .* ones(3)] | ||
| model = model_DFT(lattice, atoms, positions; functionals=LDA()) | ||
| basis = PlaneWaveBasis(model; Ecut=5, kgrid=[2, 2, 2]) | ||
|
|
||
| n_bands = 7 | ||
| ψ = [DFTK.random_orbitals(basis, kpt, n_bands) for kpt in basis.kpoints] | ||
| occ = [2.0 * ones(n_bands) for _ in basis.kpoints] | ||
| ρ = DFTK.compute_density(basis, ψ, occ) | ||
|
|
||
| energies, ham = DFTK.energy_hamiltonian(basis, ψ, occ; ρ) | ||
|
|
||
| for (hamblock, ψk) in zip(ham.blocks, ψ) | ||
| nonloc = hamblock.nonlocal_op | ||
| Hψk = zero(ψk) | ||
| DFTK.apply!((; fourier=Hψk), nonloc, (; fourier=ψk)) | ||
|
|
||
| nonloc_dense = Matrix(nonloc) | ||
| Hψk_dense = nonloc_dense * ψk | ||
|
|
||
| Pψk = nonloc.P' * ψk | ||
| DPψk = nonloc.D * Pψk | ||
| @assert nonloc.P * DPψk ≈ Matrix(nonloc.P) * DPψk atol=1e-10 | ||
| @assert Hψk ≈ Hψk_dense atol=1e-10 | ||
| end | ||
| end | ||
| end |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is this needed ? Surprises me a bit.