From 1e277478ab80341e7b58fd2d1f972dd6b32dc63b Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Tue, 26 May 2026 08:32:43 -0400 Subject: [PATCH 1/2] Code in support of Davidson * clean up Davidson code * add some options in ReplicatedMatrix * save eigenvectors of H in ProjectedMatrices --- src/DavidsonSolver.cc | 77 ++++++++++++++++++-------------------- src/DavidsonSolver.h | 5 ++- src/ProjectedMatrices.cc | 6 +-- src/ProjectedMatrices.h | 12 ++++++ src/ProjectedMatrices2N.cc | 4 +- src/ProjectedMatrices2N.h | 3 +- src/ReplicatedMatrix.cc | 27 +++++++++++++ src/ReplicatedMatrix.h | 8 +++- 8 files changed, 92 insertions(+), 50 deletions(-) diff --git a/src/DavidsonSolver.cc b/src/DavidsonSolver.cc index 4804af64..d6bd1d06 100644 --- a/src/DavidsonSolver.cc +++ b/src/DavidsonSolver.cc @@ -44,6 +44,21 @@ double evalEntropy(ProjectedMatricesInterface* projmatrices, return ts; } +// print 10 eigenvalues / row +void printEigenvalues(const std::vector& eval, std::ostream& os) +{ + os << std::fixed << std::setprecision(4); + for (unsigned int i = 0; i < eval.size(); i++) + { + os << eval[i]; + if (i % 10 == 9) + os << std::endl; + else + os << " "; + } + os << std::endl; +} + template DavidsonSolver::DavidsonSolver(std::ostream& os, Ions& ions, Hamiltonian* hamiltonian, Rho* rho, @@ -225,8 +240,9 @@ double DavidsonSolver::evaluateDerivative( template void DavidsonSolver::buildTarget2N_MVP( - MatrixType& h11, MatrixType& h12, MatrixType& h21, MatrixType& h22, - MatrixType& s11, MatrixType& s22, MatrixType& target) + const MatrixType& h11, const MatrixType& h12, const MatrixType& h21, + const MatrixType& h22, const MatrixType& s11, const MatrixType& s22, + MatrixType& target) { target_tm_.start(); @@ -427,7 +443,6 @@ int DavidsonSolver::solve( OrbitalsType hphi("Davidson_hphi", orbitals); MatrixType dm2Ninit("dm2N", 2 * numst_, 2 * numst_); std::vector eval(2 * numst_); - MatrixType evect("EigVect", 2 * numst_, 2 * numst_); MatrixType dm11("dm11", numst_, numst_); MatrixType dm12("dm12", numst_, numst_); @@ -681,21 +696,13 @@ int DavidsonSolver::solve( } // inner iterations // update orbitals + MatrixType evect("EigVect", 2 * numst_, 2 * numst_); proj_mat2N_->diagonalizeDM(eval, evect); #if 1 if (mmpi.PE0() && ct.verbose > 2) { os_ << "Eigenvalues of Interpolated DM: " << std::endl; - os_ << std::fixed << std::setprecision(4); - for (unsigned int i = 0; i < eval.size(); i++) - { - os_ << eval[i]; - if (i % 10 == 9) - os_ << std::endl; - else - os_ << " "; - } - os_ << std::endl; + printEigenvalues(eval, os_); double tot = 0.; for (int i = 0; i < numst_; i++) { @@ -717,18 +724,22 @@ int DavidsonSolver::solve( // 2*numst_); swapColumnsVect(evect, proj_mat2N_->getMatHB(), // eval, z2N ); - dm12.getsub(evect, numst_, numst_, 0, numst_); - dm22.getsub(evect, numst_, numst_, numst_, numst_); - // replace orbitals with eigenvectors corresponding to largest // eigenvalues of DM - if (mmpi.PE0() && ct.verbose > 2) - os_ << "Update trial eigenvectors..." << std::endl; - orbitals.multiply_by_matrix(dm12, 0., orbitals); - work_orbitals.multiply_by_matrix(dm22, 1., orbitals); - orbitals.incrementIterativeIndex(); - orbitals.incrementIterativeIndex(); - work_orbitals.incrementIterativeIndex(2); + { + MatrixType v12("v12", numst_, numst_); + MatrixType v22("v22", numst_, numst_); + v12.getsub(evect, numst_, numst_, 0, numst_); + v22.getsub(evect, numst_, numst_, numst_, numst_); + + if (mmpi.PE0() && ct.verbose > 2) + os_ << "Update trial eigenvectors..." << std::endl; + orbitals.multiply_by_matrix(v12, 0., orbitals); + work_orbitals.multiply_by_matrix(v22, 1., orbitals); + orbitals.incrementIterativeIndex(); + orbitals.incrementIterativeIndex(); + work_orbitals.incrementIterativeIndex(2); + } std::vector new_occ(numst_); double tocc = 0.; @@ -771,14 +782,7 @@ int DavidsonSolver::solve( if( onpe0 ) { os_<<"New occupations: "<::solve( && ct.verbose > 2) { os_ << "Final Occupations: " << std::endl; - os_ << std::fixed << std::setprecision(4); - for (int i = 0; i < numst_; i++) - { - os_ << new_occ[i]; - if (i % 10 == 9) - os_ << std::endl; - else - os_ << " "; - } - os_ << std::endl; + printEigenvalues(new_occ, os_); } #endif diff --git a/src/DavidsonSolver.h b/src/DavidsonSolver.h index 512cab5b..c058c90e 100644 --- a/src/DavidsonSolver.h +++ b/src/DavidsonSolver.h @@ -57,8 +57,9 @@ class DavidsonSolver int checkConvergence(const double e0, const int it, const double tol); double evaluateDerivative( MatrixType& dm2Ninit, MatrixType& delta_dm, const double ts0); - void buildTarget2N_MVP(MatrixType& h11, MatrixType& h12, MatrixType& h21, - MatrixType& h22, MatrixType& s11, MatrixType& s22, MatrixType& target); + void buildTarget2N_MVP(const MatrixType& h11, const MatrixType& h12, + const MatrixType& h21, const MatrixType& h22, const MatrixType& s11, + const MatrixType& s22, MatrixType& target); // void buildTarget2N_new(MatrixType& h11, // MatrixType& h12, // MatrixType& h21, diff --git a/src/ProjectedMatrices.cc b/src/ProjectedMatrices.cc index 50a6cd21..df18bcc8 100644 --- a/src/ProjectedMatrices.cc +++ b/src/ProjectedMatrices.cc @@ -390,18 +390,18 @@ void ProjectedMatrices::updateDMwithEigenstates() if (mmpi.PE0() && ct.verbose > 1) (*MPIdata::sout) << "ProjectedMatrices: Compute DM using eigenstates\n"; - MatrixType zz("Z", dim_, dim_); + if (ev_ == nullptr) ev_.reset(new MatrixType("ev", dim_, dim_)); // solves generalized eigenvalue problem // and return solution in zz and val - solveGenEigenProblem(zz); + solveGenEigenProblem(*ev_); computeChemicalPotentialAndOccupations(); if (mmpi.instancePE0() && ct.verbose > 1) std::cout << "Final mu_ = " << 0.5 * mu_ << " [Ha]" << std::endl; // Build the density matrix X // X = Z * gamma * Z^T - buildDM(zz); + buildDM(*ev_); } //"replicated" implementation of SP2. diff --git a/src/ProjectedMatrices.h b/src/ProjectedMatrices.h index 9827c7ef..da1bf112 100644 --- a/src/ProjectedMatrices.h +++ b/src/ProjectedMatrices.h @@ -97,6 +97,12 @@ class ProjectedMatrices : public ProjectedMatricesInterface */ std::unique_ptr> dm_; + /*! + * Matrix of eigenvectors used to build DM + * (if used) + */ + std::unique_ptr ev_; + /*! * Gram matrix of orbitals overlaps */ @@ -242,7 +248,13 @@ class ProjectedMatrices : public ProjectedMatricesInterface } void assignH(const MatrixType& matH) { *matH_ = matH; } + const MatrixType& getH() { return *matH_; } + const MatrixType& getEigenvectors() + { + assert(ev_ != nullptr); + return *ev_; + } void setHB2H() override { diff --git a/src/ProjectedMatrices2N.cc b/src/ProjectedMatrices2N.cc index 60f2cc80..3fc53e8b 100644 --- a/src/ProjectedMatrices2N.cc +++ b/src/ProjectedMatrices2N.cc @@ -31,8 +31,8 @@ ProjectedMatrices2N::~ProjectedMatrices2N() } template -void ProjectedMatrices2N::assignBlocksH( - MatrixType& h11, MatrixType& h12, MatrixType& h21, MatrixType& h22) +void ProjectedMatrices2N::assignBlocksH(const MatrixType& h11, + const MatrixType& h12, const MatrixType& h21, const MatrixType& h22) { ProjectedMatrices::matH_->assign(h11, 0, 0); ProjectedMatrices::matH_->assign(h12, 0, bdim_); diff --git a/src/ProjectedMatrices2N.h b/src/ProjectedMatrices2N.h index a87bf83f..b3cdc900 100644 --- a/src/ProjectedMatrices2N.h +++ b/src/ProjectedMatrices2N.h @@ -24,7 +24,8 @@ class ProjectedMatrices2N : public ProjectedMatrices ~ProjectedMatrices2N() override; - void assignBlocksH(MatrixType&, MatrixType&, MatrixType&, MatrixType&); + void assignBlocksH(const MatrixType&, const MatrixType&, const MatrixType&, + const MatrixType&); void iterativeUpdateDMwithEigenstates( const double occ_width, const bool flag_reduce_T = true); diff --git a/src/ReplicatedMatrix.cc b/src/ReplicatedMatrix.cc index 04c56e0d..062f13ad 100644 --- a/src/ReplicatedMatrix.cc +++ b/src/ReplicatedMatrix.cc @@ -17,6 +17,7 @@ using Memory = MemorySpace::Memory; constexpr double gpuroundup = 32; #else +#include "MGmol_blas1.h" #include "blas3_c.h" #include "fc_mangle.h" #include "lapack_c.h" @@ -687,6 +688,32 @@ int ReplicatedMatrix::iamax(const int j, double& val) return indx; } +double ReplicatedMatrix::nrm2(const int j) +{ +#ifdef HAVE_MAGMA + (void)os; + std::cerr << "ReplicatedMatrix::nrm2() not implemented" << std::endl; + MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); +#else + int ione = 1; + return DNRM2(&dim_, data_.get() + j * ld_, &ione); +#endif +} + +void ReplicatedMatrix::swapColumns(const int i, const int j) +{ +#ifdef HAVE_MAGMA + (void)os; + std::cerr << "ReplicatedMatrix::swapColumns) not implemented" << std::endl; + MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); +#else + std::vector tmp(data_.get() + i * ld_, data_.get() + (i + 1) * ld_); + + memcpy(data_.get() + i * ld_, data_.get() + j * ld_, ld_ * sizeof(double)); + memcpy(data_.get() + j * ld_, tmp.data(), ld_ * sizeof(double)); +#endif +} + void ReplicatedMatrix::setVal(const int i, const int j, const double val) { #ifdef HAVE_MAGMA diff --git a/src/ReplicatedMatrix.h b/src/ReplicatedMatrix.h index ff72b296..891e41eb 100644 --- a/src/ReplicatedMatrix.h +++ b/src/ReplicatedMatrix.h @@ -18,6 +18,8 @@ class ReplicatedVector; #include #include +// Square matrices +// MPI operations assume replication by each MPI task class ReplicatedMatrix { static MPI_Comm comm_; @@ -29,7 +31,7 @@ class ReplicatedMatrix // leading dimension for storage size_t ld_; - // matrix data + // matrix data stored in column major order std::unique_ptr data_; std::string name_; @@ -123,7 +125,11 @@ class ReplicatedMatrix void setVal(const int i, const int j, const double val); void setDiagonal(const std::vector& diag_values); + int iamax(const int j, double& val); + double nrm2(const int j); + void swapColumns(const int i, const int j); + double norm(char ty); double traceProduct(const ReplicatedMatrix&) const; void shift(const double); From d3ddf4d0b2e1bfe73e43da9556022f0cee832516 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Fri, 29 May 2026 16:18:25 -0400 Subject: [PATCH 2/2] Use eigenvectors of H in Davidson --- src/DavidsonSolver.cc | 140 ++++++++++++++++++++++++-- src/DavidsonSolver.h | 7 ++ src/ReplicatedMatrix.cc | 46 +++++---- src/ReplicatedMatrix.h | 3 +- tests/Davidson/davidson.cfg | 2 +- tests/DavidsonMixing/davidson.cfg | 2 +- tests/DavidsonReplicated/davidson.cfg | 2 +- 7 files changed, 172 insertions(+), 30 deletions(-) diff --git a/src/DavidsonSolver.cc b/src/DavidsonSolver.cc index d6bd1d06..a2502198 100644 --- a/src/DavidsonSolver.cc +++ b/src/DavidsonSolver.cc @@ -25,6 +25,7 @@ #include "tools.h" #include +#include // for std::iota template Timer DavidsonSolver::solve_tm_( @@ -250,11 +251,11 @@ void DavidsonSolver::buildTarget2N_MVP( proj_mat2N_->assignBlocksH(h11, h12, h21, h22); + // Build s2N using diagonal blocks s11 and s22 + // assuming s12 = s21 = 0 // if( onpe0 )os_<<"Build S2N..."<::buildTarget2N_MVP( target_tm_.stop(); } +// to differentiate very low occupation vectors, extract those with +// lowest energy +template +void DavidsonSolver::reorderEigenvectors( + MatrixType& evect, std::vector& eval) +{ + Control& ct = *(Control::instance()); + MGmol_MPI& mmpi(*(MGmol_MPI::instance())); + + // get eigenvectors of H used to build target DM + const MatrixType& ev(proj_mat2N_->getEigenvectors()); + + MatrixType p("p", 2 * numst_, 2 * numst_); + p.gemm('t', 'n', 1, ev, evect, 0); + +#ifndef NDEBUG + // verify that the sum of the elements squrade in each column is 1 + for (int j = 0; j < 2 * numst_; j++) + { + double a = p.nrm2(j); + std::cout << j << " " << a * a << std::endl; + assert(std::abs(a - 1) < 1.e-2); + } +#endif + + // now find evect with largest overlap with first numst_ columns of + // ev zero out elements associated with last numst_ columns of ev + // (high energy ones) + for (int i = numst_; i < 2 * numst_; i++) + for (int j = 0; j < 2 * numst_; j++) + p.setVal(i, j, 0); + + // do not include eigenstates with overlap below threshold + const double overlap_threshold = 0.2; + // keep vectors associated with occupation above threshold unchanged + const double occ_threshold = 0.01; + std::vector w(2 * numst_); + int count = 0; + for (int j = 0; j < 2 * numst_; j++) + { + // first compute the norm of the projection onto ev lowest energy states + double a = p.nrm2(j); + w[j] = a * a; + // std::cout << j << " " << w[j] << std::endl; + // Set weight to 0 if below tolerance + // By doing that we exclude eigenvectors with very little + // overlap with lowest eigenstates of H in subsequent reordering + if (w[j] < overlap_threshold) w[j] = 0; + + if (eval[j] > occ_threshold) w[j] = 1; + + if (w[j] > 0 && w[j] < 1) count++; + } + if (mmpi.instancePE0() && ct.verbose > 1) + { + std::cout << "Sort " << count << " eigenvectors" << std::endl; + for (int j = 0; j < 2 * numst_; j++) + { + if (w[j] > 0 && w[j] < 1) + std::cout << "eval [" << j << "] = " << eval[j] << std::endl; + } + } + + // Create a vector of indices [0, 1, 2, ...] + std::vector indices(w.size()); + std::iota(indices.begin(), indices.end(), 0); + + // Sort indices based on values in 'w' + std::sort(indices.begin(), indices.end(), + [&](int i1, int i2) { return w[i1] < w[i2]; }); + for (int idx : indices) + { + if (w[idx] > 0 && w[idx] < 1 && mmpi.instancePE0() && ct.verbose > 2) + std::cout << "Original Index: " << idx << ", overlap " << w[idx] + << std::endl; + } + + // Broadcast permutations to make sure all MPI tasks use the same + MPI_Comm comm = mmpi.commSameSpin(); + MPI_Bcast(indices.data(), indices.size(), MPI_INT, 0, comm); + + // permutation of eigenvalues + { + std::vector tmp_eval = eval; + int j = 0; + for (int idx : indices) + { + if (idx != j && w[idx] > 0 && w[idx] < 1 && mmpi.instancePE0() + && ct.verbose > 2) + std::cout << idx << " <-> " << j << std::endl; + eval[j] = tmp_eval[idx]; + j++; + } + } + + // permutation of eigenvectors in evect + { + MatrixType perm("perm", 2 * numst_, 2 * numst_); + int j = 0; + for (int idx : indices) + { + perm.setVal(idx, j, 1); + j++; + } + MatrixType tmp("tmp", 2 * numst_, 2 * numst_); + tmp.gemm('n', 'n', 1, evect, perm, 0); + evect = tmp; + } +#if 0 + // repeat to check result + p.gemm('t', 'n', 1, ev, evect, 0); + for (int i = numst_; i < 2 * numst_; i++) + for (int j = 0; j < 2 * numst_; j++) + p.setVal(i, j, 0); + for (int j = 0; j < 2 * numst_; j++) + { + double a = p.nrm2(j); + w[j] = a * a; + std::cout << j << " " << w[j] << std::endl; + } +#endif +}; + // template // void DavidsonSolver::buildTarget2N_new( // MatrixType& h11, @@ -592,7 +716,9 @@ int DavidsonSolver::solve( MatrixType delta_dm("delta_dm", 2 * numst_, 2 * numst_); buildTarget2N_MVP(h11, h12, h21, h22, s11, s22, target); - if (mmpi.instancePE0() && ct.verbose > 1 && (outer_it % 10 == 0)) + const int out_freq = ct.verbose > 2 ? 1 : 10; + if (mmpi.instancePE0() && ct.verbose > 1 + && (outer_it % out_freq == 0)) { proj_mat2N_->printEigenvalues(os_); proj_mat2N_->printOccupations(os_); @@ -698,6 +824,7 @@ int DavidsonSolver::solve( // update orbitals MatrixType evect("EigVect", 2 * numst_, 2 * numst_); proj_mat2N_->diagonalizeDM(eval, evect); + #if 1 if (mmpi.PE0() && ct.verbose > 2) { @@ -720,12 +847,9 @@ int DavidsonSolver::solve( // to differentiate very low occupation vectors, extract those with // lowest energy - // MatrixType z2N("z2N",2*numst_, - // 2*numst_); swapColumnsVect(evect, proj_mat2N_->getMatHB(), - // eval, z2N ); + reorderEigenvectors(evect, eval); - // replace orbitals with eigenvectors corresponding to largest - // eigenvalues of DM + // replace orbitals with rightmost numst_ columns of evect { MatrixType v12("v12", numst_, numst_); MatrixType v22("v22", numst_, numst_); diff --git a/src/DavidsonSolver.h b/src/DavidsonSolver.h index c058c90e..1b11a118 100644 --- a/src/DavidsonSolver.h +++ b/src/DavidsonSolver.h @@ -70,6 +70,13 @@ class DavidsonSolver // const std::vector& auxenergies, const double kbT, // const double eta, MatrixType& target); + /*! + * To differentiate very low occupation vectors, extract those with + * lowest energy by reordering evect/eval + * (rightmost vectors will be used in next outer itertaion) + */ + void reorderEigenvectors(MatrixType& evect, std::vector& eval); + public: DavidsonSolver(std::ostream& os, Ions& ions, Hamiltonian* hamiltonian, Rho* rho, diff --git a/src/ReplicatedMatrix.cc b/src/ReplicatedMatrix.cc index 062f13ad..9f0e72d6 100644 --- a/src/ReplicatedMatrix.cc +++ b/src/ReplicatedMatrix.cc @@ -170,6 +170,30 @@ void ReplicatedMatrix::consolidate() #endif } +void ReplicatedMatrix::bcast(const int root) +{ + assert(comm_ != MPI_COMM_NULL); + +#ifdef HAVE_MAGMA + std::vector mat(dim_ * ld_); + auto& magma_singleton = MagmaSingleton::get_magma_singleton(); + + // copy from GPU to CPU + magma_dgetmatrix( + dim_, dim_, data_.get(), ld_, mat.data(), ld_, magma_singleton.queue_); + double* data = mat.data(); +#else + double* data = data_.get(); +#endif + MPI_Bcast(data, dim_ * ld_, MPI_DOUBLE, root, comm_); + +#ifdef HAVE_MAGMA + // copy from CPU to GPU + magma_dsetmatrix( + dim_, dim_, data, ld_, data_.get(), ld_, magma_singleton.queue_); +#endif +} + void ReplicatedMatrix::assign( const ReplicatedMatrix& src, const int ib, const int jb) { @@ -181,10 +205,10 @@ void ReplicatedMatrix::assign( magma_dcopymatrix(src.dim_, src.dim_, src.data_.get(), src.ld_, data_.get() + jb * ld_ + ib, ld_, magma_singleton.queue_); #else - char uplo = 'a'; - int dim = src.dim_; - int lda = src.ld_; - int ldb = ld_; + char uplo = 'a'; + int dim = src.dim_; + int lda = src.ld_; + int ldb = ld_; DLACPY(&uplo, &dim, &dim, src.data_.get(), &lda, data_.get() + jb * ld_ + ib, &ldb); #endif @@ -700,20 +724,6 @@ double ReplicatedMatrix::nrm2(const int j) #endif } -void ReplicatedMatrix::swapColumns(const int i, const int j) -{ -#ifdef HAVE_MAGMA - (void)os; - std::cerr << "ReplicatedMatrix::swapColumns) not implemented" << std::endl; - MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); -#else - std::vector tmp(data_.get() + i * ld_, data_.get() + (i + 1) * ld_); - - memcpy(data_.get() + i * ld_, data_.get() + j * ld_, ld_ * sizeof(double)); - memcpy(data_.get() + j * ld_, tmp.data(), ld_ * sizeof(double)); -#endif -} - void ReplicatedMatrix::setVal(const int i, const int j, const double val) { #ifdef HAVE_MAGMA diff --git a/src/ReplicatedMatrix.h b/src/ReplicatedMatrix.h index 891e41eb..d2fd2305 100644 --- a/src/ReplicatedMatrix.h +++ b/src/ReplicatedMatrix.h @@ -90,6 +90,8 @@ class ReplicatedMatrix // sum up values across MPI tasks void consolidate(); + void bcast(const int root); + void axpy(const double alpha, const ReplicatedMatrix& a); void init(const double* const ha, const int lda); @@ -128,7 +130,6 @@ class ReplicatedMatrix int iamax(const int j, double& val); double nrm2(const int j); - void swapColumns(const int i, const int j); double norm(char ty); double traceProduct(const ReplicatedMatrix&) const; diff --git a/tests/Davidson/davidson.cfg b/tests/Davidson/davidson.cfg index e34f407b..f21d0221 100644 --- a/tests/Davidson/davidson.cfg +++ b/tests/Davidson/davidson.cfg @@ -20,7 +20,7 @@ solver=PCG type=QUENCH [Quench] solver=Davidson -max_steps=200 +max_steps=50 atol=1.e-8 [Orbitals] nempty=10 diff --git a/tests/DavidsonMixing/davidson.cfg b/tests/DavidsonMixing/davidson.cfg index 39c7d457..7d1be6ab 100644 --- a/tests/DavidsonMixing/davidson.cfg +++ b/tests/DavidsonMixing/davidson.cfg @@ -19,7 +19,7 @@ pseudopotential=pseudo.H_ONCV_PBE_SG15 type=QUENCH [Quench] solver=Davidson -max_steps=100 +max_steps=50 atol=1.e-8 [Orbitals] nempty=1 diff --git a/tests/DavidsonReplicated/davidson.cfg b/tests/DavidsonReplicated/davidson.cfg index 87f0ea1f..c6beff67 100644 --- a/tests/DavidsonReplicated/davidson.cfg +++ b/tests/DavidsonReplicated/davidson.cfg @@ -20,7 +20,7 @@ solver=PCG type=QUENCH [Quench] solver=Davidson -max_steps=200 +max_steps=50 atol=1.e-8 [Orbitals] nempty=10