Implement new algorithm and fix test cases (#438)

fevangelista · web-flow · commit ad08d94ba3f2 · 2025-01-14T21:11:53.000-05:00
diff --git a/forte/modules/general_cc.py b/forte/modules/general_cc.py
@@ -135,7 +135,7 @@ def make_cluster_operator(max_exc, naelpi, mo_space_info, psi4_wfn):
                                 den = e_bvir + e_avir - e_aocc - e_bocc
                                 denominators.append(den)
 
-    print(f"Number of amplitudes: {sop.size()}")
+    print(f"Number of amplitudes: {len(sop)}")
     return (sop, denominators)
 
 
@@ -203,7 +203,7 @@ def solve_cc_equations(
         residual, e, e_proj = residual_equations(cc_type, t, op, selected_op, ref, ham, exp, compute_threshold, linked)
 
         residual_norm = 0.0
-        for l in range(selected_op.size()):
+        for l in range(len(selected_op)):
             t[l] -= residual[op_pool[l]] / denominators[op_pool[l]]
             residual_norm += abs(residual[op_pool[l]]) ** 2
 
@@ -442,15 +442,15 @@ def _run(self, data: ForteData) -> ForteData:
 
         # the list of operators selected from the full list
         if self.select_type is None:
-            op_pool = list(range(op.size()))
-            t = [0.0] * op.size()
-            print(f"\n The excitation operator pool contains {op.size()} elements")
+            op_pool = list(range(len(op)))
+            t = [0.0] * len(op)
+            print(f"\n The excitation operator pool contains {len(op)} elements")
         else:
             raise RuntimeError("Selected CC methods are not implemented yet")
             print(f"\n Selecting operators using the {selec_type} scheme")
             t = []
             op_pool = []
-            print(f"\n The selected operator pool contains {selected_op.size()} elements")
+            print(f"\n The selected operator pool contains {len(selected_op)} elements")
 
         old_e = 0.0
         start = time.time()
diff --git a/forte/sparse_ci/sparse_state_functions.cc b/forte/sparse_ci/sparse_state_functions.cc
@@ -5,7 +5,7 @@
  * that implements a variety of quantum chemistry methods for strongly
  * correlated electrons.
  *
- * Copyright (c) 2012-2024 by its authors (see COPYING, COPYING.LESSER, AUTHORS).
+ * Copyright (c) 2012-2025 by its authors (see COPYING, COPYING.LESSER, AUTHORS).
  *
  * The copyrights for code used from other parties are included in
  * the corresponding files.
@@ -39,20 +39,26 @@
 
 namespace forte {
 
+// This is a naive implementation of the operator application that is used for testing
 SparseState apply_operator_impl_naive(bool is_antihermitian, const SparseOperator& sop,
                                       const SparseState& state, double screen_thresh);
 
+// This is the grouped implementation of the operator application. Fast, but scaling is not optimal.
 SparseState apply_operator_impl_grouped(bool is_antihermitian, const SparseOperator& sop,
                                         const SparseState& state, double screen_thresh);
 
+// The default implementation is the grouped implementation, with grouping by alfa strings
+SparseState apply_operator_impl_grouped_string(bool is_antihermitian, const SparseOperator& sop,
+                                               const SparseState& state, double screen_thresh);
+
 SparseState apply_operator_lin(const SparseOperator& sop, const SparseState& state,
                                double screen_thresh) {
-    return apply_operator_impl_grouped(false, sop, state, screen_thresh);
+    return apply_operator_impl_grouped_string(false, sop, state, screen_thresh);
 }
 
 SparseState apply_operator_antiherm(const SparseOperator& sop, const SparseState& state,
                                     double screen_thresh) {
-    return apply_operator_impl_grouped(true, sop, state, screen_thresh);
+    return apply_operator_impl_grouped_string(true, sop, state, screen_thresh);
 }
 
 // This is a naive implementation of the operator application that is used for testing
@@ -172,6 +178,87 @@ SparseState apply_operator_impl_grouped(bool is_antihermitian, const SparseOpera
     return new_terms;
 }
 
+// Kernel that applies a grouped operator to a grouped state and accumulates into new_terms.
+// Operator/state group pairs whose keys fail the alfa-string test are skipped as a whole.
+// sop_groups and state_groups must be built by the caller, keyed by the alfa annihilation string
+// and the determinant alfa string; positive selects += (true) or -= (false) accumulation.
+template <bool positive>
+void apply_operator_kernel_string(const auto& sop_groups, const auto& state_groups,
+                                  const auto& screen_thresh, auto& new_terms) {
+    Determinant new_det;   // scratch: passed to faster_apply_operator_to_det, then used as key
+    Determinant sign_mask; // scratch: passed to compute_sign_mask
+    Determinant idx;       // scratch: passed to compute_sign_mask
+    for (const auto& [sqop_ann_a, sqop_group] : sop_groups) {
+        for (const auto& [det_a, state_group] : state_groups) {
+            // can the alfa annihilation string act on this alfa string? if not, skip both groups
+            if (det_a.fast_a_and_b_equal_b(sqop_ann_a)) {
+                // loop over the (annihilation, creation, coefficient) entries in this group
+                for (const auto& [sqop_ann, sqop_cre, t] : sqop_group) {
+                    for (const auto& [det, c] : state_group) {
+                        if (det.faster_can_apply_operator(sqop_cre, sqop_ann)) {
+                            if (std::abs(c * t) > screen_thresh) { // screen small terms
+                                compute_sign_mask(sqop_cre, sqop_ann, sign_mask, idx);
+                                const auto value = faster_apply_operator_to_det(
+                                    det, new_det, sqop_cre, sqop_ann, sign_mask);
+                                if constexpr (positive) {
+                                    new_terms[new_det] += value * t * c;
+                                } else {
+                                    new_terms[new_det] -= value * t * c;
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+// Groups state and operator by alfa strings and calls the kernel: once to add sop|state> and,
+// if is_antihermitian, again with cre/ann swapped to subtract. Throws if screen_thresh < 0.
+SparseState apply_operator_impl_grouped_string(bool is_antihermitian, const SparseOperator& sop,
+                                               const SparseState& state, double screen_thresh) {
+    if (screen_thresh < 0) {
+        throw std::invalid_argument(
+            "apply_operator_impl_grouped_string:screen_thresh must be non-negative");
+    }
+    SparseState new_terms;
+
+    // Group the determinants (with their coefficients) by their alfa string
+    std::unordered_map<String, std::vector<std::pair<Determinant, sparse_scalar_t>>, String::Hash>
+        state_groups;
+    for (const auto& [det, c] : state) {
+        state_groups[det.get_alfa_bits()].emplace_back(det, c);
+    }
+
+    // Group the operator terms by alfa annihilation string; entries are (ann, cre, coefficient)
+    std::unordered_map<String, std::vector<std::tuple<Determinant, Determinant, sparse_scalar_t>>,
+                       String::Hash>
+        sop_groups;
+    for (const auto& [sqop, t] : sop.elements()) {
+        sop_groups[sqop.ann().get_alfa_bits()].emplace_back(sqop.ann(), sqop.cre(), t);
+    }
+
+    // First pass: call the kernel to apply the operator (adding the result to new_terms)
+    apply_operator_kernel_string<true>(sop_groups, state_groups, screen_thresh, new_terms);
+
+    if (not is_antihermitian) {
+        return new_terms; // linear case: only the first pass is needed
+    }
+
+    // Second pass (antihermitian only): regroup by alfa creation string with the creation and
+    // annihilation parts swapped, so the kernel applies each term reversed with the same t
+    sop_groups.clear();
+    for (const auto& [sqop, t] : sop.elements()) {
+        sop_groups[sqop.cre().get_alfa_bits()].emplace_back(sqop.cre(), sqop.ann(), t);
+    }
+
+    // Call the kernel to apply the swapped operator (subtracting the result from new_terms)
+    apply_operator_kernel_string<false>(sop_groups, state_groups, screen_thresh, new_terms);
+
+    return new_terms;
+}
+
 std::vector<sparse_scalar_t> get_projection(const SparseOperatorList& sop, const SparseState& ref,
                                             const SparseState& state) {
     local_timer t;
diff --git a/tests/performance/sparse/sparse_operator_timing.py b/tests/performance/sparse/sparse_operator_timing.py
@@ -43,7 +43,7 @@ def sparse_operator_correctness():
     opH = forte.sparse_operator_hamiltonian(as_ints)
 
     print("\n\n  Number of determinants: ", len(dets))
-    print("  Number of integrals: ", opH.size())
+    print("  Number of integrals: ", len(opH))
 
     # Apply the Hamiltonian to a state that spans the entire Hilbert space
     c = 1 / np.sqrt(len(dets))
@@ -86,7 +86,7 @@ def sparse_operator_timing_1():
     opH = forte.sparse_operator_hamiltonian(as_ints)
 
     print("\n\n  Number of determinants: ", len(dets))
-    print("  Number of integrals: ", opH.size())
+    print("  Number of integrals: ", len(opH))
 
     # Apply the Hamiltonian to a state that spans the entire Hilbert space
     c = 1 / np.sqrt(len(dets))
@@ -133,7 +133,7 @@ def sparse_operator_timing_2():
 
     ndets = 7000
     print("\n\n  Number of determinants: ", ndets)
-    print("  Number of integrals: ", opH.size())
+    print("  Number of integrals: ", len(opH))
 
     # Apply the Hamiltonian to a state that spans 7000 determinants
     c = 1 / np.sqrt(ndets)
@@ -188,7 +188,7 @@ def sparse_operator_timing_3():
 
     ndets = 20000
     print("\n\n  Number of determinants: ", ndets)
-    print("  Number of integrals: ", opH.size())
+    print("  Number of integrals: ", len(opH))
 
     # Apply the Hamiltonian to a state that spans 7000 determinants
     c = 1 / np.sqrt(ndets)
diff --git a/tests/pytest/sparse_ci/test_sparse_operator.py b/tests/pytest/sparse_ci/test_sparse_operator.py
@@ -475,7 +475,7 @@ def test_sparse_operator_list_reverse():
     sopl.add("[1a+ 1a-]", 1.0)
     sopl.add("[0a+ 0a-]", 2.0)
     reversed_sopl = sopl.reverse()
-    assert sopl.size() == 2
+    assert len(sopl) == 2
     assert reversed_sopl[0] == 2.0
     assert reversed_sopl[1] == 1.0
     assert reversed_sopl(0)[0].str() == "[0a+ 0a-]"
@@ -487,7 +487,7 @@ def test_sparse_operator_list_remove():
     sopl.add("[1a+ 1a-]", 1.0)
     sopl.add("[0a+ 0a-]", 1.0)
     sopl.remove("[1a+ 1a-]")
-    assert sopl.size() == 1
+    assert len(sopl) == 1
 
 
 if __name__ == "__main__":