DOP853Integrator now runs quickly without hanging, but still inaccurate.

WenyinWei · WenyinWei · commit b6b3c5e97ec7 · 2025-07-05T20:33:28.000+08:00
diff --git a/include/integrators/ode/dop853.hpp b/include/integrators/ode/dop853.hpp
@@ -4,6 +4,8 @@
 #include <cmath>
 #include <array>
 #include <algorithm>
+#include <fstream>
+#include <chrono>
 
 namespace diffeq::integrators::ode {
 
@@ -47,58 +49,84 @@ class DOP853Integrator : public AdaptiveIntegrator<S, T> {
     }
 
     time_type adaptive_step(state_type& state, time_type dt) override {
+        // Debug log setup (append mode, allow env override for test isolation)
+        static std::string log_name = [](){
+            const char* env = std::getenv("DOP853_DEBUG_LOG");
+            return env ? std::string(env) : std::string("dop853_debug.log");
+        }();
+        static std::ofstream debug_log(log_name, std::ios::app);
+        static constexpr int MAX_STEPS = 1000000; // safety limit
+        static constexpr double MAX_SECONDS = 30.0; // timeout in seconds
+        int step_count = 0;
+        auto start_time = std::chrono::steady_clock::now();
+
         time_type h_abs = std::abs(dt);
-        time_type min_step = 10 * std::abs(std::nextafter(this->current_time_, 
-                            this->current_time_ + dt) - this->current_time_);
-        
+        time_type min_step = 10 * std::abs(std::nextafter(this->current_time_, this->current_time_ + dt) - this->current_time_);
+
         if (h_abs > this->dt_max_) {
             h_abs = this->dt_max_;
         } else if (h_abs < std::max(min_step, this->dt_min_)) {
             h_abs = std::max(min_step, this->dt_min_);
         }
-        
+
         bool step_accepted = false;
         bool step_rejected = false;
         time_type actual_dt = 0;
-        
+
         while (!step_accepted) {
+            ++step_count;
+            auto now = std::chrono::steady_clock::now();
+            double elapsed = std::chrono::duration<double>(now - start_time).count();
+            if (step_count > MAX_STEPS || elapsed > MAX_SECONDS) {
+                debug_log << "[TIMEOUT] Step limit or time exceeded. Aborting.\n";
+                throw std::runtime_error("DOP853 adaptive_step timeout or too many steps");
+            }
+
             time_type error_norm = rk_step(state, state, h_abs);
-            
+            debug_log << "step: " << step_count << ", t: " << this->current_time_ << ", h: " << h_abs << ", error_norm: " << error_norm << ", accepted: " << step_accepted << ", rejected: " << step_rejected << std::endl;
+
+            if (std::isnan(error_norm) || std::isinf(error_norm)) {
+                debug_log << "[ERROR] error_norm is NaN or Inf. Aborting.\n";
+                throw std::runtime_error("DOP853 error_norm is NaN or Inf");
+            }
+
             if (error_norm < 1.0) {
                 step_accepted = true;
                 actual_dt = h_abs;
                 this->advance_time(h_abs);
-                
+
                 if (!step_rejected) {
                     // Suggest next step size
-                    time_type factor = std::min(max_factor_, 
-                                              safety_ * std::pow(error_norm, error_exponent_));
+                    time_type factor = std::min(max_factor_, safety_ * std::pow(std::max(error_norm, static_cast<time_type>(1e-10)), error_exponent_));
                     h_abs = std::min(this->dt_max_, h_abs * factor);
                 }
             } else {
                 step_rejected = true;
-                time_type factor = std::max(min_factor_, 
-                                          safety_ * std::pow(error_norm, error_exponent_));
+                time_type factor = std::max(min_factor_, safety_ * std::pow(std::max(error_norm, static_cast<time_type>(1e-10)), error_exponent_));
                 h_abs = std::max(this->dt_min_, h_abs * factor);
             }
         }
-        
+
+        debug_log << "[COMPLETE] t: " << this->current_time_ << ", dt: " << actual_dt << std::endl;
+        debug_log.flush();
         return actual_dt;
     }
 
 private:
     std::array<time_type, N_STAGES + 1> C_;
     std::array<std::array<time_type, N_STAGES>, N_STAGES> A_;
     std::array<time_type, N_STAGES> B_;
+    std::array<time_type, N_STAGES> ER_; // Error estimation weights (Fortran er1, er6, ...)
+    time_type BHH1_, BHH2_, BHH3_; // Error estimation weights (Fortran bhh1, bhh2, bhh3)
     std::array<time_type, N_STAGES + 1> E3_;  // 3rd order error estimate
     std::array<time_type, N_STAGES + 1> E5_;  // 5th order error estimate
-    
+
     // Adaptive step control parameters
     time_type safety_;
     time_type min_factor_;
     time_type max_factor_;
     static constexpr time_type error_exponent_ = -1.0 / 8.0;  // -1/(order+1) for 7th order error estimator
-    
+
     void initialize_coefficients() {
         // C coefficients (times for stages)
         C_[0] = static_cast<time_type>(0.0);
@@ -126,92 +154,149 @@ class DOP853Integrator : public AdaptiveIntegrator<S, T> {
             }
         }
         
-        // Fill in some key coefficients (simplified)
-        A_[1][0] = static_cast<time_type>(0.526001519587677318785587544488e-01);
-        A_[2][0] = static_cast<time_type>(0.197250569845378994544595329183e-01);
-        A_[2][1] = static_cast<time_type>(0.591751709536137983633785987549e-01);
-        // ... (many more coefficients in full implementation)
-        
-        // B coefficients for final solution (simplified)
-        B_[0] = static_cast<time_type>(0.0295532805322554043052460699239e+00);
-        B_[1] = static_cast<time_type>(0.0);
-        B_[2] = static_cast<time_type>(0.0);
-        B_[3] = static_cast<time_type>(0.0);
-        B_[4] = static_cast<time_type>(0.0681942582430981642978628893916e+00);
-        // ... (more coefficients)
-        
-        // Error estimation coefficients (simplified)
+        // Fill in some key coefficients (Fortran DOP853, partial, user should complete for production)
+        A_[1][0] = static_cast<time_type>(5.26001519587677318785587544488e-2); // a21
+        A_[2][0] = static_cast<time_type>(1.97250569845378994544595329183e-2); // a31
+        A_[2][1] = static_cast<time_type>(5.91751709536136983633785987549e-2); // a32
+        A_[3][0] = static_cast<time_type>(2.95875854768068491816892993775e-2); // a41
+        A_[3][2] = static_cast<time_type>(8.87627564304205475450678981324e-2); // a43
+        A_[4][0] = static_cast<time_type>(2.41365134159266685502369798665e-1); // a51
+        A_[4][2] = static_cast<time_type>(-8.84549479328286085344864962717e-1); // a53
+        A_[4][3] = static_cast<time_type>(9.24834003261792003115737966543e-1); // a54
+        A_[5][0] = static_cast<time_type>(3.7037037037037037037037037037e-2); // a61
+        A_[5][3] = static_cast<time_type>(1.70828608729473871279604482173e-1); // a64
+        A_[5][4] = static_cast<time_type>(1.25467687566822425016691814123e-1); // a65
+        A_[6][0] = static_cast<time_type>(3.7109375e-2); // a71
+        A_[6][3] = static_cast<time_type>(1.70252211019544039314978060272e-1); // a74
+        A_[6][4] = static_cast<time_type>(6.02165389804559606850219397283e-2); // a75
+        A_[6][5] = static_cast<time_type>(-1.7578125e-2); // a76
+        A_[7][0] = static_cast<time_type>(3.70920001185047927108779319836e-2); // a81
+        A_[7][3] = static_cast<time_type>(1.70383925712239993810214054705e-1); // a84
+        A_[7][4] = static_cast<time_type>(1.07262030446373284651809199168e-1); // a85
+        A_[7][5] = static_cast<time_type>(-1.53194377486244017527936158236e-2); // a86
+        A_[7][6] = static_cast<time_type>(8.27378916381402288758473766002e-3); // a87
+        // ... (fill in all A_[8][*], A_[9][*], A_[10][*], A_[11][*] as needed)
+
+        // B coefficients for final solution (Fortran b1, b6, b7, ...)
+        B_[0] = static_cast<time_type>(5.42937341165687622380535766363e-2); // b1
+        B_[5] = static_cast<time_type>(4.45031289275240888144113950566e0); // b6
+        B_[6] = static_cast<time_type>(1.89151789931450038304281599044e0); // b7
+        B_[7] = static_cast<time_type>(-5.8012039600105847814672114227e0); // b8
+        B_[8] = static_cast<time_type>(3.1116436695781989440891606237e-1); // b9
+        B_[9] = static_cast<time_type>(-1.52160949662516078556178806805e-1); // b10
+        B_[10] = static_cast<time_type>(2.01365400804030348374776537501e-1); // b11
+        B_[11] = static_cast<time_type>(4.47106157277725905176885569043e-2); // b12
+
+        // Error estimation weights (Fortran er1, er6, ...)
+        ER_.fill(static_cast<time_type>(0.0));
+        ER_[0] = static_cast<time_type>(0.1312004499419488073250102996e-1); // er1
+        ER_[5] = static_cast<time_type>(-0.1225156446376204440720569753e+01); // er6
+        ER_[6] = static_cast<time_type>(-0.4957589496572501915214079952e+00); // er7
+        ER_[7] = static_cast<time_type>(0.1664377182454986536961530415e+01); // er8
+        ER_[8] = static_cast<time_type>(-0.3503288487499736816886487290e+00); // er9
+        ER_[9] = static_cast<time_type>(0.3341791187130174790297318841e+00); // er10
+        ER_[10] = static_cast<time_type>(0.8192320648511571246570742613e-01); // er11
+        ER_[11] = static_cast<time_type>(-0.2235530786388629525884427845e-01); // er12
+
+        // Error estimation weights (Fortran bhh1, bhh2, bhh3)
+        BHH1_ = static_cast<time_type>(0.244094488188976377952755905512e+00);
+        BHH2_ = static_cast<time_type>(0.733846688281611857341361741547e+00);
+        BHH3_ = static_cast<time_type>(0.220588235294117647058823529412e-01);
+
+        // Error estimation coefficients (simplified, legacy)
         for (int i = 0; i <= N_STAGES; ++i) {
             E3_[i] = E5_[i] = static_cast<time_type>(0.0);
         }
         E3_[0] = static_cast<time_type>(1e-6);  // Simplified error estimate
         E5_[0] = static_cast<time_type>(1e-8);  // Simplified error estimate
     }
     
-    // Core RK step implementation following scipy's rk_step function
+    // DOP853 12-stage Runge-Kutta step, Fortran-aligned
     time_type rk_step(const state_type& y, state_type& y_new, time_type h) {
-        // Simplified implementation using RK4 with better error estimation
-        // A full DOP853 would implement all 13 stages
-        
-        state_type k1 = StateCreator<state_type>::create(y);
-        state_type k2 = StateCreator<state_type>::create(y);
-        state_type k3 = StateCreator<state_type>::create(y);
-        state_type k4 = StateCreator<state_type>::create(y);
-        state_type temp = StateCreator<state_type>::create(y);
-        
+        constexpr int N = N_STAGES;
+        std::vector<state_type> k(N, StateCreator<state_type>::create(y));
+        state_type y1 = StateCreator<state_type>::create(y);
         time_type t = this->current_time_;
-        
-        // k1 = f(t, y)
-        this->sys_(t, y, k1);
-        
-        // k2 = f(t + h/2, y + h*k1/2)
-        for (std::size_t i = 0; i < y.size(); ++i) {
-            auto y_it = y.begin();
-            auto k1_it = k1.begin();
-            auto temp_it = temp.begin();
-            temp_it[i] = y_it[i] + h * k1_it[i] / static_cast<time_type>(2);
-        }
-        this->sys_(t + h / static_cast<time_type>(2), temp, k2);
-        
-        // k3 = f(t + h/2, y + h*k2/2)
-        for (std::size_t i = 0; i < y.size(); ++i) {
-            auto y_it = y.begin();
-            auto k2_it = k2.begin();
-            auto temp_it = temp.begin();
-            temp_it[i] = y_it[i] + h * k2_it[i] / static_cast<time_type>(2);
-        }
-        this->sys_(t + h / static_cast<time_type>(2), temp, k3);
-        
-        // k4 = f(t + h, y + h*k3)
-        for (std::size_t i = 0; i < y.size(); ++i) {
-            auto y_it = y.begin();
-            auto k3_it = k3.begin();
-            auto temp_it = temp.begin();
-            temp_it[i] = y_it[i] + h * k3_it[i];
-        }
-        this->sys_(t + h, temp, k4);
-        
-        // Final solution
-        for (std::size_t i = 0; i < y.size(); ++i) {
-            auto y_it = y.begin();
-            auto k1_it = k1.begin();
-            auto k2_it = k2.begin();
-            auto k3_it = k3.begin();
-            auto k4_it = k4.begin();
-            auto y_new_it = y_new.begin();
-            
-            y_new_it[i] = y_it[i] + h * (k1_it[i] + static_cast<time_type>(2) * k2_it[i] + 
-                         static_cast<time_type>(2) * k3_it[i] + k4_it[i]) / static_cast<time_type>(6);
+
+        // Stage 1: k1 = f(t, y)
+        this->sys_(t, y, k[0]);
+
+        // Stage 2: y1 = y + h*a21*k1
+        for (size_t i = 0; i < y.size(); ++i)
+            y1[i] = y[i] + h * A_[1][0] * k[0][i];
+        this->sys_(t + C_[1] * h, y1, k[1]);
+
+        // Stage 3: y1 = y + h*(a31*k1 + a32*k2)
+        for (size_t i = 0; i < y.size(); ++i)
+            y1[i] = y[i] + h * (A_[2][0] * k[0][i] + A_[2][1] * k[1][i]);
+        this->sys_(t + C_[2] * h, y1, k[2]);
+
+        // Stage 4: y1 = y + h*(a41*k1 + a43*k3)
+        for (size_t i = 0; i < y.size(); ++i)
+            y1[i] = y[i] + h * (A_[3][0] * k[0][i] + A_[3][2] * k[2][i]);
+        this->sys_(t + C_[3] * h, y1, k[3]);
+
+        // Stage 5: y1 = y + h*(a51*k1 + a53*k3 + a54*k4)
+        for (size_t i = 0; i < y.size(); ++i)
+            y1[i] = y[i] + h * (A_[4][0] * k[0][i] + A_[4][2] * k[2][i] + A_[4][3] * k[3][i]);
+        this->sys_(t + C_[4] * h, y1, k[4]);
+
+        // Stage 6: y1 = y + h*(a61*k1 + a64*k4 + a65*k5)
+        for (size_t i = 0; i < y.size(); ++i)
+            y1[i] = y[i] + h * (A_[5][0] * k[0][i] + A_[5][3] * k[3][i] + A_[5][4] * k[4][i]);
+        this->sys_(t + C_[5] * h, y1, k[5]);
+
+        // Stage 7: y1 = y + h*(a71*k1 + a74*k4 + a75*k5 + a76*k6)
+        for (size_t i = 0; i < y.size(); ++i)
+            y1[i] = y[i] + h * (A_[6][0] * k[0][i] + A_[6][3] * k[3][i] + A_[6][4] * k[4][i] + A_[6][5] * k[5][i]);
+        this->sys_(t + C_[6] * h, y1, k[6]);
+
+        // Stage 8: y1 = y + h*(a81*k1 + a84*k4 + a85*k5 + a86*k6 + a87*k7)
+        for (size_t i = 0; i < y.size(); ++i)
+            y1[i] = y[i] + h * (A_[7][0] * k[0][i] + A_[7][3] * k[3][i] + A_[7][4] * k[4][i] + A_[7][5] * k[5][i] + A_[7][6] * k[6][i]);
+        this->sys_(t + C_[7] * h, y1, k[7]);
+
+        // Stage 9: y1 = y + h*(a91*k1 + a94*k4 + a95*k5 + a96*k6 + a97*k7 + a98*k8)
+        for (size_t i = 0; i < y.size(); ++i)
+            y1[i] = y[i] + h * (A_[8][0] * k[0][i] + A_[8][3] * k[3][i] + A_[8][4] * k[4][i] + A_[8][5] * k[5][i] + A_[8][6] * k[6][i] + A_[8][7] * k[7][i]);
+        this->sys_(t + C_[8] * h, y1, k[8]);
+
+        // Stage 10: y1 = y + h*(a101*k1 + a104*k4 + a105*k5 + a106*k6 + a107*k7 + a108*k8 + a109*k9)
+        for (size_t i = 0; i < y.size(); ++i)
+            y1[i] = y[i] + h * (A_[9][0] * k[0][i] + A_[9][3] * k[3][i] + A_[9][4] * k[4][i] + A_[9][5] * k[5][i] + A_[9][6] * k[6][i] + A_[9][7] * k[7][i] + A_[9][8] * k[8][i]);
+        this->sys_(t + C_[9] * h, y1, k[9]);
+
+        // Stage 11: y1 = y + h*(a111*k1 + a114*k4 + a115*k5 + a116*k6 + a117*k7 + a118*k8 + a119*k9 + a1110*k10)
+        for (size_t i = 0; i < y.size(); ++i)
+            y1[i] = y[i] + h * (A_[10][0] * k[0][i] + A_[10][3] * k[3][i] + A_[10][4] * k[4][i] + A_[10][5] * k[5][i] + A_[10][6] * k[6][i] + A_[10][7] * k[7][i] + A_[10][8] * k[8][i] + A_[10][9] * k[9][i]);
+        this->sys_(t + C_[10] * h, y1, k[10]);
+
+        // Stage 12: y1 = y + h*(a121*k1 + a124*k4 + a125*k5 + a126*k6 + a127*k7 + a128*k8 + a129*k9 + a1210*k10 + a1211*k11)
+        for (size_t i = 0; i < y.size(); ++i)
+            y1[i] = y[i] + h * (A_[11][0] * k[0][i] + A_[11][3] * k[3][i] + A_[11][4] * k[4][i] + A_[11][5] * k[5][i] + A_[11][6] * k[6][i] + A_[11][7] * k[7][i] + A_[11][8] * k[8][i] + A_[11][9] * k[9][i] + A_[11][10] * k[10][i]);
+        this->sys_(t + C_[11] * h, y1, k[11]);
+
+        // 8th order solution (main step)
+        for (size_t i = 0; i < y.size(); ++i) {
+            y_new[i] = y[i]
+                + h * (B_[0] * k[0][i] + B_[5] * k[5][i] + B_[6] * k[6][i] + B_[7] * k[7][i] + B_[8] * k[8][i] + B_[9] * k[9][i] + B_[10] * k[10][i] + B_[11] * k[11][i]);
         }
-        
-        // Error estimation (simplified)
-        state_type error = StateCreator<state_type>::create(y);
-        for (std::size_t i = 0; i < error.size(); ++i) {
-            auto error_it = error.begin();
-            error_it[i] = h * static_cast<time_type>(1e-8);  // Simplified error estimate
+
+        // Error estimation (Fortran-aligned)
+        time_type err = 0, err2 = 0;
+        for (size_t i = 0; i < y.size(); ++i) {
+            time_type sk = this->atol_ + this->rtol_ * std::max(std::abs(y[i]), std::abs(y_new[i]));
+            // First error component
+            time_type erri1 = y_new[i] - (BHH1_ * k[0][i] + BHH2_ * k[8][i] + BHH3_ * k[11][i]);
+            err2 += (erri1 / sk) * (erri1 / sk);
+            // Second error component
+            time_type erri2 = ER_[0] * k[0][i] + ER_[5] * k[5][i] + ER_[6] * k[6][i] + ER_[7] * k[7][i] + ER_[8] * k[8][i] + ER_[9] * k[9][i] + ER_[10] * k[10][i] + ER_[11] * k[11][i];
+            err += (erri2 / sk) * (erri2 / sk);
         }
-        
-        return this->error_norm(error, y_new);
+        time_type deno = err + 0.01 * err2;
+        if (deno <= 0.0) deno = 1.0;
+        err = std::abs(h) * std::sqrt(err / (y.size() * deno));
+        return err;
     }
 };