
Commit 89efabf

wip
1 parent b5ef37e commit 89efabf

6 files changed: 125 additions & 117 deletions

6 files changed

+125
-117
lines changed

src/line_search/bisection.rs

Lines changed: 17 additions & 13 deletions
@@ -148,25 +148,27 @@ impl<'a> ProblemEvaluator for LuminalEvaluator<'a> {
         if step.abs() < 1e-10 {
             return Ok(self.initial_loss);
         }
-        let new_params: Vec<f32> = self
+        let new_params: Vec<f64> = self
             .current_params
             .iter()
             .zip(self.direction.iter())
-            .map(|(p, d)| (p + step * d) as f32)
+            .map(|(p, d)| p + step * d)
             .collect();
+        let mut weights_data = Vec::new();
 
         let mut offset = 0;
         for weight in &self.context.weights {
-            let len = weight.shape.len();
 
+            let len = weight.shape.n_elements().to_usize().unwrap();
             if offset + len > new_params.len() {
                 return Err(anyhow!("Parameter size mismatch"));
             }
 
             let chunk = &new_params[offset..offset + len];
-            self.context.graph().set_tensor(weight.id, 0, Tensor::new(chunk.to_vec()));
+            weights_data.push(chunk.iter().map(|&x| x as f32).collect());
             offset += len;
         }
+        self.context.write_weights(&mut weights_data);
 
         self.context.graph().execute();
         self.num_f_evals += 1;
@@ -175,8 +177,8 @@ impl<'a> ProblemEvaluator for LuminalEvaluator<'a> {
             .loss
             .data()
             .as_any()
-            .downcast_ref::<Vec<f64>>()
-            .ok_or_else(|| anyhow!("Failed to downcast loss data"))?[0];
+            .downcast_ref::<Vec<f32>>()
+            .ok_or_else(|| anyhow!("Failed to downcast loss data"))?[0] as f64;
         Ok(loss_val)
     }
 
@@ -185,25 +187,27 @@ impl<'a> ProblemEvaluator for LuminalEvaluator<'a> {
             return Ok(self.initial_dd);
         }
         // Set parameters and execute graph to get gradient
-        let new_params: Vec<f32> = self
+        let new_params: Vec<f64> = self
             .current_params
             .iter()
             .zip(self.direction.iter())
-            .map(|(p, d)| (p + step * d) as f32)
+            .map(|(p, d)| p + step * d)
             .collect();
+        let mut weights_data = Vec::new();
 
         let mut offset = 0;
         for weight in &self.context.weights {
-            let len = weight.shape.len();
+            let len = weight.shape.n_elements().to_usize().unwrap();
 
             if offset + len > new_params.len() {
                 return Err(anyhow!("Parameter size mismatch"));
             }
 
             let chunk = &new_params[offset..offset + len];
-            self.context.graph().set_tensor(weight.id, 0, Tensor::new(chunk.to_vec()));
+            weights_data.push(chunk.iter().map(|&x| x as f32).collect());
             offset += len;
         }
+        self.context.write_weights(&mut weights_data);
 
         self.context.graph().execute();
         self.num_g_evals += 1;
@@ -220,7 +224,7 @@ impl<'a> ProblemEvaluator for LuminalEvaluator<'a> {
         {
             let grad_data = grad_binding
                 .as_any()
-                .downcast_ref::<Vec<f64>>()
+                .downcast_ref::<Vec<f32>>()
                 .ok_or_else(|| anyhow!("Failed to downcast gradient data"))?;
 
             let len = grad_data.len();
@@ -232,7 +236,7 @@ impl<'a> ProblemEvaluator for LuminalEvaluator<'a> {
             let term: f64 = grad_data
                 .iter()
                 .zip(d_chunk.iter())
-                .map(|(g, d)| g * d)
+                .map(|(g, d)| (*g as f64) * d)
                 .sum();
             dd += term;
            offset += len;
@@ -909,4 +913,4 @@ mod tests {
        assert_eq!(line_search.config.max_iterations, 20);
    }
    */
-}
+}
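
The hunks above switch the evaluator's parameter arithmetic to f64 and, instead of setting each tensor individually via the graph, collect per-weight f32 chunks into weights_data and hand them back in one write_weights call. Below is a minimal, framework-free sketch of that chunking pattern; split_params, the shapes slice, and the literal values are illustrative stand-ins, not part of the luminal API.

// Sketch: keep the line-search arithmetic in f64, then convert each per-weight
// chunk to f32 only at the tensor boundary and return all chunks as one batch.
fn split_params(params: &[f64], shapes: &[usize]) -> Result<Vec<Vec<f32>>, String> {
    let mut weights_data = Vec::new();
    let mut offset = 0;
    for &len in shapes {
        if offset + len > params.len() {
            return Err("Parameter size mismatch".into());
        }
        let chunk = &params[offset..offset + len];
        // Narrow to f32 only when the chunk is handed to the tensor side.
        weights_data.push(chunk.iter().map(|&x| x as f32).collect());
        offset += len;
    }
    Ok(weights_data)
}

fn main() {
    let current = [1.0_f64, 2.0, 3.0];
    let direction = [0.5_f64, -0.5, 1.0];
    let step = 0.1;
    // f64 step update, mirroring `p + step * d` in the diff.
    let new_params: Vec<f64> = current
        .iter()
        .zip(direction.iter())
        .map(|(p, d)| p + step * d)
        .collect();
    let chunks = split_params(&new_params, &[2, 1]).unwrap();
    println!("{chunks:?}"); // roughly [[1.05, 1.95], [3.1]]
}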

src/line_search/cubic_quadratic.rs

Lines changed: 12 additions & 52 deletions
@@ -3,6 +3,7 @@ use crate::optimizers::optimizer::OptimizationContext;
 use anyhow::anyhow;
 use log::debug;
 use luminal::graph::Graph;
+use std::cell::RefCell;
 
 /// A sophisticated line search algorithm that uses cubic and quadratic interpolation
 /// to efficiently find step sizes satisfying the Wolfe conditions.
@@ -313,8 +314,8 @@ impl LineSearch for CubicQuadraticLineSearch {
         initial_gradient: &[f64],
     ) -> anyhow::Result<LineSearchResult> {
         let f0 = initial_loss;
-        let mut num_f_evals = 0usize;
-        let mut num_g_evals = 0usize;
+        let num_f_evals = RefCell::new(0usize);
+        let num_g_evals = RefCell::new(0usize);
         let g0: f64 = initial_gradient
             .iter()
             .zip(direction.iter())
@@ -325,56 +326,19 @@ impl LineSearch for CubicQuadraticLineSearch {
             return Err(anyhow!("Direction is not a descent direction: g0 = {:.6e} >= 0. This indicates the search direction is pointing uphill.", g0));
         }
         // Helper to evaluate function and gradient
-        let ctx1 = &mut context;
         let mut evaluate = |alpha: f64| -> anyhow::Result<(f64, f64)> {
             let (loss_val, grad_val) =
-                self.evaluate_with_gradient(ctx1, current_params, direction, alpha)?;
+                self.evaluate_with_gradient(&mut context, current_params, direction, alpha)?;
             let dir_deriv: f64 = grad_val
                 .iter()
                 .zip(direction.iter())
                 .map(|(g, d)| g * d)
                 .sum();
+            *num_f_evals.borrow_mut() += 1;
+            *num_g_evals.borrow_mut() += 1;
             Ok((loss_val, dir_deriv))
         };
 
-        // Verify we can make progress
-        let test_step = self.config.min_step;
-        let (f_test, _) = evaluate(test_step)?;
-        num_f_evals += 1;
-        num_g_evals += 1;
-        if f_test >= f0 {
-            let eps_step = f64::EPSILON.sqrt();
-            let (f_eps, _) = evaluate(eps_step)?;
-            num_f_evals += 1;
-            num_g_evals += 1;
-            if f_eps < f0 {
-                return Ok(LineSearchResult {
-                    step_size: eps_step,
-                    success: true,
-                    termination_reason: TerminationReason::StepSizeTooSmall,
-                    num_f_evals,
-                    num_g_evals,
-                });
-            }
-            // Try a slightly larger step
-            let small_step = 1e-8;
-            let (f_small, _) = evaluate(small_step)?;
-            num_f_evals += 1;
-            num_g_evals += 1;
-            if f_small < f0 {
-                return Ok(LineSearchResult {
-                    step_size: small_step,
-                    success: true,
-                    termination_reason: TerminationReason::StepSizeTooSmall,
-                    num_f_evals,
-                    num_g_evals,
-                });
-            }
-            return Err(anyhow!(
-                "Function appears to be ill-conditioned: no improvement possible within machine precision. f0={:.6e}, f_test={:.6e}, f_eps={:.6e}",
-                f0, f_test, f_eps
-            ));
-        }
 
         let mut alpha = self.config.initial_step;
         let mut alpha_prev = 0.0;
@@ -391,8 +355,6 @@ impl LineSearch for CubicQuadraticLineSearch {
         for iter in 0..self.config.max_iterations {
             // Evaluate at current step
             let (f_alpha, g_alpha) = evaluate(alpha)?;
-            num_f_evals += 1;
-            num_g_evals += 1;
             // Track best point
             if f_alpha < best_f {
                 best_f = f_alpha;
@@ -417,8 +379,8 @@ impl LineSearch for CubicQuadraticLineSearch {
                     step_size: alpha,
                     success: true,
                     termination_reason: TerminationReason::WolfeConditionsSatisfied,
-                    num_f_evals,
-                    num_g_evals,
+                    num_f_evals: *num_f_evals.borrow(),
+                    num_g_evals: *num_g_evals.borrow(),
                 });
             }
             // If Armijo condition fails or function increased, interpolate
@@ -471,22 +433,20 @@ impl LineSearch for CubicQuadraticLineSearch {
                 step_size: best_alpha,
                 success: true,
                 termination_reason: TerminationReason::MaxIterationsReached,
-                num_f_evals,
-                num_g_evals,
+                num_f_evals: *num_f_evals.borrow(),
+                num_g_evals: *num_g_evals.borrow(),
             })
         } else {
             // Try a very small step as last resort
             let small_step = self.config.min_step * 10.0;
             let (f_small, _) = evaluate(small_step)?;
-            num_f_evals += 1;
-            num_g_evals += 1;
             if f_small < f0 {
                 Ok(LineSearchResult {
                     step_size: small_step,
                     success: true,
                     termination_reason: TerminationReason::StepSizeTooSmall,
-                    num_f_evals,
-                    num_g_evals,
+                    num_f_evals: *num_f_evals.borrow(),
+                    num_g_evals: *num_g_evals.borrow(),
                 })
             } else {
                 Err(anyhow!(
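
This file moves the evaluation counters into RefCell so the evaluate closure can bump them on every call while it also holds the mutable borrow of context, and the results can still read the totals afterwards. A small self-contained sketch of that interior-mutability pattern, with a stand-in objective and illustrative names:

use std::cell::RefCell;

// The closure captures the counter by shared reference, yet can still
// increment it through RefCell; the caller reads the count at any time
// without needing exclusive access to the counter.
fn main() {
    let num_f_evals = RefCell::new(0usize);

    let evaluate = |alpha: f64| -> f64 {
        *num_f_evals.borrow_mut() += 1; // count every evaluation at the call site
        (alpha - 1.0).powi(2)           // stand-in objective
    };

    let _ = evaluate(0.5);
    let _ = evaluate(1.5);
    assert_eq!(*num_f_evals.borrow(), 2);
    println!("evaluations: {}", num_f_evals.borrow());
}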

src/line_search/golden_section.rs

Lines changed: 1 addition & 22 deletions
@@ -216,27 +216,6 @@ impl GoldenSectionLineSearch {
         if directional_derivative >= 0.0 {
             return Err(anyhow!("Direction is not a descent direction"));
         }
-        // First verify we can make progress
-        let f0 = initial_loss;
-        let test_step = self.config.min_step;
-        let f_test = objective(test_step)?;
-        if f_test >= f0 {
-            // Try machine epsilon
-            let eps_step = f64::EPSILON.sqrt();
-            let f_eps = objective(eps_step)?;
-            if f_eps < f0 {
-                return Ok(LineSearchResult {
-                    step_size: eps_step,
-                    success: true,
-                    termination_reason: TerminationReason::StepSizeTooSmall,
-                    num_f_evals: 0,
-                    num_g_evals: 0,
-                });
-            }
-            return Err(anyhow!(
-                "Function appears to be ill-conditioned: no improvement possible within machine precision"
-            ));
-        }
         let step_size = self.find_minimum(objective)?;
         let success = step_size >= self.config.min_step && step_size <= self.config.max_step;
         Ok(LineSearchResult {
@@ -729,4 +708,4 @@ mod tests {
             .to_string()
             .contains("descent direction"));
     }
-}
+}

src/line_search/line_search.rs

Lines changed: 4 additions & 4 deletions
@@ -235,8 +235,8 @@ pub trait LineSearch: Send + Sync + Debug {
             .loss
             .data()
             .as_any()
-            .downcast_ref::<Vec<f64>>()
-            .ok_or_else(|| anyhow::anyhow!("Failed to downcast loss data"))?[0];
+            .downcast_ref::<Vec<f32>>()
+            .ok_or_else(|| anyhow::anyhow!("Failed to downcast loss data"))?[0] as f64;
         if self.is_verbose() {
             println!("LineSearch: f(x + alpha * d) = {:.6e}", f_val);
         }
@@ -334,7 +334,7 @@ pub trait LineSearch: Send + Sync + Debug {
         for tensor_data in &context.gradients.iter().map(|g| g.data()).collect_vec() {
             let g_data = tensor_data
                 .as_any()
-                .downcast_ref::<Vec<f64>>()
+                .downcast_ref::<Vec<f32>>()
                 .ok_or_else(|| anyhow::anyhow!("Failed to downcast gradient data"))?;
 
             let len = g_data.len();
@@ -343,7 +343,7 @@ pub trait LineSearch: Send + Sync + Debug {
             }
 
             let d_chunk = &direction[offset..offset + len];
-            let term: f64 = g_data.iter().zip(d_chunk.iter()).map(|(g, d)| g * d).sum();
+            let term: f64 = g_data.iter().zip(d_chunk.iter()).map(|(g, d)| (*g as f64) * d).sum();
             deriv += term;
             offset += len;
         }
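
Loss and gradient tensors now come back from the graph as Vec<f32>, so each element is widened to f64 before it enters the f64 directional-derivative sum. A self-contained sketch of that promotion; the function name and input values are illustrative only:

// Widen each f32 gradient element to f64 before multiplying by the f64
// search direction and accumulating, as the diff does inside the loop.
fn directional_derivative(grad_f32: &[f32], direction: &[f64]) -> f64 {
    grad_f32
        .iter()
        .zip(direction.iter())
        .map(|(g, d)| (*g as f64) * d)
        .sum()
}

fn main() {
    let g = [0.5_f32, -1.0, 2.0];
    let d = [1.0_f64, 0.25, -0.5];
    // 0.5 - 0.25 - 1.0 = -0.75, i.e. a descent direction
    println!("{}", directional_derivative(&g, &d));
}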

src/line_search/more_thuente.rs

Lines changed: 16 additions & 25 deletions
@@ -482,23 +482,6 @@ impl LineSearch for MoreThuenteLineSearch {
             Ok((loss_val, dir_deriv))
         };
 
-        // Verify we can make progress
-        let test_step = self.config.min_step;
-        let (f_test, _) = evaluate(test_step)?;
-        if f_test >= f0 {
-            let eps_step = f64::EPSILON.sqrt();
-            let (f_eps, _) = evaluate(eps_step)?;
-            if f_eps < f0 {
-                return Ok(LineSearchResult {
-                    step_size: eps_step,
-                    success: true,
-                    termination_reason: TerminationReason::StepSizeTooSmall,
-                    num_f_evals,
-                    num_g_evals,
-                });
-            }
-            return Err(anyhow!("Function appears to be ill-conditioned: no improvement possible within machine precision"));
-        }
 
         let mut stp = self.config.initial_step;
         let mut stx = 0.0_f64;
@@ -619,13 +602,21 @@ impl LineSearch for MoreThuenteLineSearch {
                 num_g_evals,
             })
         } else {
-            Ok(LineSearchResult {
-                step_size: stp,
-                success: true,
-                termination_reason: TerminationReason::MaxIterationsReached,
-                num_f_evals,
-                num_g_evals,
-            })
+            // Try machine epsilon step as last resort
+            let eps_step = f64::EPSILON.sqrt();
+            let (f_eps, _) = evaluate(eps_step)?;
+            if f_eps < f0 {
+                self.log_verbose(&format!("Using machine epsilon step {eps_step:.3e}"));
+                return Ok(LineSearchResult {
+                    step_size: eps_step,
+                    success: true,
+                    termination_reason: TerminationReason::StepSizeTooSmall,
+                    num_f_evals,
+                    num_g_evals,
+                });
+            }
+
+            Err(anyhow!("Function appears to be ill-conditioned: no improvement possible within machine precision"))
         }
     }
 
@@ -1137,4 +1128,4 @@ mod tests {
     }
 }
 */
-}
+}
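
Here the ill-conditioning probe moves from the start of the search to the very end: only when the main loop fails to return does the code try a sqrt(machine-epsilon) step and accept it if it still lowers the loss. A compact sketch of that last-resort logic; fallback_step, f0, and the quadratic objective are stand-ins for the real evaluator:

// Probe a sqrt(machine-epsilon) step and accept it only if it improves on f0;
// otherwise report that no progress is possible within machine precision.
fn fallback_step(f0: f64, evaluate: impl Fn(f64) -> f64) -> Result<f64, String> {
    let eps_step = f64::EPSILON.sqrt(); // ~1.49e-8
    if evaluate(eps_step) < f0 {
        Ok(eps_step)
    } else {
        Err("no improvement possible within machine precision".into())
    }
}

fn main() {
    let quadratic = |alpha: f64| (alpha - 1.0).powi(2); // minimum at alpha = 1
    match fallback_step(quadratic(0.0), quadratic) {
        Ok(step) => println!("fallback step accepted: {step:.3e}"),
        Err(e) => println!("line search failed: {e}"),
    }
}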
