Skip to content

Commit d4194e6

Browse files
vchuravy, ranocha, and claude
authored
Simple Linesearch (#1)
* simple Backtracking line search after SIAMFANL * Update src/linesearches.jl Co-authored-by: Hendrik Ranocha <ranocha@users.noreply.github.com> * Apply suggestions from code review Co-authored-by: Hendrik Ranocha <ranocha@users.noreply.github.com> * cleanup * rename n_res to norm_res * enable access to J and make backtracking line search fully in place * make alpha configurable * export linesearch and make logic cleaner * add tests * cleanup example * bump version * Remove Pluto notebooks from docs Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * include Rosenbrock in docs * ensure neg_res is the same type as res * Revert "Remove Pluto notebooks from docs" This reverts commit 3929f34. --------- Co-authored-by: Hendrik Ranocha <ranocha@users.noreply.github.com> Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 8807ee9 commit d4194e6

8 files changed

Lines changed: 252 additions & 39 deletions

File tree

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "Ariadne"
22
uuid = "0be81120-40bf-4f8b-adf0-26103efb66f1"
33
authors = ["Valentin Churavy <v.churavy@gmail.com>"]
4-
version = "0.1.0"
4+
version = "0.1.1"
55

66
[deps]
77
Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"

docs/make.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ const EXAMPLES_DIR = joinpath(@__DIR__, "..", "examples")
4040
const OUTPUT_DIR = joinpath(@__DIR__, "src/generated")
4141

4242
examples = [
43+
"Rosenbrock" => "rosenbrock",
4344
"Bratu -- 1D" => "bratu",
4445
"Bratu -- KernelAbstractions" => "bratu_ka",
4546
"Simple" => "simple",

docs/src/index.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,14 @@ newton_krylov!
99
newton_krylov
1010
```
1111

12+
### Line Searches
13+
14+
```@docs
15+
Ariadne.LineSearches.AbstractLineSearch
16+
NoLineSearch
17+
BacktrackingLineSearch
18+
```
19+
1220
### Parameters
1321

1422
```@docs

examples/rosenbrock.jl

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# # Generalized Rosenbrock
2+
3+
# This example is taken from Fig. 1 of:
4+
# > A. Pal et al., "NonlinearSolve.jl: High-performance and robust solvers for systems
5+
# > of nonlinear equations in Julia," arXiv [math.NA], 24-Mar-2024.
6+
# > https://arxiv.org/abs/2403.16341
7+
8+
# ## Packages
9+
10+
using Ariadne
11+
12+
# ## Problem definition
13+
14+
# The generalized Rosenbrock function in $N$ dimensions:
15+
# ```math
16+
# F(x)_1 = 1 - x_1, \quad F(x)_i = 10(x_i - x_{i-1}^2), \quad i = 2, \ldots, N
17+
# ```
18+
19+
# Residual of the generalized Rosenbrock function:
# F₁ = 1 - x₁ and Fᵢ = 10 (xᵢ - xᵢ₋₁²) for i = 2, …, N.
# The second positional argument is the (unused) parameter slot `p`
# expected by the solver interface.
function generalized_rosenbrock(x, _)
    return vcat(
        1 - x[1],
        # `.^ 2` slices `x[1:(end - 1)]` once instead of materializing
        # the same slice twice as `x[1:(end - 1)] .* x[1:(end - 1)]`.
        10 .* (x[2:end] .- x[1:(end - 1)] .^ 2)
    )
end
25+
26+
# The standard starting point is $x_1 = -1.2$, $x_i = 1$ for $i \geq 2$.

N = 12
x_start = [-1.2; ones(N - 1)]

# ## Without line search

# Solve with GMRES and no line search (`NoLineSearch`).
# The number of iterations required grows quickly with $N$, and the solver
# fails to converge for $N \geq 9$ within the iteration budget.

_, stats = newton_krylov(
    generalized_rosenbrock,
    copy(x_start);
    algo = :gmres,
    linesearch! = NoLineSearch(),
    max_niter = 100_000
)
stats

# ## With backtracking line search

# Using `BacktrackingLineSearch` stabilizes convergence for larger $N$.
# Pal et al. report that their backtracking implementation does not converge for $N = 10$
# (using `abstol = 1e-8`); with `abstol = 1e-12` our implementation converges for all
# $N \leq 12$.

_, stats = newton_krylov(
    generalized_rosenbrock,
    copy(x_start);
    algo = :gmres,
    linesearch! = BacktrackingLineSearch(),
    max_niter = 100_000
)
stats

examples/simple.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,23 +24,23 @@ fig, ax = contour(xs, ys, (x, y) -> norm(F([x, y], nothing)); levels)
2424

2525
trace_1 = let x₀ = [2.0, 0.5]
    # Record the (x, y) iterates visited by the solver via the callback.
    path = Tuple{Float64, Float64}[]
    record(x, res, norm_res) = (push!(path, (x[1], x[2])); nothing)
    newton_krylov!(F!, x₀, nothing, callback = record)
    path
end
lines!(ax, trace_1)
3232

3333
trace_2 = let x₀ = [2.5, 3.0]
    # Record the (x, y) iterates visited by the solver via the callback.
    path = Tuple{Float64, Float64}[]
    record(x, res, norm_res) = (push!(path, (x[1], x[2])); nothing)
    newton_krylov!(F!, x₀, nothing, callback = record)
    path
end
lines!(ax, trace_2)
4040

4141
trace_3 = let x₀ = [3.0, 4.0]
4242
xs = Vector{Tuple{Float64, Float64}}(undef, 0)
43-
hist(x, res, n_res) = (push!(xs, (x[1], x[2])); nothing)
43+
hist(x, res, norm_res) = (push!(xs, (x[1], x[2])); nothing)
4444
x, stats = newton_krylov!(F!, x₀, nothing, callback = hist, forcing = Ariadne.EisenstatWalker(η_max = 0.68949), verbose = 1)
4545
@show stats.solved
4646
xs

src/Ariadne.jl

Lines changed: 38 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,14 @@ function Base.collect(JOp::Union{Adjoint{<:Any, <:AbstractJacobianOperator}, Tra
227227
return J
228228
end
229229

230+
##
231+
# LineSearches
232+
##
233+
234+
include("linesearches.jl")
235+
import .LineSearches: AbstractLineSearch, NoLineSearch, BacktrackingLineSearch
236+
export NoLineSearch, BacktrackingLineSearch
237+
230238
##
231239
# Newton-Krylov
232240
##
@@ -270,15 +278,15 @@ end
270278
"""
271279
Compute the Eisenstat-Walker forcing term for n > 0
272280
"""
273-
function (F::EisenstatWalker)(η, tol, norm_res, norm_res_prior)
    # Candidate forcing term from the ratio of successive residual norms.
    η_res = F.γ * norm_res^2 / norm_res_prior^2
    # Eq 3.6: safeguard using the previous forcing term; only applied when
    # the previous η was not already small.
    η_prev = F.γ * η^2
    η_safe = η_prev <= 1 // 10 ? min(F.η_max, η_res) : min(F.η_max, max(η_res, η_prev))
    # Eq 3.5: bound η from below relative to the tolerance and cap at η_max.
    return min(F.η_max, max(η_safe, 1 // 2 * tol / norm_res))
end
283291
initial(F::EisenstatWalker) = F.η_max
284292

@@ -331,13 +339,13 @@ end
331339
# Solver progress counters: outer Newton iterations, accumulated inner
# (Krylov) iterations, and the most recent residual norm.
struct Stats
    outer_iterations::Int
    inner_iterations::Int
    norm_res::Float64
end

# Return a new `Stats` with one more outer iteration, `inner_iterations`
# added to the inner count, and the residual norm replaced.
function update(stats::Stats, inner_iterations, norm_res::Float64)
    outer = stats.outer_iterations + 1
    inner = stats.inner_iterations + inner_iterations
    return Stats(outer, inner, norm_res)
end
343351

@@ -357,6 +365,7 @@ function newton_krylov!(
357365
tol_abs = 1.0e-12, # Scipy uses 6e-6
358366
max_niter = 50,
359367
forcing::Union{Forcing, Nothing} = EisenstatWalker(),
368+
linesearch!::AbstractLineSearch = NoLineSearch(),
360369
verbose = 0,
361370
algo = :gmres,
362371
M = nothing,
@@ -366,25 +375,25 @@ function newton_krylov!(
366375
)
367376
t₀ = time_ns()
368377
F!(res, u, p) # res = F(u)
369-
n_res = norm(res)
370-
callback(u, res, n_res)
378+
norm_res = norm(res)
379+
callback(u, res, norm_res)
371380

372-
tol = tol_rel * n_res + tol_abs
381+
tol = tol_rel * norm_res + tol_abs
373382

374383
if forcing !== nothing
375384
η = initial(forcing)
376385
end
377386

378-
verbose > 0 && @info "Jacobian-Free Newton-Krylov" algo res₀ = n_res tol tol_rel tol_abs η
387+
verbose > 0 && @info "Jacobian-Free Newton-Krylov" algo res₀ = norm_res tol tol_rel tol_abs η
379388

380389
J = JacobianOperator(F!, res, u, p)
381390

382391
# TODO: Refactor to provide method that re-uses the cache here.
383392
kc = KrylovConstructor(res)
384393
workspace = krylov_workspace(algo, kc)
385394

386-
stats = Stats(0, 0, n_res)
387-
while n_res > tol && stats.outer_iterations <= max_niter
395+
stats = Stats(0, 0, norm_res)
396+
while norm_res > tol && stats.outer_iterations <= max_niter
388397
# Handle kwargs for Preconditioners
389398
kwargs = krylov_kwargs
390399
if N !== nothing
@@ -405,47 +414,41 @@ function newton_krylov!(
405414
kwargs = (; atol = zero(η), rtol = η, kwargs...)
406415
end
407416

408-
# Solve: J d = res = F(u)
409-
# Typically, the Newton method is formulated as J d = -F(u)
410-
# with update u = u + d.
411-
# To simplify the implementation, we solve J d = F(u)
412-
# and update u = u - d instead.
413-
# `res` is modified by J, so we create a copy `res`
414-
# TODO: provide a temporary storage for `res`
415-
krylov_solve!(workspace, J, copy(res); kwargs...)
416-
417-
d = workspace.x # (negative) Newton direction
418-
s = 1 # Scaling of the Newton step TODO: LineSearch
417+
# Solve: J d = -res = -F(u)
418+
# The Newton method is formulated as J d = -F(u)
419+
# `res` is modified by J, so we create a `neg_res` copy here.
420+
# TODO: provide cache for `neg_res` to avoid this allocation.
421+
neg_res = similar(res)
422+
@. neg_res = -res
423+
krylov_solve!(workspace, J, neg_res; kwargs...)
419424

420-
# Update u
421-
u .= muladd.(-s, d, u) # u = u - s * d
425+
d₀ = workspace.x # (negative) Newton direction
422426

423-
# Update residual and norm
424-
n_res_prior = n_res
427+
# Perform line search to find an appropriate step size and update `u` and `res` in-place
428+
norm_res_prior = norm_res
429+
norm_res = linesearch!(J, F!, res, norm_res_prior, u, p, d₀)
425430

426-
F!(res, u, p) # res = F(u)
427-
n_res = norm(res)
428-
callback(u, res, n_res)
431+
callback(u, res, norm_res)
429432

430-
if isinf(n_res) || isnan(n_res)
433+
if isinf(norm_res) || isnan(norm_res)
431434
@error "Inner solver blew up" stats
432435
break
433436
end
434437

435438
if forcing !== nothing
436-
η = forcing(η, tol, n_res, n_res_prior)
439+
η = forcing(η, tol, norm_res, norm_res_prior)
437440
end
438441

439442
# This is almost to be expected for implicit time-stepping
440443
if verbose > 0 && workspace.stats.niter == 0 && forcing !== nothing
441444
@info "Inexact Newton thinks our step is good enough " η stats
442445
end
443446

444-
stats = update(stats, workspace.stats.niter, n_res)
445-
verbose > 0 && @info "Newton" iter = n_res η stats
447+
stats = update(stats, workspace.stats.niter, norm_res)
448+
verbose > 0 && @info "Newton" iter = norm_res η stats
446449
end
447450
t = (time_ns() - t₀) / 1.0e9
448-
return u, (; solved = n_res <= tol, stats, t)
451+
return u, (; solved = norm_res <= tol, stats, t)
449452
end
450453

451454
end # module Ariadne

src/linesearches.jl

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
module LineSearches

using LinearAlgebra

"""
    AbstractLineSearch

Abstract supertype for line searches used by the Newton–Krylov solver.

A line search may update the solution `u` and the residual `res` in-place,
given the function `F!`, parameters `p`, and the Newton direction `d`.

Implementations must call `F!(res, u, p)` to update the residual after
updating `u`, and must return the norm of the updated residual.

```julia
struct NewLineSearch <: AbstractLineSearch
    # parameters for the line search
end

function (ls::NewLineSearch)(J::AbstractJacobianOperator, F!, res, norm_res_prior, u, p, d)
    # perform line search to find an appropriate step size
    # ...
    # update u and res in-place
    F!(res, u, p)
    return norm(res)
end
```
"""
abstract type AbstractLineSearch end

"""
    NoLineSearch()

A line search that does not perform any line search: it simply takes the full Newton step.
"""
struct NoLineSearch <: AbstractLineSearch end

function (::NoLineSearch)(J, F!, res, norm_res_prior, u, p, d)
    # No line search: take the full Newton step
    u .+= d
    F!(res, u, p)
    return norm(res)
end

"""
    BacktrackingLineSearch(; n_iter_max = 10, alpha = 1.0e-4)

Backtracking (Armijo) line search: start from the full Newton step and halve
the step length `lambda` until the residual norm satisfies the
sufficient-decrease condition
`norm(res) <= (1 - alpha * lambda) * norm_res_prior`,
using at most `n_iter_max` residual evaluations.

## References

- Kelley, C. T. (2022).
  Solving nonlinear equations with iterative methods:
  Solvers and examples in Julia.
  Society for Industrial and Applied Mathematics.
- <https://github.com/ctkelley/SIAMFANLEquations.jl>
"""
Base.@kwdef struct BacktrackingLineSearch <: AbstractLineSearch
    n_iter_max::Int = 10
    alpha::Float64 = 1.0e-4
end

function (ls::BacktrackingLineSearch)(J, F!, res, norm_res_prior, u, p, d)
    alpha = ls.alpha
    lambda = 1.0

    # Validate parameters with proper exceptions: `@assert` may be disabled
    # at higher optimization levels and must not guard user-supplied input.
    ls.n_iter_max > 0 ||
        throw(ArgumentError("n_iter_max must be positive, got $(ls.n_iter_max)"))
    alpha > 0 || throw(ArgumentError("alpha must be positive, got $alpha"))

    # Take the full Newton step (lambda = 1.0)
    u .= muladd.(lambda, d, u) # u = u + lambda * d
    F!(res, u, p)
    norm_res = norm(res)

    for _ in 2:ls.n_iter_max
        # Armijo sufficient-decrease condition
        if norm_res <= (1 - alpha * lambda) * norm_res_prior
            return norm_res
        end

        # Halve lambda and retract the excess step incrementally:
        # u goes from u + old_lambda*d to u + new_lambda*d,
        # so the adjustment is (new_lambda - old_lambda)*d (negative).
        new_lambda = lambda * 0.5
        s = new_lambda - lambda
        u .= muladd.(s, d, u) # u = u + (new_lambda - old_lambda) * d
        lambda = new_lambda
        F!(res, u, p)
        norm_res = norm(res)
    end
    # Budget exhausted: return the norm of the last trial step, accepted or not.
    return norm_res
end

end # module LineSearches

0 commit comments

Comments
 (0)