Skip to content

Commit 3d9e90f

Browse files
authored
Merge pull request #20 from ihmeuw-msca/bugfix/linesearch-revert
Bugfix/linesearch-revert
2 parents 6a506eb + 3e72253 commit 3d9e90f

File tree

11 files changed

+131
-63
lines changed

11 files changed

+131
-63
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "msca"
7-
version = "0.3.2"
7+
version = "0.3.3"
88
description = "Mathematical sciences and computational algorithms"
99
readme = "README.md"
1010
requires-python = ">=3.11,<3.13"

src/msca/c2fun/__init__.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,31 @@
1-
from .main import *
1+
from .main import (
2+
C2Fun,
3+
Identity,
4+
Exp,
5+
Log,
6+
Expit,
7+
Logit,
8+
Logerfc,
9+
identity,
10+
exp,
11+
log,
12+
expit,
13+
logit,
14+
logerfc,
15+
)
16+
17+
__all__ = [
18+
"C2Fun",
19+
"Identity",
20+
"Exp",
21+
"Log",
22+
"Expit",
23+
"Logit",
24+
"Logerfc",
25+
"identity",
26+
"exp",
27+
"log",
28+
"expit",
29+
"logit",
30+
"logerfc",
31+
]

src/msca/c2fun/main.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333

3434
from __future__ import annotations
3535

36-
from abc import ABC, abstractproperty, abstractstaticmethod
36+
from abc import ABC, abstractmethod
3737
from typing import Dict
3838

3939
import numpy as np
@@ -53,12 +53,14 @@ class C2Fun(ABC):
5353
5454
"""
5555

56-
@abstractproperty
56+
@property
57+
@abstractmethod
5758
def inv(self) -> C2Fun:
5859
"""The inverse of the function such that :code:`x = fun.inv(fun(x))`."""
5960
pass
6061

61-
@abstractstaticmethod
62+
@staticmethod
63+
@abstractmethod
6264
def fun(x: NDArray) -> NDArray:
6365
"""Implementation of the function.
6466
@@ -70,7 +72,8 @@ def fun(x: NDArray) -> NDArray:
7072
"""
7173
pass
7274

73-
@abstractstaticmethod
75+
@staticmethod
76+
@abstractmethod
7477
def dfun(x: NDArray) -> NDArray:
7578
"""Implementation of the derivative of the function.
7679
@@ -82,7 +85,8 @@ def dfun(x: NDArray) -> NDArray:
8285
"""
8386
pass
8487

85-
@abstractstaticmethod
88+
@staticmethod
89+
@abstractmethod
8690
def d2fun(x: NDArray) -> NDArray:
8791
"""Implementation of the second order derivative of the function.
8892
@@ -520,7 +524,9 @@ def dfun(x: NDArray) -> NDArray:
520524

521525
l_indices = x < 25
522526
y[l_indices] = (
523-
-2 * np.exp(-(x[l_indices] ** 2)) / (erfc(x[l_indices]) * np.sqrt(np.pi))
527+
-2
528+
* np.exp(-(x[l_indices] ** 2))
529+
/ (erfc(x[l_indices]) * np.sqrt(np.pi))
524530
)
525531

526532
r_indices = ~l_indices

src/msca/linalg/matrix.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,9 @@ def solve(self, x: ArrayLike, method: str = "", **kwargs) -> NDArray:
131131
elif method == "cg":
132132
result, info = sp.linalg.cg(self, x, **kwargs)
133133
if info > 0:
134-
raise RuntimeError(f"CG convergence not achieved. with {info=:}")
134+
raise RuntimeError(
135+
f"CG convergence not achieved. with {info=:}"
136+
)
135137
else:
136138
raise ValueError(f"{method=:} is not supported.")
137139
return result
@@ -193,7 +195,9 @@ def solve(self, x: NDArray, method: str = "", **kwargs) -> NDArray:
193195
elif method == "cg":
194196
result, info = sp.sparse.linalg.cg(self, x, **kwargs)
195197
if info > 0:
196-
raise RuntimeError(f"CG convergence not achieved. with {info=:}")
198+
raise RuntimeError(
199+
f"CG convergence not achieved. with {info=:}"
200+
)
197201
else:
198202
raise ValueError(f"{method=:} is not supported.")
199203
return result
@@ -255,7 +259,9 @@ def solve(self, x: NDArray, method: str = "", **kwargs) -> NDArray:
255259
elif method == "cg":
256260
result, info = sp.sparse.linalg.cg(self, x, **kwargs)
257261
if info > 0:
258-
raise RuntimeError(f"CG convergence not achieved. with {info=:}")
262+
raise RuntimeError(
263+
f"CG convergence not achieved. with {info=:}"
264+
)
259265
else:
260266
raise ValueError(f"{method=:} is not supported.")
261267
return result

src/msca/optim/line_search/armijo.py

Lines changed: 47 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -3,49 +3,54 @@
33
import numpy as np
44
from numpy.typing import NDArray
55

6+
67
def armijo_line_search(
7-
x,
8-
p,
9-
g,
10-
objective: Callable,
8+
gradient: Callable,
9+
x: NDArray,
10+
dx: NDArray,
1111
step_init: float = 1.0,
12-
alpha: float = 0.01,
13-
shrinkage: float = 0.5,
14-
):
15-
"""
16-
Performs an Armijo line search to select an appropriate step size along a given search direction.
17-
This function iteratively reduces the step size until the decrease in the objective function, along the direction of descent,
18-
satisfies the Armijo (sufficient decrease) condition. In each iteration, it checks whether the new point yields a value that is
19-
lower than the current value by a margin proportional to the step and directional derivative. If no satisfactory step size is found
20-
and the step size becomes exceedingly small (<= 1e-15), a RuntimeError is raised.
21-
Parameters:
22-
x (array_like): The current point or position in the parameter space.
23-
p (array_like): The descent direction along which the line search is performed.
24-
g (array_like): The gradient of the objective function evaluated at x.
25-
objective (Callable): A callable that computes the objective function value given a point.
26-
step_init (float, optional): The initial step size to start the line search. Default is 1.0.
27-
alpha (float, optional): The Armijo condition control parameter defining the sufficient decrease criterion. Default is 0.01.
28-
shrinkage (float, optional): The factor by which the step is multiplied to reduce the step size in each iteration. Default is 0.5.
29-
Returns:
30-
float: The step size that satisfies the Armijo sufficient decrease condition.
31-
Raises:
32-
RuntimeError: If the step size becomes too small (<= 1e-15) without satisfying the Armijo condition,
33-
indicating failure in finding a suitable step size.
34-
"""
35-
def sufficiently_improved(new_val, step):
36-
return (new_val - val <= -1 * alpha * step * np.dot(g, p)) and (
37-
not np.isnan(new_val)
38-
)
12+
step_const: float = 0.01,
13+
step_scale: float = 0.9,
14+
step_lb: float = 1e-3,
15+
) -> float:
16+
"""Armijo line search.
17+
18+
Parameters
19+
----------
20+
x
21+
A list of parameters, including x, s, and v, where s is the slackness
22+
variable and v is the dual variable for the constraints.
23+
dx
24+
A list of directions for the parameters.
25+
step_init
26+
Initial step size, by default 1.0.
27+
step_const
28+
Constant for the line search condition; larger values make the condition stricter, by
29+
default 0.01.
30+
step_scale
31+
Shrinkage factor for step size, by default 0.9.
32+
step_lb
33+
Lower bound of the step size; when the step size falls below this bound
34+
the line search will be terminated.
3935
36+
Returns
37+
-------
38+
float
39+
The step size in the given direction.
40+
41+
"""
4042
step = step_init
41-
new_x = x - step * p
42-
val, new_val = objective(x), objective(new_x)
43-
while (not sufficiently_improved(new_val, step)):
44-
if step <= 1e-15:
45-
raise RuntimeError(
46-
f"Line Search Failed, new_val = {new_val}, prev_val = {val}"
47-
)
48-
step *= shrinkage
49-
new_x = x - step * p
50-
new_val = objective(new_x)
51-
return step
43+
x_next = x + step * dx
44+
g_next = gradient(x_next)
45+
gnorm_curr = np.max(np.abs(gradient(x)))
46+
gnorm_next = np.max(np.abs(g_next))
47+
48+
while gnorm_next > (1 - step_const * step) * gnorm_curr:
49+
if step * step_scale < step_lb:
50+
break
51+
step *= step_scale
52+
x_next = x + step * dx
53+
g_next = gradient(x_next)
54+
gnorm_next = np.max(np.abs(g_next))
55+
56+
return step

src/msca/optim/prox/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
11
from .capped_simplex import proj_capped_simplex
2+
3+
__all__ = ["proj_capped_simplex"]

src/msca/optim/solver/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
11
from .ipsolver import IPSolver
22
from .ntcgsolver import NTCGSolver
33
from .ntsolver import NTSolver
4+
5+
__all__ = ["IPSolver", "NTCGSolver", "NTSolver"]

src/msca/optim/solver/ipsolver.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,12 @@ class IPSolver:
5959
"""
6060

6161
def __init__(
62-
self, fun: Callable, grad: Callable, hess: Callable, cmat: Matrix, cvec: NDArray
62+
self,
63+
fun: Callable,
64+
grad: Callable,
65+
hess: Callable,
66+
cmat: Matrix,
67+
cvec: NDArray,
6368
):
6469
self.fun = fun
6570
self.grad = grad
@@ -257,7 +262,9 @@ def minimize(
257262
dp = [dx, ds, dv]
258263

259264
# get step size
260-
step, p = self._update_params(p, dp, m, a_init, a_const, a_scale, a_lb)
265+
step, p = self._update_params(
266+
p, dp, m, a_init, a_const, a_scale, a_lb
267+
)
261268

262269
# update m
263270
if niter % m_freq == 0:

src/msca/optim/solver/ntcgsolver.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ def minimize(
126126
precon_builder = precon_builder_map[precon_builder](
127127
**(precon_builder_options or {})
128128
)
129-
cg_options = cg_options or {"rtol":1e-2}
129+
cg_options = cg_options or {"rtol": 1e-2}
130130

131131
def get_cg_maxiter(niter: int) -> int | None:
132132
if cg_maxiter_init is None and cg_maxiter is None:
@@ -171,13 +171,17 @@ def cg_iter_counter(xk, cg_info):
171171
if precon_builder is not None:
172172
cg_options["M"] = precon_builder(x_pair, g_pair)
173173
cg_options["maxiter"] = get_cg_maxiter(niter)
174-
dx = cg(hess, -g,**cg_options)[0]
174+
dx = cg(hess, -g, **cg_options)[0]
175175
try:
176176
# get step size
177-
step = line_search(x, -dx,g,self.fun, **line_search_options)
178-
except:
177+
step = line_search(
178+
gradient=self.grad, x=x, dx=-dx, **line_search_options
179+
)
180+
except RuntimeError:
179181
dx = -g
180-
step = line_search(x, -dx,g,self.fun, **line_search_options)
182+
step = line_search(
183+
gradient=self.grad, x=x, dx=-dx, **line_search_options
184+
)
181185
x = x + step * dx
182186

183187
# update f and gnorm

src/msca/optim/solver/ntsolver.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -168,13 +168,17 @@ def minimize(
168168
if verbose:
169169
fun = self.fun(x)
170170
print(f"{type(self).__name__}:")
171-
print(f"{niter=:3d}, {fun=:.2e}, {gnorm=:.2e}, {xdiff=:.2e}, {step=:.2e}")
171+
print(
172+
f"{niter=:3d}, {fun=:.2e}, {gnorm=:.2e}, {xdiff=:.2e}, {step=:.2e}"
173+
)
172174

173175
while (not success) and (niter < max_iter):
174176
niter += 1
175177

176178
# compute all directions
177-
dx = -self.hess(x).solve(g, method=mat_solve_method, **mat_solve_options)
179+
dx = -self.hess(x).solve(
180+
g, method=mat_solve_method, **mat_solve_options
181+
)
178182

179183
# get step size
180184
step, x = self._update_params(x, dx, a_init, a_const, a_scale, a_lb)

0 commit comments

Comments (0)