Neaten nonlinear solver: drop lstsq fallback and cond_lstsq_thresh, tidy docstrings

bobmyhill · bobmyhill · commit c687ec54e773 · 2025-10-17T21:04:30.000+01:00
diff --git a/burnman/optimize/nonlinear_solvers.py b/burnman/optimize/nonlinear_solvers.py
@@ -29,9 +29,6 @@ def __init__(
         """
         Initialize the Solution instance.
 
-        Arguments are stored as attributes of the instance
-        with the same names.
-
         :param x: Final solution vector.
         :type x: np.ndarray, optional
         :param n_it: Number of iterations performed.
@@ -248,7 +245,6 @@ def __init__(
         store_iterates: bool = False,
         regularization: float = np.finfo(float).eps,
         cond_lu_thresh: float = 1e12,
-        cond_lstsq_thresh: float = 1e15,
         constraint_thresh: float = 2 * np.finfo(float).eps,
     ):
         """
@@ -322,7 +318,6 @@ def __init__(
         self.linear_constraints = linear_constraints
         self.regularization = regularization
         self.cond_lu_thresh = cond_lu_thresh
-        self.cond_lstsq_thresh = cond_lstsq_thresh
         self.eps = 2.0 * np.finfo(float).eps
         self.max_condition_number = 1.0 / np.finfo(float).eps
 
@@ -368,82 +363,66 @@ def _solve_subject_to_constraints(
         c_prime: npt.NDArray[np.float64],
     ) -> npt.NDArray[np.float64]:
         """
-        Solve a constrained Newton correction step using the method of
-        Lagrange multipliers (KKT system).
-
-        This method computes a step ``dx`` that minimizes the linearized
-        residual ||J(x)·dx|| subject to linear equality constraints derived
-        from the currently active inequality constraints.
-
-        The system is solved using the KKT (Karush-Kuhn-Tucker) formulation:
-
-        .. math::
-
-            \\begin{bmatrix}
-                J^T J + \\alpha I & A^T \\\\
-                A & 0
-            \\end{bmatrix}
-            \\begin{bmatrix}
-                dx \\\\
-                \\lambda
-            \\end{bmatrix}
-            =
-            - \\begin{bmatrix}
-                0 \\\\
-                c(x)
-            \\end{bmatrix}
+        Compute a constrained Newton correction step using the
+        Karush-Kuhn-Tucker (KKT) formulation.
 
-        where:
+        This method solves for the update ``dx`` that minimizes the linearized
+        residual ``||J(x)·dx||`` subject to the active linear equality constraints
+        ``c'·dx + c(x) = 0`` (``c'`` is ``c_prime``)::
 
-        * ``J`` is the Jacobian at ``x``
-        * ``A`` is the constraint Jacobian (``c_prime``)
-        * ``c(x)`` is the constraint evaluation
-        * ``\\lambda`` are the Lagrange multipliers
-        * ``\\alpha`` = ``self.regularization`` is an optional regularization parameter
+            [ JᵀJ + alpha * I   c'ᵀ ] [   dx   ]  =  -[  0   ]
+            [        c'          0  ] [ lambda ]      [ c(x) ]
 
-        The KKT system is solved using one of three strategies depending on
-        the estimated condition number of the matrix:
+        where:
+            * ``J`` is the Jacobian at ``x`` (``jac_x``)
+            * ``c_prime`` is the active constraint Jacobian
+            * ``c(x)`` are the constraint values
+            * ``lambda`` are the Lagrange multipliers
+            * ``alpha = self.regularization`` is a Tikhonov regularization parameter
 
-        1. **LU factorization** if ``cond < cond_lu_thresh``
-        2. **Least-squares solve** if ``cond < cond_lstsq_thresh``
-        3. **SVD-based pseudo-inverse** for ill-conditioned cases
+        The KKT system is solved adaptively based on its condition number:
+            1. LU factorization if the condition number is below ``self.cond_lu_thresh``
+            2. SVD-based pseudo-inverse otherwise, with negligible singular values dropped
 
         :param x: Current solution vector.
-        :type x: np.ndarray
-        :param jac_x: Current Jacobian matrix J(x).
-        :type jac_x: np.ndarray
-        :param c_x: Values of the active constraints at x.
-        :type c_x: np.ndarray
-        :param c_prime: Jacobian of the active constraints (A in Ax + b = 0).
-        :type c_prime: np.ndarray
-
-        :return: A 3-tuple containing:
-
-            * **x_new** (np.ndarray) -- Updated solution ``x + dx``.
-            * **lambdas** (np.ndarray) -- Lagrange multipliers for active constraints.
-            * **condition_number** (float) -- Estimated condition number of the KKT matrix.
-
-        :rtype: tuple[np.ndarray, np.ndarray, float]
+        :type x: numpy.ndarray
+        :param jac_x: Jacobian of residuals at ``x``.
+        :type jac_x: numpy.ndarray
+        :param c_x: Values of the active constraints at ``x``.
+        :type c_x: numpy.ndarray
+        :param c_prime: Jacobian of the active constraints.
+        :type c_prime: numpy.ndarray
+
+        :return: Tuple ``(x_new, lambdas, condition_number)`` where:
+            - **x_new** (*numpy.ndarray*) – Updated solution vector ``x + dx``.
+            - **lambdas** (*numpy.ndarray*) – Lagrange multipliers for active constraints.
+            - **condition_number** (*float*) – Estimated condition number of the KKT matrix.
+        :rtype: tuple[numpy.ndarray, numpy.ndarray, float]
         """
+
         n_x = x.shape[0]
         n_c = c_x.shape[0]
         JTJ_reg = jac_x.T @ jac_x + self.regularization * np.eye(n_x)
-        norm = n_x * n_x / np.linalg.norm(JTJ_reg)
-        KKT = np.block([[JTJ_reg * norm, c_prime.T], [c_prime, np.zeros((n_c, n_c))]])
+        scale = np.linalg.norm(JTJ_reg)
+        if scale == 0:
+            scale = 1.0
+        KKT = np.block([[JTJ_reg / scale, c_prime.T], [c_prime, np.zeros((n_c, n_c))]])
         rhs = -np.concatenate([np.zeros(n_x), c_x])
 
         condition_number = np.linalg.cond(KKT)
         if condition_number < self.cond_lu_thresh:
-            dx_lambda = lu_solve(lu_factor(KKT), rhs)
-        elif condition_number < self.cond_lstsq_thresh:
-            dx_lambda, *_ = np.linalg.lstsq(KKT, rhs, rcond=None)
+            lu, piv = lu_factor(KKT)
+            dx_lambda = lu_solve((lu, piv), rhs)
         else:
             U, s, Vt = np.linalg.svd(KKT, full_matrices=False)
-            s_inv = np.where(s > 1e-12, 1.0 / s, 0.0)
+            tol = np.finfo(float).eps * max(KKT.shape) * np.max(s)
+            s_inv = np.where(s > tol, 1.0 / s, 0.0)
             dx_lambda = Vt.T @ (s_inv * (U.T @ rhs))
 
         dx = dx_lambda[:n_x]
-        return x + dx, dx_lambda[n_x:], condition_number
+        lambdas = dx_lambda[n_x:]
+
+        return x + dx, lambdas, condition_number
 
     def _constrain_step_to_feasible_region(
         self,
@@ -454,46 +433,30 @@ def _constrain_step_to_feasible_region(
         x_j: npt.NDArray[np.float64],
     ) -> tuple[npt.NDArray[np.float64], float]:
         """
-        Project a trial Newton step back into the feasible region defined
-        by linear inequality constraints A.x + b <= 0.
-
-        This method checks whether the trial point x_j = x + lambda.dx violates
-        any constraints. If so, it computes the maximum allowable step scaling
-        factor to remain feasible, reduces lambda accordingly, and updates the
-        trial iterate.
-
-        The scaling factor is computed per violated constraint as:
-
-        .. math::
+        Project a trial step back into the feasible region of linear inequality constraints.
 
-            \\lambda_i = \\frac{c_x[i]}{c_x[i] - c_{x_j}[i]}
-
-        where c_x and c_{x_j} are the constraint function values at x and x_j.
-        The smallest lambda_i is used to rescale the step to just touch the first
-        violated constraint.
+        Given a trial point x_j = x + lambda*dx, this method checks for constraint
+        violations and rescales the step to remain feasible. The scaling factor is
+        computed per violated constraint, and the smallest factor is applied to
+        lambda to ensure the trial point stays within the feasible region.
 
         :param x: Current solution vector.
-        :type x: np.ndarray
-        :param dx: Full Newton step direction.
-        :type dx: np.ndarray
-        :param n_constraints: Total number of linear inequality constraints.
+        :type x: numpy.ndarray
+        :param dx: Newton step direction.
+        :type dx: numpy.ndarray
+        :param n_constraints: Number of linear inequality constraints.
         :type n_constraints: int
-        :param lmda: Current damping factor lambda for the trial step.
+        :param lmda: Current step scaling factor.
         :type lmda: float
-        :param x_j: Current trial iterate x + lambda.dx.
-        :type x_j: np.ndarray
-
-        :return: A 3-tuple containing:
-
-            * **lmda** (float)
-              -- Updated damping factor lambda that ensures feasibility.
-            * **x_j** (np.ndarray)
-              -- Adjusted trial point within the feasible region.
-            * **violated_constraints** (list[tuple[int, float]])
-              -- List of (index, lambda_i) for each violated constraint,
-              sorted by lambda_i.
-
-        :rtype: tuple[float, np.ndarray, list[tuple[int, float]]]
+        :param x_j: Trial point x + lambda*dx.
+        :type x_j: numpy.ndarray
+
+        :return: Tuple ``(lmda, x_j, violated_constraints)`` where:
+            - **lmda** (*float*) - Updated scaling factor ensuring feasibility.
+            - **x_j** (*numpy.ndarray*) - Adjusted trial point within feasible region.
+            - **violated_constraints** (*list[tuple[int, float]]*) - List of
+              (constraint index, scaling factor) for violated constraints, sorted by factor.
+        :rtype: tuple[float, numpy.ndarray, list[tuple[int, float]]]
         """
         c_x_j = self._constraints(x_j)
         c_x = self._constraints(x)
@@ -514,98 +477,88 @@ def _lagrangian_walk_along_constraints(
         dx: npt.NDArray[np.float64],
         luJ: Any,
         dx_norm: float,
-        violated_constraints: list[int],
-    ) -> tuple[npt.NDArray[np.float64], float]:
+        violated_constraints: list[tuple[int, float]],
+    ) -> tuple[float, npt.NDArray[np.float64], npt.NDArray[np.float64], bool]:
         """
-        Attempt to find a constrained Newton step when a step along the
-        standard Newton direction would immediately violate active linear
-        inequality constraints (A.x + b <= 0).
-        Uses the method of Lagrange multipliers, attemping to "walk along"
-        the active constraints to remain in the feasible region while
-        decreasing the residual norm ||F(x)||.
-
-        :param sol: Current solver state with fields x and F.
-        :type sol: SimpleNamespace
-        :param dx: Current Newton step direction.
-        :type dx: np.ndarray
-        :param luJ: LU factorization of the current Jacobian, as returned by
-            ``scipy.linalg.lu_factor``.
-        :type luJ: tuple
-        :param dx_norm: L2 norm of the current Newton step dx.
-        :type dx_norm: float
-        :param lmda_bounds: Tuple (min_lambda, max_lambda) for the damping factor.
-        :type lmda_bounds: tuple[float, float]
+        Attempt a constrained Newton step along active linear constraints
+        to remain feasible while decreasing the residual norm.
+
+        :param dx: Newton step direction.
+        :param luJ: LU factorization of current Jacobian (from `lu_factor`).
+        :param dx_norm: L2 norm of the Newton step.
         :param violated_constraints: List of (index, fraction) for constraints
-            that would be violated by the current Newton step.
-        :type violated_constraints: list[tuple[int, float]]
+            that would be violated by the current step.
 
-        :return: Updated damping factor, updated values, full Newton step,
-            and flag indicating whether the solver encountered a persistent
-            constraint violation or reached the minimum lambda.
-        :rtype: tuple[float, np.ndarray, np.ndarray, bool]
+        :return: Tuple of (lambda, adjusted trial point x_j, full Newton step dx,
+                persistent_bound_violation flag).
         """
         sol = self.sol
 
-        active_constraint_indices = [
+        # Split constraints into active and inactive based on proximity to boundary
+        active_idx = [
             i for i, vc in violated_constraints if vc < self.constraint_thresh
         ]
-        inactive_constraint_indices = [
+        inactive_idx = [
             i for i, vc in violated_constraints if vc >= self.constraint_thresh
         ]
-        c_newton = self._constraints(self.sol.x + dx)[active_constraint_indices]
-        c_A = self.linear_constraints[0][active_constraint_indices]
-        x_n = self.sol.x + dx
-        persistent_bound_violation = False
 
-        if len(c_A) > 0 and np.linalg.matrix_rank(c_A) == len(dx):
-            n_act = len(active_constraint_indices)
+        # Evaluate active constraints and corresponding Jacobian
+        c_active = self._constraints(sol.x + dx)[active_idx]
+        A_active = self.linear_constraints[0][active_idx]
+        x_n = sol.x + dx
+        persistent_violation = False
+
+        # If the active constraints have rank len(dx) they fix the step uniquely, so relax them
+        if len(A_active) > 0 and np.linalg.matrix_rank(A_active) == len(dx):
+            n_act = len(active_idx)
+            # Attempt to remove one active constraint at a time if necessary
             for i_rm in range(n_act):
-                potential_active_indices = [
-                    active_constraint_indices[i] for i in range(n_act) if i != i_rm
-                ]
-                c_newton = self._constraints(sol.x + dx)[potential_active_indices]
-                c_A = self.linear_constraints[0][potential_active_indices]
-                x_m = self._solve_subject_to_constraints(x_n, sol.J, c_newton, c_A)[0]
-                if self._constraints(x_m)[active_constraint_indices[i_rm]] < 0.0:
+                keep_idx = [active_idx[j] for j in range(n_act) if j != i_rm]
+                c_subset = self._constraints(sol.x + dx)[keep_idx]
+                A_subset = self.linear_constraints[0][keep_idx]
+                x_m = self._solve_subject_to_constraints(
+                    x_n, sol.J, c_subset, A_subset
+                )[0]
+                if self._constraints(x_m)[active_idx[i_rm]] < 0:
                     break
         else:
-            x_m = self._solve_subject_to_constraints(x_n, sol.J, c_newton, c_A)[0]
+            x_m = self._solve_subject_to_constraints(x_n, sol.J, c_active, A_active)[0]
 
+        # Update step and damping factor
         dx = x_m - sol.x
-        lmda_bounds_new = self.lambda_bounds(dx, sol.x)
-        lmda = lmda_bounds_new[1]
+        lmda_min, lmda_max = self.lambda_bounds(dx, sol.x)
+        lmda = lmda_max
         x_j = sol.x + lmda * dx
 
-        # Check feasibility
+        # Check feasibility at minimum lambda
         try:
-            x_j_min = sol.x + lmda_bounds_new[0] * dx
+            x_j_min = sol.x + lmda_min * dx
             F_j_min = self.F(x_j_min)
             dxbar_j_min = lu_solve(luJ, -F_j_min)
-            dxbar_j_min_norm = np.linalg.norm(dxbar_j_min, ord=2)
-
-            if dxbar_j_min_norm > dx_norm or np.linalg.norm(dx, ord=2) < self.eps:
-                persistent_bound_violation = True
+            if np.linalg.norm(dxbar_j_min) > dx_norm or np.linalg.norm(dx) < self.eps:
+                persistent_violation = True
         except Exception:
-            # For example, if self.F(x_j_min) fails
-            persistent_bound_violation = True
-
-        # Check newly violated inactive constraints
-        n_inactive = len(inactive_constraint_indices)
-        c_x_j = self._constraints(x_j)[inactive_constraint_indices]
-        if not np.all(c_x_j < self.eps):
-            c_x = self._constraints(sol.x)[inactive_constraint_indices]
-            violated_constraints = sorted(
-                [
-                    (i, c_x[i] / (c_x[i] - c_x_j[i]))
-                    for i in range(n_inactive)
-                    if c_x_j[i] >= self.eps
-                ],
-                key=lambda x: x[1],
-            )
-            lmda *= violated_constraints[0][1]
-            x_j = sol.x + lmda * dx
+            persistent_violation = True
+
+        # Check that inactive constraints are not now violated
+        # If they are, rescale lambda
+        if inactive_idx:
+            c_inactive_new = self._constraints(x_j)[inactive_idx]
+            if not np.all(c_inactive_new < self.eps):
+                c_inactive_old = self._constraints(sol.x)[inactive_idx]
+                violated_new = sorted(
+                    [
+                        (i, c_inactive_old[i] / (c_inactive_old[i] - c_inactive_new[i]))
+                        for i in range(len(inactive_idx))
+                        if c_inactive_new[i] >= self.eps
+                    ],
+                    key=lambda t: t[1],
+                )
+                # Rescale lambda to maintain feasibility
+                lmda *= violated_new[0][1]
+                x_j = sol.x + lmda * dx
 
-        return lmda, x_j, dx, persistent_bound_violation
+        return lmda, x_j, dx, persistent_violation
 
     def _check_convergence(
         self,