Merge pull request #644 from CUQI-DTU/debug_rto_solver

chaozg · web-flow · commit c62aed410583 · 2025-04-28T12:59:25.000+02:00
Add option to use ScipyMinimizer in RegularizedLinearRTO and fix bug
diff --git a/cuqi/experimental/mcmc/_rto.py b/cuqi/experimental/mcmc/_rto.py
@@ -3,7 +3,7 @@
 from scipy.sparse.linalg import LinearOperator as scipyLinearOperator
 import numpy as np
 import cuqi
-from cuqi.solver import CGLS, FISTA, ADMM, ScipyLinearLSQ
+from cuqi.solver import CGLS, FISTA, ADMM, ScipyLinearLSQ, ScipyMinimizer
 from cuqi.experimental.mcmc import Sampler
 
 
@@ -167,6 +167,7 @@ class RegularizedLinearRTO(LinearRTO):
     ADMM:  [2] Boyd et al. "Distributed optimization and statistical learning via the alternating direction method of multipliers."Foundations and Trends® in Machine learning, 2011.
            Used when prior.proximal is a list of penalty terms.
     ScipyLinearLSQ: Wrapper for Scipy's lsq_linear for the Trust Region Reflective algorithm. Optionally used when the constraint is either "nonnegativity" or "box".
+    ScipyMinimizer: Wrapper for Scipy's minimize. Optionally used when the constraint is either "nonnegativity" or "box".
 
     Parameters
     ------------
@@ -177,7 +178,7 @@ class RegularizedLinearRTO(LinearRTO):
         Initial point for the sampler. *Optional*.
 
     maxit : int
-        Maximum number of iterations of the FISTA/ADMM/ScipyLinearLSQ solver. *Optional*.
+        Maximum number of iterations of the FISTA/ADMM/ScipyLinearLSQ/ScipyMinimizer solver. *Optional*.
 
     inner_max_it : int
         Maximum number of iterations of the CGLS solver used within the ADMM solver. *Optional*.
@@ -191,7 +192,7 @@ class RegularizedLinearRTO(LinearRTO):
         See [2] or `cuqi.solver.ADMM`
 
     abstol : float
-        Absolute tolerance of the FISTA/ScipyLinearLSQ solver. *Optional*.
+        Absolute tolerance of the FISTA/ScipyLinearLSQ/ScipyMinimizer solver. *Optional*.
     
     inner_abstol : float
         Tolerance parameter for ScipyLinearLSQ's inner solve of the unbounded least-squares problem. *Optional*.
@@ -200,7 +201,7 @@ class RegularizedLinearRTO(LinearRTO):
         If True, FISTA is used as solver, otherwise ISTA is used. *Optional*.
     
     solver : string
-        If set to "ScipyLinearLSQ", solver is set to cuqi.solver.ScipyLinearLSQ, otherwise FISTA/ISTA or ADMM is used. Note "ScipyLinearLSQ" can only be used with `RegularizedGaussian` of `box` or `nonnegativity` constraint. *Optional*.
+        Options are "FISTA" (default for a single constraint or regularization), "ADMM" (default and the only option for multiple constraints or regularizations), "ScipyLinearLSQ" and "ScipyMinimizer". Note "ScipyLinearLSQ" and "ScipyMinimizer" can only be used with `RegularizedGaussian` of a single `box` or `nonnegativity` constraint. *Optional*.
 
     callback : callable, optional
         A function that will be called after each sampling step. It can be useful for monitoring the sampler during sampling.
@@ -234,11 +235,11 @@ def solver(self):
 
     @solver.setter
     def solver(self, value):
-        if value == "ScipyLinearLSQ":
+        if value == "ScipyLinearLSQ" or value == "ScipyMinimizer":
             if (self.target.prior.preset["constraint"] == "nonnegativity" or self.target.prior.preset["constraint"] == "box"):
                 self._solver = value
             else:
-                raise ValueError("ScipyLinearLSQ only supports RegularizedGaussian with box or nonnegativity constraint.")
+                raise ValueError("ScipyLinearLSQ and ScipyMinimizer only support RegularizedGaussian with box or nonnegativity constraint.")
         else:
             self._solver = value
 
@@ -281,15 +282,22 @@ def step(self):
             sim = ADMM(self.M, y, self.proximal,
                         self.current_point, self.penalty_parameter, maxit = self.maxit, inner_max_it = self.inner_max_it, adaptive = self.adaptive)
         elif self.solver == "ScipyLinearLSQ":
-                A_op = sp.sparse.linalg.LinearOperator((sum([llh.dim for llh in self.likelihoods])+self.target.prior.dim, self.target.prior.dim),
-                                        matvec=lambda x: self.M(x, 1),
-                                        rmatvec=lambda x: self.M(x, 2)
-                                        )
-                sim = ScipyLinearLSQ(A_op, y, self.target.prior._box_bounds, 
-                                     max_iter = self.maxit,
-                                     lsmr_maxiter = self.inner_max_it, 
-                                     tol = self.abstol,
-                                     lsmr_tol = self.inner_abstol)
+            A_op = sp.sparse.linalg.LinearOperator((sum([llh.distribution.dim for llh in self.likelihoods])+self.target.prior.dim, self.target.prior.dim),
+                                    matvec=lambda x: self.M(x, 1),
+                                    rmatvec=lambda x: self.M(x, 2)
+                                    )
+            sim = ScipyLinearLSQ(A_op, y, self.target.prior._box_bounds, 
+                                    max_iter = self.maxit,
+                                    lsmr_maxiter = self.inner_max_it, 
+                                    tol = self.abstol,
+                                    lsmr_tol = self.inner_abstol)
+        elif self.solver == "ScipyMinimizer":
+            # Adapt the bounds format: scipy.optimize.minimize expects a sequence of
+            # (min, max) pairs, whereas scipy.optimize.lsq_linear takes a (lb, ub) tuple.
+            bounds = [(self.target.prior._box_bounds[0][i], self.target.prior._box_bounds[1][i]) for i in range(self.target.prior.dim)]
+            # Note that the objective function is defined as 0.5*||Mx-y||^2, 
+            # and the corresponding gradient (gradfunc) is given by M^T(Mx-y).
+            sim = ScipyMinimizer(lambda x: 0.5*np.sum((self.M(x, 1)-y)**2), self.current_point, gradfunc=lambda x: self.M(self.M(x, 1) - y, 2), bounds=bounds, tol=self.abstol, options={"maxiter": self.maxit})
         else:
             raise ValueError("Choice of solver not supported.")
 
diff --git a/cuqi/solver/_solver.py b/cuqi/solver/_solver.py
@@ -196,15 +196,19 @@ class ScipyLSQ(object):
         'trf', Trust Region Reflective algorithm: for large sparse problems with bounds.
         'dogbox', dogleg algorithm with rectangular trust regions, for small problems with bounds.
         'lm', Levenberg-Marquardt algorithm as implemented in MINPACK. Doesn't handle bounds and sparse Jacobians.
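+    tol : Tolerance for termination by the change of the independent variables. Passed to scipy's least_squares as `xtol`.
+    maxit : The maximum number of function evaluations. Passed to scipy's least_squares as `max_nfev`.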
+    kwargs : Additional keyword arguments passed to scipy's least_squares. Empty by default. See documentation for scipy.optimize.least_squares
     """
-    def __init__(self, func, x0, jacfun='2-point', method='trf', loss='linear', tol=1e-6, maxit=1e4):
+    def __init__(self, func, x0, jacfun='2-point', method='trf', loss='linear', tol=1e-6, maxit=1e4, **kwargs):
         self.func = func
         self.x0 = x0
         self.jacfun = jacfun
         self.method = method
         self.loss = loss
         self.tol = tol
         self.maxit = int(maxit)
+        self.kwargs = kwargs
     
     def solve(self):
         """Runs optimization algorithm and returns solution and info.
@@ -215,7 +219,7 @@ def solve(self):
             Solution found (array_like) and optimization information (dictionary).
         """
         solution = least_squares(self.func, self.x0, jac=self.jacfun, \
-                                method=self.method, loss=self.loss, xtol=self.tol, max_nfev=self.maxit)
+                                method=self.method, loss=self.loss, xtol=self.tol, max_nfev=self.maxit, **self.kwargs)
         info = {"success": solution['success'],
                 "message": solution['message'],
                 "func": solution['fun'],
diff --git a/tests/test_solver.py b/tests/test_solver.py
@@ -100,19 +100,27 @@ def test_ScipyLinearLSQ_with_LinearOperator():
     sol, _ = ScipyLinearLSQ(A_op, b).solve()
     assert np.allclose(sol, ref_sol, rtol=1e-10)
 
-def test_ScipyLinearLSQ_against_FISTA():
+def test_ScipyLinearLSQ_against_ScipyMinimizer_and_against_FISTA():
     A = np.array([[73,71,52],[87,74,46],[72,2,7],[80,89,71]])
     b = np.array([49,67,68,20])
+
+    # solve with ScipyMinimizer
+    def fun(x):
+        return 0.5*np.linalg.norm(A@x-b)**2
+    def jac(x):
+        return A.T@(A@x-b)
+    sol_min, _ = ScipyMinimizer(fun, np.zeros(3), gradfunc=jac, tol=1e-10, bounds=[(0,np.inf),(0,np.inf),(0,np.inf)]).solve()
+
     # solve with ScipyLinearLSQ
-    lb = np.zeros(3)
-    ub = lb + np.inf
-    sol_lsq, _ = ScipyLinearLSQ(A, b, (lb,ub)).solve()
+    sol_lsq, _ = ScipyLinearLSQ(A, b, ([0,0,0],[np.inf,np.inf,np.inf]), tol=1e-10).solve()
+
     # solve with FISTA
     rng = np.random.default_rng(seed = 1219)
     x0 = rng.standard_normal(3)
     sol_fista, _ = FISTA(A, b, lambda x, _: ProjectNonnegative(x), x0, stepsize=1e-7, maxit=100000, abstol=1e-16, adaptive=True).solve()
 
     assert np.allclose(sol_lsq, sol_fista, rtol=1e-8)
+    assert np.allclose(sol_min, sol_lsq, rtol=1e-8)
 
 def test_LM():
     # compare to MATLAB's original code solution
diff --git a/tests/zexperimental/test_mcmc.py b/tests/zexperimental/test_mcmc.py
@@ -1511,6 +1511,31 @@ def test_RegularizedLinearRTO_ScipyLinearLSQ_option_invalid():
     with pytest.raises(ValueError, match="ScipyLinearLSQ"):
         sampler = cuqi.experimental.mcmc.RegularizedLinearRTO(posterior, solver = "ScipyLinearLSQ")
 
+def test_RegularizedLinearRTO_ScipyLinearLSQ_against_ScipyMinimizer_and_against_FISTA():
+    # Define LinearModel and data
+    A, y_obs, _ = cuqi.testproblem.Deconvolution1D().get_components()
+
+    # Define Bayesian Problem
+    x = cuqi.implicitprior.NonnegativeGMRF(np.zeros(A.domain_dim), 100)
+    y = cuqi.distribution.Gaussian(A@x, 0.01**2)
+    posterior = cuqi.distribution.JointDistribution(x, y)(y=y_obs)
+
+    # Set up RegularizedLinearRTO with three solvers
+    sampler1 = cuqi.experimental.mcmc.RegularizedLinearRTO(posterior, solver="ScipyMinimizer", maxit=1000, tol=1e-8)
+    sampler2 = cuqi.experimental.mcmc.RegularizedLinearRTO(posterior, solver="ScipyLinearLSQ", maxit=1000, tol=1e-8)
+    sampler3 = cuqi.experimental.mcmc.RegularizedLinearRTO(posterior, solver="FISTA", maxit=1000, tol=1e-8)
+
+    # Sample with fixed seed
+    np.random.seed(0)
+    samples1 = sampler1.sample(5).get_samples()
+    np.random.seed(0)
+    samples2 = sampler2.sample(5).get_samples()
+    np.random.seed(0)
+    samples3 = sampler3.sample(5).get_samples()
+
+    assert np.allclose(samples1.samples.mean(), samples2.samples.mean(), rtol=1e-5)
+    assert np.allclose(samples1.samples.mean(), samples3.samples.mean(), rtol=1e-5)
+
 # ============ Start testing sampler callback ============
 # Samplers that should be tested for callback
 callback_testing_sampler_classes = [