Merge pull request #679 from CUQI-DTU/rto_initial_guess

chaozg · web-flow · commit 1bd165981a9e · 2025-09-12T10:46:36.000+02:00
Allow other options for initial guess in (Regularized)LinearRTO steps
diff --git a/cuqi/experimental/mcmc/_rto.py b/cuqi/experimental/mcmc/_rto.py
@@ -36,21 +36,48 @@ class LinearRTO(Sampler):
     tol : float
         Tolerance of the inner CGLS solver. *Optional*.
 
+    inner_initial_point : string or np.ndarray or cuqi.array.CUQIarray
+        Initial point for the inner optimization problem. Can be "previous_sample" (default), "MAP" (string options are case-insensitive), or a specific numpy array or CUQIarray. *Optional*.
+        
     callback : callable, optional
         A function that will be called after each sampling step. It can be useful for monitoring the sampler during sampling.
         The function should take three arguments: the sampler object, the index of the current sampling step, the total number of requested samples. The last two arguments are integers. An example of the callback function signature is: `callback(sampler, sample_index, num_of_samples)`.
         
     """
-    def __init__(self, target=None, initial_point=None, maxit=10, tol=1e-6, **kwargs):
+    def __init__(self, target=None, initial_point=None, maxit=10, tol=1e-6, inner_initial_point="previous_sample", **kwargs):
 
         super().__init__(target=target, initial_point=initial_point, **kwargs)
 
         # Other parameters
         self.maxit = maxit
         self.tol = tol
+        self.inner_initial_point = inner_initial_point
 
     def _initialize(self):
         self._precompute()
+        self._compute_map()
+
+    @property
+    def inner_initial_point(self):
+        if isinstance(self._inner_initial_point, str):
+            if self._inner_initial_point == "previous_sample":
+                return self.current_point
+            elif self._inner_initial_point == "map":
+                return self._map
+        else:
+            return self._inner_initial_point
+
+    @inner_initial_point.setter
+    def inner_initial_point(self, value):
+        is_correct_string = (isinstance(value, str) and
+                             (value.lower() == "previous_sample" or
+                              value.lower() == "map"))
+        if is_correct_string:
+            self._inner_initial_point = value.lower()
+        elif isinstance(value, (np.ndarray, cuqi.array.CUQIarray)):
+            self._inner_initial_point = value
+        else:
+            raise ValueError("Invalid value for inner_initial_point. Choose either 'previous_sample', 'MAP', or provide a numpy array/cuqi array.")
 
     @property
     def prior(self):
@@ -78,6 +105,10 @@ def models(self):
         elif isinstance(self.target, cuqi.distribution.MultipleLikelihoodPosterior):
             return self.target.models    
 
+    def _compute_map(self):
+        sim = CGLS(self.M, self.b_tild, self.current_point, self.maxit, self.tol)            
+        self._map, _ = sim.solve()
+
     def _precompute(self):
         L1 = [likelihood.distribution.sqrtprec for likelihood in self.likelihoods]
         L2 = self.prior.sqrtprec
@@ -114,7 +145,7 @@ def M(x, flag):
 
     def step(self):
         y = self.b_tild + np.random.randn(len(self.b_tild))
-        sim = CGLS(self.M, y, self.current_point, self.maxit, self.tol)            
+        sim = CGLS(self.M, y, self.inner_initial_point, self.maxit, self.tol)            
         self.current_point, _ = sim.solve()
         acc = 1
         return acc
@@ -203,12 +234,15 @@ class RegularizedLinearRTO(LinearRTO):
     solver : string
         Options are "FISTA" (default for a single constraint or regularization), "ADMM" (default and the only option for multiple constraints or regularizations), "ScipyLinearLSQ" and "ScipyMinimizer". Note "ScipyLinearLSQ" and "ScipyMinimizer" can only be used with `RegularizedGaussian` of a single `box` or `nonnegativity` constraint. *Optional*.
 
+    inner_initial_point : string or np.ndarray or cuqi.array.CUQIarray
+        Initial point for the inner optimization problem. Can be "previous_sample" (default), "MAP" (string options are case-insensitive), or a specific numpy array or CUQIarray. *Optional*.
+
     callback : callable, optional
         A function that will be called after each sampling step. It can be useful for monitoring the sampler during sampling.
         The function should take three arguments: the sampler object, the index of the current sampling step, the total number of requested samples. The last two arguments are integers. An example of the callback function signature is: `callback(sampler, sample_index, num_of_samples)`.
         
     """
-    def __init__(self, target=None, initial_point=None, maxit=100, inner_max_it=10, stepsize="automatic", penalty_parameter=10, abstol=1e-10, adaptive=True, solver=None, inner_abstol=None, **kwargs):
+    def __init__(self, target=None, initial_point=None, maxit=100, inner_max_it=10, stepsize="automatic", penalty_parameter=10, abstol=1e-10, adaptive=True, solver=None, inner_abstol=None, inner_initial_point="previous_sample", **kwargs):
         
         super().__init__(target=target, initial_point=initial_point, **kwargs)
 
@@ -221,13 +255,15 @@ def __init__(self, target=None, initial_point=None, maxit=100, inner_max_it=10,
         self.inner_max_it = inner_max_it
         self.penalty_parameter = penalty_parameter
         self.solver = solver
+        self.inner_initial_point = inner_initial_point
 
     def _initialize(self):
         super()._initialize()
         if self.solver is None:
             self.solver = "FISTA" if callable(self.proximal) else "ADMM"
         if self.solver == "FISTA":
             self._stepsize = self._choose_stepsize()
+        self._compute_map_regularized()
 
     @property
     def solver(self):
@@ -272,15 +308,16 @@ def _choose_stepsize(self):
     def prior(self):
         return self.target.prior.gaussian
 
-    def step(self):
-        y = self.b_tild + np.random.randn(len(self.b_tild))
+    def _compute_map_regularized(self):
+        self._map = self._customized_step(self.b_tild, self.initial_point)
 
+    def _customized_step(self, y, x0):
         if self.solver == "FISTA":
             sim = FISTA(self.M, y, self.proximal,
-                        self.current_point, maxit = self.maxit, stepsize = self._stepsize, abstol = self.abstol, adaptive = self.adaptive)         
+                        x0, maxit = self.maxit, stepsize = self._stepsize, abstol = self.abstol, adaptive = self.adaptive)         
         elif self.solver == "ADMM":
             sim = ADMM(self.M, y, self.proximal,
-                        self.current_point, self.penalty_parameter, maxit = self.maxit, inner_max_it = self.inner_max_it, adaptive = self.adaptive)
+                        x0, self.penalty_parameter, maxit = self.maxit, inner_max_it = self.inner_max_it, adaptive = self.adaptive)
         elif self.solver == "ScipyLinearLSQ":
             A_op = sp.sparse.linalg.LinearOperator((sum([llh.distribution.dim for llh in self.likelihoods])+self.target.prior.dim, self.target.prior.dim),
                                     matvec=lambda x: self.M(x, 1),
@@ -297,10 +334,17 @@ def step(self):
             bounds = [(self.target.prior._box_bounds[0][i], self.target.prior._box_bounds[1][i]) for i in range(self.target.prior.dim)]
             # Note that the objective function is defined as 0.5*||Mx-y||^2, 
             # and the corresponding gradient (gradfunc) is given by M^T(Mx-y).
-            sim = ScipyMinimizer(lambda x: 0.5*np.sum((self.M(x, 1)-y)**2), self.current_point, gradfunc=lambda x: self.M(self.M(x, 1) - y, 2), bounds=bounds, tol=self.abstol, options={"maxiter": self.maxit})
+            sim = ScipyMinimizer(lambda x: 0.5*np.sum((self.M(x, 1)-y)**2), x0, gradfunc=lambda x: self.M(self.M(x, 1) - y, 2), bounds=bounds, tol=self.abstol, options={"maxiter": self.maxit})
         else:
             raise ValueError("Choice of solver not supported.")
+        
+        sol, _ = sim.solve()
+        return sol
+
+    def step(self):
+        y = self.b_tild + np.random.randn(len(self.b_tild))
+
+        self.current_point = self._customized_step(y, self.inner_initial_point)
 
-        self.current_point, _ = sim.solve()
         acc = 1
         return acc
diff --git a/cuqi/experimental/mcmc/_sampler.py b/cuqi/experimental/mcmc/_sampler.py
@@ -148,6 +148,16 @@ def target(self, value):
         if self._target is not None:
             self.validate_target()
 
+    @property
+    def current_point(self):
+        """ The current point of the sampler. """
+        return self._current_point
+
+    @current_point.setter
+    def current_point(self, value):
+        """ Set the current point of the sampler. """
+        self._current_point = value
+
     # ------------ Public methods ------------
     def get_samples(self) -> Samples:
         """ Return the samples. The internal data-structure for the samples is a dynamic list so this creates a copy. """
diff --git a/tests/zexperimental/test_mcmc.py b/tests/zexperimental/test_mcmc.py
@@ -1453,6 +1453,32 @@ def test_RegularizedLinearRTO_ScipyLinearLSQ_option_invalid():
     with pytest.raises(ValueError, match="ScipyLinearLSQ"):
         sampler = cuqi.experimental.mcmc.RegularizedLinearRTO(posterior, solver = "ScipyLinearLSQ")
 
+def test_RegularizedLinearRTO_inner_initial_point_setting():
+    # Define LinearModel and data
+    A, y_obs, _ = cuqi.testproblem.Deconvolution1D().get_components()
+
+    # Define Bayesian Problem
+    x = cuqi.implicitprior.NonnegativeGMRF(np.zeros(A.domain_dim), 100)
+    y = cuqi.distribution.Gaussian(A@x, 0.01**2)
+    posterior = cuqi.distribution.JointDistribution(x, y)(y=y_obs)
+
+    # Set up RegularizedLinearRTO with three different inner_initial_point settings
+    sampler1 = cuqi.experimental.mcmc.RegularizedLinearRTO(posterior, maxit=10, inner_initial_point="previous_sample", tol=1e-8)
+    sampler2 = cuqi.experimental.mcmc.RegularizedLinearRTO(posterior, maxit=10, inner_initial_point="MAP", tol=1e-8)
+    sampler3 = cuqi.experimental.mcmc.RegularizedLinearRTO(posterior, maxit=10, inner_initial_point=np.ones(A.domain_dim), tol=1e-8)
+
+    # Sample with fixed seed
+    np.random.seed(0)
+    sampler1.sample(5)
+    np.random.seed(0)
+    sampler2.sample(5)
+    np.random.seed(0)
+    sampler3.sample(5)
+
+    assert np.allclose(sampler1.inner_initial_point, sampler1.current_point, rtol=1e-5)
+    assert np.allclose(sampler2.inner_initial_point, sampler2._map, rtol=1e-5)
+    assert np.allclose(sampler3.inner_initial_point, np.ones(A.domain_dim), rtol=1e-5)
+
 def test_RegularizedLinearRTO_ScipyLinearLSQ_against_ScipyMinimizer_and_against_FISTA():
     # Define LinearModel and data
     A, y_obs, _ = cuqi.testproblem.Deconvolution1D().get_components()