Merge pull request #690 from CUQI-DTU/feature-online-thinning

chaozg · web-flow · commit f08eb2ae4f6f · 2025-09-21T15:00:42.000+02:00
add online thinning option to samplers
diff --git a/cuqi/experimental/mcmc/_sampler.py b/cuqi/experimental/mcmc/_sampler.py
@@ -203,13 +203,16 @@ def load_checkpoint(self, path):
 
         self.set_state(state)
 
-    def sample(self, Ns, batch_size=0, sample_path='./CUQI_samples/') -> 'Sampler':
+    def sample(self, Ns, Nt=1, batch_size=0, sample_path='./CUQI_samples/') -> 'Sampler':
         """ Sample Ns samples from the target density.
 
         Parameters
         ----------
         Ns : int
             The number of samples to draw.
+        
+        Nt : int, optional, default=1
+            The thinning interval. If Nt >= 1, every Nt'th sample is stored (Nt=1 stores every sample); if Nt <= 0, no samples are stored. The larger Nt, the fewer samples are stored.
 
         batch_size : int, optional
             The batch size for saving samples to disk. If 0, no batching is used. If positive, samples are saved to disk in batches of the specified size.
@@ -233,7 +236,8 @@ def sample(self, Ns, batch_size=0, sample_path='./CUQI_samples/') -> 'Sampler':
 
             # Store samples
             self._acc.append(acc)
-            self._samples.append(self.current_point)
+            if (Nt > 0) and ((idx + 1) % Nt == 0):
+                self._samples.append(self.current_point)
 
             # display acc rate at progress bar
             pbar.set_postfix_str(f"acc rate: {np.mean(self._acc[-1-idx:]):.2%}")
@@ -248,14 +252,17 @@ def sample(self, Ns, batch_size=0, sample_path='./CUQI_samples/') -> 'Sampler':
         return self
     
 
-    def warmup(self, Nb, tune_freq=0.1) -> 'Sampler':
+    def warmup(self, Nb, Nt=1, tune_freq=0.1) -> 'Sampler':
         """ Warmup the sampler by drawing Nb samples.
 
         Parameters
         ----------
         Nb : int
             The number of samples to draw during warmup.
 
+        Nt : int, optional, default=1
+            The thinning interval. If Nt >= 1, every Nt'th warmup sample is stored (Nt=1 stores every sample); if Nt <= 0, no samples are stored. The larger Nt, the fewer samples are stored.
+            
         tune_freq : float, optional
             The frequency of tuning. Tuning is performed every tune_freq*Nb samples.
 
@@ -278,7 +285,8 @@ def warmup(self, Nb, tune_freq=0.1) -> 'Sampler':
 
             # Store samples
             self._acc.append(acc)
-            self._samples.append(self.current_point)
+            if (Nt > 0) and ((idx + 1) % Nt == 0):
+                self._samples.append(self.current_point)
 
             # display acc rate at progress bar
             pbar.set_postfix_str(f"acc rate: {np.mean(self._acc[-1-idx:]):.2%}")
diff --git a/tests/zexperimental/test_mcmc.py b/tests/zexperimental/test_mcmc.py
@@ -1621,6 +1621,41 @@ def test_gibbs_scan_order():
     sampler = cuqi.experimental.mcmc.HybridGibbs(target, sampling_strategy, scan_order=['x', 's'])
     assert sampler.scan_order == ['x', 's']
 
+def test_online_thinning_with_mala_and_rto():
+
+    # Define LinearModel and data
+    A, y_obs, _ = cuqi.testproblem.Deconvolution1D().get_components()
+
+    # Define Bayesian Problem
+    x = cuqi.distribution.GMRF(np.zeros(A.domain_dim), 100)
+    y = cuqi.distribution.Gaussian(A@x, 0.01**2)
+    posterior = cuqi.distribution.JointDistribution(x, y)(y=y_obs)
+
+    # Set up MALA and RTO samplers
+    sampler_mala_1 = cuqi.experimental.mcmc.MALA(posterior, scale=0.01)
+    sampler_mala_2 = cuqi.experimental.mcmc.MALA(posterior, scale=0.01)
+    sampler_rto_1 = cuqi.experimental.mcmc.LinearRTO(posterior, maxit=1000, tol=1e-8)
+    sampler_rto_2 = cuqi.experimental.mcmc.LinearRTO(posterior, maxit=1000, tol=1e-8)
+
+    # Sample MALA and RTO with fixed seed, but different online thinning Nt
+    np.random.seed(0)
+    samples_mala_1 = sampler_mala_1.sample(100,Nt=5).get_samples()
+    np.random.seed(0)
+    samples_mala_2 = sampler_mala_2.sample(100,Nt=1).get_samples()
+    np.random.seed(0)
+    samples_rto_1 = sampler_rto_1.sample(100,Nt=5).get_samples()
+    np.random.seed(0)
+    samples_rto_2 = sampler_rto_2.sample(100,Nt=1).get_samples()
+
+    # Check that the k'th thinned MALA sample (Nt=5) equals unthinned sample 5*(k+1)-1 (Nt=1) for k = 0, 1, 2
+    assert np.allclose(samples_mala_1.samples[:,0], samples_mala_2.samples[:,4], rtol=1e-8)
+    assert np.allclose(samples_mala_1.samples[:,1], samples_mala_2.samples[:,9], rtol=1e-8)
+    assert np.allclose(samples_mala_1.samples[:,2], samples_mala_2.samples[:,14], rtol=1e-8)
+    # Check that the k'th thinned RTO sample (Nt=5) equals unthinned sample 5*(k+1)-1 (Nt=1) for k = 0, 1, 2
+    assert np.allclose(samples_rto_1.samples[:,0], samples_rto_2.samples[:,4], rtol=1e-8)
+    assert np.allclose(samples_rto_1.samples[:,1], samples_rto_2.samples[:,9], rtol=1e-8)
+    assert np.allclose(samples_rto_1.samples[:,2], samples_rto_2.samples[:,14], rtol=1e-8)
+
 @pytest.mark.parametrize("step_size", [None, 0.1])
 @pytest.mark.parametrize("num_sampling_steps_x", [1, 5])
 @pytest.mark.parametrize("nb", [5, 20])