Merge pull request #683 from CUQI-DTU/improve_NUTS_statefulness_new

amal-ghamdi · web-flow · commit 2cf72ec9af9a · 2025-09-19T16:37:46.000+03:00
Ensure NUTS statefulness within Gibbs
diff --git a/cuqi/distribution/_posterior.py b/cuqi/distribution/_posterior.py
@@ -1,5 +1,6 @@
 from cuqi.geometry import _DefaultGeometry, _get_identity_geometries
 from cuqi.distribution import Distribution
+from cuqi.density import Density
 
 # ========================================================================
 class Posterior(Distribution):
@@ -25,6 +26,14 @@ def __init__(self, likelihood, prior, **kwargs):
         self.prior = prior 
         super().__init__(**kwargs)
 
+    def get_density(self, name) -> Density:
+        """ Return a density with the given name. """
+        if name == self.likelihood.name:
+            return self.likelihood
+        if name == self.prior.name:
+            return self.prior
+        raise ValueError(f"No density with name {name}.")
+
     @property
     def data(self):
         return self.likelihood.data
diff --git a/cuqi/experimental/mcmc/_gibbs.py b/cuqi/experimental/mcmc/_gibbs.py
@@ -1,7 +1,6 @@
-from cuqi.distribution import JointDistribution
+from cuqi.distribution import JointDistribution, Posterior
 from cuqi.experimental.mcmc import Sampler
 from cuqi.samples import Samples, JointSamples
-from cuqi.experimental.mcmc import NUTS
 from typing import Dict
 import numpy as np
 import warnings
@@ -36,11 +35,10 @@ class HybridGibbs:
     Gelman et al. "Bayesian Data Analysis" (2014), Third Edition
     for more details.
 
-    In each Gibbs step, the corresponding sampler has the initial_point 
-    and initial_scale (if applicable) set to the value of the previous step
-    and the sampler is reinitialized. This means that the sampling is not 
-    fully stateful at this point. This means samplers like NUTS will lose
-    their internal state between Gibbs steps.
+    In each Gibbs step, the state and history of the corresponding sampler are
+    stored, then the sampler is reinitialized. After reinitialization, the stored
+    state and history are restored on the sampler. This preserves the
+    statefulness of the samplers across Gibbs steps.
 
     The order in which the conditionals are sampled is the order of the
     variables in the sampling strategy, unless a different sampling order
@@ -177,8 +175,8 @@ def scan_order(self):
     # ------------ Public methods ------------
     def validate_targets(self):
         """ Validate each of the conditional targets used in the Gibbs steps """
-        if not isinstance(self.target, JointDistribution):
-            raise ValueError('Target distribution must be a JointDistribution.')
+        if not isinstance(self.target, (JointDistribution, Posterior)):
+            raise ValueError('Target distribution must be a JointDistribution or Posterior.')
         for sampler in self.samplers.values():
             sampler.validate_target()
 
@@ -257,19 +255,15 @@ def step(self):
             # before reinitializing the sampler and then set the state and history back to the sampler
 
             # Extract state and history from sampler
-            if isinstance(sampler, NUTS): # Special case for NUTS as it is not playing nice with get_state and get_history
-                sampler.initial_point = sampler.current_point
-            else:
-                sampler_state = sampler.get_state()
-                sampler_history = sampler.get_history()
+            sampler_state = sampler.get_state()
+            sampler_history = sampler.get_history()
 
             # Reinitialize sampler
             sampler.reinitialize()
 
             # Set state and history back to sampler
-            if not isinstance(sampler, NUTS): # Again, special case for NUTS.
-                sampler.set_state(sampler_state)
-                sampler.set_history(sampler_history)
+            sampler.set_state(sampler_state)
+            sampler.set_history(sampler_history)
 
             # Allow for multiple sampling steps in each Gibbs step
             for _ in range(self.num_sampling_steps[par_name]):
@@ -309,8 +303,6 @@ def _call_callback(self, sample_index, num_of_samples):
     def _initialize_samplers(self):
         """ Initialize samplers """
         for sampler in self.samplers.values():
-            if isinstance(sampler, NUTS):
-                print(f'Warning: NUTS sampler is not fully stateful in HybridGibbs. Sampler will be reinitialized in each Gibbs step.')
             sampler.initialize()
 
     def _initialize_num_sampling_steps(self):
diff --git a/cuqi/experimental/mcmc/_hmc.py b/cuqi/experimental/mcmc/_hmc.py
@@ -118,16 +118,18 @@ def _initialize(self):
         # to epsilon_bar for the remaining sampling steps.
         if self.step_size is None:
             self._epsilon = self._FindGoodEpsilon()
+            self.step_size = self._epsilon
         else:
             self._epsilon = self.step_size
+
         self._epsilon_bar = "unset"
 
         # Parameter mu, does not change during the run
         self._mu = np.log(10*self._epsilon)
 
         self._H_bar = 0
 
-        # NUTS run diagnostic:
+        # NUTS run diagnostics
         # number of tree nodes created each NUTS iteration
         self._num_tree_node = 0
 
diff --git a/tests/data/s_x_NUTS_within_HybridGibbs.npz b/tests/data/s_x_NUTS_within_HybridGibbs.npz
diff --git a/tests/zexperimental/test_mcmc.py b/tests/zexperimental/test_mcmc.py
@@ -252,7 +252,6 @@ def create_lmrf_prior_target(dim=16):
     return cuqi.distribution.JointDistribution(x, y)(y=y_data)
 
 
-
 @pytest.mark.parametrize("target_dim", [16, 128])
 def test_UGLA_regression_sample(target_dim):
     """Test the UGLA sampler regression."""
@@ -324,7 +323,7 @@ def test_NUTS_regression_warmup(target: cuqi.density.Density):
                                         Ns=Ns,
                                         Nb=Nb,
                                         strategy="NUTS")
-    
+
 # ============= MYULA ==============
 def create_myula_target(dim=16):
     """Create a target for MYULA."""
@@ -419,7 +418,7 @@ def create_conjugate_target(type:str):
     cuqi.experimental.mcmc.ConjugateApprox(create_conjugate_target("LMRF-Gamma")),
     cuqi.experimental.mcmc.NUTS(cuqi.testproblem.Deconvolution1D(dim=10).posterior, max_depth=4)
 ]
-    
+
 # List of samplers from cuqi.experimental.mcmc that should be skipped for checkpoint testing
 skip_checkpoint = [
     cuqi.experimental.mcmc.Sampler,
@@ -967,8 +966,6 @@ def HybridGibbs_target_1():
 def test_NUTS_within_HybridGibbs_regression_sample_and_warmup(copy_reference):
     """ Test that using NUTS sampler within HybridGibbs sampler works as
     expected."""
-    #TODO: This test might break in the future if the NUTS within HybridGibbs
-    # is changed to be fully stateful.
 
     Nb=10
     Ns=10
@@ -982,7 +979,7 @@ def test_NUTS_within_HybridGibbs_regression_sample_and_warmup(copy_reference):
 
     # Here we do 1 internal steps with NUTS for each Gibbs step
     num_sampling_steps = {
-        "x" : 1,
+        "x" : 2,
         "s" : 1
     }
 
@@ -1080,7 +1077,7 @@ def test_nuts_acceptance_rate(sampler: cuqi.experimental.mcmc.Sampler):
     acc_rate_sum = sum(sampler._acc[2:])
 
     assert np.isclose(counter, acc_rate_sum), "NUTS sampler does not update acceptance rate correctly: "+str(counter)+" != "+str(acc_rate_sum)
-    
+
 # ============ Testing of AffineModel with RTO-type samplers ============
 
 def test_LinearRTO_with_AffineModel_is_equivalent_to_LinearModel_and_shifted_data():
@@ -1623,3 +1620,81 @@ def test_gibbs_scan_order():
     
     sampler = cuqi.experimental.mcmc.HybridGibbs(target, sampling_strategy, scan_order=['x', 's'])
     assert sampler.scan_order == ['x', 's']
+
+@pytest.mark.parametrize("step_size", [None, 0.1])
+@pytest.mark.parametrize("num_sampling_steps_x", [1, 5])
+@pytest.mark.parametrize("nb", [5, 20])
+def test_NUTS_within_Gibbs_consistant_with_NUTS(step_size, num_sampling_steps_x, nb):
+    """ Test that using the NUTS sampler within the HybridGibbs sampler is
+    consistent with using the NUTS sampler alone for sampling and tuning. This
+    test ensures that NUTS remains stateful within HybridGibbs.
+    """
+
+    ns = 15 # number of sampling steps
+    tune_freq = 0.1
+
+    np.random.seed(0)
+    # Forward problem
+    A, y_data, info = cuqi.testproblem.Deconvolution1D(
+        dim=5, phantom='sinc', noise_std=0.001).get_components()
+
+    # Bayesian Inverse Problem
+    x = cuqi.distribution.GMRF(np.zeros(A.domain_dim), 50)
+    y = cuqi.distribution.Gaussian(A@x, 0.001**2)
+
+    # Posterior
+    target = cuqi.distribution.JointDistribution(y, x)(y=y_data)
+    
+    # Sample with NUTS within HybridGibbs
+    np.random.seed(0)
+    sampling_strategy = {
+        "x" : cuqi.experimental.mcmc.NUTS(max_depth=4, step_size=step_size)
+    }
+
+    num_sampling_steps = {
+    "x" : num_sampling_steps_x
+    }
+
+    sampler_gibbs = cuqi.experimental.mcmc.HybridGibbs(target,
+                                                       sampling_strategy,
+                                                       num_sampling_steps)
+    sampler_gibbs.warmup(nb, tune_freq=tune_freq)
+    sampler_gibbs.sample(ns)
+    samples_gibbs = sampler_gibbs.get_samples()["x"].samples
+
+    # Sample with NUTS alone
+    np.random.seed(0)
+    sampler_nuts = cuqi.experimental.mcmc.NUTS(target,
+                                               max_depth=4,
+                                               step_size=step_size)
+    # Warm up (when num_sampling_steps_x != 1, we do not use the built-in
+    #          warmup, so that we can control the number of steps between
+    #          tuning steps to match the Gibbs sampling behavior)
+    if num_sampling_steps_x == 1:
+        sampler_nuts.warmup(nb, tune_freq=tune_freq)
+    else:
+        tune_interval = max(int(tune_freq * nb), 1)
+        for count in range(nb):
+            for _ in range(num_sampling_steps_x):
+                sampler_nuts.sample(1)
+            if (count+1) % tune_interval == 0:
+                sampler_nuts.tune(None, count//tune_interval)
+    # Sample
+    sampler_nuts.sample(ns * num_sampling_steps_x)
+    samples_nuts = sampler_nuts.get_samples().samples
+    # keep every num_sampling_steps_x-th sample to match the Gibbs samples
+    samples_nuts_skip = samples_nuts[:, num_sampling_steps_x - 1::num_sampling_steps_x]
+
+    # assert warmup samples are correct:
+    assert np.allclose(
+        samples_gibbs[:, :nb],
+        samples_nuts_skip[:, :nb],
+        rtol=1e-5,
+    )
+
+    # assert samples are correct:
+    assert np.allclose(
+        samples_gibbs[:, nb:],
+        samples_nuts_skip[:, nb:],
+        rtol=1e-5,
+    )