Merge pull request #692 from CUQI-DTU/enable_FD_JointDistribution

amal-ghamdi · web-flow · commit 3b38e05b811f · 2025-10-09T01:04:51.000+03:00
enable FD for JointDistribution
diff --git a/cuqi/density/_density.py b/cuqi/density/_density.py
@@ -143,7 +143,15 @@ def __call__(self, *args, **kwargs):
     def enable_FD(self, epsilon=1e-8):
         """ Enable finite difference approximation for logd gradient. Note
         that if enabled, the FD approximation will be used even if the 
-        _gradient method is implemented. """
+        _gradient method is implemented.
+
+        Parameters
+        ----------
+        epsilon : float
+            Spacing (step size) to use for the finite difference
+            approximation of the logd gradient for each variable.
+            Default is 1e-8.
+        """
         self._FD_enabled = True
         self._FD_epsilon = epsilon
 
diff --git a/cuqi/distribution/_joint_distribution.py b/cuqi/distribution/_joint_distribution.py
@@ -84,6 +84,8 @@ def __init__(self, *densities: [Density, cuqi.experimental.algebra.RandomVariabl
         cond_vars = self._get_conditioning_variables()
         if len(cond_vars) > 0:
             raise ValueError(f"Every density parameter must have a distribution (prior). Missing prior for {cond_vars}.")
+        # Initialize finite difference gradient approximation settings
+        self.disable_FD()
 
     # --------- Public properties ---------
     @property
@@ -96,6 +98,38 @@ def geometry(self) -> List[Geometry]:
         """ Returns the geometries of the joint distribution. """
         return [dist.geometry for dist in self._distributions]
 
+    @property
+    def FD_enabled(self):
+        """ Returns a dictionary mapping each parameter name to a boolean that
+        is True if finite difference approximation of the logd gradient is
+        enabled for it, i.e. if its entry in FD_epsilon is not None. """
+        par_names = self.get_parameter_names()
+        FD_enabled = {
+            par_name: self.FD_epsilon[par_name] is not None for par_name in par_names
+        }
+        return FD_enabled
+
+    @property
+    def FD_epsilon(self):
+        """ Returns a dictionary mapping each parameter name to its finite
+        difference step size for the logd gradient, or None if FD is disabled. """
+        return self._FD_epsilon
+
+    @FD_epsilon.setter
+    def FD_epsilon(self, value):
+        """ Set the spacing for the finite difference approximation of the
+        logd gradient as a dictionary keyed by parameter name. Each value is
+        either None (no FD approximation) or a float representing the FD step
+        size. Setting None disables FD for all parameters. Raises ValueError
+        if the keys do not match the parameter names of the distribution. """
+        par_names = self.get_parameter_names()
+        if value is None:
+            value = {par_name: None for par_name in par_names}
+        elif set(value.keys()) != set(par_names):
+            raise ValueError(f"Keys of FD_epsilon must match the parameter names of the distribution {par_names}")
+        # Store a copy so later changes to the caller's dict cannot bypass the key check
+        self._FD_epsilon = dict(value)
+
     # --------- Public methods ---------
     def logd(self, *args, **kwargs):
         """ Evaluate the un-normalized log density function. """
@@ -136,6 +170,33 @@ def _condition(self, *args, **kwargs): # Public through __call__
         # Can reduce to Posterior, Likelihood or Distribution.
         return new_joint._reduce_to_single_density()
 
+    def enable_FD(self, epsilon=None):
+        """ Enable finite difference approximation for logd gradient. Note
+        that if enabled, the FD approximation will be used even if the
+        _gradient method is implemented. By default, all parameters
+        will have FD enabled with a step size of 1e-8.
+
+        Parameters
+        ----------
+        epsilon : dict, *optional*
+            Dictionary indicating the spacing (step size) to use for the
+            finite difference approximation of the logd gradient for each
+            variable. Keys are variable names and must match the parameter
+            names of the joint distribution. Values are either a float,
+            which enables FD for that variable with the given step size,
+            or None, which disables FD for that variable. If epsilon is
+            None (default), FD is enabled with step size 1e-8 for all
+            variables.
+        """
+        if epsilon is None:
+            epsilon = {par_name: 1e-8 for par_name in self.get_parameter_names()}
+        self.FD_epsilon = epsilon
+
+    def disable_FD(self):
+        """ Disable finite difference approximation for logd gradient of all parameters. """
+        par_names = self.get_parameter_names()
+        self.FD_epsilon = {par_name: None for par_name in par_names}
+
     def get_parameter_names(self) -> List[str]:
         """ Returns the parameter names of the joint distribution. """
         return [dist.name for dist in self._distributions]
@@ -202,34 +263,58 @@ def _reduce_to_single_density(self):
         # Count number of distributions and likelihoods
         n_dist = len(self._distributions)
         n_likelihood = len(self._likelihoods)
+        reduced_FD_epsilon = {par_name:self.FD_epsilon[par_name] for par_name in self.get_parameter_names()}
+        self.enable_FD(epsilon=reduced_FD_epsilon)
 
         # Cant reduce if there are multiple distributions or likelihoods
         if n_dist > 1:
             return self
 
+        # If only evaluated densities left return joint to ensure logd method is available
+        if n_dist == 0 and n_likelihood == 0:
+            return self
+
+        # Extract the parameter name of the distribution
+        if n_dist == 1:
+            par_name = self._distributions[0].name
+        elif n_likelihood == 1:
+            par_name = self._likelihoods[0].name
+        else:
+            par_name = None
+
         # If exactly one distribution and multiple likelihoods reduce
         if n_dist == 1 and n_likelihood > 1:
-            return MultipleLikelihoodPosterior(*self._densities)
-        
+            reduced_distribution = MultipleLikelihoodPosterior(*self._densities)
+            reduced_FD_epsilon = {par_name:self.FD_epsilon[par_name]}
+
         # If exactly one distribution and one likelihood its a Posterior
         if n_dist == 1 and n_likelihood == 1:
             # Ensure parameter names match, otherwise return the joint distribution
             if set(self._likelihoods[0].get_parameter_names()) != set(self._distributions[0].get_parameter_names()):
                 return self
-            return self._add_constants_to_density(Posterior(self._likelihoods[0], self._distributions[0]))
+            reduced_distribution = Posterior(self._likelihoods[0], self._distributions[0])
+            reduced_distribution = self._add_constants_to_density(reduced_distribution)
+            reduced_FD_epsilon = self.FD_epsilon[par_name]
 
         # If exactly one distribution and no likelihoods its a Distribution
         if n_dist == 1 and n_likelihood == 0:
-            return self._add_constants_to_density(self._distributions[0])        
-        
+            # Intentionally skip enabling FD here. If the user wants FD, they
+            # can enable it for this particular distribution before forming
+            # the joint distribution.
+            return self._add_constants_to_density(self._distributions[0])
+
         # If no distributions and exactly one likelihood its a Likelihood
         if n_likelihood == 1 and n_dist == 0:
-            return self._likelihoods[0]
+            # This case seems to not happen in practice, but we include it for
+            # completeness.
+            reduced_distribution = self._likelihoods[0]
+            reduced_FD_epsilon = self.FD_epsilon[par_name] 
+
+        if self.FD_enabled[par_name]:
+            reduced_distribution.enable_FD(epsilon=reduced_FD_epsilon)
+
+        return reduced_distribution
 
-        # If only evaluated densities left return joint to ensure logd method is available
-        if n_dist == 0 and n_likelihood == 0:
-            return self
-        
     def _add_constants_to_density(self, density: Density):
         """ Add the constants (evaluated densities) to a single density. Used when reducing to single density. """
 
@@ -274,7 +359,7 @@ def __repr__(self):
                     if len(cond_vars) > 0:
                         msg += f"|{cond_vars}"
                     msg += ")"
-        
+
         msg += "\n"
         msg += "    Densities: \n"
 
diff --git a/tests/test_joint_distribution.py b/tests/test_joint_distribution.py
@@ -484,4 +484,176 @@ def test_joint_distribution_with_multiple_inputs_model_has_correct_parameter_nam
 
     assert joint_dist(x_dist=x_val, y_dist=y_val, data_dist=np.array([2,2,3])).likelihood.get_parameter_names() == ['z_dist']
     assert joint_dist(x_dist=x_val, z_dist=z_val, data_dist=np.array([2,2,3])).likelihood.get_parameter_names() == ['y_dist']
-    assert joint_dist(y_dist=y_val, z_dist=z_val, data_dist=np.array([2,2,3])).likelihood.get_parameter_names() == ['x_dist']
+    assert joint_dist(y_dist=y_val, z_dist=z_val, data_dist=np.array([2,2,3])).likelihood.get_parameter_names() == ['x_dist']
+
+
+def test_FD_enabled_is_set_correctly():
+    """ Test that FD_enabled and FD_epsilon are set correctly in JointDistribution and after reduction """
+
+    # Create a joint distribution with two distributions
+    d1 = cuqi.distribution.Normal(0, 1, name="x")
+    d2 = cuqi.distribution.Gamma(lambda x: x**2, 1, name="y")
+    J = cuqi.distribution.JointDistribution(d1, d2)
+
+    # Initially FD should be disabled for both
+    assert J.FD_enabled == {"x": False, "y": False}
+
+    # Enable FD for x
+    J.enable_FD(epsilon={"x": 1e-6, "y": None})
+    assert J.FD_enabled == {"x": True, "y": False}
+    assert J.FD_epsilon == {"x": 1e-6, "y": None}
+
+    # Enable FD for y as well
+    J.enable_FD(epsilon={"x": 1e-6, "y": 1e-5})
+    assert J.FD_enabled == {"x": True, "y": True}
+    assert J.FD_epsilon == {"x": 1e-6, "y": 1e-5}
+
+    # Disable FD for x
+    J.enable_FD(epsilon={"x": None, "y": 1e-5})
+    assert J.FD_enabled == {"x": False, "y": True}
+    assert J.FD_epsilon == {"x": None, "y": 1e-5}
+
+    # Disable FD for all
+    J.disable_FD()
+    assert J.FD_enabled == {"x": False, "y": False}
+    assert J.FD_epsilon == {"x": None, "y": None}
+
+    # Enable FD and reduce to single density
+    J.enable_FD() # Enable FD for all
+    J_given_x = J(x=0)
+    J_given_y = J(y=1)
+
+    # Check types and FD_enabled status of J_given_x
+    assert isinstance(J_given_x, cuqi.distribution.Gamma)
+    assert not J_given_x.FD_enabled # intentionally disabled for single remaining
+                                    # distribution
+    assert J_given_x.FD_epsilon is None
+
+    # Check types and FD_enabled status of J_given_y
+    assert isinstance(J_given_y, cuqi.distribution.Posterior)
+    assert J_given_y.FD_enabled
+    assert J_given_y.FD_epsilon == 1e-8 # Default epsilon for remaining density
+
+    # Catch error if epsilon keys do not match parameter names
+    with pytest.raises(ValueError, match=r"Keys of FD_epsilon must match"):
+        J.enable_FD(epsilon={"x": 1e-6}) # Missing "y" key
+
+def test_FD_enabled_is_set_correctly_for_stacked_joint_distribution():
+    """ Test that FD_enabled property is set correctly in _StackedJointDistribution """
+
+    # Create a stacked joint distribution with two distributions
+    x = cuqi.distribution.Normal(0, 1, name="x")
+    y = cuqi.distribution.Uniform(1, 2, name="y")
+    J = cuqi.distribution._StackedJointDistribution(x, y)
+    J.enable_FD(epsilon={"x": 1e-6, "y": None})
+
+    assert J.FD_enabled == {"x": True, "y": False}
+    assert J.FD_epsilon == {"x": 1e-6, "y": None}
+
+    # Reduce to single density (substitute y)
+    J_given_y = J(y=1.5)
+    assert isinstance(J_given_y, cuqi.distribution.Normal)
+    assert J_given_y.FD_enabled == False # Intentionally disabled for
+                                         # single remaining
+                                         # distribution
+    assert J_given_y.FD_epsilon is None
+
+    # Reduce to single density (substitute x)
+    J_given_x = J(x=0)
+    assert isinstance(J_given_x, cuqi.distribution.Uniform)
+    assert J_given_x.FD_enabled == False
+    assert J_given_x.FD_epsilon is None
+
+
+
+@pytest.mark.parametrize(
+    "densities,kwargs,fd_epsilon,expected_type,expected_fd_enabled",
+    [
+        # Case 0: Single Distribution, FD enabled
+        (
+            [cuqi.distribution.Normal(np.zeros(3), 1, name="x")],
+            {},
+            {"x": 1e-5},
+            cuqi.distribution.Normal,
+            False,  # Intentionally disabled for single remaining distribution
+        ),
+        # Case 1: Single Distribution, FD disabled
+        (
+            [cuqi.distribution.Normal(np.zeros(3), 1, name="x")],
+            {},
+            {"x": None},
+            cuqi.distribution.Normal,
+            False,
+        ),
+        # Case 2: Distribution + Data distribution, substitute y
+        (
+            [
+                cuqi.distribution.Normal(np.zeros(3), 1, name="x"),
+                cuqi.distribution.Gaussian(lambda x: x**2, np.ones(3), name="y"),
+            ],
+            {"y": np.ones(3)},
+            {"x": 1e-6, "y": 1e-7},
+            cuqi.distribution.Posterior,
+            True,
+        ),
+        # Case 3: Distribution + data distribution, substitute x
+        (
+            [
+                cuqi.distribution.Normal(np.zeros(3), 1, name="x"),
+                cuqi.distribution.Gaussian(lambda x: x**2, np.ones(3), name="y"),
+            ],
+            {"x": np.ones(3)},
+            {"x": 1e-5, "y": 1e-6},
+            cuqi.distribution.Distribution,
+            False,  # Intentionally disabled for single remaining distribution
+        ),
+        # Case 4: Multiple data distributions + prior (MultipleLikelihoodPosterior)
+        (
+            [
+                cuqi.distribution.Normal(np.zeros(3), 1, name="x"),
+                cuqi.distribution.Gaussian(lambda x: x, np.ones(3), name="y1"),
+                cuqi.distribution.Gaussian(lambda x: x + 1, np.ones(3), name="y2"),
+            ],
+            {"y1": np.ones(3), "y2": np.ones(3)},
+            {"x": 1e-5, "y1": 1e-6, "y2": 1e-7},
+            cuqi.distribution.MultipleLikelihoodPosterior,
+            {"x": True},
+        ),
+        # Case 5: Single Distribution, substitute x (joint is returned, no parameters left)
+        (
+            [cuqi.distribution.Normal(np.zeros(3), 1, name="x")],
+            {"x": np.ones(3)},
+            {"x": 1e-8},
+            cuqi.distribution.JointDistribution,
+            {},
+        ),
+    ],
+)
+def test_fd_enabled_of_joint_distribution_after_substitution_is_correct(
+    densities, kwargs, fd_epsilon, expected_type, expected_fd_enabled
+):
+    """ Test that FD_enabled and FD_epsilon properties are set correctly in JointDistribution even after substitution."""
+    joint = cuqi.distribution.JointDistribution(*densities)
+    joint.enable_FD(epsilon=fd_epsilon)
+
+    # Assert FD_epsilon is set correctly
+    assert joint.FD_epsilon == fd_epsilon
+
+    # Substitute parameters (if any), which reduces the joint distribution
+    reduced = joint(**kwargs)
+
+    # Assert the type and FD_enabled status of the reduced distribution
+    assert isinstance(reduced, expected_type)
+    assert reduced.FD_enabled == expected_fd_enabled
+
+    # Assert FD_epsilon of the reduced distribution (skipped when FD is expected to be disabled)
+    if expected_fd_enabled is not False:
+        fd_epsilon_reduced = {
+            k: v for k, v in fd_epsilon.items() if k not in kwargs.keys()
+        }
+        if len(fd_epsilon_reduced) == 1 and not isinstance(
+            reduced, cuqi.distribution.MultipleLikelihoodPosterior
+        ):
+            # A reduced single density holds a scalar epsilon instead of a dict
+            fd_epsilon_reduced = list(fd_epsilon_reduced.values())[0]
+        assert reduced.FD_epsilon == fd_epsilon_reduced
diff --git a/tests/zexperimental/test_mcmc.py b/tests/zexperimental/test_mcmc.py