Replace FitGPyTorchMLL dispatcher with isinstance checks (#3233)

esantorella · meta-codesync[bot] · commit b6d71f1fa29f · 2026-03-17T09:48:45.000-07:00
Summary:
Pull Request resolved: #3233

**Context**: See D96592835 for stack overview.

**This PR**: Replace the `FitGPyTorchMLL` multiple-dispatch mechanism with simple `isinstance` checks in `fit_gpytorch_mll`, using the `custom_fit` method defined in D96592835. This reduces stack depth and makes the fitting code path easier to follow and debug.

Changes:
- `fit_gpytorch_mll` now uses `isinstance` checks to select the fitting path: it fits each sub-MLL recursively (for `SumMarginalLogLikelihood` + `ModelListGP`; this is the former `_fit_list` logic, now inlined), calls `_fit_fallback_approximate` (for `_ApproximateMarginalLogLikelihood`), or calls `_fit_fallback` (default).
- Removed the `FitGPyTorchMLL` dispatcher and its import of `Dispatcher`.
- Converted `RobustRelevancePursuitMixin` from two `FitGPyTorchMLL.register` calls to a single `custom_fit` method on the mixin class; the approximate-MLL case now raises `UnsupportedError` from inside `custom_fit`.
- Simplified the `_fit_fallback` and `_fit_fallback_approximate` signatures by removing unused type arguments that were only needed for dispatching.

Reviewed By: saitcakmak

Differential Revision: D96592852

fbshipit-source-id: 57e7e58a421fc055637f1e9fefaf8b8ad654a819
diff --git a/botorch/fit.py b/botorch/fit.py
@@ -19,7 +19,6 @@
 from botorch.exceptions.warnings import OptimizationWarning
 from botorch.logging import logger
 from botorch.models import SingleTaskGP
-from botorch.models.approximate_gp import ApproximateGPyTorchModel
 from botorch.models.fully_bayesian import AbstractFullyBayesianSingleTaskGP
 from botorch.models.fully_bayesian_multitask import SaasFullyBayesianMultiTaskGP
 from botorch.models.map_saas import get_map_saas_model
@@ -39,8 +38,6 @@
     parameter_rollback_ctx,
     TensorCheckpoint,
 )
-from botorch.utils.dispatcher import Dispatcher, type_bypassing_encoder
-from gpytorch.likelihoods import Likelihood
 from gpytorch.mlls._approximate_mll import _ApproximateMarginalLogLikelihood
 from gpytorch.mlls.exact_marginal_log_likelihood import ExactMarginalLogLikelihood
 from gpytorch.mlls.marginal_log_likelihood import MarginalLogLikelihood
@@ -73,7 +70,6 @@ def _rethrow_warn(w: WarningMessage) -> bool:
     debug=_debug_warn,
     rethrow=_rethrow_warn,
 )
-FitGPyTorchMLL = Dispatcher("fit_gpytorch_mll", encoder=type_bypassing_encoder)
 
 
 def fit_gpytorch_mll(
@@ -86,18 +82,22 @@ def fit_gpytorch_mll(
 ) -> MarginalLogLikelihood:
     r"""Clearing house for fitting models passed as GPyTorch MarginalLogLikelihoods.
 
+    If a model defines a ``custom_fit`` method, it will be called directly.
+    Otherwise, a fit method is determined based on the types of the model and
+    MLL.
+
     Args:
         mll: A GPyTorch MarginalLogLikelihood instance.
         closure: Forward-backward closure for obtaining objective values and gradients.
             Responsible for setting parameters' ``grad`` attributes. If no closure is
             provided, one will be obtained by calling ``get_loss_closure_with_grads``.
         optimizer: User specified optimization algorithm. When ``optimizer is None``,
-            this keyword argument is omitted when calling the dispatcher.
+            this keyword argument is omitted when calling the underlying fit routine.
         closure_kwargs: Keyword arguments passed when calling ``closure``.
         optimizer_kwargs: A dictionary of keyword arguments passed when
             calling ``optimizer``.
-        **kwargs: Keyword arguments passed down through the dispatcher to
-            fit subroutines. Unexpected keywords are ignored.
+        **kwargs: Keyword arguments passed to the underlying fit routine.
+            Unexpected keywords are ignored.
 
     Returns:
         The ``mll`` instance. If fitting succeeded, then ``mll`` will be in
@@ -116,22 +116,38 @@ def fit_gpytorch_mll(
             **kwargs,
         )
 
-    return FitGPyTorchMLL(
-        mll,
-        type(mll.likelihood),
-        type(mll.model),
+    if isinstance(mll, SumMarginalLogLikelihood) and isinstance(mll.model, ModelListGP):
+        mll.train()
+        for sub_mll in mll.mlls:
+            fit_gpytorch_mll(
+                mll=sub_mll,
+                closure=closure,
+                closure_kwargs=closure_kwargs,
+                optimizer_kwargs=optimizer_kwargs,
+                **kwargs,
+            )
+        return mll.eval() if not any(sub_mll.training for sub_mll in mll.mlls) else mll
+
+    if isinstance(mll, _ApproximateMarginalLogLikelihood):
+        return _fit_fallback_approximate(
+            mll=mll,
+            closure=closure,
+            closure_kwargs=closure_kwargs,
+            optimizer_kwargs=optimizer_kwargs,
+            **kwargs,
+        )
+
+    return _fit_fallback(
+        mll=mll,
         closure=closure,
         closure_kwargs=closure_kwargs,
         optimizer_kwargs=optimizer_kwargs,
         **kwargs,
     )
 
 
-@FitGPyTorchMLL.register(MarginalLogLikelihood, object, object)
 def _fit_fallback(
     mll: MarginalLogLikelihood,
-    _: type[object],
-    __: type[object],
     *,
     closure: Callable[[], tuple[Tensor, Sequence[Tensor | None]]] | None = None,
     optimizer: Callable = fit_gpytorch_mll_scipy,
@@ -272,35 +288,8 @@ def _fit_fallback(
     raise ModelFittingError("All attempts to fit the model have failed.")
 
 
-@FitGPyTorchMLL.register(SumMarginalLogLikelihood, object, ModelListGP)
-def _fit_list(
-    mll: SumMarginalLogLikelihood,
-    _: type[Likelihood],
-    __: type[ModelListGP],
-    **kwargs: Any,
-) -> SumMarginalLogLikelihood:
-    r"""Fitting routine for lists of independent Gaussian processes.
-
-    Args:
-        **kwargs: Passed to each of ``mll.mlls``.
-
-    Returns:
-        The ``mll`` instance. If fitting succeeded for all of ``mll.mlls``,
-        then ``mll`` will be in evaluation mode, i.e. ``mll.training == False``.
-        Otherwise, ``mll`` will be in training mode.
-    """
-    mll.train()
-    for sub_mll in mll.mlls:
-        fit_gpytorch_mll(sub_mll, **kwargs)
-
-    return mll.eval() if not any(sub_mll.training for sub_mll in mll.mlls) else mll
-
-
-@FitGPyTorchMLL.register(_ApproximateMarginalLogLikelihood, object, object)
 def _fit_fallback_approximate(
     mll: _ApproximateMarginalLogLikelihood,
-    _: type[Likelihood],
-    __: type[ApproximateGPyTorchModel],
     *,
     closure: Callable[[], tuple[Tensor, Sequence[Tensor | None]]] | None = None,
     data_loader: DataLoader | None = None,
@@ -342,7 +331,7 @@ def _fit_fallback_approximate(
             else fit_gpytorch_mll_torch
         )
 
-    return _fit_fallback(mll, _, __, closure=closure, optimizer=optimizer, **kwargs)
+    return _fit_fallback(mll=mll, closure=closure, optimizer=optimizer, **kwargs)
 
 
 def fit_fully_bayesian_model_nuts(
diff --git a/botorch/models/robust_relevance_pursuit_model.py b/botorch/models/robust_relevance_pursuit_model.py
@@ -38,7 +38,6 @@
 
 import torch
 from botorch.exceptions.errors import UnsupportedError
-from botorch.fit import FitGPyTorchMLL
 from botorch.models import SingleTaskGP
 from botorch.models.likelihoods.sparse_outlier_noise import (
     SparseOutlierGaussianLikelihood,
@@ -157,6 +156,103 @@ def load_standard_model(self, standard_model: Model) -> Self:
         self.load_state_dict(standard_model.state_dict())
         return self
 
+    def custom_fit(
+        self,
+        mll: MarginalLogLikelihood,
+        *,
+        numbers_of_outliers: list[int] | None = None,
+        fractions_of_outliers: list[float] | None = None,
+        timeout_sec: float | None = None,
+        relevance_pursuit_optimizer: Callable = backward_relevance_pursuit,
+        reset_parameters: bool = True,
+        reset_dense_parameters: bool = False,
+        closure: Callable[[], tuple[Tensor, Sequence[Tensor | None]]] | None = None,
+        optimizer: Callable | None = None,
+        closure_kwargs: dict[str, Any] | None = None,
+        optimizer_kwargs: Mapping[str, Any] | None = None,
+    ) -> MarginalLogLikelihood:
+        """Fits a RobustRelevancePursuitGP model using the given marginal likelihood.
+
+        For details, see [Ament2024pursuit]_ or https://arxiv.org/abs/2410.24222.
+
+        Args:
+            mll: The marginal likelihood to fit.
+            numbers_of_outliers: An optional list of numbers of outliers to consider
+                during relevance pursuit. By default, the algorithm falls back to a
+                default list of fractions of outliers, see below.
+            fractions_of_outliers: An optional list of fractions of outliers to
+                consider if numbers_of_outliers is None. By default, the algorithm
+                uses ``[0, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.75, 1.0]``.
+            relevance_pursuit_optimizer: The relevance pursuit optimizer to use.
+            reset_parameters: If True, reset sparse parameters after each iteration.
+            reset_dense_parameters: If True, reset dense parameters after each
+                iteration.
+            closure: A closure to compute loss and gradients.
+            optimizer: The numerical optimizer.
+            closure_kwargs: Additional arguments to pass to the closure.
+            optimizer_kwargs: Additional arguments to pass to fit_gpytorch_mll.
+
+        Returns:
+            The fitted marginal likelihood.
+        """
+        if isinstance(mll, _ApproximateMarginalLogLikelihood):
+            raise UnsupportedError(
+                "Relevance Pursuit does not yet support approximate inference. "
+            )
+
+        sparse_module = SparseOutlierNoise._from_model(mll.model)
+        n = sparse_module.dim  # equal to the number of training data points
+
+        if numbers_of_outliers is None:
+            if fractions_of_outliers is None:
+                fractions_of_outliers = FRACTIONS_OF_OUTLIERS
+
+            # list from which BMC chooses
+            numbers_of_outliers = [int(p * n) for p in fractions_of_outliers]
+
+        optimizer_kwargs_: dict[str, Any] = (
+            {} if optimizer_kwargs is None else dict(optimizer_kwargs)
+        )
+        if timeout_sec is not None:
+            optimizer_kwargs_["timeout_sec"] = timeout_sec / len(numbers_of_outliers)
+
+        # Need to convert model to avoid recursion through fit_gpytorch_mll,
+        # since relevance pursuit expects to call the base fit_gpytorch_mll.
+        original_model = mll.model  # Robust Relevance Pursuit Model
+        mll.model = original_model.to_standard_model()
+        sparse_module = SparseOutlierNoise._from_model(mll.model)
+        sparse_module, model_trace = relevance_pursuit_optimizer(
+            sparse_module=sparse_module,
+            mll=mll,
+            sparsity_levels=numbers_of_outliers,
+            reset_parameters=reset_parameters,
+            reset_dense_parameters=reset_dense_parameters,
+            record_model_trace=True,
+            # These are the args of the canonical mll fit routine
+            closure=closure,
+            optimizer=optimizer,
+            closure_kwargs=closure_kwargs,
+            optimizer_kwargs=optimizer_kwargs_,
+        )
+
+        # Bayesian model comparison
+        bmc_support_sizes, bmc_probabilities = get_posterior_over_support(
+            SparseOutlierNoise,
+            model_trace,
+            prior_mean_of_support=original_model.prior_mean_of_support,
+        )
+        map_index = torch.argmax(bmc_probabilities)
+        map_model = model_trace[map_index]  # choosing model with highest BMC score
+        # overwrite mll.model with chosen model
+        mll.model = original_model  # first restore original model pointer
+        mll.model.load_standard_model(map_model)
+        # Store the bmc results
+        mll.model.bmc_support_sizes = bmc_support_sizes
+        mll.model.bmc_probabilities = bmc_probabilities
+        if mll.model.cache_model_trace:
+            mll.model.model_trace = model_trace
+        return mll
+
 
 class RobustRelevancePursuitSingleTaskGP(SingleTaskGP, RobustRelevancePursuitMixin):
     def __init__(
@@ -252,127 +348,3 @@ def to_standard_model(self) -> Model:
         if not is_training:
             model.eval()
         return model
-
-
-@FitGPyTorchMLL.register(
-    MarginalLogLikelihood,
-    SparseOutlierGaussianLikelihood,
-    RobustRelevancePursuitMixin,
-)
-def _fit_rrp(
-    mll: MarginalLogLikelihood,
-    _: type[SparseOutlierGaussianLikelihood],
-    __: type[RobustRelevancePursuitMixin],
-    *,
-    numbers_of_outliers: list[int] | None = None,
-    fractions_of_outliers: list[float] | None = None,
-    timeout_sec: float | None = None,
-    relevance_pursuit_optimizer: Callable = backward_relevance_pursuit,
-    reset_parameters: bool = True,
-    reset_dense_parameters: bool = False,
-    # fit_gpytorch_mll kwargs
-    closure: Callable[[], tuple[Tensor, Sequence[Tensor | None]]] | None = None,
-    optimizer: Callable | None = None,
-    closure_kwargs: dict[str, Any] | None = None,
-    optimizer_kwargs: Mapping[str, Any] | None = None,
-) -> MarginalLogLikelihood:
-    """Fits a RobustRelevancePursuitGP model using the given marginal likelihood.
-
-    For details, see [Ament2024pursuit]_ or https://arxiv.org/abs/2410.24222.
-
-    Args:
-        mll: The marginal likelihood to fit.
-        _: A likelihood, only directly used for dispatching.
-        _: A model, only directly used for dispatching.
-        numbers_of_outliers: An optional list of numbers of outliers to consider during
-            relevance pursuit. By default, the algorithm falls back to a default list
-            of fractions of outliers, see below.
-        fractions_of_outliers: An optional list of fractions of outliers to consider if
-            numbers_of_outliers is None. By default, the algorithm uses
-            ``[0, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.75, 1.0]``.
-        relevance_pursuit_optimizer: The relevance pursuit optimizer to use. By default,
-            uses ``backward_relevance_pursuit``, which is generally the most powerful
-            algorithm for challenging problems with a wide range of outliers. The
-            ``forward_relevance_pursuit`` algorithm can be efficient when the number of
-            outliers is relatively small.
-        reset_parameters: If True, we will reset the sparse parameters of the model
-            after each iteration of the relevance pursuit algorithm.
-        reset_dense_parameters: If True, we will reset the dense parameters of the model
-            after each iteration of the relevance pursuit algorithm.
-        closure: A closure to use to compute the loss and the gradients, see docstring
-            of ``fit_gpytorch_mll`` for details.
-        optimizer: The numerical optimizer, see docstring of ``fit_gpytorch_mll``.
-        closure_kwargs: Additional arguments to pass to the ``closure`` function.
-        optimizer_kwargs: Additional arguments to pass to ``fit_gpytorch_mll``.
-
-    Returns:
-        The fitted marginal likelihood.
-    """
-    sparse_module = SparseOutlierNoise._from_model(mll.model)
-    n = sparse_module.dim  # equal to the number of training data points
-
-    if numbers_of_outliers is None:
-        if fractions_of_outliers is None:
-            fractions_of_outliers = FRACTIONS_OF_OUTLIERS
-
-        # list from which BMC chooses
-        numbers_of_outliers = [int(p * n) for p in fractions_of_outliers]
-
-    optimizer_kwargs_: dict[str, Any] = (
-        {} if optimizer_kwargs is None else dict(optimizer_kwargs)
-    )
-    if timeout_sec is not None:
-        optimizer_kwargs_["timeout_sec"] = timeout_sec / len(numbers_of_outliers)
-
-    # Need to convert model to avoid recursion through fit_gpytorch_mll dispatch, since
-    # relevance pursuit expects to call the base fit_gpytorch_mll.
-    original_model = mll.model  # Robust Relevance Pursuit Model
-    mll.model = original_model.to_standard_model()
-    sparse_module = SparseOutlierNoise._from_model(mll.model)
-    sparse_module, model_trace = relevance_pursuit_optimizer(
-        sparse_module=sparse_module,
-        mll=mll,
-        sparsity_levels=numbers_of_outliers,
-        reset_parameters=reset_parameters,
-        reset_dense_parameters=reset_dense_parameters,
-        record_model_trace=True,
-        # These are the args of the canonical mll fit routine
-        closure=closure,
-        optimizer=optimizer,
-        closure_kwargs=closure_kwargs,
-        optimizer_kwargs=optimizer_kwargs_,
-    )
-
-    # Bayesian model comparison
-    bmc_support_sizes, bmc_probabilities = get_posterior_over_support(
-        SparseOutlierNoise,
-        model_trace,
-        prior_mean_of_support=original_model.prior_mean_of_support,
-    )
-    map_index = torch.argmax(bmc_probabilities)
-    map_model = model_trace[map_index]  # choosing model with highest BMC score
-    # overwrite mll.model with chosen model
-    mll.model = original_model  # first restore original model pointer
-    mll.model.load_standard_model(map_model)
-    # Store the bmc results
-    mll.model.bmc_support_sizes = bmc_support_sizes
-    mll.model.bmc_probabilities = bmc_probabilities
-    if mll.model.cache_model_trace:
-        mll.model.model_trace = model_trace
-    return mll
-
-
-@FitGPyTorchMLL.register(
-    _ApproximateMarginalLogLikelihood,
-    SparseOutlierGaussianLikelihood,
-    RobustRelevancePursuitMixin,
-)
-def _fit_rrp_approximate_mll(
-    mll: _ApproximateMarginalLogLikelihood,
-    _: type[SparseOutlierGaussianLikelihood],
-    __: type[RobustRelevancePursuitMixin],
-    **kwargs: Any,
-) -> None:
-    raise UnsupportedError(
-        "Relevance Pursuit does not yet support approximate inference. "
-    )
diff --git a/test/test_fit.py b/test/test_fit.py