Support cache_root for low-rank kernels (#3223)

David Eriksson · meta-codesync[bot] · commit 4decc06b0ac5 · 2026-03-19T10:23:15.000-07:00
Summary: Pull Request resolved: #3223 This allows turning off `cache_root` for models that don't support it. Reviewed By: saitcakmak Differential Revision: D95317067 fbshipit-source-id: 6b2cff028c008998cf7e4fb7054f8f7447e0d81f
diff --git a/botorch/acquisition/cached_cholesky.py b/botorch/acquisition/cached_cholesky.py
@@ -20,7 +20,6 @@
 from botorch.models.higher_order_gp import HigherOrderGP
 from botorch.models.model import Model
 from botorch.models.model_list_gp_regression import ModelListGP
-from botorch.models.multitask import KroneckerMultiTaskGP, MultiTaskGP
 from botorch.posteriors.gpytorch import GPyTorchPosterior
 from botorch.posteriors.posterior import Posterior
 from botorch.sampling.base import MCSampler
@@ -39,10 +38,11 @@ def supports_cache_root(model: Model) -> bool:
     """
     if isinstance(model, ModelListGP):
         return all(supports_cache_root(m) for m in model.models)
+    # Allow models to explicitly opt out of cache_root support.
+    if getattr(model, "_supports_cache_root", True) is False:
+        return False
-    # Multi task models and non-GPyTorch models are not supported.
+    # Non-GPyTorch models are not supported; multi-task models opt out above.
-    if isinstance(
-        model, (MultiTaskGP, KroneckerMultiTaskGP, HigherOrderGP)
-    ) or not isinstance(model, GPyTorchModel):
+    if not isinstance(model, GPyTorchModel):
         return False
     # Models that return a TransformedPosterior are not supported.
     if hasattr(model, "outcome_transform") and (not model.outcome_transform._is_linear):
diff --git a/botorch/models/higher_order_gp.py b/botorch/models/higher_order_gp.py
@@ -144,14 +144,14 @@ class HigherOrderGP(BatchedMultiOutputGPyTorchModel, ExactGP, FantasizeMixin):
     r"""
     A model for high-dimensional output regression.
 
-    As described in [Zhe2019hogp]_. “Higher-order” means that the predictions
+    As described in [Zhe2019hogp]_. "Higher-order" means that the predictions
     are matrices (tensors) with at least two dimensions, such as images or
     grids of images, or measurements taken from a region of at least two
     dimensions.
     The posterior uses Matheron's rule [Doucet2010sampl]_
     as described in [Maddox2021bohdo]_.
 
-    ``HigherOrderGP`` differs from a "vector” multi-output model in that it uses
+    ``HigherOrderGP`` differs from a "vector" multi-output model in that it uses
     Kronecker algebra to obtain parsimonious covariance matrices for these
     outputs (see ``KroneckerMultiTaskGP`` for more information). For example,
     imagine a 10 x 20 x 30 grid of images. If we were to vectorize the
@@ -177,6 +177,8 @@ class HigherOrderGP(BatchedMultiOutputGPyTorchModel, ExactGP, FantasizeMixin):
         >>> samples = model.posterior(test_X).rsample()
     """
 
+    _supports_cache_root = False
+
     def __init__(
         self,
         train_X: Tensor,
diff --git a/botorch/models/multitask.py b/botorch/models/multitask.py
@@ -147,6 +147,7 @@ class MultiTaskGP(ExactGP, MultiTaskGPyTorchModel, FantasizeMixin):
     """
 
     _supports_batched_models = False
+    _supports_cache_root = False
 
     def __init__(
         self,
@@ -564,6 +565,8 @@ class KroneckerMultiTaskGP(ExactGP, GPyTorchModel, FantasizeMixin):
         >>> model = KroneckerMultiTaskGP(train_X, train_Y)
     """
 
+    _supports_cache_root = False
+
     def __init__(
         self,
         train_X: Tensor,
diff --git a/test/acquisition/test_cached_cholesky.py b/test/acquisition/test_cached_cholesky.py
@@ -8,14 +8,19 @@
 from unittest import mock
 
 import torch
-from botorch.acquisition.cached_cholesky import CachedCholeskyMCSamplerMixin
+from botorch.acquisition.cached_cholesky import (
+    CachedCholeskyMCSamplerMixin,
+    supports_cache_root,
+)
 from botorch.acquisition.monte_carlo import MCAcquisitionFunction
 from botorch.acquisition.objective import GenericMCObjective, MCAcquisitionObjective
 from botorch.exceptions.warnings import BotorchWarning
 from botorch.models import SingleTaskGP
 from botorch.models.deterministic import GenericDeterministicModel
 from botorch.models.higher_order_gp import HigherOrderGP
 from botorch.models.model import Model, ModelList
+from botorch.models.model_list_gp_regression import ModelListGP
+from botorch.models.multitask import KroneckerMultiTaskGP, MultiTaskGP
 from botorch.models.transforms.outcome import Log
 from botorch.sampling.normal import IIDNormalSampler, MCSampler
 from botorch.utils.low_rank import extract_batch_covar
@@ -146,6 +151,89 @@ def test_cache_root_decomposition(self):
                         mock_cholesky.assert_called_once()
                 self.assertTrue(torch.equal(baseline_L_acqf, baseline_L))
 
+    def test_supports_cache_root_opt_out(self):
+        """Test that models can opt out of cache_root via _supports_cache_root.
+
+        Models with low-rank kernels (e.g., SphericalLinearSingleTaskGP using
+        LinearPredictionStrategy) are incompatible with cache_root because
+        base_samples are generated for rank r < n. These models set
+        _supports_cache_root = False so that cache_root is automatically
+        disabled.
+        """
+        tkwargs = {"device": self.device}
+        for dtype in (torch.float, torch.double):
+            with self.subTest(dtype=dtype):
+                tkwargs["dtype"] = dtype
+
+                # Standard models support cache_root by default
+                stgp = SingleTaskGP(
+                    torch.zeros(2, 1, **tkwargs), torch.zeros(2, 1, **tkwargs)
+                )
+                self.assertTrue(supports_cache_root(stgp))
+
+                # Models with _supports_cache_root = False do not
+                stgp._supports_cache_root = False
+                self.assertFalse(supports_cache_root(stgp))
+
+                # This propagates through ModelListGP
+                stgp2 = SingleTaskGP(
+                    torch.zeros(2, 1, **tkwargs), torch.zeros(2, 1, **tkwargs)
+                )
+                stgp2._supports_cache_root = False
+                model_list = ModelListGP(stgp2)
+                self.assertFalse(supports_cache_root(model_list))
+
+                # CachedCholeskyMCSamplerMixin respects the opt-out
+                sampler = IIDNormalSampler(sample_shape=torch.Size([2]))
+                acqf = DummyCachedCholeskyAcqf(
+                    model=stgp,
+                    sampler=sampler,
+                )
+                self.assertFalse(acqf._cache_root)
+
+                # Explicitly passing cache_root=True warns and gets disabled
+                with self.assertWarnsRegex(RuntimeWarning, "cache_root"):
+                    acqf = DummyCachedCholeskyAcqf(
+                        model=stgp,
+                        sampler=sampler,
+                        cache_root=True,
+                    )
+                self.assertFalse(acqf._cache_root)
+
+    def test_unsupported_models_have_supports_cache_root_false(self):
+        """Test that MultiTaskGP, KroneckerMultiTaskGP, and HigherOrderGP
+        set _supports_cache_root = False as a class attribute."""
+        # Check the class attribute directly
+        self.assertFalse(MultiTaskGP._supports_cache_root)
+        self.assertFalse(KroneckerMultiTaskGP._supports_cache_root)
+        self.assertFalse(HigherOrderGP._supports_cache_root)
+
+        # Check that instances also have the attribute set to False
+        tkwargs = {"device": self.device, "dtype": torch.double}
+
+        # MultiTaskGP
+        train_X = torch.cat(
+            [torch.rand(5, 1, **tkwargs), torch.zeros(5, 1, **tkwargs)], dim=-1
+        )
+        train_Y = torch.rand(5, 1, **tkwargs)
+        mtgp = MultiTaskGP(train_X, train_Y, task_feature=-1)
+        self.assertFalse(mtgp._supports_cache_root)
+        self.assertFalse(supports_cache_root(mtgp))
+
+        # KroneckerMultiTaskGP
+        train_X = torch.rand(5, 2, **tkwargs)
+        train_Y = torch.rand(5, 2, **tkwargs)
+        kmtgp = KroneckerMultiTaskGP(train_X, train_Y)
+        self.assertFalse(kmtgp._supports_cache_root)
+        self.assertFalse(supports_cache_root(kmtgp))
+
+        # HigherOrderGP
+        train_X = torch.rand(5, 2, **tkwargs)
+        train_Y = torch.rand(5, 1, 1, **tkwargs)
+        hogp = HigherOrderGP(train_X, train_Y)
+        self.assertFalse(hogp._supports_cache_root)
+        self.assertFalse(supports_cache_root(hogp))
+
     def test_get_f_X_samples(self):
         sample_cached_cholesky_path = (
             "botorch.acquisition.cached_cholesky.sample_cached_cholesky"