Flatten LearnedFeatureImputation raw parameter for scipy fitting compatibility (#3299)

Carl Hvarfner · facebook-github-bot · commit e5dae524b615 · 2026-04-28T07:51:22.000-07:00
Summary:

The `raw_imputation_values` parameter had shape `(num_tasks, d+1)`, but the bounds of its Interval constraint had shape `(d+1,)`. The scipy fitting path (`get_bounds_as_ndarray`) flattens the parameter to `num_tasks*(d+1)` elements and then tries to assign the `(d+1,)` bounds into a slice of `num_tasks*(d+1)` elements, causing a ValueError.

Flatten `raw_imputation_values` to 1-D with shape `(num_tasks*(d+1),)` and repeat the Interval bounds to match. The `imputation_values` property reshapes back to `(num_tasks, d+1)` for use in `transform()`.

Adds a `fit_gpytorch_mll_with_bounds` subtest that fits a MultiTaskGP with Normalize + LearnedFeatureImputation (with bounds) through the scipy optimizer path, verifying no shape mismatch occurs.

Differential Revision: D102789747
diff --git a/botorch/models/transforms/input.py b/botorch/models/transforms/input.py
@@ -2042,38 +2042,36 @@ def __init__(
             missing_mask[task_pos, feature_indices[task_value]] = False
         self.register_buffer("missing_mask", missing_mask)
 
-        # Learnable imputation values, shape (num_tasks, d+1). The task column
-        # slot is unused but kept for index alignment with X columns.
+        # Learnable imputation values stored as 1-D so that gpytorch's scipy
+        # fitting path (which flattens parameters) sees a bound tensor with
+        # matching numel. Reshaped to (num_tasks, d+1) in `imputation_values`.
         self.register_parameter(
             "raw_imputation_values",
             nn.Parameter(
-                torch.zeros(self.num_tasks, d + 1, dtype=dtype, device=device)
+                torch.zeros(self.num_tasks * (d + 1), dtype=dtype, device=device)
             ),
         )
         if bounds is not None:
-            # Pad bounds with dummy [0, 1] for the task column so the Interval
-            # constraint has shape (d+1,) matching raw_imputation_values.
             padded_lower = torch.zeros(d + 1, dtype=dtype, device=device)
             padded_upper = torch.ones(d + 1, dtype=dtype, device=device)
             padded_lower[:d] = bounds[0]
             padded_upper[:d] = bounds[1]
             self.register_constraint(
                 "raw_imputation_values",
                 Interval(
-                    lower_bound=padded_lower,
-                    upper_bound=padded_upper,
+                    lower_bound=padded_lower.repeat(self.num_tasks),
+                    upper_bound=padded_upper.repeat(self.num_tasks),
                 ),
             )
 
     @property
     def imputation_values(self) -> Tensor:
-        r"""The imputation values, mapped through the Interval constraint when
-        bounds are present, or the raw values otherwise."""
+        r"""The imputation values reshaped to ``(num_tasks, d+1)``, mapped
+        through the Interval constraint when bounds are present."""
+        raw = self.raw_imputation_values
         if self.bounds is not None:
-            return self.raw_imputation_values_constraint.transform(
-                self.raw_imputation_values
-            )
-        return self.raw_imputation_values
+            raw = self.raw_imputation_values_constraint.transform(raw)
+        return raw.view(self.num_tasks, self.d + 1)
 
     def transform(self, X: Tensor) -> Tensor:
         r"""Impute missing features with learned values.
diff --git a/test/models/transforms/test_input.py b/test/models/transforms/test_input.py
@@ -15,6 +15,8 @@
 import torch
 from botorch.exceptions.errors import BotorchTensorDimensionError
 from botorch.exceptions.warnings import UserInputWarning
+from botorch.fit import fit_gpytorch_mll
+from botorch.models.multitask import MultiTaskGP
 from botorch.models.transforms.input import (
     AffineInputTransform,
     AppendFeatures,
@@ -36,8 +38,10 @@
 )
 from botorch.models.transforms.utils import expand_and_copy_tensor
 from botorch.models.utils import fantasize
+from botorch.test_utils.mock import mock_optimize_context_manager
 from botorch.utils.testing import BotorchTestCase
 from gpytorch import Module as GPyTorchModule
+from gpytorch.mlls import ExactMarginalLogLikelihood
 from gpytorch.priors import LogNormalPrior
 from torch import Tensor
 from torch.distributions import Kumaraswamy
@@ -1657,7 +1661,9 @@ def test_learned_feature_imputation(self) -> None:
                     **tkwargs,
                 )
                 self.assertEqual(tf.num_tasks, 2)
-                self.assertEqual(tf.raw_imputation_values.shape, torch.Size([2, d + 1]))
+                self.assertEqual(
+                    tf.raw_imputation_values.shape, torch.Size([2 * (d + 1)])
+                )
                 # missing_mask: shape (num_tasks, d+1), task col always False.
                 self.assertTrue(tf.missing_mask[0, 3].item())
                 self.assertFalse(tf.missing_mask[0, 0].item())
@@ -1671,10 +1677,7 @@ def test_learned_feature_imputation(self) -> None:
                     **tkwargs,
                 )
                 tf.raw_imputation_values.data = torch.tensor(
-                    [
-                        [0.0, 0.0, 0.0, 0.5, 0.0],
-                        [0.0, 0.0, 0.7, 0.0, 0.0],
-                    ],
+                    [0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.7, 0.0, 0.0],
                     **tkwargs,
                 )
                 X = torch.tensor(
@@ -1746,10 +1749,11 @@ def test_learned_feature_imputation(self) -> None:
                 tf(X_grad).sum().backward()
                 grad = tf.raw_imputation_values.grad
                 self.assertIsNotNone(grad)
-                self.assertNotEqual(grad[1, 1].item(), 0.0)
+                # d=2 → stride is d+1=3. Task 1, feature 1 → index 4.
+                self.assertNotEqual(grad[4].item(), 0.0)
                 # Task 0 observes both features → no imputation → no grad.
-                self.assertEqual(grad[0, 0].item(), 0.0)
-                self.assertEqual(grad[0, 1].item(), 0.0)
+                self.assertEqual(grad[0].item(), 0.0)
+                self.assertEqual(grad[1].item(), 0.0)
 
             with self.subTest("untransform_raises", dtype=dtype):
                 tf = LearnedFeatureImputation(feature_indices={0: [0]}, d=1, **tkwargs)
@@ -1769,10 +1773,7 @@ def test_learned_feature_imputation(self) -> None:
                     **tkwargs,
                 )
                 tf.raw_imputation_values.data = torch.tensor(
-                    [
-                        [0.0, 0.0, 0.0, 0.5, 0.6, 0.0],
-                        [0.0, 0.0, 0.7, 0.0, 0.0, 0.0],
-                    ],
+                    [0.0, 0.0, 0.0, 0.5, 0.6, 0.0, 0.0, 0.0, 0.7, 0.0, 0.0, 0.0],
                     **tkwargs,
                 )
                 # Imputation with asymmetric observed feature counts.
@@ -1853,7 +1854,8 @@ def test_learned_feature_imputation(self) -> None:
                 tf_g(
                     torch.tensor([[1.0, 2.0, 0.0], [3.0, 9.0, 1.0]], **tkwargs)
                 ).sum().backward()
-                self.assertNotEqual(tf_g.raw_imputation_values.grad[1, 1].item(), 0.0)
+                # d=2 → stride is 3. Task 1, feature 1 → index 4.
+                self.assertNotEqual(tf_g.raw_imputation_values.grad[4].item(), 0.0)
 
             with self.subTest("three_tasks", dtype=dtype):
                 tf = LearnedFeatureImputation(
@@ -1862,11 +1864,7 @@ def test_learned_feature_imputation(self) -> None:
                     **tkwargs,
                 )
                 tf.raw_imputation_values.data = torch.tensor(
-                    [
-                        [0.0, 0.0, 0.3, 0.0],
-                        [0.4, 0.0, 0.0, 0.0],
-                        [0.0, 0.5, 0.0, 0.0],
-                    ],
+                    [0.0, 0.0, 0.3, 0.0, 0.4, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0],
                     **tkwargs,
                 )
                 X_three_tasks = torch.tensor(
@@ -1895,7 +1893,7 @@ def test_learned_feature_imputation(self) -> None:
                     )
                 )
                 tf.raw_imputation_values.data = torch.tensor(
-                    [[0.0, 0.0, 0.0, 0.5, 0.0], [0.0, 0.0, 0.7, 0.0, 0.0]],
+                    [0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.7, 0.0, 0.0],
                     **tkwargs,
                 )
                 X_noncontig = torch.tensor(
@@ -1917,7 +1915,7 @@ def test_learned_feature_imputation(self) -> None:
                     **tkwargs,
                 )
                 tf.raw_imputation_values.data = torch.tensor(
-                    [[0.0, 0.0, 0.0, 0.5, 0.0], [0.0, 0.0, 0.7, 0.0, 0.0]],
+                    [0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.7, 0.0, 0.0],
                     **tkwargs,
                 )
                 X_batch = torch.tensor(
@@ -1940,7 +1938,7 @@ def test_learned_feature_imputation(self) -> None:
                     **tkwargs,
                 )
                 tf.raw_imputation_values.data = torch.tensor(
-                    [[0.0, 0.0, 0.0, 0.5, 0.0], [0.0, 0.0, 0.7, 0.0, 0.0]],
+                    [0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.7, 0.0, 0.0],
                     **tkwargs,
                 )
                 X_no_task = torch.tensor(
@@ -1984,6 +1982,47 @@ def test_learned_feature_imputation(self) -> None:
                 with self.assertRaisesRegex(ValueError, "Expected X.shape"):
                     tf(torch.zeros(2, d + 3, **tkwargs))
 
+            with self.subTest("fit_gpytorch_mll_with_bounds", dtype=dtype):
+                n = 5
+                X = torch.cat(
+                    [
+                        torch.cat(
+                            [
+                                torch.rand(n, d, **tkwargs),
+                                i * torch.ones(n, 1, **tkwargs),
+                            ],
+                            dim=-1,
+                        )
+                        for i in range(len(feature_indices))
+                    ]
+                )
+                Y = torch.randn(len(feature_indices) * n, 1, **tkwargs)
+                bounds = torch.stack(
+                    [torch.zeros(d, **tkwargs), torch.ones(d, **tkwargs)]
+                )
+                lfi = LearnedFeatureImputation(
+                    feature_indices=feature_indices, d=d, bounds=bounds, **tkwargs
+                )
+                model = MultiTaskGP(
+                    train_X=X,
+                    train_Y=Y,
+                    task_feature=-1,
+                    input_transform=ChainedInputTransform(
+                        tf0=Normalize(d=d + 1, indices=list(range(d))),
+                        tf1=lfi,
+                    ),
+                )
+                mll = ExactMarginalLogLikelihood(model.likelihood, model)
+                with mock_optimize_context_manager():
+                    fit_gpytorch_mll(mll, max_attempts=1)
+                imp = lfi.imputation_values
+                # These are the two learnable imputation values under the current
+                # setup, so both should be non-zero after fitting.
+                self.assertNotEqual(imp[0, 3].item(), 0.0)
+                self.assertNotEqual(imp[1, 2].item(), 0.0)
+                # Unlearned entry: presumably non-zero only via the Interval transform.
+                self.assertNotEqual(imp[0, 1].item(), 0.0)
+
 
 class TestAppendFeatures(BotorchTestCase):
     def test_append_features(self) -> None: