Fix silent feature misalignment in HeterogeneousMTGP.posterior() (#3254)

Carl Hvarfner · meta-codesync[bot] · commit 42facfab33f9 · 2026-03-27T14:09:38.000-07:00
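Summary: Pull Request resolved: #3254 When d_target + 1 == full_feature_dim, posterior() cannot distinguish a target-space X that includes the task column from a full-space X that omits it, because both have the same number of columns. This causes features to silently shift by one position, producing wrong predictions. Enforcing a mandatory task column eliminates the ambiguity: X must now have exactly d_target + 1 columns with the last (task) column equal to 0, and any other width raises a ValueError. construct_inputs() also now uses the target dataset's feature order as canonical, appending source-only features at the end, instead of sorting feature names alphabetically. The impact on Ax callers (blast radius) has not yet been assessed. Reviewed By: saitcakmak Differential Revision: D97505683 fbshipit-source-id: c90143cd227d87f4f356f153019d762f84b5d28a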
diff --git a/botorch/models/heterogeneous_mtgp.py b/botorch/models/heterogeneous_mtgp.py
@@ -17,7 +17,6 @@
     Heterogeneous Search Spaces. AutoML Conference, 2024.
 """
 
-from itertools import chain
 from typing import Any
 
 import torch
@@ -200,12 +199,10 @@ def posterior(
         r"""Computes the posterior for the target task at the provided points.
 
         Args:
-            X: A tensor of shape ``batch_shape x q x d_0(+1)``, where ``d_0`` is the
-                dimension of the feature space for task 0 (not including task indices)
-                and ``q`` is the number of points considered jointly.
-            output_indices: Not supported. Must be ``None`` or ``[0]``. The model will
-                only produce predictions for the target task regardless of
-                the value of ``output_indices``.
+            X: A tensor of shape ``batch_shape x q x (d_0 + 1)``, where ``d_0``
+                is the dimension of the feature space for task 0 and the last
+                column is the task indicator (must be 0 for the target task).
+            output_indices: Not supported. Must be ``None`` or ``[0]``.
             observation_noise: If True, add observation noise from the respective
                 likelihoods. If a Tensor, specifies the observation noise levels
                 to add.
@@ -219,13 +216,18 @@ def posterior(
             raise UnsupportedError(
                 "Heterogeneous MTGP does not support `output_indices`. "
             )
-        if X.shape[-1] == len(self.feature_indices[0]) + 1:
-            # X contains task feature
-            if (X[..., -1] != 0).any():
-                raise UnsupportedError(
-                    "Posterior can only be called for the target task."
-                )
-            X = X[..., :-1]
+
+        d_target = len(self.feature_indices[0])
+        if X.shape[-1] != d_target + 1:
+            raise ValueError(
+                f"Expected X with {d_target + 1} columns "
+                f"({d_target} target features + 1 task column), "
+                f"got {X.shape[-1]}."
+            )
+
+        if (X[..., -1] != 0).any():
+            raise UnsupportedError("Posterior can only be called for the target task.")
+        X = X[..., :-1]
         X_full = self.map_to_full_tensor(X=X, task_index=0)
         return super().posterior(
             X=X_full,
@@ -296,11 +298,13 @@ def construct_inputs(
         child_datasets = training_data.datasets.copy()
         target_dataset = child_datasets.pop(training_data.target_outcome_name)
         all_datasets = [target_dataset] + list(child_datasets.values())
-        # We want all parameters to be in the same order, and include the full X.
-        # remove task feature
-        all_features = sorted(
-            set(chain(*(ds.feature_names[:-1] for ds in all_datasets)))
-        )
+        # Use target's feature order as canonical (NO alphabetical sort).
+        # Source-only features are appended at the end.
+        all_features: list[str] = list(target_dataset.feature_names[:-1])
+        for ds in all_datasets[1:]:
+            for fn in ds.feature_names[:-1]:
+                if fn not in all_features:
+                    all_features.append(fn)
         # Get indices mapping the features from a given dataset to all features.
         feature_indices = [
             [all_features.index(fn) for fn in ds.feature_names[:-1]]
diff --git a/test/models/test_heterogeneous_mtgp.py b/test/models/test_heterogeneous_mtgp.py
@@ -122,9 +122,10 @@ def test_standard_heterogeneous_mtgp(self) -> None:
                 model.likelihood.noise_covar.noise.shape[-1], model.num_tasks
             )
 
-        # Evaluate the posterior.
+        # Evaluate the posterior (task column required).
         with self.assertRaisesRegex(UnsupportedError, "output_indices"):
             model.posterior(self.ds1.X, output_indices=[0, 1])
+        # ds1.X already has task column (last col = 0)
         posterior = model.posterior(self.ds1.X)
         self.assertIsInstance(posterior, GPyTorchPosterior)
         self.assertIsInstance(posterior.distribution, MultivariateNormal)
@@ -191,7 +192,7 @@ def test_identical_search_space(self) -> None:
         self.assertEqual(model.train_inputs[0].shape, torch.Size([8, 4]))
         data_covar_module = model.covar_module.kernels[0]
         self.assertEqual(len(data_covar_module.kernels), 1)
-        # Evaluate the posterior.
+        # Evaluate the posterior (ds1.X has task col = 0).
         posterior = model.posterior(self.ds1.X)
         self.assertEqual(posterior.mean.shape, torch.Size([5, 1]))
         posterior = model.posterior(self.ds1.X.repeat(3, 1, 1))
@@ -243,10 +244,82 @@ def test_with_no_target_data(self) -> None:
             model.forward(model.map_to_full_tensor(X=torch.zeros(5, 3), task_index=0))
             # Evaluation with task 2 -- requires all_tasks to be passed in to the model.
             model.forward(model.map_to_full_tensor(X=torch.zeros(5, 4), task_index=2))
-            # Evaluate the posterior.
-            posterior = model.posterior(torch.rand(5, 3))
+            # Evaluate the posterior (task column required).
+            X_with_task = torch.cat([torch.rand(5, 3), torch.zeros(5, 1)], dim=-1)
+            posterior = model.posterior(X_with_task)
             self.assertIsInstance(posterior, GPyTorchPosterior)
             self.assertIsInstance(posterior.mvn, MultivariateNormal)
             self.assertEqual(posterior.mean.shape, torch.Size([5, 1]))
-            posterior = model.posterior(torch.rand(3, 5, 3))
+            X_batch_with_task = torch.cat(
+                [torch.rand(3, 5, 3), torch.zeros(3, 5, 1)], dim=-1
+            )
+            posterior = model.posterior(X_batch_with_task)
             self.assertEqual(posterior.mean.shape, torch.Size([3, 5, 1]))
+
+    def test_feature_ordering_preserves_target_order(self) -> None:
+        """Test that construct_inputs uses target's feature order as canonical."""
+        # Create target dataset with features in order: A, B, C
+        target_ds = SupervisedDataset(
+            X=torch.cat([torch.rand(3, 3), torch.zeros(3, 1)], dim=-1),
+            Y=torch.rand(3, 1),
+            feature_names=["A", "B", "C", "task"],
+            outcome_names=["target"],
+        )
+        # Create source dataset with features in different order: C, A, B
+        source_ds = SupervisedDataset(
+            X=torch.cat([torch.rand(2, 3), torch.ones(2, 1)], dim=-1),
+            Y=torch.rand(2, 1),
+            feature_names=["C", "A", "B", "task"],
+            outcome_names=["source"],
+        )
+        mtds = MultiTaskDataset(
+            datasets=[target_ds, source_ds],
+            target_outcome_name="target",
+            task_feature_index=-1,
+        )
+        model_inputs = HeterogeneousMTGP.construct_inputs(training_data=mtds)
+
+        with self.subTest("feature_indices_preserve_target_order"):
+            # Target: A, B, C -> canonical [0, 1, 2]
+            # Source: C, A, B -> maps to [2, 0, 1] in canonical order
+            self.assertEqual(model_inputs["feature_indices"], [[0, 1, 2], [2, 0, 1]])
+
+        with self.subTest("source_only_features_appended_at_end"):
+            # Target: A, B; Source: B, C -> canonical should be [A, B, C]
+            target_ds2 = SupervisedDataset(
+                X=torch.cat([torch.rand(3, 2), torch.zeros(3, 1)], dim=-1),
+                Y=torch.rand(3, 1),
+                feature_names=["A", "B", "task"],
+                outcome_names=["target"],
+            )
+            source_ds2 = SupervisedDataset(
+                X=torch.cat([torch.rand(2, 2), torch.ones(2, 1)], dim=-1),
+                Y=torch.rand(2, 1),
+                feature_names=["B", "C", "task"],
+                outcome_names=["source"],
+            )
+            mtds2 = MultiTaskDataset(
+                datasets=[target_ds2, source_ds2],
+                target_outcome_name="target",
+                task_feature_index=-1,
+            )
+            model_inputs2 = HeterogeneousMTGP.construct_inputs(training_data=mtds2)
+            # Target: A, B -> [0, 1]; Source: B, C -> [1, 2]
+            self.assertEqual(model_inputs2["feature_indices"], [[0, 1], [1, 2]])
+            self.assertEqual(model_inputs2["full_feature_dim"], 3)
+
+    def test_posterior_requires_task_column(self) -> None:
+        """Test that posterior rejects X without task column."""
+        model_inputs = HeterogeneousMTGP.construct_inputs(training_data=self.mtds)
+        model = HeterogeneousMTGP(**model_inputs)
+        model.eval()
+        # d_target=3, so posterior requires exactly 4 columns
+
+        with self.subTest("rejects_no_task_column"):
+            with self.assertRaisesRegex(ValueError, "Expected X with 4 columns"):
+                model.posterior(torch.rand(4, 3))
+
+        with self.subTest("rejects_full_space"):
+            X_full = torch.cat([torch.rand(4, 5), torch.zeros(4, 1)], dim=-1)
+            with self.assertRaisesRegex(ValueError, "Expected X with 4 columns"):
+                model.posterior(X_full)