Skip to content

Commit 42facfa

Browse files
Carl Hvarfner authored and meta-codesync[bot] committed
Fix silent feature misalignment in HeterogeneousMTGP.posterior() (#3254)
Summary: Pull Request resolved: #3254. When `d_target + 1 == full_feature_dim`, `posterior()` cannot distinguish a target-space X that includes a task column from a full-space X that does not. This causes features to silently shift by one position, producing wrong predictions. Enforcing a mandatory task column eliminates the ambiguity. Not sure about Ax blast radius. Reviewed By: saitcakmak. Differential Revision: D97505683. fbshipit-source-id: c90143cd227d87f4f356f153019d762f84b5d28a
1 parent 719d7b2 commit 42facfa

2 files changed

Lines changed: 101 additions & 24 deletions

File tree

botorch/models/heterogeneous_mtgp.py

Lines changed: 23 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,6 @@
1717
Heterogeneous Search Spaces. AutoML Conference, 2024.
1818
"""
1919

20-
from itertools import chain
2120
from typing import Any
2221

2322
import torch
@@ -200,12 +199,10 @@ def posterior(
200199
r"""Computes the posterior for the target task at the provided points.
201200
202201
Args:
203-
X: A tensor of shape ``batch_shape x q x d_0(+1)``, where ``d_0`` is the
204-
dimension of the feature space for task 0 (not including task indices)
205-
and ``q`` is the number of points considered jointly.
206-
output_indices: Not supported. Must be ``None`` or ``[0]``. The model will
207-
only produce predictions for the target task regardless of
208-
the value of ``output_indices``.
202+
X: A tensor of shape ``batch_shape x q x (d_0 + 1)``, where ``d_0``
203+
is the dimension of the feature space for task 0 and the last
204+
column is the task indicator (must be 0 for the target task).
205+
output_indices: Not supported. Must be ``None`` or ``[0]``.
209206
observation_noise: If True, add observation noise from the respective
210207
likelihoods. If a Tensor, specifies the observation noise levels
211208
to add.
@@ -219,13 +216,18 @@ def posterior(
219216
raise UnsupportedError(
220217
"Heterogeneous MTGP does not support `output_indices`. "
221218
)
222-
if X.shape[-1] == len(self.feature_indices[0]) + 1:
223-
# X contains task feature
224-
if (X[..., -1] != 0).any():
225-
raise UnsupportedError(
226-
"Posterior can only be called for the target task."
227-
)
228-
X = X[..., :-1]
219+
220+
d_target = len(self.feature_indices[0])
221+
if X.shape[-1] != d_target + 1:
222+
raise ValueError(
223+
f"Expected X with {d_target + 1} columns "
224+
f"({d_target} target features + 1 task column), "
225+
f"got {X.shape[-1]}."
226+
)
227+
228+
if (X[..., -1] != 0).any():
229+
raise UnsupportedError("Posterior can only be called for the target task.")
230+
X = X[..., :-1]
229231
X_full = self.map_to_full_tensor(X=X, task_index=0)
230232
return super().posterior(
231233
X=X_full,
@@ -296,11 +298,13 @@ def construct_inputs(
296298
child_datasets = training_data.datasets.copy()
297299
target_dataset = child_datasets.pop(training_data.target_outcome_name)
298300
all_datasets = [target_dataset] + list(child_datasets.values())
299-
# We want all parameters to be in the same order, and include the full X.
300-
# remove task feature
301-
all_features = sorted(
302-
set(chain(*(ds.feature_names[:-1] for ds in all_datasets)))
303-
)
301+
# Use target's feature order as canonical (NO alphabetical sort).
302+
# Source-only features are appended at the end.
303+
all_features: list[str] = list(target_dataset.feature_names[:-1])
304+
for ds in all_datasets[1:]:
305+
for fn in ds.feature_names[:-1]:
306+
if fn not in all_features:
307+
all_features.append(fn)
304308
# Get indices mapping the features from a given dataset to all features.
305309
feature_indices = [
306310
[all_features.index(fn) for fn in ds.feature_names[:-1]]

test/models/test_heterogeneous_mtgp.py

Lines changed: 78 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -122,9 +122,10 @@ def test_standard_heterogeneous_mtgp(self) -> None:
122122
model.likelihood.noise_covar.noise.shape[-1], model.num_tasks
123123
)
124124

125-
# Evaluate the posterior.
125+
# Evaluate the posterior (task column required).
126126
with self.assertRaisesRegex(UnsupportedError, "output_indices"):
127127
model.posterior(self.ds1.X, output_indices=[0, 1])
128+
# ds1.X already has task column (last col = 0)
128129
posterior = model.posterior(self.ds1.X)
129130
self.assertIsInstance(posterior, GPyTorchPosterior)
130131
self.assertIsInstance(posterior.distribution, MultivariateNormal)
@@ -191,7 +192,7 @@ def test_identical_search_space(self) -> None:
191192
self.assertEqual(model.train_inputs[0].shape, torch.Size([8, 4]))
192193
data_covar_module = model.covar_module.kernels[0]
193194
self.assertEqual(len(data_covar_module.kernels), 1)
194-
# Evaluate the posterior.
195+
# Evaluate the posterior (ds1.X has task col = 0).
195196
posterior = model.posterior(self.ds1.X)
196197
self.assertEqual(posterior.mean.shape, torch.Size([5, 1]))
197198
posterior = model.posterior(self.ds1.X.repeat(3, 1, 1))
@@ -243,10 +244,82 @@ def test_with_no_target_data(self) -> None:
243244
model.forward(model.map_to_full_tensor(X=torch.zeros(5, 3), task_index=0))
244245
# Evaluation with task 2 -- requires all_tasks to be passed in to the model.
245246
model.forward(model.map_to_full_tensor(X=torch.zeros(5, 4), task_index=2))
246-
# Evaluate the posterior.
247-
posterior = model.posterior(torch.rand(5, 3))
247+
# Evaluate the posterior (task column required).
248+
X_with_task = torch.cat([torch.rand(5, 3), torch.zeros(5, 1)], dim=-1)
249+
posterior = model.posterior(X_with_task)
248250
self.assertIsInstance(posterior, GPyTorchPosterior)
249251
self.assertIsInstance(posterior.mvn, MultivariateNormal)
250252
self.assertEqual(posterior.mean.shape, torch.Size([5, 1]))
251-
posterior = model.posterior(torch.rand(3, 5, 3))
253+
X_batch_with_task = torch.cat(
254+
[torch.rand(3, 5, 3), torch.zeros(3, 5, 1)], dim=-1
255+
)
256+
posterior = model.posterior(X_batch_with_task)
252257
self.assertEqual(posterior.mean.shape, torch.Size([3, 5, 1]))
258+
259+
def test_feature_ordering_preserves_target_order(self) -> None:
260+
"""Test that construct_inputs uses target's feature order as canonical."""
261+
# Create target dataset with features in order: A, B, C
262+
target_ds = SupervisedDataset(
263+
X=torch.cat([torch.rand(3, 3), torch.zeros(3, 1)], dim=-1),
264+
Y=torch.rand(3, 1),
265+
feature_names=["A", "B", "C", "task"],
266+
outcome_names=["target"],
267+
)
268+
# Create source dataset with features in different order: C, A, B
269+
source_ds = SupervisedDataset(
270+
X=torch.cat([torch.rand(2, 3), torch.ones(2, 1)], dim=-1),
271+
Y=torch.rand(2, 1),
272+
feature_names=["C", "A", "B", "task"],
273+
outcome_names=["source"],
274+
)
275+
mtds = MultiTaskDataset(
276+
datasets=[target_ds, source_ds],
277+
target_outcome_name="target",
278+
task_feature_index=-1,
279+
)
280+
model_inputs = HeterogeneousMTGP.construct_inputs(training_data=mtds)
281+
282+
with self.subTest("feature_indices_preserve_target_order"):
283+
# Target: A, B, C -> canonical [0, 1, 2]
284+
# Source: C, A, B -> maps to [2, 0, 1] in canonical order
285+
self.assertEqual(model_inputs["feature_indices"], [[0, 1, 2], [2, 0, 1]])
286+
287+
with self.subTest("source_only_features_appended_at_end"):
288+
# Target: A, B; Source: B, C -> canonical should be [A, B, C]
289+
target_ds2 = SupervisedDataset(
290+
X=torch.cat([torch.rand(3, 2), torch.zeros(3, 1)], dim=-1),
291+
Y=torch.rand(3, 1),
292+
feature_names=["A", "B", "task"],
293+
outcome_names=["target"],
294+
)
295+
source_ds2 = SupervisedDataset(
296+
X=torch.cat([torch.rand(2, 2), torch.ones(2, 1)], dim=-1),
297+
Y=torch.rand(2, 1),
298+
feature_names=["B", "C", "task"],
299+
outcome_names=["source"],
300+
)
301+
mtds2 = MultiTaskDataset(
302+
datasets=[target_ds2, source_ds2],
303+
target_outcome_name="target",
304+
task_feature_index=-1,
305+
)
306+
model_inputs2 = HeterogeneousMTGP.construct_inputs(training_data=mtds2)
307+
# Target: A, B -> [0, 1]; Source: B, C -> [1, 2]
308+
self.assertEqual(model_inputs2["feature_indices"], [[0, 1], [1, 2]])
309+
self.assertEqual(model_inputs2["full_feature_dim"], 3)
310+
311+
def test_posterior_requires_task_column(self) -> None:
312+
"""Test that posterior rejects X without task column."""
313+
model_inputs = HeterogeneousMTGP.construct_inputs(training_data=self.mtds)
314+
model = HeterogeneousMTGP(**model_inputs)
315+
model.eval()
316+
# d_target=3, so posterior requires exactly 4 columns
317+
318+
with self.subTest("rejects_no_task_column"):
319+
with self.assertRaisesRegex(ValueError, "Expected X with 4 columns"):
320+
model.posterior(torch.rand(4, 3))
321+
322+
with self.subTest("rejects_full_space"):
323+
X_full = torch.cat([torch.rand(4, 5), torch.zeros(4, 1)], dim=-1)
324+
with self.assertRaisesRegex(ValueError, "Expected X with 4 columns"):
325+
model.posterior(X_full)

0 commit comments

Comments
 (0)