Use search space bounds for Normalize in transfer learning adapter (#5184)

Carl Hvarfner · facebook-github-bot · commit 8a13506a2ee0 · 2026-04-28T06:57:31.000-07:00
Summary:

The transfer learning adapter explicitly passed `bounds=None` to Normalize,
forcing `learn_bounds=True`. This caused Normalize bounds to be learned from
data instead of fixed to the search space, resulting in bounds that drift
during training and differ between benchmark configs despite identical
search spaces.

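Remove the `bounds=None` override so that `_set_default_bounds` provides
the correct search space bounds from the SearchSpaceDigest.

The SearchSpaceDigest produced by `_get_fit_args` only reflects the target
search space, while the model operates in the joint feature space whenever
source experiments have additional parameters. So that Normalize receives
bounds for every feature, this change also:

* adds `_expand_ssd_to_joint_space`, which adds the names and bounds of
  source-only range parameters to the digest (called from `_fit` and
  `_cross_validate`);
* fixes source-only range parameters to the joint search space's naive
  center in `gen`, unless a value is already fixed for them;
* updates `fit` in `botorch_modular/surrogate.py` to also register the
  model and outcome names for a `MultiTaskDataset` whose feature names are
  a subset of the digest's feature names.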

Reviewed By: sdaulton

Differential Revision: D100669010
diff --git a/ax/adapter/transfer_learning/adapter.py b/ax/adapter/transfer_learning/adapter.py
@@ -7,6 +7,7 @@
 
 from __future__ import annotations
 
+import dataclasses
 import warnings
 from collections.abc import Mapping, Sequence
 from logging import Logger
@@ -38,7 +39,7 @@
 from ax.core.observation import ObservationData, ObservationFeatures
 from ax.core.optimization_config import OptimizationConfig
 from ax.core.parameter import FixedParameter, RangeParameter
-from ax.core.search_space import SearchSpace
+from ax.core.search_space import SearchSpace, SearchSpaceDigest
 from ax.exceptions.core import DataRequiredError, UnsupportedError, UserInputError
 from ax.generation_strategy.best_model_selector import (
     ReductionCriterion,
@@ -504,6 +505,86 @@ def _get_task_datasets(
             )
         return task_datasets
 
+    def _expand_ssd_to_joint_space(
+        self,
+        search_space_digest: SearchSpaceDigest,
+    ) -> SearchSpaceDigest:
+        """Expand SSD bounds and feature_names to cover the joint search space.
+
+        The SSD produced by ``_get_fit_args`` reflects the target search space.
+        When source experiments have additional parameters, the model operates
+        in the full joint feature space. This method appends bounds and feature
+        names for source-only parameters so that input transforms receive
+        correct full-space bounds.
+        """
+        existing_names = set(search_space_digest.feature_names)
+        extra_names: list[str] = []
+        extra_bounds: list[tuple[int | float, int | float]] = []
+        # Only collect parameters absent from the target SSD. Shared
+        # parameters that appear in both target and source keep the target
+        # bounds -- source observations outside those bounds will normalize
+        # outside [0, 1]. This is intentional, as the GP hyperprior is calibrated
+        # for a __target__ task in [0, 1]^D.
+        for name, param in self.joint_search_space.parameters.items():
+            if name not in existing_names and isinstance(param, RangeParameter):
+                extra_names.append(name)
+                extra_bounds.append((param.lower, param.upper))
+        if not extra_names:
+            return search_space_digest
+        # Insert source-only params before the task feature
+        task_features = search_space_digest.task_features
+        if len(task_features) == 1:
+            tf_idx = task_features[0]
+            names = list(search_space_digest.feature_names)
+            bounds = list(search_space_digest.bounds)
+            # Assert no index-based fields need shifting.
+            for field_name in (
+                "ordinal_features",
+                "categorical_features",
+                "fidelity_features",
+            ):
+                indices = getattr(search_space_digest, field_name)
+                if any(i >= tf_idx for i in indices):
+                    raise UnsupportedError(
+                        f"Cannot expand SSD: {field_name} contains index >= {tf_idx}."
+                    )
+            if any(i >= tf_idx for i in search_space_digest.discrete_choices):
+                raise UnsupportedError(
+                    f"Cannot expand SSD: discrete_choices contains index >= {tf_idx}."
+                )
+            if search_space_digest.hierarchical_dependencies is not None and any(
+                i >= tf_idx for i in search_space_digest.hierarchical_dependencies
+            ):
+                raise UnsupportedError(
+                    "Cannot expand SSD: hierarchical_dependencies contains "
+                    f"index >= {tf_idx}."
+                )
+            names[tf_idx:tf_idx] = extra_names
+            bounds[tf_idx:tf_idx] = extra_bounds
+            # Task feature index shifts by the number of inserted params.
+            new_task_features = [tf_idx + len(extra_names)]
+            new_target_values = dict(search_space_digest.target_values)
+            if tf_idx in new_target_values:
+                new_target_values[new_task_features[0]] = new_target_values.pop(tf_idx)
+            return dataclasses.replace(
+                search_space_digest,
+                feature_names=names,
+                bounds=bounds,
+                task_features=new_task_features,
+                target_values=new_target_values,
+            )
+        elif len(task_features) == 0:
+            # No task feature -- just append.
+            return dataclasses.replace(
+                search_space_digest,
+                feature_names=search_space_digest.feature_names + extra_names,
+                bounds=search_space_digest.bounds + extra_bounds,
+            )
+        else:
+            raise UnsupportedError(
+                "Multiple task features are not supported in transfer learning."
+            )
+
     def _fit(
         self,
         search_space: SearchSpace,
@@ -525,6 +606,10 @@ def _fit(
             experiment_data=experiment_data,
             update_outcomes_and_parameters=True,
         )
+        # Expand SSD bounds to cover source-only params from the joint search
+        # space. This ensures Normalize (and other input transforms) get bounds
+        # for the full feature space, not just the target dims.
+        search_space_digest = self._expand_ssd_to_joint_space(search_space_digest)
         if experiment_data.arm_data.empty:
             self.outcomes = outcomes
             # Temporarily set datasets to None. We will construct empty datasets
@@ -567,6 +652,7 @@ def _cross_validate(
             experiment_data=cv_training_data,
             update_outcomes_and_parameters=False,
         )
+        search_space_digest = self._expand_ssd_to_joint_space(search_space_digest)
         # Add the task feature to SSD, to ensure that a multi-task model is selected.
         if len(search_space_digest.task_features) > 1:
             raise UnsupportedError(
@@ -612,7 +698,7 @@ def gen(
 
         Once the ``GeneratorRun`` is produced, it checks for any fixed parameters
         that are not in the target search space and removes them. This is a hack
-        around limitations of the ``RemoveFixed`` transform. Since we construct the
+        around limitations of the Ax ``RemoveFixed`` transform. Since we construct the
         transforms with the joint space, we end up adding back all fixed parameters
         from the joint space rather than adding only the parameters from the
         target search space. A proper fix would require passing in the search space
@@ -633,6 +719,17 @@ def gen(
                 if fixed_features is None:
                     fixed_features = ObservationFeatures(parameters={})
                 fixed_features.parameters.setdefault(name, target_p.value)
+        # Fix source-only params so the optimizer doesn't search over them.
+        # Center is a reasonable default; LearnedFeatureImputation overwrites
+        # these with learned values when configured.
+        joint_center = self.joint_search_space.compute_naive_center()
+        for name, param in self.joint_search_space.parameters.items():
+            if name not in search_space.parameters and isinstance(
+                param, RangeParameter
+            ):
+                if fixed_features is None:
+                    fixed_features = ObservationFeatures(parameters={})
+                fixed_features.parameters.setdefault(name, joint_center[name])
         generator_run = super().gen(
             n=n,
             search_space=search_space,
@@ -719,12 +816,8 @@ def transfer_learning_generator_specs_constructor(
         selector in case there is model selection enabled.
     """
     input_transform_classes: list[type[InputTransform]] = [Normalize]
-    input_transform_options = {
-        "Normalize": {
-            # None for bounds here ensures we do not use bounds from
-            # the search space digest.
-            "bounds": None,
-        }
+    input_transform_options: dict[str, dict[str, Any]] = {
+        "Normalize": {},
     }
     transforms = transforms or MBM_X_trans + [MetadataToTask] + Y_trans
     transform_configs = get_derelativize_config(
diff --git a/ax/adapter/transfer_learning/tests/test_adapter.py b/ax/adapter/transfer_learning/tests/test_adapter.py
@@ -0,0 +1,116 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
+
+from unittest.mock import MagicMock, PropertyMock
+
+from ax.adapter.transfer_learning.adapter import TransferLearningAdapter
+from ax.core.parameter import ParameterType, RangeParameter
+from ax.core.search_space import SearchSpace, SearchSpaceDigest
+from ax.exceptions.core import UnsupportedError
+from ax.utils.common.testutils import TestCase
+
+
+class ExpandSsdToJointSpaceTest(TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        self.adapter = MagicMock(spec=TransferLearningAdapter)
+
+    def _make_joint_ss(self, params: dict[str, tuple[float, float]]) -> SearchSpace:
+        return SearchSpace(
+            parameters=[
+                RangeParameter(
+                    name=n,
+                    lower=lo,
+                    upper=hi,
+                    parameter_type=ParameterType.FLOAT,
+                )
+                for n, (lo, hi) in params.items()
+            ]
+        )
+
+    def test_no_extra_params_returns_unchanged(self) -> None:
+        type(self.adapter).joint_search_space = PropertyMock(
+            return_value=self._make_joint_ss({"x1": (0, 1), "x2": (0, 1)})
+        )
+        ssd = SearchSpaceDigest(
+            feature_names=["x1", "x2", "task"],
+            bounds=[(0, 1), (0, 1), (0, 2)],
+            task_features=[2],
+            target_values={2: 0},
+        )
+        result = TransferLearningAdapter._expand_ssd_to_joint_space(self.adapter, ssd)
+        self.assertIs(result, ssd)
+
+    def test_single_task_feature_inserts_before_task(self) -> None:
+        type(self.adapter).joint_search_space = PropertyMock(
+            return_value=self._make_joint_ss(
+                {"x1": (0, 1), "x2": (0, 1), "x3": (-2, 5)}
+            )
+        )
+        ssd = SearchSpaceDigest(
+            feature_names=["x1", "x2", "task"],
+            bounds=[(0, 1), (0, 1), (0, 2)],
+            task_features=[2],
+            target_values={2: 0},
+        )
+        result = TransferLearningAdapter._expand_ssd_to_joint_space(self.adapter, ssd)
+        self.assertEqual(result.feature_names, ["x1", "x2", "x3", "task"])
+        self.assertEqual(result.bounds, [(0, 1), (0, 1), (-2, 5), (0, 2)])
+        self.assertEqual(result.task_features, [3])
+        self.assertEqual(result.target_values, {3: 0})
+
+    def test_zero_task_features_appends(self) -> None:
+        type(self.adapter).joint_search_space = PropertyMock(
+            return_value=self._make_joint_ss({"x1": (0, 1), "x2": (-1, 3)})
+        )
+        ssd = SearchSpaceDigest(
+            feature_names=["x1"],
+            bounds=[(0, 1)],
+        )
+        result = TransferLearningAdapter._expand_ssd_to_joint_space(self.adapter, ssd)
+        self.assertEqual(result.feature_names, ["x1", "x2"])
+        self.assertEqual(result.bounds, [(0, 1), (-1, 3)])
+
+    def test_discrete_choices_at_task_idx_raises(self) -> None:
+        type(self.adapter).joint_search_space = PropertyMock(
+            return_value=self._make_joint_ss({"x1": (0, 1), "x2": (0, 1), "x3": (0, 1)})
+        )
+        ssd = SearchSpaceDigest(
+            feature_names=["x1", "x2", "task"],
+            bounds=[(0, 1), (0, 1), (0, 2)],
+            task_features=[2],
+            target_values={2: 0},
+            discrete_choices={2: [0, 1, 2]},
+        )
+        with self.assertRaisesRegex(UnsupportedError, "discrete_choices"):
+            TransferLearningAdapter._expand_ssd_to_joint_space(self.adapter, ssd)
+
+    def test_hierarchical_dependencies_at_task_idx_raises(self) -> None:
+        type(self.adapter).joint_search_space = PropertyMock(
+            return_value=self._make_joint_ss({"x1": (0, 1), "x2": (0, 1), "x3": (0, 1)})
+        )
+        ssd = SearchSpaceDigest(
+            feature_names=["x1", "x2", "task"],
+            bounds=[(0, 1), (0, 1), (0, 2)],
+            task_features=[2],
+            target_values={2: 0},
+            hierarchical_dependencies={2: {0: [1]}},
+        )
+        with self.assertRaisesRegex(UnsupportedError, "hierarchical_dependencies"):
+            TransferLearningAdapter._expand_ssd_to_joint_space(self.adapter, ssd)
+
+    def test_multiple_task_features_raises(self) -> None:
+        type(self.adapter).joint_search_space = PropertyMock(
+            return_value=self._make_joint_ss({"x1": (0, 1), "x2": (0, 1), "x3": (0, 1)})
+        )
+        ssd = SearchSpaceDigest(
+            feature_names=["x1", "task1", "task2"],
+            bounds=[(0, 1), (0, 1), (0, 1)],
+            task_features=[1, 2],
+        )
+        with self.assertRaisesRegex(UnsupportedError, "Multiple task features"):
+            TransferLearningAdapter._expand_ssd_to_joint_space(self.adapter, ssd)
diff --git a/ax/generators/torch/botorch_modular/surrogate.py b/ax/generators/torch/botorch_modular/surrogate.py
@@ -738,11 +738,15 @@ def fit(
                     candidate_metadata=candidate_metadata,
                 )
 
-            # Only update the outcome names and models if the dataset input matches
-            # the feature names from the search space digest. Otherwise we only
-            # keep the model within self._submodels as it may be models fitted on
-            # auxiliary data such as the preference model for BOPE
-            if set(dataset.feature_names) == feature_names_set:
+            # Only update the outcome names and models if the dataset input
+            # matches the feature names from the SSD. In heterogeneous TL,
+            # _expand_ssd_to_joint_space adds source-only features to the SSD,
+            # so the target MultiTaskDataset's feature_names will be a strict
+            # subset -- the missing names are source-only params.
+            if set(dataset.feature_names) == feature_names_set or (
+                isinstance(dataset, MultiTaskDataset)
+                and set(dataset.feature_names).issubset(feature_names_set)
+            ):
                 models.append(model)
                 outcome_names.extend(dataset.outcome_names)