Runner should only need to know about outcomes, not objectives vs. constraints (facebook#2963)

esantorella · facebook-github-bot · commit eaaea07c12ff · 2024-10-25T05:45:27.000-07:00
Summary:

Context: In theory, a `BenchmarkRunner` should not have to know which metrics are objectives and which are constraints, and a test function should not have to be aware of that either; both are just generating data. A `BenchmarkProblem` should store knowledge of objectives and constraints only on its `OptimizationConfig`, so that various `OptimizationConfig`s can be used without changing the runner or the test function.

For historical reasons, runners track objectives and constraints separately and add noise to them separately, because this mimics how BoTorch test functions handle this. However, we now can and should isolate the quirks of BoTorch test functions to `BoTorchTestProblem`.

This diff:
* Updates `ParamBasedTestProblem.evaluate_true` to return all outcomes, not just objectives, and gets rid of `ParamBasedTestProblem.evaluate_slack_true`, which was for constraints
* Removes `num_objectives` from `ParamBasedTestProblem`, leaving `ParamBasedTestProblem` with nothing but an `evaluate_true` method
* Removes the argument `constraint_noise_std` from `create_problem_from_botorch` and from `ParamBasedTestProblemRunner`, in favor of just using `noise_std`.

* Updates argument validation

Tangentially related changes:
* For simplicity, makes `get_noise_stds` always return a dict
* Stops allowing `noise_std` to be `None` and defaults it to zero (previously, a `None` value was eventually converted to zero anyway)

Differential Revision: D64919207
diff --git a/ax/benchmark/benchmark_problem.py b/ax/benchmark/benchmark_problem.py
@@ -301,8 +301,7 @@ def create_problem_from_botorch(
     *,
     test_problem_class: type[BaseTestProblem],
     test_problem_kwargs: dict[str, Any],
-    noise_std: float | list[float] | None = None,
-    constraint_noise_std: float | list[float] | None = None,
+    noise_std: float | list[float] = 0.0,
     num_trials: int,
     lower_is_better: bool = True,
     observe_noise_sd: bool = False,
@@ -321,11 +320,8 @@ def create_problem_from_botorch(
             to define the `search_space`, `optimization_config`, and `runner`.
         test_problem_kwargs: Keyword arguments used to instantiate the
             `test_problem_class`.
-        noise_std: Standard deviation of synthetic noise added to objectives. If
-            `None`, no noise is added. If a float, the same noise level is used
-            for all objectives.
-        constraint_noise_std: Standard deviation of synthetic noise added to
-            constraints.
+        noise_std: Standard deviation of synthetic noise added to outcomes. If a
+            float, the same noise level is used for all outcomes.
         lower_is_better: Whether this is a minimization problem. For MOO, this
             applies to all objectives.
         num_trials: Simply the `num_trials` of the `BenchmarkProblem` created.
@@ -392,7 +388,6 @@ def create_problem_from_botorch(
                 param_names=list(search_space.parameters.keys()),
             ),
             noise_std=noise_std,
-            constraint_noise_std=constraint_noise_std,
         ),
         num_trials=num_trials,
         observe_noise_stds=observe_noise_sd,
diff --git a/ax/benchmark/problems/hpo/torchvision.py b/ax/benchmark/problems/hpo/torchvision.py
@@ -118,7 +118,6 @@ def train_and_evaluate(
 @dataclass(kw_only=True)
 class PyTorchCNNTorchvisionParamBasedProblem(ParamBasedTestProblem):
     name: str  # The name of the dataset to load -- MNIST or FashionMNIST
-    num_objectives: int = 1
     device: torch.device = field(
         default_factory=lambda: torch.device(
             "cuda" if torch.cuda.is_available() else "cpu"
diff --git a/ax/benchmark/problems/synthetic/hss/jenatton.py b/ax/benchmark/problems/synthetic/hss/jenatton.py
@@ -55,8 +55,6 @@ def jenatton_test_function(
 class Jenatton(ParamBasedTestProblem):
     """Jenatton test function for hierarchical search spaces."""
 
-    num_objectives: int = 1
-
     # pyre-fixme[14]: Inconsistent override
     def evaluate_true(self, params: Mapping[str, float | int | None]) -> torch.Tensor:
         # pyre-fixme: Incompatible parameter type [6]: In call
diff --git a/ax/benchmark/runners/base.py b/ax/benchmark/runners/base.py
@@ -84,7 +84,7 @@ def evaluate_oracle(self, parameters: Mapping[str, TParamValue]) -> ndarray:
         return self.get_Y_true(params=params).numpy()
 
     @abstractmethod
-    def get_noise_stds(self) -> None | float | dict[str, float]:
+    def get_noise_stds(self) -> dict[str, float]:
         """
         Return the standard errors for the synthetic noise to be applied to the
         observed values.
@@ -110,7 +110,9 @@ def run(self, trial: BaseTrial) -> dict[str, Any]:
         Ys, Ystds = {}, {}
         noise_stds = self.get_noise_stds()
 
-        if noise_stds is not None:
+        noiseless = all(v == 0 for v in noise_stds.values())
+
+        if not noiseless:
             # extract arm weights to adjust noise levels accordingly
             if isinstance(trial, BatchTrial):
                 # normalize arm weights (we assume that the noise level is defined)
@@ -122,22 +124,15 @@ def run(self, trial: BaseTrial) -> dict[str, Any]:
             else:
                 nlzd_arm_weights = {checked_cast(Trial, trial).arm: 1.0}
             # generate a tensor of noise levels that we'll reuse below
-            if isinstance(noise_stds, float):
-                noise_stds_tsr = torch.full(
-                    (len(self.outcome_names),),
-                    noise_stds,
-                    dtype=torch.double,
-                )
-            else:
-                noise_stds_tsr = torch.tensor(
-                    [noise_stds[metric_name] for metric_name in self.outcome_names],
-                    dtype=torch.double,
-                )
+            noise_stds_tsr = torch.tensor(
+                [noise_stds[metric_name] for metric_name in self.outcome_names],
+                dtype=torch.double,
+            )
 
         for arm in trial.arms:
             # Case where we do have a ground truth
             Y_true = self.get_Y_true(arm.parameters)
-            if noise_stds is None:
+            if noiseless:
                 # No noise, so just return the true outcome.
                 Ystds[arm.name] = [0.0] * len(Y_true)
                 Ys[arm.name] = Y_true.tolist()
diff --git a/ax/benchmark/runners/botorch_test.py b/ax/benchmark/runners/botorch_test.py
@@ -13,8 +13,6 @@
 import torch
 from ax.benchmark.runners.base import BenchmarkRunner
 from ax.core.types import TParamValue
-from ax.exceptions.core import UnsupportedError
-from botorch.test_functions.multi_objective import MultiObjectiveTestProblem
 from botorch.test_functions.synthetic import BaseTestProblem, ConstrainedBaseTestProblem
 from botorch.utils.transforms import normalize, unnormalize
 from torch import Tensor
@@ -28,17 +26,15 @@ class ParamBasedTestProblem(ABC):
     (Noise - if desired - is added by the runner.)
     """
 
-    num_objectives: int
-
     @abstractmethod
     def evaluate_true(self, params: Mapping[str, TParamValue]) -> Tensor:
-        """Evaluate noiselessly."""
-        ...
+        """
+        Evaluate noiselessly.
 
-    def evaluate_slack_true(self, params: Mapping[str, TParamValue]) -> Tensor:
-        raise NotImplementedError(
-            f"{self.__class__.__name__} does not support constraints."
-        )
+        Returns:
+            1d tensor of shape (num_outcomes,).
+        """
+        ...
 
 
 @dataclass(kw_only=True)
@@ -57,24 +53,18 @@ class BoTorchTestProblem(ParamBasedTestProblem):
             5 will correspond to 0.5 while evaluating the test problem.
             If modified bounds are not provided, the test problem will be
             evaluated using the raw parameter values.
-        num_objectives: The number of objectives.
     """
 
     botorch_problem: BaseTestProblem
     modified_bounds: list[tuple[float, float]] | None = None
-    num_objectives: int = 1
 
     def __post_init__(self) -> None:
-        if isinstance(self.botorch_problem, MultiObjectiveTestProblem):
-            self.num_objectives = self.botorch_problem.num_objectives
-        if self.botorch_problem.noise_std is not None:
-            raise ValueError(
-                "noise_std should be set on the runner, not the test problem."
-            )
-        if getattr(self.botorch_problem, "constraint_noise_std", None) is not None:
+        if (
+            self.botorch_problem.noise_std is not None
+            or getattr(self.botorch_problem, "constraint_noise_std", None) is not None
+        ):
             raise ValueError(
-                "constraint_noise_std should be set on the runner, not the test "
-                "problem."
+                "noise should be set on the `BenchmarkRunner`, not the test function."
             )
         self.botorch_problem = self.botorch_problem.to(dtype=torch.double)
 
@@ -96,20 +86,11 @@ def tensorize_params(self, params: Mapping[str, int | float]) -> torch.Tensor:
     # pyre-fixme [14]: inconsistent override
     def evaluate_true(self, params: Mapping[str, float | int]) -> torch.Tensor:
         x = self.tensorize_params(params=params)
-        return self.botorch_problem(x)
-
-    # pyre-fixme [14]: inconsistent override
-    def evaluate_slack_true(self, params: Mapping[str, float | int]) -> torch.Tensor:
-        if not isinstance(self.botorch_problem, ConstrainedBaseTestProblem):
-            raise UnsupportedError(
-                "`evaluate_slack_true` is only supported when the BoTorch "
-                "problem is a `ConstrainedBaseTestProblem`."
-            )
-        # todo: could return x so as to not recompute
-        # or could do both methods together, track indices of outcomes,
-        # and only negate the non-constraints
-        x = self.tensorize_params(params=params)
-        return self.botorch_problem.evaluate_slack_true(x)
+        objectives = self.botorch_problem(x).view(-1)
+        if isinstance(self.botorch_problem, ConstrainedBaseTestProblem):
+            constraints = self.botorch_problem.evaluate_slack_true(x).view(-1)
+            return torch.cat([objectives, constraints], dim=-1)
+        return objectives
 
 
 @dataclass(kw_only=True)
@@ -119,7 +100,7 @@ class ParamBasedTestProblemRunner(BenchmarkRunner):
 
     Given a trial, the Runner will use its `test_problem` to evaluate the
     problem noiselessly for each arm in the trial, and then add noise as
-    specified by the `noise_std` and `constraint_noise_std`. It will return
+    specified by the `noise_std`. It will return
     metadata including the outcome names and values of metrics.
 
     Args:
@@ -132,64 +113,26 @@ class ParamBasedTestProblemRunner(BenchmarkRunner):
     """
 
     test_problem: ParamBasedTestProblem
-    noise_std: float | list[float] | None = None
-    constraint_noise_std: float | list[float] | None = None
+    noise_std: float | list[float] | dict[str, float] = 0.0
 
-    @property
-    def _is_constrained(self) -> bool:
-        return isinstance(self.test_problem, BoTorchTestProblem) and isinstance(
-            self.test_problem.botorch_problem, ConstrainedBaseTestProblem
-        )
-
-    def get_noise_stds(self) -> None | float | dict[str, float]:
+    def get_noise_stds(self) -> dict[str, float]:
         noise_std = self.noise_std
-        noise_std_dict: dict[str, float] = {}
-        num_obj = self.test_problem.num_objectives
-
-        # populate any noise_stds for constraints
-        if self._is_constrained:
-            constraint_noise_std = self.constraint_noise_std
-            if isinstance(constraint_noise_std, list):
-                for i, cns in enumerate(constraint_noise_std, start=num_obj):
-                    if cns is not None:
-                        noise_std_dict[self.outcome_names[i]] = cns
-            elif constraint_noise_std is not None:
-                noise_std_dict[self.outcome_names[num_obj]] = constraint_noise_std
-
-        # if none of the constraints are subject to noise, then we may return
-        # a single float or None for the noise level
-
-        if not noise_std_dict and not isinstance(noise_std, list):
-            return noise_std  # either a float or None
-
-        if isinstance(noise_std, list):
-            if not len(noise_std) == num_obj:
-                # this shouldn't be possible due to validation upon construction
-                # of the multi-objective problem, but better safe than sorry
+        if isinstance(noise_std, float):
+            return {name: noise_std for name in self.outcome_names}
+        elif isinstance(noise_std, dict):
+            if not set(noise_std.keys()) == set(self.outcome_names):
                 raise ValueError(
-                    "Noise std must have length equal to number of objectives."
+                    "Noise std must have keys equal to outcome names if given as "
+                    "a dict."
                 )
-        else:
-            noise_std = [noise_std for _ in range(num_obj)]
-
-        for i, noise_std_ in enumerate(noise_std):
-            if noise_std_ is not None:
-                noise_std_dict[self.outcome_names[i]] = noise_std_
-
-        return noise_std_dict
+            return noise_std
+        # list of floats
+        return dict(zip(self.outcome_names, noise_std, strict=True))
 
     def get_Y_true(self, params: Mapping[str, TParamValue]) -> Tensor:
         """Evaluates the test problem.
 
         Returns:
-            A `batch_shape x m`-dim tensor of ground truth (noiseless) evaluations.
+            An `m`-dim tensor of ground truth (noiseless) evaluations.
         """
-        Y_true = self.test_problem.evaluate_true(params).view(-1)
-        if self._is_constrained:
-            # Convention: Concatenate objective and black box constraints. `view()`
-            # makes the inputs 1d, so the resulting `Y_true` are also 1d.
-            Y_true = torch.cat(
-                [Y_true, self.test_problem.evaluate_slack_true(params).view(-1)],
-                dim=-1,
-            )
-        return Y_true
+        return torch.atleast_1d(self.test_problem.evaluate_true(params=params))
diff --git a/ax/benchmark/runners/surrogate.py b/ax/benchmark/runners/surrogate.py
@@ -84,8 +84,11 @@ def datasets(self) -> list[SupervisedDataset]:
             self.set_surrogate_and_datasets()
         return none_throws(self._datasets)
 
-    def get_noise_stds(self) -> None | float | dict[str, float]:
-        return self.noise_stds
+    def get_noise_stds(self) -> dict[str, float]:
+        noise_std = self.noise_stds
+        if isinstance(noise_std, float):
+            return {name: noise_std for name in self.outcome_names}
+        return noise_std
 
     # pyre-fixme[14]: Inconsistent override
     def get_Y_true(self, params: Mapping[str, float | int]) -> Tensor:
diff --git a/ax/benchmark/tests/runners/test_botorch_test_problem.py b/ax/benchmark/tests/runners/test_botorch_test_problem.py
diff --git a/ax/benchmark/tests/test_benchmark_problem.py b/ax/benchmark/tests/test_benchmark_problem.py
diff --git a/ax/utils/testing/benchmark_stubs.py b/ax/utils/testing/benchmark_stubs.py