Skip to content

Commit 0759d38

Browse files
esantorella authored and facebook-github-bot committed
Add unit test for LCBench early stopping problem; add to registry; remove metric_name (#3365)
Summary: Pull Request resolved: #3365

- Add LCBench problems to the registry.
- Remove the 'metric_name' argument to the early stopping problem; the unit test revealed that this argument wasn't actually working, since the outcome names on the optimization config didn't match the outcome names on the test function.
- Combine the baseline values for the transfer-learning and early-stopping problems, since they are the same whenever they are both present.

Reviewed By: ltiao

Differential Revision: D69615360

fbshipit-source-id: 09106031d334bd899909208b6b9af395d70ff3cd
1 parent fe7fd5a commit 0759d38

File tree

8 files changed

+166
-101
lines changed

8 files changed

+166
-101
lines changed

ax/benchmark/problems/registry.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@
1616
get_pytorch_cnn_torchvision_benchmark_problem,
1717
)
1818
from ax.benchmark.problems.runtime_funcs import int_from_params
19+
from ax.benchmark.problems.surrogate.lcbench.early_stopping import (
20+
get_lcbench_early_stopping_benchmark_problem,
21+
)
22+
from ax.benchmark.problems.surrogate.lcbench.transfer_learning import (
23+
get_lcbench_benchmark_problem,
24+
)
1925
from ax.benchmark.problems.synthetic.bandit import get_bandit_problem
2026
from ax.benchmark.problems.synthetic.discretized.mixed_integer import (
2127
get_discrete_ackley,
@@ -147,6 +153,13 @@ class BenchmarkProblemRegistryEntry:
147153
factory_fn=get_jenatton_benchmark_problem,
148154
factory_kwargs={"num_trials": 50, "observe_noise_sd": False},
149155
),
156+
"LCBench:v1 Fashion-MNIST": BenchmarkProblemRegistryEntry(
157+
get_lcbench_benchmark_problem, factory_kwargs={"dataset_name": "Fashion-MNIST"}
158+
),
159+
"LCBench Early Stopping Fashion-MNIST": BenchmarkProblemRegistryEntry(
160+
get_lcbench_early_stopping_benchmark_problem,
161+
factory_kwargs={"dataset_name": "Fashion-MNIST"},
162+
),
150163
"levy4": BenchmarkProblemRegistryEntry(
151164
factory_fn=create_problem_from_botorch,
152165
factory_kwargs={

ax/benchmark/problems/surrogate/lcbench/data.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,14 @@ def url(self) -> str:
9090

9191
@dataclass(kw_only=True)
9292
class LCBenchData:
93+
"""
94+
Args:
95+
parameter_df: DataFrame with columns corresponding to the names of the
96+
parameters in get_lcbench_parameter_names().
97+
metric_series: Series of metric values with index names "trial" and "epoch".
98+
timestamp_series: Series of timestamps with index name "trial".
99+
"""
100+
93101
parameter_df: pd.DataFrame
94102
metric_series: pd.Series
95103
timestamp_series: pd.Series

ax/benchmark/problems/surrogate/lcbench/early_stopping.py

Lines changed: 6 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
)
2424
from ax.benchmark.problems.surrogate.lcbench.transfer_learning import DEFAULT_NUM_TRIALS
2525
from ax.benchmark.problems.surrogate.lcbench.utils import (
26+
BASELINE_VALUES,
2627
DEFAULT_METRIC_NAME,
2728
get_lcbench_log_scale_parameter_names,
2829
get_lcbench_optimization_config,
@@ -45,43 +46,7 @@
4546

4647
TRegressorProtocol = TypeVar("TRegressorProtocol", bound="RegressorProtocol")
4748

48-
BASELINE_VALUES: dict[str, float] = {
49-
"APSFailure": 97.75948131763847,
50-
"Amazon_employee_access": 93.39364177908142,
51-
"Australian": 88.1445880383116,
52-
"Fashion-MNIST": 84.75904272864778,
53-
"KDDCup09_appetency": 96.13544312868322,
54-
"MiniBooNE": 85.8639428612948,
55-
"adult": 79.50334987749676,
56-
"airlines": 58.96099030718572,
57-
"albert": 63.885932360810884,
58-
"bank-marketing": 83.72755317459641,
59-
"blood-transfusion-service-center": 62.651717620524835,
60-
"car": 78.59464531457958,
61-
"christine": 72.22719165860138,
62-
"cnae-9": 92.24923138962973,
63-
"connect-4": 63.808749677494774,
64-
"covertype": 61.61393200315512,
65-
"credit-g": 70.45312807563056,
66-
"dionis": 53.71071232033245,
67-
"fabert": 64.44304132875557,
68-
"helena": 18.239085505279544,
69-
"higgs": 64.74999655474926,
70-
"jannis": 57.82155396833136,
71-
"jasmine": 80.48475426337272,
72-
"jungle_chess_2pcs_raw_endgame_complete": 65.58537332961572,
73-
"kc1": 77.28692486000287,
74-
"kr-vs-kp": 93.63368446446995,
75-
"mfeat-factors": 94.72758417873838,
76-
"nomao": 93.73968374826451,
77-
"numerai28.6": 51.60281273196557,
78-
"phoneme": 75.20979771001986,
79-
"segment": 78.81992685291081,
80-
"shuttle": 96.45744339531132,
81-
"sylvine": 91.15923021902736,
82-
"vehicle": 67.40729695042013,
83-
"volkert": 49.204981948803855,
84-
}
49+
8550
OPTIMAL_VALUES: dict[str, float] = {
8651
"APSFailure": 98.97643280029295,
8752
"Amazon_employee_access": 94.1208953857422,
@@ -256,18 +221,17 @@ def __post_init__(
256221

257222
def evaluate_true(self, params: Mapping[str, TParamValue]) -> torch.Tensor:
258223
X = pd.DataFrame.from_records(data=[params])
259-
Y = self.metric_surrogate.predict(X) # shape: (1, 50)
224+
Y = self.metric_surrogate.predict(X=X) # shape: (1, 50)
260225
return torch.from_numpy(Y)
261226

262227
def step_runtime(self, params: Mapping[str, TParamValue]) -> float:
263228
X = pd.DataFrame.from_records(data=[params])
264-
Y = self.runtime_surrogate.predict(X) # shape: (1,)
229+
Y = self.runtime_surrogate.predict(X=X) # shape: (1,)
265230
return Y.item()
266231

267232

268233
def get_lcbench_early_stopping_benchmark_problem(
269234
dataset_name: str,
270-
metric_name: str = DEFAULT_METRIC_NAME,
271235
num_trials: int = DEFAULT_NUM_TRIALS,
272236
constant_step_runtime: bool = False,
273237
noise_std: Mapping[str, float] | float = 0.0,
@@ -279,7 +243,6 @@ def get_lcbench_early_stopping_benchmark_problem(
279243
Args:
280244
dataset_name: Must be one of the keys of `DEFAULT_AND_OPTIMAL_VALUES`, which
281245
correspond to the names of the datasets available in LCBench.
282-
metric_name: The name of the metric to use for the objective.
283246
num_trials: The number of optimization trials to run.
284247
constant_step_runtime: Determines if the step runtime is fixed or varies
285248
based on the hyperparameters.
@@ -296,14 +259,14 @@ def get_lcbench_early_stopping_benchmark_problem(
296259
if dataset_name not in DATASET_NAMES:
297260
raise UserInputError(f"`dataset_name` must be one of {sorted(DATASET_NAMES)}")
298261

299-
name = f"LCBench_Surrogate_{dataset_name}_{metric_name}:v1"
262+
name = f"LCBench_Surrogate_{dataset_name}:v1"
300263

301264
optimal_value = OPTIMAL_VALUES[dataset_name]
302265
baseline_value = BASELINE_VALUES[dataset_name]
303266

304267
search_space: SearchSpace = get_lcbench_search_space()
305268
optimization_config: OptimizationConfig = get_lcbench_optimization_config(
306-
metric_name=metric_name,
269+
metric_name=DEFAULT_METRIC_NAME,
307270
observe_noise_sd=observe_noise_sd,
308271
use_map_metric=True,
309272
)

ax/benchmark/problems/surrogate/lcbench/transfer_learning.py

Lines changed: 2 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,11 @@
1414
from ax.benchmark.benchmark_problem import BenchmarkProblem
1515
from ax.benchmark.benchmark_test_functions.surrogate import SurrogateTestFunction
1616
from ax.benchmark.problems.surrogate.lcbench.utils import (
17+
BASELINE_VALUES,
1718
DEFAULT_METRIC_NAME,
1819
get_lcbench_optimization_config,
19-
get_lcbench_search_space,
2020
)
21-
from ax.core.experiment import Experiment
2221
from ax.core.optimization_config import OptimizationConfig
23-
from ax.core.search_space import SearchSpace
2422
from ax.exceptions.core import UserInputError
2523
from ax.modelbridge.registry import Cont_X_trans, Generators, Y_trans
2624
from ax.modelbridge.torch import TorchAdapter
@@ -35,30 +33,7 @@
3533

3634
DEFAULT_NUM_TRIALS: int = 30
3735

38-
BASELINE_VALUES: dict[str, float] = {
39-
"KDDCup09_appetency": 94.84762378096477,
40-
"APSFailure": 97.75754021610224,
41-
"albert": 63.893807756587876,
42-
"Amazon_employee_access": 93.92434556024065,
43-
"Australian": 89.35657945184583,
44-
"Fashion-MNIST": 84.94202558279305,
45-
"car": 80.47958436427733,
46-
"christine": 72.27323565977512,
47-
"cnae-9": 94.15832149950144,
48-
"covertype": 61.552294168420595,
49-
"dionis": 54.99212355534204,
50-
"fabert": 64.88207128531921,
51-
"helena": 19.156010689783603,
52-
"higgs": 64.84690723875762,
53-
"jannis": 57.58628096200955,
54-
"jasmine": 80.6321652907534,
55-
"kr-vs-kp": 94.53560263952683,
56-
"mfeat-factors": 95.58423367904923,
57-
"nomao": 93.51402242799601,
58-
"shuttle": 96.43481523407816,
59-
"sylvine": 91.91719206036713,
60-
"volkert": 49.50686237250762,
61-
}
36+
6237
DEFAULT_AND_OPTIMAL_VALUES: dict[str, tuple[float, float]] = {
6338
"KDDCup09_appetency": (87.14437173839048, 100.41903197808242),
6439
"APSFailure": (97.3412499690734, 98.38099041845653),
@@ -85,36 +60,6 @@
8560
}
8661

8762

88-
def get_lcbench_experiment(
89-
metric_name: str = DEFAULT_METRIC_NAME,
90-
observe_noise_stds: bool = False,
91-
) -> Experiment:
92-
"""Construct an experiment with the LCBench search space and optimization config.
93-
Used in N5808878 to fit the initial surrogate, and may be useful for the setup
94-
of transfer learning experiments.
95-
96-
Args:
97-
observe_noise_stds: Whether or not the magnitude of the observation noise
98-
is known.
99-
metric_name: The name of the metric to use for the objective.
100-
101-
Returns:
102-
An experiment with the LCBench search space and optimization config.
103-
"""
104-
105-
search_space: SearchSpace = get_lcbench_search_space()
106-
optimization_config: OptimizationConfig = get_lcbench_optimization_config(
107-
metric_name=metric_name,
108-
observe_noise_sd=observe_noise_stds,
109-
use_map_metric=False,
110-
)
111-
112-
experiment = Experiment(
113-
search_space=search_space, optimization_config=optimization_config
114-
)
115-
return experiment
116-
117-
11863
def get_lcbench_surrogate() -> Surrogate:
11964
"""Construct a surrogate used to fit the LCBench data.
12065

ax/benchmark/problems/surrogate/lcbench/utils.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,44 @@
1313

1414
DEFAULT_METRIC_NAME: str = "Train/val_accuracy"
1515

16+
BASELINE_VALUES: dict[str, float] = {
17+
"APSFailure": 97.75948131763847,
18+
"Amazon_employee_access": 93.39364177908142,
19+
"Australian": 88.1445880383116,
20+
"Fashion-MNIST": 84.75904272864778,
21+
"KDDCup09_appetency": 96.13544312868322,
22+
"MiniBooNE": 85.8639428612948,
23+
"adult": 79.50334987749676,
24+
"airlines": 58.96099030718572,
25+
"albert": 63.885932360810884,
26+
"bank-marketing": 83.72755317459641,
27+
"blood-transfusion-service-center": 62.651717620524835,
28+
"car": 78.59464531457958,
29+
"christine": 72.22719165860138,
30+
"cnae-9": 92.24923138962973,
31+
"connect-4": 63.808749677494774,
32+
"covertype": 61.61393200315512,
33+
"credit-g": 70.45312807563056,
34+
"dionis": 53.71071232033245,
35+
"fabert": 64.44304132875557,
36+
"helena": 18.239085505279544,
37+
"higgs": 64.74999655474926,
38+
"jannis": 57.82155396833136,
39+
"jasmine": 80.48475426337272,
40+
"jungle_chess_2pcs_raw_endgame_complete": 65.58537332961572,
41+
"kc1": 77.28692486000287,
42+
"kr-vs-kp": 93.63368446446995,
43+
"mfeat-factors": 94.72758417873838,
44+
"nomao": 93.73968374826451,
45+
"numerai28.6": 51.60281273196557,
46+
"phoneme": 75.20979771001986,
47+
"segment": 78.81992685291081,
48+
"shuttle": 96.45744339531132,
49+
"sylvine": 91.15923021902736,
50+
"vehicle": 67.40729695042013,
51+
"volkert": 49.204981948803855,
52+
}
53+
1654

1755
def get_lcbench_search_space() -> SearchSpace:
1856
"""Construct the LCBench search space."""
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
#
3+
# This source code is licensed under the MIT license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
# pyre-strict
7+
8+
from unittest.mock import patch
9+
10+
from ax.benchmark.problems.surrogate.lcbench.early_stopping import (
11+
BASELINE_VALUES,
12+
get_lcbench_early_stopping_benchmark_problem,
13+
OPTIMAL_VALUES,
14+
)
15+
from ax.benchmark.problems.surrogate.lcbench.utils import DEFAULT_METRIC_NAME
16+
from ax.utils.common.testutils import TestCase
17+
from ax.utils.testing.benchmark_stubs import get_mock_lcbench_data
18+
19+
20+
class TestEarlyStoppingProblem(TestCase):
21+
def test_get_lcbench_early_stopping_problem(self) -> None:
22+
# Just test one problem for speed. We are mocking out the data load
23+
# anyway, so there is nothing to distinguish these problems from each
24+
# other
25+
26+
observe_noise_sd = True
27+
num_trials = 4
28+
noise_std = 1.0
29+
seed = 27
30+
dataset_name = "credit-g"
31+
32+
early_stopping_path = get_lcbench_early_stopping_benchmark_problem.__module__
33+
with patch(
34+
f"{early_stopping_path}.load_lcbench_data",
35+
return_value=get_mock_lcbench_data(),
36+
) as mock_load_lcbench_data, patch(
37+
# Fitting a surrogate won't work with this small synthetic data
38+
f"{early_stopping_path}._create_surrogate_regressor"
39+
) as mock_create_surrogate_regressor:
40+
problem = get_lcbench_early_stopping_benchmark_problem(
41+
dataset_name=dataset_name,
42+
observe_noise_sd=observe_noise_sd,
43+
num_trials=num_trials,
44+
constant_step_runtime=True,
45+
noise_std=noise_std,
46+
seed=seed,
47+
)
48+
49+
mock_load_lcbench_data.assert_called_once()
50+
mock_load_lcbench_data_kwargs = mock_load_lcbench_data.call_args.kwargs
51+
self.assertEqual(mock_load_lcbench_data_kwargs["dataset_name"], dataset_name)
52+
create_surrogate_regressor_call_args = (
53+
mock_create_surrogate_regressor.call_args_list
54+
)
55+
self.assertEqual(len(create_surrogate_regressor_call_args), 2)
56+
self.assertEqual(create_surrogate_regressor_call_args[0].kwargs["seed"], seed)
57+
self.assertEqual(problem.noise_std, noise_std)
58+
self.assertEqual(
59+
problem.optimization_config.objective.metric.name, DEFAULT_METRIC_NAME
60+
)
61+
self.assertIsNone(problem.step_runtime_function)
62+
self.assertEqual(problem.optimal_value, OPTIMAL_VALUES[dataset_name])
63+
self.assertEqual(problem.baseline_value, BASELINE_VALUES[dataset_name])

ax/benchmark/tests/problems/test_problems.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,13 @@
66
# pyre-strict
77

88

9+
from unittest.mock import patch
10+
911
from ax.benchmark.benchmark_problem import BenchmarkProblem
1012
from ax.benchmark.problems.registry import BENCHMARK_PROBLEM_REGISTRY, get_problem
1113
from ax.benchmark.problems.runtime_funcs import int_from_params
1214
from ax.utils.common.testutils import TestCase
15+
from ax.utils.testing.benchmark_stubs import get_mock_lcbench_data
1316

1417

1518
class TestProblems(TestCase):
@@ -19,7 +22,13 @@ def test_load_problems(self) -> None:
1922
if "MNIST" in name:
2023
continue # Skip these as they cause the test to take a long time
2124

22-
problem = get_problem(problem_key=name)
25+
# Avoid downloading data from the internet
26+
with patch(
27+
"ax.benchmark.problems.surrogate."
28+
"lcbench.early_stopping.load_lcbench_data",
29+
return_value=get_mock_lcbench_data(),
30+
):
31+
problem = get_problem(problem_key=name)
2332
self.assertIsInstance(problem, BenchmarkProblem, msg=name)
2433

2534
def test_name(self) -> None:

0 commit comments

Comments
 (0)