Ax/ax/analysis/plotly/tests/test_cross_validation.py at 287c7a8be91ee47b3a3a78f0ad3625d6549db5ff · bletham/Ax · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# pyre-strict

from unittest import mock

from ax.adapter.cross_validation import cross_validate
from ax.adapter.registry import Generators
from ax.analysis.plotly.cross_validation import (
    compute_cross_validation_adhoc,
    CrossValidationPlot,
)
from ax.core.trial import Trial
from ax.exceptions.core import UserInputError
from ax.service.ax_client import AxClient, ObjectiveProperties
from ax.utils.common.testutils import TestCase
from ax.utils.testing.core_stubs import get_offline_experiments, get_online_experiments
from ax.utils.testing.mock import mock_botorch_optimize
from ax.utils.testing.modeling_stubs import get_default_generation_strategy_at_MBM_node
from pyre_extensions import assert_is_instance, none_throws


class TestCrossValidationPlot(TestCase):
    """Tests for ``CrossValidationPlot`` and ``compute_cross_validation_adhoc``.

    ``setUp`` creates an ``AxClient`` experiment with one range parameter ``x``
    in ``[-1, 1]``, the objective ``bar`` (``x ** 2``, minimized) and the
    tracking metric ``foo`` (``x ** 3``), and completes ten trials on it.
    """

    @mock_botorch_optimize
    def setUp(self) -> None:
        """Create the shared ``AxClient`` experiment and complete ten trials."""
        super().setUp()
        self.client = AxClient()
        self.client.create_experiment(
            is_test=True,
            name="foo",
            parameters=[
                {
                    "name": "x",
                    "type": "range",
                    "bounds": [-1.0, 1.0],
                }
            ],
            objectives={"bar": ObjectiveProperties(minimize=True)},
            tracking_metric_names=["foo"],
        )

        for _ in range(10):
            parameterization, trial_index = self.client.get_next_trial()
            self.client.complete_trial(
                trial_index=trial_index,
                raw_data={
                    "bar": parameterization["x"] ** 2,
                    "foo": parameterization["x"] ** 3,
                },
            )

    def _assert_arms_of_observed_trials_present(self, arm_names: set[str]) -> None:
        """Assert that every trial's arm except the last one is in ``arm_names``.

        Args:
            arm_names: Arm names found in a cross validation card's dataframe.
        """
        trials = self.client.experiment.trials
        # Computed once up front rather than on every loop iteration.
        last_trial_index = max(trials.keys())
        for trial in trials.values():
            # Skip the last trial because the model was used to generate it
            # and therefore hasn't observed it
            if trial.index == last_trial_index:
                continue
            arm_name = none_throws(assert_is_instance(trial, Trial).arm).name
            self.assertIn(arm_name, arm_names)

    def _compute_on_stub_experiments(self, *, online: bool) -> None:
        """Compute ``CrossValidationPlot`` over a family of stub experiments.

        For every experiment, the plot is computed for each combination of
        ``untransform`` (``True``/``False``) and label (``None``/``"foo"``).
        Only successful completion is checked; the returned cards are discarded.

        Args:
            online: If True, iterate over ``get_online_experiments()``;
                otherwise iterate over ``get_offline_experiments()``.
        """
        experiments = get_online_experiments() if online else get_offline_experiments()
        # Same iteration order as nested loops: untransform outer, label inner.
        configurations = [
            (untransform, refined_metric_name)
            for untransform in [True, False]
            for refined_metric_name in [None, "foo"]
        ]

        for experiment_index, experiment in enumerate(experiments):
            for untransform, refined_metric_name in configurations:
                # subTest reports exactly which combination failed and lets the
                # remaining combinations still run.
                with self.subTest(
                    experiment_index=experiment_index,
                    untransform=untransform,
                    refined_metric_name=refined_metric_name,
                ):
                    generation_strategy = get_default_generation_strategy_at_MBM_node(
                        experiment=experiment
                    )

                    # Pick an arbitrary metric from the experiment's optimization
                    # config
                    metric_name = none_throws(
                        experiment.optimization_config
                    ).objective.metric_names[0]

                    analysis = CrossValidationPlot(
                        metric_names=[metric_name],
                        untransform=untransform,
                        labels={metric_name: refined_metric_name}
                        if refined_metric_name
                        else None,
                    )

                    _ = analysis.compute(
                        experiment=experiment, generation_strategy=generation_strategy
                    )

    @mock.patch(
        "ax.analysis.plotly.cross_validation.coefficient_of_determination",
        return_value=0.85,
    )
    def test_compute(self, mock_r2: mock.Mock) -> None:
        """Check the cards produced by ``compute`` for the single metric ``bar``.

        ``coefficient_of_determination`` is patched to return 0.85 so the R²
        value embedded in the card title is deterministic.
        """
        analysis = CrossValidationPlot(metric_names=["bar"])

        # Test that it fails if no GenerationStrategy is provided
        with self.assertRaisesRegex(
            UserInputError, "Must provide either a GenerationStrategy or an Adapter"
        ):
            analysis.compute()

        cards = analysis.compute(
            generation_strategy=self.client.generation_strategy
        ).flatten()
        # Should have the CV plot card and the R2 summary card
        self.assertEqual(len(cards), 2)
        card = cards[0]
        self.assertEqual(
            card.name,
            "CrossValidationPlot",
        )
        self.assertEqual(
            card.title,
            "Cross Validation for bar (R\u00b2 = 0.85)",
        )
        self.assertEqual(
            card.subtitle,
            (
                "The cross-validation plot displays the model fit for each "
                "metric in the experiment. It employs a leave-one-out "
                "approach, where the model is trained on all data except one "
                "sample, which is used for validation. The plot shows the "
                "predicted outcome for the validation set on the y-axis against "
                "its actual value on the x-axis. Points that align closely with "
                "the dotted diagonal line indicate a strong model fit, signifying "
                "accurate predictions. Additionally, the plot includes 95% "
                "confidence intervals that provide insight into the noise in "
                "observations and the uncertainty in model predictions. A "
                "horizontal, flat line of predictions indicates that the model "
                "has not picked up on sufficient signal in the data, and instead "
                "is just predicting the mean."
            ),
        )
        self.assertEqual(
            {*card.df.columns},
            {
                "arm_name",
                "observed",
                "observed_95_ci",
                "predicted",
                "predicted_95_ci",
            },
        )
        self.assertIsNotNone(card.blob)

        # Assert that _r2s is populated after compute
        self.assertIn("bar", analysis._r2s)
        self.assertAlmostEqual(analysis._r2s["bar"], 0.85)

        # Assert the R2 summary card
        r2_card = cards[1]
        self.assertEqual(r2_card.name, "CrossValidationPlot")
        self.assertEqual(r2_card.title, "Summary of model fits")

        # Assert that all arms are in the cross validation df
        # because trial index is not specified
        self._assert_arms_of_observed_trials_present(set(card.df["arm_name"].unique()))

    def test_it_can_specify_trial_index_correctly(self) -> None:
        """Check the arms in the dataframe when ``trial_index=9`` is passed."""
        analysis = CrossValidationPlot(metric_names=["bar"], trial_index=9)
        cards = analysis.compute(
            generation_strategy=self.client.generation_strategy
        ).flatten()
        card = cards[0]
        self._assert_arms_of_observed_trials_present(set(card.df["arm_name"].unique()))

    @mock.patch(
        "ax.analysis.plotly.cross_validation.cross_validate", wraps=cross_validate
    )
    def test_cross_validate_is_called_once_with_multiple_metrics(
        self, mock_cross_validate: mock.Mock
    ) -> None:
        """Check that a single ``compute`` call invokes ``cross_validate`` once.

        The patch uses ``wraps`` so the real ``cross_validate`` still runs while
        its calls are counted.
        """
        # No metric_names are given; per the test name this is expected to cover
        # multiple metrics (the experiment has "bar" and "foo"). The default
        # metric selection lives in CrossValidationPlot, outside this file.
        analysis = CrossValidationPlot()
        analysis.compute(generation_strategy=self.client.generation_strategy)
        mock_cross_validate.assert_called_once()

    @mock_botorch_optimize
    @mock.patch(
        "ax.analysis.plotly.cross_validation.coefficient_of_determination",
        return_value=0.85,
    )
    def test_compute_adhoc(self, mock_r2: mock.Mock) -> None:
        """Check ``compute_cross_validation_adhoc`` applies the given labels.

        The adapter is built directly from the experiment's data, and both
        metrics are relabeled through ``labels``.
        """
        metric_mapping = {"bar": "spunky", "foo": "foo2"}
        data = self.client.experiment.lookup_data()
        adapter = Generators.BOTORCH_MODULAR(
            experiment=self.client.experiment, data=data
        )
        cards = compute_cross_validation_adhoc(
            adapter=adapter, labels=metric_mapping
        ).flatten()
        self.assertEqual(len(cards), 3)

        plot_cards = cards[:2]
        for card in plot_cards:
            self.assertEqual(card.name, "CrossValidationPlot")
        # Each relabeled metric must appear exactly once, in either order.
        self.assertEqual(
            {card.title for card in plot_cards},
            {
                "Cross Validation for spunky (R\u00b2 = 0.85)",
                "Cross Validation for foo2 (R\u00b2 = 0.85)",
            },
        )
        # The last card is the R2 summary
        self.assertEqual(cards[2].title, "Summary of model fits")

    @TestCase.ax_long_test(
        reason=(
            "cross_validate still too slow under @mock_botorch_optimize for this test"
        )
    )
    @mock_botorch_optimize
    def test_online(self) -> None:
        """Compute the plot on experiments resembling an online setting."""
        self._compute_on_stub_experiments(online=True)

    @TestCase.ax_long_test(
        reason=(
            "cross_validate still too slow under @mock_botorch_optimize for this test"
        )
    )
    @mock_botorch_optimize
    def test_offline(self) -> None:
        """Compute the plot on experiments resembling an offline setting."""
        self._compute_on_stub_experiments(online=False)