|
10 | 10 | from ax.analysis.summary import Summary |
11 | 11 | from ax.api.client import Client |
12 | 12 | from ax.api.configs import RangeParameterConfig |
13 | | -from ax.core.arm import Arm |
14 | 13 | from ax.core.base_trial import TrialStatus |
15 | 14 | from ax.core.data import Data |
| 15 | +from ax.core.experiment import Experiment |
16 | 16 | from ax.core.metric import Metric |
17 | 17 | from ax.core.objective import MultiObjective, Objective |
18 | 18 | from ax.core.optimization_config import MultiObjectiveOptimizationConfig |
@@ -224,35 +224,97 @@ def test_trial_status_filter(self) -> None: |
224 | 224 | self.assertIn(0, card.df["trial_index"].values) |
225 | 225 | self.assertIn(1, card.df["trial_index"].values) |
226 | 226 |
|
227 | | - def test_compute_with_preference_objective_skips_relativization(self) -> None: |
228 | | - """Summary should skip relativization when a preference metric is an |
229 | | - objective, since binary 0/1 labels have SQ mean near zero which causes |
230 | | - 'mean_control too small' errors.""" |
def _attach_binary_pairwise_data(
    self, experiment: Experiment, pairwise_name: str
) -> None:
    """Give every trial of ``experiment`` binary data for ``pairwise_name``.

    The status-quo (control) arm is recorded with a mean of 0.0 and every
    other arm with a mean of 1.0; all rows carry a SEM of 0.0. A control
    mean this close to zero is what makes relativizing the metric crash
    with 'mean_control too small', so the crash is reproduced
    deterministically unless the preference metric is scoped out of
    relativization.

    Args:
        experiment: Experiment whose trials receive the data. Its
            ``status_quo`` must be set (it is unwrapped with
            ``none_throws``).
        pairwise_name: Used as both the ``metric_name`` and the
            ``metric_signature`` of every attached row.
    """
    control_name = none_throws(experiment.status_quo).name
    for trial in experiment.trials.values():
        names = [arm.name for arm in trial.arms]
        arm_count = len(names)
        rows = {
            "arm_name": names,
            "metric_name": [pairwise_name] * arm_count,
            # float(False) == 0.0 for the control arm, float(True) == 1.0
            # for every other arm.
            "mean": [float(name != control_name) for name in names],
            "sem": [0.0] * arm_count,
            "trial_index": [trial.index] * arm_count,
            "metric_signature": [pairwise_name] * arm_count,
        }
        experiment.attach_data(Data(df=pd.DataFrame(rows)))
| 255 | + |
def test_compute_with_preference_objective_per_metric_relativization(
    self,
) -> None:
    """Summary relativizes only the non-preference metrics of an experiment
    whose objectives include a preference metric.

    The preference metric (pairwise_pref_query) carries binary 0/1 labels
    with a status-quo mean near zero, so relativizing it would crash with
    'mean_control too small'. The remaining metrics are expected to be
    relativized as usual.
    """
    preference_metric_name = Keys.PAIRWISE_PREFERENCE_QUERY.value

    # Per the helper's name, this experiment comes with status-quo trials,
    # which is the setup under which Summary relativizes its data.
    experiment = get_branin_experiment_with_status_quo_trials()

    # Optimize branin and the preference metric side by side.
    experiment.add_tracking_metric(Metric(name=preference_metric_name))
    branin_objective = Objective(
        metric=experiment.metrics["branin"], minimize=True
    )
    preference_objective = Objective(
        metric=Metric(name=preference_metric_name), minimize=False
    )
    experiment.optimization_config = MultiObjectiveOptimizationConfig(
        objective=MultiObjective(
            objectives=[branin_objective, preference_objective]
        )
    )

    self._attach_binary_pairwise_data(experiment, preference_metric_name)

    # Must not raise the "mean_control too small" error.
    summary_card = Summary().compute(experiment=experiment)

    # The subtitle reports relativization (of the non-preference metrics).
    self.assertIn("relativized", summary_card.subtitle)

    # The preference metric has no column in the summary table.
    self.assertNotIn(preference_metric_name, summary_card.df.columns)
| 292 | + |
def test_compute_with_preference_tracking_metric_and_no_optimization_config(
    self,
) -> None:
    """A tracking-only preference metric is still kept out of relativization
    when the experiment has a status quo but no optimization_config.

    The scenario under test: relativization is gated on
    metrics/status_quo/step data rather than on the optimization_config,
    so an unscoped binary 0/1 preference metric (SQ mean ~0) would crash
    with 'mean_control too small'.
    """
    preference_metric_name = Keys.PAIRWISE_PREFERENCE_QUERY.value

    experiment = get_branin_experiment_with_status_quo_trials()
    # Register the preference metric for tracking only, then clear the
    # optimization_config so nothing is being optimized.
    experiment.add_tracking_metric(Metric(name=preference_metric_name))
    experiment._optimization_config = None

    self._attach_binary_pairwise_data(experiment, preference_metric_name)

    # Must not raise "mean_control too small" despite the missing
    # optimization_config.
    summary_card = Summary().compute(experiment=experiment)

    # The non-preference metric (branin) is still relativized ...
    self.assertIn("relativized", summary_card.subtitle)
    # ... and the preference metric has no column in the summary table.
    self.assertNotIn(preference_metric_name, summary_card.df.columns)
256 | 318 |
|
257 | 319 | def test_default_excludes_stale_trials(self) -> None: |
258 | 320 | """Test that Summary defaults to excluding STALE trials.""" |
|
0 commit comments