Skip to content

Commit 187df24

Browse files
shrutipatel31 authored and facebook-github-bot committed
Use WARNING instead of FAIL for Baseline Improvement Healthcheck (#4883)
Summary: The baseline improvement healthcheck currently marks the result as FAIL when no objectives improve over baseline, which can be overly alarming and may confuse our users unnecessarily. Lack of improvement is informational — the experiment is still valid, just not yet showing gains — so WARNING is more appropriate. Also updated the subtitle logic to branch on `num_improved` counts instead of status, since both partial and no improvement now share the same WARNING status. This also allowed simplifying the status assignment to a single ternary (PASS if all improved, WARNING otherwise) and removing the now-unnecessary status parameter from `_build_subtitle`. Reviewed By: bernardbeckerman Differential Revision: D92871561
1 parent 541c458 commit 187df24

2 files changed

Lines changed: 13 additions & 17 deletions

File tree

ax/analysis/healthcheck/baseline_improvement.py

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,7 @@ class BaselineImprovementAnalysis(Analysis):
4141
4242
Status Logic:
4343
- PASS: All objectives improved over baseline
44-
- WARNING: Some objectives improved over baseline
45-
- FAIL: No objectives improved over baseline
44+
- WARNING: One or more objectives did not improve over baseline
4645
4746
The healthcheck evaluates improvement by:
4847
1. Identifying a baseline arm (explicit, status quo, or first trial)
@@ -231,20 +230,18 @@ def compute(
231230
num_improved = len(improved)
232231
num_total = len(comparison_list)
233232

234-
if num_improved == num_total:
235-
status = HealthcheckStatus.PASS
236-
elif num_improved > 0:
237-
status = HealthcheckStatus.WARNING
238-
else:
239-
status = HealthcheckStatus.FAIL
233+
status = (
234+
HealthcheckStatus.PASS
235+
if num_improved == num_total
236+
else HealthcheckStatus.WARNING
237+
)
240238

241239
# Build subtitle
242240
subtitle = self._build_subtitle(
243241
num_improved=num_improved,
244242
num_total=num_total,
245243
not_improved=not_improved,
246244
details=details,
247-
status=status,
248245
baseline_arm_name=baseline_arm_name,
249246
auto_selected_from_first_arm=auto_selected_from_first_arm,
250247
baseline_in_design=experiment.search_space.check_membership(
@@ -325,7 +322,6 @@ def _build_subtitle(
325322
num_total: int,
326323
not_improved: list[str],
327324
details: list[dict[str, str]],
328-
status: HealthcheckStatus,
329325
baseline_arm_name: str,
330326
auto_selected_from_first_arm: bool,
331327
baseline_in_design: bool,
@@ -334,9 +330,9 @@ def _build_subtitle(
334330
parts: list[str] = []
335331

336332
# Status summary
337-
if status == HealthcheckStatus.PASS:
333+
if num_improved == num_total:
338334
parts.append(f"All {num_total} objective(s) improved over baseline.")
339-
elif status == HealthcheckStatus.WARNING:
335+
elif num_improved > 0:
340336
parts.append(
341337
f"{num_improved} out of {num_total} objective(s) "
342338
"improved over baseline. The following metrics were not improved: "

ax/analysis/healthcheck/tests/test_baseline_improvement.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -61,12 +61,12 @@ def _attach_data(
6161
exp.attach_data(Data(df=pd.DataFrame(rows)))
6262

6363
def test_status_outcomes(self) -> None:
64-
"""Test PASS/FAIL status based on improvement."""
64+
"""Test PASS/WARNING status based on improvement."""
6565
# minimize=True: lower is better
6666
test_cases = [
6767
# (baseline_mean, comparison_mean, expected_status, description)
6868
(100.0, 50.0, HealthcheckStatus.PASS, "improved (lower)"),
69-
(50.0, 100.0, HealthcheckStatus.FAIL, "not improved (higher)"),
69+
(50.0, 100.0, HealthcheckStatus.WARNING, "not improved (higher)"),
7070
]
7171

7272
for baseline_mean, comparison_mean, expected_status, desc in test_cases:
@@ -107,7 +107,7 @@ def test_multi_objective_partial_improvement(self) -> None:
107107

108108
def test_documentation_link(self) -> None:
109109
"""Test documentation_link is appended correctly."""
110-
# minimize=True: baseline=50, comparison=100 -> NOT improved (FAIL status)
110+
# minimize=True: baseline=50, comparison=100 -> NOT improved (WARNING status)
111111
self._attach_data(
112112
{"branin": [(50.0, 0.1), (100.0, 0.1)]}, arm_names=["0_0", "0_1"]
113113
)
@@ -178,7 +178,7 @@ def test_dataframe_structure(self) -> None:
178178
self.assertEqual(len(card.df), 1)
179179

180180
def test_no_improvement_message_parameter(self) -> None:
181-
"""Test custom no_improvement_message is displayed on FAIL status."""
181+
"""Test custom no_improvement_message is displayed on WARNING status."""
182182
self._attach_data(
183183
{"branin": [(50.0, 0.1), (100.0, 0.1)]}, arm_names=["0_0", "0_1"]
184184
)
@@ -192,5 +192,5 @@ def test_no_improvement_message_parameter(self) -> None:
192192
)
193193
card = analysis.compute(experiment=self.experiment)
194194

195-
self.assertEqual(card.get_status(), HealthcheckStatus.FAIL)
195+
self.assertEqual(card.get_status(), HealthcheckStatus.WARNING)
196196
self.assertIn(custom_message, card.subtitle)

0 commit comments

Comments
 (0)