Change get_trace to return dict[int, float] and filter by MetricAvailability (facebook#5140)

saitcakmak · facebook-github-bot · commit c9196f27453a · 2026-04-07T07:43:09.000-07:00
Summary:

Goal: Eliminate assumptions around COMPLETED trials always having all metrics available. This is part of the code that would've raised errors if it encountered missing metrics in COMPLETED trials. Changing the return type here seemed like a better move than using carry-forward logic, and fits better with the logic that skips status quo as well (which may lead to confusing indexing when skipping status quo).

How does this new return type fit with `BatchTrial`: `get_trace` takes the best arm out of each batch, so it returns (and did so before this change) one result per trial. Nothing changed.

Changes (claude summary):
`get_trace` previously returned `list[float]` with positional indexing,
which forced callers to fabricate sequential trial indices. This change:

1. Changes the return type to `dict[int, float]` mapping trial_index to
   performance value, so callers get real trial indices.

2. Adds `MetricAvailability` filtering to exclude trials with incomplete
   metric data before pivoting. This prevents the `ValueError("Some
   metrics are not present for all trials and arms")` that
   `_pivot_data_with_feasibility` raises when a completed trial is
   missing any metric (e.g., partial fetches, fetch failures, metrics
   added mid-experiment).

3. Removes the carry-forward expansion loop for abandoned/failed trials.
   These trials are now simply excluded from the returned dict.

Callers updated:
- `UtilityProgressionAnalysis` now shows real trial indices on x-axis
- `pareto_frontier.hypervolume_trace_plot` uses real trial indices
- `benchmark.py` extracts values from dict

Differential Revision: D99448053
diff --git a/ax/analysis/plotly/tests/test_utility_progression.py b/ax/analysis/plotly/tests/test_utility_progression.py
@@ -36,7 +36,7 @@ def _assert_valid_utility_card(
         """Assert that a card has valid structure for utility progression."""
         self.assertIsInstance(card, PlotlyAnalysisCard)
         self.assertEqual(card.name, "UtilityProgressionAnalysis")
-        self.assertIn("trace_index", card.df.columns)
+        self.assertIn("trial_index", card.df.columns)
         self.assertIn("utility", card.df.columns)
 
     def test_utility_progression_soo(self) -> None:
@@ -211,7 +211,7 @@ def test_all_infeasible_points_raises_error(self) -> None:
         with (
             patch(
                 "ax.analysis.plotly.utility_progression.get_trace",
-                return_value=[math.inf, -math.inf, math.inf],
+                return_value={0: math.inf, 1: -math.inf, 2: math.inf},
             ),
             self.assertRaises(ExperimentNotReadyError) as cm,
         ):
diff --git a/ax/analysis/plotly/utility_progression.py b/ax/analysis/plotly/utility_progression.py
@@ -28,11 +28,9 @@
 _UTILITY_PROGRESSION_TITLE = "Utility Progression"
 
 _TRACE_INDEX_EXPLANATION = (
-    "The x-axis shows trace index, which counts completed or early-stopped trials "
-    "sequentially (1, 2, 3, ...). This differs from trial index, which may have "
-    "gaps if some trials failed or were abandoned. For example, if trials 0, 2, "
-    "and 5 completed while trials 1, 3, and 4 failed, the trace indices would be "
-    "1, 2, 3 corresponding to trial indices 0, 2, 5."
+    "The x-axis shows trial index. Only completed or early-stopped trials with "
+    "complete metric data are included, so there may be gaps if some trials "
+    "failed, were abandoned, or have incomplete data."
 )
 
 _CUMULATIVE_BEST_EXPLANATION = (
@@ -57,7 +55,8 @@ class UtilityProgressionAnalysis(Analysis):
 
     The DataFrame computed will contain one row per completed trial and the
     following columns:
-        - trace_index: Sequential index of completed/early-stopped trials (1, 2, 3, ...)
+        - trial_index: The trial index of each completed/early-stopped trial
+            that has complete metric avilability.
         - utility: The cumulative best utility value at that trial
     """
 
@@ -114,7 +113,7 @@ def compute(
             )
 
         # Check if all points are infeasible (inf or -inf values)
-        if all(np.isinf(value) for value in trace):
+        if all(np.isinf(value) for value in trace.values()):
             raise ExperimentNotReadyError(
                 "All trials in the utility trace are infeasible i.e., they violate "
                 "outcome constraints, so there are no feasible points to plot. During "
@@ -125,12 +124,11 @@ def compute(
                 "space, or (2) relaxing outcome constraints."
             )
 
-        # Create DataFrame with 1-based trace index for user-friendly display
-        # (1st completed trial, 2nd completed trial, etc. instead of 0-indexed)
+        # Create DataFrame with trial indices from the trace
         df = pd.DataFrame(
             {
-                "trace_index": list(range(1, len(trace) + 1)),
-                "utility": trace,
+                "trial_index": list(trace.keys()),
+                "utility": list(trace.values()),
             }
         )
 
@@ -185,14 +183,14 @@ def compute(
         # Create the plot
         fig = px.line(
             data_frame=df,
-            x="trace_index",
+            x="trial_index",
             y="utility",
             markers=True,
             color_discrete_sequence=[AX_BLUE],
         )
 
         # Update axis labels and format x-axis to show integers only
-        fig.update_xaxes(title_text="Trace Index", dtick=1, rangemode="nonnegative")
+        fig.update_xaxes(title_text="Trial Index", dtick=1, rangemode="nonnegative")
         fig.update_yaxes(title_text=y_label)
 
         return create_plotly_analysis_card(
diff --git a/ax/benchmark/benchmark.py b/ax/benchmark/benchmark.py
@@ -320,9 +320,10 @@ def _get_oracle_value_of_params(
     dummy_experiment = get_oracle_experiment_from_params(
         problem=problem, dict_of_dict_of_params={0: {"0_0": params}}
     )
-    (inference_value,) = get_trace(
+    trace = get_trace(
         experiment=dummy_experiment, optimization_config=problem.optimization_config
     )
+    inference_value = next(iter(trace.values()))
     return inference_value
 
 
@@ -510,12 +511,27 @@ def get_benchmark_result_from_experiment_and_gs(
         dict_of_dict_of_params=dict_of_dict_of_params,
         trial_statuses=trial_statuses,
     )
-    oracle_trace = np.array(
-        get_trace(
-            experiment=actual_params_oracle_dummy_experiment,
-            optimization_config=problem.optimization_config,
-        )
+    oracle_trace_dict = get_trace(
+        experiment=actual_params_oracle_dummy_experiment,
+        optimization_config=problem.optimization_config,
+    )
+    # Expand trace dict to a positional array aligned with all trials,
+    # carry-forwarding the last best value for trials without data (e.g.,
+    # failed or abandoned trials preserved via trial_statuses).
+    maximize = (
+        isinstance(problem.optimization_config, MultiObjectiveOptimizationConfig)
+        or problem.optimization_config.objective.is_scalarized_objective
+        or not problem.optimization_config.objective.minimize
     )
+    all_trial_indices = sorted(actual_params_oracle_dummy_experiment.trials.keys())
+    last_best = -float("inf") if maximize else float("inf")
+    oracle_trace_list: list[float] = []
+    for idx in all_trial_indices:
+        if idx in oracle_trace_dict:
+            last_best = oracle_trace_dict[idx]
+        oracle_trace_list.append(last_best)
+    oracle_trace = np.array(oracle_trace_list)
+
     is_feasible_trace = np.array(
         get_is_feasible_trace(
             experiment=actual_params_oracle_dummy_experiment,
diff --git a/ax/plot/pareto_frontier.py b/ax/plot/pareto_frontier.py
@@ -61,8 +61,8 @@ def scatter_plot_with_hypervolume_trace_plotly(experiment: Experiment) -> go.Fig
 
     df = pd.DataFrame(
         {
-            "hypervolume": hypervolume_trace,
-            "trial_index": [*range(len(hypervolume_trace))],
+            "trial_index": list(hypervolume_trace.keys()),
+            "hypervolume": list(hypervolume_trace.values()),
         }
     )
 
diff --git a/ax/service/tests/test_best_point.py b/ax/service/tests/test_best_point.py
@@ -57,7 +57,7 @@ def test_get_trace(self) -> None:
         exp = get_experiment_with_observations(
             observations=[[11], [10], [9], [15], [5]], minimize=True
         )
-        self.assertEqual(get_trace(exp), [11, 10, 9, 9, 5])
+        self.assertEqual(get_trace(exp), {0: 11, 1: 10, 2: 9, 3: 9, 4: 5})
 
         # Same experiment with maximize via new optimization config.
         opt_conf = none_throws(exp.optimization_config).clone()
@@ -67,7 +67,7 @@ def test_get_trace(self) -> None:
                 opt_conf.objective.metric_names[0]: opt_conf.objective.metric_names[0]
             },
         )
-        self.assertEqual(get_trace(exp, opt_conf), [11, 11, 11, 15, 15])
+        self.assertEqual(get_trace(exp, opt_conf), {0: 11, 1: 11, 2: 11, 3: 15, 4: 15})
 
         with self.subTest("Single objective with constraints"):
             # The second metric is the constraint and needs to be >= 0
@@ -76,48 +76,52 @@ def test_get_trace(self) -> None:
                 minimize=False,
                 constrained=True,
             )
-            self.assertEqual(get_trace(exp), [float("-inf"), 10, 10, 10, 11])
+            self.assertEqual(
+                get_trace(exp),
+                {0: float("-inf"), 1: 10, 2: 10, 3: 10, 4: 11},
+            )
 
             exp = get_experiment_with_observations(
                 observations=[[11, -1], [10, 1], [9, 1], [15, -1], [11, 1]],
                 minimize=True,
                 constrained=True,
             )
-            self.assertEqual(get_trace(exp), [float("inf"), 10, 9, 9, 9])
+            self.assertEqual(get_trace(exp), {0: float("inf"), 1: 10, 2: 9, 3: 9, 4: 9})
 
         # Scalarized.
         exp = get_experiment_with_observations(
             observations=[[1, 1], [2, 2], [3, 3]],
             scalarized=True,
         )
-        self.assertEqual(get_trace(exp), [2, 4, 6])
+        self.assertEqual(get_trace(exp), {0: 2, 1: 4, 2: 6})
 
         # Multi objective.
         exp = get_experiment_with_observations(
             observations=[[1, 1], [-1, 100], [1, 2], [3, 3], [2, 4], [2, 1]],
         )
-        self.assertEqual(get_trace(exp), [1, 1, 2, 9, 11, 11])
+        self.assertEqual(get_trace(exp), {0: 1, 1: 1, 2: 2, 3: 9, 4: 11, 5: 11})
 
         # W/o ObjectiveThresholds (inferring ObjectiveThresholds from scaled nadir)
         assert_is_instance(
             exp.optimization_config, MultiObjectiveOptimizationConfig
         ).objective_thresholds = []
         trace = get_trace(exp)
+        trace_values = list(trace.values())
         # With inferred thresholds via scaled nadir, check trace properties:
         # - All values should be non-negative
-        self.assertTrue(all(v >= 0.0 for v in trace))
+        self.assertTrue(all(v >= 0.0 for v in trace_values))
         # - Trace should be non-decreasing (cumulative best)
-        for i in range(1, len(trace)):
-            self.assertGreaterEqual(trace[i], trace[i - 1])
+        for i in range(1, len(trace_values)):
+            self.assertGreaterEqual(trace_values[i], trace_values[i - 1])
         # - Final value should be positive (non-trivial HV)
-        self.assertGreater(trace[-1], 0.0)
+        self.assertGreater(trace_values[-1], 0.0)
 
         # Multi-objective w/ constraints.
         exp = get_experiment_with_observations(
             observations=[[-1, 1, 1], [1, 2, 1], [3, 3, -1], [2, 4, 1], [2, 1, 1]],
             constrained=True,
         )
-        self.assertEqual(get_trace(exp), [0, 2, 2, 8, 8])
+        self.assertEqual(get_trace(exp), {0: 0, 1: 2, 2: 2, 3: 8, 4: 8})
 
         # W/ relative constraints & status quo.
         exp.status_quo = Arm(parameters={"x": 0.5, "y": 0.5}, name="status_quo")
@@ -149,17 +153,17 @@ def test_get_trace(self) -> None:
         ]
         status_quo_data = Data(df=pd.DataFrame.from_records(df_dict))
         exp.attach_data(data=status_quo_data)
-        self.assertEqual(get_trace(exp), [0, 2, 2, 8, 8])
+        self.assertEqual(get_trace(exp), {0: 0, 1: 2, 2: 2, 3: 8, 4: 8})
 
         # W/ first objective being minimized.
         exp = get_experiment_with_observations(
             observations=[[1, 1], [-1, 2], [3, 3], [-2, 4], [2, 1]], minimize=True
         )
-        self.assertEqual(get_trace(exp), [0, 2, 2, 8, 8])
+        self.assertEqual(get_trace(exp), {0: 0, 1: 2, 2: 2, 3: 8, 4: 8})
 
         # W/ empty data.
         exp = get_experiment_with_trial()
-        self.assertEqual(get_trace(exp), [])
+        self.assertEqual(get_trace(exp), {})
 
         # test batch trial
         exp = get_experiment_with_batch_trial(with_status_quo=False)
@@ -191,7 +195,7 @@ def test_get_trace(self) -> None:
                 ]
             )
         exp.attach_data(Data(df=pd.DataFrame.from_records(df_dict)))
-        self.assertEqual(get_trace(exp), [2.0])
+        self.assertEqual(get_trace(exp), {0: 2.0})
         # test that there is performance metric in the trace for each
         # completed/early-stopped trial
         trial1 = assert_is_instance(trial, BatchTrial).clone_to(include_sq=False)
@@ -214,7 +218,7 @@ def test_get_trace(self) -> None:
                 ]
             )
         exp.attach_data(Data(df=pd.DataFrame.from_records(df_dict2)))
-        self.assertEqual(get_trace(exp), [2.0, 2.0, 20.0])
+        self.assertEqual(get_trace(exp), {0: 2.0, 2: 20.0})
 
     def test_get_trace_with_non_completed_trials(self) -> None:
         with self.subTest("minimize with abandoned trial"):
@@ -224,12 +228,11 @@ def test_get_trace_with_non_completed_trials(self) -> None:
             # Mark trial 2 (value=9) as abandoned
             exp.trials[2].mark_abandoned(unsafe=True)
 
-            # Abandoned trial carries forward the last best value
+            # Abandoned trial is excluded from trace
             trace = get_trace(exp)
-            self.assertEqual(len(trace), 5)
-            # Trial 0: 11, Trial 1: 10, Trial 2 (abandoned): carry forward 10
+            # Trial 0: 11, Trial 1: 10, Trial 2 (abandoned): excluded
             # Trial 3: 10 (15 > 10), Trial 4: 5
-            self.assertEqual(trace, [11, 10, 10, 10, 5])
+            self.assertEqual(trace, {0: 11, 1: 10, 3: 10, 4: 5})
 
         with self.subTest("maximize with abandoned trial"):
             exp = get_experiment_with_observations(
@@ -238,12 +241,11 @@ def test_get_trace_with_non_completed_trials(self) -> None:
             # Mark trial 1 (value=3) as abandoned
             exp.trials[1].mark_abandoned(unsafe=True)
 
-            # Abandoned trial carries forward the last best value
+            # Abandoned trial is excluded from trace
             trace = get_trace(exp)
-            self.assertEqual(len(trace), 5)
-            # Trial 0: 1, Trial 1 (abandoned): carry forward 1,
+            # Trial 0: 1, Trial 1 (abandoned): excluded,
             # Trial 2: 2, Trial 3: 5, Trial 4: 5
-            self.assertEqual(trace, [1, 1, 2, 5, 5])
+            self.assertEqual(trace, {0: 1, 2: 2, 3: 5, 4: 5})
 
         with self.subTest("minimize with failed trial"):
             exp = get_experiment_with_observations(
@@ -252,12 +254,11 @@ def test_get_trace_with_non_completed_trials(self) -> None:
             # Mark trial 2 (value=9) as failed
             exp.trials[2].mark_failed(unsafe=True)
 
-            # Failed trial carries forward the last best value
+            # Failed trial is excluded from trace
             trace = get_trace(exp)
-            self.assertEqual(len(trace), 5)
-            # Trial 0: 11, Trial 1: 10, Trial 2 (failed): carry forward 10
+            # Trial 0: 11, Trial 1: 10, Trial 2 (failed): excluded
             # Trial 3: 10 (15 > 10), Trial 4: 5
-            self.assertEqual(trace, [11, 10, 10, 10, 5])
+            self.assertEqual(trace, {0: 11, 1: 10, 3: 10, 4: 5})
 
     def test_get_trace_with_include_status_quo(self) -> None:
         with self.subTest("Multi-objective: status quo dominates in some trials"):
@@ -347,9 +348,11 @@ def test_get_trace_with_include_status_quo(self) -> None:
             # The last value MUST differ because status quo dominates
             # Without status quo, only poor arms contribute (low hypervolume)
             # With status quo, excellent values contribute (high hypervolume)
+            last_without = list(trace_without_sq.values())[-1]
+            last_with = list(trace_with_sq.values())[-1]
             self.assertGreater(
-                trace_with_sq[-1],
-                trace_without_sq[-1],
+                last_with,
+                last_without,
                 f"Status quo dominates in trial 3, so trace with SQ should be higher. "
                 f"Without SQ: {trace_without_sq}, With SQ: {trace_with_sq}",
             )
@@ -418,9 +421,11 @@ def test_get_trace_with_include_status_quo(self) -> None:
             # The last value MUST differ because status quo is best
             # Without status quo: best in trial 3 is 15.0, cumulative min is 9
             # With status quo: best in trial 3 is 5.0, cumulative min is 5
+            last_without = list(trace_without_sq.values())[-1]
+            last_with = list(trace_with_sq.values())[-1]
             self.assertLess(
-                trace_with_sq[-1],
-                trace_without_sq[-1],
+                last_with,
+                last_without,
                 f"Status quo is best in trial 3, so trace with SQ should be "
                 f"lower (minimize). Without SQ: {trace_without_sq}, "
                 f"With SQ: {trace_with_sq}",
@@ -502,19 +507,20 @@ def _make_pref_opt_config(self, profile_name: str) -> PreferenceOptimizationConf
             preference_profile_name=profile_name,
         )
 
-    def _assert_valid_trace(self, trace: list[float], expected_len: int) -> None:
+    def _assert_valid_trace(self, trace: dict[int, float], expected_len: int) -> None:
         """Assert trace has expected length, contains floats, is non-decreasing and has
         more than one unique value."""
         self.assertEqual(len(trace), expected_len)
-        for value in trace:
+        trace_values = list(trace.values())
+        for value in trace_values:
             self.assertIsInstance(value, float)
-        for i in range(1, len(trace)):
+        for i in range(1, len(trace_values)):
             self.assertGreaterEqual(
-                trace[i],
-                trace[i - 1],
+                trace_values[i],
+                trace_values[i - 1],
                 msg=f"Trace not monotonically increasing at index {i}: {trace}",
             )
-        unique_values = set(trace)
+        unique_values = set(trace_values)
         self.assertGreater(
             len(unique_values),
             1,
diff --git a/ax/service/utils/best_point.py b/ax/service/utils/best_point.py

Original file line number	Diff line number	Diff line change
`@@ -61,8 +61,8 @@ def scatter_plot_with_hypervolume_trace_plotly(experiment: Experiment) -> go.Fig`
`61`	`61`
`62`	`62`	`df = pd.DataFrame(`
`63`	`63`	`{`
`64`		`- "hypervolume": hypervolume_trace,`
`65`		`- "trial_index": [*range(len(hypervolume_trace))],`
	`64`	`+ "trial_index": list(hypervolume_trace.keys()),`
	`65`	`+ "hypervolume": list(hypervolume_trace.values()),`
`66`	`66`	`}`
`67`	`67`	`)`
`68`	`68`