Change get_trace to return dict[int, float] and filter by MetricAvailability

saitcakmak · meta-codesync[bot] · commit 8ac7d52a3638 · 2026-04-07T07:41:09.000-07:00
Summary:
`get_trace` previously returned `list[float]` with positional indexing,
which forced callers to fabricate sequential trial indices. This change:

1. Changes the return type to `dict[int, float]` mapping trial_index to
   performance value, so callers get real trial indices.

2. Adds `MetricAvailability` filtering to exclude trials with incomplete
   metric data before pivoting. This prevents the `ValueError("Some
   metrics are not present for all trials and arms")` that
   `_pivot_data_with_feasibility` raises when a completed trial is
   missing any metric (e.g., partial fetches, fetch failures, metrics
   added mid-experiment).

3. Removes the carry-forward expansion loop for abandoned/failed trials.
   These trials are now simply excluded from the returned dict.

Callers updated:
- `UtilityProgressionAnalysis` now shows real trial indices on x-axis
- `pareto_frontier.hypervolume_trace_plot` uses real trial indices
- `benchmark.py` extracts values from dict

Differential Revision: D99448053
diff --git a/ax/analysis/plotly/tests/test_utility_progression.py b/ax/analysis/plotly/tests/test_utility_progression.py
@@ -36,7 +36,7 @@ def _assert_valid_utility_card(
         """Assert that a card has valid structure for utility progression."""
         self.assertIsInstance(card, PlotlyAnalysisCard)
         self.assertEqual(card.name, "UtilityProgressionAnalysis")
-        self.assertIn("trace_index", card.df.columns)
+        self.assertIn("trial_index", card.df.columns)
         self.assertIn("utility", card.df.columns)
 
     def test_utility_progression_soo(self) -> None:
@@ -211,7 +211,7 @@ def test_all_infeasible_points_raises_error(self) -> None:
         with (
             patch(
                 "ax.analysis.plotly.utility_progression.get_trace",
-                return_value=[math.inf, -math.inf, math.inf],
+                return_value={0: math.inf, 1: -math.inf, 2: math.inf},
             ),
             self.assertRaises(ExperimentNotReadyError) as cm,
         ):
diff --git a/ax/analysis/plotly/utility_progression.py b/ax/analysis/plotly/utility_progression.py
@@ -28,11 +28,9 @@
 _UTILITY_PROGRESSION_TITLE = "Utility Progression"
 
 _TRACE_INDEX_EXPLANATION = (
-    "The x-axis shows trace index, which counts completed or early-stopped trials "
-    "sequentially (1, 2, 3, ...). This differs from trial index, which may have "
-    "gaps if some trials failed or were abandoned. For example, if trials 0, 2, "
-    "and 5 completed while trials 1, 3, and 4 failed, the trace indices would be "
-    "1, 2, 3 corresponding to trial indices 0, 2, 5."
+    "The x-axis shows trial index. Only completed or early-stopped trials with "
+    "complete metric data are included, so there may be gaps if some trials "
+    "failed, were abandoned, or have incomplete data."
 )
 
 _CUMULATIVE_BEST_EXPLANATION = (
@@ -57,7 +55,8 @@ class UtilityProgressionAnalysis(Analysis):
 
     The DataFrame computed will contain one row per completed trial and the
     following columns:
-        - trace_index: Sequential index of completed/early-stopped trials (1, 2, 3, ...)
+        - trial_index: The trial index of each completed/early-stopped trial
+            that has complete metric avilability.
         - utility: The cumulative best utility value at that trial
     """
 
@@ -114,7 +113,7 @@ def compute(
             )
 
         # Check if all points are infeasible (inf or -inf values)
-        if all(np.isinf(value) for value in trace):
+        if all(np.isinf(value) for value in trace.values()):
             raise ExperimentNotReadyError(
                 "All trials in the utility trace are infeasible i.e., they violate "
                 "outcome constraints, so there are no feasible points to plot. During "
@@ -125,12 +124,11 @@ def compute(
                 "space, or (2) relaxing outcome constraints."
             )
 
-        # Create DataFrame with 1-based trace index for user-friendly display
-        # (1st completed trial, 2nd completed trial, etc. instead of 0-indexed)
+        # Create DataFrame with trial indices from the trace
         df = pd.DataFrame(
             {
-                "trace_index": list(range(1, len(trace) + 1)),
-                "utility": trace,
+                "trial_index": list(trace.keys()),
+                "utility": list(trace.values()),
             }
         )
 
@@ -185,14 +183,14 @@ def compute(
         # Create the plot
         fig = px.line(
             data_frame=df,
-            x="trace_index",
+            x="trial_index",
             y="utility",
             markers=True,
             color_discrete_sequence=[AX_BLUE],
         )
 
         # Update axis labels and format x-axis to show integers only
-        fig.update_xaxes(title_text="Trace Index", dtick=1, rangemode="nonnegative")
+        fig.update_xaxes(title_text="Trial Index", dtick=1, rangemode="nonnegative")
         fig.update_yaxes(title_text=y_label)
 
         return create_plotly_analysis_card(
diff --git a/ax/benchmark/benchmark.py b/ax/benchmark/benchmark.py
@@ -320,9 +320,10 @@ def _get_oracle_value_of_params(
     dummy_experiment = get_oracle_experiment_from_params(
         problem=problem, dict_of_dict_of_params={0: {"0_0": params}}
     )
-    (inference_value,) = get_trace(
+    trace = get_trace(
         experiment=dummy_experiment, optimization_config=problem.optimization_config
     )
+    inference_value = next(iter(trace.values()))
     return inference_value
 
 
@@ -510,12 +511,27 @@ def get_benchmark_result_from_experiment_and_gs(
         dict_of_dict_of_params=dict_of_dict_of_params,
         trial_statuses=trial_statuses,
     )
-    oracle_trace = np.array(
-        get_trace(
-            experiment=actual_params_oracle_dummy_experiment,
-            optimization_config=problem.optimization_config,
-        )
+    oracle_trace_dict = get_trace(
+        experiment=actual_params_oracle_dummy_experiment,
+        optimization_config=problem.optimization_config,
+    )
+    # Expand trace dict to a positional array aligned with all trials,
+    # carry-forwarding the last best value for trials without data (e.g.,
+    # failed or abandoned trials preserved via trial_statuses).
+    maximize = (
+        isinstance(problem.optimization_config, MultiObjectiveOptimizationConfig)
+        or problem.optimization_config.objective.is_scalarized_objective
+        or not problem.optimization_config.objective.minimize
     )
+    all_trial_indices = sorted(actual_params_oracle_dummy_experiment.trials.keys())
+    last_best = -float("inf") if maximize else float("inf")
+    oracle_trace_list: list[float] = []
+    for idx in all_trial_indices:
+        if idx in oracle_trace_dict:
+            last_best = oracle_trace_dict[idx]
+        oracle_trace_list.append(last_best)
+    oracle_trace = np.array(oracle_trace_list)
+
     is_feasible_trace = np.array(
         get_is_feasible_trace(
             experiment=actual_params_oracle_dummy_experiment,
diff --git a/ax/plot/pareto_frontier.py b/ax/plot/pareto_frontier.py
@@ -61,8 +61,8 @@ def scatter_plot_with_hypervolume_trace_plotly(experiment: Experiment) -> go.Fig
 
     df = pd.DataFrame(
         {
-            "hypervolume": hypervolume_trace,
-            "trial_index": [*range(len(hypervolume_trace))],
+            "trial_index": list(hypervolume_trace.keys()),
+            "hypervolume": list(hypervolume_trace.values()),
         }
     )
 
diff --git a/ax/service/tests/test_best_point.py b/ax/service/tests/test_best_point.py
@@ -57,7 +57,7 @@ def test_get_trace(self) -> None:
         exp = get_experiment_with_observations(
             observations=[[11], [10], [9], [15], [5]], minimize=True
         )
-        self.assertEqual(get_trace(exp), [11, 10, 9, 9, 5])
+        self.assertEqual(get_trace(exp), {0: 11, 1: 10, 2: 9, 3: 9, 4: 5})
 
         # Same experiment with maximize via new optimization config.
         opt_conf = none_throws(exp.optimization_config).clone()
@@ -67,7 +67,7 @@ def test_get_trace(self) -> None:
                 opt_conf.objective.metric_names[0]: opt_conf.objective.metric_names[0]
             },
         )
-        self.assertEqual(get_trace(exp, opt_conf), [11, 11, 11, 15, 15])
+        self.assertEqual(get_trace(exp, opt_conf), {0: 11, 1: 11, 2: 11, 3: 15, 4: 15})
 
         with self.subTest("Single objective with constraints"):
             # The second metric is the constraint and needs to be >= 0
@@ -76,48 +76,52 @@ def test_get_trace(self) -> None:
                 minimize=False,
                 constrained=True,
             )
-            self.assertEqual(get_trace(exp), [float("-inf"), 10, 10, 10, 11])
+            self.assertEqual(
+                get_trace(exp),
+                {0: float("-inf"), 1: 10, 2: 10, 3: 10, 4: 11},
+            )
 
             exp = get_experiment_with_observations(
                 observations=[[11, -1], [10, 1], [9, 1], [15, -1], [11, 1]],
                 minimize=True,
                 constrained=True,
             )
-            self.assertEqual(get_trace(exp), [float("inf"), 10, 9, 9, 9])
+            self.assertEqual(get_trace(exp), {0: float("inf"), 1: 10, 2: 9, 3: 9, 4: 9})
 
         # Scalarized.
         exp = get_experiment_with_observations(
             observations=[[1, 1], [2, 2], [3, 3]],
             scalarized=True,
         )
-        self.assertEqual(get_trace(exp), [2, 4, 6])
+        self.assertEqual(get_trace(exp), {0: 2, 1: 4, 2: 6})
 
         # Multi objective.
         exp = get_experiment_with_observations(
             observations=[[1, 1], [-1, 100], [1, 2], [3, 3], [2, 4], [2, 1]],
         )
-        self.assertEqual(get_trace(exp), [1, 1, 2, 9, 11, 11])
+        self.assertEqual(get_trace(exp), {0: 1, 1: 1, 2: 2, 3: 9, 4: 11, 5: 11})
 
         # W/o ObjectiveThresholds (inferring ObjectiveThresholds from scaled nadir)
         assert_is_instance(
             exp.optimization_config, MultiObjectiveOptimizationConfig
         ).objective_thresholds = []
         trace = get_trace(exp)
+        trace_values = list(trace.values())
         # With inferred thresholds via scaled nadir, check trace properties:
         # - All values should be non-negative
-        self.assertTrue(all(v >= 0.0 for v in trace))
+        self.assertTrue(all(v >= 0.0 for v in trace_values))
         # - Trace should be non-decreasing (cumulative best)
-        for i in range(1, len(trace)):
-            self.assertGreaterEqual(trace[i], trace[i - 1])
+        for i in range(1, len(trace_values)):
+            self.assertGreaterEqual(trace_values[i], trace_values[i - 1])
         # - Final value should be positive (non-trivial HV)
-        self.assertGreater(trace[-1], 0.0)
+        self.assertGreater(trace_values[-1], 0.0)
 
         # Multi-objective w/ constraints.
         exp = get_experiment_with_observations(
             observations=[[-1, 1, 1], [1, 2, 1], [3, 3, -1], [2, 4, 1], [2, 1, 1]],
             constrained=True,
         )
-        self.assertEqual(get_trace(exp), [0, 2, 2, 8, 8])
+        self.assertEqual(get_trace(exp), {0: 0, 1: 2, 2: 2, 3: 8, 4: 8})
 
         # W/ relative constraints & status quo.
         exp.status_quo = Arm(parameters={"x": 0.5, "y": 0.5}, name="status_quo")
@@ -149,17 +153,17 @@ def test_get_trace(self) -> None:
         ]
         status_quo_data = Data(df=pd.DataFrame.from_records(df_dict))
         exp.attach_data(data=status_quo_data)
-        self.assertEqual(get_trace(exp), [0, 2, 2, 8, 8])
+        self.assertEqual(get_trace(exp), {0: 0, 1: 2, 2: 2, 3: 8, 4: 8})
 
         # W/ first objective being minimized.
         exp = get_experiment_with_observations(
             observations=[[1, 1], [-1, 2], [3, 3], [-2, 4], [2, 1]], minimize=True
         )
-        self.assertEqual(get_trace(exp), [0, 2, 2, 8, 8])
+        self.assertEqual(get_trace(exp), {0: 0, 1: 2, 2: 2, 3: 8, 4: 8})
 
         # W/ empty data.
         exp = get_experiment_with_trial()
-        self.assertEqual(get_trace(exp), [])
+        self.assertEqual(get_trace(exp), {})
 
         # test batch trial
         exp = get_experiment_with_batch_trial(with_status_quo=False)
@@ -191,7 +195,7 @@ def test_get_trace(self) -> None:
                 ]
             )
         exp.attach_data(Data(df=pd.DataFrame.from_records(df_dict)))
-        self.assertEqual(get_trace(exp), [2.0])
+        self.assertEqual(get_trace(exp), {0: 2.0})
         # test that there is performance metric in the trace for each
         # completed/early-stopped trial
         trial1 = assert_is_instance(trial, BatchTrial).clone_to(include_sq=False)
@@ -214,7 +218,7 @@ def test_get_trace(self) -> None:
                 ]
             )
         exp.attach_data(Data(df=pd.DataFrame.from_records(df_dict2)))
-        self.assertEqual(get_trace(exp), [2.0, 2.0, 20.0])
+        self.assertEqual(get_trace(exp), {0: 2.0, 2: 20.0})
 
     def test_get_trace_with_non_completed_trials(self) -> None:
         with self.subTest("minimize with abandoned trial"):
@@ -224,12 +228,11 @@ def test_get_trace_with_non_completed_trials(self) -> None:
             # Mark trial 2 (value=9) as abandoned
             exp.trials[2].mark_abandoned(unsafe=True)
 
-            # Abandoned trial carries forward the last best value
+            # Abandoned trial is excluded from trace
             trace = get_trace(exp)
-            self.assertEqual(len(trace), 5)
-            # Trial 0: 11, Trial 1: 10, Trial 2 (abandoned): carry forward 10
+            # Trial 0: 11, Trial 1: 10, Trial 2 (abandoned): excluded
             # Trial 3: 10 (15 > 10), Trial 4: 5
-            self.assertEqual(trace, [11, 10, 10, 10, 5])
+            self.assertEqual(trace, {0: 11, 1: 10, 3: 10, 4: 5})
 
         with self.subTest("maximize with abandoned trial"):
             exp = get_experiment_with_observations(
@@ -238,12 +241,11 @@ def test_get_trace_with_non_completed_trials(self) -> None:
             # Mark trial 1 (value=3) as abandoned
             exp.trials[1].mark_abandoned(unsafe=True)
 
-            # Abandoned trial carries forward the last best value
+            # Abandoned trial is excluded from trace
             trace = get_trace(exp)
-            self.assertEqual(len(trace), 5)
-            # Trial 0: 1, Trial 1 (abandoned): carry forward 1,
+            # Trial 0: 1, Trial 1 (abandoned): excluded,
             # Trial 2: 2, Trial 3: 5, Trial 4: 5
-            self.assertEqual(trace, [1, 1, 2, 5, 5])
+            self.assertEqual(trace, {0: 1, 2: 2, 3: 5, 4: 5})
 
         with self.subTest("minimize with failed trial"):
             exp = get_experiment_with_observations(
@@ -252,12 +254,11 @@ def test_get_trace_with_non_completed_trials(self) -> None:
             # Mark trial 2 (value=9) as failed
             exp.trials[2].mark_failed(unsafe=True)
 
-            # Failed trial carries forward the last best value
+            # Failed trial is excluded from trace
             trace = get_trace(exp)
-            self.assertEqual(len(trace), 5)
-            # Trial 0: 11, Trial 1: 10, Trial 2 (failed): carry forward 10
+            # Trial 0: 11, Trial 1: 10, Trial 2 (failed): excluded
             # Trial 3: 10 (15 > 10), Trial 4: 5
-            self.assertEqual(trace, [11, 10, 10, 10, 5])
+            self.assertEqual(trace, {0: 11, 1: 10, 3: 10, 4: 5})
 
     def test_get_trace_with_include_status_quo(self) -> None:
         with self.subTest("Multi-objective: status quo dominates in some trials"):
@@ -347,9 +348,11 @@ def test_get_trace_with_include_status_quo(self) -> None:
             # The last value MUST differ because status quo dominates
             # Without status quo, only poor arms contribute (low hypervolume)
             # With status quo, excellent values contribute (high hypervolume)
+            last_without = list(trace_without_sq.values())[-1]
+            last_with = list(trace_with_sq.values())[-1]
             self.assertGreater(
-                trace_with_sq[-1],
-                trace_without_sq[-1],
+                last_with,
+                last_without,
                 f"Status quo dominates in trial 3, so trace with SQ should be higher. "
                 f"Without SQ: {trace_without_sq}, With SQ: {trace_with_sq}",
             )
@@ -418,9 +421,11 @@ def test_get_trace_with_include_status_quo(self) -> None:
             # The last value MUST differ because status quo is best
             # Without status quo: best in trial 3 is 15.0, cumulative min is 9
             # With status quo: best in trial 3 is 5.0, cumulative min is 5
+            last_without = list(trace_without_sq.values())[-1]
+            last_with = list(trace_with_sq.values())[-1]
             self.assertLess(
-                trace_with_sq[-1],
-                trace_without_sq[-1],
+                last_with,
+                last_without,
                 f"Status quo is best in trial 3, so trace with SQ should be "
                 f"lower (minimize). Without SQ: {trace_without_sq}, "
                 f"With SQ: {trace_with_sq}",
@@ -502,19 +507,20 @@ def _make_pref_opt_config(self, profile_name: str) -> PreferenceOptimizationConf
             preference_profile_name=profile_name,
         )
 
-    def _assert_valid_trace(self, trace: list[float], expected_len: int) -> None:
+    def _assert_valid_trace(self, trace: dict[int, float], expected_len: int) -> None:
         """Assert trace has expected length, contains floats, is non-decreasing and has
         more than one unique value."""
         self.assertEqual(len(trace), expected_len)
-        for value in trace:
+        trace_values = list(trace.values())
+        for value in trace_values:
             self.assertIsInstance(value, float)
-        for i in range(1, len(trace)):
+        for i in range(1, len(trace_values)):
             self.assertGreaterEqual(
-                trace[i],
-                trace[i - 1],
+                trace_values[i],
+                trace_values[i - 1],
                 msg=f"Trace not monotonically increasing at index {i}: {trace}",
             )
-        unique_values = set(trace)
+        unique_values = set(trace_values)
         self.assertGreater(
             len(unique_values),
             1,
diff --git a/ax/service/utils/best_point.py b/ax/service/utils/best_point.py

Original file line number	Diff line number	Diff line change
`@@ -61,8 +61,8 @@ def scatter_plot_with_hypervolume_trace_plotly(experiment: Experiment) -> go.Fig`
`61`	`61`
`62`	`62`	`df = pd.DataFrame(`
`63`	`63`	`{`
`64`		`- "hypervolume": hypervolume_trace,`
`65`		`- "trial_index": [*range(len(hypervolume_trace))],`
	`64`	`+ "trial_index": list(hypervolume_trace.keys()),`
	`65`	`+ "hypervolume": list(hypervolume_trace.values()),`
`66`	`66`	`}`
`67`	`67`	`)`
`68`	`68`