Allow Oracle Experiment to take ABANDONED trials into account (facebook#4953)

Sunny Shen · facebook-github-bot · commit 30ce28a1255c · 2026-02-26T14:44:06.000-08:00
Summary:

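Include ABANDONED trials in the trace by carrying forward the last best value. This ensures the trace has one value per trial, reflecting that ABANDONED trials consumed resources but didn't improve optimization. The behavior is opt-in through the new `include_abandoned` argument of `get_trace` (default `False`). The benchmark now passes the original trial statuses to the oracle experiment through the new `trial_statuses` argument of `get_oracle_experiment_from_params` and computes the oracle trace with `include_abandoned=True`.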

Differential Revision: D86833965
diff --git a/ax/benchmark/benchmark.py b/ax/benchmark/benchmark.py
@@ -48,6 +48,7 @@
 )
 from ax.core.search_space import SearchSpace
 from ax.core.trial import BaseTrial, Trial
+from ax.core.trial_status import TrialStatus
 from ax.core.types import TParameterization, TParamValue
 from ax.core.utils import get_model_times
 from ax.early_stopping.strategies.base import BaseEarlyStoppingStrategy
@@ -157,6 +158,7 @@ def get_benchmark_runner(
 def get_oracle_experiment_from_params(
     problem: BenchmarkProblem,
     dict_of_dict_of_params: Mapping[int, Mapping[str, Mapping[str, TParamValue]]],
+    trial_statuses: Mapping[int, TrialStatus] | None = None,
 ) -> Experiment:
     """
     Get a new experiment with the same search space and optimization config
@@ -170,6 +172,12 @@ def get_oracle_experiment_from_params(
             config for generating an experiment.
         dict_of_dict_of_params: Keys are trial indices, values are Mappings
             (e.g. dicts) that map arm names to parameterizations.
+        trial_statuses: Optional mapping from trial indices to their statuses.
+            If provided, `ABANDONED`, `FAILED`, and `EARLY_STOPPED` statuses
+            are applied to the matching oracle-experiment trials, and any
+            other status is treated as completed. This preserves the original
+            experiment's statuses so `ABANDONED` trials are accounted for.
+            If not provided, trials will be set to completed.
 
     Example:
         >>> get_oracle_experiment_from_params(
@@ -215,11 +223,33 @@ def get_oracle_experiment_from_params(
         trial = experiment.trials[trial_index]
         metadata = runner.run(trial=trial)
         trial.update_run_metadata(metadata=metadata)
-        trial.mark_completed()
+
+        # Determine the status for the trial in the oracle experiment.
+        # Mark ABANDONED and FAILED immediately (they don't require data).
+        # EARLY_STOPPED requires data, so mark as completed for now and
+        # defer the status change until after fetch_data().
+        if trial_statuses is not None:
+            status = trial_statuses[trial_index]
+        else:
+            status = TrialStatus.COMPLETED
+
+        if status == TrialStatus.ABANDONED:
+            trial.mark_abandoned()
+        elif status == TrialStatus.FAILED:
+            trial.mark_failed()
+        else:
+            trial.mark_completed()
 
     logger.setLevel(level=original_log_level)
 
     experiment.fetch_data()
+
+    # Apply EARLY_STOPPED status after data is available, since
+    # mark_early_stopped() requires data on the trial.
+    if trial_statuses is not None:
+        for trial_index, status in trial_statuses.items():
+            if status == TrialStatus.EARLY_STOPPED:
+                experiment.trials[trial_index].mark_early_stopped(unsafe=True)
     return experiment
 
 
@@ -451,13 +481,22 @@ def get_benchmark_result_from_experiment_and_gs(
         for new_trial_index, trials in enumerate(trial_completion_order)
     }
 
+    # Create trial_statuses mapping to preserve trial status in oracle experiment
+    trial_statuses = {
+        trial_index: experiment.trials[trial_index].status
+        for trial_index in dict_of_dict_of_params.keys()
+    }
+
     actual_params_oracle_dummy_experiment = get_oracle_experiment_from_params(
-        problem=problem, dict_of_dict_of_params=dict_of_dict_of_params
+        problem=problem,
+        dict_of_dict_of_params=dict_of_dict_of_params,
+        trial_statuses=trial_statuses,
     )
     oracle_trace = np.array(
         get_trace(
             experiment=actual_params_oracle_dummy_experiment,
             optimization_config=problem.optimization_config,
+            include_abandoned=True,
         )
     )
     is_feasible_trace = np.array(
diff --git a/ax/benchmark/tests/test_benchmark.py b/ax/benchmark/tests/test_benchmark.py
@@ -68,6 +68,7 @@
     get_single_objective_benchmark_problem,
     get_soo_surrogate,
 )
+from ax.core.base_trial import TrialStatus
 from ax.core.experiment import Experiment
 from ax.core.objective import MultiObjective
 from ax.early_stopping.strategies.threshold import ThresholdEarlyStoppingStrategy
@@ -1014,6 +1015,52 @@ def test_get_oracle_experiment_from_params(self) -> None:
                 problem=problem, dict_of_dict_of_params={0: {}}
             )
 
+        with self.subTest("trial_statuses"):
+            trial_statuses = {
+                0: TrialStatus.COMPLETED,
+                1: TrialStatus.ABANDONED,
+            }
+            experiment = get_oracle_experiment_from_params(
+                problem=problem,
+                dict_of_dict_of_params={
+                    0: {"0": near_opt_params},
+                    1: {"1": other_params},
+                },
+                trial_statuses=trial_statuses,
+            )
+            self.assertEqual(len(experiment.trials), 2)
+            self.assertTrue(experiment.trials[0].status.is_completed)
+            self.assertEqual(experiment.trials[1].status, TrialStatus.ABANDONED)
+
+        with self.subTest("trial_statuses with FAILED and EARLY_STOPPED"):
+            trial_statuses = {
+                0: TrialStatus.FAILED,
+                1: TrialStatus.EARLY_STOPPED,
+            }
+            experiment = get_oracle_experiment_from_params(
+                problem=problem,
+                dict_of_dict_of_params={
+                    0: {"0": near_opt_params},
+                    1: {"1": other_params},
+                },
+                trial_statuses=trial_statuses,
+            )
+            self.assertEqual(experiment.trials[0].status, TrialStatus.FAILED)
+            self.assertEqual(experiment.trials[1].status, TrialStatus.EARLY_STOPPED)
+
+        with self.subTest("trial_statuses=None defaults to COMPLETED"):
+            experiment = get_oracle_experiment_from_params(
+                problem=problem,
+                dict_of_dict_of_params={
+                    0: {"0": near_opt_params},
+                    1: {"1": other_params},
+                },
+                trial_statuses=None,
+            )
+            self.assertTrue(
+                all(t.status.is_completed for t in experiment.trials.values())
+            )
+
     def _test_multi_fidelity_or_multi_task(
         self, fidelity_or_task: Literal["fidelity", "task"]
     ) -> None:
diff --git a/ax/service/tests/test_best_point.py b/ax/service/tests/test_best_point.py
@@ -183,6 +183,50 @@ def test_get_trace(self) -> None:
         exp.attach_data(Data(df=pd.DataFrame.from_records(df_dict2)))
         self.assertEqual(get_trace(exp), [2.0, 20.0])
 
+    def test_get_trace_include_abandoned(self) -> None:
+        with self.subTest("minimize with abandoned trial"):
+            exp = get_experiment_with_observations(
+                observations=[[11], [10], [9], [15], [5]], minimize=True
+            )
+            # Mark trial 2 (value=9) as abandoned
+            exp.trials[2].mark_abandoned(unsafe=True)
+
+            # Without include_abandoned (default): abandoned trial excluded
+            trace_default = get_trace(exp)
+            self.assertEqual(trace_default, [11, 10, 10, 5])
+
+            # With include_abandoned=True: abandoned trial carries forward
+            trace_with_abandoned = get_trace(exp, include_abandoned=True)
+            self.assertEqual(len(trace_with_abandoned), 5)
+            # Trials 0-1: 11, 10; trial 2 (abandoned) carries 10; trials 3-4: 10, 5
+            self.assertEqual(trace_with_abandoned, [11, 10, 10, 10, 5])
+
+        with self.subTest("maximize with abandoned trial"):
+            exp = get_experiment_with_observations(
+                observations=[[1], [3], [2], [5], [4]], minimize=False
+            )
+            # Mark trial 1 (value=3) as abandoned
+            exp.trials[1].mark_abandoned(unsafe=True)
+
+            # Without include_abandoned: only 4 values
+            trace_default = get_trace(exp)
+            self.assertEqual(trace_default, [1, 2, 5, 5])
+
+            # With include_abandoned: 5 values, carry forward
+            trace_with_abandoned = get_trace(exp, include_abandoned=True)
+            self.assertEqual(len(trace_with_abandoned), 5)
+            # Trial 0: 1, Trial 1 (abandoned): carry forward 1,
+            # Trial 2: 2, Trial 3: 5, Trial 4: 5
+            self.assertEqual(trace_with_abandoned, [1, 1, 2, 5, 5])
+
+        with self.subTest("include_abandoned=False is default"):
+            exp = get_experiment_with_observations(
+                observations=[[11], [10], [9]], minimize=True
+            )
+            trace_explicit = get_trace(exp, include_abandoned=False)
+            trace_default = get_trace(exp)
+            self.assertEqual(trace_explicit, trace_default)
+
     def test_get_trace_with_include_status_quo(self) -> None:
         with self.subTest("Multi-objective: status quo dominates in some trials"):
             # Create experiment with multi-objective optimization where status quo
diff --git a/ax/service/utils/best_point.py b/ax/service/utils/best_point.py
@@ -1041,6 +1041,7 @@ def get_trace_by_arm_pull_from_data(
 def get_trace(
     experiment: Experiment,
     optimization_config: OptimizationConfig | None = None,
+    include_abandoned: bool = False,
     include_status_quo: bool = False,
 ) -> list[float]:
     """Compute the optimization trace at each iteration.
@@ -1069,6 +1070,11 @@ def get_trace(
         include_status_quo: If True, include status quo in the trace computation.
             If False (default), exclude status quo for compatibility with legacy
             behavior.
+        include_abandoned: If True, return one value per trial: any trial that
+            is not COMPLETED or EARLY_STOPPED (e.g. ABANDONED) carries forward
+            the last best value (-inf/+inf when maximizing/minimizing if there
+            is none yet), as it used resources without improving optimization.
+            If False (default), only COMPLETED and EARLY_STOPPED trials are included.
 
     Returns:
         A list of performance values at each iteration.
@@ -1128,7 +1134,34 @@ def get_trace(
         value_by_trial = trial_grouped.min()
         cumulative_value = np.minimum.accumulate(value_by_trial)
 
-    return cumulative_value.tolist()
+    compact_trace = cumulative_value.tolist()
+
+    # If not including abandoned trials, return early
+    if not include_abandoned:
+        return compact_trace
+
+    # Expand trace to include ABANDONED trials with carry-forward values
+    expanded_trace = []
+    compact_idx = 0
+    last_best_value = -float("inf") if maximize else float("inf")
+
+    for trial_index in sorted(experiment.trials.keys()):
+        trial = experiment.trials[trial_index]
+        if trial.status in (TrialStatus.COMPLETED, TrialStatus.EARLY_STOPPED):
+            # Use value from compact trace
+            if compact_idx < len(compact_trace):
+                value = compact_trace[compact_idx]
+                expanded_trace.append(value)
+                last_best_value = value
+                compact_idx += 1
+            else:
+                # Should not happen, but handle gracefully
+                expanded_trace.append(last_best_value)
+        else:
+            # ABANDONED or other status: carry forward last best value
+            expanded_trace.append(last_best_value)
+
+    return expanded_trace
 
 
 def get_tensor_converter_adapter(