Allow custom tolerated_trial_failure_rate

Sunny Shen · facebook-github-bot · commit 5966881774dc · 2026-02-26T10:52:36.000-08:00
Summary: Allow custom tolerated_trial_failure_rate for failure aware benchmark problems where Standard BO without failure awareness is expected to have a high trial failure rate

Differential Revision: D87092423
diff --git a/ax/benchmark/benchmark.py b/ax/benchmark/benchmark.py
@@ -260,6 +260,7 @@ def get_benchmark_orchestrator_options(
     early_stopping_strategy: BaseEarlyStoppingStrategy | None,
     include_status_quo: bool = False,
     logging_level: int = DEFAULT_LOG_LEVEL,
+    tolerated_trial_failure_rate: float = 0.5,
 ) -> OrchestratorOptions:
     """
     Get the ``OrchestratorOptions`` for the given ``BenchmarkMethod``.
@@ -274,6 +275,8 @@ def get_benchmark_orchestrator_options(
         early_stopping_strategy: The early stopping strategy to use (if any).
         include_status_quo: Whether to include the status quo in each trial.
         logging_level: The logging level to use for the Orchestrator.
+        tolerated_trial_failure_rate: Fraction of trials allowed to fail without
+            aborting the optimization. Expects value between 0 and 1. Default is 0.5.
 
     Returns:
         ``OrchestratorOptions``
@@ -295,6 +298,7 @@ def get_benchmark_orchestrator_options(
         early_stopping_strategy=early_stopping_strategy,
         status_quo_weight=1.0 if include_status_quo else 0.0,
         logging_level=logging_level,
+        tolerated_trial_failure_rate=tolerated_trial_failure_rate,
     )
 
 
@@ -578,6 +582,7 @@ def run_optimization_with_orchestrator(
     run_trials_in_batches: bool = False,
     timeout_hours: float | None = None,
     orchestrator_logging_level: int = DEFAULT_LOG_LEVEL,
+    tolerated_trial_failure_rate: float = 0.5,
 ) -> Experiment:
     """
     Optimize the ``problem`` using the ``method`` and ``Orchestrator``, seeding
@@ -614,6 +619,7 @@ def run_optimization_with_orchestrator(
         early_stopping_strategy=method.early_stopping_strategy,
         include_status_quo=sq_arm is not None,
         logging_level=orchestrator_logging_level,
+        tolerated_trial_failure_rate=tolerated_trial_failure_rate,
     )
 
     # Use custom runner if provided on the problem, otherwise create standard runner
@@ -671,6 +677,7 @@ def benchmark_replication(
     timeout_hours: float = 4.0,
     orchestrator_logging_level: int = DEFAULT_LOG_LEVEL,
     strip_runner_before_saving: bool = True,
+    tolerated_trial_failure_rate: float = 0.5,
 ) -> BenchmarkResult:
     """
     Run one benchmarking replication (equivalent to one optimization loop).
@@ -708,6 +715,7 @@ def benchmark_replication(
         run_trials_in_batches=run_trials_in_batches,
         timeout_hours=timeout_hours,
         orchestrator_logging_level=orchestrator_logging_level,
+        tolerated_trial_failure_rate=tolerated_trial_failure_rate,
     )
 
     benchmark_result = get_benchmark_result_from_experiment_and_gs(
@@ -789,6 +797,7 @@ def benchmark_one_method_problem(
     run_trials_in_batches: bool = False,
     timeout_hours: float = 4.0,
     orchestrator_logging_level: int = DEFAULT_LOG_LEVEL,
+    tolerated_trial_failure_rate: float = 0.5,
 ) -> AggregatedBenchmarkResult:
     return AggregatedBenchmarkResult.from_benchmark_results(
         results=[
@@ -799,6 +808,7 @@ def benchmark_one_method_problem(
                 run_trials_in_batches=run_trials_in_batches,
                 timeout_hours=timeout_hours,
                 orchestrator_logging_level=orchestrator_logging_level,
+                tolerated_trial_failure_rate=tolerated_trial_failure_rate,
             )
             for seed in seeds
         ]
@@ -812,6 +822,7 @@ def benchmark_multiple_problems_methods(
     run_trials_in_batches: bool = False,
     timeout_hours: float = 4.0,
     orchestrator_logging_level: int = DEFAULT_LOG_LEVEL,
+    tolerated_trial_failure_rate: float = 0.5,
 ) -> list[AggregatedBenchmarkResult]:
     """
     For each `problem` and `method` in the Cartesian product of `problems` and
@@ -827,6 +838,7 @@ def benchmark_multiple_problems_methods(
             run_trials_in_batches=run_trials_in_batches,
             timeout_hours=timeout_hours,
             orchestrator_logging_level=orchestrator_logging_level,
+            tolerated_trial_failure_rate=tolerated_trial_failure_rate,
         )
         for p, m in product(problems, methods)
     ]
diff --git a/ax/benchmark/tests/test_benchmark.py b/ax/benchmark/tests/test_benchmark.py
@@ -1131,6 +1131,18 @@ def test_get_benchmark_orchestrator_options(self) -> None:
             self.assertEqual(
                 orchestrator_options.status_quo_weight, 1.0 if include_sq else 0.0
             )
+            # Default tolerated_trial_failure_rate should be 0.5
+            self.assertEqual(orchestrator_options.tolerated_trial_failure_rate, 0.5)
+
+        with self.subTest("custom tolerated_trial_failure_rate"):
+            orchestrator_options = get_benchmark_orchestrator_options(
+                batch_size=1,
+                run_trials_in_batches=False,
+                max_pending_trials=2,
+                early_stopping_strategy=None,
+                tolerated_trial_failure_rate=0.9,
+            )
+            self.assertEqual(orchestrator_options.tolerated_trial_failure_rate, 0.9)
 
     def test_replication_with_status_quo(self) -> None:
         method = BenchmarkMethod(