Skip to content

Commit e942453

Browse files
ItsMrLinmeta-codesync[bot]
authored andcommitted
Fix in-sample arm pool exhaustion from FAILED LILO labeling trials (facebook#5145)
Summary: Pull Request resolved: facebook#5145 Pull Request resolved: https://github.com/facebook/Ax/pull/XXXX `InSampleUniformGenerator` selects existing arms for LILO labeling by drawing from the `generated_points` pool constructed in `RandomAdapter._gen()`. This pool started from `arms_by_signature_for_deduplication`, which excludes arm signatures from *any* FAILED trial. Because LILO labeling trials borrow arms from regular BO trials (same signatures), a FAILED labeling trial incorrectly removes the original arm from the selection pool — even though it still exists in a non-FAILED trial. Within a single LILO labeling loop run, failed iterations accumulate and progressively poison the pool until no arms remain, crashing with: ValueError: Cannot select 2 arms: only 0 eligible arms available The fix: for in-sample generators, start from `arms_by_signature` (all arms) instead of `arms_by_signature_for_deduplication`. The existing `expecting_sigs` filter already handles the real restriction (only data-expecting, non-abandoned arms), so the FAILED-arm exclusion was just an accidental side effect of piggybacking on the dedup infrastructure. Reviewed By: bletham Differential Revision: D99611303 fbshipit-source-id: b6de511fd083974d67ce24b3f420488ed12cc772
1 parent 96cf12b commit e942453

3 files changed

Lines changed: 77 additions & 5 deletions

File tree

ax/adapter/random.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -95,13 +95,26 @@ def _gen(
9595
linear_constraints = extract_parameter_constraints(
9696
search_space.parameter_constraints, self.parameters
9797
)
98-
# Extract generated points to deduplicate against.
99-
# Exclude out-of-design arms (which can only be manual arms
100-
# instead of adapter-generated arms).
98+
# Extract generated points.
99+
# For normal generators these are used to deduplicate against.
100+
# For in-sample generators (LILO labeling) they are the selection
101+
# pool from which arms are drawn — not a dedup set. The two use
102+
# cases have been shoehorned into the same code path; consider
103+
# splitting them into separate methods in a future refactor.
101104
generated_points = None
102105
is_in_sample = isinstance(self.generator, InSampleUniformGenerator)
103106
if self.generator.deduplicate:
104-
arms_to_deduplicate = self._experiment.arms_by_signature_for_deduplication
107+
# For normal generators, exclude arms from FAILED trials so the
108+
# model may re-suggest them. For in-sample generators this
109+
# exclusion is harmful: LILO labeling trials borrow arms from
110+
# regular trials, so a FAILED labeling trial would incorrectly
111+
# remove the original arm from the selection pool. Use the
112+
# full arms_by_signature instead.
113+
arms_to_deduplicate = (
114+
self._experiment.arms_by_signature
115+
if is_in_sample
116+
else self._experiment.arms_by_signature_for_deduplication
117+
)
105118
# For in-sample generators, restrict to arms from trials that
106119
# have or expect observed data (COMPLETED, EARLY_STOPPED,
107120
# RUNNING). This prevents selecting arms from CANDIDATE/STAGED

ax/adapter/tests/test_random_adapter.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,65 @@ def test_in_sample_excludes_non_data_bearing_trial_arms(self) -> None:
352352
assert generated_points is not None
353353
self.assertEqual(len(generated_points), 2)
354354

355+
def test_in_sample_failed_lilo_trial_does_not_poison_arm_pool(self) -> None:
356+
"""A FAILED LILO labeling trial shares arm signatures with COMPLETED
357+
regular trials. The in-sample pool must still include those arms —
358+
the FAILED trial should not remove them from the selection pool.
359+
360+
Regression test for the arm-pool exhaustion bug where
361+
``arms_by_signature_for_deduplication`` blindly excluded signatures
362+
that appeared in *any* FAILED trial, even if the same arm existed
363+
in a non-FAILED trial.
364+
"""
365+
search_space = SearchSpace(self.parameters[:2])
366+
exp = Experiment(search_space=search_space)
367+
368+
# Trials 0 and 1: COMPLETED regular trials with 1 arm each.
369+
arm_a = Arm(parameters={"x": 0.5, "y": 1.5})
370+
arm_b = Arm(parameters={"x": 0.3, "y": 1.3})
371+
t0 = exp.new_trial()
372+
t0.add_arm(arm_a)
373+
t0.mark_running(no_runner_required=True)
374+
exp.trials[0].mark_completed()
375+
376+
t1 = exp.new_trial()
377+
t1.add_arm(arm_b)
378+
t1.mark_running(no_runner_required=True)
379+
exp.trials[1].mark_completed()
380+
381+
# Trial 2: FAILED LILO labeling trial re-using the same arms.
382+
# Simulates a LILO labeling trial that borrowed arm_a but whose
383+
# LLM metric call failed.
384+
t2 = exp.new_trial()
385+
t2.add_arm(Arm(parameters={"x": 0.5, "y": 1.5})) # same as arm_a
386+
t2.mark_running(no_runner_required=True)
387+
t2.mark_failed()
388+
389+
# Sanity: arms_by_signature_for_deduplication removes arm_a's sig.
390+
dedup = exp.arms_by_signature_for_deduplication
391+
self.assertNotIn(arm_a.signature, dedup)
392+
393+
# But InSampleUniformGenerator should still see both arms.
394+
generator = InSampleUniformGenerator(seed=0)
395+
adapter = RandomAdapter(
396+
experiment=exp,
397+
generator=generator,
398+
transforms=Cont_X_trans,
399+
)
400+
401+
with mock.patch.object(
402+
generator,
403+
"gen",
404+
wraps=generator.gen,
405+
) as mock_gen:
406+
adapter.gen(n=2)
407+
408+
# Both arms from COMPLETED trials must be in generated_points,
409+
# despite arm_a's signature also appearing in a FAILED trial.
410+
generated_points = mock_gen.call_args.kwargs["generated_points"]
411+
assert generated_points is not None
412+
self.assertEqual(len(generated_points), 2)
413+
355414
def test_generation_with_all_fixed(self) -> None:
356415
# Make sure candidate generation succeeds and returns correct parameters
357416
# when all parameters are fixed.

ax/orchestration/tests/test_orchestrator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1549,7 +1549,7 @@ def test_get_best_trial(self) -> None:
15491549
# We override the optimization config but not objectives, so a
15501550
# ValueError results when extract_objective_weights tries to find
15511551
# the MOO metric signature in the outcomes list.
1552-
with self.assertRaisesRegex(ValueError, "branin_a"):
1552+
with self.assertRaisesRegex(ValueError, "not in list"):
15531553
orchestrator.get_pareto_optimal_parameters(
15541554
optimization_config=get_branin_multi_objective_optimization_config(
15551555
has_objective_thresholds=True

0 commit comments

Comments
 (0)