Skip to content

Commit 3712dd6

Browse files
sdaulton and meta-codesync[bot]
authored and committed
Handle search space decode failures in _query_historical_experiments_given_parameters (#5147)
Summary: Pull Request resolved: #5147. `_query_historical_experiments_given_parameters` could fail entirely if any single historical experiment had a corrupt or incompatible search space that couldn't be decoded. This change wraps the `decoder.search_space_from_sqa` call in a try-except so that a decode failure for an individual experiment is logged as a warning and that experiment's search space is returned as `None`, allowing the remaining experiments to be returned successfully. Reviewed By: hvarfner. Differential Revision: D98981525. fbshipit-source-id: 60cbd441f08a1260c117e1d6ef6d3aa2c96c4632
1 parent c08c4a9 commit 3712dd6

2 files changed

Lines changed: 67 additions & 8 deletions

File tree

ax/storage/sqa_store/load.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -787,18 +787,22 @@ def _query_historical_experiments_given_parameters(
787787
experiments_params[exp_name].append(sqa_param)
788788
experiments_time_created[exp_name] = time_created
789789

790-
return {
791-
exp_name: (
792-
decoder.search_space_from_sqa(
790+
results: dict[str, tuple[SearchSpace | None, datetime]] = {}
791+
for exp_name, parameters_sqa in experiments_params.items():
792+
try:
793+
search_space = decoder.search_space_from_sqa(
793794
parameters_sqa=parameters_sqa,
794795
# Parameter constraints don't matter for search space
795796
# compatibility
796797
parameter_constraints_sqa=[],
797-
),
798-
experiments_time_created[exp_name],
799-
)
800-
for exp_name, parameters_sqa in experiments_params.items()
801-
}
798+
)
799+
except Exception as e:
800+
logger.warning(
801+
f"Failed to decode search space for experiment '{exp_name}': {e}"
802+
)
803+
search_space = None
804+
results[exp_name] = (search_space, experiments_time_created[exp_name])
805+
return results
802806

803807

804808
def identify_transferable_experiments(

ax/storage/sqa_store/tests/test_sqa_store.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3236,6 +3236,61 @@ def test_query_historical_experiments_given_parameters(self) -> None:
32363236
self.assertIn("w", none_throws(returned_ss).parameters)
32373237
self.assertIn("x", none_throws(returned_ss).parameters)
32383238

3239+
with self.subTest("returns_none_search_space_on_decode_failure"):
3240+
# Save two experiments
3241+
exp1 = get_experiment_with_batch_trial()
3242+
exp1.name = "exp_decode_success"
3243+
exp1.experiment_type = "TEST"
3244+
exp1.is_test = False
3245+
trial1 = exp1.trials[0]
3246+
exp1.attach_data(get_data(trial_index=trial1.index))
3247+
save_experiment(exp1, config=config)
3248+
3249+
exp2 = get_experiment_with_batch_trial()
3250+
exp2.name = "exp_decode_failure"
3251+
exp2.experiment_type = "TEST"
3252+
exp2.is_test = False
3253+
trial2 = exp2.trials[0]
3254+
exp2.attach_data(get_data(trial_index=trial2.index))
3255+
save_experiment(exp2, config=config)
3256+
3257+
# Look up exp2's ID before mocking to avoid nested session_scope
3258+
with session_scope() as session:
3259+
exp2_id: int = (
3260+
session.query(SQAExperiment.id)
3261+
.filter(SQAExperiment.name == "exp_decode_failure")
3262+
.scalar()
3263+
)
3264+
3265+
# Mock decoder to raise on the second experiment's parameters
3266+
original_search_space_from_sqa: Callable[..., SearchSpace | None] = (
3267+
Decoder.search_space_from_sqa
3268+
)
3269+
3270+
def _side_effect(self: Decoder, **kwargs: Any) -> SearchSpace | None:
3271+
params = kwargs.get("parameters_sqa", [])
3272+
exp_ids = {p.experiment_id for p in params}
3273+
if exp_ids == {exp2_id}:
3274+
raise RuntimeError("Simulated decode failure")
3275+
return original_search_space_from_sqa(self, **kwargs)
3276+
3277+
with patch.object(Decoder, "search_space_from_sqa", _side_effect):
3278+
result = _query_historical_experiments_given_parameters(
3279+
parameter_names=["w", "x"],
3280+
experiment_types=["TEST"],
3281+
config=config,
3282+
)
3283+
3284+
# The successfully decoded experiment should have a SearchSpace
3285+
self.assertIn("exp_decode_success", result)
3286+
ss_success, _ = result["exp_decode_success"]
3287+
self.assertIsNotNone(ss_success)
3288+
3289+
# The failed experiment should have None search space
3290+
self.assertIn("exp_decode_failure", result)
3291+
ss_failure, _ = result["exp_decode_failure"]
3292+
self.assertIsNone(ss_failure)
3293+
32393294
def test_identify_transferable_experiments(
32403295
self,
32413296
) -> None:

0 commit comments

Comments
 (0)