Skip to content

Commit 316dbad

Browse files
mgrange1998 and meta-codesync[bot]
authored and committed
Add exponential backoff to Ax DB retry operations (#5104)
Summary:

Pull Request resolved: #5104

The Axolotl experiment `igfr_h2_toprank_brew_ax_tuning` failed with a MySQL OperationalError (1290) during a database failover while saving analysis cards. The MySQL server was temporarily in read-only mode during master switchover.

The existing `retry_on_exception` decorator on DB save/update functions in `with_db_settings_base.py` correctly catches `OperationalError` and retries up to 3 times, but it had no wait between retries (`initial_wait_seconds` was not set). This means all 3 retries fired immediately and all failed because the failover hadn't completed yet.

This diff adds `initial_wait_seconds=5` to all 7 retry-decorated DB operation functions. This enables exponential backoff between retries:

- 1st attempt: immediate
- 2nd attempt: after a 5-second wait
- 3rd attempt: after a 10-second wait

This gives MySQL failovers up to 15 seconds to complete, which should be sufficient for typical failover scenarios.

The `initial_wait_seconds` parameter is already supported by the `retry_on_exception` decorator in `ax.utils.common.executils` — it was simply not being used.

Functions updated:

- `_save_experiment_to_db_if_possible`
- `_save_or_update_trials_in_db_if_possible`
- `_save_generation_strategy_to_db_if_possible`
- `_update_generation_strategy_in_db_if_possible`
- `_update_runner_on_experiment_in_db_if_possible`
- `_update_experiment_properties_in_db`
- `_save_analysis_card_to_db`

Reviewed By: mpolson64

Differential Revision: D98166115

fbshipit-source-id: 2d32aa4b26ac3e08cc95ecf8335899e75ca2c86b
1 parent 6715f6e commit 316dbad

1 file changed

Lines changed: 7 additions & 0 deletions

File tree

ax/storage/sqa_store/with_db_settings_base.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -498,6 +498,7 @@ def _save_analysis_card_to_db_if_possible(
498498
retries=3,
499499
default_return_on_suppression=False,
500500
exception_types=RETRY_EXCEPTION_TYPES,
501+
initial_wait_seconds=5,
501502
)
502503
def _save_experiment_to_db_if_possible(
503504
experiment: Experiment,
@@ -521,6 +522,7 @@ def _save_experiment_to_db_if_possible(
521522
retries=3,
522523
default_return_on_suppression=False,
523524
exception_types=RETRY_EXCEPTION_TYPES,
525+
initial_wait_seconds=5,
524526
)
525527
def _save_or_update_trials_in_db_if_possible(
526528
experiment: Experiment,
@@ -550,6 +552,7 @@ def _save_or_update_trials_in_db_if_possible(
550552
retries=3,
551553
default_return_on_suppression=False,
552554
exception_types=RETRY_EXCEPTION_TYPES,
555+
initial_wait_seconds=5,
553556
)
554557
def _save_generation_strategy_to_db_if_possible(
555558
generation_strategy: GenerationStrategy,
@@ -573,6 +576,7 @@ def _save_generation_strategy_to_db_if_possible(
573576
retries=3,
574577
default_return_on_suppression=False,
575578
exception_types=RETRY_EXCEPTION_TYPES,
579+
initial_wait_seconds=5,
576580
)
577581
def _update_generation_strategy_in_db_if_possible(
578582
generation_strategy: GenerationStrategy,
@@ -602,6 +606,7 @@ def _update_generation_strategy_in_db_if_possible(
602606
retries=3,
603607
default_return_on_suppression=False,
604608
exception_types=RETRY_EXCEPTION_TYPES,
609+
initial_wait_seconds=5,
605610
)
606611
def _update_runner_on_experiment_in_db_if_possible(
607612
experiment: Experiment,
@@ -619,6 +624,7 @@ def _update_runner_on_experiment_in_db_if_possible(
619624
retries=3,
620625
default_return_on_suppression=False,
621626
exception_types=RETRY_EXCEPTION_TYPES,
627+
initial_wait_seconds=5,
622628
)
623629
def _update_experiment_properties_in_db(
624630
experiment_with_updated_properties: Experiment,
@@ -635,6 +641,7 @@ def _update_experiment_properties_in_db(
635641
retries=3,
636642
default_return_on_suppression=False,
637643
exception_types=RETRY_EXCEPTION_TYPES,
644+
initial_wait_seconds=5,
638645
)
639646
def _save_analysis_card_to_db(
640647
experiment: Experiment,

0 commit comments

Comments
 (0)