Commit 6839657
minhua-chen authored and facebook-github-bot committed
Make iter persistent for AdagradW
Summary: Make iter persistent for AdagradW optimizer state saving, so that the iter information is not lost when training is restarted from a checkpoint.

Differential Revision: D74717848
1 parent: cbc7dbb

File tree: 1 file changed (+7, −0 lines)

fbgemm_gpu/fbgemm_gpu/split_table_batched_embeddings_ops_training.py

Lines changed: 7 additions & 0 deletions
@@ -1359,6 +1359,13 @@ def __init__(  # noqa C901
                 OptimType.EMAINPLACE_ROWWISE_ADAGRAD,
             )
             or self._used_rowwise_adagrad_with_global_weight_decay
+            or (
+                self._used_rowwise_adagrad_with_counter
+                and self.optimizer_args.regularization_mode
+                == WeightDecayMode.COUNTER.value
+                and self.optimizer_args.weight_decay_mode
+                == CounterWeightDecayMode.ADAGRADW.value
+            )
         ):
             self.register_buffer(
                 "iter",
