We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 749f6d4, commit 8553751 (Copy full SHA for 8553751)
1 file changed
swift/megatron/trainers/base.py
@@ -446,6 +446,8 @@ def _get_param_groups(
446
param_group['max_lr'] = lr
447
param_group['min_lr'] = min_lr
448
lr_mult = param_group.pop('lr_mult')
449
+ # Instead of using lr_mult to control the learning rate, we directly use max_lr/min_lr.
450
+ param_group['lr_mult'] = 1.
451
param_group['max_lr'] *= lr_mult
452
param_group['min_lr'] *= lr_mult
453
return param_groups
0 commit comments