Skip to content

Commit 2116caf

Browse files
authored
AdamW with lambda = 0.01 as a default
1 parent 4ff61fc commit 2116caf

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

src/rules.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,7 @@ function apply!(o::NAdam, state, x::AbstractArray{T}, dx) where T
499499
end
500500

501501
"""
502-
AdamW(η = 0.001, β = (0.9, 0.999), λ = 0, ϵ = 1e-8; couple = true)
502+
AdamW(η = 0.001, β = (0.9, 0.999), λ = 0.01, ϵ = 1e-8; couple = true)
503503
AdamW(; [eta, beta, lambda, epsilon, couple])
504504
505505
[AdamW](https://arxiv.org/abs/1711.05101) is a variant of Adam fixing (as in repairing) its
@@ -534,12 +534,12 @@ struct AdamW{Teta,Tbeta<:Tuple,Tlambda,Teps} <: AbstractRule
534534
couple::Bool
535535
end
536536

537-
function AdamW(η, β = (0.9, 0.999), λ = 0.0, ϵ = 1e-8; couple::Bool = true)
537+
function AdamW(η, β = (0.9, 0.999), λ = 0.01, ϵ = 1e-8; couple::Bool = true)
538538
η < 0 && throw(DomainError(η, "the learning rate cannot be negative"))
539539
return AdamW(float(η), β, float(λ), float(ϵ), couple)
540540
end
541541

542-
AdamW(; eta = 0.001, beta = (0.9, 0.999), lambda= 0.0, epsilon = 1e-8, kw...) =
542+
AdamW(; eta = 0.001, beta = (0.9, 0.999), lambda= 0.01, epsilon = 1e-8, kw...) =
543543
AdamW(eta, beta, lambda, epsilon; kw...)
544544

545545
init(o::AdamW, x::AbstractArray{T}) where T = (zero(x), zero(x), T.(o.beta))

0 commit comments

Comments
 (0)