LuxDL
diff --git a/‎test/misc/doctests.jl‎
Lines changed: 9 additions & 0 deletions b/‎test/misc/doctests.jl‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎test/misc/helpers/adtypes_tests.jl‎
Lines changed: 33 additions & 0 deletions b/‎test/misc/helpers/adtypes_tests.jl‎
Lines changed: 33 additions & 0 deletions
diff --git a/‎test/misc/helpers/training_api_tests.jl‎
Lines changed: 115 additions & 0 deletions b/‎test/misc/helpers/training_api_tests.jl‎
Lines changed: 115 additions & 0 deletions
diff --git a/‎test/misc/helpers/training_enzyme_tests.jl‎
Lines changed: 125 additions & 0 deletions b/‎test/misc/helpers/training_enzyme_tests.jl‎
Lines changed: 125 additions & 0 deletions
diff --git a/‎test/misc/helpers/training_forwarddiff_tests.jl‎
Lines changed: 51 additions & 0 deletions b/‎test/misc/helpers/training_forwarddiff_tests.jl‎
Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,9 @@
+using Lux, Documenter
+
+# Runs the doctests attached to Lux as part of the test suite.
+# The note originally attached here: some of the tests are flaky on prereleases.
+# Nothing in this file gates on the Julia version, though.
+@testset "doctests: Quality Assurance" begin
+    # The quoted `using` expression is registered as the `:DocTestSetup` metadata
+    # for Lux (with `recursive=true`) before the doctests are run.
+    DocMeta.setdocmeta!(
+        Lux,
+        :DocTestSetup,
+        :(using Adapt, Lux, Random, Optimisers, Zygote, NNlib);
+        recursive=true,
+    )
+    doctest(Lux; manual=false)
+end
@@ -0,0 +1,33 @@
+using ADTypes, Optimisers, Tracker, ReverseDiff, Mooncake, ComponentArrays, Enzyme
+
+include("../../shared_testsetup.jl")
+
+@testset "AbstractADTypes" begin
+    # Objective in the `(model, ps, st, data) -> (loss, state, stats)` form that is
+    # handed to `Training.compute_gradients` below: the loss is the sum of the outputs.
+    function sum_of_outputs(model, ps, st, data)
+        out, st_new = model(data, ps, st)
+        return sum(out), st_new, ()
+    end
+
+    rng = StableRNG(12345)
+
+    @testset "$mode" for (mode, aType, dev, ongpu) in MODES
+        model = Dense(3, 2)
+        opt = Adam(0.01f0)
+        ps, st = dev(Lux.setup(rng, model))
+
+        tstate = Training.TrainState(model, ps, st, opt)
+
+        # A single-column input drawn from a replica of `rng`.
+        x = aType(randn(Lux.replicate(rng), Float32, (3, 1)))
+
+        for ad in (AutoZygote(), AutoTracker(), AutoReverseDiff(), AutoEnzyme())
+            # Skip ReverseDiff/Enzyme on GPU, and Enzyme/Zygote whenever the
+            # corresponding LuxTestUtils switch is turned off.
+            skip_backend =
+                (ongpu && ad isa Union{AutoReverseDiff,AutoEnzyme}) ||
+                (ad isa AutoEnzyme && !LuxTestUtils.ENZYME_TESTING_ENABLED[]) ||
+                (ad isa AutoZygote && !LuxTestUtils.ZYGOTE_TESTING_ENABLED[])
+            skip_backend && continue
+
+            grads = first(Training.compute_gradients(ad, sum_of_outputs, x, tstate))
+            updated = Training.apply_gradients(tstate, grads)
+
+            # One optimiser step was taken and a state different from the input
+            # state was returned.
+            @test updated.step == 1
+            @test tstate != updated
+        end
+    end
+end
@@ -0,0 +1,115 @@
+using ADTypes, Optimisers, Tracker, ReverseDiff, Mooncake, ComponentArrays, Enzyme
+
+include("../../shared_testsetup.jl")
+
+# End-to-end checks of the `Training` API: for every device mode and AD backend,
+# train a small network three different ways and require the summed loss to drop by
+# more than 100x, then exercise `Optimisers.adjust`/`adjust!` on the train state and
+# check that an unsupported AD type raises `ArgumentError`.
+@testset "Training API" begin
+    mse = MSELoss()
+
+    rng = StableRNG(12345)
+
+    # 32 samples with 4 features. The target is the difference of two polynomials in
+    # the input, rescaled to [0, 1] by min-max normalisation.
+    x_data = randn(rng, Float32, 4, 32)
+    y_data = evalpoly.(x_data, ((1, 2, 3),)) .- evalpoly.(x_data, ((5, 2),))
+    y_data = (y_data .- minimum(y_data)) ./ (maximum(y_data) - minimum(y_data))
+    # Four mini-batches of 8 columns each.
+    dataset = [(x_data[:, i], y_data[:, i]) for i in Iterators.partition(1:32, 8)]
+
+    @testset "$mode" for (mode, aType, dev, ongpu) in MODES
+        model = Chain(
+            Dense(4, 32, tanh),
+            BatchNorm(32),
+            Dense(32, 32, tanh),
+            BatchNorm(32),
+            Dense(32, 4),
+        )
+        # Move every batch through `dev` for this mode.
+        dataset_ = [dev((x, y)) for (x, y) in dataset]
+        opt = Adam(0.001f0)
+
+        @testset "$(ad)" for ad in (
+            AutoZygote(), AutoTracker(), AutoReverseDiff(), AutoEnzyme(), AutoMooncake()
+        )
+            # ReverseDiff, Enzyme and Mooncake are skipped when running on GPU.
+            ongpu &&
+                (ad isa AutoReverseDiff || ad isa AutoEnzyme || ad isa AutoMooncake) &&
+                continue
+            # Enzyme and Zygote are skipped when their LuxTestUtils switch is off.
+            !LuxTestUtils.ENZYME_TESTING_ENABLED[] && ad isa AutoEnzyme && continue
+            !LuxTestUtils.ZYGOTE_TESTING_ENABLED[] && ad isa AutoZygote && continue
+
+            # Sum of the MSE objective over all batches, evaluated with the parameters
+            # and states currently stored in `tstate`. `[1]` picks the first element
+            # of what the objective returns.
+            function get_total_loss(model, tstate)
+                loss = 0.0f0
+                for (x, y) in dataset_
+                    loss += mse(model, tstate.parameters, tstate.states, (x, y))[1]
+                end
+                return loss
+            end
+
+            # Variant 1: gradients and the in-place optimiser update as two calls.
+            @testset "compute_gradients + apply_gradients!" begin
+                ps, st = dev(Lux.setup(rng, model))
+                tstate = Training.TrainState(model, ps, st, opt)
+
+                initial_loss = get_total_loss(model, tstate)
+
+                for epoch in 1:1000, (x, y) in dataset_
+                    grads, loss, _, tstate = allow_unstable() do
+                        Training.compute_gradients(ad, mse, (x, y), tstate)
+                    end
+                    tstate = Training.apply_gradients!(tstate, grads)
+                end
+
+                final_loss = get_total_loss(model, tstate)
+                @test final_loss * 100 < initial_loss
+            end
+
+            # Variant 2: the combined step, `!` version.
+            @testset "single_train_step!" begin
+                ps, st = dev(Lux.setup(rng, model))
+                tstate = Training.TrainState(model, ps, st, opt)
+
+                initial_loss = get_total_loss(model, tstate)
+
+                for epoch in 1:1000, (x, y) in dataset_
+                    grads, loss, _, tstate = allow_unstable() do
+                        Training.single_train_step!(ad, mse, (x, y), tstate)
+                    end
+                end
+
+                final_loss = get_total_loss(model, tstate)
+                @test final_loss * 100 < initial_loss
+            end
+
+            # Variant 3: the combined step, non-`!` version.
+            @testset "single_train_step" begin
+                ps, st = dev(Lux.setup(rng, model))
+                tstate = Training.TrainState(model, ps, st, opt)
+
+                initial_loss = get_total_loss(model, tstate)
+
+                for epoch in 1:1000, (x, y) in dataset_
+                    grads, loss, _, tstate = allow_unstable() do
+                        Training.single_train_step(ad, mse, (x, y), tstate)
+                    end
+                end
+
+                final_loss = get_total_loss(model, tstate)
+                @test final_loss * 100 < initial_loss
+            end
+
+            # Test the adjust API
+            # NOTE(review): `tstate` is read here without being rebuilt, so this
+            # presumably operates on the state left behind by the last sub-testset
+            # above — confirm the scoping before reordering these blocks.
+            tstate = Optimisers.adjust(tstate, 0.1f0)
+            @test tstate.optimizer_state.layer_1.weight.rule.eta ≈ 0.1f0
+
+            tstate = Optimisers.adjust(tstate; eta=0.5f0)
+            @test tstate.optimizer_state.layer_1.weight.rule.eta ≈ 0.5f0
+
+            # The `!` variants are called without rebinding `tstate`.
+            Optimisers.adjust!(tstate, 0.01f0)
+            @test tstate.optimizer_state.layer_1.weight.rule.eta ≈ 0.01f0
+
+            Optimisers.adjust!(tstate; eta=0.11f0)
+            @test tstate.optimizer_state.layer_1.weight.rule.eta ≈ 0.11f0
+        end
+
+        # An AD type declared only in this test; requesting gradients with it is
+        # expected to throw `ArgumentError`.
+        struct AutoCustomAD <: ADTypes.AbstractADType end
+
+        ps, st = dev(Lux.setup(rng, model))
+        tstate = Training.TrainState(model, ps, st, opt)
+
+        @test_throws ArgumentError Training.compute_gradients(
+            AutoCustomAD(), mse, dataset_[1], tstate
+        )
+    end
+end
@@ -0,0 +1,125 @@
+using ADTypes, Optimisers, Enzyme
+
+include("../../shared_testsetup.jl")
+
+# Fits a linear target with Enzyme in reverse mode with runtime activity enabled and
+# requires the loss to fall by more than a factor of 100.
+@testset "Training API Enzyme Runtime Mode" begin
+    # With Enzyme testing switched off, record a broken test and stop here.
+    if !LuxTestUtils.ENZYME_TESTING_ENABLED[]
+        @test_broken false
+        return nothing
+    end
+
+    # Model computing `dense(x .+ b)`, where `b` is initialised to ones of length `n`.
+    function makemodel(n)
+        @compact(dense = Dense(n => 1; use_bias=true), b = ones(Float32, n)) do x
+            @return dense(x .+ b)
+        end
+    end
+
+    n_samples, x_dim, y_dim = 20, 10, 1
+
+    model = makemodel(x_dim)
+    rng = Random.default_rng()
+    ps, st = Lux.setup(rng, model)
+
+    # Linear map that generates the targets, plus a small amount of Gaussian noise.
+    W = randn(rng, Float32, y_dim, x_dim)
+    b = randn(rng, Float32, y_dim)
+
+    x_samples = randn(rng, Float32, x_dim, n_samples)
+    y_samples = W * x_samples .+ b .+ 0.01f0 .* randn(rng, Float32, y_dim, n_samples)
+
+    lossfn = MSELoss()
+
+    # Runs `nepochs` full-batch steps of `Training.single_train_step!` and returns the
+    # model, parameters and states held by the final train state.
+    function train_model!(model, ps, st, opt, nepochs::Int)
+        ad = AutoEnzyme(; mode=set_runtime_activity(Reverse))
+        batch = (x_samples, y_samples)
+        tstate = Training.TrainState(model, ps, st, opt)
+        for _ in 1:nepochs
+            _, _, _, tstate = Training.single_train_step!(ad, lossfn, batch, tstate)
+        end
+        return tstate.model, tstate.parameters, tstate.states
+    end
+
+    initial_loss = lossfn(first(model(x_samples, ps, st)), y_samples)
+    model, ps, st = train_model!(model, ps, st, Descent(0.01f0), 10000)
+    final_loss = lossfn(first(model(x_samples, ps, st)), y_samples)
+
+    @test final_loss * 100 < initial_loss
+end
+
+# Checks on the train state returned by `Training.compute_gradients` with Enzyme:
+# the states are replaced when the model updates them, repeated calls on a returned
+# train state are type-inferrable, and switching the objective function still
+# yields a cache carrying both `forward` and `reverse` entries.
+#
+# The body used to be repeated twice verbatim (same seed, same models, same
+# assertions on freshly built train states); the second copy only re-ran identical
+# Enzyme compilations, so a single pass is kept.
+@testset "Enzyme: Invalidate Cache on State Update" begin
+    # With Enzyme testing switched off, record a broken test and stop here.
+    if !LuxTestUtils.ENZYME_TESTING_ENABLED[]
+        @test_broken false
+        return nothing
+    end
+
+    mse = MSELoss()
+
+    # A second objective (sum of squared errors) in the
+    # `(model, ps, st, data) -> (loss, state, stats)` form, used to call
+    # `compute_gradients` with a different function than the one used before.
+    function mse2(model, ps, st, (x, y))
+        z, st = model(x, ps, st)
+        return sum(abs2, z .- y), st, ()
+    end
+
+    rng = StableRNG(12345)
+
+    # Case 1: a chain containing `VariationalHiddenDropout`. The returned train state
+    # must not hold the very same `states` object as the input.
+    model = Chain(Dense(4 => 3), VariationalHiddenDropout(0.5f0), Dense(3 => 4))
+    ps, st = Lux.setup(rng, model)
+    x = randn(rng, Float32, 4, 32)
+    opt = Adam(0.001f0)
+
+    tstate = Training.TrainState(model, ps, st, opt)
+
+    _, _, _, tstate_new = Training.compute_gradients(AutoEnzyme(), mse, (x, x), tstate)
+
+    @test tstate_new.states !== tstate.states
+
+    # Case 2: a plain Dense chain. A follow-up call on the returned train state must
+    # be type-inferrable (`@inferred` throws otherwise; `isa Any` only makes the
+    # expression a valid `@test`).
+    model = Chain(Dense(4 => 3), Dense(3 => 4))
+    ps, st = Lux.setup(rng, model)
+
+    tstate = Training.TrainState(model, ps, st, opt)
+
+    _, _, _, tstate_new = Training.compute_gradients(AutoEnzyme(), mse, (x, x), tstate)
+
+    @test @inferred(Training.compute_gradients(AutoEnzyme(), mse, (x, x), tstate_new)) isa
+        Any
+
+    # Case 3: same train state, different objective function. The resulting cache
+    # must expose both the `forward` and `reverse` fields.
+    _, _, _, tstate_new2 = Training.compute_gradients(
+        AutoEnzyme(), mse2, (x, x), tstate_new
+    )
+    @test hasfield(typeof(tstate_new2.cache.extras), :forward)
+    @test hasfield(typeof(tstate_new2.cache.extras), :reverse)
+end
@@ -0,0 +1,51 @@
+using ADTypes, Optimisers, ComponentArrays, ForwardDiff
+
+include("../../shared_testsetup.jl")
+
+# Trains a small network with `AutoForwardDiff` through all three training entry
+# points in sequence (100 epochs each) and requires the loss on the first batch to
+# fall by more than a factor of 50.
+@testset "Training API ForwardDiff" begin
+    mse = MSELoss()
+    ad = AutoForwardDiff()
+
+    rng = StableRNG(12345)
+
+    # 32 samples with 4 features; targets are a difference of two polynomials in the
+    # input, min-max normalised to [0, 1] and split into four batches of 8 columns.
+    x_data = randn(rng, Float32, 4, 32)
+    y_raw = evalpoly.(x_data, ((1, 2, 3),)) .- evalpoly.(x_data, ((5, 2),))
+    lo, hi = minimum(y_raw), maximum(y_raw)
+    y_data = (y_raw .- lo) ./ (hi - lo)
+    dataset = [(x_data[:, idx], y_data[:, idx]) for idx in Iterators.partition(1:32, 8)]
+
+    model = Chain(
+        Dense(4, 32, tanh),
+        BatchNorm(32),
+        Dense(32, 32, tanh),
+        BatchNorm(32),
+        Dense(32, 4),
+    )
+
+    dataset_ = copy(dataset)
+    opt = Adam(0.001f0)
+
+    # Parameters are wrapped in a `ComponentVector` before building the train state.
+    ps, st = Lux.setup(rng, model)
+    tstate = Training.TrainState(model, ComponentVector(ps), st, opt)
+
+    # NOTE(review): the initial loss is evaluated with `Lux.testmode` states while
+    # the final loss below uses the states as stored in `tstate` — confirm this
+    # asymmetry is intended.
+    first_batch = dataset_[1]
+    initial_loss = first(
+        mse(model, tstate.parameters, Lux.testmode(tstate.states), first_batch)
+    )
+
+    # Phase 1: separate gradient computation and in-place update.
+    for _ in 1:100, batch in dataset_
+        grads, _, _, tstate = allow_unstable() do
+            Training.compute_gradients(ad, mse, batch, tstate)
+        end
+        tstate = Training.apply_gradients!(tstate, grads)
+    end
+
+    # Phase 2: combined step, `!` version.
+    for _ in 1:100, batch in dataset_
+        _, _, _, tstate = allow_unstable() do
+            Training.single_train_step!(ad, mse, batch, tstate)
+        end
+    end
+
+    # Phase 3: combined step, non-`!` version.
+    for _ in 1:100, batch in dataset_
+        _, _, _, tstate = allow_unstable() do
+            Training.single_train_step(ad, mse, batch, tstate)
+        end
+    end
+
+    final_loss = first(mse(model, tstate.parameters, tstate.states, first_batch))
+
+    @test final_loss * 50 < initial_loss
+end