
Commit 2905f55

Added support for Flux 0.14. Progress bars now display appropriately in both the terminal and Pluto. Moved to Julia 1.9. Released version 0.3.0.
1 parent 24cc037 commit 2905f55
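
The progress-bar change shows up in the diff as two new direct dependencies, Logging and TerminalLoggers, plus the removal of `with_logger(TerminalLogger())` wrappers from every example. How the package selects a logger internally is not shown in this diff, so the following is only a plausible sketch of that logic, using the standard Logging, TerminalLoggers, and ProgressLogging APIs:

```julia
using Logging, TerminalLoggers

# Hypothetical helper (not from this commit): upgrade the plain REPL
# ConsoleLogger to a TerminalLogger, which renders @progress events as
# progress bars; leave any other logger (e.g. Pluto's, which already
# renders progress bars) untouched.
progress_logger() = current_logger() isa ConsoleLogger ? TerminalLogger() : current_logger()

# fit! could then wrap its training loop like so:
# with_logger(progress_logger()) do
#     @progress for epoch in 1:epochs
#         ...
#     end
# end
```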

File tree

12 files changed (+86, −81 lines)


.github/workflows/CI.yml

Lines changed: 1 addition & 1 deletion
@@ -18,7 +18,7 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.7'
+          - '1.9'
         os:
           - ubuntu-latest
         arch:

Project.toml

Lines changed: 6 additions & 3 deletions
@@ -1,23 +1,26 @@
 name = "MixtureDensityNetworks"
 uuid = "521d8788-cab4-41cb-a05a-da376f16ad79"
 authors = ["Joshua Billson"]
-version = "0.2.2"
+version = "0.3.0"
 
 [deps]
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea"
 Pipe = "b98c9c47-44ae-5843-9183-064241ee97a0"
 ProgressLogging = "33c8b6b6-d38a-422a-b730-caa89a2f386c"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+TerminalLoggers = "5d786b92-1e48-4d6f-9151-6b4477ca9bed"
 
 [compat]
 Distributions = "0.17, 0.18, 0.19, 0.20, 0.21, 0.22, 0.23, 0.24, 0.25"
 DocStringExtensions = "0.8, 0.9"
-Flux = "0.13"
+Flux = "0.14"
 MLJModelInterface = "1"
 Pipe = "1.3"
 ProgressLogging = "0.1"
-julia = "1.6"
+TerminalLoggers = "0.1"
+julia = "1.9"

README.md

Lines changed: 15 additions & 20 deletions
@@ -33,9 +33,7 @@ function main()
     model = MixtureDensityNetwork(1, 1, layers, mixtures)
 
     # Fit Model
-    model, report = with_logger(TerminalLogger()) do
-        MixtureDensityNetworks.fit!(model, X, Y; epochs=epochs, opt=Flux.Adam(1e-3), batchsize=batchsize)
-    end
+    model, report = MixtureDensityNetworks.fit!(model, X, Y; epochs=epochs, opt=Flux.Adam(1e-3), batchsize=batchsize)
 
     # Plot Learning Curve
     fig, _, _ = lines(1:epochs, report.learning_curve, axis=(;xlabel="Epochs", ylabel="Loss"))

@@ -61,7 +59,7 @@ main()
 # Example (MLJ Interface)
 
 ```julia
-using MixtureDensityNetworks, Distributions, Logging, TerminalLoggers, CairoMakie, MLJ, Random
+using MixtureDensityNetworks, Distributions, CairoMakie, MLJ
 
 const n_samples = 1000
 const epochs = 500

@@ -77,24 +75,21 @@ function main()
     mach = MLJ.machine(MDN(epochs=epochs, mixtures=mixtures, layers=layers, batchsize=batchsize), MLJ.table(X'), Y[1,:])
 
     # Fit Model on Training Data, Then Evaluate on Test
-    with_logger(TerminalLogger()) do
-        @info "Evaluating..."
-        evaluation = MLJ.evaluate!(
-            mach,
-            resampling=Holdout(shuffle=true),
-            measure=[rsq, rmse, mae, mape],
-            operation=MLJ.predict_mean
-        )
-        names = ["R²", "RMSE", "MAE", "MAPE"]
-        metrics = round.(evaluation.measurement, digits=3)
-        @info "Metrics: " * join(["$name: $metric" for (name, metric) in zip(names, metrics)], ", ")
-    end
+    @info "Evaluating..."
+    evaluation = MLJ.evaluate!(
+        mach,
+        resampling=Holdout(shuffle=true),
+        measure=[rsq, rmse, mae, mape],
+        operation=MLJ.predict_mean,
+        verbosity=2  # Need to set verbosity=2 to show training progress during evaluation
+    )
+    names = ["R²", "RMSE", "MAE", "MAPE"]
+    metrics = round.(evaluation.measurement, digits=3)
+    @info "Metrics: " * join(["$name: $metric" for (name, metric) in zip(names, metrics)], ", ")
 
     # Fit Model on Entire Dataset
-    with_logger(TerminalLogger()) do
-        @info "Training..."
-        MLJ.fit!(mach)
-    end
+    @info "Training..."
+    MLJ.fit!(mach)
 
     # Plot Learning Curve
     fig, _, _ = lines(1:epochs, MLJ.training_losses(mach), axis=(;xlabel="Epochs", ylabel="Loss"))

docs/src/index.md

Lines changed: 9 additions & 7 deletions
@@ -13,6 +13,10 @@ First, let's create our dataset. To properly demonstrate the power of MDNs, we'l
 using Flux, Distributions, CairoMakie, MixtureDensityNetworks
 
 const n_samples = 1000
+const epochs = 1000
+const batchsize = 128
+const mixtures = 8
+const layers = [128, 128]
 
 X, Y = generate_data(n_samples)

@@ -24,16 +28,16 @@ fig, ax, plt = scatter(X[1,:], Y[1,:], markersize=5)
 Now we'll define a standard univariate MDN. For this example, we construct a network with 2 hidden layers of size 128, which outputs a distribution
 with 5 Gaussian mixtures.
 ```julia
-model = MixtureDensityNetwork(1, 1, [128, 128], 5)
+model = MixtureDensityNetwork(1, 1, layers, mixtures)
 ```
 
 We can fit our model to our data by calling `fit!(m, X, Y; opt=Flux.Adam(), batchsize=32, epochs=100)`. We specify that we want to train our model for
 500 epochs with the Adam optimiser and a batch size of 128. This method returns the model with the lowest loss as its first value and a named tuple
 containing the learning curve, best epoch, and lowest loss observed during training as its second value. We can use Makie's `lines` method to visualize
 the learning curve.
 ```julia
-model, report = MixtureDensityNetworks.fit!(model, X, Y; epochs=500, opt=Flux.Adam(1e-3), batchsize=128)
-fig, _, _ = lines(1:500, lc, axis=(;xlabel="Epochs", ylabel="Loss"))
+model, report = MixtureDensityNetworks.fit!(model, X, Y; epochs=epochs, opt=Flux.Adam(1e-3), batchsize=batchsize)
+fig, _, _ = lines(1:epochs, report.learning_curve, axis=(;xlabel="Epochs", ylabel="Loss"))
 ```
 
 ![](figures/LearningCurve.png)

@@ -60,7 +64,7 @@ density(fig[1,1], rand(cond, 10000), npoints=10000)
 
 Below is a script for running the complete example.
 ```julia
-using Flux, MixtureDensityNetworks, Distributions, CairoMakie, Logging, TerminalLoggers
+using Flux, MixtureDensityNetworks, Distributions, CairoMakie
 
 const n_samples = 1000
 const epochs = 1000

@@ -76,9 +80,7 @@ function main()
     model = MixtureDensityNetwork(1, 1, layers, mixtures)
 
     # Fit Model
-    model, report = with_logger(TerminalLogger()) do
-        MixtureDensityNetworks.fit!(model, X, Y; epochs=epochs, opt=Flux.Adam(1e-3), batchsize=batchsize)
-    end
+    model, report = MixtureDensityNetworks.fit!(model, X, Y; epochs=epochs, opt=Flux.Adam(1e-3), batchsize=batchsize)
 
     # Plot Learning Curve
     fig, _, _ = lines(1:epochs, report.learning_curve, axis=(;xlabel="Epochs", ylabel="Loss"))
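
The prose in this file says `fit!` returns the best model and a named tuple containing the learning curve, best epoch, and lowest observed loss. As a usage sketch, the report's `learning_curve` and `best_epoch` fields (both visible elsewhere in this diff) recover all three quantities; any other field name would be an assumption:

```julia
using Flux, MixtureDensityNetworks

X, Y = generate_data(1000)
model = MixtureDensityNetwork(1, 1, [128, 128], 8)
model, report = MixtureDensityNetworks.fit!(model, X, Y; epochs=1000, opt=Flux.Adam(1e-3), batchsize=128)

report.learning_curve                     # per-epoch loss history
report.best_epoch                         # epoch at which the lowest loss occurred
report.learning_curve[report.best_epoch]  # the lowest loss itself
```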

docs/src/mlj.md

Lines changed: 14 additions & 17 deletions
@@ -8,7 +8,7 @@ This package implements the interface specified by [MLJModelInterface](https://g
 with the MLJ ecosystem. Below is an example demonstrating the use of this package in conjunction with MLJ.
 
 ```julia
-using MixtureDensityNetworks, Distributions, Logging, TerminalLoggers, CairoMakie, MLJ, Random
+using MixtureDensityNetworks, Distributions, CairoMakie, MLJ
 
 const n_samples = 1000
 const epochs = 500

@@ -24,24 +24,21 @@ function main()
     mach = MLJ.machine(MDN(epochs=epochs, mixtures=mixtures, layers=layers, batchsize=batchsize), MLJ.table(X'), Y[1,:])
 
     # Fit Model on Training Data, Then Evaluate on Test
-    with_logger(TerminalLogger()) do
-        @info "Evaluating..."
-        evaluation = MLJ.evaluate!(
-            mach,
-            resampling=Holdout(shuffle=true),
-            measure=[rsq, rmse, mae, mape],
-            operation=MLJ.predict_mean
-        )
-        names = ["R²", "RMSE", "MAE", "MAPE"]
-        metrics = round.(evaluation.measurement, digits=3)
-        @info "Metrics: " * join(["$name: $metric" for (name, metric) in zip(names, metrics)], ", ")
-    end
+    @info "Evaluating..."
+    evaluation = MLJ.evaluate!(
+        mach,
+        resampling=Holdout(shuffle=true),
+        measure=[rsq, rmse, mae, mape],
+        operation=MLJ.predict_mean,
+        verbosity=2  # Need to set verbosity=2 to show training progress during evaluation
+    )
+    names = ["R²", "RMSE", "MAE", "MAPE"]
+    metrics = round.(evaluation.measurement, digits=3)
+    @info "Metrics: " * join(["$name: $metric" for (name, metric) in zip(names, metrics)], ", ")
 
     # Fit Model on Entire Dataset
-    with_logger(TerminalLogger()) do
-        @info "Training..."
-        MLJ.fit!(mach)
-    end
+    @info "Training..."
+    MLJ.fit!(mach)
 
     # Plot Learning Curve
     fig, _, _ = lines(1:epochs, MLJ.training_losses(mach), axis=(;xlabel="Epochs", ylabel="Loss"))

examples/mlj_example.jl

Lines changed: 14 additions & 17 deletions
@@ -1,4 +1,4 @@
-using MixtureDensityNetworks, Distributions, Logging, TerminalLoggers, CairoMakie, MLJ, Random
+using MixtureDensityNetworks, Distributions, CairoMakie, MLJ
 
 const n_samples = 1000
 const epochs = 500

@@ -14,24 +14,21 @@ function main()
     mach = MLJ.machine(MDN(epochs=epochs, mixtures=mixtures, layers=layers, batchsize=batchsize), MLJ.table(X'), Y[1,:])
 
     # Fit Model on Training Data, Then Evaluate on Test
-    with_logger(TerminalLogger()) do
-        @info "Evaluating..."
-        evaluation = MLJ.evaluate!(
-            mach,
-            resampling=Holdout(shuffle=true),
-            measure=[rsq, rmse, mae, mape],
-            operation=MLJ.predict_mean
-        )
-        names = ["R²", "RMSE", "MAE", "MAPE"]
-        metrics = round.(evaluation.measurement, digits=3)
-        @info "Metrics: " * join(["$name: $metric" for (name, metric) in zip(names, metrics)], ", ")
-    end
+    @info "Evaluating..."
+    evaluation = MLJ.evaluate!(
+        mach,
+        resampling=Holdout(shuffle=true),
+        measure=[rsq, rmse, mae, mape],
+        operation=MLJ.predict_mean,
+        verbosity=2  # Need to set verbosity=2 to show training progress during evaluation
+    )
+    names = ["R²", "RMSE", "MAE", "MAPE"]
+    metrics = round.(evaluation.measurement, digits=3)
+    @info "Metrics: " * join(["$name: $metric" for (name, metric) in zip(names, metrics)], ", ")
 
     # Fit Model on Entire Dataset
-    with_logger(TerminalLogger()) do
-        @info "Training..."
-        MLJ.fit!(mach)
-    end
+    @info "Training..."
+    MLJ.fit!(mach)
 
     # Plot Learning Curve
     fig, _, _ = lines(1:epochs, MLJ.training_losses(mach), axis=(;xlabel="Epochs", ylabel="Loss"))

examples/native_example.jl

Lines changed: 1 addition & 3 deletions
@@ -14,9 +14,7 @@ function main()
     model = MixtureDensityNetwork(1, 1, layers, mixtures)
 
     # Fit Model
-    model, report = with_logger(TerminalLogger()) do
-        MixtureDensityNetworks.fit!(model, X, Y; epochs=epochs, opt=Flux.Adam(1e-3), batchsize=batchsize)
-    end
+    model, report = MixtureDensityNetworks.fit!(model, X, Y; epochs=epochs, opt=Flux.Adam(1e-3), batchsize=batchsize)
 
     # Plot Learning Curve
     fig, _, _ = lines(1:epochs, report.learning_curve, axis=(;xlabel="Epochs", ylabel="Loss"))

src/MixtureDensityNetworks.jl

Lines changed: 2 additions & 0 deletions
@@ -7,6 +7,8 @@ using LinearAlgebra
 using ProgressLogging
 using MLJModelInterface
 using DocStringExtensions
+using Logging
+using TerminalLoggers
 using Pipe: @pipe
 
 const MMI = MLJModelInterface

src/mlj_interface.jl

Lines changed: 2 additions & 2 deletions
@@ -65,14 +65,14 @@ end
 
 function MLJModelInterface.fit(model::MDN, verbosity, X, y)
     m = MixtureDensityNetwork(size(X, 1), 1, model.layers, model.mixtures)
-    fitresult, report = MixtureDensityNetworks.fit!(m, X, y, opt=Flux.Adam(model.η), batchsize=model.batchsize, epochs=model.epochs)
+    fitresult, report = MixtureDensityNetworks.fit!(m, X, y, opt=Flux.Adam(model.η), batchsize=model.batchsize, epochs=model.epochs, verbosity=verbosity)
     cache = (;learning_curve=report.learning_curve[1:report.best_epoch])
     return fitresult, cache, report
 end
 
 function MLJModelInterface.update(model::MDN, verbosity, old_fitresult, old_cache, X, y)
     # Update Fitresult
-    fitresult, report = MixtureDensityNetworks.fit!(old_fitresult, X, y, opt=Flux.Adam(model.η), batchsize=model.batchsize, epochs=model.epochs)
+    fitresult, report = MixtureDensityNetworks.fit!(old_fitresult, X, y, opt=Flux.Adam(model.η), batchsize=model.batchsize, epochs=model.epochs, verbosity=verbosity)
 
     # Update Report
     learning_curve=vcat(old_cache.learning_curve, report.learning_curve)
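
Threading `verbosity` from MLJ's `fit`/`update` into the native `fit!` is what lets users silence or surface training progress through MLJ's standard keyword. A brief usage sketch (standard MLJ API; `MDN(epochs=500)` relying on default hyperparameters is an assumption, and the verbosity behavior is taken from the comment in the diff above):

```julia
using MLJ, MixtureDensityNetworks

X, Y = generate_data(1000)
mach = machine(MDN(epochs=500), MLJ.table(X'), Y[1,:])

fit!(mach, verbosity=0)  # train silently
fit!(mach, verbosity=1)  # default: report fitting and show training progress

# Per the diff's comment, evaluate! needs verbosity=2 before per-fold
# training progress appears (the resampling loop consumes one level).
evaluate!(mach, resampling=Holdout(shuffle=true), measure=rmse,
          operation=MLJ.predict_mean, verbosity=2)
```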

src/model.jl

Lines changed: 1 addition & 1 deletion
@@ -33,7 +33,7 @@ function MixtureDensityNetwork(input::Int, output::Int, layers::Vector{Int}, mix
     layers = vcat([input], layers)
     for (dim_in, dim_out) in zip(layers, layers[2:end])
         push!(hidden, Flux.Dense(dim_in=>dim_out, init=init))
-        push!(hidden, Flux.BatchNorm(dim_out, Flux.relu, initβ=zeros, initγ=ones, ϵ=1e-5, momentum=0.1))
+        push!(hidden, Flux.BatchNorm(dim_out, Flux.relu, initβ=zeros, initγ=ones, eps=1e-5, momentum=0.1))
     end
     hidden_layer = Flux.Chain(hidden...)
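
This is the substantive Flux 0.14 compatibility fix: `BatchNorm`'s numerical-stability constant is now passed as the ASCII keyword `eps`, and the Unicode `ϵ` keyword from Flux 0.13 is no longer accepted. A minimal standalone check, assuming only the public Flux 0.14 API:

```julia
using Flux

# Flux 0.14 spells the stability constant `eps` (previously `ϵ`).
bn = Flux.BatchNorm(128, Flux.relu; initβ=zeros, initγ=ones, eps=1e-5, momentum=0.1)

x = randn(Float32, 128, 32)  # (features, batch)
y = bn(x)                    # batch-normalizes, then applies relu
size(y)                      # (128, 32)
```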
