## Environment and packages
cd(@__DIR__)
using Pkg; Pkg.activate("."); Pkg.instantiate()

using OrdinaryDiffEq
using ModelingToolkit
using DataDrivenDiffEq
using LinearAlgebra, DiffEqSensitivity, Optim
using DiffEqFlux, Flux
using Plots
gr()
using JLD2, FileIO
using Statistics
using DelimitedFiles
# Set a random seed for reproducible behaviour
using Random
Random.seed!(5443)

## Data Preprocessing
# The data has been taken from https://jmahaffy.sdsu.edu/courses/f00/math122/labs/labj/q3v1.htm
# Originally published in
hudson_bay_data = readdlm("hudson_bay_data.dat", '\t', Float32, '\n')
# Time from 0 -> n
t = hudson_bay_data[:, 1] .- hudson_bay_data[1, 1]
tspan = (t[1], t[end])
# Measurements of prey and predator
Xₙ = Matrix(transpose(hudson_bay_data[:, 2:3]))
# Normalize the data; since the data domain is strictly positive
# we just need to divide by the maximum
xscale = maximum(Xₙ, dims = 2)
Xₙ .= 1f0 ./ xscale .* Xₙ

# Plot the data
scatter(t, transpose(Xₙ), xlabel = "t [a]", ylabel = "x(t), y(t)")
plot!(t, transpose(Xₙ))
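# Optional sanity checks (not part of the original workflow): one time stamp
# per measurement column, and the normalization maps the maximum to one.
@assert length(t) == size(Xₙ, 2)
@assert maximum(Xₙ) ≈ 1f0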

## Direct Identification via SINDy + Collocation
# Estimate the derivatives via collocation with a Gaussian kernel
dx̂, x̂ = collocate_data(Xₙ, t, GaussianKernel())
# Look at the estimated derivatives
plot(t, dx̂')
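# Optional visual check (a sketch, not in the original script): the smoothed
# states x̂ returned by the collocation should closely track the measurements.
plot(t, x̂', label = ["x̂(t)" "ŷ(t)"])
scatter!(t, Xₙ', label = ["x(t)" "y(t)"])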
# Perform SINDy on the collocation estimates

# Create a Basis
@variables u[1:2]

# Generate the basis functions, multivariate polynomials up to deg 5
# and sine
b = [polynomial_basis(u, 5); sin.(u)]
basis = Basis(b, u)
# Create an optimizer for the SINDy problem
opt = SR3(Float32(1e-2), Float32(1e-2))
# Create the thresholds which should be used in the search process
λ = Float32.(exp10.(-7:0.1:3))
# Target function to choose the results from; x = L0 of coefficients and L2-Error of the model
g(x) = x[1] < 1 ? Inf : norm(x, 2)
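# How the selection works: for each threshold in λ, the candidate model is
# scored by g([sparsity, error]); models with no active terms are rejected via
# Inf, e.g. g([0, 0.5]) == Inf while g([3, 0.1]) == norm([3, 0.1]).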
# Test on derivative data
Ψ = SINDy(x̂, dx̂, basis, λ, opt, g = g, maxiter = 50000, normalize = true, denoise = true)
println(Ψ)
print_equations(Ψ)
# Reiterate on the linearly independent components of the first result
b2 = Basis((u, p, t) -> Ψ(u, ones(length(parameters(Ψ))), t), u, linear_independent = true)
Ψ = SINDy(x̂, dx̂, b2, λ, opt, g = g, maxiter = 50000, normalize = true, denoise = true)
println(Ψ)
print_equations(Ψ)
parameters(Ψ)
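# Note: this direct SINDy fit on the noisy collocation derivatives serves as a
# baseline; the UDE approach below learns only the unknown interaction terms.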
## UDE Approach
# Subsample the data in y -> initial fitting strategy (batching)
# We assume y is measured only at 6 evenly spaced time points, giving 5 shooting windows
ty = collect(t[1]:Float32(t[end]/5):t[end])
# Create datasets for the different measurements
XS = zeros(Float32, length(ty)-1, floor(Int64, mean(diff(ty))/mean(diff(t)))+1) # All x data
TS = zeros(Float32, length(ty)-1, floor(Int64, mean(diff(ty))/mean(diff(t)))+1) # Time data
YS = zeros(Float32, length(ty)-1, 2) # Just the two boundary measurements in y

for i in 1:length(ty)-1
    idxs = ty[i] .<= t .<= ty[i+1]
    XS[i, :] = Xₙ[1, idxs]
    TS[i, :] = t[idxs]
    YS[i, :] = [Xₙ[2, t .== ty[i]]; Xₙ[2, t .== ty[i+1]]]
end
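# Optional shape check (not in the original script): every window must contain
# the same number of x samples for the rectangular storage above to hold.
@assert size(XS) == size(TS) && size(YS) == (size(XS, 1), 2)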

## Define the network
# Gaussian RBF as activation
rbf(x) = exp.(-(x.^2))

# Define the network 2->5->5->5->2
U = FastChain(
    FastDense(2, 5, rbf), FastDense(5, 5, rbf), FastDense(5, 5, tanh), FastDense(5, 2)
)

# Get the initial parameters; the first two are the linear birth / decay rates
# of prey and predator, the rest are the network weights
p = [rand(Float32, 2); initial_params(U)]
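# Optional smoke test (added sketch): the network maps the 2 states to 2
# interaction terms.
@assert length(U(Xₙ[:, 1], p[3:end])) == 2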

# Define the hybrid model
function ude_dynamics!(du, u, p, t)
    û = U(u, p[3:end]) # Network prediction
    # We assume a linear birth rate for the prey
    du[1] = p[1]*u[1] + û[1]
    # We assume a linear decay rate for the predator
    du[2] = -p[2]*u[2] + û[2]
end

# Define the problem
prob_nn = ODEProblem(ude_dynamics!, Xₙ[:, 1], tspan, p)

## Function to train the network
# Define a predictor
function predict(θ, X = Xₙ[:, 1], T = t)
    Array(solve(prob_nn, Vern7(), u0 = X, p = θ,
                tspan = (T[1], T[end]), saveat = T,
                abstol = 1e-6, reltol = 1e-6,
                sensealg = ForwardDiffSensitivity()
                ))
end
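# Usage note: `predict(θ)` simulates the full time span, while
# `predict(θ, [x0, y0], Ts)` restarts from a window's boundary values; the
# shooting loss below relies on the latter.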

# Multiple-shooting-like loss
function shooting_loss(θ)
    # Start with a regularization on the network
    l = convert(eltype(θ), 1e-3) * sum(abs2, θ[3:end]) / length(θ[3:end])
    for i in 1:size(XS, 1)
        X̂ = predict(θ, [XS[i, 1], YS[i, 1]], TS[i, :])
        # Full prediction in x
        l += sum(abs2, XS[i, :] .- X̂[1, :])
        # Add the boundary condition in y
        l += abs2(YS[i, 2] - X̂[2, end])
    end

    return l
end
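# Rationale: fitting many short windows first sidesteps the poor gradients of a
# long-horizon fit with a badly initialized network; the full trajectory loss
# below then refines the result.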

function loss(θ)
    X̂ = predict(θ)
    sum(abs2, Xₙ - X̂) + convert(eltype(θ), 1e-3) * sum(abs2, θ[3:end]) / length(θ[3:end])
end

# Container to track the losses
losses = Float32[]

# Callback to show the loss during training
callback(θ, l) = begin
    push!(losses, l)
    if length(losses) % 5 == 0
        println("Current loss after $(length(losses)) iterations: $(losses[end])")
    end
    false
end

## Training -> First shooting / batching to get a rough estimate

# First train with ADAM for better convergence -> move the parameters into a
# favourable starting position for BFGS
res1 = DiffEqFlux.sciml_train(shooting_loss, p, ADAM(0.1f0), cb = callback, maxiters = 100)
println("Training loss after $(length(losses)) iterations: $(losses[end])")
# Train with BFGS to achieve a partial fit of the data
res2 = DiffEqFlux.sciml_train(shooting_loss, res1.minimizer, BFGS(initial_stepnorm = 0.01f0), cb = callback, maxiters = 200)
println("Training loss after $(length(losses)) iterations: $(losses[end])")
# Full L2 loss for the full prediction
res3 = DiffEqFlux.sciml_train(loss, res2.minimizer, BFGS(initial_stepnorm = 0.01f0), cb = callback, maxiters = 10000)
println("Final training loss after $(length(losses)) iterations: $(losses[end])")
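# Optional (not in the original script): inspect the optimization progress on a
# log scale.
plot(losses, yaxis = :log10, xlabel = "Iteration", ylabel = "Loss")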

# Rename the best candidate
p_trained = res3.minimizer

## Analysis of the trained network
# Interpolate the solution
tsample = t[1]:0.5f0:t[end]
X̂ = predict(p_trained, Xₙ[:, 1], tsample)
# Measurements vs trained interpolation
plot(t, transpose(Xₙ), color = :black, label = ["Measurements" nothing])
plot!(tsample, transpose(X̂), color = :red, label = ["Interpolation" nothing])

# Neural network guess of the interaction terms
Ŷ = U(X̂, p_trained[3:end])

scatter(tsample, transpose(Ŷ), xlabel = "t", ylabel = "I1(t), I2(t)", color = :red, label = ["UDE Approximation" nothing])
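# Note: Ŷ holds the learned interaction terms I1, I2 evaluated along the
# interpolated trajectory; these serve as targets for the sparse regression below.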

## Symbolic regression via sparse regression (SINDy based)

# Create a Basis
@variables u[1:2]

# Generate the basis functions, multivariate polynomials up to deg 5
# and sine
b = [polynomial_basis(u, 5); sin.(u)]
basis = Basis(b, u)

# Create an optimizer for the SINDy problem
opt = SR3(Float32(1e-2), Float32(1e-2))
# Create the thresholds which should be used in the search process
λ = Float32.(exp10.(-7:0.1:3))
# Target function to choose the results from; x = L0 of coefficients and L2-Error of the model
g(x) = x[1] < 1 ? Inf : norm(x, 2)

# Test on the UDE-derived data
println("SINDy on learned, partial, available data")
Ψ = SINDy(X̂, Ŷ, basis, λ, opt, g = g, maxiter = 50000, normalize = true, denoise = true)
println(Ψ)
print_equations(Ψ)

# Extract the parameters
p̂ = parameters(Ψ)
println("First parameter guess : $(p̂)")

# Just the equations -> we reiterate on SINDy here,
# searching all linearly independent components again
b = Basis((u, p, t) -> Ψ(u, ones(length(p̂)), t), u, linear_independent = true)
println(b)
# Retune for better parameters -> we could also use DiffEqFlux or other parameter estimation tools here.
opt = SR3(Float32(1e-2), Float32(1e-2))
Ψf = SINDy(X̂, Ŷ, b, opt, maxiter = 10000, normalize = true, convergence_error = eps())
println(Ψf)
print_equations(Ψf)
p̂ = parameters(Ψf)
println("Second parameter guess : $(p̂)")

# Define the recovered, hybrid model with the rescaled dynamics
function recovered_dynamics!(du, u, p, t)
    û = Ψf(u, p[3:end]) # Recovered interaction terms
    du[1] = p[1]*u[1] + û[1]
    du[2] = -p[2]*u[2] + û[2]
end

p_model = [p_trained[1:2]; p̂]
estimation_prob = ODEProblem(recovered_dynamics!, Xₙ[:, 1], tspan, p_model)
estimate = solve(estimation_prob, Tsit5(), saveat = 0.1)

# Plot the recovered model against the data
plot(t, transpose(Xₙ))
plot!(estimate)

## Simulation

# Look at the long-term prediction
t_long = (0.0f0, 50.0f0)
estimation_prob = ODEProblem(recovered_dynamics!, Xₙ[:, 1], t_long, p_model)
estimate_long = solve(estimation_prob, Tsit5(), saveat = 0.25)
plot(estimate_long)
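# Note: if the recovery worked, the long-term simulation should remain bounded
# and periodic rather than decaying or blowing up.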

## Save the results
save("Hudson_Bay_recovery.jld2",
     "X", Xₙ, "t", t, "neural_network", U, "initial_parameters", p, "trained_parameters", p_trained, # Training
     "losses", losses, "result", Ψf, "recovered_parameters", p̂, # Recovery
     "model", recovered_dynamics!, "model_parameter", p_model,
     "long_estimate", estimate_long) # Estimation