New version and Symbolic Regression Result handling (#325)

AlCap23 · web-flow · commit 40340bfa5ffc · 2022-01-12T18:16:50.000+01:00
* Adapt Symbolic regression solution process

* Update getters

* Remove old code for now

* Bump version
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "DataDrivenDiffEq"
 uuid = "2445eb08-9709-466a-b3fc-47e12bd697a2"
 authors = ["Julius Martensen <julius.martensen@gmail.com>"]
-version = "0.6.9"
+version = "0.7.0"
 
 [deps]
 Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
diff --git a/src/basis/type.jl b/src/basis/type.jl
@@ -238,7 +238,7 @@ function dynamics(b::AbstractBasis)
 end
 
 ## Callable
-get_f(b::AbstractBasis) = getproperty(b, :f)
+get_f(b::AbstractBasis) = getfield(b, :f) # raw field read; does not dispatch through getproperty
 
 # Fallback
 (b::AbstractBasis)(args...) = get_f(b)(args...)
diff --git a/src/koopman/type.jl b/src/koopman/type.jl
@@ -146,19 +146,20 @@ end
 
 
 # We assume that we only have real valued observed
-Base.Matrix(k::AbstractKoopman) = real.(Matrix(k.K))
+Base.Matrix(k::AbstractKoopman) = real.(Matrix(_get_K(k)))
 
 # Get the lifting function
-lifting(k::AbstractKoopman) = k.lift
+lifting(k::AbstractKoopman) = getfield(k, :lift)
 
+# Get the stored `K` field directly (callers treat it as a matrix or an `Eigen`)
+_get_K(k::AbstractKoopman) = getfield(k, :K)
 
-# TODO FIXME MAYBE? 
 """
 $(SIGNATURES)
 
 Returns `true` if the `AbstractKoopmanOperator` `k` is discrete in time.
 """
-is_discrete(k::AbstractKoopman) = !(!k.is_discrete)
+is_discrete(k::AbstractKoopman) = getfield(k, :is_discrete)
 
 
 
@@ -167,28 +168,32 @@ $(SIGNATURES)
 
 Returns `true` if the `AbstractKoopmanOperator` `k` is continuous in time.
 """
-is_continuous(k::AbstractKoopman) = !k.is_discrete
+is_continuous(k::AbstractKoopman) = !is_discrete(k)
 
 """
 $(SIGNATURES)
 
 Return the eigendecomposition of the `AbstractKoopmanOperator`.
 """
-LinearAlgebra.eigen(k::AbstractKoopman) = isa(k.K, Eigen) ? k.K : eigen(k.K)
+function LinearAlgebra.eigen(k::AbstractKoopman)
+    op = _get_K(k)
+    # An operator that is already stored as an `Eigen` is handed back as is.
+    return op isa Eigen ? op : eigen(op)
+end
 
 """
 $(SIGNATURES)
 
 Return the eigenvalues of the `AbstractKoopmanOperator`.
 """
-LinearAlgebra.eigvals(k::AbstractKoopman) = eigvals(k.K)
+LinearAlgebra.eigvals(k::AbstractKoopman) = eigvals(_get_K(k))
 
 """
 $(SIGNATURES)
 
 Return the eigenvectors of the `AbstractKoopmanOperator`.
 """
-LinearAlgebra.eigvecs(k::AbstractKoopman) = eigvecs(k.K)
+LinearAlgebra.eigvecs(k::AbstractKoopman) = eigvecs(_get_K(k))
 
 """
 $(SIGNATURES)
@@ -209,14 +214,14 @@ $(SIGNATURES)
 
 Return the approximation of the discrete Koopman operator stored in `k`.
 """
-operator(k::AbstractKoopman) = is_discrete(k) ? k.K : throw(AssertionError("Koopman is continouos."))
+operator(k::AbstractKoopman) = is_discrete(k) ? _get_K(k) : throw(AssertionError("Koopman is continouos."))
 
 """
 $(SIGNATURES)
 
 Return the approximation of the continuous Koopman generator stored in `k`.
 """
-generator(k::AbstractKoopman) = is_continuous(k) ? k.K : throw(AssertionError("Koopman is discrete."))
+generator(k::AbstractKoopman) = is_continuous(k) ? _get_K(k) : throw(AssertionError("Koopman is discrete."))
 
 """
 $(SIGNATURES)
@@ -240,7 +245,11 @@ Returns `true` if either:
 + the Koopman operator has just eigenvalues with magnitude less than one or
 + the Koopman generator has just eigenvalues with a negative real part
 """
-is_stable(k::AbstractKoopman) = is_discrete(k) ? all(real.(eigvals(k)) .< real.(one(eltype(k.K)))) : all(real.(eigvals(k)) .< zero(eltype(k.K)))
+is_stable(k::AbstractKoopman) = begin 
+    K = _get_K(k)
+    is_discrete(k) && return all(real.(eigvals(k)) .< real.(one(eltype(K)))) # discrete: bound is one
+    all(real.(eigvals(k)) .< zero(eltype(K))) # continuous: bound is zero
+end
 
 # TODO This does not work, since we are using the reduced basis instead of the
 # original, lifted dynamics...
diff --git a/src/solution.jl b/src/solution.jl
@@ -375,7 +375,7 @@ function DataDrivenSolution(prob::AbstractDataDrivenProblem, k, C, B, Q, P, inds
     res_ = Koopman(equations(bs), states(bs),
         parameters = parameters(bs),
         controls = controls(bs), iv = get_iv(bs),
-        K = k, C = C, Q = Q, P = P, lift = b.f,
+        K = k, C = C, Q = Q, P = P, lift = get_f(b),
         is_discrete = is_discrete(prob),
         eval_expression = eval_expression)
 
diff --git a/src/symbolic_regression/symbolic_regression.jl b/src/symbolic_regression/symbolic_regression.jl
@@ -37,13 +37,16 @@ function to_options(x::EQSearch)
     )
 end
 
+
+
 function DiffEqBase.solve(prob::AbstractDataDrivenProblem, alg::EQSearch;
     max_iter::Int = 10,
     weights = nothing,
     numprocs = nothing, procs = nothing,
     multithreading = false,
     runtests::Bool = true,
-    eval_expression = false
+    eval_expression = false,
+    kwargs...
     )
 
     opt = to_options(alg)
@@ -60,29 +63,63 @@ function DiffEqBase.solve(prob::AbstractDataDrivenProblem, alg::EQSearch;
     hof = SymbolicRegression.EquationSearch(X, Y, niterations = max_iter, weights = weights, options = opt,
             numprocs = numprocs, procs = procs, multithreading = multithreading,
             runtests = runtests)
-    # Sort the paretofront
-    doms = map(1:size(Y, 1)) do i
-        calculateParetoFrontier(X, Y[i, :], hof[i], opt)
-    end
 
-    build_solution(prob, alg, doms; eval_expression = eval_expression)
+    build_solution(prob, alg, hof; eval_expression = eval_expression)
 end
 
+# Wrap a lone `HallOfFame` in a vector so the vector method below handles it.
+pareto_optimal_equations(hof::HallOfFame, prob, alg) =
+    pareto_optimal_equations([hof], prob, alg)
 
-function build_solution(prob::AbstractDataDrivenProblem, alg::EQSearch, doms; eval_expression = false)
 
-    opt = to_options(alg)
+function pareto_optimal_equations(hof::Vector{HallOfFame}, prob, alg)
+
+    opts = DataDrivenDiffEq.to_options(alg)
+    y = DataDrivenDiffEq.get_target(prob)
+    x, _, t, c = DataDrivenDiffEq.get_oop_args(prob)
+    X =  vcat([x for x in (x, c, permutedims(t)) if !isempty(x)]...)
+    
     @variables x[1:size(prob.X, 1)] u[1:size(prob.U,1)] t
     x = Symbolics.scalarize(x)
     u = Symbolics.scalarize(u)
-    x_ = [x;u;t]
+    x_ = Num[x;u;t]
 
     # Build a dict
     subs = Dict([SymbolicUtils.Sym{Number}(Symbol("x$(i)")) => x_[i] for i in 1:size(x_, 1)]...)
-    # Create a variable
-    eqs = vcat(map(x->node_to_symbolic(x[end].tree, opt), doms))
-    eqs = map(x->substitute(x, subs), eqs)
 
+
+    eqs = map(1:size(hof, 1)) do i
+        # One frontier per target row; an empty frontier falls back to a zero equation.
+        d = calculateParetoFrontier(X, y[i,:], hof[i], opts)
+        isempty(d) && return Num(0)
+        eq_ = node_to_symbolic(last(d).tree, opts)
+        substitute(eq_, subs)
+    end
+
+    return eqs, x, u, t
+end
+
+
+
+function build_solution(prob::AbstractDataDrivenProblem, alg::EQSearch, hof; eval_expression = false)
+
+    #opt = to_options(alg)
+#
+    #@variables x[1:size(prob.X, 1)] u[1:size(prob.U,1)] t
+    #x = Symbolics.scalarize(x)
+    #u = Symbolics.scalarize(u)
+    #x_ = [x;u;t]
+
+    # Build a dict
+    #subs = Dict([SymbolicUtils.Sym{Number}(Symbol("x$(i)")) => x_[i] for i in 1:size(x_, 1)]...)
+
+
+    # Create a variable
+    #eqs = vcat(map(x->node_to_symbolic(x[end].tree, opt), doms))
+    #eqs = map(x->substitute(x, subs), eqs)
+    
+    eqs, x, u, t = pareto_optimal_equations(hof, prob, alg)
+    
     lhs, dt = assert_lhs(prob)
 
 
@@ -104,10 +141,11 @@ function build_solution(prob::AbstractDataDrivenProblem, alg::EQSearch, doms; ev
     Y = res_(get_oop_args(prob)...)
 
 
+
     error = sum(abs2, X-Y, dims = 2)[:,1]
     retcode = :converged 
     
     return DataDrivenSolution(
-        false, res_, [], retcode, alg, doms, prob, error
+        false, res_, [], retcode, alg, hof, prob, error
     )
 end
diff --git a/src/utils/utils.jl b/src/utils/utils.jl
@@ -142,90 +142,3 @@ function optimal_shrinkage!(X::AbstractArray{T, 2}) where T <: Number
     X .= U[:, inds]*Diagonal(S[inds])*V[:, inds]'
     return
 end
-
-
-## TODO
-# This is old code and will be processed to be used with the Problems
-#"""
-#	($SIGNATURES)
-#
-#Randomly selects `n` bursts of data with size `samplesize` from the data `X`.
-#
-#Randomly selects `n` bursts of data with size `samplesize` from the data `X` and `Y`.
-#
-#Randomly selects `n` bursts of data within a time window `period` from the data `X`. The time information
-#has to be provided in `t`.
-#"""
-#@inline function burst_sampling(x::AbstractArray, samplesize::Int64, bursts::Int64)
-#    @assert size(x)[end] >= samplesize*bursts "Bursting impossible. Please provide more data or reduce bursts or samplesize."
-#    inds = sample(1:size(x)[end]-samplesize, bursts, replace = false)
-#    inds = sort(unique(vcat([collect(i:i+samplesize) for i in inds]...)))
-#    return resample(x, inds)
-#end
-#
-#@inline function burst_sampling(x::AbstractArray, y::AbstractArray, samplesize::Int64, bursts::Int64)
-#    @assert size(x)[end] >= samplesize*bursts "Bursting impossible. Please provide more data or reduce bursts or samplesize"
-#    @assert size(x)[end] == size(y)[end]
-#    inds = sample(1:size(x)[end]-samplesize, bursts, replace = false)
-#    inds = sort(unique(vcat([collect(i:i+samplesize) for i in inds]...)))
-#    return resample(x, inds), resample(y, inds)
-#end
-#
-#@inline function burst_sampling(x::AbstractArray, t::AbstractVector, period::T, bursts::Int64) where T <: AbstractFloat
-#    @assert period > zero(typeof(period)) "Sampling period has to be positive."
-#    @assert size(x)[end] == size(t)[end] "Provide consistent data."
-#    @assert bursts >= 1 "Number of bursts has to be positive."
-#    @assert t[end]-t[1]>= period*bursts "Bursting impossible. Please provide more data or reduce bursts or samplesize"
-#    t_ids = zero(eltype(t)) .<= t .- period  .<= t[end] .- 2*period
-#    samplesize = Int64(floor(period/(t[end]-t[1])*length(t)))
-#    inds = sample(collect(1:length(t))[t_ids], bursts, replace = false)
-#    inds = sort(unique(vcat([collect(i:i+samplesize) for i in inds]...)))
-#    return resample(x, inds), resample(t, inds)
-#end
-#
-#
-#"""
-#	$(SIGNATURES)
-#
-#Returns the subsampled `X` with only every `n`-th entry.
-#
-#Returns the subsampled `X` with a a minimum period of `dt` between two data points. `t` provides the
-#time information.
-#"""
-#@inline function subsample(x::AbstractVector, frequency::Int64)
-#    @assert frequency > 0 "Sampling frequency has to be positive."
-#    return x[1:frequency:end]
-#end
-#
-#@inline function subsample(x::AbstractArray, frequency::Int64)
-#    @assert frequency > 0 "Sampling frequency has to be positive."
-#    return x[:, 1:frequency:end]
-#end
-#
-#@inline function subsample(x::AbstractArray, t::AbstractVector, period::T) where T <: AbstractFloat
-#    @assert period > zero(typeof(period)) "Sampling period has to be positive."
-#    @assert size(x)[end] == size(t)[end] "Provide consistent data."
-#    @assert t[end]-t[1]>= period "Subsampling impossible. Sampling period exceeds time window."
-#    idx = Int64[1]
-#    t_now = t[1]
-#    @inbounds for (i, t_current) in enumerate(t)
-#        if t_current - t_now >= period
-#            push!(idx, i)
-#            t_now = t_current
-#        end
-#    end
-#    return resample(x, idx), resample(t, idx)
-#end
-#
-#@inline function resample(x::AbstractArray{T,1}, indx::AbstractArray{Int64}) where T <: Number
-#    @assert maximum(indx) <= length(x) "Sampling index has to be consistent with array dimensions."
-#    @assert minimum(indx) >= 1 "Sampling index has to be consistent with array dimensions."
-#    return x[indx]
-#end
-#
-#@inline function resample(x::AbstractArray{T,2}, indx::AbstractArray{Int64}) where T <: Number
-#    @assert maximum(indx) <= size(x, 2) "Sampling index has to be consistent with array dimensions."
-#    @assert minimum(indx) >= 1 "Sampling index has to be consistent with array dimensions."
-#    return x[:, indx]
-#end
-#
diff --git a/test/symbolic_regression/symbolic_regression.jl b/test/symbolic_regression/symbolic_regression.jl
@@ -15,4 +15,13 @@
     x = states(sys)
     @test all(m[:L₂] .<= eps())
     @test isequal([x.rhs for x in equations(sys)], [sin(x[1]); exp(x[2])])
+
+    # Single target
+    prob = DirectDataDrivenProblem(X, Y[1:1,:])
+    res = solve(prob, opts, numprocs = 0, multithreading = false)
+    sys = result(res)
+    m = metrics(res)
+    x = states(sys)
+    @test all(m[:L₂] .<= eps())
+    @test isequal([x.rhs for x in equations(sys)], [sin(x[1])]) 
 end