Start towards caching and perf optimizations

utkarsh530 · utkarsh530 · commit 82dff1cfaea0 · 2024-02-26T17:25:38.000-05:00
diff --git a/Project.toml b/Project.toml
@@ -15,6 +15,7 @@ NonlinearSolve = "8913a72c-1f9b-4ce2-8d82-65094dcecaec"
 Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
 QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
 Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"
 SimpleChains = "de6bee2f-e2f4-4ec7-b6ed-219cc6f6e9e5"
diff --git a/src/PSOGPU.jl b/src/PSOGPU.jl
@@ -8,6 +8,9 @@ import KernelAbstractions: @atomic, @atomicreplace, @atomicswap
 using QuasiMonteCarlo
 import DiffEqGPU: GPUTsit5, make_prob_compatible, vectorized_solve, vectorized_asolve
 
+using Reexport
+@reexport using SciMLBase
+
 ## Use lb and ub either as StaticArray or pass them separately as CuArrays
 ## Passing as CuArrays makes more sense, or maybe SArray? The based on no. of dimension
 struct SPSOParticle{T1, T2 <: eltype(T1)}
@@ -68,5 +71,5 @@ include("./bfgs.jl")
 include("./hybrid.jl")
 
 export ParallelPSOKernel,
-       ParallelSyncPSOKernel, ParallelPSOArray, SerialPSO, OptimizationProblem, solve
+       ParallelSyncPSOKernel, ParallelPSOArray, SerialPSO
 end
diff --git a/src/solve.jl b/src/solve.jl
@@ -2,6 +2,107 @@ function get_pos(particle)
     return particle.position
 end
 
+"""
+    PSOCache{TP, TAlg, TPart, TGbest}
+
+State built by `SciMLBase.init` for a `ParallelPSOKernel` run and consumed by `solve!`.
+
+# Fields
+- `prob`: the optimization problem, remade with the bounds from `check_init_bounds`.
+- `alg`: the algorithm object the cache was built for.
+- `particles`: particle array allocated on the algorithm's backend.
+- `gbest`: one-element array on the same backend holding the initial global best.
+"""
+mutable struct PSOCache{TP, TAlg, TPart, TGbest}
+    prob::TP
+    alg::TAlg
+    particles::TPart
+    gbest::TGbest
+end
+
+"""
+    SciMLBase.init(prob::OptimizationProblem, opt::ParallelPSOKernel, args...; kwargs...)
+
+Build a `PSOCache` for `prob`: replace its bounds with those returned by
+`check_init_bounds`, create the initial particles and global best with `init_particles`,
+and copy both into arrays allocated on `opt.backend`.
+
+Extra `args` and `kwargs` are accepted but not used by this method.
+
+# Throws
+- `ArgumentError` if `prob.u0` is not an `SArray`.
+"""
+function SciMLBase.init(
+        prob::OptimizationProblem, opt::ParallelPSOKernel, args...; kwargs...)
+    # Validate with an explicit throw; `@assert` is not meant for input validation.
+    prob.u0 isa SArray ||
+        throw(ArgumentError("ParallelPSOKernel requires `prob.u0` to be an SArray"))
+    backend = opt.backend
+
+    ## Bounds check
+    lb, ub = check_init_bounds(prob)
+    prob = remake(prob; lb = lb, ub = ub)
+
+    init_gbest, particles = init_particles(prob, opt, typeof(prob.u0))
+
+    # TODO: Do the equivalent of cu()/roc()
+    particles_eltype = eltype(particles) === Float64 ? Float32 : eltype(particles)
+    gpu_particles = KernelAbstractions.allocate(backend,
+        particles_eltype,
+        size(particles))
+    copyto!(gpu_particles, particles)
+    gpu_init_gbest = KernelAbstractions.allocate(backend, typeof(init_gbest), (1,))
+    copyto!(gpu_init_gbest, [init_gbest])
+    return PSOCache{
+        typeof(prob), typeof(opt), typeof(gpu_particles), typeof(gpu_init_gbest)}(
+        prob, opt, gpu_particles, gpu_init_gbest)
+end
+
+"""
+    SciMLBase.solve!(cache::PSOCache, opt::ParallelPSOKernel, args...;
+        maxiters = 100, kwargs...)
+
+Run `vectorized_solve!` on the problem, global best and particles stored in `cache` and
+wrap the outcome in a SciML solution.
+
+The solution reports the returned global-best position, the objective re-evaluated at
+that position, the positions of all returned particles as `original`, and the wall-clock
+time spent in `vectorized_solve!` in its stats.
+"""
+function SciMLBase.solve!(
+        cache::PSOCache, opt::ParallelPSOKernel, args...; maxiters = 100, kwargs...)
+    prob = cache.prob
+    t0 = time()
+    gbest, particles = vectorized_solve!(prob,
+        cache.gbest,
+        cache.particles,
+        opt,
+        Val(opt.global_update),
+        args...;
+        maxiters, kwargs...)
+    t1 = time()
+
+    particles_positions = get_pos.(particles)
+    return SciMLBase.build_solution(
+        SciMLBase.DefaultOptimizationCache(prob.f, prob.p), opt,
+        gbest.position, prob.f(gbest.position, prob.p), original = particles_positions,
+        stats = Optimization.OptimizationStats(; time = t1 - t0))
+end
+
+"""
+    SciMLBase.solve(prob::OptimizationProblem, opt::ParallelPSOKernel, args...;
+        maxiters = 100, kwargs...)
+
+Convenience entry point: `init` a cache for `prob` and immediately `solve!` it.
+"""
+function SciMLBase.solve(prob::OptimizationProblem, opt::ParallelPSOKernel,
+        args...; maxiters = 100, kwargs...)
+    cache = init(prob, opt, args...; maxiters, kwargs...)
+    # `init` ignores its keywords, so `maxiters` must also reach `solve!`; without it the
+    # solver always ran with its default of 100 iterations.
+    return solve!(cache, opt; maxiters)
+end
+
 function SciMLBase.__solve(prob::OptimizationProblem,
         opt::PSOAlgorithm,
         args...;
diff --git a/src/utils.jl b/src/utils.jl
@@ -1,3 +1,18 @@
+"""
+    uniform_itr(dim::Int, lb::AbstractArray{T}, ub::AbstractArray{T}) where {T}
+
+Return a lazy generator of `dim` values whose `i`-th element is
+`rand(T) * (ub[i] - lb[i]) + lb[i]`.
+
+Nothing is drawn until the generator is iterated.
+"""
+function uniform_itr(
+        dim::Int, lb::AbstractArray{T}, ub::AbstractArray{T}) where {T}
+    # Deliberately no `@inbounds` on the definition: the indexing happens inside the
+    # generator when a consumer iterates it, so that annotation never applied to it.
+    return (rand(T) * (ub[i] - lb[i]) + lb[i] for i in 1:dim)
+end
+
 function uniform(dim::Int, lb::AbstractArray{T}, ub::AbstractArray{T}) where {T}
     arr = rand(T, dim)
     @inbounds for i in 1:dim
@@ -6,7 +11,22 @@ function uniform(dim::Int, lb::AbstractArray{T}, ub::AbstractArray{T}) where {T}
     return arr
 end
 
-function init_particles(prob, opt, ::Type{T}) where {T <: SArray}
+"""
+    init_particles!(particles, prob, opt, ::Type{T}) where {T <: SArray}
+
+Fill `particles[1:opt.num_particles]` with newly sampled `SPSOParticle`s and return
+`(gbest, particles)`, where `gbest` is an `SPSOGBest` holding the lowest-cost position
+among a separately sampled seed and the particles' starting positions.
+
+If `prob.lb === nothing`, or every entry of `lb` and `ub` is infinite, positions are
+sampled around `prob.u0`; otherwise the seed is drawn uniformly within the bounds and the
+particles come from a Latin hypercube sample. When `prob.f.cons` is set, `calc_penalty`
+is added to every cost.
+
+`particles` is written with `@inbounds`, so it must hold at least `opt.num_particles`
+elements.
+"""
+function init_particles!(particles, prob, opt, ::Type{T}) where {T <: SArray}
     dim = length(prob.u0)
     lb = prob.lb
     ub = prob.ub
@@ -15,47 +35,101 @@ function init_particles(prob, opt, ::Type{T}) where {T <: SArray}
     num_particles = opt.num_particles
 
     if lb === nothing || (all(isinf, lb) && all(isinf, ub))
-        gbest_position = Array{eltype(prob.u0), 1}(undef, dim)
-        for i in 1:dim
-            if abs(prob.u0[i]) > 0
-                gbest_position[i] = prob.u0[i] + rand(eltype(prob.u0)) * abs(prob.u0[i])
-            else
-                gbest_position[i] = rand(eltype(prob.u0))
-            end
+        gbest_position = StaticArrays.sacollect(T,
+            ifelse(
+                abs(prob.u0[i]) > 0, prob.u0[i] + rand(eltype(prob.u0)) * abs(prob.u0[i]),
+                rand(eltype(prob.u0))) for i in 1:dim)
+    else
+        gbest_position = StaticArrays.sacollect(T, uniform_itr(dim, lb, ub))
+    end
+
+    gbest_position = convert(T, gbest_position)
+    # Evaluate the seed exactly once. With constraints the penalty is part of its cost and
+    # must not be overwritten afterwards by the bare objective value.
+    if !isnothing(prob.f.cons)
+        penalty = calc_penalty(gbest_position, prob, 1, opt.θ, opt.γ, opt.h)
+        gbest_cost = cost_func(gbest_position, p) + penalty
+    else
+        gbest_cost = cost_func(gbest_position, p)
+    end
+
+    if !(lb === nothing || (all(isinf, lb) && all(isinf, ub)))
+        positions = QuasiMonteCarlo.sample(num_particles, lb, ub, LatinHypercubeSample())
+    end
+
+    for i in 1:num_particles
+        if lb === nothing || (all(isinf, lb) && all(isinf, ub))
+            position = StaticArrays.sacollect(T,
+                ifelse(abs(prob.u0[j]) > 0,
+                    prob.u0[j] + rand(eltype(prob.u0)) * abs(prob.u0[j]),
+                    rand(eltype(prob.u0))) for j in 1:dim)
+        else
+            @inbounds position = StaticArrays.sacollect(T, positions[j, i] for j in 1:dim)
         end
+
+        velocity = zero(T)
+
+        if !isnothing(prob.f.cons)
+            penalty = calc_penalty(position, prob, 1, opt.θ, opt.γ, opt.h)
+            cost = cost_func(position, p) + penalty
+        else
+            cost = cost_func(position, p)
+        end
+
+        best_position = position
+        best_cost = cost
+        @inbounds particles[i] = SPSOParticle(
+            position, velocity, cost, best_position, best_cost)
+
+        if best_cost < gbest_cost
+            gbest_position = best_position
+            gbest_cost = best_cost
+        end
+    end
+    gbest = SPSOGBest(gbest_position, gbest_cost)
+    return gbest, particles
+end
+
+function init_particles(prob, opt, ::Type{T}) where {T <: SArray}
+    dim = length(prob.u0)
+    lb = prob.lb
+    ub = prob.ub
+    cost_func = prob.f
+    p = prob.p
+    num_particles = opt.num_particles
+
+    if lb === nothing || (all(isinf, lb) && all(isinf, ub))
+        gbest_position = StaticArrays.sacollect(T,
+            ifelse(
+                abs(prob.u0[i]) > 0, prob.u0[i] + rand(eltype(prob.u0)) * abs(prob.u0[i]),
+                rand(eltype(prob.u0))) for i in 1:dim)
     else
-        gbest_position = uniform(dim, lb, ub)
+        gbest_position = StaticArrays.sacollect(T, uniform_itr(dim, lb, ub))
     end
 
-    gbest_position = SVector{length(gbest_position), eltype(gbest_position)}(gbest_position)
+    gbest_cost = cost_func(gbest_position, p)
     if !isnothing(prob.f.cons)
         penalty = calc_penalty(gbest_position, prob, 1, opt.θ, opt.γ, opt.h)
         gbest_cost = cost_func(gbest_position, p) + penalty
     else
         gbest_cost = cost_func(gbest_position, p)
     end
-    # gbest_cost = cost_func(gbest_position, p)
-    particles = SPSOParticle[]
+    particles = SPSOParticle{T, eltype(T)}[]
 
     if !(lb === nothing || (all(isinf, lb) && all(isinf, ub)))
         positions = QuasiMonteCarlo.sample(num_particles, lb, ub, LatinHypercubeSample())
     end
 
     for i in 1:num_particles
         if lb === nothing || (all(isinf, lb) && all(isinf, ub))
-            position = Array{eltype(prob.u0), 1}(undef, dim)
-            for i in 1:dim
-                if abs(prob.u0[i]) > 0
-                    position[i] = prob.u0[i] + rand(eltype(prob.u0)) * abs(prob.u0[i])
-                else
-                    position[i] = rand(eltype(prob.u0))
-                end
-            end
+            @inbounds position = StaticArrays.sacollect(T,
+                ifelse(abs(prob.u0[i]) > 0,
+                    prob.u0[i] + rand(eltype(prob.u0)) * abs(prob.u0[i]),
+                    rand(eltype(prob.u0))) for i in 1:dim)
         else
-            position = @view positions[:, i]
+            @inbounds position = StaticArrays.sacollect(T, positions[j, i] for j in 1:dim)
         end
-        position = SVector{length(position), eltype(position)}(position)
-        velocity = @SArray zeros(eltype(position), dim)
+        velocity = zero(T)
 
         if !isnothing(prob.f.cons)
             penalty = calc_penalty(position, prob, 1, opt.θ, opt.γ, opt.h)
@@ -74,7 +134,7 @@ function init_particles(prob, opt, ::Type{T}) where {T <: SArray}
         end
     end
     gbest = SPSOGBest(gbest_position, gbest_cost)
-    return gbest, convert(Vector{typeof(particles[1])}, particles)
+    return gbest, particles
 end
 
 function init_particles(prob, opt, ::Type{T}) where {T <: AbstractArray}