SciML · utkarsh530 · Mar 22, 2024 · Feb 26, 2024 · Mar 1, 2024 · Mar 1, 2024
diff --git a/Project.toml b/Project.toml
@@ -15,19 +15,12 @@ NonlinearSolve = "8913a72c-1f9b-4ce2-8d82-65094dcecaec"
 Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
 QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
 Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"
 SimpleChains = "de6bee2f-e2f4-4ec7-b6ed-219cc6f6e9e5"
 SimpleNonlinearSolve = "727e6d20-b764-4bd8-a329-72de5adea6c7"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 
 [compat]
-julia = "1.6"
-
-[extras]
-LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
-StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-
-[targets]
-test = ["Test", "StaticArrays", "LinearAlgebra", "Optimization", "ForwardDiff"]
+julia = "1.6"
diff --git a/src/PSOGPU.jl b/src/PSOGPU.jl
@@ -3,11 +3,15 @@ module PSOGPU
 using SciMLBase, StaticArrays, Setfield, KernelAbstractions
 using QuasiMonteCarlo, Optimization, SimpleNonlinearSolve, ForwardDiff
 import Adapt
+import Adapt: adapt
 import Enzyme: autodiff_deferred, Active, Reverse
 import KernelAbstractions: @atomic, @atomicreplace, @atomicswap
 using QuasiMonteCarlo
 import DiffEqGPU: GPUTsit5, make_prob_compatible, vectorized_solve, vectorized_asolve
 
+using Reexport
+@reexport using SciMLBase
+
 ## Use lb and ub either as StaticArray or pass them separately as CuArrays
 ## Passing as CuArrays makes more sense, or maybe SArray? The based on no. of dimension
 struct SPSOParticle{T1, T2 <: eltype(T1)}
@@ -63,10 +67,11 @@ include("./utils.jl")
 include("./ode_pso.jl")
 include("./kernels.jl")
 include("./lowerlevel_solve.jl")
+include("init.jl")
 include("./solve.jl")
 include("./bfgs.jl")
 include("./hybrid.jl")
 
 export ParallelPSOKernel,
-       ParallelSyncPSOKernel, ParallelPSOArray, SerialPSO, OptimizationProblem, solve
+       ParallelSyncPSOKernel, ParallelPSOArray, SerialPSO
 end
diff --git a/src/algorithms.jl b/src/algorithms.jl
@@ -1,6 +1,14 @@
 
 abstract type PSOAlgorithm end
 abstract type HybridPSOAlgorithm{LocalOpt} end
+abstract type GPUSamplingAlgorithm end
+
+struct GPUUniformSampler <: GPUSamplingAlgorithm
+end
+
+struct GPUUnboundedSampler <: GPUSamplingAlgorithm
+end
+
 """
 ```julia
 ParallelPSOKernel(num_particles = 100)

diff --git a/src/hybrid.jl b/src/hybrid.jl
@@ -5,27 +5,26 @@
     result[i] = sol.u
 end
 
-function SciMLBase.__solve(prob::SciMLBase.OptimizationProblem,
-        opt::HybridPSO{Backend, LocalOpt},
-        args...;
+function SciMLBase.solve!(
+        cache::HybridPSOCache, opt::HybridPSO{Backend, LocalOpt}, args...;
         abstol = nothing,
         reltol = nothing,
-        maxiters = 100,
-        local_maxiters = 10,
-        kwargs...) where {Backend, LocalOpt <: Union{LBFGS, BFGS}}
-    t0 = time()
-    psoalg = opt.pso
-    local_opt = opt.local_opt
-    backend = opt.backend
+        maxiters = 100, local_maxiters = 10, kwargs...) where {
+        Backend, LocalOpt <: Union{LBFGS, BFGS}}
+    pso_cache = cache.pso_cache
 
-    sol_pso = solve(prob, psoalg, args...; maxiters, kwargs...)
+    sol_pso = solve!(pso_cache)
     x0s = sol_pso.original
-    prob = remake(prob, lb = nothing, ub = nothing)
+
+    backend = opt.backend
+
+    prob = remake(cache.prob, lb = nothing, ub = nothing)
     f = Base.Fix2(prob.f.f, prob.p)
     ∇f = instantiate_gradient(f, prob.f.adtype)
 
     kernel = simplebfgs_run!(backend)
-    result = KernelAbstractions.allocate(backend, typeof(prob.u0), length(x0s))
+    result = cache.start_points
+    copyto!(result, x0s)
     nlprob = NonlinearProblem{false}(∇f, prob.u0)
 
     nlalg = LocalOpt isa LBFGS ?

diff --git a/src/init.jl b/src/init.jl
@@ -0,0 +1,166 @@
+mutable struct PSOCache{TP, TAlg, TPart, TGbest, TSampler}
+    prob::TP
+    alg::TAlg
+    particles::TPart
+    gbest::TGbest
+    sampler::TSampler
+end
+
+struct HybridPSOCache{TPc, TSp, TAlg}
+    pso_cache::TPc
+    start_points::TSp
+    alg::TAlg
+end
+
+function __init!(particles, prob::OptimizationProblem,
+        opt::Union{ParallelPSOKernel, ParallelSyncPSOKernel}, sampler::T,
+        args...; kwargs...) where {T <: QuasiMonteCarlo.SamplingAlgorithm}
+    backend = opt.backend
+
+    qmc_samples = QuasiMonteCarlo.sample(opt.num_particles, prob.lb, prob.ub, sampler)
+
+    qmc_samples = adapt(backend, qmc_samples)
+
+    kernel! = gpu_init_particles!(backend)
+
+    kernel!(
+        particles, qmc_samples, prob, opt, typeof(prob.u0), T; ndrange = opt.num_particles)
+
+    best_particle = minimum(particles)
+    init_gbest = SPSOGBest(best_particle.best_position, best_particle.best_cost)
+
+    return particles, init_gbest
+end
+
+function __init!(particles, prob::OptimizationProblem,
+        opt::Union{ParallelPSOKernel, ParallelSyncPSOKernel}, sampler::T,
+        args...; kwargs...) where {T <: GPUSamplingAlgorithm}
+    backend = opt.backend
+
+    kernel! = gpu_init_particles!(backend)
+
+    kernel!(particles, prob, opt, typeof(prob.u0), T; ndrange = opt.num_particles)
+
+    best_particle = minimum(particles)
+
+    init_gbest = SPSOGBest(best_particle.best_position, best_particle.best_cost)
+
+    particles, init_gbest
+end
+
+function SciMLBase.init(
+        prob::OptimizationProblem, opt::ParallelPSOKernel, args...; sampler = GPUUniformSampler(), kwargs...)
+    @assert prob.u0 isa SArray
+
+    ## Bounds check
+    lb, ub = check_init_bounds(prob)
+    lb, ub = check_init_bounds(prob)
+    prob = remake(prob; lb = lb, ub = ub)
+
+    particles = KernelAbstractions.allocate(
+        opt.backend, SPSOParticle{typeof(prob.u0), eltype(typeof(prob.u0))}, opt.num_particles)
+
+    _sampler = if lb === nothing || ub === nothing || (all(isinf, lb) && all(isinf, ub))
+        GPUUnboundedSampler()
+    else
+        sampler
+    end
+
+    particles, _init_gbest = __init!(particles, prob, opt, _sampler, args...; kwargs...)
+
+    init_gbest = KernelAbstractions.allocate(opt.backend, typeof(_init_gbest), (1,))
+    copyto!(init_gbest, [_init_gbest])
+
+    return PSOCache{
+        typeof(prob), typeof(opt), typeof(particles), typeof(init_gbest), typeof(_sampler)}(
+        prob, opt, particles, init_gbest, _sampler)
+end
+
+function SciMLBase.init(
+        prob::OptimizationProblem, opt::ParallelSyncPSOKernel, args...; sampler = GPUUniformSampler(), kwargs...)
+    @assert prob.u0 isa SArray
+
+    ## Bounds check
+    lb, ub = check_init_bounds(prob)
+    lb, ub = check_init_bounds(prob)
+    prob = remake(prob; lb = lb, ub = ub)
+
+    particles = KernelAbstractions.allocate(
+        opt.backend, SPSOParticle{typeof(prob.u0), eltype(typeof(prob.u0))}, opt.num_particles)
+
+    _sampler = if lb === nothing || ub === nothing || (all(isinf, lb) && all(isinf, ub))
+        GPUUnboundedSampler()
+    else
+        sampler
+    end
+
+    particles, init_gbest = __init!(particles, prob, opt, _sampler, args...; kwargs...)
+
+    return PSOCache{
+        typeof(prob), typeof(opt), typeof(particles), typeof(init_gbest), typeof(_sampler)}(
+        prob, opt, particles, init_gbest, _sampler)
+end
+
+function SciMLBase.reinit!(cache::Union{PSOCache, HybridPSOCache})
+    reinit_cache!(cache, cache.alg)
+end
+
+function reinit_cache!(cache::PSOCache, opt::ParallelPSOKernel)
+    prob = cache.prob
+    particles = cache.particles
+
+    particles, _init_gbest = __init!(particles, prob, opt, cache.sampler)
+
+    copyto!(cache.gbest, [_init_gbest])
+
+    return nothing
+end
+
+function reinit_cache!(cache::PSOCache, opt::ParallelSyncPSOKernel)
+    prob = cache.prob
+    particles = cache.particles
+
+    particles, init_gbest = __init!(particles, prob, opt, cache.sampler)
+
+    cache.gbest = init_gbest
+
+    return nothing
+end
+
+function SciMLBase.init(
+        prob::OptimizationProblem, opt::HybridPSO{Backend, LocalOpt}, args...;
+        kwargs...) where {Backend, LocalOpt <: Union{LBFGS, BFGS}}
+    psoalg = opt.pso
+    backend = opt.backend
+
+    pso_cache = init(prob, psoalg, args...; kwargs...)
+
+    start_points = KernelAbstractions.allocate(
+        backend, typeof(prob.u0), opt.pso.num_particles)
+
+    return HybridPSOCache{
+        typeof(pso_cache), typeof(start_points), typeof(opt)}(pso_cache, start_points, opt)
+end
+
+function reinit_cache!(cache::HybridPSOCache,
+        opt::HybridPSO{Backend, LocalOpt}) where {Backend, LocalOpt <: Union{LBFGS, BFGS}}
+    reinit!(cache.pso_cache)
+    fill!(cache.start_points, zero(eltype(cache.start_points)))
+    return nothing
+end
+
+function Base.getproperty(cache::HybridPSOCache, name::Symbol)
+    if name ∈ (:start_points, :pso_cache, :alg)
+        return getfield(cache, name)
+    else
+        return getproperty(cache.pso_cache, name)
+    end
+end
+
+function Base.setproperty!(cache::HybridPSOCache, name::Symbol, val)
+    if name ∈ (:start_points, :pso_cache, :alg)
+        return setfield!(cache, name, val)
+    else
+        return setproperty!(cache.pso_cache, name, val)
+    end
+end
diff --git a/src/solve.jl b/src/solve.jl
@@ -2,6 +2,55 @@ function get_pos(particle)
     return particle.position
 end
 
+function SciMLBase.solve!(
+        cache::Union{PSOCache, HybridPSOCache}, args...; maxiters = 100, kwargs...)
+    solve!(cache, cache.alg, args...; maxiters, kwargs...)
+end
+
+function SciMLBase.solve!(
+        cache::PSOCache, opt::ParallelPSOKernel, args...; maxiters = 100, kwargs...)
+    prob = cache.prob
+    t0 = time()
+    gbest, particles = vectorized_solve!(cache.prob,
+        cache.gbest,
+        cache.particles,
+        opt,
+        Val(opt.global_update),
+        args...;
+        maxiters, kwargs...)
+    t1 = time()
+
+    particles_positions = get_pos.(particles)
+    SciMLBase.build_solution(SciMLBase.DefaultOptimizationCache(prob.f, prob.p), opt,
+        gbest.position, prob.f(gbest.position, prob.p), original = particles_positions,
+        stats = Optimization.OptimizationStats(; time = t1 - t0))
+end
+
+function SciMLBase.solve!(
+        cache::PSOCache, opt::ParallelSyncPSOKernel, args...; maxiters = 100, kwargs...)
+    prob = cache.prob
+    t0 = time()
+    gbest, particles = vectorized_solve!(prob,
+        cache.gbest,
+        cache.particles,
+        opt,
+        args...;
+        maxiters,
+        kwargs...)
+    t1 = time()
+
+    particles_positions = get_pos.(particles)
+    SciMLBase.build_solution(SciMLBase.DefaultOptimizationCache(prob.f, prob.p), opt,
+        gbest.position, prob.f(gbest.position, prob.p), original = particles_positions,
+        stats = Optimization.OptimizationStats(; time = t1 - t0))
+end
+
+function SciMLBase.solve(prob::OptimizationProblem,
+        opt::Union{ParallelPSOKernel, ParallelSyncPSOKernel, HybridPSO},
+        args...; maxiters = 100, kwargs...)
+    solve!(init(prob, opt, args...; kwargs...), opt, args...; maxiters, kwargs...)
+end
+
 function SciMLBase.__solve(prob::OptimizationProblem,
         opt::PSOAlgorithm,
         args...;
@@ -18,34 +67,6 @@ function SciMLBase.__solve(prob::OptimizationProblem,
         stats = Optimization.OptimizationStats(; time = solve_time))
 end
 
-function pso_solve(prob::OptimizationProblem,
-        opt::ParallelPSOKernel,
-        args...;
-        kwargs...)
-    backend = opt.backend
-    @assert prob.u0 isa SArray
-    init_gbest, particles = init_particles(prob, opt, typeof(prob.u0))
-    # TODO: Do the equivalent of cu()/roc()
-    particles_eltype = eltype(particles) === Float64 ? Float32 : eltype(particles)
-    gpu_particles = KernelAbstractions.allocate(backend,
-        particles_eltype,
-        size(particles))
-    copyto!(gpu_particles, particles)
-    gpu_init_gbest = KernelAbstractions.allocate(backend, typeof(init_gbest), (1,))
-    copyto!(gpu_init_gbest, [init_gbest])
-
-    t0 = time()
-    gbest, particles = vectorized_solve!(prob,
-        gpu_init_gbest,
-        gpu_particles,
-        opt,
-        Val(opt.global_update),
-        args...;
-        kwargs...)
-    t1 = time()
-    gbest, particles, t1 - t0
-end
-
 function pso_solve(prob::OptimizationProblem,
         opt::ParallelPSOArray,
         args...;
@@ -69,25 +90,3 @@ function pso_solve(prob::OptimizationProblem, opt::SerialPSO, args...; kwargs...
     t1 = time()
     gbest, particles, t1 - t0
 end
-
-function pso_solve(prob::OptimizationProblem,
-        opt::ParallelSyncPSOKernel,
-        args...;
-        kwargs...)
-    backend = opt.backend
-    init_gbest, particles = init_particles(prob, opt, typeof(prob.u0))
-    particles_eltype = eltype(particles) === Float64 ? Float32 : eltype(particles)
-    gpu_particles = KernelAbstractions.allocate(backend, particles_eltype, size(particles))
-    copyto!(gpu_particles, particles)
-    init_gbest = init_gbest
-
-    t0 = time()
-    gbest, particles = vectorized_solve!(prob,
-        init_gbest,
-        gpu_particles,
-        opt,
-        args...;
-        kwargs...)
-    t1 = time()
-    gbest, particles, t1 - t0
-end