Skip to content

Add VecchiaKKT.jl #37

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/src/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ VecchiaMLE.generate_xyGrid
VecchiaMLE.generate_safe_xyGrid
VecchiaMLE.MadNLP_Print_Level
VecchiaMLE.Int_to_Mode
VecchiaMLE.Int_to_KKT_System
VecchiaMLE.Uni_Error
VecchiaMLE.IndexReorder
VecchiaMLE.IndexMaxMin
Expand Down
1 change: 1 addition & 0 deletions src/VecchiaMLE.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ const KA = KernelAbstractions
include("VecchiaMLE_defines.jl")
include("VecchiaMLE_utils.jl")
include("VecchiaMLE_input.jl")
include("models/kkt/VecchiaKKT.jl")
include("VecchiaMLE_kernels.jl")
include("models/VecchiaMLE_NLPModel.jl")

Expand Down
6 changes: 4 additions & 2 deletions src/VecchiaMLE_defines.jl
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ The fields to the struct are as follows:\n
* `Number_of_Samples`: The Number of Samples the user gives input to the program.
* `MadNLP_print_level`: The print level to the optimizer. Can be ignored, and will default to ERROR.
* `mode`: The opertaing mode to the analysis(GPU or CPU). The mapping is [1: 'CPU', 2: 'GPU'].
* `KKT_System`: Use the default Sparse KKT System or the custom VecchiaKKTSystem. The mapping is [1: 'Sparse System', 2: 'VecchiaKKTSystem'].

"""
mutable struct VecchiaMLEInput{M}
Expand All @@ -99,8 +100,9 @@ mutable struct VecchiaMLEInput{M}
Number_of_Samples::Int
MadNLP_print_level::Int
mode::Int
KKT_System::Int

function VecchiaMLEInput(n::Int, k::Int, samples::M, Number_of_Samples::Int, MadNLP_print_Level::Int=5, mode::Int=1) where {M<:AbstractMatrix}
return new{M}(n, k, samples, Number_of_Samples, MadNLP_print_Level, mode)
function VecchiaMLEInput(n::Int, k::Int, samples::M, Number_of_Samples::Int, MadNLP_print_Level::Int=5, mode::Int=1, KKT_System::Int=1) where {M<:AbstractMatrix}
return new{M}(n, k, samples, Number_of_Samples, MadNLP_print_Level, mode, KKT_System)
end
end
12 changes: 10 additions & 2 deletions src/VecchiaMLE_input.jl
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,16 @@ function ExecuteModel!(iVecchiaMLE::VecchiaMLEInput, ptGrid::AbstractVector, pre
model = get_vecchia_model(iVecchiaMLE, ptGrid)
end

diags.solve_model_time = @elapsed begin
output = madnlp(model, print_level=MadNLP_Print_Level(iVecchiaMLE.MadNLP_print_level))
diags.solve_model_time = @elapsed begin
if iVecchiaMLE.KKT_System == 1
output = madnlp(model, print_level=MadNLP_Print_Level(iVecchiaMLE.MadNLP_print_level))
elseif iVecchiaMLE.KKT_System == 2
output = madnlp(model,
print_level=MadNLP_Print_Level(iVecchiaMLE.MadNLP_print_level),
kkt_system = VecchiaKKTSystem,
linear_solver=MadNLPGPU.CUDSSSolver
)
end
end


Expand Down
3 changes: 1 addition & 2 deletions src/VecchiaMLE_kernels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,6 @@ function vecchia_generate_hess_tri_structure!(nnzh::Int, n::Int, colptr_diff::Cu
fill!(hrows, one(Int))
fill!(hcols, one(Int))


# launch the kernel
backend = KA.get_backend(hrows)
kernel = vecchia_generate_hess_tri_structure_kernel!(backend)
Expand All @@ -154,7 +153,7 @@ function vecchia_generate_hess_tri_structure!(nnzh::Int, n::Int, colptr_diff::Cu
view(hrows, 2:n) .+= cumsum(f.(view(colptr_diff, 1:n-1)))
view(hcols, 2:n) .+= cumsum(view(colptr_diff, 1:n-1))

kernel(nnzh, n, colptr_diff, view(hrows, 1:n), view(hcols, 1:n), hrows, hcols, ndrange = n)
kernel(nnzh, n, colptr_diff, hrows, hcols, hrows, hcols, ndrange = n)

KA.synchronize(backend)

Expand Down
43 changes: 34 additions & 9 deletions src/VecchiaMLE_utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,14 @@ end
Samples_Matrix = generate_Samples(MatCov::AbstractMatrix,
n::Integer,
Number_of_Samples::Int;
mode::VecchiaMLE.COMPUTE_MODE )
mode::VecchiaMLE.COMPUTE_MODE)

Generate a number of samples according to the given Covariance Matrix MatCov.
Note the samples are given as mean zero.
If a CUDA compatible device is detected, the samples are generated on the GPU
and transferred back to the CPU.
Note that this isn't the most optimized, and should only be used for small sample generation
(i.e., n < 100, Number_of_Samples < 100).

## Input arguments

Expand Down Expand Up @@ -62,6 +64,7 @@ function generate_Samples(MatCov::AbstractMatrix, n::Integer, Number_of_Samples:
return V
end


"""
Covariance_Matrix = generate_MatCov(n::Integer,
params::AbstractArray,
Expand Down Expand Up @@ -174,7 +177,7 @@ function MadNLP_Print_Level(pLevel::Integer)::MadNLP.LogLevels
end

"""
cpu_mode = Int_to_Mode(n::Integer)
cpu_mode = Int_to_Mode(n::Int)

A helper function to convert an integer to a COMPUTE_MODE.
The mapping is [1: 'CPU', 2: 'GPU'].
Expand All @@ -196,14 +199,36 @@ function Int_to_Mode(n::Int)::COMPUTE_MODE
return CPU
end

"""
kkt_system = Int_to_KKT_System(n::Int)

A helper function to determine which KKT system to use.
The mapping is [1: 'Sparse System', 2: 'VecchiaKKTSystem'].

## Input arguments
* `n`: the given system as an int;

## Output Arguments
*`kkt_system` : The chosen KKT system.
"""
function Int_to_KKT_System(n::Int)::MadNLP.AbstractKKTSystem
if n == 1
return MadNLP.AbstractSparseKKTSystem
elseif n == 2
return VecchiaKKTSystem
else
return MadNLP.AbstractSparseKKTSystem
end
end

"""
Error = Uni_Error(TCov::AbstractMatrix,
L::AbstractMatrix)
Generates the "Univariate KL Divergence" from the Given True Covariane matrix
and Approximate Covariance matrix. The output is the result of the following
optimization problem: sup(a^T X_A || a^⊺ X_T). The solution is
f(μ) = ln(μ) + (2μ)^{-2} - 0.5,
where μ is the largest or smallest eigenvalue of the matrix Σ_A^{-1}Σ_T, whichever
optimization problem: sup(aᵀ X_A || aᵀ X_T). The solution is
f(μ) = ln(μ) + (2μ)⁻² - 0.5,
where μ is the largest or smallest eigenvalue of the matrix Σ_A⁻¹Σ_T, whichever
maximizes the function f(μ). Note: Σ_A , Σ_T are the respective approximate and true
covariance matrices, and X_A, X_T are samples from the respective Distributions (mean zero).

Expand Down Expand Up @@ -559,15 +584,15 @@ The current checks are:\n
* `iVecchiaMLE`: The filled-out VecchiaMLEInput struct. See VecchiaMLEInput struct for more details.
* `ptGrid`: The grid of point locations in 2D space. Must be a Vector of 2D Vectors!
"""
function sanitize_input!(iVecchiaMLE::VecchiaMLEInput, ptGrid::T) where T <: Union{AbstractVector, Nothing}
function sanitize_input!(iVecchiaMLE::VecchiaMLEInput, ptGrid::T) where T <: Union{AbstractVector, Nothing}
@assert iVecchiaMLE.n > 0 "The dimension n must be strictly positive!"
@assert iVecchiaMLE.k <= iVecchiaMLE.n^2 "The number of conditioning neighbors must be less than n^2 !"
@assert size(iVecchiaMLE.samples, 1) > 0 "Samples must be nonempty!"
@assert (iVecchiaMLE.MadNLP_print_level in 1:5) "MadNLP Print Level not in 1:5!"
@assert size(iVecchiaMLE.samples, 2) == iVecchiaMLE.n^2 "samples must be of size Number_of_Samples x n^2!"
@assert size(iVecchiaMLE.samples, 1) == iVecchiaMLE.Number_of_Samples "samples must be of size Number_of_Samples x n^2!"
@assert iVecchiaMLE.mode in [1, 2] "Operation mode not valid! must be in [1, 2]."

@assert iVecchiaMLE.mode in [1, 2] "Operation mode not valid! must be in [1, 2]."
@assert iVecchiaMLE.KKT_System in [1, 2] "KKT_System not properly specified! Must be in [1, 2]."
if typeof(iVecchiaMLE.samples) <: Matrix && iVecchiaMLE.mode == 2
iVecchiaMLE.samples = CuMatrix{Float64}(iVecchiaMLE.samples)
end
Expand All @@ -581,5 +606,5 @@ function sanitize_input!(iVecchiaMLE::VecchiaMLEInput, ptGrid::T) where T <: Uni
@assert length(pt) == 2 "Position $(i) in ptGrid is not 2 dimensional!"
end

return ptGrid::Vector{Vector{Float64}}
return ptGrid
end
2 changes: 1 addition & 1 deletion src/models/VecchiaMLE_NLPModel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ function create_vecchia_cache(samples::AbstractMatrix, k::Int, ptGrid::AbstractV
if S != Vector{Float64}

offsets = cumsum([0; m[1:end-1]]) |> CuVector{Int}
B = [CuMatrix{T}(undef, 0, 0)]
B = [CuMatrix{T}(undef, m[j], m[j]) for j in 1:n]
rowsL = CuVector{Int}(rowsL)
colsL = CuVector{Int}(colsL)
colptrL = CuVector{Int}(colptrL)
Expand Down
195 changes: 195 additions & 0 deletions src/models/kkt/VecchiaKKT.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
using CUDA.CUSOLVER
#=
Custom KKT System for the Vecchia optimiation problem.
TODO: Update KKT System function?
=#


struct VecchiaKKTSystem{T, VT, MT<:AbstractMatrix{T}, QN<:MadNLP.AbstractHessian{T, VT}, LS} <: MadNLP.AbstractCondensedKKTSystem{T, VT, MT, QN}

# number of nonzeros in L
p::Int

# number of constraints
n::Int

# number of nonzeros in hessian. Not p+n!
nnzh::Int

# Diagonal Matrices (Stored as vectors)
# Diagonal matrix NB⁻¹Nᵀ
NBNT::VT

# Vector of Vectors which holds s_j
S::Vector{VT}

# holds sizes of s_j ~> side length of each sub-block in hessian
# cumsum format
m::Vector{Int}

# Jacobian values
jac::VT
# Hessian Values
hess::VT

# Scrap Buffer
buffer::VT

linear_solver::LS

# constraint vector
c::VT
end

#=
TODO: Fill out
Things to do:
1. Query MadNLPSolver for c, grad_y_Lagrange, grad_z_Lagrange, x, lagrange multipliers
2. Allocate stuff (buffer)
3. Calculate NBNT, S, Λ, D.
4. Create VecchiaKKT
5. return
=#

function MadNLP.create_kkt_system(
::Type{VecchiaKKTSystem},
cb::MadNLP.SparseCallback{T,VT},
ind_cons,
linear_solver::Type;
opt_linear_solver=MadNLP.default_options(linear_solver),
hessian_approximation=MadNLP.ExactHessian,
qn_options=MadNLP.QuasiNewtonOptions(),
) where {T,VT}

nlp = cb.nlp
n = nlp.cache.n
p = get_nvar(nlp) - n


# Get S, need potrf_batch and potrs_batch.
# How do I do this?
B_cpy = copy(nlp.cache.B)

# port m to CPU right now, i don't know exactly how to get this?
m = zeros(length(nlp.cache.m))
copyto!(m, nlp.cache.m)

# What is m?
println("m\n", m)

S = [CuVector{T}([T(i == 1) for i in 1:m[j]]) for j in eachindex(m)]
println("S\n", S)

println("length S: ", length(S))
println("lengths: ", [length(s) for s in S])
println("length B_cpy", length(B_cpy))
println("lengths:", [size(B) for B in B_cpy])
CUDA.CUSOLVER.potrfBatched!('U', B_cpy)
CUDA.CUSOLVER.potrsBatched!('U', B_cpy, S)

CUDA.@allowscalar NBNT = CuVector{T}([S[j][1] for j in eachindex(m)])


# NOTE: MadNLP automatically updates these values
hess = CUDA.zeros(T, p+n)
jac = CUDA.zeros(T, 2*n)

# Setting the buffer avoids edge cases for small k values
buffer = CUDA.zeros(T, max(p+n, 3*n))

# Query for constraint vector
c = # ???

return VecchiaKKTSystem{T, VT, AbstractMatrix{T}, MadNLP.AbstractHessian{T, VT}, typeof(linear_solver)}(
p,
n,
p+n,
NBNT,
S,
m,
jac,
hess,
buffer,
linear_solver,
c
)
end

#=
Things to do:
1. Solve Δz
2. Solve Δλ
3. Solve Δy
4. return true

w = (Δy, Δz, Δλ)
Δy is length kkt.p
Δz is length kkt.n
Δλ is length kkt.n
~> w is length kkt.p + 2*kkt.n
=#
function MadNLP.solve!(kkt::VecchiaKKTSystem, w::MadNLP.AbstractKKTVector)

# Hold here for the moment.
Λ = view(kkt.jac, (1:kkt.n).+kkt.p)
D = view(kkt.hess, (1:kkt.n).+kkt.p) ./ Λ

# get buffer
buffer = kkt.buffer

# Solve for Δz
# RHS ~> save to buffer
buffer_dz_ptr = view(buffer, 1:p)
view(buffer_dz_ptr, 1:kkt.n) .= view(w, (1:kkt.n).+kkt.p)

# TODO: MAYBE WRONG!!!
# TODO: This is for sure wrong, since the subviews are over the colptr, not just m either.
subviews = [view(buffer_dz_ptr, kkt.S[i] : (kkt.S[i+1] - 1)) for i in eachindex(kkt.S)]
view(buffer_dz_ptr, 1:length(kkt.S)) .= dot.(kkt.S, subviews)

buffer_dz_ptr = view(buffer, 1:kkt.n)
buffer_dz_ptr .= kkt.c .- buffer_dz_ptr # TODO: kkt.c won't work. c is stored in w (where?), wait for francois.

# Need product for later, save off to other part of buffer
buffer_cmNBgyL = view(buffer, (1:kkt.n).+2*kkt.n)
buffer_cmNBgyL .= copy(buffer_dz_ptr)

buffer_dz_ptr .= buffer_dz_ptr ./ NBNT
buffer_dz_ptr .= kkt.grad_z_Lagrange - buffer_dz_ptr

# building matrix ΛD + D(NB⁻¹Nᵀ)⁻¹D
# Should be same size as grad_z_Lagrange
buffer_DNBNTD_ptr = view(buffer, (1:kkt.n).+kkt.n)

buffer_DNBNTD_ptr .= D ./ kkt.NBNT
buffer_DNBNTD_ptr ./= D
buffer_DNBNTD_ptr .+= view(kkt.hess, kkt.nnzh-kkt.n:kkt.nnzh) # View is on ΛD in Hessian of Lagrangian

# Now solve for Δz. After Δy in w.
view(w, (1:kkt.n).+kkt.p) .= - buffer_dz_ptr ./ buffer_DNBNTD_ptr

# buffer can now be overwritten
# solve for Δλ
# same size as buffer_dz_ptr!
buffer_dlm_ptr = view(buffer, 1:kkt.n)
buffer_dlm_ptr .= D .* view(w, (1:kkt.n).+kkt.p)
buffer_dlm_ptr .+= buffer_cmNBgyL

view(w, (1:kkt.n).+(kkt.n+kkt.p)) .= buffer_dlm_ptr ./ kkt.NBNT

# buffer can now be overwritten
# solve for Δy
buffer_dy_ptr = view(buffer, 1:kkt.p)

# view onto diagonal to add Δλ entries
# Note negative (of the Nᵀ) cancels out with the other negative
buffer_diag_y = view(buffer_dy_ptr, kkt.m)
buffer_diag_y .+= view(w, (1:kkt.n).+(kkt.n+kkt.p))
# That SHOULD be the RHS of Δy

# The S_j's are the nonzero elements of B⁻¹.
# Fill in Δy.
view(w, 1:kkt.p) .= dot.(kkt.S[1:length(kkt.m)], Ref(buffer_diag_y))

return true
end
9 changes: 6 additions & 3 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,16 @@ include("models/VecchiaMLE_models.jl")
include("test_cpu_compatible_with_jump.jl")
include("test_cpu_diagnostics.jl")
include("test_memory_allocation_cpu.jl")
include("test_custom_kkt_cpu.jl")
include("test_kkt_system_comparable_cpu.jl")
include("test_model_inputs.jl")
include("test_abnormal_ptGrid.jl")

if CUDA.has_cuda()
include("test_gpu_compatible_with_jump.jl")
include("test_gpu_diagnostics.jl")
include("test_cpu_compatible_with_gpu.jl")
include("test_memory_allocation_gpu.jl")
include("test_custom_kkt_gpu.jl")
include("test_kkt_system_comparable_gpu.jl")
end

include("test_model_inputs.jl")
include("test_abnormal_ptGrid.jl")
Loading
Loading