diff --git a/ext/MathOptAIFluxExt.jl b/ext/MathOptAIFluxExt.jl index 57a2fb4..f019d7b 100644 --- a/ext/MathOptAIFluxExt.jl +++ b/ext/MathOptAIFluxExt.jl @@ -179,7 +179,7 @@ julia> MathOptAI.build_predictor( ) Pipeline with layers: * Affine(A, b) [input: 1, output: 16] - * ReLUQuadratic() + * ReLUQuadratic(nothing) * Affine(A, b) [input: 16, output: 1] ``` """ diff --git a/ext/MathOptAILuxExt.jl b/ext/MathOptAILuxExt.jl index 77a4179..fcb0cd4 100644 --- a/ext/MathOptAILuxExt.jl +++ b/ext/MathOptAILuxExt.jl @@ -148,7 +148,7 @@ julia> MathOptAI.build_predictor( ) Pipeline with layers: * Affine(A, b) [input: 1, output: 16] - * ReLUQuadratic() + * ReLUQuadratic(nothing) * Affine(A, b) [input: 16, output: 1] ``` """ diff --git a/src/predictors/Pipeline.jl b/src/predictors/Pipeline.jl index ef3d1ae..8baa502 100644 --- a/src/predictors/Pipeline.jl +++ b/src/predictors/Pipeline.jl @@ -28,7 +28,7 @@ julia> f = MathOptAI.Pipeline( ) Pipeline with layers: * Affine(A, b) [input: 2, output: 1] - * ReLUQuadratic() + * ReLUQuadratic(nothing) julia> y, formulation = MathOptAI.add_predictor(model, f, x); @@ -42,7 +42,7 @@ Affine(A, b) [input: 2, output: 1] │ └ moai_Affine[1] └ constraints [1] └ x[1] + 2 x[2] - moai_Affine[1] = 0 -ReLUQuadratic() +ReLUQuadratic(nothing) ├ variables [2] │ ├ moai_ReLU[1] │ └ moai_z[1] diff --git a/src/predictors/ReLU.jl b/src/predictors/ReLU.jl index eb08077..9f900a1 100644 --- a/src/predictors/ReLU.jl +++ b/src/predictors/ReLU.jl @@ -244,7 +244,7 @@ function add_predictor( end """ - ReLUQuadratic() <: AbstractPredictor + ReLUQuadratic(; relaxation_parameter = nothing) <: AbstractPredictor An [`AbstractPredictor`](@ref) that represents the relationship: ```math @@ -258,6 +258,15 @@ y \\cdot z = 0 \\\\ y, z \\ge 0 \\end{aligned} ``` +If `relaxation_parameter` is set to a value `ϵ`, the constraints become: +```math +\\begin{aligned} +x = y - z \\\\ +y \\cdot z \\leq \\epsilon \\\\ +y, z \\ge 0 +\\end{aligned} +``` + ## Example @@ -269,7 +278,7 @@ julia> 
model = Model(); julia> @variable(model, -1 <= x[i in 1:2] <= i); julia> f = MathOptAI.ReLUQuadratic() -ReLUQuadratic() +ReLUQuadratic(nothing) julia> y, formulation = MathOptAI.add_predictor(model, f, x); @@ -279,7 +288,7 @@ julia> y moai_ReLU[2] julia> formulation -ReLUQuadratic() +ReLUQuadratic(nothing) ├ variables [4] │ ├ moai_ReLU[1] │ ├ moai_ReLU[2] @@ -300,7 +309,15 @@ ReLUQuadratic() └ moai_ReLU[2]*moai_z[2] = 0 ``` """ -struct ReLUQuadratic <: AbstractPredictor end +struct ReLUQuadratic <: AbstractPredictor + relaxation_parameter::Union{Nothing,Float64} + function ReLUQuadratic(; + relaxation_parameter::Union{Nothing,Float64} = nothing, + ) + @assert something(relaxation_parameter, 0.0) >= 0.0 + return new(relaxation_parameter) + end +end function add_predictor( model::JuMP.AbstractModel, @@ -314,6 +331,11 @@ function add_predictor( z = JuMP.@variable(model, [1:m], base_name = "moai_z") _set_bounds_if_finite.(Ref(cons), z, 0, max.(0, -first.(bounds))) append!(cons, JuMP.@constraint(model, x .== y - z)) - append!(cons, JuMP.@constraint(model, y .* z .== 0)) + if predictor.relaxation_parameter === nothing + append!(cons, JuMP.@constraint(model, y .* z .== 0)) + else + ϵ = predictor.relaxation_parameter + append!(cons, JuMP.@constraint(model, y .* z .<= ϵ)) + end return y, Formulation(predictor, Any[y; z], cons) end diff --git a/test/test_predictors.jl b/test/test_predictors.jl index f94f152..199c63c 100644 --- a/test/test_predictors.jl +++ b/test/test_predictors.jl @@ -225,6 +225,7 @@ function test_ReLU_Quadratic() set_silent(model) @variable(model, x[1:2]) f = MathOptAI.ReLUQuadratic() + @test f.relaxation_parameter === nothing y, formulation = MathOptAI.add_predictor(model, f, x) @test length(y) == 2 @test num_variables(model) == 6 @@ -237,6 +238,28 @@ function test_ReLU_Quadratic() return end +function test_ReLU_Quadratic_relaxed() + model = Model(Ipopt.Optimizer) + set_silent(model) + @variable(model, x[1:2]) + f = MathOptAI.ReLUQuadratic(; 
relaxation_parameter = 1e-4) + y, formulation = MathOptAI.add_predictor(model, f, x) + # Maximize sum of all variables to exercise the ReLU relaxation + @objective(model, Max, sum(formulation.variables)) + @test length(y) == 2 + @test num_variables(model) == 6 + @test num_constraints(model, AffExpr, MOI.EqualTo{Float64}) == 2 + @test num_constraints(model, QuadExpr, MOI.LessThan{Float64}) == 2 + fix.(x, [-1, 2]) + optimize!(model) + @test is_solved_and_feasible(model) + # We do not satisfy equality to a tight tolerance + @test !isapprox(value.(y), [0.0, 2.0]; atol = 1e-6) + # But we satisfy equality to a loose tolerance + @test isapprox(value.(y), [0.0, 2.0]; atol = 1e-2) + return +end + function test_Sigmoid() model = Model(Ipopt.Optimizer) set_silent(model)