diff --git a/ifopt_core/include/ifopt/composite.h b/ifopt_core/include/ifopt/composite.h index 7f7a5a5..8953127 100644 --- a/ifopt_core/include/ifopt/composite.h +++ b/ifopt_core/include/ifopt/composite.h @@ -67,6 +67,9 @@ class Component { using Jacobian = Eigen::SparseMatrix; using VectorXd = Eigen::VectorXd; using VecBound = std::vector; + using Hessian = Eigen::SparseMatrix; + using HessianTriplet = std::vector>; + using RowIndicesHessiansPair = std::pair, std::vector>; /** * @brief Creates a component. @@ -116,6 +119,8 @@ class Component { */ virtual Jacobian GetJacobian() const = 0; + virtual RowIndicesHessiansPair GetHessians() const = 0; + /** * @brief Returns the number of rows of this component. */ @@ -176,6 +181,7 @@ class Composite : public Component { // see Component for documentation VectorXd GetValues() const override; Jacobian GetJacobian() const override; + RowIndicesHessiansPair GetHessians() const override; VecBound GetBounds() const override; void SetVariables(const VectorXd& x) override; void PrintAll() const; diff --git a/ifopt_core/include/ifopt/constraint_set.h b/ifopt_core/include/ifopt/constraint_set.h index da258f7..830399d 100644 --- a/ifopt_core/include/ifopt/constraint_set.h +++ b/ifopt_core/include/ifopt/constraint_set.h @@ -107,6 +107,49 @@ class ConstraintSet : public Component { virtual void FillJacobianBlock(std::string var_set, Jacobian& jac_block) const = 0; + /** + * @brief The second-order derivatives (Hessians) for these constraints and variables. + * + * This function returns a pair containing two vectors. The first vector holds + * the indices of the corresponding constraints or costs, while the second vector + * contains the Hessian matrices as sparse representations. + * + * Assuming @c n constraints or costs and @c m variables, each Hessian matrix + * in the second vector has dimensions m x m, representing the second-order + * derivatives of a specific constraint or cost function with respect to the + * optimization variables. + * + * This function only combines the user-defined Hessians from + * FillHessianTriplets(). + */ + RowIndicesHessiansPair GetHessians() const final; + + /** + * @brief Set individual Hessians corresponding to each constraint or cost function. + * @param var_set_list The full set of variables involved in Hessian computation. + * @param hessian_row_index_list Indices of the corresponding constraints or costs. + * @param triplets_list Nonzero elements of each Hessian, stored as triplets. + * + * This function provides the second-order derivatives (Hessians) of constraints + * or costs with respect to the optimization variables. Unlike the Jacobian, Hessians + * involve cross-derivatives between different variable sets, so the full variable + * set is provided. + * + * The Hessians are stored efficiently in triplet form to minimize memory usage, + * as Hessian matrices are typically much sparser than Jacobians. Each entry in + * @c triplets_list corresponds to a specific constraint or cost function, with + * its nonzero elements stored as Eigen triplets. + * + * If a constraint or cost does not require a Hessian, it should simply be omitted + * from @c hessian_row_index_list and @c triplets_list. The missing entries will + * be treated as empty Hessians. + */ + virtual void FillHessianTriplets(std::vector var_set_list, + std::vector& hessian_row_index_list, + std::vector& triplets_list) const + { + } + protected: /** * @brief Read access to the value of the optimization variables. diff --git a/ifopt_core/include/ifopt/problem.h b/ifopt_core/include/ifopt/problem.h index 4a3321f..ef5699a 100644 --- a/ifopt_core/include/ifopt/problem.h +++ b/ifopt_core/include/ifopt/problem.h @@ -98,6 +98,8 @@ class Problem { using VecBound = Component::VecBound; using Jacobian = Component::Jacobian; using VectorXd = Component::VectorXd; + using Hessian = Component::Hessian; + using RowIndicesHessiansPair = Component::RowIndicesHessiansPair; /** * @brief Creates a optimization problem with no variables, costs or constraints. @@ -215,6 +217,64 @@ class Problem { */ Jacobian GetJacobianOfCosts() const; + /** + * @brief Computes the nonzero entries of the total Hessian after applying scaling factors. + * + * This function extracts the nonzero elements from the total Hessian, which is + * the combination of the Hessians from both cost functions and constraints, and + * applies the appropriate scaling factors. + * + * The Hessian of the cost function is multiplied by @c obj_factor, while the + * Hessians of the constraints are weighted by their corresponding Lagrange + * multipliers in @c lambda. + * + * @param [in] x The current values of the optimization variables. + * @param [in] obj_factor The scaling multiplier for the Hessian of the objective function (cost). + * @param [in] lambda The Lagrange multipliers for the constraints. + * @param [out] values The nonzero second-order derivatives, ordered in @c Eigen::RowMajor format. + */ + void EvalNonzerosOfHessian(const double* x, double obj_factor, const double* lambda, double* values); + + /** + * @brief Computes the full Hessian by summing the Hessians of costs and constraints. + * + * This function returns the total Hessian, which combines the second-order + * derivatives of both cost functions and constraints with respect to the + * optimization variables. + * + * The primary purpose of this function is to determine the structure of the + * overall Hessian, including the number and positions of nonzero elements. + * The resulting matrix is stored in sparse format for efficiency. + */ + Hessian GetTotalHessian() const; + + /** + * @brief The sparse-matrix representation of the Hessians of the constraints. + * + * This function returns a pair of vectors, where the first vector contains + * the indices of the corresponding constraints, and the second vector holds + * the Hessian matrices in sparse format. + * + * Each Hessian matrix represents the second-order derivatives of a constraint + * with respect to the optimization variables. The Hessians are stored sparsely + * to optimize memory usage, as they are typically very sparse. + */ + RowIndicesHessiansPair GetHessianOfConstraints() const; + + + /** + * @brief The sparse-matrix representation of the Hessians of the cost functions. + * + * This function returns a pair of vectors, where the first vector contains + * the indices of the corresponding cost functions, and the second vector holds + * the Hessian matrices in sparse format. + * + * Each Hessian matrix represents the second-order derivatives of a cost function + * with respect to the optimization variables. The Hessians are stored sparsely + * to optimize memory usage, as they are typically very sparse. + */ + RowIndicesHessiansPair GetHessianOfCosts() const; + /** * @brief Saves the current values of the optimization variables in x_prev. * diff --git a/ifopt_core/include/ifopt/variable_set.h b/ifopt_core/include/ifopt/variable_set.h index 9fc9ece..f091df9 100644 --- a/ifopt_core/include/ifopt/variable_set.h +++ b/ifopt_core/include/ifopt/variable_set.h @@ -58,6 +58,10 @@ class VariableSet : public Component { { throw std::runtime_error("not implemented for variables"); }; + RowIndicesHessiansPair GetHessians() const final + { + throw std::runtime_error("not implemented for variables"); + } }; } // namespace ifopt diff --git a/ifopt_core/src/composite.cc b/ifopt_core/src/composite.cc index 831aeba..1e34131 100644 --- a/ifopt_core/src/composite.cc +++ b/ifopt_core/src/composite.cc @@ -175,6 +175,43 @@ Composite::Jacobian Composite::GetJacobian() const return jacobian; } +Composite::RowIndicesHessiansPair Composite::GetHessians() const +{ + if (n_var == -1 && components_.empty()) + n_var = 0; + + RowIndicesHessiansPair row_indices_hessians_pair; + if (n_var == 0) + return row_indices_hessians_pair; + + std::vector& hessian_row_indices = row_indices_hessians_pair.first; + hessian_row_indices.reserve(GetRows()); + + std::vector& hessians = row_indices_hessians_pair.second; + hessians.reserve(GetRows()); // reserve space in the vector to avoid reallocations + + int offset = 0; // offset for row indices in different component + for (const auto& c : components_) { + const RowIndicesHessiansPair& local_row_indices_hessians = c->GetHessians(); + const std::vector& row_indices = local_row_indices_hessians.first; + const std::vector& hess = local_row_indices_hessians.second; + assert(row_indices.size() == hess.size()); + + for(int i = 0; i < row_indices.size(); ++i) { + hessian_row_indices.push_back(row_indices[i] + offset); + } + if (!hess.empty()) { + if (n_var == -1) + n_var = hess.front().cols(); + + hessians.insert(hessians.end(), hess.begin(), hess.end()); + } + + offset += c->GetRows(); + } + return row_indices_hessians_pair; +} + Composite::VecBound Composite::GetBounds() const { VecBound bounds_; diff --git a/ifopt_core/src/leaves.cc b/ifopt_core/src/leaves.cc index 6e03800..ac07fd4 100644 --- a/ifopt_core/src/leaves.cc +++ b/ifopt_core/src/leaves.cc @@ -73,6 +73,40 @@ ConstraintSet::Jacobian ConstraintSet::GetJacobian() const return jacobian; } +ConstraintSet::RowIndicesHessiansPair ConstraintSet::GetHessians() const +{ + std::vector triplets_list; + triplets_list.reserve(GetRows()); + + // hessians and their corresponding row index + RowIndicesHessiansPair row_indices_hessians_pair; + std::vector& hessian_row_indices = row_indices_hessians_pair.first; + hessian_row_indices.reserve(GetRows()); + + std::vector variable_names; + for (const auto& vars : variables_->GetComponents()) { + variable_names.push_back(vars->GetName()); + } + + FillHessianTriplets(variable_names, hessian_row_indices, triplets_list); + assert(hessian_row_indices.size() == triplets_list.size()); + + if (triplets_list.empty()) + return {}; + + std::vector& hessians = row_indices_hessians_pair.second; + hessians.reserve(hessian_row_indices.size()); + + int nrows = variables_->GetRows(); + for (int i = 0; i < hessian_row_indices.size(); ++i) { + Eigen::SparseMatrix H(nrows, nrows); + H.setFromTriplets(triplets_list[i].begin(), triplets_list[i].end()); // efficiently construct sparse matrix + hessians.push_back(std::move(H)); + } + + return row_indices_hessians_pair; +} + void ConstraintSet::LinkWithVariables(const VariablesPtr& x) { variables_ = x; diff --git a/ifopt_core/src/problem.cc b/ifopt_core/src/problem.cc index d472563..8b5f964 100644 --- a/ifopt_core/src/problem.cc +++ b/ifopt_core/src/problem.cc @@ -30,6 +30,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include namespace ifopt { @@ -153,6 +154,75 @@ Problem::Jacobian Problem::GetJacobianOfCosts() const return costs_.GetJacobian(); } +void Problem::EvalNonzerosOfHessian(const double* x, double obj_factor, const double* lambda, double* values) +{ + SetVariables(x); + const std::vector& hessian_of_costs = GetHessianOfCosts().second; + const RowIndicesHessiansPair& row_indices_hessians_pair_of_constraints = GetHessianOfConstraints(); + const std::vector& row_indices_of_constraints = row_indices_hessians_pair_of_constraints.first; + const std::vector& hessian_of_constraints = row_indices_hessians_pair_of_constraints.second; + + int dim = GetNumberOfOptimizationVariables(); + + // dimension of hessian matrix should be n by n where n is the number of optimization variables + assert(hessian_of_costs.empty() || (hessian_of_costs[0].rows() == dim && hessian_of_costs[0].cols() == dim)); + assert(hessian_of_constraints.empty() || (hessian_of_constraints[0].rows() == dim && hessian_of_constraints[0].cols() == dim)); + + if (hessian_of_costs.empty() && hessian_of_constraints.empty()) + return; + + // hessian from costs + Hessian total_hessian = obj_factor * std::accumulate(hessian_of_costs.begin(), hessian_of_costs.end(), Hessian(dim, dim)); + // hessian from constraints + for (int i = 0; i < (int)row_indices_of_constraints.size(); ++i) { + int row_index = row_indices_of_constraints[i]; + const Hessian& hess = hessian_of_constraints[i]; + total_hessian += lambda[row_index] * hess; + } + + // only need upper triangular values because hessian matrix is a symmetry matrix + int index = 0; + for (int k = 0; k < total_hessian.outerSize(); ++k) { + for (Hessian::InnerIterator it(total_hessian, k); it; ++it) { + if (it.col() < it.row()) { + continue; + } + values[index] = it.value(); + index++; + } + } +} + +Problem::Hessian Problem::GetTotalHessian() const +{ + const std::vector& hessian_of_costs = GetHessianOfCosts().second; + const std::vector& hessian_of_constraints = GetHessianOfConstraints().second; + + int dim = GetNumberOfOptimizationVariables(); + + // dimension of hessian matrix should be n by n where n is the number of optimization variables + assert(hessian_of_costs.empty() || (hessian_of_costs[0].rows() == dim && hessian_of_costs[0].cols() == dim)); + assert(hessian_of_constraints.empty() || (hessian_of_constraints[0].rows() == dim && hessian_of_constraints[0].cols() == dim)); + + // hessian from costs + Hessian total_hessian = std::accumulate(hessian_of_costs.begin(), hessian_of_costs.end(), Hessian(dim, dim)); + // hessian from constraints + total_hessian = std::accumulate(hessian_of_constraints.begin(), hessian_of_constraints.end(), total_hessian); + + return total_hessian; +} + +Problem::RowIndicesHessiansPair Problem::GetHessianOfConstraints() const +{ + return constraints_.GetHessians(); +} + +Problem::RowIndicesHessiansPair Problem::GetHessianOfCosts() const +{ + return costs_.GetHessians(); +} + + void Problem::SaveCurrent() { x_prev.push_back(variables_->GetValues()); diff --git a/ifopt_core/test/composite_test.cc b/ifopt_core/test/composite_test.cc index 77d2cb3..758c343 100644 --- a/ifopt_core/test/composite_test.cc +++ b/ifopt_core/test/composite_test.cc @@ -48,6 +48,10 @@ class ExComponent : public Component { { throw std::runtime_error("not implemented"); }; + virtual RowIndicesHessiansPair GetHessians() const + { + throw std::runtime_error("not implemented"); + }; virtual void SetVariables(const VectorXd& x){}; }; diff --git a/ifopt_core/test/ifopt/test_vars_constr_cost.h b/ifopt_core/test/ifopt/test_vars_constr_cost.h index cbe9e8a..dc91590 100644 --- a/ifopt_core/test/ifopt/test_vars_constr_cost.h +++ b/ifopt_core/test/ifopt/test_vars_constr_cost.h @@ -144,6 +144,26 @@ class ExConstraint : public ConstraintSet { 1.0; // derivative of first constraint w.r.t x1 } } + + void FillHessianTriplets(std::vector var_set_list, + std::vector& hessian_row_index_list, + std::vector& triplets_list) const override + { + // only the upper triangular elements need to be provided, as the Hessian + // is symmetric and internally only these values are used. + + // only 1 constraint in this problem + HessianTriplet constraint1_triplets; + for (int i = 0; i < (int)var_set_list.size(); ++i) { + const std::string& var_set = var_set_list[i]; + if (var_set == "var_set1") { + constraint1_triplets.emplace_back(i, i, 2.0); // H(0, 0) = 2.0, dx0^2 + } + } + // hessian info for the first constraint + hessian_row_index_list.push_back(0); + triplets_list.push_back(std::move(constraint1_triplets)); + } }; class ExCost : public CostTerm { @@ -166,6 +186,24 @@ class ExCost : public CostTerm { jac.coeffRef(0, 1) = -2.0 * (x(1) - 2.0); // derivative of cost w.r.t x1 } } + + void FillHessianTriplets(std::vector var_set_list, + std::vector& hessian_row_index_list, + std::vector& triplets_list) const override + { + // only the upper triangular elements need to be provided, as the Hessian + // is symmetric and internally only these values are used. + HessianTriplet cost_triplets; + for (int i = 0; i < (int)var_set_list.size(); ++i) { + const std::string& var_set = var_set_list[i]; + if (var_set == "var_set1") { + cost_triplets.emplace_back(i, i, -2.0); // H(0, 0) = -2.0, dx0^2 + } + } + // hessian info for the only 1 cost + hessian_row_index_list.push_back(0); + triplets_list.push_back(std::move(cost_triplets)); + } }; } // namespace ifopt diff --git a/ifopt_ipopt/include/ifopt/ipopt_adapter.h b/ifopt_ipopt/include/ifopt/ipopt_adapter.h index 44e6935..cf313fe 100644 --- a/ifopt_ipopt/include/ifopt/ipopt_adapter.h +++ b/ifopt_ipopt/include/ifopt/ipopt_adapter.h @@ -102,6 +102,15 @@ class IpoptAdapter : public TNLP { Index nele_jac, Index* iRow, Index* jCol, double* values); + /** Method to return: + * 1) The structure of the Hessian of the Lagrangian (if "values" is NULL) + * 2) The values of the Hessian of the Lagrangian (if "values" is not NULL) + */ + virtual bool eval_h(Index n, const double* x, bool new_x, double obj_factor, + Index m, const double* lambda, bool new_lambda, + Index nele_hess, Index* iRow, Index* jCol, + double* values); + /** This is called after every iteration and is used to save intermediate * solutions in the nlp */ virtual bool intermediate_callback(AlgorithmMode mode, Index iter, diff --git a/ifopt_ipopt/src/ipopt_adapter.cc b/ifopt_ipopt/src/ipopt_adapter.cc index 7ddda8a..25aa486 100644 --- a/ifopt_ipopt/src/ipopt_adapter.cc +++ b/ifopt_ipopt/src/ipopt_adapter.cc @@ -45,7 +45,24 @@ bool IpoptAdapter::get_nlp_info(Index& n, Index& m, Index& nnz_jac_g, else nnz_jac_g = nlp_->GetJacobianOfConstraints().nonZeros(); - nnz_h_lag = n * n; + auto hess = nlp_->GetTotalHessian(); + if (hess.nonZeros() == 0) { + // not using custom hessian + nnz_h_lag = n * n; + } + else { + Index nele = 0; + for (int k = 0; k < hess.outerSize(); ++k) { + for (Jacobian::InnerIterator it(hess, k); it; ++it) { + if (it.col() < it.row()) { + // only need the upper triangular values because hessian is a symmetry matrix + continue; + } + nele++; + } + } + nnz_h_lag = nele; + } // start index at 0 for row/col entries index_style = C_STYLE; @@ -147,6 +164,36 @@ bool IpoptAdapter::eval_jac_g(Index n, const double* x, bool new_x, Index m, return true; } +bool IpoptAdapter::eval_h(Index n, const double* x, bool new_x, double obj_factor, + Index m, const double* lambda, bool new_lambda, + Index nele_hess, Index* iRow, Index* jCol, + double* values) +{ + // defines the positions of the nonzero elements of the hessian + if (values == NULL) { + Index nele = 0; + auto hess = nlp_->GetTotalHessian(); + for (int k = 0; k < hess.outerSize(); ++k) { + for (Jacobian::InnerIterator it(hess, k); it; ++it) { + if (it.col() < it.row()) { + // only need the upper triangular values because hessian is a symmetry matrix + continue; + } + iRow[nele] = it.row(); + jCol[nele] = it.col(); + nele++; + } + } + + assert(nele == + nele_hess); // initial sparsity structure is never allowed to change + } else { + // only gets used if "hessian_approximation" option is "exact" + nlp_->EvalNonzerosOfHessian(x, obj_factor, lambda, values); + } + return true; +} + bool IpoptAdapter::intermediate_callback( AlgorithmMode mode, Index iter, double obj_value, double inf_pr, double inf_du, double mu, double d_norm, double regularization_size, diff --git a/ifopt_ipopt/test/ex_test_ipopt.cc b/ifopt_ipopt/test/ex_test_ipopt.cc index 3229695..1dc70ae 100644 --- a/ifopt_ipopt/test/ex_test_ipopt.cc +++ b/ifopt_ipopt/test/ex_test_ipopt.cc @@ -45,6 +45,7 @@ int main() IpoptSolver ipopt; ipopt.SetOption("linear_solver", "mumps"); ipopt.SetOption("jacobian_approximation", "exact"); + ipopt.SetOption("hessian_approximation", "exact"); // 3 . solve ipopt.Solve(nlp);