Skip to content

Parallel linear system builder #58

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
#include <gtsam_points/optimizers/linear_solver.hpp>
#include <gtsam_points/optimizers/linear_system_builder.hpp>

#include <gtsam_points/util/easy_profiler.hpp>

namespace gtsam_points {

Expand Down
52 changes: 45 additions & 7 deletions src/gtsam_points/optimizers/levenberg_marquardt_ext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,18 @@

#include <gtsam_points/optimizers/linearization_hook.hpp>

#include <numeric>
#include <gtsam/base/Vector.h>
#include <gtsam/linear/GaussianFactorGraph.h>
#include <gtsam/nonlinear/NonlinearFactorGraph.h>
#include <gtsam/nonlinear/internal/LevenbergMarquardtState.h>
#include <gtsam/inference/Ordering.h>
#include <gtsam_points/config.hpp>
#include <gtsam_points/util/parallelism.hpp>

#ifdef GTSAM_POINTS_USE_TBB
#include <tbb/parallel_for.h>
#endif

#include <boost/format.hpp>
#include <boost/range/adaptor/map.hpp>
Expand All @@ -35,6 +42,38 @@ namespace gtsam_points {

using State = gtsam::internal::LevenbergMarquardtState;

// Compute the total error of a linearized (Gaussian) factor graph at `x`.
// When OpenMP is the selected parallelism backend, defer to GTSAM's own
// parallel evaluation; otherwise evaluate each factor with TBB and sum.
double calc_error(const gtsam::GaussianFactorGraph& gfg, const gtsam::VectorValues& x) {
  if (!is_omp_default()) {
#ifdef GTSAM_POINTS_USE_TBB
    // TODO: Should use parallel reduction
    std::vector<double> per_factor_errors(gfg.size(), 0.0);
    tbb::parallel_for(static_cast<size_t>(0), gfg.size(), [&](size_t factor_index) {
      per_factor_errors[factor_index] = gfg[factor_index]->error(x);
    });
    return std::accumulate(per_factor_errors.begin(), per_factor_errors.end(), 0.0);
#else
    std::cerr << "error: gtsam_points is not built with TBB!!" << std::endl;
    return gfg.error(x);
#endif
  }
  return gfg.error(x);
}

// Compute the total error of a nonlinear factor graph at the values `x`.
// When OpenMP is the selected parallelism backend, defer to GTSAM's own
// parallel evaluation; otherwise evaluate each factor with TBB and sum.
double calc_error(const gtsam::NonlinearFactorGraph& graph, const gtsam::Values& x) {
  if (is_omp_default()) {
    return graph.error(x);
  } else {
#ifdef GTSAM_POINTS_USE_TBB
    // TODO: Should use parallel reduction
    std::vector<double> errors(graph.size(), 0.0);
    tbb::parallel_for(static_cast<size_t>(0), graph.size(), [&](size_t i) { errors[i] = graph[i]->error(x); });
    return std::accumulate(errors.begin(), errors.end(), 0.0);
#else
    std::cerr << "error: gtsam_points is not built with TBB!!" << std::endl;
    // Fix: fall back on this overload's own graph (`graph`); the original
    // returned `gfg.error(x)`, which does not exist here and fails to compile
    // when GTSAM_POINTS_USE_TBB is not defined.
    return graph.error(x);
#endif
  }
}

LevenbergMarquardtExtParams LevenbergMarquardtExtParams::ensureHasOrdering(const gtsam::NonlinearFactorGraph& graph) const {
if (ordering) {
return *this;
Expand Down Expand Up @@ -107,8 +146,8 @@ bool LevenbergMarquardtOptimizerExt::tryLambda(
if (systemSolvedSuccessfully) {
// Compute the old linearized error as it is not the same
// as the nonlinear error when robust noise models are used.
double oldLinearizedError = linear.error(gtsam::VectorValues::Zero(delta));
double newlinearizedError = linear.error(delta);
double oldLinearizedError = calc_error(linear, gtsam::VectorValues::Zero(delta));
double newlinearizedError = calc_error(linear, delta);
// cost change in the linearized system (old - new)
double linearizedCostChange = oldLinearizedError - newlinearizedError;

Expand All @@ -119,7 +158,7 @@ bool LevenbergMarquardtOptimizerExt::tryLambda(

// cost change in the original, nonlinear system (old - new)
linearization_hook->error(newValues);
newError = graph_.error(newValues);
newError = calc_error(graph_, newValues);
costChange = oldError - newError;

if (linearizedCostChange > std::numeric_limits<double>::epsilon() * oldLinearizedError) {
Expand All @@ -138,7 +177,6 @@ bool LevenbergMarquardtOptimizerExt::tryLambda(
}
}
} // if (systemSolvedSuccessfully)

if (params_.callback || params_.status_msg_callback) {
LevenbergMarquardtOptimizationStatus status;
status.iterations = currentState->iterations;
Expand Down Expand Up @@ -179,7 +217,7 @@ bool LevenbergMarquardtOptimizerExt::tryLambda(
// TODO(frank): make Values actually support move. Does not seem to happen now.
state_ = currentState->decreaseLambda(params_, modelFidelity, std::move(newValues), newError);
return true;
} else if (!stopSearchingLambda) { // we failed to solve the system or had no decrease in cost
} else if (!stopSearchingLambda) {  // we failed to solve the system or had no decrease in cost
State* modifiedState = static_cast<State*>(state_.get());
modifiedState->increaseLambda(params_); // TODO(frank): make this functional with Values move

Expand All @@ -190,7 +228,7 @@ bool LevenbergMarquardtOptimizerExt::tryLambda(
} else {
return false; // only case where we will keep trying
}
} else { // the change in the cost is very small and it is not worth trying bigger lambdas
} else { // the change in the cost is very small and it is not worth trying bigger lambdas
return true;
}
}
Expand All @@ -206,7 +244,7 @@ gtsam::GaussianFactorGraph::shared_ptr LevenbergMarquardtOptimizerExt::iterate()
linearization_time = std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1);

linearization_hook->error(currentState->values);
double oldError = graph_.error(currentState->values);
double oldError = calc_error(graph_, currentState->values);
// double oldError = graph_.error(currentState->values, linear);

gtsam::VectorValues sqrtHessianDiagonal;
Expand Down
219 changes: 166 additions & 53 deletions src/gtsam_points/optimizers/linear_system_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,18 @@

#include <gtsam_points/optimizers/linear_system_builder.hpp>

#include <numeric>
#include <unordered_map>
#include <gtsam/linear/HessianFactor.h>

#include <gtsam_points/config.hpp>
#include <gtsam_points/util/parallelism.hpp>

#ifdef GTSAM_POINTS_USE_TBB
#include <tbb/parallel_for.h>
#include <tbb/concurrent_hash_map.h>
#endif

namespace {

template <typename T>
Expand Down Expand Up @@ -71,87 +80,191 @@ SparseLinearSystemBuilder<BLOCK_SIZE>::SparseLinearSystemBuilder(const gtsam::Ga
}
}

std::unordered_map<std::pair<int, int>, BlockMatrix, PairHasher<int>> A_blocks;
std::unordered_map<int, BlockVector> b_blocks;
c = 0.0;

for (const auto& gf : gfg) {
std::vector<BlockSlot> slots(gf->keys().size());
std::transform(gf->keys().begin(), gf->keys().end(), slots.begin(), [&](const gtsam::Key key) { return block_slots[key]; });
if (is_omp_default()) {
std::unordered_map<std::pair<int, int>, BlockMatrix, PairHasher<int>> A_blocks;
std::unordered_map<int, BlockVector> b_blocks;

for (const auto& gf : gfg) {
std::vector<BlockSlot> slots(gf->keys().size());
std::transform(gf->keys().begin(), gf->keys().end(), slots.begin(), [&](const gtsam::Key key) { return block_slots[key]; });

const auto augmented = gf->augmentedInformation();
const int N = augmented.rows() - 1;

const auto augmented = gf->augmentedInformation();
const int N = augmented.rows() - 1;
c += augmented(augmented.rows() - 1, augmented.cols() - 1);

c += augmented(augmented.rows() - 1, augmented.cols() - 1);
for (int i = 0, index_i = 0; i < gf->keys().size(); index_i += slots[i++].dim) {
const int dim_i = slots[i].dim;
const int global_index_i = slots[i].index;

for (int i = 0, index_i = 0; i < gf->keys().size(); index_i += slots[i++].dim) {
const int dim_i = slots[i].dim;
const int global_index_i = slots[i].index;
BlockVector b_i;
if constexpr (BLOCK_SIZE < 0) {
b_i = augmented.block(index_i, N, dim_i, 1);
} else {
b_i = augmented.block<BLOCK_SIZE, 1>(index_i, N);
}

BlockVector b_i;
if constexpr (BLOCK_SIZE < 0) {
b_i = augmented.block(index_i, N, dim_i, 1);
} else {
b_i = augmented.block<BLOCK_SIZE, 1>(index_i, N);
auto found = b_blocks.find(global_index_i);
if (found == b_blocks.end()) {
b_blocks.emplace_hint(found, global_index_i, b_i);
} else {
found->second += b_i;
}
}

auto found = b_blocks.find(global_index_i);
if (found == b_blocks.end()) {
b_blocks.emplace_hint(found, global_index_i, b_i);
} else {
found->second += b_i;
for (int i = 0, index_i = 0; i < gf->keys().size(); index_i += slots[i++].dim) {
for (int j = i, index_j = index_i; j < gf->keys().size(); index_j += slots[j++].dim) {
const int dim_i = slots[i].dim;
const int dim_j = slots[j].dim;
int global_index_i = slots[i].index;
int global_index_j = slots[j].index;

BlockMatrix H_ij;
if constexpr (BLOCK_SIZE < 0) {
H_ij = augmented.block(index_i, index_j, dim_i, dim_j);
} else {
H_ij = augmented.block<BLOCK_SIZE, BLOCK_SIZE>(index_i, index_j);
}

if (global_index_i < global_index_j) {
std::swap(global_index_i, global_index_j);
H_ij = H_ij.transpose().eval();
}

auto found = A_blocks.find({global_index_i, global_index_j});
if (found == A_blocks.end()) {
A_blocks.emplace_hint(found, std::make_pair(global_index_i, global_index_j), H_ij);
} else {
found->second += H_ij;
}
}
}
}

for (int i = 0, index_i = 0; i < gf->keys().size(); index_i += slots[i++].dim) {
for (int j = i, index_j = index_i; j < gf->keys().size(); index_j += slots[j++].dim) {
std::vector<Eigen::Triplet<double>> triplets;
triplets.reserve(A_blocks.size() * 6 * 6);

for (const auto& [index, H_ij] : A_blocks) {
const auto [index_i, index_j] = index;
Eigen::SparseMatrix<double> sH_ij = H_ij.sparseView();

for (int k = 0; k < sH_ij.outerSize(); k++) {
for (Eigen::SparseMatrix<double>::InnerIterator it(sH_ij, k); it; ++it) {
triplets.emplace_back(index_i + it.row(), index_j + it.col(), it.value());
}
}
}

A.resize(total_dim, total_dim);
A.setFromTriplets(triplets.begin(), triplets.end());

b.resize(total_dim);
for (const auto& [index_i, b_i] : b_blocks) {
b.block(index_i, 0, b_i.size(), 1) = b_i;
}
} else {
#ifdef GTSAM_POINTS_USE_TBB
using BlockMatrixMap = tbb::concurrent_hash_map<std::uint64_t, BlockMatrix>;
using BlockVectorMap = tbb::concurrent_hash_map<std::uint64_t, BlockVector>;
BlockMatrixMap A_blocks;
BlockVectorMap b_blocks;
std::vector<double> all_c(gfg.size(), 0.0);

const auto perfactor_task = [&](const gtsam::GaussianFactor& gf) {
std::vector<BlockSlot> slots(gf.keys().size());
std::transform(gf.keys().begin(), gf.keys().end(), slots.begin(), [&](const gtsam::Key key) { return block_slots[key]; });

const auto augmented = gf.augmentedInformation();
const int N = augmented.rows() - 1;

// Add b
for (int i = 0, index_i = 0; i < gf.keys().size(); index_i += slots[i++].dim) {
const int dim_i = slots[i].dim;
const int dim_j = slots[j].dim;
int global_index_i = slots[i].index;
int global_index_j = slots[j].index;
const int global_index_i = slots[i].index;

BlockMatrix H_ij;
BlockVector b_i;
if constexpr (BLOCK_SIZE < 0) {
H_ij = augmented.block(index_i, index_j, dim_i, dim_j);
b_i = augmented.block(index_i, N, dim_i, 1);
} else {
H_ij = augmented.block<BLOCK_SIZE, BLOCK_SIZE>(index_i, index_j);
b_i = augmented.block<BLOCK_SIZE, 1>(index_i, N);
}

if (global_index_i < global_index_j) {
std::swap(global_index_i, global_index_j);
H_ij = H_ij.transpose().eval();
typename BlockVectorMap::accessor a;
if (!b_blocks.insert(a, global_index_i)) {
a->second += b_i;
} else {
a->second = b_i;
}
}

auto found = A_blocks.find({global_index_i, global_index_j});
if (found == A_blocks.end()) {
A_blocks.emplace_hint(found, std::make_pair(global_index_i, global_index_j), H_ij);
} else {
found->second += H_ij;
// Add A
for (int i = 0, index_i = 0; i < gf.keys().size(); index_i += slots[i++].dim) {
for (int j = i, index_j = index_i; j < gf.keys().size(); index_j += slots[j++].dim) {
const int dim_i = slots[i].dim;
const int dim_j = slots[j].dim;
int global_index_i = slots[i].index;
int global_index_j = slots[j].index;

BlockMatrix H_ij;
if constexpr (BLOCK_SIZE < 0) {
H_ij = augmented.block(index_i, index_j, dim_i, dim_j);
} else {
H_ij = augmented.block<BLOCK_SIZE, BLOCK_SIZE>(index_i, index_j);
}

if (global_index_i < global_index_j) {
std::swap(global_index_i, global_index_j);
H_ij = H_ij.transpose().eval();
}

const size_t key = (static_cast<std::uint64_t>(global_index_i) << 32) | global_index_j;

typename BlockMatrixMap::accessor a;
if (!A_blocks.insert(a, key)) {
a->second += H_ij;
} else {
a->second = H_ij;
}
}
}
}
}

std::vector<Eigen::Triplet<double>> triplets;
triplets.reserve(A_blocks.size() * 6 * 6);
return augmented(augmented.rows() - 1, augmented.cols() - 1);
};

for (const auto& [index, H_ij] : A_blocks) {
const auto [index_i, index_j] = index;
Eigen::SparseMatrix<double> sH_ij = H_ij.sparseView();
tbb::parallel_for(static_cast<size_t>(0), gfg.size(), [&](size_t i) { all_c[i] = perfactor_task(*gfg[i]); });
c = std::accumulate(all_c.begin(), all_c.end(), 0.0);

for (int k = 0; k < sH_ij.outerSize(); k++) {
for (Eigen::SparseMatrix<double>::InnerIterator it(sH_ij, k); it; ++it) {
triplets.emplace_back(index_i + it.row(), index_j + it.col(), it.value());
std::vector<Eigen::Triplet<double>> triplets;
triplets.reserve(A_blocks.size() * 6 * 6);

for (auto itr = A_blocks.begin(); itr != A_blocks.end(); itr++) {
const size_t key = itr->first;
const int index_i = key >> 32;
const int index_j = key & ((1ull << 32) - 1);
const BlockMatrix& H_ij = itr->second;

Eigen::SparseMatrix<double> sH_ij = H_ij.sparseView();

for (int k = 0; k < sH_ij.outerSize(); k++) {
for (Eigen::SparseMatrix<double>::InnerIterator it(sH_ij, k); it; ++it) {
triplets.emplace_back(index_i + it.row(), index_j + it.col(), it.value());
}
}
}
}

A.resize(total_dim, total_dim);
A.setFromTriplets(triplets.begin(), triplets.end());
A.resize(total_dim, total_dim);
A.setFromTriplets(triplets.begin(), triplets.end());

b.resize(total_dim);
for (auto itr = b_blocks.begin(); itr != b_blocks.end(); itr++) {
b.block(itr->first, 0, itr->second.size(), 1) = itr->second;
}

b.resize(total_dim);
for (const auto& [index_i, b_i] : b_blocks) {
b.block(index_i, 0, b_i.size(), 1) = b_i;
#else
std::cerr << "error : gtsam_points is not built with TBB!!" << std::endl;
#endif
}
}

Expand Down