4
4
5
5
#include < ipc/utils/local_to_global.hpp>
6
6
7
- #include < tbb/parallel_for.h>
8
7
#include < tbb/blocked_range.h>
8
+ #include < tbb/combinable.h>
9
9
#include < tbb/enumerable_thread_specific.h>
10
+ #include < tbb/parallel_for.h>
11
+ #include < tbb/parallel_reduce.h>
10
12
11
13
namespace ipc {
12
14
@@ -18,25 +20,18 @@ double Potential<TCollisions>::operator()(
18
20
{
19
21
assert (X.rows () == mesh.num_vertices ());
20
22
21
- if (collisions.empty ()) {
22
- return 0 ;
23
- }
24
-
25
- tbb::enumerable_thread_specific<double > storage (0 );
26
-
27
- tbb::parallel_for (
28
- tbb::blocked_range<size_t >(size_t (0 ), collisions.size ()),
29
- [&](const tbb::blocked_range<size_t >& r) {
30
- auto & local_potential = storage.local ();
23
+ return tbb::parallel_reduce (
24
+ tbb::blocked_range<size_t >(size_t (0 ), collisions.size ()), 0.0 ,
25
+ [&](const tbb::blocked_range<size_t >& r, double partial_sum) {
31
26
for (size_t i = r.begin (); i < r.end (); i++) {
32
27
// Quadrature weight is premultiplied by local potential
33
- local_potential += (*this )(
28
+ partial_sum += (*this )(
34
29
collisions[i],
35
30
collisions[i].dof (X, mesh.edges (), mesh.faces ()));
36
31
}
37
- }) ;
38
-
39
- return storage. combine ([]( double a, double b) { return a + b; } );
32
+ return partial_sum ;
33
+ },
34
+ std::plus< double >() );
40
35
}
41
36
42
37
template <class TCollisions >
@@ -53,14 +48,11 @@ Eigen::VectorXd Potential<TCollisions>::gradient(
53
48
54
49
const int dim = X.cols ();
55
50
56
- tbb::enumerable_thread_specific<Eigen::VectorXd> storage (
57
- Eigen::VectorXd::Zero (X.size ()));
51
+ tbb::combinable<Eigen::VectorXd> grad (Eigen::VectorXd::Zero (X.size ()));
58
52
59
53
tbb::parallel_for (
60
54
tbb::blocked_range<size_t >(size_t (0 ), collisions.size ()),
61
55
[&](const tbb::blocked_range<size_t >& r) {
62
- auto & global_grad = storage.local ();
63
-
64
56
for (size_t i = r.begin (); i < r.end (); i++) {
65
57
const TCollision& collision = collisions[i];
66
58
@@ -71,12 +63,11 @@ Eigen::VectorXd Potential<TCollisions>::gradient(
71
63
collision.vertex_ids (mesh.edges (), mesh.faces ());
72
64
73
65
local_gradient_to_global_gradient (
74
- local_grad, vids, dim, global_grad );
66
+ local_grad, vids, dim, grad. local () );
75
67
}
76
68
});
77
69
78
- return storage.combine ([](const Eigen::VectorXd& a,
79
- const Eigen::VectorXd& b) { return a + b; });
70
+ return grad.combine (std::plus<const Eigen::VectorXd&>());
80
71
}
81
72
82
73
template <class TCollisions >
@@ -121,14 +112,22 @@ Eigen::SparseMatrix<double> Potential<TCollisions>::hessian(
121
112
}
122
113
});
123
114
124
- Eigen::SparseMatrix<double > hess (ndof, ndof);
125
- for (const auto & local_hess_triplets : storage) {
126
- Eigen::SparseMatrix<double > local_hess (ndof, ndof);
127
- local_hess.setFromTriplets (
128
- local_hess_triplets.begin (), local_hess_triplets.end ());
129
- hess += local_hess;
130
- }
131
- return hess;
115
+ // Combine the local hessians
116
+ tbb::combinable<Eigen::SparseMatrix<double >> hess (
117
+ Eigen::SparseMatrix<double >(ndof, ndof));
118
+
119
+ tbb::parallel_for (
120
+ tbb::blocked_range<decltype (storage)::iterator>(
121
+ storage.begin (), storage.end ()),
122
+ [&](const tbb::blocked_range<decltype (storage)::iterator>& r) {
123
+ for (auto it = r.begin (); it != r.end (); ++it) {
124
+ Eigen::SparseMatrix<double > local_hess (ndof, ndof);
125
+ local_hess.setFromTriplets (it->begin (), it->end ());
126
+ hess.local () += local_hess;
127
+ }
128
+ });
129
+
130
+ return hess.combine (std::plus<const Eigen::SparseMatrix<double >&>());
132
131
}
133
132
134
133
} // namespace ipc
0 commit comments