4
4
5
5
#include < ipc/utils/local_to_global.hpp>
6
6
7
- #include < tbb/parallel_for.h>
8
7
#include < tbb/blocked_range.h>
8
+ #include < tbb/combinable.h>
9
9
#include < tbb/enumerable_thread_specific.h>
10
+ #include < tbb/parallel_for.h>
11
+ #include < tbb/parallel_reduce.h>
10
12
11
13
namespace ipc {
12
14
@@ -18,25 +20,18 @@ double Potential<TCollisions>::operator()(
18
20
{
19
21
assert (X.rows () == mesh.num_vertices ());
20
22
21
- if (collisions.empty ()) {
22
- return 0 ;
23
- }
24
-
25
- tbb::enumerable_thread_specific<double > storage (0 );
26
-
27
- tbb::parallel_for (
28
- tbb::blocked_range<size_t >(size_t (0 ), collisions.size ()),
29
- [&](const tbb::blocked_range<size_t >& r) {
30
- auto & local_potential = storage.local ();
23
+ return tbb::parallel_reduce (
24
+ tbb::blocked_range<size_t >(size_t (0 ), collisions.size ()), 0.0 ,
25
+ [&](const tbb::blocked_range<size_t >& r, double partial_sum) {
31
26
for (size_t i = r.begin (); i < r.end (); i++) {
32
27
// Quadrature weight is premultiplied by local potential
33
- local_potential += (*this )(
28
+ partial_sum += (*this )(
34
29
collisions[i],
35
30
collisions[i].dof (X, mesh.edges (), mesh.faces ()));
36
31
}
37
- }) ;
38
-
39
- return storage. combine ( [](double a, double b) { return a + b; });
32
+ return partial_sum ;
33
+ },
34
+ [](double a, double b) { return a + b; });
40
35
}
41
36
42
37
template <class TCollisions >
@@ -53,14 +48,11 @@ Eigen::VectorXd Potential<TCollisions>::gradient(
53
48
54
49
const int dim = X.cols ();
55
50
56
- tbb::enumerable_thread_specific<Eigen::VectorXd> storage (
57
- Eigen::VectorXd::Zero (X.size ()));
51
+ tbb::combinable<Eigen::VectorXd> grad (Eigen::VectorXd::Zero (X.size ()));
58
52
59
53
tbb::parallel_for (
60
54
tbb::blocked_range<size_t >(size_t (0 ), collisions.size ()),
61
55
[&](const tbb::blocked_range<size_t >& r) {
62
- auto & global_grad = storage.local ();
63
-
64
56
for (size_t i = r.begin (); i < r.end (); i++) {
65
57
const TCollision& collision = collisions[i];
66
58
@@ -71,12 +63,13 @@ Eigen::VectorXd Potential<TCollisions>::gradient(
71
63
collision.vertex_ids (mesh.edges (), mesh.faces ());
72
64
73
65
local_gradient_to_global_gradient (
74
- local_grad, vids, dim, global_grad );
66
+ local_grad, vids, dim, grad. local () );
75
67
}
76
68
});
77
69
78
- return storage.combine ([](const Eigen::VectorXd& a,
79
- const Eigen::VectorXd& b) { return a + b; });
70
+ return grad.combine ([](const Eigen::VectorXd& a, const Eigen::VectorXd& b) {
71
+ return a + b;
72
+ });
80
73
}
81
74
82
75
template <class TCollisions >
@@ -121,14 +114,24 @@ Eigen::SparseMatrix<double> Potential<TCollisions>::hessian(
121
114
}
122
115
});
123
116
124
- Eigen::SparseMatrix<double > hess (ndof, ndof);
125
- for (const auto & local_hess_triplets : storage) {
126
- Eigen::SparseMatrix<double > local_hess (ndof, ndof);
127
- local_hess.setFromTriplets (
128
- local_hess_triplets.begin (), local_hess_triplets.end ());
129
- hess += local_hess;
130
- }
131
- return hess;
117
+ // Combine the local hessians
118
+ tbb::combinable<Eigen::SparseMatrix<double >> hess (
119
+ Eigen::SparseMatrix<double >(ndof, ndof));
120
+
121
+ tbb::parallel_for (
122
+ tbb::blocked_range<decltype (storage)::iterator>(
123
+ storage.begin (), storage.end ()),
124
+ [&](const tbb::blocked_range<decltype (storage)::iterator>& r) {
125
+ for (auto it = r.begin (); it != r.end (); ++it) {
126
+ Eigen::SparseMatrix<double > local_hess (ndof, ndof);
127
+ local_hess.setFromTriplets (it->begin (), it->end ());
128
+ hess.local () += local_hess;
129
+ }
130
+ });
131
+
132
+ return hess.combine (
133
+ [](const Eigen::SparseMatrix<double >& a,
134
+ const Eigen::SparseMatrix<double >& b) { return a + b; });
132
135
}
133
136
134
137
} // namespace ipc
0 commit comments