Skip to content

Commit 24ce40a

Browse files
committed
Cache info to optimize computations when ratio of selected indices is high.
1 parent 3fa7f80 commit 24ce40a

File tree

4 files changed

+175
-69
lines changed

4 files changed

+175
-69
lines changed

cpp/benchmarks/t/pipelines/registration/Feature.cpp

-2
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
// SPDX-License-Identifier: MIT
66
// ----------------------------------------------------------------------------
77

8-
#include <map>
9-
108
#include "open3d/t/pipelines/registration/Feature.h"
119

1210
#include <benchmark/benchmark.h>

cpp/open3d/t/pipelines/kernel/Feature.cpp

+16-15
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,15 @@ namespace t {
1515
namespace pipelines {
1616
namespace kernel {
1717

18-
void ComputeFPFHFeature(const core::Tensor &points,
19-
const core::Tensor &normals,
20-
const core::Tensor &indices,
21-
const core::Tensor &distance2,
22-
const core::Tensor &counts,
23-
core::Tensor &fpfhs,
24-
const utility::optional<core::Tensor> &mask,
25-
const utility::optional<core::Tensor>
26-
&map_batch_info_idx_to_point_idx) {
18+
void ComputeFPFHFeature(
19+
const core::Tensor &points,
20+
const core::Tensor &normals,
21+
const core::Tensor &indices,
22+
const core::Tensor &distance2,
23+
const core::Tensor &counts,
24+
core::Tensor &fpfhs,
25+
const utility::optional<core::Tensor> &mask,
26+
const utility::optional<core::Tensor> &map_info_idx_to_point_idx) {
2727
if (mask.has_value()) {
2828
const int64_t size =
2929
mask.value().To(core::Int64).Sum({0}).Item<int64_t>();
@@ -32,21 +32,22 @@ void ComputeFPFHFeature(const core::Tensor &points,
3232
} else {
3333
core::AssertTensorShape(fpfhs, {points.GetLength(), 33});
3434
}
35-
if (map_batch_info_idx_to_point_idx.has_value()) {
36-
core::AssertTensorShape(map_batch_info_idx_to_point_idx.value(),
37-
{counts.GetLength()});
35+
if (map_info_idx_to_point_idx.has_value()) {
36+
const bool is_radius_search = indices.GetShape().size() == 1;
37+
core::AssertTensorShape(
38+
map_info_idx_to_point_idx.value(),
39+
{counts.GetLength() - (is_radius_search ? 1 : 0)});
3840
}
3941
const core::Tensor points_d = points.Contiguous();
4042
const core::Tensor normals_d = normals.Contiguous();
4143
const core::Tensor counts_d = counts.To(core::Int32);
4244
if (points_d.IsCPU()) {
4345
ComputeFPFHFeatureCPU(points_d, normals_d, indices, distance2, counts_d,
44-
fpfhs, mask, map_batch_info_idx_to_point_idx);
46+
fpfhs, mask, map_info_idx_to_point_idx);
4547
} else {
4648
core::CUDAScopedDevice scoped_device(points.GetDevice());
4749
CUDA_CALL(ComputeFPFHFeatureCUDA, points_d, normals_d, indices,
48-
distance2, counts_d, fpfhs, mask,
49-
map_batch_info_idx_to_point_idx);
50+
distance2, counts_d, fpfhs, mask, map_info_idx_to_point_idx);
5051
}
5152
utility::LogDebug(
5253
"[ComputeFPFHFeature] Computed {:d} features from "

cpp/open3d/t/pipelines/kernel/FeatureImpl.h

+10-12
Original file line numberDiff line numberDiff line change
@@ -116,19 +116,17 @@ void ComputeFPFHFeatureCPU
116116
const core::Tensor &counts,
117117
core::Tensor &fpfhs,
118118
const utility::optional<core::Tensor> &mask,
119-
const utility::optional<core::Tensor>
120-
&map_batch_info_idx_to_point_idx) {
119+
const utility::optional<core::Tensor> &map_info_idx_to_point_idx) {
121120
const core::Dtype dtype = points.GetDtype();
122121
const core::Device device = points.GetDevice();
123122
const int64_t n_points = points.GetLength();
124123

125124
const bool filter_fpfh =
126-
mask.has_value() && map_batch_info_idx_to_point_idx.has_value();
127-
if (mask.has_value() ^ map_batch_info_idx_to_point_idx.has_value()) {
125+
mask.has_value() && map_info_idx_to_point_idx.has_value();
126+
if (mask.has_value() ^ map_info_idx_to_point_idx.has_value()) {
128127
utility::LogError(
129-
"Parameters mask and map_batch_info_idx_to_point_idx must be "
130-
"both "
131-
"provided or both not provided.");
128+
"Parameters mask and map_info_idx_to_point_idx must "
129+
"either be both provided or both not provided.");
132130
}
133131
if (filter_fpfh) {
134132
if (mask.value().GetShape()[0] != n_points) {
@@ -137,19 +135,19 @@ void ComputeFPFHFeatureCPU
137135
"be equal to the number of points {:d}.",
138136
(int)mask.value().GetShape()[0], n_points);
139137
}
140-
if (map_batch_info_idx_to_point_idx.value().GetShape()[0] !=
141-
counts.GetShape()[0]) {
138+
if (map_info_idx_to_point_idx.value().GetShape()[0] !=
139+
counts.GetShape()[0] - (indices.GetShape().size() == 1 ? 1 : 0)) {
142140
utility::LogError(
143-
"Parameter map_batch_info_idx_to_point_idx was provided, "
141+
"Parameter map_info_idx_to_point_idx was provided, "
144142
"but its size"
145143
"{:d} should be equal to the size of counts {:d}.",
146-
(int)map_batch_info_idx_to_point_idx.value().GetShape()[0],
144+
(int)map_info_idx_to_point_idx.value().GetShape()[0],
147145
(int)counts.GetShape()[0]);
148146
}
149147
}
150148

151149
core::Tensor map_spfh_info_idx_to_point_idx =
152-
map_batch_info_idx_to_point_idx.value_or(
150+
map_info_idx_to_point_idx.value_or(
153151
core::Tensor::Empty({0}, core::Int64, device));
154152

155153
const core::Tensor map_fpfh_idx_to_point_idx =

cpp/open3d/t/pipelines/registration/Feature.cpp

+149-40
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
#include "open3d/t/pipelines/registration/Feature.h"
99

10+
#include "open3d/core/ParallelFor.h"
1011
#include "open3d/core/nns/NearestNeighborSearch.h"
1112
#include "open3d/t/geometry/PointCloud.h"
1213
#include "open3d/t/pipelines/kernel/Feature.h"
@@ -43,58 +44,140 @@ core::Tensor ComputeFPFHFeature(
4344
core::Int32);
4445
bool tree_set = false;
4546

47+
const bool filter_fpfh = indices.has_value();
48+
// If we are computing a subset of the FPFH feature,
49+
// cache some information to speed up the computation
50+
// if the ratio of the indices to the total number of points is high.
51+
// Minimum fraction (selected indices / total points) at which neighbor info is
// cached. Must be floating-point: an integral type truncates 0.1 to 0, which
// makes the ratio test always pass and enables caching unconditionally.
const double cache_info_indices_ratio_thresh = 0.1;
52+
bool cache_fpfh_info = true;
53+
4654
core::Tensor mask_fpfh_points;
47-
core::Tensor mask_required_points;
48-
if (indices.has_value()) {
55+
core::Tensor indices_fpfh_points;
56+
core::Tensor map_point_idx_to_required_point_idx;
57+
core::Tensor map_required_point_idx_to_point_idx;
58+
core::Tensor save_p_indices, save_p_distance2, save_p_counts;
59+
core::Tensor mask_spfh_points;
60+
61+
// If we are computing a subset of the FPFH feature, we need to find
62+
// the subset of points (neighbors) required to compute the FPFH features.
63+
if (filter_fpfh) {
64+
if (indices.value().GetLength() == 0) {
65+
return core::Tensor::Zeros({0, 33}, dtype, device);
66+
}
4967
mask_fpfh_points =
5068
core::Tensor::Zeros({num_points}, core::Bool, device);
51-
mask_required_points =
52-
core::Tensor::Zeros({num_points}, core::Bool, device);
53-
core::Tensor indices_tmp, distance2_tmp, counts_tmp;
5469
mask_fpfh_points.IndexSet({indices.value()},
5570
core::Tensor::Ones({1}, core::Bool, device));
5671
const core::Tensor query_point_positions =
57-
input.GetPointPositions().IndexGet({indices.value()});
72+
input.GetPointPositions().IndexGet({mask_fpfh_points});
73+
core::Tensor p_indices, p_distance2, p_counts;
5874
if (radius.has_value() && max_nn.has_value()) {
5975
tree_set = tree.HybridIndex(radius.value());
6076
if (!tree_set) {
6177
utility::LogError("Building HybridIndex failed.");
6278
}
63-
std::tie(indices_tmp, distance2_tmp, counts_tmp) =
64-
tree.HybridSearch(query_point_positions, radius.value(),
65-
max_nn.value());
79+
std::tie(p_indices, p_distance2, p_counts) = tree.HybridSearch(
80+
query_point_positions, radius.value(), max_nn.value());
6681
} else if (!radius.has_value() && max_nn.has_value()) {
6782
tree_set = tree.KnnIndex();
6883
if (!tree_set) {
6984
utility::LogError("Building KnnIndex failed.");
7085
}
71-
std::tie(indices_tmp, distance2_tmp) =
86+
std::tie(p_indices, p_distance2) =
7287
tree.KnnSearch(query_point_positions, max_nn.value());
88+
89+
// Make counts full with min(max_nn, num_points).
90+
const int fill_value =
91+
max_nn.value() > num_points ? num_points : max_nn.value();
92+
p_counts = core::Tensor::Full({query_point_positions.GetLength()},
93+
fill_value, core::Int32, device);
7394
} else if (radius.has_value() && !max_nn.has_value()) {
7495
tree_set = tree.FixedRadiusIndex(radius.value());
7596
if (!tree_set) {
7697
utility::LogError("Building RadiusIndex failed.");
7798
}
78-
std::tie(indices_tmp, distance2_tmp, counts_tmp) =
79-
tree.FixedRadiusSearch(query_point_positions,
80-
radius.value());
99+
std::tie(p_indices, p_distance2, p_counts) = tree.FixedRadiusSearch(
100+
query_point_positions, radius.value());
81101
} else {
82102
utility::LogError("Both max_nn and radius are none.");
83103
}
84104

85-
indices_tmp = indices_tmp.To(core::Int64).View({-1});
105+
core::Tensor mask_required_points =
106+
core::Tensor::Zeros({num_points}, core::Bool, device);
86107
mask_required_points.IndexSet(
87-
{indices_tmp}, core::Tensor::Ones({1}, core::Bool, device));
108+
{p_indices.To(core::Int64).View({-1})},
109+
core::Tensor::Ones({1}, core::Bool, device));
110+
map_required_point_idx_to_point_idx =
111+
mask_required_points.NonZero().GetItem(
112+
{core::TensorKey::Index(0)});
113+
indices_fpfh_points =
114+
mask_fpfh_points.NonZero().GetItem({core::TensorKey::Index(0)});
88115

89-
} else {
90-
mask_fpfh_points = core::Tensor::Zeros({0}, core::Bool, device);
91-
mask_required_points = core::Tensor::Zeros({0}, core::Bool, device);
116+
const bool is_radius_search = p_indices.GetShape().size() == 1;
117+
118+
// Cache the info if the ratio of the indices to the total number of
119+
// points is high and we are not doing a radius search. Radius search
120+
// requires a different pipeline since tensor output p_counts is a
121+
// prefix sum.
122+
cache_fpfh_info =
123+
!is_radius_search &&
124+
(static_cast<double>(indices_fpfh_points.GetLength()) >=
125+
cache_info_indices_ratio_thresh *
126+
static_cast<double>(num_points));
127+
128+
if (cache_fpfh_info) {
129+
map_point_idx_to_required_point_idx =
130+
core::Tensor::Full({num_points}, -1, core::Int32, device);
131+
map_point_idx_to_required_point_idx.IndexSet(
132+
{map_required_point_idx_to_point_idx},
133+
core::Tensor::Arange(
134+
0, map_required_point_idx_to_point_idx.GetLength(),
135+
1, core::Int32, device));
136+
137+
core::SizeVector save_p_indices_shape = p_indices.GetShape();
138+
save_p_indices_shape[0] =
139+
map_required_point_idx_to_point_idx.GetLength();
140+
save_p_indices = core::Tensor::Zeros(save_p_indices_shape,
141+
core::Int32, device);
142+
save_p_distance2 = core::Tensor::Zeros(save_p_indices.GetShape(),
143+
dtype, device);
144+
save_p_counts = core::Tensor::Zeros(
145+
{map_required_point_idx_to_point_idx.GetLength() +
146+
(is_radius_search ? 1 : 0)},
147+
core::Int32, device);
148+
149+
core::Tensor map_fpfh_point_idx_to_required_point_idx =
150+
map_point_idx_to_required_point_idx
151+
.IndexGet({indices_fpfh_points})
152+
.To(core::Int64);
153+
154+
save_p_indices.IndexSet({map_fpfh_point_idx_to_required_point_idx},
155+
p_indices);
156+
save_p_distance2.IndexSet(
157+
{map_fpfh_point_idx_to_required_point_idx}, p_distance2);
158+
save_p_counts.IndexSet({map_fpfh_point_idx_to_required_point_idx},
159+
p_counts);
160+
161+
// If we are filtering FPFH features, we have already computed some
162+
// info about the FPFH points' neighbors. Now we just need to
163+
// compute the info for the remaining required points, so skip the
164+
// computation for the already computed info.
165+
mask_spfh_points =
166+
core::Tensor::Zeros({num_points}, core::Bool, device);
167+
mask_spfh_points.IndexSet(
168+
{map_required_point_idx_to_point_idx},
169+
core::Tensor::Ones({1}, core::Bool, device));
170+
mask_spfh_points.IndexSet(
171+
{indices_fpfh_points},
172+
core::Tensor::Zeros({1}, core::Bool, device));
173+
} else {
174+
mask_spfh_points = mask_required_points;
175+
}
92176
}
93177

94178
const core::Tensor query_point_positions =
95-
mask_required_points.GetShape()[0] > 0
96-
? input.GetPointPositions().IndexGet({mask_required_points})
97-
: input.GetPointPositions();
179+
filter_fpfh ? input.GetPointPositions().IndexGet({mask_spfh_points})
180+
: input.GetPointPositions();
98181

99182
// Compute nearest neighbors and squared distances.
100183
core::Tensor p_indices, p_distance2, p_counts;
@@ -119,14 +202,25 @@ core::Tensor ComputeFPFHFeature(
119202
utility::LogError("Building KnnIndex failed.");
120203
}
121204
}
122-
std::tie(p_indices, p_distance2) =
123-
tree.KnnSearch(query_point_positions, max_nn.value());
124-
125-
// Make counts full with min(max_nn, num_points).
126-
const int fill_value =
127-
max_nn.value() > num_points ? num_points : max_nn.value();
128-
p_counts = core::Tensor::Full({query_point_positions.GetLength()},
129-
fill_value, core::Int32, device);
205+
206+
// tree.KnnSearch complains if the query point cloud is empty.
207+
if (query_point_positions.GetLength() > 0) {
208+
std::tie(p_indices, p_distance2) =
209+
tree.KnnSearch(query_point_positions, max_nn.value());
210+
211+
const int fill_value =
212+
max_nn.value() > num_points ? num_points : max_nn.value();
213+
214+
p_counts = core::Tensor::Full({query_point_positions.GetLength()},
215+
fill_value, core::Int32, device);
216+
} else {
217+
p_indices = core::Tensor::Zeros({0, max_nn.value()}, core::Int32,
218+
device);
219+
p_distance2 =
220+
core::Tensor::Zeros({0, max_nn.value()}, dtype, device);
221+
p_counts = core::Tensor::Zeros({0}, core::Int32, device);
222+
}
223+
130224
utility::LogDebug(
131225
"Use KNNSearch [max_nn: {}] for computing FPFH feature.",
132226
max_nn.value());
@@ -147,18 +241,33 @@ core::Tensor ComputeFPFHFeature(
147241
}
148242

149243
core::Tensor fpfh;
150-
if (indices.has_value()) {
151-
const auto mask_fpfh_points_indices =
152-
mask_fpfh_points.NonZero().GetItem({core::TensorKey::Index(0)});
153-
const auto map_batch_info_idx_to_point_idx =
154-
mask_required_points.NonZero().GetItem(
155-
{core::TensorKey::Index(0)});
156-
fpfh = core::Tensor::Zeros({mask_fpfh_points_indices.GetLength(), 33},
157-
dtype, device);
244+
if (filter_fpfh) {
245+
const int64_t size = indices_fpfh_points.GetLength();
246+
fpfh = core::Tensor::Zeros({size, 33}, dtype, device);
247+
core::Tensor final_p_indices, final_p_distance2, final_p_counts;
248+
if (cache_fpfh_info) {
249+
core::Tensor map_spfh_idx_to_required_point_idx =
250+
map_point_idx_to_required_point_idx
251+
.IndexGet({mask_spfh_points})
252+
.To(core::Int64);
253+
save_p_indices.IndexSet({map_spfh_idx_to_required_point_idx},
254+
p_indices);
255+
save_p_distance2.IndexSet({map_spfh_idx_to_required_point_idx},
256+
p_distance2);
257+
save_p_counts.IndexSet({map_spfh_idx_to_required_point_idx},
258+
p_counts);
259+
final_p_indices = save_p_indices;
260+
final_p_distance2 = save_p_distance2;
261+
final_p_counts = save_p_counts;
262+
} else {
263+
final_p_indices = p_indices;
264+
final_p_distance2 = p_distance2;
265+
final_p_counts = p_counts;
266+
}
158267
pipelines::kernel::ComputeFPFHFeature(
159-
input.GetPointPositions(), input.GetPointNormals(), p_indices,
160-
p_distance2, p_counts, fpfh, mask_fpfh_points,
161-
map_batch_info_idx_to_point_idx);
268+
input.GetPointPositions(), input.GetPointNormals(),
269+
final_p_indices, final_p_distance2, final_p_counts, fpfh,
270+
mask_fpfh_points, map_required_point_idx_to_point_idx);
162271
} else {
163272
const int64_t size = input.GetPointPositions().GetLength();
164273
fpfh = core::Tensor::Zeros({size, 33}, dtype, device);

0 commit comments

Comments
 (0)