Skip to content

Commit 3d5d385

Browse files
authored
Merge pull request #1350 from aprokop/dbscan_precisions
Allow running DBSCAN benchmark using different precisions
2 parents 104a25c + 568d67c commit 3d5d385

9 files changed

Lines changed: 195 additions & 121 deletions

File tree

benchmarks/cluster/data.cpp

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -22,25 +22,36 @@ namespace ArborXBenchmark
2222

2323
// Explicit instantiations
2424
using MemorySpace = typename Kokkos::DefaultExecutionSpace::memory_space;
25-
#define INSTANTIATE_LOADER(DIM) \
26-
template Kokkos::View<ArborX::Point<DIM> *, MemorySpace> \
27-
loadData<DIM, MemorySpace>(ArborXBenchmark::Parameters const &);
28-
INSTANTIATE_LOADER(2)
29-
INSTANTIATE_LOADER(3)
30-
INSTANTIATE_LOADER(4)
31-
INSTANTIATE_LOADER(5)
32-
INSTANTIATE_LOADER(6)
25+
#define INSTANTIATE_LOADER(DIM, Coordinate) \
26+
template Kokkos::View<ArborX::Point<DIM, Coordinate> *, MemorySpace> \
27+
loadData<DIM, Coordinate, MemorySpace>(ArborXBenchmark::Parameters const &)
28+
INSTANTIATE_LOADER(2, float);
29+
INSTANTIATE_LOADER(3, float);
30+
INSTANTIATE_LOADER(4, float);
31+
INSTANTIATE_LOADER(5, float);
32+
INSTANTIATE_LOADER(6, float);
33+
INSTANTIATE_LOADER(2, double);
34+
INSTANTIATE_LOADER(3, double);
35+
INSTANTIATE_LOADER(4, double);
36+
INSTANTIATE_LOADER(5, double);
37+
INSTANTIATE_LOADER(6, double);
3338
#undef INSTANTIATE_LOADER
3439

3540
#ifdef ARBORX_ENABLE_MPI
36-
#define INSTANTIATE_MPI_LOADER(DIM) \
37-
template Kokkos::View<ArborX::Point<DIM> *, MemorySpace> \
38-
loadData<DIM, MemorySpace>(MPI_Comm, ArborXBenchmark::Parameters const &);
39-
INSTANTIATE_MPI_LOADER(2)
40-
INSTANTIATE_MPI_LOADER(3)
41-
INSTANTIATE_MPI_LOADER(4)
42-
INSTANTIATE_MPI_LOADER(5)
43-
INSTANTIATE_MPI_LOADER(6)
41+
#define INSTANTIATE_MPI_LOADER(DIM, Coordinate) \
42+
template Kokkos::View<ArborX::Point<DIM, Coordinate> *, MemorySpace> \
43+
loadData<DIM, Coordinate, MemorySpace>(MPI_Comm, \
44+
ArborXBenchmark::Parameters const &);
45+
INSTANTIATE_MPI_LOADER(2, float);
46+
INSTANTIATE_MPI_LOADER(3, float);
47+
INSTANTIATE_MPI_LOADER(4, float);
48+
INSTANTIATE_MPI_LOADER(5, float);
49+
INSTANTIATE_MPI_LOADER(6, float);
50+
INSTANTIATE_MPI_LOADER(2, double);
51+
INSTANTIATE_MPI_LOADER(3, double);
52+
INSTANTIATE_MPI_LOADER(4, double);
53+
INSTANTIATE_MPI_LOADER(5, double);
54+
INSTANTIATE_MPI_LOADER(6, double);
4455
// Undefine the MPI helper macro defined above (INSTANTIATE_LOADER was already
// undefined after the non-MPI instantiations).
#undef INSTANTIATE_MPI_LOADER
4556
#endif
4657

benchmarks/cluster/data.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,13 @@ namespace ArborXBenchmark
2828

2929
int getDataDimension(std::string const &filename, bool binary);
3030

31-
template <int DIM, typename MemorySpace>
32-
Kokkos::View<ArborX::Point<DIM> *, MemorySpace>
31+
template <int DIM, typename Coordinate, typename MemorySpace>
32+
Kokkos::View<ArborX::Point<DIM, Coordinate> *, MemorySpace>
3333
loadData(ArborXBenchmark::Parameters const &params);
3434

3535
#ifdef ARBORX_ENABLE_MPI
36-
template <int DIM, typename MemorySpace>
37-
Kokkos::View<ArborX::Point<DIM> *, MemorySpace>
36+
template <int DIM, typename Coordinate, typename MemorySpace>
37+
Kokkos::View<ArborX::Point<DIM, Coordinate> *, MemorySpace>
3838
loadData(MPI_Comm comm, ArborXBenchmark::Parameters const &params);
3939
#endif
4040

benchmarks/cluster/data_timpl.hpp

Lines changed: 71 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,10 @@ namespace ArborXBenchmark
3232

3333
using ArborX::Point;
3434

35-
template <int DIM>
36-
std::vector<Point<DIM>> sampleData(std::vector<Point<DIM>> const &data,
37-
int num_samples)
35+
template <typename Point>
36+
auto sampleData(std::vector<Point> const &data, int num_samples)
3837
{
39-
std::vector<Point<DIM>> sampled_data(num_samples);
38+
std::vector<Point> sampled_data(num_samples);
4039

4140
// We use a hardcoded Lehmer (or Park-Miller) random generator instead of C++
4241
// <random> to guarantee sampling reproducibility across platforms and
@@ -62,10 +61,9 @@ std::vector<Point<DIM>> sampleData(std::vector<Point<DIM>> const &data,
6261
return sampled_data;
6362
}
6463

65-
template <int DIM>
66-
std::vector<Point<DIM>> loadData(std::string const &filename,
67-
bool binary = true, int max_num_points = -1,
68-
int comm_rank = 0, int comm_size = 1)
64+
template <typename Point>
65+
auto loadData(std::string const &filename, bool binary = true,
66+
int max_num_points = -1, int comm_rank = 0, int comm_size = 1)
6967
{
7068
if (comm_size > 1 && !binary)
7169
throw std::runtime_error(
@@ -82,7 +80,7 @@ std::vector<Point<DIM>> loadData(std::string const &filename,
8280
input.open(filename, std::ifstream::binary);
8381
ARBORX_ASSERT(input.good());
8482

85-
std::vector<Point<DIM>> v;
83+
std::vector<Point> v;
8684

8785
int num_points = 0;
8886
int dim = 0;
@@ -97,6 +95,9 @@ std::vector<Point<DIM>> loadData(std::string const &filename,
9795
input.read(reinterpret_cast<char *>(&dim), sizeof(int));
9896
}
9997

98+
constexpr int DIM = ArborX::GeometryTraits::dimension_v<Point>;
99+
using Coordinate = ArborX::GeometryTraits::coordinate_type_t<Point>;
100+
100101
ARBORX_ASSERT(dim == DIM);
101102

102103
if (max_num_points > 0 && max_num_points < num_points)
@@ -118,9 +119,19 @@ std::vector<Point<DIM>> loadData(std::string const &filename,
118119
else
119120
{
120121
// Directly read into a point
121-
auto const value_size = sizeof(Point<DIM>);
122+
auto const value_size = dim * sizeof(float);
122123
input.seekg(num_points_per_proc * comm_rank * value_size, std::ios::cur);
123-
input.read(reinterpret_cast<char *>(v.data()), num_points * value_size);
124+
if constexpr (std::is_same_v<Coordinate, float>)
125+
input.read(reinterpret_cast<char *>(v.data()), num_points * value_size);
126+
else
127+
{
128+
// We need to read into a temporary buffer and convert to the output type
129+
std::vector<ArborX::Point<DIM, float>> tmp(num_points);
130+
input.read(reinterpret_cast<char *>(tmp.data()), num_points * value_size);
131+
for (int i = 0; i < num_points; ++i)
132+
for (int d = 0; d < DIM; ++d)
133+
v[i][d] = tmp[i][d];
134+
}
124135
}
125136
input.close();
126137

@@ -131,59 +142,63 @@ std::vector<Point<DIM>> loadData(std::string const &filename,
131142
return v;
132143
}
133144

134-
template <int DIM, typename Generator>
135-
auto randomDomainPoint(Generator &generator, float L)
145+
template <typename Point, typename Generator>
146+
auto randomDomainPoint(Generator &generator, double L)
136147
{
137-
std::uniform_real_distribution<float> distribution(0.f, 1.f);
148+
constexpr int DIM = ArborX::GeometryTraits::dimension_v<Point>;
149+
150+
std::uniform_real_distribution<double> distribution(0, 1);
138151
auto rd = [&distribution, &generator]() { return distribution(generator); };
139152

140-
Point<DIM> point;
153+
Point point;
141154
for (int d = 0; d < DIM; ++d)
142155
point[d] = rd() * L;
143156

144157
return point;
145158
}
146159

147-
template <int DIM, typename Generator>
148-
auto randomBallPoint(Generator &generator, Point<DIM> const &center,
149-
float radius)
160+
template <typename Point, typename Generator>
161+
auto randomBallPoint(Generator &generator, Point const &center, double radius)
150162
{
151-
std::uniform_real_distribution<float> distribution(-1.f, 1.f);
163+
constexpr int DIM = ArborX::GeometryTraits::dimension_v<Point>;
164+
165+
std::uniform_real_distribution<double> distribution(-1, 1);
152166
auto rd = [&distribution, &generator]() { return distribution(generator); };
153167

154-
Point<DIM> p;
155-
float norm2;
168+
Point p;
169+
double norm2;
156170
do
157171
{
158-
norm2 = 0.f;
172+
norm2 = 0;
159173
for (int d = 0; d < DIM; ++d)
160174
{
161175
p[d] = rd();
162176
norm2 += p[d] * p[d];
163177
}
164-
} while (norm2 > 1.f);
178+
} while (norm2 > 1);
165179
for (int d = 0; d < DIM; ++d)
166180
p[d] = center[d] + p[d] * radius;
167181
return p;
168182
}
169183

170-
template <int DIM, typename Generator>
171-
auto randomShiftPoint(Generator &generator, Point<DIM> const &center,
172-
float radius)
184+
template <typename Point, typename Generator>
185+
auto randomShiftPoint(Generator &generator, Point const &center, float radius)
173186
{
174-
std::normal_distribution<float> distribution(0.f, 1.f);
187+
constexpr int DIM = ArborX::GeometryTraits::dimension_v<Point>;
188+
189+
std::normal_distribution<double> distribution(0, 1);
175190
auto rd = [&distribution, &generator]() { return distribution(generator); };
176191

177-
Point<DIM> direction;
178-
float norm = 0.f;
192+
Point direction;
193+
double norm = 0;
179194
for (int d = 0; d < DIM; ++d)
180195
{
181196
direction[d] = rd();
182197
norm += direction[d] * direction[d];
183198
}
184199
norm = std::sqrt(norm);
185200

186-
Point<DIM> p;
201+
Point p;
187202
for (int d = 0; d < DIM; ++d)
188203
p[d] = center[d] + (direction[d] / norm) * radius;
189204

@@ -234,13 +249,14 @@ auto randomShiftPoint(Generator &generator, Point<DIM> const &center,
234249
//
235250
// [1] J. Gan and Y. Tao. "On the hardness and approximation of Euclidean
236251
// DBSCAN." ACM Transactions on Database Systems (TODS), 2017.
237-
template <int DIM>
238-
std::vector<Point<DIM>> GanTao(int n, bool variable_density = false,
239-
int num_clusters = 10, int c_reset = 100,
240-
float rho_noise = 1e-4)
252+
template <typename Point>
253+
auto GanTao(int n, bool variable_density = false, int num_clusters = 10,
254+
int c_reset = 100, double rho_noise = 1e-4)
241255
{
256+
constexpr int DIM = ArborX::GeometryTraits::dimension_v<Point>;
257+
242258
// FIXME
243-
float const L = 1e6;
259+
double const L = 1e6;
244260
int const n_wo_noise = n - (n * rho_noise);
245261
int const num_different_densities = (variable_density ? 10 : 1);
246262
double const rho_restart = double(num_clusters - 1) / n_wo_noise;
@@ -257,20 +273,20 @@ std::vector<Point<DIM>> GanTao(int n, bool variable_density = false,
257273
};
258274

259275
auto random_point_in_domain = [&generator_center, L]() {
260-
return randomDomainPoint<DIM>(generator_center, L);
276+
return randomDomainPoint<Point>(generator_center, L);
261277
};
262-
auto random_point_shift = [&generator_shift](auto const &c, float r) {
278+
auto random_point_shift = [&generator_shift](auto const &c, double r) {
263279
return randomShiftPoint(generator_shift, c, r);
264280
};
265-
auto random_point_in_ball = [&generator_ball](auto const &c, float r) {
281+
auto random_point_in_ball = [&generator_ball](auto const &c, double r) {
266282
return randomBallPoint(generator_ball, c, r);
267283
};
268284

269-
std::vector<Point<DIM>> points(n);
285+
std::vector<Point> points(n);
270286

271-
Point<DIM> origin;
272-
float r_vicinity;
273-
float r_shift;
287+
Point origin;
288+
double r_vicinity;
289+
double r_shift;
274290
int count;
275291
bool do_restart = true;
276292
int num_restarts = 0;
@@ -329,10 +345,12 @@ auto vec2view(std::vector<T> const &in, std::string const &label = "")
329345
return out;
330346
}
331347

332-
template <int DIM, typename MemorySpace>
333-
Kokkos::View<ArborX::Point<DIM> *, MemorySpace>
348+
template <int DIM, typename Coordinate, typename MemorySpace>
349+
Kokkos::View<ArborX::Point<DIM, Coordinate> *, MemorySpace>
334350
loadData(ArborXBenchmark::Parameters const &params)
335351
{
352+
using Point = ArborX::Point<DIM, Coordinate>;
353+
336354
if (!params.filename.empty())
337355
{
338356
// Read in data
@@ -344,7 +362,7 @@ loadData(ArborXBenchmark::Parameters const &params)
344362
(params.binary ? "binary" : "text"), max_num_points);
345363
printf("samples : %d\n", num_samples);
346364

347-
auto v = loadData<DIM>(filename, params.binary, max_num_points);
365+
auto v = loadData<Point>(filename, params.binary, max_num_points);
348366
if (num_samples > 0 && num_samples < (int)v.size())
349367
v = sampleData(v, num_samples);
350368

@@ -355,15 +373,17 @@ loadData(ArborXBenchmark::Parameters const &params)
355373
int dim = params.dim;
356374
printf("generator : n = %d, dim = %d, density = %s\n", params.n, dim,
357375
(params.variable_density ? "variable" : "constant"));
358-
return vec2view<MemorySpace>(GanTao<DIM>(params.n, params.variable_density),
376+
return vec2view<MemorySpace>(GanTao<Point>(params.n, params.variable_density),
359377
"Benchmark::primitives");
360378
}
361379

362380
#ifdef ARBORX_ENABLE_MPI
363-
template <int DIM, typename MemorySpace>
364-
Kokkos::View<ArborX::Point<DIM> *, MemorySpace>
381+
template <int DIM, typename Coordinate, typename MemorySpace>
382+
Kokkos::View<ArborX::Point<DIM, Coordinate> *, MemorySpace>
365383
loadData(MPI_Comm comm, ArborXBenchmark::Parameters const &params)
366384
{
385+
using Point = ArborX::Point<DIM, Coordinate>;
386+
367387
int comm_rank;
368388
MPI_Comm_rank(comm, &comm_rank);
369389

@@ -380,8 +400,8 @@ loadData(MPI_Comm comm, ArborXBenchmark::Parameters const &params)
380400

381401
int comm_size;
382402
MPI_Comm_size(comm, &comm_size);
383-
auto v = loadData<DIM>(filename, params.binary, max_num_points, comm_rank,
384-
comm_size);
403+
auto v = loadData<Point>(filename, params.binary, max_num_points, comm_rank,
404+
comm_size);
385405
return vec2view<MemorySpace>(v, "Benchmark::primitives");
386406
}
387407

@@ -390,7 +410,7 @@ loadData(MPI_Comm comm, ArborXBenchmark::Parameters const &params)
390410
if (comm_rank == 0)
391411
printf("generator : n = %d, dim = %d, density = %s\n", params.n,
392412
dim, (params.variable_density ? "variable" : "constant"));
393-
return vec2view<MemorySpace>(GanTao<DIM>(params.n, params.variable_density),
413+
return vec2view<MemorySpace>(GanTao<Point>(params.n, params.variable_density),
394414
"Benchmark::primitives");
395415
}
396416
#endif

0 commit comments

Comments
 (0)