Skip to content

Commit 79c20b1

Browse files
author
mlourakis
committed
added NANOFLANN_RESTRICT and alignas() comment
1 parent a330885 commit 79c20b1

File tree

1 file changed

+25
-5
lines changed

1 file changed

+25
-5
lines changed

include/nanoflann.hpp

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,15 @@
7878
#undef None
7979
#endif
8080

81+
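// NANOFLANN_RESTRICT: compiler-specific spelling of the no-alias ("restrict") pointer qualifier; expands to nothing on other compilers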
82+
#if defined(__GNUC__) || defined(__clang__)
83+
# define NANOFLANN_RESTRICT __restrict__
84+
#elif defined(_MSC_VER)
85+
# define NANOFLANN_RESTRICT __restrict
86+
#else
87+
# define NANOFLANN_RESTRICT
88+
#endif
89+
8190
namespace nanoflann
8291
{
8392
/** @addtogroup nanoflann_grp nanoflann C++ library for KD-trees
@@ -506,7 +515,7 @@ struct L1_Adaptor
506515
L1_Adaptor(const DataSource& _data_source) : data_source(_data_source) {}
507516

508517
inline DistanceType evalMetric(
509-
const T* __restrict a, const IndexType b_idx, size_t size,
518+
const T* NANOFLANN_RESTRICT a, const IndexType b_idx, size_t size,
510519
DistanceType worst_dist = -1) const
511520
{
512521
DistanceType result = DistanceType();
@@ -590,7 +599,7 @@ struct L2_Adaptor
590599
L2_Adaptor(const DataSource& _data_source) : data_source(_data_source) {}
591600

592601
inline DistanceType evalMetric(
593-
const T* __restrict a, const IndexType b_idx, size_t size,
602+
const T* NANOFLANN_RESTRICT a, const IndexType b_idx, size_t size,
594603
DistanceType worst_dist = -1) const
595604
{
596605
DistanceType result = DistanceType();
@@ -1075,10 +1084,19 @@ class KDTreeBaseClass
10751084
using Size = typename decltype(vAcc_)::size_type;
10761085
using Dimension = int32_t;
10771086

1078-
/*---------------------------
1087+
/*-------------------------------------------------------------------
10791088
* Internal Data Structures
1080-
* --------------------------*/
1081-
struct /*alignas(64)*/ Node
1089+
*
1090+
* "Node" below can be declared with alignas(N) to improve
1091+
* cache friendliness and SIMD load/store performance.
1092+
*
1093+
* The optimal N depends on the underlying hardware:
1094+
* + Intel x86-64: 16 for SSE, 32 for AVX/AVX2 and 64 for AVX-512
1095+
* + NVIDIA Jetson: 16 for ARM + NEON and CUDA float4
1096+
* To avoid unnecessary padding, the smallest alignment
1097+
* compatible with a platform's vector width should be chosen.
1098+
* ------------------------------------------------------------------*/
1099+
struct /*alignas(N)*/ Node
10821100
{
10831101
/** Union used because a node can be either a LEAF node or a non-leaf
10841102
* node, so both data fields are never used simultaneously */
@@ -2858,3 +2876,5 @@ struct KDTreeEigenMatrixAdaptor
28582876

28592877
/** @} */ // end of grouping
28602878
} // namespace nanoflann
2879+
2880+
#undef NANOFLANN_RESTRICT

0 commit comments

Comments
 (0)