|
78 | 78 | #undef None |
79 | 79 | #endif |
80 | 80 |
|
| 81 | +// Portable spelling of the non-standard "restrict" pointer qualifier (expands to nothing on unrecognized compilers) |
| 82 | +#if defined(__GNUC__) || defined(__clang__) |
| 83 | +# define NANOFLANN_RESTRICT __restrict__ |
| 84 | +#elif defined(_MSC_VER) |
| 85 | +# define NANOFLANN_RESTRICT __restrict |
| 86 | +#else |
| 87 | +# define NANOFLANN_RESTRICT |
| 88 | +#endif |
| 89 | + |
81 | 90 | namespace nanoflann |
82 | 91 | { |
83 | 92 | /** @addtogroup nanoflann_grp nanoflann C++ library for KD-trees |
@@ -506,7 +515,7 @@ struct L1_Adaptor |
506 | 515 | L1_Adaptor(const DataSource& _data_source) : data_source(_data_source) {} |
507 | 516 |
|
508 | 517 | inline DistanceType evalMetric( |
509 | | - const T* __restrict a, const IndexType b_idx, size_t size, |
| 518 | + const T* NANOFLANN_RESTRICT a, const IndexType b_idx, size_t size, |
510 | 519 | DistanceType worst_dist = -1) const |
511 | 520 | { |
512 | 521 | DistanceType result = DistanceType(); |
@@ -590,7 +599,7 @@ struct L2_Adaptor |
590 | 599 | L2_Adaptor(const DataSource& _data_source) : data_source(_data_source) {} |
591 | 600 |
|
592 | 601 | inline DistanceType evalMetric( |
593 | | - const T* __restrict a, const IndexType b_idx, size_t size, |
| 602 | + const T* NANOFLANN_RESTRICT a, const IndexType b_idx, size_t size, |
594 | 603 | DistanceType worst_dist = -1) const |
595 | 604 | { |
596 | 605 | DistanceType result = DistanceType(); |
@@ -1075,10 +1084,19 @@ class KDTreeBaseClass |
1075 | 1084 | using Size = typename decltype(vAcc_)::size_type; |
1076 | 1085 | using Dimension = int32_t; |
1077 | 1086 |
|
1078 | | - /*--------------------------- |
| 1087 | + /*------------------------------------------------------------------- |
1079 | 1088 | * Internal Data Structures |
1080 | | - * --------------------------*/ |
1081 | | - struct /*alignas(64)*/ Node |
| 1089 | + * |
| 1090 | + * "Node" below can be declared with alignas(N) to improve |
| 1091 | + * cache friendliness and SIMD load/store performance. |
| 1092 | + * |
| 1093 | + * The optimal N depends on the underlying hardware: |
| 1094 | + * + Intel x86-64: 16 for SSE, 32 for AVX/AVX2 and 64 for AVX-512 |
| 1095 | + * + NVIDIA Jetson: 16 for ARM + NEON and CUDA float4 |
| 1096 | + * To avoid unnecessary padding, the smallest alignment |
| 1097 | + * compatible with a platform's vector width should be chosen. |
| 1098 | + * ------------------------------------------------------------------*/ |
| 1099 | + struct /*alignas(N)*/ Node |
1082 | 1100 | { |
1083 | 1101 | /** Union used because a node can be either a LEAF node or a non-leaf |
1084 | 1102 | * node, so both data fields are never used simultaneously */ |
@@ -2858,3 +2876,5 @@ struct KDTreeEigenMatrixAdaptor |
2858 | 2876 |
|
2859 | 2877 | /** @} */ // end of grouping |
2860 | 2878 | } // namespace nanoflann |
| 2879 | + |
| 2880 | +#undef NANOFLANN_RESTRICT |
0 commit comments