@@ -81,18 +81,18 @@ template<>
81
81
inline float euclidean_distance_squared<distance_comp_l2, float , float >(
82
82
float const * a, float const * b, size_t n) {
83
83
84
- int n_rounded = n - (n % 4 );
84
+ size_t n_rounded = n - (n % 4 );
85
85
86
86
float32x4_t vreg_dsum = vdupq_n_f32 (0 .f );
87
- for (int i = 0 ; i < n_rounded; i += 4 ) {
87
+ for (size_t i = 0 ; i < n_rounded; i += 4 ) {
88
88
float32x4_t vreg_a = vld1q_f32 (&a[i]);
89
89
float32x4_t vreg_b = vld1q_f32 (&b[i]);
90
90
float32x4_t vreg_d = vsubq_f32 (vreg_a, vreg_b);
91
91
vreg_dsum = vfmaq_f32 (vreg_dsum, vreg_d, vreg_d);
92
92
}
93
93
94
94
float dsum = vaddvq_f32 (vreg_dsum);
95
- for (int i = n_rounded; i < n; ++i) {
95
+ for (size_t i = n_rounded; i < n; ++i) {
96
96
float d = a[i] - b[i];
97
97
dsum += d * d;
98
98
}
@@ -104,7 +104,7 @@ template<>
104
104
inline float euclidean_distance_squared<distance_comp_l2, float , ::std::int8_t >(
105
105
::std::int8_t const * a, ::std::int8_t const * b, size_t n) {
106
106
107
- int n_rounded = n - (n % 16 );
107
+ size_t n_rounded = n - (n % 16 );
108
108
float dsum = 0 .f ;
109
109
110
110
if (n_rounded > 0 ) {
@@ -113,7 +113,7 @@ inline float euclidean_distance_squared<distance_comp_l2, float, ::std::int8_t>(
113
113
float32x4_t vreg_dsum_fp32_2 = vreg_dsum_fp32_0;
114
114
float32x4_t vreg_dsum_fp32_3 = vreg_dsum_fp32_0;
115
115
116
- for (int i = 0 ; i < n_rounded; i += 16 ) {
116
+ for (size_t i = 0 ; i < n_rounded; i += 16 ) {
117
117
int8x16_t vreg_a = vld1q_s8 (&a[i]);
118
118
int16x8_t vreg_a_s16_0 = vmovl_s8 (vget_low_s8 (vreg_a));
119
119
int16x8_t vreg_a_s16_1 = vmovl_s8 (vget_high_s8 (vreg_a));
@@ -143,7 +143,7 @@ inline float euclidean_distance_squared<distance_comp_l2, float, ::std::int8_t>(
143
143
dsum = vaddvq_f32 (vreg_dsum_fp32_0); // faddp
144
144
}
145
145
146
- for (int i = n_rounded; i < n; ++i) {
146
+ for (size_t i = n_rounded; i < n; ++i) {
147
147
float d = a[i] - b[i];
148
148
dsum += d * d; // [nvc++] faddp, [clang] fadda, [gcc] vecsum+fadda
149
149
}
@@ -155,7 +155,7 @@ template<>
155
155
inline float euclidean_distance_squared<distance_comp_l2, float , ::std::uint8_t >(
156
156
::std::uint8_t const * a, ::std::uint8_t const * b, size_t n) {
157
157
158
- int n_rounded = n - (n % 16 );
158
+ size_t n_rounded = n - (n % 16 );
159
159
float dsum = 0 .f ;
160
160
161
161
if (n_rounded > 0 ) {
@@ -164,7 +164,7 @@ inline float euclidean_distance_squared<distance_comp_l2, float, ::std::uint8_t>
164
164
float32x4_t vreg_dsum_fp32_2 = vreg_dsum_fp32_0;
165
165
float32x4_t vreg_dsum_fp32_3 = vreg_dsum_fp32_0;
166
166
167
- for (int i = 0 ; i < n_rounded; i += 16 ) {
167
+ for (size_t i = 0 ; i < n_rounded; i += 16 ) {
168
168
uint8x16_t vreg_a = vld1q_u8 (&a[i]);
169
169
uint16x8_t vreg_a_u16_0 = vmovl_u8 (vget_low_u8 (vreg_a));
170
170
uint16x8_t vreg_a_u16_1 = vmovl_u8 (vget_high_u8 (vreg_a));
@@ -199,7 +199,7 @@ inline float euclidean_distance_squared<distance_comp_l2, float, ::std::uint8_t>
199
199
dsum = vaddvq_f32 (vreg_dsum_fp32_0); // faddp
200
200
}
201
201
202
- for (int i = n_rounded; i < n; ++i) {
202
+ for (size_t i = n_rounded; i < n; ++i) {
203
203
float d = a[i] - b[i];
204
204
dsum += d * d; // [nvc++] faddp, [clang] fadda, [gcc] vecsum+fadda
205
205
}
@@ -211,18 +211,18 @@ template<>
211
211
inline float euclidean_distance_squared<distance_comp_inner, float , float >(
212
212
float const * a, float const * b, size_t n) {
213
213
214
- int n_rounded = n - (n % 4 );
214
+ size_t n_rounded = n - (n % 4 );
215
215
216
216
float32x4_t vreg_dsum = vdupq_n_f32 (0 .f );
217
- for (int i = 0 ; i < n_rounded; i += 4 ) {
217
+ for (size_t i = 0 ; i < n_rounded; i += 4 ) {
218
218
float32x4_t vreg_a = vld1q_f32 (&a[i]);
219
219
float32x4_t vreg_b = vld1q_f32 (&b[i]);
220
220
vreg_a = vnegq_f32 (vreg_a);
221
221
vreg_dsum = vfmaq_f32 (vreg_dsum, vreg_a, vreg_b);
222
222
}
223
223
224
224
float dsum = vaddvq_f32 (vreg_dsum);
225
- for (int i = n_rounded; i < n; ++i) {
225
+ for (size_t i = n_rounded; i < n; ++i) {
226
226
dsum += -a[i] * b[i];
227
227
}
228
228
@@ -233,7 +233,7 @@ template<>
233
233
inline float euclidean_distance_squared<distance_comp_inner, float , ::std::int8_t >(
234
234
::std::int8_t const * a, ::std::int8_t const * b, size_t n) {
235
235
236
- int n_rounded = n - (n % 16 );
236
+ size_t n_rounded = n - (n % 16 );
237
237
float dsum = 0 .f ;
238
238
239
239
if (n_rounded > 0 ) {
@@ -242,7 +242,7 @@ inline float euclidean_distance_squared<distance_comp_inner, float, ::std::int8_
242
242
float32x4_t vreg_dsum_fp32_2 = vreg_dsum_fp32_0;
243
243
float32x4_t vreg_dsum_fp32_3 = vreg_dsum_fp32_0;
244
244
245
- for (int i = 0 ; i < n_rounded; i += 16 ) {
245
+ for (size_t i = 0 ; i < n_rounded; i += 16 ) {
246
246
int8x16_t vreg_a = vld1q_s8 (&a[i]);
247
247
int16x8_t vreg_a_s16_0 = vmovl_s8 (vget_low_s8 (vreg_a));
248
248
int16x8_t vreg_a_s16_1 = vmovl_s8 (vget_high_s8 (vreg_a));
@@ -272,7 +272,7 @@ inline float euclidean_distance_squared<distance_comp_inner, float, ::std::int8_
272
272
dsum = vaddvq_f32 (vreg_dsum_fp32_0); // faddp
273
273
}
274
274
275
- for (int i = n_rounded; i < n; ++i) {
275
+ for (size_t i = n_rounded; i < n; ++i) {
276
276
dsum += -a[i] * b[i];
277
277
}
278
278
@@ -281,7 +281,7 @@ inline float euclidean_distance_squared<distance_comp_inner, float, ::std::int8_
281
281
282
282
template <>
283
283
inline float euclidean_distance_squared<distance_comp_inner, float , ::std::uint8_t >(::std::uint8_t const * a, ::std::uint8_t const * b, size_t n) {
284
- int n_rounded = n - (n % 16 );
284
+ size_t n_rounded = n - (n % 16 );
285
285
float dsum = 0 .f ;
286
286
287
287
if (n_rounded > 0 ) {
@@ -290,7 +290,7 @@ inline float euclidean_distance_squared<distance_comp_inner, float, ::std::uint8
290
290
float32x4_t vreg_dsum_fp32_2 = vreg_dsum_fp32_0;
291
291
float32x4_t vreg_dsum_fp32_3 = vreg_dsum_fp32_0;
292
292
293
- for (int i = 0 ; i < n_rounded; i += 16 ) {
293
+ for (size_t i = 0 ; i < n_rounded; i += 16 ) {
294
294
uint8x16_t vreg_a = vld1q_u8 (&a[i]);
295
295
uint16x8_t vreg_a_u16_0 = vmovl_u8 (vget_low_u8 (vreg_a));
296
296
uint16x8_t vreg_a_u16_1 = vmovl_u8 (vget_high_u8 (vreg_a));
@@ -320,7 +320,7 @@ inline float euclidean_distance_squared<distance_comp_inner, float, ::std::uint8
320
320
dsum = vaddvq_f32 (vreg_dsum_fp32_0); // faddp
321
321
}
322
322
323
- for (int i = n_rounded; i < n; ++i) {
323
+ for (size_t i = n_rounded; i < n; ++i) {
324
324
dsum += -a[i] * b[i];
325
325
}
326
326
0 commit comments