@@ -245,14 +245,14 @@ SIMSIMD_PUBLIC void simsimd_hamming_b8_ice(simsimd_b8_t const *a, simsimd_b8_t c
245245 simsimd_size_t xor_count ;
246246 // It's harder to squeeze out performance from tiny representations, so we unroll the loops for binary metrics.
247247 if (n_words <= 64 ) { // Up to 512 bits.
248- __mmask64 mask = (__mmask64 )_bzhi_u64 (0xFFFFFFFFFFFFFFFF , n_words );
248+ __mmask64 mask = (__mmask64 )_bzhi_u64 (0xFFFFFFFFFFFFFFFFull , n_words );
249249 __m512i a_vec = _mm512_maskz_loadu_epi8 (mask , a );
250250 __m512i b_vec = _mm512_maskz_loadu_epi8 (mask , b );
251251 __m512i xor_count_vec = _mm512_popcnt_epi64 (_mm512_xor_si512 (a_vec , b_vec ));
252252 xor_count = _mm512_reduce_add_epi64 (xor_count_vec );
253253 }
254254 else if (n_words <= 128 ) { // Up to 1024 bits.
255- __mmask64 mask = (__mmask64 )_bzhi_u64 (0xFFFFFFFFFFFFFFFF , n_words - 64 );
255+ __mmask64 mask = (__mmask64 )_bzhi_u64 (0xFFFFFFFFFFFFFFFFull , n_words - 64 );
256256 __m512i a1_vec = _mm512_loadu_epi8 (a );
257257 __m512i b1_vec = _mm512_loadu_epi8 (b );
258258 __m512i a2_vec = _mm512_maskz_loadu_epi8 (mask , a + 64 );
@@ -262,7 +262,7 @@ SIMSIMD_PUBLIC void simsimd_hamming_b8_ice(simsimd_b8_t const *a, simsimd_b8_t c
262262 xor_count = _mm512_reduce_add_epi64 (_mm512_add_epi64 (xor2_count_vec , xor1_count_vec ));
263263 }
264264 else if (n_words <= 196 ) { // Up to 1568 bits.
265- __mmask64 mask = (__mmask64 )_bzhi_u64 (0xFFFFFFFFFFFFFFFF , n_words - 128 );
265+ __mmask64 mask = (__mmask64 )_bzhi_u64 (0xFFFFFFFFFFFFFFFFull , n_words - 128 );
266266 __m512i a1_vec = _mm512_loadu_epi8 (a );
267267 __m512i b1_vec = _mm512_loadu_epi8 (b );
268268 __m512i a2_vec = _mm512_loadu_epi8 (a + 64 );
@@ -276,7 +276,7 @@ SIMSIMD_PUBLIC void simsimd_hamming_b8_ice(simsimd_b8_t const *a, simsimd_b8_t c
276276 _mm512_reduce_add_epi64 (_mm512_add_epi64 (xor3_count_vec , _mm512_add_epi64 (xor2_count_vec , xor1_count_vec )));
277277 }
278278 else if (n_words <= 256 ) { // Up to 2048 bits.
279- __mmask64 mask = (__mmask64 )_bzhi_u64 (0xFFFFFFFFFFFFFFFF , n_words - 192 );
279+ __mmask64 mask = (__mmask64 )_bzhi_u64 (0xFFFFFFFFFFFFFFFFull , n_words - 192 );
280280 __m512i a1_vec = _mm512_loadu_epi8 (a );
281281 __m512i b1_vec = _mm512_loadu_epi8 (b );
282282 __m512i a2_vec = _mm512_loadu_epi8 (a + 64 );
@@ -298,7 +298,7 @@ SIMSIMD_PUBLIC void simsimd_hamming_b8_ice(simsimd_b8_t const *a, simsimd_b8_t c
298298
299299 simsimd_hamming_b8_ice_cycle :
300300 if (n_words < 64 ) {
301- __mmask64 mask = (__mmask64 )_bzhi_u64 (0xFFFFFFFFFFFFFFFF , n_words );
301+ __mmask64 mask = (__mmask64 )_bzhi_u64 (0xFFFFFFFFFFFFFFFFull , n_words );
302302 a_vec = _mm512_maskz_loadu_epi8 (mask , a );
303303 b_vec = _mm512_maskz_loadu_epi8 (mask , b );
304304 n_words = 0 ;
@@ -336,7 +336,7 @@ SIMSIMD_PUBLIC void simsimd_jaccard_b8_ice(simsimd_b8_t const *a, simsimd_b8_t c
336336 //
337337 // It's harder to squeeze out performance from tiny representations, so we unroll the loops for binary metrics.
338338 if (n_words <= 64 ) { // Up to 512 bits.
339- __mmask64 mask = (__mmask64 )_bzhi_u64 (0xFFFFFFFFFFFFFFFF , n_words );
339+ __mmask64 mask = (__mmask64 )_bzhi_u64 (0xFFFFFFFFFFFFFFFFull , n_words );
340340 __m512i a_vec = _mm512_maskz_loadu_epi8 (mask , a );
341341 __m512i b_vec = _mm512_maskz_loadu_epi8 (mask , b );
342342 __m512i and_count_vec = _mm512_popcnt_epi64 (_mm512_and_si512 (a_vec , b_vec ));
@@ -345,7 +345,7 @@ SIMSIMD_PUBLIC void simsimd_jaccard_b8_ice(simsimd_b8_t const *a, simsimd_b8_t c
345345 union_ = _mm512_reduce_add_epi64 (or_count_vec );
346346 }
347347 else if (n_words <= 128 ) { // Up to 1024 bits.
348- __mmask64 mask = (__mmask64 )_bzhi_u64 (0xFFFFFFFFFFFFFFFF , n_words - 64 );
348+ __mmask64 mask = (__mmask64 )_bzhi_u64 (0xFFFFFFFFFFFFFFFFull , n_words - 64 );
349349 __m512i a1_vec = _mm512_loadu_epi8 (a );
350350 __m512i b1_vec = _mm512_loadu_epi8 (b );
351351 __m512i a2_vec = _mm512_maskz_loadu_epi8 (mask , a + 64 );
@@ -358,7 +358,7 @@ SIMSIMD_PUBLIC void simsimd_jaccard_b8_ice(simsimd_b8_t const *a, simsimd_b8_t c
358358 union_ = _mm512_reduce_add_epi64 (_mm512_add_epi64 (or2_count_vec , or1_count_vec ));
359359 }
360360 else if (n_words <= 196 ) { // Up to 1568 bits.
361- __mmask64 mask = (__mmask64 )_bzhi_u64 (0xFFFFFFFFFFFFFFFF , n_words - 128 );
361+ __mmask64 mask = (__mmask64 )_bzhi_u64 (0xFFFFFFFFFFFFFFFFull , n_words - 128 );
362362 __m512i a1_vec = _mm512_loadu_epi8 (a );
363363 __m512i b1_vec = _mm512_loadu_epi8 (b );
364364 __m512i a2_vec = _mm512_loadu_epi8 (a + 64 );
@@ -377,7 +377,7 @@ SIMSIMD_PUBLIC void simsimd_jaccard_b8_ice(simsimd_b8_t const *a, simsimd_b8_t c
377377 _mm512_add_epi64 (or3_count_vec , _mm512_add_epi64 (or2_count_vec , or1_count_vec )));
378378 }
379379 else if (n_words <= 256 ) { // Up to 2048 bits.
380- __mmask64 mask = (__mmask64 )_bzhi_u64 (0xFFFFFFFFFFFFFFFF , n_words - 192 );
380+ __mmask64 mask = (__mmask64 )_bzhi_u64 (0xFFFFFFFFFFFFFFFFull , n_words - 192 );
381381 __m512i a1_vec = _mm512_loadu_epi8 (a );
382382 __m512i b1_vec = _mm512_loadu_epi8 (b );
383383 __m512i a2_vec = _mm512_loadu_epi8 (a + 64 );
@@ -405,7 +405,7 @@ SIMSIMD_PUBLIC void simsimd_jaccard_b8_ice(simsimd_b8_t const *a, simsimd_b8_t c
405405
406406 simsimd_jaccard_b8_ice_cycle :
407407 if (n_words < 64 ) {
408- __mmask64 mask = (__mmask64 )_bzhi_u64 (0xFFFFFFFFFFFFFFFF , n_words );
408+ __mmask64 mask = (__mmask64 )_bzhi_u64 (0xFFFFFFFFFFFFFFFFull , n_words );
409409 a_vec = _mm512_maskz_loadu_epi8 (mask , a );
410410 b_vec = _mm512_maskz_loadu_epi8 (mask , b );
411411 n_words = 0 ;
0 commit comments