Skip to content

Commit e089626

Browse files
committed
Improve: Unsigned type literals for masks
1 parent 463e8f3 commit e089626

File tree

3 files changed: 15 additions (+15) and 15 deletions (-15).

include/simsimd/binary.h

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -245,14 +245,14 @@ SIMSIMD_PUBLIC void simsimd_hamming_b8_ice(simsimd_b8_t const *a, simsimd_b8_t c
245245
simsimd_size_t xor_count;
246246
// It's harder to squeeze out performance from tiny representations, so we unroll the loops for binary metrics.
247247
if (n_words <= 64) { // Up to 512 bits.
248-
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFF, n_words);
248+
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFFull, n_words);
249249
__m512i a_vec = _mm512_maskz_loadu_epi8(mask, a);
250250
__m512i b_vec = _mm512_maskz_loadu_epi8(mask, b);
251251
__m512i xor_count_vec = _mm512_popcnt_epi64(_mm512_xor_si512(a_vec, b_vec));
252252
xor_count = _mm512_reduce_add_epi64(xor_count_vec);
253253
}
254254
else if (n_words <= 128) { // Up to 1024 bits.
255-
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFF, n_words - 64);
255+
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFFull, n_words - 64);
256256
__m512i a1_vec = _mm512_loadu_epi8(a);
257257
__m512i b1_vec = _mm512_loadu_epi8(b);
258258
__m512i a2_vec = _mm512_maskz_loadu_epi8(mask, a + 64);
@@ -262,7 +262,7 @@ SIMSIMD_PUBLIC void simsimd_hamming_b8_ice(simsimd_b8_t const *a, simsimd_b8_t c
262262
xor_count = _mm512_reduce_add_epi64(_mm512_add_epi64(xor2_count_vec, xor1_count_vec));
263263
}
264264
else if (n_words <= 196) { // Up to 1568 bits.
265-
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFF, n_words - 128);
265+
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFFull, n_words - 128);
266266
__m512i a1_vec = _mm512_loadu_epi8(a);
267267
__m512i b1_vec = _mm512_loadu_epi8(b);
268268
__m512i a2_vec = _mm512_loadu_epi8(a + 64);
@@ -276,7 +276,7 @@ SIMSIMD_PUBLIC void simsimd_hamming_b8_ice(simsimd_b8_t const *a, simsimd_b8_t c
276276
_mm512_reduce_add_epi64(_mm512_add_epi64(xor3_count_vec, _mm512_add_epi64(xor2_count_vec, xor1_count_vec)));
277277
}
278278
else if (n_words <= 256) { // Up to 2048 bits.
279-
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFF, n_words - 192);
279+
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFFull, n_words - 192);
280280
__m512i a1_vec = _mm512_loadu_epi8(a);
281281
__m512i b1_vec = _mm512_loadu_epi8(b);
282282
__m512i a2_vec = _mm512_loadu_epi8(a + 64);
@@ -298,7 +298,7 @@ SIMSIMD_PUBLIC void simsimd_hamming_b8_ice(simsimd_b8_t const *a, simsimd_b8_t c
298298

299299
simsimd_hamming_b8_ice_cycle:
300300
if (n_words < 64) {
301-
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFF, n_words);
301+
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFFull, n_words);
302302
a_vec = _mm512_maskz_loadu_epi8(mask, a);
303303
b_vec = _mm512_maskz_loadu_epi8(mask, b);
304304
n_words = 0;
@@ -336,7 +336,7 @@ SIMSIMD_PUBLIC void simsimd_jaccard_b8_ice(simsimd_b8_t const *a, simsimd_b8_t c
336336
//
337337
// It's harder to squeeze out performance from tiny representations, so we unroll the loops for binary metrics.
338338
if (n_words <= 64) { // Up to 512 bits.
339-
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFF, n_words);
339+
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFFull, n_words);
340340
__m512i a_vec = _mm512_maskz_loadu_epi8(mask, a);
341341
__m512i b_vec = _mm512_maskz_loadu_epi8(mask, b);
342342
__m512i and_count_vec = _mm512_popcnt_epi64(_mm512_and_si512(a_vec, b_vec));
@@ -345,7 +345,7 @@ SIMSIMD_PUBLIC void simsimd_jaccard_b8_ice(simsimd_b8_t const *a, simsimd_b8_t c
345345
union_ = _mm512_reduce_add_epi64(or_count_vec);
346346
}
347347
else if (n_words <= 128) { // Up to 1024 bits.
348-
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFF, n_words - 64);
348+
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFFull, n_words - 64);
349349
__m512i a1_vec = _mm512_loadu_epi8(a);
350350
__m512i b1_vec = _mm512_loadu_epi8(b);
351351
__m512i a2_vec = _mm512_maskz_loadu_epi8(mask, a + 64);
@@ -358,7 +358,7 @@ SIMSIMD_PUBLIC void simsimd_jaccard_b8_ice(simsimd_b8_t const *a, simsimd_b8_t c
358358
union_ = _mm512_reduce_add_epi64(_mm512_add_epi64(or2_count_vec, or1_count_vec));
359359
}
360360
else if (n_words <= 196) { // Up to 1568 bits.
361-
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFF, n_words - 128);
361+
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFFull, n_words - 128);
362362
__m512i a1_vec = _mm512_loadu_epi8(a);
363363
__m512i b1_vec = _mm512_loadu_epi8(b);
364364
__m512i a2_vec = _mm512_loadu_epi8(a + 64);
@@ -377,7 +377,7 @@ SIMSIMD_PUBLIC void simsimd_jaccard_b8_ice(simsimd_b8_t const *a, simsimd_b8_t c
377377
_mm512_add_epi64(or3_count_vec, _mm512_add_epi64(or2_count_vec, or1_count_vec)));
378378
}
379379
else if (n_words <= 256) { // Up to 2048 bits.
380-
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFF, n_words - 192);
380+
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFFull, n_words - 192);
381381
__m512i a1_vec = _mm512_loadu_epi8(a);
382382
__m512i b1_vec = _mm512_loadu_epi8(b);
383383
__m512i a2_vec = _mm512_loadu_epi8(a + 64);
@@ -405,7 +405,7 @@ SIMSIMD_PUBLIC void simsimd_jaccard_b8_ice(simsimd_b8_t const *a, simsimd_b8_t c
405405

406406
simsimd_jaccard_b8_ice_cycle:
407407
if (n_words < 64) {
408-
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFF, n_words);
408+
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFFull, n_words);
409409
a_vec = _mm512_maskz_loadu_epi8(mask, a);
410410
b_vec = _mm512_maskz_loadu_epi8(mask, b);
411411
n_words = 0;

include/simsimd/dot.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1792,7 +1792,7 @@ SIMSIMD_PUBLIC void simsimd_dot_u8_ice(simsimd_u8_t const *a, simsimd_u8_t const
17921792

17931793
simsimd_dot_u8_ice_cycle:
17941794
if (n < 64) {
1795-
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFF, n);
1795+
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFFull, n);
17961796
a_u8_vec = _mm512_maskz_loadu_epi8(mask, a);
17971797
b_u8_vec = _mm512_maskz_loadu_epi8(mask, b);
17981798
n = 0;

include/simsimd/spatial.h

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1975,7 +1975,7 @@ SIMSIMD_PUBLIC void simsimd_l2sq_u8_ice(simsimd_u8_t const *a, simsimd_u8_t cons
19751975

19761976
simsimd_l2sq_u8_ice_cycle:
19771977
if (n < 64) {
1978-
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFF, n);
1978+
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFFull, n);
19791979
a_u8_vec = _mm512_maskz_loadu_epi8(mask, a);
19801980
b_u8_vec = _mm512_maskz_loadu_epi8(mask, b);
19811981
n = 0;
@@ -2014,7 +2014,7 @@ SIMSIMD_PUBLIC void simsimd_cos_u8_ice(simsimd_u8_t const *a, simsimd_u8_t const
20142014

20152015
simsimd_cos_u8_ice_cycle:
20162016
if (n < 64) {
2017-
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFF, n);
2017+
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFFull, n);
20182018
a_u8_vec = _mm512_maskz_loadu_epi8(mask, a);
20192019
b_u8_vec = _mm512_maskz_loadu_epi8(mask, b);
20202020
n = 0;
@@ -2085,7 +2085,7 @@ SIMSIMD_PUBLIC void simsimd_l2sq_i4x2_ice(simsimd_i4x2_t const *a, simsimd_i4x2_
20852085

20862086
simsimd_l2sq_i4x2_ice_cycle:
20872087
if (n_words < 64) {
2088-
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFF, n_words);
2088+
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFFull, n_words);
20892089
a_i4x2_vec = _mm512_maskz_loadu_epi8(mask, a);
20902090
b_i4x2_vec = _mm512_maskz_loadu_epi8(mask, b);
20912091
n_words = 0;
@@ -2199,7 +2199,7 @@ SIMSIMD_PUBLIC void simsimd_cos_i4x2_ice(simsimd_i4x2_t const *a, simsimd_i4x2_t
21992199

22002200
simsimd_cos_i4x2_ice_cycle:
22012201
if (n_words < 64) {
2202-
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFF, n_words);
2202+
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFFull, n_words);
22032203
a_i4x2_vec = _mm512_maskz_loadu_epi8(mask, a);
22042204
b_i4x2_vec = _mm512_maskz_loadu_epi8(mask, b);
22052205
n_words = 0;

0 commit comments

Comments (0)