Skip to content

Commit b55f013

Browse files
committed
Break: Return types & 4-way finalizers
1 parent 31fda77 commit b55f013

File tree

5 files changed: +1649 additions, -1513 deletions (lines changed)

5 files changed: +1649 additions, -1513 deletions (lines changed)

include/simsimd/curved.h

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -204,23 +204,23 @@ SIMSIMD_PUBLIC void simsimd_mahalanobis_f32_accurate(simsimd_f32_t const *a, sim
204204
simsimd_f32_t const *c, simsimd_size_t n, simsimd_f64_t *result);
205205
/** @copydoc simsimd_bilinear_f16 */
206206
SIMSIMD_PUBLIC void simsimd_bilinear_f16_accurate(simsimd_f16_t const *a, simsimd_f16_t const *b,
207-
simsimd_f16_t const *c, simsimd_size_t n, simsimd_f32_t *result);
207+
simsimd_f16_t const *c, simsimd_size_t n, simsimd_f64_t *result);
208208
/** @copydoc simsimd_bilinear_f16c */
209209
SIMSIMD_PUBLIC void simsimd_bilinear_f16c_accurate(simsimd_f16c_t const *a, simsimd_f16c_t const *b,
210-
simsimd_f16c_t const *c, simsimd_size_t n, simsimd_f32c_t *results);
210+
simsimd_f16c_t const *c, simsimd_size_t n, simsimd_f64c_t *results);
211211
/** @copydoc simsimd_mahalanobis_f16 */
212212
SIMSIMD_PUBLIC void simsimd_mahalanobis_f16_accurate(simsimd_f16_t const *a, simsimd_f16_t const *b,
213-
simsimd_f16_t const *c, simsimd_size_t n, simsimd_f32_t *result);
213+
simsimd_f16_t const *c, simsimd_size_t n, simsimd_f64_t *result);
214214
/** @copydoc simsimd_bilinear_bf16 */
215215
SIMSIMD_PUBLIC void simsimd_bilinear_bf16_accurate(simsimd_bf16_t const *a, simsimd_bf16_t const *b,
216-
simsimd_bf16_t const *c, simsimd_size_t n, simsimd_f32_t *result);
216+
simsimd_bf16_t const *c, simsimd_size_t n, simsimd_f64_t *result);
217217
/** @copydoc simsimd_bilinear_bf16c */
218218
SIMSIMD_PUBLIC void simsimd_bilinear_bf16c_accurate(simsimd_bf16c_t const *a, simsimd_bf16c_t const *b,
219219
simsimd_bf16c_t const *c, simsimd_size_t n,
220-
simsimd_f32c_t *results);
220+
simsimd_f64c_t *results);
221221
/** @copydoc simsimd_mahalanobis_bf16 */
222222
SIMSIMD_PUBLIC void simsimd_mahalanobis_bf16_accurate(simsimd_bf16_t const *a, simsimd_bf16_t const *b,
223-
simsimd_bf16_t const *c, simsimd_size_t n, simsimd_f32_t *result);
223+
simsimd_bf16_t const *c, simsimd_size_t n, simsimd_f64_t *result);
224224
#if SIMSIMD_TARGET_NEON
225225
/** @copydoc simsimd_bilinear_f32 */
226226
SIMSIMD_PUBLIC void simsimd_bilinear_f32_neon(simsimd_f32_t const *a, simsimd_f32_t const *b, simsimd_f32_t const *c,

include/simsimd/dots.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -968,7 +968,7 @@ SIMSIMD_MAKE_DOTS_INNER(i8i8i32_neon, i8, i32, simsimd_b128_vec_t, simsimd_dot_i
968968
// U8 GEMM: k_tile=16 (16 u8s = 16 bytes = NEON register width)
969969
SIMSIMD_MAKE_DOTS_SERIAL_PACKED_SIZE(neon, u8, i32, 16)
970970
SIMSIMD_MAKE_DOTS_SERIAL_PACK(neon, u8, i32, 16)
971-
SIMSIMD_MAKE_DOTS_INNER(u8u8i32_neon, u8, i32, simsimd_b128_vec_t, simsimd_dot_u8x16_state_neon_t,
971+
SIMSIMD_MAKE_DOTS_INNER(u8u8i32_neon, u8, u32, simsimd_b128_vec_t, simsimd_dot_u8x16_state_neon_t,
972972
simsimd_dot_u8x16_init_neon, _simsimd_load_b128_neon, _simsimd_partial_load_b8x16_neon,
973973
simsimd_dot_u8x16_update_neon, simsimd_dot_u8x16_finalize_neon,
974974
/*k_tile=*/16, /*MR=*/4, /*MC=*/128, /*NC=*/2048, /*KC=*/256)
@@ -1090,7 +1090,7 @@ SIMSIMD_MAKE_DOTS_INNER(i8i8i32_haswell, i8, i32, simsimd_b256_vec_t, simsimd_do
10901090
// U8 GEMM: k_tile=32 (32 u8s = 32 bytes = AVX2 register width)
10911091
SIMSIMD_MAKE_DOTS_SERIAL_PACKED_SIZE(haswell, u8, i32, 32)
10921092
SIMSIMD_MAKE_DOTS_SERIAL_PACK(haswell, u8, i32, 32)
1093-
SIMSIMD_MAKE_DOTS_INNER(u8u8i32_haswell, u8, i32, simsimd_b256_vec_t, simsimd_dot_u8x32_state_haswell_t,
1093+
SIMSIMD_MAKE_DOTS_INNER(u8u8i32_haswell, u8, u32, simsimd_b256_vec_t, simsimd_dot_u8x32_state_haswell_t,
10941094
simsimd_dot_u8x32_init_haswell, _simsimd_load_b256_haswell, _simsimd_partial_load_b8x32_haswell,
10951095
simsimd_dot_u8x32_update_haswell, simsimd_dot_u8x32_finalize_haswell,
10961096
/*k_tile=*/32, /*MR=*/4, /*MC=*/128, /*NC=*/2048, /*KC=*/256)
@@ -1199,7 +1199,7 @@ SIMSIMD_MAKE_DOTS_INNER(i8i8i32_ice, i8, i32, simsimd_b512_vec_t, simsimd_dot_i8
11991199
// U8 GEMM: k_tile=64 (64 u8s = 64 bytes = 1 cache line)
12001200
SIMSIMD_MAKE_DOTS_SERIAL_PACKED_SIZE(ice, u8, i32, SIMSIMD_DOTS_SERIAL_TILE_K_U8)
12011201
SIMSIMD_MAKE_DOTS_SERIAL_PACK(ice, u8, i32, SIMSIMD_DOTS_SERIAL_TILE_K_U8)
1202-
SIMSIMD_MAKE_DOTS_INNER(u8u8i32_ice, u8, i32, simsimd_b512_vec_t, simsimd_dot_u8x64_state_ice_t,
1202+
SIMSIMD_MAKE_DOTS_INNER(u8u8i32_ice, u8, u32, simsimd_b512_vec_t, simsimd_dot_u8x64_state_ice_t,
12031203
simsimd_dot_u8x64_init_ice, _simsimd_load_b512_skylake, _simsimd_partial_load_b8x64_skylake,
12041204
simsimd_dot_u8x64_update_ice, simsimd_dot_u8x64_finalize_ice,
12051205
/*k_tile=*/64, /*MR=*/4, /*MC=*/128, /*NC=*/2048, /*KC=*/256)

include/simsimd/sparse.h

Lines changed: 6 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -341,15 +341,12 @@ SIMSIMD_MAKE_SPARSE_DOT(accurate, u16, bf16, f64, simsimd_bf16_to_f64) // simsim
341341
*count = (simsimd_u32_t)intersection_size; \
342342
}
343343

344-
SIMSIMD_MAKE_INTERSECT_GALLOPING(serial, u16, size) // simsimd_intersect_u16_serial
345-
SIMSIMD_MAKE_INTERSECT_GALLOPING(serial, u32, size) // simsimd_intersect_u32_serial
346-
SIMSIMD_MAKE_SPARSE_DOT(serial, u16, bf16, f32, simsimd_bf16_to_f32) // simsimd_sparse_dot_u16bf16_serial
347-
348-
SIMSIMD_INTERNAL void _simsimd_f32_to_f32(simsimd_f32_t const *x, simsimd_f32_t *y) { *y = *x; }
349-
SIMSIMD_INTERNAL void _simsimd_f32_to_f64(simsimd_f32_t const *x, simsimd_f64_t *y) { *y = (simsimd_f64_t)*x; }
350-
351-
SIMSIMD_MAKE_SPARSE_DOT(serial, u32, f32, f32, _simsimd_f32_to_f32) // simsimd_sparse_dot_u32f32_serial
352-
SIMSIMD_MAKE_SPARSE_DOT(accurate, u32, f32, f64, _simsimd_f32_to_f64) // simsimd_sparse_dot_u32f32_accurate
344+
SIMSIMD_MAKE_INTERSECT_GALLOPING(serial, u16, size) // simsimd_intersect_u16_serial
345+
SIMSIMD_MAKE_INTERSECT_GALLOPING(serial, u32, size) // simsimd_intersect_u32_serial
346+
SIMSIMD_MAKE_SPARSE_DOT(serial, u16, bf16, f32, simsimd_bf16_to_f32) // simsimd_sparse_dot_u16bf16_serial
347+
SIMSIMD_MAKE_SPARSE_DOT(serial, u32, f32, f32, SIMSIMD_ASSIGN_FROM_TO) // simsimd_sparse_dot_u32f32_serial
348+
SIMSIMD_MAKE_SPARSE_DOT(accurate, u16, bf16, f64, _simsimd_bf16_to_f64) // simsimd_sparse_dot_u16bf16_accurate
349+
SIMSIMD_MAKE_SPARSE_DOT(accurate, u32, f32, f64, _simsimd_f32_to_f64) // simsimd_sparse_dot_u32f32_accurate
353350

354351
/* The AVX-512 implementations are inspired by the "Faster-Than-Native Alternatives
355352
* for x86 VP2INTERSECT Instructions" paper by Guille Diez-Canas, 2022.

0 commit comments

Comments (0)