Skip to content

Commit ae6806a

Browse files
ashvardanian and kukugpt committed
Fix: Avoid LibC symbols
Closes #229 Co-authored-by: kukugpt <[email protected]>
1 parent f333bb5 commit ae6806a

File tree

2 files changed

+40
-17
lines changed

2 files changed

+40
-17
lines changed

c/lib.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@
77
#define SIMSIMD_NATIVE_F16 0
88
#define SIMSIMD_NATIVE_BF16 0
99

10+
/* Override the primary serial operations to avoid the LibC dependency.
11+
*/
12+
#define SIMSIMD_SQRT(x) simsimd_approximate_square_root(x)
13+
#define SIMSIMD_RSQRT(x) simsimd_approximate_inverse_square_root(x)
14+
#define SIMSIMD_LOG(x) simsimd_approximate_log(x)
15+
1016
/* Depending on the Operating System, the following intrinsics are available
1117
* on recent compiler toolchains:
1218
*

include/simsimd/types.h

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
#elif defined(__GNUC__) || defined(__clang__)
3535
#define SIMSIMD_DYNAMIC __attribute__((visibility("default")))
3636
#define SIMSIMD_PUBLIC __attribute__((unused)) inline static
37-
#define SIMSIMD_INTERNAL __attribute__((always_inline)) inline static
37+
#define SIMSIMD_INTERNAL inline static
3838
#else
3939
#define SIMSIMD_DYNAMIC
4040
#define SIMSIMD_PUBLIC inline static
@@ -436,16 +436,20 @@ SIMSIMD_STATIC_ASSERT(sizeof(simsimd_bf16_t) == 2, simsimd_bf16_t_must_be_2_byte
436436
#endif
437437

438438
#if !defined(SIMSIMD_F32_TO_I8)
439-
#define SIMSIMD_F32_TO_I8(x, y) *(y) = (simsimd_i8_t)fminf(fmaxf(roundf(x), -128), 127)
439+
/* Rounds the float `x` half-away-from-zero, saturates it to [-128, 127] and
 * stores the result through the pointer `y`, without calling into LibC.
 * Both range checks are false for NaN, so NaN falls through to -128 and never
 * reaches the float-to-int cast, whose behavior is undefined for NaN.
 * NOTE: `x` is evaluated several times; pass an expression without side effects. */
#define SIMSIMD_F32_TO_I8(x, y) \
    (*(y) = (simsimd_i8_t)((x) >= 127 ? 127 : ((x) > -128 ? (int)((x) + ((x) < 0 ? -0.5f : 0.5f)) : -128)))
440441
#endif
441442
#if !defined(SIMSIMD_F32_TO_U8)
442-
#define SIMSIMD_F32_TO_U8(x, y) *(y) = (simsimd_u8_t)fminf(fmaxf(roundf(x), 0), 255)
443+
/* Rounds the float `x` to the nearest integer (halves round up), saturates it
 * to [0, 255] and stores the result through the pointer `y`, without LibC.
 * Only strictly positive values reach the cast, so the rounding offset is
 * always +0.5f; zero, negative inputs and NaN all produce 0, which keeps NaN
 * away from the float-to-int cast, whose behavior is undefined for NaN.
 * NOTE: `x` is evaluated several times; pass an expression without side effects. */
#define SIMSIMD_F32_TO_U8(x, y) \
    (*(y) = (simsimd_u8_t)((x) >= 255 ? 255 : ((x) > 0 ? (int)((x) + 0.5f) : 0)))
443445
#endif
444446
#if !defined(SIMSIMD_F64_TO_I8)
445-
#define SIMSIMD_F64_TO_I8(x, y) *(y) = (simsimd_i8_t)fmin(fmax(round(x), -128), 127)
447+
/* Rounds the double `x` half-away-from-zero, saturates it to [-128, 127] and
 * stores the result through the pointer `y`, without calling into LibC.
 * Both range checks are false for NaN, so NaN falls through to -128 and never
 * reaches the float-to-int cast, whose behavior is undefined for NaN.
 * NOTE: `x` is evaluated several times; pass an expression without side effects. */
#define SIMSIMD_F64_TO_I8(x, y) \
    (*(y) = (simsimd_i8_t)((x) >= 127 ? 127 : ((x) > -128 ? (int)((x) + ((x) < 0 ? -0.5 : 0.5)) : -128)))
446449
#endif
447450
#if !defined(SIMSIMD_F64_TO_U8)
448-
#define SIMSIMD_F64_TO_U8(x, y) *(y) = (simsimd_u8_t)fmin(fmax(round(x), 0), 255)
451+
/* Rounds the double `x` to the nearest integer (halves round up), saturates it
 * to [0, 255] and stores the result through the pointer `y`, without LibC.
 * Only strictly positive values reach the cast, so the rounding offset is
 * always +0.5; zero, negative inputs and NaN all produce 0, which keeps NaN
 * away from the float-to-int cast, whose behavior is undefined for NaN.
 * NOTE: `x` is evaluated several times; pass an expression without side effects. */
#define SIMSIMD_F64_TO_U8(x, y) \
    (*(y) = (simsimd_u8_t)((x) >= 255 ? 255 : ((x) > 0 ? (int)((x) + 0.5) : 0)))
449453
#endif
450454

451455
/** @brief Convenience type for half-precision floating-point type conversions. */
@@ -467,20 +471,33 @@ typedef union {
467471
* https://web.archive.org/web/20210208132927/http://assemblyrequired.crashworks.org/timing-square-root/
468472
* https://stackoverflow.com/a/41460625/2766161
469473
*/
470-
SIMSIMD_PUBLIC simsimd_f32_t simsimd_approximate_inverse_square_root(simsimd_f32_t number) {
474+
SIMSIMD_INTERNAL simsimd_f32_t simsimd_approximate_inverse_square_root(simsimd_f32_t number) {
    // View the input through the float/integer union so its bit pattern can be
    // manipulated with integer arithmetic to form the initial estimate.
    simsimd_f32i32_t bits;
    bits.f = number;
    bits.i = 0x5F1FFFF9 - (bits.i >> 1);
    // A single refinement step (Newton-Raphson style) with tuned constants
    // improves the accuracy of that first estimate.
    simsimd_f32_t const estimate = bits.f;
    bits.f = estimate * (0.703952253f * (2.38924456f - number * estimate * estimate));
    return bits.f;
}
477482

483+
/**
484+
* @brief Approximates `sqrt(x)` using the fast inverse square root trick
485+
* with adjustments for direct square root approximation.
486+
*
487+
* Similar to `rsqrt` approximation but multiplies by `number` to get `sqrt`.
488+
* This technique is useful where `sqrt` approximation is needed in performance-critical code,
489+
* though modern hardware provides optimized alternatives.
490+
*/
491+
SIMSIMD_INTERNAL simsimd_f32_t simsimd_approximate_square_root(simsimd_f32_t number) {
    // sqrt(x) == x * (1 / sqrt(x)), so scale the input by the approximate
    // reciprocal square root instead of calling into LibC.
    simsimd_f32_t const inverse_root = simsimd_approximate_inverse_square_root(number);
    return number * inverse_root;
}
494+
478495
/**
479496
* @brief Computes `log(x)` using the Mercator series.
480497
* The series for `log(1 + x)` converges only for `x` in (-1, 1], i.e. for `number` in (0, 2].
481498
* Published in 1668 in "Logarithmotechnia".
482499
*/
483-
SIMSIMD_PUBLIC simsimd_f32_t simsimd_approximate_log(simsimd_f32_t number) {
500+
SIMSIMD_INTERNAL simsimd_f32_t simsimd_approximate_log(simsimd_f32_t number) {
484501
simsimd_f32_t x = number - 1;
485502
simsimd_f32_t x2 = x * x;
486503
simsimd_f32_t x3 = x * x * x;
@@ -497,7 +514,7 @@ SIMSIMD_PUBLIC simsimd_f32_t simsimd_approximate_log(simsimd_f32_t number) {
497514
* https://gist.github.com/milhidaka/95863906fe828198f47991c813dbe233
498515
* https://github.com/OpenCyphal/libcanard/blob/636795f4bc395f56af8d2c61d3757b5e762bb9e5/canard.c#L811-L834
499516
*/
500-
SIMSIMD_PUBLIC simsimd_f32_t simsimd_f16_to_f32(simsimd_f16_t const *x_ptr) {
517+
SIMSIMD_INTERNAL simsimd_f32_t simsimd_f16_to_f32(simsimd_f16_t const *x_ptr) {
501518
unsigned short x = *(unsigned short const *)x_ptr;
502519
unsigned int exponent = (x & 0x7C00) >> 10;
503520
unsigned int mantissa = (x & 0x03FF) << 13;
@@ -519,7 +536,7 @@ SIMSIMD_PUBLIC simsimd_f32_t simsimd_f16_to_f32(simsimd_f16_t const *x_ptr) {
519536
* https://gist.github.com/milhidaka/95863906fe828198f47991c813dbe233
520537
* https://github.com/OpenCyphal/libcanard/blob/636795f4bc395f56af8d2c61d3757b5e762bb9e5/canard.c#L811-L834
521538
*/
522-
SIMSIMD_PUBLIC void simsimd_f32_to_f16(simsimd_f32_t x, simsimd_f16_t *result_ptr) {
539+
SIMSIMD_INTERNAL void simsimd_f32_to_f16(simsimd_f32_t x, simsimd_f16_t *result_ptr) {
523540
simsimd_f32i32_t conv;
524541
conv.f = x;
525542
unsigned int b = conv.i + 0x00001000;
@@ -538,7 +555,7 @@ SIMSIMD_PUBLIC void simsimd_f32_to_f16(simsimd_f32_t x, simsimd_f16_t *result_pt
538555
* https://stackoverflow.com/questions/55253233/convert-fp32-to-bfloat16-in-c/55254307#55254307
539556
* https://cloud.google.com/blog/products/ai-machine-learning/bfloat16-the-secret-to-high-performance-on-cloud-tpus
540557
*/
541-
SIMSIMD_PUBLIC simsimd_f32_t simsimd_bf16_to_f32(simsimd_bf16_t const *x_ptr) {
558+
SIMSIMD_INTERNAL simsimd_f32_t simsimd_bf16_to_f32(simsimd_bf16_t const *x_ptr) {
542559
unsigned short x = *(unsigned short const *)x_ptr;
543560
simsimd_f32i32_t conv;
544561
conv.i = x << 16; // Zero extends the mantissa
@@ -551,7 +568,7 @@ SIMSIMD_PUBLIC simsimd_f32_t simsimd_bf16_to_f32(simsimd_bf16_t const *x_ptr) {
551568
* https://stackoverflow.com/questions/55253233/convert-fp32-to-bfloat16-in-c/55254307#55254307
552569
* https://cloud.google.com/blog/products/ai-machine-learning/bfloat16-the-secret-to-high-performance-on-cloud-tpus
553570
*/
554-
SIMSIMD_PUBLIC void simsimd_f32_to_bf16(simsimd_f32_t x, simsimd_bf16_t *result_ptr) {
571+
SIMSIMD_INTERNAL void simsimd_f32_to_bf16(simsimd_f32_t x, simsimd_bf16_t *result_ptr) {
555572
simsimd_f32i32_t conv;
556573
conv.f = x;
557574
conv.i += 0x8000; // Rounding is optional
@@ -561,12 +578,12 @@ SIMSIMD_PUBLIC void simsimd_f32_to_bf16(simsimd_f32_t x, simsimd_bf16_t *result_
561578
*(unsigned short *)result_ptr = (unsigned short)conv.i;
562579
}
563580

564-
SIMSIMD_PUBLIC simsimd_u32_t simsimd_u32_rol(simsimd_u32_t x, int n) { return (x << n) | (x >> (32 - n)); }
565-
SIMSIMD_PUBLIC simsimd_u16_t simsimd_u16_rol(simsimd_u16_t x, int n) { return (x << n) | (x >> (16 - n)); }
566-
SIMSIMD_PUBLIC simsimd_u8_t simsimd_u8_rol(simsimd_u8_t x, int n) { return (x << n) | (x >> (8 - n)); }
567-
SIMSIMD_PUBLIC simsimd_u32_t simsimd_u32_ror(simsimd_u32_t x, int n) { return (x >> n) | (x << (32 - n)); }
568-
SIMSIMD_PUBLIC simsimd_u16_t simsimd_u16_ror(simsimd_u16_t x, int n) { return (x >> n) | (x << (16 - n)); }
569-
SIMSIMD_PUBLIC simsimd_u8_t simsimd_u8_ror(simsimd_u8_t x, int n) { return (x >> n) | (x << (8 - n)); }
581+
/**
 *  @brief  Rotates `x` left by `n` bit positions.
 *          The count is reduced modulo 32, so `n == 0` and `n == 32` return `x`
 *          unchanged instead of shifting by the full type width, which is
 *          undefined behavior in C. Assumes `simsimd_u32_t` is 32 bits wide.
 */
SIMSIMD_INTERNAL simsimd_u32_t simsimd_u32_rol(simsimd_u32_t x, int n) {
    unsigned int const count = (unsigned int)n & 31u;
    // The second mask turns the complementary shift of 32 (when count == 0) into 0.
    return (x << count) | (x >> ((32u - count) & 31u));
}
582+
/**
 *  @brief  Rotates `x` left by `n` bit positions.
 *          The count is reduced modulo 16, so any `n` yields a well-defined
 *          shift; results for `n` in [0, 16] are unchanged.
 *          Assumes `simsimd_u16_t` is 16 bits wide.
 */
SIMSIMD_INTERNAL simsimd_u16_t simsimd_u16_rol(simsimd_u16_t x, int n) {
    unsigned int const count = (unsigned int)n & 15u;
    // Shift in an unsigned intermediate; the cast drops the bits above the low 16.
    unsigned int const wide = x;
    return (simsimd_u16_t)((wide << count) | (wide >> ((16u - count) & 15u)));
}
583+
/**
 *  @brief  Rotates `x` left by `n` bit positions.
 *          The count is reduced modulo 8, so any `n` yields a well-defined
 *          shift; results for `n` in [0, 8] are unchanged.
 *          Assumes `simsimd_u8_t` is 8 bits wide.
 */
SIMSIMD_INTERNAL simsimd_u8_t simsimd_u8_rol(simsimd_u8_t x, int n) {
    unsigned int const count = (unsigned int)n & 7u;
    // Shift in an unsigned intermediate; the cast drops the bits above the low 8.
    unsigned int const wide = x;
    return (simsimd_u8_t)((wide << count) | (wide >> ((8u - count) & 7u)));
}
584+
/**
 *  @brief  Rotates `x` right by `n` bit positions.
 *          The count is reduced modulo 32, so `n == 0` and `n == 32` return `x`
 *          unchanged instead of shifting by the full type width, which is
 *          undefined behavior in C. Assumes `simsimd_u32_t` is 32 bits wide.
 */
SIMSIMD_INTERNAL simsimd_u32_t simsimd_u32_ror(simsimd_u32_t x, int n) {
    unsigned int const count = (unsigned int)n & 31u;
    // The second mask turns the complementary shift of 32 (when count == 0) into 0.
    return (x >> count) | (x << ((32u - count) & 31u));
}
585+
/**
 *  @brief  Rotates `x` right by `n` bit positions.
 *          The count is reduced modulo 16, so any `n` yields a well-defined
 *          shift; results for `n` in [0, 16] are unchanged.
 *          Assumes `simsimd_u16_t` is 16 bits wide.
 */
SIMSIMD_INTERNAL simsimd_u16_t simsimd_u16_ror(simsimd_u16_t x, int n) {
    unsigned int const count = (unsigned int)n & 15u;
    // Shift in an unsigned intermediate; the cast drops the bits above the low 16.
    unsigned int const wide = x;
    return (simsimd_u16_t)((wide >> count) | (wide << ((16u - count) & 15u)));
}
586+
/**
 *  @brief  Rotates `x` right by `n` bit positions.
 *          The count is reduced modulo 8, so any `n` yields a well-defined
 *          shift; results for `n` in [0, 8] are unchanged.
 *          Assumes `simsimd_u8_t` is 8 bits wide.
 */
SIMSIMD_INTERNAL simsimd_u8_t simsimd_u8_ror(simsimd_u8_t x, int n) {
    unsigned int const count = (unsigned int)n & 7u;
    // Shift in an unsigned intermediate; the cast drops the bits above the low 8.
    unsigned int const wide = x;
    return (simsimd_u8_t)((wide >> count) | (wide << ((8u - count) & 7u)));
}
570587

571588
#ifdef __cplusplus
572589
} // extern "C"

0 commit comments

Comments
 (0)