@@ -153,8 +153,11 @@ SIMSIMD_MAKE_MAHALANOBIS(accurate, bf16, f64, SIMSIMD_BF16_TO_F32) // simsimd_ma
153
153
#if SIMSIMD_TARGET_NEON
154
154
#pragma GCC push_options
155
155
#pragma GCC target("arch=armv8.2-a+simd")
156
+ #ifdef __clang__
156
157
#pragma clang attribute push(__attribute__((target("arch=armv8.2-a+simd"))), apply_to = function)
157
158
159
+ #endif
160
+
158
161
SIMSIMD_PUBLIC void simsimd_bilinear_f32_neon (simsimd_f32_t const * a , simsimd_f32_t const * b , simsimd_f32_t const * c ,
159
162
simsimd_size_t n , simsimd_distance_t * result ) {
160
163
float32x4_t sum_vec = vdupq_n_f32 (0 );
@@ -220,15 +223,20 @@ SIMSIMD_PUBLIC void simsimd_mahalanobis_f32_neon(simsimd_f32_t const* a, simsimd
220
223
* result = sum ;
221
224
}
222
225
226
+ #ifdef __clang__
223
227
#pragma clang attribute pop
228
+ #endif
224
229
#pragma GCC pop_options
225
230
#endif // SIMSIMD_TARGET_NEON
226
231
227
232
#if SIMSIMD_TARGET_NEON_F16
228
233
#pragma GCC push_options
229
234
#pragma GCC target("arch=armv8.2-a+simd+fp16")
235
+ #ifdef __clang__
230
236
#pragma clang attribute push(__attribute__((target("arch=armv8.2-a+simd+fp16"))), apply_to = function)
231
237
238
+ #endif
239
+
232
240
SIMSIMD_PUBLIC void simsimd_bilinear_f16_neon (simsimd_f16_t const * a , simsimd_f16_t const * b , simsimd_f16_t const * c ,
233
241
simsimd_size_t n , simsimd_distance_t * result ) {
234
242
float32x4_t sum_vec = vdupq_n_f32 (0 );
@@ -301,15 +309,20 @@ SIMSIMD_PUBLIC void simsimd_mahalanobis_f16_neon(simsimd_f16_t const* a, simsimd
301
309
* result = sum ;
302
310
}
303
311
312
+ #ifdef __clang__
304
313
#pragma clang attribute pop
314
+ #endif
305
315
#pragma GCC pop_options
306
316
#endif // SIMSIMD_TARGET_NEON_F16
307
317
308
318
#if SIMSIMD_TARGET_NEON_BF16
309
319
#pragma GCC push_options
310
320
#pragma GCC target("arch=armv8.6-a+simd+bf16")
321
+ #ifdef __clang__
311
322
#pragma clang attribute push(__attribute__((target("arch=armv8.6-a+simd+bf16"))), apply_to = function)
312
323
324
+ #endif
325
+
313
326
SIMSIMD_PUBLIC void simsimd_bilinear_bf16_neon (simsimd_bf16_t const * a , simsimd_bf16_t const * b ,
314
327
simsimd_bf16_t const * c , simsimd_size_t n , simsimd_distance_t * result ) {
315
328
float32x4_t sum_vec = vdupq_n_f32 (0 );
@@ -401,7 +414,9 @@ SIMSIMD_PUBLIC void simsimd_mahalanobis_bf16_neon(simsimd_bf16_t const* a, simsi
401
414
* result = sum ;
402
415
}
403
416
417
+ #ifdef __clang__
404
418
#pragma clang attribute pop
419
+ #endif
405
420
#pragma GCC pop_options
406
421
#endif // SIMSIMD_TARGET_NEON_BF16
407
422
@@ -411,8 +426,11 @@ SIMSIMD_PUBLIC void simsimd_mahalanobis_bf16_neon(simsimd_bf16_t const* a, simsi
411
426
#if SIMSIMD_TARGET_HASWELL
412
427
#pragma GCC push_options
413
428
#pragma GCC target("avx2", "f16c", "fma")
429
+ #ifdef __clang__
414
430
#pragma clang attribute push(__attribute__((target("avx2,f16c,fma"))), apply_to = function)
415
431
432
+ #endif
433
+
416
434
SIMSIMD_PUBLIC void simsimd_bilinear_f16_haswell (simsimd_f16_t const * a , simsimd_f16_t const * b , simsimd_f16_t const * c ,
417
435
simsimd_size_t n , simsimd_distance_t * result ) {
418
436
__m256 sum_vec = _mm256_setzero_ps ();
@@ -558,15 +576,20 @@ SIMSIMD_PUBLIC void simsimd_mahalanobis_bf16_haswell(simsimd_bf16_t const* a, si
558
576
* result = sum ;
559
577
}
560
578
579
+ #ifdef __clang__
561
580
#pragma clang attribute pop
581
+ #endif
562
582
#pragma GCC pop_options
563
583
#endif // SIMSIMD_TARGET_HASWELL
564
584
565
585
#if SIMSIMD_TARGET_SKYLAKE
566
586
#pragma GCC push_options
567
587
#pragma GCC target("avx2", "avx512f", "avx512vl", "bmi2")
588
+ #ifdef __clang__
568
589
#pragma clang attribute push(__attribute__((target("avx2,avx512f,avx512vl,bmi2"))), apply_to = function)
569
590
591
+ #endif
592
+
570
593
SIMSIMD_PUBLIC void simsimd_bilinear_f32_skylake (simsimd_f32_t const * a , simsimd_f32_t const * b , simsimd_f32_t const * c ,
571
594
simsimd_size_t n , simsimd_distance_t * result ) {
572
595
simsimd_size_t tail_length = n % 16 ;
@@ -634,16 +657,21 @@ SIMSIMD_PUBLIC void simsimd_mahalanobis_f32_skylake(simsimd_f32_t const* a, sims
634
657
* result = _mm512_reduce_add_ps (sum_vec );
635
658
}
636
659
660
+ #ifdef __clang__
637
661
#pragma clang attribute pop
662
+ #endif
638
663
#pragma GCC pop_options
639
664
#endif // SIMSIMD_TARGET_SKYLAKE
640
665
641
666
#if SIMSIMD_TARGET_GENOA
642
667
#pragma GCC push_options
643
668
#pragma GCC target("avx2", "avx512f", "avx512vl", "bmi2", "avx512bw", "avx512bf16")
669
+ #ifdef __clang__
644
670
#pragma clang attribute push(__attribute__((target("avx2,avx512f,avx512vl,bmi2,avx512bw,avx512bf16"))), \
645
671
apply_to = function)
646
672
673
+ #endif
674
+
647
675
SIMSIMD_PUBLIC void simsimd_bilinear_bf16_genoa (simsimd_bf16_t const * a , simsimd_bf16_t const * b ,
648
676
simsimd_bf16_t const * c , simsimd_size_t n , simsimd_distance_t * result ) {
649
677
simsimd_size_t tail_length = n % 32 ;
@@ -711,16 +739,21 @@ SIMSIMD_PUBLIC void simsimd_mahalanobis_bf16_genoa(simsimd_bf16_t const* a, sims
711
739
* result = _mm512_reduce_add_ps (sum_vec );
712
740
}
713
741
742
+ #ifdef __clang__
714
743
#pragma clang attribute pop
744
+ #endif
715
745
#pragma GCC pop_options
716
746
#endif // SIMSIMD_TARGET_GENOA
717
747
718
748
#if SIMSIMD_TARGET_SAPPHIRE
719
749
#pragma GCC push_options
720
750
#pragma GCC target("avx2", "avx512f", "avx512vl", "bmi2", "avx512bw", "avx512fp16")
751
+ #ifdef __clang__
721
752
#pragma clang attribute push(__attribute__((target("avx2,avx512f,avx512vl,bmi2,avx512bw,avx512fp16"))), \
722
753
apply_to = function)
723
754
755
+ #endif
756
+
724
757
SIMSIMD_PUBLIC void simsimd_bilinear_f16_sapphire (simsimd_f16_t const * a , simsimd_f16_t const * b ,
725
758
simsimd_f16_t const * c , simsimd_size_t n ,
726
759
simsimd_distance_t * result ) {
@@ -800,7 +833,9 @@ SIMSIMD_PUBLIC void simsimd_mahalanobis_bf16_sapphire(simsimd_bf16_t const* a, s
800
833
simsimd_bf16_t const * c , simsimd_size_t n ,
801
834
simsimd_distance_t * result ) {}
802
835
836
+ #ifdef __clang__
803
837
#pragma clang attribute pop
838
+ #endif
804
839
#pragma GCC pop_options
805
840
#endif // SIMSIMD_TARGET_SAPPHIRE
806
841
#endif // SIMSIMD_TARGET_X86
0 commit comments