Skip to content

Commit 637a8a7

Browse files
committed
Merge branch 'main-dev'
2 parents 2c06091 + 9976b88 commit 637a8a7

File tree

8 files changed

+195
-125
lines changed

8 files changed

+195
-125
lines changed

.github/workflows/prerelease.yml

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ jobs:
171171
deno-version: v2.x
172172

173173
- name: Test with Deno
174-
run: deno test -A
174+
run: deno test -A --no-check
175175

176176
test_rust:
177177
name: Test Rust
@@ -184,9 +184,28 @@ jobs:
184184
- name: Install Rust toolchain
185185
uses: moonrepo/setup-rust@v1
186186

187-
- name: Build and Test
187+
- name: Install 32-bit target and dependencies
188+
run: |
189+
rustup target add i686-unknown-linux-gnu
190+
sudo dpkg --add-architecture i386
191+
sudo apt-get update
192+
sudo apt-get install -y gcc-multilib g++-multilib libc6-dev-i386
193+
194+
- name: Test default build
188195
run: cargo test
189196

197+
- name: Test with all features
198+
run: cargo test --all-features
199+
200+
- name: Test no-std build (check only)
201+
run: cargo check --no-default-features
202+
203+
- name: Test i686 cross-compilation build
204+
run: cargo build --target i686-unknown-linux-gnu --all-features
205+
206+
- name: Test i686 cross-compilation (no-std)
207+
run: cargo build --target i686-unknown-linux-gnu --no-default-features
208+
190209
# Temporary workaround to run Swift tests on Linux
191210
# Based on: https://github.com/swift-actions/setup-swift/issues/591#issuecomment-1685710678
192211
test_ubuntu_swift:
@@ -209,7 +228,8 @@ jobs:
209228
runs-on: ubuntu-22.04
210229
strategy:
211230
matrix:
212-
python-version: ["38", "39", "310", "311", "312", "313", "313t"]
231+
python-version:
232+
["38", "39", "310", "311", "312", "313", "313t", "314", "314t"]
213233
needs: [test_python]
214234
steps:
215235
- name: Checkout
@@ -236,7 +256,8 @@ jobs:
236256
runs-on: ubuntu-24.04-arm
237257
strategy:
238258
matrix:
239-
python-version: ["38", "39", "310", "311", "312", "313", "313t"]
259+
python-version:
260+
["38", "39", "310", "311", "312", "313", "313t", "314", "314t"]
240261
needs: [test_python]
241262
steps:
242263
- name: Checkout
@@ -273,7 +294,8 @@ jobs:
273294
runs-on: macos-15
274295
strategy:
275296
matrix:
276-
python-version: ["38", "39", "310", "311", "312", "313", "313t"]
297+
python-version:
298+
["38", "39", "310", "311", "312", "313", "313t", "314", "314t"]
277299
needs: [test_python]
278300
steps:
279301
- name: Checkout
@@ -297,7 +319,8 @@ jobs:
297319
runs-on: macos-15
298320
strategy:
299321
matrix:
300-
python-version: ["38", "39", "310", "311", "312", "313", "313t"] #! Python 3.7 isn't supported on ARM macOS
322+
python-version:
323+
["38", "39", "310", "311", "312", "313", "313t", "314", "314t"]
301324
needs: [test_python]
302325
steps:
303326
- name: Checkout
@@ -321,9 +344,10 @@ jobs:
321344
runs-on: windows-2022
322345
strategy:
323346
matrix:
324-
python-version: ["38", "39", "310", "311", "312", "313", "313t"]
347+
python-version:
348+
["38", "39", "310", "311", "312", "313", "313t", "314", "314t"]
325349
architecture: [AMD64] # List ARM64 separately and avoid 32-bit
326-
#! ARM64 isn't supported for Python 3.7 and 3.8
350+
#! ARM64 isn't supported for Python 3.8
327351
include:
328352
- python-version: "39"
329353
architecture: ARM64
@@ -337,6 +361,10 @@ jobs:
337361
architecture: ARM64
338362
- python-version: "313t"
339363
architecture: ARM64
364+
- python-version: "314"
365+
architecture: ARM64
366+
- python-version: "314t"
367+
architecture: ARM64
340368
needs: [test_python]
341369
steps:
342370
- name: Checkout
@@ -363,7 +391,8 @@ jobs:
363391
runs-on: ubuntu-22.04
364392
strategy:
365393
matrix:
366-
python-version: ["38", "39", "310", "311", "312", "313", "313t"]
394+
python-version:
395+
["38", "39", "310", "311", "312", "313", "313t", "314", "314t"]
367396
needs: [test_python]
368397
steps:
369398
- name: Checkout

.github/workflows/release.yml

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,8 @@ jobs:
7474
needs: versioning
7575
strategy:
7676
matrix:
77-
python-version: ["38", "39", "310", "311", "312", "313", "313t"]
77+
python-version:
78+
["38", "39", "310", "311", "312", "313", "313t", "314", "314t"]
7879
steps:
7980
- uses: actions/checkout@v6
8081
with:
@@ -107,7 +108,8 @@ jobs:
107108
needs: versioning
108109
strategy:
109110
matrix:
110-
python-version: ["38", "39", "310", "311", "312", "313", "313t"]
111+
python-version:
112+
["38", "39", "310", "311", "312", "313", "313t", "314", "314t"]
111113
steps:
112114
- uses: actions/checkout@v6
113115
with:
@@ -150,7 +152,8 @@ jobs:
150152
needs: versioning
151153
strategy:
152154
matrix:
153-
python-version: ["38", "39", "310", "311", "312", "313", "313t"]
155+
python-version:
156+
["38", "39", "310", "311", "312", "313", "313t", "314", "314t"]
154157
steps:
155158
- uses: actions/checkout@v6
156159
with:
@@ -182,7 +185,8 @@ jobs:
182185
strategy:
183186
matrix:
184187
# Python 3.7 is not supported on macOS ARM, so the matrix starts at 3.8
185-
python-version: ["38", "39", "310", "311", "312", "313", "313t"]
188+
python-version:
189+
["38", "39", "310", "311", "312", "313", "313t", "314", "314t"]
186190
steps:
187191
- uses: actions/checkout@v6
188192
with:
@@ -213,7 +217,8 @@ jobs:
213217
needs: versioning
214218
strategy:
215219
matrix:
216-
python-version: ["38", "39", "310", "311", "312", "313", "313t"]
220+
python-version:
221+
["38", "39", "310", "311", "312", "313", "313t", "314", "314t"]
217222
architecture: [AMD64]
218223
include:
219224
- python-version: "39"
@@ -228,6 +233,10 @@ jobs:
228233
architecture: ARM64
229234
- python-version: "313t"
230235
architecture: ARM64
236+
- python-version: "314"
237+
architecture: ARM64
238+
- python-version: "314t"
239+
architecture: ARM64
231240
steps:
232241
- uses: actions/checkout@v6
233242
with:
@@ -261,7 +270,8 @@ jobs:
261270
needs: versioning
262271
strategy:
263272
matrix:
264-
python-version: ["38", "39", "310", "311", "312", "313", "313t"]
273+
python-version:
274+
["38", "39", "310", "311", "312", "313", "313t", "314", "314t"]
265275
steps:
266276
- uses: actions/checkout@v6
267277
with:

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,5 +157,5 @@ if (SIMSIMD_BUILD_SHARED)
157157
)
158158
endif ()
159159

160-
install(DIRECTORY ./include/ DESTINATION /usr/include/)
161-
install(DIRECTORY ./c/ DESTINATION /usr/src/${PROJECT_NAME}/)
160+
install(DIRECTORY include/ DESTINATION include)
161+
install(DIRECTORY c/ DESTINATION share/doc/${PROJECT_NAME}/src)

build.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,16 @@ fn build_simsimd() -> HashMap<String, bool> {
2323
.flag_if_supported("-pedantic") // Strict compliance when supported
2424
.warnings(false);
2525

26-
// Detect target architecture UPFRONT using Cargo's cross-compilation vars
27-
let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default();
28-
let target_bits = env::var("CARGO_CFG_TARGET_POINTER_WIDTH").unwrap_or_default();
26+
// On 32-bit x86, ensure proper stack alignment for floating-point operations
27+
// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=38534
28+
let target_arch = std::env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default();
29+
if target_arch == "x86" {
30+
build.flag_if_supported("-mstackrealign");
31+
build.flag_if_supported("-mpreferred-stack-boundary=4");
32+
}
2933

3034
// Set architecture-specific macros explicitly (like StringZilla)
35+
let target_bits = env::var("CARGO_CFG_TARGET_POINTER_WIDTH").unwrap_or_default();
3136
if target_arch == "x86_64" && target_bits == "64" {
3237
build.define("SIMSIMD_IS_64BIT_X86", "1");
3338
build.define("SIMSIMD_IS_64BIT_ARM", "0");

include/simsimd/binary.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ SIMSIMD_PUBLIC void simsimd_hamming_b8_ice(simsimd_b8_t const *a, simsimd_b8_t c
291291
__m512i xor2_count_vec = _mm512_popcnt_epi64(_mm512_xor_si512(a2_vec, b2_vec));
292292
xor_count = _mm512_reduce_add_epi64(_mm512_add_epi64(xor2_count_vec, xor1_count_vec));
293293
}
294-
else if (n_words <= 196) { // Up to 1568 bits.
294+
else if (n_words <= 192) { // Up to 1536 bits.
295295
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFF, n_words - 128);
296296
__m512i a1_vec = _mm512_loadu_epi8(a);
297297
__m512i b1_vec = _mm512_loadu_epi8(b);
@@ -374,7 +374,7 @@ SIMSIMD_PUBLIC void simsimd_jaccard_b8_ice(simsimd_b8_t const *a, simsimd_b8_t c
374374
intersection = _mm512_reduce_add_epi64(_mm512_add_epi64(and2_count_vec, and1_count_vec));
375375
union_ = _mm512_reduce_add_epi64(_mm512_add_epi64(or2_count_vec, or1_count_vec));
376376
}
377-
else if (n_words <= 196) { // Up to 1568 bits.
377+
else if (n_words <= 192) { // Up to 1536 bits.
378378
__mmask64 mask = (__mmask64)_bzhi_u64(0xFFFFFFFFFFFFFFFF, n_words - 128);
379379
__m512i a1_vec = _mm512_loadu_epi8(a);
380380
__m512i b1_vec = _mm512_loadu_epi8(b);

include/simsimd/probability.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,8 @@ SIMSIMD_PUBLIC void simsimd_js_f16_sapphire(simsimd_f16_t const* a, simsimd_f16_
108108
d += ai * SIMSIMD_LOG((ai + epsilon) / (mi + epsilon)); \
109109
d += bi * SIMSIMD_LOG((bi + epsilon) / (mi + epsilon)); \
110110
} \
111-
*result = SIMSIMD_SQRT(((simsimd_distance_t)d / 2)); \
111+
simsimd_distance_t d_half = ((simsimd_distance_t)d / 2); \
112+
*result = d_half > 0 ? SIMSIMD_SQRT(d_half) : 0; \
112113
}
113114

114115
SIMSIMD_MAKE_KL(serial, f64, f64, SIMSIMD_DEREFERENCE, SIMSIMD_F32_DIVISION_EPSILON) // simsimd_kl_f64_serial
@@ -225,7 +226,7 @@ SIMSIMD_PUBLIC void simsimd_js_f32_neon(simsimd_f32_t const *a, simsimd_f32_t co
225226

226227
simsimd_f32_t log2_normalizer = 0.693147181f;
227228
simsimd_f32_t sum = vaddvq_f32(sum_vec) * log2_normalizer / 2;
228-
*result = _simsimd_sqrt_f32_neon(sum);
229+
*result = sum > 0 ? _simsimd_sqrt_f32_neon(sum) : 0;
229230
}
230231

231232
#pragma clang attribute pop
@@ -298,7 +299,7 @@ SIMSIMD_PUBLIC void simsimd_js_f16_neon(simsimd_f16_t const *a, simsimd_f16_t co
298299

299300
simsimd_f32_t log2_normalizer = 0.693147181f;
300301
simsimd_f32_t sum = vaddvq_f32(sum_vec) * log2_normalizer / 2;
301-
*result = _simsimd_sqrt_f32_neon(sum);
302+
*result = sum > 0 ? _simsimd_sqrt_f32_neon(sum) : 0;
302303
}
303304

304305
#pragma clang attribute pop
@@ -403,7 +404,7 @@ SIMSIMD_PUBLIC void simsimd_js_f16_haswell(simsimd_f16_t const *a, simsimd_f16_t
403404
simsimd_f32_t log2_normalizer = 0.693147181f;
404405
simsimd_f32_t sum = _simsimd_reduce_f32x8_haswell(sum_vec);
405406
sum *= log2_normalizer / 2;
406-
*result = _simsimd_sqrt_f32_haswell(sum);
407+
*result = sum > 0 ? _simsimd_sqrt_f32_haswell(sum) : 0;
407408
}
408409

409410
#pragma clang attribute pop
@@ -498,7 +499,7 @@ SIMSIMD_PUBLIC void simsimd_js_f32_skylake(simsimd_f32_t const *a, simsimd_f32_t
498499
simsimd_f32_t log2_normalizer = 0.693147181f;
499500
simsimd_f32_t sum = _mm512_reduce_add_ps(_mm512_add_ps(sum_a_vec, sum_b_vec));
500501
sum *= log2_normalizer / 2;
501-
*result = _simsimd_sqrt_f32_haswell(sum);
502+
*result = sum > 0 ? _simsimd_sqrt_f32_haswell(sum) : 0;
502503
}
503504

504505
#pragma clang attribute pop
@@ -591,7 +592,7 @@ SIMSIMD_PUBLIC void simsimd_js_f16_sapphire(simsimd_f16_t const *a, simsimd_f16_
591592
simsimd_f32_t log2_normalizer = 0.693147181f;
592593
simsimd_f32_t sum = _mm512_reduce_add_ph(_mm512_add_ph(sum_a_vec, sum_b_vec));
593594
sum *= log2_normalizer / 2;
594-
*result = _simsimd_sqrt_f32_haswell(sum);
595+
*result = sum > 0 ? _simsimd_sqrt_f32_haswell(sum) : 0;
595596
}
596597

597598
#pragma clang attribute pop

pyproject.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@
44
# - `manylinux` and `musllinux` wheels for Linux on i686, ppc64le, s390x;
55
# - `macos` wheels for x86_64, arm64;
66
# - `windows` wheels for AMD64, ARM64.
7-
# * for 7 Python versions from 3.8 to 3.13 (+ 3.13t).
7+
# * for 7 Python versions from 3.8 to 3.14.
8+
# * for 2 free-threaded Python builds: 3.13t and 3.14t.
89
# * running thousands of fuzzy tests on each wheel.
9-
# = meaning 12 platforms * 7 Python versions = 84 builds.
10+
# = meaning 12 platforms * 9 Python versions = 108 builds.
1011
[build-system]
1112
requires = ["setuptools>=42"]
1213
build-backend = "setuptools.build_meta"

0 commit comments

Comments
 (0)