@@ -1941,7 +1941,7 @@ SZ_PUBLIC sz_cptr_t sz_find_byte_serial(sz_cptr_t h, sz_size_t h_length, sz_cptr
1941
1941
if (!h_length) return SZ_NULL_CHAR;
1942
1942
sz_cptr_t const h_end = h + h_length;
1943
1943
1944
- #if !SZ_DETECT_BIG_ENDIAN // Use SWAR only on little-endian platforms for brevety .
1944
+ #if !SZ_DETECT_BIG_ENDIAN // Use SWAR only on little-endian platforms for brevity .
1945
1945
#if !SZ_USE_MISALIGNED_LOADS // Process the misaligned head, to avoid UB on unaligned 64-bit loads.
1946
1946
for (; ((sz_size_t )h & 7ull ) && h < h_end; ++h)
1947
1947
if (*h == *n) return h;
@@ -1978,7 +1978,7 @@ sz_cptr_t sz_rfind_byte_serial(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n) {
1978
1978
// Reposition the `h` pointer to the end, as we will be walking backwards.
1979
1979
h = h + h_length - 1 ;
1980
1980
1981
- #if !SZ_DETECT_BIG_ENDIAN // Use SWAR only on little-endian platforms for brevety .
1981
+ #if !SZ_DETECT_BIG_ENDIAN // Use SWAR only on little-endian platforms for brevity .
1982
1982
#if !SZ_USE_MISALIGNED_LOADS // Process the misaligned head, to avoid UB on unaligned 64-bit loads.
1983
1983
for (; ((sz_size_t )(h + 1 ) & 7ull ) && h >= h_start; --h)
1984
1984
if (*h == *n) return h;
@@ -2464,9 +2464,9 @@ SZ_INTERNAL sz_size_t _sz_edit_distance_skewed_diagonals_serial( //
2464
2464
sz_size_t cost_if_deletion_or_insertion = sz_min_of_two (current_distances[i], current_distances[i + 1 ]) + 1 ;
2465
2465
next_distances[i + 1 ] = sz_min_of_two (cost_if_deletion_or_insertion, cost_if_substitution);
2466
2466
}
2467
- // Don't forget to populate the first row and the fiest column of the Levenshtein matrix.
2467
+ // Don't forget to populate the first row and the first column of the Levenshtein matrix.
2468
2468
next_distances[0 ] = next_distances[next_skew_diagonal_length - 1 ] = next_skew_diagonal_index;
2469
- // Perform a circular rotarion of those buffers, to reuse the memory.
2469
+ // Perform a circular rotation of those buffers, to reuse the memory.
2470
2470
sz_size_t *temporary = previous_distances;
2471
2471
previous_distances = current_distances;
2472
2472
current_distances = next_distances;
@@ -2486,7 +2486,7 @@ SZ_INTERNAL sz_size_t _sz_edit_distance_skewed_diagonals_serial( //
2486
2486
sz_size_t cost_if_deletion_or_insertion = sz_min_of_two (current_distances[i], current_distances[i + 1 ]) + 1 ;
2487
2487
next_distances[i] = sz_min_of_two (cost_if_deletion_or_insertion, cost_if_substitution);
2488
2488
}
2489
- // Perform a circular rotarion of those buffers, to reuse the memory, this time, with a shift,
2489
+ // Perform a circular rotation of those buffers, to reuse the memory, this time, with a shift,
2490
2490
// dropping the first element in the current array.
2491
2491
sz_size_t *temporary = previous_distances;
2492
2492
previous_distances = current_distances + 1 ;
@@ -3486,32 +3486,38 @@ SZ_PUBLIC void sz_string_free(sz_string_t *string, sz_memory_allocator_t *alloca
3486
3486
sz_string_init (string);
3487
3487
}
3488
3488
3489
- // When overriding libc, disable optimisations for this function beacuse MSVC will optimize the loops into a memset.
3489
+ // When overriding libc, disable optimizations for this function because MSVC will optimize the loops into a memset.
3490
3490
// Which then causes a stack overflow due to infinite recursion (memset -> sz_fill_serial -> memset).
3491
3491
#if defined(_MSC_VER) && defined(SZ_OVERRIDE_LIBC) && SZ_OVERRIDE_LIBC
3492
3492
#pragma optimize("", off)
3493
3493
#endif
3494
3494
// Fills `length` bytes starting at `target` with the byte `value`.
//
// Inputs shorter than `SZ_SWAR_THRESHOLD` are written one byte at a time. Longer inputs are written as:
// a byte-wise head up to the next 8-byte boundary, a run of 64-bit stores, and a byte-wise tail.
//
// All loops count `length` down instead of comparing against an `end` pointer,
// so `target + length` is never formed inside this function.
SZ_PUBLIC void sz_fill_serial(sz_ptr_t target, sz_size_t length, sz_u8_t value) {
    // Dealing with short strings, a single sequential pass would be faster.
    // If the size is larger than 2 words, then at least 1 of them will be aligned.
    // But just one aligned word may not be worth SWAR.
    if (length < SZ_SWAR_THRESHOLD)
        while (length--) *(target++) = value;

    // In case of long strings, skip unaligned bytes, and then fill the rest in 64-bit chunks.
    else {
        // Replicate the byte into all 8 bytes of a 64-bit word.
        sz_u64_t value64 = (sz_u64_t)value * 0x0101010101010101ull;
        // Head: byte stores until `target` is 8-byte aligned, at most 7 iterations.
        // NOTE(review): assumes SZ_SWAR_THRESHOLD >= 7, so `length` cannot wrap below zero here - confirm.
        while ((sz_size_t)target & 7ull) *(target++) = value, length--;
        // Body: aligned 64-bit stores while at least one full word remains.
        while (length >= 8) *(sz_u64_t *)target = value64, target += 8, length -= 8;
        // Tail: the remaining 0..7 bytes.
        while (length--) *(target++) = value;
    }
}
3510
3509
#if defined(_MSC_VER) && defined(SZ_OVERRIDE_LIBC) && SZ_OVERRIDE_LIBC
3511
3510
#pragma optimize("", on)
3512
3511
#endif
3513
3512
3514
3513
SZ_PUBLIC void sz_copy_serial (sz_ptr_t target, sz_cptr_t source, sz_size_t length) {
3514
+ // The most typical implementation of `memcpy` suffers from Undefined Behavior:
3515
+ //
3516
+ // for (char const *end = source + length; source < end; source++) *target++ = *source;
3517
+ //
3518
+ // As NULL pointer arithmetic is undefined for calls like `memcpy(NULL, NULL, 0)`.
3519
+ // That's mitigated in C2y with the N3322 proposal, but our solution uses a design, that has no such issues.
3520
+ // https://developers.redhat.com/articles/2024/12/11/making-memcpynull-null-0-well-defined
3515
3521
#if SZ_USE_MISALIGNED_LOADS
3516
3522
while (length >= 8 ) *(sz_u64_t *)target = *(sz_u64_t const *)source, target += 8 , source += 8 , length -= 8 ;
3517
3523
#endif
@@ -5215,7 +5221,7 @@ SZ_INTERNAL sz_size_t _sz_edit_distance_skewed_diagonals_upto65k_avx512( //
5215
5221
}
5216
5222
// Don't forget to populate the first row and the first column of the Levenshtein matrix.
5217
5223
next_distances[0 ] = next_distances[next_skew_diagonal_length - 1 ] = (sz_u16_t )next_skew_diagonal_index;
5218
- // Perform a circular rotarion of those buffers, to reuse the memory.
5224
+ // Perform a circular rotation of those buffers, to reuse the memory.
5219
5225
sz_u16_t *temporary = previous_distances;
5220
5226
previous_distances = current_distances;
5221
5227
current_distances = next_distances;
@@ -5257,7 +5263,7 @@ SZ_INTERNAL sz_size_t _sz_edit_distance_skewed_diagonals_upto65k_avx512( //
5257
5263
i += register_length;
5258
5264
}
5259
5265
5260
- // Perform a circular rotarion of those buffers, to reuse the memory, this time, with a shift,
5266
+ // Perform a circular rotation of those buffers, to reuse the memory, this time, with a shift,
5261
5267
// dropping the first element in the current array.
5262
5268
sz_u16_t *temporary = previous_distances;
5263
5269
previous_distances = current_distances + 1 ;
0 commit comments