Skip to content

Commit 14ee92c

Browse files
committed
1 parent 152ed04 commit 14ee92c

File tree

3 files changed

+32
-15
lines changed

3 files changed

+32
-15
lines changed

README.md

+2
Original file line numberDiff line numberDiff line change
@@ -1196,8 +1196,10 @@ __`SZ_AVOID_LIBC`__ and __`SZ_OVERRIDE_LIBC`__:
11961196
> This may affect the type resolution system on obscure hardware platforms.
11971197
> Moreover, one may let `stringzilla` override the common symbols like the `memcpy` and `memset` with its own implementations.
11981198
> In that case you can use the [`LD_PRELOAD` trick][ld-preload-trick] to prioritize its symbols over the ones from the LibC and accelerate existing string-heavy applications without recompiling them.
1199+
> It also adds a layer of security, as the `stringzilla` implementation isn't [undefined for NULL inputs][redhat-memcpy-ub] the way `memcpy(NULL, NULL, 0)` is.
11991200
12001201
[ld-preload-trick]: https://ashvardanian.com/posts/ld-preload-libsee
1202+
[redhat-memcpy-ub]: https://developers.redhat.com/articles/2024/12/11/making-memcpynull-null-0-well-defined
12011203

12021204
__`SZ_AVOID_STL`__ and __`SZ_SAFETY_OVER_COMPATIBILITY`__:
12031205

include/stringzilla/stringzilla.h

+19-13
Original file line numberDiff line numberDiff line change
@@ -1941,7 +1941,7 @@ SZ_PUBLIC sz_cptr_t sz_find_byte_serial(sz_cptr_t h, sz_size_t h_length, sz_cptr
19411941
if (!h_length) return SZ_NULL_CHAR;
19421942
sz_cptr_t const h_end = h + h_length;
19431943

1944-
#if !SZ_DETECT_BIG_ENDIAN // Use SWAR only on little-endian platforms for brevety.
1944+
#if !SZ_DETECT_BIG_ENDIAN // Use SWAR only on little-endian platforms for brevity.
19451945
#if !SZ_USE_MISALIGNED_LOADS // Process the misaligned head, to avoid UB on unaligned 64-bit loads.
19461946
for (; ((sz_size_t)h & 7ull) && h < h_end; ++h)
19471947
if (*h == *n) return h;
@@ -1978,7 +1978,7 @@ sz_cptr_t sz_rfind_byte_serial(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n) {
19781978
// Reposition the `h` pointer to the end, as we will be walking backwards.
19791979
h = h + h_length - 1;
19801980

1981-
#if !SZ_DETECT_BIG_ENDIAN // Use SWAR only on little-endian platforms for brevety.
1981+
#if !SZ_DETECT_BIG_ENDIAN // Use SWAR only on little-endian platforms for brevity.
19821982
#if !SZ_USE_MISALIGNED_LOADS // Process the misaligned head, to avoid UB on unaligned 64-bit loads.
19831983
for (; ((sz_size_t)(h + 1) & 7ull) && h >= h_start; --h)
19841984
if (*h == *n) return h;
@@ -2464,9 +2464,9 @@ SZ_INTERNAL sz_size_t _sz_edit_distance_skewed_diagonals_serial( //
24642464
sz_size_t cost_if_deletion_or_insertion = sz_min_of_two(current_distances[i], current_distances[i + 1]) + 1;
24652465
next_distances[i + 1] = sz_min_of_two(cost_if_deletion_or_insertion, cost_if_substitution);
24662466
}
2467-
// Don't forget to populate the first row and the fiest column of the Levenshtein matrix.
2467+
// Don't forget to populate the first row and the first column of the Levenshtein matrix.
24682468
next_distances[0] = next_distances[next_skew_diagonal_length - 1] = next_skew_diagonal_index;
2469-
// Perform a circular rotarion of those buffers, to reuse the memory.
2469+
// Perform a circular rotation of those buffers, to reuse the memory.
24702470
sz_size_t *temporary = previous_distances;
24712471
previous_distances = current_distances;
24722472
current_distances = next_distances;
@@ -2486,7 +2486,7 @@ SZ_INTERNAL sz_size_t _sz_edit_distance_skewed_diagonals_serial( //
24862486
sz_size_t cost_if_deletion_or_insertion = sz_min_of_two(current_distances[i], current_distances[i + 1]) + 1;
24872487
next_distances[i] = sz_min_of_two(cost_if_deletion_or_insertion, cost_if_substitution);
24882488
}
2489-
// Perform a circular rotarion of those buffers, to reuse the memory, this time, with a shift,
2489+
// Perform a circular rotation of those buffers, to reuse the memory, this time, with a shift,
24902490
// dropping the first element in the current array.
24912491
sz_size_t *temporary = previous_distances;
24922492
previous_distances = current_distances + 1;
@@ -3486,32 +3486,38 @@ SZ_PUBLIC void sz_string_free(sz_string_t *string, sz_memory_allocator_t *alloca
34863486
sz_string_init(string);
34873487
}
34883488

3489-
// When overriding libc, disable optimisations for this function beacuse MSVC will optimize the loops into a memset.
3489+
// When overriding libc, disable optimizations for this function because MSVC will optimize the loops into a memset.
34903490
// Which then causes a stack overflow due to infinite recursion (memset -> sz_fill_serial -> memset).
34913491
#if defined(_MSC_VER) && defined(SZ_OVERRIDE_LIBC) && SZ_OVERRIDE_LIBC
34923492
#pragma optimize("", off)
34933493
#endif
34943494
SZ_PUBLIC void sz_fill_serial(sz_ptr_t target, sz_size_t length, sz_u8_t value) {
3495-
sz_ptr_t end = target + length;
34963495
// Dealing with short strings, a single sequential pass would be faster.
34973496
// If the size is larger than 2 words, then at least 1 of them will be aligned.
34983497
// But just one aligned word may not be worth SWAR.
34993498
if (length < SZ_SWAR_THRESHOLD)
3500-
while (target != end) *(target++) = value;
3499+
while (length--) *(target++) = value;
35013500

35023501
// In case of long strings, skip unaligned bytes, and then fill the rest in 64-bit chunks.
35033502
else {
35043503
sz_u64_t value64 = (sz_u64_t)value * 0x0101010101010101ull;
3505-
while ((sz_size_t)target & 7ull) *(target++) = value;
3506-
while (target + 8 <= end) *(sz_u64_t *)target = value64, target += 8;
3507-
while (target != end) *(target++) = value;
3504+
while ((sz_size_t)target & 7ull) *(target++) = value, length--;
3505+
while (length >= 8) *(sz_u64_t *)target = value64, target += 8, length -= 8;
3506+
while (length--) *(target++) = value;
35083507
}
35093508
}
35103509
#if defined(_MSC_VER) && defined(SZ_OVERRIDE_LIBC) && SZ_OVERRIDE_LIBC
35113510
#pragma optimize("", on)
35123511
#endif
35133512

35143513
SZ_PUBLIC void sz_copy_serial(sz_ptr_t target, sz_cptr_t source, sz_size_t length) {
3514+
// The most typical implementation of `memcpy` suffers from Undefined Behavior:
3515+
//
3516+
// for (char const *end = source + length; source < end; source++) *target++ = *source;
3517+
//
3518+
// As NULL pointer arithmetic is undefined for calls like `memcpy(NULL, NULL, 0)`.
3519+
// That's mitigated in C2y with the N3322 proposal, but our solution uses a design that has no such issues.
3520+
// https://developers.redhat.com/articles/2024/12/11/making-memcpynull-null-0-well-defined
35153521
#if SZ_USE_MISALIGNED_LOADS
35163522
while (length >= 8) *(sz_u64_t *)target = *(sz_u64_t const *)source, target += 8, source += 8, length -= 8;
35173523
#endif
@@ -5215,7 +5221,7 @@ SZ_INTERNAL sz_size_t _sz_edit_distance_skewed_diagonals_upto65k_avx512( //
52155221
}
52165222
// Don't forget to populate the first row and the first column of the Levenshtein matrix.
52175223
next_distances[0] = next_distances[next_skew_diagonal_length - 1] = (sz_u16_t)next_skew_diagonal_index;
5218-
// Perform a circular rotarion of those buffers, to reuse the memory.
5224+
// Perform a circular rotation of those buffers, to reuse the memory.
52195225
sz_u16_t *temporary = previous_distances;
52205226
previous_distances = current_distances;
52215227
current_distances = next_distances;
@@ -5257,7 +5263,7 @@ SZ_INTERNAL sz_size_t _sz_edit_distance_skewed_diagonals_upto65k_avx512( //
52575263
i += register_length;
52585264
}
52595265

5260-
// Perform a circular rotarion of those buffers, to reuse the memory, this time, with a shift,
5266+
// Perform a circular rotation of those buffers, to reuse the memory, this time, with a shift,
52615267
// dropping the first element in the current array.
52625268
sz_u16_t *temporary = previous_distances;
52635269
previous_distances = current_distances + 1;

scripts/test.cpp

+11-2
Original file line numberDiff line numberDiff line change
@@ -153,14 +153,23 @@ inline void expect_equality(char const *a, char const *b, std::size_t size) {
153153
* Uses a large heap-allocated buffer to ensure that operations optimized for @b larger-than-L2-cache memory
154154
* regions are tested. Uses a combination of deterministic and random tests with uniform and exponential distributions.
155155
*/
156-
static void test_memory_utilities(std::size_t experiments = 1024ull * 1024ull,
157-
std::size_t max_l2_size = 1024ull * 1024ull) {
156+
static void test_memory_utilities( //
157+
std::size_t experiments = 1024ull * 1024ull, std::size_t max_l2_size = 1024ull * 1024ull) {
158158

159159
// We will be mirroring the operations on both standard and StringZilla strings.
160160
std::string text_stl(max_l2_size, '-');
161161
std::string text_sz(max_l2_size, '-');
162162
expect_equality(text_stl.data(), text_sz.data(), max_l2_size);
163163

164+
// The traditional `memset` and `memcpy` functions are undefined for zero-length buffers and NULL pointers
165+
// in older C standards. However, with the N3322 proposal for C2y, that issue has been resolved.
166+
// https://developers.redhat.com/articles/2024/12/11/making-memcpynull-null-0-well-defined
167+
//
168+
// Let's make sure that our versions don't trigger any undefined behavior.
169+
sz::memset(NULL, 0, 0);
170+
sz::memcpy(NULL, NULL, 0);
171+
sz::memmove(NULL, NULL, 0);
172+
164173
// First start with simple deterministic tests.
165174
// Let's use `memset` to fill the strings with a pattern like "122333444455555...00000000000011111111111..."
166175
std::size_t count_groups = 0;

0 commit comments

Comments
 (0)