Skip to content

Commit 37fe2c9

Browse files
committed
Fix access violation exception
1 parent 247abb4 commit 37fe2c9

File tree

1 file changed

+28
-2
lines changed

1 file changed

+28
-2
lines changed

operators/tokenizer/ugm_kernels.hpp

Lines changed: 28 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -521,8 +521,21 @@ struct SpmUgmTokenizer {
521 521
if (longest_prefix_offset >= prefix_replacements_size_) {
522 522
ORTX_CXX_API_THROW("[UgmTok]Index out of array bounds in precompiled charsmap!", ORT_RUNTIME_EXCEPTION);
523 523
}
524+
// Validate that we have valid prefix_replacements_ data
525+
if (prefix_replacements_ == nullptr || prefix_replacements_size_ == 0) {
526+
ORTX_CXX_API_THROW("[UgmTok]prefix_replacements_ is null or empty!", ORT_RUNTIME_EXCEPTION);
527+
}
524 528
const char* prefix_replacement = &prefix_replacements_[longest_prefix_offset];
525-
return {prefix_replacement, static_cast<int>(longest_prefix_length)};
529+
// Calculate safe length: find null terminator within remaining buffer
530+
size_t max_len = prefix_replacements_size_ - longest_prefix_offset;
531+
size_t replacement_len = 0;
532+
while (replacement_len < max_len && prefix_replacement[replacement_len] != '\0') {
533+
replacement_len++;
534+
}
535+
if (replacement_len == max_len) {
536+
ORTX_CXX_API_THROW("[UgmTok]Replacement string not null-terminated in precompiled charsmap!", ORT_RUNTIME_EXCEPTION);
537+
}
538+
return {std::string_view(prefix_replacement, replacement_len), static_cast<int>(longest_prefix_length)};
526 539
} else {
527 540
// if yes, return this sequence unmodified
528 541
size_t prefix_offset = ustring::UTF8Len(input_view[0]);
@@ -674,8 +687,21 @@ struct SpmUgmTokenizer {
674 687
if (longest_prefix_offset >= prefix_replacements_size_) {
675 688
ORTX_CXX_API_THROW("[UgmTok]Index out of array bounds in precompiled charsmap!", ORT_RUNTIME_EXCEPTION);
676 689
}
690+
// Validate that we have valid prefix_replacements_ data
691+
if (prefix_replacements_ == nullptr || prefix_replacements_size_ == 0) {
692+
ORTX_CXX_API_THROW("[UgmTok]prefix_replacements_ is null or empty!", ORT_RUNTIME_EXCEPTION);
693+
}
677 694
const char* prefix_replacement = &prefix_replacements_[longest_prefix_offset];
678-
return {prefix_replacement, strlen(prefix_replacement), longest_prefix_length};
695+
// Calculate safe length: find null terminator within remaining buffer
696+
size_t max_len = prefix_replacements_size_ - longest_prefix_offset;
697+
size_t replacement_len = 0;
698+
while (replacement_len < max_len && prefix_replacement[replacement_len] != '\0') {
699+
replacement_len++;
700+
}
701+
if (replacement_len == max_len) {
702+
ORTX_CXX_API_THROW("[UgmTok]Replacement string not null-terminated in precompiled charsmap!", ORT_RUNTIME_EXCEPTION);
703+
}
704+
return {prefix_replacement, replacement_len, longest_prefix_length};
679 705
} else {
680 706
// if yes, return this sequence unmodified
681 707
size_t prefix_offset = input_offset + ustring::UTF8Len(input[input_offset]);

0 commit comments

Comments
 (0)