Skip to content

Commit 197f93c

Browse files
add support for multi-byte UTF-8 strings in logs
1 parent d1c76dd commit 197f93c

File tree

2 files changed

+14
-2
lines changed

2 files changed

+14
-2
lines changed

src/format/llm.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,15 @@ const MAX_TOKEN_LEN: usize = 50;
246246
/// Truncate a single string if longer than MAX_TOKEN_LEN.
247247
fn truncate_token(s: &str) -> String {
248248
if s.len() > MAX_TOKEN_LEN {
249-
format!("{}...{}", &s[..20], &s[s.len() - 10..])
249+
let mut head = 20;
250+
while !s.is_char_boundary(head) {
251+
head -= 1;
252+
}
253+
let mut tail = s.len() - 10;
254+
while !s.is_char_boundary(tail) {
255+
tail += 1;
256+
}
257+
format!("{}...{}", &s[..head], &s[tail..])
250258
} else {
251259
s.to_string()
252260
}

src/scoring.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,11 @@ const KEYWORD_PREFIX_LEN: usize = 100;
101101
/// deep in the template body. Returns the max weight found, or 1.0 if
102102
/// no keywords match.
103103
fn match_keyword_weight(template: &str, weights: &HashMap<&str, f64>) -> f64 {
104-
let prefix = &template[..template.len().min(KEYWORD_PREFIX_LEN)];
104+
let mut end = template.len().min(KEYWORD_PREFIX_LEN);
105+
while !template.is_char_boundary(end) {
106+
end -= 1;
107+
}
108+
let prefix = &template[..end];
105109
let tokens = tokenize_template(prefix);
106110
let mut max_weight = 1.0_f64;
107111
let mut found = false;

0 commit comments

Comments
 (0)