Skip to content

Commit c18ef22

Browse files
authored
Fix crash when valid char was split
If a valid character was split across reads into the partial UTF-8 buffer and was then terminated by an invalid byte, we should print it and advance instead of trying to discard it entirely.
1 parent ff21c30 commit c18ef22

File tree

1 file changed

+32
-20
lines changed

1 file changed

+32
-20
lines changed

src/lib.rs

+32-20
Original file line number | Diff line number | Diff line change
@@ -460,6 +460,22 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> {
460460
c.len_utf8() - old_bytes
461461
},
462462
Err(err) => {
463+
let valid_bytes = err.valid_up_to();
464+
// If we have any valid bytes, that means we partially copied another
465+
// utf8 character into `partial_utf8`. Since we only care about the
466+
// first character, we just ignore the rest.
467+
if valid_bytes > 0 {
468+
let c = unsafe {
469+
let parsed = str::from_utf8_unchecked(&self.partial_utf8[..valid_bytes]);
470+
parsed.chars().next().unwrap_unchecked()
471+
};
472+
473+
performer.print(c);
474+
475+
self.partial_utf8_len = 0;
476+
return valid_bytes - old_bytes;
477+
}
478+
463479
match err.error_len() {
464480
// If the partial character was also invalid, emit the replacement
465481
// character.
@@ -469,26 +485,8 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> {
469485
self.partial_utf8_len = 0;
470486
invalid_len - old_bytes
471487
},
472-
None => {
473-
// If we have any valid bytes, that means we partially copied another
474-
// utf8 character into `partial_utf8`. Since we only care about the
475-
// first character, we just ignore the rest.
476-
let valid_bytes = err.valid_up_to();
477-
if valid_bytes > 0 {
478-
let c = unsafe {
479-
let parsed =
480-
str::from_utf8_unchecked(&self.partial_utf8[..valid_bytes]);
481-
parsed.chars().next().unwrap_unchecked()
482-
};
483-
performer.print(c);
484-
485-
self.partial_utf8_len = 0;
486-
valid_bytes - old_bytes
487-
} else {
488-
// If the character still isn't complete, wait for more data.
489-
bytes.len()
490-
}
491-
},
488+
// If the character still isn't complete, wait for more data.
489+
None => to_copy,
492490
}
493491
},
494492
}
@@ -1231,6 +1229,20 @@ mod tests {
12311229
assert_eq!(dispatcher.dispatched[2], Sequence::Print('b'));
12321230
}
12331231

1232+
#[test] // Regression test: a valid character split across two reads and followed by an invalid byte.
1233+
fn partial_invalid_utf8_split() {
1234+
const INPUT: &[u8] = b"\xE4\xBF\x99\xB5"; // E4 BF 99 is the UTF-8 encoding of '俙' (U+4FD9); B5 is a lone continuation byte.
1235+
1236+
let mut dispatcher = Dispatcher::default();
1237+
let mut parser = Parser::new();
1238+
1239+
parser.advance(&mut dispatcher, &INPUT[..2]); // First read: only the first two bytes of the three-byte character.
1240+
parser.advance(&mut dispatcher, &INPUT[2..]); // Second read: the final byte of the character, then the invalid byte.
1241+
1242+
assert_eq!(dispatcher.dispatched[0], Sequence::Print('俙')); // The split character must still be printed, not discarded.
1243+
assert_eq!(dispatcher.dispatched[1], Sequence::Print('�')); // The trailing invalid byte is reported as the replacement character.
1244+
}
1245+
12341246
#[test]
12351247
fn partial_utf8_into_esc() {
12361248
const INPUT: &[u8] = b"\xD8\x1b012";

0 commit comments

Comments
 (0)