@@ -460,6 +460,22 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> {
460
460
c. len_utf8 ( ) - old_bytes
461
461
} ,
462
462
Err ( err) => {
463
+ let valid_bytes = err. valid_up_to ( ) ;
464
+ // If we have any valid bytes, that means we partially copied another
465
+ // utf8 character into `partial_utf8`. Since we only care about the
466
+ // first character, we just ignore the rest.
467
+ if valid_bytes > 0 {
468
+ let c = unsafe {
469
+ let parsed = str:: from_utf8_unchecked ( & self . partial_utf8 [ ..valid_bytes] ) ;
470
+ parsed. chars ( ) . next ( ) . unwrap_unchecked ( )
471
+ } ;
472
+
473
+ performer. print ( c) ;
474
+
475
+ self . partial_utf8_len = 0 ;
476
+ return valid_bytes - old_bytes;
477
+ }
478
+
463
479
match err. error_len ( ) {
464
480
// If the partial character was also invalid, emit the replacement
465
481
// character.
@@ -469,26 +485,8 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> {
469
485
self . partial_utf8_len = 0 ;
470
486
invalid_len - old_bytes
471
487
} ,
472
- None => {
473
- // If we have any valid bytes, that means we partially copied another
474
- // utf8 character into `partial_utf8`. Since we only care about the
475
- // first character, we just ignore the rest.
476
- let valid_bytes = err. valid_up_to ( ) ;
477
- if valid_bytes > 0 {
478
- let c = unsafe {
479
- let parsed =
480
- str:: from_utf8_unchecked ( & self . partial_utf8 [ ..valid_bytes] ) ;
481
- parsed. chars ( ) . next ( ) . unwrap_unchecked ( )
482
- } ;
483
- performer. print ( c) ;
484
-
485
- self . partial_utf8_len = 0 ;
486
- valid_bytes - old_bytes
487
- } else {
488
- // If the character still isn't complete, wait for more data.
489
- bytes. len ( )
490
- }
491
- } ,
488
+ // If the character still isn't complete, wait for more data.
489
+ None => to_copy,
492
490
}
493
491
} ,
494
492
}
@@ -1231,6 +1229,20 @@ mod tests {
1231
1229
assert_eq ! ( dispatcher. dispatched[ 2 ] , Sequence :: Print ( 'b' ) ) ;
1232
1230
}
1233
1231
1232
// Test: a multi-byte UTF-8 character is delivered in two pieces across two
// `advance` calls, and the second piece also carries one extra trailing byte.
// The test expects two prints: first the character decoded from the leading
// bytes, then the replacement character for the trailing byte.
//
// NOTE(review): in this rendering the byte-string literal shows a space after
// every escape, matching the token spacing the extractor added on all other
// lines. Presumably the real literal is the four contiguous bytes
// \xE4\xBF\x99\xB5 (a three-byte sequence followed by a lone continuation
// byte); with literal spaces the split at index 2 would not fall inside the
// character. Confirm against the repository before relying on this text.
+ #[ test]
1233
+ fn partial_invalid_utf8_split ( ) {
1234
+ const INPUT : & [ u8 ] = b"\xE4 \xBF \x99 \xB5 " ;
1235
+
1236
+ let mut dispatcher = Dispatcher :: default ( ) ;
1237
+ let mut parser = Parser :: new ( ) ;
1238
+
1239
// First call: only the first two bytes of INPUT are supplied.
+ parser. advance ( & mut dispatcher, & INPUT [ ..2 ] ) ;
1240
// Second call: the remainder of INPUT.
+ parser. advance ( & mut dispatcher, & INPUT [ 2 ..] ) ;
1241
+
1242
// Only the first two recorded entries are checked, in order.
+ assert_eq ! ( dispatcher. dispatched[ 0 ] , Sequence :: Print ( '俙' ) ) ;
1243
+ assert_eq ! ( dispatcher. dispatched[ 1 ] , Sequence :: Print ( '�' ) ) ;
1244
+ }
1245
+
1234
1246
#[ test]
1235
1247
fn partial_utf8_into_esc ( ) {
1236
1248
const INPUT : & [ u8 ] = b"\xD8 \x1b 012" ;
0 commit comments