Skip to content

Commit f9e11ce

Browse files
committed
fix: avoid char-boundary panic in NBReader::try_read
1 parent 169ea91 commit f9e11ce

1 file changed

Lines changed: 10 additions & 10 deletions

File tree

src/reader.rs

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -203,11 +203,9 @@ impl NBReader {
203203
pub fn try_read(&mut self) -> Option<char> {
204204
// discard eventual errors, EOF will be handled in read_until correctly
205205
let _ = self.read_into_buffer();
206-
if !self.buffer.is_empty() {
207-
self.buffer.drain(..1).last()
208-
} else {
209-
None
210-
}
206+
let first = self.buffer.chars().next()?;
207+
self.buffer.drain(..first.len_utf8());
208+
Some(first)
211209
}
212210
}
213211

@@ -458,18 +456,20 @@ mod tests {
458456
assert_eq!(None, r.try_read());
459457
}
460458

461-
// Multi-byte UTF-8 characters in the buffer cause `drain(..1)` to panic at
462-
// the char boundary. Marked `should_panic` to capture the current behavior;
463-
// the follow-up fix commit removes the attribute and asserts correct decode.
464459
#[test]
465-
#[should_panic = "is_char_boundary"]
466460
fn test_try_read_multibyte() {
467461
let f = io::Cursor::new("\u{c3}");
468462
let mut r = NBReader::new(f, Options::default());
469463
// pump bytes from the reader thread into the buffer
464+
let mut chars = String::new();
470465
for _ in 0..10 {
471-
let _ = r.try_read();
466+
if let Some(c) = r.try_read() {
467+
chars.push(c);
468+
}
472469
thread::sleep(time::Duration::from_millis(5));
473470
}
471+
// Each raw byte is cast to a char before reaching the buffer, so the two
472+
// UTF-8 bytes of "\u{c3}" surface as separate Latin-1 chars.
473+
assert_eq!(chars, "\u{c3}\u{83}");
474474
}
475475
}

0 commit comments

Comments
 (0)