Skip to content

Commit 8836cd3

Browse files
committed
Optimise String deserialisation.
1 parent 5c29a2f commit 8836cd3

1 file changed

Lines changed: 23 additions & 2 deletions

File tree

src/implementations/string.rs

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,31 @@ impl SerializeRevisioned for String {
1212
}
1313

1414
impl DeserializeRevisioned for String {
15+
/// Reads the length-prefixed byte payload in a single bulk `read_exact`
16+
/// and validates it as UTF-8 in place, avoiding both the per-byte fallback
17+
/// when `specialised-vectors` is disabled and the `Take::read_to_end`
18+
/// overhead of the `Vec<u8>` specialised path.
1519
#[inline]
1620
fn deserialize_revisioned<R: std::io::Read>(reader: &mut R) -> Result<Self, Error> {
17-
let bytes = Vec::<u8>::deserialize_revisioned(reader)?;
18-
String::from_utf8(bytes).map_err(|x| Error::Utf8Error(x.utf8_error()))
21+
let len = usize::deserialize_revisioned(reader)?;
22+
if len == 0 {
23+
return Ok(String::new());
24+
}
25+
let mut buf: Vec<u8> = Vec::with_capacity(len);
26+
// SAFETY: `Vec::with_capacity(len)` guarantees capacity `>= len`, so
27+
// `from_raw_parts_mut(ptr, len)` yields a valid exclusive slice of
28+
// `len` (currently uninitialised) bytes. `read_exact` either fully
29+
// initialises the slice and returns `Ok`, in which case we commit
30+
// the length via `set_len`, or returns `Err`, in which case `?`
31+
// returns before `set_len` and `buf` is dropped with `len = 0`,
32+
// so no uninitialised memory is ever observed. `String::from_utf8`
33+
// then enforces UTF-8 validity before producing a `String`.
34+
unsafe {
35+
let slice = std::slice::from_raw_parts_mut(buf.as_mut_ptr(), len);
36+
reader.read_exact(slice).map_err(Error::Io)?;
37+
buf.set_len(len);
38+
}
39+
String::from_utf8(buf).map_err(|x| Error::Utf8Error(x.utf8_error()))
1940
}
2041
}
2142

0 commit comments

Comments
 (0)