@@ -22,20 +22,14 @@ impl DeserializeRevisioned for String {
2222 if len == 0 {
2323 return Ok ( String :: new ( ) ) ;
2424 }
25- let mut buf: Vec < u8 > = Vec :: with_capacity ( len) ;
26- // SAFETY: `Vec::with_capacity(len)` guarantees capacity `>= len`, so
27- // `from_raw_parts_mut(ptr, len)` yields a valid exclusive slice of
28- // `len` (currently uninitialised) bytes. `read_exact` either fully
29- // initialises the slice and returns `Ok`, in which case we commit
30- // the length via `set_len`, or returns `Err`, in which case `?`
31- // returns before `set_len` and `buf` is dropped with `len = 0`,
32- // so no uninitialised memory is ever observed. `String::from_utf8`
33- // then enforces UTF-8 validity before producing a `String`.
34- unsafe {
35- let slice = std:: slice:: from_raw_parts_mut ( buf. as_mut_ptr ( ) , len) ;
36- reader. read_exact ( slice) . map_err ( Error :: Io ) ?;
37- buf. set_len ( len) ;
38- }
25+ // Zero-initialise before handing the buffer to `read_exact`. Passing an
26+ // uninitialised slice to `Read::read` is explicitly documented as UB,
27+ // and constructing a `&mut [u8]` over uninitialised memory would itself
28+ // be UB per `slice::from_raw_parts_mut`'s initialisation requirement.
29+ // The memset is negligible compared to the pending I/O and matches the
30+ // pattern used in `uuid.rs` and the numeric specialised Vec impls.
31+ let mut buf = vec ! [ 0u8 ; len] ;
32+ reader. read_exact ( & mut buf) . map_err ( Error :: Io ) ?;
3933 String :: from_utf8 ( buf) . map_err ( |x| Error :: Utf8Error ( x. utf8_error ( ) ) )
4034 }
4135}
@@ -160,6 +154,42 @@ mod tests {
160154 assert_bincode_compat ( & '𐃌' ) ;
161155 }
162156
157+ #[ test]
158+ fn str_and_string_serialize_identically ( ) {
159+ // `str` and `String` must produce byte-for-byte identical output
160+ // so that a `String` can round-trip values originally serialised
161+ // from a `&str` (and vice versa). Cover empty, ASCII, multi-byte
162+ // UTF-8, embedded NULs, and a longer payload that exercises the
163+ // length-prefix encoding beyond a single byte.
164+ let cases: & [ & str ] = & [
165+ "" ,
166+ "a" ,
167+ "this is a test" ,
168+ "unicode: 🚀🔥✨" ,
169+ "with\0 embedded\0 nuls" ,
170+ & "x" . repeat ( 300 ) ,
171+ ] ;
172+ for & s in cases {
173+ let mut from_str: Vec < u8 > = Vec :: new ( ) ;
174+ <str as SerializeRevisioned >:: serialize_revisioned ( s, & mut from_str) . unwrap ( ) ;
175+
176+ let owned = s. to_owned ( ) ;
177+ let mut from_string: Vec < u8 > = Vec :: new ( ) ;
178+ owned. serialize_revisioned ( & mut from_string) . unwrap ( ) ;
179+
180+ assert_eq ! (
181+ from_str, from_string,
182+ "str and String serialisation diverged for input {:?}" ,
183+ s
184+ ) ;
185+
186+ let out =
187+ <String as DeserializeRevisioned >:: deserialize_revisioned ( & mut from_str. as_slice ( ) )
188+ . unwrap ( ) ;
189+ assert_eq ! ( out, owned) ;
190+ }
191+ }
192+
163193 #[ test]
164194 fn bincode_compat_string ( ) {
165195 assert_bincode_compat ( & char:: MAX . to_string ( ) ) ;
0 commit comments