Skip to content

Commit 20be253

Browse files
committed
Address feedback
1 parent ab1d142 commit 20be253

1 file changed

Lines changed: 44 additions & 14 deletions

File tree

src/implementations/string.rs

Lines changed: 44 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,20 +22,14 @@ impl DeserializeRevisioned for String {
2222
if len == 0 {
2323
return Ok(String::new());
2424
}
25-
let mut buf: Vec<u8> = Vec::with_capacity(len);
26-
// SAFETY: `Vec::with_capacity(len)` guarantees capacity `>= len`, so
27-
// `from_raw_parts_mut(ptr, len)` yields a valid exclusive slice of
28-
// `len` (currently uninitialised) bytes. `read_exact` either fully
29-
// initialises the slice and returns `Ok`, in which case we commit
30-
// the length via `set_len`, or returns `Err`, in which case `?`
31-
// returns before `set_len` and `buf` is dropped with `len = 0`,
32-
// so no uninitialised memory is ever observed. `String::from_utf8`
33-
// then enforces UTF-8 validity before producing a `String`.
34-
unsafe {
35-
let slice = std::slice::from_raw_parts_mut(buf.as_mut_ptr(), len);
36-
reader.read_exact(slice).map_err(Error::Io)?;
37-
buf.set_len(len);
38-
}
25+
// Zero-initialise before handing the buffer to `read_exact`. Passing an
26+
// uninitialised slice to `Read::read` is explicitly documented as UB,
27+
// and constructing a `&mut [u8]` over uninitialised memory would itself
28+
// be UB per `slice::from_raw_parts_mut`'s initialisation requirement.
29+
// The memset is negligible compared to the pending I/O and matches the
30+
// pattern used in `uuid.rs` and the numeric specialised Vec impls.
31+
let mut buf = vec![0u8; len];
32+
reader.read_exact(&mut buf).map_err(Error::Io)?;
3933
String::from_utf8(buf).map_err(|x| Error::Utf8Error(x.utf8_error()))
4034
}
4135
}
@@ -160,6 +154,42 @@ mod tests {
160154
assert_bincode_compat(&'𐃌');
161155
}
162156

157+
#[test]
158+
fn str_and_string_serialize_identically() {
159+
// `str` and `String` must produce byte-for-byte identical output
160+
// so that a `String` can round-trip values originally serialised
161+
// from a `&str` (and vice versa). Cover empty, ASCII, multi-byte
162+
// UTF-8, embedded NULs, and a longer payload that exercises the
163+
// length-prefix encoding beyond a single byte.
164+
let cases: &[&str] = &[
165+
"",
166+
"a",
167+
"this is a test",
168+
"unicode: 🚀🔥✨",
169+
"with\0embedded\0nuls",
170+
&"x".repeat(300),
171+
];
172+
for &s in cases {
173+
let mut from_str: Vec<u8> = Vec::new();
174+
<str as SerializeRevisioned>::serialize_revisioned(s, &mut from_str).unwrap();
175+
176+
let owned = s.to_owned();
177+
let mut from_string: Vec<u8> = Vec::new();
178+
owned.serialize_revisioned(&mut from_string).unwrap();
179+
180+
assert_eq!(
181+
from_str, from_string,
182+
"str and String serialisation diverged for input {:?}",
183+
s
184+
);
185+
186+
let out =
187+
<String as DeserializeRevisioned>::deserialize_revisioned(&mut from_str.as_slice())
188+
.unwrap();
189+
assert_eq!(out, owned);
190+
}
191+
}
192+
163193
#[test]
164194
fn bincode_compat_string() {
165195
assert_bincode_compat(&char::MAX.to_string());

0 commit comments

Comments
 (0)