Skip to content

Commit 47747d5

Browse files
committed
Add a family of builder methods to create an ArcStr using a callback
1 parent 9abe831 commit 47747d5

File tree

1 file changed

+188
-11
lines changed

1 file changed

+188
-11
lines changed

Diff for: src/arc_str.rs

+188-11
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
clippy::redundant_slicing,
88
)]
99
use core::alloc::Layout;
10-
use core::mem::{align_of, size_of};
10+
use core::mem::{align_of, size_of, MaybeUninit};
1111
use core::ptr::NonNull;
1212
#[cfg(not(all(loom, test)))]
1313
pub(crate) use core::sync::atomic::{AtomicUsize, Ordering};
@@ -167,6 +167,136 @@ impl ArcStr {
167167
}
168168
}
169169

170+
/// Attempt to allocate memory for an [`ArcStr`] of length `n`, and use the
171+
/// provided callback to fully initialize the provided buffer with valid
172+
/// UTF-8 text.
173+
///
174+
/// This function returns `None` if memory allocation fails, see
175+
/// [`ArcStr::build_unchecked`] for a version which calls
176+
/// [`handle_alloc_error`](alloc::alloc::handle_alloc_error).
177+
///
178+
/// # Safety
179+
/// The provided `initializer` callback must fully initialize the provided
180+
/// buffer with valid UTF-8 text.
181+
///
182+
/// # Examples
183+
///
184+
/// ```
185+
/// # use arcstr::ArcStr;
186+
/// # use core::mem::MaybeUninit;
187+
/// let arcstr = unsafe {
188+
/// ArcStr::try_build_unchecked(10, |s: &mut [MaybeUninit<u8>]| {
189+
/// s.fill(MaybeUninit::new(b'a'));
190+
/// }).unwrap()
191+
/// };
192+
/// assert_eq!(arcstr, "aaaaaaaaaa")
193+
/// ```
194+
#[inline]
195+
pub unsafe fn try_build_unchecked<F>(n: usize, initializer: F) -> Option<Self>
196+
where
197+
F: FnOnce(&mut [MaybeUninit<u8>]),
198+
{
199+
if let Ok(inner) = ThinInner::try_allocate_with(n, false, AllocInit::Uninit, initializer) {
200+
Some(Self(inner))
201+
} else {
202+
None
203+
}
204+
}
205+
206+
/// Allocate memory for an [`ArcStr`] of length `n`, and use the provided
207+
/// callback to fully initialize the provided buffer with valid UTF-8 text.
208+
///
209+
/// This function calls
210+
/// [`handle_alloc_error`](alloc::alloc::handle_alloc_error) if memory
211+
/// allocation fails, see [`ArcStr::try_build_unchecked`] for a version
212+
/// which returns `None`.
213+
///
214+
/// # Safety
215+
/// The provided `initializer` callback must fully initialize the provided
216+
/// buffer with valid UTF-8 text.
217+
///
218+
/// # Examples
219+
///
220+
/// ```
221+
/// # use arcstr::ArcStr;
222+
/// # use core::mem::MaybeUninit;
223+
/// let arcstr = unsafe {
224+
/// ArcStr::build_unchecked(10, |s: &mut [MaybeUninit<u8>]| {
225+
/// s.fill(MaybeUninit::new(b'a'));
226+
/// })
227+
/// };
228+
/// assert_eq!(arcstr, "aaaaaaaaaa")
229+
/// ```
230+
#[inline]
231+
pub unsafe fn build_unchecked<F>(n: usize, initializer: F) -> Self
232+
where
233+
F: FnOnce(&mut [MaybeUninit<u8>]),
234+
{
235+
match ThinInner::try_allocate_with(n, false, AllocInit::Uninit, initializer) {
236+
Ok(inner) => Self(inner),
237+
Err(None) => panic!("capacity overflow"),
238+
Err(Some(layout)) => alloc::alloc::handle_alloc_error(layout),
239+
}
240+
}
241+
242+
/// Attempt to allocate memory for an [`ArcStr`] of length `n`, and use the
243+
/// provided callback to initialize the provided (initially-zeroed) buffer
244+
/// with valid UTF-8 text.
245+
///
246+
/// Note: The callback is provided with a zeroed buffer, and this function performs UTF-8
247+
/// validation after calling the initializer. While both of these are fast
248+
/// operations, some high-performance use cases will be better off using
249+
/// [`ArcStr::try_build_unchecked`] as the building block.
250+
///
251+
/// # Errors
252+
/// The provided `initializer` callback must initialize the provided buffer
253+
/// with valid UTF-8 text, or a UTF-8 error will be returned.
254+
///
255+
/// # Examples
256+
///
257+
/// ```
258+
/// # use arcstr::ArcStr;
259+
///
260+
/// let s = ArcStr::build(5, |slice| {
261+
/// slice
262+
/// .iter_mut()
263+
/// .zip(b'0'..b'5')
264+
/// .for_each(|(db, sb)| *db = sb);
265+
/// }).unwrap();
266+
/// assert_eq!(s, "01234");
267+
/// ```
268+
#[inline]
269+
pub fn build<F>(n: usize, initializer: F) -> Result<Self, core::str::Utf8Error>
270+
where
271+
F: FnOnce(&mut [u8]),
272+
{
273+
let mut failed = None::<core::str::Utf8Error>;
274+
let wrapper = |zeroed_slice: &mut [MaybeUninit<u8>]| {
275+
debug_assert_eq!(n, zeroed_slice.len());
276+
// Safety: we pass `AllocInit::Zero`, so this is actually initialized
277+
let slice = unsafe {
278+
core::slice::from_raw_parts_mut(zeroed_slice.as_mut_ptr().cast::<u8>(), n)
279+
};
280+
initializer(slice);
281+
if let Err(e) = core::str::from_utf8(slice) {
282+
failed = Some(e);
283+
}
284+
};
285+
match unsafe { ThinInner::try_allocate_with(n, false, AllocInit::Zero, wrapper) } {
286+
Ok(inner) => {
287+
// Ensure we clean up the allocation even on error.
288+
let this = Self(inner);
289+
if let Some(e) = failed {
290+
Err(e)
291+
} else {
292+
Ok(this)
293+
}
294+
}
295+
Err(None) => panic!("capacity overflow"),
296+
Err(Some(layout)) => alloc::alloc::handle_alloc_error(layout),
297+
}
298+
}
299+
170300
/// Extract a string slice containing our data.
171301
///
172302
/// Note: This is an equivalent to our `Deref` implementation, but can be
@@ -751,7 +881,8 @@ impl ArcStr {
751881
///
752882
/// # Errors
753883
///
754-
/// This function returns an error if the capacity overflows
884+
/// This function returns an error if the capacity overflows or allocation
885+
/// fails.
755886
///
756887
/// # Examples
757888
///
@@ -771,7 +902,8 @@ impl ArcStr {
771902

772903
// Calculate the capacity for the allocated string
773904
let capacity = source.len().checked_mul(n)?;
774-
let inner = ThinInner::try_allocate_uninit(capacity, false).ok()?;
905+
let inner =
906+
ThinInner::try_allocate_maybe_uninit(capacity, false, AllocInit::Uninit).ok()?;
775907

776908
unsafe {
777909
let mut data_ptr = ThinInner::data_ptr(inner);
@@ -791,7 +923,8 @@ impl ArcStr {
791923
///
792924
/// # Panics
793925
///
794-
/// This function panics if the capacity overflows
926+
/// This function panics if the capacity overflows, see
927+
/// [`try_repeat`](ArcStr::try_repeat) if this is undesirable.
795928
///
796929
/// # Examples
797930
///
@@ -1038,13 +1171,15 @@ impl ThinInner {
10381171
unsafe { this.as_ptr().cast::<u8>().add(OFFSET_DATA) }
10391172
}
10401173

1041-
/// Allocates a `ThinInner` where the data segment is uninitialized
1174+
/// Allocates a `ThinInner` where the data segment is uninitialized or
1175+
/// zeroed.
10421176
///
10431177
/// Returns `Err(Some(layout))` if we failed to allocate that layout, and
10441178
/// `Err(None)` for integer overflow when computing layout.
1045-
fn try_allocate_uninit(
1179+
fn try_allocate_maybe_uninit(
10461180
capacity: usize,
10471181
initially_static: bool,
1182+
init_how: AllocInit,
10481183
) -> Result<NonNull<Self>, Option<Layout>> {
10491184
const ALIGN: usize = align_of::<ThinInner>();
10501185

@@ -1055,7 +1190,10 @@ impl ThinInner {
10551190

10561191
debug_assert!(Layout::from_size_align(capacity + OFFSET_DATA, ALIGN).is_ok());
10571192
let layout = unsafe { Layout::from_size_align_unchecked(capacity + OFFSET_DATA, ALIGN) };
1058-
let ptr = unsafe { alloc::alloc::alloc(layout) as *mut ThinInner };
1193+
let ptr = match init_how {
1194+
AllocInit::Uninit => unsafe { alloc::alloc::alloc(layout) as *mut ThinInner },
1195+
AllocInit::Zero => unsafe { alloc::alloc::alloc_zeroed(layout) as *mut ThinInner },
1196+
};
10591197
if ptr.is_null() {
10601198
return Err(Some(layout));
10611199
}
@@ -1087,14 +1225,47 @@ impl ThinInner {
10871225
// `Err(None)` for integer overflow when computing layout.
10881226
#[inline]
10891227
fn try_allocate(data: &str, initially_static: bool) -> Result<NonNull<Self>, Option<Layout>> {
1228+
// Safety: we initialize the whole buffer by copying `data` into it.
1229+
unsafe {
1230+
// Allocate enough space to hold the given string
1231+
Self::try_allocate_with(
1232+
data.len(),
1233+
initially_static,
1234+
AllocInit::Uninit,
1235+
// Copy the given string into the allocation
1236+
|uninit_slice| {
1237+
debug_assert_eq!(uninit_slice.len(), data.len());
1238+
core::ptr::copy_nonoverlapping(
1239+
data.as_ptr(),
1240+
uninit_slice.as_mut_ptr().cast::<u8>(),
1241+
data.len(),
1242+
)
1243+
},
1244+
)
1245+
}
1246+
}
1247+
1248+
/// Safety: caller must fully initialize the provided buffer with valid
1249+
/// UTF-8 in the `initializer` function (well, you at least need to handle
1250+
/// it before giving it back to the user).
1251+
#[inline]
1252+
unsafe fn try_allocate_with(
1253+
len: usize,
1254+
initially_static: bool,
1255+
init_style: AllocInit,
1256+
initializer: impl FnOnce(&mut [core::mem::MaybeUninit<u8>]),
1257+
) -> Result<NonNull<Self>, Option<Layout>> {
10901258
// Allocate enough space to hold the given string
1091-
let uninit = Self::try_allocate_uninit(data.len(), initially_static)?;
1259+
let this = Self::try_allocate_maybe_uninit(len, initially_static, init_style)?;
10921260

1093-
// Copy the given string into the allocation
1094-
unsafe { core::ptr::copy_nonoverlapping(data.as_ptr(), Self::data_ptr(uninit), data.len()) }
1261+
initializer(core::slice::from_raw_parts_mut(
1262+
Self::data_ptr(this).cast::<MaybeUninit<u8>>(),
1263+
len,
1264+
));
10951265

1096-
Ok(uninit)
1266+
Ok(this)
10971267
}
1268+
10981269
#[inline]
10991270
unsafe fn get_len_flag(p: *const ThinInner) -> PackedFlagUint {
11001271
debug_assert_eq!(OFFSET_LENFLAGS, 0);
@@ -1115,6 +1286,12 @@ impl ThinInner {
11151286
}
11161287
}
11171288

1289+
#[derive(Clone, Copy, PartialEq)]
1290+
enum AllocInit {
1291+
Uninit,
1292+
Zero,
1293+
}
1294+
11181295
#[inline(never)]
11191296
#[cold]
11201297
fn alloc_overflow() -> ! {

0 commit comments

Comments
 (0)