diff --git a/Cargo.toml b/Cargo.toml
index 9d4a58c..4ba66aa 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,3 +10,7 @@ keywords = ["header", "heap", "vec", "vector", "graph"]
 categories = ["no-std"]
 license = "MIT"
 readme = "README.md"
+
+[features]
+default = ["atomic_append"]
+atomic_append = []
diff --git a/src/lib.rs b/src/lib.rs
index 56968dd..38abbbd 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -3,21 +3,32 @@
 extern crate alloc;
 
 use core::{
-    cmp,
     fmt::Debug,
-    marker::PhantomData,
-    mem::{self, ManuallyDrop},
+    mem::{self, ManuallyDrop, MaybeUninit},
     ops::{Deref, DerefMut, Index, IndexMut},
     ptr,
+    ptr::NonNull,
    slice::SliceIndex,
 };
 
+#[cfg(feature = "atomic_append")]
+use core::sync::atomic::{AtomicUsize, Ordering};
+
 struct HeaderVecHeader<H> {
     head: H,
     capacity: usize,
+    #[cfg(feature = "atomic_append")]
+    len: AtomicUsize,
+    #[cfg(not(feature = "atomic_append"))]
     len: usize,
 }
 
+// This union will be properly aligned and sized to store headers followed by T's.
+union AlignedHeader<H, T> {
+    _header: ManuallyDrop<HeaderVecHeader<H>>,
+    _data: ManuallyDrop<[T; 0]>,
+}
+
 /// A vector with a header of your choosing behind a thin pointer
 ///
 /// # Example
@@ -41,8 +52,7 @@ struct HeaderVecHeader<H> {
 /// All of the data, like our header `OurHeaderType { a: 2 }`, the length of the vector: `2`,
 /// and the contents of the vector `['x', 'z']` resides on the other side of the pointer.
 pub struct HeaderVec<H, T> {
-    ptr: *mut T,
-    _phantom: PhantomData<H>,
+    ptr: NonNull<AlignedHeader<H, T>>,
 }
 
 impl<H, T> HeaderVec<H, T> {
@@ -51,21 +61,17 @@ impl<H, T> HeaderVec<H, T> {
     }
 
     pub fn with_capacity(capacity: usize, head: H) -> Self {
-        assert!(capacity > 0, "HeaderVec capacity cannot be 0");
-        // Allocate the initial memory, which is unititialized.
+        // Allocate the initial memory, which is uninitialized.
         let layout = Self::layout(capacity);
-        let ptr = unsafe { alloc::alloc::alloc(layout) } as *mut T;
+        let ptr = unsafe { alloc::alloc::alloc(layout) } as *mut AlignedHeader<H, T>;
 
-        // Handle out-of-memory.
-        if ptr.is_null() {
+        let Some(ptr) = NonNull::new(ptr) else {
+            // Handle out-of-memory.
             alloc::alloc::handle_alloc_error(layout);
-        }
+        };
 
         // Create self.
-        let mut this = Self {
-            ptr,
-            _phantom: PhantomData,
-        };
+        let mut this = Self { ptr };
 
         // Set the header.
         let header = this.header_mut();
@@ -74,47 +80,107 @@ impl<H, T> HeaderVec<H, T> {
         unsafe { core::ptr::write(&mut header.head, head) };
         // These primitive types don't have drop implementations.
         header.capacity = capacity;
-        header.len = 0;
+        header.len = 0usize.into();
 
         this
     }
 
+    /// Get the length of the vector from a mutable reference. When one has a `&mut
+    /// HeaderVec`, this method is always exact and can be slightly faster than the
+    /// non-mutable `len()`.
+    #[cfg(feature = "atomic_append")]
+    #[inline(always)]
+    pub fn len_exact(&mut self) -> usize {
+        *self.header_mut().len.get_mut()
+    }
+    #[cfg(not(feature = "atomic_append"))]
+    #[inline(always)]
+    pub fn len_exact(&mut self) -> usize {
+        self.header_mut().len
+    }
+
+    /// This gives the length of the `HeaderVec`. This is the non-synchronized variant and
+    /// may produce racy results when another thread atomically appends to `&self`.
+    /// Nevertheless, it is always safe to use.
+    #[cfg(feature = "atomic_append")]
+    #[inline(always)]
+    pub fn len(&self) -> usize {
+        self.len_atomic_relaxed()
+    }
+    #[cfg(not(feature = "atomic_append"))]
     #[inline(always)]
     pub fn len(&self) -> usize {
         self.header().len
     }
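[Illustrative sketch, not part of the patch] How the two length accessors are meant to be used; in single-threaded code both agree:

```rust
// Sketch assuming the crate API added above.
let mut hv = header_vec::HeaderVec::with_capacity(4, ());
hv.push(1u32);
assert_eq!(hv.len(), 1); // relaxed atomic load under `atomic_append`; racy only across threads
assert_eq!(hv.len_exact(), 1); // reads through `&mut self`, always exact
```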
+    /// This gives the length of the `HeaderVec`. With `atomic_append` enabled this gives an
+    /// exact result *after* another thread atomically appended to this `HeaderVec`. It still
+    /// requires synchronization because the length may become invalidated when another thread
+    /// atomically appends data to this `HeaderVec` while we still work with the result of
+    /// this method.
+    #[cfg(not(feature = "atomic_append"))]
+    #[inline(always)]
+    pub fn len_strict(&self) -> usize {
+        self.header().len
+    }
+    #[cfg(feature = "atomic_append")]
+    #[inline(always)]
+    pub fn len_strict(&self) -> usize {
+        self.len_atomic_acquire()
+    }
+
+    /// Check whether a `HeaderVec` is empty. This uses a `&mut self` reference, is always
+    /// exact, and may be slightly faster than the non-mutable variant.
+    #[inline(always)]
+    pub fn is_empty_exact(&mut self) -> bool {
+        self.len_exact() == 0
+    }
+
+    /// Check whether a `HeaderVec` is empty. This uses a `&self` reference and may be racy
+    /// when another thread atomically appends to this `HeaderVec`.
     #[inline(always)]
     pub fn is_empty(&self) -> bool {
         self.len() == 0
     }
 
+    /// Check whether a `HeaderVec` is empty. See [`len_strict()`] about the exactness guarantees.
+    #[inline(always)]
+    pub fn is_empty_strict(&self) -> bool {
+        self.len_strict() == 0
+    }
+
     #[inline(always)]
     pub fn capacity(&self) -> usize {
         self.header().capacity
     }
 
+    /// This is the number of elements that can be added to the `HeaderVec` without reallocation.
+    #[inline(always)]
+    pub fn spare_capacity(&self) -> usize {
+        self.header().capacity - self.len_strict()
+    }
+
     #[inline(always)]
     pub fn as_slice(&self) -> &[T] {
-        unsafe { core::slice::from_raw_parts(self.start_ptr(), self.len()) }
+        unsafe { core::slice::from_raw_parts(self.start_ptr(), self.len_strict()) }
     }
 
     #[inline(always)]
     pub fn as_mut_slice(&mut self) -> &mut [T] {
-        unsafe { core::slice::from_raw_parts_mut(self.start_ptr_mut(), self.len()) }
+        unsafe { core::slice::from_raw_parts_mut(self.start_ptr_mut(), self.len_exact()) }
     }
 
     /// This is useful to check if two nodes are the same. Use it with [`HeaderVec::is`].
     #[inline(always)]
     pub fn ptr(&self) -> *const () {
-        self.ptr as *const ()
+        self.ptr.as_ptr() as *const ()
     }
 
     /// This is used to check if this is the `HeaderVec` that corresponds to the given pointer.
     /// This is useful for updating weak references after [`HeaderVec::push`] returns the pointer.
     #[inline(always)]
     pub fn is(&self, ptr: *const ()) -> bool {
-        self.ptr as *const () == ptr
+        self.ptr.as_ptr() as *const () == ptr
     }
 
     /// Create a (dangerous) weak reference to the `HeaderVec`. This is useful to be able
@@ -138,10 +204,7 @@ impl<H, T> HeaderVec<H, T> {
     #[inline(always)]
     pub unsafe fn weak(&self) -> HeaderVecWeak<H, T> {
         HeaderVecWeak {
-            header_vec: ManuallyDrop::new(Self {
-                ptr: self.ptr,
-                _phantom: PhantomData,
-            }),
+            header_vec: ManuallyDrop::new(Self { ptr: self.ptr }),
         }
     }
 
@@ -156,55 +219,126 @@ impl<H, T> HeaderVec<H, T> {
         self.ptr = weak.ptr;
     }
 
+    /// Reserves capacity for at least `additional` more elements to be inserted in the given `HeaderVec`.
+    #[inline(always)]
+    pub fn reserve(&mut self, additional: usize) -> Option<*const ()> {
+        if self.spare_capacity() < additional {
+            let len = self.len_exact();
+            unsafe { self.resize_cold(len.saturating_add(additional), false) }
+        } else {
+            None
+        }
+    }
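[Illustrative sketch, not part of the patch] How a caller is expected to consume the moved-buffer signal from `reserve`; `old` and `moved_from` are made-up names, not crate API:

```rust
let mut hv = header_vec::HeaderVec::with_capacity(1, ());
hv.push(1u8);
let old = hv.ptr();
if let Some(moved_from) = hv.reserve(64) {
    // The buffer was reallocated; any weak reference still holding `old` is stale.
    assert_eq!(moved_from, old);
    assert!(!hv.is(moved_from));
}
```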
+    /// Reserves capacity for exactly `additional` more elements to be inserted in the given `HeaderVec`.
+    #[inline]
+    pub fn reserve_exact(&mut self, additional: usize) -> Option<*const ()> {
+        if self.spare_capacity() < additional {
+            let len = self.len_exact();
+            unsafe { self.resize_cold(len.saturating_add(additional), true) }
+        } else {
+            None
+        }
+    }
+
+    /// Shrinks the capacity of the `HeaderVec` to `min_capacity` or `self.len()`, whichever is larger.
+    #[inline]
+    pub fn shrink_to(&mut self, min_capacity: usize) -> Option<*const ()> {
+        let requested_capacity = self.len_exact().max(min_capacity);
+        unsafe { self.resize_cold(requested_capacity, true) }
+    }
+
+    /// Resizes the vector to hold exactly `self.len()` elements.
+    #[inline(always)]
+    pub fn shrink_to_fit(&mut self) -> Option<*const ()> {
+        let len = self.len_exact();
+        self.shrink_to(len)
+    }
+
+    /// Resizes the vector to hold at least `requested_capacity` elements.
+    /// Does exact resizing if `exact` is true.
+    ///
+    /// Returns `Some(*const ())` if the memory was moved to a new location.
+    ///
+    /// # Safety
+    ///
+    /// `requested_capacity` must be greater than or equal to `self.len()`.
     #[cold]
-    fn resize_insert(&mut self) -> Option<*const ()> {
+    unsafe fn resize_cold(&mut self, requested_capacity: usize, exact: bool) -> Option<*const ()> {
+        // For efficiency we only do a debug_assert here; this is an internal unsafe function
+        // whose contract should already be enforced by the caller, which is under our control.
+        debug_assert!(
+            self.len_exact() <= requested_capacity,
+            "requested capacity is less than current length"
+        );
        let old_capacity = self.capacity();
-        let new_capacity = old_capacity * 2;
-        // Set the new capacity.
-        self.header_mut().capacity = new_capacity;
+
+        let new_capacity = if requested_capacity > old_capacity {
+            if exact {
+                // exact growing
+                requested_capacity
+            } else if requested_capacity <= old_capacity * 2 {
+                // doubling the capacity is sufficient
+                old_capacity * 2
+            } else if old_capacity > 0 {
+                // requested more than twice as much space: reserve the next multiple of
+                // old_capacity that is greater than the requested capacity. This gives headroom
+                // for new inserts while not doubling the memory requirement with bulk requests
+                (requested_capacity / old_capacity + 1).saturating_mul(old_capacity)
+            } else {
+                // special case when we start at capacity 0
+                requested_capacity
+            }
+        } else if exact {
+            // exact shrinking
+            requested_capacity
+        } else {
+            unimplemented!()
+            // or: (has no public API yet)
+            // // shrink to the next power of two or self.capacity, whichever is smaller
+            // requested_capacity.next_power_of_two().min(self.capacity())
+        };
 
         // Reallocate the pointer.
         let ptr = unsafe {
             alloc::alloc::realloc(
-                self.ptr as *mut u8,
+                self.ptr.as_ptr() as *mut u8,
                 Self::layout(old_capacity),
                 Self::elems_to_mem_bytes(new_capacity),
-            ) as *mut T
+            ) as *mut AlignedHeader<H, T>
         };
-        // Handle out-of-memory.
-        if ptr.is_null() {
+
+        let Some(ptr) = NonNull::new(ptr) else {
+            // Handle out-of-memory.
             alloc::alloc::handle_alloc_error(Self::layout(new_capacity));
-        }
+        };
+
         // Check if the new pointer is different than the old one.
         let previous_pointer = if ptr != self.ptr {
             // Give the user the old pointer so they can update everything.
-            Some(self.ptr as *const ())
+            Some(self.ptr())
         } else {
             None
         };
         // Assign the new pointer.
         self.ptr = ptr;
+        // And set the new capacity.
+        self.header_mut().capacity = new_capacity;
 
         previous_pointer
     }
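[Illustrative sketch, not part of the patch] The non-exact growth arithmetic above, restated with worked values; `grown_capacity` is a hypothetical helper mirroring the `requested_capacity > old_capacity && !exact` branch and assumes `old_capacity > 0`:

```rust
fn grown_capacity(old_capacity: usize, requested: usize) -> usize {
    if requested <= old_capacity * 2 {
        old_capacity * 2 // doubling the capacity is sufficient
    } else {
        // next multiple of old_capacity above the requested capacity
        (requested / old_capacity + 1).saturating_mul(old_capacity)
    }
}

fn main() {
    assert_eq!(grown_capacity(4, 5), 8); // doubling covers a small reserve
    assert_eq!(grown_capacity(4, 13), 16); // 13/4 + 1 = 4 multiples of 4 → 16
}
```

The next-multiple rule keeps some headroom after a bulk request without doubling an already large request.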
     /// Adds an item to the end of the list.
     ///
-    /// Returns `true` if the memory was moved to a new location.
+    /// Returns `Some(*const ())` if the memory was moved to a new location.
     /// In this case, you are responsible for updating the weak nodes.
     pub fn push(&mut self, item: T) -> Option<*const ()> {
-        let old_len = self.len();
+        let old_len = self.len_exact();
         let new_len = old_len + 1;
-        let old_capacity = self.capacity();
-        // If it isn't big enough.
-        let previous_pointer = if new_len > old_capacity {
-            self.resize_insert()
-        } else {
-            None
-        };
+        let previous_pointer = self.reserve(1);
         unsafe {
             core::ptr::write(self.start_ptr_mut().add(old_len), item);
         }
-        self.header_mut().len = new_len;
+        self.header_mut().len = new_len.into();
         previous_pointer
     }
 
@@ -217,7 +351,7 @@ impl<H, T> HeaderVec<H, T> {
         // This keeps track of the length (and next position) of the contiguous retained elements
         // at the beginning of the vector.
         let mut head = 0;
-        let original_len = self.len();
+        let original_len = self.len_exact();
         // Get the offset of the beginning of the slice.
         let start_ptr = self.start_ptr_mut();
         // Go through each index.
@@ -239,15 +373,51 @@ impl<H, T> HeaderVec<H, T> {
             }
         }
         // The head now represents the new length of the vector.
-        self.header_mut().len = head;
+        self.header_mut().len = head.into();
+    }
+
+    /// Returns the remaining spare capacity of the vector as a slice of
+    /// `MaybeUninit<T>`.
+    ///
+    /// The returned slice can be used to fill the vector with data (e.g. by
+    /// reading from a file) before marking the data as initialized using the
+    /// [`set_len`] method.
+    ///
+    pub fn spare_capacity_mut(&mut self) -> &mut [MaybeUninit<T>] {
+        unsafe {
+            core::slice::from_raw_parts_mut(
+                self.end_ptr_mut() as *mut MaybeUninit<T>,
+                self.spare_capacity(),
+            )
+        }
+    }
+
+    /// Forces the length of the `HeaderVec` to `new_len`.
+    ///
+    /// This is a low-level operation that maintains none of the normal
+    /// invariants of the type. Normally changing the length of a vector
+    /// is done using one of the safe operations instead. Noteworthy is that
+    /// this method does not drop any of the elements that are removed when
+    /// shrinking the vector.
+    ///
+    /// # Safety
+    ///
+    /// - `new_len` must be less than or equal to [`capacity()`].
+    /// - The elements at `old_len..new_len` must be initialized.
+    pub unsafe fn set_len(&mut self, new_len: usize) {
+        debug_assert!(
+            new_len <= self.capacity(),
+            "new_len is greater than capacity"
+        );
+        self.header_mut().len = new_len.into();
     }
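[Illustrative sketch, not part of the patch] The fill-then-commit pattern these two methods enable, mirroring `Vec::spare_capacity_mut`/`Vec::set_len`:

```rust
let mut hv = header_vec::HeaderVec::with_capacity(8, ());
let spare = hv.spare_capacity_mut();
spare[0].write(1u32);
spare[1].write(2u32);
// Safety: exactly the first two spare slots were initialized above.
unsafe { hv.set_len(2) };
assert_eq!(hv.as_slice(), &[1, 2]);
```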
 
     /// Gives the offset in units of T (as if the pointer started at an array of T) that the slice actually starts at.
     #[inline(always)]
-    fn offset() -> usize {
+    const fn offset() -> usize {
         // The first location, in units of size_of::<T>(), that is after the header
         // It's the end of the header, rounded up to the nearest size_of::<T>()
-        (mem::size_of::<HeaderVecHeader<H>>() + mem::size_of::<T>() - 1) / mem::size_of::<T>()
+        (mem::size_of::<HeaderVecHeader<H>>() - 1) / mem::size_of::<T>() + 1
     }
 
     /// Compute the number of elements (in units of T) to allocate for a given capacity.
@@ -267,7 +437,7 @@ impl<H, T> HeaderVec<H, T> {
     fn layout(capacity: usize) -> alloc::alloc::Layout {
         alloc::alloc::Layout::from_size_align(
             Self::elems_to_mem_bytes(capacity),
-            cmp::max(mem::align_of::<H>(), mem::align_of::<T>()),
+            mem::align_of::<AlignedHeader<H, T>>(),
         )
         .expect("unable to produce memory layout with Hrc key type (is it a zero sized type? they are not permitted)")
     }
 
@@ -275,25 +445,170 @@ impl<H, T> HeaderVec<H, T> {
     /// Gets the pointer to the start of the slice.
     #[inline(always)]
     fn start_ptr(&self) -> *const T {
-        unsafe { self.ptr.add(Self::offset()) }
+        unsafe { (self.ptr.as_ptr() as *const T).add(Self::offset()) }
     }
 
     /// Gets the pointer to the start of the slice.
     #[inline(always)]
     fn start_ptr_mut(&mut self) -> *mut T {
-        unsafe { self.ptr.add(Self::offset()) }
+        unsafe { (self.ptr.as_ptr() as *mut T).add(Self::offset()) }
     }
 
+    /// Gets the pointer to the end of the slice. This returns a mutable pointer to
+    /// uninitialized memory behind the last element.
+    #[inline(always)]
+    fn end_ptr_mut(&mut self) -> *mut T {
+        unsafe { self.start_ptr_mut().add(self.len_exact()) }
+    }
+
     #[inline(always)]
     fn header(&self) -> &HeaderVecHeader<H> {
         // The beginning of the memory is always the header.
-        unsafe { &*(self.ptr as *const HeaderVecHeader<H>) }
+        unsafe { &*(self.ptr.as_ptr() as *const HeaderVecHeader<H>) }
     }
 
     #[inline(always)]
     fn header_mut(&mut self) -> &mut HeaderVecHeader<H> {
         // The beginning of the memory is always the header.
-        unsafe { &mut *(self.ptr as *mut HeaderVecHeader<H>) }
+        unsafe { &mut *(self.ptr.as_ptr() as *mut HeaderVecHeader<H>) }
     }
 }
+
+impl<H, T: Clone> HeaderVec<H, T> {
+    /// Adds items from a slice to the end of the list.
+    ///
+    /// Returns `Some(*const ())` if the memory was moved to a new location.
+    /// In this case, you are responsible for updating the weak nodes.
+    pub fn extend_from_slice(&mut self, slice: &[T]) -> Option<*const ()> {
+        let previous_pointer = self.reserve(slice.len());
+
+        // copy data
+        let end_ptr = self.end_ptr_mut();
+        for (index, item) in slice.iter().enumerate() {
+            unsafe {
+                core::ptr::write(end_ptr.add(index), item.clone());
+            }
+        }
+        // correct the len
+        self.header_mut().len = (self.len_exact() + slice.len()).into();
+
+        previous_pointer
+    }
+}
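[Illustrative sketch, not part of the patch] Usage of `extend_from_slice`; as with `push`, a returned pointer means weak references must be fixed up:

```rust
let mut hv = header_vec::HeaderVec::new(());
if let Some(old_location) = hv.extend_from_slice(&[1u8, 2, 3]) {
    // Buffer moved: update any weak references still pointing at `old_location`.
    let _ = old_location;
}
assert_eq!(hv.as_slice(), &[1, 2, 3]);
```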
+
+#[cfg(feature = "atomic_append")]
+/// The atomic append API is only enabled when the `atomic_append` feature flag is set (which
+/// is the default). The [`push_atomic()`] and [`extend_from_slice_atomic()`] methods then
+/// become available, and some internals use atomic operations.
+///
+/// This API implements interior-mutable appending to a shared `HeaderVec`. To other threads
+/// the appended elements are either not seen or all seen at once. Without additional
+/// synchronization these appends are racy but memory safe. The intention behind this API is to
+/// provide facilities for building other container abstractions that benefit from the shared
+/// non-blocking nature while being unaffected by the racy semantics or providing synchronization
+/// on their own (e.g. reference counted data, interners, streaming parsers, etc.). Since the
+/// `HeaderVec` is a shared object and we have only a `&self`, it cannot be reallocated and moved;
+/// therefore appending can only be done within the reserved capacity.
+///
+/// # Safety
+///
+/// Only one single thread may call [`push_atomic()`] or [`extend_from_slice_atomic()`] on a
+/// `HeaderVec` at a time. The actual implementation of this restriction is left to the caller.
+/// This can be done with mutexes or guard objects, by staying single threaded, or by ensuring
+/// in some other way that there is only a single thread using the atomic append API.
+impl<H, T> HeaderVec<H, T> {
+    /// Atomically adds an item to the end of the list without reallocation.
+    ///
+    /// # Errors
+    ///
+    /// If the vector is full, the item is returned.
+    ///
+    /// # Safety
+    ///
+    /// There must be only one thread calling this method at any time. Synchronization has to
+    /// be provided by the user.
+    pub unsafe fn push_atomic(&self, item: T) -> Result<(), T> {
+        // relaxed is good enough here because this should be the only thread calling this method.
+        let len = self.len_atomic_relaxed();
+        if len < self.capacity() {
+            unsafe {
+                core::ptr::write(self.end_ptr_atomic_mut(), item);
+            };
+            let len_again = self.len_atomic_add_release(1);
+            // in debug builds we check for races; the chance to catch one is still pretty minimal
+            debug_assert_eq!(len_again, len, "len was updated by another thread");
+            Ok(())
+        } else {
+            Err(item)
+        }
+    }
+
+    /// Get the length of the vector with `Ordering::Acquire`. This ensures that the length is
+    /// properly synchronized after it got atomically updated.
+    #[inline(always)]
+    fn len_atomic_acquire(&self) -> usize {
+        self.header().len.load(Ordering::Acquire)
+    }
+
+    /// Get the length of the vector with `Ordering::Relaxed`. This is useful when you don't
+    /// need exact synchronization semantics.
+    #[inline(always)]
+    fn len_atomic_relaxed(&self) -> usize {
+        self.header().len.load(Ordering::Relaxed)
+    }
+
+    /// Add `n` to the length of the vector atomically with `Ordering::Release`.
+    ///
+    /// # Safety
+    ///
+    /// Before incrementing the length of the vector, you must ensure that the new elements are
+    /// properly initialized.
+    #[inline(always)]
+    unsafe fn len_atomic_add_release(&self, n: usize) -> usize {
+        self.header().len.fetch_add(n, Ordering::Release)
+    }
+
+    /// Gets the pointer to the end of the slice. This returns a mutable pointer to
+    /// uninitialized memory behind the last element.
+    #[inline(always)]
+    fn end_ptr_atomic_mut(&self) -> *mut T {
+        unsafe { self.start_ptr().add(self.len_atomic_acquire()) as *mut T }
+    }
+}
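[Illustrative sketch, not part of the patch] Single-writer appending through a shared reference, assuming the default `atomic_append` feature:

```rust
let hv = header_vec::HeaderVec::with_capacity(2, ());
// Safety: this is the only thread using the atomic append API on `hv`.
unsafe {
    hv.push_atomic(1u32).unwrap();
    hv.push_atomic(2).unwrap();
    // No reallocation ever happens here; a full vector hands the item back.
    assert_eq!(hv.push_atomic(3), Err(3));
}
assert_eq!(hv.len(), 2);
```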
+
+#[cfg(feature = "atomic_append")]
+impl<H, T: Clone> HeaderVec<H, T> {
+    /// Atomically adds items from a slice to the end of the list, without reallocation.
+    ///
+    /// # Errors
+    ///
+    /// If the vector lacks spare capacity for the whole slice, the slice is returned.
+    ///
+    /// # Safety
+    ///
+    /// There must be only one thread calling this method at any time. Synchronization has to
+    /// be provided by the user.
+    pub unsafe fn extend_from_slice_atomic<'a>(&self, slice: &'a [T]) -> Result<(), &'a [T]> {
+        #[cfg(debug_assertions)] // only for the race check later
+        let len = self.len_atomic_relaxed();
+        if self.spare_capacity() >= slice.len() {
+            // copy data
+            let end_ptr = self.end_ptr_atomic_mut();
+            for (index, item) in slice.iter().enumerate() {
+                unsafe {
+                    core::ptr::write(end_ptr.add(index), item.clone());
+                }
+            }
+            // correct the len
+            let len_again = self.len_atomic_add_release(slice.len());
+            // in debug builds we check for races; the chance to catch one is still pretty minimal
+            #[cfg(debug_assertions)]
+            debug_assert_eq!(len_again, len, "len was updated by another thread");
+            Ok(())
+        } else {
+            Err(slice)
+        }
+    }
+}
@@ -301,10 +616,10 @@ impl<H, T> Drop for HeaderVec<H, T> {
     fn drop(&mut self) {
         unsafe {
             ptr::drop_in_place(&mut self.header_mut().head);
-            for ix in 0..self.len() {
+            for ix in 0..self.len_exact() {
                 ptr::drop_in_place(self.start_ptr_mut().add(ix));
             }
-            alloc::alloc::dealloc(self.ptr as *mut u8, Self::layout(self.capacity()));
+            alloc::alloc::dealloc(self.ptr.as_ptr() as *mut u8, Self::layout(self.capacity()));
         }
     }
 }
@@ -363,7 +678,7 @@ where
     T: Clone,
 {
     fn clone(&self) -> Self {
-        let mut new_vec = Self::with_capacity(self.len(), self.header().head.clone());
+        let mut new_vec = Self::with_capacity(self.len_strict(), self.header().head.clone());
         for e in self.as_slice() {
             new_vec.push(e.clone());
         }
diff --git a/tests/atomic_append.rs b/tests/atomic_append.rs
new file mode 100644
index 0000000..42f5535
--- /dev/null
+++ b/tests/atomic_append.rs
@@ -0,0 +1,27 @@
+#![cfg(feature = "atomic_append")]
+extern crate std;
+
+use header_vec::*;
+
+#[test]
+fn test_atomic_append() {
+    let mut hv = HeaderVec::with_capacity(10, ());
+
+    hv.push(1);
+    unsafe { hv.push_atomic(2).unwrap() };
+    hv.push(3);
+
+    assert_eq!(hv.len(), 3);
+    assert_eq!(hv.as_slice(), [1, 2, 3]);
+}
+
+#[test]
+fn test_extend_from_slice() {
+    let hv = HeaderVec::with_capacity(6, ());
+
+    unsafe {
+        hv.extend_from_slice_atomic(&[0, 1, 2]).unwrap();
+        hv.extend_from_slice_atomic(&[3, 4, 5]).unwrap();
+    }
+    assert_eq!(hv.as_slice(), &[0, 1, 2, 3, 4, 5]);
+}
diff --git a/tests/simple.rs b/tests/simple.rs
index 2b42d85..092f498 100644
--- a/tests/simple.rs
+++ b/tests/simple.rs
@@ -11,6 +11,31 @@ struct TestA {
     c: usize,
 }
 
+#[test]
+fn test_sizeof() {
+    // assert that HeaderVec is really a single lean pointer
+    assert_eq!(
+        core::mem::size_of::<HeaderVec<(), ()>>(),
+        core::mem::size_of::<*mut ()>()
+    );
+    // and has space for niche optimization
+    assert_eq!(
+        core::mem::size_of::<HeaderVec<(), ()>>(),
+        core::mem::size_of::<Option<HeaderVec<(), ()>>>()
+    );
+}
+
+#[test]
+fn test_empty() {
+    let mut v_empty = HeaderVec::with_capacity(0, TestA { a: 4, b: !0, c: 66 });
+
+    assert_eq!(0, v_empty.len());
+    assert_eq!(0, v_empty.capacity());
+    assert_eq!(0, v_empty.as_slice().len());
+
+    v_empty.extend_from_slice("the quick brown fox jumps over the lazy dog".as_bytes());
+}
+
 #[test]
 fn test_head_array() {
     let mut v_orig = HeaderVec::new(TestA { a: 4, b: !0, c: 66 });
@@ -44,3 +69,21 @@ fn test_head_array() {
         v_orig.as_slice().iter().copied().collect::<String>()
     );
 }
+
+// This previously triggered a Miri error.
+#[test]
+fn test_push() {
+    let mut hv = HeaderVec::with_capacity(10, ());
+
+    hv.push(123);
+    assert_eq!(hv[0], 123);
+}
+
+#[test]
+fn test_extend_from_slice() {
+    let mut hv = HeaderVec::new(());
+
+    hv.extend_from_slice(&[0, 1, 2]);
+    hv.extend_from_slice(&[3, 4, 5]);
+    assert_eq!(hv.as_slice(), &[0, 1, 2, 3, 4, 5]);
+}
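[Editor suggestion, not included in this PR] The shrinking API could use a round-trip test as well; a sketch relying on the exact-shrink branch of `resize_cold` above:

```rust
#[test]
fn test_shrink_to_fit() {
    let mut hv = header_vec::HeaderVec::with_capacity(32, ());
    hv.extend_from_slice(&[1u8, 2, 3]);
    hv.shrink_to_fit();
    assert_eq!(hv.capacity(), 3);
    assert_eq!(hv.as_slice(), &[1, 2, 3]);
}
```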