
Commit c001df0

Make the rodeo generic over what it interns
1 parent 85aacda commit c001df0

22 files changed: +1312, -839 lines

examples/non_string_keys.rs

Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
+use lasso::Rodeo;
+
+fn main() {
+    // Create a rodeo that interns Vec<i32> instead of String
+    let mut rodeo: Rodeo<Vec<i32>> = Rodeo::new();
+
+    // Intern some integer sequences
+    let a = rodeo.get_or_intern(vec![1, 2, 3]);
+    let b = rodeo.get_or_intern(vec![4, 5, 6, 7, 8]);
+
+    // Interning the same value returns the same key
+    let a2 = rodeo.get_or_intern(vec![1, 2, 3]);
+    assert_eq!(a, a2);
+
+    // Resolve keys back to values
+    assert_eq!(rodeo.resolve(&a), &[1, 2, 3]);
+    assert_eq!(rodeo.resolve(&b), &[4, 5, 6, 7, 8]);
+
+    // Lookup by value
+    assert_eq!(rodeo.get([1, 2, 3].as_slice()), Some(a));
+    assert_eq!(rodeo.get([7, 8, 9].as_slice()), None);
+
+    println!("Interned {} sequences", rodeo.len());
+}
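
Note: this example and the diff below lean on the Internable/InternableRef traits from src/rodeo.rs, which are not part of this page. Inferred from how atomic_bucket.rs uses them (T::Ref::ALIGNMENT, as_bytes, len, from_raw_parts), a minimal sketch of the pair might look like the following; the trait names come from the diff, but the exact signatures here are assumptions, not the committed definitions:

    /// An owned value that a `Rodeo<T>` can intern, e.g. `String` or `Vec<i32>`
    pub trait Internable {
        /// The unsized borrowed form handed out by the interner,
        /// e.g. `String -> str`, `Vec<i32> -> [i32]`
        type Ref: ?Sized + InternableRef;
    }

    /// The borrowed form that actually lives inside the arena buckets
    pub trait InternableRef {
        /// Required alignment of the backing storage; must be a power of two
        const ALIGNMENT: usize;

        /// The raw bytes that get copied into a bucket
        fn as_bytes(&self) -> &[u8];

        /// The element count later passed back to `from_raw_parts`
        fn len(&self) -> usize;

        /// Reconstitute a reference from bucket memory
        ///
        /// # Safety
        /// `ptr` must point to `len` valid elements, aligned to `ALIGNMENT`
        unsafe fn from_raw_parts<'a>(ptr: *const u8, len: usize) -> &'a Self;
    }

    impl Internable for Vec<i32> {
        type Ref = [i32];
    }

    impl InternableRef for [i32] {
        const ALIGNMENT: usize = core::mem::align_of::<i32>();

        fn as_bytes(&self) -> &[u8] {
            // View the i32 elements as raw bytes (4 bytes per element)
            unsafe {
                core::slice::from_raw_parts(self.as_ptr().cast::<u8>(), core::mem::size_of_val(self))
            }
        }

        fn len(&self) -> usize {
            // Element count, not byte count
            <[i32]>::len(self)
        }

        unsafe fn from_raw_parts<'a>(ptr: *const u8, len: usize) -> &'a Self {
            core::slice::from_raw_parts(ptr.cast::<i32>(), len)
        }
    }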

src/arenas/atomic_bucket.rs

Lines changed: 78 additions & 59 deletions
@@ -1,31 +1,34 @@
-use crate::{LassoError, LassoErrorKind, LassoResult};
+use crate::{
+    rodeo::{Internable, InternableRef},
+    LassoError, LassoErrorKind, LassoResult,
+};
 use alloc::alloc::{alloc, dealloc, Layout};
 use core::{
     hint,
-    mem::{align_of, size_of},
+    marker::PhantomData,
+    mem::size_of,
     num::NonZeroUsize,
     ptr::{self, addr_of_mut, NonNull},
-    slice,
     sync::atomic::{AtomicPtr, AtomicUsize, Ordering},
 };

-pub(super) struct AtomicBucketList {
+pub(super) struct AtomicBucketList<T: Internable> {
     /// The first bucket in the list, will be null if the list currently
     /// has no buckets
-    head: AtomicPtr<AtomicBucket>,
+    head: AtomicPtr<AtomicBucket<T>>,
 }

-impl AtomicBucketList {
+impl<T: Internable> AtomicBucketList<T> {
     /// Create a new bucket list
     pub fn new(first_bucket_capacity: NonZeroUsize) -> LassoResult<Self> {
-        let bucket = AtomicBucket::with_capacity(first_bucket_capacity)?;
+        let bucket = AtomicBucket::<T>::with_capacity(first_bucket_capacity)?;

         Ok(Self {
             head: AtomicPtr::new(bucket.as_ptr()),
         })
     }

-    pub fn iter(&self) -> AtomicBucketIter<'_> {
+    pub fn iter(&self) -> AtomicBucketIter<'_, T> {
         AtomicBucketIter {
             current: &self.head,
         }
@@ -42,7 +45,7 @@ impl AtomicBucketList {
         self.len() == 0
     }

-    pub fn push_front(&self, bucket: BucketRef) {
+    pub fn push_front(&self, bucket: BucketRef<T>) {
         let bucket_ptr = bucket.as_ptr();
         let mut head_ptr = self.head.load(Ordering::Acquire);

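Note: the body of push_front is elided by this hunk; given the head load shown above, what follows is presumably the standard lock-free push-front loop. A self-contained sketch of that pattern (node type, names, and orderings are assumptions):

    use std::sync::atomic::{AtomicPtr, Ordering};

    struct Node {
        next: AtomicPtr<Node>,
    }

    /// Point the new node at the current head, then CAS the head over to the
    /// new node, retrying against whatever head another thread installed.
    fn push_front(head: &AtomicPtr<Node>, node: *mut Node) {
        let mut head_ptr = head.load(Ordering::Acquire);
        loop {
            // Link the new node in front of the current head
            unsafe { (*node).next.store(head_ptr, Ordering::Release) };
            match head.compare_exchange_weak(head_ptr, node, Ordering::AcqRel, Ordering::Acquire) {
                Ok(_) => return,
                // Lost the race; retry with the freshly observed head
                Err(current) => head_ptr = current,
            }
        }
    }
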
@@ -73,7 +76,7 @@ impl AtomicBucketList {
     }
 }

-impl Drop for AtomicBucketList {
+impl<T: Internable> Drop for AtomicBucketList<T> {
     fn drop(&mut self) {
         // Safety: We should have exclusive access to all buckets
         unsafe {
@@ -89,7 +92,7 @@ impl Drop for AtomicBucketList {

             // Get the layout of the current bucket so we can deallocate it
             let capacity = (*current_ptr).capacity;
-            let layout = AtomicBucket::layout(capacity)
+            let layout = AtomicBucket::<T>::layout(capacity)
                 .expect("buckets with invalid capacities can't be constructed");

             // Deallocate all memory that the bucket allocated
@@ -99,12 +102,12 @@ impl Drop for AtomicBucketList {
     }
 }

-pub(super) struct AtomicBucketIter<'a> {
-    current: &'a AtomicPtr<AtomicBucket>,
+pub(super) struct AtomicBucketIter<'a, T: Internable> {
+    current: &'a AtomicPtr<AtomicBucket<T>>,
 }

-impl<'a> Iterator for AtomicBucketIter<'a> {
-    type Item = BucketRef;
+impl<'a, T: Internable> Iterator for AtomicBucketIter<'a, T> {
+    type Item = BucketRef<T>;

     fn next(&mut self) -> Option<Self::Item> {
         let current = self.current.load(Ordering::Acquire);
@@ -121,26 +124,26 @@ impl<'a> Iterator for AtomicBucketIter<'a> {

 /// A unique reference to an atomic bucket
 #[repr(transparent)]
-pub(super) struct UniqueBucketRef {
-    bucket: BucketRef,
+pub(super) struct UniqueBucketRef<T: Internable> {
+    bucket: BucketRef<T>,
 }

-impl UniqueBucketRef {
+impl<T: Internable> UniqueBucketRef<T> {
     /// Create a new unique bucket ref
     ///
     /// # Safety
     ///
     /// The pointer must have exclusive, mutable and unique access to the pointed-to
     /// bucket
     #[inline]
-    const unsafe fn new(bucket: NonNull<AtomicBucket>) -> Self {
+    const unsafe fn new(bucket: NonNull<AtomicBucket<T>>) -> Self {
         Self {
             bucket: unsafe { BucketRef::new(bucket) },
         }
     }

     #[inline]
-    pub const fn as_ptr(&self) -> *mut AtomicBucket {
+    pub const fn as_ptr(&self) -> *mut AtomicBucket<T> {
         self.bucket.as_ptr()
     }

@@ -180,64 +183,72 @@ impl UniqueBucketRef {
     ///
     /// # Safety
     ///
-    /// The returned `&'static str` (and all copies of it) must be dropped
+    /// The returned `&'static T::Ref` (and all copies of it) must be dropped
     /// before the current bucket is, as this bucket contains the backing
-    /// memory for the string.
+    /// memory for the data.
     /// Additionally, the underlying [`AtomicBucket`] must have enough room
-    /// to store the entire slice and the given slice must be valid utf-8 data.
+    /// to store the entire value (including alignment padding).
     ///
-    pub unsafe fn push_slice(&mut self, slice: &[u8]) -> &'static str {
+    pub unsafe fn push_slice(&mut self, value: &T::Ref) -> &'static T::Ref {
         let len = self.len();
+        let slice = value.as_bytes();
+        let count = value.len();
+
+        // Align the index to the required alignment for T::Ref
+        let align = T::Ref::ALIGNMENT;
+        let aligned_len = (len + align - 1) & !(align - 1);

         if cfg!(debug_assertions) {
             let capacity = self.capacity().get();

-            debug_assert_ne!(len, capacity);
-            debug_assert!(slice.len() <= capacity - len);
+            debug_assert_ne!(aligned_len, capacity);
+            debug_assert!(aligned_len + slice.len() <= capacity);
         }

-        // Get a pointer to the start of the free data
-        let ptr = unsafe { addr_of_mut!((*self.as_ptr())._data).cast::<u8>().add(len) };
+        // Get a pointer to the aligned start of the free data
+        let ptr = unsafe {
+            addr_of_mut!((*self.as_ptr())._data)
+                .cast::<u8>()
+                .add(aligned_len)
+        };

-        // Make the slice that we'll fill with the string's data
-        let target = unsafe { slice::from_raw_parts_mut(ptr, slice.len()) };
-        // Copy the data from the source string into the bucket's buffer
-        target.copy_from_slice(slice);
+        // Copy the data from the source into the bucket's buffer
+        unsafe { ptr.copy_from_nonoverlapping(slice.as_ptr(), slice.len()) };

-        // Increment the index so that the string we just added isn't overwritten
+        // Increment the index so that the data we just added isn't overwritten
         // Safety: All bytes are initialized and the length is <= capacity
-        unsafe { self.set_len(len + slice.len()) };
-
-        // Create a string from that slice
-        // Safety: The source string was valid utf8, so the created buffer will be as well
+        unsafe { self.set_len(aligned_len + slice.len()) };

-        unsafe { core::str::from_utf8_unchecked(target) }
+        // Create a reference from the allocated data
+        // Safety: The source data was valid, so the created buffer will be as well.
+        // The pointer is properly aligned because we aligned the index above.
+        unsafe { T::Ref::from_raw_parts(ptr, count) }
     }

     #[inline]
-    pub(crate) const fn into_ref(self) -> BucketRef {
+    pub(crate) fn into_ref(self) -> BucketRef<T> {
         self.bucket
     }
 }

 /// A reference to an [`AtomicBucket`]
 #[repr(transparent)]
-pub(super) struct BucketRef {
-    bucket: NonNull<AtomicBucket>,
+pub(super) struct BucketRef<T: Internable> {
+    bucket: NonNull<AtomicBucket<T>>,
 }

-impl BucketRef {
+impl<T: Internable> BucketRef<T> {
     /// Create a new [`BucketRef`]
     ///
     /// # Safety
     ///
     /// `bucket` must be a valid pointer to an [`AtomicBucket`]
-    const unsafe fn new(bucket: NonNull<AtomicBucket>) -> Self {
+    const unsafe fn new(bucket: NonNull<AtomicBucket<T>>) -> Self {
         Self { bucket }
     }

     #[inline]
-    pub const fn as_ptr(&self) -> *mut AtomicBucket {
+    pub const fn as_ptr(&self) -> *mut AtomicBucket<T> {
         self.bucket.as_ptr()
     }

@@ -261,16 +272,22 @@ impl BucketRef {
         unsafe { addr_of_mut!((*self.as_ptr())._data).cast::<u8>().add(start) }
     }

+    /// Try to atomically reserve space for `additional` bytes with the given alignment.
+    /// Returns the aligned start position on success.
     pub fn try_inc_length(&self, additional: usize) -> Result<usize, ()> {
         debug_assert_ne!(additional, 0);

         let length = self.length();
         let capacity = self.capacity().get();
+        let align = T::Ref::ALIGNMENT;

         // TODO: Add backoff to this loop so we don't thrash it
         let mut len = length.load(Ordering::Acquire);
         for _ in 0..100 {
-            let new_length = len + additional;
+            // Compute the aligned start position
+            let aligned_start = (len + align - 1) & !(align - 1);
+            let new_length = aligned_start + additional;
+
             if new_length <= capacity {
                 match length.compare_exchange_weak(
                     len,
@@ -279,8 +296,8 @@ impl BucketRef {
                     Ordering::Acquire,
                 ) {
                     Ok(_) => {
-                        debug_assert!(len < capacity && len + additional <= capacity);
-                        return Ok(len);
+                        debug_assert!(aligned_start < capacity && new_length <= capacity);
+                        return Ok(aligned_start);
                     }
                     Err(loaded) => {
                         hint::spin_loop();
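
Note: try_inc_length is a bounded lock-free bump reservation: align the current index, attempt to CAS the length forward, and on contention retry from the value the CAS observed. A self-contained sketch of the same pattern; the elided Err arm and the success ordering here are assumptions:

    use std::sync::atomic::{AtomicUsize, Ordering};

    /// Reserve `additional` bytes at `align` from a shared bump index,
    /// returning the aligned start offset, or Err(()) if the region is full
    /// or contention persists past the retry budget.
    fn try_reserve(length: &AtomicUsize, capacity: usize, additional: usize, align: usize) -> Result<usize, ()> {
        let mut len = length.load(Ordering::Acquire);
        for _ in 0..100 {
            let aligned_start = (len + align - 1) & !(align - 1);
            let new_length = aligned_start + additional;
            if new_length > capacity {
                return Err(()); // the value doesn't fit in this bucket
            }
            match length.compare_exchange_weak(len, new_length, Ordering::AcqRel, Ordering::Acquire) {
                Ok(_) => return Ok(aligned_start),
                Err(current) => {
                    // Another thread advanced the index; retry from its value
                    std::hint::spin_loop();
                    len = current;
                }
            }
        }
        Err(()) // bounded retries exhausted
    }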
@@ -297,7 +314,7 @@ impl BucketRef {
 }

 #[repr(C)]
-pub(super) struct AtomicBucket {
+pub(super) struct AtomicBucket<T: Internable> {
     /// The next bucket in the list, will be null if this is the last bucket
     next: AtomicPtr<Self>,

@@ -314,11 +331,16 @@ pub(super) struct AtomicBucket {
     /// Invariant: Never touch this field manually, it contains uninitialized data up
     /// to the length of `capacity`
     _data: [u8; 0],
+
+    /// Marker for the internable type
+    _marker: PhantomData<T>,
 }

-impl AtomicBucket {
+impl<T: Internable> AtomicBucket<T> {
+    const ALIGN: usize = T::Ref::ALIGNMENT;
+
     /// Allocates a bucket with space for `capacity` items
-    pub(crate) fn with_capacity(capacity: NonZeroUsize) -> LassoResult<UniqueBucketRef> {
+    pub(crate) fn with_capacity(capacity: NonZeroUsize) -> LassoResult<UniqueBucketRef<T>> {
         // Create the bucket's layout
         let layout = Self::layout(capacity)?;
         debug_assert_ne!(layout.size(), 0);
@@ -360,14 +382,11 @@ impl AtomicBucket {
         let len = Layout::new::<usize>();
         let cap = Layout::new::<NonZeroUsize>();

-        // Safety: Align will always be a non-zero power of two and the
+        // Safety: ALIGN is a non-zero power of two (checked at construction) and the
         // size will not overflow when rounded up
-        debug_assert!(
-            Layout::from_size_align(size_of::<u8>() * capacity.get(), align_of::<u8>()).is_ok()
-        );
-        let data = unsafe {
-            Layout::from_size_align_unchecked(size_of::<u8>() * capacity.get(), align_of::<u8>())
-        };
+        debug_assert!(Layout::from_size_align(size_of::<u8>() * capacity.get(), Self::ALIGN).is_ok());
+        let data =
+            unsafe { Layout::from_size_align_unchecked(size_of::<u8>() * capacity.get(), Self::ALIGN) };

         next.extend(len)
             .and_then(|(layout, _)| layout.extend(cap))
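
Note: layout composes the #[repr(C)] header fields and the trailing data region via Layout::extend, with the data region now aligned to Self::ALIGN instead of align_of::<u8>(). A standalone sketch of the same computation, using simplified stand-ins for the real field types (AtomicPtr<Self>, AtomicUsize, NonZeroUsize):

    use std::alloc::{Layout, LayoutError};

    fn bucket_layout(capacity: usize, align: usize) -> Result<Layout, LayoutError> {
        // Header fields, in #[repr(C)] declaration order
        let next = Layout::new::<*mut u8>();
        let len = Layout::new::<usize>();
        let cap = Layout::new::<usize>();
        // Trailing data region with the interned type's alignment
        let data = Layout::from_size_align(capacity, align)?;

        // `extend` appends each field, inserting padding so the field is
        // properly aligned, and also returns the field's offset (unused here)
        let (layout, _) = next.extend(len)?;
        let (layout, _) = layout.extend(cap)?;
        let (layout, _) = layout.extend(data)?;

        // Round the total size up to a multiple of the overall alignment
        Ok(layout.pad_to_align())
    }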
@@ -377,5 +396,5 @@ impl AtomicBucket {
     }
 }

-unsafe impl Send for AtomicBucket {}
-unsafe impl Sync for AtomicBucket {}
+unsafe impl<T: Internable> Send for AtomicBucket<T> {}
+unsafe impl<T: Internable> Sync for AtomicBucket<T> {}
