Skip to content

Add a Bump interner #64

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ maintenance = { status = "actively-developed" }
[dependencies]

ahash = { version = "0.8.5", optional = true }
bumpalo = { version = "3.17.0", optional = true }
dashmap = { version = "5.4.0", optional = true }
once_cell = { version = "1.4", optional = true }
tinyset = { version = "0.4.2", optional = true }
Expand All @@ -52,6 +53,7 @@ portable-atomic = { version = "1", default-features = false, optional = true }
arc = ["std", "dep:ahash", "dep:dashmap", "dep:once_cell"]
bench = ["arc", "arena", "_experimental-new-intern", "dep:memorable-wordlist"]
arena = ["alloc"]
bump = ["alloc", "dep:bumpalo"]
intern = ["alloc"]
default = ["intern", "std"]
_experimental-new-intern = ["alloc", "dep:append-only-vec"]
Expand Down
172 changes: 172 additions & 0 deletions src/bump.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
use core::{
borrow::Borrow,
cell::Cell,
hash::{BuildHasher, Hash, Hasher},
ptr::NonNull,
};
use hashbrown::{hash_map::RawEntryMut, HashMap};

#[cfg(test)]
use std::println;

/// A bump-arena for storing interned data
///
/// You can use a `Bump<T>` to intern data of type `T`. Every distinct value
/// is stored exactly once in a `bumpalo::Bump` arena, and the arena's memory
/// is released when the `Bump` is dropped.
///
/// Note that `bumpalo` does not run destructors for the values it stores, and
/// `Bump` does not run them either. Anything an interned `T` owns outside the
/// arena (for example the heap buffer of a `String`) is therefore leaked
/// rather than freed when the `Bump` is dropped. Prefer types without drop
/// glue, such as `&'static str` or plain-data structs.
///
/// # Example
/// ```rust
/// use internment::Bump;
/// let arena: Bump<&'static str> = Bump::new();
/// let x = arena.intern("hello");
/// let y = arena.intern("world");
/// assert_ne!(x, y);
/// println!("The conventional greeting is '{} {}'", x, y);
/// ```
#[cfg_attr(docsrs, doc(cfg(feature = "bump")))]
pub struct Bump<T, S = hashbrown::DefaultHashBuilder> {
    // Backing storage for every interned value.
    arena: bumpalo::Bump,
    // Set of pointers into `arena`, hashed and compared by pointee. It lives
    // in a `Cell` so that interning can work through a shared `&self`.
    interner: Cell<HashMap<Interned<T>, (), S>>,
}

impl<T, S: Default> Bump<T, S> {
/// Allocate a new `Bump`
#[inline]
pub fn new() -> Self {
Bump {
arena: bumpalo::Bump::new(),
interner: Default::default(),
}
}
}
impl<T: Eq + Hash, S: Default + BuildHasher> Bump<T, S> {
/// Intern a value.
///
/// If this value has not previously been interned, then `intern` will
/// allocate a spot for the value on the heap. Otherwise, it will return a
/// pointer to the object previously allocated.
pub fn intern(&self, val: T) -> &T {
self.intern_inner(val)
}

/// Same as [`intern`](Self::intern), but does not clone if the value is already interned.
pub fn intern_ref(&self, val: &T) -> &T
where
T: Clone,
{
self.intern_inner(val)
}

fn intern_inner<Q: Internable<T>>(&self, val: Q) -> &T {
let mut interner = self.interner.take();
let entry = interner.raw_entry_mut().from_key(val.borrow());
let r = match entry {
RawEntryMut::Vacant(v) => {
let r = &*self.arena.alloc(val.to_owned());
v.insert(Interned(NonNull::from(r)), ());
r
}
RawEntryMut::Occupied(o) => {
let key = o.key();
// SAFETY: We are creating a ref with the same lifetime as
// `&self` (the enclosing `Bump`).
unsafe { key.deref() }
}
};
self.interner.set(interner);
r
}
}

impl<T, S: Default> Default for Bump<T, S> {
#[inline]
fn default() -> Self {
Self::new()
}
}

/// Private helper trait that lets one interning routine accept either an
/// owned `T` or a borrowed `&T`.
///
/// The `Borrow<T>` supertrait provides the view used for lookup; `to_owned`
/// is only called when an owned value is really required.
trait Internable<T>: Borrow<T> {
    /// Turn `self` into an owned `T`.
    fn to_owned(self) -> T;
}

/// An owned value is already what we need: hand it back unchanged.
impl<T> Internable<T> for T {
    fn to_owned(self) -> T {
        self
    }
}

/// A borrowed value has to be cloned to obtain an owned `T`.
impl<T: Clone> Internable<T> for &T {
    fn to_owned(self) -> T {
        T::clone(self)
    }
}

// A pointer to a value allocated in the `Bump` arena, behaving much like a
// `&'static T`. The pointer itself is always valid to read while the arena
// is alive, but any `&'a T` produced from it must not outlive the `Bump`
// that owns the arena (which is also what owns this `Interned<T>`).
struct Interned<T>(NonNull<T>);

impl<T> Interned<T> {
    /// Borrow the pointee with a lifetime chosen by the caller.
    ///
    /// # Safety
    ///
    /// `'a` is not tied to anything, so the caller must make sure the
    /// returned reference does not outlive the enclosing `Bump`, whose arena
    /// stores the value.
    unsafe fn deref<'a>(&self) -> &'a T {
        // SAFETY: the pointer was created from a reference to an arena slot;
        // the caller guarantees `'a` stays within the arena's lifetime.
        unsafe { &*self.0.as_ptr() }
    }

    /// Borrow the pointee for as long as this `Interned` is borrowed.
    fn borrow(&self) -> &T {
        // SAFETY: The `self: Interned` only exists in the `interner` field.
        // Any reference to it is therefore bounded by the containing `Bump`,
        // which keeps the `arena` field alive for at least that long, so a
        // `&T` into the arena with this lifetime is fine to hand out.
        unsafe { self.deref() }
    }
}

// Lets the hash map look entries up by `&T` instead of by `Interned<T>`.
impl<T> Borrow<T> for Interned<T> {
    fn borrow(&self) -> &T {
        // Explicitly the inherent method above, not a recursive trait call.
        Interned::borrow(self)
    }
}

// Equality and hashing go through the pointee so that they agree with the
// `Borrow<T>` view, as hash-map lookups by `&T` require.
impl<T: PartialEq> PartialEq for Interned<T> {
    fn eq(&self, other: &Self) -> bool {
        let lhs: &T = Interned::borrow(self);
        let rhs: &T = Interned::borrow(other);
        lhs == rhs
    }
}

impl<T: Eq> Eq for Interned<T> {}

impl<T: Hash> Hash for Interned<T> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        let pointee: &T = Interned::borrow(self);
        pointee.hash(state);
    }
}

#[test]
fn eq_string() {
    let arena = Bump::<&'static str>::new();

    // Interning the same value twice must yield the same arena slot.
    let first: *const &'static str = arena.intern("hello");
    let again: *const &'static str = arena.intern("hello");
    assert_eq!(first, again);

    // Different values must live in different slots.
    let goodbye: *const &'static str = arena.intern("goodbye");
    let farewell: *const &'static str = arena.intern("farewell");
    assert_ne!(goodbye, farewell);
}
#[test]
fn display() {
    // An interned value can be formatted with `Display` through the reference.
    let arena: Bump<&'static str> = Bump::new();
    println!("Hello {}", arena.intern("world"));
}
#[test]
fn debug() {
    // An interned value can be formatted with `Debug` through the reference.
    let arena: Bump<&'static str> = Bump::new();
    println!("Hello {:?}", arena.intern("world"));
}
6 changes: 6 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@ pub use arena::Arena;
#[cfg(feature = "arena")]
pub use arena::ArenaIntern;

#[cfg(feature = "bump")]
mod bump;

#[cfg(feature = "bump")]
pub use bump::Bump;

#[cfg(feature = "arc")]
mod arc;
#[cfg(feature = "arc")]
Expand Down