Skip to content

Commit 48cbe1b

Browse files
committed
hopefully less allocation
1 parent 7c97d2a commit 48cbe1b

File tree

5 files changed

+422
-53
lines changed

5 files changed

+422
-53
lines changed

Diff for: src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ mod as_bytes;
1414
mod entry;
1515
mod iter;
1616
mod node;
17+
mod slice_pool;
1718
mod trie_map;
1819

1920
pub use as_bytes::AsBytes;

Diff for: src/node.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#[derive(Clone)]
1+
#[derive(Clone, Default)]
22
pub(crate) struct TrieNode {
33
pub(crate) is_present: [u64; 4],
44
pub(crate) children: Box<[TrieNode]>,

Diff for: src/slice_pool.rs

+57
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// src/slice_pool.rs
2+
3+
use crate::node::TrieNode;
4+
5+
/// A pool for reusing boxed slices of TrieNodes to reduce allocation overhead
6+
pub(crate) struct SlicePool {
7+
pub(crate) pools: [Vec<Box<[TrieNode]>>; 257],
8+
}
9+
10+
impl SlicePool {
11+
/// Creates a new empty slice pool
12+
pub fn new() -> Self {
13+
let pools = std::array::from_fn(|_| Vec::new());
14+
SlicePool { pools }
15+
}
16+
/// Gets a boxed slice of the specified length from the pool, or creates a new one
17+
pub fn get(&mut self, len: usize) -> Box<[TrieNode]> {
18+
if len == 0 {
19+
return Box::new([]);
20+
}
21+
let idx = len.max(256);
22+
if let Some(slice) = self.pools[idx as usize].pop() {
23+
return slice;
24+
}
25+
let mut vec = Vec::with_capacity(len as usize);
26+
for _ in 0..len {
27+
vec.push(TrieNode::new());
28+
}
29+
vec.into_boxed_slice()
30+
}
31+
32+
/// Returns a boxed slice to the pool for future reuse
33+
pub fn put(&mut self, slice: Box<[TrieNode]>) {
34+
let len = slice.len();
35+
36+
if len == 0 {
37+
return; // Don't pool empty slices
38+
}
39+
40+
let idx = len.min(255);
41+
self.pools[idx].push(slice);
42+
}
43+
44+
/// Clears all pools, dropping all stored slices
45+
pub fn clear(&mut self) {
46+
for pool in &mut self.pools {
47+
pool.clear();
48+
}
49+
}
50+
}
51+
52+
impl Drop for SlicePool {
53+
fn drop(&mut self) {
54+
// Clear all pools when the pool itself is dropped
55+
self.clear();
56+
}
57+
}

Diff for: src/trie_map.rs

+75-52
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
use std::collections::{BTreeMap, HashMap};
22
use std::hash::{Hash, Hasher};
3+
use std::mem;
34
use std::ops::{Index, IndexMut};
45

56
use crate::as_bytes::AsBytes;
67
use crate::entry::{Entry, OccupiedEntry, VacantEntry};
78
use crate::iter::{DrainIter, Iter, Keys, PrefixIter, PrefixKeys, PrefixValues, Values};
89
use crate::node::{clear_bit, popcount, set_bit, test_bit, TrieNode};
10+
use crate::slice_pool::SlicePool;
911

1012
/// A `TrieMap` is a key-value data structure that uses a trie (prefix tree) for storage
1113
/// and retrieval of data.
@@ -55,6 +57,7 @@ pub struct TrieMap<T> {
5557
pub(crate) free_indices: Vec<usize>,
5658
pub(crate) root: TrieNode,
5759
pub(crate) size: usize,
60+
pub(crate) pool: SlicePool,
5861
}
5962

6063
impl<T, K: AsBytes, V: Into<T>, const N: usize> From<[(K, V); N]> for TrieMap<T> {
@@ -103,6 +106,7 @@ impl<T: Clone> Clone for TrieMap<T> {
103106
free_indices: self.free_indices.clone(),
104107
root: self.root.clone(),
105108
size: self.size,
109+
pool: SlicePool::new(),
106110
}
107111
}
108112
}
@@ -241,6 +245,7 @@ impl<T> TrieMap<T> {
241245
free_indices: Vec::new(),
242246
root: TrieNode::new(),
243247
size: 0,
248+
pool: SlicePool::new(),
244249
}
245250
}
246251

@@ -262,6 +267,7 @@ impl<T> TrieMap<T> {
262267
free_indices: Vec::new(),
263268
root: TrieNode::new(),
264269
size: 0,
270+
pool: SlicePool::new(),
265271
}
266272
}
267273

@@ -341,19 +347,22 @@ impl<T> TrieMap<T> {
341347

342348
if !test_bit(&current.is_present, byte) {
343349
let current_size = current.children.len();
344-
let mut new_children = Vec::with_capacity(current_size + 1);
350+
let mut new_children = self.pool.get(current_size + 1);
345351

346352
for i in 0..idx {
347-
new_children.push(std::mem::replace(&mut current.children[i], TrieNode::new()));
353+
mem::swap(&mut new_children[i], &mut current.children[i]);
354+
//new_children.push(std::mem::replace(&mut current.children[i], TrieNode::new()));
348355
}
349356

350-
new_children.push(TrieNode::new());
357+
new_children[idx] = TrieNode::new();
351358

352359
for i in idx..current_size {
353-
new_children.push(std::mem::replace(&mut current.children[i], TrieNode::new()));
360+
mem::swap(&mut new_children[i + 1], &mut current.children[i]);
354361
}
355362

356-
current.children = new_children.into_boxed_slice();
363+
let old_children = mem::replace(&mut current.children, new_children);
364+
self.pool.put(old_children);
365+
357366
set_bit(&mut current.is_present, byte);
358367
}
359368

@@ -669,48 +678,32 @@ impl<T> TrieMap<T> {
669678
None
670679
}
671680
}
672-
673-
/// Prunes unused nodes from the trie to reclaim memory.
674-
///
675-
/// This method removes all nodes that don't contain values and don't lead to nodes with values.
676-
/// It's useful to call periodically if you've removed many items from the trie.
677-
///
678-
/// # Examples
679-
///
680-
/// ```
681-
/// # use triemap::TrieMap;
682-
/// let mut map = TrieMap::new();
683-
/// map.insert("apple", 1);
684-
/// map.insert("application", 2);
685-
///
686-
/// map.remove("apple");
687-
/// map.remove("application");
688-
///
689-
/// // The trie structure still contains nodes for "apple" and "application"
690-
/// // even though the values have been removed
691-
///
692-
/// map.prune();
693-
/// // Now the unused nodes have been removed
694-
/// ```
695681
pub fn prune(&mut self) -> usize {
696-
Self::prune_node(&mut self.root)
682+
// We need to avoid having two mutable references to self
683+
// Let's extract the nodes we need separately
684+
let mut root = std::mem::take(&mut self.root);
685+
let slice_pool = &mut self.pool;
686+
687+
let pruned = Self::prune_node_helper(&mut root, slice_pool);
688+
689+
// Put the root back
690+
self.root = root;
691+
692+
pruned
697693
}
698694

699-
// Helper method to recursively prune nodes
700-
fn prune_node(node: &mut TrieNode) -> usize {
695+
fn prune_node_helper(node: &mut TrieNode, slice_pool: &mut SlicePool) -> usize {
701696
let mut pruned_nodes = 0;
702697
let mut bytes_to_clear = Vec::new();
703698

704-
// Check each byte in the is_present array
705699
for byte in 0..=255u8 {
706700
if test_bit(&node.is_present, byte) {
707701
let idx = popcount(&node.is_present, byte) as usize;
708702
if idx < node.children.len() {
709703
// Recursively prune the child node
710-
let child_pruned = Self::prune_node(&mut node.children[idx]);
704+
let child_pruned = Self::prune_node_helper(&mut node.children[idx], slice_pool);
711705
pruned_nodes += child_pruned;
712706

713-
// Check if the child node is now empty and can be removed
714707
if node.children[idx].data_idx.is_none()
715708
&& node.children[idx].children.is_empty()
716709
{
@@ -720,32 +713,62 @@ impl<T> TrieMap<T> {
720713
}
721714
}
722715

723-
// Remove empty children that were marked for removal
724-
for &byte in &bytes_to_clear {
725-
let idx = popcount(&node.is_present, byte) as usize;
726-
727-
// Create a new children array without the empty node
728-
let mut new_children = Vec::with_capacity(node.children.len() - 1);
716+
if !bytes_to_clear.is_empty() {
717+
let current_size = node.children.len();
718+
let new_size = current_size - bytes_to_clear.len();
729719

730-
// Copy all children except the one being removed
731-
for i in 0..node.children.len() {
732-
if i != idx {
733-
new_children.push(std::mem::replace(&mut node.children[i], TrieNode::new()));
720+
if new_size == 0 {
721+
let old_children = std::mem::replace(&mut node.children, Box::new([]));
722+
slice_pool.put(old_children);
723+
} else {
724+
let mut new_children = slice_pool.get(new_size);
725+
let mut new_idx = 0;
726+
727+
for byte in 0..=255u8 {
728+
if test_bit(&node.is_present, byte) && !bytes_to_clear.contains(&byte) {
729+
let idx = popcount(&node.is_present, byte) as usize;
730+
if idx < node.children.len() {
731+
std::mem::swap(&mut new_children[new_idx], &mut node.children[idx]);
732+
new_idx += 1;
733+
}
734+
}
734735
}
735-
}
736-
737-
// Update the node's children
738-
node.children = new_children.into_boxed_slice();
739736

740-
// Update the is_present bits - need to clear the bit for the removed node
741-
clear_bit(&mut node.is_present, byte);
737+
let old_children = std::mem::replace(&mut node.children, new_children);
738+
slice_pool.put(old_children);
739+
}
742740

743-
// Update counts
744-
pruned_nodes += 1;
741+
pruned_nodes += bytes_to_clear.len();
742+
for byte in bytes_to_clear {
743+
clear_bit(&mut node.is_present, byte);
744+
}
745745
}
746746

747747
pruned_nodes
748748
}
749+
/// Prunes unused nodes from the trie to reclaim memory.
750+
///
751+
/// This method removes all nodes that don't contain values and don't lead to nodes with values.
752+
/// It's useful to call periodically if you've removed many items from the trie.
753+
///
754+
/// # Examples
755+
///
756+
/// ```
757+
/// # use triemap::TrieMap;
758+
/// let mut map = TrieMap::new();
759+
/// map.insert("apple", 1);
760+
/// map.insert("application", 2);
761+
///
762+
/// map.remove("apple");
763+
/// map.remove("application");
764+
///
765+
/// // The trie structure still contains nodes for "apple" and "application"
766+
/// // even though the values have been removed
767+
///
768+
/// map.prune();
769+
/// // Now the unused nodes have been removed
770+
/// ```
771+
749772
/// Returns an iterator over the key-value pairs of the map.
750773
///
751774
/// # Examples

0 commit comments

Comments
 (0)