Skip to content

Commit 7c97d2a

Browse files
committed
remove optimization and prune
1 parent 2e900df commit 7c97d2a

File tree

3 files changed

+797
-6
lines changed

3 files changed

+797
-6
lines changed

Diff for: src/proptest_triemap.rs

+173-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use super::*;
22
use proptest::prelude::*;
33
use std::collections::BTreeMap;
44
use std::collections::HashMap;
5+
use std::collections::HashSet;
56

67
fn key_value_pairs(
78
min_pairs: usize,
@@ -163,7 +164,178 @@ fn test_empty_triemap_behavior() {
163164
}
164165

165166
proptest! {
166-
#[test]
167+
168+
#[test]
169+
fn pruning_preserves_values(pairs in key_value_pairs(1, 100), to_remove in key_value_pairs(1, 50)) {
170+
let mut trie = TrieMap::new();
171+
let mut reference_map = BTreeMap::new();
172+
173+
for (key, value) in &pairs {
174+
trie.insert(key, *value);
175+
reference_map.insert(key.clone(), *value);
176+
}
177+
178+
for (key, _) in &to_remove {
179+
trie.remove(key);
180+
reference_map.remove(key);
181+
}
182+
183+
trie.prune();
184+
185+
for (key, expected_value) in &reference_map {
186+
prop_assert_eq!(trie.get(key), Some(expected_value));
187+
}
188+
189+
for (key, _) in &to_remove {
190+
if !reference_map.contains_key(key) {
191+
prop_assert_eq!(trie.get(key), None);
192+
}
193+
}
194+
195+
prop_assert_eq!(trie.len(), reference_map.len());
196+
}
197+
// Test that multiple prune operations are idempotent
198+
#[test]
199+
fn multiple_prunes_are_idempotent(
200+
pairs in key_value_pairs(1, 100),
201+
to_remove in key_value_pairs(1, 50)
202+
) {
203+
let mut trie = TrieMap::new();
204+
205+
// Insert all pairs
206+
for (key, value) in &pairs {
207+
trie.insert(key, *value);
208+
}
209+
210+
// Remove some keys
211+
for (key, _) in &to_remove {
212+
trie.remove(key);
213+
}
214+
215+
// First prune
216+
let first_pruned = trie.prune();
217+
let size_after_first = trie.len();
218+
219+
// Second prune
220+
let second_pruned = trie.prune();
221+
let size_after_second = trie.len();
222+
223+
// The second prune should not remove any nodes
224+
prop_assert_eq!(second_pruned, 0);
225+
226+
// Sizes should be the same
227+
prop_assert_eq!(size_after_first, size_after_second);
228+
}
229+
230+
#[test]
231+
fn removal_works_correctly(pairs in key_value_pairs(5, 100)) {
232+
let mut trie = TrieMap::new();
233+
let mut reference_map = BTreeMap::new();
234+
235+
// Insert all pairs
236+
for (key, value) in &pairs {
237+
trie.insert(key, *value);
238+
reference_map.insert(key.clone(), *value);
239+
}
240+
241+
// Get the unique keys that were actually inserted (last value wins for duplicates)
242+
let unique_keys: Vec<String> = reference_map.keys().cloned().collect();
243+
244+
// Remove half of the unique keys
245+
let mut removed = 0;
246+
for (i, key) in unique_keys.iter().enumerate() {
247+
if i % 2 == 0 { // Only remove every other key
248+
let expected_value = reference_map.get(key).copied();
249+
let trie_removed = trie.remove(key);
250+
let ref_removed = reference_map.remove(key);
251+
252+
// The removed value should match between trie and reference map
253+
prop_assert_eq!(trie_removed, ref_removed);
254+
// And should match what we expected to remove
255+
prop_assert_eq!(trie_removed, expected_value);
256+
257+
removed += 1;
258+
}
259+
}
260+
261+
// Check that the size is correct
262+
prop_assert_eq!(trie.len(), reference_map.len());
263+
264+
// Check that all remaining keys are accessible
265+
for (key, value) in &reference_map {
266+
prop_assert_eq!(trie.get(key), Some(value));
267+
}
268+
269+
// Check that removed keys are not accessible
270+
for (i, key) in unique_keys.iter().enumerate() {
271+
if i % 2 == 0 { // These were removed
272+
prop_assert_eq!(trie.get(key), None);
273+
}
274+
}
275+
}
276+
277+
#[test]
278+
fn iteration_after_removal_is_correct(
279+
pairs in key_value_pairs(5, 100),
280+
removal_indices in proptest::collection::vec(0..100usize, 1..50)
281+
) {
282+
let mut trie = TrieMap::new();
283+
let mut reference_map = BTreeMap::new();
284+
285+
// Insert all pairs
286+
for (key, value) in &pairs {
287+
trie.insert(key, *value);
288+
reference_map.insert(key.clone(), *value);
289+
}
290+
291+
// Create list of keys to remove (using indices to prevent duplicates)
292+
let keys_to_remove: Vec<String> = removal_indices.iter()
293+
.filter(|&idx| *idx < pairs.len())
294+
.map(|&idx| pairs[idx].0.clone())
295+
.collect();
296+
297+
// Remove selected keys
298+
for key in &keys_to_remove {
299+
trie.remove(key);
300+
reference_map.remove(key);
301+
}
302+
303+
// Get all key-value pairs from trie iteration
304+
let mut trie_pairs: Vec<(String, i32)> = trie.iter()
305+
.map(|(k, &v)| (String::from_utf8(k).unwrap(), v))
306+
.collect();
307+
308+
// Get all key-value pairs from reference map
309+
let mut ref_pairs: Vec<(String, i32)> = reference_map
310+
.iter()
311+
.map(|(k, &v)| (k.clone(), v))
312+
.collect();
313+
314+
// Sort both for comparison
315+
trie_pairs.sort_by(|(k1, _), (k2, _)| k1.cmp(k2));
316+
ref_pairs.sort_by(|(k1, _), (k2, _)| k1.cmp(k2));
317+
318+
// They should be equal
319+
prop_assert_eq!(trie_pairs, ref_pairs);
320+
321+
// Size should match
322+
prop_assert_eq!(trie.len(), reference_map.len());
323+
324+
// Check iterator count matches expected count
325+
prop_assert_eq!(trie.iter().count(), reference_map.len());
326+
327+
// Also check keys() and values() iterators
328+
prop_assert_eq!(trie.keys().count(), reference_map.len());
329+
prop_assert_eq!(trie.values().count(), reference_map.len());
330+
331+
// Verify removed keys are not present
332+
for key in &keys_to_remove {
333+
prop_assert_eq!(trie.get(key), None);
334+
prop_assert!(!trie.keys().any(|k| String::from_utf8(k).unwrap() == *key));
335+
}
336+
}
337+
338+
#[test]
167339
fn triemap_correctly_handles_common_prefixes(
168340
pairs in prefixed_keys(vec!["app", "ban", "car", "dog"], 5, 50)
169341
) {

Diff for: src/trie_map.rs

+131-5
Original file line numberDiff line numberDiff line change
@@ -530,7 +530,9 @@ impl<T> TrieMap<T> {
530530
}
531531

532532
/// Removes a key from the map, returning the value at the key if the key was previously in the map.
533-
///
533+
/// This does not remove the nodes used to register the key
534+
/// Use the `remove_and_prune` method to remove the intermediate nodes as well.
535+
/// Use the `prune` method to remove *all* nodes that lead only to tombstones.
534536
/// # Examples
535537
///
536538
/// ```
@@ -548,6 +550,56 @@ impl<T> TrieMap<T> {
548550
}
549551

550552
fn remove_internal(&mut self, bytes: &[u8]) -> Option<T> {
553+
let mut current = &mut self.root;
554+
let mut found = true;
555+
556+
for &byte in bytes {
557+
if !test_bit(&current.is_present, byte) {
558+
found = false;
559+
break;
560+
}
561+
let idx = popcount(&current.is_present, byte) as usize;
562+
if idx >= current.children.len() {
563+
found = false;
564+
break;
565+
}
566+
current = &mut current.children[idx];
567+
}
568+
569+
if found && current.data_idx.is_some() {
570+
let data_idx = current.data_idx.unwrap();
571+
572+
if data_idx < self.data.len() && self.data[data_idx].is_some() {
573+
let value = self.data[data_idx].take();
574+
current.data_idx = None;
575+
self.free_indices.push(data_idx);
576+
self.size -= 1;
577+
return value;
578+
}
579+
}
580+
581+
None
582+
}
583+
584+
/// Removes a key from the map, returning the value at the key if the key was previously in the map.
585+
/// This method also removes the nodes used to register the key
586+
/// # Examples
587+
///
588+
/// ```
589+
/// # use triemap::TrieMap;
590+
/// let mut map = TrieMap::new();
591+
/// map.insert("a", 1);
592+
///
593+
/// assert_eq!(map.remove("a"), Some(1));
594+
/// assert_eq!(map.remove("a"), None);
595+
/// ```
596+
pub fn remove_and_prune<K: AsBytes>(&mut self, key: K) -> Option<T> {
597+
let bytes = key.as_bytes();
598+
599+
self.remove_and_prune_internal(bytes)
600+
}
601+
602+
fn remove_and_prune_internal(&mut self, bytes: &[u8]) -> Option<T> {
551603
let mut path = Vec::with_capacity(bytes.len());
552604
let mut path_indices = Vec::with_capacity(bytes.len());
553605

@@ -618,6 +670,82 @@ impl<T> TrieMap<T> {
618670
}
619671
}
620672

673+
/// Prunes unused nodes from the trie to reclaim memory.
674+
///
675+
/// This method removes all nodes that don't contain values and don't lead to nodes with values.
676+
/// It's useful to call periodically if you've removed many items from the trie.
677+
///
678+
/// # Examples
679+
///
680+
/// ```
681+
/// # use triemap::TrieMap;
682+
/// let mut map = TrieMap::new();
683+
/// map.insert("apple", 1);
684+
/// map.insert("application", 2);
685+
///
686+
/// map.remove("apple");
687+
/// map.remove("application");
688+
///
689+
/// // The trie structure still contains nodes for "apple" and "application"
690+
/// // even though the values have been removed
691+
///
692+
/// map.prune();
693+
/// // Now the unused nodes have been removed
694+
/// ```
695+
pub fn prune(&mut self) -> usize {
696+
Self::prune_node(&mut self.root)
697+
}
698+
699+
// Helper method to recursively prune nodes
700+
fn prune_node(node: &mut TrieNode) -> usize {
701+
let mut pruned_nodes = 0;
702+
let mut bytes_to_clear = Vec::new();
703+
704+
// Check each byte in the is_present array
705+
for byte in 0..=255u8 {
706+
if test_bit(&node.is_present, byte) {
707+
let idx = popcount(&node.is_present, byte) as usize;
708+
if idx < node.children.len() {
709+
// Recursively prune the child node
710+
let child_pruned = Self::prune_node(&mut node.children[idx]);
711+
pruned_nodes += child_pruned;
712+
713+
// Check if the child node is now empty and can be removed
714+
if node.children[idx].data_idx.is_none()
715+
&& node.children[idx].children.is_empty()
716+
{
717+
bytes_to_clear.push(byte);
718+
}
719+
}
720+
}
721+
}
722+
723+
// Remove empty children that were marked for removal
724+
for &byte in &bytes_to_clear {
725+
let idx = popcount(&node.is_present, byte) as usize;
726+
727+
// Create a new children array without the empty node
728+
let mut new_children = Vec::with_capacity(node.children.len() - 1);
729+
730+
// Copy all children except the one being removed
731+
for i in 0..node.children.len() {
732+
if i != idx {
733+
new_children.push(std::mem::replace(&mut node.children[i], TrieNode::new()));
734+
}
735+
}
736+
737+
// Update the node's children
738+
node.children = new_children.into_boxed_slice();
739+
740+
// Update the is_present bits - need to clear the bit for the removed node
741+
clear_bit(&mut node.is_present, byte);
742+
743+
// Update counts
744+
pruned_nodes += 1;
745+
}
746+
747+
pruned_nodes
748+
}
621749
/// Returns an iterator over the key-value pairs of the map.
622750
///
623751
/// # Examples
@@ -1638,8 +1766,7 @@ impl<T> TrieMap<T> {
16381766
&'a self,
16391767
other: &'a TrieMap<T>,
16401768
) -> impl Iterator<Item = (Vec<u8>, &'a T)> + 'a {
1641-
self.iter()
1642-
.filter(move |(key, _)| other.contains_key(key))
1769+
self.iter().filter(move |(key, _)| other.contains_key(key))
16431770
}
16441771

16451772
/// Returns an iterator over the entries whose keys are in this map but not in the other map.
@@ -1667,8 +1794,7 @@ impl<T> TrieMap<T> {
16671794
&'a self,
16681795
other: &'a TrieMap<T>,
16691796
) -> impl Iterator<Item = (Vec<u8>, &'a T)> + 'a {
1670-
self.iter()
1671-
.filter(move |(key, _)| !other.contains_key(key))
1797+
self.iter().filter(move |(key, _)| !other.contains_key(key))
16721798
}
16731799

16741800
/// Returns an iterator over entries whose keys are in exactly one of the maps.

0 commit comments

Comments
 (0)