Skip to content

Commit 1ff2437

Browse files
fix(hash_builder): store branch nodes with only leaf children in updates
Previously, branch nodes were only stored in `updated_branch_nodes` if they had children that were themselves branches (non-empty tree_masks/hash_masks). This caused branch nodes with only leaf children to be silently omitted from updates, breaking incremental trie sync for: - Small contracts with few storage slots - Sparse trie regions where all children are leaves - Fresh tries built without existing database data The fix adds a third condition: store the branch if its parent references it as a hashed child (which we just set in the parent's hash_mask). This ensures all branch nodes that would be looked up during trie traversal are present. Changes: - Root node (len == 0) is always stored - Non-root nodes are stored if they have branch children OR if the parent references this child in its hash_mask - Updated test expectations for tree_mask (now correctly marks stored children) - Added regression tests for the bug scenarios Closes: #25 Co-authored-by: Amp <amp@ampcode.com> Amp-Thread-ID: https://ampcode.com/threads/T-019bf607-2e34-74e0-b2d6-df3285ef82df
1 parent fa56822 commit 1ff2437

1 file changed

Lines changed: 205 additions & 4 deletions

File tree

src/hash_builder/mod.rs

Lines changed: 205 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -427,13 +427,37 @@ impl<K: AsRef<AddedRemovedKeys>> HashBuilder<K> {
427427
/// masks in the database. We will use that when consuming the intermediate nodes
428428
/// from the database to efficiently build the trie.
429429
fn store_branch_node(&mut self, current: &Nibbles, len: usize, children: Vec<B256>) {
430+
// Update the parent's hash_mask to indicate this child is a hashed branch node
430431
if len > 0 {
431432
let parent_index = len - 1;
432433
self.hash_masks[parent_index] |=
433434
TrieMask::from_nibble(current.get_unchecked(parent_index));
434435
}
435436

436-
let store_in_db_trie = !self.tree_masks[len].is_empty() || !self.hash_masks[len].is_empty();
437+
// Determine if this branch node should be stored in the database.
438+
//
439+
// A branch node must be stored if:
440+
// 1. It is the root node (len == 0), since it's the entry point for trie traversal, OR
441+
// 2. It has children that are themselves branches (indicated by non-empty tree/hash masks),
442+
// meaning we need to store it to allow incremental updates, OR
443+
// 3. Its parent references it as a hashed child. Since we just set the parent's hash_mask
444+
// above, we check if this child's bit is now set in the parent's hash_mask.
445+
//
446+
// The key insight is that even a branch node with only leaf children must be stored
447+
// if its parent will reference it by hash (which is always the case for branch nodes
448+
// whose RLP encoding >= 32 bytes).
449+
let store_in_db_trie = if len == 0 {
450+
// Root node is always stored
451+
true
452+
} else {
453+
let parent_index = len - 1;
454+
let flag = TrieMask::from_nibble(current.get_unchecked(parent_index));
455+
// Store if this node has branch children OR if the parent references this child
456+
!self.tree_masks[len].is_empty()
457+
|| !self.hash_masks[len].is_empty()
458+
|| (self.hash_masks[parent_index] & flag) != TrieMask::default()
459+
};
460+
437461
if store_in_db_trie {
438462
if len > 0 {
439463
let parent_index = len - 1;
@@ -559,7 +583,8 @@ mod tests {
559583
// `3`.
560584
// 2. Hash Mask: 0b0110. Of the above items, nibbles `1` and `2` correspond to children that
561585
// are branch nodes.
562-
// 3. Tree Mask: 0b0000. None of the children are stored in the database (yet).
586+
// 3. Tree Mask: 0b0110. Children at nibbles `1` and `2` are branch nodes that are stored
587+
// in the database/updates (they are referenced by hash from this parent node).
563588
// 4. Hashes: Hashes of the 2 sub-branch roots, at nibbles `1` and `2`. Calculated by
564589
// hashing the 0th and 1st element for the branch at nibble `1` , and the 0th and 2nd
565590
// element for the branch at nibble `2`. This basically means that every
@@ -605,8 +630,8 @@ mod tests {
605630
let update = updates.get(&Nibbles::from_nibbles_unchecked(hex!("01"))).unwrap();
606631
// Nibbles 0, 1, 2, 3 have children
607632
assert_eq!(update.state_mask, TrieMask::new(0b1111));
608-
// None of the children are stored in the database
609-
assert_eq!(update.tree_mask, TrieMask::new(0b0000));
633+
// Children at nibbles 1 and 2 are branch nodes stored in the trie
634+
assert_eq!(update.tree_mask, TrieMask::new(0b0110));
610635
// Children under nibbles `1` and `2` are branch nodes with `hashes`
611636
assert_eq!(update.hash_mask, TrieMask::new(0b0110));
612637
// Calculated when running the hash builder
@@ -678,4 +703,180 @@ mod tests {
678703
assert_eq!(hb.root(), expected);
679704
assert_eq!(hb2.root(), expected);
680705
}
706+
707+
/// Test that branch nodes are correctly added to updates even when they only have leaf
708+
/// children. This was a bug where branch nodes with only leaf children were not being
709+
/// stored in updates because the condition only checked if the node had branch children.
710+
#[test]
711+
fn test_updates_root() {
712+
let mut hb = HashBuilder::default().with_updates(true);
713+
let account: Vec<u8> = Vec::new();
714+
715+
hb.add_leaf(
716+
Nibbles::unpack(hex!(
717+
"a711355ec1c8f7e26bb3ccbcb0b75d870d15846c0b98e5cc452db46c37faea40"
718+
)),
719+
account.as_ref(),
720+
);
721+
hb.add_leaf(
722+
Nibbles::unpack(hex!(
723+
"a77d337781e762f3577784bab7491fcc43e291ce5a356b9bc517ac52eed3a37a"
724+
)),
725+
account.as_ref(),
726+
);
727+
hb.add_leaf(
728+
Nibbles::unpack(hex!(
729+
"a77d397a32b8ab5eb4b043c65b1f00c93f517bc8883c5cd31baf8e8a279475e3"
730+
)),
731+
account.as_ref(),
732+
);
733+
hb.add_leaf(
734+
Nibbles::unpack(hex!(
735+
"a7f936599f93b769acf90c7178fd2ddcac1b5b4bc9949ee5a04b7e0823c2446e"
736+
)),
737+
account.as_ref(),
738+
);
739+
740+
let _root = hb.root();
741+
let (_, updates) = hb.split();
742+
743+
// Previously this would return an empty map because branch nodes with only
744+
// leaf children were not being stored. Now all branch nodes should be stored.
745+
assert!(!updates.is_empty(), "updates should not be empty");
746+
}
747+
748+
/// Test the tree handling for various branch configurations.
749+
/// This tests that all branch nodes in a multi-level trie are correctly stored.
750+
#[test]
751+
fn test_trie_updates_branch_nodes() {
752+
let default_leaf = b"hello";
753+
754+
// Create a trie structure with multiple branch levels:
755+
// Root branch at 0x with children at nibbles 0, 1, 2, 3
756+
// Each of these has further structure creating nested branches
757+
let data = vec![
758+
(hex!("0000").to_vec(), default_leaf.to_vec()),
759+
(hex!("0020").to_vec(), default_leaf.to_vec()),
760+
(hex!("1000").to_vec(), default_leaf.to_vec()),
761+
(hex!("2000").to_vec(), default_leaf.to_vec()),
762+
(hex!("2111").to_vec(), default_leaf.to_vec()),
763+
(hex!("2122").to_vec(), default_leaf.to_vec()),
764+
(hex!("2123").to_vec(), default_leaf.to_vec()),
765+
(hex!("3000").to_vec(), default_leaf.to_vec()),
766+
(hex!("3001").to_vec(), default_leaf.to_vec()),
767+
];
768+
769+
let mut hb = HashBuilder::default().with_updates(true);
770+
for (key, val) in &data {
771+
let nibbles = Nibbles::unpack(key);
772+
hb.add_leaf(nibbles, val.as_ref());
773+
}
774+
775+
let _root = hb.root();
776+
let (_, updates) = hb.split();
777+
778+
// The trie structure creates 6 branch nodes that should all be in updates:
779+
// 1. Root branch (path: empty)
780+
// 2. Branch at 0x0 (children at 0, 2)
781+
// 3. Branch at 0x2 (children at 0, 1)
782+
// 4. Branch at 0x21 (children at 1, 2)
783+
// 5. Branch at 0x212 (children at 2, 3)
784+
// 6. Branch at 0x300 (children at 0, 1)
785+
assert_eq!(updates.len(), 6, "expected 6 branch nodes in updates");
786+
}
787+
788+
/// Test that small storage tries (like new contracts with few slots) correctly
789+
/// store their branch nodes for incremental sync.
790+
#[test]
791+
fn test_small_storage_trie_updates() {
792+
use alloy_primitives::keccak256;
793+
794+
let mut hb = HashBuilder::default().with_updates(true);
795+
796+
// Simulate a contract with 4 storage slots (common for simple contracts)
797+
// Keys are keccak256(slot_index) as per Ethereum spec
798+
let slots: BTreeMap<_, _> = (0u64..4)
799+
.map(|i| {
800+
let mut slot = [0u8; 32];
801+
slot[31] = i as u8;
802+
let key = keccak256(slot);
803+
(key, alloy_rlp::encode(U256::from(i * 100)))
804+
})
805+
.collect();
806+
807+
for (key, value) in &slots {
808+
hb.add_leaf(Nibbles::unpack(key), value);
809+
}
810+
811+
let _root = hb.root();
812+
let (_, updates) = hb.split();
813+
814+
// Even a small trie should have at least the root branch node stored
815+
assert!(!updates.is_empty(), "small storage trie should have updates");
816+
817+
// Verify root is included (path = empty nibbles)
818+
assert!(updates.contains_key(&Nibbles::default()), "root branch node should be in updates");
819+
}
820+
821+
/// Test edge case: keys that diverge at the last nibble with empty values.
822+
/// This creates very small leaf RLPs and deep branch structures.
823+
#[test]
824+
fn test_deep_divergence_empty_values() {
825+
let mut hb = HashBuilder::default().with_updates(true);
826+
827+
let key1 = hex!("0000000000000000000000000000000000000000000000000000000000000000");
828+
let key2 = hex!("0000000000000000000000000000000000000000000000000000000000000001");
829+
830+
hb.add_leaf(Nibbles::unpack(key1), &[]);
831+
hb.add_leaf(Nibbles::unpack(key2), &[]);
832+
833+
let _root = hb.root();
834+
let (_, updates) = hb.split();
835+
836+
// With the fix, should have at least one branch update
837+
assert!(!updates.is_empty(), "deep divergence should have branch updates");
838+
}
839+
840+
/// Test: siblings at different depths to verify mask propagation
841+
#[test]
842+
fn test_mask_propagation_across_depths() {
843+
let mut hb = HashBuilder::default().with_updates(true);
844+
845+
// Path 1 and 2 share prefix and go deep, path 3 is shallow sibling
846+
let keys = [
847+
hex!("1000000000000000000000000000000000000000000000000000000000000000"),
848+
hex!("1000000000000000000000000000000000000000000000000000000000000001"),
849+
hex!("2000000000000000000000000000000000000000000000000000000000000000"),
850+
];
851+
852+
for key in &keys {
853+
hb.add_leaf(Nibbles::unpack(*key), b"value");
854+
}
855+
856+
let _root = hb.root();
857+
let (_, updates) = hb.split();
858+
859+
// Verify all stored nodes have valid invariants
860+
for (path, node) in &updates {
861+
assert!(
862+
node.tree_mask.is_subset_of(node.state_mask),
863+
"tree_mask must be subset of state_mask at {:?}",
864+
path
865+
);
866+
assert!(
867+
node.hash_mask.is_subset_of(node.state_mask),
868+
"hash_mask must be subset of state_mask at {:?}",
869+
path
870+
);
871+
assert_eq!(
872+
node.hash_mask.count_ones() as usize,
873+
node.hashes.len(),
874+
"hash count mismatch at {:?}",
875+
path
876+
);
877+
}
878+
879+
// Root should be present
880+
assert!(updates.contains_key(&Nibbles::default()), "root should be in updates");
881+
}
681882
}

0 commit comments

Comments
 (0)