Skip to content

Commit 2f6d7f2

Browse files
matkt and ahamlat authored
feat: Add parallel state root computation support for Bonsai trie (#9576)
Signed-off-by: Karim Taam <karim.t2am@gmail.com> Co-authored-by: ahamlat <ameziane.hamlat@consensys.net>
1 parent 2c60d51 commit 2f6d7f2

21 files changed

Lines changed: 1738 additions & 220 deletions

File tree

app/src/main/java/org/hyperledger/besu/cli/options/storage/PathBasedExtraStorageOptions.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
import static org.hyperledger.besu.ethereum.worldstate.PathBasedExtraStorageConfiguration.DEFAULT_LIMIT_TRIE_LOGS_ENABLED;
1818
import static org.hyperledger.besu.ethereum.worldstate.PathBasedExtraStorageConfiguration.DEFAULT_MAX_LAYERS_TO_LOAD;
19+
import static org.hyperledger.besu.ethereum.worldstate.PathBasedExtraStorageConfiguration.DEFAULT_PARALLEL_STATE_ROOT_COMPUTATION;
1920
import static org.hyperledger.besu.ethereum.worldstate.PathBasedExtraStorageConfiguration.DEFAULT_PARALLEL_TX_PROCESSING;
2021
import static org.hyperledger.besu.ethereum.worldstate.PathBasedExtraStorageConfiguration.DEFAULT_TRIE_LOG_PRUNING_WINDOW_SIZE;
2122
import static org.hyperledger.besu.ethereum.worldstate.PathBasedExtraStorageConfiguration.MINIMUM_TRIE_LOG_RETENTION_LIMIT;
@@ -60,6 +61,10 @@ public class PathBasedExtraStorageOptions
6061
public static final String PARALLEL_TX_PROCESSING_ENABLED =
6162
"--bonsai-parallel-tx-processing-enabled";
6263

64+
/** The bonsai parallel state root computation enabled option name. */
65+
public static final String PARALLEL_STATE_ROOT_COMPUTATION_ENABLED =
66+
"--bonsai-parallel-state-root-computation-enabled";
67+
6368
@Option(
6469
names = {LIMIT_TRIE_LOGS_ENABLED},
6570
fallbackValue = "true",
@@ -81,6 +86,13 @@ public class PathBasedExtraStorageOptions
8186
"Enables parallelization of transactions to optimize processing speed by concurrently loading and executing necessary data in advance. Will be ignored if --data-storage-format is not bonsai (default: ${DEFAULT-VALUE})")
8287
private Boolean isParallelTxProcessingEnabled = DEFAULT_PARALLEL_TX_PROCESSING;
8388

89+
@Option(
90+
names = {PARALLEL_STATE_ROOT_COMPUTATION_ENABLED},
91+
arity = "1",
92+
description =
93+
"Enables parallel computation of state root hash to optimize performance. Will be ignored if --data-storage-format is not bonsai (default: ${DEFAULT-VALUE})")
94+
private Boolean isParallelStateRootComputationEnabled = DEFAULT_PARALLEL_STATE_ROOT_COMPUTATION;
95+
8496
@CommandLine.ArgGroup(validate = false)
8597
private final PathBasedExtraStorageOptions.Unstable unstableOptions = new Unstable();
8698

@@ -174,6 +186,8 @@ public static PathBasedExtraStorageOptions fromConfig(
174186
domainObject.getUnstable().getCodeStoredByCodeHashEnabled();
175187
dataStorageOptions.isParallelTxProcessingEnabled =
176188
domainObject.getParallelTxProcessingEnabled();
189+
dataStorageOptions.isParallelStateRootComputationEnabled =
190+
domainObject.getParallelStateRootComputationEnabled();
177191

178192
return dataStorageOptions;
179193
}
@@ -185,6 +199,7 @@ public final PathBasedExtraStorageConfiguration toDomainObject() {
185199
.limitTrieLogsEnabled(limitTrieLogsEnabled)
186200
.trieLogPruningWindowSize(trieLogPruningWindowSize)
187201
.parallelTxProcessingEnabled(isParallelTxProcessingEnabled)
202+
.parallelStateRootComputationEnabled(isParallelStateRootComputationEnabled)
188203
.unstable(
189204
ImmutablePathBasedExtraStorageConfiguration.PathBasedUnstable.builder()
190205
.fullFlatDbEnabled(unstableOptions.fullFlatDbEnabled)

app/src/main/java/org/hyperledger/besu/controller/BesuControllerBuilder.java

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1263,10 +1263,7 @@ WorldStateArchive createWorldStateArchive(
12631263
yield new BonsaiWorldStateProvider(
12641264
worldStateKeyValueStorage,
12651265
blockchain,
1266-
Optional.of(
1267-
dataStorageConfiguration
1268-
.getPathBasedExtraStorageConfiguration()
1269-
.getMaxLayersToLoad()),
1266+
dataStorageConfiguration.getPathBasedExtraStorageConfiguration(),
12701267
bonsaiCachedMerkleTrieLoader,
12711268
besuComponent.map(BesuComponent::getBesuPluginContext).orElse(null),
12721269
evmConfiguration,
@@ -1280,10 +1277,7 @@ yield new BonsaiWorldStateProvider(
12801277
yield new BonsaiArchiveWorldStateProvider(
12811278
worldStateKeyValueStorage,
12821279
blockchain,
1283-
Optional.of(
1284-
dataStorageConfiguration
1285-
.getPathBasedExtraStorageConfiguration()
1286-
.getMaxLayersToLoad()),
1280+
dataStorageConfiguration.getPathBasedExtraStorageConfiguration(),
12871281
bonsaiCachedMerkleTrieLoader,
12881282
besuComponent.map(BesuComponent::getBesuPluginContext).orElse(null),
12891283
evmConfiguration,

app/src/test/java/org/hyperledger/besu/cli/options/stable/DataStorageOptionsTest.java

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,76 @@ public void pathbasedCodeUsingCodeHashEnabledCanBeDisabled() {
138138
"false");
139139
}
140140

141+
@Test
142+
public void parallelTxProcessingEnabledByDefault() {
143+
internalTestSuccess(
144+
dataStorageConfiguration ->
145+
assertThat(
146+
dataStorageConfiguration
147+
.getPathBasedExtraStorageConfiguration()
148+
.getParallelTxProcessingEnabled())
149+
.isEqualTo(true));
150+
}
151+
152+
@Test
153+
public void parallelTxProcessingCanBeEnabled() {
154+
internalTestSuccess(
155+
dataStorageConfiguration ->
156+
assertThat(
157+
dataStorageConfiguration
158+
.getPathBasedExtraStorageConfiguration()
159+
.getParallelTxProcessingEnabled())
160+
.isEqualTo(true),
161+
"--bonsai-parallel-tx-processing-enabled=true");
162+
}
163+
164+
@Test
165+
public void parallelTxProcessingCanBeDisabled() {
166+
internalTestSuccess(
167+
dataStorageConfiguration ->
168+
assertThat(
169+
dataStorageConfiguration
170+
.getPathBasedExtraStorageConfiguration()
171+
.getParallelTxProcessingEnabled())
172+
.isEqualTo(false),
173+
"--bonsai-parallel-tx-processing-enabled=false");
174+
}
175+
176+
@Test
177+
public void parallelStateRootComputationEnabledByDefault() {
178+
internalTestSuccess(
179+
dataStorageConfiguration ->
180+
assertThat(
181+
dataStorageConfiguration
182+
.getPathBasedExtraStorageConfiguration()
183+
.getParallelStateRootComputationEnabled())
184+
.isEqualTo(true));
185+
}
186+
187+
@Test
188+
public void parallelStateRootComputationCanBeEnabled() {
189+
internalTestSuccess(
190+
dataStorageConfiguration ->
191+
assertThat(
192+
dataStorageConfiguration
193+
.getPathBasedExtraStorageConfiguration()
194+
.getParallelStateRootComputationEnabled())
195+
.isEqualTo(true),
196+
"--bonsai-parallel-state-root-computation-enabled=true");
197+
}
198+
199+
@Test
200+
public void parallelStateRootComputationCanBeDisabled() {
201+
internalTestSuccess(
202+
dataStorageConfiguration ->
203+
assertThat(
204+
dataStorageConfiguration
205+
.getPathBasedExtraStorageConfiguration()
206+
.getParallelStateRootComputationEnabled())
207+
.isEqualTo(false),
208+
"--bonsai-parallel-state-root-computation-enabled=false");
209+
}
210+
141211
@Test
142212
public void receiptCompactionCanBeEnabledWithImplicitTrueValue() {
143213
internalTestSuccess(
@@ -176,6 +246,8 @@ protected DataStorageConfiguration createCustomizedDomainObject() {
176246
.maxLayersToLoad(513L)
177247
.limitTrieLogsEnabled(true)
178248
.trieLogPruningWindowSize(514)
249+
.parallelTxProcessingEnabled(true)
250+
.parallelStateRootComputationEnabled(true)
179251
.build())
180252
.build();
181253
}

app/src/test/resources/everything_config.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,7 @@ bonsai-limit-trie-logs-enabled=true
221221
bonsai-trie-logs-pruning-window-size=100_000
222222
receipt-compaction-enabled=true
223223
bonsai-parallel-tx-processing-enabled=false
224+
bonsai-parallel-state-root-computation-enabled=false
224225

225226
# feature flags
226227
Xsecp256k1-native-enabled=false

docs/trie/parallel-merkle-trie.md

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
# ParallelStoredMerklePatriciaTrie - Logic Explanation
2+
3+
## Core Problem Being Solved
4+
5+
When you need to update many keys in a Merkle Patricia Trie (like Ethereum state updates), doing them one-by-one is slow. This implementation batches updates and processes independent parts of the tree simultaneously using multiple CPU cores.
6+
7+
## High-Level Strategy
8+
9+
**The Big Idea:** A branch node has 16 independent children. If you have updates going to different children, you can process those children in parallel.
10+
11+
### Three Phases
12+
13+
1. **Accumulation Phase**: Collect all updates without applying them
14+
2. **Parallel Processing Phase**: Apply updates recursively, splitting work when beneficial
15+
3. **Persistence Phase**: Write all modified nodes to storage at once
16+
17+
## The Trie Structure Recap
18+
19+
Think of the trie like a filing system:
20+
- **Keys** are converted to hex nibbles (0-F)
21+
- **Path** through the tree follows these nibbles
22+
- Example: Key `0xABCD` → path `[A, B, C, D]`
23+
24+
**Node types:**
25+
- **Branch**: 16 slots (one per hex digit) + optional value
26+
- **Extension**: Shortcut for long single paths (e.g., `[A,B,C,D]` → child)
27+
- **Leaf**: Terminal node with actual value
28+
- **Null**: Empty slot
29+
30+
## How Parallelization Works
31+
32+
### Starting Point: Branch Nodes
33+
34+
When processing a branch node with multiple updates:
35+
36+
1. **Group updates by their next nibble**
37+
- Updates to `[A, ...]` go in group A
38+
- Updates to `[B, ...]` go in group B
39+
- And so on for all 16 possible hex digits
40+
41+
2. **Decide which groups to parallelize**
42+
- Large groups (multiple updates) → process in thread pool, provided the branch has more than one active group
43+
- Small groups (single update) → process in current task
44+
- Reason: Avoid task creation overhead for tiny tasks
45+
46+
3. **Process each group recursively**
47+
- Each child node gets processed with only its relevant updates
48+
- That child might be a branch → parallelizes again
49+
- Creates a tree of parallel tasks
50+
51+
4. **Wait for all parallel tasks to complete**
52+
- Use futures to track parallel work
53+
- Block until all children are updated at each level
54+
55+
5. **Reconstruct the branch with new children**
56+
- Create new branch node (immutable pattern)
57+
- Compute its hash
58+
- Return to parent
59+
60+
### Example Visualization
61+
62+
```
63+
Updates: [0xAB01, 0xAB02, 0xAE03, 0xF123]
64+
65+
Root Branch:
66+
├─ Child[A]: 3 updates [AB01, AB02, AE03] → PARALLEL TASK 1
67+
│ └─ Branch at [A]:
68+
│ ├─ Child[B]: 2 updates [AB01, AB02] → PARALLEL TASK 1.1
69+
│ └─ Child[E]: 1 update [AE03] → SEQUENTIAL (runs inside TASK 1)
70+
└─ Child[F]: 1 update [F123] → SEQUENTIAL
71+
72+
Result: multiple threads working simultaneously
73+
```
74+
75+
### Why This Works
76+
77+
**Independence**: Updates to `child[A]` don't affect `child[F]`. They share no data, so they can safely run in parallel.
78+
79+
**Recursive Nature**: The child at `[A]` is itself a branch, so it can further parallelize its own children `[B]` and `[E]`.
80+
81+
**Natural Load Balancing**: Hot paths in the trie (many updates) automatically get more parallel workers.
82+
83+
## Handling Non-Branch Nodes
84+
85+
### Extension Nodes: The Complexity
86+
87+
Extensions compress long single paths. Example:
88+
- Instead of: `Branch[A] → Branch[B] → Branch[C] → Child`
89+
- Store: `Extension([A,B,C]) → Child`
90+
91+
**Problem**: Extensions are sequential by definition. How to parallelize?
92+
93+
**Solution**: Temporarily expand them into branches!
94+
95+
#### Expansion Logic
96+
97+
When updates diverge within an extension's path:
98+
99+
1. **Find the divergence point**
100+
- Extension path: `[A, B, C, D]`
101+
- Additions at: `[A, B, C, E]` and `[A, B, C, F]`
102+
- Divergence at index 3 (position D)
103+
104+
2. **Build branch structure up to divergence**
105+
- Create branches for `[A]`, then `[B]`, then `[C]`
106+
- At `[C]`, create branch with children at `[D]`, `[E]`, `[F]`
107+
108+
3. **Process the expanded structure in parallel**
109+
- Now it's a branch tree → normal parallel processing
110+
111+
4. **Let the visitor pattern re-optimize**
112+
- After updates, the structure may collapse back to extensions
113+
- Standard trie optimization (not shown in parallel code)
114+
115+
**When to expand:**
116+
- Only if there are multiple updates (worth the overhead)
117+
- Single update → use sequential visitor (faster, handles restructuring)
118+
119+
### Leaf Nodes: Conversion Strategy
120+
121+
A leaf represents a single value at some path.
122+
123+
**If multiple additions affect this area:**
124+
1. Convert leaf to a branch
125+
2. Place the existing leaf value in appropriate child slot
126+
3. Process the additions in parallel on the new branch
127+
128+
**If single update:**
129+
- Let sequential visitor handle it (might just update the value, or split the leaf)
130+
131+
### Null Nodes: Building from Scratch
132+
133+
An empty position receiving multiple additions:
134+
1. Create empty branch (16 null children)
135+
2. Process the additions in parallel
136+
3. Children get created as the additions are applied
137+
138+
## Thread Safety Mechanisms
139+
140+
### BranchWrapper: Coordinating Parallel Updates
141+
142+
Problem: Multiple threads want to update different children of the same branch.
143+
144+
Solution:
145+
- Wrap the branch node's children list in a synchronized list
146+
- Each parallel task updates its assigned child slot
147+
- No conflicts because each task has exclusive child index
148+
- After all futures complete, reconstruct the branch atomically
149+
150+
### CommitCache: Deferred Storage Writes
151+
152+
Problem: Many threads computing node hashes, but storage writes should be batched.
153+
154+
Solution:
155+
- Thread-safe map collects all nodes to persist
156+
- Each parallel task adds its nodes to that map
157+
- Single flush at the end writes everything to storage
158+
- Reduces I/O contention and transaction overhead
159+
160+
### Immutable Nodes
161+
162+
Key principle: Nodes are never modified in place.
163+
164+
Every update creates a new node instance. This means:
165+
- No locks needed for reading nodes
166+
- Parallel tasks can safely read shared nodes
167+
- Parent gets updated with reference to new child
168+
- Old nodes are eventually garbage collected
169+
170+
## Decision Logic: When to Parallelize?
171+
172+
### The Partitioning Rule
173+
174+
A group of updates gets parallel processing if:
175+
1. The group has more than 1 update, AND
176+
2. The branch has more than 1 active group
177+
178+
**Why both conditions?**
179+
- Single update → sequential is faster (no parallelization benefit)
180+
- Single active child → no parallelization opportunity at this level
181+
- Both conditions met → multiple independent tasks worth parallelizing

0 commit comments

Comments
 (0)