besu-eth
diff --git a/‎app/src/main/java/org/hyperledger/besu/cli/options/storage/PathBasedExtraStorageOptions.java‎
Lines changed: 15 additions & 0 deletions b/‎app/src/main/java/org/hyperledger/besu/cli/options/storage/PathBasedExtraStorageOptions.java‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎app/src/main/java/org/hyperledger/besu/controller/BesuControllerBuilder.java‎
Lines changed: 2 additions & 8 deletions b/‎app/src/main/java/org/hyperledger/besu/controller/BesuControllerBuilder.java‎
Lines changed: 2 additions & 8 deletions
diff --git a/‎app/src/test/java/org/hyperledger/besu/cli/options/stable/DataStorageOptionsTest.java‎
Lines changed: 72 additions & 0 deletions b/‎app/src/test/java/org/hyperledger/besu/cli/options/stable/DataStorageOptionsTest.java‎
Lines changed: 72 additions & 0 deletions
diff --git a/‎app/src/test/resources/everything_config.toml‎
Lines changed: 1 addition & 0 deletions b/‎app/src/test/resources/everything_config.toml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/trie/parallel-merkle-trie.md‎
Lines changed: 181 additions & 0 deletions b/‎docs/trie/parallel-merkle-trie.md‎
Lines changed: 181 additions & 0 deletions
@@ -16,6 +16,7 @@
 
 import static org.hyperledger.besu.ethereum.worldstate.PathBasedExtraStorageConfiguration.DEFAULT_LIMIT_TRIE_LOGS_ENABLED;
 import static org.hyperledger.besu.ethereum.worldstate.PathBasedExtraStorageConfiguration.DEFAULT_MAX_LAYERS_TO_LOAD;
+import static org.hyperledger.besu.ethereum.worldstate.PathBasedExtraStorageConfiguration.DEFAULT_PARALLEL_STATE_ROOT_COMPUTATION;
 import static org.hyperledger.besu.ethereum.worldstate.PathBasedExtraStorageConfiguration.DEFAULT_PARALLEL_TX_PROCESSING;
 import static org.hyperledger.besu.ethereum.worldstate.PathBasedExtraStorageConfiguration.DEFAULT_TRIE_LOG_PRUNING_WINDOW_SIZE;
 import static org.hyperledger.besu.ethereum.worldstate.PathBasedExtraStorageConfiguration.MINIMUM_TRIE_LOG_RETENTION_LIMIT;
@@ -60,6 +61,10 @@ public class PathBasedExtraStorageOptions
   public static final String PARALLEL_TX_PROCESSING_ENABLED =
       "--bonsai-parallel-tx-processing-enabled";
 
+  /** The bonsai parallel state root computation enabled option name. */
+  public static final String PARALLEL_STATE_ROOT_COMPUTATION_ENABLED =
+      "--bonsai-parallel-state-root-computation-enabled";
+
   @Option(
       names = {LIMIT_TRIE_LOGS_ENABLED},
       fallbackValue = "true",
@@ -81,6 +86,13 @@ public class PathBasedExtraStorageOptions
           "Enables parallelization of transactions to optimize processing speed by concurrently loading and executing necessary data in advance. Will be ignored if --data-storage-format is not bonsai (default: ${DEFAULT-VALUE})")
   private Boolean isParallelTxProcessingEnabled = DEFAULT_PARALLEL_TX_PROCESSING;
 
+  @Option(
+      names = {PARALLEL_STATE_ROOT_COMPUTATION_ENABLED},
+      arity = "1",
+      description =
+          "Enables parallel computation of state root hash to optimize performance. Will be ignored if --data-storage-format is not bonsai (default: ${DEFAULT-VALUE})")
+  private Boolean isParallelStateRootComputationEnabled = DEFAULT_PARALLEL_STATE_ROOT_COMPUTATION;
+
   @CommandLine.ArgGroup(validate = false)
   private final PathBasedExtraStorageOptions.Unstable unstableOptions = new Unstable();
 
@@ -174,6 +186,8 @@ public static PathBasedExtraStorageOptions fromConfig(
         domainObject.getUnstable().getCodeStoredByCodeHashEnabled();
     dataStorageOptions.isParallelTxProcessingEnabled =
         domainObject.getParallelTxProcessingEnabled();
+    dataStorageOptions.isParallelStateRootComputationEnabled =
+        domainObject.getParallelStateRootComputationEnabled();
 
     return dataStorageOptions;
   }
@@ -185,6 +199,7 @@ public final PathBasedExtraStorageConfiguration toDomainObject() {
         .limitTrieLogsEnabled(limitTrieLogsEnabled)
         .trieLogPruningWindowSize(trieLogPruningWindowSize)
         .parallelTxProcessingEnabled(isParallelTxProcessingEnabled)
+        .parallelStateRootComputationEnabled(isParallelStateRootComputationEnabled)
         .unstable(
             ImmutablePathBasedExtraStorageConfiguration.PathBasedUnstable.builder()
                 .fullFlatDbEnabled(unstableOptions.fullFlatDbEnabled)
 
@@ -1263,10 +1263,7 @@ WorldStateArchive createWorldStateArchive(
         yield new BonsaiWorldStateProvider(
             worldStateKeyValueStorage,
             blockchain,
-            Optional.of(
-                dataStorageConfiguration
-                    .getPathBasedExtraStorageConfiguration()
-                    .getMaxLayersToLoad()),
+            dataStorageConfiguration.getPathBasedExtraStorageConfiguration(),
             bonsaiCachedMerkleTrieLoader,
             besuComponent.map(BesuComponent::getBesuPluginContext).orElse(null),
             evmConfiguration,
@@ -1280,10 +1277,7 @@ yield new BonsaiWorldStateProvider(
         yield new BonsaiArchiveWorldStateProvider(
             worldStateKeyValueStorage,
             blockchain,
-            Optional.of(
-                dataStorageConfiguration
-                    .getPathBasedExtraStorageConfiguration()
-                    .getMaxLayersToLoad()),
+            dataStorageConfiguration.getPathBasedExtraStorageConfiguration(),
             bonsaiCachedMerkleTrieLoader,
             besuComponent.map(BesuComponent::getBesuPluginContext).orElse(null),
             evmConfiguration,
 
@@ -138,6 +138,76 @@ public void pathbasedCodeUsingCodeHashEnabledCanBeDisabled() {
         "false");
   }
 
+  @Test
+  public void parallelTxProcessingEnabledByDefault() {
+    internalTestSuccess(
+        dataStorageConfiguration ->
+            assertThat(
+                    dataStorageConfiguration
+                        .getPathBasedExtraStorageConfiguration()
+                        .getParallelTxProcessingEnabled())
+                .isEqualTo(true));
+  }
+
+  @Test
+  public void parallelTxProcessingCanBeEnabled() {
+    internalTestSuccess(
+        dataStorageConfiguration ->
+            assertThat(
+                    dataStorageConfiguration
+                        .getPathBasedExtraStorageConfiguration()
+                        .getParallelTxProcessingEnabled())
+                .isEqualTo(true),
+        "--bonsai-parallel-tx-processing-enabled=true");
+  }
+
+  @Test
+  public void parallelTxProcessingCanBeDisabled() {
+    internalTestSuccess(
+        dataStorageConfiguration ->
+            assertThat(
+                    dataStorageConfiguration
+                        .getPathBasedExtraStorageConfiguration()
+                        .getParallelTxProcessingEnabled())
+                .isEqualTo(false),
+        "--bonsai-parallel-tx-processing-enabled=false");
+  }
+
+  @Test
+  public void parallelStateRootComputationEnabledByDefault() {
+    internalTestSuccess(
+        dataStorageConfiguration ->
+            assertThat(
+                    dataStorageConfiguration
+                        .getPathBasedExtraStorageConfiguration()
+                        .getParallelStateRootComputationEnabled())
+                .isEqualTo(true));
+  }
+
+  @Test
+  public void parallelStateRootComputationCanBeEnabled() {
+    internalTestSuccess(
+        dataStorageConfiguration ->
+            assertThat(
+                    dataStorageConfiguration
+                        .getPathBasedExtraStorageConfiguration()
+                        .getParallelStateRootComputationEnabled())
+                .isEqualTo(true),
+        "--bonsai-parallel-state-root-computation-enabled=true");
+  }
+
+  @Test
+  public void parallelStateRootComputationCanBeDisabled() {
+    internalTestSuccess(
+        dataStorageConfiguration ->
+            assertThat(
+                    dataStorageConfiguration
+                        .getPathBasedExtraStorageConfiguration()
+                        .getParallelStateRootComputationEnabled())
+                .isEqualTo(false),
+        "--bonsai-parallel-state-root-computation-enabled=false");
+  }
+
   @Test
   public void receiptCompactionCanBeEnabledWithImplicitTrueValue() {
     internalTestSuccess(
@@ -176,6 +246,8 @@ protected DataStorageConfiguration createCustomizedDomainObject() {
                 .maxLayersToLoad(513L)
                 .limitTrieLogsEnabled(true)
                 .trieLogPruningWindowSize(514)
+                .parallelTxProcessingEnabled(true)
+                .parallelStateRootComputationEnabled(true)
                 .build())
         .build();
   }
 
@@ -221,6 +221,7 @@ bonsai-limit-trie-logs-enabled=true
 bonsai-trie-logs-pruning-window-size=100_000
 receipt-compaction-enabled=true
 bonsai-parallel-tx-processing-enabled=false
+bonsai-parallel-state-root-computation-enabled=false
 
 # feature flags
 Xsecp256k1-native-enabled=false
 
@@ -0,0 +1,181 @@
+# ParallelStoredMerklePatriciaTrie - Logic Explanation
+
+## Core Problem Being Solved
+
+When you need to update many keys in a Merkle Patricia Trie (like Ethereum state updates), doing them one-by-one is slow. This implementation batches updates and processes independent parts of the tree simultaneously using multiple CPU cores.
+
+## High-Level Strategy
+
+**The Big Idea:** A branch node has 16 independent children. If you have updates going to different children, you can process those children in parallel.
+
+### Three Phases
+
+1. **Accumulation Phase**: Collect all updates without applying them
+2. **Parallel Processing Phase**: Apply updates recursively, splitting work when beneficial
+3. **Persistence Phase**: Write all modified nodes to storage at once
+
+## The Trie Structure Recap
+
+Think of the trie like a filing system:
+- **Keys** are converted to hex nibbles (0-F)
+- **Path** through the tree follows these nibbles
+- Example: Key `0xABCD` → path `[A, B, C, D]`
+
+**Node types:**
+- **Branch**: 16 slots (one per hex digit) + optional value
+- **Extension**: Shortcut for long single paths (e.g., `[A,B,C,D]` → child)
+- **Leaf**: Terminal node with actual value
+- **Null**: Empty slot
+
+## How Parallelization Works
+
+### Starting Point: Branch Nodes
+
+When processing a branch node with multiple updates:
+
+1. **Group updates by their next nibble**
+    - Updates to `[A, ...]` go in group A
+    - Updates to `[B, ...]` go in group B
+    - And so on for all 16 possible hex digits
+
+2. **Decide which groups to parallelize**
+    - Large groups (multiple updates) → process in thread pool
+    - Small groups (single update) → process in current task
+    - Reason: Avoid task creation overhead for tiny tasks
+
+3. **Process each group recursively**
+    - Each child node gets processed with only its relevant updates
+    - That child might be a branch → parallelizes again
+    - Creates a tree of parallel tasks
+
+4. **Wait for all parallel tasks to complete**
+    - Use futures to track parallel work
+    - Block until all children are updated at each level
+
+5. **Reconstruct the branch with new children**
+    - Create new branch node (immutable pattern)
+    - Compute its hash
+    - Return to parent
+
+### Example Visualization
+
+```
+Updates: [0xAB01, 0xAB02, 0xAE03, 0xF123]
+
+Root Branch:
+├─ Child[A]: 3 updates [AB01, AB02, AE03] → PARALLEL TASK 1
+│   └─ Branch at [A]:
+│       ├─ Child[B]: 2 updates [AB01, AB02] → PARALLEL TASK 1.1
+│       └─ Child[E]: 1 update [AE03] → SEQUENTIAL TASK 1
+└─ Child[F]: 1 update [F123] → SEQUENTIAL 
+
+Result: multiple threads working simultaneously
+```
+
+### Why This Works
+
+**Independence**: Updates to `child[A]` don't affect `child[F]`. They share no data, so they can safely run in parallel.
+
+**Recursive Nature**: The child at `[A]` is itself a branch, so it can further parallelize its own children `[B]` and `[E]`.
+
+**Natural Load Balancing**: Hot paths in the trie (many updates) automatically get more parallel workers.
+
+## Handling Non-Branch Nodes
+
+### Extension Nodes: The Complexity
+
+Extensions compress long single paths. Example:
+- Instead of: `Branch[A] → Branch[B] → Branch[C] → Child`
+- Store: `Extension([A,B,C]) → Child`
+
+**Problem**: Extensions are sequential by definition. How to parallelize?
+
+**Solution**: Temporarily expand them into branches!
+
+#### Expansion Logic
+
+When updates diverge within an extension's path:
+
+1. **Find the divergence point**
+    - Extension path: `[A, B, C, D]`
+    - Addition at: `[A, B, C, E]` and `[A, B, C, F]`
+    - Divergence at index 3 (position D)
+
+2. **Build branch structure up to divergence**
+    - Create branches for `[A]`, then `[B]`, then `[C]`
+    - At `[C]`, create branch with children at `[D]`, `[E]`, `[F]`
+
+3. **Process the expanded structure in parallel**
+    - Now it's a branch tree → normal parallel processing
+
+4. **Let the visitor pattern re-optimize**
+    - After updates, the structure may collapse back to extensions
+    - Standard trie optimization (not shown in parallel code)
+
+**When to expand:**
+- Only if there are multiple updates (worth the overhead)
+- Single update → use sequential visitor (faster, handles restructuring)
+
+### Leaf Nodes: Conversion Strategy
+
+Leaf represents a single value at some path.
+
+**If multiple addition affect this area:**
+1. Convert leaf to a branch
+2. Place the existing leaf value in appropriate child slot
+3. Process addition in parallel on the new branch
+
+**If single update:**
+- Let sequential visitor handle it (might just update the value, or split the leaf)
+
+### Null Nodes: Building from Scratch
+
+Empty position getting multiple addition:
+1. Create empty branch (16 null children)
+2. Process addition in parallel
+3. Children get created as addition are applied
+
+## Thread Safety Mechanisms
+
+### BranchWrapper: Coordinating Parallel Updates
+
+Problem: Multiple threads want to update different children of the same branch.
+
+Solution:
+- Wrap the branch node's children list in a synchronized list
+- Each parallel task updates its assigned child slot
+- No conflicts because each task has exclusive child index
+- After all futures complete, reconstruct the branch atomically
+
+### CommitCache: Deferred Storage Writes
+
+Problem: Many threads computing node hashes, but storage writes should be batched.
+
+Solution:
+- Thread-safe map collects all nodes to persist
+- Each parallel task adds its nodes to that map
+- Single flush at the end writes everything to storage
+- Reduces I/O contention and transaction overhead
+
+### Immutable Nodes
+
+Key principle: Nodes are never modified in place.
+
+Every update creates a new node instance. This means:
+- No locks needed for reading nodes
+- Parallel tasks can safely read shared nodes
+- Parent gets updated with reference to new child
+- Old nodes eventually garbage collected
+
+## Decision Logic: When to Parallelize?
+
+### The Partitioning Rule
+
+A group of updates gets parallel processing if:
+1. The group has more than 1 update, AND
+2. The branch has more than 1 active group
+
+**Why both conditions?**
+- Single update → sequential is faster (no parallelization benefit)
+- Single active child → no parallelization opportunity at this level
+- Both conditions met → multiple independent tasks worth parallelizing