@@ -75,7 +75,6 @@ use itertools::Itertools as _;
7575use nonzero_ext:: nonzero;
7676use num:: BigInt ;
7777use num_traits:: identities:: Zero ;
78- use rayon:: prelude:: ParallelBridge ;
7978use schemars:: JsonSchema ;
8079use serde:: { Deserialize , Serialize } ;
8180use std:: ops:: RangeInclusive ;
@@ -1645,8 +1644,10 @@ where
16451644
16461645 /// Validates all tipsets at epoch `start..=end` behind the heaviest tipset.
16471646 ///
1648- /// This spawns [`rayon::current_num_threads`] threads to do the compute-heavy work
1649- /// of tipset validation.
1647+ /// Tipsets are processed sequentially. The compute-intensive work inside each
1648+ /// tipset (`bellperson` proof verification, FVM batch seal verification, etc.)
1649+ /// is already heavily parallelized with `rayon`. Parallelizing the outer loop as well
1650+ /// causes problems because of locks inside those crates, so keep this loop sequential.
16501651 ///
16511652 /// # What is validation?
16521653 /// Every state transition returns a new _state root_, which is typically retained in, e.g., snapshots.
@@ -1662,10 +1663,6 @@ where
16621663 /// - assert that they match
16631664 ///
16641665 /// See [`Self::compute_tipset_state_blocking`] for an explanation of state transitions.
1665- ///
1666- /// # Known issues
1667- /// This function is blocking, but we do observe threads waiting and synchronizing.
1668- /// This is suspected to be due something in the VM or its `WASM` runtime.
16691666 #[ tracing:: instrument( skip( self ) ) ]
16701667 pub fn validate_range ( & self , epochs : RangeInclusive < i64 > ) -> anyhow:: Result < ( ) > {
16711668 let heaviest = self . heaviest_tipset ( ) ;
@@ -1852,44 +1849,42 @@ where
18521849 DB : Blockstore + Send + Sync + ' static ,
18531850 T : Iterator < Item = Tipset > + Send ,
18541851{
1855- use rayon:: iter:: ParallelIterator as _;
1856- tipsets
1857- . tuple_windows ( )
1858- . par_bridge ( )
1859- . try_for_each ( |( child, parent) | {
1860- info ! ( height = parent. epoch( ) , "compute parent state" ) ;
1861- let ExecutedTipset {
1862- state_root : actual_state,
1863- receipt_root : actual_receipt,
1864- ..
1865- } = apply_block_messages (
1866- genesis_timestamp,
1867- chain_index. shallow_clone ( ) ,
1868- chain_config. shallow_clone ( ) ,
1869- beacon. shallow_clone ( ) ,
1870- engine,
1871- parent,
1872- NO_CALLBACK ,
1873- VMTrace :: NotTraced ,
1874- )
1875- . context ( "couldn't compute tipset state" ) ?;
1876- let expected_receipt = child. min_ticket_block ( ) . message_receipts ;
1877- let expected_state = child. parent_state ( ) ;
1878- match ( expected_state, expected_receipt) == ( & actual_state, actual_receipt) {
1879- true => Ok ( ( ) ) ,
1880- false => {
1881- error ! (
1882- height = child. epoch( ) ,
1883- ?expected_state,
1884- ?expected_receipt,
1885- ?actual_state,
1886- ?actual_receipt,
1887- "state mismatch"
1888- ) ;
1889- bail ! ( "state mismatch" ) ;
1890- }
1891- }
1892- } )
1852+ // Validate one tipset at a time. Parallelizing the outer loop across tipsets
1853+ // might wedge the global rayon pool.
1854+ // Sequential outer iteration leaves the entire rayon pool free for the
1855+ // parallelism that already exists inside the validation of each tipset.
1856+ for ( child, parent) in tipsets. tuple_windows ( ) {
1857+ info ! ( height = parent. epoch( ) , "compute parent state" ) ;
1858+ let ExecutedTipset {
1859+ state_root : actual_state,
1860+ receipt_root : actual_receipt,
1861+ ..
1862+ } = apply_block_messages (
1863+ genesis_timestamp,
1864+ chain_index. shallow_clone ( ) ,
1865+ chain_config. shallow_clone ( ) ,
1866+ beacon. shallow_clone ( ) ,
1867+ engine,
1868+ parent,
1869+ NO_CALLBACK ,
1870+ VMTrace :: NotTraced ,
1871+ )
1872+ . context ( "couldn't compute tipset state" ) ?;
1873+ let expected_receipt = child. min_ticket_block ( ) . message_receipts ;
1874+ let expected_state = child. parent_state ( ) ;
1875+ if ( expected_state, expected_receipt) != ( & actual_state, actual_receipt) {
1876+ error ! (
1877+ height = child. epoch( ) ,
1878+ ?expected_state,
1879+ ?expected_receipt,
1880+ ?actual_state,
1881+ ?actual_receipt,
1882+ "state mismatch"
1883+ ) ;
1884+ bail ! ( "state mismatch" ) ;
1885+ }
1886+ }
1887+ Ok ( ( ) )
18931888}
18941889
18951890/// Shared context for creating VMs and preparing tipset state.
0 commit comments