fix(cbf): loop sync until reveal cursor stops advancing on fresh recovery

febyeji · febyeji · commit 12feeff2994e · 2026-04-08T13:53:33.000+09:00
A fresh CBF recovery only scanned scripts at indices `0..stop_gap`, so
funds at deeper indices were invisible to the first sync. Subsequent
syncs raised `skip_height` past the funding block, leaving the deeper
funds permanently undiscovered.
diff --git a/src/chain/cbf.rs b/src/chain/cbf.rs
@@ -55,6 +55,15 @@ const MAX_RESTART_RETRIES: u32 = 5;
 /// Initial backoff delay for restart retries (doubles each attempt).
 const INITIAL_BACKOFF_MS: u64 = 500;
 
+/// Maximum number of passes the on-chain wallet recovery loop will run before
+/// giving up. Each extra pass after the first re-scans the full chain history
+/// for newly revealed scripts past the previously scanned window. With
+/// `BDK_CLIENT_STOP_GAP = 20`, eight passes cover at most roughly
+/// `8 * 20 = 160` consecutive derivation indices in a single sync.
+/// NOTE(review): later syncs begin from the checkpoint-derived `skip_height`,
+/// so confirm funds past this depth in older blocks are still discoverable.
+const MAX_RECOVERY_LOOP_ITERS: usize = 8;
+
 /// The fee estimation back-end used by the CBF chain source.
 enum FeeSource {
 	/// Derive fee rates from the coinbase reward of recent blocks.
@@ -586,27 +595,64 @@ impl CbfChainSource {
 
 		let res = async {
 			let requester = self.requester()?;
+			let now = Instant::now();
+
+			// Multi-pass recovery loop. On a fresh wallet `get_spks_for_cbf_sync`
+			// only covers indices `0..stop_gap`, so funds at deeper indices are
+			// invisible to a single scan. Each iteration:
+			//   1. asks the wallet for scripts past the previously scanned boundary,
+			//   2. runs a filter scan + apply_update,
+			//   3. lets `Update.last_active_indices` advance BDK's reveal cursor,
+			//   4. loops if the new reveal cursor extends the window past the
+			//      boundary we just scanned.
+			// In steady state this terminates after the very first iteration since
+			// no new revealed indices appear past the existing window. Iterations
+			// after the first scan over the *full* chain history (skip_height = None)
+			// because the newly added scripts could match historical blocks.
+			let mut prev_window_ends: BTreeMap<KeychainKind, u32> = BTreeMap::new();
+			let mut iter: usize = 0;
+			let mut total_matched_blocks: usize = 0;
+
+			loop {
+				let (scripts, spk_to_keychain_idx, window_ends) =
+					onchain_wallet.get_spks_for_cbf_sync(BDK_CLIENT_STOP_GAP, &prev_window_ends);
+
+				if scripts.is_empty() {
+					if iter == 0 {
+						log_debug!(self.logger, "No wallet scripts to sync via CBF.");
+					}
+					break;
+				}
+
+				// First pass scans incrementally from BDK's checkpoint (cheap delta
+				// sync). Subsequent passes need to scan the full chain because the
+				// newly added scripts could match historical blocks.
+				let skip_height = if iter == 0 {
+					onchain_wallet.latest_checkpoint().height().checked_sub(REORG_SAFETY_BLOCKS)
+				} else {
+					None
+				};
 
-			let (scripts, spk_to_keychain_idx) =
-				onchain_wallet.get_spks_for_cbf_sync(BDK_CLIENT_STOP_GAP);
-			if scripts.is_empty() {
-				log_debug!(self.logger, "No wallet scripts to sync via CBF.");
-			} else {
-				let now = Instant::now();
 				let timeout_fut = tokio::time::timeout(
 					Duration::from_secs(
 						self.sync_config.timeouts_config.onchain_wallet_sync_timeout_secs,
 					),
-					self.sync_onchain_wallet_op(requester, &onchain_wallet, scripts),
+					self.sync_onchain_wallet_op(
+						requester.clone(),
+						scripts,
+						skip_height,
+						/* include_registered_scripts */ iter == 0,
+					),
 				);
 
-				let (tx_update, sync_update) = match timeout_fut.await {
+				let (tx_update, sync_update, matched_count) = match timeout_fut.await {
 					Ok(res) => res?,
 					Err(e) => {
 						log_error!(self.logger, "Sync of on-chain wallet timed out: {}", e);
 						return Err(Error::WalletOperationTimeout);
 					},
 				};
+				total_matched_blocks += matched_count;
 
 				// Build chain checkpoint extending from the wallet's current tip.
 				let mut cp = onchain_wallet.latest_checkpoint();
@@ -622,10 +668,10 @@ impl CbfChainSource {
 					cp = cp.push(tip_block_id).unwrap_or_else(|old| old);
 				}
 
-				// Walk the matched outputs to find the highest derivation index hit per
-				// keychain. Passing these via Update.last_active_indices tells BDK to
-				// advance its reveal cursor, which in turn extends the scan window we
-				// compute in get_spks_for_cbf_sync on the next sync.
+				// Walk the matched outputs to find the highest derivation index hit
+				// per keychain. Passing these via Update.last_active_indices tells
+				// BDK to advance its reveal cursor, which in turn extends the scan
+				// window for the next loop iteration.
 				let mut last_active_indices: BTreeMap<KeychainKind, u32> = BTreeMap::new();
 				for tx in &tx_update.txs {
 					for txout in &tx.output {
@@ -644,10 +690,28 @@ impl CbfChainSource {
 
 				onchain_wallet.apply_update(update)?;
 
+				prev_window_ends = window_ends;
+				iter += 1;
+
+				if iter >= MAX_RECOVERY_LOOP_ITERS {
+					log_info!(
+						self.logger,
+						"CBF on-chain recovery loop hit max iterations ({}); deeper funds will be discovered on subsequent syncs.",
+						MAX_RECOVERY_LOOP_ITERS,
+					);
+					break;
+				}
+			}
+
+			if iter > 0 {
 				log_debug!(
 					self.logger,
-					"Sync of on-chain wallet via CBF finished in {}ms.",
-					now.elapsed().as_millis()
+					"Sync of on-chain wallet via CBF finished in {}ms ({} pass{}, {} matched block{}).",
+					now.elapsed().as_millis(),
+					iter,
+					if iter == 1 { "" } else { "es" },
+					total_matched_blocks,
+					if total_matched_blocks == 1 { "" } else { "s" },
 				);
 			}
 
@@ -670,25 +734,28 @@ impl CbfChainSource {
 	}
 
 	async fn sync_onchain_wallet_op(
-		&self, requester: Requester, onchain_wallet: &Wallet, scripts: Vec<ScriptBuf>,
-	) -> Result<(TxUpdate<ConfirmationBlockTime>, SyncUpdate), Error> {
-		// Derive skip height from BDK's persisted checkpoint, walked back by
-		// REORG_SAFETY_BLOCKS for reorg safety (same approach as bdk-kyoto).
-		// This survives restarts since BDK persists its checkpoint chain.
+		&self, requester: Requester, scripts: Vec<ScriptBuf>, skip_height: Option<u32>,
+		include_registered_scripts: bool,
+	) -> Result<(TxUpdate<ConfirmationBlockTime>, SyncUpdate, usize), Error> {
+		// We optionally include LDK-registered scripts (e.g., channel funding
+		// output scripts) alongside the wallet scripts. This ensures the
+		// on-chain wallet scan also fetches blocks containing channel funding
+		// transactions, whose outputs are needed by BDK's TxGraph to calculate
+		// fees for subsequent spends such as splice transactions. Without
+		// these, BDK's `calculate_fee` would fail with `MissingTxOut` because
+		// the parent transaction's outputs are unknown. This mirrors what the
+		// Bitcoind chain source does in `Wallet::block_connected` by inserting
+		// registered tx outputs.
 		//
-		// We include LDK-registered scripts (e.g., channel funding output
-		// scripts) alongside the wallet scripts. This ensures the on-chain
-		// wallet scan also fetches blocks containing channel funding
-		// transactions, whose outputs are needed by BDK's TxGraph to
-		// calculate fees for subsequent spends such as splice transactions.
-		// Without these, BDK's `calculate_fee` would fail with
-		// `MissingTxOut` because the parent transaction's outputs are
-		// unknown. This mirrors what the Bitcoind chain source does in
-		// `Wallet::block_connected` by inserting registered tx outputs.
+		// `include_registered_scripts` is `false` for the recovery loop's
+		// follow-up passes: those passes only carry the *new* wallet scripts
+		// past the previously scanned window, so re-scanning the full set of
+		// channel scripts would be wasted work — they were already scanned in
+		// the first pass.
 		let mut all_scripts = scripts;
-		all_scripts.extend(self.registered_scripts.lock().unwrap().iter().cloned());
-		let skip_height =
-			onchain_wallet.latest_checkpoint().height().checked_sub(REORG_SAFETY_BLOCKS);
+		if include_registered_scripts {
+			all_scripts.extend(self.registered_scripts.lock().unwrap().iter().cloned());
+		}
 		let (sync_update, matched) = self.run_filter_scan(all_scripts, skip_height).await?;
 
 		log_debug!(
@@ -727,7 +794,8 @@ impl CbfChainSource {
 			}
 		}
 
-		Ok((tx_update, sync_update))
+		let matched_count = matched.len();
+		Ok((tx_update, sync_update, matched_count))
 	}
 
 	/// Sync the Lightning wallet by confirming channel transactions via compact block filters.
diff --git a/src/wallet/mod.rs b/src/wallet/mod.rs
@@ -5,7 +5,7 @@
 // http://opensource.org/licenses/MIT>, at your option. You may not use this file except in
 // accordance with one or both of these licenses.
 
-use std::collections::HashMap;
+use std::collections::{BTreeMap, HashMap};
 use std::future::Future;
 use std::ops::Deref;
 use std::str::FromStr;
@@ -124,35 +124,48 @@ impl Wallet {
 	}
 
 	/// Returns the on-chain scripts CBF should scan for, plus a mapping
-	/// from each script to its `(keychain, derivation index)`.
+	/// from each script to its `(keychain, derivation index)` and the per-keychain
+	/// window end (exclusive) used to compute the script set.
 	///
-	/// For each keychain, the returned set covers indices `0..last_revealed + 1 + stop_gap`,
+	/// For each keychain, the full window covers indices `0..last_revealed + 1 + stop_gap`,
 	/// i.e. all already-revealed scripts plus a `stop_gap`-sized lookahead buffer past the
 	/// last revealed index. This mirrors BDK's internal `KeychainTxOutIndex` lookahead so
 	/// CBF also scans for funds that land at indices just past the current reveal cursor
 	/// (fresh recovery, gap deposits, etc.). On a completely fresh wallet `last_revealed` is
-	/// `None`, so the window is simply `0..stop_gap`.
+	/// `None`, so the full window is simply `0..stop_gap`.
 	///
-	/// The accompanying map lets callers translate a matched output script back to
+	/// `start_indices` lets callers restrict the returned scripts to a tail of the window
+	/// per keychain. Indices strictly less than `start_indices[keychain]` are skipped, so
+	/// callers running a multi-pass recovery loop can scan only the *new* scripts past a
+	/// previously scanned boundary. A keychain absent from the map starts at index 0.
+	///
+	/// The returned `window_ends` map records the exclusive upper bound used for each
+	/// keychain. Callers can feed this back as the next iteration's `start_indices` to
+	/// continue scanning past already-covered indices.
+	///
+	/// The script-to-keychain map lets callers translate a matched output script back to
 	/// `(keychain, index)` so they can populate `Update.last_active_indices` and advance
 	/// BDK's reveal cursor to reflect what was actually observed on-chain.
 	pub(crate) fn get_spks_for_cbf_sync(
-		&self, stop_gap: usize,
-	) -> (Vec<ScriptBuf>, HashMap<ScriptBuf, (KeychainKind, u32)>) {
+		&self, stop_gap: usize, start_indices: &BTreeMap<KeychainKind, u32>,
+	) -> (Vec<ScriptBuf>, HashMap<ScriptBuf, (KeychainKind, u32)>, BTreeMap<KeychainKind, u32>) {
 		let wallet = self.inner.lock().unwrap();
 		let mut scripts = Vec::new();
 		let mut spk_to_keychain_idx: HashMap<ScriptBuf, (KeychainKind, u32)> = HashMap::new();
+		let mut window_ends: BTreeMap<KeychainKind, u32> = BTreeMap::new();
 		for keychain in [KeychainKind::External, KeychainKind::Internal] {
 			let window_end =
 				wallet.spk_index().last_revealed_index(keychain).map(|i| i + 1).unwrap_or(0)
 					+ stop_gap as u32;
-			for idx in 0..window_end {
+			let start = start_indices.get(&keychain).copied().unwrap_or(0);
+			for idx in start..window_end {
 				let spk = wallet.peek_address(keychain, idx).address.script_pubkey();
 				scripts.push(spk.clone());
 				spk_to_keychain_idx.insert(spk, (keychain, idx));
 			}
+			window_ends.insert(keychain, window_end);
 		}
-		(scripts, spk_to_keychain_idx)
+		(scripts, spk_to_keychain_idx, window_ends)
 	}
 
 	pub(crate) fn latest_checkpoint(&self) -> bdk_chain::CheckPoint {
diff --git a/tests/integration_tests_rust.rs b/tests/integration_tests_rust.rs
@@ -3197,6 +3197,81 @@ async fn onchain_wallet_recovery_cbf_advances_reveal_cursor() {
 	recovered_node.stop().unwrap();
 }
 
+/// Regression test: a fresh CBF recovery must discover funds at derivation
+/// indices past the initial `BDK_CLIENT_STOP_GAP` window in a single call to
+/// `sync_wallets()`. Without the convergence loop, the first sync only scans
+/// scripts at indices `0..stop_gap`. Subsequent syncs use a `skip_height`
+/// derived from the just-advanced BDK checkpoint, so once the chain has grown
+/// past the funding block by more than `REORG_SAFETY_BLOCKS`, the historical
+/// block holding the deeper funds is never re-evaluated and the recovery
+/// silently misses the funds.
+#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
+async fn onchain_wallet_recovery_cbf_deep_stop_gap() {
+	let (bitcoind, electrsd) = setup_bitcoind_and_electrsd();
+	let chain_source = TestChainSource::Cbf(&bitcoind);
+
+	let original_config = random_config(true);
+	let original_node_entropy = original_config.node_entropy.clone();
+	let original_node = setup_node(&chain_source, original_config);
+
+	// Reveal 40 addresses; fund the ones at indices 19 and 38 so that:
+	//   - idx 19 sits inside the initial `0..20` recovery window,
+	//   - idx 38 sits inside the expanded `0..40` window we get after
+	//     advancing the reveal cursor to 19,
+	// requiring at least two sync iterations to converge.
+	let mut addrs = Vec::with_capacity(40);
+	for _ in 0..40 {
+		addrs.push(original_node.onchain_payment().new_address().unwrap());
+	}
+	let funded_low = addrs[19].clone();
+	let funded_high = addrs[38].clone();
+
+	let premine_amount_sat = 100_000;
+	premine_and_distribute_funds(
+		&bitcoind.client,
+		&electrsd.client,
+		vec![funded_low, funded_high],
+		Amount::from_sat(premine_amount_sat),
+	)
+	.await;
+
+	// Mine extra blocks so the funding block is well past `REORG_SAFETY_BLOCKS`
+	// behind the chain tip. Without this gap a broken recovery could still find
+	// the deeper funds on a follow-up sync because the second-sync `skip_height`
+	// would not yet exclude the funding block.
+	generate_blocks_and_wait(&bitcoind.client, &electrsd.client, 20).await;
+
+	wait_for_cbf_sync(&original_node, || {
+		original_node.list_balances().spendable_onchain_balance_sats == premine_amount_sat * 2
+	})
+	.await;
+	assert_eq!(
+		original_node.list_balances().spendable_onchain_balance_sats,
+		premine_amount_sat * 2
+	);
+
+	original_node.stop().unwrap();
+	drop(original_node);
+
+	// Recover from a completely fresh wallet state, same seed.
+	let mut recovered_config = random_config(true);
+	recovered_config.node_entropy = original_node_entropy;
+	recovered_config.recovery_mode = true;
+	let recovered_node = setup_node(&chain_source, recovered_config);
+
+	wait_for_cbf_sync(&recovered_node, || {
+		recovered_node.list_balances().spendable_onchain_balance_sats == premine_amount_sat * 2
+	})
+	.await;
+	assert_eq!(
+		recovered_node.list_balances().spendable_onchain_balance_sats,
+		premine_amount_sat * 2,
+		"recovery did not find funds beyond the initial CBF stop-gap window"
+	);
+
+	recovered_node.stop().unwrap();
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
 async fn onchain_send_receive_cbf() {
 	let (bitcoind, electrsd) = setup_bitcoind_and_electrsd();