From 21f829bce8f8b5ca82445f96f2719c52170356d7 Mon Sep 17 00:00:00 2001
From: Matt Corallo
Date: Mon, 12 May 2025 00:37:05 +0000
Subject: [PATCH] Do not fail to load `ChannelManager` when we see claiming
 payments

When we begin claiming a payment, we move the tracking of it from
`claimable_payments` to `claiming_payments`. This ensures we only ever
have one payment which is in the process of being claimed with a given
payment hash at a time and lets us keep track of when all parts have
been claimed with their `ChannelMonitor`s.

However, on startup, we check that failing to move a payment from
`claimable_payments` to `claiming_payments` implies that it is not
present in `claiming_payments`. This is fine if the payment doesn't
exist, but if the payment has already started being claimed, this will
fail and we'll refuse to deserialize the `ChannelManager` (with a
`debug_assert` failure in debug mode).

Here we resolve this by checking if a payment is already being claimed
before we attempt to initiate claiming and skip the failing check in
that case.
---
 lightning/src/ln/channelmanager.rs | 16 ++++++++++++++++
 lightning/src/ln/reload_tests.rs   | 23 ++++++++++++++++-------
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs
index 3bf672f7e13..170d8261d5a 100644
--- a/lightning/src/ln/channelmanager.rs
+++ b/lightning/src/ln/channelmanager.rs
@@ -14835,6 +14835,22 @@ where
 			if payment_claim.mpp_parts.is_empty() {
 				return Err(DecodeError::InvalidValue);
 			}
+			{
+				let payments = channel_manager.claimable_payments.lock().unwrap();
+				if !payments.claimable_payments.contains_key(&payment_hash) {
+					if let Some(payment) = payments.pending_claiming_payments.get(&payment_hash) {
+						if payment.payment_id == payment_claim.claiming_payment.payment_id {
+							// If this payment already exists and was marked as
+							// being-claimed then the serialized state must contain all
+							// of the pending `ChannelMonitorUpdate`s required to get
+							// the preimage on disk in all MPP parts. Thus we can skip
+							// the replay below.
+							continue;
+						}
+					}
+				}
+			}
+
 			let mut channels_without_preimage = payment_claim.mpp_parts.iter()
 				.map(|htlc_info| (htlc_info.counterparty_node_id, htlc_info.funding_txo, htlc_info.channel_id))
 				.collect::<Vec<_>>();
diff --git a/lightning/src/ln/reload_tests.rs b/lightning/src/ln/reload_tests.rs
index 78243d8db39..bad430ab287 100644
--- a/lightning/src/ln/reload_tests.rs
+++ b/lightning/src/ln/reload_tests.rs
@@ -783,7 +783,7 @@ fn test_forwardable_regen() {
 	claim_payment(&nodes[0], &[&nodes[1], &nodes[2]], payment_preimage_2);
 }
 
-fn do_test_partial_claim_before_restart(persist_both_monitors: bool) {
+fn do_test_partial_claim_before_restart(persist_both_monitors: bool, double_restart: bool) {
 	// Test what happens if a node receives an MPP payment, claims it, but crashes before
 	// persisting the ChannelManager. If `persist_both_monitors` is false, also crash after only
 	// updating one of the two channels' ChannelMonitors. As a result, on startup, we'll (a) still
@@ -799,11 +799,11 @@ fn do_test_partial_claim_before_restart(persist_both_monitors: bool) {
 	// definitely claimed.
 
 	let chanmon_cfgs = create_chanmon_cfgs(4);
 	let node_cfgs = create_node_cfgs(4, &chanmon_cfgs);
-	let persister;
-	let new_chain_monitor;
+	let (persist_d_1, persist_d_2);
+	let (chain_d_1, chain_d_2);
 
 	let node_chanmgrs = create_node_chanmgrs(4, &node_cfgs, &[None, None, None, None]);
-	let nodes_3_deserialized;
+	let (node_d_1, node_d_2);
 
 	let mut nodes = create_network(4, &node_cfgs, &node_chanmgrs);
 
@@ -878,7 +878,14 @@ fn do_test_partial_claim_before_restart(persist_both_monitors: bool) {
 	}
 
 	// Now restart nodes[3].
-	reload_node!(nodes[3], original_manager, &[&updated_monitor.0, &original_monitor.0], persister, new_chain_monitor, nodes_3_deserialized);
+	reload_node!(nodes[3], original_manager.clone(), &[&updated_monitor.0, &original_monitor.0], persist_d_1, chain_d_1, node_d_1);
+
+	if double_restart {
+		// Previously, we had a bug where we'd fail to reload if we re-persist the `ChannelManager`
+		// without updating any `ChannelMonitor`s as we'd fail to double-initiate the claim replay.
+		// We test that here ensuring that we can reload again.
+		reload_node!(nodes[3], node_d_1.encode(), &[&updated_monitor.0, &original_monitor.0], persist_d_2, chain_d_2, node_d_2);
+	}
 
 	// Until the startup background events are processed (in `get_and_clear_pending_events`,
 	// below), the preimage is not copied to the non-persisted monitor...
@@ -973,8 +980,10 @@ fn do_test_partial_claim_before_restart(persist_both_monitors: bool) {
 
 #[test]
 fn test_partial_claim_before_restart() {
-	do_test_partial_claim_before_restart(false);
-	do_test_partial_claim_before_restart(true);
+	do_test_partial_claim_before_restart(false, false);
+	do_test_partial_claim_before_restart(false, true);
+	do_test_partial_claim_before_restart(true, false);
+	do_test_partial_claim_before_restart(true, true);
 }
 
 fn do_forwarded_payment_no_manager_persistence(use_cs_commitment: bool, claim_htlc: bool, use_intercept: bool) {
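
The channelmanager.rs hunk above boils down to a dedup check run during `ChannelManager` deserialization: if a payment is no longer in `claimable_payments` but is already tracked in `pending_claiming_payments` under the same payment ID, the serialized state already carries the pending `ChannelMonitorUpdate`s, so the claim replay is skipped instead of tripping the old consistency check. The standalone Rust sketch below captures only that decision; the `PaymentHash`, `PaymentId`, `ClaimingPayment`, `ClaimablePayments` and `should_skip_claim_replay` names here are simplified, hypothetical stand-ins for illustration and are not the LDK API.

    use std::collections::{HashMap, HashSet};

    // Hypothetical, simplified stand-ins for the real LDK state.
    type PaymentHash = [u8; 32];
    #[derive(Clone, Copy, PartialEq, Eq)]
    struct PaymentId([u8; 32]);

    struct ClaimingPayment {
        payment_id: PaymentId,
    }

    struct ClaimablePayments {
        // Payments that are still claimable, i.e. no claim has been initiated yet.
        claimable_payments: HashSet<PaymentHash>,
        // Payments for which a claim is already in flight.
        pending_claiming_payments: HashMap<PaymentHash, ClaimingPayment>,
    }

    // Decide, on startup, whether the claim replay for `payment_hash` can be skipped:
    // if the payment is no longer claimable but is already tracked as being claimed
    // under the same payment ID, replaying the claim is unnecessary.
    fn should_skip_claim_replay(
        payments: &ClaimablePayments,
        payment_hash: &PaymentHash,
        replayed_payment_id: PaymentId,
    ) -> bool {
        if payments.claimable_payments.contains(payment_hash) {
            return false; // Claim never started; it must be replayed.
        }
        payments
            .pending_claiming_payments
            .get(payment_hash)
            .map_or(false, |claiming| claiming.payment_id == replayed_payment_id)
    }

    fn main() {
        let hash = [1u8; 32];
        let id = PaymentId([2u8; 32]);
        let mut payments = ClaimablePayments {
            claimable_payments: HashSet::new(),
            pending_claiming_payments: HashMap::new(),
        };
        // The payment had already moved to the "being claimed" set before the crash.
        payments
            .pending_claiming_payments
            .insert(hash, ClaimingPayment { payment_id: id });
        assert!(should_skip_claim_replay(&payments, &hash, id));
    }

In the patch itself the same decision is expressed inline as a `continue` inside the deserialization loop, and the widened `do_test_partial_claim_before_restart(persist_both_monitors, double_restart)` test exercises it by reloading `nodes[3]` a second time from the re-serialized manager without updating any `ChannelMonitor`s, the exact sequence that previously failed to load.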