From 21f829bce8f8b5ca82445f96f2719c52170356d7 Mon Sep 17 00:00:00 2001
From: Matt Corallo
Date: Mon, 12 May 2025 00:37:05 +0000
Subject: [PATCH] Do not fail to load `ChannelManager` when we see claiming
 payments

When we begin claiming a payment, we move the tracking of it from
`claimable_payments` to `claiming_payments`. This ensures we only ever
have one payment which is in the process of being claimed with a given
payment hash at a time and lets us keep track of when all parts have
been claimed with their `ChannelMonitor`s.

However, on startup, we check that failing to move a payment from
`claimable_payments` to `claiming_payments` implies that it is not
present in `claiming_payments`. This is fine if the payment doesn't
exist, but if the payment has already started being claimed, this will
fail and we'll refuse to deserialize the `ChannelManager` (with a
`debug_assert` failure in debug mode).

Here we resolve this by checking if a payment is already being claimed
before we attempt to initiate claiming and skip the failing check in
that case.
---
 lightning/src/ln/channelmanager.rs | 16 ++++++++++++++++
 lightning/src/ln/reload_tests.rs   | 23 ++++++++++++++++-------
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs
index 3bf672f7e13..170d8261d5a 100644
--- a/lightning/src/ln/channelmanager.rs
+++ b/lightning/src/ln/channelmanager.rs
@@ -14835,6 +14835,22 @@ where
 			if payment_claim.mpp_parts.is_empty() {
 				return Err(DecodeError::InvalidValue);
 			}
+			{
+				let payments = channel_manager.claimable_payments.lock().unwrap();
+				if !payments.claimable_payments.contains_key(&payment_hash) {
+					if let Some(payment) = payments.pending_claiming_payments.get(&payment_hash) {
+						if payment.payment_id == payment_claim.claiming_payment.payment_id {
+							// If this payment already exists and was marked as
+							// being-claimed then the serialized state must contain all
+							// of the pending `ChannelMonitorUpdate`s required to get
+							// the preimage on disk in all MPP parts. Thus we can skip
+							// the replay below.
+							continue;
+						}
+					}
+				}
+			}
+
 			let mut channels_without_preimage = payment_claim.mpp_parts.iter()
 				.map(|htlc_info| (htlc_info.counterparty_node_id, htlc_info.funding_txo, htlc_info.channel_id))
 				.collect::<Vec<_>>();
diff --git a/lightning/src/ln/reload_tests.rs b/lightning/src/ln/reload_tests.rs
index 78243d8db39..bad430ab287 100644
--- a/lightning/src/ln/reload_tests.rs
+++ b/lightning/src/ln/reload_tests.rs
@@ -783,7 +783,7 @@ fn test_forwardable_regen() {
 	claim_payment(&nodes[0], &[&nodes[1], &nodes[2]], payment_preimage_2);
 }
 
-fn do_test_partial_claim_before_restart(persist_both_monitors: bool) {
+fn do_test_partial_claim_before_restart(persist_both_monitors: bool, double_restart: bool) {
 	// Test what happens if a node receives an MPP payment, claims it, but crashes before
 	// persisting the ChannelManager. If `persist_both_monitors` is false, also crash after only
 	// updating one of the two channels' ChannelMonitors. As a result, on startup, we'll (a) still
@@ -799,11 +799,11 @@ fn do_test_partial_claim_before_restart(persist_both_monitors: bool) {
 	// definitely claimed.
 
 	let chanmon_cfgs = create_chanmon_cfgs(4);
 	let node_cfgs = create_node_cfgs(4, &chanmon_cfgs);
-	let persister;
-	let new_chain_monitor;
+	let (persist_d_1, persist_d_2);
+	let (chain_d_1, chain_d_2);
 
 	let node_chanmgrs = create_node_chanmgrs(4, &node_cfgs, &[None, None, None, None]);
-	let nodes_3_deserialized;
+	let (node_d_1, node_d_2);
 
 	let mut nodes = create_network(4, &node_cfgs, &node_chanmgrs);
 
@@ -878,7 +878,14 @@ fn do_test_partial_claim_before_restart(persist_both_monitors: bool) {
 	}
 
 	// Now restart nodes[3].
-	reload_node!(nodes[3], original_manager, &[&updated_monitor.0, &original_monitor.0], persister, new_chain_monitor, nodes_3_deserialized);
+	reload_node!(nodes[3], original_manager.clone(), &[&updated_monitor.0, &original_monitor.0], persist_d_1, chain_d_1, node_d_1);
+
+	if double_restart {
+		// Previously, we had a bug where we'd fail to reload if we re-persist the `ChannelManager`
+		// without updating any `ChannelMonitor`s as we'd fail to double-initiate the claim replay.
+		// We test that here ensuring that we can reload again.
+		reload_node!(nodes[3], node_d_1.encode(), &[&updated_monitor.0, &original_monitor.0], persist_d_2, chain_d_2, node_d_2);
+	}
 
 	// Until the startup background events are processed (in `get_and_clear_pending_events`,
 	// below), the preimage is not copied to the non-persisted monitor...
@@ -973,8 +980,10 @@ fn do_test_partial_claim_before_restart(persist_both_monitors: bool) {
 
 #[test]
 fn test_partial_claim_before_restart() {
-	do_test_partial_claim_before_restart(false);
-	do_test_partial_claim_before_restart(true);
+	do_test_partial_claim_before_restart(false, false);
+	do_test_partial_claim_before_restart(false, true);
+	do_test_partial_claim_before_restart(true, false);
+	do_test_partial_claim_before_restart(true, true);
 }
 
 fn do_forwarded_payment_no_manager_persistence(use_cs_commitment: bool, claim_htlc: bool, use_intercept: bool) {
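
The channelmanager.rs hunk above boils down to a dedup check run during `ChannelManager` deserialization: if a payment is no longer in `claimable_payments` but is already tracked in `pending_claiming_payments` under the same payment ID, the serialized state already carries the pending `ChannelMonitorUpdate`s, so the claim replay is skipped instead of tripping the old consistency check. The standalone Rust sketch below captures only that decision; the `PaymentHash`, `PaymentId`, `ClaimingPayment`, `ClaimablePayments` and `should_skip_claim_replay` names here are simplified, hypothetical stand-ins for illustration and are not the LDK API.

    use std::collections::{HashMap, HashSet};

    // Hypothetical, simplified stand-ins for the real LDK state.
    type PaymentHash = [u8; 32];
    #[derive(Clone, Copy, PartialEq, Eq)]
    struct PaymentId([u8; 32]);

    struct ClaimingPayment {
        payment_id: PaymentId,
    }

    struct ClaimablePayments {
        // Payments that are still claimable, i.e. no claim has been initiated yet.
        claimable_payments: HashSet<PaymentHash>,
        // Payments for which a claim is already in flight.
        pending_claiming_payments: HashMap<PaymentHash, ClaimingPayment>,
    }

    // Decide, on startup, whether the claim replay for `payment_hash` can be skipped:
    // if the payment is no longer claimable but is already tracked as being claimed
    // under the same payment ID, replaying the claim is unnecessary.
    fn should_skip_claim_replay(
        payments: &ClaimablePayments,
        payment_hash: &PaymentHash,
        replayed_payment_id: PaymentId,
    ) -> bool {
        if payments.claimable_payments.contains(payment_hash) {
            return false; // Claim never started; it must be replayed.
        }
        payments
            .pending_claiming_payments
            .get(payment_hash)
            .map_or(false, |claiming| claiming.payment_id == replayed_payment_id)
    }

    fn main() {
        let hash = [1u8; 32];
        let id = PaymentId([2u8; 32]);
        let mut payments = ClaimablePayments {
            claimable_payments: HashSet::new(),
            pending_claiming_payments: HashMap::new(),
        };
        // The payment had already moved to the "being claimed" set before the crash.
        payments
            .pending_claiming_payments
            .insert(hash, ClaimingPayment { payment_id: id });
        assert!(should_skip_claim_replay(&payments, &hash, id));
    }

In the patch itself the same decision is expressed inline as a `continue` inside the deserialization loop, and the widened `do_test_partial_claim_before_restart(persist_both_monitors, double_restart)` test exercises it by reloading `nodes[3]` a second time from the re-serialized manager without updating any `ChannelMonitor`s, the exact sequence that previously failed to load.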