Skip to content

Commit bee42b1

Browse files
committed
Handle async initial ChannelMonitor persistence failing on restart
If the initial ChannelMonitor persistence is done asynchronously but does not complete before the node restarts (with a ChannelManager persistence), we'll start back up with a channel present but no corresponding ChannelMonitor. Because the Channel is pending-monitor-update and has not yet broadcasted its initial funding transaction or sent channel_ready, this is neither a violation of our API contract nor a safety violation. However, the previous code would refuse to deserialize the ChannelManager, treating it as an API contract violation. The solution is to test for this case explicitly and drop the channel entirely, as if the peer had disconnected before we received the funding_signed for outbound channels or before we sent the channel_ready for inbound channels.
1 parent 7544030 commit bee42b1

File tree

4 files changed

+274
-2
lines changed

4 files changed

+274
-2
lines changed

lightning/src/ln/chanmon_update_fail_tests.rs

+218
Original file line numberDiff line numberDiff line change
@@ -2745,3 +2745,221 @@ fn double_temp_error() {
27452745
commitment_signed_dance!(nodes[0], nodes[1], commitment_signed_b2, false);
27462746
expect_payment_sent!(nodes[0], payment_preimage_2);
27472747
}
2748+
2749+
fn do_test_outbound_reload_without_init_mon(use_0conf: bool) {
	// Test that if the monitor update generated in funding_signed is stored async and we restart
	// with the latest ChannelManager but the ChannelMonitor persistence never completed we happily
	// drop the channel and move on.
	let chanmon_cfgs = create_chanmon_cfgs(2);
	let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);

	// Declared early so they outlive the reloaded ChannelManager below (borrow lifetimes).
	let persister: test_utils::TestPersister;
	let new_chain_monitor: test_utils::TestChainMonitor;
	let nodes_0_deserialized: ChannelManager<EnforcingSigner, &test_utils::TestChainMonitor, &test_utils::TestBroadcaster, &test_utils::TestKeysInterface, &test_utils::TestFeeEstimator, &test_utils::TestLogger>;

	// Manual acceptance lets nodes[1] optionally accept the channel as 0conf below.
	let mut chan_config = test_default_channel_config();
	chan_config.manually_accept_inbound_channels = true;
	chan_config.channel_handshake_limits.trust_own_funding_0conf = true;

	let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[Some(chan_config), Some(chan_config)]);
	let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);

	// Open the channel: open_channel -> (manual accept) -> accept_channel.
	nodes[0].node.create_channel(nodes[1].node.get_our_node_id(), 100000, 10001, 43, None).unwrap();
	nodes[1].node.handle_open_channel(&nodes[0].node.get_our_node_id(), channelmanager::provided_init_features(), &get_event_msg!(nodes[0], MessageSendEvent::SendOpenChannel, nodes[1].node.get_our_node_id()));

	let events = nodes[1].node.get_and_clear_pending_events();
	assert_eq!(events.len(), 1);
	match events[0] {
		Event::OpenChannelRequest { temporary_channel_id, .. } => {
			if use_0conf {
				nodes[1].node.accept_inbound_channel_from_trusted_peer_0conf(&temporary_channel_id, &nodes[0].node.get_our_node_id(), 0).unwrap();
			} else {
				nodes[1].node.accept_inbound_channel(&temporary_channel_id, &nodes[0].node.get_our_node_id(), 0).unwrap();
			}
		},
		_ => panic!("Unexpected event"),
	};

	nodes[0].node.handle_accept_channel(&nodes[1].node.get_our_node_id(), channelmanager::provided_init_features(), &get_event_msg!(nodes[1], MessageSendEvent::SendAcceptChannel, nodes[0].node.get_our_node_id()));

	let (temporary_channel_id, funding_tx, ..) = create_funding_transaction(&nodes[0], &nodes[1].node.get_our_node_id(), 100000, 43);

	// funding_transaction_generated does not itself add a monitor on nodes[0].
	nodes[0].node.funding_transaction_generated(&temporary_channel_id, &nodes[1].node.get_our_node_id(), funding_tx.clone()).unwrap();
	check_added_monitors!(nodes[0], 0);

	let funding_created_msg = get_event_msg!(nodes[0], MessageSendEvent::SendFundingCreated, nodes[1].node.get_our_node_id());
	nodes[1].node.handle_funding_created(&nodes[0].node.get_our_node_id(), &funding_created_msg);
	check_added_monitors!(nodes[1], 1);

	// In the 0conf case nodes[1] sends funding_signed *and* channel_ready immediately.
	let bs_signed_locked = nodes[1].node.get_and_clear_pending_msg_events();
	assert_eq!(bs_signed_locked.len(), if use_0conf { 2 } else { 1 });
	match &bs_signed_locked[0] {
		MessageSendEvent::SendFundingSigned { msg, .. } => {
			// Make nodes[0]'s *initial* monitor persistence async (InProgress) right before
			// handle_funding_signed, which is where the outbound channel's first monitor
			// update is generated.
			chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);

			nodes[0].node.handle_funding_signed(&nodes[1].node.get_our_node_id(), &msg);
			check_added_monitors!(nodes[0], 1);
		}
		_ => panic!("Unexpected event"),
	}
	if use_0conf {
		match &bs_signed_locked[1] {
			MessageSendEvent::SendChannelReady { msg, .. } => {
				nodes[0].node.handle_channel_ready(&nodes[1].node.get_our_node_id(), &msg);
			}
			_ => panic!("Unexpected event"),
		}
	}

	// With the monitor persistence pending, nodes[0] must not broadcast the funding
	// transaction or emit any messages/events.
	assert!(nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().is_empty());
	assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
	assert!(nodes[0].node.get_and_clear_pending_events().is_empty());

	// nodes[0] is now waiting on the first ChannelMonitor persistence to complete in order to
	// broadcast the funding transaction. If nodes[0] restarts at this point with the
	// ChannelMonitor lost, we should simply discard the channel.

	// The test framework checks that watched_txn/outputs match the monitor set, which they will
	// not, so we have to clear them here.
	nodes[0].chain_source.watched_txn.lock().unwrap().clear();
	nodes[0].chain_source.watched_outputs.lock().unwrap().clear();

	// Simulate a restart: serialize the ChannelManager, then reload it with *no*
	// ChannelMonitors (the initial monitor persistence never completed on disk).
	let nodes_0_serialized = nodes[0].node.encode();
	persister = test_utils::TestPersister::new();
	let keys_manager = &chanmon_cfgs[0].keys_manager;
	new_chain_monitor = test_utils::TestChainMonitor::new(Some(nodes[0].chain_source), nodes[0].tx_broadcaster.clone(), nodes[0].logger, node_cfgs[0].fee_estimator, &persister, keys_manager);
	nodes[0].chain_monitor = &new_chain_monitor;

	let mut nodes_0_read = &nodes_0_serialized[..];
	let config = UserConfig::default();
	nodes_0_deserialized = {
		<(BlockHash, ChannelManager<EnforcingSigner, &test_utils::TestChainMonitor, &test_utils::TestBroadcaster, &test_utils::TestKeysInterface, &test_utils::TestFeeEstimator, &test_utils::TestLogger>)>::read(&mut nodes_0_read, ChannelManagerReadArgs {
			default_config: config,
			keys_manager,
			fee_estimator: node_cfgs[0].fee_estimator,
			chain_monitor: nodes[0].chain_monitor,
			tx_broadcaster: nodes[0].tx_broadcaster.clone(),
			logger: nodes[0].logger,
			// Crucially, no monitors are provided to the reloaded manager.
			channel_monitors: HashMap::new(),
		}).unwrap().1
	};
	nodes[0].node = &nodes_0_deserialized;
	assert!(nodes_0_read.is_empty());

	// The reload should have dropped the channel as if the peer disconnected pre-funding.
	check_closed_event!(nodes[0], 1, ClosureReason::DisconnectedPeer);
	assert!(nodes[0].node.list_channels().is_empty());
}
2852+
2853+
#[test]
fn test_outbound_reload_without_init_mon() {
	// Exercise both the 0conf and the confirmation-requiring channel variants.
	for &use_0conf in &[true, false] {
		do_test_outbound_reload_without_init_mon(use_0conf);
	}
}
2858+
2859+
fn do_test_inbound_reload_without_init_mon(use_0conf: bool, lock_commitment: bool) {
	// Test that if the monitor update generated by funding_transaction_generated is stored async
	// and we restart with the latest ChannelManager but the ChannelMonitor persistence never
	// completed we happily drop the channel and move on.
	let chanmon_cfgs = create_chanmon_cfgs(2);
	let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);

	// Declared early so they outlive the reloaded ChannelManager below (borrow lifetimes).
	let persister: test_utils::TestPersister;
	let new_chain_monitor: test_utils::TestChainMonitor;
	let nodes_1_deserialized: ChannelManager<EnforcingSigner, &test_utils::TestChainMonitor, &test_utils::TestBroadcaster, &test_utils::TestKeysInterface, &test_utils::TestFeeEstimator, &test_utils::TestLogger>;

	// Manual acceptance lets nodes[1] optionally accept the channel as 0conf below.
	let mut chan_config = test_default_channel_config();
	chan_config.manually_accept_inbound_channels = true;
	chan_config.channel_handshake_limits.trust_own_funding_0conf = true;

	let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[Some(chan_config), Some(chan_config)]);
	let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);

	// Open the channel: open_channel -> (manual accept) -> accept_channel.
	nodes[0].node.create_channel(nodes[1].node.get_our_node_id(), 100000, 10001, 43, None).unwrap();
	nodes[1].node.handle_open_channel(&nodes[0].node.get_our_node_id(), channelmanager::provided_init_features(), &get_event_msg!(nodes[0], MessageSendEvent::SendOpenChannel, nodes[1].node.get_our_node_id()));

	let events = nodes[1].node.get_and_clear_pending_events();
	assert_eq!(events.len(), 1);
	match events[0] {
		Event::OpenChannelRequest { temporary_channel_id, .. } => {
			if use_0conf {
				nodes[1].node.accept_inbound_channel_from_trusted_peer_0conf(&temporary_channel_id, &nodes[0].node.get_our_node_id(), 0).unwrap();
			} else {
				nodes[1].node.accept_inbound_channel(&temporary_channel_id, &nodes[0].node.get_our_node_id(), 0).unwrap();
			}
		},
		_ => panic!("Unexpected event"),
	};

	nodes[0].node.handle_accept_channel(&nodes[1].node.get_our_node_id(), channelmanager::provided_init_features(), &get_event_msg!(nodes[1], MessageSendEvent::SendAcceptChannel, nodes[0].node.get_our_node_id()));

	let (temporary_channel_id, funding_tx, ..) = create_funding_transaction(&nodes[0], &nodes[1].node.get_our_node_id(), 100000, 43);

	nodes[0].node.funding_transaction_generated(&temporary_channel_id, &nodes[1].node.get_our_node_id(), funding_tx.clone()).unwrap();
	check_added_monitors!(nodes[0], 0);

	let funding_created_msg = get_event_msg!(nodes[0], MessageSendEvent::SendFundingCreated, nodes[1].node.get_our_node_id());
	// Make nodes[1]'s *initial* monitor persistence async (InProgress) right before
	// handle_funding_created, which is where the inbound channel's first monitor is added.
	chanmon_cfgs[1].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
	nodes[1].node.handle_funding_created(&nodes[0].node.get_our_node_id(), &funding_created_msg);
	check_added_monitors!(nodes[1], 1);

	// nodes[1] happily sends its funding_signed even though it's awaiting the persistence of the
	// initial ChannelMonitor, but it will decline to send its channel_ready even if the funding
	// transaction is confirmed.
	let funding_signed_msg = get_event_msg!(nodes[1], MessageSendEvent::SendFundingSigned, nodes[0].node.get_our_node_id());

	nodes[0].node.handle_funding_signed(&nodes[1].node.get_our_node_id(), &funding_signed_msg);
	check_added_monitors!(nodes[0], 1);

	// nodes[0] broadcasts the funding transaction as usual; optionally confirm it on both nodes.
	let as_funding_tx = nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
	if lock_commitment {
		confirm_transaction(&nodes[0], &as_funding_tx[0]);
		confirm_transaction(&nodes[1], &as_funding_tx[0]);
	}
	if use_0conf || lock_commitment {
		let as_ready = get_event_msg!(nodes[0], MessageSendEvent::SendChannelReady, nodes[1].node.get_our_node_id());
		nodes[1].node.handle_channel_ready(&nodes[0].node.get_our_node_id(), &as_ready);
	}
	// nodes[1] must not send its channel_ready while the monitor persistence is pending.
	assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());

	// nodes[1] is now waiting on the first ChannelMonitor persistence to complete in order to
	// move the channel to ready (or is waiting on the funding transaction to confirm). If nodes[1]
	// restarts at this point with the ChannelMonitor lost, we should simply discard the channel.

	// The test framework checks that watched_txn/outputs match the monitor set, which they will
	// not, so we have to clear them here.
	nodes[1].chain_source.watched_txn.lock().unwrap().clear();
	nodes[1].chain_source.watched_outputs.lock().unwrap().clear();

	// Simulate a restart: serialize the ChannelManager, then reload it with *no*
	// ChannelMonitors (the initial monitor persistence never completed on disk).
	let nodes_1_serialized = nodes[1].node.encode();
	persister = test_utils::TestPersister::new();
	let keys_manager = &chanmon_cfgs[1].keys_manager;
	new_chain_monitor = test_utils::TestChainMonitor::new(Some(nodes[1].chain_source), nodes[1].tx_broadcaster.clone(), nodes[1].logger, node_cfgs[1].fee_estimator, &persister, keys_manager);
	nodes[1].chain_monitor = &new_chain_monitor;

	let mut nodes_1_read = &nodes_1_serialized[..];
	let config = UserConfig::default();
	nodes_1_deserialized = {
		<(BlockHash, ChannelManager<EnforcingSigner, &test_utils::TestChainMonitor, &test_utils::TestBroadcaster, &test_utils::TestKeysInterface, &test_utils::TestFeeEstimator, &test_utils::TestLogger>)>::read(&mut nodes_1_read, ChannelManagerReadArgs {
			default_config: config,
			keys_manager,
			fee_estimator: node_cfgs[1].fee_estimator,
			chain_monitor: nodes[1].chain_monitor,
			tx_broadcaster: nodes[1].tx_broadcaster.clone(),
			logger: nodes[1].logger,
			// Crucially, no monitors are provided to the reloaded manager.
			channel_monitors: HashMap::new(),
		}).unwrap().1
	};
	nodes[1].node = &nodes_1_deserialized;
	assert!(nodes_1_read.is_empty());

	// The reload should have dropped the channel as if the peer disconnected pre-funding.
	check_closed_event!(nodes[1], 1, ClosureReason::DisconnectedPeer);
	assert!(nodes[1].node.list_channels().is_empty());
}
2958+
2959+
#[test]
fn test_inbound_reload_without_init_mon() {
	// Exercise all four combinations of (0conf, funding-tx-confirmed).
	for &use_0conf in &[true, false] {
		for &lock_commitment in &[true, false] {
			do_test_inbound_reload_without_init_mon(use_0conf, lock_commitment);
		}
	}
}

lightning/src/ln/channel.rs

+36
Original file line numberDiff line numberDiff line change
@@ -4788,6 +4788,42 @@ impl<Signer: Sign> Channel<Signer> {
47884788
self.channel_state >= ChannelState::FundingSent as u32
47894789
}
47904790

4791+
	/// Returns true if the channel is awaiting the persistence of the initial ChannelMonitor.
	/// If the channel is outbound, this implies we have not yet broadcasted the funding
	/// transaction. If the channel is inbound, this implies simply that the channel has not
	/// advanced state.
	pub fn is_awaiting_initial_mon_persist(&self) -> bool {
		// A pending monitor update is a prerequisite in both the 0conf and non-0conf cases.
		if !self.is_awaiting_monitor_update() { return false; }
		// Mask off the flags that may legitimately be set alongside FundingSent at this stage,
		// then check that nothing beyond FundingSent itself is set.
		if self.channel_state &
			!(ChannelState::TheirChannelReady as u32 | ChannelState::PeerDisconnected as u32 | ChannelState::MonitorUpdateFailed as u32)
				== ChannelState::FundingSent as u32 {
			// If we're not a 0conf channel, we'll be waiting on a monitor update with only
			// FundingSent set, though our peer could have sent their channel_ready.
			debug_assert!(self.minimum_depth.unwrap_or(1) > 0);
			return true;
		}
		// Both commitment numbers at INITIAL - 1 means exactly one state advance has happened
		// (the one triggered by funding_signed) and nothing more.
		if self.cur_holder_commitment_transaction_number == INITIAL_COMMITMENT_NUMBER - 1 &&
			self.cur_counterparty_commitment_transaction_number == INITIAL_COMMITMENT_NUMBER - 1 {
			// If we're a 0-conf channel, we'll move beyond FundingSent immediately even while
			// waiting for the initial monitor persistence. Thus, we check if our commitment
			// transaction numbers have both been iterated only exactly once (for the
			// funding_signed), and we're awaiting monitor update.
			//
			// If we got here, we shouldn't have yet broadcasted the funding transaction (as the
			// only way to get an awaiting-monitor-update state during initial funding is if the
			// initial monitor persistence is still pending).
			//
			// Because deciding we're awaiting initial broadcast spuriously could result in
			// funds-loss (as we don't have a monitor, but have the funding transaction confirmed),
			// we hard-assert here, even in production builds.
			if self.is_outbound() { assert!(self.funding_transaction.is_some()); }
			assert!(self.monitor_pending_channel_ready);
			// The only monitor update so far must be the initial one (id 0).
			assert_eq!(self.latest_monitor_update_id, 0);
			return true;
		}
		false
	}
4826+
47914827
/// Returns true if our channel_ready has been sent
47924828
pub fn is_our_channel_ready(&self) -> bool {
47934829
(self.channel_state & ChannelState::OurChannelReady as u32) != 0 || self.channel_state >= ChannelState::ChannelFunded as u32

lightning/src/ln/channelmanager.rs

+10
Original file line numberDiff line numberDiff line change
@@ -6921,6 +6921,16 @@ impl<'a, Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref>
69216921
}
69226922
by_id.insert(channel.channel_id(), channel);
69236923
}
6924+
} else if channel.is_awaiting_initial_mon_persist() {
6925+
// If we were persisted and shut down while the initial ChannelMonitor persistence
6926+
// was in-progress, we never broadcasted the funding transaction and can still
6927+
// safely discard the channel.
6928+
let _ = channel.force_shutdown(false);
6929+
channel_closures.push(events::Event::ChannelClosed {
6930+
channel_id: channel.channel_id(),
6931+
user_channel_id: channel.get_user_id(),
6932+
reason: ClosureReason::DisconnectedPeer,
6933+
});
69246934
} else {
69256935
log_error!(args.logger, "Missing ChannelMonitor for channel {} needed by ChannelManager.", log_bytes!(channel.channel_id()));
69266936
log_error!(args.logger, " The chain::Watch API *requires* that monitors are persisted durably before returning,");

lightning/src/util/events.rs

+10-2
Original file line numberDiff line numberDiff line change
@@ -111,11 +111,19 @@ pub enum ClosureReason {
111111
/// The peer disconnected prior to funding completing. In this case the spec mandates that we
112112
/// forget the channel entirely - we can attempt again if the peer reconnects.
113113
///
114+
/// This includes cases where we restarted prior to funding completion, including prior to the
115+
/// initial [`ChannelMonitor`] persistence completing.
116+
///
114117
/// In LDK versions prior to 0.0.107 this could also occur if we were unable to connect to the
115118
/// peer because of mutual incompatibility between us and our channel counterparty.
119+
///
120+
/// [`ChannelMonitor`]: crate::chain::channelmonitor::ChannelMonitor
116121
DisconnectedPeer,
117-
/// Closure generated from `ChannelManager::read` if the ChannelMonitor is newer than
118-
/// the ChannelManager deserialized.
122+
/// Closure generated from `ChannelManager::read` if the [`ChannelMonitor`] is newer than
123+
/// the [`ChannelManager`] deserialized.
124+
///
125+
/// [`ChannelMonitor`]: crate::chain::channelmonitor::ChannelMonitor
126+
/// [`ChannelManager`]: crate::ln::channelmanager::ChannelManager
119127
OutdatedChannelManager
120128
}
121129

0 commit comments

Comments
 (0)