Skip to content

Commit 6930ccd

Browse files
Support persistent monitor events
Currently, the resolution of HTLCs (and decisions on when HTLCs can be forwarded) is the responsibility of Channel objects (a part of ChannelManager) until the channel is closed, and then the ChannelMonitor thereafter. This leads to some complexity around race conditions for HTLCs right around channel closure. Additionally, there is lots of complexity reconstructing the state of all HTLCs in the ChannelManager deserialization/loading logic. Instead, we want to do all resolution in ChannelMonitors (in response to ChannelMonitorUpdates) and pass them back to ChannelManager in the form of MonitorEvents (similar to how HTLCs are resolved after channels are closed). In order to have reliable resolution, we'll need to keep MonitorEvents around in the ChannelMonitor until the ChannelManager has finished processing them. This will simplify things - on restart instead of examining the set of HTLCs in monitors we can simply replay all the pending MonitorEvents. Here we complete work that was built on recent prior commits and actually start re-providing monitor events on startup if they went un-acked during runtime. This isn't actually supported in prod yet, so this new code will run randomly in tests, to ensure we still support the old paths.
1 parent 42b7085 commit 6930ccd

File tree

5 files changed

+75
-8
lines changed

5 files changed

+75
-8
lines changed

lightning/src/chain/channelmonitor.rs

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1290,6 +1290,11 @@ pub(crate) struct ChannelMonitorImpl<Signer: EcdsaChannelSigner> {
12901290
// block/transaction-connected events and *not* during block/transaction-disconnected events,
12911291
// we further MUST NOT generate events during block/transaction-disconnection.
12921292
pending_monitor_events: Vec<(u64, MonitorEvent)>,
1293+
// `MonitorEvent`s that have been provided to the `ChannelManager` via
1294+
// [`ChannelMonitor::get_and_clear_pending_monitor_events`] and are awaiting
1295+
// [`ChannelMonitor::ack_monitor_event`] for removal. If an event in this queue is not ack'd, it
1296+
// will be re-provided to the `ChannelManager` on startup.
1297+
provided_monitor_events: Vec<(u64, MonitorEvent)>,
12931298
/// When set, monitor events are retained until explicitly acked rather than cleared on read.
12941299
persistent_events_enabled: bool,
12951300
next_monitor_event_id: u64,
@@ -1766,7 +1771,12 @@ pub(crate) fn write_chanmon_internal<Signer: EcdsaChannelSigner, W: Writer>(
17661771
// Only write `persistent_events_enabled` if it's set to true, as it's an even TLV.
17671772
let persistent_events_enabled = channel_monitor.persistent_events_enabled.then_some(true);
17681773
let pending_mon_evs_with_ids = if persistent_events_enabled.is_some() {
1769-
Some(IterableOwned(channel_monitor.pending_monitor_events.iter()))
1774+
Some(IterableOwned(
1775+
channel_monitor
1776+
.provided_monitor_events
1777+
.iter()
1778+
.chain(channel_monitor.pending_monitor_events.iter()),
1779+
))
17701780
} else {
17711781
None
17721782
};
@@ -1979,6 +1989,7 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitor<Signer> {
19791989

19801990
payment_preimages: new_hash_map(),
19811991
pending_monitor_events: Vec::new(),
1992+
provided_monitor_events: Vec::new(),
19821993
persistent_events_enabled: false,
19831994
next_monitor_event_id: 0,
19841995
pending_events: Vec::new(),
@@ -2204,19 +2215,31 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitor<Signer> {
22042215

22052216
/// Removes a [`MonitorEvent`] by its event ID, acknowledging that it has been processed.
22062217
pub(super) fn ack_monitor_event(&self, event_id: u64) {
2207-
self.inner.lock().unwrap().pending_monitor_events.retain(|(id, _)| *id != event_id);
2218+
let inner = &mut *self.inner.lock().unwrap();
2219+
inner.provided_monitor_events.retain(|(id, _)| *id != event_id);
2220+
}
2221+
2222+
/// Enables persistent monitor events mode. When enabled, monitor events are retained until
2223+
/// explicitly acked rather than cleared on read.
2224+
pub(crate) fn set_persistent_events_enabled(&self, enabled: bool) {
2225+
self.inner.lock().unwrap().persistent_events_enabled = enabled;
22082226
}
22092227

22102228
/// Copies [`MonitorEvent`] state from `other` into `self`.
22112229
/// Used in tests to align transient runtime state before equality comparison after a
22122230
/// serialization round-trip.
22132231
#[cfg(any(test, feature = "_test_utils"))]
22142232
pub fn copy_monitor_event_state(&self, other: &ChannelMonitor<Signer>) {
2215-
let (pending, next_id) = {
2233+
let (provided, pending, next_id) = {
22162234
let other_inner = other.inner.lock().unwrap();
2217-
(other_inner.pending_monitor_events.clone(), other_inner.next_monitor_event_id)
2235+
(
2236+
other_inner.provided_monitor_events.clone(),
2237+
other_inner.pending_monitor_events.clone(),
2238+
other_inner.next_monitor_event_id,
2239+
)
22182240
};
22192241
let mut self_inner = self.inner.lock().unwrap();
2242+
self_inner.provided_monitor_events = provided;
22202243
self_inner.pending_monitor_events = pending;
22212244
self_inner.next_monitor_event_id = next_id;
22222245
}
@@ -4445,9 +4468,16 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitorImpl<Signer> {
44454468
}
44464469

44474470
fn get_and_clear_pending_monitor_events(&mut self) -> Vec<(u64, MonitorEvent)> {
4448-
let mut ret = Vec::new();
4449-
mem::swap(&mut ret, &mut self.pending_monitor_events);
4450-
ret
4471+
if self.persistent_events_enabled {
4472+
let mut ret = Vec::new();
4473+
mem::swap(&mut ret, &mut self.pending_monitor_events);
4474+
self.provided_monitor_events.extend(ret.iter().cloned());
4475+
ret
4476+
} else {
4477+
let mut ret = Vec::new();
4478+
mem::swap(&mut ret, &mut self.pending_monitor_events);
4479+
ret
4480+
}
44514481
}
44524482

44534483
/// Gets the set of events that are repeated regularly (e.g. those which RBF bump
@@ -6797,6 +6827,7 @@ impl<'a, 'b, ES: EntropySource, SP: SignerProvider> ReadableArgs<(&'a ES, &'b SP
67976827

67986828
payment_preimages,
67996829
pending_monitor_events,
6830+
provided_monitor_events: Vec::new(),
68006831
persistent_events_enabled,
68016832
next_monitor_event_id,
68026833
pending_events,

lightning/src/ln/channelmanager.rs

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3632,6 +3632,8 @@ impl<
36323632
our_network_pubkey, current_timestamp, expanded_inbound_key,
36333633
node_signer.get_receive_auth_key(), secp_ctx.clone(), message_router, logger.clone(),
36343634
);
3635+
#[cfg(test)]
3636+
let override_persistent_monitor_events = config.override_persistent_monitor_events;
36353637

36363638
ChannelManager {
36373639
config: RwLock::new(config),
@@ -3688,7 +3690,27 @@ impl<
36883690

36893691
logger,
36903692

3691-
persistent_monitor_events: false,
3693+
persistent_monitor_events: {
3694+
#[cfg(not(test))]
3695+
{ false }
3696+
#[cfg(test)]
3697+
{
3698+
override_persistent_monitor_events.unwrap_or_else(|| {
3699+
use core::hash::{BuildHasher, Hasher};
3700+
match std::env::var("LDK_TEST_PERSISTENT_MON_EVENTS") {
3701+
Ok(val) => match val.as_str() {
3702+
"1" => true,
3703+
"0" => false,
3704+
_ => panic!("LDK_TEST_PERSISTENT_MON_EVENTS must be 0 or 1, got: {}", val),
3705+
},
3706+
Err(_) => {
3707+
let rand_val = std::collections::hash_map::RandomState::new().build_hasher().finish();
3708+
rand_val % 2 == 0
3709+
},
3710+
}
3711+
})
3712+
}
3713+
},
36923714

36933715
#[cfg(feature = "_test_utils")]
36943716
testing_dnssec_proof_offer_resolution_override: Mutex::new(new_hash_map()),
@@ -11578,6 +11600,7 @@ This indicates a bug inside LDK. Please report this error at https://github.com/
1157811600
fail_chan!("Already had channel with the new channel_id");
1157911601
},
1158011602
hash_map::Entry::Vacant(e) => {
11603+
monitor.set_persistent_events_enabled(self.persistent_monitor_events);
1158111604
let monitor_res = self.chain_monitor.watch_channel(monitor.channel_id(), monitor);
1158211605
if let Ok(persist_state) = monitor_res {
1158311606
// There's no problem signing a counterparty's funding transaction if our monitor
@@ -11748,6 +11771,7 @@ This indicates a bug inside LDK. Please report this error at https://github.com/
1174811771
match chan
1174911772
.funding_signed(&msg, best_block, &self.signer_provider, &self.logger)
1175011773
.and_then(|(funded_chan, monitor)| {
11774+
monitor.set_persistent_events_enabled(self.persistent_monitor_events);
1175111775
self.chain_monitor
1175211776
.watch_channel(funded_chan.context.channel_id(), monitor)
1175311777
.map_err(|()| {
@@ -12663,6 +12687,7 @@ This indicates a bug inside LDK. Please report this error at https://github.com/
1266312687

1266412688
if let Some(chan) = chan.as_funded_mut() {
1266512689
if let Some(monitor) = monitor_opt {
12690+
monitor.set_persistent_events_enabled(self.persistent_monitor_events);
1266612691
let monitor_res =
1266712692
self.chain_monitor.watch_channel(monitor.channel_id(), monitor);
1266812693
if let Ok(persist_state) = monitor_res {

lightning/src/ln/monitor_tests.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3591,6 +3591,9 @@ fn do_test_lost_timeout_monitor_events(confirm_tx: CommitmentType, dust_htlcs: b
35913591
let mut cfg = test_default_channel_config();
35923592
cfg.channel_handshake_config.negotiate_anchors_zero_fee_htlc_tx = true;
35933593
cfg.channel_handshake_config.negotiate_anchor_zero_fee_commitments = p2a_anchor;
3594+
// This test specifically tests lost monitor events, which requires the legacy
3595+
// (non-persistent) monitor event behavior.
3596+
cfg.override_persistent_monitor_events = Some(false);
35943597
let cfgs = [Some(cfg.clone()), Some(cfg.clone()), Some(cfg.clone())];
35953598

35963599
let chanmon_cfgs = create_chanmon_cfgs(3);

lightning/src/util/config.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1103,6 +1103,10 @@ pub struct UserConfig {
11031103
///
11041104
/// [`ChannelManager::splice_channel`]: crate::ln::channelmanager::ChannelManager::splice_channel
11051105
pub reject_inbound_splices: bool,
1106+
/// If set to `Some`, overrides the random selection of whether to use persistent monitor
1107+
/// events. Only available in tests.
1108+
#[cfg(test)]
1109+
pub override_persistent_monitor_events: Option<bool>,
11061110
}
11071111

11081112
impl Default for UserConfig {
@@ -1119,6 +1123,8 @@ impl Default for UserConfig {
11191123
enable_htlc_hold: false,
11201124
hold_outbound_htlcs_at_next_hop: false,
11211125
reject_inbound_splices: true,
1126+
#[cfg(test)]
1127+
override_persistent_monitor_events: None,
11221128
}
11231129
}
11241130
}

lightning/src/util/test_utils.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -663,6 +663,8 @@ impl<'a> chain::Watch<TestChannelSigner> for TestChainMonitor<'a> {
663663
)
664664
.unwrap()
665665
.1;
666+
// The deserialized monitor will reset the monitor event state, so copy it from the live
667+
// monitor before comparing.
666668
new_monitor.copy_monitor_event_state(&monitor);
667669
if let Some(chan_id) = self.expect_monitor_round_trip_fail.lock().unwrap().take() {
668670
assert_eq!(chan_id, channel_id);

0 commit comments

Comments
 (0)