
Commit cdb2132

Committed Oct 7, 2021
Persist ChannelMonitors after new blocks are connected
This resolves several user complaints (and issues in the sample node) where startup is substantially delayed as we're always waiting for the chain data to sync.

Further, in an upcoming PR, we'll be reloading pending payments from ChannelMonitors on restart, at which point we'll need the change here which avoids handling events until after the user has confirmed the `ChannelMonitor` has been persisted to disk. It will avoid a race where we
 * send a payment/HTLC (persisting the monitor to disk with the HTLC pending),
 * force-close the channel, removing the channel entry from the ChannelManager entirely,
 * persist the ChannelManager,
 * connect a block which contains a fulfill of the HTLC, generating a claim event,
 * handle the claim event while the `ChannelMonitor` is being persisted,
 * persist the ChannelManager (before the ChannelMonitor is persisted fully),
 * restart, reloading the HTLC as a pending payment in the ChannelManager, which now has no references to it except from the ChannelMonitor which still has the pending HTLC,
 * replay the block connection, generating a duplicate PaymentSent event.
1 parent: e312f48

6 files changed: +127 -53 lines changed
 

‎lightning-persister/src/lib.rs

Lines changed: 6 additions & 1 deletion

@@ -159,13 +159,18 @@ impl FilesystemPersister {
 }
 
 impl<ChannelSigner: Sign> channelmonitor::Persist<ChannelSigner> for FilesystemPersister {
+	// TODO: We really need a way for the persister to inform the user that its time to crash/shut
+	// down once these start returning failure.
+	// A PermanentFailure implies we need to shut down since we're force-closing channels without
+	// even broadcasting!
+
 	fn persist_new_channel(&self, funding_txo: OutPoint, monitor: &ChannelMonitor<ChannelSigner>) -> Result<(), ChannelMonitorUpdateErr> {
 		let filename = format!("{}_{}", funding_txo.txid.to_hex(), funding_txo.index);
 		util::write_to_file(self.path_to_monitor_data(), filename, monitor)
 			.map_err(|_| ChannelMonitorUpdateErr::PermanentFailure)
 	}
 
-	fn update_persisted_channel(&self, funding_txo: OutPoint, _update: &ChannelMonitorUpdate, monitor: &ChannelMonitor<ChannelSigner>) -> Result<(), ChannelMonitorUpdateErr> {
+	fn update_persisted_channel(&self, funding_txo: OutPoint, _update: &Option<ChannelMonitorUpdate>, monitor: &ChannelMonitor<ChannelSigner>) -> Result<(), ChannelMonitorUpdateErr> {
 		let filename = format!("{}_{}", funding_txo.txid.to_hex(), funding_txo.index);
 		util::write_to_file(self.path_to_monitor_data(), filename, monitor)
 			.map_err(|_| ChannelMonitorUpdateErr::PermanentFailure)
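Note the signature change above: update_persisted_channel now takes &Option<ChannelMonitorUpdate>, where None indicates the write was triggered by new chain data being connected rather than by an off-chain ChannelMonitorUpdate. Below is a minimal sketch of what a third-party Persist implementation might look like against the new signature; MyStore and its write_monitor helper are hypothetical, and the use paths reflect the module layout around the time of this commit and may differ in other LDK versions.

// Sketch only: `MyStore` and `write_monitor` are hypothetical, not LDK APIs.
use lightning::chain::channelmonitor::{ChannelMonitor, ChannelMonitorUpdate, ChannelMonitorUpdateErr, Persist};
use lightning::chain::keysinterface::Sign;
use lightning::chain::transaction::OutPoint;
use lightning::util::ser::Writeable;

// Hypothetical durable key-value store standing in for a real backend.
struct MyStore;

impl MyStore {
	// Hypothetical helper: durably write `bytes` under `key` (fsync before returning Ok).
	fn write_monitor(&self, _key: &str, _bytes: &[u8]) -> std::io::Result<()> { Ok(()) }
}

impl<ChannelSigner: Sign> Persist<ChannelSigner> for MyStore {
	fn persist_new_channel(&self, funding_txo: OutPoint, monitor: &ChannelMonitor<ChannelSigner>) -> Result<(), ChannelMonitorUpdateErr> {
		let key = format!("{}_{}", funding_txo.txid, funding_txo.index);
		self.write_monitor(&key, &monitor.encode())
			.map_err(|_| ChannelMonitorUpdateErr::PermanentFailure)
	}

	fn update_persisted_channel(&self, funding_txo: OutPoint, update: &Option<ChannelMonitorUpdate>, monitor: &ChannelMonitor<ChannelSigner>) -> Result<(), ChannelMonitorUpdateErr> {
		match update {
			// Off-chain update-driven persist, as before this commit.
			Some(_update) => {},
			// Chain-sync-triggered persist: monitor events for this channel are held
			// back until this write completes (or channel_monitor_updated is called).
			None => {},
		}
		// Rewriting the full monitor is the simplest correct strategy in both cases,
		// mirroring what FilesystemPersister does above.
		let key = format!("{}_{}", funding_txo.txid, funding_txo.index);
		self.write_monitor(&key, &monitor.encode())
			.map_err(|_| ChannelMonitorUpdateErr::PermanentFailure)
	}
}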

‎lightning/src/chain/chainmonitor.rs

Lines changed: 72 additions & 24 deletions

@@ -60,6 +60,20 @@ pub struct ChainMonitor<ChannelSigner: Sign, C: Deref, T: Deref, F: Deref, L: De
 {
 	/// The monitors
 	pub monitors: RwLock<HashMap<OutPoint, ChannelMonitor<ChannelSigner>>>,
+	/// Beyond the synchronization of `monitors` itself, we cannot handle user events until after
+	/// any chain updates have been stored on disk. This mutex is used to provide mutual exclusion
+	/// of event-processing/block-/transaction-connection.
+	/// This avoids the possibility of handling, e.g. an on-chain claim, generating a claim monitor
+	/// event, resulting in the relevant ChannelManager generating a PaymentSent event and dropping
+	/// the pending payment entry, and then reloading before the monitor is persisted, resulting in
+	/// the ChannelManager re-adding the same payment entry, before the same block is replayed,
+	/// resulting in a duplicate PaymentSent event.
+	///
+	/// XXX Describe what this means
+	/// XXX Figure out if its possible to have update ids here, I think no but it complicates the
+	/// channel_monitor_updated api a ton to make users track if they have the latest non-id state
+	/// stored :/
+	event_mutex: Mutex<HashSet<OutPoint>>,
 	chain_source: Option<C>,
 	broadcaster: T,
 	logger: L,

@@ -89,26 +103,44 @@ where C::Target: chain::Filter,
 		FN: Fn(&ChannelMonitor<ChannelSigner>, &TransactionData) -> Vec<TransactionOutputs>
 	{
 		let mut dependent_txdata = Vec::new();
-		let monitors = self.monitors.read().unwrap();
-		for monitor in monitors.values() {
-			let mut txn_outputs = process(monitor, txdata);
+		{
+			let monitors = self.monitors.write().unwrap();
+			for (funding_outpoint, monitor) in monitors.iter() {
+				let mut txn_outputs;
+				{
+					let mut ev_lock = self.event_mutex.lock().unwrap();
+					txn_outputs = process(monitor, txdata);
+					log_trace!(self.logger, "Syncing Channel Monitor for channel {}", log_funding_info!(monitor));
+					match self.persister.update_persisted_channel(*funding_outpoint, &None, monitor) {
+						Ok(()) =>
+							log_trace!(self.logger, "Finished syncing Channel Monitor for channel {}", log_funding_info!(monitor)),
+						Err(ChannelMonitorUpdateErr::PermanentFailure) => {
+							self.user_provided_events.lock().unwrap().push(MonitorEvent::UpdateFailed(*funding_outpoint));
+						},
+						Err(ChannelMonitorUpdateErr::TemporaryFailure) => {
+							log_debug!(self.logger, "Channel Monitor sync for channel {} in progress, holding events until completion!", log_funding_info!(monitor));
+							ev_lock.insert(*funding_outpoint);
+						},
+					}
+				}
 
-			// Register any new outputs with the chain source for filtering, storing any dependent
-			// transactions from within the block that previously had not been included in txdata.
-			if let Some(ref chain_source) = self.chain_source {
-				let block_hash = header.block_hash();
-				for (txid, mut outputs) in txn_outputs.drain(..) {
-					for (idx, output) in outputs.drain(..) {
-						// Register any new outputs with the chain source for filtering and recurse
-						// if it indicates that there are dependent transactions within the block
-						// that had not been previously included in txdata.
-						let output = WatchedOutput {
-							block_hash: Some(block_hash),
-							outpoint: OutPoint { txid, index: idx as u16 },
-							script_pubkey: output.script_pubkey,
-						};
-						if let Some(tx) = chain_source.register_output(output) {
-							dependent_txdata.push(tx);
+				// Register any new outputs with the chain source for filtering, storing any dependent
+				// transactions from within the block that previously had not been included in txdata.
+				if let Some(ref chain_source) = self.chain_source {
+					let block_hash = header.block_hash();
+					for (txid, mut outputs) in txn_outputs.drain(..) {
+						for (idx, output) in outputs.drain(..) {
+							// Register any new outputs with the chain source for filtering and recurse
+							// if it indicates that there are dependent transactions within the block
+							// that had not been previously included in txdata.
+							let output = WatchedOutput {
+								block_hash: Some(block_hash),
+								outpoint: OutPoint { txid, index: idx as u16 },
+								script_pubkey: output.script_pubkey,
+							};
+							if let Some(tx) = chain_source.register_output(output) {
+								dependent_txdata.push(tx);
+							}
 						}
 					}
 				}

@@ -134,6 +166,7 @@ where C::Target: chain::Filter,
 	pub fn new(chain_source: Option<C>, broadcaster: T, logger: L, feeest: F, persister: P) -> Self {
 		Self {
 			monitors: RwLock::new(HashMap::new()),
+			event_mutex: Mutex::new(HashSet::new()),
 			chain_source,
 			broadcaster,
 			logger,

@@ -189,10 +222,14 @@ where C::Target: chain::Filter,
 	/// 3) update(s) are applied to each remote copy of a ChannelMonitor,
 	/// 4) once all remote copies are updated, you call this function with the update_id that
 	///    completed, and once it is the latest the Channel will be re-enabled.
-	pub fn channel_monitor_updated(&self, funding_txo: OutPoint, highest_applied_update_id: u64) {
-		self.user_provided_events.lock().unwrap().push(MonitorEvent::UpdateCompleted(MonitorUpdated {
-			funding_txo, monitor_update_id: highest_applied_update_id
-		}));
+	pub fn channel_monitor_updated(&self, funding_txo: OutPoint, highest_applied_update_id: Option<u64>) {
+		if let Some(monitor_update_id) = highest_applied_update_id {
+			self.user_provided_events.lock().unwrap().push(MonitorEvent::UpdateCompleted(MonitorUpdated {
+				funding_txo, monitor_update_id
+			}));
+		} else {
+			self.event_mutex.lock().unwrap().remove(&funding_txo);
+		}
 	}
 
 	#[cfg(any(test, feature = "fuzztarget", feature = "_test_utils"))]
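The Option<u64> argument lets the same completion call cover both persistence paths: Some(update_id) acknowledges an update-driven persist as before, while None tells the ChainMonitor that a chain-sync-triggered persist which had returned TemporaryFailure has completed, removing the channel from event_mutex so its monitor events can be released again. A usage sketch, assuming chain_monitor, funding_txo, and update_id already exist in the caller's context:

// A block-connection persist previously returned TemporaryFailure; once the monitor
// written during chain sync is durably on disk, release the held events:
chain_monitor.channel_monitor_updated(funding_txo, None);

// An off-chain ChannelMonitorUpdate has been applied to all remote copies; same call
// as before this commit, now wrapped in Some():
chain_monitor.channel_monitor_updated(funding_txo, Some(update_id));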
@@ -346,12 +383,17 @@ where C::Target: chain::Filter,
 		}
 		// Even if updating the monitor returns an error, the monitor's state will
 		// still be changed. So, persist the updated monitor despite the error.
-		let persist_res = self.persister.update_persisted_channel(funding_txo, &update, monitor);
+		let persist_res = self.persister.update_persisted_channel(funding_txo, &Some(update), monitor);
 		if let Err(ref e) = persist_res {
 			log_error!(self.logger, "Failed to persist channel monitor update: {:?}", e);
 		}
 		if update_res.is_err() {
 			Err(ChannelMonitorUpdateErr::PermanentFailure)
+		} else if self.user_provided_events.lock().unwrap().contains(&MonitorEvent::UpdateFailed(funding_txo)) {
+			// If we have a pending UpdateFailed event which hasn't yet been received by
+			// the ChannelManager, ensure we still fail channel updates for the failed
+			// channel.
+			Err(ChannelMonitorUpdateErr::PermanentFailure)
 		} else {
 			persist_res
 		}

@@ -360,6 +402,12 @@ where C::Target: chain::Filter,
 	}
 
 	fn release_pending_monitor_events(&self) -> Vec<MonitorEvent> {
+		let ev_lock = self.event_mutex.lock().unwrap();
+		if !ev_lock.is_empty() {
+			log_error!(self.logger, "A Channel Monitor sync is still in progress, refusing to provide monitor events!");
+			return self.user_provided_events.lock().unwrap().split_off(0);
+		}
+
 		let mut pending_monitor_events = self.user_provided_events.lock().unwrap().split_off(0);
 		for monitor in self.monitors.read().unwrap().values() {
 			pending_monitor_events.append(&mut monitor.get_and_clear_pending_monitor_events());

‎lightning/src/chain/channelmonitor.rs

Lines changed: 17 additions & 2 deletions

@@ -208,12 +208,17 @@ pub enum MonitorEvent {
 
 	/// XXX
 	UpdateCompleted(MonitorUpdated),
+
+	/// XXX
+	UpdateFailed(OutPoint),
 }
 impl_writeable_tlv_based_enum_upgradable!(MonitorEvent, ;
 	(0, HTLCEvent),
-	// Note that UpdateCompleted is currently never serialized to disk as it is generated only in ChainMonitor
+	// Note that UpdateCompleted and UpdateFailed is currently never serialized to disk as they are
+	// generated only in ChainMonitor
 	(1, UpdateCompleted),
 	(2, CommitmentTxConfirmed),
+	(3, UpdateFailed),
 );
 
 /// Simple structure sent back by `chain::Watch` when an HTLC from a forward channel is detected on
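The new UpdateFailed(OutPoint) variant reports that persisting the monitor for the given channel failed permanently while syncing chain data, so the channel has to be treated as force-closed, just like a PermanentFailure from an update-driven persist. A rough sketch of what consuming these events looks like; in LDK the ChannelManager normally drains monitor events itself through the chain::Watch trait, so this loop is purely illustrative and assumes chain_monitor is in scope with MonitorEvent and lightning::chain::Watch imported:

// Illustrative only: normally the ChannelManager drains these via chain::Watch.
for event in chain_monitor.release_pending_monitor_events() {
	match event {
		MonitorEvent::UpdateFailed(funding_txo) => {
			// Treat the channel funded by `funding_txo` as force-closed.
		},
		MonitorEvent::UpdateCompleted(updated) => {
			// An async update-driven persist finished; re-enable the channel once
			// `updated.monitor_update_id` is the latest update.
		},
		_ => {
			// HTLCEvent / CommitmentTxConfirmed are handled as before.
		},
	}
}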
@@ -707,7 +712,17 @@ pub(crate) struct ChannelMonitorImpl<Signer: Sign> {
 
 	payment_preimages: HashMap<PaymentHash, PaymentPreimage>,
 
+	// Note that MonitorEvents MUST NOT be generated during update processing, only generated
+	// during chain data processing. This prevents a race in ChainMonitor::update_channel (and
+	// presumably user implementations thereof as well) where we update the in-memory channel
+	// object, then before the persistence finishes (as its all under a read-lock), we return
+	// pending events to the user or to the relevant ChannelManager. This could cause duplicate
+	// events.
+	// Note that because the `event_lock` in `ChainMonitor` is only taken in
+	// block/transaction-connected events and *not* during block/transaction-disconnected events,
+	// we further MUST NOT generate events during block/transaction-disconnection.
 	pending_monitor_events: Vec<MonitorEvent>,
+
 	pending_events: Vec<Event>,
 
 	// Used to track on-chain events (i.e., transactions part of channels confirmed on chain) on

@@ -2947,7 +2962,7 @@ pub trait Persist<ChannelSigner: Sign> {
 	/// See [`ChannelMonitor::write`] for writing out a `ChannelMonitor`,
 	/// [`ChannelMonitorUpdate::write`] for writing out an update, and
 	/// [`ChannelMonitorUpdateErr`] for requirements when returning errors.
-	fn update_persisted_channel(&self, id: OutPoint, update: &ChannelMonitorUpdate, data: &ChannelMonitor<ChannelSigner>) -> Result<(), ChannelMonitorUpdateErr>;
+	fn update_persisted_channel(&self, id: OutPoint, update: &Option<ChannelMonitorUpdate>, data: &ChannelMonitor<ChannelSigner>) -> Result<(), ChannelMonitorUpdateErr>;
 }
 
 impl<Signer: Sign, T: Deref, F: Deref, L: Deref> chain::Listen for (ChannelMonitor<Signer>, T, F, L)
