
Commit cdb2132

Committed Oct 7, 2021
Persist ChannelMonitors after new blocks are connected
This resolves several user complaints (and issues in the sample node) where startup is substantially delayed as we're always waiting for the chain data to sync.

Further, in an upcoming PR, we'll be reloading pending payments from ChannelMonitors on restart, at which point we'll need the change here which avoids handling events until after the user has confirmed the `ChannelMonitor` has been persisted to disk. It will avoid a race where we
 * send a payment/HTLC (persisting the monitor to disk with the HTLC pending),
 * force-close the channel, removing the channel entry from the ChannelManager entirely,
 * persist the ChannelManager,
 * connect a block which contains a fulfill of the HTLC, generating a claim event,
 * handle the claim event while the `ChannelMonitor` is being persisted,
 * persist the ChannelManager (before the ChannelMonitor is persisted fully),
 * restart, reloading the HTLC as a pending payment in the ChannelManager, which now has no references to it except from the ChannelMonitor which still has the pending HTLC,
 * replay the block connection, generating a duplicate PaymentSent event.
1 parent: e312f48

6 files changed: +127 -53 lines changed
 

‎lightning-persister/src/lib.rs

Lines changed: 6 additions & 1 deletion

@@ -159,13 +159,18 @@ impl FilesystemPersister {
 }
 
 impl<ChannelSigner: Sign> channelmonitor::Persist<ChannelSigner> for FilesystemPersister {
+	// TODO: We really need a way for the persister to inform the user that its time to crash/shut
+	// down once these start returning failure.
+	// A PermanentFailure implies we need to shut down since we're force-closing channels without
+	// even broadcasting!
+
 	fn persist_new_channel(&self, funding_txo: OutPoint, monitor: &ChannelMonitor<ChannelSigner>) -> Result<(), ChannelMonitorUpdateErr> {
 		let filename = format!("{}_{}", funding_txo.txid.to_hex(), funding_txo.index);
 		util::write_to_file(self.path_to_monitor_data(), filename, monitor)
 			.map_err(|_| ChannelMonitorUpdateErr::PermanentFailure)
 	}
 
-	fn update_persisted_channel(&self, funding_txo: OutPoint, _update: &ChannelMonitorUpdate, monitor: &ChannelMonitor<ChannelSigner>) -> Result<(), ChannelMonitorUpdateErr> {
+	fn update_persisted_channel(&self, funding_txo: OutPoint, _update: &Option<ChannelMonitorUpdate>, monitor: &ChannelMonitor<ChannelSigner>) -> Result<(), ChannelMonitorUpdateErr> {
 		let filename = format!("{}_{}", funding_txo.txid.to_hex(), funding_txo.index);
 		util::write_to_file(self.path_to_monitor_data(), filename, monitor)
 			.map_err(|_| ChannelMonitorUpdateErr::PermanentFailure)
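Note the signature change above: update_persisted_channel now takes &Option<ChannelMonitorUpdate>, where None indicates the write was triggered by new chain data being connected rather than by an off-chain ChannelMonitorUpdate. Below is a minimal sketch of what a third-party Persist implementation might look like against the new signature; MyStore and its write_monitor helper are hypothetical, and the use paths reflect the module layout around the time of this commit and may differ in other LDK versions.

// Sketch only: `MyStore` and `write_monitor` are hypothetical, not LDK APIs.
use lightning::chain::channelmonitor::{ChannelMonitor, ChannelMonitorUpdate, ChannelMonitorUpdateErr, Persist};
use lightning::chain::keysinterface::Sign;
use lightning::chain::transaction::OutPoint;
use lightning::util::ser::Writeable;

// Hypothetical durable key-value store standing in for a real backend.
struct MyStore;

impl MyStore {
	// Hypothetical helper: durably write `bytes` under `key` (fsync before returning Ok).
	fn write_monitor(&self, _key: &str, _bytes: &[u8]) -> std::io::Result<()> { Ok(()) }
}

impl<ChannelSigner: Sign> Persist<ChannelSigner> for MyStore {
	fn persist_new_channel(&self, funding_txo: OutPoint, monitor: &ChannelMonitor<ChannelSigner>) -> Result<(), ChannelMonitorUpdateErr> {
		let key = format!("{}_{}", funding_txo.txid, funding_txo.index);
		self.write_monitor(&key, &monitor.encode())
			.map_err(|_| ChannelMonitorUpdateErr::PermanentFailure)
	}

	fn update_persisted_channel(&self, funding_txo: OutPoint, update: &Option<ChannelMonitorUpdate>, monitor: &ChannelMonitor<ChannelSigner>) -> Result<(), ChannelMonitorUpdateErr> {
		match update {
			// Off-chain update-driven persist, as before this commit.
			Some(_update) => {},
			// Chain-sync-triggered persist: monitor events for this channel are held
			// back until this write completes (or channel_monitor_updated is called).
			None => {},
		}
		// Rewriting the full monitor is the simplest correct strategy in both cases,
		// mirroring what FilesystemPersister does above.
		let key = format!("{}_{}", funding_txo.txid, funding_txo.index);
		self.write_monitor(&key, &monitor.encode())
			.map_err(|_| ChannelMonitorUpdateErr::PermanentFailure)
	}
}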

‎lightning/src/chain/chainmonitor.rs

Lines changed: 72 additions & 24 deletions

@@ -60,6 +60,20 @@ pub struct ChainMonitor<ChannelSigner: Sign, C: Deref, T: Deref, F: Deref, L: De
 {
 	/// The monitors
 	pub monitors: RwLock<HashMap<OutPoint, ChannelMonitor<ChannelSigner>>>,
+	/// Beyond the synchronization of `monitors` itself, we cannot handle user events until after
+	/// any chain updates have been stored on disk. This mutex is used to provide mutual exclusion
+	/// of event-processing/block-/transaction-connection.
+	/// This avoids the possibility of handling, e.g. an on-chain claim, generating a claim monitor
+	/// event, resulting in the relevant ChannelManager generating a PaymentSent event and dropping
+	/// the pending payment entry, and then reloading before the monitor is persisted, resulting in
+	/// the ChannelManager re-adding the same payment entry, before the same block is replayed,
+	/// resulting in a duplicate PaymentSent event.
+	///
+	/// XXX Describe what this means
+	/// XXX Figure out if its possible to have update ids here, I think no but it complicates the
+	/// channel_monitor_updated api a ton to make users track if they have the latest non-id state
+	/// stored :/
+	event_mutex: Mutex<HashSet<OutPoint>>,
 	chain_source: Option<C>,
 	broadcaster: T,
 	logger: L,

@@ -89,26 +103,44 @@ where C::Target: chain::Filter,
 		FN: Fn(&ChannelMonitor<ChannelSigner>, &TransactionData) -> Vec<TransactionOutputs>
 	{
 		let mut dependent_txdata = Vec::new();
-		let monitors = self.monitors.read().unwrap();
-		for monitor in monitors.values() {
-			let mut txn_outputs = process(monitor, txdata);
+		{
+			let monitors = self.monitors.write().unwrap();
+			for (funding_outpoint, monitor) in monitors.iter() {
+				let mut txn_outputs;
+				{
+					let mut ev_lock = self.event_mutex.lock().unwrap();
+					txn_outputs = process(monitor, txdata);
+					log_trace!(self.logger, "Syncing Channel Monitor for channel {}", log_funding_info!(monitor));
+					match self.persister.update_persisted_channel(*funding_outpoint, &None, monitor) {
+						Ok(()) =>
+							log_trace!(self.logger, "Finished syncing Channel Monitor for channel {}", log_funding_info!(monitor)),
+						Err(ChannelMonitorUpdateErr::PermanentFailure) => {
+							self.user_provided_events.lock().unwrap().push(MonitorEvent::UpdateFailed(*funding_outpoint));
+						},
+						Err(ChannelMonitorUpdateErr::TemporaryFailure) => {
+							log_debug!(self.logger, "Channel Monitor sync for channel {} in progress, holding events until completion!", log_funding_info!(monitor));
+							ev_lock.insert(*funding_outpoint);
+						},
+					}
+				}
 
-			// Register any new outputs with the chain source for filtering, storing any dependent
-			// transactions from within the block that previously had not been included in txdata.
-			if let Some(ref chain_source) = self.chain_source {
-				let block_hash = header.block_hash();
-				for (txid, mut outputs) in txn_outputs.drain(..) {
-					for (idx, output) in outputs.drain(..) {
-						// Register any new outputs with the chain source for filtering and recurse
-						// if it indicates that there are dependent transactions within the block
-						// that had not been previously included in txdata.
-						let output = WatchedOutput {
-							block_hash: Some(block_hash),
-							outpoint: OutPoint { txid, index: idx as u16 },
-							script_pubkey: output.script_pubkey,
-						};
-						if let Some(tx) = chain_source.register_output(output) {
-							dependent_txdata.push(tx);
+				// Register any new outputs with the chain source for filtering, storing any dependent
+				// transactions from within the block that previously had not been included in txdata.
+				if let Some(ref chain_source) = self.chain_source {
+					let block_hash = header.block_hash();
+					for (txid, mut outputs) in txn_outputs.drain(..) {
+						for (idx, output) in outputs.drain(..) {
+							// Register any new outputs with the chain source for filtering and recurse
+							// if it indicates that there are dependent transactions within the block
+							// that had not been previously included in txdata.
+							let output = WatchedOutput {
+								block_hash: Some(block_hash),
+								outpoint: OutPoint { txid, index: idx as u16 },
+								script_pubkey: output.script_pubkey,
+							};
+							if let Some(tx) = chain_source.register_output(output) {
+								dependent_txdata.push(tx);
+							}
 						}
 					}
 				}

@@ -134,6 +166,7 @@ where C::Target: chain::Filter,
 	pub fn new(chain_source: Option<C>, broadcaster: T, logger: L, feeest: F, persister: P) -> Self {
 		Self {
 			monitors: RwLock::new(HashMap::new()),
+			event_mutex: Mutex::new(HashSet::new()),
 			chain_source,
 			broadcaster,
 			logger,

@@ -189,10 +222,14 @@ where C::Target: chain::Filter,
 	/// 3) update(s) are applied to each remote copy of a ChannelMonitor,
 	/// 4) once all remote copies are updated, you call this function with the update_id that
 	///    completed, and once it is the latest the Channel will be re-enabled.
-	pub fn channel_monitor_updated(&self, funding_txo: OutPoint, highest_applied_update_id: u64) {
-		self.user_provided_events.lock().unwrap().push(MonitorEvent::UpdateCompleted(MonitorUpdated {
-			funding_txo, monitor_update_id: highest_applied_update_id
-		}));
+	pub fn channel_monitor_updated(&self, funding_txo: OutPoint, highest_applied_update_id: Option<u64>) {
+		if let Some(monitor_update_id) = highest_applied_update_id {
+			self.user_provided_events.lock().unwrap().push(MonitorEvent::UpdateCompleted(MonitorUpdated {
+				funding_txo, monitor_update_id
+			}));
+		} else {
+			self.event_mutex.lock().unwrap().remove(&funding_txo);
+		}
 	}
 
 	#[cfg(any(test, feature = "fuzztarget", feature = "_test_utils"))]
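The Option<u64> argument lets the same completion call cover both persistence paths: Some(update_id) acknowledges an update-driven persist as before, while None tells the ChainMonitor that a chain-sync-triggered persist which had returned TemporaryFailure has completed, removing the channel from event_mutex so its monitor events can be released again. A usage sketch, assuming chain_monitor, funding_txo, and update_id already exist in the caller's context:

// A block-connection persist previously returned TemporaryFailure; once the monitor
// written during chain sync is durably on disk, release the held events:
chain_monitor.channel_monitor_updated(funding_txo, None);

// An off-chain ChannelMonitorUpdate has been applied to all remote copies; same call
// as before this commit, now wrapped in Some():
chain_monitor.channel_monitor_updated(funding_txo, Some(update_id));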
@@ -346,12 +383,17 @@ where C::Target: chain::Filter,
 		}
 		// Even if updating the monitor returns an error, the monitor's state will
 		// still be changed. So, persist the updated monitor despite the error.
-		let persist_res = self.persister.update_persisted_channel(funding_txo, &update, monitor);
+		let persist_res = self.persister.update_persisted_channel(funding_txo, &Some(update), monitor);
 		if let Err(ref e) = persist_res {
 			log_error!(self.logger, "Failed to persist channel monitor update: {:?}", e);
 		}
 		if update_res.is_err() {
 			Err(ChannelMonitorUpdateErr::PermanentFailure)
+		} else if self.user_provided_events.lock().unwrap().contains(&MonitorEvent::UpdateFailed(funding_txo)) {
+			// If we have a pending UpdateFailed event which hasn't yet been received by
+			// the ChannelManager, ensure we still fail channel updates for the failed
+			// channel.
+			Err(ChannelMonitorUpdateErr::PermanentFailure)
 		} else {
 			persist_res
 		}

@@ -360,6 +402,12 @@ where C::Target: chain::Filter,
 	}
 
 	fn release_pending_monitor_events(&self) -> Vec<MonitorEvent> {
+		let ev_lock = self.event_mutex.lock().unwrap();
+		if !ev_lock.is_empty() {
+			log_error!(self.logger, "A Channel Monitor sync is still in progress, refusing to provide monitor events!");
+			return self.user_provided_events.lock().unwrap().split_off(0);
+		}
+
 		let mut pending_monitor_events = self.user_provided_events.lock().unwrap().split_off(0);
 		for monitor in self.monitors.read().unwrap().values() {
 			pending_monitor_events.append(&mut monitor.get_and_clear_pending_monitor_events());

‎lightning/src/chain/channelmonitor.rs

Lines changed: 17 additions & 2 deletions

@@ -208,12 +208,17 @@ pub enum MonitorEvent {
 
 	/// XXX
 	UpdateCompleted(MonitorUpdated),
+
+	/// XXX
+	UpdateFailed(OutPoint),
 }
 impl_writeable_tlv_based_enum_upgradable!(MonitorEvent, ;
 	(0, HTLCEvent),
-	// Note that UpdateCompleted is currently never serialized to disk as it is generated only in ChainMonitor
+	// Note that UpdateCompleted and UpdateFailed is currently never serialized to disk as they are
+	// generated only in ChainMonitor
 	(1, UpdateCompleted),
 	(2, CommitmentTxConfirmed),
+	(3, UpdateFailed),
 );
 
 /// Simple structure sent back by `chain::Watch` when an HTLC from a forward channel is detected on
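The new UpdateFailed(OutPoint) variant reports that persisting the monitor for the given channel failed permanently while syncing chain data, so the channel has to be treated as force-closed, just like a PermanentFailure from an update-driven persist. A rough sketch of what consuming these events looks like; in LDK the ChannelManager normally drains monitor events itself through the chain::Watch trait, so this loop is purely illustrative and assumes chain_monitor is in scope with MonitorEvent and lightning::chain::Watch imported:

// Illustrative only: normally the ChannelManager drains these via chain::Watch.
for event in chain_monitor.release_pending_monitor_events() {
	match event {
		MonitorEvent::UpdateFailed(funding_txo) => {
			// Treat the channel funded by `funding_txo` as force-closed.
		},
		MonitorEvent::UpdateCompleted(updated) => {
			// An async update-driven persist finished; re-enable the channel once
			// `updated.monitor_update_id` is the latest update.
		},
		_ => {
			// HTLCEvent / CommitmentTxConfirmed are handled as before.
		},
	}
}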
@@ -707,7 +712,17 @@ pub(crate) struct ChannelMonitorImpl<Signer: Sign> {
 
 	payment_preimages: HashMap<PaymentHash, PaymentPreimage>,
 
+	// Note that MonitorEvents MUST NOT be generated during update processing, only generated
+	// during chain data processing. This prevents a race in ChainMonitor::update_channel (and
+	// presumably user implementations thereof as well) where we update the in-memory channel
+	// object, then before the persistence finishes (as its all under a read-lock), we return
+	// pending events to the user or to the relevant ChannelManager. This could cause duplicate
+	// events.
+	// Note that because the `event_lock` in `ChainMonitor` is only taken in
+	// block/transaction-connected events and *not* during block/transaction-disconnected events,
+	// we further MUST NOT generate events during block/transaction-disconnection.
 	pending_monitor_events: Vec<MonitorEvent>,
+
 	pending_events: Vec<Event>,
 
 	// Used to track on-chain events (i.e., transactions part of channels confirmed on chain) on

@@ -2947,7 +2962,7 @@ pub trait Persist<ChannelSigner: Sign> {
 	/// See [`ChannelMonitor::write`] for writing out a `ChannelMonitor`,
 	/// [`ChannelMonitorUpdate::write`] for writing out an update, and
 	/// [`ChannelMonitorUpdateErr`] for requirements when returning errors.
-	fn update_persisted_channel(&self, id: OutPoint, update: &ChannelMonitorUpdate, data: &ChannelMonitor<ChannelSigner>) -> Result<(), ChannelMonitorUpdateErr>;
+	fn update_persisted_channel(&self, id: OutPoint, update: &Option<ChannelMonitorUpdate>, data: &ChannelMonitor<ChannelSigner>) -> Result<(), ChannelMonitorUpdateErr>;
 }
 
 impl<Signer: Sign, T: Deref, F: Deref, L: Deref> chain::Listen for (ChannelMonitor<Signer>, T, F, L)
