Skip to content

Commit b42f470

Browse files
committed
Track channels which a given payment failed to traverse
When an HTLC fails, we currently rely on the scorer learning the failed channel and assigning an infinite (`u64::max_value()`) penalty to the channel so as to avoid retrying over the exact same path (if there's only one available path). This is common when trying to pay a mobile client behind an LSP if the mobile client is currently offline. This leads to the scorer being overly conservative in some cases - returning `u64::max_value()` when a given path hasn't been tried for a given payment may not be the best decision, even if that channel failed 50 minutes ago. By tracking channels which failed on a payment level and explicitly refusing to route over them we can relax the requirements on the scorer, allowing it to make different decisions on how to treat channels that failed relatively recently without causing payments to retry the same path forever. Closes lightningdevkit#1241, superseding lightningdevkit#1252.
1 parent e403999 commit b42f470

File tree

3 files changed

+55
-5
lines changed

3 files changed

+55
-5
lines changed

lightning/src/ln/channelmanager.rs

+7-2
Original file line numberDiff line numberDiff line change
@@ -3823,7 +3823,7 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
38233823
return;
38243824
}
38253825
mem::drop(channel_state_lock);
3826-
let retry = if let Some(payment_params_data) = payment_params {
3826+
let mut retry = if let Some(payment_params_data) = payment_params {
38273827
let path_last_hop = path.last().expect("Outbound payments must have had a valid path");
38283828
Some(RouteParameters {
38293829
payment_params: payment_params_data.clone(),
@@ -3839,6 +3839,9 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
38393839
let (network_update, short_channel_id, payment_retryable, onion_error_code, onion_error_data) = onion_utils::process_onion_failure(&self.secp_ctx, &self.logger, &source, err.data.clone());
38403840
#[cfg(not(test))]
38413841
let (network_update, short_channel_id, payment_retryable, _, _) = onion_utils::process_onion_failure(&self.secp_ctx, &self.logger, &source, err.data.clone());
3842+
if let Some(scid) = short_channel_id {
3843+
retry.as_mut().map(|r| r.payment_params.previously_failed_channels.push(scid));
3844+
}
38423845
// TODO: If we decided to blame ourselves (or one of our channels) in
38433846
// process_onion_failure we should close that channel as it implies our
38443847
// next-hop is needlessly blaming us!
@@ -3870,14 +3873,16 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
38703873
// ChannelDetails.
38713874
// TODO: For non-temporary failures, we really should be closing the
38723875
// channel here as we apparently can't relay through them anyway.
3876+
let scid = path.first().unwrap().short_channel_id;
3877+
retry.as_mut().map(|r| r.payment_params.previously_failed_channels.push(scid));
38733878
events::Event::PaymentPathFailed {
38743879
payment_id: Some(payment_id),
38753880
payment_hash: payment_hash.clone(),
38763881
rejected_by_dest: path.len() == 1,
38773882
network_update: None,
38783883
all_paths_failed,
38793884
path: path.clone(),
3880-
short_channel_id: Some(path.first().unwrap().short_channel_id),
3885+
short_channel_id: Some(scid),
38813886
retry,
38823887
#[cfg(test)]
38833888
error_code: Some(*failure_code),

lightning/src/ln/functional_test_utils.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -1492,7 +1492,7 @@ pub fn expect_payment_failed_conditions<'a, 'b, 'c, 'd, 'e>(
14921492
let mut events = node.node.get_and_clear_pending_events();
14931493
assert_eq!(events.len(), 1);
14941494
let expected_payment_id = match events.pop().unwrap() {
1495-
Event::PaymentPathFailed { payment_hash, rejected_by_dest, path, retry, payment_id, network_update,
1495+
Event::PaymentPathFailed { payment_hash, rejected_by_dest, path, retry, payment_id, network_update, short_channel_id,
14961496
#[cfg(test)]
14971497
error_code,
14981498
#[cfg(test)]
@@ -1502,6 +1502,9 @@ pub fn expect_payment_failed_conditions<'a, 'b, 'c, 'd, 'e>(
15021502
assert!(retry.is_some(), "expected retry.is_some()");
15031503
assert_eq!(retry.as_ref().unwrap().final_value_msat, path.last().unwrap().fee_msat, "Retry amount should match last hop in path");
15041504
assert_eq!(retry.as_ref().unwrap().payment_params.payee_pubkey, path.last().unwrap().pubkey, "Retry payee node_id should match last hop in path");
1505+
if let Some(scid) = short_channel_id {
1506+
assert!(retry.as_ref().unwrap().payment_params.previously_failed_channels.contains(&scid));
1507+
}
15051508

15061509
#[cfg(test)]
15071510
{

lightning/src/routing/router.rs

+44-2
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,11 @@ pub struct PaymentParameters {
225225
/// The maximum number of paths that may be used by MPP payments.
226226
/// Defaults to [`DEFAULT_MAX_MPP_PATH_COUNT`].
227227
pub max_mpp_path_count: u8,
228+
229+
/// A list of SCIDs which this payment was previously attempted over and which caused the
230+
/// payment to fail. Future attempts for the same payment shouldn't be relayed through any of
231+
/// these SCIDs.
232+
pub previously_failed_channels: Vec<u64>,
228233
}
229234

230235
impl_writeable_tlv_based!(PaymentParameters, {
@@ -233,6 +238,7 @@ impl_writeable_tlv_based!(PaymentParameters, {
233238
(2, features, option),
234239
(3, max_mpp_path_count, (default_value, DEFAULT_MAX_MPP_PATH_COUNT)),
235240
(4, route_hints, vec_type),
241+
(5, previously_failed_channels, vec_type),
236242
(6, expiry_time, option),
237243
});
238244

@@ -246,6 +252,7 @@ impl PaymentParameters {
246252
expiry_time: None,
247253
max_total_cltv_expiry_delta: DEFAULT_MAX_TOTAL_CLTV_EXPIRY_DELTA,
248254
max_mpp_path_count: DEFAULT_MAX_MPP_PATH_COUNT,
255+
previously_failed_channels: Vec::new(),
249256
}
250257
}
251258

@@ -985,15 +992,22 @@ where L::Target: Logger {
985992
(amount_to_transfer_over_msat < $next_hops_path_htlc_minimum_msat &&
986993
recommended_value_msat > $next_hops_path_htlc_minimum_msat));
987994

995+
let channel_failed_on_this_payment =
996+
payment_params.previously_failed_channels.contains(&short_channel_id);
997+
988998
// If HTLC minimum is larger than the amount we're going to transfer, we shouldn't
989999
// bother considering this channel. If retrying with recommended_value_msat may
9901000
// allow us to hit the HTLC minimum limit, set htlc_minimum_limit so that we go
9911001
// around again with a higher amount.
9921002
if contributes_sufficient_value && doesnt_exceed_max_path_length &&
993-
doesnt_exceed_cltv_delta_limit && may_overpay_to_meet_path_minimum_msat {
1003+
doesnt_exceed_cltv_delta_limit && !channel_failed_on_this_payment &&
1004+
may_overpay_to_meet_path_minimum_msat
1005+
{
9941006
hit_minimum_limit = true;
9951007
} else if contributes_sufficient_value && doesnt_exceed_max_path_length &&
996-
doesnt_exceed_cltv_delta_limit && over_path_minimum_msat {
1008+
doesnt_exceed_cltv_delta_limit && over_path_minimum_msat &&
1009+
!channel_failed_on_this_payment
1010+
{
9971011
// Note that low contribution here (limited by available_liquidity_msat)
9981012
// might violate htlc_minimum_msat on the hops which are next along the
9991013
// payment path (upstream to the payee). To avoid that, we recompute
@@ -1909,6 +1923,8 @@ mod tests {
19091923
use prelude::*;
19101924
use sync::{self, Arc};
19111925

1926+
use core::convert::TryInto;
1927+
19121928
fn get_channel_details(short_channel_id: Option<u64>, node_id: PublicKey,
19131929
features: InitFeatures, outbound_capacity_msat: u64) -> channelmanager::ChannelDetails {
19141930
channelmanager::ChannelDetails {
@@ -5482,6 +5498,32 @@ mod tests {
54825498
}
54835499
}
54845500

5501+
#[test]
5502+
fn avoids_recently_failed_paths() {
5503+
// Ensure that the router always avoids all of the `recently_failed_channels` channels by
5504+
// randomly inserting channels into it until we can't find a route anymore.
5505+
let (secp_ctx, network, _, _, logger) = build_graph();
5506+
let (_, our_id, _, nodes) = get_nodes(&secp_ctx);
5507+
let network_graph = network.read_only();
5508+
5509+
let scorer = test_utils::TestScorer::with_penalty(0);
5510+
let mut payment_params = PaymentParameters::from_node_id(nodes[6]).with_route_hints(last_hops(&nodes))
5511+
.with_max_mpp_path_count(1);
5512+
let keys_manager = test_utils::TestKeysInterface::new(&[0u8; 32], Network::Testnet);
5513+
let random_seed_bytes = keys_manager.get_secure_random_bytes();
5514+
5515+
loop {
5516+
if let Ok(route) = get_route(&our_id, &payment_params, &network_graph, None, 100, 0, Arc::clone(&logger), &scorer, &random_seed_bytes) {
5517+
for chan in route.paths[0].iter() {
5518+
assert!(!payment_params.previously_failed_channels.contains(&chan.short_channel_id));
5519+
}
5520+
let victim = (u64::from_ne_bytes(random_seed_bytes[0..8].try_into().unwrap()) as usize)
5521+
% route.paths[0].len();
5522+
payment_params.previously_failed_channels.push(route.paths[0][victim].short_channel_id);
5523+
} else { break; }
5524+
}
5525+
}
5526+
54855527
#[test]
54865528
fn limits_path_length() {
54875529
let (secp_ctx, network, _, _, logger) = build_line_graph();

0 commit comments

Comments
 (0)