bimodal pathfinding probability improvements #8330

Merged
6 changes: 6 additions & 0 deletions docs/release-notes/release-notes-0.19.0.md
@@ -122,6 +122,12 @@ when running LND with an aux component injected (custom channels).
address is added to LND using the `ImportTapscript` RPC, LND previously failed
to perform a cooperative close to that address.

* [Bimodal pathfinding probability
Member: is this targeting 19 or 20?

Collaborator Author: I changed it to target 20, but I think it could be included earlier since it's basically a bug fix PR.

improvements](https://github.com/lightningnetwork/lnd/pull/8330). A fallback
probability is used if the bimodal model is not applicable. Fixes are added
such that the probability is evaluated more quickly and is more accurate when
mission control data is outdated.

# New Features

* Add support for [archiving channel backup](https://github.com/lightningnetwork/lnd/pull/9232)
95 changes: 58 additions & 37 deletions routing/probability_bimodal.go
@@ -416,34 +416,38 @@ func cannotSend(failAmount, capacity lnwire.MilliSatoshi, now,

// primitive computes the indefinite integral of our assumed (normalized)
// liquidity probability distribution. The distribution of liquidity x here is
// the function P(x) ~ exp(-x/s) + exp((x-c)/s), i.e., two exponentials residing
// at the ends of channels. This means that we expect liquidity to be at either
// side of the channel with capacity c. The s parameter (scale) defines how far
// the liquidity leaks into the channel. A very low scale assumes completely
// unbalanced channels, a very high scale assumes a random distribution. More
// details can be found in
// the function P(x) ~ exp(-x/s) + exp((x-c)/s) + 1/c, i.e., two exponentials
// residing at the ends of channels. This means that we expect liquidity to be
// at either side of the channel with capacity c. The s parameter (scale)
// defines how far the liquidity leaks into the channel. A very low scale
// assumes completely unbalanced channels, a very high scale assumes a random
// distribution. More details can be found in
// https://github.com/lightningnetwork/lnd/issues/5988#issuecomment-1131234858.
// Additionally, we add a constant term 1/c to the distribution to avoid
// normalization issues and to fall back to a uniform distribution should the
// previous success and fail amounts contradict a bimodal distribution.
func (p *BimodalEstimator) primitive(c, x float64) float64 {
s := float64(p.BimodalScaleMsat)

// The indefinite integral of P(x) is given by
// Int P(x) dx = H(x) = s * (-e(-x/s) + e((x-c)/s)),
// Int P(x) dx = H(x) = s * (-e(-x/s) + e((x-c)/s) + x/(c*s)),
// and its norm from 0 to c can be computed from it,
// norm = [H(x)]_0^c = s * (-e(-c/s) + 1 -(1 + e(-c/s))).
// norm = [H(x)]_0^c = s * (-e(-c/s) + 1 + 1/s -(-1 + e(-c/s))) =
// = s * (-2*e(-c/s) + 2 + 1/s).
// The prefactors s are left out, as they cancel out in the end.
// norm can only become zero, if c is zero, which we sorted out before
// calling this method.
ecs := math.Exp(-c / s)
exs := math.Exp(-x / s)
norm := -2*ecs + 2 + 1/s

// It would be possible to split the next term and reuse the factors
// from before, but this can lead to numerical issues with large
// numbers.
excs := math.Exp((x - c) / s)

// norm can only become zero, if c is zero, which we sorted out before
// calling this method.
norm := -2*ecs + 2
exs := math.Exp(-x / s)

// We end up with the primitive function of the normalized P(x).
return (-exs + excs) / norm
return (-exs + excs + x/(c*s)) / norm
Member: This is a very interesting mix of bimodal and uniform distribution - can c here ever be 1? Just thinking about edge cases here.

Member: Still curious about the c, what happens if it is 1?

Collaborator Author (@bitromortac, Apr 23, 2025): I think my comment got lost here. In practice that would mean that the combined distribution would be dominated by a uniform one. We could use a small constant as well here, but I wanted to avoid adding that. The 1/c term just represents a number that should be small with respect to the bimodal peaks and it cancels out in the norm calculation such that we only have 1/s left.

}
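For reference, the distribution and its primitive from the code comment above, written out (this restates the comment, no new assumptions beyond it):

$$P(x) \sim e^{-x/s} + e^{(x-c)/s} + \frac{1}{c}, \qquad H(x) = \int P(x)\,dx = s\left(-e^{-x/s} + e^{(x-c)/s} + \frac{x}{c\,s}\right)$$

$$\mathrm{norm} = H(c) - H(0) = s\left(-2e^{-c/s} + 2 + \frac{1}{s}\right)$$

Because the uniform term always contributes $1/s$ to the norm, the norm stays strictly positive for any $c > 0$, even when both exponential terms underflow to zero; for a very small capacity (down to 1 msat) the $1/c$ term dominates and the distribution degenerates to a uniform one, as discussed in the thread above.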

// integral computes the integral of our liquidity distribution from the lower
@@ -484,43 +488,60 @@ func (p *BimodalEstimator) probabilityFormula(capacityMsat, successAmountMsat,
return 0.0, nil
}

// Mission control may have some outdated values, we correct them here.
// TODO(bitromortac): there may be better decisions to make in these
// cases, e.g., resetting failAmount=cap and successAmount=0.

// failAmount should be capacity at max.
if failAmount > capacity {
log.Debugf("Correcting failAmount %v to capacity %v",
failAmount, capacity)
// The next statement is a safety check against an illogical condition.
// We discard the knowledge for the channel in that case since we have
// inconsistent data.
if failAmount <= successAmount {
log.Warnf("Fail amount (%s) is smaller than or equal to the "+
"success amount (%s) for capacity (%s)",
failAmountMsat, successAmountMsat, capacityMsat)

successAmount = 0
Member: In this case should we keep the failAmount the same and only let it be changed under if failAmount > capacity {?

Collaborator Author: This expresses that we don't know which of the fail/success amounts has the true info, and overriding both expresses that we are unsure about both, which is why I prefer to have it in a symmetric way. An idea here is that we could only override the more outdated observation with the bounds (but I think that we should not encounter that line anyhow because of mission control enforcing the condition, so that would be over-engineered).

failAmount = capacity
}

// successAmount should be capacity at max.
if successAmount > capacity {
log.Debugf("Correcting successAmount %v to capacity %v",
successAmount, capacity)

successAmount = capacity
// Mission control may have some outdated values with regard to the
Member: nice doc💯

// current channel capacity between a node pair. This can happen in case
// a large parallel channel was closed or if a channel was downscaled
// and can lead to success and/or failure amounts to be out of the range
// [0, capacity]. We assume that the liquidity situation of the channel
// is similar as before due to flow bias.

// In case we have a large success we need to correct it to be in the
// valid range. We set the success amount close to the capacity, because
// we assume to still be able to send. Any possible failure (that must
// in this case be larger than the capacity) is corrected as well.
if successAmount >= capacity {
log.Debugf("Correcting success amount %s and failure amount "+
"%s to capacity %s", successAmountMsat,
failAmountMsat, capacityMsat)

// We choose the success amount to be one less than the
// capacity, to both fit success and failure amounts into the
// capacity range in a consistent manner.
successAmount = capacity - 1
Member: hmm, what's the reason for minus 1?

Collaborator Author: Agree, it's not that elegant, but it's to make both the success amount and fail amount together (where the fail amount must be one unit larger than the success amount) fall into the range [0, c]. In principle the fail amount could be allowed to be c+1, but I decided to always use c as the upper bound in the bimodal model, since it makes formulas and code nicer and we never expect to be able to send the full capacity due to the balance reserve anyhow.

Collaborator: so the successAmt needs to be -1 smaller for the reNorm calculation to not be 0 basically?

Collaborator Author: Right, I think that's another way to say that there would be a logical contradiction to say that you can send the same amount as you fail to send.

failAmount = capacity
}
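A compact way to see the renormalization issue discussed above (writing $a_s$ and $a_f$ for the success and fail amounts; the notation is mine): the success probability for an amount $a$ between them is the renormalized integral

$$P(X \ge a \mid a_s, a_f) = \frac{H(a_f) - H(a)}{H(a_f) - H(a_s)},$$

whose denominator vanishes if $a_f = a_s$. Clamping to $a_s = c - 1$ and $a_f = c$ keeps both amounts inside $[0, c]$ while leaving the denominator non-zero.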

// The next statement is a safety check against an illogical condition,
// otherwise the renormalization integral would become zero. This may
// happen if a large channel gets closed and smaller ones remain, but
// it should recover with the time decay.
if failAmount <= successAmount {
log.Tracef("fail amount (%v) is smaller than or equal the "+
"success amount (%v) for capacity (%v)",
failAmountMsat, successAmountMsat, capacityMsat)
// Having no or only a small success, but a large failure only needs
// adjustment of the failure amount.
if failAmount > capacity {
log.Debugf("Correcting failure amount %s to capacity %s",
failAmountMsat, capacityMsat)

return 0.0, nil
failAmount = capacity
}

// We cannot send more than the fail amount.
if amount >= failAmount {
return 0.0, nil
}

// We can send the amount if it is smaller than the success amount.
if amount <= successAmount {
return 1.0, nil
}

// The success probability for payment amount a is the integral over the
// prior distribution P(x), the probability to find liquidity between
// the amount a and channel capacity c (or failAmount a_f):
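For readers who want to trace the correction order discussed in the threads above outside the diff context, here is a minimal, self-contained sketch. It is not lnd code; the helper name and plain uint64 amounts are hypothetical, but the three checks mirror the ones introduced in probabilityFormula.

```go
package main

import "fmt"

// clampAmounts applies the corrections in the order used by this PR:
//  1. contradictory data (fail <= success) resets both values to "no
//     knowledge",
//  2. a success at or above capacity is clamped to capacity-1 (and the fail
//     amount to capacity),
//  3. a fail amount above capacity is clamped to capacity.
func clampAmounts(capacity, success, fail uint64) (uint64, uint64) {
	if fail <= success {
		success, fail = 0, capacity
	}
	if success >= capacity {
		success, fail = capacity-1, capacity
	}
	if fail > capacity {
		fail = capacity
	}
	return success, fail
}

func main() {
	// A channel whose capacity shrank from 20 to 10 units: old success and
	// fail observations above the new capacity are rescaled.
	fmt.Println(clampAmounts(10, 20, 21)) // 9 10

	// Only a large failure: just the fail amount is clamped.
	fmt.Println(clampAmounts(10, 2, 20)) // 2 10

	// Contradictory data: reset to no knowledge.
	fmt.Println(clampAmounts(10, 5, 5)) // 0 10
}
```

The first and second examples correspond to the new "smaller cap, large success/fail" and "smaller cap, large fail" test cases below; the third corresponds to the reset for illogical success/fail pairs.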
123 changes: 106 additions & 17 deletions routing/probability_bimodal_test.go
@@ -11,10 +11,14 @@ import (
)

const (
smallAmount = lnwire.MilliSatoshi(400_000)
largeAmount = lnwire.MilliSatoshi(5_000_000)
capacity = lnwire.MilliSatoshi(10_000_000)
scale = lnwire.MilliSatoshi(400_000)
smallAmount = lnwire.MilliSatoshi(400_000_000)
Member: Q: is the commit msg saying it doesn't matter what values to set here?

Collaborator Author (@bitromortac, Apr 16, 2025): Not entirely arbitrary values; one should be able to multiply those values by the same number (here by 1000) and the tests should stay invariant, because that factor cancels out if you look at primitive. I changed those values because for the default config of the bimodal scale we use 300,000 sat, and 400,000 sat is closer to that.

largeAmount = lnwire.MilliSatoshi(5_000_000_000)
capacity = lnwire.MilliSatoshi(10_000_000_000)
scale = lnwire.MilliSatoshi(400_000_000)

// defaultTolerance is the default absolute tolerance for comparing
// probability calculations to expected values.
defaultTolerance = 0.001
)

// TestSuccessProbability tests that we get correct probability estimates for
@@ -25,7 +29,6 @@ func TestSuccessProbability(t *testing.T) {
tests := []struct {
name string
expectedProbability float64
tolerance float64
successAmount lnwire.MilliSatoshi
failAmount lnwire.MilliSatoshi
amount lnwire.MilliSatoshi
@@ -78,7 +81,6 @@ func TestSuccessProbability(t *testing.T) {
failAmount: capacity,
amount: smallAmount,
expectedProbability: 0.684,
Collaborator: Q: May I ask how you came up with these values? Did you calculate the probability via other means and then compare?

Collaborator Author: Yeah, not too great, I didn't calculate the number independently, but it serves more as a way to pin the behavior and to spot changes. The number itself is a sanity check however, since it's larger than the 0.5 in "no info, large amount". This is expected because for an amount that is similar to the scale you expect to find some leftovers, increasing the success probability.

tolerance: 0.001,
},
// If we had an unsettled success, we are sure we can send a
// lower amount.
@@ -110,7 +112,6 @@ func TestSuccessProbability(t *testing.T) {
failAmount: capacity,
amount: smallAmount,
expectedProbability: 0.851,
tolerance: 0.001,
},
// If we had a large unsettled success before, we know we can
// send even larger payments with high probability.
@@ -122,7 +123,6 @@ func TestSuccessProbability(t *testing.T) {
failAmount: capacity,
amount: largeAmount,
expectedProbability: 0.998,
tolerance: 0.001,
},
// If we had a failure before, we can't send with the fail
// amount.
@@ -151,7 +151,6 @@ func TestSuccessProbability(t *testing.T) {
failAmount: largeAmount,
amount: smallAmount,
expectedProbability: 0.368,
tolerance: 0.001,
},
// From here on we deal with mixed previous successes and
// failures.
@@ -183,7 +182,6 @@ func TestSuccessProbability(t *testing.T) {
successAmount: smallAmount,
amount: smallAmount + largeAmount/10,
expectedProbability: 0.287,
tolerance: 0.001,
},
// We still can't send the fail amount.
{
@@ -194,22 +192,45 @@ func TestSuccessProbability(t *testing.T) {
amount: largeAmount,
expectedProbability: 0.0,
},
// Same success and failure amounts (illogical).
// Same success and failure amounts (illogical), which gets
// reset to no knowledge.
{
name: "previous f/s, same",
capacity: capacity,
failAmount: largeAmount,
successAmount: largeAmount,
amount: largeAmount,
expectedProbability: 0.0,
expectedProbability: 0.5,
},
// Higher success than failure amount (illogical).
// Higher success than failure amount (illogical), which gets
// reset to no knowledge.
{
name: "previous f/s, higher success",
name: "previous f/s, illogical",
capacity: capacity,
failAmount: smallAmount,
successAmount: largeAmount,
expectedProbability: 0.0,
amount: largeAmount,
expectedProbability: 0.5,
},
// Larger success and larger failure than the old capacity are
// rescaled to still give a very high success rate.
{
name: "smaller cap, large success/fail",
capacity: capacity,
failAmount: 2*capacity + 1,
successAmount: 2 * capacity,
amount: largeAmount,
expectedProbability: 1.0,
},
// A lower success amount is not rescaled.
{
name: "smaller cap, large fail",
capacity: capacity,
successAmount: smallAmount / 2,
failAmount: 2 * capacity,
amount: smallAmount,
// See "previous success, larger amount".
expectedProbability: 0.851,
},
}

@@ -228,7 +249,7 @@ func TestSuccessProbability(t *testing.T) {
test.failAmount, test.amount,
)
require.InDelta(t, test.expectedProbability, p,
test.tolerance)
defaultTolerance)
require.NoError(t, err)
})
}
@@ -244,6 +265,59 @@ func TestSuccessProbability(t *testing.T) {
})
}

// TestSmallScale tests that the probability formula works with small scale
// values.
func TestSmallScale(t *testing.T) {
var (
// We use the smallest possible scale value together with a
// large capacity. This is an extreme form of a bimodal
// distribution.
scale lnwire.MilliSatoshi = 1
capacity lnwire.MilliSatoshi = 7e+09

// Success and failure amounts are chosen such that the expected
// balance must be somewhere in the middle of the channel, a
// value not expected when dealing with a bimodal distribution.
// In this case, the bimodal model fails to give good forecasts
// due to the numerics of the exponential functions, which get
// evaluated to exact zero floats.
successAmount lnwire.MilliSatoshi = 1.0e+09
failAmount lnwire.MilliSatoshi = 4.0e+09
)

estimator := BimodalEstimator{
BimodalConfig: BimodalConfig{BimodalScaleMsat: scale},
}

// An amount that's close to the success amount should have a very high
// probability.
amtCloseSuccess := successAmount + 1
p, err := estimator.probabilityFormula(
capacity, successAmount, failAmount, amtCloseSuccess,
)
require.NoError(t, err)
require.InDelta(t, 1.0, p, defaultTolerance)

// An amount that's close to the fail amount should have a very low
// probability.
amtCloseFail := failAmount - 1
p, err = estimator.probabilityFormula(
capacity, successAmount, failAmount, amtCloseFail,
)
require.NoError(t, err)
require.InDelta(t, 0.0, p, defaultTolerance)

// In the region where the bimodal model doesn't give good forecasts, we
Collaborator: Q: Could you describe where the region is here where the bimodal model is not good? I mean, it is very dependent on the constants we use for the capacity and scale, or? I wonder why it scales linearly, I thought it's more uniform now where the bimodal falls off too sharply?

Collaborator: Ah ok, given the integral the uniform distribution becomes linear, understood. But still interesting that at 25% into the channel-balance uncertainty we already have almost only the uniform part of the equation having the most effect. Probably needs some time to get an intuition for these values.

Collaborator Author: Right, it depends on the parameter combination: if you choose a huge s, every bimodal distribution will be a uniform one, since it doesn't matter how large the capacity is and this smears out the bimodal peaks.

If s is (very) small compared to c, the probability will drop quickly. If we then learn that the balance range is within the success/fail amounts range where the non-normalized probability is ~zero, we would still be able to retain the bimodal distribution within the newly learned range, but now starting at the success/fail amounts, if we had full numerical precision.

Because the numerical values of the exponential drop to zero if the scale is very small, at amounts >> s, we get the renormalization error. I think uncovering this error was nice, because keeping the bimodal distribution for the learned amounts isn't great and falling back to a uniform distribution is better (since it demonstrated that a bimodal model doesn't hold).

// fall back to a uniform model, which interpolates probabilities
// linearly.
amtLinear := successAmount + (failAmount-successAmount)*1/4
p, err = estimator.probabilityFormula(
capacity, successAmount, failAmount, amtLinear,
)
require.NoError(t, err)
require.InDelta(t, 0.75, p, defaultTolerance)
}
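To make the numerical point from the thread above concrete, here is a minimal standalone sketch (not part of the test file) using the same constants as TestSmallScale:

```go
package main

import (
	"fmt"
	"math"
)

func main() {
	// With scale s = 1 msat and amounts in the billions of msat, both
	// exponential terms of the bimodal density underflow to exactly zero:
	// the smallest positive float64 is ~4.9e-324, which exp(-x) already
	// reaches around x = 745.
	s := 1.0
	c := 7e9     // capacity
	x := 1.75e9  // an amount between successAmount and failAmount

	fmt.Println(math.Exp(-x / s))      // 0
	fmt.Println(math.Exp((x - c) / s)) // 0

	// Only the uniform term x/(c*s) of the primitive survives, so the
	// renormalized probability interpolates linearly between the success
	// and fail amounts, which is what the 0.75 expectation in the test
	// checks.
}
```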

// TestIntegral tests certain limits of the probability distribution integral.
func TestIntegral(t *testing.T) {
t.Parallel()
@@ -689,9 +763,24 @@ func TestLocalPairProbability(t *testing.T) {
// FuzzProbability checks that we don't encounter errors related to NaNs.
func FuzzProbability(f *testing.F) {
estimator := BimodalEstimator{
BimodalConfig: BimodalConfig{BimodalScaleMsat: scale},
BimodalConfig: BimodalConfig{BimodalScaleMsat: 400_000},
}

// Predefined seed reported in
// https://github.com/lightningnetwork/lnd/issues/9085. This test found
// a case where we could not compute a normalization factor because we
// learned that the balance lies somewhere in the middle of the channel,
// a surprising result for the bimodal model, which predicts two
// distinct modes at the edges and therefore has numerical issues in the
// middle. Additionally, the scale is small with respect to the values
// used here.
f.Add(
uint64(1_000_000_000),
uint64(300_000_000),
uint64(400_000_000),
uint64(300_000_000),
)

f.Fuzz(func(t *testing.T, capacity, successAmt, failAmt, amt uint64) {
if capacity == 0 {
return