Attributable failures #3065

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged: 9 commits, May 20, 2025
10 changes: 10 additions & 0 deletions docs/release-notes/eclair-vnext.md
@@ -13,6 +13,16 @@ When using anchor outputs, allows propagating our local commitment transaction t

This removes the need for increasing the commitment feerate based on mempool conditions, which ensures that channels won't be force-closed anymore when nodes disagree on the current feerate.

### Attributable failures

Eclair now supports attributable failures, which allow nodes to prove that they are not the source of a failure and to provide timing data.
Previously, a failing node could choose not to report the failure, which forced us to penalize all nodes of the route.
If all nodes of the route support attributable failures, we only need to penalize two nodes (there is still some uncertainty as to which of the two is the failing one).
See https://github.com/lightning/bolts/pull/1044 for more details.

Support is disabled by default because the spec is not yet final.
It can be enabled by setting `eclair.features.option_attributable_failure = optional`, at the risk of becoming incompatible with the final spec.
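
As a rough illustration only (this snippet is not part of the PR and assumes eclair's existing `Features(...)` constructor and `hasFeature` helper), this is how the resulting feature set can be built and queried, mirroring the `nodeParams.features.hasFeature(Features.AttributableFailures)` checks added in `Channel.scala` below:

```scala
import fr.acinq.eclair.{FeatureSupport, Features}

// Hypothetical sketch: the feature activation that results from setting
// `eclair.features.option_attributable_failure = optional` in eclair.conf.
val features = Features(Features.AttributableFailures -> FeatureSupport.Optional)

// The channel passes this boolean down to Commitments.sendFail to decide whether
// attribution data should be attached when failing an HTLC.
val useAttributableFailures = features.hasFeature(Features.AttributableFailures)
assert(useAttributableFailures)
```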

### API changes

- `listoffers` now returns more details about each offer.
1 change: 1 addition & 0 deletions eclair-core/src/main/resources/reference.conf
@@ -72,6 +72,7 @@ eclair {
option_shutdown_anysegwit = optional
option_dual_fund = optional
option_quiesce = optional
option_attributable_failure = disabled
option_onion_messages = optional
// This feature should only be enabled when acting as an LSP for mobile wallets.
// When activating this feature, the peer-storage section should be customized to match desired SLAs.
5 changes: 5 additions & 0 deletions eclair-core/src/main/scala/fr/acinq/eclair/Features.scala
@@ -270,6 +270,10 @@ object Features {
val mandatory = 34
}

case object AttributableFailures extends Feature with InitFeature with NodeFeature with Bolt11Feature {
val rfcName = "option_attributable_failure"
val mandatory = 36
}
case object OnionMessages extends Feature with InitFeature with NodeFeature {
val rfcName = "option_onion_messages"
val mandatory = 38
@@ -373,6 +377,7 @@ object Features {
ShutdownAnySegwit,
DualFunding,
Quiescence,
AttributableFailures,
OnionMessages,
ProvideStorage,
ChannelType,
@@ -215,7 +215,7 @@ final case class CMD_ADD_HTLC(replyTo: ActorRef,

sealed trait HtlcSettlementCommand extends HasOptionalReplyToCommand with ForbiddenCommandDuringQuiescenceNegotiation with ForbiddenCommandWhenQuiescent { def id: Long }
final case class CMD_FULFILL_HTLC(id: Long, r: ByteVector32, commit: Boolean = false, replyTo_opt: Option[ActorRef] = None) extends HtlcSettlementCommand
final case class CMD_FAIL_HTLC(id: Long, reason: FailureReason, delay_opt: Option[FiniteDuration] = None, commit: Boolean = false, replyTo_opt: Option[ActorRef] = None) extends HtlcSettlementCommand
final case class CMD_FAIL_HTLC(id: Long, reason: FailureReason, htlcReceivedAt_opt: Option[TimestampMilli], delay_opt: Option[FiniteDuration] = None, commit: Boolean = false, replyTo_opt: Option[ActorRef] = None) extends HtlcSettlementCommand
final case class CMD_FAIL_MALFORMED_HTLC(id: Long, onionHash: ByteVector32, failureCode: Int, commit: Boolean = false, replyTo_opt: Option[ActorRef] = None) extends HtlcSettlementCommand
final case class CMD_UPDATE_FEE(feeratePerKw: FeeratePerKw, commit: Boolean = false, replyTo_opt: Option[ActorRef] = None) extends HasOptionalReplyToCommand with ForbiddenCommandDuringQuiescenceNegotiation with ForbiddenCommandWhenQuiescent
final case class CMD_SIGN(replyTo_opt: Option[ActorRef] = None) extends HasOptionalReplyToCommand with ForbiddenCommandWhenQuiescent
@@ -921,14 +921,14 @@ case class Commitments(params: ChannelParams,
case None => Left(UnknownHtlcId(channelId, fulfill.id))
}

def sendFail(cmd: CMD_FAIL_HTLC, nodeSecret: PrivateKey): Either[ChannelException, (Commitments, HtlcFailureMessage)] =
def sendFail(cmd: CMD_FAIL_HTLC, nodeSecret: PrivateKey, useAttributableFailures: Boolean): Either[ChannelException, (Commitments, HtlcFailureMessage)] =
getIncomingHtlcCrossSigned(cmd.id) match {
case Some(htlc) if CommitmentChanges.alreadyProposed(changes.localChanges.proposed, htlc.id) =>
// we have already sent a fail/fulfill for this htlc
Left(UnknownHtlcId(channelId, cmd.id))
case Some(htlc) =>
// we need the shared secret to build the error packet
OutgoingPaymentPacket.buildHtlcFailure(nodeSecret, cmd, htlc).map(fail => (copy(changes = changes.addLocalProposal(fail)), fail))
OutgoingPaymentPacket.buildHtlcFailure(nodeSecret, useAttributableFailures, cmd, htlc).map(fail => (copy(changes = changes.addLocalProposal(fail)), fail))
case None => Left(UnknownHtlcId(channelId, cmd.id))
}

@@ -501,7 +501,7 @@ class Channel(val nodeParams: NodeParams, val channelKeys: ChannelKeys, val wall
log.debug("delaying CMD_FAIL_HTLC with id={} for {}", c.id, delay)
context.system.scheduler.scheduleOnce(delay, self, c.copy(delay_opt = None))
stay()
case None => d.commitments.sendFail(c, nodeParams.privateKey) match {
case None => d.commitments.sendFail(c, nodeParams.privateKey, nodeParams.features.hasFeature(Features.AttributableFailures)) match {
case Right((commitments1, fail)) =>
if (c.commit) self ! CMD_SIGN()
context.system.eventStream.publish(AvailableBalanceChanged(self, d.channelId, d.aliases, commitments1, d.lastAnnouncement_opt))
@@ -668,7 +668,7 @@ class Channel(val nodeParams: NodeParams, val channelKeys: ChannelKeys, val wall
case PostRevocationAction.RejectHtlc(add) =>
log.debug("rejecting incoming htlc {}", add)
// NB: we don't set commit = true, we will sign all updates at once afterwards.
self ! CMD_FAIL_HTLC(add.id, FailureReason.LocalFailure(TemporaryChannelFailure(Some(d.channelUpdate))), commit = true)
self ! CMD_FAIL_HTLC(add.id, FailureReason.LocalFailure(TemporaryChannelFailure(Some(d.channelUpdate))), Some(TimestampMilli.now()), commit = true)
case PostRevocationAction.RelayFailure(result) =>
log.debug("forwarding {} to relayer", result)
relayer ! result
@@ -1498,7 +1498,7 @@ class Channel(val nodeParams: NodeParams, val channelKeys: ChannelKeys, val wall
}

case Event(c: CMD_FAIL_HTLC, d: DATA_SHUTDOWN) =>
d.commitments.sendFail(c, nodeParams.privateKey) match {
d.commitments.sendFail(c, nodeParams.privateKey, nodeParams.features.hasFeature(Features.AttributableFailures)) match {
case Right((commitments1, fail)) =>
if (c.commit) self ! CMD_SIGN()
handleCommandSuccess(c, d.copy(commitments = commitments1)) sending fail
@@ -1617,11 +1617,11 @@ class Channel(val nodeParams: NodeParams, val channelKeys: ChannelKeys, val wall
case PostRevocationAction.RelayHtlc(add) =>
// BOLT 2: A sending node SHOULD fail to route any HTLC added after it sent shutdown.
log.debug("closing in progress: failing {}", add)
self ! CMD_FAIL_HTLC(add.id, FailureReason.LocalFailure(PermanentChannelFailure()), commit = true)
self ! CMD_FAIL_HTLC(add.id, FailureReason.LocalFailure(PermanentChannelFailure()), Some(TimestampMilli.now()), commit = true)
case PostRevocationAction.RejectHtlc(add) =>
// BOLT 2: A sending node SHOULD fail to route any HTLC added after it sent shutdown.
log.debug("closing in progress: rejecting {}", add)
self ! CMD_FAIL_HTLC(add.id, FailureReason.LocalFailure(PermanentChannelFailure()), commit = true)
self ! CMD_FAIL_HTLC(add.id, FailureReason.LocalFailure(PermanentChannelFailure()), Some(TimestampMilli.now()), commit = true)
case PostRevocationAction.RelayFailure(result) =>
log.debug("forwarding {} to relayer", result)
relayer ! result
@@ -1861,7 +1861,7 @@ class Channel(val nodeParams: NodeParams, val channelKeys: ChannelKeys, val wall
case Event(c: HtlcSettlementCommand, d: DATA_CLOSING) =>
(c match {
case c: CMD_FULFILL_HTLC => d.commitments.sendFulfill(c)
case c: CMD_FAIL_HTLC => d.commitments.sendFail(c, nodeParams.privateKey)
case c: CMD_FAIL_HTLC => d.commitments.sendFail(c, nodeParams.privateKey, nodeParams.features.hasFeature(Features.AttributableFailures))
case c: CMD_FAIL_MALFORMED_HTLC => d.commitments.sendFailMalformed(c)
}) match {
case Right((commitments1, _)) =>
111 changes: 100 additions & 11 deletions eclair-core/src/main/scala/fr/acinq/eclair/crypto/Sphinx.scala
@@ -23,8 +23,10 @@ import fr.acinq.eclair.wire.protocol._
import grizzled.slf4j.Logging
import scodec.Attempt
import scodec.bits.ByteVector
import scodec.codecs.uint32

import scala.annotation.tailrec
import scala.concurrent.duration.{DurationLong, FiniteDuration}
import scala.util.{Failure, Success, Try}

/**
@@ -282,24 +284,28 @@
*/
case class CannotDecryptFailurePacket(unwrapped: ByteVector)

case class HoldTime(duration: FiniteDuration, remoteNodeId: PublicKey)

case class HtlcFailure(holdTimes: Seq[HoldTime], failure: Either[CannotDecryptFailurePacket, DecryptedFailurePacket])

object FailurePacket {

/**
* Create a failure packet that will be returned to the sender.
* Create a failure packet that needs to be wrapped before being returned to the sender.
* Each intermediate hop will add a layer of encryption and forward to the previous hop.
* Note that malicious intermediate hops may drop the packet or alter it (which breaks the mac).
*
* @param sharedSecret destination node's shared secret that was computed when the original onion for the HTLC
* was created or forwarded: see OnionPacket.create() and OnionPacket.wrap().
* @param failure failure message.
* @return a failure packet that can be sent to the destination node.
* @return a failure packet that still needs to be wrapped before being sent to the destination node.
*/
def create(sharedSecret: ByteVector32, failure: FailureMessage): ByteVector = {
val um = generateKey("um", sharedSecret)
val packet = FailureMessageCodecs.failureOnionCodec(Hmac256(um)).encode(failure).require.toByteVector
logger.debug(s"um key: $um")
logger.debug(s"raw error packet: ${packet.toHex}")
wrap(packet, sharedSecret)
packet
}

/**
@@ -322,25 +328,108 @@
* it was sent by the corresponding node.
* Note that malicious nodes in the route may have altered the packet, triggering a decryption failure.
*
* @param packet failure packet.
* @param sharedSecrets nodes shared secrets.
* @param packet failure packet.
* @param attribution_opt attribution data for this failure packet.
* @param sharedSecrets nodes shared secrets.
* @return failure message if the origin of the packet could be identified and the packet decrypted, the unwrapped
* failure packet otherwise.
*/
@tailrec
def decrypt(packet: ByteVector, sharedSecrets: Seq[SharedSecret]): Either[CannotDecryptFailurePacket, DecryptedFailurePacket] = {
def decrypt(packet: ByteVector, attribution_opt: Option[ByteVector], sharedSecrets: Seq[SharedSecret], hopIndex: Int = 0): HtlcFailure = {
sharedSecrets match {
case Nil => Left(CannotDecryptFailurePacket(packet))
case Nil => HtlcFailure(Nil, Left(CannotDecryptFailurePacket(packet)))
case ss :: tail =>
val packet1 = wrap(packet, ss.secret)
val attribution1_opt = attribution_opt.flatMap(Attribution.unwrap(_, packet1, ss.secret, hopIndex))
val um = generateKey("um", ss.secret)
FailureMessageCodecs.failureOnionCodec(Hmac256(um)).decode(packet1.toBitVector) match {
case Attempt.Successful(value) => Right(DecryptedFailurePacket(ss.remoteNodeId, value.value))
case _ => decrypt(packet1, tail)
val HtlcFailure(downstreamHoldTimes, failure) = FailureMessageCodecs.failureOnionCodec(Hmac256(um)).decode(packet1.toBitVector) match {
case Attempt.Successful(value) => HtlcFailure(Nil, Right(DecryptedFailurePacket(ss.remoteNodeId, value.value)))
case _ => decrypt(packet1, attribution1_opt.map(_._2), tail, hopIndex + 1)
}
HtlcFailure(attribution1_opt.map(n => HoldTime(n._1, ss.remoteNodeId) +: downstreamHoldTimes).getOrElse(Nil), failure)
}
}

/**
* Attribution data is added to the failure packet and prevents a node from evading responsibility for its failures.
* Nodes that relay attribution data can prove that they are not the erring node; if the erring node tries to hide,
* at most two nodes can be suspected (the last one to send attribution data and the one after it).
* It also adds timing data for each node on the path.
* https://github.com/lightning/bolts/pull/1044
*/
object Attribution {
val maxNumHops = 20
val holdTimeLength = 4
val hmacLength = 4 // HMACs are truncated to 4 bytes to save space
val totalLength = maxNumHops * holdTimeLength + maxNumHops * (maxNumHops + 1) / 2 * hmacLength // = 920

private def cipher(bytes: ByteVector, sharedSecret: ByteVector32): ByteVector = {
val key = generateKey("ammagext", sharedSecret)
val stream = generateStream(key, totalLength)
bytes xor stream
}

/**
* Get the HMACs from the attribution data.
* The layout of the attribution data is as follows (using maxNumHops = 3 for conciseness):
* holdTime(0) ++ holdTime(1) ++ holdTime(2) ++
* hmacs(0)(0) ++ hmacs(0)(1) ++ hmacs(0)(2) ++
* hmacs(1)(0) ++ hmacs(1)(1) ++
* hmacs(2)(0)
*
* Where `hmac(i)(j)` is the hmac added by node `i` (counted from the node that built the attribution data),
* assuming it is `maxNumHops - 1 - i - j` hops away from the erring node.
*/
private def getHmacs(bytes: ByteVector): Seq[Seq[ByteVector]] =
(0 until maxNumHops).map(i => (0 until (maxNumHops - i)).map(j => {
val start = maxNumHops * holdTimeLength + (maxNumHops * i - (i * (i - 1)) / 2 + j) * hmacLength
bytes.slice(start, start + hmacLength)
}))

/**
* Computes the HMACs for the node that is `minNumHop` hops away from us. Hence we only compute `maxNumHops - minNumHop` HMACs.
* HMACs are truncated to 4 bytes to save space. An attacker has only one try to guess the HMAC so 4 bytes should be enough.
*/
private def computeHmacs(mac: Mac32, failurePacket: ByteVector, holdTimes: ByteVector, hmacs: Seq[Seq[ByteVector]], minNumHop: Int): Seq[ByteVector] = {
(minNumHop until maxNumHops).map(i => {
val y = maxNumHops - i
mac.mac(failurePacket ++
holdTimes.take(y * holdTimeLength) ++
ByteVector.concat((0 until y - 1).map(j => hmacs(j)(i)))).bytes.take(hmacLength)
})
}

/**
* Create attribution data to send with the failure packet
*
* @param failurePacket the failure packet before being wrapped
*/
def create(previousAttribution_opt: Option[ByteVector], failurePacket: ByteVector, holdTime: FiniteDuration, sharedSecret: ByteVector32): ByteVector = {
val previousAttribution = previousAttribution_opt.getOrElse(ByteVector.low(totalLength))
val previousHmacs = getHmacs(previousAttribution).dropRight(1).map(_.drop(1))
val mac = Hmac256(generateKey("um", sharedSecret))
val holdTimes = uint32.encode(holdTime.toMillis).require.bytes ++ previousAttribution.take((maxNumHops - 1) * holdTimeLength)
val hmacs = computeHmacs(mac, failurePacket, holdTimes, previousHmacs, 0) +: previousHmacs
cipher(holdTimes ++ ByteVector.concat(hmacs.map(ByteVector.concat(_))), sharedSecret)
}

/**
* Unwrap one hop of attribution data
* @return a pair with the hold time for this hop and the attribution data for the next hop, or None if the attribution data was invalid
*/
def unwrap(encrypted: ByteVector, failurePacket: ByteVector, sharedSecret: ByteVector32, minNumHop: Int): Option[(FiniteDuration, ByteVector)] = {
val bytes = cipher(encrypted, sharedSecret)
val holdTime = uint32.decode(bytes.take(holdTimeLength).bits).require.value.milliseconds
val hmacs = getHmacs(bytes)
val mac = Hmac256(generateKey("um", sharedSecret))
if (computeHmacs(mac, failurePacket, bytes.take(maxNumHops * holdTimeLength), hmacs.drop(1), minNumHop) == hmacs.head.drop(minNumHop)) {
val unwrapped = bytes.slice(holdTimeLength, maxNumHops * holdTimeLength) ++ ByteVector.low(holdTimeLength) ++ ByteVector.concat((hmacs.drop(1) :+ Seq()).map(s => ByteVector.low(hmacLength) ++ ByteVector.concat(s)))
Some(holdTime, unwrapped)
} else {
None
}
}
}
}

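To make the attribution data layout and the offset arithmetic in `getHmacs` easier to verify, here is a small self-contained sketch (illustrative only, not part of this PR) that reproduces the layout described in the comments above, using `maxNumHops = 3` instead of 20 so the offsets can be checked by hand:

```scala
// Standalone sketch of the attribution data layout (not eclair code).
object AttributionLayoutSketch extends App {
  val maxNumHops = 3     // the real value used by the protocol is 20
  val holdTimeLength = 4 // each hold time is a uint32 number of milliseconds
  val hmacLength = 4     // HMACs are truncated to 4 bytes

  // Hold times come first: holdTime(0) ++ holdTime(1) ++ holdTime(2).
  val holdTimesLength = maxNumHops * holdTimeLength

  // hmacs(i)(j) is the HMAC added by node i (counted from the node that built the
  // attribution data), assuming it is maxNumHops - 1 - i - j hops away from the erring
  // node; node i contributes maxNumHops - i HMACs.
  def hmacOffset(i: Int, j: Int): Int =
    holdTimesLength + (maxNumHops * i - i * (i - 1) / 2 + j) * hmacLength

  for (i <- 0 until maxNumHops; j <- 0 until maxNumHops - i) {
    println(s"hmacs($i)($j) starts at byte ${hmacOffset(i, j)}")
  }

  // 36 bytes with maxNumHops = 3; 920 bytes with the real maxNumHops = 20.
  val totalLength = maxNumHops * holdTimeLength + maxNumHops * (maxNumHops + 1) / 2 * hmacLength
  println(s"total attribution data length: $totalLength bytes")
}
```

When a hop is peeled during decryption, `unwrap` discards the first hold time and the first row of HMACs (those belong to the hop being removed), shifts the remaining slots forward and zero-pads the freed space before trying the next shared secret.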
@@ -250,7 +250,7 @@ object FailureSummary {
def apply(f: PaymentFailure): FailureSummary = f match {
case LocalFailure(_, route, t) => FailureSummary(FailureType.LOCAL, t.getMessage, route.map(h => HopSummary(h)).toList, route.headOption.map(_.nodeId))
case RemoteFailure(_, route, e) => FailureSummary(FailureType.REMOTE, e.failureMessage.message, route.map(h => HopSummary(h)).toList, Some(e.originNode))
case UnreadableRemoteFailure(_, route, _) => FailureSummary(FailureType.UNREADABLE_REMOTE, "could not decrypt failure onion", route.map(h => HopSummary(h)).toList, None)
case UnreadableRemoteFailure(_, route, _, _) => FailureSummary(FailureType.UNREADABLE_REMOTE, "could not decrypt failure onion", route.map(h => HopSummary(h)).toList, None)
}
}

2 changes: 1 addition & 1 deletion eclair-core/src/main/scala/fr/acinq/eclair/io/Peer.scala
@@ -339,7 +339,7 @@ class Peer(val nodeParams: NodeParams,
pending.proposed.find(_.htlc.id == msg.id) match {
case Some(htlc) =>
val failure = msg match {
case msg: WillFailHtlc => FailureReason.EncryptedDownstreamFailure(msg.reason)
case msg: WillFailHtlc => FailureReason.EncryptedDownstreamFailure(msg.reason, msg.attribution_opt)
case msg: WillFailMalformedHtlc => FailureReason.LocalFailure(createBadOnionFailure(msg.onionHash, msg.failureCode))
}
htlc.createFailureCommands(Some(failure))(log).foreach { case (channelId, cmd) => PendingCommandsDb.safeSend(register, nodeParams.db.pendingCommands, channelId, cmd) }