Skip to content

Commit 055695f

Browse files
Attributable failures (#3065)
Implements lightning/bolts#1044
1 parent 9b0c00a commit 055695f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+698
-404
lines changed

docs/release-notes/eclair-vnext.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,16 @@ When using anchor outputs, allows propagating our local commitment transaction t
1313

1414
This removes the need for increasing the commitment feerate based on mempool conditions, which ensures that channels won't be force-closed anymore when nodes disagree on the current feerate.
1515

16+
### Attributable failures
17+
18+
Eclair now supports attributable failures, which allow nodes to prove that they are not the source of a failure and to provide timing data.
19+
Previously, a failing node could choose not to report the failure, and we would then penalize all nodes of the route.
20+
If all nodes of the route support attributable failures, we only need to penalize two nodes (there is still some uncertainty as to which of the two nodes is the failing one).
21+
See https://github.com/lightning/bolts/pull/1044 for more details.
22+
23+
Support is disabled by default as the spec is not yet final.
24+
It can be enabled by setting `eclair.features.option_attributable_failure = optional` at the risk of being incompatible with the final spec.
25+
1626
### API changes
1727

1828
- `listoffers` now returns more details about each offer.

eclair-core/src/main/resources/reference.conf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ eclair {
7474
option_shutdown_anysegwit = optional
7575
option_dual_fund = optional
7676
option_quiesce = optional
77+
option_attributable_failure = disabled
7778
option_onion_messages = optional
7879
// This feature should only be enabled when acting as an LSP for mobile wallets.
7980
// When activating this feature, the peer-storage section should be customized to match desired SLAs.

eclair-core/src/main/scala/fr/acinq/eclair/Features.scala

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,10 @@ object Features {
270270
val mandatory = 34
271271
}
272272

273+
// Feature flag for attributable failures (see lightning/bolts#1044).
// Its configuration name is `option_attributable_failure`, which is set to `disabled` by default in reference.conf.
case object AttributableFailures extends Feature with InitFeature with NodeFeature with Bolt11Feature {
274+
val rfcName = "option_attributable_failure"
275+
val mandatory = 36
276+
}
273277
case object OnionMessages extends Feature with InitFeature with NodeFeature {
274278
val rfcName = "option_onion_messages"
275279
val mandatory = 38
@@ -373,6 +377,7 @@ object Features {
373377
ShutdownAnySegwit,
374378
DualFunding,
375379
Quiescence,
380+
AttributableFailures,
376381
OnionMessages,
377382
ProvideStorage,
378383
ChannelType,

eclair-core/src/main/scala/fr/acinq/eclair/channel/ChannelData.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ final case class CMD_ADD_HTLC(replyTo: ActorRef,
215215

216216
sealed trait HtlcSettlementCommand extends HasOptionalReplyToCommand with ForbiddenCommandDuringQuiescenceNegotiation with ForbiddenCommandWhenQuiescent { def id: Long }
217217
final case class CMD_FULFILL_HTLC(id: Long, r: ByteVector32, commit: Boolean = false, replyTo_opt: Option[ActorRef] = None) extends HtlcSettlementCommand
218-
final case class CMD_FAIL_HTLC(id: Long, reason: FailureReason, delay_opt: Option[FiniteDuration] = None, commit: Boolean = false, replyTo_opt: Option[ActorRef] = None) extends HtlcSettlementCommand
218+
final case class CMD_FAIL_HTLC(id: Long, reason: FailureReason, htlcReceivedAt_opt: Option[TimestampMilli], delay_opt: Option[FiniteDuration] = None, commit: Boolean = false, replyTo_opt: Option[ActorRef] = None) extends HtlcSettlementCommand
219219
final case class CMD_FAIL_MALFORMED_HTLC(id: Long, onionHash: ByteVector32, failureCode: Int, commit: Boolean = false, replyTo_opt: Option[ActorRef] = None) extends HtlcSettlementCommand
220220
final case class CMD_UPDATE_FEE(feeratePerKw: FeeratePerKw, commit: Boolean = false, replyTo_opt: Option[ActorRef] = None) extends HasOptionalReplyToCommand with ForbiddenCommandDuringQuiescenceNegotiation with ForbiddenCommandWhenQuiescent
221221
final case class CMD_SIGN(replyTo_opt: Option[ActorRef] = None) extends HasOptionalReplyToCommand with ForbiddenCommandWhenQuiescent

eclair-core/src/main/scala/fr/acinq/eclair/channel/Commitments.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -921,14 +921,14 @@ case class Commitments(params: ChannelParams,
921921
case None => Left(UnknownHtlcId(channelId, fulfill.id))
922922
}
923923

924-
def sendFail(cmd: CMD_FAIL_HTLC, nodeSecret: PrivateKey): Either[ChannelException, (Commitments, HtlcFailureMessage)] =
924+
def sendFail(cmd: CMD_FAIL_HTLC, nodeSecret: PrivateKey, useAttributableFailures: Boolean): Either[ChannelException, (Commitments, HtlcFailureMessage)] =
925925
getIncomingHtlcCrossSigned(cmd.id) match {
926926
case Some(htlc) if CommitmentChanges.alreadyProposed(changes.localChanges.proposed, htlc.id) =>
927927
// we have already sent a fail/fulfill for this htlc
928928
Left(UnknownHtlcId(channelId, cmd.id))
929929
case Some(htlc) =>
930930
// we need the shared secret to build the error packet
931-
OutgoingPaymentPacket.buildHtlcFailure(nodeSecret, cmd, htlc).map(fail => (copy(changes = changes.addLocalProposal(fail)), fail))
931+
OutgoingPaymentPacket.buildHtlcFailure(nodeSecret, useAttributableFailures, cmd, htlc).map(fail => (copy(changes = changes.addLocalProposal(fail)), fail))
932932
case None => Left(UnknownHtlcId(channelId, cmd.id))
933933
}
934934

eclair-core/src/main/scala/fr/acinq/eclair/channel/fsm/Channel.scala

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -501,7 +501,7 @@ class Channel(val nodeParams: NodeParams, val channelKeys: ChannelKeys, val wall
501501
log.debug("delaying CMD_FAIL_HTLC with id={} for {}", c.id, delay)
502502
context.system.scheduler.scheduleOnce(delay, self, c.copy(delay_opt = None))
503503
stay()
504-
case None => d.commitments.sendFail(c, nodeParams.privateKey) match {
504+
case None => d.commitments.sendFail(c, nodeParams.privateKey, nodeParams.features.hasFeature(Features.AttributableFailures)) match {
505505
case Right((commitments1, fail)) =>
506506
if (c.commit) self ! CMD_SIGN()
507507
context.system.eventStream.publish(AvailableBalanceChanged(self, d.channelId, d.aliases, commitments1, d.lastAnnouncement_opt))
@@ -668,7 +668,7 @@ class Channel(val nodeParams: NodeParams, val channelKeys: ChannelKeys, val wall
668668
case PostRevocationAction.RejectHtlc(add) =>
669669
log.debug("rejecting incoming htlc {}", add)
670670
// NB: we don't set commit = true, we will sign all updates at once afterwards.
671-
self ! CMD_FAIL_HTLC(add.id, FailureReason.LocalFailure(TemporaryChannelFailure(Some(d.channelUpdate))), commit = true)
671+
self ! CMD_FAIL_HTLC(add.id, FailureReason.LocalFailure(TemporaryChannelFailure(Some(d.channelUpdate))), Some(TimestampMilli.now()), commit = true)
672672
case PostRevocationAction.RelayFailure(result) =>
673673
log.debug("forwarding {} to relayer", result)
674674
relayer ! result
@@ -1498,7 +1498,7 @@ class Channel(val nodeParams: NodeParams, val channelKeys: ChannelKeys, val wall
14981498
}
14991499

15001500
case Event(c: CMD_FAIL_HTLC, d: DATA_SHUTDOWN) =>
1501-
d.commitments.sendFail(c, nodeParams.privateKey) match {
1501+
d.commitments.sendFail(c, nodeParams.privateKey, nodeParams.features.hasFeature(Features.AttributableFailures)) match {
15021502
case Right((commitments1, fail)) =>
15031503
if (c.commit) self ! CMD_SIGN()
15041504
handleCommandSuccess(c, d.copy(commitments = commitments1)) sending fail
@@ -1617,11 +1617,11 @@ class Channel(val nodeParams: NodeParams, val channelKeys: ChannelKeys, val wall
16171617
case PostRevocationAction.RelayHtlc(add) =>
16181618
// BOLT 2: A sending node SHOULD fail to route any HTLC added after it sent shutdown.
16191619
log.debug("closing in progress: failing {}", add)
1620-
self ! CMD_FAIL_HTLC(add.id, FailureReason.LocalFailure(PermanentChannelFailure()), commit = true)
1620+
self ! CMD_FAIL_HTLC(add.id, FailureReason.LocalFailure(PermanentChannelFailure()), Some(TimestampMilli.now()), commit = true)
16211621
case PostRevocationAction.RejectHtlc(add) =>
16221622
// BOLT 2: A sending node SHOULD fail to route any HTLC added after it sent shutdown.
16231623
log.debug("closing in progress: rejecting {}", add)
1624-
self ! CMD_FAIL_HTLC(add.id, FailureReason.LocalFailure(PermanentChannelFailure()), commit = true)
1624+
self ! CMD_FAIL_HTLC(add.id, FailureReason.LocalFailure(PermanentChannelFailure()), Some(TimestampMilli.now()), commit = true)
16251625
case PostRevocationAction.RelayFailure(result) =>
16261626
log.debug("forwarding {} to relayer", result)
16271627
relayer ! result
@@ -1861,7 +1861,7 @@ class Channel(val nodeParams: NodeParams, val channelKeys: ChannelKeys, val wall
18611861
case Event(c: HtlcSettlementCommand, d: DATA_CLOSING) =>
18621862
(c match {
18631863
case c: CMD_FULFILL_HTLC => d.commitments.sendFulfill(c)
1864-
case c: CMD_FAIL_HTLC => d.commitments.sendFail(c, nodeParams.privateKey)
1864+
case c: CMD_FAIL_HTLC => d.commitments.sendFail(c, nodeParams.privateKey, nodeParams.features.hasFeature(Features.AttributableFailures))
18651865
case c: CMD_FAIL_MALFORMED_HTLC => d.commitments.sendFailMalformed(c)
18661866
}) match {
18671867
case Right((commitments1, _)) =>

eclair-core/src/main/scala/fr/acinq/eclair/crypto/Sphinx.scala

Lines changed: 100 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,10 @@ import fr.acinq.eclair.wire.protocol._
2323
import grizzled.slf4j.Logging
2424
import scodec.Attempt
2525
import scodec.bits.ByteVector
26+
import scodec.codecs.uint32
2627

2728
import scala.annotation.tailrec
29+
import scala.concurrent.duration.{DurationLong, FiniteDuration}
2830
import scala.util.{Failure, Success, Try}
2931

3032
/**
@@ -282,24 +284,28 @@ object Sphinx extends Logging {
282284
*/
283285
case class CannotDecryptFailurePacket(unwrapped: ByteVector)
284286

287+
/** Hold time extracted from the attribution data for one hop, together with the node id of that hop. */
case class HoldTime(duration: FiniteDuration, remoteNodeId: PublicKey)
288+
289+
/**
 * Result of decrypting a failure packet: the hold times that could be read from the attribution data (empty when
 * there is none or it is invalid), and either the decrypted failure or the packet that could not be decrypted.
 */
case class HtlcFailure(holdTimes: Seq[HoldTime], failure: Either[CannotDecryptFailurePacket, DecryptedFailurePacket])
290+
285291
object FailurePacket {
286292

287293
/**
288-
* Create a failure packet that will be returned to the sender.
294+
* Create a failure packet that needs to be wrapped before being returned to the sender.
289295
* Each intermediate hop will add a layer of encryption and forward to the previous hop.
290296
* Note that malicious intermediate hops may drop the packet or alter it (which breaks the mac).
291297
*
292298
* @param sharedSecret destination node's shared secret that was computed when the original onion for the HTLC
293299
* was created or forwarded: see OnionPacket.create() and OnionPacket.wrap().
294300
* @param failure failure message.
295-
* @return a failure packet that can be sent to the destination node.
301+
* @return a failure packet that still needs to be wrapped before being sent to the destination node.
296302
*/
297303
def create(sharedSecret: ByteVector32, failure: FailureMessage): ByteVector = {
298304
val um = generateKey("um", sharedSecret)
299305
val packet = FailureMessageCodecs.failureOnionCodec(Hmac256(um)).encode(failure).require.toByteVector
300306
logger.debug(s"um key: $um")
301307
logger.debug(s"raw error packet: ${packet.toHex}")
302-
wrap(packet, sharedSecret)
308+
packet
303309
}
304310

305311
/**
@@ -322,25 +328,108 @@ object Sphinx extends Logging {
322328
* it was sent by the corresponding node.
323329
* Note that malicious nodes in the route may have altered the packet, triggering a decryption failure.
324330
*
325-
* @param packet failure packet.
326-
* @param sharedSecrets nodes shared secrets.
331+
* @param packet failure packet.
332+
* @param attribution_opt attribution data for this failure packet.
333+
* @param sharedSecrets nodes shared secrets.
327334
* @return the hold times read from the attribution data (if any), together with the failure message if the origin
328335
* of the packet could be identified and the packet decrypted, or the unwrapped failure packet otherwise.
329336
*/
330-
@tailrec
331-
def decrypt(packet: ByteVector, sharedSecrets: Seq[SharedSecret]): Either[CannotDecryptFailurePacket, DecryptedFailurePacket] = {
337+
def decrypt(packet: ByteVector, attribution_opt: Option[ByteVector], sharedSecrets: Seq[SharedSecret], hopIndex: Int = 0): HtlcFailure = {
332338
sharedSecrets match {
333-
case Nil => Left(CannotDecryptFailurePacket(packet))
339+
case Nil => HtlcFailure(Nil, Left(CannotDecryptFailurePacket(packet)))
334340
case ss :: tail =>
335341
val packet1 = wrap(packet, ss.secret)
342+
val attribution1_opt = attribution_opt.flatMap(Attribution.unwrap(_, packet1, ss.secret, hopIndex))
336343
val um = generateKey("um", ss.secret)
337-
FailureMessageCodecs.failureOnionCodec(Hmac256(um)).decode(packet1.toBitVector) match {
338-
case Attempt.Successful(value) => Right(DecryptedFailurePacket(ss.remoteNodeId, value.value))
339-
case _ => decrypt(packet1, tail)
344+
val HtlcFailure(downstreamHoldTimes, failure) = FailureMessageCodecs.failureOnionCodec(Hmac256(um)).decode(packet1.toBitVector) match {
345+
case Attempt.Successful(value) => HtlcFailure(Nil, Right(DecryptedFailurePacket(ss.remoteNodeId, value.value)))
346+
case _ => decrypt(packet1, attribution1_opt.map(_._2), tail, hopIndex + 1)
340347
}
348+
HtlcFailure(attribution1_opt.map(n => HoldTime(n._1, ss.remoteNodeId) +: downstreamHoldTimes).getOrElse(Nil), failure)
341349
}
342350
}
343351

352+
/**
353+
* Attribution data is added to the failure packet and prevents a node from evading responsibility for its failures.
354+
* Nodes that relay attribution data can prove that they are not the erring node and in case the erring node tries
355+
* to hide, there will only be at most two nodes that can be the erring node (the last one to send attribution data
356+
* and the one after it).
357+
* It also adds timing data for each node on the path.
358+
* https://github.com/lightning/bolts/pull/1044
359+
*/
360+
object Attribution {
  /** Number of hops for which the attribution data carries a hold time and HMACs. */
  val maxNumHops = 20
  /** Size in bytes of one hold time entry (encoded as a uint32 number of milliseconds). */
  val holdTimeLength = 4
  val hmacLength = 4 // HMACs are truncated to 4 bytes to save space
  val totalLength = maxNumHops * holdTimeLength + maxNumHops * (maxNumHops + 1) / 2 * hmacLength // = 920

  /** Largest hold time, in milliseconds, that fits in the 4-byte unsigned hold time field. */
  private val maxHoldTimeMillis = 0xffffffffL

  /**
   * XOR the provided bytes with a stream of `totalLength` bytes derived from the shared secret.
   * Since XOR is its own inverse, this is used both to encrypt (in `create`) and to decrypt (in `unwrap`).
   */
  private def cipher(bytes: ByteVector, sharedSecret: ByteVector32): ByteVector = {
    val key = generateKey("ammagext", sharedSecret)
    val stream = generateStream(key, totalLength)
    bytes xor stream
  }

  /**
   * Get the HMACs from the attribution data.
   * The layout of the attribution data is as follows (using maxNumHops = 3 for conciseness):
   * holdTime(0) ++ holdTime(1) ++ holdTime(2) ++
   * hmacs(0)(0) ++ hmacs(0)(1) ++ hmacs(0)(2) ++
   * hmacs(1)(0) ++ hmacs(1)(1) ++
   * hmacs(2)(0)
   *
   * Where `hmac(i)(j)` is the hmac added by node `i` (counted from the node that built the attribution data),
   * assuming it is `maxNumHops - 1 - i - j` hops away from the erring node.
   *
   * @return `maxNumHops` rows, where row `i` contains `maxNumHops - i` HMACs.
   */
  private def getHmacs(bytes: ByteVector): Seq[Seq[ByteVector]] =
    (0 until maxNumHops).map(i => (0 until (maxNumHops - i)).map(j => {
      // Skip the hold times section, then the `i` previous rows (which contain maxNumHops, maxNumHops - 1, ... entries).
      val start = maxNumHops * holdTimeLength + (maxNumHops * i - (i * (i - 1)) / 2 + j) * hmacLength
      bytes.slice(start, start + hmacLength)
    }))

  /**
   * Computes the HMACs for the node that is `minNumHop` hops away from us. Hence we only compute `maxNumHops - minNumHop` HMACs.
   * HMACs are truncated to 4 bytes to save space. An attacker has only one try to guess the HMAC so 4 bytes should be enough.
   *
   * @param failurePacket failure packet covered by the HMACs.
   * @param holdTimes     hold times section of the attribution data (our own hold time first).
   * @param hmacs         HMAC rows of the downstream nodes (our own row excluded).
   */
  private def computeHmacs(mac: Mac32, failurePacket: ByteVector, holdTimes: ByteVector, hmacs: Seq[Seq[ByteVector]], minNumHop: Int): Seq[ByteVector] = {
    (minNumHop until maxNumHops).map(i => {
      val y = maxNumHops - i
      mac.mac(failurePacket ++
        holdTimes.take(y * holdTimeLength) ++
        ByteVector.concat((0 until y - 1).map(j => hmacs(j)(i)))).bytes.take(hmacLength)
    })
  }

  /**
   * Create attribution data to send with the failure packet
   *
   * @param previousAttribution_opt attribution data received from the downstream node, if any (all zeroes are used otherwise).
   * @param failurePacket           the failure packet before being wrapped
   * @param holdTime                our hold time; it is encoded as a uint32 number of milliseconds, so values outside
   *                                of that range are clamped instead of making the encoding throw.
   * @param sharedSecret            shared secret used to compute our HMACs and to encrypt the result.
   * @return the encrypted attribution data.
   */
  def create(previousAttribution_opt: Option[ByteVector], failurePacket: ByteVector, holdTime: FiniteDuration, sharedSecret: ByteVector32): ByteVector = {
    val previousAttribution = previousAttribution_opt.getOrElse(ByteVector.low(totalLength))
    // We shift the downstream HMACs: the last row and the first entry of every other row are dropped, which makes
    // room for our own row at the front.
    val previousHmacs = getHmacs(previousAttribution).dropRight(1).map(_.drop(1))
    val mac = Hmac256(generateKey("um", sharedSecret))
    // A negative or very large duration cannot be encoded as a uint32: we clamp it so that building a failure never throws.
    val holdTimeMillis = holdTime.toMillis.max(0L).min(maxHoldTimeMillis)
    // Our hold time goes first, and the last downstream hold time is dropped to keep a constant size.
    val holdTimes = uint32.encode(holdTimeMillis).require.bytes ++ previousAttribution.take((maxNumHops - 1) * holdTimeLength)
    val hmacs = computeHmacs(mac, failurePacket, holdTimes, previousHmacs, 0) +: previousHmacs
    cipher(holdTimes ++ ByteVector.concat(hmacs.map(ByteVector.concat(_))), sharedSecret)
  }

  /**
   * Unwrap one hop of attribution data
   *
   * @param encrypted     attribution data as received (must be exactly `totalLength` bytes).
   * @param failurePacket failure packet covered by the HMACs of this hop.
   * @param sharedSecret  shared secret of the hop being unwrapped.
   * @param minNumHop     number of hops between us and the hop being unwrapped (must be in `[0, maxNumHops)`).
   * @return a pair with the hold time for this hop and the attribution data for the next hop, or None if the attribution data was invalid
   */
  def unwrap(encrypted: ByteVector, failurePacket: ByteVector, sharedSecret: ByteVector32, minNumHop: Int): Option[(FiniteDuration, ByteVector)] = {
    // Attribution data comes from remote nodes: we reject anything that doesn't have the expected size instead of
    // throwing while decoding it. We also reject hop indices for which there would be no HMAC left to verify
    // (comparing two empty lists would otherwise accept arbitrary data).
    val validInput = encrypted.length == totalLength && minNumHop >= 0 && minNumHop < maxNumHops
    if (!validInput) {
      None
    } else {
      val bytes = cipher(encrypted, sharedSecret)
      val holdTime = uint32.decode(bytes.take(holdTimeLength).bits).require.value.milliseconds
      val hmacs = getHmacs(bytes)
      val mac = Hmac256(generateKey("um", sharedSecret))
      val expectedHmacs = computeHmacs(mac, failurePacket, bytes.take(maxNumHops * holdTimeLength), hmacs.drop(1), minNumHop)
      if (expectedHmacs == hmacs.head.drop(minNumHop)) {
        // We remove this hop's hold time and HMACs, shift the remaining data and pad with zeroes to keep a constant size.
        val shiftedHoldTimes = bytes.slice(holdTimeLength, maxNumHops * holdTimeLength) ++ ByteVector.low(holdTimeLength)
        val shiftedHmacs = ByteVector.concat((hmacs.drop(1) :+ Seq()).map(s => ByteVector.low(hmacLength) ++ ByteVector.concat(s)))
        Some((holdTime, shiftedHoldTimes ++ shiftedHmacs))
      } else {
        None
      }
    }
  }
}
344433
}
345434

346435
/**

eclair-core/src/main/scala/fr/acinq/eclair/db/PaymentsDb.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ object FailureSummary {
250250
def apply(f: PaymentFailure): FailureSummary = f match {
251251
case LocalFailure(_, route, t) => FailureSummary(FailureType.LOCAL, t.getMessage, route.map(h => HopSummary(h)).toList, route.headOption.map(_.nodeId))
252252
case RemoteFailure(_, route, e) => FailureSummary(FailureType.REMOTE, e.failureMessage.message, route.map(h => HopSummary(h)).toList, Some(e.originNode))
253-
case UnreadableRemoteFailure(_, route, _) => FailureSummary(FailureType.UNREADABLE_REMOTE, "could not decrypt failure onion", route.map(h => HopSummary(h)).toList, None)
253+
case UnreadableRemoteFailure(_, route, _, _) => FailureSummary(FailureType.UNREADABLE_REMOTE, "could not decrypt failure onion", route.map(h => HopSummary(h)).toList, None)
254254
}
255255
}
256256

eclair-core/src/main/scala/fr/acinq/eclair/io/Peer.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,7 @@ class Peer(val nodeParams: NodeParams,
339339
pending.proposed.find(_.htlc.id == msg.id) match {
340340
case Some(htlc) =>
341341
val failure = msg match {
342-
case msg: WillFailHtlc => FailureReason.EncryptedDownstreamFailure(msg.reason)
342+
case msg: WillFailHtlc => FailureReason.EncryptedDownstreamFailure(msg.reason, msg.attribution_opt)
343343
case msg: WillFailMalformedHtlc => FailureReason.LocalFailure(createBadOnionFailure(msg.onionHash, msg.failureCode))
344344
}
345345
htlc.createFailureCommands(Some(failure))(log).foreach { case (channelId, cmd) => PendingCommandsDb.safeSend(register, nodeParams.db.pendingCommands, channelId, cmd) }

0 commit comments

Comments
 (0)