Skip to content

Commit 6d5da6b

Browse files
Phase 3: Transaction tracing — protobuf context, PeerImp, NetworkOPs
- Add TraceContext protobuf message with trace_id, span_id, trace_flags - Add optional trace_context field (1001) to TMTransaction, TMProposeSet, TMValidation for cross-node trace propagation - Create TraceContextPropagator.h with extractFromProtobuf/injectToProtobuf utilities for serializing OTel context to/from P2P messages - Instrument PeerImp::handleTransaction with tx.receive span, tx hash, peer ID, and HashRouter suppression tracking attributes - Instrument NetworkOPsImp::processTransaction with tx.process span, tx hash, local flag, and sync/async path attributes Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 42e4054 commit 6d5da6b

File tree

4 files changed

+131
-0
lines changed

4 files changed

+131
-0
lines changed

include/xrpl/proto/xrpl.proto

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,15 @@ message TMPublicKey {
8585
// If you want to send an amount that is greater than any single address of yours
8686
// you must first combine coins from one address to another.
8787

88+
// Trace context for OpenTelemetry distributed tracing across nodes.
89+
// Uses W3C Trace Context format internally.
90+
message TraceContext {
91+
optional bytes trace_id = 1; // 16-byte trace identifier
92+
optional bytes span_id = 2; // 8-byte parent span identifier
93+
optional uint32 trace_flags = 3; // bit 0 = sampled
94+
optional string trace_state = 4; // W3C tracestate header value
95+
}
96+
8897
enum TransactionStatus {
8998
tsNEW = 1; // origin node did/could not validate
9099
tsCURRENT = 2; // scheduled to go in this ledger
@@ -101,6 +110,9 @@ message TMTransaction {
101110
required TransactionStatus status = 2;
102111
optional uint64 receiveTimestamp = 3;
103112
optional bool deferred = 4; // not applied to open ledger
113+
114+
// Optional trace context for OpenTelemetry distributed tracing
115+
optional TraceContext trace_context = 1001;
104116
}
105117

106118
message TMTransactions {
@@ -149,6 +161,9 @@ message TMProposeSet {
149161

150162
// Number of hops traveled
151163
optional uint32 hops = 12 [deprecated = true];
164+
165+
// Optional trace context for OpenTelemetry distributed tracing
166+
optional TraceContext trace_context = 1001;
152167
}
153168

154169
enum TxSetStatus {
@@ -194,6 +209,9 @@ message TMValidation {
194209

195210
// Number of hops traveled
196211
optional uint32 hops = 3 [deprecated = true];
212+
213+
// Optional trace context for OpenTelemetry distributed tracing
214+
optional TraceContext trace_context = 1001;
197215
}
198216

199217
// An array of Endpoint messages
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
#pragma once
2+
3+
/** Utilities for trace context propagation across nodes and HTTP headers.
4+
5+
Provides serialization/deserialization of OTel trace context to/from:
6+
- Protocol Buffer TraceContext messages (P2P cross-node propagation)
7+
- HTTP headers (W3C Trace Context for RPC)
8+
9+
Only compiled when XRPL_ENABLE_TELEMETRY is defined.
10+
*/
11+
12+
#ifdef XRPL_ENABLE_TELEMETRY
13+
14+
#include <opentelemetry/context/context.h>
15+
#include <opentelemetry/trace/default_span.h>
16+
#include <opentelemetry/trace/span_context.h>
17+
#include <opentelemetry/trace/trace_flags.h>
18+
#include <opentelemetry/trace/trace_id.h>
19+
20+
#include <xrpl/proto/xrpl.pb.h>
21+
22+
#include <cstdint>
23+
24+
namespace xrpl {
25+
namespace telemetry {
26+
27+
/** Extract OTel context from a protobuf TraceContext message.
28+
29+
@param proto The protobuf TraceContext received from a peer.
30+
@return An OTel Context with the extracted parent span, or an empty
31+
context if the protobuf fields are missing or invalid.
32+
*/
33+
inline opentelemetry::context::Context
34+
extractFromProtobuf(protocol::TraceContext const& proto)
35+
{
36+
namespace trace = opentelemetry::trace;
37+
38+
if (!proto.has_trace_id() || proto.trace_id().size() != 16 ||
39+
!proto.has_span_id() || proto.span_id().size() != 8)
40+
{
41+
return opentelemetry::context::Context{};
42+
}
43+
44+
trace::TraceId traceId(
45+
reinterpret_cast<std::uint8_t const*>(proto.trace_id().data()));
46+
trace::SpanId spanId(
47+
reinterpret_cast<std::uint8_t const*>(proto.span_id().data()));
48+
trace::TraceFlags flags(
49+
proto.has_trace_flags() ? static_cast<std::uint8_t>(proto.trace_flags())
50+
: trace::TraceFlags::kIsSampled);
51+
52+
trace::SpanContext spanCtx(traceId, spanId, flags, /* remote = */ true);
53+
54+
return opentelemetry::context::Context{}.SetValue(
55+
trace::kSpanKey,
56+
opentelemetry::nostd::shared_ptr<trace::Span>(
57+
new trace::DefaultSpan(spanCtx)));
58+
}
59+
60+
/** Inject the current span's trace context into a protobuf TraceContext.
61+
62+
@param ctx The OTel context containing the span to propagate.
63+
@param proto The protobuf TraceContext to populate.
64+
*/
65+
inline void
66+
injectToProtobuf(
67+
opentelemetry::context::Context const& ctx,
68+
protocol::TraceContext& proto)
69+
{
70+
namespace trace = opentelemetry::trace;
71+
72+
auto span = trace::GetSpan(ctx);
73+
if (!span)
74+
return;
75+
76+
auto const& spanCtx = span->GetContext();
77+
if (!spanCtx.IsValid())
78+
return;
79+
80+
// Serialize trace_id (16 bytes)
81+
auto const& traceId = spanCtx.trace_id();
82+
proto.set_trace_id(traceId.Id().data(), trace::TraceId::kSize);
83+
84+
// Serialize span_id (8 bytes)
85+
auto const& spanId = spanCtx.span_id();
86+
proto.set_span_id(spanId.Id().data(), trace::SpanId::kSize);
87+
88+
// Serialize flags
89+
proto.set_trace_flags(spanCtx.trace_flags().flags());
90+
}
91+
92+
} // namespace telemetry
93+
} // namespace xrpl
94+
95+
#endif // XRPL_ENABLE_TELEMETRY

src/xrpld/app/misc/NetworkOPs.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include <xrpld/rpc/DeliveredAmount.h>
3030
#include <xrpld/rpc/MPTokenIssuanceID.h>
3131
#include <xrpld/rpc/ServerHandler.h>
32+
#include <xrpld/telemetry/TracingInstrumentation.h>
3233

3334
#include <xrpl/basics/UptimeClock.h>
3435
#include <xrpl/basics/mulDiv.h>
@@ -1223,16 +1224,26 @@ NetworkOPsImp::processTransaction(
12231224
bool bLocal,
12241225
FailHard failType)
12251226
{
1227+
XRPL_TRACE_TX(registry_.getTelemetry(), "tx.process");
1228+
XRPL_TRACE_SET_ATTR("xrpl.tx.hash", to_string(transaction->getID()).c_str());
1229+
XRPL_TRACE_SET_ATTR("xrpl.tx.local", bLocal);
1230+
12261231
auto ev = m_job_queue.makeLoadEvent(jtTXN_PROC, "ProcessTXN");
12271232

12281233
// preProcessTransaction can change our pointer
12291234
if (!preProcessTransaction(transaction))
12301235
return;
12311236

12321237
if (bLocal)
1238+
{
1239+
XRPL_TRACE_SET_ATTR("xrpl.tx.path", "sync");
12331240
doTransactionSync(transaction, bUnlimited, failType);
1241+
}
12341242
else
1243+
{
1244+
XRPL_TRACE_SET_ATTR("xrpl.tx.path", "async");
12351245
doTransactionAsync(transaction, bUnlimited, failType);
1246+
}
12361247
}
12371248

12381249
void

src/xrpld/overlay/detail/PeerImp.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <xrpld/overlay/Cluster.h>
99
#include <xrpld/overlay/detail/PeerImp.h>
1010
#include <xrpld/overlay/detail/Tuning.h>
11+
#include <xrpld/telemetry/TracingInstrumentation.h>
1112

1213
#include <xrpl/basics/UptimeClock.h>
1314
#include <xrpl/basics/base64.h>
@@ -1269,6 +1270,9 @@ PeerImp::handleTransaction(
12691270
bool eraseTxQueue,
12701271
bool batch)
12711272
{
1273+
XRPL_TRACE_TX(app_.getTelemetry(), "tx.receive");
1274+
XRPL_TRACE_SET_ATTR("xrpl.peer.id", static_cast<int64_t>(id_));
1275+
12721276
XRPL_ASSERT(eraseTxQueue != batch, ("xrpl::PeerImp::handleTransaction : valid inputs"));
12731277
if (tracking_.load() == Tracking::diverged)
12741278
return;
@@ -1287,6 +1291,7 @@ PeerImp::handleTransaction(
12871291
{
12881292
auto stx = std::make_shared<STTx const>(sit);
12891293
uint256 txID = stx->getTransactionID();
1294+
XRPL_TRACE_SET_ATTR("xrpl.tx.hash", to_string(txID).c_str());
12901295

12911296
// Charge strongly for attempting to relay a txn with tfInnerBatchTxn
12921297
// LCOV_EXCL_START
@@ -1320,9 +1325,11 @@ PeerImp::handleTransaction(
13201325

13211326
if (!app_.getHashRouter().shouldProcess(txID, id_, flags, tx_interval))
13221327
{
1328+
XRPL_TRACE_SET_ATTR("xrpl.tx.suppressed", true);
13231329
// we have seen this transaction recently
13241330
if (any(flags & HashRouterFlags::BAD))
13251331
{
1332+
XRPL_TRACE_SET_ATTR("xrpl.tx.status", "known_bad");
13261333
fee_.update(Resource::feeUselessData, "known bad");
13271334
JLOG(p_journal_.debug()) << "Ignoring known bad tx " << txID;
13281335
}

0 commit comments

Comments
 (0)