Skip to content

Commit 1f22c5f

Browse files
ctiller authored and copybara-github committed
[chaotic-good] Add fathom metrics to ztrace (grpc#39328)
Ensure any collected fathom metrics show up in the ztrace so we can use them for debugging.

Closes grpc#39328

COPYBARA_INTEGRATE_REVIEW=grpc#39328 from ctiller:trace-fathomable ce593b2
PiperOrigin-RevId: 750304444
1 parent 4ea0370 commit 1f22c5f

File tree

3 files changed

+62
-12
lines changed

3 files changed

+62
-12
lines changed

src/core/ext/transport/chaotic_good/data_endpoints.cc

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,8 @@ void InputQueue::Cancel(uint64_t payload_tag) {
283283

284284
auto Endpoint::WriteLoop(uint32_t id,
285285
RefCountedPtr<OutputBuffers> output_buffers,
286-
std::shared_ptr<PromiseEndpoint> endpoint) {
286+
std::shared_ptr<PromiseEndpoint> endpoint,
287+
std::shared_ptr<TcpZTraceCollector> ztrace_collector) {
287288
output_buffers->AddEndpoint(id);
288289
std::vector<size_t> requested_metrics;
289290
std::optional<size_t> data_rate_metric =
@@ -294,13 +295,13 @@ auto Endpoint::WriteLoop(uint32_t id,
294295
return Loop([id, endpoint = std::move(endpoint),
295296
output_buffers = std::move(output_buffers),
296297
requested_metrics = std::move(requested_metrics),
297-
data_rate_metric]() {
298+
data_rate_metric, ztrace_collector]() {
298299
return TrySeq(
299300
output_buffers->Next(id),
300301
[endpoint, id,
301302
requested_metrics = absl::Span<const size_t>(requested_metrics),
302-
data_rate_metric,
303-
output_buffers](data_endpoints_detail::NextWrite next_write) {
303+
data_rate_metric, output_buffers,
304+
ztrace_collector](data_endpoints_detail::NextWrite next_write) {
304305
GRPC_TRACE_LOG(chaotic_good, INFO)
305306
<< "CHAOTIC_GOOD: Write " << next_write.bytes.Length()
306307
<< "b to data endpoint #" << id;
@@ -310,9 +311,25 @@ auto Endpoint::WriteLoop(uint32_t id,
310311
write_args.set_metrics_sink(EventEngine::Endpoint::WriteEventSink(
311312
requested_metrics,
312313
{EventEngine::Endpoint::WriteEvent::kSendMsg},
313-
[data_rate_metric, id, output_buffers](
314-
EventEngine::Endpoint::WriteEvent event, absl::Time,
314+
[data_rate_metric, id, output_buffers, ztrace_collector,
315+
endpoint](
316+
EventEngine::Endpoint::WriteEvent event,
317+
absl::Time timestamp,
315318
std::vector<EventEngine::Endpoint::WriteMetric> metrics) {
319+
ztrace_collector->Append([event, timestamp, &metrics,
320+
endpoint = endpoint.get()]() {
321+
EndpointWriteMetricsTrace trace{timestamp, event, {}};
322+
trace.metrics.reserve(metrics.size());
323+
for (const auto [id, value] : metrics) {
324+
if (auto name =
325+
endpoint->GetEventEngineEndpoint()->GetMetricName(
326+
id);
327+
name.has_value()) {
328+
trace.metrics.push_back({*name, value});
329+
}
330+
}
331+
return trace;
332+
});
316333
if (event != EventEngine::Endpoint::WriteEvent::kSendMsg) {
317334
return;
318335
}
@@ -405,13 +422,14 @@ Endpoint::Endpoint(uint32_t id, RefCountedPtr<OutputBuffers> output_buffers,
405422
read_party->Spawn(
406423
"read",
407424
[id, input_queues = std::move(input_queues), endpoint,
408-
ztrace_collector = std::move(ztrace_collector)]() {
425+
ztrace_collector]() {
409426
return ReadLoop(id, input_queues, endpoint,
410427
ztrace_collector);
411428
},
412429
[](absl::Status) {});
413430
return Map(
414-
WriteLoop(id, std::move(output_buffers), std::move(endpoint)),
431+
WriteLoop(id, std::move(output_buffers), std::move(endpoint),
432+
std::move(ztrace_collector)),
415433
[read_party](auto x) { return x; });
416434
});
417435
},

src/core/ext/transport/chaotic_good/data_endpoints.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,8 @@ class Endpoint final {
221221
private:
222222
static auto WriteLoop(uint32_t id,
223223
RefCountedPtr<OutputBuffers> output_buffers,
224-
std::shared_ptr<PromiseEndpoint> endpoint);
224+
std::shared_ptr<PromiseEndpoint> endpoint,
225+
std::shared_ptr<TcpZTraceCollector> ztrace_collector);
225226
static auto ReadLoop(uint32_t id, RefCountedPtr<InputQueue> input_queues,
226227
std::shared_ptr<PromiseEndpoint> endpoint,
227228
std::shared_ptr<TcpZTraceCollector> ztrace_collector);

src/core/ext/transport/chaotic_good/tcp_ztrace_collector.h

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,35 @@ struct WriteFrameHeaderTrace {
7676
}
7777
};
7878

79+
// Trace record for one endpoint write event: the time it was reported, which
// write event it was, and the (metric name, value) pairs delivered with it.
// Exposes MemoryUsage() and RenderJson() like the other trace structs here.
struct EndpointWriteMetricsTrace {
80+
// Timestamp supplied with the write event.
absl::Time timestamp;
81+
grpc_event_engine::experimental::EventEngine::Endpoint::WriteEvent
82+
write_event;
83+
// (name, value) pairs. The names are string_views, i.e. non-owning.
// NOTE(review): the viewed storage must outlive this trace record; confirm
// that the metric-name source hands out sufficiently long-lived strings.
std::vector<std::pair<absl::string_view, size_t>> metrics;
84+
85+
// Size estimate: this object plus one pair per stored metric.
// `metrics[0]` is only an operand of sizeof (unevaluated), so this is safe
// even when `metrics` is empty.
size_t MemoryUsage() const {
86+
return sizeof(*this) + sizeof(metrics[0]) * metrics.size();
87+
}
88+
89+
// Writes this record into `object`:
//   "metadata_type"    - "SEND_MSG_METRICS" for kSendMsg, otherwise
//                        "ENDPOINT_WRITE_METRICS_TYPE_<int value of event>"
//   "fathom_timestamp" - absl::StrCat(timestamp)
//   one entry per metric, keyed by the metric name.
void RenderJson(Json::Object& object) const {
90+
std::string name;
91+
switch (write_event) {
92+
case grpc_event_engine::experimental::EventEngine::Endpoint::WriteEvent::
93+
kSendMsg:
94+
name = "SEND_MSG_METRICS";
95+
break;
96+
default:
97+
name = absl::StrCat("ENDPOINT_WRITE_METRICS_TYPE_",
98+
static_cast<int>(write_event));
99+
}
100+
object["metadata_type"] = Json::FromString(name);
101+
object["fathom_timestamp"] = Json::FromString(absl::StrCat(timestamp));
102+
// The structured binding `name` below shadows the local `name` above.
// NOTE(review): assuming Json::Object has std::map-like emplace semantics,
// a metric whose name equals an existing key (e.g. "metadata_type") is
// silently not inserted - confirm this is acceptable.
for (const auto& [name, value] : metrics) {
103+
object.emplace(name, Json::FromNumber(value));
104+
}
105+
}
106+
};
107+
79108
struct WriteLargeFrameHeaderTrace {
80109
TcpDataFrameHeader data_header;
81110
std::vector<double> lb_decisions;
@@ -96,9 +125,11 @@ struct WriteLargeFrameHeaderTrace {
96125
}
97126
};
98127

99-
using TcpZTraceCollector = channelz::ZTraceCollector<
100-
tcp_ztrace_collector_detail::Config, ReadFrameHeaderTrace,
101-
ReadDataHeaderTrace, WriteFrameHeaderTrace, WriteLargeFrameHeaderTrace>;
128+
// Collector type used for chaotic-good TCP traces. Compared with the previous
// definition, EndpointWriteMetricsTrace is appended to the template arguments.
// NOTE(review): presumably the arguments after Config enumerate the trace
// record types the collector accepts - confirm against
// channelz::ZTraceCollector.
using TcpZTraceCollector =
129+
channelz::ZTraceCollector<tcp_ztrace_collector_detail::Config,
130+
ReadFrameHeaderTrace, ReadDataHeaderTrace,
131+
WriteFrameHeaderTrace, WriteLargeFrameHeaderTrace,
132+
EndpointWriteMetricsTrace>;
102133

103134
} // namespace grpc_core::chaotic_good
104135

0 commit comments

Comments
 (0)