Skip to content

Commit 964106a

Browse files
committed
Draft: redo tracing UI and add span close events
1 parent 5e984b9 commit 964106a

8 files changed

Lines changed: 314 additions & 250 deletions

File tree

dial9-tokio-telemetry/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,10 @@ harness = false
129129
name = "threadlocal_encode"
130130
harness = false
131131

132+
[[example]]
133+
name = "tracing_sleep"
134+
required-features = ["tracing-layer"]
135+
132136
[[bench]]
133137
name = "tracing_layer_bench"
134138
harness = false

dial9-tokio-telemetry/src/tracing_layer.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
//! for production use with appropriate span filtering.
5858
5959
use crate::telemetry::{TelemetryHandle, clock_monotonic_ns, current_worker_id};
60+
use dial9_trace_format::TraceEvent;
6061
use dial9_trace_format::encoder::Schema;
6162
use dial9_trace_format::schema::FieldDef;
6263
use dial9_trace_format::types::{FieldType, FieldValue};
@@ -67,6 +68,13 @@ use tracing::callsite::Identifier;
6768
use tracing::span;
6869
use tracing_subscriber::{Layer, layer::Context, registry::LookupSpan};
6970

71+
#[derive(TraceEvent)]
72+
struct SpanCloseEvent {
73+
#[traceevent(timestamp)]
74+
timestamp_ns: u64,
75+
span_id: u64,
76+
}
77+
7078
// ── Per-callsite schema cache ───────────────────────────────────────────────
7179

7280
/// Cached schemas for a single callsite (one for enter, one for exit).
@@ -367,4 +375,15 @@ where
367375
enc.write_event(&schemas.exit, &values);
368376
});
369377
}
378+
379+
fn on_close(&self, id: span::Id, _ctx: Context<'_, S>) {
380+
let Some(handle) = TelemetryHandle::try_current() else {
381+
return;
382+
};
383+
384+
handle.record_encodable_event(&SpanCloseEvent {
385+
timestamp_ns: clock_monotonic_ns(),
386+
span_id: id.into_u64(),
387+
});
388+
}
370389
}

dial9-tokio-telemetry/tests/tracing_layer.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ use tracing_subscriber::prelude::*;
1414
struct SpanEvents {
1515
enter_count: u32,
1616
exit_count: u32,
17+
close_count: u32,
1718
enter_names: Vec<String>,
1819
/// All (field_key, field_value) pairs seen on enter events.
1920
enter_fields: Vec<(String, String)>,
@@ -34,6 +35,7 @@ fn decode_span_events(path: &std::path::Path) -> SpanEvents {
3435
let mut result = SpanEvents {
3536
enter_count: 0,
3637
exit_count: 0,
38+
close_count: 0,
3739
enter_names: Vec::new(),
3840
enter_fields: Vec::new(),
3941
exit_fields: Vec::new(),
@@ -88,6 +90,8 @@ fn decode_span_events(path: &std::path::Path) -> SpanEvents {
8890
.push((field_def.name.clone(), v.to_owned()));
8991
}
9092
}
93+
} else if ev.name == "SpanCloseEvent" {
94+
result.close_count += 1;
9195
}
9296
})
9397
.unwrap();
@@ -185,6 +189,12 @@ fn span_events_appear_in_trace() {
185189
"enter/exit count mismatch"
186190
);
187191

192+
// Close events: one per unique span instance
193+
assert!(
194+
events.close_count > 0,
195+
"expected at least 1 SpanCloseEvent, got 0"
196+
);
197+
188198
// Span names
189199
assert!(
190200
events.enter_names.contains(&"handle_request".to_string()),

dial9-viewer/skills/analyze.js

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -198,14 +198,12 @@ function accumulateTrace(acc, trace) {
198198

199199
// Span durations (native HDR histogram for bounded memory, exact percentiles)
200200
if (trace.customEvents && trace.customEvents.length > 0) {
201-
const { spansByWorker } = buildSpanData(trace.customEvents);
202-
for (const spans of Object.values(spansByWorker)) {
203-
for (const s of spans) {
204-
const dur = Math.max(1, Math.round(s.end - s.start));
205-
let h = acc.spanStats.get(s.spanName);
206-
if (!h) { h = createHistogram(); acc.spanStats.set(s.spanName, h); }
207-
h.record(dur);
208-
}
201+
const { allSpans } = buildSpanData(trace.customEvents);
202+
for (const s of allSpans) {
203+
const dur = Math.max(1, Math.round(s.end - s.start));
204+
let h = acc.spanStats.get(s.spanName);
205+
if (!h) { h = createHistogram(); acc.spanStats.set(s.spanName, h); }
206+
h.record(dur);
209207
}
210208
}
211209
}
@@ -767,8 +765,8 @@ function analyzeWorkerMain(cachePath) {
767765
partial.schedDelayWorst.sort((a, b) => b.delay - a.delay);
768766
partial.schedDelayWorst.length = Math.min(partial.schedDelayWorst.length, 100);
769767
if (trace.customEvents && trace.customEvents.length > 0) {
770-
const { spansByWorker } = buildSpanData(trace.customEvents);
771-
for (const ss of Object.values(spansByWorker)) for (const s of ss)
768+
const { allSpans } = buildSpanData(trace.customEvents);
769+
for (const s of allSpans)
772770
(partial.spanDurations[s.spanName] || (partial.spanDurations[s.spanName] = [])).push(Math.max(1, Math.round(s.end - s.start)));
773771
}
774772
process.stdout.write(JSON.stringify(partial) + '\n');

dial9-viewer/ui/demo-trace.bin

1.36 MB
Binary file not shown.

dial9-viewer/ui/test_trace_analysis.js

Lines changed: 62 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -373,20 +373,18 @@ async function main() {
373373
{ name: "SpanExitEvent", timestamp: 1200, fields: { worker_id: 0, span_id: 2, span_name: "redis_get", fields: { key: "foo" } } },
374374
{ name: "SpanExitEvent", timestamp: 1300, fields: { worker_id: 0, span_id: 1, span_name: "handle_request", fields: { user_id: "42" } } },
375375
];
376-
const { spansByWorker, spanMeta } = buildSpanData(customEvents);
377-
const w0 = spansByWorker[0] || [];
378-
if (w0.length !== 2) fail(`Expected 2 span intervals on worker 0, got ${w0.length}`);
379-
if (w0[0].spanName !== "handle_request" && w0[1].spanName !== "handle_request")
380-
fail("Missing handle_request span");
381-
if (w0[0].spanName !== "redis_get" && w0[1].spanName !== "redis_get")
382-
fail("Missing redis_get span");
383-
// Verify sorted by start time
384-
if (w0[0].start > w0[1].start) fail("Spans not sorted by start time");
385-
// Verify enter/exit pairing
386-
const redis = w0.find(s => s.spanName === "redis_get");
376+
const { allSpans, spanMeta } = buildSpanData(customEvents);
377+
if (allSpans.length !== 2) fail(`Expected 2 spans, got ${allSpans.length}`);
378+
const redis = allSpans.find(s => s.spanName === "redis_get");
379+
const handle = allSpans.find(s => s.spanName === "handle_request");
380+
if (!redis || !handle) fail("Missing expected spans");
387381
if (redis.start !== 1100 || redis.end !== 1200) fail("redis_get timing wrong");
382+
if (redis.segments.length !== 1) fail(`Expected 1 segment, got ${redis.segments.length}`);
383+
if (redis.segments[0].workerId !== 0) fail("segment workerId wrong");
388384
if (!spanMeta.has(1) || !spanMeta.has(2)) fail("spanMeta missing entries");
389-
pass(`${w0.length} span intervals paired correctly`);
385+
// Verify sorted by start time
386+
if (allSpans[0].start > allSpans[1].start) fail("Spans not sorted by start time");
387+
pass(`${allSpans.length} spans paired correctly`);
390388
}
391389

392390
function testBuildSpanDataParent() {
@@ -396,17 +394,17 @@ async function main() {
396394
{ name: "SpanExitEvent", timestamp: 1200, fields: { worker_id: 0, span_id: 20, span_name: "child", fields: {} } },
397395
{ name: "SpanExitEvent", timestamp: 1300, fields: { worker_id: 0, span_id: 10, span_name: "root", fields: {} } },
398396
];
399-
const { spansByWorker } = buildSpanData(customEvents);
400-
const child = spansByWorker[0].find(s => s.spanName === "child");
397+
const { allSpans } = buildSpanData(customEvents);
398+
const child = allSpans.find(s => s.spanName === "child");
401399
if (child.parentSpanId !== 10) fail(`Expected parentSpanId=10, got ${child.parentSpanId}`);
402-
const root = spansByWorker[0].find(s => s.spanName === "root");
400+
const root = allSpans.find(s => s.spanName === "root");
403401
if (root.parentSpanId !== null) fail(`Expected root parentSpanId=null, got ${root.parentSpanId}`);
404402
pass("Parent span IDs preserved correctly");
405403
}
406404

407405
function testBuildSpanDataEmpty() {
408-
const { spansByWorker, spanMeta } = buildSpanData([]);
409-
if (Object.keys(spansByWorker).length !== 0) fail("Expected empty spansByWorker");
406+
const { allSpans, spanMeta } = buildSpanData([]);
407+
if (allSpans.length !== 0) fail("Expected empty allSpans");
410408
if (spanMeta.size !== 0) fail("Expected empty spanMeta");
411409
pass("Empty input produces empty output");
412410
}
@@ -421,11 +419,10 @@ async function main() {
421419
{ name: "SpanExitEvent", timestamp: 1400, fields: { worker_id: 0, span_id: 2, span_name: "mid", fields: {} } },
422420
{ name: "SpanExitEvent", timestamp: 1500, fields: { worker_id: 0, span_id: 1, span_name: "root", fields: {} } },
423421
];
424-
const { spansByWorker, maxDepth } = buildSpanData(customEvents);
425-
const spans = spansByWorker[0];
426-
const root = spans.find(s => s.spanName === "root");
427-
const mid = spans.find(s => s.spanName === "mid");
428-
const leaf = spans.find(s => s.spanName === "leaf");
422+
const { allSpans, maxDepth } = buildSpanData(customEvents);
423+
const root = allSpans.find(s => s.spanName === "root");
424+
const mid = allSpans.find(s => s.spanName === "mid");
425+
const leaf = allSpans.find(s => s.spanName === "leaf");
429426
if (root.depth !== 0) fail(`root depth=${root.depth}, expected 0`);
430427
if (mid.depth !== 1) fail(`mid depth=${mid.depth}, expected 1`);
431428
if (leaf.depth !== 2) fail(`leaf depth=${leaf.depth}, expected 2`);
@@ -441,31 +438,31 @@ async function main() {
441438
{ name: "SpanExitEvent", timestamp: 1200, fields: { worker_id: 0, span_id: 2, span_name: "b", fields: {} } },
442439
{ name: "SpanExitEvent", timestamp: 1300, fields: { worker_id: 0, span_id: 1, span_name: "a", fields: {} } },
443440
];
444-
const { spansByWorker } = buildSpanData(customEvents);
445-
if (!spansByWorker[0] || spansByWorker[0].length !== 2) fail("Expected 2 spans");
441+
const { allSpans } = buildSpanData(customEvents);
442+
if (allSpans.length !== 2) fail("Expected 2 spans");
446443
// Just verify it didn't crash; depths may be arbitrary due to cycle
447444
pass("Cyclic parent chain does not stack overflow");
448445
}
449446

450447
function testBuildSpanDataRecycledId() {
451-
// Span ID 1 used first as "alpha", then recycled as "beta"
448+
// Span ID 1 used first as "alpha", closed, then recycled as "beta"
452449
const customEvents = [
453450
{ name: "SpanEnterEvent", timestamp: 1000, fields: { worker_id: 0, span_id: 1, parent_span_id: null, span_name: "alpha", fields: {} } },
454451
{ name: "SpanExitEvent", timestamp: 1100, fields: { worker_id: 0, span_id: 1, span_name: "alpha", fields: {} } },
452+
{ name: "SpanCloseEvent", timestamp: 1150, fields: { span_id: 1 } },
455453
// Same span_id reused with different name
456454
{ name: "SpanEnterEvent", timestamp: 2000, fields: { worker_id: 0, span_id: 1, parent_span_id: null, span_name: "beta", fields: {} } },
457455
{ name: "SpanExitEvent", timestamp: 2100, fields: { worker_id: 0, span_id: 1, span_name: "beta", fields: {} } },
456+
{ name: "SpanCloseEvent", timestamp: 2150, fields: { span_id: 1 } },
458457
// Child of the recycled span
459458
{ name: "SpanEnterEvent", timestamp: 3000, fields: { worker_id: 0, span_id: 2, parent_span_id: 1, span_name: "child", fields: {} } },
460459
{ name: "SpanExitEvent", timestamp: 3100, fields: { worker_id: 0, span_id: 2, span_name: "child", fields: {} } },
461460
];
462-
const { spansByWorker } = buildSpanData(customEvents);
463-
const spans = spansByWorker[0];
464-
if (spans.length !== 3) fail(`Expected 3 spans, got ${spans.length}`);
465-
const alpha = spans.find(s => s.spanName === "alpha");
466-
const beta = spans.find(s => s.spanName === "beta");
461+
const { allSpans } = buildSpanData(customEvents);
462+
if (allSpans.length !== 3) fail(`Expected 3 spans, got ${allSpans.length}`);
463+
const alpha = allSpans.find(s => s.spanName === "alpha");
464+
const beta = allSpans.find(s => s.spanName === "beta");
467465
if (!alpha || !beta) fail("Missing alpha or beta span");
468-
// Both should exist as separate intervals despite same span_id
469466
if (alpha.start !== 1000 || beta.start !== 2000) fail("Span intervals not distinct");
470467
pass("Recycled span IDs produce separate intervals");
471468
}
@@ -477,14 +474,13 @@ async function main() {
477474
{ name: "SpanEnter:myapp::handle:src/main.rs:10", timestamp: 1000, fields: { worker_id: 0, span_id: 1, parent_span_id: null, span_name: "handle", request_id: "abc-123" } },
478475
{ name: "SpanExit:myapp::handle:src/main.rs:10", timestamp: 1100, fields: { worker_id: 0, span_id: 1, span_name: "handle", request_id: "abc-123" } },
479476
];
480-
const { spansByWorker } = buildSpanData(customEvents);
481-
const spans = spansByWorker[0];
482-
if (!spans || spans.length !== 1) fail(`Expected 1 span, got ${spans?.length}`);
483-
if (spans[0].spanName !== "handle") fail(`Expected span name 'handle', got '${spans[0].spanName}'`);
484-
if (spans[0].fields.request_id !== "abc-123") fail(`Expected request_id='abc-123', got '${spans[0].fields.request_id}'`);
477+
const { allSpans } = buildSpanData(customEvents);
478+
if (!allSpans || allSpans.length !== 1) fail(`Expected 1 span, got ${allSpans?.length}`);
479+
if (allSpans[0].spanName !== "handle") fail(`Expected span name 'handle', got '${allSpans[0].spanName}'`);
480+
if (allSpans[0].fields.request_id !== "abc-123") fail(`Expected request_id='abc-123', got '${allSpans[0].fields.request_id}'`);
485481
// Base fields should NOT appear in the user fields
486-
if (spans[0].fields.worker_id) fail("worker_id should not be in user fields");
487-
if (spans[0].fields.span_name) fail("span_name should not be in user fields");
482+
if (allSpans[0].fields.worker_id) fail("worker_id should not be in user fields");
483+
if (allSpans[0].fields.span_name) fail("span_name should not be in user fields");
488484
pass("Per-callsite schema with typed fields parsed correctly");
489485
}
490486

@@ -495,15 +491,39 @@ async function main() {
495491
// This enter has no matching exit (trace ended mid-span)
496492
{ name: "SpanEnter:app::b:f:2", timestamp: 1200, fields: { worker_id: 0, span_id: 2, parent_span_id: null, span_name: "b" } },
497493
];
498-
const { spansByWorker, unmatchedSpans } = buildSpanData(customEvents);
499-
const matched = spansByWorker[0] || [];
500-
if (matched.length !== 1) fail(`Expected 1 matched span, got ${matched.length}`);
494+
const { allSpans, unmatchedSpans } = buildSpanData(customEvents);
495+
if (allSpans.length !== 1) fail(`Expected 1 matched span, got ${allSpans.length}`);
501496
if (!unmatchedSpans || unmatchedSpans.length !== 1) fail(`Expected 1 unmatched span, got ${unmatchedSpans?.length}`);
502497
if (unmatchedSpans[0].spanName !== "b") fail(`Expected unmatched span 'b', got '${unmatchedSpans[0].spanName}'`);
503498
if (unmatchedSpans[0].spanId !== 2) fail(`Expected unmatched spanId 2, got ${unmatchedSpans[0].spanId}`);
504499
pass("Unmatched spans (enter without exit) detected correctly");
505500
}
506501

502+
function testBuildSpanDataMultiplePolls() {
503+
// A span entered/exited multiple times (async future polled 3 times with sleep gap)
504+
const customEvents = [
505+
{ name: "SpanEnter:app::f:f:1", timestamp: 1000, fields: { worker_id: 0, span_id: 1, parent_span_id: null, span_name: "my_fn" } },
506+
{ name: "SpanExit:app::f:f:1", timestamp: 1500, fields: { worker_id: 0, span_id: 1, span_name: "my_fn" } },
507+
{ name: "SpanEnter:app::f:f:1", timestamp: 100000, fields: { worker_id: 1, span_id: 1, parent_span_id: null, span_name: "my_fn" } },
508+
{ name: "SpanExit:app::f:f:1", timestamp: 100200, fields: { worker_id: 1, span_id: 1, span_name: "my_fn" } },
509+
{ name: "SpanEnter:app::f:f:1", timestamp: 100300, fields: { worker_id: 0, span_id: 1, parent_span_id: null, span_name: "my_fn" } },
510+
{ name: "SpanExit:app::f:f:1", timestamp: 100400, fields: { worker_id: 0, span_id: 1, span_name: "my_fn" } },
511+
{ name: "SpanCloseEvent", timestamp: 100500, fields: { span_id: 1 } },
512+
];
513+
const { allSpans } = buildSpanData(customEvents);
514+
if (allSpans.length !== 1) fail(`Expected 1 span, got ${allSpans.length}`);
515+
const s = allSpans[0];
516+
if (s.segments.length !== 3) fail(`Expected 3 segments, got ${s.segments.length}`);
517+
if (s.start !== 1000) fail(`Expected start=1000, got ${s.start}`);
518+
if (s.end !== 100400) fail(`Expected end=100400, got ${s.end}`);
519+
// activeNs = 500 + 200 + 100 = 800
520+
if (s.activeNs !== 800) fail(`Expected activeNs=800, got ${s.activeNs}`);
521+
// Workers: polled on both 0 and 1
522+
const workers = [...new Set(s.segments.map(seg => seg.workerId))].sort();
523+
if (workers.length !== 2 || workers[0] !== 0 || workers[1] !== 1) fail(`Expected workers [0,1], got ${workers}`);
524+
pass("Multiple polls grouped into single span with segments");
525+
}
526+
507527
// ── Regression: open PollStart at trace end must not create phantom poll (#194) ──
508528

509529
function testOpenPollStartDiscarded() {
@@ -576,6 +596,7 @@ async function main() {
576596
testBuildSpanDataRecycledId();
577597
testBuildSpanDataPerCallsiteSchema();
578598
testBuildSpanDataUnmatched();
599+
testBuildSpanDataMultiplePolls();
579600

580601
console.log("\n✓ All analysis checks passed!");
581602
}

0 commit comments

Comments
 (0)