From 5cc1eeecf00594cb8263b482af0e26041b749546 Mon Sep 17 00:00:00 2001 From: Russell Cohen Date: Fri, 8 May 2026 17:35:18 +0000 Subject: [PATCH 1/3] fix: remove taskdump from metrics-service non-Linux dependencies tokio's `taskdump` feature only compiles on Linux (aarch64/x86/x86_64). The unconditional [dependencies] entry was causing macOS CI builds to fail. Move it to the linux-only target section where it already existed. --- examples/metrics-service/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/metrics-service/Cargo.toml b/examples/metrics-service/Cargo.toml index 463d199a..be1bd7b2 100644 --- a/examples/metrics-service/Cargo.toml +++ b/examples/metrics-service/Cargo.toml @@ -20,7 +20,7 @@ aws-config = "1" aws-sdk-dynamodb = { version = "1", default-features = false, features = ["behavior-version-latest", "default-https-client", "rt-tokio"] } serde = { version = "1", features = ["derive"] } serde_json = "1" -dial9-tokio-telemetry = { path = "../../dial9-tokio-telemetry", features = ["worker-s3", "tracing-layer", "taskdump"] } +dial9-tokio-telemetry = { path = "../../dial9-tokio-telemetry", features = ["worker-s3", "tracing-layer"] } reqwest = { version = "0.12", default-features = false, features = ["json"] } clap = { version = "4", features = ["derive"] } uuid = { version = "1", features = ["v4"] } From a24b14117525028c84f988f66fc4e32f9b967699 Mon Sep 17 00:00:00 2001 From: Russell Cohen Date: Fri, 8 May 2026 18:03:38 +0000 Subject: [PATCH 2/3] add: TaskDumps UI to viewer --- .../skills/dial9-toolkit/scripts/analyze.js | 3 +- dial9-viewer/ui/flamegraph.js | 12 +- dial9-viewer/ui/trace_analysis.js | 34 +-- dial9-viewer/ui/trace_parser.js | 18 ++ dial9-viewer/ui/viewer.html | 195 +++++++++++++++++- 5 files changed, 236 insertions(+), 26 deletions(-) diff --git a/dial9-viewer/skills/dial9-toolkit/scripts/analyze.js b/dial9-viewer/skills/dial9-toolkit/scripts/analyze.js index 6605ab74..69ce90dc 100644 --- 
a/dial9-viewer/skills/dial9-toolkit/scripts/analyze.js +++ b/dial9-viewer/skills/dial9-toolkit/scripts/analyze.js @@ -677,6 +677,7 @@ async function parseWorkerMain(traceFile, cachePath) { callframeSymbols: mapToEntries(trace.callframeSymbols), threadNames: mapToEntries(trace.threadNames), runtimeWorkers: mapToEntries(trace.runtimeWorkers), + taskDumps: mapToEntries(trace.taskDumps), clockSyncAnchors: trace.clockSyncAnchors, clockOffsetNs: trace.clockOffsetNs, }}); for (const e of trace.events) writeLine({ t: 'e', d: e }); @@ -703,7 +704,7 @@ function loadCacheFile(cachePath) { const rec = JSON.parse(line); switch (rec.t) { case 'm': raw = rec.d; - for (const k of ['spawnLocations','taskSpawnLocs','taskSpawnTimes','taskTerminateTimes','callframeSymbols','threadNames','runtimeWorkers']) + for (const k of ['spawnLocations','taskSpawnLocs','taskSpawnTimes','taskTerminateTimes','callframeSymbols','threadNames','runtimeWorkers','taskDumps']) if (raw[k]) raw[k] = new Map(raw[k]); break; case 'e': events.push(rec.d); break; diff --git a/dial9-viewer/ui/flamegraph.js b/dial9-viewer/ui/flamegraph.js index 7a35ad1d..c75ac237 100644 --- a/dial9-viewer/ui/flamegraph.js +++ b/dial9-viewer/ui/flamegraph.js @@ -93,7 +93,7 @@ let selfCount = 0; let frameCount = 0; function walk(node) { - if (node.name.toLowerCase().includes(queryLower)) { + if (node.name.toLowerCase().includes(queryLower) || (node.fullName && node.fullName.toLowerCase().includes(queryLower))) { selfCount += node.self; frameCount++; } @@ -244,7 +244,7 @@ if (w < 0.5) continue; const isAncestor = !!node.isAncestor; - const searchMatch = !searching || node.name.toLowerCase().includes(qLower); + const searchMatch = !searching || node.name.toLowerCase().includes(qLower) || (node.treeNode && node.treeNode.fullName && node.treeNode.fullName.toLowerCase().includes(qLower)); const highlighted = highlightName != null && node.name === highlightName; const dimmed = (searching && !searchMatch) || (highlightName != null && 
!highlighted); let alpha = 1.0; @@ -498,12 +498,10 @@ tooltip.innerHTML = buildTooltipHtml(hit, pinned); tooltip.style.pointerEvents = pinned ? "auto" : "none"; tooltip.style.display = "block"; - // Clamp to viewport + // Position at top of the flamegraph container so it never covers hovered frames + const containerRect = container.getBoundingClientRect(); const tipX = Math.min(x + 12, window.innerWidth - tooltip.offsetWidth - 8); - let tipY = Math.max(8, y - 50); - if (tipY + tooltip.offsetHeight > window.innerHeight - 8) { - tipY = window.innerHeight - tooltip.offsetHeight - 8; - } + const tipY = Math.max(8, containerRect.top); tooltip.style.left = tipX + "px"; tooltip.style.top = tipY + "px"; if (pinned) { diff --git a/dial9-viewer/ui/trace_analysis.js b/dial9-viewer/ui/trace_analysis.js index 63e053fc..111b49d5 100644 --- a/dial9-viewer/ui/trace_analysis.js +++ b/dial9-viewer/ui/trace_analysis.js @@ -431,22 +431,26 @@ node.count++; for (const addr of chain) { const entry = callframeSymbols.get(addr); - const resolved = Array.isArray(entry) ? entry[0] : entry; - const key = resolved ? resolved.symbol : addr || "??"; - const formatted = formatFrame(addr, callframeSymbols); - if (!node.children.has(key)) { - node.children.set(key, { - name: formatted.text, - fullName: key, - location: resolved ? resolved.location : null, - docsUrl: formatted.docsUrl, - children: new Map(), - count: 0, - self: 0, - }); + // Expand inlined frames: an array entry means multiple frames at one address + const frames = Array.isArray(entry) ? entry : [entry]; + for (let fi = frames.length - 1; fi >= 0; fi--) { + const resolved = frames[fi]; + const key = resolved ? resolved.symbol : addr || "??"; + const formatted = resolved ? formatFrame(resolved) : formatFrame(addr, callframeSymbols); + if (!node.children.has(key)) { + node.children.set(key, { + name: formatted.text, + fullName: key, + location: resolved ? 
resolved.location : null, + docsUrl: formatted.docsUrl, + children: new Map(), + count: 0, + self: 0, + }); + } + node = node.children.get(key); + node.count++; } - node = node.children.get(key); - node.count++; } node.self++; } diff --git a/dial9-viewer/ui/trace_parser.js b/dial9-viewer/ui/trace_parser.js index 55f8a5e9..008cfde1 100644 --- a/dial9-viewer/ui/trace_parser.js +++ b/dial9-viewer/ui/trace_parser.js @@ -198,6 +198,7 @@ const cpuSamples = []; const threadNames = new Map(); const runtimeWorkers = new Map(); // runtime name → [workerId, ...] + const taskDumps = new Map(); // taskId → [{timestamp, callchain}] sorted by timestamp const customEvents = []; // unrecognized event types: {name, timestamp, fields} // { monotonicNs, realtimeNs } anchors used to recover wall clock. const clockSyncAnchors = []; @@ -214,6 +215,7 @@ "TaskSpawnEvent", "TaskTerminateEvent", "CpuSampleEvent", + "TaskDumpEvent", "SymbolTableEntry", "SegmentMetadataEvent", "ClockSyncEvent", @@ -380,6 +382,15 @@ } break; } + case "TaskDumpEvent": { + const taskId = num(v.task_id); + const chain = (v.callchain || []).map( + (addr) => "0x" + BigInt(addr).toString(16) + ); + if (!taskDumps.has(taskId)) taskDumps.set(taskId, []); + taskDumps.get(taskId).push({ timestamp: ts, callchain: chain }); + break; + } case "ClockSyncEvent": { const real = num(v.realtime_ns); if (real > 0) { @@ -468,6 +479,11 @@ return 0; }); + // Sort task dumps by timestamp for efficient lookup during rendering + for (const arr of taskDumps.values()) { + arr.sort((a, b) => a.timestamp - b.timestamp); + } + let clockOffsetNs = null; if (clockSyncAnchors.length > 0) { const a0 = clockSyncAnchors[0]; @@ -506,6 +522,7 @@ taskTerminateTimes, runtimeWorkers, customEvents, + taskDumps, clockSyncAnchors, clockOffsetNs, }; @@ -553,6 +570,7 @@ if (raw.callframeSymbols) raw.callframeSymbols = entriesToMap(raw.callframeSymbols); if (raw.threadNames) raw.threadNames = entriesToMap(raw.threadNames); if (raw.runtimeWorkers) 
raw.runtimeWorkers = entriesToMap(raw.runtimeWorkers); + if (raw.taskDumps) raw.taskDumps = entriesToMap(raw.taskDumps); break; case 'e': events.push(rec.d); break; case 'c': cpuSamples.push(rec.d); break; diff --git a/dial9-viewer/ui/viewer.html b/dial9-viewer/ui/viewer.html index c4134e29..1dbd3b2d 100644 --- a/dial9-viewer/ui/viewer.html +++ b/dial9-viewer/ui/viewer.html @@ -2594,7 +2594,24 @@

⌨ Keyboard

if (!isInstrumented) { labelHtml += ` · no wake data ⓘ`; } + const taskHasDumps = trace.taskDumps && trace.taskDumps.has(selectedTaskId) && trace.taskDumps.get(selectedTaskId).length > 0; + if (taskHasDumps) { + const dumpCount = trace.taskDumps.get(selectedTaskId).length; + labelHtml += ` · 🔥 idle flamegraph (${dumpCount})`; + } document.getElementById("task-detail-label").innerHTML = labelHtml; + if (taskHasDumps) { + document.getElementById("btn-idle-flamegraph").onclick = (e) => { + e.stopPropagation(); + e.preventDefault(); + try { + showIdleTimeFlamegraph(); + } catch (err) { + console.error("showIdleTimeFlamegraph error:", err); + showToast("idle-fg-err", "Error: " + err.message, "error", 5000); + } + }; + } const parent = c.parentElement; const dpr = devicePixelRatio || 1; @@ -2783,6 +2800,9 @@

⌨ Keyboard

} // Draw idle gaps between consecutive polls (where no wake→poll delay is shown) + // Look up task dumps for this task (sorted by timestamp) + const dumps = trace.taskDumps ? (trace.taskDumps.get(selectedTaskId) || []) : []; + let dumpIdx = 0; // cursor into dumps array for efficient lookup for (let i = 0; i < polls.length - 1; i++) { const gapStart = polls[i].end; const gapEnd = polls[i + 1].start; @@ -2796,9 +2816,43 @@

⌨ Keyboard

const x2 = Math.min(wakeX, LABEL_W + Math.min(drawW, nsToX(gapEnd, drawW))); const w = Math.max(x2 - x1, 0); if (w < 1) continue; - ctx.fillStyle = "#2a2a4a"; + + // Find task dump(s) for this idle period. + // A dump captured during poll[i-1] has ts in [poll[i-1].start, poll[i-1].end]. + // The capture triggers a spurious re-wake, so poll[i] is the spurious poll. + // The dump represents what the task is waiting on during THIS gap (after + // the spurious poll[i]). So we collect dumps with ts within poll[i-1]. + const gapDumps = []; + const prevPollStart = i > 0 ? polls[i - 1].start : polls[i].start; + while (dumpIdx < dumps.length && dumps[dumpIdx].timestamp < prevPollStart) dumpIdx++; + for (let di = dumpIdx; di < dumps.length && dumps[di].timestamp <= polls[i].start; di++) { + gapDumps.push(dumps[di]); + } + const hasDump = gapDumps.length > 0; + + ctx.fillStyle = hasDump ? "#2a2a5a" : "#2a2a4a"; ctx.fillRect(x1, bandTop, w, bandH); - ctx.strokeStyle = "#444"; + + // Cross-hatch pattern for idle periods with task dumps + if (hasDump) { + ctx.save(); + ctx.beginPath(); + ctx.rect(x1, bandTop, w, bandH); + ctx.clip(); + ctx.strokeStyle = "rgba(140, 120, 255, 0.35)"; + ctx.lineWidth = 1; + ctx.setLineDash([]); + const step = 8; + for (let hx = x1 - bandH; hx < x2; hx += step) { + ctx.beginPath(); + ctx.moveTo(hx, bandTop + bandH); + ctx.lineTo(hx + bandH, bandTop); + ctx.stroke(); + } + ctx.restore(); + } + + ctx.strokeStyle = hasDump ? "#7c6cff" : "#444"; ctx.lineWidth = 1; ctx.setLineDash([3, 3]); ctx.strokeRect(x1, bandTop, w, bandH); @@ -2812,7 +2866,10 @@

⌨ Keyboard

taskDetailHitRegions.push({ x1, x2, y1: bandTop, y2: bandTop + bandH, type: "idle", - detail: `Idle — waiting ${fmtDur(dur)} for waker (no wake received yet)`, + detail: hasDump + ? `Idle — waiting ${fmtDur(dur)} (click for async stack trace)` + : `Idle — waiting ${fmtDur(dur)} for waker (no wake received yet)`, + taskDumps: hasDump ? gapDumps : null, }); } @@ -4346,6 +4403,9 @@

⌨ Keyboard

break; } } + if (!found) { + document.getElementById("task-detail").style.cursor = (hit && hit.taskDumps) ? "pointer" : ""; + } const icon = hit ? (hit.type === "polling" ? "⚡" : hit.type === "scheduled" ? "⏳" : "💤") : ""; statusEl.textContent = hit ? `${icon} ${hit.detail}` : ""; }); @@ -4356,6 +4416,27 @@

⌨ Keyboard

renderAll(); } }); + document.getElementById("task-detail").addEventListener("click", (e) => { + const c = document.getElementById("task-detail-canvas"); + const rect = c.getBoundingClientRect(); + const mx = e.clientX - rect.left; + const my = e.clientY - rect.top; + // Check if clicking a waker region (existing behavior: select waker task) + for (const r of taskDetailWakeRegions) { + if (mx >= r.x1 && mx <= r.x2 && my >= r.y1 && my <= r.y2) { + selectedTaskId = r.wakerTaskId; + renderAll(); + return; + } + } + // Check if clicking an idle region with task dumps + for (const r of taskDetailHitRegions) { + if (mx >= r.x1 && mx <= r.x2 && my >= r.y1 && my <= r.y2 && r.taskDumps) { + showTaskDumpStack(r.taskDumps); + return; + } + } + }); window.addEventListener("resize", () => { if (trace) renderAll(); @@ -4412,6 +4493,114 @@

⌨ Keyboard

const fgContainer = document.getElementById("fg-container"); const fgInstance = FlamegraphRenderer.createFlamegraph(fgContainer); + function showTaskDumpStack(dumps) { + const samples = dumps.map(d => ({ callchain: d.callchain, workerId: 0 })); + fgActive = true; + schedActive = false; + const sidebar = document.getElementById("stack-sidebar"); + const title = document.getElementById("stack-sidebar-title"); + const body = document.getElementById("stack-sidebar-body"); + document.getElementById("sidebar-tabs").style.display = "none"; + title.textContent = `Waiting on — ${dumps.length} capture${dumps.length > 1 ? "s" : ""}`; + body.innerHTML = ""; + body.style.display = "flex"; + body.style.flexDirection = "column"; + const actions = document.createElement("div"); + actions.style.cssText = "display:flex;gap:8px;margin-bottom:6px;flex-shrink:0;align-items:center"; + actions.innerHTML = `${dumps.length} async stack capture${dumps.length > 1 ? "s" : ""}`; + body.appendChild(actions); + fgContainer.style.flex = "1"; + fgContainer.style.minHeight = "0"; + body.appendChild(fgContainer); + const wasHidden = sidebar.style.display !== "flex"; + sidebar.style.display = "flex"; + if (wasHidden && trace) requestAnimationFrame(renderAll); + requestAnimationFrame(() => { + fgInstance.setData(samples, trace.callframeSymbols); + fgInstance.resize(); + }); + } + + function showIdleTimeFlamegraph() { + if (!selectedTaskId || !trace.taskDumps) return; + const dumps = trace.taskDumps.get(selectedTaskId); + if (!dumps || dumps.length === 0) return; + + // Collect polls for this task to compute idle durations + const polls = []; + for (const w of workerIds) { + for (const s of workerSpans[w].polls) { + if (s.taskId === selectedTaskId) polls.push(s); + } + } + polls.sort((a, b) => a.start - b.start); + + // For each dump, find the idle period it belongs to and compute weight (duration in µs) + const weightedSamples = []; + let di = 0; + for (let i = 0; i < polls.length - 1 && di < 
dumps.length; i++) { + const gapStart = polls[i].end; + const gapEnd = polls[i + 1].start; + const dur = gapEnd - gapStart; + while (di < dumps.length && dumps[di].timestamp <= gapStart) di++; + for (let j = di; j < dumps.length && dumps[j].timestamp <= gapEnd; j++) { + // Weight = idle duration in µs (minimum 1) + const weight = Math.max(1, Math.round(dur / 1000)); + weightedSamples.push({ callchain: dumps[j].callchain, weight }); + } + } + // Also include dumps after the last poll (task still idle at trace end) + if (polls.length > 0) { + const lastEnd = polls[polls.length - 1].end; + const traceEnd = trace.maxTs || lastEnd; + while (di < dumps.length) { + const dur = traceEnd - lastEnd; + const weight = Math.max(1, Math.round(dur / 1000)); + weightedSamples.push({ callchain: dumps[di].callchain, weight }); + di++; + } + } + + if (weightedSamples.length === 0) return; + + // Expand weighted samples: repeat each sample proportional to weight + const totalWeight = weightedSamples.reduce((s, x) => s + x.weight, 0); + const scale = totalWeight > 10000 ? 
10000 / totalWeight : 1; + const expandedSamples = []; + for (const ws of weightedSamples) { + const count = Math.max(1, Math.round(ws.weight * scale)); + for (let k = 0; k < count; k++) { + expandedSamples.push({ callchain: ws.callchain, workerId: 0 }); + } + } + + // Show in sidebar using the flamegraph renderer + fgActive = true; + schedActive = false; + const sidebar = document.getElementById("stack-sidebar"); + const title = document.getElementById("stack-sidebar-title"); + const body = document.getElementById("stack-sidebar-body"); + document.getElementById("sidebar-tabs").style.display = "none"; + title.textContent = `Idle time flamegraph — ${dumps.length} samples`; + body.innerHTML = ""; + body.style.display = "flex"; + body.style.flexDirection = "column"; + const actions = document.createElement("div"); + actions.style.cssText = "display:flex;gap:8px;margin-bottom:6px;flex-shrink:0;align-items:center"; + actions.innerHTML = `${dumps.length} task dumps, time-weighted`; + body.appendChild(actions); + fgContainer.style.flex = "1"; + fgContainer.style.minHeight = "0"; + body.appendChild(fgContainer); + const wasHidden = sidebar.style.display !== "flex"; + sidebar.style.display = "flex"; + if (wasHidden && trace) requestAnimationFrame(renderAll); + requestAnimationFrame(() => { + fgInstance.setData(expandedSamples, trace.callframeSymbols); + fgInstance.resize(); + }); + } + function showFlamegraph(selStart, selEnd) { const samples = FlamegraphRenderer.filterCpuSamples(trace.cpuSamples, selStart, selEnd); if (!samples.length) { From e172a273af3c60edca699daf7c60753532e241a6 Mon Sep 17 00:00:00 2001 From: Russell Cohen Date: Fri, 8 May 2026 19:10:08 +0000 Subject: [PATCH 3/3] Address PR #377 review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Fix inlined-frame iteration order in buildFlamegraphTree. 
Per blazesym, an array entry in callframeSymbols is [outermost, ..., innermost] — the real function at the address is at [0] and [i>0] are inlined callees. The previous implementation iterated N→0, which inverted the call graph in the flamegraph (inner inlined functions appeared as parents of the outer function they were inlined into). Now iterates 0→N, matching caller→callee. Skip nullish slots so sparse arrays (from out-of-order SymbolTableEntry events) no longer produce phantom (unknown) tree levels. 2. Wrap showTaskDumpStack click in try/catch matching the showIdleTimeFlamegraph pattern — errors now surface as a toast instead of silently failing. 3. Extract renderFlamegraphInSidebar({title, subtitle, samples}) helper. Both showTaskDumpStack and showIdleTimeFlamegraph now delegate to it, removing ~40 lines of duplicated sidebar setup. 4. Extract drawCrossHatch(x, y, w, h) helper for the diagonal stripe pattern used on idle periods that have a task dump. 5. Document taskDumps in the ParsedTrace schema (dial9-trace-loading/SKILL.md) so agent skills can find it. Extend the schema validator to understand Map<K, [{...}]> types (array-of-objects map values). 6. 
Add unit tests in test_trace_analysis.js covering: - Flamegraph inline frame ordering (catches the bug fixed in #1) - Sparse inline arrays with undefined slots - Unresolved address handling - TaskDumpEvent parsing shape, sort order, task-id integrity --- .../skills/dial9-trace-loading/SKILL.md | 1 + dial9-viewer/ui/test_all_skills_snippets.js | 13 ++ dial9-viewer/ui/test_trace_analysis.js | 118 ++++++++++++++++++ dial9-viewer/ui/trace_analysis.js | 11 +- dial9-viewer/ui/viewer.html | 93 +++++++------- 5 files changed, 189 insertions(+), 47 deletions(-) diff --git a/dial9-viewer/skills/dial9-trace-loading/SKILL.md b/dial9-viewer/skills/dial9-trace-loading/SKILL.md index 44677c7a..e55b59f4 100644 --- a/dial9-viewer/skills/dial9-trace-loading/SKILL.md +++ b/dial9-viewer/skills/dial9-trace-loading/SKILL.md @@ -35,6 +35,7 @@ description: Parse and load dial9 Tokio runtime trace files. Covers the ParsedTr hasSchedWait: boolean, // trace includes kernel scheduling wait data hasTaskTracking: boolean, // trace includes task spawn/terminate events taskInstrumented: Map, // task ID → whether task has tracing instrumentation + taskDumps: Map, // task ID → async stack captures (sorted by timestamp); see dial9-tokio-telemetry `taskdump` feature } ``` diff --git a/dial9-viewer/ui/test_all_skills_snippets.js b/dial9-viewer/ui/test_all_skills_snippets.js index 0abc2a60..038d5089 100644 --- a/dial9-viewer/ui/test_all_skills_snippets.js +++ b/dial9-viewer/ui/test_all_skills_snippets.js @@ -214,6 +214,14 @@ async function main() { .replace(/\[\w+\]:/g, '_dynamic_:') .replace(/:\s*Map<[^,]+,\s*([^>]+)>/g, (_, valType) => { const v = valType.trim(); + // Array of objects: Map → values are arrays whose elements have those keys. + // Emit a marker string that the later [{...}] pass won't double-process; + // a separate pass below expands it back to a JS array literal. 
+ const arrObjMatch = v.match(/^\[\{([^}]+)\}\]$/); + if (arrObjMatch) { + const keys = arrObjMatch[1].split(',').map(k => k.trim()).filter(Boolean); + return ': {"_map_":"__ARR_OBJ__' + keys.join('|') + '__"}'; + } const objMatch = v.match(/\{([^}]+)\}/); if (objMatch) { const keys = objMatch[1].split(',').map(k => k.trim()).filter(Boolean); @@ -231,6 +239,11 @@ async function main() { .replace(/\[\{([^}]+)\}\]/g, (_, inner) => { const keys = inner.split(',').map(k => k.trim()).filter(Boolean); return '[{' + keys.map(k => `"${k}":"_any_"`).join(',') + '}]'; + }) + // Expand Map placeholder from earlier pass into a real array literal. + .replace(/"__ARR_OBJ__([^_"]+)__"/g, (_, keysPipe) => { + const keys = keysPipe.split('|'); + return '[{' + keys.map(k => `"${k}":"_any_"`).join(',') + '}]'; }); let docSkeleton; try { docSkeleton = (new Function('return {' + schemaJs + '}'))(); } diff --git a/dial9-viewer/ui/test_trace_analysis.js b/dial9-viewer/ui/test_trace_analysis.js index d0c76686..1597186e 100644 --- a/dial9-viewer/ui/test_trace_analysis.js +++ b/dial9-viewer/ui/test_trace_analysis.js @@ -367,6 +367,115 @@ async function main() { pass("buildFgData returns null for empty samples"); } + // Inlined frames: when callframeSymbols.get(addr) returns an array, per + // blazesym the array is ordered [outermost, ..., innermost]. entry[0] is the + // real function at the address; entry[i>0] are inlined callees so the call + // chain goes entry[0] -> entry[1] -> entry[2]. The flamegraph tree must + // descend in that same order (outermost as parent, innermost as leaf). 
+ function testFlamegraphInlineOrder() { + const callframeSymbols = new Map([ + ["0x1000", [ + { symbol: "outer_fn", location: "outer.rs:10" }, + { symbol: "mid_fn", location: "mid.rs:20" }, + { symbol: "leaf_fn", location: "leaf.rs:30" }, + ]], + ]); + const samples = [{ callchain: ["0x1000"], workerId: 0 }]; + const tree = buildFlamegraphTree(samples, callframeSymbols); + if (tree.children.size !== 1) fail(`root has ${tree.children.size} children, expected 1`); + const outer = [...tree.children.values()][0]; + if (!outer.fullName.includes("outer_fn")) fail(`child of root is "${outer.fullName}", expected "outer_fn"`); + if (outer.children.size !== 1) fail(`outer has ${outer.children.size} children, expected 1`); + const mid = [...outer.children.values()][0]; + if (!mid.fullName.includes("mid_fn")) fail(`child of outer is "${mid.fullName}", expected "mid_fn"`); + const leaf = [...mid.children.values()][0]; + if (!leaf.fullName.includes("leaf_fn")) fail(`child of mid is "${leaf.fullName}", expected "leaf_fn"`); + if (leaf.self !== 1) fail(`leaf.self = ${leaf.self}, expected 1 (innermost frame is where the sample lands)`); + pass("Inlined frames expand outermost→innermost as parent→child in the flamegraph"); + } + + // The inline-expansion code must not crash when an address maps to an array + // with nullish elements (can happen with sparse SymbolTableEntry events or + // when a child inline is resolved before its parent frame). + function testFlamegraphInlineTolerantOfNullSlots() { + // arr[0] present, arr[1] undefined, arr[2] present. The iteration should + // skip the undefined slot rather than creating a "(unknown)" level. 
+ const sparse = new Array(3); + sparse[0] = { symbol: "outer_fn", location: null }; + sparse[2] = { symbol: "leaf_fn", location: null }; + const callframeSymbols = new Map([["0x2000", sparse]]); + const samples = [{ callchain: ["0x2000"], workerId: 0 }]; + const tree = buildFlamegraphTree(samples, callframeSymbols); + // Expected: (all) -> outer_fn -> leaf_fn (sparse slot skipped) + const outer = [...tree.children.values()][0]; + if (!outer || !outer.fullName.includes("outer_fn")) fail(`expected outer_fn child, got ${outer && outer.fullName}`); + if (outer.children.size !== 1) fail(`outer has ${outer.children.size} children, expected 1 (sparse slot should be skipped)`); + const leaf = [...outer.children.values()][0]; + if (!leaf.fullName.includes("leaf_fn")) fail(`expected leaf_fn after outer_fn, got ${leaf.fullName}`); + pass("Sparse inline arrays do not produce phantom tree levels"); + } + + // An address that is not present in callframeSymbols should still produce + // a single-level child using the raw address as the key (so unresolved + // traces remain visible rather than collapsing). 
+ function testFlamegraphUnknownAddress() { + const callframeSymbols = new Map(); // empty — address resolves to undefined + const samples = [{ callchain: ["0x3000"], workerId: 0 }]; + const tree = buildFlamegraphTree(samples, callframeSymbols); + if (tree.children.size !== 1) fail(`root has ${tree.children.size} children for single unresolved address`); + const node = [...tree.children.values()][0]; + if (node.self !== 1) fail(`unresolved node.self = ${node.self}, expected 1`); + pass("Unresolved addresses still produce a single tree level"); + } + + // ── TaskDumpEvent parsing (verified against the demo trace) ── + + function testTaskDumpsParsed() { + if (!trace.taskDumps) fail("trace.taskDumps should be a Map"); + if (!(trace.taskDumps instanceof Map)) fail("trace.taskDumps should be an instance of Map"); + pass(`trace.taskDumps is a Map with ${trace.taskDumps.size} task IDs`); + } + + function testTaskDumpsSortedByTimestamp() { + // Every value in taskDumps is an array sorted by timestamp — the renderer + // relies on this for its O(n) sweep across idle gaps. + for (const [tid, dumps] of trace.taskDumps) { + for (let i = 1; i < dumps.length; i++) { + if (dumps[i].timestamp < dumps[i - 1].timestamp) { + fail(`taskDumps for task ${tid} not sorted (index ${i})`); + } + } + } + pass("All taskDumps arrays are sorted by timestamp"); + } + + function testTaskDumpsShape() { + // Each dump is {timestamp, callchain} where callchain is an array of hex address strings. 
+ for (const [tid, dumps] of trace.taskDumps) { + for (const d of dumps) { + if (typeof d.timestamp !== "number") fail(`dump.timestamp for task ${tid} is ${typeof d.timestamp}`); + if (!Array.isArray(d.callchain)) fail(`dump.callchain for task ${tid} is not an array`); + for (const addr of d.callchain) { + if (typeof addr !== "string" || !addr.startsWith("0x")) { + fail(`dump.callchain entry ${addr} not a hex string`); + } + } + break; // sample one per task is enough + } + } + pass("TaskDumps have expected {timestamp, callchain} shape with hex-string addresses"); + } + + function testTaskDumpsTaskIdsKnown() { + // Every task ID that has a dump should be a known spawned task (no orphans). + for (const tid of trace.taskDumps.keys()) { + if (!trace.taskSpawnTimes.has(tid)) { + fail(`task ${tid} has taskDumps but is not in taskSpawnTimes`); + } + } + pass("All taskDump task IDs refer to tasks that appear in taskSpawnTimes"); + } + // ── buildSpanData ── function testBuildSpanDataPairing() { @@ -822,6 +931,15 @@ async function main() { testFlattenFlamegraph(); testBuildFgData(); testBuildFgDataEmpty(); + testFlamegraphInlineOrder(); + testFlamegraphInlineTolerantOfNullSlots(); + testFlamegraphUnknownAddress(); + + console.log("\ntaskDumps:"); + testTaskDumpsParsed(); + testTaskDumpsSortedByTimestamp(); + testTaskDumpsShape(); + testTaskDumpsTaskIdsKnown(); console.log("\nbuildSpanData:"); testBuildSpanDataPairing(); diff --git a/dial9-viewer/ui/trace_analysis.js b/dial9-viewer/ui/trace_analysis.js index 111b49d5..6c2adc96 100644 --- a/dial9-viewer/ui/trace_analysis.js +++ b/dial9-viewer/ui/trace_analysis.js @@ -431,10 +431,17 @@ node.count++; for (const addr of chain) { const entry = callframeSymbols.get(addr); - // Expand inlined frames: an array entry means multiple frames at one address + // Expand inlined frames. 
Per blazesym, an array entry is ordered + // [outermost, ..., innermost]: entry[0] is the real function at this + // address, and entry[i>0] are inlined callees (entry[0] calls entry[1] + // calls entry[2], etc.). To walk the call graph caller→callee while + // descending the flamegraph tree, iterate 0 → N. Skip nullish slots + // that can appear in sparse arrays (rare, but can happen if inline + // SymbolTableEntry events arrive before their depth=0 sibling). const frames = Array.isArray(entry) ? entry : [entry]; - for (let fi = frames.length - 1; fi >= 0; fi--) { + for (let fi = 0; fi < frames.length; fi++) { const resolved = frames[fi]; + if (fi > 0 && !resolved) continue; const key = resolved ? resolved.symbol : addr || "??"; const formatted = resolved ? formatFrame(resolved) : formatFrame(addr, callframeSymbols); if (!node.children.has(key)) { diff --git a/dial9-viewer/ui/viewer.html b/dial9-viewer/ui/viewer.html index 1dbd3b2d..1855b257 100644 --- a/dial9-viewer/ui/viewer.html +++ b/dial9-viewer/ui/viewer.html @@ -2634,6 +2634,25 @@

⌨ Keyboard

return formatHumanDuration(ns); } + // Diagonal cross-hatch fill inside the rect (x, y, w, h), clipped. + // Used to visually flag idle periods that have an associated task dump. + function drawCrossHatch(x, y, w, h, { stroke = "rgba(140,120,255,0.35)", step = 8 } = {}) { + ctx.save(); + ctx.beginPath(); + ctx.rect(x, y, w, h); + ctx.clip(); + ctx.strokeStyle = stroke; + ctx.lineWidth = 1; + ctx.setLineDash([]); + for (let hx = x - h; hx < x + w; hx += step) { + ctx.beginPath(); + ctx.moveTo(hx, y + h); + ctx.lineTo(hx + h, y); + ctx.stroke(); + } + ctx.restore(); + } + const bandTop = 50, bandH = 30; ctx.font = "9px monospace"; ctx.textAlign = "center"; @@ -2835,21 +2854,7 @@

⌨ Keyboard

// Cross-hatch pattern for idle periods with task dumps if (hasDump) { - ctx.save(); - ctx.beginPath(); - ctx.rect(x1, bandTop, w, bandH); - ctx.clip(); - ctx.strokeStyle = "rgba(140, 120, 255, 0.35)"; - ctx.lineWidth = 1; - ctx.setLineDash([]); - const step = 8; - for (let hx = x1 - bandH; hx < x2; hx += step) { - ctx.beginPath(); - ctx.moveTo(hx, bandTop + bandH); - ctx.lineTo(hx + bandH, bandTop); - ctx.stroke(); - } - ctx.restore(); + drawCrossHatch(x1, bandTop, w, bandH); } ctx.strokeStyle = hasDump ? "#7c6cff" : "#444"; @@ -4432,7 +4437,12 @@

⌨ Keyboard

// Check if clicking an idle region with task dumps for (const r of taskDetailHitRegions) { if (mx >= r.x1 && mx <= r.x2 && my >= r.y1 && my <= r.y2 && r.taskDumps) { - showTaskDumpStack(r.taskDumps); + try { + showTaskDumpStack(r.taskDumps); + } catch (err) { + console.error("showTaskDumpStack error:", err); + showToast("task-dump-err", "Error: " + err.message, "error", 5000); + } return; } } @@ -4493,21 +4503,24 @@

⌨ Keyboard

const fgContainer = document.getElementById("fg-container"); const fgInstance = FlamegraphRenderer.createFlamegraph(fgContainer); - function showTaskDumpStack(dumps) { - const samples = dumps.map(d => ({ callchain: d.callchain, workerId: 0 })); + // Open the stack sidebar showing a flamegraph built from `samples`. + // Used by `showTaskDumpStack`, `showIdleTimeFlamegraph`, and any + // other caller that just wants "here's a pile of callchains, render + // them as a flamegraph in the sidebar". + function renderFlamegraphInSidebar({ title, subtitle, samples }) { fgActive = true; schedActive = false; const sidebar = document.getElementById("stack-sidebar"); - const title = document.getElementById("stack-sidebar-title"); + const titleEl = document.getElementById("stack-sidebar-title"); const body = document.getElementById("stack-sidebar-body"); document.getElementById("sidebar-tabs").style.display = "none"; - title.textContent = `Waiting on — ${dumps.length} capture${dumps.length > 1 ? "s" : ""}`; + titleEl.textContent = title; body.innerHTML = ""; body.style.display = "flex"; body.style.flexDirection = "column"; const actions = document.createElement("div"); actions.style.cssText = "display:flex;gap:8px;margin-bottom:6px;flex-shrink:0;align-items:center"; - actions.innerHTML = `${dumps.length} async stack capture${dumps.length > 1 ? "s" : ""}`; + actions.innerHTML = `${subtitle}`; body.appendChild(actions); fgContainer.style.flex = "1"; fgContainer.style.minHeight = "0"; @@ -4521,6 +4534,16 @@

⌨ Keyboard

}); } + function showTaskDumpStack(dumps) { + const samples = dumps.map(d => ({ callchain: d.callchain, workerId: 0 })); + const s = dumps.length > 1 ? "s" : ""; + renderFlamegraphInSidebar({ + title: `Waiting on — ${dumps.length} capture${s}`, + subtitle: `${dumps.length} async stack capture${s}`, + samples, + }); + } + function showIdleTimeFlamegraph() { if (!selectedTaskId || !trace.taskDumps) return; const dumps = trace.taskDumps.get(selectedTaskId); @@ -4574,30 +4597,10 @@

⌨ Keyboard

} } - // Show in sidebar using the flamegraph renderer - fgActive = true; - schedActive = false; - const sidebar = document.getElementById("stack-sidebar"); - const title = document.getElementById("stack-sidebar-title"); - const body = document.getElementById("stack-sidebar-body"); - document.getElementById("sidebar-tabs").style.display = "none"; - title.textContent = `Idle time flamegraph — ${dumps.length} samples`; - body.innerHTML = ""; - body.style.display = "flex"; - body.style.flexDirection = "column"; - const actions = document.createElement("div"); - actions.style.cssText = "display:flex;gap:8px;margin-bottom:6px;flex-shrink:0;align-items:center"; - actions.innerHTML = `${dumps.length} task dumps, time-weighted`; - body.appendChild(actions); - fgContainer.style.flex = "1"; - fgContainer.style.minHeight = "0"; - body.appendChild(fgContainer); - const wasHidden = sidebar.style.display !== "flex"; - sidebar.style.display = "flex"; - if (wasHidden && trace) requestAnimationFrame(renderAll); - requestAnimationFrame(() => { - fgInstance.setData(expandedSamples, trace.callframeSymbols); - fgInstance.resize(); + renderFlamegraphInSidebar({ + title: `Idle time flamegraph — ${dumps.length} samples`, + subtitle: `${dumps.length} task dumps, time-weighted`, + samples: expandedSamples, }); }