fix(transcripts): use queue-operation entries for real background agent timing

blackwell-systems · blackwell-systems · commit 9b56ced8c843 · 2026-03-01T01:53:04.000-07:00
Background Task tool_results fire at launch time (~1.5s), not at actual
completion. Parse queue-operation/enqueue entries, which carry the real
<task-notification> payload: tool_use_id, completion timestamp, duration_ms,
and total_tokens. Backfill AgentSpan.CompletedAt, Duration, and TotalTokens
from these notifications after the main scan.

Also propagate TotalTokens through ParseAgentTasks → AgentTask (was
hardcoded 0 with a comment "not available in transcript data").

Verified against session 927a3b9b: Agent A now 46s, Agent B 108s (was 1.5s).
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,19 @@
 
 All notable changes to claudewatch are documented here.
 
+## [v0.4.2] - 2026-03-01
+
+### Fixed
+
+- **Background agent timing** — `AgentSpan.CompletedAt` and `Duration` are now accurate for
+  background Task agents. Previously they came from the background task's tool_result, which
+  fires at launch time (~1.5s), not completion, so SAW wave timings were severely understated.
+  The fix parses `queue-operation` / `enqueue` entries in JSONL transcripts: the entry timestamp
+  is the real completion time, and its `<task-notification>` payload carries `<tool-use-id>` and
+  `<total_tokens>`. These values are backfilled onto matching spans after the scan. For the
+  SAW observability session: Agent A now shows 46s (was 1.5s), Agent B 108s (was 1.5s).
+  `TotalTokens` is now propagated from `AgentSpan` through `ParseAgentTasks` into `AgentTask`.
+
 ## [v0.4.1] - 2026-03-01
 
 ### Added
diff --git a/internal/claude/agents.go b/internal/claude/agents.go
@@ -25,7 +25,7 @@ func ParseAgentTasks(claudeDir string) ([]AgentTask, error) {
 			SessionID:   span.SessionID,
 			Status:      status,
 			DurationMs:  span.Duration.Milliseconds(),
-			TotalTokens: 0, // Token counts not available in transcript data.
+			TotalTokens: span.TotalTokens,
 			ToolUses:    0, // Tool use counts not available in transcript data.
 			Background:  span.Background,
 			CreatedAt:   span.LaunchedAt.Format("2006-01-02T15:04:05Z"),
diff --git a/internal/claude/transcripts.go b/internal/claude/transcripts.go
@@ -5,6 +5,7 @@ import (
 	"encoding/json"
 	"os"
 	"path/filepath"
+	"strconv"
 	"strings"
 	"time"
 )
@@ -24,6 +25,7 @@ type AgentSpan struct {
 	Success      bool          `json:"success"`
 	ResultLength int           `json:"result_length"`
 	ToolUseID    string        `json:"tool_use_id"`
+	TotalTokens  int           `json:"total_tokens"`
 }
 
 // ParseSessionTranscripts scans all JSONL files under claudeDir/projects/
@@ -97,6 +99,9 @@ func ParseSingleTranscript(path string) ([]AgentSpan, error) {
 	// Map agentId -> tool_use_id from progress entries.
 	agentToToolUse := make(map[string]string)
 
+	// Real completion data for background agents, keyed by tool_use_id.
+	taskNotifications := make(map[string]taskNotification)
+
 	var spans []AgentSpan
 
 	scanner := bufio.NewScanner(f)
@@ -118,6 +123,8 @@ func ParseSingleTranscript(path string) ([]AgentSpan, error) {
 			processUserEntry(&entry, pending, &spans)
 		case "progress":
 			processProgressEntry(&entry, agentToToolUse)
+		case "queue-operation":
+			processQueueOperationEntry(&entry, taskNotifications)
 		}
 	}
 
@@ -146,6 +153,24 @@ func ParseSingleTranscript(path string) ([]AgentSpan, error) {
 		spans = append(spans, p.span)
 	}
 
+	// Backfill real completion times for background agents. Background task
+	// tool_results fire at launch time, so CompletedAt/Duration from
+	// processUserEntry is inaccurate. queue-operation/enqueue entries carry
+	// the real timestamp, duration_ms, and total_tokens.
+	for i := range spans {
+		if n, ok := taskNotifications[spans[i].ToolUseID]; ok {
+			if !n.completedAt.IsZero() {
+				spans[i].CompletedAt = n.completedAt
+				if !spans[i].LaunchedAt.IsZero() {
+					spans[i].Duration = n.completedAt.Sub(spans[i].LaunchedAt)
+				}
+			}
+			if n.totalTokens > 0 {
+				spans[i].TotalTokens = n.totalTokens
+			}
+		}
+	}
+
 	return spans, nil
 }
 
@@ -157,6 +182,8 @@ type TranscriptEntry struct {
 	Message         json.RawMessage `json:"message"`
 	Data            json.RawMessage `json:"data"`
 	ParentToolUseID string          `json:"parentToolUseID"`
+	Operation       string          `json:"operation"` // queue-operation: "enqueue" | "dequeue"
+	Content         string          `json:"content"`   // queue-operation: raw text content
 }
 
 // AssistantMessage represents an assistant-role message.
@@ -206,6 +233,15 @@ type pendingTask struct {
 	span AgentSpan
 }
 
+// taskNotification holds the completion data recorded for one background agent,
+// taken from a queue-operation/enqueue entry. Background task tool_results fire
+// at launch time (not completion), so the real CompletedAt and TotalTokens are
+// read from these notification entries and backfilled onto the matching span.
+type taskNotification struct {
+	completedAt time.Time // parsed Timestamp of the enqueue entry; the backfill skips it when zero
+	totalTokens int       // value of the <total_tokens> tag; 0 when the tag is absent or not an integer
+}
+
 // processAssistantEntry handles assistant-type entries, extracting Task
 // launches and TaskStop calls.
 func processAssistantEntry(entry *TranscriptEntry, sessionID string, pending map[string]*pendingTask, killedAgentIDs map[string]bool) {
@@ -322,6 +358,51 @@ func processProgressEntry(entry *TranscriptEntry, agentToToolUse map[string]stri
 	}
 }
 
+// processQueueOperationEntry records completion data from a queue-operation
+// entry whose operation is "enqueue". It reads the <tool-use-id> and optional
+// <total_tokens> tags from entry.Content and stores them, together with the
+// entry's parsed Timestamp, in notifications keyed by that tool-use ID.
+func processQueueOperationEntry(entry *TranscriptEntry, notifications map[string]taskNotification) {
+	if entry.Operation != "enqueue" || entry.Content == "" { // ignore other operations and empty payloads
+		return
+	}
+
+	toolUseID := extractXMLTag(entry.Content, "tool-use-id")
+	if toolUseID == "" { // without an ID there is nothing to match a span against
+		return
+	}
+
+	n := taskNotification{
+		completedAt: ParseTimestamp(entry.Timestamp), // the entry's own timestamp, not a payload field
+	}
+
+	if raw := extractXMLTag(entry.Content, "total_tokens"); raw != "" {
+		if v, err := strconv.Atoi(raw); err == nil { // a non-integer value leaves totalTokens at 0
+			n.totalTokens = v
+		}
+	}
+
+	notifications[toolUseID] = n // a later entry for the same ID overwrites an earlier one
+}
+
+// extractXMLTag returns the whitespace-trimmed text between the first <tag> and
+// the following </tag> in s, or "" if either is missing. It is a plain substring
+// search, not an XML parser: attributes and nested same-name tags are unsupported.
+func extractXMLTag(s, tag string) string {
+	open := "<" + tag + ">"
+	close := "</" + tag + ">" // NOTE(review): shadows the builtin close inside this function
+	start := strings.Index(s, open)
+	if start == -1 {
+		return ""
+	}
+	start += len(open) // advance past the opening tag
+	end := strings.Index(s[start:], close) // offset is relative to start
+	if end == -1 {
+		return ""
+	}
+	return strings.TrimSpace(s[start : start+end])
+}
+
 // resultContentLength computes the total text length of a tool_result's content.
 func resultContentLength(raw json.RawMessage, text string) int {
 	if text != "" {