fullsend-ai · ascerra · Jul 1, 2026 · Jun 29, 2026 · Jun 29, 2026 · Jun 30, 2026
@@ -0,0 +1,56 @@
+package cli
+
+import (
+	"bytes"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/fullsend-ai/fullsend/internal/telemetry"
+	"github.com/fullsend-ai/fullsend/internal/ui"
+)
+
+// TestScanOutputFiles_SkipsTelemetryArtifacts pins that the host-side output
+// redaction scan leaves the recorder's own telemetry files alone while still
+// sanitizing ordinary output.
+//
+// The NDJSON file is still held open for append by the recorder during the
+// scan, so an in-place rewrite would truncate it out from under the open
+// handle; and both telemetry files are metadata-only by construction, so they
+// need no redaction. A file that merely shares the telemetry name under an
+// iteration's output directory is NOT the recorder's artifact and must be
+// sanitized like any other output.
+func TestScanOutputFiles_SkipsTelemetryArtifacts(t *testing.T) {
+	const secret = "Token: ghp_FAKEtesttoken000000000000000000000000\n"
+	root := t.TempDir()
+
+	var (
+		telemPath   = filepath.Join(root, telemetry.TelemetryFile)
+		summaryPath = filepath.Join(root, telemetry.SummaryFile)
+		plainPath   = filepath.Join(root, "output.txt")
+		// Written by a (hypothetical) sandbox agent, not by the recorder.
+		agentPath = filepath.Join(root, "iteration-1", "output", telemetry.TelemetryFile)
+	)
+
+	// Seed every file with the same secret-bearing payload.
+	require.NoError(t, os.MkdirAll(filepath.Dir(agentPath), 0o755))
+	for _, path := range []string{telemPath, summaryPath, plainPath, agentPath} {
+		require.NoError(t, os.WriteFile(path, []byte(secret), 0o644))
+	}
+
+	require.NoError(t, scanOutputFiles(root, "trace-id", ui.New(&bytes.Buffer{})))
+
+	// contents returns the file's current bytes as a string, failing the test
+	// immediately if the file cannot be read.
+	contents := func(path string) string {
+		t.Helper()
+		data, err := os.ReadFile(path)
+		require.NoError(t, err)
+		return string(data)
+	}
+
+	assert.Equal(t, secret, contents(telemPath), "the recorder's own run-telemetry.jsonl must be left untouched")
+	assert.Equal(t, secret, contents(summaryPath), "the recorder's own run-summary.json must be left untouched")
+	assert.NotContains(t, contents(plainPath), "ghp_FAKEtest", "non-telemetry output must still be sanitized")
+	assert.NotContains(t, contents(agentPath), "ghp_FAKEtest", "an agent file sharing the telemetry name must still be sanitized")
+}
@@ -0,0 +1,195 @@
+package cli
+
+import (
+	"fmt"
+	"os"
+	"regexp"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	agentruntime "github.com/fullsend-ai/fullsend/internal/runtime"
+	"github.com/fullsend-ai/fullsend/internal/security"
+	"github.com/fullsend-ai/fullsend/internal/telemetry"
+)
+
+// TestTelemetryExitCode pins how the agent's last exit code and the run error
+// combine into the exit code that telemetry reports.
+func TestTelemetryExitCode(t *testing.T) {
+	boom := fmt.Errorf("boom")
+
+	checks := []struct {
+		want, got int
+		msg       string
+	}{
+		{0, telemetryExitCode(0, nil), "clean run => 0"},
+		{3, telemetryExitCode(3, nil), "agent exit code preserved on success"},
+		// A non-agent failure (e.g. a later step errors) can leave
+		// lastExitCode==0; never report that as success.
+		{1, telemetryExitCode(0, boom), "lastExitCode 0 + error => 1, never success"},
+		// The real agent infra-failure path: rt.Run returns (-1, err), and
+		// runAgent now records that -1 instead of masking it as 1.
+		{-1, telemetryExitCode(-1, boom), "infra failure (-1) preserved faithfully"},
+	}
+	for _, c := range checks {
+		assert.Equal(t, c.want, c.got, c.msg)
+	}
+}
+
+// TestTraceIDUnification pins the invariant runAgent relies on: the per-run
+// security trace id (a dashed UUID, injected into the sandbox as
+// FULLSEND_TRACE_ID) and the W3C telemetry trace id share one underlying
+// value. The telemetry id is the security id with its dashes removed, which is
+// what lets a single id correlate security findings, telemetry, and child
+// traces.
+func TestTraceIDUnification(t *testing.T) {
+	// A W3C trace-id is exactly 32 lowercase hex digits.
+	w3cTraceID := regexp.MustCompile(`^[0-9a-f]{32}$`)
+
+	securityID := security.GenerateTraceID()
+	require.True(t, security.IsValidTraceID(securityID), "security id must stay a valid dashed UUID for the sandbox")
+
+	undashed := strings.ReplaceAll(securityID, "-", "")
+	telemetryID := telemetry.TraceIDFromUUID(securityID)
+
+	assert.Equal(t, undashed, telemetryID, "telemetry trace-id is the security id, dash-stripped")
+	assert.Regexp(t, w3cTraceID, telemetryID, "valid 32-hex W3C trace-id")
+}
+
+// TestResolveWorkItemID exercises resolveWorkItemID against the four
+// work-item environment variables. ISSUE_KEY wins when set; otherwise
+// REPO_FULL_NAME and ISSUE_NUMBER combine into "repo#number"; with only a URL
+// or only a number set, that value is returned as-is; and "unknown" is
+// returned when nothing is set.
+func TestResolveWorkItemID(t *testing.T) {
+	// Every case sets all of these; a key absent from a case's env map is set
+	// to "" (empty == unset) so the ambient environment cannot leak in.
+	envKeys := []string{"ISSUE_KEY", "REPO_FULL_NAME", "ISSUE_NUMBER", "GITHUB_ISSUE_URL"}
+
+	tests := []struct {
+		name string
+		env  map[string]string
+		want string
+	}{
+		{
+			name: "ISSUE_KEY wins over everything",
+			env: map[string]string{
+				"ISSUE_KEY":        "PROJ-7",
+				"REPO_FULL_NAME":   "octo/repo",
+				"ISSUE_NUMBER":     "9",
+				"GITHUB_ISSUE_URL": "https://github.com/octo/repo/issues/9",
+			},
+			want: "PROJ-7",
+		},
+		{
+			name: "repo + number forms canonical github key",
+			env: map[string]string{
+				"REPO_FULL_NAME":   "octo/repo",
+				"ISSUE_NUMBER":     "2577",
+				"GITHUB_ISSUE_URL": "https://github.com/octo/repo/issues/2577",
+			},
+			want: "octo/repo#2577",
+		},
+		{
+			name: "falls back to issue URL when repo missing",
+			env:  map[string]string{"GITHUB_ISSUE_URL": "https://github.com/octo/repo/issues/9"},
+			want: "https://github.com/octo/repo/issues/9",
+		},
+		{
+			name: "falls back to bare issue number",
+			env:  map[string]string{"ISSUE_NUMBER": "42"},
+			want: "42",
+		},
+		{
+			name: "unknown when nothing is set",
+			want: "unknown",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			for _, key := range envKeys {
+				t.Setenv(key, tt.env[key])
+			}
+			assert.Equal(t, tt.want, resolveWorkItemID())
+		})
+	}
+}
+
+// TestChildScriptEnv_AppendsTraceparentOnce checks that the environment built
+// by childScriptEnv keeps the process environment and the RunnerEnv entries,
+// and carries exactly one TRACEPARENT entry holding the supplied value.
+func TestChildScriptEnv_AppendsTraceparentOnce(t *testing.T) {
+	// Isolate from an ambient TRACEPARENT (e.g. a traced CI runner). The
+	// process environment is expected to be passed through, so an inherited
+	// entry could show up as a second TRACEPARENT and fail the test for
+	// reasons unrelated to the code under test. t.Setenv registers restoration
+	// of the original value; Unsetenv then removes the variable for the
+	// duration of the test.
+	t.Setenv("TRACEPARENT", "")
+	require.NoError(t, os.Unsetenv("TRACEPARENT"))
+
+	t.Setenv("FULLSEND_TEST_MARKER", "present")
+	const tp = "00-4f3a9c1b2d8e4a7c9f0b1e2d3c4a5b6d-a1b2c3d4e5f60718-01"
+
+	env := childScriptEnv(map[string]string{"FOO": "bar"}, tp)
+
+	traceparents, hasFoo, hasMarker := 0, false, false
+	for _, e := range env {
+		switch {
+		case strings.HasPrefix(e, "TRACEPARENT="):
+			traceparents++
+			assert.Equal(t, "TRACEPARENT="+tp, e)
+		case e == "FOO=bar":
+			hasFoo = true
+		case e == "FULLSEND_TEST_MARKER=present":
+			hasMarker = true
+		}
+	}
+	assert.Equal(t, 1, traceparents, "exactly one TRACEPARENT entry")
+	assert.True(t, hasFoo, "RunnerEnv must be preserved")
+	assert.True(t, hasMarker, "process environment must be preserved")
+}
+
+// TestChildScriptEnv_EmptyTraceparentOmitted checks that passing an empty
+// traceparent to childScriptEnv yields an environment with no TRACEPARENT
+// entry at all.
+func TestChildScriptEnv_EmptyTraceparentOmitted(t *testing.T) {
+	// Isolate from an ambient TRACEPARENT (e.g. a traced CI runner): an
+	// inherited entry would trip the assertion below even though childScriptEnv
+	// added nothing. t.Setenv registers restoration of the original value;
+	// Unsetenv then removes the variable for the duration of the test.
+	t.Setenv("TRACEPARENT", "")
+	require.NoError(t, os.Unsetenv("TRACEPARENT"))
+
+	env := childScriptEnv(map[string]string{"FOO": "bar"}, "")
+	for _, e := range env {
+		assert.False(t, strings.HasPrefix(e, "TRACEPARENT="), "no empty TRACEPARENT entry when disabled")
+	}
+}
+
+func TestAgentSpanEndAttrs(t *testing.T) {
+	var m agentruntime.RunMetrics
+	m.Model = "claude-opus-4-6"
+	m.InputTokens = 11
+	m.OutputTokens = 1505
+	m.CacheCreationInputTokens = 38832
+	m.CacheReadInputTokens = 109938
+	m.TotalCostUSD = 0.335349
+	m.ToolCalls.Store(11)
+
+	a := agentSpanEndAttrs(2, 0, "anthropic", &m)
+	assert.Equal(t, 2, a["iteration"])
+	assert.Equal(t, 0, a["exit_code"])
+	assert.Equal(t, "anthropic", a["gen_ai.system"], "gen_ai.system is sourced from the runtime, not hardcoded")
+	assert.Equal(t, "claude-opus-4-6", a["gen_ai.request.model"])
+	assert.Equal(t, 11, a["gen_ai.usage.input_tokens"])
+	assert.Equal(t, 1505, a["gen_ai.usage.output_tokens"])
+	assert.Equal(t, 38832, a["gen_ai.usage.cache_creation_input_tokens"])
+	assert.Equal(t, 109938, a["gen_ai.usage.cache_read_input_tokens"])
+	assert.Equal(t, 0.34, a["fullsend.cost_usd"], "cost rounded to cents")
+	assert.Equal(t, 11, a["fullsend.tool_calls"])
+}
+
+func TestAggregateRunMetrics(t *testing.T) {
+	var agg aggregateMetrics
+
+	var m1 agentruntime.RunMetrics
+	m1.NumTurns, m1.TotalCostUSD = 5, 0.10
+	m1.InputTokens, m1.OutputTokens = 10, 100
+	m1.CacheCreationInputTokens, m1.CacheReadInputTokens = 1000, 5000
+	m1.ToolCalls.Store(3)
+	m1.Model = "claude-opus-4-6"
+	aggregateRunMetrics(&agg, &m1, 1)
+
+	var m2 agentruntime.RunMetrics // second iteration, no model reported
+	m2.NumTurns, m2.TotalCostUSD = 2, 0.05
+	m2.InputTokens, m2.OutputTokens = 4, 40
+	m2.CacheCreationInputTokens, m2.CacheReadInputTokens = 200, 900
+	m2.ToolCalls.Store(2)
+	aggregateRunMetrics(&agg, &m2, 2)
+
+	assert.Equal(t, 7, agg.NumTurns)
+	assert.InDelta(t, 0.15, agg.TotalCostUSD, 1e-9)
+	assert.Equal(t, 14, agg.TokenUsage.Input)
+	assert.Equal(t, 140, agg.TokenUsage.Output)
+	assert.Equal(t, 1200, agg.TokenUsage.CacheCreation)
+	assert.Equal(t, 5900, agg.TokenUsage.CacheRead)
+	assert.Equal(t, 5, agg.ToolCalls)
+	assert.Equal(t, 2, agg.Iterations)
+	assert.Equal(t, "claude-opus-4-6", agg.Model, "last non-empty model is retained")
+}
+
+func TestToTelemetryMetrics(t *testing.T) {
+	var agg aggregateMetrics
+	agg.NumTurns = 7
+	agg.TotalCostUSD = 0.24261625
+	agg.TokenUsage.Input = 18432
+	agg.TokenUsage.Output = 2901
+	agg.TokenUsage.CacheCreation = 8000
+	agg.TokenUsage.CacheRead = 50000
+	agg.ToolCalls = 14
+	agg.Iterations = 3
+
+	m := toTelemetryMetrics(agg)
+	assert.Equal(t, 18432, m.InputTokens, "input must map from TokenUsage.Input")
+	assert.Equal(t, 2901, m.OutputTokens, "output must map from TokenUsage.Output")
+	assert.Equal(t, 8000, m.CacheCreationInputTokens, "cache_creation must map from TokenUsage.CacheCreation")
+	assert.Equal(t, 50000, m.CacheReadInputTokens, "cache_read must map from TokenUsage.CacheRead")
+	assert.InDelta(t, 0.24, m.TotalCostUSD, 1e-9, "cost rounded to 2 decimals")
+	assert.Equal(t, 7, m.NumTurns)
+	assert.Equal(t, 14, m.ToolCalls)
+}
@@ -23,6 +23,9 @@ type ClaudeRuntime struct{}
 
 func (ClaudeRuntime) Name() string { return "claude" }
 
+// System returns the OTEL GenAI `gen_ai.system` vendor for Claude Code's models.
+func (ClaudeRuntime) System() string { return "anthropic" }
+
 func (ClaudeRuntime) ConfigDir() string { return sandbox.SandboxClaudeConfig }
 
 func (ClaudeRuntime) WorkspaceDir() string { return sandbox.SandboxWorkspace }

@@ -24,9 +24,23 @@ type streamEvent struct {
 }
 
 // assistantMessage contains tool_use blocks from complete assistant messages.
+// Claude Code's stream-json nests the content array (and model) under "message";
+// older/flat shapes put content at the top level. We accept both.
 type assistantMessage struct {
+	// Type is decoded from the event's "type" field.
 	Type    string          `json:"type"`
+	// Content is the top-level "content" array of the older/flat shape, kept
+	// raw so it can be decoded into content items later.
 	Content json.RawMessage `json:"content"`
+	// Message mirrors the nested "message" object of Claude Code's stream-json.
+	Message struct {
+		// Content is the nested "content" array, kept raw like the top-level one.
+		Content json.RawMessage `json:"content"`
+		// Model is the model name carried on the assistant message, if any.
+		Model   string          `json:"model"`
+	} `json:"message"`
+}
+
+// systemEvent is Claude Code's initial "system"/"init" event, which carries the
+// resolved model name. The result event does not include the model.
+type systemEvent struct {
+	// Type is decoded from the event's "type" field ("system" for this event).
+	Type    string `json:"type"`
+	// Subtype is decoded from the event's "subtype" field (e.g. "init").
+	Subtype string `json:"subtype"`
+	// Model is the resolved model name; empty when the event carries none.
+	Model   string `json:"model"`
 }
 
 type contentItem struct {
@@ -55,8 +69,10 @@ type resultEvent struct {
 	NumTurns     int     `json:"num_turns"`
 	TotalCostUSD float64 `json:"total_cost_usd"`
 	Usage        struct {
-		InputTokens  int `json:"input_tokens"`
-		OutputTokens int `json:"output_tokens"`
+		InputTokens              int `json:"input_tokens"`
+		OutputTokens             int `json:"output_tokens"`
+		CacheCreationInputTokens int `json:"cache_creation_input_tokens"`
+		CacheReadInputTokens     int `json:"cache_read_input_tokens"`
 	} `json:"usage"`
 }
 
@@ -91,6 +107,13 @@ func progressParser(r io.Reader, printer *ui.Printer, start time.Time, metrics *
 			continue
 		}
 
+		if evt.Type == "system" {
+			var se systemEvent
+			if err := json.Unmarshal(line, &se); err == nil && se.Model != "" {
+				metrics.Model = se.Model
+			}
+		}
+
 		if evt.Type == "assistant" {
 			parseAssistantToolUse(line, printer, start, metrics, isCI)
 		}
@@ -102,6 +125,8 @@ func progressParser(r io.Reader, printer *ui.Printer, start time.Time, metrics *
 				metrics.TotalCostUSD = re.TotalCostUSD
 				metrics.InputTokens = re.Usage.InputTokens
 				metrics.OutputTokens = re.Usage.OutputTokens
+				metrics.CacheCreationInputTokens = re.Usage.CacheCreationInputTokens
+				metrics.CacheReadInputTokens = re.Usage.CacheReadInputTokens
 			}
 		}
 	}
@@ -113,8 +138,21 @@ func parseAssistantToolUse(line []byte, printer *ui.Printer, start time.Time, me
 		return
 	}
 
+	// Fall back to the assistant message's model when the system init event did
+	// not carry one, so gen_ai.request.model stays populated for all streams.
+	if metrics.Model == "" && msg.Message.Model != "" {
+		metrics.Model = msg.Message.Model
+	}
+
+	// Real Claude Code output nests content under "message"; fall back to the
+	// top-level "content" for older/flat shapes.
+	content := msg.Message.Content
+	if len(content) == 0 {
+		content = msg.Content
+	}
+
 	var items []contentItem
-	if err := json.Unmarshal(msg.Content, &items); err != nil {
+	if err := json.Unmarshal(content, &items); err != nil {
 		return
 	}