Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
206 changes: 176 additions & 30 deletions internal/cli/run.go

Large diffs are not rendered by default.

56 changes: 56 additions & 0 deletions internal/cli/scan_output_telemetry_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package cli

import (
"bytes"
"os"
"path/filepath"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/fullsend-ai/fullsend/internal/telemetry"
"github.com/fullsend-ai/fullsend/internal/ui"
)

// TestScanOutputFiles_SkipsTelemetryArtifacts verifies that the host-side
// output redaction scan leaves the recorder's telemetry files alone. The NDJSON
// file is still held open for append by the recorder while the scan runs, so
// rewriting it in place would truncate it underneath that open handle; both
// telemetry files are also metadata-only by construction and need no
// redaction. Every other output file must still be sanitized.
func TestScanOutputFiles_SkipsTelemetryArtifacts(t *testing.T) {
	const secret = "Token: ghp_FAKEtesttoken000000000000000000000000\n"

	root := t.TempDir()
	telemetryPath := filepath.Join(root, telemetry.TelemetryFile)
	summaryPath := filepath.Join(root, telemetry.SummaryFile)
	plainPath := filepath.Join(root, "output.txt")
	// An agent in the sandbox may write a file under its iteration output that
	// only happens to share the telemetry file name. It is not the recorder's
	// own artifact, so it must be sanitized like any other output.
	nestedPath := filepath.Join(root, "iteration-1", "output", telemetry.TelemetryFile)
	require.NoError(t, os.MkdirAll(filepath.Dir(nestedPath), 0o755))

	// Seed every file with the same fake token so the outcome per file is
	// determined purely by whether the scan touched it.
	for _, path := range []string{telemetryPath, summaryPath, plainPath, nestedPath} {
		require.NoError(t, os.WriteFile(path, []byte(secret), 0o644))
	}

	require.NoError(t, scanOutputFiles(root, "trace-id", ui.New(&bytes.Buffer{})))

	// contents reads a file back, aborting the test if it cannot be read.
	contents := func(path string) string {
		t.Helper()
		data, err := os.ReadFile(path)
		require.NoError(t, err)
		return string(data)
	}

	assert.Equal(t, secret, contents(telemetryPath), "the recorder's own run-telemetry.jsonl must be left untouched")
	assert.Equal(t, secret, contents(summaryPath), "the recorder's own run-summary.json must be left untouched")
	assert.NotContains(t, contents(plainPath), "ghp_FAKEtest", "non-telemetry output must still be sanitized")
	assert.NotContains(t, contents(nestedPath), "ghp_FAKEtest", "an agent file sharing the telemetry name must still be sanitized")
}
195 changes: 195 additions & 0 deletions internal/cli/telemetry_run_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
package cli

import (
"fmt"
"regexp"
"strings"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

agentruntime "github.com/fullsend-ai/fullsend/internal/runtime"
"github.com/fullsend-ai/fullsend/internal/security"
"github.com/fullsend-ai/fullsend/internal/telemetry"
)

// TestTelemetryExitCode pins how telemetryExitCode combines the last agent
// exit code with the error returned by the run.
func TestTelemetryExitCode(t *testing.T) {
	check := assert.New(t)
	failure := fmt.Errorf("boom")

	// Without an error, the agent's exit code is reported as-is.
	check.Equal(0, telemetryExitCode(0, nil), "clean run => 0")
	check.Equal(3, telemetryExitCode(3, nil), "agent exit code preserved on success")

	// A failure outside the agent (for example a later step erroring) can
	// leave the last exit code at 0; that must never be reported as success.
	check.Equal(1, telemetryExitCode(0, failure), "lastExitCode 0 + error => 1, never success")

	// The real agent infra-failure path: rt.Run returns (-1, err), and runAgent
	// records that -1 rather than masking it as 1.
	check.Equal(-1, telemetryExitCode(-1, failure), "infra failure (-1) preserved faithfully")
}

// TestTraceIDUnification guards the invariant runAgent depends on: the per-run
// security trace id (a dashed UUID, injected into the sandbox as
// FULLSEND_TRACE_ID) and the W3C telemetry trace id are one and the same
// value. The telemetry id is simply the security id with its dashes removed,
// which is what allows a single id to correlate security findings, telemetry,
// and child traces.
func TestTraceIDUnification(t *testing.T) {
	w3cTraceID := regexp.MustCompile(`^[0-9a-f]{32}$`)

	securityID := security.GenerateTraceID()
	require.True(t, security.IsValidTraceID(securityID), "security id must stay a valid dashed UUID for the sandbox")

	telemetryID := telemetry.TraceIDFromUUID(securityID)
	dashless := strings.ReplaceAll(securityID, "-", "")
	assert.Equal(t, dashless, telemetryID, "telemetry trace-id is the security id, dash-stripped")
	assert.Regexp(t, w3cTraceID, telemetryID, "valid 32-hex W3C trace-id")
}

// TestResolveWorkItemID walks the precedence order resolveWorkItemID applies
// to the ISSUE_KEY, REPO_FULL_NAME, ISSUE_NUMBER and GITHUB_ISSUE_URL
// environment variables, ending with the "unknown" fallback.
func TestResolveWorkItemID(t *testing.T) {
	scenarios := []struct {
		name   string
		key    string
		repo   string
		number string
		url    string
		want   string
	}{
		{
			name:   "ISSUE_KEY wins over everything",
			key:    "PROJ-7",
			repo:   "octo/repo",
			number: "9",
			url:    "https://github.com/octo/repo/issues/9",
			want:   "PROJ-7",
		},
		{
			name:   "repo + number forms canonical github key",
			repo:   "octo/repo",
			number: "2577",
			url:    "https://github.com/octo/repo/issues/2577",
			want:   "octo/repo#2577",
		},
		{
			name: "falls back to issue URL when repo missing",
			url:  "https://github.com/octo/repo/issues/9",
			want: "https://github.com/octo/repo/issues/9",
		},
		{
			name:   "falls back to bare issue number",
			number: "42",
			want:   "42",
		},
		{
			name: "unknown when nothing is set",
			want: "unknown",
		},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			// Every variable is set explicitly, with empty standing in for
			// unset, so the ambient environment cannot leak into the result.
			vars := []struct{ key, value string }{
				{"ISSUE_KEY", sc.key},
				{"REPO_FULL_NAME", sc.repo},
				{"ISSUE_NUMBER", sc.number},
				{"GITHUB_ISSUE_URL", sc.url},
			}
			for _, v := range vars {
				t.Setenv(v.key, v.value)
			}

			assert.Equal(t, sc.want, resolveWorkItemID())
		})
	}
}

// TestChildScriptEnv_AppendsTraceparentOnce checks that childScriptEnv keeps
// the process environment and the runner-supplied variables, and contributes
// exactly one TRACEPARENT entry carrying the given value.
func TestChildScriptEnv_AppendsTraceparentOnce(t *testing.T) {
	const traceparent = "00-4f3a9c1b2d8e4a7c9f0b1e2d3c4a5b6d-a1b2c3d4e5f60718-01"
	t.Setenv("FULLSEND_TEST_MARKER", "present")

	var (
		traceparentCount int
		sawRunnerVar     bool
		sawProcessVar    bool
	)
	for _, entry := range childScriptEnv(map[string]string{"FOO": "bar"}, traceparent) {
		if strings.HasPrefix(entry, "TRACEPARENT=") {
			traceparentCount++
			assert.Equal(t, "TRACEPARENT="+traceparent, entry)
			continue
		}
		// Neither literal starts with "TRACEPARENT=", so these comparisons
		// only ever matter for the remaining entries.
		sawRunnerVar = sawRunnerVar || entry == "FOO=bar"
		sawProcessVar = sawProcessVar || entry == "FULLSEND_TEST_MARKER=present"
	}

	assert.Equal(t, 1, traceparentCount, "exactly one TRACEPARENT entry")
	assert.True(t, sawRunnerVar, "RunnerEnv must be preserved")
	assert.True(t, sawProcessVar, "process environment must be preserved")
}

func TestChildScriptEnv_EmptyTraceparentOmitted(t *testing.T) {
env := childScriptEnv(map[string]string{"FOO": "bar"}, "")
for _, e := range env {
assert.False(t, strings.HasPrefix(e, "TRACEPARENT="), "no empty TRACEPARENT entry when disabled")
}
}

// TestAgentSpanEndAttrs checks the attribute set agentSpanEndAttrs builds from
// an iteration number, an exit code, the runtime's system name, and the
// metrics collected for that run.
func TestAgentSpanEndAttrs(t *testing.T) {
	var metrics agentruntime.RunMetrics
	metrics.ToolCalls.Store(11)
	metrics.TotalCostUSD = 0.335349
	metrics.CacheReadInputTokens = 109938
	metrics.CacheCreationInputTokens = 38832
	metrics.OutputTokens = 1505
	metrics.InputTokens = 11
	metrics.Model = "claude-opus-4-6"

	attrs := agentSpanEndAttrs(2, 0, "anthropic", &metrics)

	check := assert.New(t)

	// Run identity.
	check.Equal(2, attrs["iteration"])
	check.Equal(0, attrs["exit_code"])

	// GenAI attributes.
	check.Equal("anthropic", attrs["gen_ai.system"], "gen_ai.system is sourced from the runtime, not hardcoded")
	check.Equal("claude-opus-4-6", attrs["gen_ai.request.model"])
	check.Equal(11, attrs["gen_ai.usage.input_tokens"])
	check.Equal(1505, attrs["gen_ai.usage.output_tokens"])
	check.Equal(38832, attrs["gen_ai.usage.cache_creation_input_tokens"])
	check.Equal(109938, attrs["gen_ai.usage.cache_read_input_tokens"])

	// Project-specific attributes.
	check.Equal(0.34, attrs["fullsend.cost_usd"], "cost rounded to cents")
	check.Equal(11, attrs["fullsend.tool_calls"])
}

// TestAggregateRunMetrics feeds two iterations' metrics into
// aggregateRunMetrics and checks that the counters are summed, the iteration
// number is recorded, and the model from the first iteration survives a second
// iteration that reports none.
func TestAggregateRunMetrics(t *testing.T) {
	var total aggregateMetrics

	var first agentruntime.RunMetrics
	first.Model = "claude-opus-4-6"
	first.NumTurns = 5
	first.TotalCostUSD = 0.10
	first.InputTokens = 10
	first.OutputTokens = 100
	first.CacheCreationInputTokens = 1000
	first.CacheReadInputTokens = 5000
	first.ToolCalls.Store(3)
	aggregateRunMetrics(&total, &first, 1)

	// The second iteration deliberately leaves Model empty.
	var second agentruntime.RunMetrics
	second.NumTurns = 2
	second.TotalCostUSD = 0.05
	second.InputTokens = 4
	second.OutputTokens = 40
	second.CacheCreationInputTokens = 200
	second.CacheReadInputTokens = 900
	second.ToolCalls.Store(2)
	aggregateRunMetrics(&total, &second, 2)

	check := assert.New(t)
	check.Equal(7, total.NumTurns)
	check.InDelta(0.15, total.TotalCostUSD, 1e-9)
	check.Equal(14, total.TokenUsage.Input)
	check.Equal(140, total.TokenUsage.Output)
	check.Equal(1200, total.TokenUsage.CacheCreation)
	check.Equal(5900, total.TokenUsage.CacheRead)
	check.Equal(5, total.ToolCalls)
	check.Equal(2, total.Iterations)
	check.Equal("claude-opus-4-6", total.Model, "last non-empty model is retained")
}

// TestToTelemetryMetrics checks the field-by-field mapping toTelemetryMetrics
// performs from the run aggregate to the telemetry metrics, including the
// rounding of the cost to two decimals.
func TestToTelemetryMetrics(t *testing.T) {
	var total aggregateMetrics
	total.Iterations = 3
	total.NumTurns = 7
	total.ToolCalls = 14
	total.TotalCostUSD = 0.24261625
	total.TokenUsage.Input = 18432
	total.TokenUsage.Output = 2901
	total.TokenUsage.CacheCreation = 8000
	total.TokenUsage.CacheRead = 50000

	got := toTelemetryMetrics(total)

	check := assert.New(t)
	check.Equal(18432, got.InputTokens, "input must map from TokenUsage.Input")
	check.Equal(2901, got.OutputTokens, "output must map from TokenUsage.Output")
	check.Equal(8000, got.CacheCreationInputTokens, "cache_creation must map from TokenUsage.CacheCreation")
	check.Equal(50000, got.CacheReadInputTokens, "cache_read must map from TokenUsage.CacheRead")
	check.InDelta(0.24, got.TotalCostUSD, 1e-9, "cost rounded to 2 decimals")
	check.Equal(7, got.NumTurns)
	check.Equal(14, got.ToolCalls)
}
3 changes: 3 additions & 0 deletions internal/runtime/claude.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ type ClaudeRuntime struct{}

// Name returns the identifier of this runtime, "claude".
func (ClaudeRuntime) Name() string {
	return "claude"
}

// System reports the vendor name used for the OTEL GenAI `gen_ai.system`
// attribute when running Claude Code's models: "anthropic".
func (ClaudeRuntime) System() string {
	return "anthropic"
}

// ConfigDir returns the runtime's configuration directory, which is the
// sandbox package's SandboxClaudeConfig value.
func (ClaudeRuntime) ConfigDir() string {
	return sandbox.SandboxClaudeConfig
}

// WorkspaceDir returns the runtime's workspace directory, which is the sandbox
// package's SandboxWorkspace value.
func (ClaudeRuntime) WorkspaceDir() string {
	return sandbox.SandboxWorkspace
}
Expand Down
44 changes: 41 additions & 3 deletions internal/runtime/claude_progress.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,23 @@ type streamEvent struct {
}

// assistantMessage contains tool_use blocks from complete assistant messages.
// Claude Code's stream-json nests the content array (and model) under "message";
// older/flat shapes put content at the top level. We accept both.
type assistantMessage struct {
// Type is decoded from the event's "type" field.
Type string `json:"type"`
// Content is the top-level content array of the older/flat shape. It is the
// fallback used when Message.Content is empty.
Content json.RawMessage `json:"content"`
// Message is the nested object under "message" in Claude Code's stream-json.
Message struct {
// Content is the nested content array; it is preferred over the top-level
// Content and is kept raw so it can be decoded into content items later.
Content json.RawMessage `json:"content"`
// Model is the model name carried on the assistant message. It is used to
// fill the run metrics' model only when none has been recorded yet.
Model string `json:"model"`
} `json:"message"`
}

// systemEvent is Claude Code's initial "system"/"init" event, which carries the
// resolved model name. The result event does not include the model.
type systemEvent struct {
// Type is decoded from the event's "type" field.
Type string `json:"type"`
// Subtype is decoded from the event's "subtype" field.
// NOTE(review): the parser code visible here does not read it — confirm
// whether it is consulted elsewhere.
Subtype string `json:"subtype"`
// Model is the model name; the progress parser copies it into the run
// metrics when it is non-empty.
Model string `json:"model"`
}

type contentItem struct {
Expand Down Expand Up @@ -55,8 +69,10 @@ type resultEvent struct {
NumTurns int `json:"num_turns"`
TotalCostUSD float64 `json:"total_cost_usd"`
Usage struct {
InputTokens int `json:"input_tokens"`
OutputTokens int `json:"output_tokens"`
InputTokens int `json:"input_tokens"`
OutputTokens int `json:"output_tokens"`
CacheCreationInputTokens int `json:"cache_creation_input_tokens"`
CacheReadInputTokens int `json:"cache_read_input_tokens"`
} `json:"usage"`
}

Expand Down Expand Up @@ -91,6 +107,13 @@ func progressParser(r io.Reader, printer *ui.Printer, start time.Time, metrics *
continue
}

if evt.Type == "system" {
var se systemEvent
if err := json.Unmarshal(line, &se); err == nil && se.Model != "" {
metrics.Model = se.Model
}
}

if evt.Type == "assistant" {
parseAssistantToolUse(line, printer, start, metrics, isCI)
}
Expand All @@ -102,6 +125,8 @@ func progressParser(r io.Reader, printer *ui.Printer, start time.Time, metrics *
metrics.TotalCostUSD = re.TotalCostUSD
metrics.InputTokens = re.Usage.InputTokens
metrics.OutputTokens = re.Usage.OutputTokens
metrics.CacheCreationInputTokens = re.Usage.CacheCreationInputTokens
metrics.CacheReadInputTokens = re.Usage.CacheReadInputTokens
}
}
}
Expand All @@ -113,8 +138,21 @@ func parseAssistantToolUse(line []byte, printer *ui.Printer, start time.Time, me
return
}

// Fall back to the assistant message's model when the system init event did
// not carry one, so gen_ai.request.model stays populated for all streams.
if metrics.Model == "" && msg.Message.Model != "" {
metrics.Model = msg.Message.Model
}

// Real Claude Code output nests content under "message"; fall back to the
// top-level "content" for older/flat shapes.
content := msg.Message.Content
if len(content) == 0 {
content = msg.Content
}

var items []contentItem
if err := json.Unmarshal(msg.Content, &items); err != nil {
if err := json.Unmarshal(content, &items); err != nil {
return
}

Expand Down
Loading
Loading