Merge pull request #2860 from fullsend-ai/agent/2786-surface-api-errors

waynesun09 · web-flow · commit d0771f704fe5 · 2026-07-01T21:37:02.000Z
fix(#2786): surface agent API errors when Claude Code exits 0
diff --git a/internal/cli/run.go b/internal/cli/run.go
@@ -625,6 +625,7 @@ func runAgent(ctx context.Context, agentName, fullsendDir, outputBase, targetRep
 	// registered before the post-script and cleanup defers so that — by LIFO
 	// order — it runs last and the summary captures the whole run.
 	var lastExitCode int
+	var transcriptErrorOverride bool
 	rec := telemetry.New(runDir, wTraceID, rootSpanID, agentName, workItemID, runStart)
 	defer func() { rec.Finalize(telemetryExitCode(lastExitCode, runErr)) }()
 
@@ -666,6 +667,10 @@ func runAgent(ctx context.Context, agentName, fullsendDir, outputBase, targetRep
 				printer.StepWarn("Skipping post-script: agent run failed")
 				return
 			}
+			if transcriptErrorOverride {
+				printer.StepWarn("Skipping post-script: agent reported error via transcript")
+				return
+			}
 			postStart := time.Now()
 			printer.StepStart("Running post-script: " + h.PostScript)
 			postCmd := exec.Command(h.PostScript)
@@ -950,6 +955,7 @@ func runAgent(ctx context.Context, agentName, fullsendDir, outputBase, targetRep
 
 	for iteration := 1; iteration <= maxIterations; iteration++ {
 		runCount = iteration
+		transcriptErrorOverride = false
 
 		// Each iteration gets its own subdirectory for output and transcripts.
 		iterDir := filepath.Join(runDir, fmt.Sprintf("iteration-%d", iteration))
@@ -1019,12 +1025,26 @@ func runAgent(ctx context.Context, agentName, fullsendDir, outputBase, targetRep
 		}
 		lastExitCode = exitCode
 
+		// Check the tee'd output.jsonl for is_error:true result events.
+		// Claude Code may exit 0 on API/infrastructure failures (e.g.,
+		// invalid_grant, quota exhaustion) while setting is_error:true in
+		// the transcript. Treat these as failures so the recorded exit code
+		// reflects the error and the post-script is skipped. See #2786.
+		if exitCode == 0 {
+			outputJSONL := filepath.Join(iterDir, "output.jsonl")
+			if te, ok := tx.ParseTranscriptFile(outputJSONL); ok && te.IsError {
+				printer.StepWarn(fmt.Sprintf("Agent exited with code 0 but transcript contains error: %s", te.ErrorMessage))
+				lastExitCode = 1
+				transcriptErrorOverride = true
+			}
+		}
+
 		printer.Blank()
 		// Non-zero exit is a warning, not a failure — the validation loop is the success gate.
-		if exitCode == 0 {
+		if lastExitCode == 0 {
 			printer.StepDone(fmt.Sprintf("Agent exited with code %d (%.1fs)", exitCode, time.Since(agentStart).Seconds()))
 		} else {
-			printer.StepWarn(fmt.Sprintf("Agent exited with code %d", exitCode))
+			printer.StepWarn(fmt.Sprintf("Agent exited with code %d", lastExitCode))
 		}
 
 		// 9b. Extract output files.
@@ -1114,16 +1134,15 @@ func runAgent(ctx context.Context, agentName, fullsendDir, outputBase, targetRep
 	rec.SetModel(aggMetrics.Model)
 
 	// 9e-bis. Surface transcript errors in workflow logs (GitHub Actions).
-	// When the agent exits non-zero, parse transcript JSONL files and emit
-	// ::error:: annotations so operators can diagnose failures without
-	// downloading artifacts. See #704.
-	if lastExitCode != 0 {
-		lastIterDir := filepath.Join(runDir, fmt.Sprintf("iteration-%d", runCount))
-		lastTranscriptDir := filepath.Join(lastIterDir, "transcripts")
-		if errorSummaries := tx.ParseTranscriptErrors(lastTranscriptDir); len(errorSummaries) > 0 {
-			printer.StepWarn(fmt.Sprintf("Found %d transcript error(s) — emitting to workflow log", len(errorSummaries)))
-			tx.EmitTranscriptErrors(os.Stderr, errorSummaries)
-		}
+	// Parse transcript JSONL files and emit ::error:: annotations so operators
+	// can diagnose failures without downloading artifacts. This runs
+	// regardless of exit code because Claude Code may exit 0 with
+	// is_error:true on API/infrastructure failures. See #704, #2786.
+	lastIterDir := filepath.Join(runDir, fmt.Sprintf("iteration-%d", runCount))
+	lastTranscriptDir := filepath.Join(lastIterDir, "transcripts")
+	if errorSummaries := tx.ParseTranscriptErrors(lastTranscriptDir); len(errorSummaries) > 0 {
+		printer.StepWarn(fmt.Sprintf("Found %d transcript error(s) — emitting to workflow log", len(errorSummaries)))
+		tx.EmitTranscriptErrors(os.Stderr, errorSummaries)
 	}
 
 	// 9f. Post-agent output scan — redact secrets from extracted output.
diff --git a/internal/runtime/claude.go b/internal/runtime/claude.go
@@ -194,6 +194,10 @@ func (ClaudeRuntime) ParseTranscriptErrors(transcriptDir string) []TranscriptErr
 	return parseTranscriptErrors(transcriptDir)
 }
 
+func (ClaudeRuntime) ParseTranscriptFile(path string) (TranscriptError, bool) {
+	return parseTranscriptFile(path)
+}
+
 func (ClaudeRuntime) EmitTranscriptErrors(w io.Writer, summaries []TranscriptError) {
 	emitTranscriptErrors(w, summaries)
 }
diff --git a/internal/runtime/claude_transcript_test.go b/internal/runtime/claude_transcript_test.go
@@ -246,6 +246,80 @@ func TestEmitTranscriptErrors_NoSummaries(t *testing.T) {
 	}
 }
 
+// TestParseTranscriptFile_APIErrorExitZero covers the scenario from #2786:
+// Claude Code exits 0 with is_error:true and subtype "success" on API errors
+// (e.g., invalid_grant from a stale OIDC token).
+func TestParseTranscriptFile_APIErrorExitZero(t *testing.T) {
+	dir := t.TempDir()
+	// Real-world transcript shape: subtype is "success" but is_error is true.
+	content := `{"type":"system","subtype":"init","session_id":"abc123"}
+{"type":"result","subtype":"success","is_error":true,"result":"API Error: Error code invalid_grant: ID Token issued at 1782810237 is stale to sign-in.","session_id":"abc123"}
+`
+	path := filepath.Join(dir, "output.jsonl")
+	if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
+		t.Fatal(err)
+	}
+
+	summary, ok := parseTranscriptFile(path)
+	if !ok {
+		t.Fatal("expected result event to be found")
+	}
+	if !summary.IsError {
+		t.Error("expected IsError to be true for API error with exit 0")
+	}
+	if summary.Subtype != "success" {
+		t.Errorf("expected subtype 'success', got %q", summary.Subtype)
+	}
+	if !strings.Contains(summary.ErrorMessage, "invalid_grant") {
+		t.Errorf("expected error message to contain 'invalid_grant', got %q", summary.ErrorMessage)
+	}
+}
+
+// TestParseTranscriptErrors_SurfacesErrorRegardlessOfExitCode verifies that
+// parseTranscriptErrors returns errors from transcripts where is_error:true,
+// which is the key fix from #2786 — errors must be surfaced even when the
+// process exit code was 0.
+func TestParseTranscriptErrors_SurfacesErrorRegardlessOfExitCode(t *testing.T) {
+	dir := t.TempDir()
+
+	// Transcript with is_error:true but subtype "success" (API error scenario).
+	content := `{"type":"result","subtype":"success","is_error":true,"result":"API Error: quota exhausted"}`
+	if err := os.WriteFile(filepath.Join(dir, "agent.jsonl"), []byte(content), 0o644); err != nil {
+		t.Fatal(err)
+	}
+
+	summaries := parseTranscriptErrors(dir)
+	if len(summaries) != 1 {
+		t.Fatalf("expected 1 error summary, got %d", len(summaries))
+	}
+	if !summaries[0].IsError {
+		t.Error("expected IsError to be true")
+	}
+	if !strings.Contains(summaries[0].ErrorMessage, "quota exhausted") {
+		t.Errorf("unexpected error message: %q", summaries[0].ErrorMessage)
+	}
+}
+
+// TestClaudeRuntime_ParseTranscriptFile verifies the exported method on
+// ClaudeRuntime satisfies the TranscriptHandler interface.
+func TestClaudeRuntime_ParseTranscriptFile(t *testing.T) {
+	dir := t.TempDir()
+	content := `{"type":"result","subtype":"success","is_error":true,"result":"infrastructure error"}`
+	path := filepath.Join(dir, "output.jsonl")
+	if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
+		t.Fatal(err)
+	}
+
+	var handler TranscriptHandler = ClaudeRuntime{}
+	summary, ok := handler.ParseTranscriptFile(path)
+	if !ok {
+		t.Fatal("expected result event to be found")
+	}
+	if !summary.IsError {
+		t.Error("expected IsError to be true")
+	}
+}
+
 func TestIsResultLine(t *testing.T) {
 	tests := []struct {
 		line string
diff --git a/internal/runtime/transcript.go b/internal/runtime/transcript.go
@@ -9,5 +9,9 @@ type TranscriptHandler interface {
 	ExtractTranscripts(sandboxName, agentLabel, outputDir string) error
 	ExtractDebugLog(sandboxName, localPath, debug string) error
 	ParseTranscriptErrors(transcriptDir string) []TranscriptError
+	// ParseTranscriptFile parses a single JSONL transcript or output file
+	// and returns the last result event, if any. Use this to check a tee'd
+	// output.jsonl for is_error:true without scanning an entire directory.
+	ParseTranscriptFile(path string) (TranscriptError, bool)
 	EmitTranscriptErrors(w io.Writer, summaries []TranscriptError)
 }

Original file line number	Diff line number	Diff line change
`@@ -194,6 +194,10 @@ func (ClaudeRuntime) ParseTranscriptErrors(transcriptDir string) []TranscriptErr`
`194`	`194`	`return parseTranscriptErrors(transcriptDir)`
`195`	`195`	`}`
`196`	`196`
	`197`	`+func (ClaudeRuntime) ParseTranscriptFile(path string) (TranscriptError, bool) {`
	`198`	`+ return parseTranscriptFile(path)`
	`199`	`+}`
	`200`	`+`
`197`	`201`	`func (ClaudeRuntime) EmitTranscriptErrors(w io.Writer, summaries []TranscriptError) {`
`198`	`202`	`emitTranscriptErrors(w, summaries)`
`199`	`203`	`}`