Skip to content

Commit 853a28a

Browse files
wesmclaude
andauthored
Codex: strengthen codex FIFO pairing test and add merge guard (#238)
## Summary - Strengthen `TestStreamFormatter_CodexMultiIDSameCommandDeterministicPairing` so it actually detects wrong-ID consumption (previous assertion passed either way) - Add CLAUDE.md rule: never merge PRs on behalf of the user ## Test plan - [x] `go test ./...` passes - [x] New assertion (count=3) fails if FIFO consumes wrong ID, passes with correct FIFO ordering 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 667ddff commit 853a28a

File tree

5 files changed

+116
-20
lines changed

5 files changed

+116
-20
lines changed

CLAUDE.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,4 +97,5 @@ CLI reads this to find the daemon. If port 7373 is busy, daemon auto-increments.
9797
- No emojis in code or output (except commit messages)
9898
- Never amend commits; always create new commits for fixes
9999
- Never push or pull unless explicitly asked by the user
100+
- **NEVER merge pull requests.** Do not run `gh pr merge` or any equivalent. Only the user merges PRs. This is non-negotiable.
100101
- **NEVER change git branches without explicit user confirmation**. Always ask before switching, creating, or checking out branches. This is non-negotiable.

cmd/roborev/streamfmt.go

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"fmt"
77
"io"
88
"strings"
9+
"unicode"
910
)
1011

1112
// streamFormatter wraps an io.Writer to transform raw NDJSON stream output
@@ -177,11 +178,11 @@ func (f *streamFormatter) processCodexItem(eventType string, item *codexItem) {
177178
if eventType != "item.completed" {
178179
return
179180
}
180-
if text := strings.TrimSpace(item.Text); text != "" {
181+
if text := strings.TrimSpace(sanitizeControl(item.Text)); text != "" {
181182
f.writef("%s\n", text)
182183
}
183184
case "command_execution":
184-
cmd := strings.TrimSpace(item.Command)
185+
cmd := strings.TrimSpace(sanitizeControl(item.Command))
185186
if !f.shouldRenderCodexCommand(eventType, item, cmd) {
186187
return
187188
}
@@ -413,6 +414,25 @@ func writerIsTerminal(w io.Writer) bool {
413414
return false
414415
}
415416

417+
// sanitizeControl strips ANSI escape sequences and non-printable control
418+
// characters from s. This prevents terminal injection from untrusted
419+
// model/subprocess output embedded in codex JSONL fields.
420+
func sanitizeControl(s string) string {
421+
s = ansiEscapePattern.ReplaceAllString(s, "")
422+
// Replace newlines/carriage returns with spaces to avoid collapsing words.
423+
s = strings.ReplaceAll(s, "\r\n", " ")
424+
s = strings.ReplaceAll(s, "\n", " ")
425+
s = strings.ReplaceAll(s, "\r", " ")
426+
var b strings.Builder
427+
b.Grow(len(s))
428+
for _, r := range s {
429+
if r == '\t' || !unicode.IsControl(r) {
430+
b.WriteRune(r)
431+
}
432+
}
433+
return b.String()
434+
}
435+
416436
// jsonString extracts a string value from a raw JSON field.
417437
func jsonString(raw json.RawMessage) string {
418438
if raw == nil {

cmd/roborev/streamfmt_test.go

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,21 @@ func TestStreamFormatter_CodexUpdatedSuppressed(t *testing.T) {
275275
fix.assertEmpty(t)
276276
}
277277

278+
func TestStreamFormatter_CodexSanitizesControlChars(t *testing.T) {
279+
fix := newFixture(true)
280+
281+
// Agent message with ANSI escape and control chars
282+
fix.writeLine(`{"type":"item.completed","item":{"type":"agent_message","text":"\u001b[31mred\u001b[0m and \u0007bell"}}`)
283+
fix.assertContains(t, "red and bell")
284+
fix.assertNotContains(t, "\x1b")
285+
fix.assertNotContains(t, "\x07")
286+
287+
// Command with ANSI escape
288+
fix.writeLine(`{"type":"item.started","item":{"type":"command_execution","command":"bash -lc \u001b[32mls\u001b[0m"}}`)
289+
fix.assertContains(t, "Bash bash -lc ls")
290+
fix.assertNotContains(t, "\x1b")
291+
}
292+
278293
func TestStreamFormatter_CodexLifecycleSuppressed(t *testing.T) {
279294
fix := newFixture(true)
280295
fix.writeLine(`{"type":"thread.started","thread_id":"abc"}`)
@@ -384,23 +399,29 @@ func TestStreamFormatter_CodexMultiIDSameCommandDeterministicPairing(t *testing.
384399
fix := newFixture(true)
385400

386401
// Two started events with same command text but different IDs.
387-
// A command-only completion should consume the first (FIFO), leaving
388-
// the second ID valid for its own ID-only completion.
402+
// A command-only completion should consume the oldest ID (FIFO cmd_A),
403+
// leaving cmd_B valid for its own ID-only completion.
404+
// A final command-only completion with no remaining started counter
405+
// must render, proving state was fully drained.
389406
lines := []string{
390407
`{"type":"item.started","item":{"id":"cmd_A","type":"command_execution","command":"bash -lc ls"}}`,
391408
`{"type":"item.started","item":{"id":"cmd_B","type":"command_execution","command":"bash -lc ls"}}`,
392-
// Command-only completion should consume cmd_A (FIFO), not cmd_B.
409+
// Command-only completion consumes cmd_A (FIFO), counter 2→1.
393410
`{"type":"item.completed","item":{"type":"command_execution","command":"bash -lc ls"}}`,
394-
// ID-only completion for cmd_B should clear state without rendering.
411+
// ID-only completion for cmd_B clears remaining state, counter 1→0.
395412
`{"type":"item.completed","item":{"id":"cmd_B","type":"command_execution"}}`,
413+
// With all started state drained, this completion must render.
414+
// If the wrong ID was consumed above, a stale counter remains and
415+
// this would be suppressed instead.
416+
`{"type":"item.completed","item":{"type":"command_execution","command":"bash -lc ls"}}`,
396417
}
397418

398419
for _, line := range lines {
399420
fix.writeLine(line)
400421
}
401422

402-
// Two started renders, both completions suppressed = exactly 2 lines.
403-
fix.assertCount(t, "Bash bash -lc ls", 2)
423+
// 2 started renders + 1 unpaired completion render = 3.
424+
fix.assertCount(t, "Bash bash -lc ls", 3)
404425
}
405426

406427
func TestStreamFormatter_PartialWrites(t *testing.T) {

internal/daemon/normalize.go

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"encoding/json"
55
"regexp"
66
"strings"
7+
"unicode"
78
)
89

910
// ansiEscapePattern matches ANSI escape sequences (colors, cursor movement, etc.)
@@ -180,27 +181,22 @@ func NormalizeCodexOutput(line string) *OutputLine {
180181
}
181182

182183
if err := json.Unmarshal([]byte(line), &ev); err != nil {
183-
// Not JSON - return as raw text
184-
return &OutputLine{Text: stripANSI(line), Type: "text"}
184+
// Not JSON - return as raw text with full sanitization
185+
return &OutputLine{Text: sanitizeControl(line), Type: "text"}
185186
}
186187

187188
switch ev.Type {
188-
case "item.completed", "item.updated":
189+
case "item.completed":
189190
switch ev.Item.Type {
190191
case "agent_message":
191192
if ev.Item.Text != "" {
192-
text := strings.ReplaceAll(ev.Item.Text, "\n", " ")
193-
text = strings.ReplaceAll(text, "\r", "")
194-
return &OutputLine{Text: text, Type: "text"}
193+
return &OutputLine{Text: sanitizeControl(ev.Item.Text), Type: "text"}
195194
}
196195
case "command_execution":
197196
if ev.Item.Command != "" {
198-
return &OutputLine{Text: "[Command: " + ev.Item.Command + "]", Type: "tool"}
199-
}
200-
if ev.Type == "item.completed" {
201-
return &OutputLine{Text: "[Command completed]", Type: "tool"}
197+
return &OutputLine{Text: "[Command: " + sanitizeControl(ev.Item.Command) + "]", Type: "tool"}
202198
}
203-
return nil
199+
return &OutputLine{Text: "[Command completed]", Type: "tool"}
204200
case "file_change":
205201
return &OutputLine{Text: "[File change]", Type: "tool"}
206202
}
@@ -210,7 +206,7 @@ func NormalizeCodexOutput(line string) *OutputLine {
210206
switch ev.Item.Type {
211207
case "command_execution":
212208
if ev.Item.Command != "" {
213-
return &OutputLine{Text: "[Command: " + ev.Item.Command + "]", Type: "tool"}
209+
return &OutputLine{Text: "[Command: " + sanitizeControl(ev.Item.Command) + "]", Type: "tool"}
214210
}
215211
}
216212
return nil
@@ -274,6 +270,24 @@ func stripANSI(s string) string {
274270
return ansiEscapePattern.ReplaceAllString(s, "")
275271
}
276272

273+
// sanitizeControl strips ANSI escapes and non-printable control characters,
274+
// replacing newlines with spaces to avoid collapsing words. Used for
275+
// untrusted model/subprocess output that reaches terminals.
276+
func sanitizeControl(s string) string {
277+
s = stripANSI(s)
278+
s = strings.ReplaceAll(s, "\r\n", " ")
279+
s = strings.ReplaceAll(s, "\n", " ")
280+
s = strings.ReplaceAll(s, "\r", " ")
281+
var b strings.Builder
282+
b.Grow(len(s))
283+
for _, r := range s {
284+
if r == '\t' || !unicode.IsControl(r) {
285+
b.WriteRune(r)
286+
}
287+
}
288+
return b.String()
289+
}
290+
277291
// isToolCallJSON checks if a line is a tool call JSON object.
278292
// Tool calls have exactly "name" and "arguments" keys.
279293
func isToolCallJSON(line string) bool {

internal/daemon/normalize_test.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,30 @@ func TestNormalizeCodexOutput(t *testing.T) {
222222
wantText: "Line 1 Line 2",
223223
wantType: "text",
224224
},
225+
{
226+
name: "AgentMessageStripsANSI",
227+
input: `{"type":"item.completed","item":{"type":"agent_message","text":"\u001b[31mred\u001b[0m text"}}`,
228+
wantText: "red text",
229+
wantType: "text",
230+
},
231+
{
232+
name: "AgentMessageStripsControlChars",
233+
input: `{"type":"item.completed","item":{"type":"agent_message","text":"hello\u0007world"}}`,
234+
wantText: "helloworld",
235+
wantType: "text",
236+
},
237+
{
238+
name: "CommandStartedStripsANSI",
239+
input: `{"type":"item.started","item":{"type":"command_execution","command":"bash -lc \u001b[32mls\u001b[0m"}}`,
240+
wantText: "[Command: bash -lc ls]",
241+
wantType: "tool",
242+
},
243+
{
244+
name: "CommandCompletedStripsControlChars",
245+
input: `{"type":"item.completed","item":{"type":"command_execution","command":"ls\u0007\u001b[31m -la"}}`,
246+
wantText: "[Command: ls -la]",
247+
wantType: "tool",
248+
},
225249
{
226250
name: "CommandStarted",
227251
input: `{"type":"item.started","item":{"type":"command_execution","command":"bash -lc ls"}}`,
@@ -245,6 +269,16 @@ func TestNormalizeCodexOutput(t *testing.T) {
245269
input: `{"type":"item.updated","item":{"type":"command_execution"}}`,
246270
wantNil: true,
247271
},
272+
{
273+
name: "CommandUpdatedWithCommand",
274+
input: `{"type":"item.updated","item":{"type":"command_execution","command":"bash -lc ls"}}`,
275+
wantNil: true,
276+
},
277+
{
278+
name: "FileChangeUpdated",
279+
input: `{"type":"item.updated","item":{"type":"file_change"}}`,
280+
wantNil: true,
281+
},
248282
{
249283
name: "FileChange",
250284
input: `{"type":"item.completed","item":{"type":"file_change"}}`,
@@ -289,6 +323,12 @@ func TestNormalizeCodexOutput(t *testing.T) {
289323
wantText: "some plain text output",
290324
wantType: "text",
291325
},
326+
{
327+
name: "NonJSONStripsControlChars",
328+
input: "text with \x07bell and \x1b[31mcolor\x1b[0m",
329+
wantText: "text with bell and color",
330+
wantType: "text",
331+
},
292332
})
293333
}
294334

0 commit comments

Comments
 (0)