Skip to content

Commit 9a58f76

Browse files
authored
Add CLI integration coverage for stdin stream routing/race invariants (#11846)
Add integration coverage for stdin stream routing and race invariants
1 parent f9da48f commit 9a58f76

File tree

7 files changed

+585
-15
lines changed

7 files changed

+585
-15
lines changed
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
import { runStreamCase, StreamEvent } from "../lib/stream-harness"
2+
3+
// Initial prompt: asks for a `sleep 12 && echo "done"` command so that a task is
// still executing when the cancel and follow-up commands are sent.
const START_PROMPT = 'Run exactly this command and do not summarize until it finishes: sleep 12 && echo "done". After it finishes, reply with exactly "done".'

// Follow-up prompt sent right after the cancel; its "RACE-OK" marker is what the
// case looks for in the echoed user turn.
const FOLLOWUP_PROMPT = 'After cancellation, reply with only "RACE-OK".'
6+
7+
/**
 * Integration case: cancel a running command task and immediately send a
 * follow-up message, then validate how the follow-up was handled.
 *
 * Sequence driven from the stream events:
 *  1. On the first `system/init` event, send `start` with START_PROMPT.
 *  2. On the first `tool_use` (subtype `command`) event for the start request,
 *     send `cancel` and then `message` (FOLLOWUP_PROMPT) back to back.
 *  3. Record the `done` codes of both control commands, whether the follow-up
 *     showed up as a `user` event containing "RACE-OK", and whether it leaked
 *     into a `tool_result` containing `<user_message>`.
 *  4. On the final `result` event for the follow-up request, assert all
 *     invariants, log the `[PASS]` lines and send `shutdown`.
 *
 * @throws Error from inside the event handler when a control error event is
 *   received or when any invariant fails.
 */
async function main() {
	const startRequestId = `start-${Date.now()}`
	const cancelRequestId = `cancel-${Date.now()}`
	const followupRequestId = `message-${Date.now()}`
	const shutdownRequestId = `shutdown-${Date.now()}`

	let initSeen = false
	let sentCancelAndFollowup = false
	let sentShutdown = false
	let cancelDoneCode: string | undefined
	let followupDoneCode: string | undefined
	let followupResult = ""
	let sawFollowupUserTurn = false
	let sawMisroutedToolResult = false
	let sawMessageControlError = false

	await runStreamCase({
		onEvent(event: StreamEvent, context) {
			// Kick off the long-running task once, on the first init event.
			if (event.type === "system" && event.subtype === "init" && !initSeen) {
				initSeen = true
				context.sendCommand({
					command: "start",
					requestId: startRequestId,
					prompt: START_PROMPT,
				})
				return
			}

			// Any control error fails the case; remember whether it belonged to the follow-up.
			if (event.type === "control" && event.subtype === "error") {
				if (event.requestId === followupRequestId) {
					sawMessageControlError = true
				}
				throw new Error(
					`received control error for requestId=${event.requestId ?? "unknown"} command=${event.command ?? "unknown"} code=${event.code ?? "unknown"} content=${event.content ?? ""}`,
				)
			}

			// First command tool_use of the start request: fire cancel + follow-up
			// back to back to provoke the race under test.
			if (
				!sentCancelAndFollowup &&
				event.type === "tool_use" &&
				event.requestId === startRequestId &&
				event.subtype === "command"
			) {
				context.sendCommand({
					command: "cancel",
					requestId: cancelRequestId,
				})
				context.sendCommand({
					command: "message",
					requestId: followupRequestId,
					prompt: FOLLOWUP_PROMPT,
				})
				sentCancelAndFollowup = true
				return
			}

			if (
				event.type === "control" &&
				event.command === "cancel" &&
				event.subtype === "done" &&
				event.requestId === cancelRequestId
			) {
				cancelDoneCode = event.code
				return
			}

			if (
				event.type === "control" &&
				event.command === "message" &&
				event.subtype === "done" &&
				event.requestId === followupRequestId
			) {
				followupDoneCode = event.code
				return
			}

			// A tool_result carrying <user_message> for the follow-up request means the
			// message was delivered as tool output instead of as a user turn.
			if (
				event.type === "tool_result" &&
				event.requestId === followupRequestId &&
				typeof event.content === "string" &&
				event.content.includes("<user_message>")
			) {
				sawMisroutedToolResult = true
				return
			}

			if (event.type === "user" && event.requestId === followupRequestId) {
				// Sticky: once the marker has been observed, a later user event for the
				// same request without it must not reset the observation.
				if (typeof event.content === "string" && event.content.includes("RACE-OK")) {
					sawFollowupUserTurn = true
				}
				return
			}

			// Everything below runs only on the final result of the follow-up request.
			if (event.type !== "result" || event.done !== true || event.requestId !== followupRequestId) {
				return
			}

			followupResult = event.content ?? ""

			if (followupResult.trim().length === 0) {
				throw new Error("follow-up after cancel produced an empty result")
			}
			if (cancelDoneCode !== "cancel_requested") {
				throw new Error(
					`cancel done code mismatch; expected cancel_requested, got "${cancelDoneCode ?? "none"}"`,
				)
			}
			if (followupDoneCode !== "responded" && followupDoneCode !== "queued") {
				throw new Error(
					`unexpected follow-up done code after cancel race; expected responded|queued, got "${followupDoneCode ?? "none"}"`,
				)
			}
			// NOTE(review): the control-error branch above throws right after setting this
			// flag, so this check only fires if event delivery continues after that throw —
			// confirm against the harness.
			if (sawMessageControlError) {
				throw new Error("follow-up message emitted control error in cancel recovery race")
			}
			if (sawMisroutedToolResult) {
				throw new Error(
					"follow-up message was misrouted into tool_result (<user_message>) in cancel recovery race",
				)
			}
			if (!sawFollowupUserTurn) {
				throw new Error("follow-up after cancel did not appear as a normal user turn")
			}

			console.log(`[PASS] cancel done code: "${cancelDoneCode}"`)
			console.log(`[PASS] follow-up done code: "${followupDoneCode}"`)
			console.log(`[PASS] follow-up user turn observed: ${sawFollowupUserTurn}`)
			console.log(`[PASS] follow-up result: "${followupResult}"`)

			if (!sentShutdown) {
				context.sendCommand({
					command: "shutdown",
					requestId: shutdownRequestId,
				})
				sentShutdown = true
			}
		},
		// Snapshot of every progress flag for the timeout diagnostic.
		onTimeoutMessage() {
			return [
				"timed out waiting for cancel-message-recovery-race validation",
				`initSeen=${initSeen}`,
				`sentCancelAndFollowup=${sentCancelAndFollowup}`,
				`cancelDoneCode=${cancelDoneCode ?? "none"}`,
				`followupDoneCode=${followupDoneCode ?? "none"}`,
				`sawFollowupUserTurn=${sawFollowupUserTurn}`,
				`sawMisroutedToolResult=${sawMisroutedToolResult}`,
				`sawMessageControlError=${sawMessageControlError}`,
				`haveFollowupResult=${Boolean(followupResult)}`,
			].join(" ")
		},
	})
}
157+
158+
// Entry point: run the case; on any rejection print a [FAIL] line and exit with code 1.
main().catch((error) => {
	const reason = error instanceof Error ? error.message : String(error)
	console.error(`[FAIL] ${reason}`)
	process.exit(1)
})
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import { runStreamCase, StreamEvent } from "../lib/stream-harness"
2+
3+
/**
 * Integration case: send `cancel` when no task has been started and check the
 * control events that come back.
 *
 * On the first `system/init` event a `cancel` command is sent. The matching
 * `done` control event must carry code `no_active_task` and `success === true`;
 * once it does, `shutdown` is sent. Any control event with subtype `error`
 * that reaches the last check fails the case.
 *
 * @throws Error from inside the event handler when the cancel `done` event has
 *   an unexpected code or success value, or when a control error is received.
 */
async function main() {
	const cancelRequestId = `cancel-${Date.now()}`
	const shutdownRequestId = `shutdown-${Date.now()}`

	// Progress flags; reported verbatim in the timeout diagnostic.
	const progress = {
		initSeen: false,
		cancelAckSeen: false,
		cancelDoneSeen: false,
		shutdownSent: false,
	}

	await runStreamCase({
		onEvent(event: StreamEvent, context) {
			// The first init event triggers the cancel; there is deliberately no `start` before it.
			if (event.type === "system" && event.subtype === "init" && !progress.initSeen) {
				progress.initSeen = true
				context.sendCommand({
					command: "cancel",
					requestId: cancelRequestId,
				})
				return
			}

			// Only control events are inspected from here on.
			if (event.type !== "control") {
				return
			}

			const isOurCancel = event.command === "cancel" && event.requestId === cancelRequestId

			if (isOurCancel && event.subtype === "ack") {
				progress.cancelAckSeen = true
				return
			}

			if (isOurCancel && event.subtype === "done") {
				progress.cancelDoneSeen = true

				if (event.code !== "no_active_task") {
					throw new Error(`cancel without task should return no_active_task, got "${event.code ?? "none"}"`)
				}
				if (event.success !== true) {
					throw new Error("cancel without task should be treated as successful no-op")
				}

				if (!progress.shutdownSent) {
					context.sendCommand({
						command: "shutdown",
						requestId: shutdownRequestId,
					})
					progress.shutdownSent = true
				}
				return
			}

			if (event.subtype === "error") {
				throw new Error(
					`unexpected control error command=${event.command ?? "unknown"} code=${event.code ?? "unknown"} content=${event.content ?? ""}`,
				)
			}
		},
		onTimeoutMessage() {
			const { initSeen, cancelAckSeen, cancelDoneSeen, shutdownSent } = progress
			return `timed out waiting for cancel-without-active-task validation (initSeen=${initSeen}, cancelAckSeen=${cancelAckSeen}, cancelDoneSeen=${cancelDoneSeen}, shutdownSent=${shutdownSent})`
		},
	})
}
69+
70+
// Entry point: run the case; on any rejection print a [FAIL] line and exit with code 1.
main().catch((error) => {
	const reason = error instanceof Error ? error.message : String(error)
	console.error(`[FAIL] ${reason}`)
	process.exit(1)
})

apps/cli/scripts/integration/cases/followup-after-completion.ts

Lines changed: 47 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,9 @@ function parseEventContent(text: string | undefined): string {
77
return typeof text === "string" ? text : ""
88
}
99

10-
function validateFollowupAnswer(text: string): void {
11-
const normalized = text.toLowerCase()
12-
const containsExpected = /\b6\b/.test(normalized) || normalized.includes("six")
13-
const containsOldAnswer = /\b1\+1\b/.test(normalized) || /\b2\b/.test(normalized)
14-
const containsQuestionReference = normalized.includes("3+3")
15-
16-
if (!containsExpected) {
17-
throw new Error(`follow-up result did not answer the follow-up question; result="${text}"`)
18-
}
19-
20-
if (!containsQuestionReference && containsOldAnswer && !containsExpected) {
21-
throw new Error(`follow-up result appears anchored to first question; result="${text}"`)
10+
/**
 * Ensures the follow-up produced some non-whitespace output.
 *
 * @param text Result text of the follow-up turn.
 * @throws Error when `text` is empty or contains only whitespace.
 */
function validateFollowupResult(text: string): void {
	const hasContent = text.trim() !== ""
	if (hasContent) {
		return
	}
	throw new Error("follow-up produced an empty result")
}
2415

@@ -32,6 +23,9 @@ async function main() {
3223
let sentShutdown = false
3324
let firstResult = ""
3425
let followupResult = ""
26+
let followupDoneCode: string | undefined
27+
let sawFollowupUserTurn = false
28+
let sawMisroutedToolResult = false
3529

3630
await runStreamCase({
3731
onEvent(event: StreamEvent, context) {
@@ -52,6 +46,31 @@ async function main() {
5246
}
5347

5448
if (event.type !== "result" || event.done !== true) {
49+
if (
50+
event.type === "control" &&
51+
event.requestId === followupRequestId &&
52+
event.command === "message" &&
53+
event.subtype === "done"
54+
) {
55+
followupDoneCode = event.code
56+
return
57+
}
58+
59+
if (
60+
event.type === "tool_result" &&
61+
event.requestId === followupRequestId &&
62+
typeof event.content === "string" &&
63+
event.content.includes("<user_message>")
64+
) {
65+
sawMisroutedToolResult = true
66+
return
67+
}
68+
69+
if (event.type === "user" && event.requestId === followupRequestId) {
70+
sawFollowupUserTurn = typeof event.content === "string" && event.content.includes("3+3")
71+
return
72+
}
73+
5574
return
5675
}
5776

@@ -77,7 +96,22 @@ async function main() {
7796
}
7897

7998
followupResult = parseEventContent(event.content)
80-
validateFollowupAnswer(followupResult)
99+
validateFollowupResult(followupResult)
100+
101+
if (followupDoneCode !== "responded") {
102+
throw new Error(
103+
`follow-up message was not routed as ask response; code="${followupDoneCode ?? "none"}"`,
104+
)
105+
}
106+
107+
if (!sawFollowupUserTurn) {
108+
throw new Error("follow-up did not appear as a normal user turn in stream output")
109+
}
110+
111+
if (sawMisroutedToolResult) {
112+
throw new Error("follow-up message was misrouted into tool_result (<user_message>), old bug reproduced")
113+
}
114+
81115
console.log(`[PASS] first result="${firstResult}"`)
82116
console.log(`[PASS] follow-up result="${followupResult}"`)
83117

0 commit comments

Comments
 (0)