Skip to content

Commit 4ed745e

Browse files
fix(test): deflake stderr_pump + host_guard on CI
macOS/Windows CI failed on two timing-fragile tests this PR added; ubuntu (faster runner) masked both. No production code changed. - cancel_stops_pump_while_child_alive: cancelled on a fixed 150ms sleep, but a cold interpreter on a loaded CI runner hadn't emitted the line yet, so the file was empty at cancel. Now polls the (unbuffered) file until the line is observed, then cancels and asserts the handle resolves well inside the child's now-30s lifetime — deterministic regardless of runner speed. - run_allows_loopback_when_escape_hatch_set: 10s LISTENING wait was too tight for a cold python stdlib http.server on a loaded macOS runner (~10s observed; ~2s locally). Bumped to 30s. Local ci-checks.sh green; targeted tests pass in 0.2s / 1.3s.
1 parent 17eb56c commit 4ed745e

2 files changed

Lines changed: 54 additions & 23 deletions

File tree

crates/mcp-loadtest/src/protocol/transport/stderr_pump.rs

Lines changed: 50 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -147,18 +147,22 @@ mod tests {
147147
std::env::var("MCP_LOADTEST_PYTHON").unwrap_or_else(|_| "python".to_string())
148148
}
149149

150-
/// Cancelling the token must stop the pump **promptly** and **flush the
151-
/// file before exiting**, even while the source is still alive (the
152-
/// shutdown / Drop path).
150+
/// Cancelling the token must stop the pump **promptly** even while the
151+
/// source is still alive (the shutdown / Drop path), with the captured
152+
/// file left intact.
153153
///
154-
/// The child writes one stderr line, flushes, then sleeps 5s without
155-
/// emitting more — so the pump is genuinely blocked in `next_line()` (not
156-
/// at EOF) when we cancel. We cancel ~150ms in, assert the `JoinHandle`
157-
/// resolves in well under 2s, and assert the early line was flushed to the
158-
/// file. The child's 5s sleep is never awaited: we explicitly `start_kill`
159-
/// + bounded-`wait` it so it's reaped immediately instead of letting the
160-
/// runtime block on it at teardown (the old 60s-`ping` child did exactly
161-
/// that, making this test take ~59s).
154+
/// The child writes one stderr line, flushes, then sleeps 30s without
155+
/// emitting more — so the pump stays genuinely blocked in `next_line()`
156+
/// (not at EOF) the whole time. Rather than a fixed sleep before
157+
/// cancelling (racy: on a slow/loaded CI runner the child + interpreter
158+
/// may not have emitted the line inside a fixed window, leaving the file
159+
/// empty at cancel — the original macOS/Windows CI flake), we **poll the
160+
/// file until the line is observed**. `tokio::fs::File` is unbuffered, so
161+
/// the line is visible the instant the pump's read arm writes it. Only
162+
/// then do we cancel and assert the `JoinHandle` resolves far inside the
163+
/// child's 30s lifetime (a broken cancel would block ~30s). The child is
164+
/// reaped explicitly (`start_kill` + bounded `wait`) so the runtime never
165+
/// blocks on its sleep at teardown (that original bug made this ~59s).
162166
#[tokio::test]
163167
async fn cancel_stops_pump_while_child_alive() {
164168
let log = temp_log("cancel");
@@ -167,12 +171,12 @@ mod tests {
167171
.expect("create temp log");
168172

169173
// Stdlib-only: emit one stderr line + flush, then stay alive (no more
170-
// output) for 5s. Cross-platform via the resolved interpreter.
174+
// output) for 30s. Cross-platform via the resolved interpreter.
171175
let mut child = tokio::process::Command::new(python())
172176
.args([
173177
"-c",
174178
"import sys,time; sys.stderr.write('early\\n'); \
175-
sys.stderr.flush(); time.sleep(5)",
179+
sys.stderr.flush(); time.sleep(30)",
176180
])
177181
.stderr(std::process::Stdio::piped())
178182
.stdout(std::process::Stdio::null())
@@ -185,24 +189,48 @@ mod tests {
185189
let cancel = CancellationToken::new();
186190
let handle = spawn_stderr_pump(stderr, file, false, cancel.clone());
187191

188-
// Let the pump read+write the early line, then cancel while the child
189-
// is still sleeping (source alive, pump parked in next_line()).
190-
tokio::time::sleep(Duration::from_millis(150)).await;
191-
cancel.cancel();
192+
// Deterministically wait until the pump has actually read+written the
193+
// line (child still sleeping → no EOF; this proves the alive-source
194+
// read path). Generous bound tolerates slow-CI process startup;
195+
// normally completes in well under 100ms.
196+
let observed = {
197+
let deadline = std::time::Instant::now() + Duration::from_secs(30);
198+
loop {
199+
if tokio::fs::read_to_string(&log)
200+
.await
201+
.map(|c| c.contains("early"))
202+
.unwrap_or(false)
203+
{
204+
break true;
205+
}
206+
if std::time::Instant::now() >= deadline {
207+
break false;
208+
}
209+
tokio::time::sleep(Duration::from_millis(20)).await;
210+
}
211+
};
212+
assert!(
213+
observed,
214+
"pump never wrote the child's stderr line to the file \
215+
(real pump bug, not a timing flake)"
216+
);
192217

193-
// Prompt stop: the handle must resolve far inside 2s after cancel.
194-
tokio::time::timeout(Duration::from_secs(2), handle)
218+
// Source is still alive (child sleeping 30s). Cancelling must stop the
219+
// pump promptly: a broken cancel would leave the handle pending until
220+
// the child's 30s sleep ends, so 5s is unambiguous yet CI-tolerant.
221+
cancel.cancel();
222+
tokio::time::timeout(Duration::from_secs(5), handle)
195223
.await
196-
.expect("cancel should stop the pump well within 2s")
224+
.expect("cancel must stop the pump within 5s (source still alive)")
197225
.expect("pump task panicked");
198226

199-
// Cancel arm must have flushed the buffered line before breaking.
227+
// The captured line survived the cancel/flush exit path.
200228
let contents = tokio::fs::read_to_string(&log)
201229
.await
202230
.expect("read captured log");
203231
assert!(
204232
contents.contains("early"),
205-
"cancel path must flush the buffered line before exiting, got: {contents:?}"
233+
"cancel path must preserve the captured line, got: {contents:?}"
206234
);
207235

208236
// Reap the still-sleeping child now (don't leave it for the runtime to

crates/mcp-loadtest/tests/host_guard.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,10 @@ async fn run_allows_loopback_when_escape_hatch_set() {
135135

136136
let stdout = child.stdout.take().expect("child stdout");
137137
let mut lines = BufReader::new(stdout).lines();
138-
let listening = tokio::time::timeout(Duration::from_secs(10), lines.next_line())
138+
// 30s, not 10s: a cold Python + stdlib http.server bind on a loaded
139+
// macOS GitHub runner can edge past 10s (observed on CI); locally this
140+
// resolves in ~2s. Generous bound trades nothing for CI determinism.
141+
let listening = tokio::time::timeout(Duration::from_secs(30), lines.next_line())
139142
.await
140143
.expect("timed out waiting for LISTENING line")
141144
.expect("read stdout")

0 commit comments

Comments
 (0)