@@ -147,18 +147,22 @@ mod tests {
147147 std:: env:: var ( "MCP_LOADTEST_PYTHON" ) . unwrap_or_else ( |_| "python" . to_string ( ) )
148148 }
149149
150- /// Cancelling the token must stop the pump **promptly** and **flush the
151- /// file before exiting**, even while the source is still alive ( the
152- /// shutdown / Drop path) .
150+ /// Cancelling the token must stop the pump **promptly** even while the
151+ /// source is still alive ( the shutdown / Drop path), with the captured
152+ /// file left intact .
153153 ///
154- /// The child writes one stderr line, flushes, then sleeps 5s without
155- /// emitting more — so the pump is genuinely blocked in `next_line()` (not
156- /// at EOF) when we cancel. We cancel ~150ms in, assert the `JoinHandle`
157- /// resolves in well under 2s, and assert the early line was flushed to the
158- /// file. The child's 5s sleep is never awaited: we explicitly `start_kill`
159- /// + bounded-`wait` it so it's reaped immediately instead of letting the
160- /// runtime block on it at teardown (the old 60s-`ping` child did exactly
161- /// that, making this test take ~59s).
154+ /// The child writes one stderr line, flushes, then sleeps 30s without
155+ /// emitting more — so the pump stays genuinely blocked in `next_line()`
156+ /// (not at EOF) the whole time. Rather than a fixed sleep before
157+ /// cancelling (racy: on a slow/loaded CI runner the child + interpreter
158+ /// may not have emitted the line inside a fixed window, leaving the file
159+ /// empty at cancel — the original macOS/Windows CI flake), we **poll the
160+ /// file until the line is observed**. `tokio::fs::File` is unbuffered, so
161+ /// the line is visible the instant the pump's read arm writes it. Only
162+ /// then do we cancel and assert the `JoinHandle` resolves far inside the
163+ /// child's 30s lifetime (a broken cancel would block ~30s). The child is
164+ /// reaped explicitly (`start_kill` + bounded `wait`) so the runtime never
165+ /// blocks on its sleep at teardown (that original bug made this ~59s).
162166 #[ tokio:: test]
163167 async fn cancel_stops_pump_while_child_alive ( ) {
164168 let log = temp_log ( "cancel" ) ;
@@ -167,12 +171,12 @@ mod tests {
167171 . expect ( "create temp log" ) ;
168172
169173 // Stdlib-only: emit one stderr line + flush, then stay alive (no more
170- // output) for 5s . Cross-platform via the resolved interpreter.
174+ // output) for 30s . Cross-platform via the resolved interpreter.
171175 let mut child = tokio:: process:: Command :: new ( python ( ) )
172176 . args ( [
173177 "-c" ,
174178 "import sys,time; sys.stderr.write('early\\ n'); \
175- sys.stderr.flush(); time.sleep(5 )",
179+ sys.stderr.flush(); time.sleep(30 )",
176180 ] )
177181 . stderr ( std:: process:: Stdio :: piped ( ) )
178182 . stdout ( std:: process:: Stdio :: null ( ) )
@@ -185,24 +189,48 @@ mod tests {
185189 let cancel = CancellationToken :: new ( ) ;
186190 let handle = spawn_stderr_pump ( stderr, file, false , cancel. clone ( ) ) ;
187191
188- // Let the pump read+write the early line, then cancel while the child
189- // is still sleeping (source alive, pump parked in next_line()).
190- tokio:: time:: sleep ( Duration :: from_millis ( 150 ) ) . await ;
191- cancel. cancel ( ) ;
192+ // Deterministically wait until the pump has actually read+written the
193+ // line (child still sleeping → no EOF; this proves the alive-source
194+ // read path). Generous bound tolerates slow-CI process startup;
195+ // normally completes in well under 100ms.
196+ let observed = {
197+ let deadline = std:: time:: Instant :: now ( ) + Duration :: from_secs ( 30 ) ;
198+ loop {
199+ if tokio:: fs:: read_to_string ( & log)
200+ . await
201+ . map ( |c| c. contains ( "early" ) )
202+ . unwrap_or ( false )
203+ {
204+ break true ;
205+ }
206+ if std:: time:: Instant :: now ( ) >= deadline {
207+ break false ;
208+ }
209+ tokio:: time:: sleep ( Duration :: from_millis ( 20 ) ) . await ;
210+ }
211+ } ;
212+ assert ! (
213+ observed,
214+ "pump never wrote the child's stderr line to the file \
215+ (real pump bug, not a timing flake)"
216+ ) ;
192217
193- // Prompt stop: the handle must resolve far inside 2s after cancel.
194- tokio:: time:: timeout ( Duration :: from_secs ( 2 ) , handle)
218+ // Source is still alive (child sleeping 30s). Cancelling must stop the
219+ // pump promptly: a broken cancel would leave the handle pending until
220+ // the child's 30s sleep ends, so 5s is unambiguous yet CI-tolerant.
221+ cancel. cancel ( ) ;
222+ tokio:: time:: timeout ( Duration :: from_secs ( 5 ) , handle)
195223 . await
196- . expect ( "cancel should stop the pump well within 2s " )
224+ . expect ( "cancel must stop the pump within 5s (source still alive) " )
197225 . expect ( "pump task panicked" ) ;
198226
199- // Cancel arm must have flushed the buffered line before breaking .
227+ // The captured line survived the cancel/flush exit path .
200228 let contents = tokio:: fs:: read_to_string ( & log)
201229 . await
202230 . expect ( "read captured log" ) ;
203231 assert ! (
204232 contents. contains( "early" ) ,
205- "cancel path must flush the buffered line before exiting , got: {contents:?}"
233+ "cancel path must preserve the captured line, got: {contents:?}"
206234 ) ;
207235
208236 // Reap the still-sleeping child now (don't leave it for the runtime to
0 commit comments