Skip to content

Commit d9b36fe

Browse files
authored
fix(cli): surface cloud-exec stream failures instead of silent success (#804)
## What Cloud `exec` streams stdout/stderr over a WebSocket. When that attach fails (e.g. a 401 on the upgrade), the client recovered the exit code via a status probe and reported a silent `exit 0` with no output — indistinguishable from a command that genuinely produced none. ## Changes - `emit_or_fallback` now carries the disconnect cause in `ExecResult.error_message` (was `None`); it is only reached when the output stream failed. - `StreamManager::start()` surfaces a non-empty `error_message` as an error (exit 1 + message) instead of dropping it. - REST `401`/`403` mapping reads `unauthorized (HTTP 401)` / `forbidden (HTTP 403)` instead of a bare `auth: <body>`. - Adds `debug!`/`trace!` to the previously-uninstrumented WS attach path (connect, per-frame, stdout/stderr, exit-frame, status-probe fallback). ## Verification - Real cloud `exec`: before = silent `exit 0`, empty; after = `exit 1` + clear cause. - Unit reproducer `terminal::tests::stream_manager_surfaces_exec_error_message_as_error` verified RED→GREEN. - 4 WS attach unit tests still pass. <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **Bug Fixes** * Distinguished error messages for authentication (HTTP 401) versus authorization (HTTP 403) failures * Improved error reporting when process execution streaming is interrupted * **Chores** * Enhanced diagnostic logging for WebSocket connection handling <!-- end of auto-generated comment: release notes by coderabbit.ai -->
1 parent 011e52a commit d9b36fe

3 files changed

Lines changed: 93 additions & 4 deletions

File tree

src/boxlite/src/rest/error.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,13 @@ pub(crate) fn map_http_error(status: StatusCode, body: &ErrorModel) -> BoxliteEr
5656
pub(crate) fn map_http_status(status: StatusCode, text: &str) -> BoxliteError {
5757
match status.as_u16() {
5858
404 => BoxliteError::NotFound(text.to_string()),
59-
401 | 403 => BoxliteError::Config(format!("auth: {}", text)),
59+
// Keep the `auth:` prefix (callers key on it) but state the actual
60+
// failure: 401 = credentials rejected (expired, or wrong credential
61+
// type for this endpoint — e.g. cloud exec's WS attach requires an API
62+
// key, not a browser/OIDC token); 403 = authenticated but not allowed
63+
// (often a stale org/path_prefix — `auth login` re-resolves it).
64+
401 => BoxliteError::Config(format!("auth: unauthorized (HTTP 401): {}", text)),
65+
403 => BoxliteError::Config(format!("auth: forbidden (HTTP 403): {}", text)),
6066
// Bare 5xx with no envelope ⇒ an intermediary spoke, not us.
6167
// The most common cause is a proxy / load balancer that
6268
// couldn't reach the destination (Clash returns 502 with

src/boxlite/src/rest/litebox.rs

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -490,8 +490,12 @@ async fn attach_ws(
490490
) {
491491
let path = format!("/boxes/{}/executions/{}/attach", box_id, execution_id);
492492
let stream = match client.connect_ws(&path).await {
493-
Ok(s) => s,
493+
Ok(s) => {
494+
tracing::debug!(path = %path, "WS attach: connected");
495+
s
496+
}
494497
Err(e) => {
498+
tracing::debug!(path = %path, error = %e, "WS attach: connect failed; falling back to status probe (output will not stream)");
495499
emit_or_fallback(
496500
client,
497501
box_id,
@@ -631,15 +635,30 @@ async fn attach_ws_pump(
631635
// idle watchdog for the rest of the session.
632636
first_frame_seen = true;
633637

638+
tracing::trace!(
639+
kind = match &frame {
640+
Message::Binary(_) => "binary",
641+
Message::Text(_) => "text",
642+
Message::Close(_) => "close",
643+
Message::Ping(_) => "ping",
644+
Message::Pong(_) => "pong",
645+
Message::Frame(_) => "raw",
646+
},
647+
len = frame.len(),
648+
"WS attach: frame received",
649+
);
650+
634651
match frame {
635652
Message::Binary(bytes) => {
636653
if let Some((channel, payload)) = bytes.split_first() {
637654
let text = String::from_utf8_lossy(payload).into_owned();
638655
match *channel {
639656
0x01 => {
657+
tracing::trace!(len = text.len(), "WS attach: stdout frame");
640658
let _ = stdout_tx.send(text);
641659
}
642660
0x02 => {
661+
tracing::trace!(len = text.len(), "WS attach: stderr frame");
643662
let _ = stderr_tx.send(text);
644663
}
645664
other => {
@@ -650,6 +669,7 @@ async fn attach_ws_pump(
650669
}
651670
Message::Text(text) => match parse_control_frame(&text) {
652671
ControlFrame::Exit { exit_code } => {
672+
tracing::debug!(exit_code, "WS attach: exit control frame");
653673
let _ = result_tx.send(ExecResult {
654674
exit_code,
655675
error_message: None,
@@ -681,6 +701,10 @@ async fn attach_ws_pump(
681701
// distinguish "exec really finished" from "transient WS drop".
682702
match probe_execution_status(client, box_id, execution_id).await {
683703
ProbeResult::Terminal(result) => {
704+
tracing::debug!(
705+
cause = %disconnect_cause,
706+
"WS attach: disconnected without an exit frame — taking exit code from status probe (any unstreamed stdout/stderr is lost)"
707+
);
684708
let _ = result_tx.send(result);
685709
return;
686710
}
@@ -826,6 +850,7 @@ async fn emit_or_fallback(
826850
result_tx: &mpsc::UnboundedSender<ExecResult>,
827851
cause: String,
828852
) {
853+
tracing::debug!(cause = %cause, "WS attach: emit_or_fallback — recovering exit code from status probe (stdout/stderr not streamed)");
829854
let status_path = format!("/boxes/{}/executions/{}", box_id, execution_id);
830855
let status_probe = tokio::time::timeout(
831856
std::time::Duration::from_secs(5),
@@ -834,9 +859,16 @@ async fn emit_or_fallback(
834859
if let Ok(Ok(info)) = status_probe.await {
835860
match info.status.as_str() {
836861
"completed" | "killed" | "timed_out" => {
862+
// The exec finished server-side, but we only reach
863+
// emit_or_fallback when the output stream failed (never
864+
// connected, or dropped and couldn't reconnect), so stdout/
865+
// stderr were not delivered to the caller. Surface the cause as
866+
// an error_message rather than reporting a clean exit: a caller
867+
// otherwise can't tell "command produced no output" from "we
868+
// lost the output". The real exit code is still preserved.
837869
let _ = result_tx.send(ExecResult {
838870
exit_code: info.exit_code.unwrap_or(-1),
839-
error_message: None,
871+
error_message: Some(cause.clone()),
840872
});
841873
return;
842874
}

src/cli/src/terminal/mod.rs

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ impl<'a> StreamManager<'a> {
191191
h.abort();
192192
}
193193
if io_done {
194-
break exit_status.unwrap().exit_code;
194+
break exit_status.as_ref().unwrap().exit_code;
195195
}
196196
}
197197
Err(e) => {
@@ -232,6 +232,15 @@ impl<'a> StreamManager<'a> {
232232
}
233233
};
234234

235+
// The exec carried a diagnostic (its output stream failed and the exit
236+
// code was recovered out-of-band, or the container init died): surface
237+
// it as an error instead of returning a silent, possibly-misleading
238+
// exit code. Without this the CLI reports success while stdout/stderr
239+
// were never delivered.
240+
if let Some(message) = exit_status.as_ref().and_then(|s| s.error_message.clone()) {
241+
anyhow::bail!("exec did not complete normally: {message}");
242+
}
243+
235244
Ok(exit_code)
236245
}
237246
}
@@ -460,4 +469,46 @@ mod tests {
460469
elapsed,
461470
);
462471
}
472+
473+
/// A result that carries an `error_message` (the exec output stream failed
474+
/// and the exit code was recovered out-of-band, so stdout/stderr were never
475+
/// delivered) must surface as an `Err`, not a silent exit code. Guards the
476+
/// regression where the CLI reported a clean `exit 0` for a cloud exec whose
477+
/// WS attach was rejected 401 and produced no output.
478+
#[test]
479+
fn stream_manager_surfaces_exec_error_message_as_error() {
480+
let rt = tokio::runtime::Builder::new_multi_thread()
481+
.worker_threads(2)
482+
.enable_all()
483+
.build()
484+
.expect("build runtime");
485+
486+
let (mut exec, stdout_tx, stderr_tx, _stdin_rx, result_tx) =
487+
boxlite::Execution::stub("stream-mgr-error-message");
488+
489+
let result = rt.block_on(async {
490+
tokio::spawn(async move {
491+
let _ = result_tx.send(boxlite::ExecResult {
492+
exit_code: 0,
493+
error_message: Some(
494+
"WS connect failed: WS auth rejected (401 Unauthorized)".to_string(),
495+
),
496+
});
497+
drop(stdout_tx);
498+
drop(stderr_tx);
499+
});
500+
StreamManager::new(&mut exec, /*interactive*/ false, /*tty*/ false)
501+
.start()
502+
.await
503+
});
504+
505+
let err = result.expect_err(
506+
"a result carrying error_message must surface as Err, not a silent exit code",
507+
);
508+
let rendered = format!("{err:#}");
509+
assert!(
510+
rendered.contains("401 Unauthorized"),
511+
"surfaced error must include the stream-failure cause, got: {rendered}"
512+
);
513+
}
463514
}

0 commit comments

Comments
 (0)