spiceai · lukekim · May 14, 2026 · May 14, 2026 · May 14, 2026
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "spiceio"
-version = "0.5.3"
+version = "0.5.4"
 edition = "2024"
 description = "S3-compatible API proxy to SMB file shares"
 license = "Apache-2.0"

diff --git a/scripts/bench-live.sh b/scripts/bench-live.sh
@@ -193,18 +193,27 @@ bench_concurrent_read() {
 # and runs the same dd-based write/read tests. Establishes the hard
 # ceiling for what the link can do, so we can attribute spiceio's
 # translation overhead.
+#
+# The mount uses the macOS Keychain for credentials rather than embedding
+# the password in the mount URL — embedded passwords leak via shell
+# history and `ps` argv listings, and shelling out to `perl -MURI::Escape`
+# adds a non-portable dependency. To prep the keychain once:
+#   mount_smbfs //user@server/share /tmp/mnt   # interactive; offers
+#                                                "Remember in keychain"
+#   umount /tmp/mnt
+# If the keychain entry is missing, the baseline is skipped.
 bench_mount_baseline() {
     local user="$SPICEIO_SMB_USER"
-    local pass="$SPICEIO_SMB_PASS"
     local server="$SMB_SERVER"
     local share="$SMB_SHARE"
 
     MOUNT_POINT="/tmp/spiceio-bench-mount-$$"
     mkdir -p "$MOUNT_POINT"
-    local escaped_pass
-    escaped_pass=$(printf '%s' "$pass" | perl -MURI::Escape -ne 'print uri_escape($_)')
-    if ! mount_smbfs -N "//${user}:${escaped_pass}@${server}/${share}" "$MOUNT_POINT" 2>/dev/null; then
-        echo "  (mount_smbfs failed — skipping baseline)"
+    # -N: no interactive password prompt. mount_smbfs will pull credentials
+    # from the macOS Keychain if available; we deliberately do not pass the
+    # password in argv so it doesn't leak via process listings.
+    if ! mount_smbfs -N "//${user}@${server}/${share}" "$MOUNT_POINT" 2>/dev/null; then
+        echo "  (mount_smbfs failed — set up keychain credentials first; skipping baseline)"
         rmdir "$MOUNT_POINT" 2>/dev/null
         MOUNT_POINT=""
         return

diff --git a/scripts/stress-concurrent.sh b/scripts/stress-concurrent.sh
@@ -28,6 +28,12 @@ FAIL=0
 CONCURRENCY="${SPICEIO_STRESS_CONCURRENCY:-16}"
 CURL_TIMEOUT="${SPICEIO_STRESS_TIMEOUT:-30}"
 
+# Capture spiceio stderr for a post-run guard. We still tee to the script's
+# stderr so CI logs stream live; the file is only used to fail the build if
+# any unexpected `[spiceio] error:` lines slip through (e.g., something we
+# thought was an expected transient turns out to be logged as an error).
+SPICEIO_STDERR="${TMPDIR_BASE}/spiceio-stderr.log"
+
 # ── Cleanup ────────────────────────────────────────────────────────────────
 
 SPICEIO_PID=""
@@ -95,7 +101,7 @@ SPICEIO_SMB_DOMAIN="$SMB_DOMAIN" \
 SPICEIO_SMB_SHARE="$SMB_SHARE" \
 SPICEIO_BUCKET="$BUCKET" \
 SPICEIO_REGION="$REGION" \
-"$SPICEIO_BIN" &
+"$SPICEIO_BIN" 2> >(tee "$SPICEIO_STDERR" >&2) &
 SPICEIO_PID=$!
 
 for i in $(seq 1 30); do
@@ -394,6 +400,28 @@ for i in $(seq 1 "$LARGE_N"); do
 done
 echo "  integrity: $((PASS - PREV_PASS))/${LARGE_N} verified"
 
+# ════════════════════════════════════════════════════════════════════════════
+# Stderr guard
+# ════════════════════════════════════════════════════════════════════════════
+#
+# Any `[spiceio] error:` lines indicate spiceio surfaced something at error
+# level — that should not happen during a clean stress run. Expected
+# transients (sharing violations, NotFound, etc.) are mapped to typed
+# `io::ErrorKind`s and logged via `slog!` (without the "error:" prefix).
+# A hit here means we regressed: a new failure mode is leaking through the
+# generic 500 InternalError arm of `io_to_s3_error`. Fail the build so
+# someone investigates instead of merging it silently again.
+
+# Give the tee subshell a moment to flush after we killed spiceio in cleanup
+# (which runs on EXIT, so we have to flush sync here, before cleanup fires).
+sync 2>/dev/null || true
+if [[ -s "$SPICEIO_STDERR" ]] && grep -q '\[spiceio\] error:' "$SPICEIO_STDERR"; then
+    echo ""
+    echo "FAIL: spiceio emitted unexpected error log lines during stress run:"
+    grep '\[spiceio\] error:' "$SPICEIO_STDERR" | head -20
+    FAIL=$((FAIL + 1))
+fi
+
 # ════════════════════════════════════════════════════════════════════════════
 # Summary
 # ════════════════════════════════════════════════════════════════════════════

diff --git a/scripts/test-extended.sh b/scripts/test-extended.sh
@@ -31,6 +31,10 @@ FAIL=0
 CONCURRENCY="${SPICEIO_EXT_CONCURRENCY:-8}"
 CURL_TIMEOUT="${SPICEIO_EXT_TIMEOUT:-60}"
 
+# Capture spiceio stderr so the post-run guard can flag unexpected
+# `[spiceio] error:` lines. We tee back to stderr so CI still streams live.
+SPICEIO_STDERR="${TMPDIR_BASE}/spiceio-stderr.log"
+
 # ── Cleanup ────────────────────────────────────────────────────────────────
 
 SPICEIO_PID=""
@@ -113,7 +117,7 @@ SPICEIO_SMB_DOMAIN="$SMB_DOMAIN" \
 SPICEIO_SMB_SHARE="$SMB_SHARE" \
 SPICEIO_BUCKET="$BUCKET" \
 SPICEIO_REGION="$REGION" \
-"$SPICEIO_BIN" &
+"$SPICEIO_BIN" 2> >(tee "$SPICEIO_STDERR" >&2) &
 SPICEIO_PID=$!
 
 for i in $(seq 1 60); do
@@ -427,6 +431,23 @@ ORIG_MD5=$(md5 -q "${SC_DIR}/src")
 GOT_MD5=$(md5 -q "${SC_DIR}/dl-after")
 assert_eq "full GET after cancellation succeeds" "$ORIG_MD5" "$GOT_MD5"
 
+# ════════════════════════════════════════════════════════════════════════════
+# Stderr guard
+# ════════════════════════════════════════════════════════════════════════════
+#
+# Expected transients (sharing violations, NotFound, etc.) are mapped to
+# typed `io::ErrorKind`s and logged via `slog!` without the "error:" prefix.
+# A `[spiceio] error:` hit here means a new failure mode is leaking through
+# the generic 500 InternalError arm of `io_to_s3_error` — fail the build.
+
+sync 2>/dev/null || true
+if [[ -s "$SPICEIO_STDERR" ]] && grep -q '\[spiceio\] error:' "$SPICEIO_STDERR"; then
+    echo ""
+    echo "FAIL: spiceio emitted unexpected error log lines during extended run:"
+    grep '\[spiceio\] error:' "$SPICEIO_STDERR" | head -20
+    FAIL=$((FAIL + 1))
+fi
+
 # ════════════════════════════════════════════════════════════════════════════
 # Summary
 # ════════════════════════════════════════════════════════════════════════════

diff --git a/scripts/test-sccache.sh b/scripts/test-sccache.sh
@@ -24,6 +24,10 @@ TEST_PREFIX="spiceio-test-$$"
 PASS=0
 FAIL=0
 
+# Capture spiceio stderr so the post-run guard can flag unexpected
+# `[spiceio] error:` lines. We tee back to stderr so CI still streams live.
+SPICEIO_STDERR=$(mktemp /tmp/spiceio-sccache-stderr.XXXXXX)
+
 # ── Cleanup on exit ─────────────────────────────────────────────────────────
 
 SPICEIO_PID=""
@@ -43,6 +47,7 @@ cleanup() {
         wait "$SPICEIO_PID" 2>/dev/null || true
     fi
     rm -rf /tmp/spiceio-test-*
+    rm -f "$SPICEIO_STDERR"
 }
 trap cleanup EXIT
 
@@ -116,7 +121,7 @@ SPICEIO_SMB_DOMAIN="$SMB_DOMAIN" \
 SPICEIO_SMB_SHARE="$SMB_SHARE" \
 SPICEIO_BUCKET="$BUCKET" \
 SPICEIO_REGION="$REGION" \
-"$SPICEIO_BIN" &
+"$SPICEIO_BIN" 2> >(tee -a "$SPICEIO_STDERR" >&2) &
 SPICEIO_PID=$!
 
 echo "[test] waiting for spiceio on ${BIND}..."
@@ -281,7 +286,7 @@ SPICEIO_SMB_SHARE="$SMB_SHARE" \
 SPICEIO_BUCKET="$BUCKET" \
 SPICEIO_REGION="$REGION" \
 SPICEIO_LOG_FILE="$SPICEIO_LOG2" \
-"$SPICEIO_BIN" &
+"$SPICEIO_BIN" 2> >(tee -a "$SPICEIO_STDERR" >&2) &
 SPICEIO_PID2=$!
 
 echo "[test] waiting for second spiceio instance..."
@@ -404,3 +409,18 @@ else
     echo "[test] FAIL: expected cache hits > 0 (got ${CACHE_HITS:-0}) and write errors == 0 (got ${WRITE_ERRORS:-0})"
     exit 1
 fi
+
+# ── Stderr guard ────────────────────────────────────────────────────────────
+#
+# Any `[spiceio] error:` line means a code path is leaking through the
+# generic 500 InternalError arm of `io_to_s3_error`. Expected transients
+# (NotFound on HEAD probes, sharing violations, etc.) are mapped to typed
+# `io::ErrorKind`s and logged via `slog!` without the "error:" prefix.
+
+sync 2>/dev/null || true
+if [[ -s "$SPICEIO_STDERR" ]] && grep -q '\[spiceio\] error:' "$SPICEIO_STDERR"; then
+    echo ""
+    echo "[test] FAIL: spiceio emitted unexpected error log lines:"
+    grep '\[spiceio\] error:' "$SPICEIO_STDERR" | head -20
+    exit 1
+fi
diff --git a/src/s3/router.rs b/src/s3/router.rs
@@ -593,14 +593,23 @@ async fn handle_get_object(
 
     // Build response with streaming body.
     //
-    // Channel capacity is sized to match the SMB pipeline depth so a full
-    // batch of reads can dump into the channel without blocking the producer.
-    // That lets the SMB-reading task immediately issue the next pipelined
-    // batch (incurring its round-trip) while the HTTP-sending task drains
-    // the previous batch into the wire — back-to-back batches overlap, which
-    // is the difference between filling and starving the 10G link.
-    let (body, tx) = SpiceioBody::channel(crate::smb::ops::READ_PIPELINE_DEPTH);
+    // Channel capacity is sized so a full SMB pipeline batch can dump into
+    // the channel without blocking the producer — that lets the SMB-reading
+    // task immediately issue the next pipelined round-trip while the
+    // HTTP-sending task drains the previous batch, overlapping back-to-back
+    // batches.
+    //
+    // But we also cap by a per-request memory budget: with a configured
+    // `SPICEIO_SMB_MAX_IO` of 1 MiB and `READ_PIPELINE_DEPTH = 64`, an
+    // uncapped channel would buffer up to 64 MiB per concurrent GetObject.
+    // The budget keeps per-request memory bounded — at default 64 KiB
+    // chunks it stays at the full pipeline depth (4 MiB), and at 1 MiB
+    // chunks it falls to 8 (still room to overlap).
+    const STREAM_CHANNEL_MAX_BYTES: usize = 8 * 1024 * 1024;
     let chunk_size = handle.max_chunk;
+    let channel_cap = (STREAM_CHANNEL_MAX_BYTES / chunk_size as usize)
+        .clamp(1, crate::smb::ops::READ_PIPELINE_DEPTH);
+    let (body, tx) = SpiceioBody::channel(channel_cap);
 
     // Spawn background task to stream pipelined SMB reads into the channel.
     // Sends batches of read requests to fill the network pipe, then pushes
@@ -1452,6 +1461,19 @@ fn io_to_s3_error(e: &io::Error) -> Response<SpiceioBody> {
             crate::serr!("[spiceio] access denied: {e}");
             error_response(StatusCode::FORBIDDEN, "AccessDenied", "Access Denied")
         }
+        // Sharing violations are an expected transient under concurrent
+        // writes (the underlying SMB server refused to open a file held by
+        // another handle). Map to 503 SlowDown — the standard S3 retryable
+        // status — and log at info level instead of `serr!`, since these
+        // are not bugs and shouldn't paint CI logs red.
+        io::ErrorKind::ResourceBusy => {
+            crate::slog!("[spiceio] busy (retry): {e}");
+            error_response(
+                StatusCode::SERVICE_UNAVAILABLE,
+                "SlowDown",
+                "Please reduce your request rate.",
+            )
+        }
         _ => {
             crate::serr!("[spiceio] error: {e}");
             error_response(
@@ -1510,3 +1532,42 @@ async fn collect_body(req: Request<Incoming>) -> Bytes {
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// The whole reason this file exists: unit tests in `smb::client` already
+    /// verify `STATUS_SHARING_VIOLATION → io::ErrorKind::ResourceBusy`. What
+    /// was missing was the next hop — that the HTTP layer maps that kind to
+    /// a retryable S3 status, not 500 InternalError. Without this assertion,
+    /// the prior regression (sharing violations leaking as 500 with noisy
+    /// `error:` logs) had no test to catch it.
+    #[test]
+    fn io_to_s3_error_maps_resource_busy_to_slow_down() {
+        let err = io::Error::new(io::ErrorKind::ResourceBusy, "sharing violation: foo");
+        let resp = io_to_s3_error(&err);
+        assert_eq!(resp.status(), StatusCode::SERVICE_UNAVAILABLE);
+    }
+
+    #[test]
+    fn io_to_s3_error_maps_not_found_to_404() {
+        let err = io::Error::new(io::ErrorKind::NotFound, "missing");
+        let resp = io_to_s3_error(&err);
+        assert_eq!(resp.status(), StatusCode::NOT_FOUND);
+    }
+
+    #[test]
+    fn io_to_s3_error_maps_permission_denied_to_403() {
+        let err = io::Error::new(io::ErrorKind::PermissionDenied, "nope");
+        let resp = io_to_s3_error(&err);
+        assert_eq!(resp.status(), StatusCode::FORBIDDEN);
+    }
+
+    #[test]
+    fn io_to_s3_error_other_falls_back_to_500() {
+        let err = io::Error::other("unexpected");
+        let resp = io_to_s3_error(&err);
+        assert_eq!(resp.status(), StatusCode::INTERNAL_SERVER_ERROR);
+    }
+}
diff --git a/src/smb/protocol.rs b/src/smb/protocol.rs
@@ -491,6 +491,11 @@ pub fn decode_read_response_owned(body: Vec<u8>) -> Option<Bytes> {
 /// body) and returns a `Bytes` slice referencing the response payload. Avoids
 /// the extra body-copy that `decode_read_response_owned` would require if the
 /// caller had to split body off first.
+///
+/// Treats `data_offset` as an absolute offset from the start of the SMB2
+/// message and rejects offsets that fall inside the 64-byte header, so a
+/// malformed server response cannot trick us into returning header bytes as
+/// the read payload.
 pub fn decode_read_response_from_msg(msg: Vec<u8>) -> Option<Bytes> {
     if msg.len() < SMB2_HEADER_SIZE + 17 {
         return None;
@@ -499,7 +504,12 @@ pub fn decode_read_response_from_msg(msg: Vec<u8>) -> Option<Bytes> {
     let data_offset = u16::from_le_bytes(body[2..4].try_into().unwrap()) as usize;
     let data_length = u32::from_le_bytes(body[4..8].try_into().unwrap()) as usize;
 
-    // `data_offset` is from the start of the SMB2 message, not the body.
+    // `data_offset` is from the start of the SMB2 message. It must not point
+    // inside the header (or before it) — a malformed server response with an
+    // offset < SMB2_HEADER_SIZE would otherwise slice into header bytes.
+    if data_offset < SMB2_HEADER_SIZE {
+        return None;
+    }
     let start = data_offset;
     let end = start.checked_add(data_length)?;
     if end > msg.len() {
@@ -926,6 +936,34 @@ mod tests {
         assert!(decode_read_response_from_msg(msg).is_none());
     }
 
+    #[test]
+    fn decode_read_response_from_msg_rejects_offset_inside_header() {
+        // A malformed server response with data_offset < SMB2_HEADER_SIZE
+        // would otherwise have us slice into the SMB2 header bytes and return
+        // them as payload. The decoder must reject that case.
+        let mut msg = vec![0u8; SMB2_HEADER_SIZE + 32];
+        // Seed the header with a sentinel so we'd notice if it ever leaked
+        // back as payload.
+        for (i, b) in msg[..SMB2_HEADER_SIZE].iter_mut().enumerate() {
+            *b = 0xA0 | (i as u8 & 0x0F);
+        }
+        let body = &mut msg[SMB2_HEADER_SIZE..];
+        // data_offset = 16 (inside the header, well before SMB2_HEADER_SIZE).
+        body[2..4].copy_from_slice(&16u16.to_le_bytes());
+        body[4..8].copy_from_slice(&8u32.to_le_bytes());
+        assert!(decode_read_response_from_msg(msg).is_none());
+    }
+
+    #[test]
+    fn decode_read_response_from_msg_rejects_offset_zero() {
+        // data_offset = 0 is also inside the header.
+        let mut msg = vec![0u8; SMB2_HEADER_SIZE + 32];
+        let body = &mut msg[SMB2_HEADER_SIZE..];
+        body[2..4].copy_from_slice(&0u16.to_le_bytes());
+        body[4..8].copy_from_slice(&4u32.to_le_bytes());
+        assert!(decode_read_response_from_msg(msg).is_none());
+    }
+
     #[test]
     fn decode_write_response_valid() {
         let mut body = vec![0u8; 16];