Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spiceio"
version = "0.5.3"
version = "0.5.4"
edition = "2024"
description = "S3-compatible API proxy to SMB file shares"
license = "Apache-2.0"
Expand Down
19 changes: 14 additions & 5 deletions scripts/bench-live.sh
Original file line number Diff line number Diff line change
Expand Up @@ -193,18 +193,27 @@ bench_concurrent_read() {
# and runs the same dd-based write/read tests. Establishes the hard
# ceiling for what the link can do, so we can attribute spiceio's
# translation overhead.
#
# The mount uses the macOS Keychain for credentials rather than embedding
# the password in the mount URL — embedded passwords leak via shell
# history and `ps` argv listings, and shelling out to `perl -MURI::Escape`
# adds a non-portable dependency. To prep the keychain once:
# mount_smbfs //user@server/share /tmp/mnt # interactive; offers
# "Remember in keychain"
# umount /tmp/mnt
# If the keychain entry is missing, the baseline is skipped.
bench_mount_baseline() {
local user="$SPICEIO_SMB_USER"
local pass="$SPICEIO_SMB_PASS"
local server="$SMB_SERVER"
local share="$SMB_SHARE"

MOUNT_POINT="/tmp/spiceio-bench-mount-$$"
mkdir -p "$MOUNT_POINT"
local escaped_pass
escaped_pass=$(printf '%s' "$pass" | perl -MURI::Escape -ne 'print uri_escape($_)')
if ! mount_smbfs -N "//${user}:${escaped_pass}@${server}/${share}" "$MOUNT_POINT" 2>/dev/null; then
echo " (mount_smbfs failed — skipping baseline)"
# -N: no interactive password prompt. mount_smbfs will pull credentials
# from the macOS Keychain if available; we deliberately do not pass the
# password in argv so it doesn't leak via process listings.
if ! mount_smbfs -N "//${user}@${server}/${share}" "$MOUNT_POINT" 2>/dev/null; then
echo " (mount_smbfs failed — set up keychain credentials first; skipping baseline)"
rmdir "$MOUNT_POINT" 2>/dev/null
MOUNT_POINT=""
return
Expand Down
42 changes: 41 additions & 1 deletion scripts/stress-concurrent.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ FAIL=0
CONCURRENCY="${SPICEIO_STRESS_CONCURRENCY:-16}"
CURL_TIMEOUT="${SPICEIO_STRESS_TIMEOUT:-30}"

# Capture spiceio stderr for a post-run guard. We still tee to the script's
# stderr so CI logs stream live; the file is only used to fail the build if
# any unexpected `[spiceio] error:` lines slip through (e.g., something we
# thought was an expected transient turns out to be logged as an error).
SPICEIO_STDERR="${TMPDIR_BASE}/spiceio-stderr.log"

# ── Cleanup ────────────────────────────────────────────────────────────────

SPICEIO_PID=""
Expand Down Expand Up @@ -95,7 +101,7 @@ SPICEIO_SMB_DOMAIN="$SMB_DOMAIN" \
SPICEIO_SMB_SHARE="$SMB_SHARE" \
SPICEIO_BUCKET="$BUCKET" \
SPICEIO_REGION="$REGION" \
"$SPICEIO_BIN" &
"$SPICEIO_BIN" 2> >(tee "$SPICEIO_STDERR" >&2) &
SPICEIO_PID=$!

for i in $(seq 1 30); do
Expand Down Expand Up @@ -394,6 +400,40 @@ for i in $(seq 1 "$LARGE_N"); do
done
echo " integrity: $((PASS - PREV_PASS))/${LARGE_N} verified"

# ════════════════════════════════════════════════════════════════════════════
# Stderr guard
# ════════════════════════════════════════════════════════════════════════════
#
# Any `[spiceio] error:` lines indicate spiceio surfaced something at error
# level — that should not happen during a clean stress run. Expected
# transients (sharing violations, NotFound, etc.) are mapped to typed
# `io::ErrorKind`s and logged via `slog!` (without the "error:" prefix).
# A hit here means we regressed: a new failure mode is leaking through the
# generic 500 InternalError arm of `io_to_s3_error`. Fail the build so
# someone investigates instead of merging it silently again.

# Stop spiceio *before* grepping the captured stderr. spiceio's stderr is
# piped through `tee` running in a process substitution; the tee process
# only finishes draining and closes the capture file when spiceio closes its
# stderr fd. The cleanup trap runs on EXIT, so it would fire too late for
# the guard below — we have to kill+wait here. Set SPICEIO_PID empty so the
# trap's own kill becomes a no-op.
if [[ -n "$SPICEIO_PID" ]]; then
kill "$SPICEIO_PID" 2>/dev/null || true
wait "$SPICEIO_PID" 2>/dev/null || true
SPICEIO_PID=""
fi
# tee in process substitution isn't directly waitable from bash, so give it
# a brief moment to drain after spiceio closed its stderr.
sleep 0.2

if [[ -s "$SPICEIO_STDERR" ]] && grep -q '\[spiceio\] error:' "$SPICEIO_STDERR"; then
echo ""
echo "FAIL: spiceio emitted unexpected error log lines during stress run:"
grep '\[spiceio\] error:' "$SPICEIO_STDERR" | head -20
FAIL=$((FAIL + 1))
fi

# ════════════════════════════════════════════════════════════════════════════
# Summary
# ════════════════════════════════════════════════════════════════════════════
Expand Down
33 changes: 32 additions & 1 deletion scripts/test-extended.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ FAIL=0
CONCURRENCY="${SPICEIO_EXT_CONCURRENCY:-8}"
CURL_TIMEOUT="${SPICEIO_EXT_TIMEOUT:-60}"

# Capture spiceio stderr so the post-run guard can flag unexpected
# `[spiceio] error:` lines. We tee back to stderr so CI still streams live.
SPICEIO_STDERR="${TMPDIR_BASE}/spiceio-stderr.log"

# ── Cleanup ────────────────────────────────────────────────────────────────

SPICEIO_PID=""
Expand Down Expand Up @@ -113,7 +117,7 @@ SPICEIO_SMB_DOMAIN="$SMB_DOMAIN" \
SPICEIO_SMB_SHARE="$SMB_SHARE" \
SPICEIO_BUCKET="$BUCKET" \
SPICEIO_REGION="$REGION" \
"$SPICEIO_BIN" &
"$SPICEIO_BIN" 2> >(tee "$SPICEIO_STDERR" >&2) &
SPICEIO_PID=$!

for i in $(seq 1 60); do
Expand Down Expand Up @@ -427,6 +431,33 @@ ORIG_MD5=$(md5 -q "${SC_DIR}/src")
GOT_MD5=$(md5 -q "${SC_DIR}/dl-after")
assert_eq "full GET after cancellation succeeds" "$ORIG_MD5" "$GOT_MD5"

# ════════════════════════════════════════════════════════════════════════════
# Stderr guard
# ════════════════════════════════════════════════════════════════════════════
#
# Expected transients (sharing violations, NotFound, etc.) are mapped to
# typed `io::ErrorKind`s and logged via `slog!` without the "error:" prefix.
# A `[spiceio] error:` hit here means a new failure mode is leaking through
# the generic 500 InternalError arm of `io_to_s3_error` — fail the build.

# Stop spiceio *before* grepping the captured stderr. tee in the process
# substitution only closes the capture file when spiceio's stderr fd closes,
# so we have to kill+wait here rather than rely on the EXIT trap. Clear
# SPICEIO_PID so the trap's own kill becomes a no-op.
if [[ -n "$SPICEIO_PID" ]]; then
kill "$SPICEIO_PID" 2>/dev/null || true
wait "$SPICEIO_PID" 2>/dev/null || true
SPICEIO_PID=""
fi
sleep 0.2 # tee in <(...) isn't directly waitable; let it drain.

if [[ -s "$SPICEIO_STDERR" ]] && grep -q '\[spiceio\] error:' "$SPICEIO_STDERR"; then
echo ""
echo "FAIL: spiceio emitted unexpected error log lines during extended run:"
grep '\[spiceio\] error:' "$SPICEIO_STDERR" | head -20
FAIL=$((FAIL + 1))
fi

# ════════════════════════════════════════════════════════════════════════════
# Summary
# ════════════════════════════════════════════════════════════════════════════
Expand Down
39 changes: 37 additions & 2 deletions scripts/test-sccache.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ TEST_PREFIX="spiceio-test-$$"
PASS=0
FAIL=0

# Capture spiceio stderr so the post-run guard can flag unexpected
# `[spiceio] error:` lines. We tee back to stderr so CI still streams live.
SPICEIO_STDERR=$(mktemp /tmp/spiceio-sccache-stderr.XXXXXX)

# ── Cleanup on exit ─────────────────────────────────────────────────────────

SPICEIO_PID=""
Expand All @@ -43,6 +47,7 @@ cleanup() {
wait "$SPICEIO_PID" 2>/dev/null || true
fi
rm -rf /tmp/spiceio-test-*
rm -f "$SPICEIO_STDERR"
}
trap cleanup EXIT

Expand Down Expand Up @@ -116,7 +121,7 @@ SPICEIO_SMB_DOMAIN="$SMB_DOMAIN" \
SPICEIO_SMB_SHARE="$SMB_SHARE" \
SPICEIO_BUCKET="$BUCKET" \
SPICEIO_REGION="$REGION" \
"$SPICEIO_BIN" &
"$SPICEIO_BIN" 2> >(tee -a "$SPICEIO_STDERR" >&2) &
SPICEIO_PID=$!

echo "[test] waiting for spiceio on ${BIND}..."
Expand Down Expand Up @@ -281,7 +286,7 @@ SPICEIO_SMB_SHARE="$SMB_SHARE" \
SPICEIO_BUCKET="$BUCKET" \
SPICEIO_REGION="$REGION" \
SPICEIO_LOG_FILE="$SPICEIO_LOG2" \
"$SPICEIO_BIN" &
"$SPICEIO_BIN" 2> >(tee -a "$SPICEIO_STDERR" >&2) &
SPICEIO_PID2=$!

echo "[test] waiting for second spiceio instance..."
Expand Down Expand Up @@ -404,3 +409,33 @@ else
echo "[test] FAIL: expected cache hits > 0 (got ${CACHE_HITS:-0}) and write errors == 0 (got ${WRITE_ERRORS:-0})"
exit 1
fi

# ── Stderr guard ────────────────────────────────────────────────────────────
#
# Any `[spiceio] error:` line means a code path is leaking through the
# generic 500 InternalError arm of `io_to_s3_error`. Expected transients
# (NotFound on HEAD probes, sharing violations, etc.) are mapped to typed
# `io::ErrorKind`s and logged via `slog!` without the "error:" prefix.

# Stop both spiceio instances *before* grepping the captured stderr. tee in
# the process substitutions only closes the capture file when each spiceio
# closes its stderr fd, so we have to kill+wait here rather than rely on
# the EXIT trap. Clear PIDs so the trap's own kills become no-ops.
if [[ -n "$SPICEIO_PID2" ]]; then
kill "$SPICEIO_PID2" 2>/dev/null || true
wait "$SPICEIO_PID2" 2>/dev/null || true
SPICEIO_PID2=""
fi
if [[ -n "$SPICEIO_PID" ]]; then
kill "$SPICEIO_PID" 2>/dev/null || true
wait "$SPICEIO_PID" 2>/dev/null || true
SPICEIO_PID=""
fi
sleep 0.2 # tee in <(...) isn't directly waitable; let it drain.

if [[ -s "$SPICEIO_STDERR" ]] && grep -q '\[spiceio\] error:' "$SPICEIO_STDERR"; then
echo ""
echo "[test] FAIL: spiceio emitted unexpected error log lines:"
grep '\[spiceio\] error:' "$SPICEIO_STDERR" | head -20
exit 1
fi
131 changes: 124 additions & 7 deletions src/s3/router.rs
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,24 @@ async fn handle_list_objects(

// ── GetObject (streaming, with Range + Conditional) ─────────────────────────

/// Compute the SMB-reads→HTTP-writes channel capacity for a streaming
/// GetObject given the SMB-negotiated chunk size.
///
/// Sized to hold one full SMB read pipeline batch (so back-to-back batches
/// can overlap) but capped by an 8 MiB per-request memory budget so a
/// configured `SPICEIO_SMB_MAX_IO` of 1 MiB doesn't blow up to 64 MiB of
/// buffering per concurrent stream.
///
/// Guards against `chunk_size = 0`: `handle.max_chunk` ultimately comes from
/// the SMB server's `max_read_size` in the negotiate response. If a server
/// (or a misconfiguration) yields 0, naive `BUDGET / chunk_size` would
/// divide-by-zero panic and crash the streaming task. We floor at 1 here.
fn stream_channel_capacity(chunk_size: u32) -> usize {
const STREAM_CHANNEL_MAX_BYTES: usize = 8 * 1024 * 1024;
let chunk_size_for_cap = (chunk_size as usize).max(1);
(STREAM_CHANNEL_MAX_BYTES / chunk_size_for_cap).clamp(1, crate::smb::ops::READ_PIPELINE_DEPTH)
}

async fn handle_get_object(
hdrs: &http::HeaderMap,
share: &ShareSession,
Expand Down Expand Up @@ -593,14 +611,18 @@ async fn handle_get_object(

// Build response with streaming body.
//
// Channel capacity is sized to match the SMB pipeline depth so a full
// batch of reads can dump into the channel without blocking the producer.
// That lets the SMB-reading task immediately issue the next pipelined
// batch (incurring its round-trip) while the HTTP-sending task drains
// the previous batch into the wire — back-to-back batches overlap, which
// is the difference between filling and starving the 10G link.
let (body, tx) = SpiceioBody::channel(crate::smb::ops::READ_PIPELINE_DEPTH);
// Channel capacity is sized so a full SMB pipeline batch can dump into
// the channel without blocking the producer — that lets the SMB-reading
// task immediately issue the next pipelined round-trip while the
// HTTP-sending task drains the previous batch, overlapping back-to-back
// batches. We also cap by a per-request memory budget: with a configured
// `SPICEIO_SMB_MAX_IO` of 1 MiB and `READ_PIPELINE_DEPTH = 64`, an
// uncapped channel would buffer up to 64 MiB per concurrent GetObject.
// At default 64 KiB chunks the channel stays at the full pipeline depth
// (4 MiB); at 1 MiB chunks it falls to 8 (still room to overlap).
let chunk_size = handle.max_chunk;
Comment thread
lukekim marked this conversation as resolved.
let channel_cap = stream_channel_capacity(chunk_size);
let (body, tx) = SpiceioBody::channel(channel_cap);

// Spawn background task to stream pipelined SMB reads into the channel.
// Sends batches of read requests to fill the network pipe, then pushes
Expand Down Expand Up @@ -1452,6 +1474,19 @@ fn io_to_s3_error(e: &io::Error) -> Response<SpiceioBody> {
crate::serr!("[spiceio] access denied: {e}");
error_response(StatusCode::FORBIDDEN, "AccessDenied", "Access Denied")
}
// Sharing violations are an expected transient under concurrent
// writes (the underlying SMB server refused to open a file held by
// another handle). Map to 503 SlowDown — the standard S3 retryable
// status — and log at info level instead of `serr!`, since these
// are not bugs and shouldn't paint CI logs red.
io::ErrorKind::ResourceBusy => {
crate::slog!("[spiceio] busy (retry): {e}");
error_response(
StatusCode::SERVICE_UNAVAILABLE,
"SlowDown",
"Please reduce your request rate.",
)
}
_ => {
crate::serr!("[spiceio] error: {e}");
error_response(
Expand Down Expand Up @@ -1510,3 +1545,85 @@ async fn collect_body(req: Request<Incoming>) -> Bytes {
}
}
}

#[cfg(test)]
mod tests {
use super::*;

/// The whole reason this file exists: unit tests in `smb::client` already
/// verify `STATUS_SHARING_VIOLATION → io::ErrorKind::ResourceBusy`. What
/// was missing was the next hop — that the HTTP layer maps that kind to
/// a retryable S3 status, not 500 InternalError. Without this assertion,
/// the prior regression (sharing violations leaking as 500 with noisy
/// `error:` logs) had no test to catch it.
#[test]
fn io_to_s3_error_maps_resource_busy_to_slow_down() {
let err = io::Error::new(io::ErrorKind::ResourceBusy, "sharing violation: foo");
let resp = io_to_s3_error(&err);
assert_eq!(resp.status(), StatusCode::SERVICE_UNAVAILABLE);
}

#[test]
fn io_to_s3_error_maps_not_found_to_404() {
let err = io::Error::new(io::ErrorKind::NotFound, "missing");
let resp = io_to_s3_error(&err);
assert_eq!(resp.status(), StatusCode::NOT_FOUND);
}

#[test]
fn io_to_s3_error_maps_permission_denied_to_403() {
let err = io::Error::new(io::ErrorKind::PermissionDenied, "nope");
let resp = io_to_s3_error(&err);
assert_eq!(resp.status(), StatusCode::FORBIDDEN);
}

#[test]
fn io_to_s3_error_other_falls_back_to_500() {
let err = io::Error::other("unexpected");
let resp = io_to_s3_error(&err);
assert_eq!(resp.status(), StatusCode::INTERNAL_SERVER_ERROR);
}

// ── stream_channel_capacity ─────────────────────────────────────────────

#[test]
fn stream_channel_capacity_handles_zero_chunk() {
// The SMB negotiate response is the source of `max_chunk`; an
// unusual server (or a misconfiguration) could yield 0. The naive
// `BUDGET / 0` would panic and crash the streaming task — the
// floor-at-1 inside `stream_channel_capacity` keeps it safe. The
// exact value doesn't matter (a 0-byte chunk is nonsense anyway);
// we only assert (a) we don't panic and (b) the result is a sane
// pipeline-depth-bounded slot count.
let cap = stream_channel_capacity(0);
assert!(
(1..=crate::smb::ops::READ_PIPELINE_DEPTH).contains(&cap),
"expected 1..={} got {cap}",
crate::smb::ops::READ_PIPELINE_DEPTH
);
}

#[test]
fn stream_channel_capacity_default_chunk_uses_full_pipeline() {
// 64 KiB chunks → 8 MiB / 64 KiB = 128, clamped to the pipeline
// depth so a full SMB read batch can dump into the channel.
assert_eq!(
stream_channel_capacity(65536),
crate::smb::ops::READ_PIPELINE_DEPTH
);
}

#[test]
fn stream_channel_capacity_large_chunk_falls_below_pipeline() {
// 1 MiB chunks → 8 MiB / 1 MiB = 8, well below pipeline depth.
// This is the case the per-request memory budget exists to handle.
assert_eq!(stream_channel_capacity(1024 * 1024), 8);
}

#[test]
fn stream_channel_capacity_huge_chunk_floors_at_one() {
// A chunk bigger than the entire budget still yields a 1-slot
// channel, never 0 — the producer can always make progress.
assert_eq!(stream_channel_capacity(16 * 1024 * 1024), 1);
}
}
Loading
Loading