Skip to content

Commit 6abd992

Browse files
fix(shuffle): strip object key before parsing ObjectStoreUrl for S3 shuffle
ObjectStoreUrl::parse rejects URLs containing anything beyond the scheme and authority. The runtime-backed object store shuffle reader was passing the full shuffle file URL (e.g. s3://bucket/job-id/.../data-35.arrow) into ObjectStoreUrl::parse, causing async queries with S3 shuffle storage to fail with: ObjectStoreUrl must only contain scheme and authority, got: /job-id/.../data-35.arrow Slice the URL up to url::Position::BeforePath when looking up the object store in the runtime registry. The full path is still used to fetch the individual object via store.get(). Adds a regression test that pins the invariant.
1 parent 7e9872a commit 6abd992

1 file changed

Lines changed: 29 additions & 5 deletions

File tree

ballista/core/src/execution_plans/shuffle_reader.rs

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -951,11 +951,14 @@ async fn fetch_partition_object_store_with_runtime(
951951

952952
// Get the object store from the RuntimeEnv's registry
953953
// This uses the credentials configured in the runtime (e.g., SpiceObjectStoreRegistry)
954-
let object_store_url = ObjectStoreUrl::parse(&url).map_err(|e| {
955-
BallistaError::General(format!(
956-
"Failed to parse object store URL '{path}': {e:?}"
957-
))
958-
})?;
954+
// ObjectStoreUrl::parse rejects anything beyond scheme + authority, so strip the path
955+
// (which contains the object key) before parsing.
956+
let object_store_url = ObjectStoreUrl::parse(&url[..url::Position::BeforePath])
957+
.map_err(|e| {
958+
BallistaError::General(format!(
959+
"Failed to parse object store URL '{path}': {e:?}"
960+
))
961+
})?;
959962

960963
let store = runtime_env.object_store(&object_store_url).map_err(|e| {
961964
BallistaError::FetchFailed(
@@ -2250,4 +2253,25 @@ mod tests {
22502253

22512254
Ok(())
22522255
}
2256+
2257+
/// Regression test: ObjectStoreUrl::parse rejects URLs containing a path,
2258+
/// so when resolving the object store for a shuffle file we must strip the
2259+
/// object key (everything past the authority) before parsing. Without this,
2260+
/// an S3 shuffle path like `s3://bucket/path/data.arrow` fails with
2261+
/// "ObjectStoreUrl must only contain scheme and authority".
2262+
#[test]
2263+
fn test_object_store_url_strips_path() {
2264+
let path =
2265+
"s3://my-bucket/shuffle/job-id/1/4i1vaNv/1/0/data-35.arrow".to_string();
2266+
let url = Url::parse(&path).unwrap();
2267+
2268+
ObjectStoreUrl::parse(&url[..url::Position::BeforePath])
2269+
.expect("scheme+authority slice must parse as an ObjectStoreUrl");
2270+
2271+
assert!(
2272+
ObjectStoreUrl::parse(url.as_str()).is_err(),
2273+
"passing the full URL (with object key) should still fail — \
2274+
this guards the invariant the fix relies on"
2275+
);
2276+
}
22532277
}

0 commit comments

Comments
 (0)