Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,25 @@ zarrs_storage = { version = "0.4.0", features = ["async"] }
aws-config = { version = "1.5.18" }
aws-sdk-s3 = { version = "1.78.0" }
criterion = { version = "0.7.0", features = ["async_tokio"] }
google-cloud-storage = "1.6.0"
walkdir = { version = "2.5.0" }

[[bench]]
name = "shared"
path = "benches/shared.rs"
# shared benchmark code, not meant to be run alone
test = false
bench = false

[[bench]]
name = "s3_bench"
path = "benches/s3_bench.rs"
harness = false
required-features = ["icechunk"]
required-features = ["icechunk", "datafusion"]

[[bench]]
name = "gcs_bench"
path = "benches/gcs_bench.rs"
harness = false
required-features = ["icechunk", "datafusion"]

97 changes: 97 additions & 0 deletions benches/gcs_bench.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
mod shared;

use std::collections::HashMap;
use std::sync::Arc;

use criterion::{criterion_group, criterion_main, Criterion};
use datafusion::datasource::listing::ListingTableUrl;
use icechunk::config::GcsCredentials;
use icechunk::{ObjectStorage, Repository};
use zarrs_icechunk::AsyncIcechunkStore;

use shared::{CloudStorageBenchBackend, TestFixture, run_benchmark_group};

// ============================================================================
// GCS Backend Implementation
// ============================================================================

/// Benchmark backend describing a Google Cloud Storage location as a bucket
/// name plus a key prefix.
///
/// Both fields are populated by `GCSBenchBackend::new` and read back through
/// the `bucket()` / `prefix()` accessors of the `CloudStorageBenchBackend` impl.
struct GCSBenchBackend {
// Bucket name, stored exactly as passed to `new`.
_bucket: String,
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: everything is private unless you say it's public in rust, no need for the underscores for class members.

// Key prefix inside the bucket, stored exactly as passed to `new`.
_prefix: String,
}

impl GCSBenchBackend {
async fn new(bucket: String, prefix: String) -> Self {
Self {
_bucket: bucket,
_prefix: prefix,
}
}
}

#[async_trait::async_trait]
impl CloudStorageBenchBackend for GCSBenchBackend {
/// Opens the icechunk repository located at `url` on GCS (creating it when it
/// cannot be opened) and returns an async store backed by a writable session
/// on the `main` branch.
///
/// `url` is parsed with `ListingTableUrl::parse`; the object-store part of the
/// URL (with its `gs://` scheme and trailing `/` removed) is used as the bucket
/// name and the remaining path as the key prefix. Credentials are taken from
/// the environment (`GcsCredentials::FromEnv`).
///
/// # Panics
///
/// Panics if the URL cannot be parsed, if the GCS storage cannot be set up, if
/// the repository can neither be opened nor created, or if a writable session
/// on `main` cannot be obtained. This is benchmark set-up code, so failing
/// loudly is intended.
async fn create_icechunk_store(url: &str) -> Arc<AsyncIcechunkStore> {
    let listing_url =
        ListingTableUrl::parse(url).expect("benchmark URL should parse as a listing table URL");

    // Remove only the leading scheme (not every occurrence of "gs://") and the
    // trailing slash, leaving the bare bucket name. The temporary URL is kept
    // in its own scope so no borrow outlives this statement.
    let bucket = {
        let object_store_url = listing_url.object_store();
        let authority = object_store_url.as_str();
        authority
            .strip_prefix("gs://")
            .unwrap_or(authority)
            .trim_end_matches('/')
            .to_string()
    };

    let credentials = GcsCredentials::FromEnv;

    let store = Arc::new(
        ObjectStorage::new_gcs(
            bucket,
            Some(listing_url.prefix().as_ref().to_string()),
            Some(credentials),
            None,
        )
        .await
        .expect("failed to set up GCS object storage for the benchmark"),
    );

    // NOTE(review): any error from `open` (not only "repository does not
    // exist") falls through to `create`; a credentials or network failure will
    // therefore surface as a panic from the `create` call below.
    let repo = match Repository::open(None, store.clone(), HashMap::new()).await {
        Ok(repo) => repo,
        Err(_) => Repository::create(None, store, HashMap::new())
            .await
            .expect("failed to open or create the icechunk repository"),
    };
    let session = repo
        .writable_session("main")
        .await
        .expect("failed to start a writable session on branch `main`");

    Arc::new(AsyncIcechunkStore::new(session))
}

/// Intentionally does nothing: the body contains no statements, only the
/// explanatory comment below.
///
/// NOTE(review): the comment below defers clean-up to `TestFixture`'s `Drop`.
/// If that `Drop` in turn calls this method, nothing is actually removed for
/// the GCS backend — confirm against `shared.rs`.
async fn cleanup(&self) {
// Cleanup is handled by the TestFixture Drop implementation
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure I follow... the test fixture calls this method when it's dropped, but you haven't implemented anything here (but you did for the s3 version)?
Generally speaking though, why do we need this, that gets called in the test fixture drop? why not simply implement drop on the gcs and s3 backends directly?

// which uses the icechunk store to clean up resources
}

/// Returns the bucket name this backend was constructed with.
fn bucket(&self) -> &str {
    self._bucket.as_str()
}

/// Returns the key prefix this backend was constructed with.
fn prefix(&self) -> &str {
    self._prefix.as_str()
}
}


/// Criterion entry point for the GCS benchmarks.
///
/// Creates a Tokio runtime, uses it to build a `TestFixture` around a
/// `GCSBenchBackend` for `gs://zarr-unit-tests/test_data_gcs`, and then passes
/// the fixture's session to `run_benchmark_group` under the group name
/// `gcs_benchmarks`.
///
/// Panics if the Tokio runtime cannot be created.
fn gcs_benchmark_group(c: &mut Criterion) {
    let url = "gs://zarr-unit-tests/test_data_gcs";
    let runtime = tokio::runtime::Runtime::new().unwrap();

    // Backend and fixture construction are async, so drive them to completion
    // on the runtime before handing control to the benchmark group.
    let fixture = runtime.block_on(async {
        let gcs_backend =
            GCSBenchBackend::new("zarr-unit-tests".into(), "test_data_gcs".into()).await;
        TestFixture::new(gcs_backend, url).await
    });

    run_benchmark_group(fixture.get_session(), c, "gcs_benchmarks");
}

// Criterion wiring: declare the `gcs_benches` group containing
// `gcs_benchmark_group`, then have `criterion_main!` produce the benchmark
// binary's entry point for that group.
criterion_group!(gcs_benches, gcs_benchmark_group);
criterion_main!(gcs_benches);
Loading