2323
2424CONSTRUCTORS = {
2525 "s3" : ic .s3_storage ,
26+ "s3_ob" : ic .storage .s3_object_store_storage ,
2627 "gcs" : ic .gcs_storage ,
2728 "tigris" : ic .tigris_storage ,
2829 "local" : ic .local_filesystem_storage ,
3132TEST_BUCKETS = {
3233 "s3" : dict (store = "s3" , bucket = "icechunk-test" , region = "us-east-1" ),
3334 "gcs" : dict (store = "gcs" , bucket = "icechunk-test-gcp" , region = "us-east1" ),
35+ # "gcs": dict(store="gcs", bucket="arraylake-scratch", region="us-east1"),
3436 # not using region="auto", because for now we pass this directly to coiled.
3537 "r2" : dict (store = "r2" , bucket = "icechunk-test-r2" , region = "us-east-1" ),
3638 # "tigris": dict(
3941 "tigris" : dict (store = "tigris" , bucket = "icechunk-test" , region = "iad" ),
4042 "local" : dict (store = "local" , bucket = platformdirs .site_cache_dir ()),
4143}
44+ TEST_BUCKETS ["s3_ob" ] = TEST_BUCKETS ["s3" ]
4245BUCKETS = {
4346 "s3" : dict (store = "s3" , bucket = PUBLIC_DATA_BUCKET , region = "us-east-1" ),
4447 "gcs" : dict (store = "gcs" , bucket = PUBLIC_DATA_BUCKET + "-gcs" , region = "us-east1" ),
@@ -205,7 +208,16 @@ def store(self) -> ic.IcechunkStore:
205208
206209
@dataclass(kw_only=True)
class BenchmarkWriteDataset(Dataset):
    """Dataset description for write-oriented benchmarks.

    Extends ``Dataset`` with the array count and the array/chunk shapes.
    Every field is keyword-only because of ``kw_only=True``.
    """

    # presumably the number of arrays the benchmark writes — confirm in the
    # benchmark code that consumes this dataset
    num_arrays: int
    # per-array shape, one entry per dimension
    shape: tuple[int, ...]
    # per-array chunk shape, one entry per dimension
    chunks: tuple[int, ...]
    # whether to skip this one on local runs
    skip_local: bool = False
217+
218+
219+ @dataclass (kw_only = True )
220+ class BenchmarkReadDataset (Dataset ):
209221 # data variable to load in `time_xarray_read_chunks`
210222 load_variables : list [str ] | None = None
211223 # Passed to .isel for `time_xarray_read_chunks`
@@ -377,7 +389,7 @@ def setup_era5(*args, **kwargs):
377389 arrays = [],
378390)
379391
380- ERA5 = BenchmarkDataset (
392+ ERA5 = BenchmarkReadDataset (
381393 # weatherbench2 data - 5 years
382394 skip_local = False ,
383395 storage_config = StorageConfig (prefix = "era5-weatherbench" ),
@@ -390,15 +402,15 @@ def setup_era5(*args, **kwargs):
390402 # setupfn=partial(setup_ingest_for_benchmarks, ingest=ERA5_WB),
391403)
392404
# Read-benchmark dataset stored under the "era5-arco" prefix.
# Setup calls `setup_ingest_for_benchmarks` with the ERA5_ARCO_INGEST ingest.
ERA5_ARCO = BenchmarkReadDataset(
    storage_config=StorageConfig(prefix="era5-arco"),
    group="1x721x1440",
    first_byte_variable="latitude",
    setupfn=partial(setup_ingest_for_benchmarks, ingest=ERA5_ARCO_INGEST),
    # this dataset is not skipped on local runs
    skip_local=False,
)
400412
401- # ERA5_LARGE = BenchmarkDataset (
413+ # ERA5_LARGE = BenchmarkReadDataset (
402414# skip_local=True,
403415# storage_config=StorageConfig(
404416# bucket="icechunk-public-data", prefix="era5-weatherbench2"
@@ -411,7 +423,7 @@ def setup_era5(*args, **kwargs):
411423# # by mistake
412424# )
413425
414- ERA5_SINGLE = BenchmarkDataset (
426+ ERA5_SINGLE = BenchmarkReadDataset (
415427 # Single NCAR AWS PDS ERA5 netCDF
416428 storage_config = StorageConfig (prefix = "perf-era5-single" ),
417429 load_variables = ["PV" ],
@@ -420,15 +432,15 @@ def setup_era5(*args, **kwargs):
420432 setupfn = setup_era5_single ,
421433)
422434
# Synthetic read-benchmark dataset stored under the "gb-128mb-chunks" prefix.
# Setup calls `setup_synthetic_gb_dataset` with a (64, 512, 512) chunk shape
# (16,777,216 elements per chunk; presumably 128 MiB at 8 bytes per element —
# confirm the dtype in the setup function).
GB_128MB_CHUNKS = BenchmarkReadDataset(
    storage_config=StorageConfig(prefix="gb-128mb-chunks"),
    setupfn=partial(setup_synthetic_gb_dataset, chunk_shape=(64, 512, 512)),
    load_variables=["array"],
    # empty selector: no indexers are passed
    chunk_selector={},
    first_byte_variable=None,
)
430442
431- GB_8MB_CHUNKS = BenchmarkDataset (
443+ GB_8MB_CHUNKS = BenchmarkReadDataset (
432444 storage_config = StorageConfig (prefix = "gb-8mb-chunks" ),
433445 load_variables = ["array" ],
434446 chunk_selector = {},
@@ -437,7 +449,7 @@ def setup_era5(*args, **kwargs):
437449)
438450
439451# TODO
440- GPM_IMERG_VIRTUAL = BenchmarkDataset (
452+ GPM_IMERG_VIRTUAL = BenchmarkReadDataset (
441453 storage_config = StorageConfig (
442454 store = "s3" ,
443455 bucket = "earthmover-icechunk-us-west-2" ,
@@ -451,3 +463,17 @@ def setup_era5(*args, **kwargs):
451463 chunk_selector = {"time" : 1 },
452464 first_byte_variable = "lat" ,
453465)
466+
467+
# Write-benchmark dataset: a single (320, 720, 1441) array with chunks of
# length 1 along the first axis.
# NOTE(review): the -1 chunk entries presumably mean "full extent of that
# axis" — confirm against the code that consumes `chunks`.
PANCAKE_WRITES = BenchmarkWriteDataset(
    storage_config=StorageConfig(prefix="pancake_writes"),
    num_arrays=1,
    shape=(320, 720, 1441),
    chunks=(1, -1, -1),
)
# Write-benchmark dataset: a single 1-D array of 2,000,000 elements split
# into chunks of 1000 elements (2000 chunks in total).
SIMPLE_1D = BenchmarkWriteDataset(
    storage_config=StorageConfig(prefix="simple_1d_writes"),
    num_arrays=1,
    shape=(2_000_000,),
    chunks=(1000,),
)
0 commit comments