Skip to content

Commit f35b16b

Browse files
committed
ZarrV3: make sure to use non vsi streaming filenames when reading sharded datasets
1 parent f2bfd56 commit f35b16b

File tree

2 files changed

+119
-2
lines changed

2 files changed

+119
-2
lines changed

autotest/gdrivers/zarr_driver.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
import gdaltest
2424
import pytest
25+
import webserver
2526

2627
from osgeo import gdal, osr
2728

@@ -6163,6 +6164,117 @@ def test_zarr_read_simple_sharding_read_errors(tmp_vsimem):
61636164
assert "invalid chunk location for chunk" in error_msgs
61646165

61656166

6167+
###############################################################################
6168+
# Test accessing a sharded dataset via /vsicurl/
6169+
6170+
6171+
@pytest.mark.require_curl()
@gdaltest.enable_exceptions()
def test_zarr_read_simple_sharding_network():
    """Read one block of a sharded Zarr V3 array through /vsicurl/.

    A local scripted HTTP server describes a 2x2 uint8 array stored with the
    ``sharding_indexed`` codec. The request script requires that the shard
    ``c/0/0`` (advertised as 65536 bytes) is fetched with two range requests:
    first its last 16384 bytes, then its first 16384 bytes. The test passes
    when the block read back equals the four payload bytes served at the
    start of the shard.
    """

    zarr_drv = gdal.GetDriverByName("Zarr")
    if "zstd" not in zarr_drv.GetMetadataItem("COMPRESSORS"):
        pytest.skip("compressor zstd not available")

    proc, port = webserver.launch(handler=webserver.DispatcherHttpHandler)
    if port == 0:
        pytest.skip()

    try:
        # Both the chunk pipeline and the shard index are plain
        # little-endian bytes.
        bytes_codec = {"name": "bytes", "configuration": {"endian": "little"}}
        sharding_codec = {
            "name": "sharding_indexed",
            "configuration": {
                "chunk_shape": [2, 2],
                "codecs": [bytes_codec],
                "index_codecs": [bytes_codec],
            },
        }
        array_definition = {
            "shape": [2, 2],
            "data_type": "uint8",
            "chunk_grid": {
                "name": "regular",
                "configuration": {"chunk_shape": [2, 2]},
            },
            "chunk_key_encoding": {
                "name": "default",
                "configuration": {"separator": "/"},
            },
            "fill_value": 0,
            "codecs": [sharding_codec],
            "attributes": {},
            "zarr_format": 3,
            "node_type": "array",
            "storage_transformers": [],
        }
        array_metadata = json.dumps(array_definition)
        metadata_length = "%d" % len(array_metadata)

        # The order of the add() calls below is the order in which the
        # requests are expected to arrive.
        script = webserver.SequentialHandler()
        script.add("GET", "/test.zarr/", 404)
        for absent in (".zmetadata", ".zarray", ".zgroup"):
            script.add("HEAD", "/test.zarr/" + absent, 404)

        script.add(
            "HEAD",
            "/test.zarr/zarr.json",
            200,
            {"Content-Length": metadata_length},
        )
        script.add(
            "GET",
            "/test.zarr/zarr.json",
            200,
            {"Content-Length": metadata_length},
            array_metadata,
        )

        # Sidecar files probed after zarr.json has been read.
        for sidecar in (
            "zarr.json.aux.xml",
            "zarr.aux",
            "zarr.AUX",
            "zarr.json.aux",
            "zarr.json.AUX",
            "zarr.json.gmac",
        ):
            script.add("HEAD", "/test.zarr/" + sidecar, 404)

        script.add("HEAD", "/test.zarr/c/0/0", 200, {"Content-Length": "65536"})

        # Tail of the shard: zero padding followed by two little-endian
        # uint64 values, 0 and 4 (presumably the offset and size of the
        # inner chunk in the shard index -- confirm against the codec spec).
        shard_tail = struct.pack("<QQ", 0, 4).rjust(16384, b"\x00")
        script.add(
            "GET",
            "/test.zarr/c/0/0",
            206,
            {"Content-Length": "16384", "Content-Range": "bytes 49152-65535/65536"},
            shard_tail,
            expected_headers={"Range": "bytes=49152-65535"},
        )

        # Head of the shard: the four payload bytes, zero-padded to 16384.
        chunk_payload = b"\x01\x02\x03\x04"
        script.add(
            "GET",
            "/test.zarr/c/0/0",
            206,
            {"Content-Length": "16384", "Content-Range": "bytes 0-16383/65536"},
            chunk_payload.ljust(16384, b"\x00"),
            expected_headers={"Range": "bytes=0-16383"},
        )

        with webserver.install_http_handler(script):
            ds = gdal.Open('ZARR:"/vsicurl/http://localhost:%d/test.zarr"' % port)
            block = ds.GetRasterBand(1).ReadBlock(0, 0)
            assert block == chunk_payload

    finally:
        webserver.server_stop(proc, port)

        gdal.VSICurlClearCache()
6276+
6277+
61666278
###############################################################################
61676279
# Test a sharded dataset, where sharding happens after a transpose codec.
61686280

frmts/zarr/zarr_v3_array.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -414,8 +414,13 @@ bool ZarrV3Array::LoadBlockData(const uint64_t *blockIndices, bool bUseMutex,
414414

415415
// For network file systems, get the streaming version of the filename,
416416
// as we don't need arbitrary seeking in the file
417-
osFilename = VSIFileManager::GetHandler(osFilename.c_str())
418-
->GetStreamingFilename(osFilename);
417+
// ... unless we do partial decoding, in which case range requests within
418+
// a shard are much more efficient
419+
if (!(poCodecs && poCodecs->SupportsPartialDecoding()))
420+
{
421+
osFilename = VSIFileManager::GetHandler(osFilename.c_str())
422+
->GetStreamingFilename(osFilename);
423+
}
419424

420425
// First if we have a tile presence cache, check tile presence from it
421426
bool bEarlyRet;

0 commit comments

Comments
 (0)