Description
Zarr version
v2.18.2
Numcodecs version
v0.12.1
Python Version
3.12.4
Operating System
Linux
Installation
using conda
Description
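I get the following error when using zarr-python to read a Zarr array that was written by TensorStore with the zstd compressor. Opening the array with `zarr.open_array` succeeds; the error is raised when the chunk data is actually read (`arr[:,:]`).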
RuntimeError: Zstd decompression error: invalid input data
Steps to reproduce
In [8]: ds = ts.open({
...: 'driver': 'zarr',
...: 'kvstore': {
...: 'driver': 'file',
...: 'path': 'tmp/zarr_zstd_dataset',
...: },
...: 'metadata': {
...: 'compressor': {
...: 'id': 'zstd',
...: 'level': 3,
...: },
...: 'shape': [1024, 1024],
...: 'chunks': [64, 64],
...: 'dtype': '|u1',
...: 'dimension_separator': '/',
...: },
...: 'create': True,
...: 'delete_existing': True,
...: }).result()
In [9]: ds[:,:] = 5
In [10]: import zarr
In [11]: arr = zarr.open_array("tmp/zarr_zstd_dataset")
In [12]: arr[:,:]
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Cell In[12], line 1
----> 1 arr[:,:]
File ~/review_temp/conda/3/x86_64/envs/zarr_python/lib/python3.12/site-packages/zarr/core.py:798, in Array.__getitem__(self, selection)
796 result = self.vindex[selection]
797 elif is_pure_orthogonal_indexing(pure_selection, self.ndim):
--> 798 result = self.get_orthogonal_selection(pure_selection, fields=fields)
799 else:
800 result = self.get_basic_selection(pure_selection, fields=fields)
File ~/review_temp/conda/3/x86_64/envs/zarr_python/lib/python3.12/site-packages/zarr/core.py:1080, in Array.get_orthogonal_selection(self, selection, out, fields)
1077 # setup indexer
1078 indexer = OrthogonalIndexer(selection, self)
-> 1080 return self._get_selection(indexer=indexer, out=out, fields=fields)
File ~/review_temp/conda/3/x86_64/envs/zarr_python/lib/python3.12/site-packages/zarr/core.py:1343, in Array._get_selection(self, indexer, out, fields)
1340 if math.prod(out_shape) > 0:
1341 # allow storage to get multiple items at once
1342 lchunk_coords, lchunk_selection, lout_selection = zip(*indexer)
-> 1343 self._chunk_getitems(
1344 lchunk_coords,
1345 lchunk_selection,
1346 out,
1347 lout_selection,
1348 drop_axes=indexer.drop_axes,
1349 fields=fields,
1350 )
1351 if out.shape:
1352 return out
File ~/review_temp/conda/3/x86_64/envs/zarr_python/lib/python3.12/site-packages/zarr/core.py:2183, in Array._chunk_getitems(self, lchunk_coords, lchunk_selection, out, lout_selection, drop_axes, fields)
2181 for ckey, chunk_select, out_select in zip(ckeys, lchunk_selection, lout_selection):
2182 if ckey in cdatas:
-> 2183 self._process_chunk(
2184 out,
2185 cdatas[ckey],
2186 chunk_select,
2187 drop_axes,
2188 out_is_ndarray,
2189 fields,
2190 out_select,
2191 partial_read_decode=partial_read_decode,
2192 )
2193 else:
2194 # check exception type
2195 if self._fill_value is not None:
File ~/review_temp/conda/3/x86_64/envs/zarr_python/lib/python3.12/site-packages/zarr/core.py:2096, in Array._process_chunk(self, out, cdata, chunk_selection, drop_axes, out_is_ndarray, fields, out_selection, partial_read_decode)
2094 except ArrayIndexError:
2095 cdata = cdata.read_full()
-> 2096 chunk = self._decode_chunk(cdata)
2098 # select data from chunk
2099 if fields:
File ~/review_temp/conda/3/x86_64/envs/zarr_python/lib/python3.12/site-packages/zarr/core.py:2352, in Array._decode_chunk(self, cdata, start, nitems, expected_shape)
2350 chunk = self._compressor.decode_partial(cdata, start, nitems)
2351 else:
-> 2352 chunk = self._compressor.decode(cdata)
2353 else:
2354 chunk = cdata
File numcodecs/zstd.pyx:219, in numcodecs.zstd.Zstd.decode()
File numcodecs/zstd.pyx:153, in numcodecs.zstd.decompress()
RuntimeError: Zstd decompression error: invalid input data
Additional output
$ conda env export
name: zarr_python
channels:
- conda-forge
- defaults
dependencies:
- _libgcc_mutex=0.1=conda_forge
- _openmp_mutex=4.5=2_gnu
- aiohttp=3.9.5=py312h98912ed_0
- aiosignal=1.3.1=pyhd8ed1ab_0
- aom=3.9.1=hac33072_0
- asciitree=0.3.3=py_2
- asttokens=2.4.1=pyhd8ed1ab_0
- attrs=23.2.0=pyh71513ae_0
- blosc=1.21.6=hef167b5_0
- brotli-python=1.1.0=py312h30efb56_1
- bzip2=1.0.8=h4bc722e_7
- c-ares=1.32.3=h4bc722e_0
- ca-certificates=2024.7.4=hbcca054_0
- certifi=2024.7.4=pyhd8ed1ab_0
- cffi=1.16.0=py312hf06ca03_0
- charset-normalizer=3.3.2=pyhd8ed1ab_0
- dav1d=1.2.1=hd590300_0
- decorator=5.1.1=pyhd8ed1ab_0
- exceptiongroup=1.2.2=pyhd8ed1ab_0
- executing=2.0.1=pyhd8ed1ab_0
- fasteners=0.17.3=pyhd8ed1ab_0
- frozenlist=1.4.1=py312h98912ed_0
- fsspec=2024.6.1=pyhff2d567_0
- h2=4.1.0=pyhd8ed1ab_0
- hpack=4.0.0=pyh9f0ad1d_0
- hyperframe=6.0.1=pyhd8ed1ab_0
- idna=3.7=pyhd8ed1ab_0
- ipython=8.26.0=pyh707e725_0
- jedi=0.19.1=pyhd8ed1ab_0
- keyutils=1.6.1=h166bdaf_0
- krb5=1.21.3=h659f571_0
- ld_impl_linux-64=2.40=hf3520f5_7
- libabseil=20240116.2=cxx17_he02047a_1
- libavif16=1.1.0=h9b56c87_0
- libblas=3.9.0=23_linux64_openblas
- libcblas=3.9.0=23_linux64_openblas
- libcurl=8.9.0=hdb1bdb2_0
- libedit=3.1.20191231=he28a2e2_2
- libev=4.33=hd590300_2
- libexpat=2.6.2=h59595ed_0
- libffi=3.4.2=h7f98852_5
- libgcc-ng=14.1.0=h77fa898_0
- libgfortran-ng=14.1.0=h69a702a_0
- libgfortran5=14.1.0=hc5f4f2c_0
- libgomp=14.1.0=h77fa898_0
- libjpeg-turbo=3.0.0=hd590300_1
- liblapack=3.9.0=23_linux64_openblas
- libnghttp2=1.58.0=h47da74e_1
- libnsl=2.0.1=hd590300_0
- libopenblas=0.3.27=pthreads_hac2b453_1
- libpng=1.6.43=h2797004_0
- libprotobuf=4.25.3=h08a7969_0
- libsqlite=3.46.0=hde9e2c9_0
- libssh2=1.11.0=h0841786_0
- libstdcxx-ng=14.1.0=hc0a3c3a_0
- libuuid=2.38.1=h0b41bf4_0
- libwebp-base=1.4.0=hd590300_0
- libxcrypt=4.4.36=hd590300_1
- libzlib=1.3.1=h4ab18f5_1
- lz4-c=1.9.4=hcb278e6_0
- matplotlib-inline=0.1.7=pyhd8ed1ab_0
- ml_dtypes=0.4.0=py312h1d6d2e6_1
- msgpack-python=1.0.8=py312h2492b07_0
- multidict=6.0.5=py312h98912ed_0
- ncurses=6.5=h59595ed_0
- numcodecs=0.12.1=py312h7070661_1
- numpy=1.26.4=py312heda63a1_0
- openssl=3.3.1=h4bc722e_2
- parso=0.8.4=pyhd8ed1ab_0
- pexpect=4.9.0=pyhd8ed1ab_0
- pickleshare=0.7.5=py_1003
- pip=24.0=pyhd8ed1ab_0
- prompt-toolkit=3.0.47=pyha770c72_0
- ptyprocess=0.7.0=pyhd3deb0d_0
- pure_eval=0.2.3=pyhd8ed1ab_0
- pybind11-abi=4=hd8ed1ab_3
- pycparser=2.22=pyhd8ed1ab_0
- pygments=2.18.0=pyhd8ed1ab_0
- pysocks=1.7.1=pyha2e5f31_6
- python=3.12.4=h194c7f8_0_cpython
- python_abi=3.12=4_cp312
- rav1e=0.6.6=he8a937b_2
- readline=8.2=h8228510_1
- requests=2.32.3=pyhd8ed1ab_0
- setuptools=71.0.4=pyhd8ed1ab_0
- six=1.16.0=pyh6c4a22f_0
- snappy=1.2.1=ha2e4443_0
- stack_data=0.6.2=pyhd8ed1ab_0
- svt-av1=2.1.2=hac33072_0
- tensorstore=0.1.62=py312h7e2185d_0
- tk=8.6.13=noxft_h4845f30_101
- traitlets=5.14.3=pyhd8ed1ab_0
- typing_extensions=4.12.2=pyha770c72_0
- tzdata=2024a=h0c530f3_0
- urllib3=2.2.2=pyhd8ed1ab_1
- wcwidth=0.2.13=pyhd8ed1ab_0
- wheel=0.43.0=pyhd8ed1ab_1
- xz=5.2.6=h166bdaf_0
- yarl=1.9.4=py312h98912ed_0
- zarr=2.18.2=pyhd8ed1ab_0
- zstandard=0.23.0=py312h3483029_0
- zstd=1.5.6=ha6fb4c9_0
prefix: /home/mkitti/review_temp/conda/3/x86_64/envs/zarr_python
xref: google/tensorstore#182