Skip to content

Commit d9b3213

Browse files
authored
Test decoding data written by h5py (#47)
1 parent f8afffb commit d9b3213

File tree

2 files changed

+91
-5
lines changed

2 files changed

+91
-5
lines changed

test/CondaPkg.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
[deps]
2-
hdf5plugin = ""
3-
h5py = ""
4-
imagecodecs = ""
1+
[pip.deps]
2+
imagecodecs = "==2025.3.30"
3+
hdf5plugin = "==5.1.0"
4+
h5py = "==3.13.0"

test/hdf5-compat.jl

Lines changed: 87 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ import Blosc
1515
import CodecZstd
1616

1717
using PythonCall
18-
pyimport("hdf5plugin")
18+
hdf5plugin = pyimport("hdf5plugin")
1919
h5py = pyimport("h5py")
2020

2121
# Useful links:
@@ -52,6 +52,30 @@ codecs = [
5252
) for element_size in [1:20; 1023; typemax(UInt32);]];
5353
]
5454

55+
# Decode one raw HDF5 chunk with the filter identified by `id`.
# `client_data` holds the filter's cd_values; only the shuffle filter
# (id 2) reads it — element 1 is used as the shuffle element size.
# Throws for any filter id not handled here.
function decode_h5_chunk(chunk::AbstractVector{UInt8}, id::Integer, client_data)
    id == 1 && return decode(ChunkCodecLibZlib.ZlibCodec(), chunk)
    id == 2 && return decode(ChunkCodecCore.ShuffleCodec(client_data[1]), chunk)
    id == 307 && return decode(ChunkCodecLibBzip2.BZ2Codec(), chunk)
    id == 32015 && return decode(ChunkCodecLibZstd.ZstdCodec(), chunk)
    id == 32001 && return decode(ChunkCodecLibBlosc.BloscCodec(), chunk)
    error("Unsupported filter id: $(id)")
end
70+
71+
# Pairs of (h5py dataset keyword options, number of random trials) used to
# exercise decoding of HDF5 files written by h5py / hdf5plugin.
test_h5py_options = [
    ((; compression = hdf5plugin.Zstd(clevel=3)), 100),
    ((; compression = hdf5plugin.Blosc(cname="zstd", clevel=3), shuffle = true), 100),
    ((; compression = hdf5plugin.BZip2(blocksize=5)), 10),
    ((; compression = "gzip", compression_opts = 3), 100),
    ((; compression = "gzip", shuffle = true), 100),
]
78+
5579
@testset "$(jl_options) $(h5_options)" for (jl_options, h5_options, trials) in codecs
5680
h5file = tempname()
5781
srange = ChunkCodecCore.decoded_size_range(jl_options)
@@ -79,3 +103,65 @@ codecs = [
79103
end
80104
end
81105
end
106+
107+
# Build an HDF5 file entirely in memory with h5py, writing a single dataset
# named "a" from the keyword `options` (must include `data`), and return the
# raw file image as a Julia byte vector.
function make_h5py_file(options)
    file = h5py.File.in_memory()
    file.create_dataset("a"; options...)
    file.flush()
    # Snapshot the in-memory file image into Julia-owned bytes before closing.
    bytes = collect(PyArray(file.id.get_file_image()))
    file.close()
    return bytes
end
115+
116+
# Read dataset "a" back out of raw HDF5 file bytes, decoding every chunk
# manually via `decode_h5_chunk` instead of letting the HDF5 library run its
# filter pipeline. Returns the full array in HDF5.jl's (column-major) layout.
function decode_h5_data(hdf_data)
    h5open(hdf_data, "r"; name = "in_memory.h5") do file
        dset = file["a"]
        pipeline = HDF5.get_create_properties(dset).filters
        chunk_dims = HDF5.get_chunk(dset)
        full_dims = size(dset)
        out = zeros(eltype(dset), full_dims)
        for info in HDF5.get_chunk_info_all(dset)
            # `info.addr` is the chunk's byte offset into the file image.
            first_byte = info.addr + firstindex(hdf_data)
            last_byte = first_byte + info.size - 1
            raw = hdf_data[first_byte:last_byte]
            # Undo the filter pipeline in reverse order; a set bit in
            # `filter_mask` marks a filter that was skipped at write time.
            for i in length(pipeline):-1:1
                if iszero(info.filter_mask & (1 << (i - 1)))
                    flt = pipeline[HDF5.Filters.ExternalFilter, i]
                    raw = decode_h5_chunk(raw, flt.filter_id, flt.data)
                end
            end
            lo = info.offset .+ 1
            # Edge chunks can overhang the dataset bounds; clamp and copy
            # only the valid region.
            hi = min.(lo .+ chunk_dims .- 1, full_dims)
            valid = hi .- lo .+ 1
            shaped = reshape(reinterpret(eltype(out), raw), chunk_dims...)
            copyto!(
                out,
                CartesianIndices(((range.(lo, hi))...,)),
                shaped,
                CartesianIndices(((range.(1, valid))...,)),
            )
        end
        return out
    end
end
148+
149+
# Round-trip check: data written by h5py (with the given compression options)
# must decode to the same values via our manual chunk decoding.
@testset "HDF5 compatibility with h5py $(options)" for (options, trials) in test_h5py_options
    # Always cover tiny sizes 1:10, plus `trials` random larger sizes.
    sizes = [
        1:10;
        rand(1:2000000, trials);
    ]
    for n in sizes
        # Pick one of three generators: project test bytes, a random float
        # vector, or a random 2×n matrix.
        kind = rand(1:3)
        data = kind == 1 ? rand_test_data(n) :
               kind == 2 ? randn(n) :
               randn(2, n)
        file_bytes = make_h5py_file((; data, options...))
        roundtripped = decode_h5_data(file_bytes)
        # h5py writes row-major while HDF5.jl reads column-major, so compare
        # against the dimension-reversed original.
        @test roundtripped == permutedims(data, ((ndims(data):-1:1)...,))
    end
end

0 commit comments

Comments
 (0)