@@ -15,7 +15,7 @@ import Blosc
15
15
import CodecZstd
16
16
17
17
using PythonCall
18
- pyimport (" hdf5plugin" )
18
+ hdf5plugin = pyimport (" hdf5plugin" )
19
19
h5py = pyimport (" h5py" )
20
20
21
21
# Useful links:
@@ -52,6 +52,30 @@ codecs = [
52
52
) for element_size in [1 : 20 ; 1023 ; typemax (UInt32);]];
53
53
]
54
54
55
# Decode a single raw HDF5 chunk with the filter identified by `id`.
# `client_data` holds the filter's client data values; only the shuffle
# filter (id 2) reads it (element size in bytes). Throws an error for
# filter ids with no registered Julia codec.
function decode_h5_chunk(chunk::AbstractVector{UInt8}, id::Integer, client_data)
    codec = if id == 1          # deflate (zlib)
        ChunkCodecLibZlib.ZlibCodec()
    elseif id == 2              # byte shuffle
        ChunkCodecCore.ShuffleCodec(client_data[1])
    elseif id == 307            # bzip2 plugin
        ChunkCodecLibBzip2.BZ2Codec()
    elseif id == 32001          # Blosc plugin
        ChunkCodecLibBlosc.BloscCodec()
    elseif id == 32015          # Zstandard plugin
        ChunkCodecLibZstd.ZstdCodec()
    else
        error("Unsupported filter id: $(id)")
    end
    return decode(codec, chunk)
end
70
+
71
# h5py write configurations to round-trip against the Julia decoders.
# Each entry is a tuple of
# (keyword options splatted into `h5py.create_dataset`, number of random trials).
test_h5py_options = [
    ((;compression=hdf5plugin.Zstd(clevel=3)), 100),
    ((;compression=hdf5plugin.Blosc(cname="zstd", clevel=3), shuffle=true), 100),
    # bzip2 is slow, so it gets fewer trials.
    ((;compression=hdf5plugin.BZip2(blocksize=5)), 10),
    ((;compression="gzip", compression_opts=3), 100),
    ((;compression="gzip", shuffle=true), 100),
]
78
+
55
79
@testset " $(jl_options) $(h5_options) " for (jl_options, h5_options, trials) in codecs
56
80
h5file = tempname ()
57
81
srange = ChunkCodecCore. decoded_size_range (jl_options)
@@ -79,3 +103,65 @@ codecs = [
79
103
end
80
104
end
81
105
end
106
+
107
# Create an in-memory HDF5 file via h5py containing a single dataset "a"
# built from `options` (keyword arguments splatted into `create_dataset`),
# and return the raw file image bytes as a Julia vector.
function make_h5py_file(options)
    f = h5py.File.in_memory()
    try
        f.create_dataset("a"; options...)
        f.flush()
        # Copy the file image out of Python before the handle is closed.
        return collect(PyArray(f.id.get_file_image()))
    finally
        # Ensure the Python-side file handle is released even if
        # `create_dataset` throws (e.g. an invalid option combination);
        # the original leaked the handle on error.
        f.close()
    end
end
115
+
116
# Decode dataset "a" from raw HDF5 file bytes `hdf_data` using the
# pure-Julia chunk decoders (`decode_h5_chunk`) instead of the HDF5
# filter plugin pipeline. Returns an array with the dataset's element
# type and full size.
function decode_h5_data(hdf_data)
    h5open(hdf_data, "r"; name="in_memory.h5") do f
        ds = f["a"]
        filters = HDF5.get_create_properties(ds).filters
        chunk_size = HDF5.get_chunk(ds)
        data_size = size(ds)
        out = zeros(eltype(ds), data_size)
        for chunkinfo in HDF5.get_chunk_info_all(ds)
            # `addr` is a zero-based byte offset into the file image;
            # shift into the indexing of `hdf_data`.
            start = chunkinfo.addr + firstindex(hdf_data)
            stop = start + chunkinfo.size - 1
            chunk = hdf_data[start:stop]
            # Undo the filter pipeline in reverse of the order it was applied.
            for i in length(filters):-1:1
                # A set bit in `filter_mask` means that filter was skipped
                # for this chunk at write time, so skip it here too.
                if chunkinfo.filter_mask & (1 << (i - 1)) != 0
                    continue
                end
                filter = filters[HDF5.Filters.ExternalFilter, i]
                chunk = decode_h5_chunk(chunk, filter.filter_id, filter.data)
            end
            # Destination region in `out`; chunks at the upper edges may be
            # clipped by the dataset bounds.
            chunkstart = chunkinfo.offset .+ 1
            chunkstop = min.(chunkstart .+ chunk_size .- 1, data_size)
            real_chunksize = chunkstop .- chunkstart .+ 1
            # Decoded chunks are always full `chunk_size`, even at the edges,
            # so reshape to the full chunk and copy only the valid region.
            shaped_chunkdata = reshape(reinterpret(eltype(out), chunk), chunk_size...)
            copyto!(
                out,
                CartesianIndices(((range.(chunkstart, chunkstop))...,)),
                shaped_chunkdata,
                CartesianIndices(((range.(1, real_chunksize))...,))
            )
        end
        out
    end
end
148
+
149
# Round-trip test: write data with h5py (Python) using each filter
# configuration, then decode the resulting file bytes with the
# pure-Julia decoders and compare.
@testset "HDF5 compatibility with h5py $(options)" for (options, trials) in test_h5py_options
    # Small deterministic sizes 1:10 plus `trials` random larger sizes.
    decoded_sizes = [
        1:10;
        rand((1:2000000), trials);
    ]
    for s in decoded_sizes
        # Exercise a mix of element types and ranks.
        choice = rand(1:3)
        data = if choice == 1
            rand_test_data(s)
        elseif choice == 2
            randn(s)
        elseif choice == 3
            randn(2, s)
        end
        hdf_data = make_h5py_file((;data, options...))
        decoded_data = decode_h5_data(hdf_data)
        # The decoded array's dimensions come back reversed relative to the
        # Julia input (h5py writes row-major), so compare against a
        # dimension-reversed permutation.
        @test decoded_data == permutedims(data, ((ndims(data):-1:1)...,))
    end
end
0 commit comments