Skip to content

Commit be65ad9

Browse files
committed
small codec improvement while trying to solve partial chunk issue
1 parent d4957ee commit be65ad9

File tree

2 files changed

+8
-11
lines changed

2 files changed

+8
-11
lines changed

kerchunk/open_meteo.py

+2-8
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323

2424
try:
2525
import omfiles
26-
from omfiles.omfiles_numcodecs import TurboPfor
2726
except ModuleNotFoundError: # pragma: no cover
2827
raise ImportError(
2928
"omfiles is required for kerchunking Open-Meteo files. Please install with "
@@ -167,14 +166,10 @@ def __repr__(self):
167166
return r
168167

169168
# Register codecs
170-
numcodecs.register_codec(TurboPfor, "pfor")
169+
# NOTE: TurboPfor is registered as `turbo_pfor` by omfiles already
171170
numcodecs.register_codec(Delta2D, "delta2d")
172171
numcodecs.register_codec(Reshape, "reshape")
173172

174-
# print(numcodecs.registry.codec_registry)
175-
# codec = numcodecs.get_codec({"id": "pfor_serializer"})
176-
# print(codec)
177-
178173

179174
class SingleOmToZarr:
180175
"""Translate a .om file into Zarr metadata"""
@@ -244,14 +239,13 @@ def translate(self):
244239
"shape": shape,
245240
"chunks": chunks,
246241
"dtype": str(dtype),
247-
"compressor": {"id": "pfor", "length": blocksize}, # As main compressor
242+
"compressor": {"id": "turbo_pfor", "chunk_elements": blocksize}, # As main compressor
248243
"fill_value": None,
249244
"order": "C",
250245
"filters": [
251246
{"id": "fixedscaleoffset", "scale": scale_factor, "offset": add_offset, "dtype": "f4", "astype": "i2"},
252247
{"id": "delta2d", "dtype": "<i2"},
253248
{"id": "reshape", "shape": [chunks[1], chunks[2]]}, # Reshape to 2D
254-
# {"id": "pfor_serializer", "blocksize": blocksize},
255249
]
256250
}
257251

tests/test_open_meteo.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from kerchunk.open_meteo import SingleOmToZarr, SupportedDomain
1212
from kerchunk.utils import fs_as_store
1313

14+
zarr.config.config["async"]["concurrency"] = 1
1415

1516
def test_single_om_to_zarr():
1617
# Path to test file - adjust as needed
@@ -44,6 +45,7 @@ def test_single_om_to_zarr():
4445
z = group["data"] # Here we just use a dummy data name we have hardcoded in SingleOmToZarr
4546

4647
print("z.shape", z.shape)
48+
print("z.chunks", z.chunks)
4749

4850
# Verify basic metadata matches original file
4951
reader = omfiles.OmFilePyReader(test_file)
@@ -52,11 +54,12 @@ def test_single_om_to_zarr():
5254
assert list(z.chunks) == reader.chunk_dimensions, f"Chunks mismatch: {z.chunks} vs {reader.chunk_dimensions}"
5355

5456
# TODO: Using the following chunk_index leads to a double free / corruption error!
55-
# Most likely, because zarr and open-meteo treat partial chunks differently.
57+
# Even with a concurrency of 1: `zarr.config.config["async"]["concurrency"] = 1`
58+
# Most likely, because zarr and open-meteo treat partial chunks differently:
5659
# om-files encode partial chunks with reduced dimensions, while zarr most likely expects a full block of data.
57-
# chunk_index = (slice(0, 100), 2878, ...)
60+
# chunk_index = (slice(90, 100), 2878, ...)
5861

59-
# Test retrieving a specific chunk (same chunk as in your example)
62+
# Test retrieving a specific chunk
6063
chunk_index = (5, 5, ...)
6164

6265
# Get direct chunk data

0 commit comments

Comments
 (0)