small codec improvement while trying to solve partial chunk issue

terraputix · terraputix · commit be65ad911836 · 2025-04-22T09:19:02.000+02:00
diff --git a/kerchunk/open_meteo.py b/kerchunk/open_meteo.py
@@ -23,7 +23,6 @@
 
 try:
     import omfiles
-    from omfiles.omfiles_numcodecs import TurboPfor
 except ModuleNotFoundError:  # pragma: no cover
     raise ImportError(
         "omfiles is required for kerchunking Open-Meteo files. Please install with "
@@ -167,14 +166,10 @@ def __repr__(self):
         return r
 
 # Register codecs
-numcodecs.register_codec(TurboPfor, "pfor")
+# NOTE: TurboPfor is already registered as `turbo_pfor` by omfiles
 numcodecs.register_codec(Delta2D, "delta2d")
 numcodecs.register_codec(Reshape, "reshape")
 
-# print(numcodecs.registry.codec_registry)
-# codec = numcodecs.get_codec({"id": "pfor_serializer"})
-# print(codec)
-
 
 class SingleOmToZarr:
     """Translate a .om file into Zarr metadata"""
@@ -244,14 +239,13 @@ def translate(self):
             "shape": shape,
             "chunks": chunks,
             "dtype": str(dtype),
-            "compressor": {"id": "pfor", "length": blocksize},  # As main compressor
+            "compressor": {"id": "turbo_pfor", "chunk_elements": blocksize},  # As main compressor
             "fill_value": None,
             "order": "C",
             "filters": [
                 {"id": "fixedscaleoffset", "scale": scale_factor, "offset": add_offset, "dtype": "f4", "astype": "i2"},
                 {"id": "delta2d", "dtype": "<i2"},
                 {"id": "reshape", "shape": [chunks[1], chunks[2]]},  # Reshape to 2D
-                # {"id": "pfor_serializer", "blocksize": blocksize},
             ]
         }
 
diff --git a/tests/test_open_meteo.py b/tests/test_open_meteo.py
@@ -11,6 +11,7 @@
 from kerchunk.open_meteo import SingleOmToZarr, SupportedDomain
 from kerchunk.utils import fs_as_store
 
+zarr.config.config["async"]["concurrency"] = 1
 
 def test_single_om_to_zarr():
     # Path to test file - adjust as needed
@@ -44,6 +45,7 @@ def test_single_om_to_zarr():
     z = group["data"] # Here we just use a dummy data name we have hardcoded in SingleOmToZarr
 
     print("z.shape", z.shape)
+    print("z.chunks", z.chunks)
 
     # Verify basic metadata matches original file
     reader = omfiles.OmFilePyReader(test_file)
@@ -52,11 +54,12 @@ def test_single_om_to_zarr():
     assert list(z.chunks) == reader.chunk_dimensions, f"Chunks mismatch: {z.chunks} vs {reader.chunk_dimensions}"
 
     # TODO: Using the following chunk_index leads to a double free / corruption error!
-    # Most likely, because zarr and open-meteo treat partial chunks differently.
+    # This happens even with a concurrency of 1: `zarr.config.config["async"]["concurrency"] = 1`
+    # Most likely this is because zarr and open-meteo treat partial chunks differently:
     # om-files encode partial chunks with a reduced dimension, while zarr most likely expects a full block of data?
-    # chunk_index = (slice(0, 100), 2878, ...)
+    # chunk_index = (slice(90, 100), 2878, ...)
 
-    # Test retrieving a specific chunk (same chunk as in your example)
+    # Test retrieving a specific chunk
     chunk_index = (5, 5, ...)
 
     # Get direct chunk data