Skip to content

Commit d3e7ca2

Browse files
committed
improves heuristic in default implementation of GridIndex._chunk_io
This commit improves the heuristic used by `GridIndex`’s default implementation of the `_chunk_io` method (with the `"auto"` chunk-sizing strategy) for determining how many grids to read in a given iteration. For context, the heuristic was historically hardcoded to a value of 1000 grids. This works well for AMR simulations with small grids (e.g. 16^3, 32^3), but the heuristic is problematic when you have uniform-resolution simulations. This commit adopts a heuristic that tries to limit the number of grids in order to make sure we don't run out of memory.
1 parent 0ba2cfa commit d3e7ca2

1 file changed

Lines changed: 30 additions & 2 deletions

File tree

yt/geometry/grid_geometry_handler.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import abc
22
import weakref
33
from collections import defaultdict
4+
from functools import cached_property
45

56
import numpy as np
67

@@ -389,6 +390,11 @@ def _chunk_spatial(self, dobj, ngz, sort=None, preload_fields=None):
389390

390391
_grid_chunksize = 1000
391392

393+
@cached_property
394+
def _max_grid_cell_count(self):
395+
"""Returns the max number of cells in a grid"""
396+
return self.grid_dimensions.prod(axis=1).max()
397+
392398
def _chunk_io(
393399
self,
394400
dobj,
@@ -413,11 +419,33 @@ def _chunk_io(
413419
if chunk_sizing == "auto":
414420
chunk_ngrids = len(gobjs)
415421
if chunk_ngrids > 0:
422+
# historically, we hardcoded `_grid_chunksize` to 1000. For context,
423+
# `_grid_chunksize` is the
424+
# number of grids for this object to load (assuming no parallelism).
425+
# While this heuristic works well with small AMR grids (e.g. 16^3 or
426+
# 32^3 cells), this was problematic with Uniform resolution snapshots
427+
# (e.g. Cholla snapshots commonly have 256^3 cells per grid)
428+
#
429+
# Our new heuristic adopts a toy model:
430+
# - we pick a `_grid_chunksize` such that holding arrays for
431+
# `_field_count` fields in memory at once will never take up
432+
# more than `_max_num_bytes`
433+
_field_count = 10 # an arbitrary value
434+
_max_num_bytes = int(1e9) # another arbitrary value
435+
436+
# if we assume double-precision field values, then a field array for
437+
# single grid requires up to the following number of bytes
438+
_bytes_per_field_per_grid = 8 * int(self._max_grid_cell_count)
439+
440+
_grid_chunksize = max(
441+
_max_num_bytes // (_bytes_per_field_per_grid * _field_count), 1
442+
)
443+
416444
nproc = int(ytcfg.get("yt", "internals", "global_parallel_size"))
417445
chunking_factor = np.int64(
418-
np.ceil(self._grid_chunksize * nproc / chunk_ngrids)
446+
np.ceil(_grid_chunksize * nproc / chunk_ngrids)
419447
)
420-
size = max(self._grid_chunksize // chunking_factor, 1)
448+
size = max(_grid_chunksize // chunking_factor, 1)
421449
else:
422450
size = self._grid_chunksize
423451
elif chunk_sizing == "config_file":

0 commit comments

Comments
 (0)