Skip to content

Commit d3e7ca2

Browse files
committed
improves heuristic in default implementation of GridIndex._chunk_io
This commit improves the heuristic used by `GridIndex`’s default implementation of the `_chunk_io` method (with the `"auto"` chunk-sizing strategy) for determining how many grids to read in a given iteration. For context, the heuristic was historically hardcoded to a value of 1000 grids. This works well for AMR simulations with small grids (e.g. 16^3, 32^3), but the heuristic is problematic when you have uniform-resolution simulations. This commit adopts a heuristic that tries to limit the number of grids in order to make sure we don't run out of memory.
1 parent 0ba2cfa commit d3e7ca2

1 file changed

Lines changed: 30 additions & 2 deletions

File tree

yt/geometry/grid_geometry_handler.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import abc
22
import weakref
33
from collections import defaultdict
4+
from functools import cached_property
45

56
import numpy as np
67

@@ -389,6 +390,11 @@ def _chunk_spatial(self, dobj, ngz, sort=None, preload_fields=None):
389390

390391
_grid_chunksize = 1000
391392

393+
@cached_property
394+
def _max_grid_cell_count(self):
395+
"""Returns the max number of cells in a grid"""
396+
return self.grid_dimensions.prod(axis=1).max()
397+
392398
def _chunk_io(
393399
self,
394400
dobj,
@@ -413,11 +419,33 @@ def _chunk_io(
413419
if chunk_sizing == "auto":
414420
chunk_ngrids = len(gobjs)
415421
if chunk_ngrids > 0:
422+
# historically, we hardcoded `_grid_chunksize` to 1000. For context,
423+
# `_grid_chunksize` is the
424+
# number of grids for this object to load (assuming no parallelism).
425+
# While this heuristic works well with small AMR grids (e.g. 16^3 or
426+
# 32^3 cells), this was problematic with Uniform resolution snapshots
427+
# (e.g. Cholla snapshots commonly have 256^3 cells per grid)
428+
#
429+
# Our new heuristic adopts a toy model:
430+
# - we pick a `_grid_chunksize` such that holding arrays for
431+
# `_field_count` fields in memory at once will never take up
432+
# more than `_max_num_bytes`
433+
_field_count = 10 # an arbitrary value
434+
_max_num_bytes = int(1e9) # another arbitrary value
435+
436+
# if we assume double-precision field values, then a field array for
437+
# single grid requires up to the following number of bytes
438+
_bytes_per_field_per_grid = 8 * int(self._max_grid_cell_count)
439+
440+
_grid_chunksize = max(
441+
_max_num_bytes // (_bytes_per_field_per_grid * _field_count), 1
442+
)
443+
416444
nproc = int(ytcfg.get("yt", "internals", "global_parallel_size"))
417445
chunking_factor = np.int64(
418-
np.ceil(self._grid_chunksize * nproc / chunk_ngrids)
446+
np.ceil(_grid_chunksize * nproc / chunk_ngrids)
419447
)
420-
size = max(self._grid_chunksize // chunking_factor, 1)
448+
size = max(_grid_chunksize // chunking_factor, 1)
421449
else:
422450
size = self._grid_chunksize
423451
elif chunk_sizing == "config_file":

0 commit comments

Comments
 (0)