diff --git a/nose_ignores.txt b/nose_ignores.txt
index 6db1089c71f..c3db3d70c05 100644
--- a/nose_ignores.txt
+++ b/nose_ignores.txt
@@ -52,3 +52,4 @@
 --ignore-file=test_cf_radial_pytest\.py
 --ignore-file=test_data_containers\.py
 --ignore-file=test_fields_pytest\.py
+--ignore-file=test_cholla_load\.py
diff --git a/yt/frontends/cholla/data_structures.py b/yt/frontends/cholla/data_structures.py
index 237e345a498..2524cb9e757 100644
--- a/yt/frontends/cholla/data_structures.py
+++ b/yt/frontends/cholla/data_structures.py
@@ -9,16 +9,17 @@
 from yt.geometry.api import Geometry
 from yt.geometry.grid_geometry_handler import GridIndex
 from yt.utilities.logger import ytLogger as mylog
-from yt.utilities.on_demand_imports import _h5py as h5py
+from yt.utilities.on_demand_imports import _h5py
 
 from .fields import ChollaFieldInfo
+from .misc import _determine_data_layout
 
 
 class ChollaGrid(AMRGridPatch):
     _id_offset = 0
 
-    def __init__(self, id, index, level, dims):
-        super().__init__(id, filename=index.index_filename, index=index)
+    def __init__(self, id, index, level, dims, filename):
+        super().__init__(id, filename=filename, index=index)
         self.Parent = None
         self.Children = []
         self.Level = level
@@ -27,6 +28,7 @@ def __init__(self, id, index, level, dims):
 
 class ChollaHierarchy(GridIndex):
     grid = ChollaGrid
+    _grid_chunksize = 1
 
     def __init__(self, ds, dataset_type="cholla"):
         self.dataset_type = dataset_type
@@ -39,27 +41,53 @@ def __init__(self, ds, dataset_type="cholla"):
         super().__init__(ds, dataset_type)
 
     def _detect_output_fields(self):
-        with h5py.File(self.index_filename, mode="r") as h5f:
-            self.field_list = [("cholla", k) for k in h5f.keys()]
+        with _h5py.File(self.index_filename, mode="r") as h5f:
+            grp = h5f.get("field", h5f)
+            self.field_list = [("cholla", k) for k in grp.keys()]
 
     def _count_grids(self):
-        self.num_grids = 1
+        with _h5py.File(self.index_filename, "r") as f:
+            self._blockid_location_arr, self._block_mapping = _determine_data_layout(f)
+        self.num_grids = self._blockid_location_arr.size
 
     def _parse_index(self):
-        self.grid_left_edge[0][:] = self.ds.domain_left_edge[:]
-        self.grid_right_edge[0][:] = self.ds.domain_right_edge[:]
-        self.grid_dimensions[0][:] = self.ds.domain_dimensions[:]
-        self.grid_particle_count[0][0] = 0
-        self.grid_levels[0][0] = 0
+        self.grids = np.empty(self.num_grids, dtype="object")
+
+        shape_arr = np.array(self._blockid_location_arr.shape)
+        dims_local = (self.ds.domain_dimensions[:] / shape_arr).astype("=i8")
+
+        for idx3D, blockid in np.ndenumerate(self._blockid_location_arr):
+            idx3D_arr = np.array(idx3D)
+            left_frac = idx3D_arr / shape_arr
+            right_frac = (1 + idx3D_arr) / shape_arr
+
+            level = 0
+
+            self.grids[blockid] = self.grid(
+                blockid,
+                index=self,
+                level=level,
+                dims=dims_local,
+                filename=self._block_mapping.fname_template.format(blockid=blockid),
+            )
+
+            self.grid_left_edge[blockid, :] = left_frac
+            self.grid_right_edge[blockid, :] = right_frac
+            self.grid_dimensions[blockid, :] = dims_local
+            self.grid_levels[blockid, 0] = level
+            self.grid_particle_count[blockid, 0] = 0
+
+        slope = self.ds.domain_width / self.ds.arr(np.ones(3), "code_length")
+        self.grid_left_edge = self.grid_left_edge * slope + self.ds.domain_left_edge
+        self.grid_right_edge = self.grid_right_edge * slope + self.ds.domain_left_edge
+
         self.max_level = 0
 
     def _populate_grid_objects(self):
-        self.grids = np.empty(self.num_grids, dtype="object")
         for i in range(self.num_grids):
-            g = self.grid(i, self, self.grid_levels.flat[i], self.grid_dimensions[i])
+            g = self.grids[i]
             g._prepare_grid()
             g._setup_dx()
-            self.grids[i] = g
 
 
 class ChollaDataset(Dataset):
@@ -100,7 +128,7 @@ def _set_code_unit_attributes(self):
             setdefaultattr(self, key, self.quan(1, unit))
 
     def _parse_parameter_file(self):
-        with h5py.File(self.parameter_filename, mode="r") as h5f:
+        with _h5py.File(self.parameter_filename, mode="r") as h5f:
             attrs = h5f.attrs
             self.parameters = dict(attrs.items())
             self.domain_left_edge = attrs["bounds"][:].astype("=f8")
@@ -108,7 +136,7 @@ def _parse_parameter_file(self):
                 "=f8"
             )
             self.dimensionality = len(attrs["dims"][:])
-            self.domain_dimensions = attrs["dims"][:].astype("=f8")
+            self.domain_dimensions = attrs["dims"][:].astype("=i8")
             self.current_time = attrs["t"][:]
             self._periodicity = tuple(attrs.get("periodicity", (False, False, False)))
             self.gamma = attrs.get("gamma", 5.0 / 3.0)
@@ -167,7 +195,7 @@ def _is_valid(cls, filename: str, *args, **kwargs) -> bool:
             return False
 
         try:
-            fileh = h5py.File(filename, mode="r")
+            fileh = _h5py.File(filename, mode="r")
         except OSError:
             return False
 
diff --git a/yt/frontends/cholla/io.py b/yt/frontends/cholla/io.py
index 0d216864076..af5f280c4af 100644
--- a/yt/frontends/cholla/io.py
+++ b/yt/frontends/cholla/io.py
@@ -1,7 +1,6 @@
-import numpy as np
-
 from yt.utilities.io_handler import BaseIOHandler
-from yt.utilities.on_demand_imports import _h5py as h5py
+
+from .misc import _CachedH5Openner
 
 
 class ChollaIOHandler(BaseIOHandler):
@@ -14,22 +13,28 @@ def _read_particle_coords(self, chunks, ptf):
     def _read_particle_fields(self, chunks, ptf, selector):
         raise NotImplementedError
 
-    def _read_fluid_selection(self, chunks, selector, fields, size):
-        data = {}
-        for field in fields:
-            data[field] = np.empty(size, dtype="float64")
-
-        with h5py.File(self.ds.parameter_filename, "r") as fh:
-            ind = 0
+    def io_iter(self, chunks, fields):
+        # this is loosely inspired by the implementation used for Enzo/Enzo-E
+        # - those other implementations use the lower-level hdf5 interface. Unclear
+        #   whether that affords any advantages...
+        mapper = self.ds.index._block_mapping
+        with _CachedH5Openner(mode="r") as h5_context_manager:
             for chunk in chunks:
-                for grid in chunk.objs:
-                    nd = 0
+                for obj in chunk.objs:
+                    if obj.filename is None:  # unclear when this case arises...
+                        continue
+
+                    # ensure the file containing data for obj is open
+                    fh = h5_context_manager.open_fh(obj.filename)
+
+                    # access the HDF5 group containing the datasets of field values
+                    grp = fh[mapper.h5_group]
+                    # get the indices in a generic dataset that correspond to obj.id
+                    idx = mapper.idx_map[obj.id]
+
                     for field in fields:
                         ftype, fname = field
-                        values = fh[fname][:].astype("=f8")
-                        nd = grid.select(selector, values, data[field], ind)
-                    ind += nd
-        return data
+                        yield field, obj, grp[fname][idx].astype("=f8")
 
     def _read_chunk_data(self, chunk, fields):
         raise NotImplementedError
diff --git a/yt/frontends/cholla/misc.py b/yt/frontends/cholla/misc.py
index e69de29bb2d..75d3b55551d 100644
--- a/yt/frontends/cholla/misc.py
+++ b/yt/frontends/cholla/misc.py
@@ -0,0 +1,214 @@
+import os
+import typing
+from collections import defaultdict
+from collections.abc import Mapping
+from dataclasses import dataclass
+
+import numpy as np
+
+# this is a hacky workaround to get _h5py.File to work in annotations. We can probably
+# address this issue more robustly by directly modifying yt.utilities.on_demand_imports
+if typing.TYPE_CHECKING:
+    import h5py as _h5py
+else:
+    from yt.utilities.on_demand_imports import _h5py
+
+
+class _CachedH5Openner:
+    """
+    A simple context manager that helps implement the idiom where data is read
+    from (or written to) one or more HDF5 files and we want to wait to close
+    the previous HDF5 file until it is time to open a new file. This lets us
+    avoid overhead in cases where we would close and then immediately reopen
+    the same file.
+
+    By using a context manager, we're able to properly cleanup in the event
+    that an exception occurs.
+    """
+
+    def __init__(self, mode="r"):
+        self._filename = None
+        self._fh = None
+        self._mode = mode
+
+    def open_fh(self, filename):
+        if self._filename == filename:
+            return self._fh
+        if self._fh is not None:
+            self._fh.close()
+        self._fh = _h5py.File(filename, self._mode)
+        self._filename = filename
+        return self._fh
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc, value, tb):
+        if self._fh is not None:
+            self._fh.close()
+
+
+@dataclass(kw_only=True, slots=True, frozen=True)
+class _BlockDiskMapping:
+    """Contains info for mapping blockids to locations in hdf5 files
+
+    Notes
+    -----
+    At the time of writing, this is primarily meant to provide a mapping for
+    field data. In the future, we may initialize a separate instance to
+    provide a mapping for particle data
+    """
+
+    # ``fname_template.format(blockid=...)`` produces the file containing blockid (this
+    # can properly handle cases where all blocks are stored in a single file)
+    fname_template: str
+    # hdf5 group containing the field data
+    h5_group: str
+    # maps blockid to an index that selects all associated data from a field-dataset
+    idx_map: Mapping[int, tuple[int | slice, ...]]
+
+
+def _infer_blockid_location_arr(fname_template, global_dims, arr_shape):
+    # used when hdf5 files don't have an explicit "domain" group
+    blockid_location_arr = np.empty(shape=tuple(int(e) for e in arr_shape), dtype="i8")
+    if blockid_location_arr.size == 1:
+        # primarily intended to handle the result of older concatenation scripts (it
+        # also handles the case when only a single block is used, which is okay)
+        blockid_location_arr[0, 0, 0] = 0
+    else:  # handle distributed cholla datasets
+        local_dims, rem = np.divmod(global_dims, blockid_location_arr.shape)
+        assert np.all(rem == 0) and np.all(local_dims > 0)
+        for blockid in range(0, blockid_location_arr.size):
+            with _h5py.File(fname_template.format(blockid=blockid), "r") as f:
+                tmp, rem = np.divmod(f.attrs["offset"][:], local_dims)
+            assert np.all(rem == 0)  # sanity check
+            idx3D = tuple(int(e) for e in tmp)
+            blockid_location_arr[idx3D] = blockid
+    return blockid_location_arr
+
+
+def _determine_data_layout(f: _h5py.File) -> tuple[np.ndarray, _BlockDiskMapping]:
+    """Determine the data layout of the snapshot
+
+    The premise is that the differences between the various data formats
+    shouldn't matter outside of this function."""
+    filename = f.filename
+
+    # STEP 1: infer the template for all Cholla data-files by inspecting filename
+    # ===========================================================================
+    # There are 2 conventions for the names of Cholla's data-files:
+    #  1. "root.h5.{blockid}" is the standard format Cholla uses when writing files
+    #     storing a single snapshot. Each MPI-rank will write a separate file and
+    #     replace ``{blockid}`` with MPI-rank (Modern Cholla versions without MPI
+    #     replace ``{blockid}`` with ``0``)
+    #  2. "root.h5": is the standard format used by Cholla's concatenation scripts
+    #     (older versions of Cholla without MPI also used this format to name outputs)
+    inferred_fname_template, cur_filename_suffix = _infer_fname_template(filename)
+
+    # STEP 2: Check whether the hdf5 file has a flat structure
+    # ========================================================
+    # Historically, we would always store datasets directly in the root group of the
+    # data file. More recent concatenation scripts only store datasets in subgroups.
+    flat_structure = any(not isinstance(elem, _h5py.Group) for elem in f.values())
+
+    # STEP 3: Extract basic domain information from the file(s)
+    # =========================================================
+    has_explicit_domain_info = "domain" in f
+    if has_explicit_domain_info:
+        # this branch primarily handles concatenated files made with newer logic
+        blockid_location_arr = f["domain/blockid_location_arr"][...]
+        field_idx_map = {
+            int(blockid): (i, slice(None), slice(None), slice(None))
+            for i, blockid in enumerate(f["domain/stored_blockid_list"][...])
+        }
+        consolidated_data = len(field_idx_map) == blockid_location_arr.size
+        if not consolidated_data:
+            # in the near future, we may support one of the 2 cases:
+            # > if (flat_structure):
+            # >     _common_idx = (slice(None), slice(None), slice(None))
+            # > else:
+            # >     _common_idx = (0, slice(None), slice(None), slice(None))
+            # > field_idx_map = defaultdict(lambda arg=_common_idx: arg)
+            raise ValueError(
+                "no support for reading Cholla datasets where data is distributed "
+                "among files that explicitly encode domain info."
+            )
+    else:  # (not has_explicit_domain_info)
+        # this branch covers distributed datasets (directly written by Cholla) and
+        # older concatenated files.
+        #
+        # historically, when the dataset is concatenated (in post-processing),
+        # the "nprocs" hdf5 attribute has been dropped
+        blockid_location_arr = _infer_blockid_location_arr(
+            fname_template=inferred_fname_template,
+            global_dims=f.attrs["dims"].astype("=i8"),
+            arr_shape=f.attrs.get("nprocs", np.array([1, 1, 1])).astype("=i8"),
+        )
+        consolidated_data = blockid_location_arr.size == 1
+
+        def _get_common_idx():
+            return (slice(None), slice(None), slice(None))
+
+        field_idx_map = defaultdict(_get_common_idx)
+
+    # STEP 4: Finalize the fname template
+    # ===================================
+    if consolidated_data:
+        fname_template = filename
+    elif cur_filename_suffix != 0:
+        raise ValueError(  # mostly just a sanity check!
+            "filename passed to yt.load for a distributed cholla dataset must "
+            "end in '.0'"
+        )
+    else:
+        fname_template = inferred_fname_template
+
+    mapping = _BlockDiskMapping(
+        fname_template=fname_template,
+        h5_group="./" if flat_structure else "field",
+        idx_map=field_idx_map,
+    )
+    return blockid_location_arr, mapping
+
+
+def _infer_fname_template(filename: str) -> tuple[str, int | None]:
+    """Infers the template for all Cholla data-files based on the filename
+    passed to ``yt.load``.
+
+    Splits off the process-id suffix (if any), and returns both parts in a 2-tuple.
+
+    There are 2 conventions for the names of Cholla's data-files:
+      1. "root.h5.{blockid}" is the standard format Cholla uses when writing
+         files storing a single snapshot. Each MPI-rank will write a separate
+         file and replace ``{blockid}`` with MPI-rank (Modern Cholla versions
+         without MPI replace ``{blockid}`` with ``0``)
+      2. "root.h5": is the standard format used by Cholla's concatenation
+         scripts (older versions of Cholla without MPI also used this format
+         to name outputs)
+
+    Returns
+    -------
+    template: str
+        The path to the file containing a blockid is given by calling
+        ``template.format(blockid=<blockid>)``. (This will work whether
+        all blocks are stored in 1 file or blocks are distributed across
+        files)
+    cur_blockid_suffix: int or None
+        The blockid specified in the suffix of ``filename``. If there isn't a
+        suffix, then this will be None.
+    """
+
+    # at this time, we expect the suffix to be the minimum number of characters
+    # that are necessary to represent the process id. For flexibility, we will
+    # allow extra zero-padding
+
+    _dir, _base = os.path.split(filename)
+    match _base.rpartition("."):
+        case ("", ".", _):  # Cholla never writes a file like this
+            raise ValueError(
+                f"1st character in {filename!r} is the only '.' in the file's name"
+            )
+        case (prefix, ".", suffix) if suffix.isdecimal():
+            return os.path.join(_dir, f"{prefix}.{{blockid}}"), int(suffix)
+        case _:
+            return (filename, None)
diff --git a/yt/frontends/cholla/tests/test_cholla_load.py b/yt/frontends/cholla/tests/test_cholla_load.py
new file mode 100644
index 00000000000..ddf004649b8
--- /dev/null
+++ b/yt/frontends/cholla/tests/test_cholla_load.py
@@ -0,0 +1,336 @@
+"""
+Tests the Cholla-frontend by generating synthetic datasets
+
+In more detail, Cholla has had a couple of historical data formats.
+- see the ``ChollaDataFmt`` enumeration for a description of each format
+- we choose to use synthetic datasets in case there is a future where the
+  data-format changes again and we want to maintain backwards compatibility
+  without uploading more and more sample Cholla datasets
+- the logic for creating synthetic datasets is adapted from similar logic
+  used for testing the ``cholla_utils`` python package
+  - the ``cholla_utils`` python package is developed within the Cholla
+    repository and provides a light-weight (compared to yt) interface for
+    loading datasets
+  - ideally, we will try to keep the testing logic relatively consistent
+    between the 2 packages
+"""
+
+import enum
+import typing
+from collections.abc import Sequence
+
+import numpy as np
+import pytest
+
+import yt
+from yt.frontends.cholla.misc import _CachedH5Openner
+from yt.testing import requires_module
+
+# this is a hacky workaround to get h5py.File to work in annotations. We can probably
+# address this issue more robustly by directly modifying yt.utilities.on_demand_imports
+if typing.TYPE_CHECKING:
+    import h5py
+else:
+    from yt.utilities.on_demand_imports import _h5py as h5py
+
+
+class ChollaDataFmt(enum.Enum):
+    """Describes the format of the grid data"""
+
+    # the format directly written by Cholla (each block is written to a separate file)
+    DISTRIBUTED = (enum.auto(), False)
+    # Cholla's older concatenation scripts (that are no longer available), would
+    # combine all blocks into 1 giant block. The resulting file generally appears as if
+    # Cholla was run with a single process that evolved a single giant block of data
+    LEGACY_CONCAT = (enum.auto(), True)
+    # Cholla's newer concatenation scripts combine all of the data into a single file,
+    # but retain the original block structure
+    CONCAT = (enum.auto(), True)
+
+    def __new__(cls, value: typing.Any, is_single_file: bool):
+        # based on example from docs
+        if isinstance(value, enum.auto):
+            value = len(cls.__members__) + 1
+
+        obj = object.__new__(cls)
+        obj._value_ = value
+        obj.is_single_file = is_single_file
+        return obj
+
+    def __repr__(self):
+        # based on example from docs (when we want to hide the underlying value)
+        return f"<{self.__class__.__name__}, {self.name}>"
+
+
+def _generate_array(shape: tuple[int, ...], *, start: int = 0):
+    # used to generate an array of unique values of a given shape
+    size = np.prod(shape)
+    return np.arange(start, start + size).reshape(shape).astype("f8")
+
+
+def _add_standard_header_attrs(f: h5py.File):
+    # we could customize this quite a bit... (but, that seems unnecessary)
+
+    # fields that must be handled separately:
+    # - particle & field headers always have:
+    #   * "bounds", "dx", "domain" (handled by _write_domain_prop_attrs)
+    #   * "dims", "n_fields", "nprocs"
+    #   * sometimes: "offset", "dims_local" (depends on the style)
+    # - particle headers may also have:
+    #   * "dt_particles" (this can be different from "dt")
+    #   * "t_particles" (as far as I can tell, this is the same as "t")
+    #   * sometimes: "n_particles_local" (depends on the file-style)
+
+    f.attrs["Git Commit Hash"] = np.array(["<garbage>"], dtype=object)
+    f.attrs["Macro Flags"] = np.array(["<garbage>"], dtype=object)
+    f.attrs["cholla"] = np.array([""], dtype=object)
+    f.attrs["density_unit"] = np.array([6.76810999e-32], dtype="f8")
+    f.attrs["energy_unit"] = np.array([6.47112563e-10], dtype="f8")
+    f.attrs["gamma"] = np.array([1.66666667], dtype="f8")
+    f.attrs["length_unit"] = np.array([3.08567758e21], dtype="f8")
+    f.attrs["mass_unit"] = np.array([1.98847e33], dtype="f8")
+    f.attrs["time_unit"] = np.array([3.15569e10], dtype="f8")
+    f.attrs["velocity_unit"] = np.array([9.77813911e10], dtype="f8")
+    f.attrs["n_step"] = np.array([0], dtype="i4")
+    f.attrs["t"] = np.array([0.0], dtype="f8")
+    f.attrs["dt"] = np.array([0.0], dtype="f8")
+
+
+def _write_domain_prop_attrs(f: h5py.File, global_shape: tuple[int, ...]):
+    # we could customize this quite a bit... (but, that seems unnecessary)
+
+    dx = np.array([1.0 for _ in global_shape])
+    domain = dx * global_shape
+    f.attrs["dx"] = dx
+    f.attrs["domain"] = domain
+    f.attrs["bounds"] = -0.25 * domain
+
+
+def _generate_files(
+    root_path: str,
+    nprocs: Sequence[int],
+    global_shape: Sequence[int],
+    *,
+    data_format: ChollaDataFmt | None = None,
+    field_names: Sequence[str] | None = None,
+) -> tuple[dict[str, np.ndarray], str]:
+    """
+    Generates file(s) that emulate a dataset holding results from a
+    hypothetical Cholla simulation
+
+    Parameters
+    ----------
+    root_path
+        Prefix of the path where the dataset is written
+    nprocs
+        Specifies the number of processes that the hypothetical Cholla
+        simulation used
+    global_shape
+        Specifies the global shape of each field in the hypothetical
+        Cholla simulation (each MPI process would be responsible for evolving
+        a subsection of the global shape).
+    data_format
+        Specifies the format of the dataset
+    field_names
+        The names of the fields to include in the output dataset
+
+    Returns
+    -------
+    global_arrays: dict[str, np.ndarray]
+        A dictionary of the global concatenated fields
+    root_fname: str
+        Path to one of the files. For distributed datasets this is
+        always process 0.
+    """
+    # check and sanitize arguments
+
+    if isinstance(field_names, str):
+        raise TypeError("field_names can't be a string")
+    elif field_names is None:
+        field_names = ["density"]
+    elif len(field_names) != len(set(field_names)):
+        raise ValueError("field_names must hold unique names")
+
+    if len(nprocs) != 3:
+        raise ValueError("nprocs must be a 3 element array")
+    elif any(int(e) != e for e in nprocs):
+        raise ValueError("nprocs must contain integers")
+    elif any(e < 1 for e in nprocs):
+        raise ValueError("nprocs must contain positive values")
+    else:
+        nprocs = tuple(int(e) for e in nprocs)
+
+    if len(global_shape) != 3:
+        raise ValueError("global_shape must be a 3 element array")
+    elif any(int(e) != e for e in global_shape):
+        raise ValueError("global_shape must contain integers")
+    elif any(e < 1 for e in global_shape):
+        raise ValueError("global_shape must contain positive values")
+    else:
+        global_shape = tuple(int(e) for e in global_shape)
+
+    # infer the shape of each block (and perform a sanity check)
+    cc_block_shape, remainder = np.divmod(global_shape, nprocs)
+    if (cc_block_shape == 0).any():
+        raise ValueError(
+            "nprocs contains a value exceeding the corrsponding length in global_shape"
+        )
+    elif (remainder != 0).any():
+        raise ValueError(
+            "at least 1 element of global_shape isn't evenly divisible by nprocs"
+        )
+
+    if data_format is ChollaDataFmt.LEGACY_CONCAT:
+        cc_block_shape = np.array(global_shape)
+        nprocs = (1, 1, 1)
+
+    # construct the global arrays
+    global_arrays = {}
+    start_offset = 0
+    for name in field_names:
+        arr = _generate_array(global_shape, start=start_offset)
+        global_arrays[name] = arr
+        start_offset = arr.max() + 1
+
+    # prepare to create the files
+    blockid_location_arr = np.arange(np.prod(nprocs)).reshape(nprocs)
+    match data_format:
+        case ChollaDataFmt.DISTRIBUTED:
+            fname_template = f"{root_path}.h5.{{blockid:d}}"
+            field_grp = "/"
+            field_dset_shape = tuple(cc_block_shape)
+        case ChollaDataFmt.LEGACY_CONCAT:
+            fname_template = f"{root_path}.h5"
+            field_grp = "/"
+            field_dset_shape = global_shape
+        case ChollaDataFmt.CONCAT:
+            fname_template = f"{root_path}.h5"
+            field_grp = "field"
+            field_dset_shape = (np.prod(nprocs),) + tuple(cc_block_shape)
+        case _:
+            raise RuntimeError(f"unknown data format: {data_format}")
+
+    # actually create the file
+    with _CachedH5Openner(mode="w") as h5_context_manager:
+        for idx3d, blockid in np.ndenumerate(blockid_location_arr):
+            f = h5_context_manager.open_fh(fname_template.format(blockid=blockid))
+
+            # selects the region of the global arrays relevant for the current block
+            src_slc = (
+                slice(idx3d[0] * cc_block_shape[0], (idx3d[0] + 1) * cc_block_shape[0]),
+                slice(idx3d[1] * cc_block_shape[1], (idx3d[1] + 1) * cc_block_shape[1]),
+                slice(idx3d[2] * cc_block_shape[2], (idx3d[2] + 1) * cc_block_shape[2]),
+            )
+
+            # determine the region of the output dataset relevant for the current block
+            # and write any extra output-specific metadata
+            match data_format:
+                case ChollaDataFmt.DISTRIBUTED:
+                    dst_idx = (...,)
+
+                    f.attrs["offset"] = np.array([int(slc.start) for slc in src_slc])
+                    f.attrs["dims_local"] = cc_block_shape
+
+                case ChollaDataFmt.LEGACY_CONCAT:
+                    dst_idx = (...,)
+
+                case ChollaDataFmt.CONCAT:
+                    dst_idx = (blockid, ...)
+
+                    if blockid == 0:
+                        f.create_group("domain")
+                        f["domain"]["blockid_location_arr"] = blockid_location_arr
+                        f["domain"]["stored_blockid_list"] = np.arange(
+                            blockid_location_arr.size
+                        )
+
+                        f.create_group("field")
+
+                case _:
+                    raise RuntimeError(f"unknown data format: {data_format}")
+
+            if (blockid == 0) or (not data_format.is_single_file):
+                # write some common metadata
+                f.attrs["dims"] = np.array(global_shape)
+                f.attrs["nprocs"] = np.array(nprocs)
+                f.attrs["n_fields"] = np.array([len(field_names)])
+                _add_standard_header_attrs(f)
+                _write_domain_prop_attrs(f, global_shape=global_shape)
+                # create the datasets that will hold the fields
+                for field_name in field_names:
+                    f[field_grp].create_dataset(
+                        name=field_name, shape=field_dset_shape, dtype="f8"
+                    )
+
+            # actually record the field data
+            for field_name in field_names:
+                f[field_grp][field_name][dst_idx] = global_arrays[field_name][src_slc]
+    return global_arrays, fname_template.format(blockid=0)
+
+
+_CASES = [
+    {
+        "nprocs": (1, 1, 1),
+        "global_shape": (8, 8, 8),
+        "data_format": ChollaDataFmt.DISTRIBUTED,
+    },
+    {
+        "nprocs": (2, 2, 2),
+        "global_shape": (4, 16, 8),
+        "data_format": ChollaDataFmt.DISTRIBUTED,
+    },
+    {
+        "nprocs": (1, 4, 2),
+        "global_shape": (4, 16, 8),
+        "data_format": ChollaDataFmt.DISTRIBUTED,
+    },
+    # there's no point going through lots of varieties of ChollaDataFmt.LEGACY_CONCAT
+    # -> the files always look very similar to each other
+    {
+        "nprocs": (2, 2, 2),
+        "global_shape": (4, 16, 8),
+        "data_format": ChollaDataFmt.LEGACY_CONCAT,
+    },
+    # it's definitely worth checking ChollaDataFmt.CONCAT when there is only
+    # 1 process as well as when there are multiple processes
+    {
+        "nprocs": (1, 1, 1),
+        "global_shape": (8, 8, 8),
+        "data_format": ChollaDataFmt.CONCAT,
+    },
+    {
+        "nprocs": (2, 2, 2),
+        "global_shape": (4, 16, 8),
+        "data_format": ChollaDataFmt.CONCAT,
+    },
+]
+
+
+@requires_module("h5py")
+@pytest.mark.parametrize("kwargs", _CASES)
+def test_load(tmp_path, kwargs):
+    # generate a synthetic dataset and make sure that the loaded values are correct
+
+    # Step 1: create the synthetic dataset
+    # -> global_arr is a dict that maps each field_name to the fully concatenated
+    #    array that holds the expected field values
+    # -> root_fname is the path that should be passed to yt.load
+    global_arr, root_fname = _generate_files(
+        root_path=str(tmp_path / "0"), field_names=["density", "momentum_x"], **kwargs
+    )
+
+    # Step 2: load the dataset and build a covering grid spanning the full domain
+    # -> accessing a field from the covering grid should return a field with the same
+    #    shape as accessing the corresponding field in global_arr
+    ds = yt.load(root_fname)
+    grid = ds.covering_grid(
+        level=0, left_edge=ds.domain_left_edge, dims=ds.domain_dimensions
+    )
+
+    # Step 3: actually compare the loaded field values against our expectations
+    for field_name, expected_arr in global_arr.items():
+        np.testing.assert_equal(
+            actual=grid["cholla", field_name].ndview,
+            desired=expected_arr,
+            err_msg=f"there was an issue with loading the {field_name!r} field",
+        )
diff --git a/yt/frontends/cholla/tests/test_outputs.py b/yt/frontends/cholla/tests/test_outputs.py
index 2e77fa2f4f5..c8b46fdbb99 100644
--- a/yt/frontends/cholla/tests/test_outputs.py
+++ b/yt/frontends/cholla/tests/test_outputs.py
@@ -1,3 +1,7 @@
+"""
+Tests the Cholla frontend by using a real dataset
+"""
+
 from numpy.testing import assert_equal
 
 import yt