diff --git a/nose_ignores.txt b/nose_ignores.txt index 6db1089c71f..c3db3d70c05 100644 --- a/nose_ignores.txt +++ b/nose_ignores.txt @@ -52,3 +52,4 @@ --ignore-file=test_cf_radial_pytest\.py --ignore-file=test_data_containers\.py --ignore-file=test_fields_pytest\.py +--ignore-file=test_cholla_load\.py diff --git a/yt/frontends/cholla/data_structures.py b/yt/frontends/cholla/data_structures.py index 237e345a498..2524cb9e757 100644 --- a/yt/frontends/cholla/data_structures.py +++ b/yt/frontends/cholla/data_structures.py @@ -9,16 +9,17 @@ from yt.geometry.api import Geometry from yt.geometry.grid_geometry_handler import GridIndex from yt.utilities.logger import ytLogger as mylog -from yt.utilities.on_demand_imports import _h5py as h5py +from yt.utilities.on_demand_imports import _h5py from .fields import ChollaFieldInfo +from .misc import _determine_data_layout class ChollaGrid(AMRGridPatch): _id_offset = 0 - def __init__(self, id, index, level, dims): - super().__init__(id, filename=index.index_filename, index=index) + def __init__(self, id, index, level, dims, filename): + super().__init__(id, filename=filename, index=index) self.Parent = None self.Children = [] self.Level = level @@ -27,6 +28,7 @@ def __init__(self, id, index, level, dims): class ChollaHierarchy(GridIndex): grid = ChollaGrid + _grid_chunksize = 1 def __init__(self, ds, dataset_type="cholla"): self.dataset_type = dataset_type @@ -39,27 +41,53 @@ def __init__(self, ds, dataset_type="cholla"): super().__init__(ds, dataset_type) def _detect_output_fields(self): - with h5py.File(self.index_filename, mode="r") as h5f: - self.field_list = [("cholla", k) for k in h5f.keys()] + with _h5py.File(self.index_filename, mode="r") as h5f: + grp = h5f.get("field", h5f) + self.field_list = [("cholla", k) for k in grp.keys()] def _count_grids(self): - self.num_grids = 1 + with _h5py.File(self.index_filename, "r") as f: + self._blockid_location_arr, self._block_mapping = _determine_data_layout(f) + self.num_grids = 
self._blockid_location_arr.size def _parse_index(self): - self.grid_left_edge[0][:] = self.ds.domain_left_edge[:] - self.grid_right_edge[0][:] = self.ds.domain_right_edge[:] - self.grid_dimensions[0][:] = self.ds.domain_dimensions[:] - self.grid_particle_count[0][0] = 0 - self.grid_levels[0][0] = 0 + self.grids = np.empty(self.num_grids, dtype="object") + + shape_arr = np.array(self._blockid_location_arr.shape) + dims_local = (self.ds.domain_dimensions[:] / shape_arr).astype("=i8") + + for idx3D, blockid in np.ndenumerate(self._blockid_location_arr): + idx3D_arr = np.array(idx3D) + left_frac = idx3D_arr / shape_arr + right_frac = (1 + idx3D_arr) / shape_arr + + level = 0 + + self.grids[blockid] = self.grid( + blockid, + index=self, + level=level, + dims=dims_local, + filename=self._block_mapping.fname_template.format(blockid=blockid), + ) + + self.grid_left_edge[blockid, :] = left_frac + self.grid_right_edge[blockid, :] = right_frac + self.grid_dimensions[blockid, :] = dims_local + self.grid_levels[blockid, 0] = level + self.grid_particle_count[blockid, 0] = 0 + + slope = self.ds.domain_width / self.ds.arr(np.ones(3), "code_length") + self.grid_left_edge = self.grid_left_edge * slope + self.ds.domain_left_edge + self.grid_right_edge = self.grid_right_edge * slope + self.ds.domain_left_edge + self.max_level = 0 def _populate_grid_objects(self): - self.grids = np.empty(self.num_grids, dtype="object") for i in range(self.num_grids): - g = self.grid(i, self, self.grid_levels.flat[i], self.grid_dimensions[i]) + g = self.grids[i] g._prepare_grid() g._setup_dx() - self.grids[i] = g class ChollaDataset(Dataset): @@ -100,7 +128,7 @@ def _set_code_unit_attributes(self): setdefaultattr(self, key, self.quan(1, unit)) def _parse_parameter_file(self): - with h5py.File(self.parameter_filename, mode="r") as h5f: + with _h5py.File(self.parameter_filename, mode="r") as h5f: attrs = h5f.attrs self.parameters = dict(attrs.items()) self.domain_left_edge = 
attrs["bounds"][:].astype("=f8") @@ -108,7 +136,7 @@ def _parse_parameter_file(self): "=f8" ) self.dimensionality = len(attrs["dims"][:]) - self.domain_dimensions = attrs["dims"][:].astype("=f8") + self.domain_dimensions = attrs["dims"][:].astype("=i8") self.current_time = attrs["t"][:] self._periodicity = tuple(attrs.get("periodicity", (False, False, False))) self.gamma = attrs.get("gamma", 5.0 / 3.0) @@ -167,7 +195,7 @@ def _is_valid(cls, filename: str, *args, **kwargs) -> bool: return False try: - fileh = h5py.File(filename, mode="r") + fileh = _h5py.File(filename, mode="r") except OSError: return False diff --git a/yt/frontends/cholla/io.py b/yt/frontends/cholla/io.py index 0d216864076..af5f280c4af 100644 --- a/yt/frontends/cholla/io.py +++ b/yt/frontends/cholla/io.py @@ -1,7 +1,6 @@ -import numpy as np - from yt.utilities.io_handler import BaseIOHandler -from yt.utilities.on_demand_imports import _h5py as h5py + +from .misc import _CachedH5Openner class ChollaIOHandler(BaseIOHandler): @@ -14,22 +13,28 @@ def _read_particle_coords(self, chunks, ptf): def _read_particle_fields(self, chunks, ptf, selector): raise NotImplementedError - def _read_fluid_selection(self, chunks, selector, fields, size): - data = {} - for field in fields: - data[field] = np.empty(size, dtype="float64") - - with h5py.File(self.ds.parameter_filename, "r") as fh: - ind = 0 + def io_iter(self, chunks, fields): + # this is loosely inspired by the implementation used for Enzo/Enzo-E + # - those other implementations use the lower-level hdf5 interface. Unclear + # whether that affords any advantages... + mapper = self.ds.index._block_mapping + with _CachedH5Openner(mode="r") as h5_context_manager: for chunk in chunks: - for grid in chunk.objs: - nd = 0 + for obj in chunk.objs: + if obj.filename is None: # unclear when this case arises... 
+ continue + + # ensure the file containing data for obj is open + fh = h5_context_manager.open_fh(obj.filename) + + # access the HDF5 group containing the datasets of field values + grp = fh[mapper.h5_group] + # get the indices in a generic dataset that correspond to obj.id + idx = mapper.idx_map[obj.id] + for field in fields: ftype, fname = field - values = fh[fname][:].astype("=f8") - nd = grid.select(selector, values, data[field], ind) - ind += nd - return data + yield field, obj, grp[fname][idx].astype("=f8") def _read_chunk_data(self, chunk, fields): raise NotImplementedError diff --git a/yt/frontends/cholla/misc.py b/yt/frontends/cholla/misc.py index e69de29bb2d..75d3b55551d 100644 --- a/yt/frontends/cholla/misc.py +++ b/yt/frontends/cholla/misc.py @@ -0,0 +1,214 @@ +import os +import typing +from collections import defaultdict +from collections.abc import Mapping +from dataclasses import dataclass + +import numpy as np + +# this is a hacky workaround to get _h5py.File to work in annotations. We can probably +# address this issue more robustly by directly modifying yt.utilities.on_demand_imports +if typing.TYPE_CHECKING: + import h5py as _h5py +else: + from yt.utilities.on_demand_imports import _h5py + + +class _CachedH5Openner: + """ + A simple context manager that helps implement the idiom where data is read + from (or written to) one or more HDF5 and we want to wait to close the + previous HDF5 file until it is time to open a new file. This lets us avoid + overhead in cases where we would close and then immediately reopen the + same file. + + By using a context manager, we're able to properly cleanup in the event + that an exception occurs. 
+ """ + + def __init__(self, mode="r"): + self._filename = None + self._fh = None + self._mode = mode + + def open_fh(self, filename): + if self._filename == filename: + return self._fh + if self._fh is not None: + self._fh.close() + self._fh = _h5py.File(filename, self._mode) + self._filename = filename + return self._fh + + def __enter__(self): + return self + + def __exit__(self, exc, value, tb): + if self._fh is not None: + self._fh.close() + + +@dataclass(kw_only=True, slots=True, frozen=True) +class _BlockDiskMapping: + """Contains info for mapping blockids to locations in hdf5 files + + Notes + ----- + At the time of writing, this is primarily meant to provide a mapping for + field data. In the future, we may initialize a separate instance to + provide a mapping for particle data + """ + + # ``fname_template.format(blockid=...)`` produces the file containing blockid (this + # can properly handle cases where all blocks are stored in a single file) + fname_template: str + # hdf5 group containing the field data + h5_group: str + # maps blockid to an index that select all associated data from a field-dataset + idx_map: Mapping[int, tuple[int | slice, ...]] + + +def _infer_blockid_location_arr(fname_template, global_dims, arr_shape): + # used when hdf5 files don't have an explicit "domain" group + blockid_location_arr = np.empty(shape=tuple(int(e) for e in arr_shape), dtype="i8") + if blockid_location_arr.size == 1: + # primarily intended to handle the result of older concatenation scripts (it + # also handles the case when only a single block is used, which is okay) + blockid_location_arr[0, 0, 0] = 0 + else: # handle distributed cholla datasets + local_dims, rem = np.divmod(global_dims, blockid_location_arr.shape) + assert np.all(rem == 0) and np.all(local_dims > 0) + for blockid in range(0, blockid_location_arr.size): + with _h5py.File(fname_template.format(blockid=blockid), "r") as f: + tmp, rem = np.divmod(f.attrs["offset"][:], local_dims) + assert 
np.all(rem == 0) # sanity check + idx3D = tuple(int(e) for e in tmp) + blockid_location_arr[idx3D] = blockid + return blockid_location_arr + + +def _determine_data_layout(f: _h5py.File) -> tuple[np.ndarray, _BlockDiskMapping]: + """Determine the data layout of the snapshot + + The premise is that the differences between data formats shouldn't + matter outside of this function.""" + filename = f.filename + + # STEP 1: infer the template for all Cholla data-files by inspecting filename + # =========================================================================== + # There are 2 conventions for the names of Cholla's data-files: + # 1. "root.h5.{blockid}" is the standard format Cholla uses when writing files + # storing a single snapshot. Each MPI-rank will write a separate file and + # replace ``{blockid}`` with MPI-rank (Modern Cholla versions without MPI + # replace ``{blockid}`` with ``0``) + # 2. "root.h5": is the standard format used by Cholla's concatenation scripts + # (older versions of Cholla without MPI also used this format to name outputs) + inferred_fname_template, cur_filename_suffix = _infer_fname_template(filename) + + # STEP 2: Check whether the hdf5 file has a flat structure + # ======================================================== + # Historically, we would always store datasets directly in the root group of the + # data file. More recent concatenation scripts store all data inside groups. + flat_structure = any(not isinstance(elem, _h5py.Group) for elem in f.values()) + + # STEP 3: Extract basic domain information from the file(s) + # ========================================================= + has_explicit_domain_info = "domain" in f + if has_explicit_domain_info: + # this branch primarily handles concatenated files made with newer logic + blockid_location_arr = f["domain/blockid_location_arr"][...]
+ field_idx_map = { + int(blockid): (i, slice(None), slice(None), slice(None)) + for i, blockid in enumerate(f["domain/stored_blockid_list"][...]) + } + consolidated_data = len(field_idx_map) == blockid_location_arr.size + if not consolidated_data: + # in the near future, we may support one of the 2 cases: + # > if (flat_structure): + # > _common_idx = (slice(None), slice(None), slice(None)) + # > else: + # > _common_idx = (0, slice(None), slice(None), slice(None)) + # > field_idx_map = defaultdict(lambda arg=_common_idx: arg) + raise ValueError( + "no support for reading Cholla datasets where data is distributed " + "among files that explicitly encode domain info." + ) + else: # (not has_explicit_domain_info) + # this branch covers distributed datasets (directly written by Cholla) and + # older concatenated files. + # + # historically, when the dataset is concatenated (in post-processing), + # the "nprocs" hdf5 attribute has been dropped + blockid_location_arr = _infer_blockid_location_arr( + fname_template=inferred_fname_template, + global_dims=f.attrs["dims"].astype("=i8"), + arr_shape=f.attrs.get("nprocs", np.array([1, 1, 1])).astype("=i8"), + ) + consolidated_data = blockid_location_arr.size == 1 + + def _get_common_idx(): + return (slice(None), slice(None), slice(None)) + + field_idx_map = defaultdict(_get_common_idx) + + # STEP 4: Finalize the fname template + # =================================== + if consolidated_data: + fname_template = filename + elif cur_filename_suffix != 0: + raise ValueError( # mostly just a sanity check! 
+ "filename passed to yt.load for a distributed cholla dataset must " + "end in '.0'" + ) + else: + fname_template = inferred_fname_template + + mapping = _BlockDiskMapping( + fname_template=fname_template, + h5_group="./" if flat_structure else "field", + idx_map=field_idx_map, + ) + return blockid_location_arr, mapping + + +def _infer_fname_template(filename: str) -> tuple[str, int | None]: + """Infers the template for all Cholla data-files based on the filename + passed to ``yt.load``. + + Returns the template and the process-id suffix (if any) as a 2-tuple. + + There are 2 conventions for the names of Cholla's data-files: + 1. "root.h5.{blockid}" is the standard format Cholla uses when writing + files storing a single snapshot. Each MPI-rank will write a separate + file and replace ``{blockid}`` with MPI-rank (Modern Cholla versions + without MPI replace ``{blockid}`` with ``0``) + 2. "root.h5": is the standard format used by Cholla's concatenation + scripts (older versions of Cholla without MPI also used this format + to name outputs) + + Returns + ------- + template: str + The path to the file containing a blockid is given by calling + ``template.format(blockid=<blockid>)``. (This will work whether + all blocks are stored in 1 file or blocks are distributed across + files) + cur_blockid_suffix: int or None + The blockid specified in the suffix of ``filename``. If there isn't a + suffix, then this will be None. + """ + + # at this time, we expect the suffix to be the minimum number of characters + # that are necessary to represent the process id. For flexibility, we will + # allow extra zero-padding + + _dir, _base = os.path.split(filename) + match _base.rpartition("."): + case ("", ".", _): # Cholla never writes a file like this + raise ValueError( + f"1st character in {filename!r} is the only '.' 
in the file's name" + ) + case (prefix, ".", suffix) if suffix.isdecimal(): + return os.path.join(_dir, f"{prefix}.{{blockid}}"), int(suffix) + case _: + return (filename, None) diff --git a/yt/frontends/cholla/tests/test_cholla_load.py b/yt/frontends/cholla/tests/test_cholla_load.py new file mode 100644 index 00000000000..ddf004649b8 --- /dev/null +++ b/yt/frontends/cholla/tests/test_cholla_load.py @@ -0,0 +1,336 @@ +""" +Tests the Cholla-frontend by generating synthetic datasets + +In more detail, Cholla has had a couple of historical data formats. +- see the ``ChollaDataFmt`` enumeration for a description of each format +- we choose to use synthetic datasets in case there is a future where the + data-format changes again and we want to maintain backwards compatibility + without uploading more and more sample Cholla datasets +- the logic for creating synthetic datasets is adapted from similar logic + used for testing the ``cholla_utils`` python package + - the ``cholla_utils`` python package is developed within the Cholla + repository and provides a light-weight (compared to yt) interface for + loading datasets + - ideally, we will try to keep the testing logic relatively consistent + between the 2 packages +""" + +import enum +import typing +from collections.abc import Sequence + +import numpy as np +import pytest + +import yt +from yt.frontends.cholla.misc import _CachedH5Openner +from yt.testing import requires_module + +# this is a hacky workaround to get h5py.File to work in annotations. 
We can probably +# address this issue more robustly by directly modifying yt.utilities.on_demand_imports +if typing.TYPE_CHECKING: + import h5py +else: + from yt.utilities.on_demand_imports import _h5py as h5py + + +class ChollaDataFmt(enum.Enum): + """Describes the format of the grid data""" + + # the format directly written by Cholla (each block is written to a separate file) + DISTRIBUTED = (enum.auto(), False) + # Cholla's older concatenation scripts (that are no longer available), would + # combine all blocks into 1 giant block. The resulting generally appears as if + # Cholla was run with a single process that evolved a single giant block of data + LEGACY_CONCAT = (enum.auto(), True) + # Cholla's newer concatenation scripts combine all of the data into a single file, + # but retains the original block structure + CONCAT = (enum.auto(), True) + + def __new__(cls, value: typing.Any, is_single_file: bool): + # based on example from docs + if isinstance(value, enum.auto): + value = len(cls.__members__) + 1 + + obj = object.__new__(cls) + obj._value_ = value + obj.is_single_file = is_single_file + return obj + + def __repr__(self): + # based on example from docs (when we want to hide the underlying value) + return f"<{self.__class__.__name__}, {self.name}>" + + +def _generate_array(shape: tuple[int, ...], *, start: int = 0): + # used to generate an array of unique values of a given shape + size = np.prod(shape) + return np.arange(start, start + size).reshape(shape).astype("f8") + + +def _add_standard_header_attrs(f: h5py.File): + # we could customize this quite a bit... 
(but, that seems unnecessary) + + # fields that must be handled separately: + # - particle & field headers always have: + # * "bounds", "dx", "domain" (handled by _write_domain_prop_attrs) + # * "dims", "n_fields", "nprocs" + # * sometimes: "offset", "dims_local" (depends on the style) + # - particle headers may also have: + # * "dt_particles" (this can be different from "dt") + # * "t_particles" (as far as I can tell, this is the same as "t") + # * sometimes: "n_particles_local" (depends on the file-style) + + f.attrs["Git Commit Hash"] = np.array([""], dtype=object) + f.attrs["Macro Flags"] = np.array([""], dtype=object) + f.attrs["cholla"] = np.array([""], dtype=object) + f.attrs["density_unit"] = np.array([6.76810999e-32], dtype="f8") + f.attrs["energy_unit"] = np.array([6.47112563e-10], dtype="f8") + f.attrs["gamma"] = np.array([1.66666667], dtype="f8") + f.attrs["length_unit"] = np.array([3.08567758e21], dtype="f8") + f.attrs["mass_unit"] = np.array([1.98847e33], dtype="f8") + f.attrs["time_unit"] = np.array([3.15569e10], dtype="f8") + f.attrs["velocity_unit"] = np.array([9.77813911e10], dtype="f8") + f.attrs["n_step"] = np.array([0], dtype="i4") + f.attrs["t"] = np.array([0.0], dtype="f8") + f.attrs["dt"] = np.array([0.0], dtype="f8") + + +def _write_domain_prop_attrs(f: h5py.File, global_shape: tuple[int, ...]): + # we could customize this quite a bit... 
(but, that seems unnecessary) + + dx = np.array([1.0 for _ in global_shape]) + domain = dx * global_shape + f.attrs["dx"] = dx + f.attrs["domain"] = domain + f.attrs["bounds"] = -0.25 * domain + + +def _generate_files( + root_path: str, + nprocs: Sequence[int], + global_shape: Sequence[int], + *, + data_format: ChollaDataFmt | None = None, + field_names: Sequence[str] | None = None, +) -> tuple[dict[str, np.ndarray], str]: + """ + Generates file(s) that emulate a dataset holding results from a + hypothetical Cholla simulation + + Parameters + ---------- + root_path + Prefix of the path where the dataset is written + nprocs + Specifies the number of processes that the hypothetical Cholla + simulation used + global_shape + Specifies the global shape of each field in the hypothetical + Cholla simulation (each MPI process would be responsible for evolving + a subsection of the global shape). + data_format + Specifies the format of the dataset + field_names + The names of the fields to include in the output dataset + + Returns + ------- + global_arrays: dict[str, np.ndarray] + A dictionary of the global concatenated fields + root_fname: str + Path to one of the files. For distributed datasets this is + always process 0. 
+ """ + # check and sanitize arguments + + if isinstance(field_names, str): + raise TypeError("field_names can't be a string") + elif field_names is None: + field_names = ["density"] + elif len(field_names) != len(set(field_names)): + raise ValueError("field_names must hold unique names") + + if len(nprocs) != 3: + raise ValueError("nprocs must be a 3 element array") + elif any(int(e) != e for e in nprocs): + raise ValueError("nprocs must contain integers") + elif any(e < 1 for e in nprocs): + raise ValueError("nprocs must contain positive values") + else: + nprocs = tuple(int(e) for e in nprocs) + + if len(global_shape) != 3: + raise ValueError("global_shape must be a 3 element array") + elif any(int(e) != e for e in global_shape): + raise ValueError("global_shape must contain integers") + elif any(e < 1 for e in global_shape): + raise ValueError("global_shape must contain positive values") + else: + global_shape = tuple(int(e) for e in global_shape) + + # infer the shape of each block (and perform a sanity check) + cc_block_shape, remainder = np.divmod(global_shape, nprocs) + if (cc_block_shape == 0).any(): + raise ValueError( + "nprocs contains a value exceeding the corrsponding length in global_shape" + ) + elif (remainder != 0).any(): + raise ValueError( + "at least 1 element of global_shape isn't evenly divisible by nprocs" + ) + + if data_format is ChollaDataFmt.LEGACY_CONCAT: + cc_block_shape = np.array(global_shape) + nprocs = (1, 1, 1) + + # construct the global arrays + global_arrays = {} + start_offset = 0 + for name in field_names: + arr = _generate_array(global_shape, start=start_offset) + global_arrays[name] = arr + start_offset = arr.max() + 1 + + # prepare to creating the files + blockid_location_arr = np.arange(np.prod(nprocs)).reshape(nprocs) + match data_format: + case ChollaDataFmt.DISTRIBUTED: + fname_template = f"{root_path}.h5.{{blockid:d}}" + field_grp = "/" + field_dset_shape = tuple(cc_block_shape) + case ChollaDataFmt.LEGACY_CONCAT: + 
fname_template = f"{root_path}.h5" + field_grp = "/" + field_dset_shape = global_shape + case ChollaDataFmt.CONCAT: + fname_template = f"{root_path}.h5" + field_grp = "field" + field_dset_shape = (np.prod(nprocs),) + tuple(cc_block_shape) + case _: + raise RuntimeError(f"unknown data format: {data_format}") + + # actually create the file + with _CachedH5Openner(mode="w") as h5_context_manager: + for idx3d, blockid in np.ndenumerate(blockid_location_arr): + f = h5_context_manager.open_fh(fname_template.format(blockid=blockid)) + + # selects the region of a global arrays relevant for the current block + src_slc = ( + slice(idx3d[0] * cc_block_shape[0], (idx3d[0] + 1) * cc_block_shape[0]), + slice(idx3d[1] * cc_block_shape[1], (idx3d[1] + 1) * cc_block_shape[1]), + slice(idx3d[2] * cc_block_shape[2], (idx3d[2] + 1) * cc_block_shape[2]), + ) + + # determine the region of the output dataset relevant for the current block + # and write any extra output-specific metadata + match data_format: + case ChollaDataFmt.DISTRIBUTED: + dst_idx = (...,) + + f.attrs["offset"] = np.array([int(slc.start) for slc in src_slc]) + f.attrs["dims_local"] = cc_block_shape + + case ChollaDataFmt.LEGACY_CONCAT: + dst_idx = (...,) + + case ChollaDataFmt.CONCAT: + dst_idx = (blockid, ...) 
+ + if blockid == 0: + f.create_group("domain") + f["domain"]["blockid_location_arr"] = blockid_location_arr + f["domain"]["stored_blockid_list"] = np.arange( + blockid_location_arr.size + ) + + f.create_group("field") + + case _: + raise RuntimeError(f"unknown data format: {data_format}") + + if (blockid == 0) or (not data_format.is_single_file): + # write some common metadata + f.attrs["dims"] = np.array(global_shape) + f.attrs["nprocs"] = np.array(nprocs) + f.attrs["n_fields"] = np.array([len(field_names)]) + _add_standard_header_attrs(f) + _write_domain_prop_attrs(f, global_shape=global_shape) + # create the datasets that will hold the fields + for field_name in field_names: + f[field_grp].create_dataset( + name=field_name, shape=field_dset_shape, dtype="f8" + ) + + # actually record the field data + for field_name in field_names: + f[field_grp][field_name][dst_idx] = global_arrays[field_name][src_slc] + return global_arrays, fname_template.format(blockid=0) + + +_CASES = [ + { + "nprocs": (1, 1, 1), + "global_shape": (8, 8, 8), + "data_format": ChollaDataFmt.DISTRIBUTED, + }, + { + "nprocs": (2, 2, 2), + "global_shape": (4, 16, 8), + "data_format": ChollaDataFmt.DISTRIBUTED, + }, + { + "nprocs": (1, 4, 2), + "global_shape": (4, 16, 8), + "data_format": ChollaDataFmt.DISTRIBUTED, + }, + # there is no point going through lots of varieties of ChollaDataFmt.LEGACY_CONCAT + # -> the files always look very similar to each other + { + "nprocs": (2, 2, 2), + "global_shape": (4, 16, 8), + "data_format": ChollaDataFmt.LEGACY_CONCAT, + }, + # it's definitely worth checking ChollaDataFmt.CONCAT when there is only + # 1 process as well as when there are multiple processes + { + "nprocs": (1, 1, 1), + "global_shape": (8, 8, 8), + "data_format": ChollaDataFmt.CONCAT, + }, + { + "nprocs": (2, 2, 2), + "global_shape": (4, 16, 8), + "data_format": ChollaDataFmt.CONCAT, + }, +] + + +@requires_module("h5py") +@pytest.mark.parametrize("kwargs", _CASES) +def test_load(tmp_path, 
kwargs): + # generate a synthetic dataset and make sure that the loaded values are correct + + # Step 1: create the synthetic dataset + # -> global_arr is a dict that maps each field_name to the fully concatenated + # array that holds the expected field values + # -> root_fname is the path that should be passed to yt.load + global_arr, root_fname = _generate_files( + root_path=str(tmp_path / "0"), field_names=["density", "momentum_x"], **kwargs + ) + + # Step 2: load the dataset and build a covering grid spanning the full domain + # -> accessing a field from the covering grid should return a field with the same + # shape as accessing the corresponding field in global_arr + ds = yt.load(root_fname) + grid = ds.covering_grid( + level=0, left_edge=ds.domain_left_edge, dims=ds.domain_dimensions + ) + + # Step 3: actually compare the loaded field values against our expectations + for field_name, expected_arr in global_arr.items(): + np.testing.assert_equal( + actual=grid["cholla", field_name].ndview, + desired=expected_arr, + err_msg=f"there was an issue with loading the {field_name!r} field", + ) diff --git a/yt/frontends/cholla/tests/test_outputs.py b/yt/frontends/cholla/tests/test_outputs.py index 2e77fa2f4f5..c8b46fdbb99 100644 --- a/yt/frontends/cholla/tests/test_outputs.py +++ b/yt/frontends/cholla/tests/test_outputs.py @@ -1,3 +1,7 @@ +""" +Tests the Cholla frontend by using a real dataset +""" + from numpy.testing import assert_equal import yt