From d3509b8d2a3688038421458f99c0089ff3d61b48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 25 Jun 2025 16:55:32 +0200 Subject: [PATCH 01/10] HDF5: Render scalar datasets into {1} --- src/IO/HDF5/HDF5IOHandler.cpp | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index 010b66865c..78f02338d0 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -1294,15 +1294,24 @@ void HDF5IOHandlerImpl::openDataset( *dtype = d; int ndims = H5Sget_simple_extent_ndims(dataset_space); - std::vector dims(ndims, 0); - std::vector maxdims(ndims, 0); + if (ndims == 0) + { + // Is a scalar. Since the openPMD-api frontend supports no scalar + // datasets, return the extent as {1} + *parameters.extent = {1}; + } + else + { + std::vector dims(ndims, 0); + std::vector maxdims(ndims, 0); - H5Sget_simple_extent_dims(dataset_space, dims.data(), maxdims.data()); - Extent e; - for (auto const &val : dims) - e.push_back(val); - auto extent = parameters.extent; - *extent = e; + H5Sget_simple_extent_dims(dataset_space, dims.data(), maxdims.data()); + Extent e; + for (auto const &val : dims) + e.push_back(val); + auto &extent = parameters.extent; + *extent = e; + } herr_t status; status = H5Sclose(dataset_space); From 20123d80e6b40bc38639383b1a10b17207255833 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 26 Jun 2025 15:17:13 +0200 Subject: [PATCH 02/10] Read from scalar datasets --- src/IO/HDF5/HDF5IOHandler.cpp | 69 ++++++++++++++++++++--------- test/python/unittest/API/APITest.py | 25 +++++++++++ 2 files changed, 73 insertions(+), 21 deletions(-) diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index 78f02338d0..881ef76ff3 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -2022,28 +2022,55 @@ void HDF5IOHandlerImpl::readDataset( "[HDF5] Internal error: Failed to open HDF5 dataset during dataset " "read"); - std::vector start; - for (auto const &val : parameters.offset) - start.push_back(static_cast(val)); - std::vector stride(start.size(), 1); /* contiguous region */ - std::vector count(start.size(), 1); /* single region */ - std::vector block; - for (auto const &val : parameters.extent) - block.push_back(static_cast(val)); - memspace = - H5Screate_simple(static_cast(block.size()), block.data(), nullptr); filespace = H5Dget_space(dataset_id); - status = H5Sselect_hyperslab( - filespace, - H5S_SELECT_SET, - start.data(), - stride.data(), - count.data(), - block.data()); - VERIFY( - status == 0, - "[HDF5] Internal error: Failed to select hyperslab during dataset " - "read"); + int ndims = H5Sget_simple_extent_ndims(filespace); + std::cout << "DATASET DIMENSIONS: " << ndims << std::endl; + + if (ndims == 0) + { + if (parameters.offset != Offset{0} || parameters.extent != Extent{1}) + { + std::stringstream errorMessage; + errorMessage + << "HDF5 dataset '" << concrete_h5_file_position(writable) + << "' is scalar (dimensionality 0) and must be accessed with " + "offset [0] and extent [1]. Was accessed with offset "; + auxiliary::write_vec_to_stream(errorMessage, parameters.offset) + << " and extent "; + auxiliary::write_vec_to_stream(errorMessage, parameters.extent) + << "."; + throw error::WrongAPIUsage(errorMessage.str()); + } + memspace = H5Screate_simple(0, nullptr, nullptr); + VERIFY( + memspace > 0, + "[HDF5] Internal error: Failed to create memspace during dataset " + "read"); + } + else + { + std::vector start; + for (auto const &val : parameters.offset) + start.push_back(static_cast(val)); + std::vector stride(start.size(), 1); /* contiguous region */ + std::vector count(start.size(), 1); /* single region */ + std::vector block; + for (auto const &val : parameters.extent) + block.push_back(static_cast(val)); + memspace = H5Screate_simple( + static_cast(block.size()), block.data(), nullptr); + status = H5Sselect_hyperslab( + filespace, + H5S_SELECT_SET, + start.data(), + stride.data(), + count.data(), + block.data()); + VERIFY( + status == 0, + "[HDF5] Internal error: Failed to select hyperslab during dataset " + "read"); + } void *data = parameters.data.get(); diff --git a/test/python/unittest/API/APITest.py b/test/python/unittest/API/APITest.py index 588a723925..51ece113e0 100644 --- a/test/python/unittest/API/APITest.py +++ b/test/python/unittest/API/APITest.py @@ -2239,6 +2239,31 @@ def testSeriesConstructors(self): s = io.Series(f, io.Access.create, c) s.close() + def testScalarHdf5Fields(self): + file = "../samples/scalar_hdf5.h5" + series_write = io.Series(file, io.Access.create) + E_x = series_write.write_iterations()[0].meshes["E"]["x"] + E_x.reset_dataset(io.Dataset(np.dtype(np.int_), [1])) + E_x[:] = np.array([43]) + series_write.close() + + # Now turn E_x into a scalar + import h5py + with h5py.File(file, "r+") as f: + E = f["data"]["0"]["meshes"]["E"] + reapply_attributes = {key: val for key, val in E["x"].attrs.items()} + print("ATTRIBUTES:", reapply_attributes) + del E["x"] + E["x"] = 44 + for key, val in reapply_attributes.items(): + E["x"].attrs[key] = val + + series_read = io.Series(file, io.Access.read_only) + loaded_from_scalar = series_read.iterations[0].meshes["E"]["x"][:] + series_read.flush() + self.assertEqual(loaded_from_scalar, np.array([44])) + series_read.close() + if __name__ == '__main__': unittest.main() From 9b464868835200979cde0eece5b6c97c5e6e39fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 26 Jun 2025 15:50:56 +0200 Subject: [PATCH 03/10] Support dataset modification --- src/IO/HDF5/HDF5IOHandler.cpp | 69 ++++++++++++++++++++--------- test/python/unittest/API/APITest.py | 11 +++++ 2 files changed, 59 insertions(+), 21 deletions(-) diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index 881ef76ff3..586b326707 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -1564,28 +1564,55 @@ void HDF5IOHandlerImpl::writeDataset( "[HDF5] Internal error: Failed to open HDF5 dataset during dataset " "write"); - std::vector start; - for (auto const &val : parameters.offset) - start.push_back(static_cast(val)); - std::vector stride(start.size(), 1); /* contiguous region */ - std::vector count(start.size(), 1); /* single region */ - std::vector block; - for (auto const &val : parameters.extent) - block.push_back(static_cast(val)); - memspace = - H5Screate_simple(static_cast(block.size()), block.data(), nullptr); filespace = H5Dget_space(dataset_id); - status = H5Sselect_hyperslab( - filespace, - H5S_SELECT_SET, - start.data(), - stride.data(), - count.data(), - block.data()); - VERIFY( - status == 0, - "[HDF5] Internal error: Failed to select hyperslab during dataset " - "write"); + int ndims = H5Sget_simple_extent_ndims(filespace); + std::cout << "DATASET DIMENSIONS: " << ndims << std::endl; + + if (ndims == 0) + { + if (parameters.offset != Offset{0} || parameters.extent != Extent{1}) + { + std::stringstream errorMessage; + errorMessage + << "HDF5 dataset '" << concrete_h5_file_position(writable) + << "' is scalar (dimensionality 0) and must be accessed with " + "offset [0] and extent [1]. Was accessed with offset "; + auxiliary::write_vec_to_stream(errorMessage, parameters.offset) + << " and extent "; + auxiliary::write_vec_to_stream(errorMessage, parameters.extent) + << "."; + throw error::WrongAPIUsage(errorMessage.str()); + } + memspace = H5Screate_simple(0, nullptr, nullptr); + VERIFY( + memspace > 0, + "[HDF5] Internal error: Failed to create memspace during dataset " + "write"); + } + else + { + std::vector start; + for (auto const &val : parameters.offset) + start.push_back(static_cast(val)); + std::vector stride(start.size(), 1); /* contiguous region */ + std::vector count(start.size(), 1); /* single region */ + std::vector block; + for (auto const &val : parameters.extent) + block.push_back(static_cast(val)); + memspace = H5Screate_simple( + static_cast(block.size()), block.data(), nullptr); + status = H5Sselect_hyperslab( + filespace, + H5S_SELECT_SET, + start.data(), + stride.data(), + count.data(), + block.data()); + VERIFY( + status == 0, + "[HDF5] Internal error: Failed to select hyperslab during dataset " + "write"); + } void const *data = parameters.data.get(); diff --git a/test/python/unittest/API/APITest.py b/test/python/unittest/API/APITest.py index 51ece113e0..c10bcd7be0 100644 --- a/test/python/unittest/API/APITest.py +++ b/test/python/unittest/API/APITest.py @@ -2264,6 +2264,17 @@ def testScalarHdf5Fields(self): self.assertEqual(loaded_from_scalar, np.array([44])) series_read.close() + series_read_write = io.Series(file, io.Access.read_write) + E_x = series_read_write.iterations[0].meshes["E"]["x"] + E_x[:] = np.array([45]) + series_read_write.close() + + series_read_again = io.Series(file, io.Access.read_only) + loaded_from_scalar = series_read_again.iterations[0].meshes["E"]["x"][:] + series_read_again.flush() + self.assertEqual(loaded_from_scalar, np.array([45])) + series_read_again.close() + if __name__ == '__main__': unittest.main() From 89773967c7d2f3e31aa07b4d7893517423511a4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 26 Jun 2025 16:11:38 +0200 Subject: [PATCH 04/10] CI integration of h5py --- .../ci/spack-envs/clang15_py311_nompi_h5_ad2/spack.yaml | 1 + .../ci/spack-envs/clangtidy_nopy_ompi_h5_ad2/spack.yaml | 1 + .github/ci/spack-envs/gcc12_py36_ompi_h5_ad2/spack.yaml | 1 + .github/workflows/linux.yml | 4 ++-- .github/workflows/macos.yml | 2 +- conda.yml | 1 + test/python/unittest/API/APITest.py | 8 ++++++-- 7 files changed, 13 insertions(+), 5 deletions(-) diff --git a/.github/ci/spack-envs/clang15_py311_nompi_h5_ad2/spack.yaml b/.github/ci/spack-envs/clang15_py311_nompi_h5_ad2/spack.yaml index 13a986c4a9..f41ee55fa7 100644 --- a/.github/ci/spack-envs/clang15_py311_nompi_h5_ad2/spack.yaml +++ b/.github/ci/spack-envs/clang15_py311_nompi_h5_ad2/spack.yaml @@ -8,6 +8,7 @@ spack: specs: - adios2 - hdf5 + - py-h5py packages: hdf5: diff --git a/.github/ci/spack-envs/clangtidy_nopy_ompi_h5_ad2/spack.yaml b/.github/ci/spack-envs/clangtidy_nopy_ompi_h5_ad2/spack.yaml index cffff52ef4..77d96118a3 100644 --- a/.github/ci/spack-envs/clangtidy_nopy_ompi_h5_ad2/spack.yaml +++ b/.github/ci/spack-envs/clangtidy_nopy_ompi_h5_ad2/spack.yaml @@ -9,6 +9,7 @@ spack: - adios2@2.10 - hdf5 - openmpi + - py-h5py packages: adios2: diff --git a/.github/ci/spack-envs/gcc12_py36_ompi_h5_ad2/spack.yaml b/.github/ci/spack-envs/gcc12_py36_ompi_h5_ad2/spack.yaml index 2e51e80968..19d9420358 100644 --- a/.github/ci/spack-envs/gcc12_py36_ompi_h5_ad2/spack.yaml +++ b/.github/ci/spack-envs/gcc12_py36_ompi_h5_ad2/spack.yaml @@ -9,6 +9,7 @@ spack: - adios2@2.10 - hdf5 - openmpi + - py-h5py packages: adios2: diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 8566ab6beb..3a582aeaee 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -260,7 +260,7 @@ jobs: - name: Install run: | sudo apt-get update - sudo apt-get install g++ libopenmpi-dev libhdf5-openmpi-dev python3 python3-numpy python3-mpi4py python3-pandas + sudo apt-get install g++ libopenmpi-dev libhdf5-openmpi-dev python3 python3-numpy python3-mpi4py python3-pandas python3-h5py-mpi # TODO ADIOS2 - name: Build env: {CXXFLAGS: -Werror, PKG_CONFIG_PATH: /usr/lib/x86_64-linux-gnu/pkgconfig} @@ -286,7 +286,7 @@ jobs: run: | apk update apk add hdf5-dev - python3.10 -m pip install numpy + python3.10 -m pip install numpy h5py - name: Build env: {CXXFLAGS: -Werror} run: | diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index db003484a0..bf888445e2 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -25,7 +25,7 @@ jobs: brew install adios2 || true brew install hdf5-mpi || true brew install python || true - python3 -m pip install -U mpi4py numpy pandas + python3 -m pip install -U mpi4py numpy pandas h5py set -e - name: Build env: {CXXFLAGS: -Werror, MACOSX_DEPLOYMENT_TARGET: 11.0} diff --git a/conda.yml b/conda.yml index e82567b2ae..70fc817bf6 100644 --- a/conda.yml +++ b/conda.yml @@ -24,6 +24,7 @@ dependencies: - doxygen - git - hdf5=*=mpi_openmpi_* + - h5py - mamba - make - mpi4py diff --git a/test/python/unittest/API/APITest.py b/test/python/unittest/API/APITest.py index c10bcd7be0..af3f79f2f9 100644 --- a/test/python/unittest/API/APITest.py +++ b/test/python/unittest/API/APITest.py @@ -2240,6 +2240,8 @@ def testSeriesConstructors(self): s.close() def testScalarHdf5Fields(self): + if "hdf5" not in io.variants: + return file = "../samples/scalar_hdf5.h5" series_write = io.Series(file, io.Access.create) E_x = series_write.write_iterations()[0].meshes["E"]["x"] @@ -2251,7 +2253,8 @@ def testScalarHdf5Fields(self): import h5py with h5py.File(file, "r+") as f: E = f["data"]["0"]["meshes"]["E"] - reapply_attributes = {key: val for key, val in E["x"].attrs.items()} + reapply_attributes = \ + {key: val for key, val in E["x"].attrs.items()} print("ATTRIBUTES:", reapply_attributes) del E["x"] E["x"] = 44 @@ -2270,7 +2273,8 @@ def testScalarHdf5Fields(self): series_read_write.close() series_read_again = io.Series(file, io.Access.read_only) - loaded_from_scalar = series_read_again.iterations[0].meshes["E"]["x"][:] + loaded_from_scalar = \ + series_read_again.iterations[0].meshes["E"]["x"][:] series_read_again.flush() self.assertEqual(loaded_from_scalar, np.array([45])) series_read_again.close() From e5f5b4a2276dd47a47c36ece0451eeb2e9c50a57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 26 Jun 2025 16:48:33 +0200 Subject: [PATCH 05/10] Remove debugging output --- src/IO/HDF5/HDF5IOHandler.cpp | 2 -- test/python/unittest/API/APITest.py | 1 - 2 files changed, 3 deletions(-) diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index 586b326707..fb49d99521 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -1566,7 +1566,6 @@ void HDF5IOHandlerImpl::writeDataset( filespace = H5Dget_space(dataset_id); int ndims = H5Sget_simple_extent_ndims(filespace); - std::cout << "DATASET DIMENSIONS: " << ndims << std::endl; if (ndims == 0) { @@ -2051,7 +2050,6 @@ void HDF5IOHandlerImpl::readDataset( filespace = H5Dget_space(dataset_id); int ndims = H5Sget_simple_extent_ndims(filespace); - std::cout << "DATASET DIMENSIONS: " << ndims << std::endl; if (ndims == 0) { diff --git a/test/python/unittest/API/APITest.py b/test/python/unittest/API/APITest.py index af3f79f2f9..93621047b9 100644 --- a/test/python/unittest/API/APITest.py +++ b/test/python/unittest/API/APITest.py @@ -2255,7 +2255,6 @@ def testScalarHdf5Fields(self): E = f["data"]["0"]["meshes"]["E"] reapply_attributes = \ {key: val for key, val in E["x"].attrs.items()} - print("ATTRIBUTES:", reapply_attributes) del E["x"] E["x"] = 44 for key, val in reapply_attributes.items(): From 6656ca436ddb2e29e2ded9b88255b7063e05fd02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 26 Jun 2025 16:51:36 +0200 Subject: [PATCH 06/10] Try setting an explicit Python executable in MacOS workflow --- .github/workflows/macos.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index bf888445e2..80f21cdbdf 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -40,7 +40,9 @@ jobs: -DopenPMD_USE_MPI=ON \ -DopenPMD_USE_HDF5=ON \ -DopenPMD_USE_ADIOS2=ON \ - -DopenPMD_USE_INVASIVE_TESTS=ON + -DopenPMD_USE_INVASIVE_TESTS=ON \ + -DPython_EXECUTABLE=python3 \ + -DPYTHON_EXECUTABLE=python3 cmake --build build --parallel 3 ctest --test-dir build --verbose From 330019dfabbf05f545db74d4b88747ade752ba38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 26 Jun 2025 17:04:59 +0200 Subject: [PATCH 07/10] Ok, don't run it on MacOS if the runner does not like it --- .github/workflows/macos.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 80f21cdbdf..db003484a0 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -25,7 +25,7 @@ jobs: brew install adios2 || true brew install hdf5-mpi || true brew install python || true - python3 -m pip install -U mpi4py numpy pandas h5py + python3 -m pip install -U mpi4py numpy pandas set -e - name: Build env: {CXXFLAGS: -Werror, MACOSX_DEPLOYMENT_TARGET: 11.0} @@ -40,9 +40,7 @@ jobs: -DopenPMD_USE_MPI=ON \ -DopenPMD_USE_HDF5=ON \ -DopenPMD_USE_ADIOS2=ON \ - -DopenPMD_USE_INVASIVE_TESTS=ON \ - -DPython_EXECUTABLE=python3 \ - -DPYTHON_EXECUTABLE=python3 + -DopenPMD_USE_INVASIVE_TESTS=ON cmake --build build --parallel 3 ctest --test-dir build --verbose From e67c6ab68583ec8607caf7966658dcdcc95dce81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 26 Jun 2025 17:07:40 +0200 Subject: [PATCH 08/10] Don't fail the test if h5py is not available --- test/python/unittest/API/APITest.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/python/unittest/API/APITest.py b/test/python/unittest/API/APITest.py index 93621047b9..be1758d4bd 100644 --- a/test/python/unittest/API/APITest.py +++ b/test/python/unittest/API/APITest.py @@ -2242,6 +2242,11 @@ def testSeriesConstructors(self): def testScalarHdf5Fields(self): if "hdf5" not in io.variants: return + try: + import h5py + except ImportError: + return + file = "../samples/scalar_hdf5.h5" series_write = io.Series(file, io.Access.create) E_x = series_write.write_iterations()[0].meshes["E"]["x"] @@ -2250,7 +2255,6 @@ def testScalarHdf5Fields(self): series_write.close() # Now turn E_x into a scalar - import h5py with h5py.File(file, "r+") as f: E = f["data"]["0"]["meshes"]["E"] reapply_attributes = \ From ea183920c8fc999810e0e2231608c71d49344428 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 27 Jun 2025 17:26:25 +0200 Subject: [PATCH 09/10] Document the test --- test/python/unittest/API/APITest.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/python/unittest/API/APITest.py b/test/python/unittest/API/APITest.py index be1758d4bd..6337807f33 100644 --- a/test/python/unittest/API/APITest.py +++ b/test/python/unittest/API/APITest.py @@ -2247,6 +2247,15 @@ def testScalarHdf5Fields(self): except ImportError: return + # While the openPMD-api (currently) does not create scalar HDF5 + # datasets, we should at least try reading and modifying them in files + # that were created elsewhere. Scalar here refers to a dataset without + # dimension. Interacting with them in the openPMD-api is possible by + # specifying a single element, i.e. offset=[0], extent=[1]. + # For testing this, create a dataset, then use h5py to create a scalar + # dataset in the file. Then, open first for reading, then for + # modifying. + file = "../samples/scalar_hdf5.h5" series_write = io.Series(file, io.Access.create) E_x = series_write.write_iterations()[0].meshes["E"]["x"] From 35d95329522d7e9bfe7d4c1aca8588c42060c0b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 30 Jun 2025 11:11:38 +0200 Subject: [PATCH 10/10] CI fixes --- .github/ci/spack-envs/clang15_py311_nompi_h5_ad2/spack.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/ci/spack-envs/clang15_py311_nompi_h5_ad2/spack.yaml b/.github/ci/spack-envs/clang15_py311_nompi_h5_ad2/spack.yaml index f41ee55fa7..f50c7e9815 100644 --- a/.github/ci/spack-envs/clang15_py311_nompi_h5_ad2/spack.yaml +++ b/.github/ci/spack-envs/clang15_py311_nompi_h5_ad2/spack.yaml @@ -11,6 +11,8 @@ spack: - py-h5py packages: + py-h5py: + variants: ~mpi hdf5: variants: ~mpi adios2: