From 90cb6e73377a3d4ae3737d6bbeb1bf19b1d8a955 Mon Sep 17 00:00:00 2001 From: "Brian C. Van Essen" Date: Tue, 26 Sep 2023 03:42:13 -0700 Subject: [PATCH 1/3] Added a check in the HDF5 data reader to check that the metadata for each field actually matches the dimensions of the data fields. Added a helper function for conduit to allow the calculation of a product of a data array's elements. --- include/lbann/utils/CMakeLists.txt | 1 + include/lbann/utils/conduit_extensions.hpp | 45 +++++++++++++++++++ .../readers/data_reader_HDF5.cpp | 19 +++++++- 3 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 include/lbann/utils/conduit_extensions.hpp diff --git a/include/lbann/utils/CMakeLists.txt b/include/lbann/utils/CMakeLists.txt index 3b793f96f2d..70270f92456 100644 --- a/include/lbann/utils/CMakeLists.txt +++ b/include/lbann/utils/CMakeLists.txt @@ -32,6 +32,7 @@ set_full_path(THIS_DIR_HEADERS cloneable.hpp commify.hpp compiler_control.hpp + conduit_extensions.hpp dataset.hpp describable.hpp description.hpp diff --git a/include/lbann/utils/conduit_extensions.hpp b/include/lbann/utils/conduit_extensions.hpp new file mode 100644 index 00000000000..4be35f07917 --- /dev/null +++ b/include/lbann/utils/conduit_extensions.hpp @@ -0,0 +1,45 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC. +// Produced at the Lawrence Livermore National Laboratory. +// Written by the LBANN Research Team (B. Van Essen, et al.) listed in +// the CONTRIBUTORS file. +// +// LLNL-CODE-697807. +// All rights reserved. +// +// This file is part of LBANN: Livermore Big Artificial Neural Network +// Toolkit. For details, see http://software.llnl.gov/LBANN or +// http://github.com/LBANN. +// +// Licensed under the Apache License, Version 2.0 (the "Licensee"); you +// may not use this file except in compliance with the License. You may +// obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the license. 
+//////////////////////////////////////////////////////////////////////////////// + +#include "conduit/conduit_data_array.hpp" + +namespace conduit { + +template +T +data_array_prod(DataArray a) +{ + T res = 1; + for(index_t i = 0; i < a.number_of_elements(); i++) + { + const T &val = a.element(i); + res *= val; + } + + return res; +} + +} // conduit diff --git a/src/data_ingestion/readers/data_reader_HDF5.cpp b/src/data_ingestion/readers/data_reader_HDF5.cpp index ead9a86ec2e..cc9b150b37b 100644 --- a/src/data_ingestion/readers/data_reader_HDF5.cpp +++ b/src/data_ingestion/readers/data_reader_HDF5.cpp @@ -25,6 +25,7 @@ // ///////////////////////////////////////////////////////////////////////////////// #include "conduit/conduit_relay_mpi.hpp" +#include "lbann/utils/conduit_extensions.hpp" #include "lbann/data_ingestion/readers/data_reader_HDF5.hpp" #include "lbann/data_ingestion/readers/data_reader_sample_list_impl.hpp" @@ -357,7 +358,23 @@ void hdf5_data_reader::load_sample(conduit::Node& node, original_path, node[new_pathname]); } - + // Check that the dimensions of each node matches its metadata + if (metadata.has_child(HDF5_METADATA_KEY_DIMS)) { + int n_elts = node[pathname].dtype().number_of_elements(); + conduit::int64_array data_array_dims = metadata[HDF5_METADATA_KEY_DIMS].value(); + auto expected_n_elts = data_array_prod(data_array_dims); + + if (n_elts != expected_n_elts) { + LBANN_WARNING("Ingesting sample field ", + pathname, + " for sample ", + sample_name, + " where the dimensions in the metadata don't match the actual field: ", + expected_n_elts, + " != ", + n_elts); + } + } // check to see if there are integer types left in the sample and warn the // user auto dtype = node[new_pathname].dtype(); From de52603a5332eaddb8f7d8eb0e570f309291a9a8 Mon Sep 17 00:00:00 2001 From: "Brian C. Van Essen" Date: Mon, 26 Feb 2024 18:06:19 -0800 Subject: [PATCH 2/3] Work in progress on making a more robust HDF5 data reader that can handle hyperslabs. 
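The core mechanism is Conduit relay's hyperslab read options: a "sizes" /
"offsets" / "strides" node (dimensions ordered slowest- to fastest-varying,
as in HDF5) passed to hdf5_read(), so that each rank ingests only its slab
of a dataset. A minimal sketch of that call pattern, separate from the data
reader plumbing in the diff below (the file and dataset names are
illustrative only):

    #include "conduit_relay_io_hdf5.hpp"

    // Read a 2x2 sub-block of a 2D double dataset: rows 0-1, every other
    // column starting at column 1.
    int p_sizes[2]   = {2, 2};
    int p_offsets[2] = {0, 1};
    int p_strides[2] = {1, 2};

    conduit::Node read_opts;
    read_opts["sizes"].set_external(p_sizes, 2);
    read_opts["offsets"].set_external(p_offsets, 2);
    read_opts["strides"].set_external(p_strides, 2);

    conduit::Node slab;
    conduit::relay::io::hdf5_read("example.hdf5:twoDarray", read_opts, slab);

This option-based read is what the new unit test below exercises, and it is
presumably also why the Conduit minimum is bumped to 0.8.9 in CMakeLists.txt.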
--- CMakeLists.txt | 2 +- .../data_ingestion/data_store_conduit.hpp | 2 +- .../readers/data_reader_HDF5.hpp | 5 + include/lbann/utils/conduit_extensions.hpp | 370 ++++++++++++++++++ scripts/build_lbann.sh | 1 + scripts/customize_build_env.sh | 4 +- scripts/utilities.sh | 4 + src/callbacks/variable_minibatch.cpp | 12 +- .../coordinator/buffered_data_coordinator.cpp | 2 +- src/data_ingestion/data_store_conduit.cpp | 4 +- .../readers/data_reader_HDF5.cpp | 113 ++++++ .../readers/unit_test/CMakeLists.txt | 1 + .../data_reader_HDF5_c3fd_file_ingest.cpp | 70 +++- ...data_reader_HDF5_cosmoflow_file_ingest.cpp | 2 + .../hdf5_c3fd_test_data_and_schemas.yaml | 17 + .../hdf5_cosmoflow_test_data_and_schemas.yaml | 13 + .../training_algorithm_factory_test.cpp | 8 +- 17 files changed, 610 insertions(+), 20 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 03c7f3da39a..1b1adf72f73 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -608,7 +608,7 @@ if (LBANN_WITH_HWLOC) endif (LBANN_WITH_HWLOC) if (LBANN_WITH_CONDUIT) - find_package(Conduit 0.7.0 CONFIG REQUIRED) + find_package(Conduit 0.8.9 CONFIG REQUIRED) message(STATUS "Found CONDUIT: ${Conduit_DIR}") if (NOT TARGET conduit::conduit OR NOT TARGET conduit::conduit_mpi) diff --git a/include/lbann/data_ingestion/data_store_conduit.hpp b/include/lbann/data_ingestion/data_store_conduit.hpp index 342cf23b89e..e55445c8e53 100644 --- a/include/lbann/data_ingestion/data_store_conduit.hpp +++ b/include/lbann/data_ingestion/data_store_conduit.hpp @@ -541,7 +541,7 @@ class data_store_conduit // methods follow //========================================================================= - void start_exchange_data_by_sample(size_t current_pos, size_t mb_size); + void start_exchange_data_by_sample(uint64_t current_pos, uint64_t mb_size); void finish_exchange_data_by_sample(); void setup_data_store_buffers(); diff --git a/include/lbann/data_ingestion/readers/data_reader_HDF5.hpp b/include/lbann/data_ingestion/readers/data_reader_HDF5.hpp index 123d79abec8..f7a1fb622b5 100644 --- a/include/lbann/data_ingestion/readers/data_reader_HDF5.hpp +++ b/include/lbann/data_ingestion/readers/data_reader_HDF5.hpp @@ -46,6 +46,8 @@ class DataReaderHDF5WhiteboxTester; #define HDF5_METADATA_KEY_TRANSPOSE "transpose" #define HDF5_METADATA_KEY_COERCE "coerce" #define HDF5_METADATA_KEY_PACK "pack" +#define HDF5_METADATA_KEY_PARALLEL_IO "parallel_io" +#define HDF5_METADATA_KEY_PARALLEL_SPLIT "parallel_split" /** Valid string values for a metadata file */ #define HDF5_METADATA_VALUE_COERCE_FLOAT "float" #define HDF5_METADATA_VALUE_COERCE_DOUBLE "double" @@ -55,6 +57,7 @@ class DataReaderHDF5WhiteboxTester; #define HDF5_METADATA_VALUE_LAYOUT_HWC "hwc" #define HDF5_METADATA_VALUE_LAYOUT_CDHW "cdhw" #define HDF5_METADATA_VALUE_LAYOUT_DHWC "dhwc" +#define HDF5_METADATA_VALUE_TRUE "true" namespace lbann { @@ -78,6 +81,8 @@ static std::set const hdf5_metadata_valid_keys = { HDF5_METADATA_KEY_TRANSPOSE, HDF5_METADATA_KEY_COERCE, HDF5_METADATA_KEY_PACK, + HDF5_METADATA_KEY_PARALLEL_IO, + HDF5_METADATA_KEY_PARALLEL_SPLIT }; /** diff --git a/include/lbann/utils/conduit_extensions.hpp b/include/lbann/utils/conduit_extensions.hpp index 4be35f07917..40dd4c5c2cc 100644 --- a/include/lbann/utils/conduit_extensions.hpp +++ b/include/lbann/utils/conduit_extensions.hpp @@ -25,6 +25,8 @@ //////////////////////////////////////////////////////////////////////////////// #include "conduit/conduit_data_array.hpp" +#include "conduit_relay_io_hdf5.hpp" +#include "hdf5.h" namespace conduit { @@ -42,4 
+44,372 @@ data_array_prod(DataArray a) return res; } +namespace relay { +namespace io { +#define MAXDIMS 4 +//----------------------------------------------------------------------------- +// This example tests reads of slabs from a hdf5 dataset. +// +// we may provide something like this in in the relay hdf5 interface +// in the future. +//----------------------------------------------------------------------------- +bool +hdf5_read_dset_slab(const std::string &file_path, + const std::string &fetch_path, + const DataType &dtype, + void *data_ptr) +{ + // assume fetch_path points to a hdf5 dataset + // open the hdf5 file for reading + hid_t h5_file_id = H5Fopen(file_path.c_str(), + H5F_ACC_RDONLY, + H5P_DEFAULT); + CONDUIT_CHECK_HDF5_ERROR(h5_file_id, + "Error opening HDF5 file for reading: " << file_path); + + // open the dataset + hid_t h5_dset_id = H5Dopen( h5_file_id, fetch_path.c_str(),H5P_DEFAULT); + + CONDUIT_CHECK_HDF5_ERROR(h5_dset_id, + "Error opening HDF5 dataset at: " << fetch_path); + + + // get info about the dataset + hid_t h5_dspace_id = H5Dget_space(h5_dset_id); + CONDUIT_CHECK_HDF5_ERROR(h5_dspace_id, + "Error reading HDF5 Dataspace: " << h5_dset_id); + + // BVE + +// int H5Sget_simple_extent_dims ( hid_t space_id, +// hsize_t dims[], +// hsize_t maxdims[] +// ) + index_t rank = H5Sget_simple_extent_ndims(h5_dspace_id); + + std::cout << "The data space has a ndims of " << rank << std::endl; + hsize_t dims[MAXDIMS] = {0,0,0,0}; + hsize_t maxdims[MAXDIMS] = {0,0,0,0}; + H5Sget_simple_extent_dims(h5_dspace_id, dims, maxdims); + + for(int i = 0; i < MAXDIMS; i++) { + std::cout << "I think that I have dims["< h5_nelems) + { + // we have an error, but to try to clean up the hdf5 handles + // before we issue the error. + + CONDUIT_CHECK_HDF5_ERROR(H5Sclose(h5_dspace_id), + "Error closing HDF5 data space: " << file_path); + + CONDUIT_CHECK_HDF5_ERROR(H5Dclose(h5_dset_id), + "Error closing HDF5 dataset: " << file_path); + // close the hdf5 file + CONDUIT_CHECK_HDF5_ERROR(H5Fclose(h5_file_id), + "Error closing HDF5 file: " << file_path); + + CONDUIT_ERROR("Can't slab fetch a buffer larger than the source" + " hdf5 data set. Requested number of elements" + << dtype.number_of_elements() + << " hdf5 dataset number of elements" << h5_nelems); + } + + + // we need to compute an offset, stride, and element bytes + // that will work for reading in the general case + // right now we assume the dest type of data and the hdf5 datasets + // data type are compatible + + // conduit's offsets, strides, are all in terms of bytes + // hdf5's are in terms of elements + + // what we really want is a way to read bytes from the hdf5 dset with + // out any type conversion, but that doesn't exist. + + // general support would include reading a a view of one type that + // points to a buffer of another + // (for example a view of doubles that is defined on a buffer of bytes) + + // but hdf5 doesn't support slab fetch across datatypes + // so for now we make sure the datatype is consistent. 
+ + DataType h5_dt = conduit::relay::io::hdf5_dtype_to_conduit_dtype(h5_dtype_id,1); + + if( h5_dt.id() != dtype.id() ) + { + CONDUIT_CHECK_HDF5_ERROR(H5Sclose(h5_dspace_id), + "Error closing HDF5 data space: " << file_path); + + CONDUIT_CHECK_HDF5_ERROR(H5Dclose(h5_dset_id), + "Error closing HDF5 dataset: " << file_path); + // close the hdf5 file + CONDUIT_CHECK_HDF5_ERROR(H5Fclose(h5_file_id), + "Error closing HDF5 file: " << file_path); + + CONDUIT_INFO("Cannot fetch hdf5 slab of buffer and view are" + "different data types.") + return false; + } + + + + hid_t h5_status = 0; + + hsize_t elem_bytes = dtype.element_bytes(); + hsize_t offset = dtype.offset() / elem_bytes; // in bytes, convert to elems + hsize_t stride = dtype.stride() / elem_bytes; // in bytes, convert to elems + hsize_t num_ele = dtype.number_of_elements(); + + CONDUIT_INFO("slab dtype: " << dtype.to_json()); + + CONDUIT_INFO("hdf5 slab: " << + " element_offset: " << offset << + " element_stride: " << stride << + " number_of_elements: " << num_ele); + + h5_status = H5Sselect_hyperslab(h5_dspace_id, + H5S_SELECT_SET, + &offset, + &stride, + &num_ele, + 0); // 0 here means NULL pointers; HDF5 *knows* dimension is 1 + // check subset sel + CONDUIT_CHECK_HDF5_ERROR(h5_status, + "Error selecting hyper slab from HDF5 dataspace: " << h5_dspace_id); + + + hid_t h5_dspace_compact_id = H5Screate_simple(1, + &num_ele, + NULL); + + CONDUIT_CHECK_HDF5_ERROR(h5_dspace_id, + "Failed to create HDF5 data space (memory dspace)"); + + h5_status = H5Dread(h5_dset_id, // data set id + h5_dtype_id, // memory type id // use same data type? + h5_dspace_compact_id, // memory space id ... + h5_dspace_id, // file space id + H5P_DEFAULT, + data_ptr); + // check read + CONDUIT_CHECK_HDF5_ERROR(h5_status, + "Error reading bytes from HDF5 dataset: " << h5_dset_id); + + // close the data space + CONDUIT_CHECK_HDF5_ERROR(H5Sclose(h5_dspace_id), + "Error closing HDF5 data space: " << file_path); + + // close the compact data space + CONDUIT_CHECK_HDF5_ERROR(H5Sclose(h5_dspace_compact_id), + "Error closing HDF5 data space (memory dspace)" << file_path); + + + // close the dataset + CONDUIT_CHECK_HDF5_ERROR(H5Dclose(h5_dset_id), + "Error closing HDF5 dataset: " << file_path); + + // close the hdf5 file + CONDUIT_CHECK_HDF5_ERROR(H5Fclose(h5_file_id), + "Error closing HDF5 file: " << file_path); + + return true; +} + +bool +hdf5_read_dataspace(const hid_t h5_file_id, //const std::string &file_path, + const std::string &fetch_path, + const DataType &dtype, + const conduit::Node &read_opts) +{ + // assume fetch_path points to a hdf5 dataset + // open the hdf5 file for reading + // hid_t h5_file_id = H5Fopen(file_path.c_str(), + // H5F_ACC_RDONLY, + // H5P_DEFAULT); + // CONDUIT_CHECK_HDF5_ERROR(h5_file_id, + // "Error opening HDF5 file for reading: " << file_path); + + const std::string &file_path = "foo"; + + // open the dataset + hid_t h5_dset_id = H5Dopen( h5_file_id, fetch_path.c_str(),H5P_DEFAULT); + + CONDUIT_CHECK_HDF5_ERROR(h5_dset_id, + "Error opening HDF5 dataset at: " << fetch_path); + + + // get info about the dataset + hid_t h5_dspace_id = H5Dget_space(h5_dset_id); + CONDUIT_CHECK_HDF5_ERROR(h5_dspace_id, + "Error reading HDF5 Dataspace: " << h5_dset_id); + + // check for empty case + if(H5Sget_simple_extent_type(h5_dspace_id) == H5S_NULL) + { + // we have an error, but to try to clean up the hdf5 handles + // before we issue the error. 
+ + CONDUIT_CHECK_HDF5_ERROR(H5Sclose(h5_dspace_id), + "Error closing HDF5 data space: " << file_path); + + CONDUIT_CHECK_HDF5_ERROR(H5Dclose(h5_dset_id), + "Error closing HDF5 dataset: " << file_path); + // close the hdf5 file + CONDUIT_CHECK_HDF5_ERROR(H5Fclose(h5_file_id), + "Error closing HDF5 file: " << file_path); + + CONDUIT_ERROR("Can't slab fetch from an empty hdf5 data set."); + } + + hid_t h5_dtype_id = H5Dget_type(h5_dset_id); + + CONDUIT_CHECK_HDF5_ERROR(h5_dtype_id, + "Error reading HDF5 Datatype: " + << h5_dset_id); + + // TODO: bounds check (check that we are fetching a subset of the elems) + index_t h5_nelems = H5Sget_simple_extent_npoints(h5_dspace_id); + if( dtype.number_of_elements() > h5_nelems) + { + // we have an error, but to try to clean up the hdf5 handles + // before we issue the error. + + CONDUIT_CHECK_HDF5_ERROR(H5Sclose(h5_dspace_id), + "Error closing HDF5 data space: " << file_path); + + CONDUIT_CHECK_HDF5_ERROR(H5Dclose(h5_dset_id), + "Error closing HDF5 dataset: " << file_path); + // close the hdf5 file + CONDUIT_CHECK_HDF5_ERROR(H5Fclose(h5_file_id), + "Error closing HDF5 file: " << file_path); + + CONDUIT_ERROR("Can't slab fetch a buffer larger than the source" + " hdf5 data set. Requested number of elements" + << dtype.number_of_elements() + << " hdf5 dataset number of elements" << h5_nelems); + } + + + // we need to compute an offset, stride, and element bytes + // that will work for reading in the general case + // right now we assume the dest type of data and the hdf5 datasets + // data type are compatible + + // conduit's offsets, strides, are all in terms of bytes + // hdf5's are in terms of elements + + // what we really want is a way to read bytes from the hdf5 dset with + // out any type conversion, but that doesn't exist. + + // general support would include reading a a view of one type that + // points to a buffer of another + // (for example a view of doubles that is defined on a buffer of bytes) + + // but hdf5 doesn't support slab fetch across datatypes + // so for now we make sure the datatype is consistent. 
+ + DataType h5_dt = conduit::relay::io::hdf5_dtype_to_conduit_dtype(h5_dtype_id,1); + + if( h5_dt.id() != dtype.id() ) + { + CONDUIT_CHECK_HDF5_ERROR(H5Sclose(h5_dspace_id), + "Error closing HDF5 data space: " << file_path); + + CONDUIT_CHECK_HDF5_ERROR(H5Dclose(h5_dset_id), + "Error closing HDF5 dataset: " << file_path); + // close the hdf5 file + CONDUIT_CHECK_HDF5_ERROR(H5Fclose(h5_file_id), + "Error closing HDF5 file: " << file_path); + + CONDUIT_INFO("Cannot fetch hdf5 slab of buffer and view are" + "different data types.") + return false; + } + + + + hid_t h5_status = 0; + + hsize_t elem_bytes = dtype.element_bytes(); + hsize_t offset = dtype.offset() / elem_bytes; // in bytes, convert to elems + hsize_t stride = dtype.stride() / elem_bytes; // in bytes, convert to elems + hsize_t num_ele = dtype.number_of_elements(); + + CONDUIT_INFO("slab dtype: " << dtype.to_json()); + + CONDUIT_INFO("hdf5 slab: " << + " element_offset: " << offset << + " element_stride: " << stride << + " number_of_elements: " << num_ele); + + h5_status = H5Sselect_hyperslab(h5_dspace_id, + H5S_SELECT_SET, + &offset, + &stride, + &num_ele, + 0); // 0 here means NULL pointers; HDF5 *knows* dimension is 1 + // check subset sel + CONDUIT_CHECK_HDF5_ERROR(h5_status, + "Error selecting hyper slab from HDF5 dataspace: " << h5_dspace_id); + + + // check read + CONDUIT_CHECK_HDF5_ERROR(h5_status, + "Error reading bytes from HDF5 dataset: " << h5_dset_id); + + // close the data space + CONDUIT_CHECK_HDF5_ERROR(H5Sclose(h5_dspace_id), + "Error closing HDF5 data space: " << file_path); + + // // close the compact data space + // CONDUIT_CHECK_HDF5_ERROR(H5Sclose(h5_dspace_compact_id), + // "Error closing HDF5 data space (memory dspace)" << file_path); + + + // close the dataset + CONDUIT_CHECK_HDF5_ERROR(H5Dclose(h5_dset_id), + "Error closing HDF5 dataset: " << file_path); + + // close the hdf5 file + CONDUIT_CHECK_HDF5_ERROR(H5Fclose(h5_file_id), + "Error closing HDF5 file: " << file_path); + + return h5_status; + // return true; +} + +} // io + +} // relay + } // conduit diff --git a/scripts/build_lbann.sh b/scripts/build_lbann.sh index df9d091a356..a5aaf0b3978 100755 --- a/scripts/build_lbann.sh +++ b/scripts/build_lbann.sh @@ -1115,6 +1115,7 @@ EOF ########################################################################################## # Now that the config file is generated set the field + echo "Looking for a config file with pattern ${LBANN_LABEL} ${CENTER_COMPILER} ${LBANN_HOME}" find_cmake_config_file ${LBANN_LABEL} ${CENTER_COMPILER} ${LBANN_HOME} if [[ ! 
-z "${MATCHED_CONFIG_FILE_PATH}" ]]; then if [[ -e "${MATCHED_CONFIG_FILE_PATH}" && -r "${MATCHED_CONFIG_FILE_PATH}" ]]; then diff --git a/scripts/customize_build_env.sh b/scripts/customize_build_env.sh index cc958770f3a..19de838b582 100644 --- a/scripts/customize_build_env.sh +++ b/scripts/customize_build_env.sh @@ -275,8 +275,8 @@ set_center_specific_spack_dependencies() CENTER_DEPENDENCIES="^hdf5+hl" CENTER_BLAS_LIBRARY="blas=accelerate" ;; - "m1") - CENTER_DEPENDENCIES="^hdf5+hl ^python@3.10 ^protobuf@3.21.5 ^py-protobuf@4.21.5" + "m1" | "m2") + CENTER_DEPENDENCIES="^hdf5+hl ^python@3.9.6 ^protobuf@3.21.5 ^py-protobuf@4.21.5" CENTER_BLAS_LIBRARY="blas=accelerate" CENTER_COMPILER="%apple-clang" ;; diff --git a/scripts/utilities.sh b/scripts/utilities.sh index 97445aeb541..8818a3fe97e 100644 --- a/scripts/utilities.sh +++ b/scripts/utilities.sh @@ -51,8 +51,12 @@ function find_cmake_config_file() { # OS X and Linux have different flags for the stat call SYS_UNAME=$(uname -s) if [[ ${SYS_UNAME} = "Darwin" ]]; then + CMD="find ${lbann_build_dir} -maxdepth 1 -type f -name "LBANN_${HOST}_${label}-${SYS}-${generic_compiler}@*.cmake" -exec stat -f '%a %N' {} \; -print | sort -nr | awk 'NR==1,NR==1 {print $2}'" + echo "${CMD}" MATCHED_CONFIG_FILE_PATH=$(find ${lbann_build_dir} -maxdepth 1 -type f -name "LBANN_${HOST}_${label}-${SYS}-${generic_compiler}@*.cmake" -exec stat -f '%a %N' {} \; -print | sort -nr | awk 'NR==1,NR==1 {print $2}') else + CMD="find ${lbann_build_dir} -maxdepth 1 -type f -name "LBANN_${HOST}_${label}-${SYS}-${generic_compiler}@*.cmake" -exec stat -c '%X %n' {} \; -print | sort -nr | awk 'NR==1,NR==1 {print $2}'" + echo "${CMD}" MATCHED_CONFIG_FILE_PATH=$(find ${lbann_build_dir} -maxdepth 1 -type f -name "LBANN_${HOST}_${label}-${SYS}-${generic_compiler}@*.cmake" -exec stat -c '%X %n' {} \; -print | sort -nr | awk 'NR==1,NR==1 {print $2}') fi if [[ -n "${MATCHED_CONFIG_FILE_PATH}" ]]; then diff --git a/src/callbacks/variable_minibatch.cpp b/src/callbacks/variable_minibatch.cpp index e91b9393db0..050e9b2b519 100644 --- a/src/callbacks/variable_minibatch.cpp +++ b/src/callbacks/variable_minibatch.cpp @@ -177,9 +177,9 @@ float variable_minibatch::get_current_learning_rate(model* m) const return 0.0f; } -step_minibatch::step_minibatch(size_t starting_mbsize, - size_t step, - size_t ramp_time) +step_minibatch::step_minibatch(uint64_t starting_mbsize, + uint64_t step, + uint64_t ramp_time) : variable_minibatch(starting_mbsize), m_step(step), m_ramp_time(ramp_time) {} @@ -209,7 +209,7 @@ void step_minibatch::write_specific_proto(lbann_data::Callback& proto) const msg->set_ramp_time(m_ramp_time); } -minibatch_schedule::minibatch_schedule(size_t starting_mbsize, +minibatch_schedule::minibatch_schedule(uint64_t starting_mbsize, std::vector steps) : variable_minibatch(starting_mbsize), m_steps(std::move(steps)) { @@ -221,9 +221,9 @@ minibatch_schedule::minibatch_schedule(size_t starting_mbsize, } bool minibatch_schedule::schedule(model* m, - size_t& new_mbsize, + uint64_t& new_mbsize, float& new_lr, - size_t& ramp_time) + uint64_t& ramp_time) { const auto& c = static_cast(m->get_execution_context()); diff --git a/src/data_ingestion/coordinator/buffered_data_coordinator.cpp b/src/data_ingestion/coordinator/buffered_data_coordinator.cpp index 8a401acca3e..21f45dfab8b 100644 --- a/src/data_ingestion/coordinator/buffered_data_coordinator.cpp +++ b/src/data_ingestion/coordinator/buffered_data_coordinator.cpp @@ -167,7 +167,7 @@ int buffered_data_coordinator::fetch_to_local_matrix( // 
Compute the size of the current local mini-batch const uint64_t end_pos = std::min(relative_base_position + loaded_mini_batch_size, - dr->m_shuffled_indices.size()); + (uint64_t) dr->m_shuffled_indices.size()); const uint64_t local_mini_batch_size = std::min( ((end_pos - relative_base_position) + ds.get_sample_stride() - 1) / ds.get_sample_stride(), diff --git a/src/data_ingestion/data_store_conduit.cpp b/src/data_ingestion/data_store_conduit.cpp index 9f7d87caf7c..3b127793d2b 100644 --- a/src/data_ingestion/data_store_conduit.cpp +++ b/src/data_ingestion/data_store_conduit.cpp @@ -1819,8 +1819,8 @@ void data_store_conduit::profile_timing() } } -void data_store_conduit::start_exchange_mini_batch_data(size_t current_pos, - size_t mb_size, +void data_store_conduit::start_exchange_mini_batch_data(uint64_t current_pos, + uint64_t mb_size, bool at_new_epoch) { if (is_local_cache() && is_fully_loaded()) { diff --git a/src/data_ingestion/readers/data_reader_HDF5.cpp b/src/data_ingestion/readers/data_reader_HDF5.cpp index cc9b150b37b..16960696b1e 100644 --- a/src/data_ingestion/readers/data_reader_HDF5.cpp +++ b/src/data_ingestion/readers/data_reader_HDF5.cpp @@ -358,6 +358,119 @@ void hdf5_data_reader::load_sample(conduit::Node& node, original_path, node[new_pathname]); } + + if (metadata.has_child(HDF5_METADATA_KEY_PARALLEL_IO) && + (conduit_to_string(metadata[HDF5_METADATA_KEY_PARALLEL_IO]) == + HDF5_METADATA_VALUE_TRUE)){ + int nprocs = get_comm()->get_procs_per_trainer(); + // int num_io_partitions = dc::get_number_of_io_partitions(); + // if ((nprocs % dc::get_number_of_io_partitions()) != 0) { + // LBANN_ERROR("nprocs should be divisible by num of io partitions otherwise " + // "this wont work"); + // } + LBANN_MSG("I think that I have ", nprocs, " procs "); + // LBANN_MSG("I think that I have ", nprocs, " procs, and ", num_io_partitions); + + // ) { + // if (pathname == "NodeFeatures") { + // ------------------------------------------------------------------ + // Now read a subset of that 2D array from the HDF5 file. + // Two rows, two columns; total of four elements. + if (!metadata.has_child(HDF5_METADATA_KEY_DIMS)) { + LBANN_ERROR("HDF5 field has requested parallel I/O but does not have a valid dims field."); + } + int n_elts = node[pathname].dtype().number_of_elements(); + conduit::int64_array data_array_dims = metadata[HDF5_METADATA_KEY_DIMS].value(); + int constexpr rnrows = 2; //4; + int constexpr rncols = 2; + int constexpr reltcount = rnrows * rncols; + // int constexpr reltcount = n_elts;//rnrows * rncols; + //get_comm()->get_rank_in_trainer() + int rank = get_comm()->get_rank_in_trainer(); + const int ranks = get_comm()->get_procs_per_trainer(); + std::cout << "How many ranks do I see " << rank << " out of " << ranks << std::endl; + // As noted earlier, HDF5 orders all dimensions from slowest- to + // fastest-varying. In this two-dimensional example, row (or y-index) + // always comes before column (or x-index). If working with a 3D + // dataset, level (or z-index) would come before row. 
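+      // NOTE (assumption, not yet implemented here): for a true parallel
+      // split, each rank would derive its own sizes/offsets/strides from
+      // get_rank_in_trainer() and the metadata dims, instead of the
+      // hard-coded 2x2 debug slab read below.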
+ int p_sizes[1]{rnrows * rncols}; + // int p_sizes[2]{rnrows, rncols}; + // offset to row 0, column 1 + int p_offsets[1]{0}; + // int p_offsets[2]{0, 1}; + // int p_offsets[2]{0, 1}; + // read every row, every other column + int p_strides[1]{2}; + // int p_strides[2]{1, 2}; + std::cout << "Here is p_sizes" << p_sizes[0] << " and " << p_sizes[1] << std::endl; + // Store pointers to these parameters in the read_opts Node + conduit::Node read_opts; + //read_opts["sizes"].set(p_sizes, rank); + read_opts["sizes"].set_external(p_sizes, 1 /*2*//*ranks*/); + read_opts["offsets"].set_external(p_offsets, 1/*2*//*ranks*/); + read_opts["strides"].set_external(p_strides, 1/*2*//*ranks*/); + //read_opts["foo"] + // conduit::DataType read_opts_too; + // read_opts_too.set_number_of_elements(conduit::index_t(p_offsets)); + // read_opts_too.set_offset(conduit::index_t(p_offsets)); + // read_opts_too.set_strides(conduit::index_t(p_strides)); + + //read_opts["sizes"][{4,5}, 0] + +std::cout << "\nHDF5 Options for reading the array:" << std::endl; +read_opts.print(); + +// hid_t h5_status = conduit::relay::io::hdf5_read_dataspace(file_handle, original_path, conduit::DataType::double, read_opts); +// CONDUIT_CHECK_HDF5_ERROR(h5_status, +// "Error selecting hyper slab from HDF5 dataspace: "); +// "Error selecting hyper slab from HDF5 dataspace: " << h5_dspace_id); + + +// Read some of the 2D array in the HDF5 file into an array of doubles +conduit::Node read_data; +double p_data_out[reltcount]{42, 42, 42, 42}; +read_data.set_external(p_data_out, reltcount); +std::string in_path; +//in_path.append(fname).append(":").append(pathname); + std::cout << "Reading original path " << original_path << std::endl; +conduit::relay::io::hdf5_read(file_handle, original_path, read_opts, read_data); + +std::cout << "\nHDF5 Read results:" << std::endl; +read_data.info().print(); +read_data.print_detailed(); + + if (get_comm()->am_trainer_master()) { +// Show what we read + std::stringstream msg; +msg << "Trainer master Subset of array, read from '" << in_path << "'" << std::endl; +for (int j = 0; j < rnrows; ++j) +{ + for (int i = 0; i < rncols; ++i) + { + msg << std::right << std::setw(8) << p_data_out[j * rncols + i]; + } + msg << std::endl; +} + std::cout << msg.str(); + }else { + std::stringstream msg; +msg << "Other rank Subset of array, read from '" << in_path << "'" << std::endl; +for (int j = 0; j < rnrows; ++j) +{ + for (int i = 0; i < rncols; ++i) + { + msg << std::right << std::setw(8) << p_data_out[j * rncols + i]; + } + msg << std::endl; +} + std::cout << msg.str(); + } + } + // conduit::Node io_about; + // conduit::relay::io::about(io_about); + // std::cout << "\nRelay I/O Info and Default Options:" << std::endl; + // std::cout << io_about.to_yaml() << std::endl; + // https://llnl-conduit.readthedocs.io/en/latest/relay_io.html // Check that the dimensions of each node matches its metadata if (metadata.has_child(HDF5_METADATA_KEY_DIMS)) { int n_elts = node[pathname].dtype().number_of_elements(); diff --git a/src/data_ingestion/readers/unit_test/CMakeLists.txt b/src/data_ingestion/readers/unit_test/CMakeLists.txt index 2dd4e4a1709..e6037286933 100644 --- a/src/data_ingestion/readers/unit_test/CMakeLists.txt +++ b/src/data_ingestion/readers/unit_test/CMakeLists.txt @@ -33,6 +33,7 @@ set_full_path(THIS_DIR_MPI_CATCH2_TEST_FILES data_reader_smiles_fetch_datum_test.cpp data_reader_smiles_sample_list_test.cpp data_reader_HDF5_3D_UNet_file_ingest.cpp + data_reader_HDF5_conduit_file_ingest.cpp 
data_reader_HDF5_cosmoflow_file_ingest.cpp data_reader_HDF5_c3fd_file_ingest.cpp data_reader_HDF5_hrrl_file_ingest.cpp diff --git a/src/data_ingestion/readers/unit_test/data_reader_HDF5_c3fd_file_ingest.cpp b/src/data_ingestion/readers/unit_test/data_reader_HDF5_c3fd_file_ingest.cpp index 9d22094b8ec..662b811837d 100644 --- a/src/data_ingestion/readers/unit_test/data_reader_HDF5_c3fd_file_ingest.cpp +++ b/src/data_ingestion/readers/unit_test/data_reader_HDF5_c3fd_file_ingest.cpp @@ -47,20 +47,82 @@ TEST_CASE("HDF5 C3FD data reader file ingest tests", "[.filesystem][data_reader][hdf5][c3fd][file_ingest]") { // initialize stuff (boilerplate) + auto& comm = unit_test::utilities::current_world_comm(); lbann::init_random(0, 2); lbann::init_data_seq_random(42); conduit::Node node; node.parse(hdf5_c3fd_data_sample, "yaml"); + std::cout << "Here is the file that we are reading" << std::endl; + node.info().print(); + node.print_detailed(); + auto hdf5_dr = std::make_unique(); + hdf5_dr->set_comm(&comm); DataReaderHDF5WhiteboxTester white_box_tester; + #if 0 // create working directory std::string work_dir = create_test_directory("hdf5_reader"); + // open hdf5 file and obtain a handle + hid_t h5_id = + conduit::relay::io::hdf5_create_file(work_dir + "/C3FD_test_sample.hdf5"); + // write data + conduit::relay::io::hdf5_write(node, h5_id); + // close our file + conduit::relay::io::hdf5_close_file(h5_id); + + hid_t h5_fid = conduit::relay::io::hdf5_open_file_for_read( + work_dir + "/C3FD_test_sample.hdf5"); + const std::string original_path = "/RUN_ID/000000001"; + const std::string new_pathname = "000000001"; + + // Setup the data schema for this C3FD data set + conduit::Node& data_schema = white_box_tester.get_data_schema(*hdf5_dr); + data_schema.parse(hdf5_c3fd_data_schema, "yaml"); + #endif +#if 0 SECTION("HDF5 C3FD write and then read to HDF5 file") { + // Read in the experiment schema and setup the data reader + conduit::Node& experiment_schema = + white_box_tester.get_experiment_schema(*hdf5_dr); + experiment_schema.parse(hdf5_c3fd_experiment_schema, "yaml"); + // experiment_schema.print(); + white_box_tester.parse_schemas(*hdf5_dr); + + // white_box_tester.print_metadata(*hdf5_dr); + + conduit::Node test_node; + white_box_tester.load_sample(*hdf5_dr, + test_node[new_pathname], + h5_fid, + original_path); + + // Check to see if the HRRL sample can be read via the data + // reader's load_sample method. Note that this will coerce and + // normalize all data fields as specified in the data set and + // experiment schemas. 
+ std::vector fields = {"NodeFeatures", + "EdgeFeatures", + "COOList"}; + check_node_fields(node, + test_node, + data_schema, + fields, + original_path, + new_pathname); + } +#endif +#if 1 + SECTION("HDF5 Parallel I/O C3FD write and then read to HDF5 file") + { +#if 1 + // create working directory + std::string work_dir = create_test_directory("hdf5_reader"); + // open hdf5 file and obtain a handle hid_t h5_id = conduit::relay::io::hdf5_create_file(work_dir + "/C3FD_test_sample.hdf5"); @@ -77,15 +139,16 @@ TEST_CASE("HDF5 C3FD data reader file ingest tests", // Setup the data schema for this C3FD data set conduit::Node& data_schema = white_box_tester.get_data_schema(*hdf5_dr); data_schema.parse(hdf5_c3fd_data_schema, "yaml"); + #endif // Read in the experiment schema and setup the data reader conduit::Node& experiment_schema = white_box_tester.get_experiment_schema(*hdf5_dr); - experiment_schema.parse(hdf5_c3fd_experiment_schema, "yaml"); + experiment_schema.parse(hdf5_c3fd_experiment_parallel_io_schema, "yaml"); // experiment_schema.print(); white_box_tester.parse_schemas(*hdf5_dr); - // white_box_tester.print_metadata(*hdf5_dr); + white_box_tester.print_metadata(*hdf5_dr); conduit::Node test_node; white_box_tester.load_sample(*hdf5_dr, @@ -93,7 +156,7 @@ TEST_CASE("HDF5 C3FD data reader file ingest tests", h5_fid, original_path); - // Check to see if the HRRL sample can be read via the data + // Check to see if the C3FD sample can be read via the data // reader's load_sample method. Note that this will coerce and // normalize all data fields as specified in the data set and // experiment schemas. @@ -107,4 +170,5 @@ TEST_CASE("HDF5 C3FD data reader file ingest tests", original_path, new_pathname); } +#endif } diff --git a/src/data_ingestion/readers/unit_test/data_reader_HDF5_cosmoflow_file_ingest.cpp b/src/data_ingestion/readers/unit_test/data_reader_HDF5_cosmoflow_file_ingest.cpp index d86af958a79..29755eb9681 100644 --- a/src/data_ingestion/readers/unit_test/data_reader_HDF5_cosmoflow_file_ingest.cpp +++ b/src/data_ingestion/readers/unit_test/data_reader_HDF5_cosmoflow_file_ingest.cpp @@ -46,11 +46,13 @@ TEST_CASE("HDF5 CosmoFlow data reader file ingest tests", "[.filesystem][data_reader][hdf5][cosmoflow][file_ingest]") { + auto& comm = unit_test::utilities::current_world_comm(); // initialize stuff (boilerplate) lbann::init_random(0, 2); lbann::init_data_seq_random(42); auto hdf5_dr = std::make_unique(); + hdf5_dr->set_comm(&comm); DataReaderHDF5WhiteboxTester white_box_tester; const std::string original_path = "000000001"; diff --git a/src/data_ingestion/readers/unit_test/test_data/hdf5_c3fd_test_data_and_schemas.yaml b/src/data_ingestion/readers/unit_test/test_data/hdf5_c3fd_test_data_and_schemas.yaml index dae27821ec6..8b7a89cdb72 100644 --- a/src/data_ingestion/readers/unit_test/test_data/hdf5_c3fd_test_data_and_schemas.yaml +++ b/src/data_ingestion/readers/unit_test/test_data/hdf5_c3fd_test_data_and_schemas.yaml @@ -84,3 +84,20 @@ COOList: pack: "coo_list" coerce: "float" )FOO"; + +const std::string hdf5_c3fd_experiment_parallel_io_schema = R"FOO( +NodeFeatures: + metadata: + pack: "node_fts" + coerce: "float" + parallel_io: true + parallel_split: [0, 1] +EdgeFeatures: + metadata: + pack: "edge_fts" + coerce: "float" +COOList: + metadata: + pack: "coo_list" + coerce: "float" +)FOO"; diff --git a/src/data_ingestion/readers/unit_test/test_data/hdf5_cosmoflow_test_data_and_schemas.yaml b/src/data_ingestion/readers/unit_test/test_data/hdf5_cosmoflow_test_data_and_schemas.yaml index 
e9ba8e74989..2e5682e465c 100644 --- a/src/data_ingestion/readers/unit_test/test_data/hdf5_cosmoflow_test_data_and_schemas.yaml +++ b/src/data_ingestion/readers/unit_test/test_data/hdf5_cosmoflow_test_data_and_schemas.yaml @@ -60,6 +60,19 @@ unitPar: ordering: 1 )FOO"; +const std::string hdf5_cosmoflow_experiment_parallel_io_schema = R"FOO( +full: + metadata: + coerce: float + pack: volume + parallel_io: true + hsize_t offset[4] = {0, m_hyperslab_dims[1] * spatial_offset, 0, 0}; +unitPar: + metadata: + coerce: float + pack: params +)FOO"; + const std::string hdf5_cosmoflow_experiment_schema = R"FOO( full: metadata: diff --git a/src/execution_algorithms/unit_test/training_algorithm_factory_test.cpp b/src/execution_algorithms/unit_test/training_algorithm_factory_test.cpp index 29358651f3a..ce49b7001db 100644 --- a/src/execution_algorithms/unit_test/training_algorithm_factory_test.cpp +++ b/src/execution_algorithms/unit_test/training_algorithm_factory_test.cpp @@ -31,7 +31,7 @@ #include "lbann/utils/make_abstract.hpp" #include -#include +// #include #include #include "lbann/proto/training_algorithm.pb.h" @@ -87,7 +87,7 @@ parameters { // Protobuf will log stuff to stderr when errors occur. We know // these errors will occur, but they'll add potentially confusing // output to the catch run, so we silence them in this section - google::protobuf::LogSilencer tmp_silence_pb_logs; + // google::protobuf::LogSilencer tmp_silence_pb_logs; lbann_data::TrainingAlgorithm algo_msg; REQUIRE_FALSE( @@ -105,7 +105,7 @@ parameters { } })proto"; - google::protobuf::LogSilencer tmp_silence_pb_logs; + // google::protobuf::LogSilencer tmp_silence_pb_logs; lbann_data::TrainingAlgorithm algo_msg; REQUIRE_FALSE( pb::TextFormat::ParseFromString(unknown_class_prototext, &algo_msg)); @@ -123,7 +123,7 @@ parameters { } })proto"; - google::protobuf::LogSilencer tmp_silence_pb_logs; + // google::protobuf::LogSilencer tmp_silence_pb_logs; lbann_data::TrainingAlgorithm algo_msg; REQUIRE_FALSE( pb::TextFormat::ParseFromString(bad_class_prototext, &algo_msg)); From 0c1aa7065ff5148742f1eca3bbaec6b5b8998868 Mon Sep 17 00:00:00 2001 From: "Brian C. Van Essen" Date: Mon, 26 Feb 2024 18:07:06 -0800 Subject: [PATCH 3/3] Added missing file. --- .../data_reader_HDF5_conduit_file_ingest.cpp | 516 ++++++++++++++++++ 1 file changed, 516 insertions(+) create mode 100644 src/data_ingestion/readers/unit_test/data_reader_HDF5_conduit_file_ingest.cpp diff --git a/src/data_ingestion/readers/unit_test/data_reader_HDF5_conduit_file_ingest.cpp b/src/data_ingestion/readers/unit_test/data_reader_HDF5_conduit_file_ingest.cpp new file mode 100644 index 00000000000..06819b8aa5b --- /dev/null +++ b/src/data_ingestion/readers/unit_test/data_reader_HDF5_conduit_file_ingest.cpp @@ -0,0 +1,516 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC. +// Produced at the Lawrence Livermore National Laboratory. +// Written by the LBANN Research Team (B. Van Essen, et al.) listed in +// the CONTRIBUTORS file. +// +// LLNL-CODE-697807. +// All rights reserved. +// +// This file is part of LBANN: Livermore Big Artificial Neural Network +// Toolkit. For details, see http://software.llnl.gov/LBANN or +// https://github.com/LLNL/LBANN. +// +// Licensed under the Apache License, Version 2.0 (the "Licensee"); you +// may not use this file except in compliance with the License. 
You may +// obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the license. +//////////////////////////////////////////////////////////////////////////////// +#include "Catch2BasicSupport.hpp" + +#include "MPITestHelpers.hpp" +#include "TestHelpers.hpp" +#include "lbann/proto/lbann.pb.h" +#include "lbann/proto/proto_common.hpp" +#include +#include "lbann/utils/conduit_extensions.hpp" + +#include +#include +// #include +// #include "conduit_relay.hpp" +#include +//#include "hdf5.h" +#include +#include +#include + +#include "./data_reader_common_HDF5_test_utils.hpp" +#include "./data_reader_common_catch2.hpp" + +#include "./test_data/hdf5_c3fd_test_data_and_schemas.yaml" +#include "lbann/data_ingestion/readers/data_reader_HDF5.hpp" + +//----------------------------------------------------------------------------- +// helper to create an HDF5 dataset +herr_t +create_hdf5_nd_dataset(std::string fname, std::string path, int rank, int const * dims, + hid_t mem_type, hid_t file_type, void * to_write) +{ + hid_t file; + herr_t status = 0; + + // initialize count and dimensions + std::vector hdims(rank); + for (int d = 0; d < rank; ++d) + { + hdims[d] = dims[d]; + } + + // create the file + file = H5Fcreate(fname.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + + // Create, init a dataspace for the dataset + hid_t dataset, dataspace; + dataspace = H5Screate_simple(rank, hdims.data(), NULL); + + // Create, init the dataset. Element type is double. + dataset = H5Dcreate(file, path.c_str(), file_type, dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset, mem_type, H5S_ALL, H5S_ALL, H5P_DEFAULT, to_write); + status = H5Dclose(dataset); + + // close the dataspace and file + status = H5Sclose(dataspace); + status = H5Fclose(file); + + return status; +} + +TEST_CASE("HDF5 Conduit Hyperslab data reader file ingest tests", + "[.filesystem][data_reader][hdf5][conduit][file_ingest]") +{ + // initialize stuff (boilerplate) + auto& comm = unit_test::utilities::current_world_comm(); + lbann::init_random(0, 2); + lbann::init_data_seq_random(42); + + conduit::Node node; + node.parse(hdf5_c3fd_data_sample, "yaml"); + + auto hdf5_dr = std::make_unique(); + hdf5_dr->set_comm(&comm); + DataReaderHDF5WhiteboxTester white_box_tester; + +#if 1 + SECTION("HDF5 Parallel I/O Conduit read from HDF5 file") + { + // create working directory + std::string work_dir = create_test_directory("hdf5_reader"); + + // Example from https://llnl-conduit.readthedocs.io/en/latest/relay_io.html#hdf5-hyperslabs + // ------------------------------------------------------------------ + // Create a 2D array and show it off. + int constexpr rank = 2; + int constexpr nrows = 3; + int constexpr ncols = 4; + int constexpr eltcount = nrows * ncols; + double data[eltcount]; + for (int i = 0; i < eltcount; ++i) + { + data[i] = i; + } + + std::cout << "Array, in memory:\n"; + for (int j = 0; j < nrows; ++j) + { + for (int i = 0; i < ncols; ++i) + { + std::cout << std::right << std::setw(4) << data[j * ncols + i]; + } + std::cout << std::endl; + } + + // Create an HDF5 file with a 2D array. + herr_t status = 0; + // HDF5 dimensions are ordered from slowest- to fastest-varying. 
+ // This is the same as C and C++ nested arrays and opposite from + // many people's geometric intuition. + hsize_t hdims[rank]{ nrows, ncols }; + + const char* fname = "t_relay_io_hdf5_read_ndarray.hdf5"; + hid_t file = H5Fcreate(fname, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + + // Create, initialize a dataspace for the dataset + hid_t dataset, dataspace; + dataspace = H5Screate_simple(rank, hdims, NULL); + + // Create, initialize the dataset. Element type is double. + const char* dsname = "twoDarray"; + dataset = H5Dcreate(file, dsname, H5T_NATIVE_DOUBLE, dataspace, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(dataset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + status = H5Dclose(dataset); + + +// herr_t H5Sset_extent_simple ( hid_t space_id, +// int rank, +// const hsize_t dims[], +// const hsize_t max[] +// ) + + // close the dataspace and file + status = H5Sclose(dataspace); + status = H5Fclose(file); + + std::cout << "\nsaved array to '" << fname << ":" << dsname << "'" << std::endl; + + + + // ------------------------------------------------------------------ + // Now read a subset of that 2D array from the HDF5 file. + // Two rows, two columns; total of four elements. + int constexpr rnrows = 2; + int constexpr rncols = 2; + int constexpr reltcount = rnrows * rncols; + // As noted earlier, HDF5 orders all dimensions from slowest- to + // fastest-varying. In this two-dimensional example, row (or y-index) + // always comes before column (or x-index). If working with a 3D + // dataset, level (or z-index) would come before row. + int p_sizes[rank]{ rnrows, rncols }; + // offset to row 0, column 1 + int p_offsets[rank]{ 0, 1 }; + // read every row, every other column + int p_strides[rank]{ 1, 2 }; + // Store pointers to these parameters in the read_opts Node + conduit::Node read_opts; + read_opts["sizes"].set_external(p_sizes, rank); + read_opts["offsets"].set_external(p_offsets, rank); + read_opts["strides"].set_external(p_strides, rank); + + std::cout << "\nHDF5 Options for reading the array:" << std::endl; + read_opts.print(); + + // Read some of the 2D array in the HDF5 file into an array of doubles + conduit::Node read_data; + double p_data_out[reltcount]{42, 42, 42, 42}; + read_data.set_external(p_data_out, reltcount); + std::cout << "Check the info befor ereading "<< std::endl; + read_data.info().print(); + std::string in_path; + in_path.append(fname).append(":").append(dsname); + conduit::relay::io::hdf5_read(in_path.c_str(), read_opts, read_data); + std::cout << "Check the info after reading "<< std::endl; + // if(err) { + // std::cout << "I think that is failed." 
<< std::endl; + // } + // CONDUIT_CHECK_HDF5_ERROR(conduit::relay::io::hdf5_read(in_path.c_str(), read_opts, read_data), "Error opening hyperslab file."); + + read_data.info().print(); + read_data.print_detailed(); + + // Show what we read + std::cout << "Subset of array, read from '" << in_path << "'" << std::endl; + double *foo = read_data.value(); + + for (int j = 0; j < rnrows; ++j) + { + for (int i = 0; i < rncols; ++i) + { + std::cout << std::right << std::setw(8) << foo[j * rncols + i]; + } + std::cout << std::endl; + } + for (int j = 0; j < rnrows; ++j) + { + for (int i = 0; i < rncols; ++i) + { + std::cout << std::right << std::setw(8) << p_data_out[j * rncols + i]; + } + std::cout << std::endl; + } + +#if 0 + // open hdf5 file and obtain a handle + hid_t h5_id = + conduit::relay::io::hdf5_create_file(work_dir + "/C3FD_test_sample.hdf5"); + // write data + conduit::relay::io::hdf5_write(node, h5_id); + // close our file + conduit::relay::io::hdf5_close_file(h5_id); + + hid_t h5_fid = conduit::relay::io::hdf5_open_file_for_read( + work_dir + "/C3FD_test_sample.hdf5"); + const std::string original_path = "/RUN_ID/000000001"; + const std::string new_pathname = "000000001"; + + // Setup the data schema for this C3FD data set + conduit::Node& data_schema = white_box_tester.get_data_schema(*hdf5_dr); + data_schema.parse(hdf5_c3fd_data_schema, "yaml"); + + // Read in the experiment schema and setup the data reader + conduit::Node& experiment_schema = + white_box_tester.get_experiment_schema(*hdf5_dr); + experiment_schema.parse(hdf5_c3fd_experiment_parallel_io_schema, "yaml"); + // experiment_schema.print(); + white_box_tester.parse_schemas(*hdf5_dr); + + white_box_tester.print_metadata(*hdf5_dr); + + conduit::Node test_node; + white_box_tester.load_sample(*hdf5_dr, + test_node[new_pathname], + h5_fid, + original_path); + + // Check to see if the HRRL sample can be read via the data + // reader's load_sample method. Note that this will coerce and + // normalize all data fields as specified in the data set and + // experiment schemas. 
+ std::vector fields = {"NodeFeatures", + "EdgeFeatures", + "COOList"}; + check_node_fields(node, + test_node, + data_schema, + fields, + original_path, + new_pathname); +#endif + } + + SECTION("HDF5 Parallel I/O Conduit read from HDF5 file - part 2") + { + // create working directory + std::string work_dir = create_test_directory("hdf5_reader"); + +// create a simple buffer of doubles + conduit::Node n; + + n["full_data"].set(conduit::DataType::c_double(20)); + + double *vin = n["full_data"].value(); + + for(int i=0;i<20;i++) + { + vin[i] = i; + } + + std::cout << "Example Full Data" << std::endl; + + n.print(); + conduit::relay::io::hdf5_write(n,"tout_hdf5_slab_opts"); + // conduit::relay::io::hdf5_write(n,"tout_hdf5_slab_opts.hdf5"); + + // read 10 [1->11) entries (as above test, but using hdf5 read options) + + conduit::Node n_res; + conduit::Node opts; + opts["offset"] = 1; + opts["stride"] = 2; + opts["size"] = 10; + + conduit::Node nload; + conduit::relay::io::hdf5_read("tout_hdf5_slab_opts:full_data",opts,nload); + // conduit::relay::io::hdf5_read("tout_hdf5_slab_opts.hdf5:full_data",opts,nload); + nload.print(); + + + nload.info().print(); + nload.print_detailed(); + + std::cout << "Load Result" << std::endl; + nload.print(); + + double *vload = nload.value(); + for(int i=0;i<10;i++) + { + std::cout << vload[i] << " =?= " << (1.0 + i * 2.0) << std::endl; + // CHECK(vload[i],1.0 + i * 2.0,1e-3); + } + } +#endif + + SECTION("HDF5 Parallel I/O Conduit read from HDF5 file - part 3") + { + int constexpr rank = 2; + int constexpr ncols = 5; + int constexpr nrows = 3; + int constexpr dset_size[rank] = { nrows, ncols }; + int constexpr nelts = ncols * nrows; + + conduit::Node n_in(conduit::DataType::float64(nelts)); + + conduit::float64_array val_in = n_in.value(); + + for(conduit::index_t i=0;i= 0, to crash if the test fails + // ASSERT_GE(status, 0) << "Error creating the HDF5 test dataset."; + + // read in the whole thing + conduit::Node n_whole_out; + + conduit::relay::io::hdf5_read("tout_hdf5_r_2D_array.hdf5:myobj",n_whole_out); + + std::cout << "Read the whole data set (doubles from 0 through 14):\n"; + n_whole_out.print(); + + // should contain ncols x nrows elements + CHECK(nelts == n_whole_out.dtype().number_of_elements()); + + conduit::float64_array val_whole_out = n_whole_out.value(); + + for(conduit::index_t i=0;i offset_ary; + offset_ary.push_back(rrowoff); + offset_ary.push_back(rcoloff); + read_opts["offsets"].set(offset_ary); + + conduit::Node n_out; + + conduit::relay::io::hdf5_read("tout_hdf5_r_2D_array.hdf5:myobj",read_opts,n_out); + + std::cout << "Read partial data set (2 rows, 3 cols, starting at (1, 1)):\n"; + n_out.print_detailed(); + + // should contain ncols x nrows elements + if(rnelts != n_out.dtype().number_of_elements()) { + std::cout << "Number of elements is wrong " << rnelts << " != " << n_out.dtype().number_of_elements() << std::endl; + } + + conduit::float64_array val_out = n_out.value(); + + conduit::index_t offset = ncols * rrowoff; + conduit::index_t linear_idx = 0; + for (conduit::index_t j = 0; j < rnrows; j++) + { + for (conduit::index_t i = 0; i < rncols; i++) + { + CHECK(val_in[offset + rcoloff + i] == val_out[linear_idx]); + linear_idx += 1; + } + offset += ncols; + } + + // make sure we aren't leaking + // EXPECT_EQ(check_h5_open_ids(),DO_NO_HARM); + } + + SECTION("HDF5 Parallel I/O Conduit read from HDF5 file - part 4") + { + // create a simple buffer of doubles + conduit::Node n; + + n["full_data"].set(conduit::DataType::c_double(20)); + + 
double *vin = n["full_data"].value(); + + for(int i=0;i<20;i++) + { + vin[i] = i; + } + + std::cout << "T4 Example Full Data" << std::endl; + + n.print(); + + conduit::relay::io::hdf5_write(n,"tout_hdf5_slab.hdf5"); + + conduit::Node nload; + nload.set(conduit::DataType::c_double(10)); + + double *vload = nload.value(); + + // stride to read every other entry into compact storage + conduit::relay::io::hdf5_read_dset_slab("tout_hdf5_slab.hdf5", + "full_data", + conduit::DataType::c_double(10, + sizeof(double), // offset 1 double + sizeof(double)*2, //stride 2 doubles + sizeof(double)), + vload); + std::cout << "T4 Load Result" << std::endl;; + nload.print(); + + for(int i=0;i<10;i++) + { + CHECK(vload[i] == 1.0 + i * 2.0); + } + } + + SECTION("HDF5 Parallel I/O Conduit read from HDF5 file - part 5") + { + // create a simple buffer of doubles + conduit::Node n; + + n["full_data"].set(conduit::DataType::c_double(20)); + + double *vin = n["full_data"].value(); + + for(int i=0;i<20;i++) + { + vin[i] = i; + } + + std::cout << "Example Full Data" << std::endl; + + n.print(); + conduit::relay::io::hdf5_write(n,"tout_hdf5_slab_opts_take2"); + // conduit::relay::io::hdf5_write(n,"tout_hdf5_slab_opts_take2.hdf5"); + + // read 10 [1->11) entries (as above test, but using hdf5 read options) + + conduit::Node n_res; + conduit::Node opts; + opts["offset"] = 1; + opts["stride"] = 2; + opts["size"] = 10; + + conduit::Node nload; + conduit::relay::io::hdf5_read("tout_hdf5_slab_opts_take2:full_data",opts,nload); + // conduit::relay::io::hdf5_read("tout_hdf5_slab_opts_take2.hdf5:full_data",opts,nload); + nload.print(); + + std::cout << "Load Result"<< std::endl; + nload.print(); + + double *vload = nload.value(); + for(int i=0;i<10;i++) + { + CHECK(vload[i] == 1.0 + i * 2.0); + // EXPECT_NEAR(vload[i],1.0 + i * 2.0,1e-3); + } + + } +}
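
A note on the schema hooks added above: a field opts into the hyperslab
ingest path by setting parallel_io in its experiment-schema metadata;
parallel_split (e.g. [0, 1] in the C3FD test schema) is carried through the
metadata but is not yet consumed by the reader in this work-in-progress
state. For reference, the opt-in as written in
hdf5_c3fd_test_data_and_schemas.yaml looks like:

    NodeFeatures:
      metadata:
        pack: "node_fts"
        coerce: "float"
        parallel_io: true
        parallel_split: [0, 1]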