diff --git a/CMakeLists.txt b/CMakeLists.txt index 32007025a6..f84dce16d4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -805,6 +805,7 @@ if(openPMD_BUILD_TESTING) test/Files_SerialIO/close_and_reopen_test.cpp test/Files_SerialIO/filebased_write_test.cpp test/Files_SerialIO/issue_1744_unique_ptrs_at_close_time.cpp + test/Files_SerialIO/components_without_extent.cpp ) elseif(${test_name} STREQUAL "ParallelIO" AND openPMD_HAVE_MPI) list(APPEND ${out_list} diff --git a/docs/source/details/backendconfig.rst b/docs/source/details/backendconfig.rst index 123b0a58e0..205047160d 100644 --- a/docs/source/details/backendconfig.rst +++ b/docs/source/details/backendconfig.rst @@ -94,6 +94,11 @@ Using the Streaming API (i.e. ``SeriesInterface::readIteration()``) will do this Parsing eagerly might be very expensive for a Series with many iterations, but will avoid bugs by forgotten calls to ``Iteration::open()``. In complex environments, calling ``Iteration::open()`` on an already open environment does no harm (and does not incur additional runtime cost for additional ``open()`` calls). +As of openPMD-api 0.17.0, the parser verifies that all records within a mesh or within a particle species have consistent shapes / extents. +This is used for filling in the shape for constant components that do not define it. +In order to skip this check in the error case, the key ``{"verify_homogeneous_extents": false}`` may be set (alternatively ``export OPENPMD_VERIFY_HOMOGENEOUS_EXTENTS=0`` will do the same). +This will help read datasets with inconsistent metadata definitions. + The key ``resizable`` can be passed to ``Dataset`` options. It if set to ``{"resizable": true}``, this declares that it shall be allowed to increased the ``Extent`` of a ``Dataset`` via ``resetDataset()`` at a later time, i.e., after it has been first declared (and potentially written). For HDF5, resizable Datasets come with a performance penalty. 
diff --git a/include/openPMD/Dataset.hpp b/include/openPMD/Dataset.hpp index 80513683f9..507b68d350 100644 --- a/include/openPMD/Dataset.hpp +++ b/include/openPMD/Dataset.hpp @@ -54,9 +54,21 @@ class Dataset */ JOINED_DIMENSION = std::numeric_limits::max(), /** - * Some backends (i.e. JSON and TOML in template mode) support the - * creation of dataset with undefined datatype and extent. - * The extent should be given as {UNDEFINED_EXTENT} for that. + * In some use cases, the extent need not be specified. + * For these, specify Extent{UNDEFINED_EXTENT}. + * Use cases: + * + * 1. Some backends (i.e. JSON and TOML in template mode) support the + * creation of datasets with undefined datatype and extent. + * The extent should be given as {UNDEFINED_EXTENT} for that. + * 2. With openPMD 2.0, the shape of constant components may be omitted + * in writing if it is defined somewhere else as part + * of the same Mesh / Species. + * (https://github.com/openPMD/openPMD-standard/pull/289) + * When reading such datasets, the openPMD-api will try to fill in + * the missing extents, so the extent for consistently-defined + * datasets should ideally not be reported by the read-side API + * as undefined. 
*/ UNDEFINED_EXTENT = std::numeric_limits::max() - 1 }; @@ -87,5 +99,8 @@ class Dataset std::optional joinedDimension() const; static std::optional joinedDimension(Extent const &); + + bool undefinedExtent() const; + static bool undefinedExtent(Extent const &); }; } // namespace openPMD diff --git a/include/openPMD/IO/AbstractIOHandler.hpp b/include/openPMD/IO/AbstractIOHandler.hpp index 29b3de8bff..681343c0df 100644 --- a/include/openPMD/IO/AbstractIOHandler.hpp +++ b/include/openPMD/IO/AbstractIOHandler.hpp @@ -314,6 +314,7 @@ class AbstractIOHandler internal::SeriesStatus m_seriesStatus = internal::SeriesStatus::Default; IterationEncoding m_encoding = IterationEncoding::groupBased; OpenpmdStandard m_standard = auxiliary::parseStandard(getStandardDefault()); + bool m_verify_homogeneous_extents = true; }; // AbstractIOHandler } // namespace openPMD diff --git a/include/openPMD/IO/IOTask.hpp b/include/openPMD/IO/IOTask.hpp index 67a6be50ab..0aa35f3a74 100644 --- a/include/openPMD/IO/IOTask.hpp +++ b/include/openPMD/IO/IOTask.hpp @@ -22,6 +22,7 @@ #include "openPMD/ChunkInfo.hpp" #include "openPMD/Dataset.hpp" +#include "openPMD/Error.hpp" #include "openPMD/IterationEncoding.hpp" #include "openPMD/Streaming.hpp" #include "openPMD/auxiliary/Export.hpp" diff --git a/include/openPMD/Record.hpp b/include/openPMD/Record.hpp index 791c4c15f8..246b458b5f 100644 --- a/include/openPMD/Record.hpp +++ b/include/openPMD/Record.hpp @@ -53,7 +53,8 @@ class Record : public BaseRecord void flush_impl(std::string const &, internal::FlushParams const &) override; - void read(); + + [[nodiscard]] internal::HomogenizeExtents read(); }; // Record template diff --git a/include/openPMD/RecordComponent.hpp b/include/openPMD/RecordComponent.hpp index ee29a6d7fa..f8791569b9 100644 --- a/include/openPMD/RecordComponent.hpp +++ b/include/openPMD/RecordComponent.hpp @@ -165,6 +165,8 @@ class RecordComponent : public BaseRecordComponent * * Shrinking any dimension's extent. 
* * Changing the number of dimensions. * + * The dataset extent may be empty to indicate undefined extents. + * * Backend support for resizing datasets: * * JSON: Supported * * ADIOS2: Supported as of ADIOS2 2.7.0 @@ -544,6 +546,26 @@ OPENPMD_protected void verifyChunk(Datatype, Offset const &, Extent const &) const; }; // RecordComponent +namespace internal +{ + // Must put this after the definition of RecordComponent due to the + // deque + struct HomogenizeExtents + { + std::deque without_extent; + std::optional retrieved_extent; + bool verify_homogeneous_extents = true; + + explicit HomogenizeExtents(); + HomogenizeExtents(bool verify_homogeneous_extents); + + void check_extent(Attributable const &callsite, RecordComponent &); + auto merge(Attributable const &callsite, HomogenizeExtents) + -> HomogenizeExtents &; + void homogenize(Attributable const &callsite) &&; + }; +} // namespace internal + } // namespace openPMD #include "RecordComponent.tpp" diff --git a/include/openPMD/backend/Attributable.hpp b/include/openPMD/backend/Attributable.hpp index 732b2d1b5c..172d302c48 100644 --- a/include/openPMD/backend/Attributable.hpp +++ b/include/openPMD/backend/Attributable.hpp @@ -20,6 +20,7 @@ */ #pragma once +#include "openPMD/Error.hpp" #include "openPMD/IO/AbstractIOHandler.hpp" #include "openPMD/ThrowError.hpp" #include "openPMD/auxiliary/OutOfRangeMsg.hpp" @@ -30,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -55,6 +57,7 @@ namespace internal { class IterationData; class SeriesData; + struct HomogenizeExtents; class SharedAttributableData { @@ -106,6 +109,7 @@ namespace internal friend class openPMD::Attributable; using SharedData_t = std::shared_ptr; + using A_MAP = SharedData_t::element_type::A_MAP; public: AttributableData(); @@ -152,6 +156,32 @@ namespace internal std::shared_ptr(self, [](auto const *) {})); return res; } + + inline auto attributes() -> A_MAP & + { + return operator*().m_attributes; + } + [[nodiscard]] inline 
auto attributes() const -> A_MAP const & + { + return operator*().m_attributes; + } + [[nodiscard]] inline auto readAttribute(std::string const &name) const + -> Attribute const & + { + auto const &attr = attributes(); + if (auto it = attr.find(name); it != attr.end()) + { + return it->second; + } + else + { + throw error::ReadError( + error::AffectedObject::Attribute, + error::Reason::NotFound, + std::nullopt, + "Not found: '" + name + "'."); + } + } }; template @@ -209,6 +239,7 @@ class Attributable friend T &internal::makeOwning(T &self, Series); friend class StatefulSnapshotsContainer; friend class internal::AttributableData; + friend struct internal::HomogenizeExtents; protected: // tag for internal constructor diff --git a/src/Dataset.cpp b/src/Dataset.cpp index a56c566805..f0c39cfc9d 100644 --- a/src/Dataset.cpp +++ b/src/Dataset.cpp @@ -95,4 +95,13 @@ std::optional Dataset::joinedDimension(Extent const &extent) } return res; } + +bool Dataset::undefinedExtent() const +{ + return undefinedExtent(extent); +} +bool Dataset::undefinedExtent(Extent const &e) +{ + return e.size() == 1 && e.at(0) == Dataset::UNDEFINED_EXTENT; +} } // namespace openPMD diff --git a/src/IO/ADIOS/ADIOS2IOHandler.cpp b/src/IO/ADIOS/ADIOS2IOHandler.cpp index 44e862ffe2..0e8b92cd47 100644 --- a/src/IO/ADIOS/ADIOS2IOHandler.cpp +++ b/src/IO/ADIOS/ADIOS2IOHandler.cpp @@ -802,6 +802,12 @@ void ADIOS2IOHandlerImpl::createDataset( "only is not possible."); } + if (Dataset::undefinedExtent(parameters.extent)) + { + throw error::OperationUnsupportedInBackend( + "ADIOS2", "No support for Datasets with undefined extent."); + } + if (!writable->written) { /* Sanitize name */ @@ -962,6 +968,12 @@ void ADIOS2IOHandlerImpl::extendDataset( VERIFY_ALWAYS( access::write(m_handler->m_backendAccess), "[ADIOS2] Cannot extend datasets in read-only mode."); + if (Dataset::undefinedExtent(parameters.extent)) + { + throw error::OperationUnsupportedInBackend( + "ADIOS2", "No support for Datasets with undefined 
extent."); + } + setAndGetFilePosition(writable); auto file = refreshFileFromParent(writable, /* preferParentFile = */ false); std::string name = nameOfVariable(writable); diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index 010b66865c..0c3b957478 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -475,6 +475,11 @@ void HDF5IOHandlerImpl::createDataset( error::throwOperationUnsupportedInBackend( "HDF5", "Joined Arrays currently only supported in ADIOS2"); } + else if (Dataset::undefinedExtent(parameters.extent)) + { + throw error::OperationUnsupportedInBackend( + "HDF5", "No support for Datasets with undefined extent."); + } if (!writable->written) { @@ -845,6 +850,11 @@ void HDF5IOHandlerImpl::extendDataset( error::throwOperationUnsupportedInBackend( "HDF5", "Joined Arrays currently only supported in ADIOS2"); } + else if (Dataset::undefinedExtent(parameters.extent)) + { + throw error::OperationUnsupportedInBackend( + "HDF5", "No support for Datasets with undefined extent."); + } auto res = getFile(writable); if (!res) diff --git a/src/Iteration.cpp b/src/Iteration.cpp index 64cfaa39d0..e3561d1387 100644 --- a/src/Iteration.cpp +++ b/src/Iteration.cpp @@ -638,6 +638,9 @@ void Iteration::readMeshes(std::string const &meshesPath) IOHandler()->enqueue(IOTask(&m, aList)); IOHandler()->flush(internal::defaultFlushParams); + // Find constant scalar meshes. shape generally required for meshes, + // shape also required for scalars. 
+ // https://github.com/openPMD/openPMD-standard/pull/289 auto att_begin = aList.attributes->begin(); auto att_end = aList.attributes->end(); auto value = std::find(att_begin, att_end, "value"); diff --git a/src/Mesh.cpp b/src/Mesh.cpp index c5cdefe483..e74caddd73 100644 --- a/src/Mesh.cpp +++ b/src/Mesh.cpp @@ -21,6 +21,7 @@ #include "openPMD/Mesh.hpp" #include "openPMD/Error.hpp" #include "openPMD/IO/AbstractIOHandler.hpp" +#include "openPMD/RecordComponent.hpp" #include "openPMD/Series.hpp" #include "openPMD/ThrowError.hpp" #include "openPMD/UnitDimension.hpp" @@ -430,6 +431,8 @@ void Mesh::flush_impl( void Mesh::read() { + internal::HomogenizeExtents homogenizeExtents( + IOHandler()->m_verify_homogeneous_extents); internal::EraseStaleEntries map{*this}; using DT = Datatype; @@ -579,6 +582,7 @@ void Mesh::read() if (scalar()) { T_RecordComponent::read(); + homogenizeExtents.check_extent(*this, *this); } else { @@ -603,7 +607,9 @@ void Mesh::read() << "' and will skip it due to read error:\n" << err.what() << std::endl; map.forget(component); + continue; } + homogenizeExtents.check_extent(*this, rc); } Parameter dList; @@ -630,10 +636,14 @@ void Mesh::read() << "' and will skip it due to read error:\n" << err.what() << std::endl; map.forget(component); + continue; } + homogenizeExtents.check_extent(*this, rc); } } + std::move(homogenizeExtents).homogenize(*this); + readBase(); readAttributes(ReadMode::FullyReread); diff --git a/src/ParticleSpecies.cpp b/src/ParticleSpecies.cpp index 4006cc82ba..e908c4fc1d 100644 --- a/src/ParticleSpecies.cpp +++ b/src/ParticleSpecies.cpp @@ -19,6 +19,7 @@ * If not, see . 
*/ #include "openPMD/ParticleSpecies.hpp" +#include "openPMD/RecordComponent.hpp" #include "openPMD/Series.hpp" #include "openPMD/auxiliary/DerefDynamicCast.hpp" #include "openPMD/backend/Writable.hpp" @@ -35,6 +36,8 @@ ParticleSpecies::ParticleSpecies() void ParticleSpecies::read() { + internal::HomogenizeExtents homogenizeExtents( + IOHandler()->m_verify_homogeneous_extents); /* obtain all non-scalar records */ Parameter pList; IOHandler()->enqueue(IOTask(this, pList)); @@ -76,17 +79,17 @@ void ParticleSpecies::read() auto att_begin = aList.attributes->begin(); auto att_end = aList.attributes->end(); auto value = std::find(att_begin, att_end, "value"); - auto shape = std::find(att_begin, att_end, "shape"); - if (value != att_end && shape != att_end) + if (value != att_end) { RecordComponent &rc = r; IOHandler()->enqueue(IOTask(&rc, pOpen)); IOHandler()->flush(internal::defaultFlushParams); rc.get().m_isConstant = true; } + internal::HomogenizeExtents recordExtents; try { - r.read(); + recordExtents = r.read(); } catch (error::ReadError const &err) { @@ -95,7 +98,9 @@ void ParticleSpecies::read() << err.what() << std::endl; map.forget(record_name); + continue; } + homogenizeExtents.merge(*this, std::move(recordExtents)); } } @@ -115,6 +120,7 @@ void ParticleSpecies::read() Parameter dOpen; for (auto const &record_name : *dList.datasets) { + internal::HomogenizeExtents recordExtents; try { Record &r = map[record_name]; @@ -127,7 +133,7 @@ void ParticleSpecies::read() rc.setWritten(false, Attributable::EnqueueAsynchronously::No); rc.resetDataset(Dataset(*dOpen.dtype, *dOpen.extent)); rc.setWritten(true, Attributable::EnqueueAsynchronously::No); - r.read(); + recordExtents = r.read(); } catch (error::ReadError const &err) { @@ -138,9 +144,13 @@ void ParticleSpecies::read() map.forget(record_name); //(*this)[record_name].erase(RecordComponent::SCALAR); // this->erase(record_name); + continue; } + homogenizeExtents.merge(*this, std::move(recordExtents)); } + 
std::move(homogenizeExtents).homogenize(*this); + readAttributes(ReadMode::FullyReread); } diff --git a/src/Record.cpp b/src/Record.cpp index 7d41fce5c2..b3bb57a700 100644 --- a/src/Record.cpp +++ b/src/Record.cpp @@ -19,7 +19,9 @@ * If not, see . */ #include "openPMD/Record.hpp" +#include "openPMD/Error.hpp" #include "openPMD/RecordComponent.hpp" +#include "openPMD/ThrowError.hpp" #include "openPMD/UnitDimension.hpp" #include "openPMD/backend/BaseRecord.hpp" @@ -103,21 +105,30 @@ void Record::flush_impl( } } -void Record::read() +auto Record::read() -> internal::HomogenizeExtents { + internal::HomogenizeExtents res(IOHandler()->m_verify_homogeneous_extents); + auto check_extent = [&](RecordComponent &rc) { + res.check_extent(*this, rc); + }; if (scalar()) { - /* using operator[] will incorrectly update parent */ - try - { - T_RecordComponent::read(/* require_unit_si = */ true); - } - catch (error::ReadError const &err) - { - std::cerr << "Cannot read scalar record component and will skip it " - "due to read error:\n" - << err.what() << std::endl; - } + [&]() { + /* using operator[] will incorrectly update parent */ + try + { + T_RecordComponent::read(/* require_unit_si = */ true); + } + catch (error::ReadError const &err) + { + std::cerr + << "Cannot read scalar record component and will skip it " + "due to read error:\n" + << err.what() << std::endl; + return; // from lambda + } + check_extent(*this); + }(); } else { @@ -142,7 +153,9 @@ void Record::read() << "' and will skip it due to read error:\n" << err.what() << std::endl; this->container().erase(component); + continue; } + check_extent(rc); } Parameter dList; @@ -169,13 +182,16 @@ void Record::read() << "' and will skip it due to read error:\n" << err.what() << std::endl; this->container().erase(component); + continue; } + check_extent(rc); } } readBase(); readAttributes(ReadMode::FullyReread); + return res; } template class BaseRecord; diff --git a/src/RecordComponent.cpp b/src/RecordComponent.cpp index 
d5420ea3b8..be1903d6da 100644 --- a/src/RecordComponent.cpp +++ b/src/RecordComponent.cpp @@ -24,7 +24,9 @@ #include "openPMD/Error.hpp" #include "openPMD/IO/Format.hpp" #include "openPMD/Series.hpp" +#include "openPMD/auxiliary/Environment.hpp" #include "openPMD/auxiliary/Memory.hpp" +#include "openPMD/auxiliary/StringManip.hpp" #include "openPMD/backend/Attributable.hpp" #include "openPMD/backend/BaseRecord.hpp" @@ -55,6 +57,116 @@ namespace internal a.setDirtyRecursive(true); m_chunks.push(std::move(task)); } + + static constexpr char const *note_on_deactivating_this_check = R"( +Note: In order to ignore inconsistent / incomplete extent definitions, +set the environment variable OPENPMD_VERIFY_HOMOGENEOUS_EXTENTS=0 +or alternatively the JSON option {"verify_homogeneous_extents": false}. + )"; + + HomogenizeExtents::HomogenizeExtents() = default; + HomogenizeExtents::HomogenizeExtents(bool verify_homogeneous_extents_in) + : verify_homogeneous_extents(verify_homogeneous_extents_in) + {} + + void HomogenizeExtents::check_extent( + Attributable const &callsite, RecordComponent &rc) + { + auto extent = rc.getExtent(); + if (Dataset::undefinedExtent(extent)) + { + without_extent.emplace_back(rc); + } + else if (retrieved_extent.has_value()) + { + if (verify_homogeneous_extents && extent != *retrieved_extent) + { + std::stringstream error_msg; + error_msg << "Inconsistent extents found for Record '" + << callsite.myPath().openPMDPath() << "': Component '" + << rc.myPath().openPMDPath() << "' has extent"; + auxiliary::write_vec_to_stream(error_msg, extent) << ", but "; + auxiliary::write_vec_to_stream(error_msg, *retrieved_extent) + << " was found previously." 
+ << note_on_deactivating_this_check; + throw error::ReadError( + error::AffectedObject::Group, + error::Reason::UnexpectedContent, + std::nullopt, + error_msg.str()); + } + } + else + { + retrieved_extent = std::move(extent); + } + } + + auto HomogenizeExtents::merge( + Attributable const &callsite, HomogenizeExtents other) + -> HomogenizeExtents & + { + if (retrieved_extent.has_value() && other.retrieved_extent.has_value()) + { + if (verify_homogeneous_extents && + *retrieved_extent != *other.retrieved_extent) + { + std::stringstream error_msg; + error_msg << "Inconsistent extents found for Record '" + << callsite.myPath().openPMDPath() << "': "; + auxiliary::write_vec_to_stream(error_msg, *retrieved_extent) + << " vs. "; + auxiliary::write_vec_to_stream( + error_msg, *other.retrieved_extent) + << "." << note_on_deactivating_this_check; + throw error::ReadError( + error::AffectedObject::Group, + error::Reason::UnexpectedContent, + std::nullopt, + error_msg.str()); + } + } + else if (!retrieved_extent.has_value()) + { + retrieved_extent = std::move(other.retrieved_extent); + } + + for (auto &rc : other.without_extent) + { + this->without_extent.emplace_back(std::move(rc)); + } + return *this; + } + + void HomogenizeExtents::homogenize(Attributable const &callsite) && + { + if (!retrieved_extent.has_value()) + { + if (verify_homogeneous_extents) + { + throw error::ReadError( + error::AffectedObject::Group, + error::Reason::UnexpectedContent, + std::nullopt, + "No extent found for any component contained in '" + + callsite.myPath().openPMDPath() + "'." 
+ + note_on_deactivating_this_check); + } + else + { + return; + } + } + auto &ext = *retrieved_extent; + for (auto &rc : without_extent) + { + rc.setWritten(false, Attributable::EnqueueAsynchronously::No); + rc.resetDataset(Dataset(Datatype::UNDEFINED, ext)); + rc.setWritten(true, Attributable::EnqueueAsynchronously::No); + } + without_extent.clear(); + } + } // namespace internal RecordComponent::RecordComponent() : BaseRecordComponent(NoInit()) @@ -108,7 +220,14 @@ RecordComponent &RecordComponent::resetDataset(Dataset d) throw std::runtime_error("Dataset extent must be at least 1D."); if (d.empty()) { - if (d.dtype != Datatype::UNDEFINED) + if (d.extent.empty()) + { + throw error::Internal( + "A zero-dimensional dataset is not to be considered empty, but " + "undefined. This error is an internal safeguard against future " + "changes that might not consider this."); + } + else if (d.dtype != Datatype::UNDEFINED) { return makeEmpty(std::move(d)); } @@ -155,7 +274,7 @@ Extent RecordComponent::getExtent() const } else { - return {1}; + return {Dataset::UNDEFINED_EXTENT}; } } @@ -280,6 +399,13 @@ void RecordComponent::flush( { setUnitSI(1); } + auto constant_component_write_shape = [&]() { + auto extent = getExtent(); + return !Dataset::undefinedExtent(extent) && + std::none_of(extent.begin(), extent.end(), [](auto val) { + return val == Dataset::JOINED_DIMENSION; + }); + }; if (!written()) { if (constant()) @@ -299,16 +425,20 @@ void RecordComponent::flush( Operation::WRITE_ATT>::ChangesOverSteps::IfPossible; } IOHandler()->enqueue(IOTask(this, aWrite)); - aWrite.name = "shape"; - Attribute a(getExtent()); - aWrite.dtype = a.dtype; - aWrite.resource = a.getResource(); - if (isVBased) + if (constant_component_write_shape()) { - aWrite.changesOverSteps = Parameter< - Operation::WRITE_ATT>::ChangesOverSteps::IfPossible; + + aWrite.name = "shape"; + Attribute a(getExtent()); + aWrite.dtype = a.dtype; + aWrite.resource = a.getResource(); + if (isVBased) + { + 
aWrite.changesOverSteps = Parameter< + Operation::WRITE_ATT>::ChangesOverSteps::IfPossible; + } + IOHandler()->enqueue(IOTask(this, aWrite)); } - IOHandler()->enqueue(IOTask(this, aWrite)); } else { @@ -323,6 +453,13 @@ void RecordComponent::flush( { if (constant()) { + if (!constant_component_write_shape()) + { + throw error::WrongAPIUsage( + "Extended constant component from a previous shape to " + "one that cannot be written (empty or with joined " + "dimension)."); + } bool isVBased = retrieveSeries().iterationEncoding() == IterationEncoding::variableBased; Parameter aWrite; @@ -390,25 +527,27 @@ namespace void RecordComponent::readBase(bool require_unit_si) { using DT = Datatype; - // auto & rc = get(); - Parameter aRead; + auto &rc = get(); - if (constant() && !empty()) - { - aRead.name = "value"; - IOHandler()->enqueue(IOTask(this, aRead)); - IOHandler()->flush(internal::defaultFlushParams); + readAttributes(ReadMode::FullyReread); - Attribute a(*aRead.resource); - DT dtype = *aRead.dtype; + auto read_constant = [&]() { + Attribute a = rc.readAttribute("value"); + DT dtype = a.dtype; setWritten(false, Attributable::EnqueueAsynchronously::No); switchNonVectorType(dtype, *this, a); setWritten(true, Attributable::EnqueueAsynchronously::No); - aRead.name = "shape"; - IOHandler()->enqueue(IOTask(this, aRead)); - IOHandler()->flush(internal::defaultFlushParams); - a = Attribute(*aRead.resource); + if (!containsAttribute("shape")) + { + setWritten(false, Attributable::EnqueueAsynchronously::No); + resetDataset(Dataset(dtype, {Dataset::UNDEFINED_EXTENT})); + setWritten(true, Attributable::EnqueueAsynchronously::No); + + return; + } + + a = rc.attributes().at("shape"); Extent e; // uint64_t check @@ -418,7 +557,7 @@ void RecordComponent::readBase(bool require_unit_si) else { std::ostringstream oss; - oss << "Unexpected datatype (" << *aRead.dtype + oss << "Unexpected datatype (" << a.dtype << ") for attribute 'shape' (" << determineDatatype() << " aka uint64_t)"; 
throw error::ReadError( @@ -431,9 +570,12 @@ void RecordComponent::readBase(bool require_unit_si) setWritten(false, Attributable::EnqueueAsynchronously::No); resetDataset(Dataset(dtype, e)); setWritten(true, Attributable::EnqueueAsynchronously::No); - } + }; - readAttributes(ReadMode::FullyReread); + if (constant() && !empty()) + { + read_constant(); + } if (require_unit_si) { @@ -447,7 +589,8 @@ void RecordComponent::readBase(bool require_unit_si) "'" + myPath().openPMDPath() + "'."); } - if (!getAttribute("unitSI").getOptional().has_value()) + if (auto attr = getAttribute("unitSI"); + !attr.getOptional().has_value()) { throw error::ReadError( error::AffectedObject::Attribute, @@ -455,8 +598,8 @@ void RecordComponent::readBase(bool require_unit_si) {}, "Unexpected Attribute datatype for 'unitSI' (expected double, " "found " + - datatypeToString(Attribute(*aRead.resource).dtype) + - ") in '" + myPath().openPMDPath() + "'."); + datatypeToString(attr.dtype) + ") in '" + + myPath().openPMDPath() + "'."); } } } diff --git a/src/Series.cpp b/src/Series.cpp index 72e8740340..8dac039e5a 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -32,6 +32,7 @@ #include "openPMD/IterationEncoding.hpp" #include "openPMD/ThrowError.hpp" #include "openPMD/auxiliary/Date.hpp" +#include "openPMD/auxiliary/Environment.hpp" #include "openPMD/auxiliary/Filesystem.hpp" #include "openPMD/auxiliary/JSON_internal.hpp" #include "openPMD/auxiliary/Mpi.hpp" @@ -138,6 +139,8 @@ struct Series::ParsedInput std::string filenamePostfix; std::optional filenameExtension; int filenamePadding = -1; + // optional fields + bool verify_homogeneous_extents = true; }; // ParsedInput std::string Series::openPMD() const @@ -838,7 +841,17 @@ void Series::init( // Either an MPI_Comm or none, the template works for both options MPI_Communicator &&...comm) { - auto init_directly = [this, &comm..., at, &filepath]( + auto emplace_parse_config_options_into_iohandler = + [](AbstractIOHandler &ioHandler, ParsedInput 
&input) { + ioHandler.m_verify_homogeneous_extents = + input.verify_homogeneous_extents; + }; + + auto init_directly = [this, + &comm..., + at, + &filepath, + &emplace_parse_config_options_into_iohandler]( std::unique_ptr parsed_input, json::TracingJSON tracing_json) { auto io_handler = createIOHandler( @@ -850,12 +863,17 @@ void Series::init( comm..., tracing_json, filepath); + emplace_parse_config_options_into_iohandler(*io_handler, *parsed_input); initSeries(std::move(io_handler), std::move(parsed_input)); json::warnGlobalUnusedOptions(tracing_json); }; - auto init_deferred = [this, at, &filepath, &options, &comm...]( - std::string const &parsed_directory) { + auto init_deferred = [this, + at, + &filepath, + &options, + &emplace_parse_config_options_into_iohandler, + &comm...](std::string const &parsed_directory) { // Set a temporary IOHandler so that API calls which require a present // IOHandler don't fail writable().IOHandler = @@ -865,8 +883,12 @@ void Series::init( series.iterations.linkHierarchy(writable()); series.m_rankTable.m_attributable.linkHierarchy(writable()); series.m_deferred_initialization = - [called_this_already = false, filepath, options, at, comm...]( - Series &s) mutable { + [called_this_already = false, + filepath, + options, + at, + emplace_parse_config_options_into_iohandler, + comm...](Series &s) mutable { if (called_this_already) { throw std::runtime_error("Must be called one time only"); @@ -896,6 +918,8 @@ void Series::init( comm..., tracing_json, filepath); + emplace_parse_config_options_into_iohandler( + *io_handler, *parsed_input); auto res = io_handler.get(); s.initSeries(std::move(io_handler), std::move(parsed_input)); json::warnGlobalUnusedOptions(tracing_json); @@ -2940,6 +2964,14 @@ void Series::parseJsonOptions(TracingJSON &options, ParsedInput &input) auto &series = get(); getJsonOption( options, "defer_iteration_parsing", series.m_parseLazily); + input.verify_homogeneous_extents = + auxiliary::getEnvNum( + 
"OPENPMD_VERIFY_HOMOGENEOUS_EXTENTS", + input.verify_homogeneous_extents ? 1 : 0) != 0; + getJsonOption( + options, + "verify_homogeneous_extents", + input.verify_homogeneous_extents); internal::SeriesData::SourceSpecifiedViaJSON rankTableSource; if (getJsonOptionLowerCase(options, "rank_table", rankTableSource.value)) { diff --git a/src/backend/PatchRecord.cpp b/src/backend/PatchRecord.cpp index 5d2b38d50f..7c84147c81 100644 --- a/src/backend/PatchRecord.cpp +++ b/src/backend/PatchRecord.cpp @@ -41,7 +41,7 @@ PatchRecord::setUnitDimension(std::map const &udim) void PatchRecord::flush_impl( std::string const &path, internal::FlushParams const &flushParams) { - if (!this->datasetDefined()) + if (!this->scalar()) { if (IOHandler()->m_frontendAccess != Access::READ_ONLY) Container::flush( diff --git a/test/Files_SerialIO/SerialIOTests.hpp b/test/Files_SerialIO/SerialIOTests.hpp index f5e770681b..c534ded579 100644 --- a/test/Files_SerialIO/SerialIOTests.hpp +++ b/test/Files_SerialIO/SerialIOTests.hpp @@ -12,3 +12,7 @@ namespace issue_1744_unique_ptrs_at_close_time { auto issue_1744_unique_ptrs_at_close_time() -> void; } +namespace components_without_extent +{ +auto components_without_extent() -> void; +} diff --git a/test/Files_SerialIO/components_without_extent.cpp b/test/Files_SerialIO/components_without_extent.cpp new file mode 100644 index 0000000000..0fcc3f9e3a --- /dev/null +++ b/test/Files_SerialIO/components_without_extent.cpp @@ -0,0 +1,268 @@ +#include "SerialIOTests.hpp" + +#include "openPMD/openPMD.hpp" + +#include + +#include +#include + +namespace components_without_extent +{ +constexpr char const *filepath = "../samples/components_without_extent.json"; + +void particle_offset_without_extent() +{ + // write + { + openPMD::Series write(filepath, openPMD::Access::CREATE); + auto it0 = write.writeIterations()[0]; + auto e = it0.particles["e"]; + for (auto comp_id : {"x", "y", "z"}) + { + auto position_comp = e["position"][comp_id]; + 
position_comp.resetDataset({openPMD::Datatype::FLOAT, {5}}); + std::unique_ptr data{new float[5]}; + std::iota(data.get(), data.get() + 5, 0); + position_comp.storeChunk(std::move(data), {0}, {5}); + + auto offset_comp = e["positionOffset"][comp_id]; + offset_comp.resetDataset( + {openPMD::Datatype::INT, {openPMD::Dataset::UNDEFINED_EXTENT}}); + offset_comp.makeConstant(0); + } + write.close(); + } + + // read + { + openPMD::Series read(filepath, openPMD::Access::READ_RANDOM_ACCESS); + auto e = read.snapshots()[0].particles["e"]; + for (auto const &record : e) + { + for (auto const &component : record.second) + { + REQUIRE(component.second.getExtent() == openPMD::Extent{5}); + } + } + } +} + +void particles_without_any_extent() +{ + // write + { + openPMD::Series write(filepath, openPMD::Access::CREATE); + auto it0 = write.writeIterations()[0]; + auto e = it0.particles["e"]; + for (auto comp_id : {"x", "y", "z"}) + { + auto position_comp = e["position"][comp_id]; + position_comp.resetDataset( + {openPMD::Datatype::INT, {openPMD::Dataset::UNDEFINED_EXTENT}}); + position_comp.makeConstant(0); + + auto offset_comp = e["positionOffset"][comp_id]; + offset_comp.resetDataset( + {openPMD::Datatype::INT, {openPMD::Dataset::UNDEFINED_EXTENT}}); + offset_comp.makeConstant(0); + } + write.close(); + } + + // read + { + openPMD::Series read(filepath, openPMD::Access::READ_RANDOM_ACCESS); + REQUIRE(!read.snapshots()[0].particles.contains("e")); + } + + { + openPMD::Series read( + filepath, + openPMD::Access::READ_RANDOM_ACCESS, + R"({"verify_homogeneous_extents": false})"); + REQUIRE(read.snapshots()[0].particles.contains("e")); + auto e = read.snapshots()[0].particles["e"]; + for (auto const &record : e) + { + for (auto const &component : record.second) + { + REQUIRE( + component.second.getExtent() == + openPMD::Extent{openPMD::Dataset::UNDEFINED_EXTENT}); + } + } + } +} + +void particles_without_inconsistent_extent() +{ + // write + { + openPMD::Series write(filepath, 
openPMD::Access::CREATE); + auto it0 = write.writeIterations()[0]; + auto e = it0.particles["e"]; + for (auto comp_id : {"x", "y", "z"}) + { + auto position_comp = e["position"][comp_id]; + position_comp.resetDataset({openPMD::Datatype::INT, {5}}); + position_comp.makeConstant(0); + + auto offset_comp = e["positionOffset"][comp_id]; + offset_comp.resetDataset({openPMD::Datatype::INT, {10}}); + offset_comp.makeConstant(0); + } + write.close(); + } + + // read + { + openPMD::Series read(filepath, openPMD::Access::READ_RANDOM_ACCESS); + REQUIRE(!read.snapshots()[0].particles.contains("e")); + } + + { + openPMD::Series read( + filepath, + openPMD::Access::READ_RANDOM_ACCESS, + R"({"verify_homogeneous_extents": false})"); + REQUIRE(read.snapshots()[0].particles.contains("e")); + auto e = read.snapshots()[0].particles["e"]; + for (auto const &component : e["position"]) + { + REQUIRE(component.second.getExtent() == openPMD::Extent{5}); + } + for (auto const &component : e["positionOffset"]) + { + REQUIRE(component.second.getExtent() == openPMD::Extent{10}); + } + } +} + +void meshes_with_incomplete_extent() +{ + // write + { + openPMD::Series write(filepath, openPMD::Access::CREATE); + auto it0 = write.writeIterations()[0]; + auto E = it0.meshes["E"]; + for (auto comp_id : {"x"}) + { + auto comp = E[comp_id]; + comp.resetDataset({openPMD::Datatype::FLOAT, {5}}); + std::unique_ptr data{new float[5]}; + std::iota(data.get(), data.get() + 5, 0); + comp.storeChunk(std::move(data), {0}, {5}); + } + for (auto comp_id : {"y", "z"}) + { + auto comp = E[comp_id]; + comp.resetDataset( + {openPMD::Datatype::INT, {openPMD::Dataset::UNDEFINED_EXTENT}}); + comp.makeConstant(0); + } + write.close(); + } + + // read + { + openPMD::Series read(filepath, openPMD::Access::READ_RANDOM_ACCESS); + auto E = read.snapshots()[0].meshes["E"]; + for (auto const &component : E) + { + REQUIRE(component.second.getExtent() == openPMD::Extent{5}); + } + } +} + +void meshes_with_inconsistent_extent() +{ 
+ // write + { + openPMD::Series write(filepath, openPMD::Access::CREATE); + auto it0 = write.writeIterations()[0]; + auto E = it0.meshes["E"]; + size_t i = 1; + for (auto comp_id : {"x", "y", "z"}) + { + size_t extent = i++ * 5; + auto comp = E[comp_id]; + comp.resetDataset({openPMD::Datatype::FLOAT, {extent}}); + std::unique_ptr data{new float[extent]}; + std::iota(data.get(), data.get() + extent, 0); + comp.storeChunk(std::move(data), {0}, {extent}); + } + write.close(); + } + + // read + { + openPMD::Series read(filepath, openPMD::Access::READ_RANDOM_ACCESS); + REQUIRE(!read.snapshots()[0].meshes.contains("E")); + } + + // read + { + openPMD::Series read( + filepath, + openPMD::Access::READ_RANDOM_ACCESS, + R"({"verify_homogeneous_extents": false})"); + auto E = read.snapshots()[0].meshes["E"]; + size_t i = 1; + for (auto const &component : E) + { + REQUIRE(component.second.getExtent() == openPMD::Extent{5 * i++}); + } + } +} + +void meshes_without_any_extent() +{ + // write + { + openPMD::Series write(filepath, openPMD::Access::CREATE); + auto it0 = write.writeIterations()[0]; + auto E = it0.meshes["E"]; + for (auto comp_id : {"x", "y", "z"}) + { + auto comp = E[comp_id]; + comp.resetDataset( + {openPMD::Datatype::FLOAT, + {openPMD::Dataset::UNDEFINED_EXTENT}}); + comp.makeConstant(0); + } + write.close(); + } + + // read + { + openPMD::Series read(filepath, openPMD::Access::READ_RANDOM_ACCESS); + REQUIRE(!read.snapshots()[0].meshes.contains("E")); + } + + // read + { + openPMD::Series read( + filepath, + openPMD::Access::READ_RANDOM_ACCESS, + R"({"verify_homogeneous_extents": false})"); + auto E = read.snapshots()[0].meshes["E"]; + for (auto const &component : E) + { + REQUIRE( + component.second.getExtent() == + openPMD::Extent{openPMD::Dataset::UNDEFINED_EXTENT}); + } + } +} + +auto components_without_extent() -> void +{ + particle_offset_without_extent(); + particles_without_any_extent(); + particles_without_inconsistent_extent(); + 
meshes_with_incomplete_extent(); + meshes_with_inconsistent_extent(); + meshes_without_any_extent(); +} +} // namespace components_without_extent diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index e4b3dead4f..ccb6cd803a 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -768,6 +768,11 @@ TEST_CASE("issue_1744_unique_ptrs_at_close_time", "[serial]") #endif } +TEST_CASE("components_without_extent", "[serial]") +{ + components_without_extent::components_without_extent(); +} + #if openPMD_HAVE_ADIOS2 TEST_CASE("close_and_reopen_test", "[serial]") { @@ -809,7 +814,9 @@ inline void empty_dataset_test(std::string const &file_ending) } { Series series( - "../samples/empty_datasets." + file_ending, Access::READ_ONLY); + "../samples/empty_datasets." + file_ending, + Access::READ_ONLY, + R"({"verify_homogeneous_extents": false})"); REQUIRE(series.iterations.contains(1)); REQUIRE(series.iterations.count(1) == 1); @@ -2722,7 +2729,8 @@ TEST_CASE("empty_alternate_fbpic", "[serial][hdf5]") { Series s = Series( "../samples/issue-sample/empty_alternate_fbpic_%T.h5", - Access::READ_ONLY); + Access::READ_ONLY, + R"({"verify_homogeneous_extents": false})"); REQUIRE(s.iterations.contains(50)); REQUIRE(s.iterations[50].particles.contains("electrons")); REQUIRE( @@ -5856,21 +5864,21 @@ void variableBasedSeries(std::string const &file) // this tests changing extents and dimensionalities // across iterations - auto E_y = iteration.meshes["E"]["y"]; + auto B_y = iteration.meshes["B"]["y"]; unsigned dimensionality = i % 3 + 1; unsigned len = i + 1; Extent changingExtent(dimensionality, len); - E_y.resetDataset({openPMD::Datatype::INT, changingExtent}); + B_y.resetDataset({openPMD::Datatype::INT, changingExtent}); std::vector changingData( std::pow(len, dimensionality), dimensionality); - E_y.storeChunk( + B_y.storeChunk( changingData, Offset(dimensionality, 0), changingExtent); // this tests datasets that are present in one iteration, but not // in others - auto 
E_z = iteration.meshes["E"][std::to_string(i)]; - E_z.resetDataset({Datatype::INT, {1}}); - E_z.makeConstant(i); + auto rho_i = iteration.meshes["rho"][std::to_string(i)]; + rho_i.resetDataset({Datatype::INT, {1}}); + rho_i.makeConstant(i); // this tests attributes that are present in one iteration, but not // in others iteration.meshes["E"].setAttribute("attr_" + std::to_string(i), i); @@ -5986,11 +5994,11 @@ void variableBasedSeries(std::string const &file) REQUIRE(chunk2.get()[i] == int(iteration.iterationIndex)); } - auto E_y = iteration.meshes["E"]["y"]; + auto B_y = iteration.meshes["B"]["y"]; unsigned dimensionality = iteration.iterationIndex % 3 + 1; unsigned len = iteration.iterationIndex + 1; Extent changingExtent(dimensionality, len); - REQUIRE(E_y.getExtent() == changingExtent); + REQUIRE(B_y.getExtent() == changingExtent); last_iteration_index = iteration.iterationIndex; @@ -6001,7 +6009,7 @@ void variableBasedSeries(std::string const &file) { // component is present <=> (otherIteration == i) REQUIRE( - iteration.meshes["E"].contains( + iteration.meshes["rho"].contains( std::to_string(otherIteration)) == (otherIteration == iteration.iterationIndex)); REQUIRE( @@ -6010,7 +6018,8 @@ void variableBasedSeries(std::string const &file) (otherIteration <= iteration.iterationIndex)); } REQUIRE( - iteration.meshes["E"][std::to_string(iteration.iterationIndex)] + iteration + .meshes["rho"][std::to_string(iteration.iterationIndex)] .getAttribute("value") .get() == int(iteration.iterationIndex)); REQUIRE( @@ -6742,7 +6751,11 @@ void extendDataset(std::string const &ext, std::string const &jsonConfig) } { - Series read(filename, Access::READ_ONLY, jsonConfig); + Series read( + filename, + Access::READ_ONLY, + json::merge( + jsonConfig, R"({"verify_homogeneous_extents": false})")); auto E_x = read.iterations[0].meshes["E"]["x"]; REQUIRE(E_x.getExtent() == Extent{10, 5}); auto chunk = E_x.loadChunk({0, 0}, {10, 5}); @@ -7068,8 +7081,11 @@ void 
unfinished_iteration_test( auto tryReading = [&config, file, encoding]( Access access, std::string const &additionalConfig = "{}") { + auto merged_config = json::merge( + json::merge(config, additionalConfig), + R"({"verify_homogeneous_extents": false})"); { - Series read(file, access, json::merge(config, additionalConfig)); + Series read(file, access, merged_config); std::vector iterations; std::cout << "Going to list iterations in " << file << ":" @@ -7100,7 +7116,7 @@ void unfinished_iteration_test( if (encoding == IterationEncoding::fileBased && access == Access::READ_ONLY) { - Series read(file, access, json::merge(config, additionalConfig)); + Series read(file, access, merged_config); if (additionalConfig == "{}") { // Eager parsing, defective iteration has already been removed diff --git a/test/python/unittest/API/APITest.py b/test/python/unittest/API/APITest.py index 588a723925..2441a1fa04 100644 --- a/test/python/unittest/API/APITest.py +++ b/test/python/unittest/API/APITest.py @@ -1905,9 +1905,9 @@ def makeIteratorRoundTrip(self, backend, file_ending): E_x.reset_dataset(DS(np.dtype("int"), extent)) E_x.store_chunk(data, [0], extent) - E_y = it.meshes["E"]["y"] - E_y.reset_dataset(DS(np.dtype("int"), [2, 2])) - span = E_y.store_chunk().current_buffer() + B_y = it.meshes["B"]["y"] + B_y.reset_dataset(DS(np.dtype("int"), [2, 2])) + span = B_y.store_chunk().current_buffer() span[0, 0] = 0 span[0, 1] = 1 span[1, 0] = 2 @@ -1930,8 +1930,8 @@ def makeIteratorRoundTrip(self, backend, file_ending): lastIterationIndex = it.iteration_index E_x = it.meshes["E"]["x"] chunk = E_x.load_chunk([0], extent) - E_y = it.meshes["E"]["y"] - chunk2 = E_y.load_chunk([0, 0], [2, 2]) + B_y = it.meshes["B"]["y"] + chunk2 = B_y.load_chunk([0, 0], [2, 2]) it.close() for i in range(len(data)):