Skip to content

Make shape an optional attribute for constant components #1661

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 19 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -805,6 +805,7 @@ if(openPMD_BUILD_TESTING)
test/Files_SerialIO/close_and_reopen_test.cpp
test/Files_SerialIO/filebased_write_test.cpp
test/Files_SerialIO/issue_1744_unique_ptrs_at_close_time.cpp
test/Files_SerialIO/components_without_extent.cpp
)
elseif(${test_name} STREQUAL "ParallelIO" AND openPMD_HAVE_MPI)
list(APPEND ${out_list}
Expand Down
5 changes: 5 additions & 0 deletions docs/source/details/backendconfig.rst
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ Using the Streaming API (i.e. ``SeriesInterface::readIteration()``) will do this
Parsing eagerly might be very expensive for a Series with many iterations, but will avoid bugs by forgotten calls to ``Iteration::open()``.
In complex environments, calling ``Iteration::open()`` on an already open environment does no harm (and does not incur additional runtime cost for additional ``open()`` calls).

As of openPMD-api 0.17.0, the parser verifies that all records within a mesh or within a particle species have consistent shapes / extents.
This is used for filling in the shape for constant components that do not define it.
In order to skip this check in the error case, the key ``{"verify_homogeneous_extents": false}`` may be set (alternatively ``export OPENPMD_VERIFY_HOMOGENEOUS_EXTENTS=0`` will do the same).
This will help read datasets with inconsistent metadata definitions.

The key ``resizable`` can be passed to ``Dataset`` options.
If set to ``{"resizable": true}``, this declares that it shall be allowed to increase the ``Extent`` of a ``Dataset`` via ``resetDataset()`` at a later time, i.e., after it has been first declared (and potentially written).
For HDF5, resizable Datasets come with a performance penalty.
Expand Down
21 changes: 18 additions & 3 deletions include/openPMD/Dataset.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,21 @@ class Dataset
*/
JOINED_DIMENSION = std::numeric_limits<std::uint64_t>::max(),
/**
* Some backends (i.e. JSON and TOML in template mode) support the
* creation of dataset with undefined datatype and extent.
* The extent should be given as {UNDEFINED_EXTENT} for that.
* In some use cases, the extent needs not be specified.
* For these, specify Extent{UNDEFINED_EXTENT}.
* Use cases:
*
* 1. Some backends (i.e. JSON and TOML in template mode) support the
* creation of dataset with undefined datatype and extent.
* The extent should be given as {UNDEFINED_EXTENT} for that.
* 2. With openPMD 2.0, the shape of constant components may be omitted
* in writing if it is defined somewhere else as part
* of the same Mesh / Species.
* (https://github.com/openPMD/openPMD-standard/pull/289)
* When reading such datasets, the openPMD-api will try to fill in
* the missing extents, so the extent for consistently-defined
* datasets should ideally not be reported by the read-side API
* as undefined.
*/
UNDEFINED_EXTENT = std::numeric_limits<std::uint64_t>::max() - 1
};
Expand Down Expand Up @@ -87,5 +99,8 @@ class Dataset

std::optional<size_t> joinedDimension() const;
static std::optional<size_t> joinedDimension(Extent const &);

bool undefinedExtent() const;
static bool undefinedExtent(Extent const &);
};
} // namespace openPMD
1 change: 1 addition & 0 deletions include/openPMD/IO/AbstractIOHandler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,7 @@ class AbstractIOHandler
internal::SeriesStatus m_seriesStatus = internal::SeriesStatus::Default;
IterationEncoding m_encoding = IterationEncoding::groupBased;
OpenpmdStandard m_standard = auxiliary::parseStandard(getStandardDefault());
bool m_verify_homogeneous_extents = true;
}; // AbstractIOHandler

} // namespace openPMD
1 change: 1 addition & 0 deletions include/openPMD/IO/IOTask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#include "openPMD/ChunkInfo.hpp"
#include "openPMD/Dataset.hpp"
#include "openPMD/Error.hpp"
#include "openPMD/IterationEncoding.hpp"
#include "openPMD/Streaming.hpp"
#include "openPMD/auxiliary/Export.hpp"
Expand Down
3 changes: 2 additions & 1 deletion include/openPMD/Record.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ class Record : public BaseRecord<RecordComponent>

void
flush_impl(std::string const &, internal::FlushParams const &) override;
void read();

[[nodiscard]] internal::HomogenizeExtents read();
}; // Record

template <typename T>
Expand Down
22 changes: 22 additions & 0 deletions include/openPMD/RecordComponent.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,8 @@ class RecordComponent : public BaseRecordComponent
* * Shrinking any dimension's extent.
* * Changing the number of dimensions.
*
* The dataset extent may be empty to indicate undefined extents.
*
* Backend support for resizing datasets:
* * JSON: Supported
* * ADIOS2: Supported as of ADIOS2 2.7.0
Expand Down Expand Up @@ -544,6 +546,26 @@ OPENPMD_protected
void verifyChunk(Datatype, Offset const &, Extent const &) const;
}; // RecordComponent

namespace internal
{
// Must put this after the definition of RecordComponent due to the
// deque<RecordComponent>
/**
 * Bookkeeping helper used by the read routines of meshes and particle
 * species (Mesh::read(), ParticleSpecies::read(), Record::read()).
 *
 * Usage pattern visible at the call sites: check_extent() is called for each
 * successfully read component, per-record results returned by Record::read()
 * are combined into the species-level instance via merge(), and homogenize()
 * is invoked exactly once at the end of parsing.
 *
 * NOTE(review): the implementation is not part of this header; the member
 * descriptions below are inferred from names and call sites -- confirm
 * against RecordComponent.cpp.
 */
struct HomogenizeExtents
{
// Presumably the components that were read without a defined extent and
// still need one filled in -- TODO confirm.
std::deque<RecordComponent> without_extent;
// Presumably the extent found on a component that does define one; empty
// until such a component has been seen -- TODO confirm.
std::optional<Extent> retrieved_extent;
// Call sites initialize this from
// AbstractIOHandler::m_verify_homogeneous_extents; defaults to true.
bool verify_homogeneous_extents = true;

// Default construction; leaves verify_homogeneous_extents at its in-class
// default unless the out-of-line definition overrides it.
explicit HomogenizeExtents();
// NOTE(review): this single-argument constructor is not `explicit`, so a
// bool converts implicitly to HomogenizeExtents, while the default
// constructor above is marked `explicit` -- confirm this is intended.
HomogenizeExtents(bool verify_homogeneous_extents);

// Called once per read component; `callsite` is the object whose read
// routine performs the check (the Mesh itself at the visible call sites).
void check_extent(Attributable const &callsite, RecordComponent &);
// Folds another instance (taken by value) into this one and returns *this.
auto merge(Attributable const &callsite, HomogenizeExtents)
-> HomogenizeExtents &;
// Rvalue-qualified: must be called as std::move(obj).homogenize(callsite),
// i.e. the object is not meant to be used afterwards.
void homogenize(Attributable const &callsite) &&;
};
} // namespace internal

} // namespace openPMD

#include "RecordComponent.tpp"
31 changes: 31 additions & 0 deletions include/openPMD/backend/Attributable.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
*/
#pragma once

#include "openPMD/Error.hpp"
#include "openPMD/IO/AbstractIOHandler.hpp"
#include "openPMD/ThrowError.hpp"
#include "openPMD/auxiliary/OutOfRangeMsg.hpp"
Expand All @@ -30,6 +31,7 @@
#include <exception>
#include <map>
#include <memory>
#include <optional>
#include <string>
#include <type_traits>
#include <vector>
Expand All @@ -55,6 +57,7 @@ namespace internal
{
class IterationData;
class SeriesData;
struct HomogenizeExtents;

class SharedAttributableData
{
Expand Down Expand Up @@ -106,6 +109,7 @@ namespace internal
friend class openPMD::Attributable;

using SharedData_t = std::shared_ptr<SharedAttributableData>;
using A_MAP = SharedData_t::element_type::A_MAP;

public:
AttributableData();
Expand Down Expand Up @@ -152,6 +156,32 @@ namespace internal
std::shared_ptr<typename T::Data_t>(self, [](auto const *) {}));
return res;
}

/** Mutable access to the attribute map held by the shared data object. */
inline auto attributes() -> A_MAP &
{
    auto &shared_data = operator*();
    return shared_data.m_attributes;
}
/** Read-only access to the attribute map held by the shared data object. */
[[nodiscard]] inline auto attributes() const -> A_MAP const &
{
    auto const &shared_data = operator*();
    return shared_data.m_attributes;
}
/**
 * Look up an attribute by name in the attribute map.
 *
 * @param name Key of the attribute to retrieve.
 * @return Reference to the stored attribute.
 * @throws error::ReadError (AffectedObject::Attribute, Reason::NotFound)
 *         if no attribute with that name exists.
 */
[[nodiscard]] inline auto readAttribute(std::string const &name) const
    -> Attribute const &
{
    auto const &attribute_map = attributes();
    auto const found = attribute_map.find(name);
    // Guard clause: report the missing key first, then return the hit.
    if (found == attribute_map.end())
    {
        throw error::ReadError(
            error::AffectedObject::Attribute,
            error::Reason::NotFound,
            std::nullopt,
            "Not found: '" + name + "'.");
    }
    return found->second;
}
};

template <typename, typename>
Expand Down Expand Up @@ -209,6 +239,7 @@ class Attributable
friend T &internal::makeOwning(T &self, Series);
friend class StatefulSnapshotsContainer;
friend class internal::AttributableData;
friend struct internal::HomogenizeExtents;

protected:
// tag for internal constructor
Expand Down
9 changes: 9 additions & 0 deletions src/Dataset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,4 +95,13 @@ std::optional<size_t> Dataset::joinedDimension(Extent const &extent)
}
return res;
}

bool Dataset::undefinedExtent() const
{
return undefinedExtent(extent);
}
/**
 * Whether the given extent is the undefined-extent marker, i.e. consists of
 * exactly one entry equal to Dataset::UNDEFINED_EXTENT.
 */
bool Dataset::undefinedExtent(Extent const &e)
{
    if (e.size() != 1)
    {
        return false;
    }
    return e.at(0) == Dataset::UNDEFINED_EXTENT;
}
} // namespace openPMD
12 changes: 12 additions & 0 deletions src/IO/ADIOS/ADIOS2IOHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -802,6 +802,12 @@ void ADIOS2IOHandlerImpl::createDataset(
"only is not possible.");
}

if (Dataset::undefinedExtent(parameters.extent))
{
throw error::OperationUnsupportedInBackend(
"ADIOS2", "No support for Datasets with undefined extent.");
}

if (!writable->written)
{
/* Sanitize name */
Expand Down Expand Up @@ -962,6 +968,12 @@ void ADIOS2IOHandlerImpl::extendDataset(
VERIFY_ALWAYS(
access::write(m_handler->m_backendAccess),
"[ADIOS2] Cannot extend datasets in read-only mode.");
if (Dataset::undefinedExtent(parameters.extent))
{
throw error::OperationUnsupportedInBackend(
"ADIOS2", "No support for Datasets with undefined extent.");
}

setAndGetFilePosition(writable);
auto file = refreshFileFromParent(writable, /* preferParentFile = */ false);
std::string name = nameOfVariable(writable);
Expand Down
10 changes: 10 additions & 0 deletions src/IO/HDF5/HDF5IOHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,11 @@ void HDF5IOHandlerImpl::createDataset(
error::throwOperationUnsupportedInBackend(
"HDF5", "Joined Arrays currently only supported in ADIOS2");
}
else if (Dataset::undefinedExtent(parameters.extent))
{
throw error::OperationUnsupportedInBackend(
"HDF5", "No support for Datasets with undefined extent.");
}

if (!writable->written)
{
Expand Down Expand Up @@ -845,6 +850,11 @@ void HDF5IOHandlerImpl::extendDataset(
error::throwOperationUnsupportedInBackend(
"HDF5", "Joined Arrays currently only supported in ADIOS2");
}
else if (Dataset::undefinedExtent(parameters.extent))
{
throw error::OperationUnsupportedInBackend(
"HDF5", "No support for Datasets with undefined extent.");
}

auto res = getFile(writable);
if (!res)
Expand Down
3 changes: 3 additions & 0 deletions src/Iteration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,9 @@ void Iteration::readMeshes(std::string const &meshesPath)
IOHandler()->enqueue(IOTask(&m, aList));
IOHandler()->flush(internal::defaultFlushParams);

// Find constant scalar meshes. shape generally required for meshes,
// shape also required for scalars.
// https://github.com/openPMD/openPMD-standard/pull/289
auto att_begin = aList.attributes->begin();
auto att_end = aList.attributes->end();
auto value = std::find(att_begin, att_end, "value");
Expand Down
10 changes: 10 additions & 0 deletions src/Mesh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "openPMD/Mesh.hpp"
#include "openPMD/Error.hpp"
#include "openPMD/IO/AbstractIOHandler.hpp"
#include "openPMD/RecordComponent.hpp"
#include "openPMD/Series.hpp"
#include "openPMD/ThrowError.hpp"
#include "openPMD/UnitDimension.hpp"
Expand Down Expand Up @@ -430,6 +431,8 @@ void Mesh::flush_impl(

void Mesh::read()
{
internal::HomogenizeExtents homogenizeExtents(
IOHandler()->m_verify_homogeneous_extents);
internal::EraseStaleEntries<Mesh &> map{*this};

using DT = Datatype;
Expand Down Expand Up @@ -579,6 +582,7 @@ void Mesh::read()
if (scalar())
{
T_RecordComponent::read();
homogenizeExtents.check_extent(*this, *this);
}
else
{
Expand All @@ -603,7 +607,9 @@ void Mesh::read()
<< "' and will skip it due to read error:\n"
<< err.what() << std::endl;
map.forget(component);
continue;
}
homogenizeExtents.check_extent(*this, rc);
}

Parameter<Operation::LIST_DATASETS> dList;
Expand All @@ -630,10 +636,14 @@ void Mesh::read()
<< "' and will skip it due to read error:\n"
<< err.what() << std::endl;
map.forget(component);
continue;
}
homogenizeExtents.check_extent(*this, rc);
}
}

std::move(homogenizeExtents).homogenize(*this);

readBase();

readAttributes(ReadMode::FullyReread);
Expand Down
18 changes: 14 additions & 4 deletions src/ParticleSpecies.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
* If not, see <http://www.gnu.org/licenses/>.
*/
#include "openPMD/ParticleSpecies.hpp"
#include "openPMD/RecordComponent.hpp"
#include "openPMD/Series.hpp"
#include "openPMD/auxiliary/DerefDynamicCast.hpp"
#include "openPMD/backend/Writable.hpp"
Expand All @@ -35,6 +36,8 @@ ParticleSpecies::ParticleSpecies()

void ParticleSpecies::read()
{
internal::HomogenizeExtents homogenizeExtents(
IOHandler()->m_verify_homogeneous_extents);
/* obtain all non-scalar records */
Parameter<Operation::LIST_PATHS> pList;
IOHandler()->enqueue(IOTask(this, pList));
Expand Down Expand Up @@ -76,17 +79,17 @@ void ParticleSpecies::read()
auto att_begin = aList.attributes->begin();
auto att_end = aList.attributes->end();
auto value = std::find(att_begin, att_end, "value");
auto shape = std::find(att_begin, att_end, "shape");
if (value != att_end && shape != att_end)
if (value != att_end)
{
RecordComponent &rc = r;
IOHandler()->enqueue(IOTask(&rc, pOpen));
IOHandler()->flush(internal::defaultFlushParams);
rc.get().m_isConstant = true;
}
internal::HomogenizeExtents recordExtents;
try
{
r.read();
recordExtents = r.read();
}
catch (error::ReadError const &err)
{
Expand All @@ -95,7 +98,9 @@ void ParticleSpecies::read()
<< err.what() << std::endl;

map.forget(record_name);
continue;
}
homogenizeExtents.merge(*this, std::move(recordExtents));
}
}

Expand All @@ -115,6 +120,7 @@ void ParticleSpecies::read()
Parameter<Operation::OPEN_DATASET> dOpen;
for (auto const &record_name : *dList.datasets)
{
internal::HomogenizeExtents recordExtents;
try
{
Record &r = map[record_name];
Expand All @@ -127,7 +133,7 @@ void ParticleSpecies::read()
rc.setWritten(false, Attributable::EnqueueAsynchronously::No);
rc.resetDataset(Dataset(*dOpen.dtype, *dOpen.extent));
rc.setWritten(true, Attributable::EnqueueAsynchronously::No);
r.read();
recordExtents = r.read();
}
catch (error::ReadError const &err)
{
Expand All @@ -138,9 +144,13 @@ void ParticleSpecies::read()
map.forget(record_name);
//(*this)[record_name].erase(RecordComponent::SCALAR);
// this->erase(record_name);
continue;
}
homogenizeExtents.merge(*this, std::move(recordExtents));
}

std::move(homogenizeExtents).homogenize(*this);

readAttributes(ReadMode::FullyReread);
}

Expand Down
Loading
Loading