From c35ec6ad21176a25968e880b0fc3cb0814eef533 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 26 Sep 2025 16:29:30 +0200 Subject: [PATCH 01/16] wip first commit --- CMakeLists.txt | 3 +- .../openPMD/toolkit/ExternalBlockStorage.hpp | 49 ++++++ .../toolkit/ExternalBlockStorage_internal.hpp | 19 +++ src/toolkit/ExternalBlockStorage.cpp | 139 ++++++++++++++++++ 4 files changed, 209 insertions(+), 1 deletion(-) create mode 100644 include/openPMD/toolkit/ExternalBlockStorage.hpp create mode 100644 include/openPMD/toolkit/ExternalBlockStorage_internal.hpp create mode 100644 src/toolkit/ExternalBlockStorage.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 11d09e498e..f484117b2d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -431,7 +431,8 @@ set(CORE_SOURCE src/snapshots/IteratorTraits.cpp src/snapshots/RandomAccessIterator.cpp src/snapshots/Snapshots.cpp - src/snapshots/StatefulIterator.cpp) + src/snapshots/StatefulIterator.cpp + src/toolkit/ExternalBlockStorage.cpp) set(IO_SOURCE src/IO/AbstractIOHandler.cpp src/IO/AbstractIOHandlerImpl.cpp diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp new file mode 100644 index 0000000000..d540e8d7cc --- /dev/null +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include +#include + +namespace openPMD +{ +class ExternalBlockStorage; +} + +namespace openPMD::internal +{ +struct ExternalBlockStorageBackend +{ + virtual void + put(std::string const &identifier, void const *data, size_t len) = 0; + virtual ~ExternalBlockStorageBackend(); +}; + +struct StdioBuilder +{ + std::string m_directory; + std::optional m_openMode = std::nullopt; + + auto setDirectory(std::string directory) -> StdioBuilder &; + auto setOpenMode(std::string openMode) -> StdioBuilder &; + + operator ExternalBlockStorage(); +}; +} // namespace openPMD::internal + +namespace openPMD +{ +class ExternalBlockStorage +{ +private: + std::unique_ptr m_worker; + ExternalBlockStorage( + std::unique_ptr); + + friend struct internal::StdioBuilder; + +public: + static auto makeStdioSession(std::string directory) + -> internal::StdioBuilder; +}; +} // namespace openPMD diff --git a/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp b/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp new file mode 100644 index 0000000000..2adaaa05f6 --- /dev/null +++ b/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp @@ -0,0 +1,19 @@ +#pragma once + +#include "openPMD/toolkit/ExternalBlockStorage.hpp" + +namespace openPMD::internal +{ +struct ExternalBlockStorageStdio : ExternalBlockStorageBackend +{ +private: + std::string m_directory; + std::string m_openMode; + +public: + ExternalBlockStorageStdio(std::string directory, std::string openMode); + void + put(std::string const &identifier, void const *data, size_t len) override; + ~ExternalBlockStorageStdio() override; +}; +} // namespace openPMD::internal diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp new file mode 100644 index 0000000000..5b5a7dc2ac --- /dev/null +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -0,0 +1,139 @@ + +#include "openPMD/toolkit/ExternalBlockStorage.hpp" + +#include "openPMD/auxiliary/Filesystem.hpp" +#include "openPMD/toolkit/ExternalBlockStorage_internal.hpp" + +#include +#include +#include + +namespace +{ +auto concat_filepath(std::string const &s1, std::string const &s2) + -> std::string +{ + if (s1.empty()) + { + return s2; + } + if (s2.empty()) + { + return s1; + } + bool ends_with_slash = + *s1.crbegin() == openPMD::auxiliary::directory_separator; + bool starts_with_slash = + *s2.cbegin() == openPMD::auxiliary::directory_separator; + + if (ends_with_slash ^ starts_with_slash) + { + return s1 + s2; + } + else if (ends_with_slash && starts_with_slash) + { + return s1 + (s2.c_str() + 1); + } + else + { + return s1 + openPMD::auxiliary::directory_separator + s2; + } +} +} // namespace + +namespace openPMD::internal +{ +ExternalBlockStorageBackend::~ExternalBlockStorageBackend() = default; + +ExternalBlockStorageStdio::ExternalBlockStorageStdio( + std::string directory, std::string openMode) + : m_directory(std::move(directory)), m_openMode(std::move(openMode)) +{ + if (m_directory.empty()) + { + throw std::invalid_argument( + "ExternalBlockStorageStdio: directory cannot be empty"); + } + + // Ensure the directory exists and is writable + if (!auxiliary::create_directories(m_directory)) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to create or access " + "directory: " + + m_directory); + } +} + +ExternalBlockStorageStdio::~ExternalBlockStorageStdio() = default; + +void ExternalBlockStorageStdio::put( + std::string const &identifier, void const *data, size_t len) +{ + if (len == 0) + { + return; + } + + // Generate a unique filename using a simple counter approach (can be + // extended) + static size_t counter = 0; + std::string filename = m_directory + "/block_" + std::to_string(counter++); + std::string filepath = concat_filepath(m_directory, identifier); + + FILE *file = std::fopen(filepath.c_str(), "wb"); + if (!file) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to open file for writing: " + + filepath); + } + + size_t written = std::fwrite(data, 1, len, file); + if (written != len) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to write full data to file: " + + filepath); + } + + if (std::fclose(file) != 0) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to close file after writing: " + + filepath); + } +} +} // namespace openPMD::internal +namespace openPMD +{ +auto ExternalBlockStorage::makeStdioSession(std::string directory) + -> internal::StdioBuilder +{ + return internal::StdioBuilder{std::move(directory)}; +} + +auto internal::StdioBuilder::setDirectory(std::string directory) + -> StdioBuilder & +{ + m_directory = std::move(directory); + return *this; +} +auto internal::StdioBuilder::setOpenMode(std::string openMode) -> StdioBuilder & +{ + m_openMode = std::move(openMode); + return *this; +} + +internal::StdioBuilder::operator ExternalBlockStorage() +{ + return ExternalBlockStorage{ + std::make_unique( + std::move(m_directory), std::move(m_openMode).value_or("wb"))}; +} + +ExternalBlockStorage::ExternalBlockStorage( + std::unique_ptr worker) + : m_worker(std::move(worker)) +{} +} // namespace openPMD From ddd55d4e4999525f72d2642415bc7dff8a039907 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 26 Sep 2025 18:48:45 +0200 Subject: [PATCH 02/16] Some cleaning --- .../openPMD/toolkit/ExternalBlockStorage.hpp | 6 ++- .../toolkit/ExternalBlockStorage_internal.hpp | 4 +- src/toolkit/ExternalBlockStorage.cpp | 49 ++++++++++--------- 3 files changed, 31 insertions(+), 28 deletions(-) diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index d540e8d7cc..9b8cd1da44 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -14,8 +14,9 @@ namespace openPMD::internal { struct ExternalBlockStorageBackend { - virtual void - put(std::string const &identifier, void const *data, size_t len) = 0; + virtual auto + put(std::string const &identifier, void const *data, size_t len) + -> std::string = 0; virtual ~ExternalBlockStorageBackend(); }; @@ -28,6 +29,7 @@ struct StdioBuilder auto setOpenMode(std::string openMode) -> StdioBuilder &; operator ExternalBlockStorage(); + auto build() -> ExternalBlockStorage; }; } // namespace openPMD::internal diff --git a/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp b/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp index 2adaaa05f6..10a3e724be 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp @@ -12,8 +12,8 @@ struct ExternalBlockStorageStdio : ExternalBlockStorageBackend public: ExternalBlockStorageStdio(std::string directory, std::string openMode); - void - put(std::string const &identifier, void const *data, size_t len) override; + auto put(std::string const &identifier, void const *data, size_t len) + -> std::string override; ~ExternalBlockStorageStdio() override; }; } // namespace openPMD::internal diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 5b5a7dc2ac..6344330ee3 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -67,20 +67,15 @@ ExternalBlockStorageStdio::ExternalBlockStorageStdio( ExternalBlockStorageStdio::~ExternalBlockStorageStdio() = default; -void ExternalBlockStorageStdio::put( - std::string const &identifier, void const *data, size_t len) +auto ExternalBlockStorageStdio::put( + std::string const &identifier, void const *data, size_t len) -> std::string { + std::string filepath = concat_filepath(m_directory, identifier); if (len == 0) { - return; + return filepath; } - // Generate a unique filename using a simple counter approach (can be - // extended) - static size_t counter = 0; - std::string filename = m_directory + "/block_" + std::to_string(counter++); - std::string filepath = concat_filepath(m_directory, identifier); - FILE *file = std::fopen(filepath.c_str(), "wb"); if (!file) { @@ -103,33 +98,39 @@ void ExternalBlockStorageStdio::put( "ExternalBlockStorageStdio: failed to close file after writing: " + filepath); } -} -} // namespace openPMD::internal -namespace openPMD -{ -auto ExternalBlockStorage::makeStdioSession(std::string directory) - -> internal::StdioBuilder -{ - return internal::StdioBuilder{std::move(directory)}; + + return filepath; } -auto internal::StdioBuilder::setDirectory(std::string directory) - -> StdioBuilder & +auto StdioBuilder::setDirectory(std::string directory) -> StdioBuilder & { m_directory = std::move(directory); return *this; } -auto internal::StdioBuilder::setOpenMode(std::string openMode) -> StdioBuilder & +auto StdioBuilder::setOpenMode(std::string openMode) -> StdioBuilder & { m_openMode = std::move(openMode); return *this; } -internal::StdioBuilder::operator ExternalBlockStorage() +StdioBuilder::operator ExternalBlockStorage() { - return ExternalBlockStorage{ - std::make_unique( - std::move(m_directory), std::move(m_openMode).value_or("wb"))}; + return ExternalBlockStorage{std::make_unique( + std::move(m_directory), std::move(m_openMode).value_or("wb"))}; +} + +auto StdioBuilder::build() -> ExternalBlockStorage +{ + return *this; +} +} // namespace openPMD::internal + +namespace openPMD +{ +auto ExternalBlockStorage::makeStdioSession(std::string directory) + -> internal::StdioBuilder +{ + return internal::StdioBuilder{std::move(directory)}; } ExternalBlockStorage::ExternalBlockStorage( From fffa52c3e8e23c2682416878c7487c7347cc941f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 26 Sep 2025 20:15:48 +0200 Subject: [PATCH 03/16] Hmm maybe usable --- .../openPMD/toolkit/ExternalBlockStorage.hpp | 18 +++ src/toolkit/ExternalBlockStorage.cpp | 143 +++++++++++++++++- 2 files changed, 156 insertions(+), 5 deletions(-) diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index 9b8cd1da44..318cfbdc87 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -1,5 +1,9 @@ #pragma once +#include "openPMD/Dataset.hpp" + +#include + #include #include #include @@ -45,7 +49,21 @@ class ExternalBlockStorage friend struct internal::StdioBuilder; public: + explicit ExternalBlockStorage(); + static auto makeStdioSession(std::string directory) -> internal::StdioBuilder; + + // returns created JSON key + template + auto store( + Extent globalExtent, + Offset blockOffset, + Extent blockExtent, + nlohmann::json &fullJsonDataset, + nlohmann::json::json_pointer const &path, + T const *data) -> std::string; + + static void sanitizeString(std::string &s); }; } // namespace openPMD diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 6344330ee3..27404259ff 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -1,12 +1,17 @@ #include "openPMD/toolkit/ExternalBlockStorage.hpp" +#include "openPMD/DatatypeMacros.hpp" #include "openPMD/auxiliary/Filesystem.hpp" #include "openPMD/toolkit/ExternalBlockStorage_internal.hpp" #include #include +#include +#include #include +#include +#include namespace { @@ -70,7 +75,10 @@ ExternalBlockStorageStdio::~ExternalBlockStorageStdio() = default; auto ExternalBlockStorageStdio::put( std::string const &identifier, void const *data, size_t len) -> std::string { - std::string filepath = concat_filepath(m_directory, identifier); + auto sanitized = identifier; + ExternalBlockStorage::sanitizeString(sanitized); + std::string filepath = concat_filepath(m_directory, sanitized); + if (len == 0) { return filepath; @@ -127,14 +135,139 @@ auto StdioBuilder::build() -> ExternalBlockStorage namespace openPMD { +ExternalBlockStorage::ExternalBlockStorage() = default; +ExternalBlockStorage::ExternalBlockStorage( + std::unique_ptr worker) + : m_worker(std::move(worker)) +{} + auto ExternalBlockStorage::makeStdioSession(std::string directory) -> internal::StdioBuilder { return internal::StdioBuilder{std::move(directory)}; } -ExternalBlockStorage::ExternalBlockStorage( - std::unique_ptr worker) - : m_worker(std::move(worker)) -{} +template +auto ExternalBlockStorage::store( + Extent globalExtent, + Offset blockOffset, + Extent blockExtent, + nlohmann::json &fullJsonDataset, + nlohmann::json::json_pointer const &path, + T const *data) -> std::string +{ + // JSON Identifier: running counter. + // Do not use an array to avoid reindexing upon deletion. + + // Filesystem Identifier: JSON path + running counter. + + // For each externally handled data block, store: + // 1. Filesystem identifier + // 2. Offset, Extent + auto &dataset = fullJsonDataset[path]; + + // running_index denotes the last *used* block index in the dataset + using running_index_t = uint64_t; + running_index_t running_index = [&]() -> running_index_t { + if (auto it = dataset.find("_running_index"); it != dataset.end()) + { + auto res = it->get(); + ++res; + *it = res; + return res; + } + else + { + dataset["_running_index"] = 0; + return 0; + } + }(); + + constexpr size_t padding = 6; + std::string index_as_str = [running_index]() { + auto res = std::to_string(running_index); + auto size = res.size(); + if (size >= padding) + { + return res; + } + std::stringstream padded; + for (size_t i = 0; i < padding - size; ++i) + { + padded << '0'; + } + padded << res; + return padded.str(); + }(); + + if (dataset.contains(index_as_str)) + { + throw std::runtime_error( + "Inconsistent state: Index " + index_as_str + " already in use."); + } + + auto check_metadata = [&dataset](char const *key, auto const &value) { + using value_t = + std::remove_reference_t>; + if (auto it = dataset.find(key); it != dataset.end()) + { + auto const &stored_value = it->get(); + if (stored_value != value) + { + throw std::runtime_error( + "Inconsistent chunk storage in key " + std::string(key) + + "."); + } + } + else + { + dataset[key] = value; + } + }; + std::string type = typeid(T).name(); // TODO use sth more portable + check_metadata("_type", type); + check_metadata("_byte_width", sizeof(T)); + check_metadata("_extent", globalExtent); + + auto &block = dataset[index_as_str]; + block["offset"] = blockOffset; + block["extent"] = blockExtent; + std::stringstream filesystem_identifier; + filesystem_identifier << path.to_string() << "--" << index_as_str << ".dat"; + auto escaped_filesystem_identifier = m_worker->put( + filesystem_identifier.str(), + data, + std::accumulate( + blockExtent.begin(), + blockExtent.end(), + sizeof(T), + [](size_t left, size_t right) { return left * right; })); + block["external_ref"] = escaped_filesystem_identifier; + return index_as_str; +} + +void ExternalBlockStorage::sanitizeString(std::string &s) +{ + // Replace invalid characters with underscore + for (char &c : s) + { + if (c == '/' || c == '\\' || c == ':' || c == '*' || c == '?' || + c == '"' || c == '<' || c == '>' || c == '|' || c == '\n' || + c == '\r' || c == '\t' || c == '\0' || c == ' ') + { + c = '_'; + } + } +} + +#define OPENPMD_INSTANTIATE(type) \ + template auto ExternalBlockStorage::store( \ + Extent globalExtent, \ + Offset blockOffset, \ + Extent blockExtent, \ + nlohmann::json & fullJsonDataset, \ + nlohmann::json::json_pointer const &path, \ + type const *data) -> std::string; +OPENPMD_FOREACH_DATASET_DATATYPE(OPENPMD_INSTANTIATE) +#undef OPENPMD_INSTANTIATE } // namespace openPMD From 946ba158f8fc3ff09f5845718b28f64870e2323b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 26 Sep 2025 20:39:19 +0200 Subject: [PATCH 04/16] Quickly use this in the JSON backend bin/openpmd-pipe --infile samples/git-sample/data%T.bp --outfile data.json --outconfig "$(echo -e 'json.dataset.mode = "template"\njson.attribute.mode = "short"')" --- include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp | 3 ++ src/IO/JSON/JSONIOHandlerImpl.cpp | 49 +++++++++++++++---- 2 files changed, 43 insertions(+), 9 deletions(-) diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index 7c02f38ddd..82d417038c 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -29,6 +29,7 @@ #include "openPMD/auxiliary/JSON_internal.hpp" #include "openPMD/backend/Variant_internal.hpp" #include "openPMD/config.hpp" +#include "openPMD/toolkit/ExternalBlockStorage.hpp" #include #include @@ -177,6 +178,8 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl std::string originalExtension); #endif + ExternalBlockStorage externalBlockStorage; + void init(openPMD::json::TracingJSON config); ~JSONIOHandlerImpl() override; diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 88e221bea5..7e778718fa 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -33,6 +33,7 @@ #include "openPMD/auxiliary/TypeTraits.hpp" #include "openPMD/backend/Attribute.hpp" #include "openPMD/backend/Writable.hpp" +#include "openPMD/toolkit/ExternalBlockStorage.hpp" #include #include @@ -440,6 +441,9 @@ void JSONIOHandlerImpl::init(openPMD::json::TracingJSON config) (void)_; warnUnusedJson(backendConfig.value()); } + + externalBlockStorage = + ExternalBlockStorage::makeStdioSession("./external_blocks/"); } JSONIOHandlerImpl::~JSONIOHandlerImpl() = default; @@ -1139,6 +1143,22 @@ void JSONIOHandlerImpl::deleteAttribute( j.erase(parameters.name); } +namespace +{ + struct StoreExternally + { + template + static void call( + ExternalBlockStorage &blockStorage, void const *ptr, Args &&...args) + { + blockStorage.store( + std::forward(args)..., static_cast(ptr)); + } + + static constexpr char const *errorMsg = "StoreExternally"; + }; +} // namespace + void JSONIOHandlerImpl::writeDataset( Writable *writable, Parameter ¶meters) { @@ -1148,21 +1168,32 @@ void JSONIOHandlerImpl::writeDataset( auto pos = setAndGetFilePosition(writable); auto file = refreshFileFromParent(writable); - auto &j = obtainJsonContents(writable); + auto filePosition = setAndGetFilePosition(writable, false); + auto &jsonRoot = *obtainJsonContents(file); + auto &j = jsonRoot[filePosition->id]; switch (verifyDataset(parameters, j)) { case DatasetMode::Dataset: break; case DatasetMode::Template: - if (!m_datasetMode.m_skipWarnings) - { - std::cerr - << "[JSON/TOML backend: Warning] Trying to write data to a " - "template dataset. Will skip." - << '\n'; - m_datasetMode.m_skipWarnings = true; - } + switchDatasetType( + parameters.dtype, + externalBlockStorage, + parameters.data.get(), + j.at("extent").get(), + parameters.offset, + parameters.extent, + jsonRoot, + filePosition->id); + // if (!m_datasetMode.m_skipWarnings) + // { + // std::cerr + // << "[JSON/TOML backend: Warning] Trying to write data to a " + // "template dataset. Will skip." + // << '\n'; + // m_datasetMode.m_skipWarnings = true; + // } return; } From 07cae4e2046da183d6c74b9309e883fb8a2d1d9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 29 Sep 2025 08:42:39 +0200 Subject: [PATCH 05/16] Better and generalized handling for datatypes --- include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp | 40 +++++++++++++++++++ .../openPMD/toolkit/ExternalBlockStorage.hpp | 13 +++++- src/IO/JSON/JSONIOHandlerImpl.cpp | 13 +++--- src/toolkit/ExternalBlockStorage.cpp | 15 ++++--- 4 files changed, 70 insertions(+), 11 deletions(-) diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index 82d417038c..f8f3cc49c0 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -154,6 +154,46 @@ void from_json(const nlohmann::json &j, std::complex &p) } } // namespace std +namespace openPMD::internal +{ +auto jsonDatatypeToString(Datatype dt) -> std::string; + +struct JsonDatatypeHandling +{ + template + static auto encodeDatatype(nlohmann::json &j) -> bool + { + auto const &needed_datatype = + jsonDatatypeToString(determineDatatype()); + if (auto it = j.find("datatype"); it != j.end()) + { + return it.value().get() == needed_datatype; + } + else + { + j["datatype"] = needed_datatype; + return true; + } + } + + template + static auto decodeDatatype(nlohmann::json const &j, Args &&...args) -> bool + { + if (auto it = j.find("datatype"); it != j.end()) + { + switchDatasetType( + stringToDatatype(it.value().get()), + std::forward(args)...); + return true; + } + else + { + return false; + } + } +}; +} // namespace openPMD::internal + namespace openPMD { class JSONIOHandlerImpl : public AbstractIOHandlerImpl diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index 318cfbdc87..5bbb87091e 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -39,6 +39,17 @@ struct StdioBuilder namespace openPMD { +// used nowhere, just shows the signatures +// TODO: replace this with a concept upon switching to C++20 +struct DatatypeHandling_Interface +{ + template + static auto encodeDatatype(nlohmann::json &) -> bool; + + template + static auto decodeDatatype(nlohmann::json const &j, Args &&...args) -> bool; +}; + class ExternalBlockStorage { private: @@ -55,7 +66,7 @@ class ExternalBlockStorage -> internal::StdioBuilder; // returns created JSON key - template + template auto store( Extent globalExtent, Offset blockOffset, diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 7e778718fa..10e09add1b 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -163,7 +163,10 @@ namespace } } } +} // namespace +namespace internal +{ // Does the same as datatypeToString(), but this makes sure that we don't // accidentally change the JSON schema by modifying datatypeToString() std::string jsonDatatypeToString(Datatype dt) @@ -252,7 +255,7 @@ namespace } return "Unreachable!"; } -} // namespace +} // namespace internal auto JSONIOHandlerImpl::retrieveDatasetMode( openPMD::json::TracingJSON &config) const -> DatasetMode_s @@ -637,7 +640,7 @@ void JSONIOHandlerImpl::createDataset( } setAndGetFilePosition(writable, name); auto &dset = jsonVal[name]; - dset["datatype"] = jsonDatatypeToString(parameter.dtype); + dset["datatype"] = internal::jsonDatatypeToString(parameter.dtype); switch (localMode) { @@ -1151,7 +1154,7 @@ namespace static void call( ExternalBlockStorage &blockStorage, void const *ptr, Args &&...args) { - blockStorage.store( + blockStorage.store( std::forward(args)..., static_cast(ptr)); } @@ -1235,7 +1238,7 @@ void JSONIOHandlerImpl::writeAttribute( { case AttributeMode::Long: (*jsonVal)[filePosition->id]["attributes"][name] = { - {"datatype", jsonDatatypeToString(parameter.dtype)}, + {"datatype", internal::jsonDatatypeToString(parameter.dtype)}, {"value", value}}; break; case AttributeMode::Short: @@ -2397,7 +2400,7 @@ nlohmann::json JSONIOHandlerImpl::platformSpecifics() Datatype::BOOL}; for (auto &datatype : datatypes) { - res[jsonDatatypeToString(datatype)] = toBytes(datatype); + res[internal::jsonDatatypeToString(datatype)] = toBytes(datatype); } return res; } diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 27404259ff..a33039df86 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -2,6 +2,7 @@ #include "openPMD/toolkit/ExternalBlockStorage.hpp" #include "openPMD/DatatypeMacros.hpp" +#include "openPMD/IO/JSON/JSONIOHandlerImpl.hpp" #include "openPMD/auxiliary/Filesystem.hpp" #include "openPMD/toolkit/ExternalBlockStorage_internal.hpp" @@ -147,7 +148,7 @@ auto ExternalBlockStorage::makeStdioSession(std::string directory) return internal::StdioBuilder{std::move(directory)}; } -template +template auto ExternalBlockStorage::store( Extent globalExtent, Offset blockOffset, @@ -224,8 +225,10 @@ auto ExternalBlockStorage::store( dataset[key] = value; } }; - std::string type = typeid(T).name(); // TODO use sth more portable - check_metadata("_type", type); + if (!DatatypeHandling::template encodeDatatype(dataset)) + { + throw std::runtime_error("Inconsistent chunk storage in datatype."); + } check_metadata("_byte_width", sizeof(T)); check_metadata("_extent", globalExtent); @@ -260,14 +263,16 @@ void ExternalBlockStorage::sanitizeString(std::string &s) } } -#define OPENPMD_INSTANTIATE(type) \ - template auto ExternalBlockStorage::store( \ +#define OPENPMD_INSTANTIATE_DATATYPEHANDLING(datatypehandling, type) \ + template auto ExternalBlockStorage::store( \ Extent globalExtent, \ Offset blockOffset, \ Extent blockExtent, \ nlohmann::json & fullJsonDataset, \ nlohmann::json::json_pointer const &path, \ type const *data) -> std::string; +#define OPENPMD_INSTANTIATE(type) \ + OPENPMD_INSTANTIATE_DATATYPEHANDLING(internal::JsonDatatypeHandling, type) OPENPMD_FOREACH_DATASET_DATATYPE(OPENPMD_INSTANTIATE) #undef OPENPMD_INSTANTIATE } // namespace openPMD From a7a91f42348ef4c1d71ffd6552cc1874de166245 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 29 Sep 2025 09:38:54 +0200 Subject: [PATCH 06/16] structure for aws --- CMakeLists.txt | 4 + .../openPMD/toolkit/ExternalBlockStorage.hpp | 45 ++++++ .../toolkit/ExternalBlockStorage_internal.hpp | 15 ++ src/toolkit/ExternalBlockStorage.cpp | 128 +++++++++++++++++- 4 files changed, 190 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f484117b2d..5e8ac693c6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -382,6 +382,8 @@ else() endif() unset(openPMD_REQUIRED_ADIOS2_COMPONENTS) +find_package(AWSSDK REQUIRED COMPONENTS s3) + # external library: pybind11 (optional) include(${openPMD_SOURCE_DIR}/cmake/dependencies/pybind11.cmake) @@ -552,6 +554,8 @@ if(openPMD_HAVE_ADIOS2) endif() endif() +target_link_libraries(openPMD PUBLIC ${AWSSDK_LIBRARIES}) + # Runtime parameter and API status checks ("asserts") if(openPMD_USE_VERIFY) target_compile_definitions(openPMD PRIVATE openPMD_USE_VERIFY=1) diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index 5bbb87091e..d69db5a16b 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -2,9 +2,11 @@ #include "openPMD/Dataset.hpp" +#include #include #include +#include #include #include #include @@ -35,6 +37,41 @@ struct StdioBuilder operator ExternalBlockStorage(); auto build() -> ExternalBlockStorage; }; + +struct AwsBuilder +{ + struct init_credentials_tag_t + {}; + static constexpr init_credentials_tag_t init_credentials_tag = {}; + + AwsBuilder( + std::string bucketName, std::string accessKeyId, std::string secretKey); + + enum class Scheme : uint8_t + { + HTTP, + HTTPS + }; + std::string m_bucketName; + std::string m_accessKeyId; + std::string m_secretKey; + std::optional m_sessionToken; + std::initializer_list m_credentials; + std::optional m_endpointOverride; + std::optional m_region; + std::optional m_scheme; + + auto setBucketName(std::string bucketName) -> AwsBuilder &; + auto setCredentials(std::string accessKeyId, std::string secretKey) + -> AwsBuilder &; + auto setSessionToken(std::string sessionToken) -> AwsBuilder &; + auto setEndpointOverride(std::string endpoint) -> AwsBuilder &; + auto setRegion(std::string regionName) -> AwsBuilder &; + auto setScheme(Scheme s) -> AwsBuilder &; + + operator ExternalBlockStorage(); + auto build() -> ExternalBlockStorage; +}; } // namespace openPMD::internal namespace openPMD @@ -58,12 +95,17 @@ class ExternalBlockStorage std::unique_ptr); friend struct internal::StdioBuilder; + friend struct internal::AwsBuilder; public: explicit ExternalBlockStorage(); static auto makeStdioSession(std::string directory) -> internal::StdioBuilder; + template + static auto makeAwsSession( + std::string bucketName, std::string accessKeyId, std::string secretKey) + -> internal::AwsBuilder; // returns created JSON key template @@ -77,4 +119,7 @@ class ExternalBlockStorage static void sanitizeString(std::string &s); }; + +// Implementations + } // namespace openPMD diff --git a/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp b/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp index 10a3e724be..2ad77e7965 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp @@ -2,6 +2,8 @@ #include "openPMD/toolkit/ExternalBlockStorage.hpp" +#include + namespace openPMD::internal { struct ExternalBlockStorageStdio : ExternalBlockStorageBackend @@ -16,4 +18,17 @@ struct ExternalBlockStorageStdio : ExternalBlockStorageBackend -> std::string override; ~ExternalBlockStorageStdio() override; }; + +struct ExternalBlockStorageAws : ExternalBlockStorageBackend +{ +private: + Aws::S3::S3Client m_client; + std::string m_bucketName; + +public: + ExternalBlockStorageAws(Aws::S3::S3Client, std::string bucketName); + auto put(std::string const &identifier, void const *data, size_t len) + -> std::string override; + ~ExternalBlockStorageAws() override; +}; } // namespace openPMD::internal diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index a33039df86..7672b1016e 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -1,10 +1,15 @@ #include "openPMD/toolkit/ExternalBlockStorage.hpp" +#include "openPMD/toolkit/ExternalBlockStorage_internal.hpp" + #include "openPMD/DatatypeMacros.hpp" #include "openPMD/IO/JSON/JSONIOHandlerImpl.hpp" #include "openPMD/auxiliary/Filesystem.hpp" -#include "openPMD/toolkit/ExternalBlockStorage_internal.hpp" + +#include +#include +#include #include #include @@ -12,7 +17,8 @@ #include #include #include -#include +#include +#include namespace { @@ -122,6 +128,58 @@ auto StdioBuilder::setOpenMode(std::string openMode) -> StdioBuilder & return *this; } +ExternalBlockStorageAws::ExternalBlockStorageAws( + Aws::S3::S3Client client, std::string bucketName) + : m_client{std::move(client)}, m_bucketName(std::move(bucketName)) +{} +ExternalBlockStorageAws::~ExternalBlockStorageAws() = default; + +AwsBuilder::AwsBuilder( + std::string bucketName, std::string accessKeyId, std::string secretKey) + : m_bucketName(std::move(bucketName)) + , m_accessKeyId(std::move(accessKeyId)) + , m_secretKey(std::move(secretKey)) +{} + +auto AwsBuilder::setBucketName(std::string bucketName) -> AwsBuilder & +{ + m_bucketName = std::move(bucketName); + return *this; +} + +auto internal::AwsBuilder::setCredentials( + std::string accessKeyId, std::string secretKey) -> AwsBuilder & +{ + m_accessKeyId = std::move(accessKeyId); + m_secretKey = std::move(secretKey); + return *this; +} + +auto AwsBuilder::setEndpointOverride(std::string endpoint) -> AwsBuilder & +{ + m_endpointOverride = std::move(endpoint); + return *this; +} + +auto AwsBuilder::setRegion(std::string regionName) -> AwsBuilder & +{ + m_region = std::move(regionName); + return *this; +} + +auto AwsBuilder::setScheme(Scheme s) -> AwsBuilder & +{ + m_scheme = s; + return *this; +} + +auto internal::AwsBuilder::setSessionToken(std::string sessionToken) + -> AwsBuilder & +{ + m_sessionToken = std::move(sessionToken); + return *this; +} + StdioBuilder::operator ExternalBlockStorage() { return ExternalBlockStorage{std::make_unique( @@ -132,6 +190,63 @@ auto StdioBuilder::build() -> ExternalBlockStorage { return *this; } + +AwsBuilder::operator ExternalBlockStorage() +{ + Aws::Client::ClientConfiguration config; + + if (m_endpointOverride.has_value()) + { + config.endpointOverride = *m_endpointOverride; + } + if (m_region.has_value()) + { + config.region = *m_region; + } + else + { + config.region = "us-east-1"; + } + if (m_scheme.has_value()) + { + switch (*m_scheme) + { + case Scheme::HTTP: + config.scheme = Aws::Http::Scheme::HTTP; + break; + case Scheme::HTTPS: + config.scheme = Aws::Http::Scheme::HTTPS; + break; + break; + } + } + + // default timeout + config.connectTimeoutMs = 5000; + config.requestTimeoutMs = 15000; + + auto aws_credentials = [&]() -> Aws::Auth::AWSCredentials { + if (m_sessionToken.has_value()) + { + return {m_accessKeyId, m_secretKey, *m_sessionToken}; + } + else + { + return {m_accessKeyId, m_secretKey}; + } + }(); + + // Create the S3 client + Aws::S3::S3Client s3_client( + aws_credentials, + config, + Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, + false); + + // Create the AWS storage backend + return ExternalBlockStorage{std::make_unique( + std::move(s3_client), std::move(m_bucketName))}; +} } // namespace openPMD::internal namespace openPMD @@ -148,6 +263,15 @@ auto ExternalBlockStorage::makeStdioSession(std::string directory) return internal::StdioBuilder{std::move(directory)}; } +template +auto ExternalBlockStorage::makeAwsSession( + std::string bucketName, std::string accessKeyId, std::string secretKey) + -> internal::AwsBuilder +{ + return internal::AwsBuilder( + std::move(bucketName), std::move(accessKeyId), std::move(secretKey)); +} + template auto ExternalBlockStorage::store( Extent globalExtent, From 58dd6d16e2129180cb8f9d306f94e24f34b5b3c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 29 Sep 2025 10:59:58 +0200 Subject: [PATCH 07/16] first untested implementation for S3 --- .../openPMD/toolkit/ExternalBlockStorage.hpp | 6 -- src/toolkit/ExternalBlockStorage.cpp | 78 +++++++++++++++++-- 2 files changed, 73 insertions(+), 11 deletions(-) diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index d69db5a16b..53cf0d36ea 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -2,7 +2,6 @@ #include "openPMD/Dataset.hpp" -#include #include #include @@ -40,10 +39,6 @@ struct StdioBuilder struct AwsBuilder { - struct init_credentials_tag_t - {}; - static constexpr init_credentials_tag_t init_credentials_tag = {}; - AwsBuilder( std::string bucketName, std::string accessKeyId, std::string secretKey); @@ -102,7 +97,6 @@ class ExternalBlockStorage static auto makeStdioSession(std::string directory) -> internal::StdioBuilder; - template static auto makeAwsSession( std::string bucketName, std::string accessKeyId, std::string secretKey) -> internal::AwsBuilder; diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 7672b1016e..3e59ea0260 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -10,15 +10,19 @@ #include #include #include +#include +#include +#include #include +#include #include #include #include #include +#include #include #include -#include namespace { @@ -82,7 +86,7 @@ ExternalBlockStorageStdio::~ExternalBlockStorageStdio() = default; auto ExternalBlockStorageStdio::put( std::string const &identifier, void const *data, size_t len) -> std::string { - auto sanitized = identifier; + auto sanitized = identifier + ".dat"; ExternalBlockStorage::sanitizeString(sanitized); std::string filepath = concat_filepath(m_directory, sanitized); @@ -131,9 +135,74 @@ auto StdioBuilder::setOpenMode(std::string openMode) -> StdioBuilder & ExternalBlockStorageAws::ExternalBlockStorageAws( Aws::S3::S3Client client, std::string bucketName) : m_client{std::move(client)}, m_bucketName(std::move(bucketName)) -{} +{ + Aws::S3::Model::CreateBucketRequest create_request; + create_request.SetBucket(m_bucketName); + auto create_outcome = m_client.CreateBucket(create_request); + if (!create_outcome.IsSuccess()) + { + std::cerr << "[ExternalBlockStorageAws::ExternalBlockStorageAws] " + "Warning: Failed to create bucket (may already exist): " + << create_outcome.GetError().GetMessage() << std::endl; + } + else + { + std::cout << "Bucket created: " << m_bucketName << std::endl; + } +} ExternalBlockStorageAws::~ExternalBlockStorageAws() = default; +namespace +{ + struct membuf : std::streambuf + { + membuf(char const *base, std::size_t size) + { + // hm hm + auto p = const_cast(base); + this->setg(p, p, p + size); // setup get area + } + }; + + struct imemstream : std::iostream + { + imemstream(char const *base, std::size_t size) + : std::iostream(&m_buf), m_buf(base, size) + {} + + private: + membuf m_buf; + }; +} // namespace + +auto ExternalBlockStorageAws::put( + std::string const &identifier, void const *data, size_t len) -> std::string +{ + auto sanitized = identifier; + ExternalBlockStorage::sanitizeString(sanitized); + + Aws::S3::Model::PutObjectRequest put_request; + put_request.SetBucket(m_bucketName); + put_request.SetKey(sanitized); + + auto input_data = Aws::MakeShared( + "PutObjectInputStream", reinterpret_cast(data), len); + std::static_pointer_cast(input_data); + + auto put_outcome = m_client.PutObject(put_request); + + if (put_outcome.IsSuccess()) + { + std::cout << "File uploaded successfully to S3!" << std::endl; + } + else + { + std::cerr << "Upload failed: " << put_outcome.GetError().GetMessage() + << std::endl; + } + return sanitized; +} + AwsBuilder::AwsBuilder( std::string bucketName, std::string accessKeyId, std::string secretKey) : m_bucketName(std::move(bucketName)) @@ -263,7 +332,6 @@ auto ExternalBlockStorage::makeStdioSession(std::string directory) return internal::StdioBuilder{std::move(directory)}; } -template auto ExternalBlockStorage::makeAwsSession( std::string bucketName, std::string accessKeyId, std::string secretKey) -> internal::AwsBuilder @@ -360,7 +428,7 @@ auto ExternalBlockStorage::store( block["offset"] = blockOffset; block["extent"] = blockExtent; std::stringstream filesystem_identifier; - filesystem_identifier << path.to_string() << "--" << index_as_str << ".dat"; + filesystem_identifier << path.to_string() << "--" << index_as_str; auto escaped_filesystem_identifier = m_worker->put( filesystem_identifier.str(), data, From 157586ba94fb3e670bce84412d85fa7edc83541b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 29 Sep 2025 14:45:54 +0200 Subject: [PATCH 08/16] Reordering --- ...ernalBlockStorage_internal.hpp => Aws.hpp} | 13 ------ include/openPMD/toolkit/AwsBuilder.hpp | 45 +++++++++++++++++++ .../openPMD/toolkit/ExternalBlockStorage.hpp | 45 +------------------ include/openPMD/toolkit/Stdio.hpp | 19 ++++++++ include/openPMD/toolkit/StdioBuilder.hpp | 24 ++++++++++ src/toolkit/ExternalBlockStorage.cpp | 4 +- 6 files changed, 92 insertions(+), 58 deletions(-) rename include/openPMD/toolkit/{ExternalBlockStorage_internal.hpp => Aws.hpp} (58%) create mode 100644 include/openPMD/toolkit/AwsBuilder.hpp create mode 100644 include/openPMD/toolkit/Stdio.hpp create mode 100644 include/openPMD/toolkit/StdioBuilder.hpp diff --git a/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp b/include/openPMD/toolkit/Aws.hpp similarity index 58% rename from include/openPMD/toolkit/ExternalBlockStorage_internal.hpp rename to include/openPMD/toolkit/Aws.hpp index 2ad77e7965..737629ec2b 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp +++ b/include/openPMD/toolkit/Aws.hpp @@ -6,19 +6,6 @@ namespace openPMD::internal { -struct ExternalBlockStorageStdio : ExternalBlockStorageBackend -{ -private: - std::string m_directory; - std::string m_openMode; - -public: - ExternalBlockStorageStdio(std::string directory, std::string openMode); - auto put(std::string const &identifier, void const *data, size_t len) - -> std::string override; - ~ExternalBlockStorageStdio() override; -}; - struct ExternalBlockStorageAws : ExternalBlockStorageBackend { private: diff --git a/include/openPMD/toolkit/AwsBuilder.hpp b/include/openPMD/toolkit/AwsBuilder.hpp new file mode 100644 index 0000000000..3bb8cef491 --- /dev/null +++ b/include/openPMD/toolkit/AwsBuilder.hpp @@ -0,0 +1,45 @@ +#pragma once + +#include +#include +#include +#include + +namespace openPMD +{ +class ExternalBlockStorage; +} + +namespace openPMD::internal +{ +struct AwsBuilder +{ + AwsBuilder( + std::string bucketName, std::string accessKeyId, std::string secretKey); + + enum class Scheme : uint8_t + { + HTTP, + HTTPS + }; + std::string m_bucketName; + std::string m_accessKeyId; + std::string m_secretKey; + std::optional m_sessionToken; + std::initializer_list m_credentials; + std::optional m_endpointOverride; + std::optional m_region; + std::optional m_scheme; + + auto setBucketName(std::string bucketName) -> AwsBuilder &; + auto setCredentials(std::string accessKeyId, std::string secretKey) + -> AwsBuilder &; + auto setSessionToken(std::string sessionToken) -> AwsBuilder &; + auto setEndpointOverride(std::string endpoint) -> AwsBuilder &; + auto setRegion(std::string regionName) -> AwsBuilder &; + auto setScheme(Scheme s) -> AwsBuilder &; + + operator ::openPMD::ExternalBlockStorage(); + auto build() -> ::openPMD::ExternalBlockStorage; +}; +} // namespace openPMD::internal diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index 53cf0d36ea..f795b34eb3 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -1,6 +1,8 @@ #pragma once #include "openPMD/Dataset.hpp" +#include "openPMD/toolkit/AwsBuilder.hpp" +#include "openPMD/toolkit/StdioBuilder.hpp" #include @@ -24,49 +26,6 @@ struct ExternalBlockStorageBackend -> std::string = 0; virtual ~ExternalBlockStorageBackend(); }; - -struct StdioBuilder -{ - std::string m_directory; - std::optional m_openMode = std::nullopt; - - auto setDirectory(std::string directory) -> StdioBuilder &; - auto setOpenMode(std::string openMode) -> StdioBuilder &; - - operator ExternalBlockStorage(); - auto build() -> ExternalBlockStorage; -}; - -struct AwsBuilder -{ - AwsBuilder( - std::string bucketName, std::string accessKeyId, std::string secretKey); - - enum class Scheme : uint8_t - { - HTTP, - HTTPS - }; - std::string m_bucketName; - std::string m_accessKeyId; - std::string m_secretKey; - std::optional m_sessionToken; - std::initializer_list m_credentials; - std::optional m_endpointOverride; - std::optional m_region; - std::optional m_scheme; - - auto setBucketName(std::string bucketName) -> AwsBuilder &; - auto setCredentials(std::string accessKeyId, std::string secretKey) - -> AwsBuilder &; - auto setSessionToken(std::string sessionToken) -> AwsBuilder &; - auto setEndpointOverride(std::string endpoint) -> AwsBuilder &; - auto setRegion(std::string regionName) -> AwsBuilder &; - auto setScheme(Scheme s) -> AwsBuilder &; - - operator ExternalBlockStorage(); - auto build() -> ExternalBlockStorage; -}; } // namespace openPMD::internal namespace openPMD diff --git a/include/openPMD/toolkit/Stdio.hpp b/include/openPMD/toolkit/Stdio.hpp new file mode 100644 index 0000000000..10a3e724be --- /dev/null +++ b/include/openPMD/toolkit/Stdio.hpp @@ -0,0 +1,19 @@ +#pragma once + +#include "openPMD/toolkit/ExternalBlockStorage.hpp" + +namespace openPMD::internal +{ +struct ExternalBlockStorageStdio : ExternalBlockStorageBackend +{ +private: + std::string m_directory; + std::string m_openMode; + +public: + ExternalBlockStorageStdio(std::string directory, std::string openMode); + auto put(std::string const &identifier, void const *data, size_t len) + -> std::string override; + ~ExternalBlockStorageStdio() override; +}; +} // namespace openPMD::internal diff --git a/include/openPMD/toolkit/StdioBuilder.hpp b/include/openPMD/toolkit/StdioBuilder.hpp new file mode 100644 index 0000000000..7d93048167 --- /dev/null +++ b/include/openPMD/toolkit/StdioBuilder.hpp @@ -0,0 +1,24 @@ +#pragma once + +#include +#include + +namespace openPMD +{ +class ExternalBlockStorage; +} + +namespace openPMD::internal +{ +struct StdioBuilder +{ + std::string m_directory; + std::optional m_openMode = std::nullopt; + + auto setDirectory(std::string directory) -> StdioBuilder &; + auto setOpenMode(std::string openMode) -> StdioBuilder &; + + operator ::openPMD::ExternalBlockStorage(); + auto build() -> ::openPMD::ExternalBlockStorage; +}; +} // namespace openPMD::internal diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 3e59ea0260..fc08eadc4a 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -1,11 +1,11 @@ #include "openPMD/toolkit/ExternalBlockStorage.hpp" -#include "openPMD/toolkit/ExternalBlockStorage_internal.hpp" - #include "openPMD/DatatypeMacros.hpp" #include "openPMD/IO/JSON/JSONIOHandlerImpl.hpp" #include "openPMD/auxiliary/Filesystem.hpp" +#include "openPMD/toolkit/Aws.hpp" +#include "openPMD/toolkit/Stdio.hpp" #include #include From 1bcc35bb27a251cffc2681b014a72e1e5680657d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 29 Sep 2025 15:00:06 +0200 Subject: [PATCH 09/16] continue restructuring --- CMakeLists.txt | 6 +- src/toolkit/Aws.cpp | 80 +++++++ src/toolkit/AwsBuilder.cpp | 117 ++++++++++ src/toolkit/ExternalBlockStorage.cpp | 321 +-------------------------- src/toolkit/Stdio.cpp | 68 ++++++ src/toolkit/StdioBuilder.cpp | 31 +++ 6 files changed, 305 insertions(+), 318 deletions(-) create mode 100644 src/toolkit/Aws.cpp create mode 100644 src/toolkit/AwsBuilder.cpp create mode 100644 src/toolkit/Stdio.cpp create mode 100644 src/toolkit/StdioBuilder.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 5e8ac693c6..df9b13f036 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -434,7 +434,11 @@ set(CORE_SOURCE src/snapshots/RandomAccessIterator.cpp src/snapshots/Snapshots.cpp src/snapshots/StatefulIterator.cpp - src/toolkit/ExternalBlockStorage.cpp) + src/toolkit/ExternalBlockStorage.cpp + src/toolkit/AwsBuilder.cpp + src/toolkit/Aws.cpp + src/toolkit/StdioBuilder.cpp + src/toolkit/Stdio.cpp) set(IO_SOURCE src/IO/AbstractIOHandler.cpp src/IO/AbstractIOHandlerImpl.cpp diff --git a/src/toolkit/Aws.cpp b/src/toolkit/Aws.cpp new file mode 100644 index 0000000000..2f05ec9402 --- /dev/null +++ b/src/toolkit/Aws.cpp @@ -0,0 +1,80 @@ +#include "openPMD/toolkit/Aws.hpp" + +#include +#include + +#include + +namespace +{ +struct membuf : std::streambuf +{ + membuf(char const *base, std::size_t size) + { + auto p = const_cast(base); + this->setg(p, p, p + size); + } +}; + +struct imemstream : std::iostream +{ + imemstream(char const *base, std::size_t size) + : std::iostream(&m_buf), m_buf(base, size) + {} + +private: + membuf m_buf; +}; +} // namespace + +namespace openPMD::internal +{ +ExternalBlockStorageAws::ExternalBlockStorageAws( + Aws::S3::S3Client client, std::string bucketName) + : m_client{std::move(client)}, m_bucketName(std::move(bucketName)) +{ + Aws::S3::Model::CreateBucketRequest create_request; + create_request.SetBucket(m_bucketName); + auto create_outcome = m_client.CreateBucket(create_request); + if (!create_outcome.IsSuccess()) + { + std::cerr << "[ExternalBlockStorageAws::ExternalBlockStorageAws] " + "Warning: Failed to create bucket (may already exist): " + << create_outcome.GetError().GetMessage() << std::endl; + } + else + { + std::cout << "Bucket created: " << m_bucketName << std::endl; + } +} +ExternalBlockStorageAws::~ExternalBlockStorageAws() = default; + +auto ExternalBlockStorageAws::put( + std::string const &identifier, void const *data, size_t len) -> std::string +{ + auto sanitized = identifier; + ExternalBlockStorage::sanitizeString(sanitized); + + Aws::S3::Model::PutObjectRequest put_request; + put_request.SetBucket(m_bucketName); + put_request.SetKey(sanitized); + + auto input_data = Aws::MakeShared( + "PutObjectInputStream", reinterpret_cast(data), len); + std::static_pointer_cast(input_data); + + auto put_outcome = m_client.PutObject(put_request); + + if (put_outcome.IsSuccess()) + { + std::cout << "File uploaded successfully to S3!" << std::endl; + } + else + { + std::cerr << "Upload failed: " << put_outcome.GetError().GetMessage() + << std::endl; + } + return sanitized; +} + +} // namespace openPMD::internal diff --git a/src/toolkit/AwsBuilder.cpp b/src/toolkit/AwsBuilder.cpp new file mode 100644 index 0000000000..13caa1f878 --- /dev/null +++ b/src/toolkit/AwsBuilder.cpp @@ -0,0 +1,117 @@ +#include "openPMD/toolkit/AwsBuilder.hpp" + +#include "openPMD/toolkit/Aws.hpp" +#include "openPMD/toolkit/ExternalBlockStorage.hpp" + +#include +#include +#include + +namespace openPMD::internal +{ +AwsBuilder::AwsBuilder( + std::string bucketName, std::string accessKeyId, std::string secretKey) + : m_bucketName(std::move(bucketName)) + , m_accessKeyId(std::move(accessKeyId)) + , m_secretKey(std::move(secretKey)) +{} + +auto AwsBuilder::setBucketName(std::string bucketName) -> AwsBuilder & +{ + m_bucketName = std::move(bucketName); + return *this; +} + +auto internal::AwsBuilder::setCredentials( + std::string accessKeyId, std::string secretKey) -> AwsBuilder & +{ + m_accessKeyId = std::move(accessKeyId); + m_secretKey = std::move(secretKey); + return *this; +} + +auto AwsBuilder::setEndpointOverride(std::string endpoint) -> AwsBuilder & +{ + m_endpointOverride = std::move(endpoint); + return *this; +} + +auto AwsBuilder::setRegion(std::string regionName) -> AwsBuilder & +{ + m_region = std::move(regionName); + return *this; +} + +auto AwsBuilder::setScheme(Scheme s) -> AwsBuilder & +{ + m_scheme = s; + return *this; +} + +auto internal::AwsBuilder::setSessionToken(std::string sessionToken) + -> AwsBuilder & +{ + m_sessionToken = std::move(sessionToken); + return *this; +} + +AwsBuilder::operator ExternalBlockStorage() +{ + Aws::Client::ClientConfiguration config; + + if (m_endpointOverride.has_value()) + { + config.endpointOverride = *m_endpointOverride; + } + if (m_region.has_value()) + { + config.region = *m_region; + } + else + { + config.region = "us-east-1"; + } + if (m_scheme.has_value()) + { + switch (*m_scheme) + { + case Scheme::HTTP: + config.scheme = Aws::Http::Scheme::HTTP; + break; + case Scheme::HTTPS: + config.scheme = Aws::Http::Scheme::HTTPS; + break; + break; + } + } + + config.connectTimeoutMs = 5000; + config.requestTimeoutMs = 15000; + + auto aws_credentials = [&]() -> Aws::Auth::AWSCredentials { + if (m_sessionToken.has_value()) + { + return {m_accessKeyId, m_secretKey, *m_sessionToken}; + } + else + { + return {m_accessKeyId, m_secretKey}; + } + }(); + + Aws::S3::S3Client s3_client( + aws_credentials, + config, + Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, + false); + + return ExternalBlockStorage{std::make_unique( + std::move(s3_client), std::move(m_bucketName))}; +} + +auto AwsBuilder::build() -> ExternalBlockStorage +{ + return *this; +} + +} // namespace openPMD::internal diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index fc08eadc4a..fefaa00858 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -1,323 +1,19 @@ - #include "openPMD/toolkit/ExternalBlockStorage.hpp" #include "openPMD/DatatypeMacros.hpp" #include "openPMD/IO/JSON/JSONIOHandlerImpl.hpp" -#include "openPMD/auxiliary/Filesystem.hpp" -#include "openPMD/toolkit/Aws.hpp" -#include "openPMD/toolkit/Stdio.hpp" -#include -#include -#include -#include -#include -#include +#include -#include -#include -#include #include #include #include -#include -#include -#include - -namespace -{ -auto concat_filepath(std::string const &s1, std::string const &s2) - -> std::string -{ - if (s1.empty()) - { - return s2; - } - if (s2.empty()) - { - return s1; - } - bool ends_with_slash = - *s1.crbegin() == openPMD::auxiliary::directory_separator; - bool starts_with_slash = - *s2.cbegin() == openPMD::auxiliary::directory_separator; - - if (ends_with_slash ^ starts_with_slash) - { - return s1 + s2; - } - else if (ends_with_slash && starts_with_slash) - { - return s1 + (s2.c_str() + 1); - } - else - { - return s1 + openPMD::auxiliary::directory_separator + s2; - } -} -} // namespace namespace openPMD::internal { ExternalBlockStorageBackend::~ExternalBlockStorageBackend() = default; - -ExternalBlockStorageStdio::ExternalBlockStorageStdio( - std::string directory, std::string openMode) - : m_directory(std::move(directory)), m_openMode(std::move(openMode)) -{ - if (m_directory.empty()) - { - throw std::invalid_argument( - "ExternalBlockStorageStdio: directory cannot be empty"); - } - - // Ensure the directory exists and is writable - if (!auxiliary::create_directories(m_directory)) - { - throw std::runtime_error( - "ExternalBlockStorageStdio: failed to create or access " - "directory: " + - m_directory); - } -} - -ExternalBlockStorageStdio::~ExternalBlockStorageStdio() = default; - -auto ExternalBlockStorageStdio::put( - std::string const &identifier, void const *data, size_t len) -> std::string -{ - auto sanitized = identifier + ".dat"; - ExternalBlockStorage::sanitizeString(sanitized); - std::string filepath = concat_filepath(m_directory, sanitized); - - if (len == 0) - { - return filepath; - } - - FILE *file = std::fopen(filepath.c_str(), "wb"); - if (!file) - { - throw std::runtime_error( - "ExternalBlockStorageStdio: failed to open file for writing: " + - filepath); - } - - size_t written = std::fwrite(data, 1, len, file); - if (written != len) - { - throw std::runtime_error( - "ExternalBlockStorageStdio: failed to write full data to file: " + - filepath); - } - - if (std::fclose(file) != 0) - { - throw std::runtime_error( - "ExternalBlockStorageStdio: failed to close file after writing: " + - filepath); - } - - return filepath; -} - -auto StdioBuilder::setDirectory(std::string directory) -> StdioBuilder & -{ - m_directory = std::move(directory); - return *this; -} -auto StdioBuilder::setOpenMode(std::string openMode) -> StdioBuilder & -{ - m_openMode = std::move(openMode); - return *this; } -ExternalBlockStorageAws::ExternalBlockStorageAws( - Aws::S3::S3Client client, std::string bucketName) - : m_client{std::move(client)}, m_bucketName(std::move(bucketName)) -{ - Aws::S3::Model::CreateBucketRequest create_request; - create_request.SetBucket(m_bucketName); - auto create_outcome = m_client.CreateBucket(create_request); - if (!create_outcome.IsSuccess()) - { - std::cerr << "[ExternalBlockStorageAws::ExternalBlockStorageAws] " - "Warning: Failed to create bucket (may already exist): " - << create_outcome.GetError().GetMessage() << std::endl; - } - else - { - std::cout << "Bucket created: " << m_bucketName << std::endl; - } -} -ExternalBlockStorageAws::~ExternalBlockStorageAws() = default; - -namespace -{ - struct membuf : std::streambuf - { - membuf(char const *base, std::size_t size) - { - // hm hm - auto p = const_cast(base); - this->setg(p, p, p + size); // setup get area - } - }; - - struct imemstream : std::iostream - { - imemstream(char const *base, std::size_t size) - : std::iostream(&m_buf), m_buf(base, size) - {} - - private: - membuf m_buf; - }; -} // namespace - -auto ExternalBlockStorageAws::put( - std::string const &identifier, void const *data, size_t len) -> std::string -{ - auto sanitized = identifier; - ExternalBlockStorage::sanitizeString(sanitized); - - Aws::S3::Model::PutObjectRequest put_request; - put_request.SetBucket(m_bucketName); - put_request.SetKey(sanitized); - - auto input_data = Aws::MakeShared( - "PutObjectInputStream", reinterpret_cast(data), len); - std::static_pointer_cast(input_data); - - auto put_outcome = m_client.PutObject(put_request); - - if (put_outcome.IsSuccess()) - { - std::cout << "File uploaded successfully to S3!" << std::endl; - } - else - { - std::cerr << "Upload failed: " << put_outcome.GetError().GetMessage() - << std::endl; - } - return sanitized; -} - -AwsBuilder::AwsBuilder( - std::string bucketName, std::string accessKeyId, std::string secretKey) - : m_bucketName(std::move(bucketName)) - , m_accessKeyId(std::move(accessKeyId)) - , m_secretKey(std::move(secretKey)) -{} - -auto AwsBuilder::setBucketName(std::string bucketName) -> AwsBuilder & -{ - m_bucketName = std::move(bucketName); - return *this; -} - -auto internal::AwsBuilder::setCredentials( - std::string accessKeyId, std::string secretKey) -> AwsBuilder & -{ - m_accessKeyId = std::move(accessKeyId); - m_secretKey = std::move(secretKey); - return *this; -} - -auto AwsBuilder::setEndpointOverride(std::string endpoint) -> AwsBuilder & -{ - m_endpointOverride = std::move(endpoint); - return *this; -} - -auto AwsBuilder::setRegion(std::string regionName) -> AwsBuilder & -{ - m_region = std::move(regionName); - return *this; -} - -auto AwsBuilder::setScheme(Scheme s) -> AwsBuilder & -{ - m_scheme = s; - return *this; -} - -auto internal::AwsBuilder::setSessionToken(std::string sessionToken) - -> AwsBuilder & -{ - m_sessionToken = std::move(sessionToken); - return *this; -} - -StdioBuilder::operator ExternalBlockStorage() -{ - return ExternalBlockStorage{std::make_unique( - std::move(m_directory), std::move(m_openMode).value_or("wb"))}; -} - -auto StdioBuilder::build() -> ExternalBlockStorage -{ - return *this; -} - -AwsBuilder::operator ExternalBlockStorage() -{ - Aws::Client::ClientConfiguration config; - - if (m_endpointOverride.has_value()) - { - config.endpointOverride = *m_endpointOverride; - } - if (m_region.has_value()) - { - config.region = *m_region; - } - else - { - config.region = "us-east-1"; - } - if (m_scheme.has_value()) - { - switch (*m_scheme) - { - case Scheme::HTTP: - config.scheme = Aws::Http::Scheme::HTTP; - break; - case Scheme::HTTPS: - config.scheme = Aws::Http::Scheme::HTTPS; - break; - break; - } - } - - // default timeout - config.connectTimeoutMs = 5000; - config.requestTimeoutMs = 15000; - - auto aws_credentials = [&]() -> Aws::Auth::AWSCredentials { - if (m_sessionToken.has_value()) - { - return {m_accessKeyId, m_secretKey, *m_sessionToken}; - } - else - { - return {m_accessKeyId, m_secretKey}; - } - }(); - - // Create the S3 client - Aws::S3::S3Client s3_client( - aws_credentials, - config, - Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, - false); - - // Create the AWS storage backend - return ExternalBlockStorage{std::make_unique( - std::move(s3_client), std::move(m_bucketName))}; -} -} // namespace openPMD::internal - namespace openPMD { ExternalBlockStorage::ExternalBlockStorage() = default; @@ -349,17 +45,8 @@ auto ExternalBlockStorage::store( nlohmann::json::json_pointer const &path, T const *data) -> std::string { - // JSON Identifier: running counter. - // Do not use an array to avoid reindexing upon deletion. - - // Filesystem Identifier: JSON path + running counter. - - // For each externally handled data block, store: - // 1. Filesystem identifier - // 2. Offset, Extent auto &dataset = fullJsonDataset[path]; - // running_index denotes the last *used* block index in the dataset using running_index_t = uint64_t; running_index_t running_index = [&]() -> running_index_t { if (auto it = dataset.find("_running_index"); it != dataset.end()) @@ -421,8 +108,8 @@ auto ExternalBlockStorage::store( { throw std::runtime_error("Inconsistent chunk storage in datatype."); } - check_metadata("_byte_width", sizeof(T)); - check_metadata("_extent", globalExtent); + check_metadata("byte_width", sizeof(T)); + check_metadata("extent", globalExtent); auto &block = dataset[index_as_str]; block["offset"] = blockOffset; @@ -443,7 +130,6 @@ auto ExternalBlockStorage::store( void ExternalBlockStorage::sanitizeString(std::string &s) { - // Replace invalid characters with underscore for (char &c : s) { if (c == '/' || c == '\\' || c == ':' || c == '*' || c == '?' || @@ -467,4 +153,5 @@ void ExternalBlockStorage::sanitizeString(std::string &s) OPENPMD_INSTANTIATE_DATATYPEHANDLING(internal::JsonDatatypeHandling, type) OPENPMD_FOREACH_DATASET_DATATYPE(OPENPMD_INSTANTIATE) #undef OPENPMD_INSTANTIATE + } // namespace openPMD diff --git a/src/toolkit/Stdio.cpp b/src/toolkit/Stdio.cpp new file mode 100644 index 0000000000..dac83d35c9 --- /dev/null +++ b/src/toolkit/Stdio.cpp @@ -0,0 +1,68 @@ +#include "openPMD/toolkit/Stdio.hpp" + +#include "openPMD/auxiliary/Filesystem.hpp" + +#include +#include + +namespace openPMD::internal +{ +ExternalBlockStorageStdio::ExternalBlockStorageStdio( + std::string directory, std::string openMode) + : m_directory(std::move(directory)), m_openMode(std::move(openMode)) +{ + if (m_directory.empty()) + { + throw std::invalid_argument( + "ExternalBlockStorageStdio: directory cannot be empty"); + } + + if (!auxiliary::create_directories(m_directory)) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to create or access " + "directory: " + + m_directory); + } +} + +ExternalBlockStorageStdio::~ExternalBlockStorageStdio() = default; + +auto ExternalBlockStorageStdio::put( + std::string const &identifier, void const *data, size_t len) -> std::string +{ + auto sanitized = identifier + ".dat"; + ExternalBlockStorage::sanitizeString(sanitized); + std::string filepath = m_directory + "/" + sanitized; + + if (len == 0) + { + return filepath; + } + + FILE *file = std::fopen(filepath.c_str(), "wb"); + if (!file) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to open file for writing: " + + filepath); + } + + size_t written = std::fwrite(data, 1, len, file); + if (written != len) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to write full data to file: " + + filepath); + } + + if (std::fclose(file) != 0) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to close file after writing: " + + filepath); + } + + return filepath; +} +} // namespace openPMD::internal diff --git a/src/toolkit/StdioBuilder.cpp b/src/toolkit/StdioBuilder.cpp new file mode 100644 index 0000000000..8fa5f6bb6f --- /dev/null +++ b/src/toolkit/StdioBuilder.cpp @@ -0,0 +1,31 @@ +#include "openPMD/toolkit/StdioBuilder.hpp" + +#include "openPMD/toolkit/ExternalBlockStorage.hpp" +#include "openPMD/toolkit/Stdio.hpp" + +#include + +namespace openPMD::internal +{ +auto StdioBuilder::setDirectory(std::string directory) -> StdioBuilder & +{ + m_directory = std::move(directory); + return *this; +} +auto StdioBuilder::setOpenMode(std::string openMode) -> StdioBuilder & +{ + m_openMode = std::move(openMode); + return *this; +} + +StdioBuilder::operator ExternalBlockStorage() +{ + return ExternalBlockStorage{std::make_unique( + std::move(m_directory), std::move(m_openMode).value_or("wb"))}; +} + +auto StdioBuilder::build() -> ExternalBlockStorage +{ + return *this; +} +} // namespace openPMD::internal From 25ec92d4eb50765b4584d9f7c51dc8c3f6b0cf2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 29 Sep 2025 15:25:49 +0200 Subject: [PATCH 10/16] Some first little MPI awareness --- .../openPMD/toolkit/ExternalBlockStorage.hpp | 1 + src/IO/JSON/JSONIOHandlerImpl.cpp | 157 ++++++++++-------- src/toolkit/ExternalBlockStorage.cpp | 9 +- 3 files changed, 100 insertions(+), 67 deletions(-) diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index f795b34eb3..f1b5b83671 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -68,6 +68,7 @@ class ExternalBlockStorage Extent blockExtent, nlohmann::json &fullJsonDataset, nlohmann::json::json_pointer const &path, + std::optional infix, // e.g. for distinguishing MPI ranks T const *data) -> std::string; static void sanitizeString(std::string &s); diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 10e09add1b..2550a7f02a 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -1148,6 +1148,30 @@ void JSONIOHandlerImpl::deleteAttribute( namespace { + template + auto + write_rank_to_stream_with_sufficient_padding(Stream &s, int rank, int size) + -> Stream & + { + auto num_digits = [](unsigned n) -> unsigned { + constexpr auto max = std::numeric_limits::max(); + unsigned base_10 = 1; + unsigned res = 1; + while (base_10 < max) + { + base_10 *= 10; + if (n / base_10 == 0) + { + return res; + } + ++res; + } + return res; + }; + s << std::setw(num_digits(size - 1)) << std::setfill('0') << rank; + return s; + } + struct StoreExternally { template @@ -1179,7 +1203,22 @@ void JSONIOHandlerImpl::writeDataset( { case DatasetMode::Dataset: break; - case DatasetMode::Template: + case DatasetMode::Template: { + std::optional rankInfix; +#if openPMD_HAVE_MPI + if (m_communicator.has_value()) + { + auto &comm = *m_communicator; + // TODO maybe cache the result for this computation + int rank, size; + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &size); + std::stringstream s; + s << "r"; + write_rank_to_stream_with_sufficient_padding(s, rank, size); + rankInfix = s.str(); + } +#endif switchDatasetType( parameters.dtype, externalBlockStorage, @@ -1188,7 +1227,8 @@ void JSONIOHandlerImpl::writeDataset( parameters.offset, parameters.extent, jsonRoot, - filePosition->id); + filePosition->id, + std::move(rankInfix)); // if (!m_datasetMode.m_skipWarnings) // { // std::cerr @@ -1199,6 +1239,7 @@ void JSONIOHandlerImpl::writeDataset( // } return; } + } switchType(parameters.dtype, j, parameters); @@ -2146,53 +2187,37 @@ auto JSONIOHandlerImpl::putJsonContents( }; #if openPMD_HAVE_MPI - auto num_digits = [](unsigned n) -> unsigned { - constexpr auto max = std::numeric_limits::max(); - unsigned base_10 = 1; - unsigned res = 1; - while (base_10 < max) + auto parallelImplementation = [this, &filename, &writeSingleFile]( + MPI_Comm comm) { + auto path = fullPath(*filename); + auto dirpath = path + ".parallel"; + if (!auxiliary::create_directories(dirpath)) { - base_10 *= 10; - if (n / base_10 == 0) - { - return res; - } - ++res; + throw std::runtime_error( + "Failed creating directory '" + dirpath + + "' for parallel JSON output"); } - return res; - }; - - auto parallelImplementation = - [this, &filename, &writeSingleFile, &num_digits](MPI_Comm comm) { - auto path = fullPath(*filename); - auto dirpath = path + ".parallel"; - if (!auxiliary::create_directories(dirpath)) - { - throw std::runtime_error( - "Failed creating directory '" + dirpath + - "' for parallel JSON output"); - } - int rank = 0, size = 0; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &size); - std::stringstream subfilePath; - // writeSingleFile will prepend the base dir - subfilePath << *filename << ".parallel/mpi_rank_" - << std::setw(num_digits(size - 1)) << std::setfill('0') - << rank << [&]() { - switch (m_fileFormat) - { - case FileFormat::Json: - return ".json"; - case FileFormat::Toml: - return ".toml"; - } - throw std::runtime_error("Unreachable!"); - }(); - writeSingleFile(subfilePath.str()); - if (rank == 0) - { - constexpr char const *readme_msg = R"( + int rank = 0, size = 0; + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &size); + std::stringstream subfilePath; + // writeSingleFile will prepend the base dir + subfilePath << *filename << ".parallel/mpi_rank_"; + write_rank_to_stream_with_sufficient_padding(subfilePath, rank, size) + << [&]() { + switch (m_fileFormat) + { + case FileFormat::Json: + return ".json"; + case FileFormat::Toml: + return ".toml"; + } + throw std::runtime_error("Unreachable!"); + }(); + writeSingleFile(subfilePath.str()); + if (rank == 0) + { + constexpr char const *readme_msg = R"( This folder has been created by a parallel instance of the JSON backend in openPMD. There is one JSON file for each parallel writer MPI rank. The parallel JSON backend performs no metadata or data aggregation at all. @@ -2202,26 +2227,26 @@ There is no support in the openPMD-api for reading this folder as a single dataset. For reading purposes, either pick a single .json file and read that, or merge the .json files somehow (no tooling provided for this (yet)). )"; - std::fstream readme_file; - readme_file.open( - dirpath + "/README.txt", - std::ios_base::out | std::ios_base::trunc); - readme_file << readme_msg + 1; - readme_file.close(); - if (!readme_file.good() && - !filename.fileState->printedReadmeWarningAlready) - { - std::cerr - << "[Warning] Something went wrong in trying to create " - "README file at '" - << dirpath - << "/README.txt'. Will ignore and continue. The README " - "message would have been:\n----------\n" - << readme_msg + 1 << "----------" << std::endl; - filename.fileState->printedReadmeWarningAlready = true; - } + std::fstream readme_file; + readme_file.open( + dirpath + "/README.txt", + std::ios_base::out | std::ios_base::trunc); + readme_file << readme_msg + 1; + readme_file.close(); + if (!readme_file.good() && + !filename.fileState->printedReadmeWarningAlready) + { + std::cerr + << "[Warning] Something went wrong in trying to create " + "README file at '" + << dirpath + << "/README.txt'. Will ignore and continue. The README " + "message would have been:\n----------\n" + << readme_msg + 1 << "----------" << std::endl; + filename.fileState->printedReadmeWarningAlready = true; } - }; + } + }; std::shared_ptr res; if (m_communicator.has_value()) diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index fefaa00858..2a0f4fc683 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -43,6 +43,7 @@ auto ExternalBlockStorage::store( Extent blockExtent, nlohmann::json &fullJsonDataset, nlohmann::json::json_pointer const &path, + std::optional infix, T const *data) -> std::string { auto &dataset = fullJsonDataset[path]; @@ -115,7 +116,12 @@ auto ExternalBlockStorage::store( block["offset"] = blockOffset; block["extent"] = blockExtent; std::stringstream filesystem_identifier; - filesystem_identifier << path.to_string() << "--" << index_as_str; + filesystem_identifier << path.to_string(); + if (infix.has_value()) + { + filesystem_identifier << "--" << *infix; + } + filesystem_identifier << "--" << index_as_str; auto escaped_filesystem_identifier = m_worker->put( filesystem_identifier.str(), data, @@ -148,6 +154,7 @@ void ExternalBlockStorage::sanitizeString(std::string &s) Extent blockExtent, \ nlohmann::json & fullJsonDataset, \ nlohmann::json::json_pointer const &path, \ + std::optional infix, \ type const *data) -> std::string; #define OPENPMD_INSTANTIATE(type) \ OPENPMD_INSTANTIATE_DATATYPEHANDLING(internal::JsonDatatypeHandling, type) From 942e214bd952414f880a353300fab36519f5be40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 29 Sep 2025 15:59:45 +0200 Subject: [PATCH 11/16] WIP: Config for external block storage from JSON --- include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp | 132 ++++-- .../openPMD/toolkit/ExternalBlockStorage.hpp | 5 + src/IO/JSON/JSONIOHandlerImpl.cpp | 445 ++++++++++-------- 3 files changed, 341 insertions(+), 241 deletions(-) diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index f8f3cc49c0..2482e30e6d 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -33,6 +33,7 @@ #include #include +#include #if openPMD_HAVE_MPI #include #endif @@ -196,6 +197,15 @@ struct JsonDatatypeHandling namespace openPMD { +namespace dataset_mode_types +{ + struct Dataset_t + {}; + struct Template_t + {}; + using External_t = std::shared_ptr; +} // namespace dataset_mode_types + class JSONIOHandlerImpl : public AbstractIOHandlerImpl { using json = nlohmann::json; @@ -218,8 +228,6 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl std::string originalExtension); #endif - ExternalBlockStorage externalBlockStorage; - void init(openPMD::json::TracingJSON config); ~JSONIOHandlerImpl() override; @@ -286,42 +294,6 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl std::future flush(); -private: -#if openPMD_HAVE_MPI - std::optional m_communicator; -#endif - - using FILEHANDLE = std::fstream; - - // map each Writable to its associated file - // contains only the filename, without the OS path - std::unordered_map m_files; - - std::unordered_map> m_jsonVals; - - // files that have logically, but not physically been written to - std::unordered_set m_dirty; - - /* - * Is set by constructor. - */ - FileFormat m_fileFormat{}; - - /* - * Under which key do we find the backend configuration? - * -> "json" for the JSON backend, "toml" for the TOML backend. - */ - std::string backendConfigKey() const; - - /* - * First return value: The location of the JSON value (either "json" or - * "toml") Second return value: The value that was maybe found at this place - */ - std::pair> - getBackendConfig(openPMD::json::TracingJSON &) const; - - std::string m_originalExtension; - /* * Was the config value explicitly user-chosen, or are we still working with * defaults? @@ -336,17 +308,36 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl // Dataset IO mode // ///////////////////// - enum class DatasetMode + struct DatasetMode + : std::variant< + dataset_mode_types::Dataset_t, + dataset_mode_types::Template_t, + dataset_mode_types::External_t> { - Dataset, - Template + using Dataset_t = dataset_mode_types::Dataset_t; + using Template_t = dataset_mode_types::Template_t; + using External_t = dataset_mode_types::External_t; + constexpr static Dataset_t Dataset{}; + constexpr static Template_t Template{}; + + using variant_t = std::variant< + dataset_mode_types::Dataset_t, + dataset_mode_types::Template_t, + External_t>; + using variant_t ::operator=; + + // casts needed because of + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90943 + inline auto as_base() const -> variant_t const & + { + return *this; + } + inline auto as_base() -> variant_t & + { + return *this; + } }; - // IOMode m_mode{}; - // SpecificationVia m_IOModeSpecificationVia = - // SpecificationVia::DefaultValue; bool m_printedSkippedWriteWarningAlready - // = false; - struct DatasetMode_s { // Initialized in init() @@ -361,8 +352,6 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl m_mode, m_specificationVia, m_skipWarnings}; } }; - DatasetMode_s m_datasetMode; - DatasetMode_s retrieveDatasetMode(openPMD::json::TracingJSON &config) const; /////////////////////// // Attribute IO mode // @@ -381,8 +370,50 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl AttributeMode m_mode{}; SpecificationVia m_specificationVia = SpecificationVia::DefaultValue; }; - AttributeMode_s m_attributeMode; +private: +#if openPMD_HAVE_MPI + std::optional m_communicator; +#endif + + using FILEHANDLE = std::fstream; + + // map each Writable to its associated file + // contains only the filename, without the OS path + std::unordered_map m_files; + + std::unordered_map> m_jsonVals; + + // files that have logically, but not physically been written to + std::unordered_set m_dirty; + + /* + * Is set by constructor. + */ + FileFormat m_fileFormat{}; + + /* + * Under which key do we find the backend configuration? + * -> "json" for the JSON backend, "toml" for the TOML backend. + */ + std::string backendConfigKey() const; + + /* + * First return value: The location of the JSON value (either "json" or + * "toml") Second return value: The value that was maybe found at this place + */ + std::pair> + getBackendConfig(openPMD::json::TracingJSON &) const; + static std::pair> + getBackendConfig( + openPMD::json::TracingJSON &, std::string const &configLocation); + + std::string m_originalExtension; + + DatasetMode_s m_datasetMode; + DatasetMode_s retrieveDatasetMode(openPMD::json::TracingJSON &config) const; + + AttributeMode_s m_attributeMode; AttributeMode_s retrieveAttributeMode(openPMD::json::TracingJSON &config) const; @@ -432,7 +463,8 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl // essentially: m_i = \prod_{j=0}^{i-1} extent_j static Extent getMultiplicators(Extent const &extent); - static std::pair getExtent(nlohmann::json &j); + static std::pair + getExtent(nlohmann::json &j, DatasetMode const &baseMode); // remove single '/' in the beginning and end of a string static std::string removeSlashes(std::string); diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index f1b5b83671..7a26647970 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -71,6 +71,11 @@ class ExternalBlockStorage std::optional infix, // e.g. for distinguishing MPI ranks T const *data) -> std::string; + auto externalStorageLocation() const -> nlohmann::json + { + return "implement me"; + } + static void sanitizeString(std::string &s); }; diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 2550a7f02a..9656a271d0 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -31,6 +31,7 @@ #include "openPMD/auxiliary/Memory.hpp" #include "openPMD/auxiliary/StringManip.hpp" #include "openPMD/auxiliary/TypeTraits.hpp" +#include "openPMD/auxiliary/Variant.hpp" #include "openPMD/backend/Attribute.hpp" #include "openPMD/backend/Writable.hpp" #include "openPMD/toolkit/ExternalBlockStorage.hpp" @@ -43,6 +44,7 @@ #include #include #include +#include namespace openPMD { @@ -257,15 +259,74 @@ namespace internal } } // namespace internal +namespace +{ + void parse_internal_mode( + nlohmann::json const &mode_j, + std::string const &configLocation, + JSONIOHandlerImpl::DatasetMode_s &res) + { + using DatasetMode = JSONIOHandlerImpl::DatasetMode; + using SpecificationVia = JSONIOHandlerImpl::SpecificationVia; + + DatasetMode &ioMode = res.m_mode; + SpecificationVia &specificationVia = res.m_specificationVia; + bool &skipWarnings = res.m_skipWarnings; + + auto modeOption = openPMD::json::asLowerCaseStringDynamic(mode_j); + if (!modeOption.has_value()) + { + throw error::BackendConfigSchema( + {configLocation, "mode"}, + "Invalid value of non-string type (accepted values are " + "'dataset' and 'template'."); + } + auto mode = modeOption.value(); + if (mode == "dataset") + { + ioMode = DatasetMode::Dataset; + specificationVia = SpecificationVia::Manually; + } + else if (mode == "template") + { + ioMode = DatasetMode::Template; + specificationVia = SpecificationVia::Manually; + } + else if (mode == "template_no_warn") + { + ioMode = DatasetMode::Template; + specificationVia = SpecificationVia::Manually; + skipWarnings = true; + } + else + { + throw error::BackendConfigSchema( + {configLocation, "dataset", "mode"}, + "Invalid value: '" + mode + + "' (accepted values are 'dataset' and 'template'."); + } + } + void parse_external_mode( + [[maybe_unused]] json::TracingJSON mode, + [[maybe_unused]] std::string const &configLocation, + JSONIOHandlerImpl::DatasetMode_s &res) + { + using SpecificationVia = JSONIOHandlerImpl::SpecificationVia; + + res.m_mode = std::make_shared( + ExternalBlockStorage::makeStdioSession("./external_blocks")); + res.m_specificationVia = SpecificationVia::Manually; + } +} // namespace + auto JSONIOHandlerImpl::retrieveDatasetMode( openPMD::json::TracingJSON &config) const -> DatasetMode_s { // start with / copy from current config auto res = m_datasetMode; - DatasetMode &ioMode = res.m_mode; - SpecificationVia &specificationVia = res.m_specificationVia; - bool &skipWarnings = res.m_skipWarnings; - if (auto [configLocation, maybeConfig] = getBackendConfig(config); + + if (auto [configLocation, maybeConfig] = + getBackendConfig(config, backendConfigKey()); maybeConfig.has_value()) { auto jsonConfig = maybeConfig.value(); @@ -274,38 +335,14 @@ auto JSONIOHandlerImpl::retrieveDatasetMode( auto datasetConfig = jsonConfig["dataset"]; if (datasetConfig.json().contains("mode")) { - auto modeOption = openPMD::json::asLowerCaseStringDynamic( - datasetConfig["mode"].json()); - if (!modeOption.has_value()) - { - throw error::BackendConfigSchema( - {configLocation, "mode"}, - "Invalid value of non-string type (accepted values are " - "'dataset' and 'template'."); - } - auto mode = modeOption.value(); - if (mode == "dataset") - { - ioMode = DatasetMode::Dataset; - specificationVia = SpecificationVia::Manually; - } - else if (mode == "template") - { - ioMode = DatasetMode::Template; - specificationVia = SpecificationVia::Manually; - } - else if (mode == "template_no_warn") + auto mode = datasetConfig["mode"]; + if (mode.json().is_object()) { - ioMode = DatasetMode::Template; - specificationVia = SpecificationVia::Manually; - skipWarnings = true; + parse_external_mode(std::move(mode), configLocation, res); } else { - throw error::BackendConfigSchema( - {configLocation, "dataset", "mode"}, - "Invalid value: '" + mode + - "' (accepted values are 'dataset' and 'template'."); + parse_internal_mode(mode.json(), configLocation, res); } } } @@ -377,7 +414,13 @@ std::string JSONIOHandlerImpl::backendConfigKey() const std::pair> JSONIOHandlerImpl::getBackendConfig(openPMD::json::TracingJSON &config) const { - std::string configLocation = backendConfigKey(); + return getBackendConfig(config, backendConfigKey()); +} + +std::pair> +JSONIOHandlerImpl::getBackendConfig( + openPMD::json::TracingJSON &config, std::string const &configLocation) +{ if (config.json().contains(configLocation)) { return std::make_pair( @@ -444,9 +487,6 @@ void JSONIOHandlerImpl::init(openPMD::json::TracingJSON config) (void)_; warnUnusedJson(backendConfig.value()); } - - externalBlockStorage = - ExternalBlockStorage::makeStdioSession("./external_blocks/"); } JSONIOHandlerImpl::~JSONIOHandlerImpl() = default; @@ -642,47 +682,51 @@ void JSONIOHandlerImpl::createDataset( auto &dset = jsonVal[name]; dset["datatype"] = internal::jsonDatatypeToString(parameter.dtype); - switch (localMode) - { - case DatasetMode::Dataset: { - auto extent = parameter.extent; - switch (parameter.dtype) - { - case Datatype::CFLOAT: - case Datatype::CDOUBLE: - case Datatype::CLONG_DOUBLE: { - extent.push_back(2); - break; - } - default: - break; - } - if (parameter.extent.size() != 1 || - parameter.extent[0] != Dataset::UNDEFINED_EXTENT) - { - // TOML does not support nulls, so initialize with zero - dset["data"] = initializeNDArray( - extent, - m_fileFormat == FileFormat::Json ? std::optional{} - : parameter.dtype); - } - break; - } - case DatasetMode::Template: - if (parameter.extent != Extent{0} && - parameter.extent[0] != Dataset::UNDEFINED_EXTENT) - { - dset["extent"] = parameter.extent; - } - else - { - // no-op - // If extent is empty or no datatype is defined, don't bother - // writing it. - // The datatype is written above anyway. - } - break; - } + std::visit( + auxiliary::overloaded{ + [&](DatasetMode::Dataset_t const &) { + auto extent = parameter.extent; + switch (parameter.dtype) + { + case Datatype::CFLOAT: + case Datatype::CDOUBLE: + case Datatype::CLONG_DOUBLE: { + extent.push_back(2); + break; + } + default: + break; + } + if (parameter.extent.size() != 1 || + parameter.extent[0] != Dataset::UNDEFINED_EXTENT) + { + // TOML does not support nulls, so initialize with zero + dset["data"] = initializeNDArray( + extent, + m_fileFormat == FileFormat::Json + ? std::optional{} + : parameter.dtype); + } + }, + [&](DatasetMode::Template_t const &) { + if (parameter.extent != Extent{0} && + parameter.extent[0] != Dataset::UNDEFINED_EXTENT) + { + dset["extent"] = parameter.extent; + } + else + { + // no-op + // If extent is empty or no datatype is defined, don't + // bother writing it. The datatype is written above + // anyway. + } + }, + [&](DatasetMode::External_t const &) { + dset["extent"] = parameter.extent; + }}, + localMode.as_base()); + writable->written = true; m_dirty.emplace(file); } @@ -732,7 +776,8 @@ void JSONIOHandlerImpl::extendDataset( try { Extent datasetExtent; - std::tie(datasetExtent, localIOMode) = getExtent(j); + std::tie(datasetExtent, localIOMode) = + getExtent(j, m_datasetMode.m_mode); VERIFY_ALWAYS( datasetExtent.size() == parameters.extent.size(), "[JSON] Cannot change dimensionality of a dataset") @@ -750,38 +795,40 @@ void JSONIOHandlerImpl::extendDataset( "[JSON] The specified location contains no valid dataset"); } - switch (localIOMode) - { - case DatasetMode::Dataset: { - auto extent = parameters.extent; - auto datatype = stringToDatatype(j["datatype"].get()); - switch (datatype) - { - case Datatype::CFLOAT: - case Datatype::CDOUBLE: - case Datatype::CLONG_DOUBLE: { - extent.push_back(2); - break; - } - default: - // nothing to do - break; - } - // TOML does not support nulls, so initialize with zero - nlohmann::json newData = initializeNDArray( - extent, - m_fileFormat == FileFormat::Json ? std::optional{} - : datatype); - nlohmann::json &oldData = j["data"]; - mergeInto(newData, oldData); - j["data"] = newData; - } - break; - case DatasetMode::Template: { - j["extent"] = parameters.extent; - } - break; - } + std::visit( + auxiliary::overloaded{ + [&](DatasetMode::Dataset_t const &) { + auto extent = parameters.extent; + auto datatype = + stringToDatatype(j["datatype"].get()); + switch (datatype) + { + case Datatype::CFLOAT: + case Datatype::CDOUBLE: + case Datatype::CLONG_DOUBLE: { + extent.push_back(2); + break; + } + default: + // nothing to do + break; + } + // TOML does not support nulls, so initialize with zero + nlohmann::json newData = initializeNDArray( + extent, + m_fileFormat == FileFormat::Json ? std::optional{} + : datatype); + nlohmann::json &oldData = j["data"]; + mergeInto(newData, oldData); + j["data"] = newData; + }, + [&](DatasetMode::Template_t const &) { + j["extent"] = parameters.extent; + }, + [&](DatasetMode::External_t const &) { + j["extent"] = parameters.extent; + }}, + localIOMode.as_base()); writable->written = true; } @@ -977,7 +1024,7 @@ void JSONIOHandlerImpl::openDataset( *parameters.dtype = Datatype(stringToDatatype(datasetJson["datatype"].get())); - *parameters.extent = getExtent(datasetJson).first; + *parameters.extent = getExtent(datasetJson, m_datasetMode.m_mode).first; writable->written = true; } @@ -1199,49 +1246,49 @@ void JSONIOHandlerImpl::writeDataset( auto &jsonRoot = *obtainJsonContents(file); auto &j = jsonRoot[filePosition->id]; - switch (verifyDataset(parameters, j)) - { - case DatasetMode::Dataset: - break; - case DatasetMode::Template: { - std::optional rankInfix; + std::visit( + auxiliary::overloaded{ + [&](DatasetMode::Dataset_t const &) { + switchType(parameters.dtype, j, parameters); + }, + [&](DatasetMode::Template_t const &) { + if (!m_datasetMode.m_skipWarnings) + { + std::cerr << "[JSON/TOML backend: Warning] Trying to write " + "data to a " + "template dataset. Will skip." + << '\n'; + m_datasetMode.m_skipWarnings = true; + } + }, + [&](DatasetMode::External_t const &external) { + std::optional rankInfix; #if openPMD_HAVE_MPI - if (m_communicator.has_value()) - { - auto &comm = *m_communicator; - // TODO maybe cache the result for this computation - int rank, size; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &size); - std::stringstream s; - s << "r"; - write_rank_to_stream_with_sufficient_padding(s, rank, size); - rankInfix = s.str(); - } + if (m_communicator.has_value()) + { + auto &comm = *m_communicator; + // TODO maybe cache the result for this computation + int rank, size; + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &size); + std::stringstream s; + s << "r"; + write_rank_to_stream_with_sufficient_padding(s, rank, size); + rankInfix = s.str(); + } #endif - switchDatasetType( - parameters.dtype, - externalBlockStorage, - parameters.data.get(), - j.at("extent").get(), - parameters.offset, - parameters.extent, - jsonRoot, - filePosition->id, - std::move(rankInfix)); - // if (!m_datasetMode.m_skipWarnings) - // { - // std::cerr - // << "[JSON/TOML backend: Warning] Trying to write data to a " - // "template dataset. Will skip." - // << '\n'; - // m_datasetMode.m_skipWarnings = true; - // } - return; - } - } - - switchType(parameters.dtype, j, parameters); + switchDatasetType( + parameters.dtype, + *external, + parameters.data.get(), + j.at("extent").get(), + parameters.offset, + parameters.extent, + jsonRoot, + filePosition->id, + std::move(rankInfix)); + }}, + verifyDataset(parameters, j).as_base()); writable->written = true; } @@ -1320,30 +1367,35 @@ void JSONIOHandlerImpl::readDataset( auto &j = obtainJsonContents(writable); DatasetMode localMode = verifyDataset(parameters, j); - switch (localMode) - { - case DatasetMode::Template: - std::cerr << "[Warning] Cannot read chunks in Template mode of JSON " - "backend. Will fill with zeroes instead." - << '\n'; - switchNonVectorType( - parameters.dtype, parameters.data.get(), parameters.extent); - return; - case DatasetMode::Dataset: - try - { - switchType(parameters.dtype, j["data"], parameters); - } - catch (json::basic_json::type_error &) - { - throw error::ReadError( - error::AffectedObject::Dataset, - error::Reason::UnexpectedContent, - "JSON", - "The given path does not contain a valid dataset."); - } - break; - } + std::visit( + auxiliary::overloaded{ + [&](DatasetMode::Dataset_t const &) { + try + { + switchType( + parameters.dtype, j["data"], parameters); + } + catch (json::basic_json::type_error &) + { + throw error::ReadError( + error::AffectedObject::Dataset, + error::Reason::UnexpectedContent, + "JSON", + "The given path does not contain a valid dataset."); + } + }, + [&](DatasetMode::Template_t const &) { + std::cerr + << "[Warning] Cannot read chunks in Template mode of JSON " + "backend. Will fill with zeroes instead." + << '\n'; + switchNonVectorType( + parameters.dtype, parameters.data.get(), parameters.extent); + }, + [&](DatasetMode::External_t const &) { + throw std::runtime_error("Unimplemented"); + }}, + localMode.as_base()); } namespace @@ -1865,7 +1917,8 @@ Extent JSONIOHandlerImpl::getMultiplicators(Extent const &extent) return res; } -auto JSONIOHandlerImpl::getExtent(nlohmann::json &j) +auto JSONIOHandlerImpl::getExtent( + nlohmann::json &j, DatasetMode const &baseMode) -> std::pair { Extent res; @@ -1894,7 +1947,10 @@ auto JSONIOHandlerImpl::getExtent(nlohmann::json &j) } else if (j.contains("extent")) { - ioMode = DatasetMode::Template; + ioMode = + std::holds_alternative(baseMode.as_base()) + ? baseMode + : DatasetMode{DatasetMode::Template}; res = j["extent"].get(); } else @@ -2137,18 +2193,25 @@ auto JSONIOHandlerImpl::putJsonContents( return it; } - switch (m_datasetMode.m_mode) - { - case DatasetMode::Dataset: - (*it->second)["platform_byte_widths"] = platformSpecifics(); - (*it->second)[JSONDefaults::openpmd_internal] - [JSONDefaults::DatasetMode] = "dataset"; - break; - case DatasetMode::Template: - (*it->second)[JSONDefaults::openpmd_internal] - [JSONDefaults::DatasetMode] = "template"; - break; - } + std::visit( + auxiliary::overloaded{ + [&](DatasetMode::Dataset_t const &) { + (*it->second)["platform_byte_widths"] = platformSpecifics(); + (*it->second)[JSONDefaults::openpmd_internal] + [JSONDefaults::DatasetMode] = "dataset"; + }, + [&](DatasetMode::Template_t const &) { + (*it->second)[JSONDefaults::openpmd_internal] + [JSONDefaults::DatasetMode] = "template"; + }, + [&](DatasetMode::External_t const &external) { + (*it->second)["platform_byte_widths"] = platformSpecifics(); + (*it->second)["external_storage"] = + external->externalStorageLocation(); + (*it->second)[JSONDefaults::openpmd_internal] + [JSONDefaults::DatasetMode] = "external"; + }}, + m_datasetMode.m_mode.as_base()); switch (m_attributeMode.m_mode) { @@ -2377,7 +2440,7 @@ auto JSONIOHandlerImpl::verifyDataset( try { Extent datasetExtent; - std::tie(datasetExtent, res) = getExtent(j); + std::tie(datasetExtent, res) = getExtent(j, m_datasetMode.m_mode); VERIFY_ALWAYS( datasetExtent.size() == parameters.extent.size(), "[JSON] Read/Write request does not fit the dataset's dimension"); From 353ca21a6fa0ddc9957156cd0a9e059bd9092317 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 30 Sep 2025 15:34:00 +0200 Subject: [PATCH 12/16] Add configuration --- src/IO/JSON/JSONIOHandlerImpl.cpp | 116 ++++++++++++++++++++++++++++-- 1 file changed, 112 insertions(+), 4 deletions(-) diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 9656a271d0..686d8fe641 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -306,15 +306,123 @@ namespace "' (accepted values are 'dataset' and 'template'."); } } + + template + auto optionalOrElse(std::optional o, OrElse &&orElse) -> T + { + if (o.has_value()) + { + return *std::move(o); + } + else + { + return std::forward(orElse)(); + } + } + void parse_external_mode( - [[maybe_unused]] json::TracingJSON mode, - [[maybe_unused]] std::string const &configLocation, + json::TracingJSON mode, + std::string const &configLocation, JSONIOHandlerImpl::DatasetMode_s &res) { using SpecificationVia = JSONIOHandlerImpl::SpecificationVia; + using ExternalBlockStorage = openPMD::ExternalBlockStorage; + + auto get_mandatory = [&](char const *key, + bool lowercase) -> std::string { + if (!mode.json().contains(key)) + { + throw error::BackendConfigSchema( + {configLocation, "mode", key}, "Mandatory key."); + } + auto const &val = mode.json({key}); + return optionalOrElse( + lowercase ? openPMD::json::asLowerCaseStringDynamic(val) + : openPMD::json::asStringDynamic(val), + [&]() -> std::string { + throw error::BackendConfigSchema( + {configLocation, "mode", key}, + "Must be of string type."); + }); + }; + auto if_contains_optional = + [&](char const *key, bool lowercase, auto &&then) { + if (!mode.json().contains(key)) + { + return; + } + auto const &val = mode.json({key}); + static_cast(then)(optionalOrElse( + lowercase ? openPMD::json::asLowerCaseStringDynamic(val) + : openPMD::json::asStringDynamic(val), + [&]() -> std::string { + throw error::BackendConfigSchema( + {configLocation, "mode", key}, + "Must be of string type."); + })); + }; + auto modeString = get_mandatory("type", true); + + if (modeString == "stdio") + { + auto builder = ExternalBlockStorage::makeStdioSession( + get_mandatory("directory", false)); + + if_contains_optional("open_mode", false, [&](std::string openMode) { + builder.setOpenMode(std::move(openMode)); + }); + + res.m_mode = + std::make_shared(builder.build()); + } + else if (modeString == "aws") + { + openPMD::internal::AwsBuilder builder( + get_mandatory("bucket_name", false), + get_mandatory("access_key_id", false), + get_mandatory("secret_access_key", false)); + + if_contains_optional( + "session_token", false, [&](std::string sessionToken) { + builder.setSessionToken(std::move(sessionToken)); + }); + if_contains_optional( + "endpoint", false, [&](std::string endpointOverride) { + builder.setEndpointOverride(std::move(endpointOverride)); + }); + if_contains_optional("region", false, [&](std::string region) { + builder.setRegion(std::move(region)); + }); + if_contains_optional( + "scheme", true, [&](std::string const &scheme) { + if (scheme == "http") + { + builder.setScheme( + openPMD::internal::AwsBuilder::Scheme::HTTP); + } + else if (scheme == "https") + { + builder.setScheme( + openPMD::internal::AwsBuilder::Scheme::HTTPS); + } + else + { + throw error::BackendConfigSchema( + {configLocation, "mode", "scheme"}, + "Must be either 'http' or 'https'."); + } + }); + + res.m_mode = + std::make_shared(builder.build()); + } + else + { + throw error::BackendConfigSchema( + {configLocation, "mode", "type"}, + "Must be either 'stdio' or 'aws'."); + } - res.m_mode = std::make_shared( - ExternalBlockStorage::makeStdioSession("./external_blocks")); res.m_specificationVia = SpecificationVia::Manually; } } // namespace From 7c4a7f1d89fc3fc688ca27e63434acf29cc8bfe6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 30 Sep 2025 18:31:16 +0200 Subject: [PATCH 13/16] Add option to init AWS API --- include/openPMD/Series.hpp | 3 + src/Series.cpp | 121 ++++++++++++++++++++++++------------- 2 files changed, 83 insertions(+), 41 deletions(-) diff --git a/include/openPMD/Series.hpp b/include/openPMD/Series.hpp index 841f238006..8da75c9d99 100644 --- a/include/openPMD/Series.hpp +++ b/include/openPMD/Series.hpp @@ -35,6 +35,7 @@ #include "openPMD/config.hpp" #include "openPMD/snapshots/Snapshots.hpp" #include "openPMD/version.hpp" +#include #if openPMD_HAVE_MPI #include @@ -238,6 +239,8 @@ namespace internal std::optional> m_deferred_initialization = std::nullopt; + std::optional m_manageAwsAPI = std::nullopt; + void close(); #if openPMD_HAVE_MPI diff --git a/src/Series.cpp b/src/Series.cpp index b1a1b33d3a..881bcde8b2 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -46,6 +46,8 @@ #include "openPMD/snapshots/StatefulIterator.hpp" #include "openPMD/version.hpp" +#include + #include #include #include @@ -948,38 +950,25 @@ void Series::init( } } -template -auto Series::initIOHandler( - std::string const &filepath, - std::string const &options, - Access at, - bool resolve_generic_extension, - MPI_Communicator &&...comm) - -> std::tuple, TracingJSON> +namespace { - auto &series = get(); - - json::TracingJSON optionsJson = json::parseOptions( - options, - std::forward(comm)..., - /* considerFiles = */ true); - auto input = parseInput(filepath); - if (resolve_generic_extension && input->format == Format::GENERIC && - !access::create(at)) + template + void do_resolve_generic_extension_read( + ParsedInput_t &input, std::string const &filepath, Access at) { auto isPartOfSeries = - input->iterationEncoding == IterationEncoding::fileBased + input.iterationEncoding == IterationEncoding::fileBased ? matcher( - input->filenamePrefix, - input->filenamePadding, - input->filenamePostfix, + input.filenamePrefix, + input.filenamePadding, + input.filenamePostfix, std::nullopt) - : matcher(input->name, -1, "", std::nullopt); + : matcher(input.name, -1, "", std::nullopt); std::optional extension; std::set additional_extensions; autoDetectPadding( isPartOfSeries, - input->path, + input.path, [&extension, &additional_extensions](std::string const &, Match const &match) { auto const &ext = match.extension.value(); @@ -1012,8 +1001,8 @@ auto Series::initIOHandler( std::nullopt, error.str()); } - input->filenameExtension = *extension; - input->format = determineFormat(*extension); + input.filenameExtension = *extension; + input.format = determineFormat(*extension); } else if (access::read(at)) { @@ -1025,30 +1014,68 @@ auto Series::initIOHandler( } } + template + void do_resolve_generic_extension_write(ParsedInput_t &input) + { + { + if (input.format == /* still */ Format::GENERIC) + { + throw error::WrongAPIUsage( + "Unable to automatically determine filename extension. " + "Please " + "specify in some way."); + } + else if (input.format == Format::ADIOS2_BP) + { + // Since ADIOS2 has multiple extensions depending on the engine, + // we need to pass this job on to the backend + input.filenameExtension = ".%E"; + } + else + { + input.filenameExtension = suffix(input.format); + } + } + } +} // namespace + +template +auto Series::initIOHandler( + std::string const &filepath, + std::string const &options, + Access at, + bool resolve_generic_extension, + MPI_Communicator &&...comm) + -> std::tuple, TracingJSON> +{ + auto &series = get(); + + json::TracingJSON optionsJson = json::parseOptions( + options, + std::forward(comm)..., + /* considerFiles = */ true); + auto input = parseInput(filepath); + + if (resolve_generic_extension && input->format == Format::GENERIC && + !access::create(at)) + { + do_resolve_generic_extension_read(*input, filepath, at); + } + // default options series.m_parseLazily = at == Access::READ_LINEAR; // now check for user-specified options parseJsonOptions(optionsJson, *input); + if (series.m_manageAwsAPI.has_value()) + { + Aws::InitAPI(*series.m_manageAwsAPI); + } + if (resolve_generic_extension && !input->filenameExtension.has_value()) { - if (input->format == /* still */ Format::GENERIC) - { - throw error::WrongAPIUsage( - "Unable to automatically determine filename extension. Please " - "specify in some way."); - } - else if (input->format == Format::ADIOS2_BP) - { - // Since ADIOS2 has multiple extensions depending on the engine, - // we need to pass this job on to the backend - input->filenameExtension = ".%E"; - } - else - { - input->filenameExtension = suffix(input->format); - } + do_resolve_generic_extension_write(*input); } return std::make_tuple(std::move(input), std::move(optionsJson)); } @@ -3013,6 +3040,14 @@ void Series::parseJsonOptions(TracingJSON &options, ParsedInput &input) { series.m_rankTable.m_rankTableSource = std::move(rankTableSource); } + { + bool doManageAwsAPI = false; + getJsonOption(options, "init_aws_api", doManageAwsAPI); + if (doManageAwsAPI) + { + series.m_manageAwsAPI = std::make_optional(); + } + } // backend key { std::map const backendDescriptors{ @@ -3099,6 +3134,10 @@ namespace internal // we must not throw in a destructor try { + if (m_manageAwsAPI.has_value()) + { + Aws::ShutdownAPI(*m_manageAwsAPI); + } close(); } catch (std::exception const &ex) From c82d360a5df8c272e0cedb52fdc05d56d2f6ac3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 1 Oct 2025 14:47:55 +0200 Subject: [PATCH 14/16] Add verifySSL parameter --- include/openPMD/toolkit/AwsBuilder.hpp | 2 ++ src/IO/JSON/JSONIOHandlerImpl.cpp | 16 ++++++++++++++++ src/toolkit/AwsBuilder.cpp | 11 +++++++++++ 3 files changed, 29 insertions(+) diff --git a/include/openPMD/toolkit/AwsBuilder.hpp b/include/openPMD/toolkit/AwsBuilder.hpp index 3bb8cef491..4a8ad691b9 100644 --- a/include/openPMD/toolkit/AwsBuilder.hpp +++ b/include/openPMD/toolkit/AwsBuilder.hpp @@ -30,6 +30,7 @@ struct AwsBuilder std::optional m_endpointOverride; std::optional m_region; std::optional m_scheme; + std::optional m_verifySSL; auto setBucketName(std::string bucketName) -> AwsBuilder &; auto setCredentials(std::string accessKeyId, std::string secretKey) @@ -38,6 +39,7 @@ struct AwsBuilder auto setEndpointOverride(std::string endpoint) -> AwsBuilder &; auto setRegion(std::string regionName) -> AwsBuilder &; auto setScheme(Scheme s) -> AwsBuilder &; + auto setVerifySSL(bool verify) -> AwsBuilder &; operator ::openPMD::ExternalBlockStorage(); auto build() -> ::openPMD::ExternalBlockStorage; diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 686d8fe641..8695f5958d 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -361,6 +361,19 @@ namespace "Must be of string type."); })); }; + auto if_contains_optional_bool = [&](char const *key, auto &&then) { + if (!mode.json().contains(key)) + { + return; + } + auto const &val = mode.json({key}); + if (!val.is_boolean()) + { + throw error::BackendConfigSchema( + {configLocation, "mode", key}, "Must be of boolean type."); + } + static_cast(then)(val.get()); + }; auto modeString = get_mandatory("type", true); if (modeString == "stdio") @@ -393,6 +406,9 @@ namespace if_contains_optional("region", false, [&](std::string region) { builder.setRegion(std::move(region)); }); + if_contains_optional_bool("verify_ssl", [&](bool verifySSL) { + builder.setVerifySSL(verifySSL); + }); if_contains_optional( "scheme", true, [&](std::string const &scheme) { if (scheme == "http") diff --git a/src/toolkit/AwsBuilder.cpp b/src/toolkit/AwsBuilder.cpp index 13caa1f878..cb21fd399a 100644 --- a/src/toolkit/AwsBuilder.cpp +++ b/src/toolkit/AwsBuilder.cpp @@ -48,6 +48,12 @@ auto AwsBuilder::setScheme(Scheme s) -> AwsBuilder & return *this; } +auto AwsBuilder::setVerifySSL(bool verify) -> AwsBuilder & +{ + m_verifySSL = verify; + return *this; +} + auto internal::AwsBuilder::setSessionToken(std::string sessionToken) -> AwsBuilder & { @@ -88,6 +94,11 @@ AwsBuilder::operator ExternalBlockStorage() config.connectTimeoutMs = 5000; config.requestTimeoutMs = 15000; + if (m_verifySSL.has_value()) + { + config.verifySSL = *m_verifySSL; + } + auto aws_credentials = [&]() -> Aws::Auth::AWSCredentials { if (m_sessionToken.has_value()) { From 359506f0591e0ac7b585e98dd14219602f1eb6e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 1 Oct 2025 14:48:12 +0200 Subject: [PATCH 15/16] Add TODO comment --- src/IO/JSON/JSONIOHandlerImpl.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 8695f5958d..7f953d8ac5 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -391,6 +391,8 @@ namespace else if (modeString == "aws") { openPMD::internal::AwsBuilder builder( + // TODO: bucket_name: introduce expansion pattern for openPMD + // file name get_mandatory("bucket_name", false), get_mandatory("access_key_id", false), get_mandatory("secret_access_key", false)); From c9e0350dbb86b0af5877164a3d824c131404b0c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 1 Oct 2025 15:22:16 +0200 Subject: [PATCH 16/16] Add meta information object --- include/openPMD/toolkit/Aws.hpp | 8 +++- .../openPMD/toolkit/ExternalBlockStorage.hpp | 8 ++-- include/openPMD/toolkit/Stdio.hpp | 2 + src/toolkit/Aws.cpp | 21 ++++++++- src/toolkit/AwsBuilder.cpp | 4 +- src/toolkit/ExternalBlockStorage.cpp | 6 +++ src/toolkit/Stdio.cpp | 47 ++++++++++++++++++- 7 files changed, 86 insertions(+), 10 deletions(-) diff --git a/include/openPMD/toolkit/Aws.hpp b/include/openPMD/toolkit/Aws.hpp index 737629ec2b..5051fa2fbc 100644 --- a/include/openPMD/toolkit/Aws.hpp +++ b/include/openPMD/toolkit/Aws.hpp @@ -11,11 +11,17 @@ struct ExternalBlockStorageAws : ExternalBlockStorageBackend private: Aws::S3::S3Client m_client; std::string m_bucketName; + std::optional m_endpoint; public: - ExternalBlockStorageAws(Aws::S3::S3Client, std::string bucketName); + ExternalBlockStorageAws( + Aws::S3::S3Client, + std::string bucketName, + std::optional endpoint); auto put(std::string const &identifier, void const *data, size_t len) -> std::string override; + [[nodiscard]] auto externalStorageLocation() const + -> nlohmann::json override; ~ExternalBlockStorageAws() override; }; } // namespace openPMD::internal diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index 7a26647970..6634321809 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -24,6 +24,9 @@ struct ExternalBlockStorageBackend virtual auto put(std::string const &identifier, void const *data, size_t len) -> std::string = 0; + [[nodiscard]] virtual auto externalStorageLocation() const + -> nlohmann::json = 0; + virtual ~ExternalBlockStorageBackend(); }; } // namespace openPMD::internal @@ -71,10 +74,7 @@ class ExternalBlockStorage std::optional infix, // e.g. for distinguishing MPI ranks T const *data) -> std::string; - auto externalStorageLocation() const -> nlohmann::json - { - return "implement me"; - } + [[nodiscard]] auto externalStorageLocation() const -> nlohmann::json; static void sanitizeString(std::string &s); }; diff --git a/include/openPMD/toolkit/Stdio.hpp b/include/openPMD/toolkit/Stdio.hpp index 10a3e724be..1fb8713b67 100644 --- a/include/openPMD/toolkit/Stdio.hpp +++ b/include/openPMD/toolkit/Stdio.hpp @@ -14,6 +14,8 @@ struct ExternalBlockStorageStdio : ExternalBlockStorageBackend ExternalBlockStorageStdio(std::string directory, std::string openMode); auto put(std::string const &identifier, void const *data, size_t len) -> std::string override; + [[nodiscard]] auto externalStorageLocation() const + -> nlohmann::json override; ~ExternalBlockStorageStdio() override; }; } // namespace openPMD::internal diff --git a/src/toolkit/Aws.cpp b/src/toolkit/Aws.cpp index 2f05ec9402..50aff10007 100644 --- a/src/toolkit/Aws.cpp +++ b/src/toolkit/Aws.cpp @@ -30,8 +30,12 @@ struct imemstream : std::iostream namespace openPMD::internal { ExternalBlockStorageAws::ExternalBlockStorageAws( - Aws::S3::S3Client client, std::string bucketName) - : m_client{std::move(client)}, m_bucketName(std::move(bucketName)) + Aws::S3::S3Client client, + std::string bucketName, + std::optional endpoint) + : m_client{std::move(client)} + , m_bucketName(std::move(bucketName)) + , m_endpoint(std::move(endpoint)) { Aws::S3::Model::CreateBucketRequest create_request; create_request.SetBucket(m_bucketName); @@ -77,4 +81,17 @@ auto ExternalBlockStorageAws::put( return sanitized; } +[[nodiscard]] auto ExternalBlockStorageAws::externalStorageLocation() const + -> nlohmann::json +{ + nlohmann::json j; + j["provider"] = "s3"; + if (m_endpoint.has_value()) + { + j["endpoint"] = *m_endpoint; + } + j["bucket"] = m_bucketName; + return j; +} + } // namespace openPMD::internal diff --git a/src/toolkit/AwsBuilder.cpp b/src/toolkit/AwsBuilder.cpp index cb21fd399a..cc3cdc87ef 100644 --- a/src/toolkit/AwsBuilder.cpp +++ b/src/toolkit/AwsBuilder.cpp @@ -117,7 +117,9 @@ AwsBuilder::operator ExternalBlockStorage() false); return ExternalBlockStorage{std::make_unique( - std::move(s3_client), std::move(m_bucketName))}; + std::move(s3_client), + std::move(m_bucketName), + std::move(m_endpointOverride))}; } auto AwsBuilder::build() -> ExternalBlockStorage diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 2a0f4fc683..2d29023c9c 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -134,6 +134,12 @@ auto ExternalBlockStorage::store( return index_as_str; } +[[nodiscard]] auto ExternalBlockStorage::externalStorageLocation() const + -> nlohmann::json +{ + return m_worker->externalStorageLocation(); +} + void ExternalBlockStorage::sanitizeString(std::string &s) { for (char &c : s) diff --git a/src/toolkit/Stdio.cpp b/src/toolkit/Stdio.cpp index dac83d35c9..c3fecf6f2a 100644 --- a/src/toolkit/Stdio.cpp +++ b/src/toolkit/Stdio.cpp @@ -5,6 +5,39 @@ #include #include +namespace +{ +auto concat_filepath(std::string const &s1, std::string const &s2) + -> std::string +{ + if (s1.empty()) + { + return s2; + } + if (s2.empty()) + { + return s1; + } + bool ends_with_slash = + *s1.crbegin() == openPMD::auxiliary::directory_separator; + bool starts_with_slash = + *s2.cbegin() == openPMD::auxiliary::directory_separator; + + if (ends_with_slash ^ starts_with_slash) + { + return s1 + s2; + } + else if (ends_with_slash && starts_with_slash) + { + return s1 + (s2.c_str() + 1); + } + else + { + return s1 + openPMD::auxiliary::directory_separator + s2; + } +} +} // namespace + namespace openPMD::internal { ExternalBlockStorageStdio::ExternalBlockStorageStdio( @@ -33,7 +66,7 @@ auto ExternalBlockStorageStdio::put( { auto sanitized = identifier + ".dat"; ExternalBlockStorage::sanitizeString(sanitized); - std::string filepath = m_directory + "/" + sanitized; + std::string filepath = concat_filepath(m_directory, sanitized); if (len == 0) { @@ -63,6 +96,16 @@ auto ExternalBlockStorageStdio::put( filepath); } - return filepath; + return sanitized; +} + +[[nodiscard]] auto ExternalBlockStorageStdio::externalStorageLocation() const + -> nlohmann::json +{ + nlohmann::json j; + j["provider"] = "stdio"; + j["directory"] = m_directory; + j["open_mode"] = m_openMode; + return j; } } // namespace openPMD::internal