Skip to content

Commit fa714a9

Browse files
Resizable Datasets (#829)
* Frontend support for resizing datasets
* Backend support for resizing datasets
* Test resizable datasets
1 parent 2f1798f commit fa714a9

10 files changed

+426
-57
lines changed

include/openPMD/Dataset.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,14 @@ class Dataset
4040
public:
4141
Dataset(Datatype, Extent, std::string options = "{}");
4242

43+
/**
44+
* @brief Constructor that sets the datatype to undefined.
45+
*
46+
* Helpful for resizing datasets, since datatypes need not be given twice.
47+
*
48+
*/
49+
Dataset( Extent );
50+
4351
Dataset& extend(Extent newExtent);
4452
Dataset& setChunkSize(Extent const&);
4553
Dataset& setCompression(std::string const&, uint8_t const);

include/openPMD/IO/IOTask.hpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -281,8 +281,7 @@ template<>
281281
struct OPENPMDAPI_EXPORT Parameter< Operation::EXTEND_DATASET > : public AbstractParameter
282282
{
283283
Parameter() = default;
284-
Parameter(Parameter const & p) : AbstractParameter(),
285-
name(p.name), extent(p.extent) {}
284+
Parameter(Parameter const & p) : AbstractParameter(), extent(p.extent) {}
286285

287286
std::unique_ptr< AbstractParameter >
288287
clone() const override
@@ -291,7 +290,6 @@ struct OPENPMDAPI_EXPORT Parameter< Operation::EXTEND_DATASET > : public Abstrac
291290
new Parameter< Operation::EXTEND_DATASET >(*this));
292291
}
293292

294-
std::string name = "";
295293
Extent extent = {};
296294
};
297295

include/openPMD/RecordComponent.hpp

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,26 @@ class RecordComponent : public BaseRecordComponent
100100

101101
RecordComponent& setUnitSI(double);
102102

103-
RecordComponent& resetDataset(Dataset);
103+
/**
104+
* @brief Declare the dataset's type and extent.
105+
*
106+
* Calling this again after flushing will require resizing the dataset.
107+
* Support for this depends on the backend.
108+
* Unsupported are:
109+
* * Changing the datatype.
110+
* * Shrinking any dimension's extent.
111+
* * Changing the number of dimensions.
112+
*
113+
* Backend support for resizing datasets:
114+
* * JSON: Supported
115+
* * ADIOS1: Unsupported
116+
* * ADIOS2: Supported as of ADIOS2 2.7.0
117+
* * HDF5: (Currently) unsupported.
118+
* Will probably be supported as soon as chunking is supported in HDF5.
119+
*
120+
* @return RecordComponent&
121+
*/
122+
RecordComponent & resetDataset( Dataset );
104123

105124
uint8_t getDimensionality() const;
106125
Extent getExtent() const;
@@ -196,6 +215,10 @@ class RecordComponent : public BaseRecordComponent
196215
std::shared_ptr< std::queue< IOTask > > m_chunks;
197216
std::shared_ptr< Attribute > m_constantValue;
198217
std::shared_ptr< bool > m_isEmpty = std::make_shared< bool >( false );
218+
// User has extended the dataset, but the EXTEND task must yet be flushed
219+
// to the backend
220+
std::shared_ptr< bool > m_hasBeenExtended =
221+
std::make_shared< bool >( false );
199222

200223
private:
201224
void flush(std::string const&);

src/Dataset.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,12 @@ Dataset::Dataset(Datatype d, Extent e, std::string options_in)
3434
options{std::move(options_in)}
3535
{ }
3636

37-
Dataset&
38-
Dataset::extend(Extent newExtents)
37+
Dataset::Dataset( Extent e ) : Dataset( Datatype::UNDEFINED, std::move( e ) )
38+
{
39+
}
40+
41+
Dataset &
42+
Dataset::extend( Extent newExtents )
3943
{
4044
if( newExtents.size() != rank )
4145
throw std::runtime_error("Dimensionality of extended Dataset must match the original dimensionality");

src/IO/ADIOS/ADIOS2IOHandler.cpp

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -403,15 +403,58 @@ void ADIOS2IOHandlerImpl::createDataset(
403403
}
404404
}
405405

406-
void ADIOS2IOHandlerImpl::extendDataset(
407-
Writable *, const Parameter< Operation::EXTEND_DATASET > & )
406+
namespace detail
407+
{
408+
struct DatasetExtender
409+
{
410+
template< typename T, typename... Args >
411+
void
412+
operator()(
413+
adios2::IO & IO,
414+
std::string const & variable,
415+
Extent const & newShape )
416+
{
417+
auto var = IO.InquireVariable< T >( variable );
418+
if( !var )
419+
{
420+
throw std::runtime_error(
421+
"[ADIOS2] Unable to retrieve variable for resizing: '" +
422+
variable + "'." );
423+
}
424+
adios2::Dims dims;
425+
dims.reserve( newShape.size() );
426+
for( auto ext : newShape )
427+
{
428+
dims.push_back( ext );
429+
}
430+
var.SetShape( dims );
431+
}
432+
433+
std::string errorMsg = "ADIOS2: extendDataset()";
434+
};
435+
} // namespace detail
436+
437+
void
438+
ADIOS2IOHandlerImpl::extendDataset(
439+
Writable * writable,
440+
const Parameter< Operation::EXTEND_DATASET > & parameters )
408441
{
409-
throw std::runtime_error(
410-
"[ADIOS2] Dataset extension not implemented in ADIOS backend" );
442+
VERIFY_ALWAYS(
443+
m_handler->m_backendAccess != Access::READ_ONLY,
444+
"[ADIOS2] Cannot extend datasets in read-only mode." );
445+
setAndGetFilePosition( writable );
446+
auto file = refreshFileFromParent( writable );
447+
std::string name = nameOfVariable( writable );
448+
auto & filedata = getFileData( file );
449+
static detail::DatasetExtender de;
450+
Datatype dt = detail::fromADIOS2Type( filedata.m_IO.VariableType( name ) );
451+
switchAdios2VariableType( dt, de, filedata.m_IO, name, parameters.extent );
411452
}
412453

413-
void ADIOS2IOHandlerImpl::openFile(
414-
Writable * writable, const Parameter< Operation::OPEN_FILE > & parameters )
454+
void
455+
ADIOS2IOHandlerImpl::openFile(
456+
Writable * writable,
457+
const Parameter< Operation::OPEN_FILE > & parameters )
415458
{
416459
if ( !auxiliary::directory_exists( m_handler->directory ) )
417460
{

src/IO/HDF5/HDF5IOHandler.cpp

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -346,25 +346,36 @@ HDF5IOHandlerImpl::extendDataset(Writable* writable,
346346
if( !writable->written )
347347
throw std::runtime_error("[HDF5] Extending an unwritten Dataset is not possible.");
348348

349-
auto file = getFile(writable->parent).get();
350-
hid_t node_id, dataset_id;
351-
node_id = H5Gopen(file.id,
352-
concrete_h5_file_position(writable->parent).c_str(),
353-
H5P_DEFAULT);
354-
VERIFY(node_id >= 0, "[HDF5] Internal error: Failed to open HDF5 group during dataset extension");
355-
356-
/* Sanitize name */
357-
std::string name = parameters.name;
358-
if( auxiliary::starts_with(name, '/') )
359-
name = auxiliary::replace_first(name, "/", "");
360-
if( !auxiliary::ends_with(name, '/') )
361-
name += '/';
362-
363-
dataset_id = H5Dopen(node_id,
364-
name.c_str(),
349+
auto res = getFile( writable );
350+
if( !res )
351+
res = getFile( writable->parent );
352+
hid_t dataset_id = H5Dopen(res.get().id,
353+
concrete_h5_file_position(writable).c_str(),
365354
H5P_DEFAULT);
366355
VERIFY(dataset_id >= 0, "[HDF5] Internal error: Failed to open HDF5 dataset during dataset extension");
367356

357+
// Datasets may only be extended if they have chunked layout, so let's see
358+
// whether this one does
359+
{
360+
hid_t dataset_space = H5Dget_space( dataset_id );
361+
int ndims = H5Sget_simple_extent_ndims( dataset_space );
362+
VERIFY(
363+
ndims >= 0,
364+
"[HDF5]: Internal error: Failed to retrieve dimensionality of "
365+
"dataset "
366+
"during dataset read." );
367+
hid_t propertyList = H5Dget_create_plist( dataset_id );
368+
std::vector< hsize_t > chunkExtent( ndims, 0 );
369+
int chunkDimensionality =
370+
H5Pget_chunk( propertyList, ndims, chunkExtent.data() );
371+
if( chunkDimensionality < 0 )
372+
{
373+
throw std::runtime_error(
374+
"[HDF5] Cannot extend datasets unless written with chunked "
375+
"layout (currently unsupported)." );
376+
}
377+
}
378+
368379
std::vector< hsize_t > size;
369380
for( auto const& val : parameters.extent )
370381
size.push_back(static_cast< hsize_t >(val));
@@ -375,8 +386,6 @@ HDF5IOHandlerImpl::extendDataset(Writable* writable,
375386

376387
status = H5Dclose(dataset_id);
377388
VERIFY(status == 0, "[HDF5] Internal error: Failed to close HDF5 dataset during dataset extension");
378-
status = H5Gclose(node_id);
379-
VERIFY(status == 0, "[HDF5] Internal error: Failed to close HDF5 group during dataset extension");
380389
}
381390

382391
void

src/IO/JSON/JSONIOHandlerImpl.cpp

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -248,18 +248,42 @@ namespace openPMD
248248
}
249249
}
250250

251+
namespace
252+
{
253+
void
254+
mergeInto( nlohmann::json & into, nlohmann::json & from );
255+
void
256+
mergeInto( nlohmann::json & into, nlohmann::json & from )
257+
{
258+
if( !from.is_array() )
259+
{
260+
into = from; // copy
261+
}
262+
else
263+
{
264+
size_t size = from.size();
265+
for( size_t i = 0; i < size; ++i )
266+
{
267+
if( !from[ i ].is_null() )
268+
{
269+
mergeInto( into[ i ], from[ i ] );
270+
}
271+
}
272+
}
273+
}
274+
} // namespace
251275

252-
void JSONIOHandlerImpl::extendDataset(
276+
void
277+
JSONIOHandlerImpl::extendDataset(
253278
Writable * writable,
254-
Parameter< Operation::EXTEND_DATASET > const & parameters
255-
)
279+
Parameter< Operation::EXTEND_DATASET > const & parameters )
256280
{
257-
VERIFY_ALWAYS(m_handler->m_backendAccess != Access::READ_ONLY,
281+
VERIFY_ALWAYS(
282+
m_handler->m_backendAccess != Access::READ_ONLY,
258283
"[JSON] Cannot extend a dataset in read-only mode." )
259-
refreshFileFromParent( writable );
260284
setAndGetFilePosition( writable );
261-
auto name = removeSlashes( parameters.name );
262-
auto & j = obtainJsonContents( writable )[name];
285+
refreshFileFromParent( writable );
286+
auto & j = obtainJsonContents( writable );
263287

264288
try
265289
{
@@ -280,25 +304,32 @@ namespace openPMD
280304
}
281305
} catch( json::basic_json::type_error & )
282306
{
283-
throw std::runtime_error( "[JSON] The specified location contains no valid dataset" );
307+
throw std::runtime_error(
308+
"[JSON] The specified location contains no valid dataset" );
284309
}
285310
switch( stringToDatatype( j[ "datatype" ].get< std::string >() ) )
286311
{
287312
case Datatype::CFLOAT:
288313
case Datatype::CDOUBLE:
289314
case Datatype::CLONG_DOUBLE:
290315
{
316+
// @todo test complex resizing
291317
auto complexExtent = parameters.extent;
292318
complexExtent.push_back( 2 );
293-
j["data"] = initializeNDArray( complexExtent );
319+
nlohmann::json newData = initializeNDArray( complexExtent );
320+
nlohmann::json & oldData = j[ "data" ];
321+
mergeInto( newData, oldData );
322+
j[ "data" ] = newData;
294323
break;
295324
}
296325
default:
297-
j["data"] = initializeNDArray( parameters.extent );
326+
nlohmann::json newData = initializeNDArray( parameters.extent );
327+
nlohmann::json & oldData = j[ "data" ];
328+
mergeInto( newData, oldData );
329+
j[ "data" ] = newData;
298330
break;
299331
}
300332
writable->written = true;
301-
302333
}
303334

304335
namespace

0 commit comments

Comments
 (0)