Skip to content

Commit 1a709f6

Browse files
dqwujayeshkrishna
authored and committed
Applying a chunking strategy to all non-scalar HDF5 variables
For the HDF5 I/O type, all record variables were previously chunked using the full dimension length. With very large dimensions, this could cause dataset creation to fail in the HDF5 library. Introduce a simple, effective chunking strategy, inspired by NetCDF4, that also applies to all non-record (non-scalar) variables. This ensures reliable dataset creation even for large dimensions.
1 parent aafe37e commit 1a709f6

File tree

1 file changed

+48
-18
lines changed

1 file changed

+48
-18
lines changed

src/clib/pioc_support.cpp

Lines changed: 48 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6939,28 +6939,58 @@ int spio_hdf5_def_var(iosystem_desc_t *ios, file_desc_t *file, const char *name,
69396939

69406940
file->hdf5_vars[varid].hdf5_type = h5_xtype;
69416941

6942-
if(ndims > 0){
6943-
hsize_t cdim[H5S_MAX_RANK];
6942+
if (ndims > 0)
6943+
{
6944+
hsize_t cdim[H5S_MAX_RANK];
6945+
int unlimdim = 0;
6946+
PIO_Offset type_size = spio_get_nc_type_size(xtype);
6947+
size_t suggested_size = 0;
69446948

6945-
for(i = 0; i < ndims; i++){
6946-
cdim[i] = mdims[i];
6947-
}
6949+
/* Count unlimited dimensions */
6950+
for (int d = 0; d < ndims; d++)
6951+
{
6952+
if (dims[d] == PIO_UNLIMITED)
6953+
unlimdim++;
6954+
}
69486955

6949-
if(dims[0] == PIO_UNLIMITED){
6950-
/* Chunk size along rec dim is always 1 */
6951-
cdim[0] = 1;
6952-
}
6956+
assert(unlimdim <= 1);
69536957

6954-
if(H5Pset_chunk(dcpl_id, ndims, cdim) < 0){
6955-
return pio_err(ios, file, PIO_EHDF5ERR, __FILE__, __LINE__,
6956-
"Defining variable (%s, varid = %d) in file (%s, ncid=%d) using HDF5 iotype failed. "
6957-
"The low level (HDF5) I/O library call failed to set the size of the chunks used to store a chunked layout dataset",
6958-
name, varid, pio_get_fname_from_file(file), file->pio_ncid);
6959-
}
6960-
}
6958+
/* Determine base chunk size for fixed dimensions */
6959+
if (ndims > unlimdim)
6960+
{
6961+
double target_elems = (double)(PIO_CHUNK_SIZE) / (double)type_size;
6962+
suggested_size = (size_t)pow(target_elems, 1.0 / (ndims - unlimdim));
69616963

6962-
if((ndims > 0) && (dims[0] == PIO_UNLIMITED)){
6963-
mdims[0] = H5S_UNLIMITED;
6964+
if (suggested_size < 1)
6965+
suggested_size = 1;
6966+
}
6967+
else
6968+
{
6969+
/* All dimensions are unlimited (we assume only one unlimited dimension) */
6970+
suggested_size = 1;
6971+
}
6972+
6973+
/* Set chunk size for each dimension */
6974+
for (int d = 0; d < ndims; d++)
6975+
{
6976+
if (dims[d] == PIO_UNLIMITED)
6977+
{
6978+
mdims[d] = H5S_UNLIMITED;
6979+
6980+
cdim[d] = 1; /* Chunk size along unlimited dimension is always 1 */
6981+
}
6982+
else
6983+
cdim[d] = (suggested_size > dims[d]) ? dims[d] : suggested_size;
6984+
}
6985+
6986+
/* Apply chunking to dataset creation property list */
6987+
if (H5Pset_chunk(dcpl_id, ndims, cdim) < 0)
6988+
{
6989+
return pio_err(ios, file, PIO_EHDF5ERR, __FILE__, __LINE__,
6990+
"Defining variable (%s, varid = %d) in file (%s, ncid=%d) using HDF5 iotype failed. "
6991+
"The low level (HDF5) I/O library call failed to set the size of the chunks used to store a chunked layout dataset",
6992+
name, varid, pio_get_fname_from_file(file), file->pio_ncid);
6993+
}
69646994
}
69656995

69666996
sid = H5Screate_simple(ndims, dims, mdims);

0 commit comments

Comments
 (0)