Skip to content

Commit c1e64b7

Browse files
gicmojgrewe
authored and committed
[h5x] auto convert ascii to utf8 strings
TBD
1 parent c865fd0 commit c1e64b7

File tree

1 file changed

+83
-0
lines changed

1 file changed

+83
-0
lines changed

backend/hdf5/h5x/H5DataType.cpp

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "H5DataType.hpp"
1111

1212
#include <memory>
13+
#include <mutex>
1314
#include <cstring>
1415

1516
namespace nix {
@@ -187,6 +188,8 @@ bool DataType::enum_equal(const DataType &other) const {
187188
}
188189

189190
} // h5x
191+
192+
// boolean types
190193
static herr_t bitfield2bool(hid_t src_id,
191194
hid_t dst_id,
192195
H5T_cdata_t *cdata,
@@ -257,6 +260,73 @@ static herr_t bitfield2bool(hid_t src_id,
257260
return 0;
258261
}
259262

263+
// string type conversion
264+
/**
 * Convert one variable-length string element of a conversion buffer.
 *
 * The buffer is treated as a sequence of `char *` slots. The source slot for
 * element `i` lives at byte offset `i * stride_src`, the destination slot at
 * byte offset `i * stride_dst`. The destination slot receives a freshly
 * `strdup`-allocated copy of the source string; the bytes themselves are
 * copied unchanged.
 *
 * A null source pointer is propagated as a null destination pointer instead
 * of being handed to `strdup`, which would be undefined behaviour.
 *
 * NOTE(review): the original source pointer is not freed here; when source
 * and destination slots coincide it is simply overwritten. Confirm who owns
 * the source string.
 *
 * @param buffer      start of the conversion buffer
 * @param i           index of the element to convert
 * @param stride_src  distance in bytes between consecutive source slots
 * @param stride_dst  distance in bytes between consecutive destination slots
 */
static void ascii2utf8_one(void *buffer,
                           size_t i,
                           size_t stride_src,
                           size_t stride_dst)
{
    char *base = static_cast<char *>(buffer);
    char **src = reinterpret_cast<char **>(base + (i * stride_src));
    char **dst = reinterpret_cast<char **>(base + (i * stride_dst));

    // Read the source pointer before writing: src and dst may alias.
    char *str = *src;

    *dst = (str != nullptr) ? ::strdup(str) : nullptr;
}
275+
276+
/**
 * HDF5 datatype conversion callback between two variable-length string
 * types (registered elsewhere in this file as "ascii2utf8").
 *
 * Dispatches on `cdata->command`:
 *  - H5T_CONV_INIT: declares that no background buffer is needed and accepts
 *    the conversion only if both types are variable-length strings.
 *  - H5T_CONV_FREE: nothing to release.
 *  - H5T_CONV_CONV: converts `nl` elements of `buf_i` in place via
 *    ascii2utf8_one().
 *
 * @param src_id      source datatype id
 * @param dst_id      destination datatype id
 * @param cdata       conversion control data (command, background needs)
 * @param nl          number of elements to convert
 * @param buf_stride  byte distance between elements, or 0 if the elements
 *                    are packed using the datatype sizes
 * @param bkg_stride  unused
 * @param buf_i       conversion buffer, converted in place
 * @param bkg_i       unused
 * @param dxpl        unused
 * @return 0 on success, -1 on failure
 */
static herr_t ascii2utf8(hid_t src_id,
                         hid_t dst_id,
                         H5T_cdata_t *cdata,
                         size_t nl,
                         size_t buf_stride,
                         size_t bkg_stride,
                         void *buf_i,
                         void *bkg_i,
                         hid_t dxpl) {

    // Documentation for what this function should do is at:
    // https://support.hdfgroup.org/HDF5/doc/H5.user/Datatypes.html#Datatypes-DataConversion

    switch (cdata->command) {
    case H5T_CONV_INIT: {
        cdata->need_bkg = H5T_BKG_NO;

        // H5Tis_variable_str returns a tri-state: > 0 true, 0 false, < 0 error.
        // Both "false" and "error" must reject the conversion.
        if (H5Tis_variable_str(src_id) <= 0 || H5Tis_variable_str(dst_id) <= 0) {
            return -1;
        }

        return 0;
    }
    case H5T_CONV_FREE:
        return 0; // Nothing to do
    case H5T_CONV_CONV:
        break;
    default:
        // Unknown command: refuse rather than fall through into a conversion.
        return -1;
    }

    if (buf_stride == 0) {
        // Packed layout: element positions are derived from the type sizes.
        const size_t si = H5Tget_size(src_id);
        const size_t so = H5Tget_size(dst_id);

        // H5Tget_size reports failure as 0; a zero stride would make every
        // element alias slot 0.
        if (si == 0 || so == 0) {
            return -1;
        }

        if (si >= so) {
            // Destination slots never run ahead of unread source slots,
            // so walk front to back.
            for (size_t i = 0; i < nl; i++) {
                ascii2utf8_one(buf_i, i, si, so);
            }
        } else {
            // Destination elements are larger: walk back to front so that
            // unread source slots are not overwritten.
            for (size_t i = nl; i > 0; i--) {
                ascii2utf8_one(buf_i, i - 1, si, so);
            }
        }
    } else {
        // Explicit stride: source and destination share the same slot.
        for (size_t i = 0; i < nl; i++) {
            ascii2utf8_one(buf_i, i, buf_stride, buf_stride);
        }
    }

    return 0;
}
329+
260330
h5x::DataType data_type_to_h5_filetype(DataType dtype) {
261331

262332
/* The switch is structured in a way in order to get
@@ -293,6 +363,19 @@ h5x::DataType data_type_to_h5_filetype(DataType dtype) {
293363

294364
h5x::DataType data_type_to_h5_memtype(DataType dtype) {
295365

366+
static std::once_flag init_flag;
367+
368+
std::call_once(init_flag, [](){
369+
h5x::DataType utf8type = h5x::DataType::makeStrType(H5T_VARIABLE, H5T_CSET_UTF8);
370+
h5x::DataType asciitype = h5x::DataType::makeStrType(H5T_VARIABLE, H5T_CSET_ASCII);
371+
372+
H5Tregister(H5T_PERS_SOFT,
373+
"ascii2utf8",
374+
asciitype.h5id(),
375+
utf8type.h5id(),
376+
ascii2utf8);
377+
});
378+
296379
// See data_type_to_h5_filetype for the reason why the switch is structured
297380
// in the way it is.
298381

0 commit comments

Comments
 (0)