diff --git a/backend/hdf5/h5x/H5DataType.cpp b/backend/hdf5/h5x/H5DataType.cpp index 401693f16..098900c9f 100644 --- a/backend/hdf5/h5x/H5DataType.cpp +++ b/backend/hdf5/h5x/H5DataType.cpp @@ -10,6 +10,7 @@ #include "H5DataType.hpp" #include +#include #include namespace nix { @@ -187,6 +188,8 @@ bool DataType::enum_equal(const DataType &other) const { } } // h5x + +// boolean types static herr_t bitfield2bool(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, @@ -257,6 +260,73 @@ static herr_t bitfield2bool(hid_t src_id, return 0; } +// string type conversion +static void ascii2utf8_one(void *buffer, + size_t i, + size_t stride_src, + size_t stride_dst) +{ + char *base = static_cast(buffer); + char **src = reinterpret_cast(base + (i * stride_src)); + char **dst = reinterpret_cast(base + (i * stride_dst)); + + *dst = ::strdup(*src); +} + +static herr_t ascii2utf8(hid_t src_id, + hid_t dst_id, + H5T_cdata_t *cdata, + size_t nl, + size_t buf_stride, + size_t bkg_stride, + void *buf_i, + void *bkg_i, + hid_t dxpl) { + + size_t si; + size_t so; + + // document for what this function should to at: + // https://support.hdfgroup.org/HDF5/doc/H5.user/Datatypes.html#Datatypes-DataConversion + + switch (cdata->command) { + case H5T_CONV_INIT: { + cdata->need_bkg = H5T_BKG_NO; + + if (!H5Tis_variable_str(src_id) || !H5Tis_variable_str(dst_id)) { + return -1; + } + + return 0; + } + case H5T_CONV_FREE: + return 0; //Nothing to do + case H5T_CONV_CONV: + break; + } + + si = H5Tget_size(src_id); + so = H5Tget_size(dst_id); + + if (buf_stride == 0) { + if (si >= so) { + for (size_t i = 0; i < nl; i++) { + ascii2utf8_one(buf_i, i, si, so); + } + } else { + for (size_t i = nl; i > 0; i--) { + ascii2utf8_one(buf_i, i - 1, si, so); + } + } + } else { + for (size_t i = 0; i < nl; i++) { + ascii2utf8_one(buf_i, i, buf_stride, buf_stride); + } + } + + return 0; +} + h5x::DataType data_type_to_h5_filetype(DataType dtype) { /* The switch is structured in a way in order to get @@ -293,6 +363,19 @@ h5x::DataType data_type_to_h5_filetype(DataType dtype) { h5x::DataType data_type_to_h5_memtype(DataType dtype) { + static std::once_flag init_flag; + + std::call_once(init_flag, [](){ + h5x::DataType utf8type = h5x::DataType::makeStrType(H5T_VARIABLE, H5T_CSET_UTF8); + h5x::DataType asciitype = h5x::DataType::makeStrType(H5T_VARIABLE, H5T_CSET_ASCII); + + H5Tregister(H5T_PERS_SOFT, + "ascii2utf8", + asciitype.h5id(), + utf8type.h5id(), + ascii2utf8); + }); + // See data_type_to_h5_filetype for the reason why the switch is structured // in the way it is. diff --git a/test/hdf5/TestH5.cpp b/test/hdf5/TestH5.cpp index 31fe285b0..36c27fb3a 100644 --- a/test/hdf5/TestH5.cpp +++ b/test/hdf5/TestH5.cpp @@ -10,6 +10,8 @@ #include "TestH5.hpp" +#include + #include "hdf5/FileHDF5.hpp" #include "hdf5/h5x/H5Exception.hpp" #include "hdf5/h5x/H5PList.hpp" @@ -379,4 +381,17 @@ void TestH5::testUTF8() { h5group.createLink(g, "zelda"); h5group.linkInfo("zelda", li); CPPUNIT_ASSERT_EQUAL(H5T_CSET_UTF8, li.cset); + + // ASCII in the file, utf8 in memory + nix::hdf5::h5x::DataType ascii_type = nix::hdf5::h5x::DataType::makeStrType(H5T_VARIABLE, H5T_CSET_ASCII); + std::string test_str = "Hallo"; + + const nix::Hydra hydra(test_str); + + nix::hdf5::DataSpace fileSpace = nix::hdf5::DataSpace::create(hydra.shape(), false); + nix::hdf5::Attribute attr_ascii = g.createAttr("ascii", ascii_type, fileSpace); + attr_ascii.write(nix::hdf5::data_type_to_h5_memtype(nix::DataType::String), hydra.shape(), hydra.data()); + + std::string str_out; + g.getAttr("ascii", str_out); }