POD struct with fixed length string

#include <some_header_files>
// Fixed-size record: the string is kept inline in a char array next to the id,
// so the struct can be described to HDF5 with sizeof/HOFFSET.
namespace shim {
    struct pod_t {
            // Compile-time length of `name`; FIXED_SIZE is defined outside this snippet.
            // The `max_lenght` spelling is referenced by the type registration code, so it is kept.
            using max_lenght = std::integral_constant<size_t,FIXED_SIZE>;
            size_t id;
            // Inline character storage holding max_lenght::value chars.
            char name[max_lenght::value];
    };
}
  • enclosed in some namespace
  • compile time length definition to some FIXED_SIZE
  • ID field: we will get back to this
  • and our fixed length string
can transfer data directly, no need for scatter/gather ops

NON POD class with std::string

#include <some_header_files>
// Record with the same two fields as the fixed-size variant, but `name` is a
// std::string instead of an inline char array.
namespace non::intrusive {
    struct non_pod_t { // wrapper to aid C++ template mechanism, zero runtime cost
        size_t id;
        // The struct holds only the std::string object, not a fixed char buffer.
        std::string name;
    };
}
  • enclosed in some namespace
  • we have the extra field
  • but the content is stored somewhere else
needs scatter/gather ops to fetch the string content

Do we need the ID field at all?


// Sketch: strings kept in an indexable container, with no per-record id field.
std::vector<std::string> non_pod = ...;
// Write the container's data at position K of dataset `ds`.
h5::write(ds, non_pod.data(), h5::offset{K}, ...);
// Indexed read: fetch one element (count 1) from position K.
std::string record = h5::read(ds, h5::offset{K}, h5::count{1}, ...);

// Fixed-length variant: each element is a char[N] (here `pod_t` is a variable name).
std::vector<char[N]> pod_t = ...; 
    
  • we could just take an indexable container
  • write to a specific location: `h5::offset{..}`
  • or do an indexed read from it
  • here is the fixed length variant
The vector of strings still needs scatter/gather ops

Type Mapping between C and HDF5


  namespace h5 {
    /// Builds the HDF5 compound datatype that mirrors the in-memory layout of
    /// `shim::pod_t` (members "id" and "name").
    ///
    /// @return handle of the new compound type; the intermediate string type is
    ///         closed here, the returned handle is left open for the caller.
    template <> hid_t inline register_struct<shim::pod_t>(){
        // Fixed-length C string type sized to the `name` array.
        hid_t at_00 = H5Tcopy(H5T_C_S1);
        H5Tset_size(at_00, ::shim::pod_t::max_lenght::value);

        // `id` is a size_t. `unsigned long` is narrower than size_t on LLP64
        // platforms (e.g. 64-bit Windows), so pick the native unsigned type
        // whose width actually matches the field.
        const hid_t id_type = sizeof(size_t) == sizeof(unsigned long)
            ? H5T_NATIVE_ULONG : H5T_NATIVE_ULLONG;

        hid_t ct_00 = H5Tcreate(H5T_COMPOUND, sizeof (shim::pod_t));
        H5Tinsert(ct_00, "id", HOFFSET(shim::pod_t,id), id_type);
        H5Tinsert(ct_00, "name", HOFFSET(shim::pod_t,name), at_00);

        // The compound type keeps its own copy of the member type.
        H5Tclose(at_00);
        return ct_00;
    }
}
H5CPP_REGISTER_STRUCT(shim::pod_t);
  • must be within `h5` namespace
  • is a template specialization
  • we do our CAPI type definitions
  • close descriptors that are no longer needed
  • returned hid_t descriptor will be closed with RAII
  • finally register the custom type with H5CPP

H5bench for HDF5 CAPI and H5CPP with pythonic syntax


  // Usage template for a throughput measurement; `...`, x/y/z, m and n are placeholders.
  namespace bh = h5::bench;
  bh::throughput(
    bh::name{"test case name..."}, 
      // experiment sizes, then burn-in runs and number of measured samples
      bh::arg_x{x,y,z,...}, 
      bh::warmup{m}, bh::sample{n},
      // optional descriptors handed to the benchmark
      h5::ds_t | h5::fd_t | ...,
    // Work under measurement: `idx` selects the experiment, `size` is its element count.
    [&](size_t idx, size_t size) -> double {
        for (size_t k = 0; k < size; k++)
            h5::append(ds, data[k]);
        // report the number of bytes transferred for this experiment
        return transfer_size[idx];
    });
  • namespace alias to h5::bench
  • throughput measurement takes arguments in arbitrary order
  • nice to have a name
  • arguments: {10, 100, 1'000, 10'000, ...}
  • burn-in and sample size for robust control
  • optional descriptors are reset/flushed with each batch execution
  • and the lambda function to do the job
  • where you return the transferred data size in bytes

EXAMPLE: CAPI Fixed length string


  using custom_t = char[42]; // type alias

  // Bytes moved per batch: one entry for each experiment size in `record_size`.
  std::vector<size_t> transfer_size;
  for (auto i : record_size)
      transfer_size.push_back(i * sizeof(custom_t));
  std::vector<custom_t> data = convert<custom_t>(strings);

  // fixed-length string type: sizeof(custom_t) bytes per element, UTF-8 encoded
  h5::dt_t<custom_t> dt{H5Tcreate(H5T_STRING, sizeof(custom_t))};
  H5Tset_cset(dt, H5T_CSET_UTF8);

  // one dataset per experiment size, created with the custom datatype
  std::vector<h5::ds_t> ds;
  for(auto size: record_size) ds.push_back(
          h5::create(fd, fmt::format("FLstring CAPI-{:010d}", size),
          chunk_size, h5::current_dims{size}, dt));

  // actual measurement
  bh::throughput(
      bh::name{"FLstring CAPI"}, record_size, warmup, sample,
      [&](hsize_t idx, hsize_t size) -> double {
          // memory space: `size` elements, all selected
          h5::sp_t mem_space{H5Screate_simple(1, &size, nullptr )};
          H5Sselect_all(mem_space);
          // file space: the whole extent of the dataset for this experiment
          h5::sp_t file_space{H5Dget_space(ds[idx])};
          H5Sselect_all(file_space);

          H5Dwrite( ds[idx], dt, mem_space, file_space, H5P_DEFAULT, data.data());
          // report the number of bytes written in this batch
          return transfer_size[idx];
      });
  
  • set up the dataset being transferred, and compute the mini-batch size in bytes
  • create fixed length type descriptor
  • pass `h5::dt_t<custom_t> dt` to `h5::create`
  • actual throughput measurement
  • is a lambda function, prototype controls behaviour
  • protecting all resources with H5CPP RAII is straightforward
  • use HDF5 CAPI calls to measure library performance
  • return the amount of data transferred in bytes

parameters:


      // experiment sizes passed to the throughput measurement
      bh::arg_x record_size{10'000, 100'000, 1'000'000};
      // burn-in runs before sampling; zero gives a single write
      bh::warmup warmup{3};
      // number of measured executions
      bh::sample sample{10};
      // dataset creation property list: chunked layout with chunk size 4096
      h5::dcpl_t chunk_size = h5::chunk{4096};
    
  • control experiment size
  • set `warmup` to zero for single write
  • number of measured executions
  • do all your `dcpl, dapl, ...` in one location

results


        [name                                              ][total events][Mi events/s] [ms runtime / stddev] [    MiB/s / stddev ]
        FLstring h5::append<pod_t>                                  10000      42.0521         0.24     0.027    1359.60     161.3
        FLstring h5::append<pod_t>                                 100000      47.7487         2.09     0.024    1527.30      17.5
        FLstring h5::append<pod_t>                                1000000      47.2072        21.18     0.041    1510.30       2.9
        VLstring h5::append<std::vector<std::string>>               10000       5.9641         1.68     0.423      94.70      23.0
        VLstring h5::append<std::vector<std::string>>              100000       6.1827        16.17     0.377      92.30       2.1
        VLstring h5::append<std::vector<std::string>>             1000000       6.1993       161.31     4.139      92.60       2.2
        VLstring h5::write<std::vector<const char*>>                10000       0.0971       103.03     7.445       1.20       0.6
        VLstring h5::write<std::vector<const char*>>               100000       0.0935      1068.98      -nan       1.10       0.5
        VLstring h5::write<std::vector<const char*>>              1000000       0.0852     11735.32      -nan       0.90       0.8
        VLstring std::vector<std::string>                           10000       0.0440       227.41      -nan       0.00       0.0
        VLstring std::vector<std::string>                          100000       0.0705      1419.31      -nan       0.60       0.4
        VLstring std::vector<std::string>                         1000000       0.0786     12729.59      -nan       0.80       0.5
        FLstring CAPI                                               10000     826.4463         0.01     0.000   15633.60     218.4
        FLstring CAPI                                              100000     225.4791         0.44     0.015    4508.00     143.5
        FLstring CAPI                                             1000000     163.4227         6.12     0.019    3267.70      10.3
        VLstring CAPI                                               10000       0.0779       128.35      -nan       0.80       0.5
        VLstring CAPI                                              100000       0.0763      1311.10      -nan       0.70       0.5
        VLstring CAPI                                             1000000       0.0765     13071.64      -nan       0.70       0.6
        C++ IOstream                                                10000       1.0186         9.82     0.153      14.80       0.0
        C++ IOstream                                               100000       1.0089        99.12     1.311      14.80       0.0
        C++ IOstream                                              1000000       0.9586      1043.17      -nan      13.70       0.6        
      
  • only subset is printed here, can be recorded to HDF5
  • C++ IO stream as baseline
  • fixed length string and POD with string does rather well
  • when we are appending, not overwriting; performance is still good
  • VL string updates are significantly behind in all implementations