DOLFINx
DOLFINx C++ interface
HDF5Interface.h
1// Copyright (C) 2012 Chris N. Richardson and Garth N. Wells
2//
3// This file is part of DOLFINx (https://www.fenicsproject.org)
4//
5// SPDX-License-Identifier: LGPL-3.0-or-later
6
7#pragma once
8
9#include <array>
10#include <cassert>
11#include <chrono>
12#include <cstdint>
13#include <dolfinx/common/log.h>
14#include <filesystem>
15#include <hdf5.h>
16#include <mpi.h>
17#include <string>
18#include <vector>
19
20namespace dolfinx::io
21{
22
24
26{
27#define HDF5_FAIL -1
28public:
34 static hid_t open_file(MPI_Comm comm, const std::filesystem::path& filename,
35 const std::string& mode, const bool use_mpi_io);
36
39 static void close_file(const hid_t handle);
40
43 static void flush_file(const hid_t handle);
44
48 static std::filesystem::path get_filename(hid_t handle);
49
60 template <typename T>
61 static void write_dataset(const hid_t handle, const std::string& dataset_path,
62 const T* data,
63 const std::array<std::int64_t, 2>& range,
64 const std::vector<std::int64_t>& global_size,
65 bool use_mpi_io, bool use_chunking);
66
75 template <typename T>
76 static std::vector<T> read_dataset(const hid_t handle,
77 const std::string& dataset_path,
78 const std::array<std::int64_t, 2>& range);
79
84 static bool has_dataset(const hid_t handle, const std::string& dataset_path);
85
90 static std::vector<std::int64_t>
91 get_dataset_shape(const hid_t handle, const std::string& dataset_path);
92
99 static void set_mpi_atomicity(const hid_t handle, const bool atomic);
100
105 static bool get_mpi_atomicity(const hid_t handle);
106
107private:
111 static void add_group(const hid_t handle, const std::string& dataset_path);
112
113 // Return HDF5 data type
114 template <typename T>
115 static hid_t hdf5_type()
116 {
117 throw std::runtime_error("Cannot get HDF5 primitive data type. "
118 "No specialised function for this data type");
119 }
120};
122//---------------------------------------------------------------------------
123template <>
124inline hid_t HDF5Interface::hdf5_type<float>()
125{
126 return H5T_NATIVE_FLOAT;
127}
128//---------------------------------------------------------------------------
129template <>
130inline hid_t HDF5Interface::hdf5_type<double>()
131{
132 return H5T_NATIVE_DOUBLE;
133}
134//---------------------------------------------------------------------------
135template <>
136inline hid_t HDF5Interface::hdf5_type<int>()
137{
138 return H5T_NATIVE_INT;
139}
140//---------------------------------------------------------------------------
141template <>
142inline hid_t HDF5Interface::hdf5_type<std::int64_t>()
143{
144 return H5T_NATIVE_INT64;
145}
146//---------------------------------------------------------------------------
147template <>
148inline hid_t HDF5Interface::hdf5_type<std::size_t>()
149{
150 if (sizeof(std::size_t) == sizeof(unsigned long))
151 return H5T_NATIVE_ULONG;
152 else if (sizeof(std::size_t) == sizeof(unsigned int))
153 return H5T_NATIVE_UINT;
154
155 throw std::runtime_error("Cannot determine size of std::size_t. "
156 "std::size_t is not the same size as long or int");
157}
158//---------------------------------------------------------------------------
159template <typename T>
161 const hid_t file_handle, const std::string& dataset_path, const T* data,
162 const std::array<std::int64_t, 2>& range,
163 const std::vector<int64_t>& global_size, bool use_mpi_io, bool use_chunking)
164{
165 // Data rank
166 const int rank = global_size.size();
167 assert(rank != 0);
168 if (rank > 2)
169 {
170 throw std::runtime_error("Cannot write dataset to HDF5 file"
171 "Only rank 1 and rank 2 dataset are supported");
172 }
173
174 // Get HDF5 data type
175 const hid_t h5type = hdf5_type<T>();
176
177 // Hyperslab selection parameters
178 std::vector<hsize_t> count(global_size.begin(), global_size.end());
179 count[0] = range[1] - range[0];
180
181 // Data offsets
182 std::vector<hsize_t> offset(rank, 0);
183 offset[0] = range[0];
184
185 // Dataset dimensions
186 const std::vector<hsize_t> dimsf(global_size.begin(), global_size.end());
187
188 // Create a global data space
189 const hid_t filespace0 = H5Screate_simple(rank, dimsf.data(), nullptr);
190 if (filespace0 == HDF5_FAIL)
191 throw std::runtime_error("Failed to create HDF5 data space");
192
193 // Set chunking parameters
194 hid_t chunking_properties;
195 if (use_chunking)
196 {
197 // Set chunk size and limit to 1kB min/1MB max
198 hsize_t chunk_size = dimsf[0] / 2;
199 if (chunk_size > 1048576)
200 chunk_size = 1048576;
201 if (chunk_size < 1024)
202 chunk_size = 1024;
203
204 hsize_t chunk_dims[2] = {chunk_size, dimsf[1]};
205 chunking_properties = H5Pcreate(H5P_DATASET_CREATE);
206 H5Pset_chunk(chunking_properties, rank, chunk_dims);
207 }
208 else
209 chunking_properties = H5P_DEFAULT;
210
211 // Check that group exists and recursively create if required
212 const std::string group_name(dataset_path, 0, dataset_path.rfind('/'));
213 add_group(file_handle, group_name);
214
215 // Create global dataset (using dataset_path)
216 const hid_t dset_id
217 = H5Dcreate2(file_handle, dataset_path.c_str(), h5type, filespace0,
218 H5P_DEFAULT, chunking_properties, H5P_DEFAULT);
219 if (dset_id == HDF5_FAIL)
220 throw std::runtime_error("Failed to create HDF5 global dataset.");
221
222 // Generic status report
223 herr_t status;
224
225 // Close global data space
226 status = H5Sclose(filespace0);
227 if (status == HDF5_FAIL)
228 throw std::runtime_error("Failed to close HDF5 global data space.");
229
230 // Create a local data space
231 const hid_t memspace = H5Screate_simple(rank, count.data(), nullptr);
232 if (memspace == HDF5_FAIL)
233 throw std::runtime_error("Failed to create HDF5 local data space.");
234
235 // Create a file dataspace within the global space - a hyperslab
236 const hid_t filespace1 = H5Dget_space(dset_id);
237 status = H5Sselect_hyperslab(filespace1, H5S_SELECT_SET, offset.data(),
238 nullptr, count.data(), nullptr);
239 if (status == HDF5_FAIL)
240 throw std::runtime_error("Failed to create HDF5 dataspace.");
241
242 // Set parallel access
243 const hid_t plist_id = H5Pcreate(H5P_DATASET_XFER);
244 if (use_mpi_io)
245 {
246#ifdef H5_HAVE_PARALLEL
247 status = H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE);
248 if (status == HDF5_FAIL)
249 {
250 throw std::runtime_error(
251 "Failed to set HDF5 data transfer property list.");
252 }
253
254#else
255 throw std::runtime_error("HDF5 library has not been configured with MPI");
256#endif
257 }
258
259 // Write local dataset into selected hyperslab
260 status = H5Dwrite(dset_id, h5type, memspace, filespace1, plist_id, data);
261 if (status == HDF5_FAIL)
262 {
263 throw std::runtime_error(
264 "Failed to write HDF5 local dataset into hyperslab.");
265 }
266
267 if (use_chunking)
268 {
269 // Close chunking properties
270 status = H5Pclose(chunking_properties);
271 if (status == HDF5_FAIL)
272 throw std::runtime_error("Failed to close HDF5 chunking properties.");
273 }
274
275 // Close dataset collectively
276 status = H5Dclose(dset_id);
277 if (status == HDF5_FAIL)
278 throw std::runtime_error("Failed to close HDF5 dataset.");
279
280 // Close hyperslab
281 status = H5Sclose(filespace1);
282 if (status == HDF5_FAIL)
283 throw std::runtime_error("Failed to close HDF5 hyperslab.");
284
285 // Close local dataset
286 status = H5Sclose(memspace);
287 if (status == HDF5_FAIL)
288 throw std::runtime_error("Failed to close local HDF5 dataset.");
289
290 // Release file-access template
291 status = H5Pclose(plist_id);
292 if (status == HDF5_FAIL)
293 throw std::runtime_error("Failed to release HDF5 file-access template.");
294}
295//---------------------------------------------------------------------------
296template <typename T>
297inline std::vector<T>
298HDF5Interface::read_dataset(const hid_t file_handle,
299 const std::string& dataset_path,
300 const std::array<std::int64_t, 2>& range)
301{
302 auto timer_start = std::chrono::system_clock::now();
303
304 // Open the dataset
305 const hid_t dset_id
306 = H5Dopen2(file_handle, dataset_path.c_str(), H5P_DEFAULT);
307 if (dset_id == HDF5_FAIL)
308 throw std::runtime_error("Failed to open HDF5 global dataset.");
309
310 // Open dataspace
311 const hid_t dataspace = H5Dget_space(dset_id);
312 if (dataspace == HDF5_FAIL)
313 throw std::runtime_error("Failed to open HDF5 data space.");
314
315 // Get rank of data set
316 const int rank = H5Sget_simple_extent_ndims(dataspace);
317 assert(rank >= 0);
318 if (rank > 2)
319 LOG(WARNING) << "HDF5Interface::read_dataset untested for rank > 2.";
320
321 // Allocate data for shape
322 std::vector<hsize_t> shape(rank);
323
324 // Get size in each dimension
325 const int ndims = H5Sget_simple_extent_dims(dataspace, shape.data(), nullptr);
326 if (ndims != rank)
327 throw std::runtime_error("Failed to get dimensionality of dataspace");
328
329 // Hyperslab selection
330 std::vector<hsize_t> offset(rank, 0);
331 std::vector<hsize_t> count = shape;
332 if (range[0] != -1 and range[1] != -1)
333 {
334 offset[0] = range[0];
335 count[0] = range[1] - range[0];
336 }
337 else
338 offset[0] = 0;
339
340 // Select a block in the dataset beginning at offset[], with
341 // size=count[]
342 [[maybe_unused]] herr_t status = H5Sselect_hyperslab(
343 dataspace, H5S_SELECT_SET, offset.data(), nullptr, count.data(), nullptr);
344 if (status == HDF5_FAIL)
345 throw std::runtime_error("Failed to select HDF5 hyperslab.");
346
347 // Create a memory dataspace
348 const hid_t memspace = H5Screate_simple(rank, count.data(), nullptr);
349 if (memspace == HDF5_FAIL)
350 throw std::runtime_error("Failed to create HDF5 dataspace.");
351
352 // Create local data to read into
353 std::size_t data_size = 1;
354 for (std::size_t i = 0; i < count.size(); ++i)
355 data_size *= count[i];
356 std::vector<T> data(data_size);
357
358 // Read data on each process
359 const hid_t h5type = hdf5_type<T>();
360 status
361 = H5Dread(dset_id, h5type, memspace, dataspace, H5P_DEFAULT, data.data());
362 if (status == HDF5_FAIL)
363 throw std::runtime_error("Failed to read HDF5 data.");
364
365 // Close dataspace
366 status = H5Sclose(dataspace);
367 if (status == HDF5_FAIL)
368 throw std::runtime_error("Failed to close HDF5 dataspace.");
369
370 // Close memspace
371 status = H5Sclose(memspace);
372 if (status == HDF5_FAIL)
373 throw std::runtime_error("Failed to close HDF5 memory space.");
374
375 // Close dataset
376 status = H5Dclose(dset_id);
377 if (status == HDF5_FAIL)
378 throw std::runtime_error("Failed to close HDF5 dataset.");
379
380 auto timer_end = std::chrono::system_clock::now();
381 std::chrono::duration<double> dt = (timer_end - timer_start);
382 double data_rate = data.size() * sizeof(T) / (1e6 * dt.count());
383
384 LOG(INFO) << "HDF5 Read data rate: " << data_rate << "MB/s";
385
386 return data;
387}
388//---------------------------------------------------------------------------
390} // namespace dolfinx::io
This class provides an interface to some HDF5 functionality.
Definition: HDF5Interface.h:26
static std::vector< T > read_dataset(const hid_t handle, const std::string &dataset_path, const std::array< std::int64_t, 2 > &range)
Read data from a HDF5 dataset "dataset_path" as defined by range blocks on each process.
static void close_file(const hid_t handle)
Close HDF5 file.
Definition: HDF5Interface.cpp:124
static bool has_dataset(const hid_t handle, const std::string &dataset_path)
Check for existence of dataset in HDF5 file.
Definition: HDF5Interface.cpp:153
static std::filesystem::path get_filename(hid_t handle)
Get filename.
Definition: HDF5Interface.cpp:136
static std::vector< std::int64_t > get_dataset_shape(const hid_t handle, const std::string &dataset_path)
Get dataset shape (size of each dimension)
Definition: HDF5Interface.cpp:203
static hid_t open_file(MPI_Comm comm, const std::filesystem::path &filename, const std::string &mode, const bool use_mpi_io)
Open HDF5 and return file descriptor.
Definition: HDF5Interface.cpp:58
static void write_dataset(const hid_t handle, const std::string &dataset_path, const T *data, const std::array< std::int64_t, 2 > &range, const std::vector< std::int64_t > &global_size, bool use_mpi_io, bool use_chunking)
Write data to existing HDF file as defined by range blocks on each process.
static void flush_file(const hid_t handle)
Flush data to file to improve data integrity after interruption.
Definition: HDF5Interface.cpp:130
static bool get_mpi_atomicity(const hid_t handle)
Get MPI atomicity. See https://support.hdfgroup.org/HDF5/doc/RM/RM_H5F.html#File-GetMpiAtomicity and ...
Definition: HDF5Interface.cpp:244
static void set_mpi_atomicity(const hid_t handle, const bool atomic)
Set MPI atomicity. See https://support.hdfgroup.org/HDF5/doc/RM/RM_H5F.html#File-SetMpiAtomicity and ...
Definition: HDF5Interface.cpp:236
int rank(MPI_Comm comm)
Return process rank for the communicator.
Definition: MPI.cpp:75
Support for file IO.
Definition: ADIOS2Writers.h:39