ESDM
Middleware for Earth System Data
|
This file implements ESDM types, and associated methods. More...
#include <esdm-internal.h>
#include <esdm-grid.h>
#include <esdm.h>
#include <inttypes.h>
#include <smd.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Macros | |
#define | _GNU_SOURCE /* See feature_test_macros(7) */ |
#define | DEBUG_ENTER ESDM_DEBUG_COM_FMT("DATATYPES", "", "") |
#define | DEBUG(fmt, ...) ESDM_DEBUG_COM_FMT("DATATYPES", fmt, __VA_ARGS__) |
Functions | |
esdm_status | esdm_container_create (const char *name, int allow_overwrite, esdm_container_t **oc) |
bool | esdm_container_dataset_exists (esdm_container_t *c, char const *name) |
int | esdm_container_dataset_count (esdm_container_t *c) |
esdm_dataset_t * | esdm_container_dataset_from_array (esdm_container_t *c, int i) |
void | esdmI_container_register_dataset (esdm_container_t *c, esdm_dataset_t *dset) |
void | esdmI_container_init (char const *name, esdm_container_t **out_container) |
esdm_status | esdmI_create_dataset_from_metadata (esdm_container_t *c, json_t *json, esdm_dataset_t **out) |
esdm_status | esdm_container_open_md_parse (esdm_container_t *c, char *md, int size) |
esdm_status | esdm_container_open_md_load (esdm_container_t *c, char **out_md, int *out_size) |
esdm_status | esdm_container_open (char const *name, int esdm_mode_flags, esdm_container_t **out_container) |
void | esdmI_datasets_reference_metadata_create (esdm_container_t *c, smd_string_stream_t *s) |
void | esdmI_container_metadata_create (esdm_container_t *c, smd_string_stream_t *s) |
esdm_status | esdm_container_commit (esdm_container_t *c) |
esdm_status | esdm_container_close (esdm_container_t *c) |
esdm_status | esdm_container_delete_attribute (esdm_container_t *c, const char *name) |
esdm_status | esdm_container_link_attribute (esdm_container_t *c, int overwrite, smd_attr_t *attr) |
esdm_status | esdm_container_get_attributes (esdm_container_t *c, smd_attr_t **out_metadata) |
esdm_status | esdmI_container_destroy (esdm_container_t *c) |
__attribute__ ((warn_unused_result)) | |
int64_t | esdm_dataspace_get_dims (esdm_dataspace_t *d) |
int64_t | esdm_dataspace_total_bytes (esdm_dataspace_t *d) |
int64_t const * | esdm_dataspace_get_size (esdm_dataspace_t *d) |
int64_t const * | esdm_dataspace_get_offset (esdm_dataspace_t *d) |
esdm_status | esdmI_fragment_create (esdm_dataset_t *d, esdm_dataspace_t *sspace, void *buf, esdm_fragment_t **out_fragment) |
esdm_status | esdm_fragment_retrieve (esdm_fragment_t *fragment) |
esdm_status | esdm_fragment_load (esdm_fragment_t *fragment) |
esdm_status | esdm_fragment_unload (esdm_fragment_t *fragment) |
void | esdm_fragment_metadata_create (esdm_fragment_t *f, smd_string_stream_t *stream) |
esdm_status | esdm_fragment_commit (esdm_fragment_t *f) |
esdm_status | esdm_container_delete (esdm_container_t *c) |
esdm_status | esdm_dataset_delete (esdm_dataset_t *d) |
esdm_status | esdm_fragment_destroy (esdm_fragment_t *frag) |
void | esdm_dataset_init (esdm_container_t *c, const char *name, esdm_dataspace_t *dspace, esdm_dataset_t **out_dataset) |
esdm_status | esdm_dataset_create (esdm_container_t *c, const char *name, esdm_dataspace_t *dspace, esdm_dataset_t **out_dataset) |
esdm_status | esdm_dataset_set_compression_hint (esdm_dataset_t *dset, scil_user_hints_t const *hints) |
esdm_status | esdm_dataset_open_md_load (esdm_dataset_t *dset, char **out_md, int *out_size) |
esdm_backend_t * | esdmI_get_backend (char const *plugin_id) |
esdm_status | esdmI_create_fragment_from_metadata (esdm_dataset_t *dset, json_t *json, esdm_fragment_t **out_fragment) |
esdm_status | esdm_dataspace_set_stride (esdm_dataspace_t *space, int64_t *stride) |
esdm_status | esdm_dataspace_copyDatalayout (esdm_dataspace_t *space, esdm_dataspace_t *source) |
esdm_status | esdm_dataset_open_md_parse (esdm_dataset_t *d, char *md, int size) |
esdm_status | esdm_dataset_ref (esdm_dataset_t *d) |
esdm_status | esdm_dataset_by_name (esdm_container_t *c, const char *name, int esdm_mode_flags, esdm_dataset_t **out_dataset) |
esdm_status | esdm_dataset_open (esdm_container_t *c, const char *name, int esdm_mode_flags, esdm_dataset_t **out_dataset) |
void | esdmI_dataset_metadata_create (esdm_dataset_t *d, smd_string_stream_t *s) |
esdm_status | esdm_dataset_commit (esdm_dataset_t *d) |
esdm_status | esdm_dataset_update (esdm_dataset_t *dataset) |
esdm_status | esdmI_dataset_fragmentsCoveringRegion (esdm_dataset_t *dataset, esdmI_hypercube_t *region, int64_t *out_count, esdm_fragment_t ***out_fragments, esdmI_hypercubeSet_t **out_uncovered, bool *out_fullyCovered) |
esdm_fragment_t * | esdmI_dataset_createFragment (esdm_dataset_t *dataset, esdm_dataspace_t *memspace, void *buf, bool *out_newFragment) |
esdm_fragment_t * | esdmI_dataset_lookupFragmentForShape (esdm_dataset_t *dataset, esdm_dataspace_t *shape) |
esdm_status | esdm_dataset_close (esdm_dataset_t *dset) |
esdm_status | esdmI_dataset_destroy (esdm_dataset_t *dset) |
esdm_status | esdm_dataset_delete_attribute (esdm_dataset_t *dataset, const char *name) |
esdm_status | esdm_dataset_get_attributes (esdm_dataset_t *dataset, smd_attr_t **out_metadata) |
esdm_status | esdm_dataset_rename (esdm_dataset_t *d, const char *name) |
esdm_status | esdm_dataspace_create_full (int64_t dims, int64_t *sizes, int64_t *offset, esdm_type_t type, esdm_dataspace_t **out_dataspace) |
esdm_status | esdm_dataspace_create (int64_t dims, int64_t *sizes, esdm_type_t type, esdm_dataspace_t **out_dataspace) |
esdm_status | esdmI_dataspace_createFromHypercube (esdmI_hypercube_t *extends, esdm_type_t type, esdm_dataspace_t **out_space) |
esdm_status | esdm_dataspace_copy (esdm_dataspace_t *orig, esdm_dataspace_t **out_dataspace) |
esdm_status | esdmI_dataspace_getExtends (esdm_dataspace_t *space, esdmI_hypercube_t **out_extends) |
esdm_status | esdmI_dataspace_setExtends (esdm_dataspace_t *space, esdmI_hypercube_t *extends) |
esdm_status | esdm_dataspace_subspace (esdm_dataspace_t *dataspace, int64_t dims, int64_t *size, int64_t *offset, esdm_dataspace_t **out_dataspace) |
esdm_status | esdm_dataspace_makeContiguous (esdm_dataspace_t *dataspace, esdm_dataspace_t **out_dataspace) |
void | esdm_dataspace_print (esdm_dataspace_t *d) |
void | esdm_dataspace_serialize (const esdm_dataspace_t *space, smd_string_stream_t *stream) |
esdm_status | esdmI_dataspace_createFromJson (json_t *json, esdm_dataset_t *dataset, esdm_dataspace_t **out_dataspace) |
void | esdm_dataspace_getEffectiveStride (esdm_dataspace_t *space, int64_t *out_stride) |
int64_t | esdm_dataspace_elementOffset (esdm_dataspace_t *space, int64_t *coords) |
esdm_status | esdm_dataspace_fill (esdm_dataspace_t *dataspace, void *data, void *fillElement) |
void | esdm_fragment_print (esdm_fragment_t *f) |
esdm_status | esdm_dataspace_destroy (esdm_dataspace_t *d) |
uint64_t | esdm_dataspace_element_count (esdm_dataspace_t *subspace) |
esdm_status | esdm_dataset_name_dims (esdm_dataset_t *d, char *const *names) |
esdm_status | esdm_dataset_rename_dim (esdm_dataset_t *d, char const *name, int i) |
void | esdm_dataset_set_status_dirty (esdm_dataset_t *d) |
void | esdm_container_set_status_dirty (esdm_container_t *c) |
esdm_status | esdm_dataset_get_name_dims (esdm_dataset_t *d, char const *const **out_names) |
esdm_status | esdm_dataset_link_attribute (esdm_dataset_t *dset, int overwrite, smd_attr_t *attr) |
esdm_status | esdm_dataset_iterator (esdm_container_t *container, esdm_dataset_iterator_t **iter) |
char const * | esdm_dataset_name (esdm_dataset_t *d) |
int64_t const * | esdm_dataset_get_actual_size (esdm_dataset_t *dset) |
int64_t const * | esdm_dataset_get_size (esdm_dataset_t *dset) |
esdm_status | esdm_dataset_update_size (esdm_dataset_t *d, uint64_t *sizes) |
esdm_type_t | esdm_dataset_get_type (esdm_dataset_t *d) |
esdm_type_t | esdm_dataspace_get_type (esdm_dataspace_t *d) |
Variables | |
const int64_t | kInitialGridSlotCount = 8 |
This file implements ESDM types, and associated methods.
esdm_status esdm_container_close | ( | esdm_container_t * | container | ) |
Close a container object. If it isn't in use any more free it.
Warning: This throws an error if there are any datasets within this container that are still open. Make sure to close all datasets first.
[in] | container | an existing container object that is no longer needed |
esdm_status esdm_container_commit | ( | esdm_container_t * | container | ) |
Make container persistent to storage. Enqueue for writing to backends.
Calling container commit may trigger subsequent commits for datasets that are part of the container.
[in] | container | pointer to an existing container which is to be committed to storage |
esdm_status esdm_container_create | ( | const char * | name, |
int | allow_overwrite, | ||
esdm_container_t ** | out_container | ||
) |
Create a new container.
[in] | name | string to identify the container, must not be empty |
[out] | out_container | returns a pointer to the new container |
esdm_status esdm_container_open | ( | const char * | name, |
int | esdm_mode_flags, | ||
esdm_container_t ** | out_container | ||
) |
Open an existing container.
[in] | name | string to identify the container, must not be empty |
[out] | out_container | returns a pointer to the container |
esdm_status esdm_dataset_close | ( | esdm_dataset_t * | dataset | ) |
Close a dataset object. If it isn't used anymore, its metadata will be unloaded.
This function is not thread-safe. Only a single master thread must be used to call into ESDM.
[in] | dataset | an existing dataset object that is no longer needed |
esdm_status esdm_dataset_commit | ( | esdm_dataset_t * | dataset | ) |
Make dataset persistent to storage. Schedule for writing to backends.
[in] | dataset | pointer to an existing dataset which is to be committed to storage |
esdm_status esdm_dataset_create | ( | esdm_container_t * | container, |
const char * | name, | ||
esdm_dataspace_t * | dataspace, | ||
esdm_dataset_t ** | out_dataset | ||
) |
Create a new dataset.
[in] | container | pointer to an existing container to which the new dataset will be linked |
[in] | name | identifier for the new dataset, must not be empty |
[in] | dataspace | pointer to an existing dataspace which defines the shape of the data that will be stored within the dataset |
[out] | out_dataset | returns a pointer to the new dataset |
esdm_status esdm_dataset_open | ( | esdm_container_t * | container, |
const char * | name, | ||
int | esdm_mode_flags, | ||
esdm_dataset_t ** | out_dataset | ||
) |
Open a dataset.
[in] | container | pointer to an open container that contains the dataset that is to be opened |
[in] | name | identifier of the dataset within the container, must not be empty |
[out] | out_dataset | returns a pointer to the opened dataset |
esdm_status esdm_dataspace_copy | ( | esdm_dataspace_t * | orig, |
esdm_dataspace_t ** | out_dataspace | ||
) |
Create a copy of a dataspace.
[in] | orig | the dataspace to copy |
[out] | out_dataspace | pointer to the new dataspace |
esdm_status esdm_dataspace_copyDatalayout | ( | esdm_dataspace_t * | dataspace, |
esdm_dataspace_t * | strideSource | ||
) |
Copy the stride information from one dataspace to another.
This is useful when defining a subspace that is supposed to access the same buffer as the enclosing dataspace. A simple esdm_dataspace_subspace()
will assume contiguous storage for the subspace, a subsequent call esdm_dataspace_copyDatalayout(subspace, bufferSpace)
will provide the subspace with the correct stride values to access its possibly non-contiguous part from the same buffer. Note that it is still necessary to adjust the buffer's address by means of esdm_dataspace_elementOffset()
to compute the actual address of the subspace's first element.
The strideSource
must have the same dimension count as the dataspace
.
[in,out] | dataspace | the dataspace to update |
[in] | strideSource | the dataspace that provides the data layout information which is to be copied |
ESDM_SUCCESS
esdm_status esdm_dataspace_create | ( | int64_t | dims, |
int64_t * | sizes, | ||
esdm_type_t | type, | ||
esdm_dataspace_t ** | out_dataspace | ||
) |
Create a new dataspace.
[in] | dims | count of dimensions of the new dataspace |
[in] | sizes | array of the sizes of the different dimensions, the length of this array is dims. Must not be NULL unless dims == 0 |
[in] | type | the datatype for each data point |
[out] | out_dataspace | pointer to the new dataspace |
esdm_status esdm_dataspace_create_full | ( | int64_t | dims, |
int64_t * | size, | ||
int64_t * | offset, | ||
esdm_type_t | type, | ||
esdm_dataspace_t ** | out_dataspace | ||
) |
Create a new dataspace.
[in] | dims | count of dimensions of the new dataspace |
[in] | sizes | array of the sizes of the different dimensions, the length of this array is dims. Must not be NULL unless dims == 0 |
[in] | offset | array containing the logical coordinates of the first data point in this dataspace |
[in] | type | the datatype for each data point |
[out] | out_dataspace | pointer to the new dataspace |
esdm_status esdm_dataspace_destroy | ( | esdm_dataspace_t * | dataspace | ) |
Destruct and free a dataspace object.
[in] | dataspace | an existing dataspace object that is no longer needed |
"_destroy" sounds too destructive, this will be renamed to esdm_dataspace_close().
uint64_t esdm_dataspace_element_count | ( | esdm_dataspace_t * | dataspace | ) |
Returns the number of datapoints in the dataspace.
int64_t esdm_dataspace_elementOffset | ( | esdm_dataspace_t * | space, |
int64_t * | coords | ||
) |
Get the offset in bytes of the element at the given logical position. The resulting offset may be negative if a custom stride has been set that has negative component(s). Otherwise, a contiguous C order multidimensional array is assumed, producing only positive offsets.
[in] | space | the dataspace to query |
[in] | coords | an array with the coordinates of the element's logical location |
esdm_status esdm_dataspace_fill | ( | esdm_dataspace_t * | dataspace, |
void * | data, | ||
void * | fillElement | ||
) |
Overwrite a buffer with a fill value.
This function sets all elements in the given data
buffer to the value given by *fillElement
. The number and offsets of the data
elements to set are controlled by the dataspace
argument.
[in] | dataspace | description of the area to overwrite |
[in,out] | data | pointer to the first element to set |
[in] | fillElement | pointer to a single element which is used as a prototype. |
void esdm_dataspace_getEffectiveStride | ( | esdm_dataspace_t * | space, |
int64_t * | out_stride | ||
) |
Get the effective stride of a dataspace.
If a stride has been set for the dataspace, that stride is copied to the out_stride
array, otherwise the effective stride is calculated and returned in that same array.
[in] | space | the dataspace to query |
[out] | out_stride | pointer to an array of size space->dims which will be filled with the components of the stride. |
As with esdm_dataspace_set_stride()
, the stride is given in terms of fundamental datatype elements and needs to be multiplied with esdm_sizeof(space->type)
to get the stride in bytes.
esdm_status esdm_dataspace_makeContiguous | ( | esdm_dataspace_t * | dataspace, |
esdm_dataspace_t ** | out_dataspace | ||
) |
Define a dataspace that covers the same logical hypercube as the given dataspace, but which uses the standard, contiguous C array element order. The stride of the original dataspace will be ignored totally.
[in] | dataspace | the dataspace that is to be copied |
[out] | out_dataspace | pointer to the new contiguous dataspace |
ESDM_SUCCESS
void esdm_dataspace_serialize | ( | const esdm_dataspace_t * | dataspace, |
smd_string_stream_t * | stream | ||
) |
Serializes dataspace description.
e.g., to store along with fragment
esdm_status esdm_dataspace_set_stride | ( | esdm_dataspace_t * | dataspace, |
int64_t * | stride | ||
) |
Specify a non-standard serialization order for a dataspace.
This can be used to handle FORTRAN arrays, for example, or do some crazy stuff like inverted dimensions, or to skip over holes. Use carefully, or don't use at all. You have been warned.
[in,out] | dataspace | the dataspace that is to be modified |
[in] | dims | number of entries in the stride argument, must match the dimension count of the dataspace |
[in] | stride | array with dims entries, each entry gives the number of elements to skip over when increasing the respective coordinate by one. |
Examples:
A C array int array[7][11]
does not need a stride, the stride is implicitly assumed to be (11, 1)
.
To handle a FORTRAN array INTEGER :: array(7, 11)
, use the following call: esdm_dataspace_set_stride(dataspace, (int64_t[2]){1, 7});
To use only a 3x5 part of an existing C array int array[7][11]
, starting at (1, 2), use these calls: esdm_dataspace_t* subspace; esdm_dataspace_subspace(parent, 2, (int64_t[2]){3, 5}, (int64_t[2]){1, 2}, &subspace); esdm_dataspace_set_stride(subspace, (int64_t[2]){11, 1}); After this, the 2D coordinates will be mapped to the buffer offsets like this: (1,2)=0, (1,3)=1, (1,4)=2, (1,5)=3, (1,6)=4, (2,2)=11, (2,3)=12, (2,4)=13, (2,5)=14, (2,6)=15, (3,2)=22, (3,3)=23, (3,4)=24, (3,5)=25, (3,6)=26,
esdm_status esdm_dataspace_subspace | ( | esdm_dataspace_t * | dataspace, |
int64_t | dims, | ||
int64_t * | size, | ||
int64_t * | offset, | ||
esdm_dataspace_t ** | out_dataspace | ||
) |
TODO: remove dims parameter for good
int64_t esdm_dataspace_total_bytes | ( | esdm_dataspace_t * | d | ) |
Returns the number of bytes covered by the dataspace.
esdm_status esdm_fragment_commit | ( | esdm_fragment_t * | fragment | ) |
Make fragment persistent to storage. Schedule for writing to backends.
[in] | fragment | pointer to an existing fragment which is to be committed to storage |
esdm_status esdm_fragment_destroy | ( | esdm_fragment_t * | fragment | ) |
Destruct and free a fragment object.
[in] | fragment | an existing fragment object that is no longer needed |
"_destroy" sounds too destructive, this will be renamed to esdm_fragment_close().
esdm_status esdm_fragment_load | ( | esdm_fragment_t * | fragment | ) |
Like esdm_fragment_retrieve(), but more permissive: Does not throw an ESDM_DIRTY_DATA_ERROR, simply ensures that the fragment's data is available in memory.
void esdm_fragment_print | ( | esdm_fragment_t * | fragment | ) |
Serializes fragment for storage.
esdm_status esdm_fragment_retrieve | ( | esdm_fragment_t * | fragment | ) |
Fetch data from disk if possible. Loads fragments that are not loaded, noops on those that are loaded and clean, and errors out on those that are dirty or deleted.
XXX: This should probably be turned into an internal interface.
esdm_status esdm_fragment_unload | ( | esdm_fragment_t * | fragment | ) |
Ensure that the fragment has no data in memory.
If the fragment is dirty, it is committed, turning it into a persistent fragment. If the fragment is persistent, its buffer is released, turning it into an unloaded fragment. If the fragment is deleted or not loaded, nothing is done and the call still succeeds.
esdm_status esdmI_container_destroy | ( | esdm_container_t * | container | ) |
Destruct and free a container object.
[in] | container | an existing container object that is no longer needed |
"_destroy" sounds too destructive, this will be renamed to esdm_container_close().
esdm_status esdmI_dataset_destroy | ( | esdm_dataset_t * | dataset | ) |
Destruct and free a dataset object.
[in] | dataset | an existing dataset object that is no longer needed |
"_destroy" sounds too destructive, this will be renamed to esdm_dataset_close().
esdm_status esdmI_dataspace_createFromHypercube | ( | esdmI_hypercube_t * | extends, |
esdm_type_t | type, | ||
esdm_dataspace_t ** | out_space | ||
) |
Create a dataspace. Takes the shape in the form of an esdmI_hypercube_t
.
Similar to esdm_dataspace_create()
, but takes an esdmI_hypercube_t*
instead of a pair of offset
and size
arrays.
[in] | extends | the logical shape of the dataspace that is to be created |
[out] | out_space | returns a new dataspace object that needs to be destructed by the caller |
esdm_status esdmI_dataspace_createFromJson | ( | json_t * | json, |
esdm_dataset_t * | dataset, | ||
esdm_dataspace_t ** | out_dataspace | ||
) |
Create a dataspace object from its JSON description (which was produced via a call to esdm_dataspace_serialize()).
[in] | json | the JSON data describing the dataspace |
[in] | dataset | the dataset that is to be linked to the dataspace (this provides the datatype) |
[out] | out_dataspace | will point to a valid dataspace object after a successful return |
ESDM_SUCCESS
on success, ESDM_INVALID_DATA_ERROR
in case of any inconsistencies in the JSON data
esdm_status esdmI_dataspace_getExtends | ( | esdm_dataspace_t * | space,
esdmI_hypercube_t ** | out_extends | ||
) |
Get the logical extends covered by a dataspace in the form of an esdmI_hypercube_t
.
[in] | space | the dataspace to query |
[out] | out_extends | returns a pointer to a hypercube with the extends of the dataspace, the caller is responsible to destroy the returned pointer |
esdm_status esdmI_dataspace_setExtends | ( | esdm_dataspace_t * | space, |
esdmI_hypercube_t * | extends | ||
) |
Set the logical extends covered by a dataspace in the form of an esdmI_hypercube_t
.
[in] | space | the dataspace to modify |
[in] | extends | a hypercube with the extends of the dataspace |
esdm_status esdmI_fragment_create | ( | esdm_dataset_t * | d, |
esdm_dataspace_t * | sspace, | ||
void * | buf, | ||
esdm_fragment_t ** | out_fragment | ||
) |
TODO: there should be a mode to auto-commit on creation?
How does this integrate with the scheduler? On auto-commit, is this merely being pushed to sched for dispatch?