XZ Utils 5.3.5beta
Data Structures | Macros | Enumerations | Functions | Variables
string_conversion.c File Reference

Conversion of strings to filter chain and vice versa. More...

#include "filter_common.h"

Data Structures

struct  lzma_str
 
struct  name_value_map
 
struct  option_map
 

Macros

#define STR_ALLOC_SIZE   800
 
#define NAME_LEN_MAX   11
 
#define OPTMAP_USE_NAME_VALUE_MAP   0x01
 
#define OPTMAP_USE_BYTE_SUFFIX   0x02
 
#define OPTMAP_NO_STRFY_ZERO   0x04
 
#define LZMA12_PRESET_STR   "0-9[e]"
 Help string for presets. More...
 
#define MY_IS_DIGIT(c)   ((c) >= '0' && (c) <= '9')
 

Enumerations

enum  { OPTMAP_TYPE_UINT32 , OPTMAP_TYPE_LZMA_MODE , OPTMAP_TYPE_LZMA_MATCH_FINDER , OPTMAP_TYPE_LZMA_PRESET }
 

Functions

static lzma_ret str_init (lzma_str *str, const lzma_allocator *allocator)
 
static void str_free (lzma_str *str, const lzma_allocator *allocator)
 
static bool str_is_full (const lzma_str *str)
 
static lzma_ret str_finish (char **dest, lzma_str *str, const lzma_allocator *allocator)
 
static void str_append_str (lzma_str *str, const char *s)
 
static void str_append_u32 (lzma_str *str, uint32_t v, bool use_byte_suffix)
 
static const char * parse_options (const char **const str, const char *str_end, void *filter_options, const option_map *const optmap, const size_t optmap_size)
 
static const char * parse_lzma12_preset (const char **const str, const char *str_end, uint32_t *preset)
 
static const char * set_lzma12_preset (const char **const str, const char *str_end, void *filter_options)
 
static const char * parse_lzma12 (const char **const str, const char *str_end, void *filter_options)
 
static const char * parse_filter (const char **const str, const char *str_end, lzma_filter *filter, const lzma_allocator *allocator, bool only_xz)
 
static const char * str_to_filters (const char **const str, lzma_filter *filters, uint32_t flags, const lzma_allocator *allocator)
 
const char * lzma_str_to_filters (const char *str, int *error_pos, lzma_filter *filters, uint32_t flags, const lzma_allocator *allocator)
 Convert a string to a filter chain. More...
 
static void strfy_filter (lzma_str *dest, const char *delimiter, const option_map *optmap, size_t optmap_count, const void *filter_options)
 
lzma_ret lzma_str_from_filters (char **output_str, const lzma_filter *filters, uint32_t flags, const lzma_allocator *allocator)
 Convert a filter chain to a string. More...
 
lzma_ret lzma_str_list_filters (char **output_str, lzma_vli filter_id, uint32_t flags, const lzma_allocator *allocator)
 List available filters and/or their options (for help message) More...
 

Variables

static const name_value_map lzma12_mode_map []
 
static const name_value_map lzma12_mf_map []
 
static const option_map lzma12_optmap []
 
struct {
   char   name [NAME_LEN_MAX+1]
 Name of the filter. More...
 
   uint32_t   opts_size
 
   lzma_vli   id
 Filter ID. More...
 
   const char *(*   parse )(const char **str, const char
      *str_end, void *filter_options)
 
   const option_map *   optmap
 
   uint8_t   strfy_encoder
 
   uint8_t   strfy_decoder
 
   bool   allow_null
 
filter_name_map []
 

Detailed Description

Conversion of strings to filter chain and vice versa.

Macro Definition Documentation

◆ STR_ALLOC_SIZE

#define STR_ALLOC_SIZE   800

How much memory to allocate for strings. For now, no realloc is used, so this needs to be big enough, although there is of course still an overflow check.

FIXME? Using a fixed size is wasteful if the application doesn't free the string fairly quickly but this can be improved later if needed.

◆ NAME_LEN_MAX

#define NAME_LEN_MAX   11

Maximum length for filter and option names. 11 chars + terminating '\0' + sizeof(uint32_t) = 16 bytes

◆ OPTMAP_USE_NAME_VALUE_MAP

#define OPTMAP_USE_NAME_VALUE_MAP   0x01

For option_map.flags: Use .u.map to convert the input value to an integer. Without this flag, .u.range.{min,max} are used as the allowed range for the integer.

◆ OPTMAP_USE_BYTE_SUFFIX

#define OPTMAP_USE_BYTE_SUFFIX   0x02

For option_map.flags: Allow KiB/MiB/GiB in input string and use them in the stringified output if the value is an exact multiple of these. This is used e.g. for LZMA1/2 dictionary size.

◆ OPTMAP_NO_STRFY_ZERO

#define OPTMAP_NO_STRFY_ZERO   0x04

For option_map.flags: If the integer value is zero then this option won't be included in the stringified output. It's used e.g. for BCJ filter start offset which usually is zero.

◆ LZMA12_PRESET_STR

#define LZMA12_PRESET_STR   "0-9[e]"

Help string for presets.

Enumeration Type Documentation

◆ anonymous enum

anonymous enum

Possible values for option_map.type. Since OPTMAP_TYPE_UINT32 is 0, it doesn't need to be specified in the initializers as it is the implicit value.

Function Documentation

◆ parse_options()

static const char * parse_options ( const char **const  str,
const char *  str_end,
void *  filter_options,
const option_map *const  optmap,
const size_t  optmap_size 
)
static

Decodes options from a string for one filter (name1=value1,name2=value2). Caller must have allocated memory for filter_options already and set the initial default values. This is called from the filter-specific parse_* functions.

The input string starts at *str and the address in str_end is the first char that is not part of the string anymore. So no '\0' terminator is used. *str is advanced every time something has been decoded successfully.

References name, NAME_LEN_MAX, optmap, and str.

◆ parse_filter()

static const char * parse_filter ( const char **const  str,
const char *  str_end,
lzma_filter *  filter,
const lzma_allocator *  allocator,
bool  only_xz 
)
static

Finds the name of the filter at the beginning of the string and calls filter_name_map[i].parse() to decode the filter-specific options. The caller must have set str_end so that exactly one filter and its options are present without any trailing characters.

◆ str_to_filters()

static const char * str_to_filters ( const char **const  str,
lzma_filter *  filters,
uint32_t  flags,
const lzma_allocator *  allocator 
)
static

Converts the string to a filter chain (array of lzma_filter structures).

*str is advanced every time something has been decoded successfully. This way the caller knows where in the string a possible error occurred.

◆ lzma_str_to_filters()

const char * lzma_str_to_filters ( const char *  str,
int *  error_pos,
lzma_filter *  filters,
uint32_t  flags,
const lzma_allocator *  allocator 
)

Convert a string to a filter chain.

This tries to make it easier to write applications that allow users to set custom compression options. This only handles the filter configuration (including presets) but not the number of threads, block size, check type, or memory limits.

The input string can be either a preset or a filter chain. Presets begin with a digit 0-9 and may be followed by zero or more flags which are lower-case letters. Currently only "e" is supported, matching LZMA_PRESET_EXTREME. For partial xz command line syntax compatibility, a preset string may start with a single dash "-".

A filter chain consists of one or more "filtername:opt1=value1,opt2=value2" strings separated by one or more spaces. Leading and trailing spaces are ignored. All names and values must be lower-case. Extra commas in the option list are ignored. The order of filters is significant: when encoding, the uncompressed input data goes to the leftmost filter first. Normally "lzma2" is the last filter in the chain.

If one wishes to avoid spaces, for example, to avoid shell quoting, it is possible to use two dashes "--" instead of spaces to separate the filters.

For xz command line compatibility, each filter may be prefixed with two dashes "--" and the colon ":" separating the filter name from the options may be replaced with an equals sign "=".

By default, only filters that can be used in the .xz format are accepted. To allow all filters (LZMA1) use the flag LZMA_STR_ALL_FILTERS.

By default, very basic validation is done for the filter chain as a whole, for example, that LZMA2 is only used as the last filter in the chain. The validation isn't perfect though and it's possible that this function succeeds but using the filter chain for encoding or decoding will still result in LZMA_OPTIONS_ERROR. To disable this validation, use the flag LZMA_STR_NO_VALIDATION.

The available filter names and their options are available via lzma_str_list_filters(). See the xz man page for the description of filter names and options.

Parameters
str: User-supplied string describing a preset or a filter chain. If a default value is needed and you don't know what would be good, use "6" since that is the default preset in xz too.
error_pos: If this isn't NULL, this value will be set on both success and on all errors. This tells the location of the error in the string. This is an int to make it straightforward to use this as printf() field width. The value is guaranteed to be in the range [0, INT_MAX] even if strlen(str) somehow was greater than INT_MAX.
filters: An array of lzma_filter structures. There must be LZMA_FILTERS_MAX + 1 (that is, five) elements in the array. The old contents are ignored so it doesn't need to be initialized. This array is modified only if this function returns NULL (that is, on success). Once the allocated filter options are no longer needed, lzma_filters_free() can be used to free the options (it doesn't free the filters array itself).
flags: Bitwise-or of zero or more of the flags LZMA_STR_ALL_FILTERS and LZMA_STR_NO_VALIDATION.
allocator: lzma_allocator for custom allocator functions. Set to NULL to use malloc() and free().
Returns
On success, NULL is returned. On error, a statically-allocated error message is returned which together with the error_pos should give some idea what is wrong.

For command line applications, below is an example of how an error message can be displayed. Note the use of an empty string for the field width. If "^" was used there it would create an off-by-one error except at the very beginning of the line.

const char *str = ...; // From user
int pos;
const char *msg = lzma_str_to_filters(str, &pos, filters, 0, NULL);
if (msg != NULL) {
printf("%s: Error in XZ compression options:\n", argv[0]);
printf("%s: %s\n", argv[0], str);
printf("%s: %*s^\n", argv[0], pos, "");
printf("%s: %s\n", argv[0], msg);
}
static lzma_filter filters[LZMA_FILTERS_MAX+1]
Filters needed for encoding in all formats, and also for decoding raw data.
Definition: coder.c:36
const char * lzma_str_to_filters(const char *str, int *error_pos, lzma_filter *filters, uint32_t flags, const lzma_allocator *allocator) lzma_nothrow lzma_attr_warn_unused_result
Convert a string to a filter chain.
Definition: string_conversion.c:994
#define LZMA_FILTERS_MAX
Maximum number of filters in a chain.
Definition: filter.h:26
const char * str
Table column heading string.
Definition: list.c:109
Filter options.
Definition: filter.h:43

◆ strfy_filter()

static void strfy_filter ( lzma_str *  dest,
const char *  delimiter,
const option_map *  optmap,
size_t  optmap_count,
const void *  filter_options 
)
static

Converts options of one filter to a string.

The caller must have already put the filter name in the destination string. Since it is possible that no options will be needed, the caller won't have put a delimiter character (':' or '=') in the string yet. We will add it if at least one option will be added to the string.

References optmap.

◆ lzma_str_from_filters()

lzma_ret lzma_str_from_filters ( char **  str,
const lzma_filter *  filters,
uint32_t  flags,
const lzma_allocator *  allocator 
)

Convert a filter chain to a string.

Use cases:

  • Verbose output showing the full encoder options to the user (use LZMA_STR_ENCODER in flags)
  • Showing the filters and options that are required to decode a file (use LZMA_STR_DECODER in flags)
  • Showing the filter names without any options in informational messages where the technical details aren't important (no flags). In this case the .options in the filters array are ignored and may be NULL even if a filter has a mandatory options structure.

Note that even if the filter chain was specified using a preset, the resulting filter chain isn't reversed to a preset. So if you specify "6" to lzma_str_to_filters() then lzma_str_from_filters() will produce a string containing "lzma2".

Parameters
str: On success *str will be set to point to an allocated string describing the given filter chain. Old value is ignored. On error *str is always set to NULL.
filters: Array of 1-4 filters and a terminating element with .id = LZMA_VLI_UNKNOWN.
flags: Bitwise-or of zero or more of the flags LZMA_STR_ENCODER, LZMA_STR_DECODER, LZMA_STR_GETOPT_LONG, and LZMA_STR_NO_SPACES.
allocator: lzma_allocator for custom allocator functions. Set to NULL to use malloc() and free().
Returns
  • LZMA_OK
  • LZMA_OPTIONS_ERROR: Empty filter chain (filters[0].id == LZMA_VLI_UNKNOWN) or the filter chain includes a Filter ID that is not supported by this function.
  • LZMA_MEM_ERROR
  • LZMA_PROG_ERROR

Referenced by message_filters_show().

◆ lzma_str_list_filters()

lzma_ret lzma_str_list_filters ( char **  str,
lzma_vli  filter_id,
uint32_t  flags,
const lzma_allocator *  allocator 
)

List available filters and/or their options (for help message)

If a filter_id is given then only one line is created which contains the filter name. If LZMA_STR_ENCODER or LZMA_STR_DECODER is used then the options required for encoding or decoding are listed on the same line too.

If filter_id is LZMA_VLI_UNKNOWN then all supported .xz-compatible filters are listed:

  • If neither LZMA_STR_ENCODER nor LZMA_STR_DECODER is used then the supported filter names are listed on a single line separated by spaces.
  • If LZMA_STR_ENCODER or LZMA_STR_DECODER is used then filters and the supported options are listed one filter per line. There won't be a '\n' after the last filter.
  • If LZMA_STR_ALL_FILTERS is used then the list will include also those filters that cannot be used in the .xz format (LZMA1).
Parameters
str: On success *str will be set to point to an allocated string listing the filters and options. Old value is ignored. On error *str is always set to NULL.
filter_id: Filter ID or LZMA_VLI_UNKNOWN.
flags: Bitwise-or of zero or more of the flags LZMA_STR_ALL_FILTERS, LZMA_STR_ENCODER, LZMA_STR_DECODER, and LZMA_STR_GETOPT_LONG.
allocator: lzma_allocator for custom allocator functions. Set to NULL to use malloc() and free().
Returns
  • LZMA_OK
  • LZMA_OPTIONS_ERROR: Unsupported filter_id or flags
  • LZMA_MEM_ERROR
  • LZMA_PROG_ERROR

Variable Documentation

◆ lzma12_mode_map

const name_value_map lzma12_mode_map[]
static
Initial value:
= {
{ "fast", LZMA_MODE_FAST },
{ "normal", LZMA_MODE_NORMAL },
{ "", 0 }
}
@ LZMA_MODE_FAST
Fast compression.
Definition: lzma12.h:162
@ LZMA_MODE_NORMAL
Normal compression.
Definition: lzma12.h:170

◆ lzma12_mf_map

const name_value_map lzma12_mf_map[]
static
Initial value:
= {
{ "hc3", LZMA_MF_HC3 },
{ "hc4", LZMA_MF_HC4 },
{ "bt2", LZMA_MF_BT2 },
{ "bt3", LZMA_MF_BT3 },
{ "bt4", LZMA_MF_BT4 },
{ "", 0 }
}
@ LZMA_MF_HC4
Hash Chain with 2-, 3-, and 4-byte hashing.
Definition: lzma12.h:93
@ LZMA_MF_BT4
Binary Tree with 2-, 3-, and 4-byte hashing.
Definition: lzma12.h:124
@ LZMA_MF_HC3
Hash Chain with 2- and 3-byte hashing.
Definition: lzma12.h:82
@ LZMA_MF_BT2
Binary Tree with 2-byte hashing.
Definition: lzma12.h:104
@ LZMA_MF_BT3
Binary Tree with 2- and 3-byte hashing.
Definition: lzma12.h:113

◆ name

char name[NAME_LEN_MAX+1]

Name of the filter.

Referenced by parse_options().

◆ opts_size

uint32_t opts_size

For lzma_str_to_filters: Size of the filter-specific options structure.

◆ id

Filter ID.

◆ parse

const char *(* parse) (const char **str, const char *str_end, void *filter_options) ( const char **  str,
const char *  str_end,
void *  filter_options 
)

For lzma_str_to_filters: Function to parse the filter-specific options. The filter_options will already have been allocated using lzma_alloc_zero().

◆ optmap

const option_map* optmap

For lzma_str_from_filters: If the flag LZMA_STR_ENCODER is used then the first strfy_encoder elements of optmap are stringified. With LZMA_STR_DECODER strfy_decoder is used. Currently encoders use all flags that decoders do but if that changes then this needs to be changed too, for example, add a new OPTMAP flag to skip printing some decoder-only flags.

Referenced by parse_options(), and strfy_filter().

◆ allow_null

bool allow_null

For lzma_str_from_filters: If true, lzma_filter.options is allowed to be NULL. In that case, only the filter name is printed without any options.