32#if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN)
63#define U_UNICODE_VERSION "15.0"
157#define UCHAR_MIN_VALUE 0
167#define UCHAR_MAX_VALUE 0x10ffff
173#define U_MASK(x) ((uint32_t)1<<(x))
535#ifndef U_HIDE_DEPRECATED_API
650#ifndef U_HIDE_DEPRECATED_API
669#ifndef U_HIDE_DEPRECATED_API
682#ifndef U_HIDE_DEPRECATED_API
701#ifndef U_HIDE_DEPRECATED_API
727#ifndef U_HIDE_DEPRECATED_API
740#ifndef U_HIDE_DEPRECATED_API
756#ifndef U_HIDE_DEPRECATED_API
868#define U_GC_CN_MASK U_MASK(U_GENERAL_OTHER_TYPES)
871#define U_GC_LU_MASK U_MASK(U_UPPERCASE_LETTER)
873#define U_GC_LL_MASK U_MASK(U_LOWERCASE_LETTER)
875#define U_GC_LT_MASK U_MASK(U_TITLECASE_LETTER)
877#define U_GC_LM_MASK U_MASK(U_MODIFIER_LETTER)
879#define U_GC_LO_MASK U_MASK(U_OTHER_LETTER)
882#define U_GC_MN_MASK U_MASK(U_NON_SPACING_MARK)
884#define U_GC_ME_MASK U_MASK(U_ENCLOSING_MARK)
886#define U_GC_MC_MASK U_MASK(U_COMBINING_SPACING_MARK)
889#define U_GC_ND_MASK U_MASK(U_DECIMAL_DIGIT_NUMBER)
891#define U_GC_NL_MASK U_MASK(U_LETTER_NUMBER)
893#define U_GC_NO_MASK U_MASK(U_OTHER_NUMBER)
896#define U_GC_ZS_MASK U_MASK(U_SPACE_SEPARATOR)
898#define U_GC_ZL_MASK U_MASK(U_LINE_SEPARATOR)
900#define U_GC_ZP_MASK U_MASK(U_PARAGRAPH_SEPARATOR)
903#define U_GC_CC_MASK U_MASK(U_CONTROL_CHAR)
905#define U_GC_CF_MASK U_MASK(U_FORMAT_CHAR)
907#define U_GC_CO_MASK U_MASK(U_PRIVATE_USE_CHAR)
909#define U_GC_CS_MASK U_MASK(U_SURROGATE)
912#define U_GC_PD_MASK U_MASK(U_DASH_PUNCTUATION)
914#define U_GC_PS_MASK U_MASK(U_START_PUNCTUATION)
916#define U_GC_PE_MASK U_MASK(U_END_PUNCTUATION)
918#define U_GC_PC_MASK U_MASK(U_CONNECTOR_PUNCTUATION)
920#define U_GC_PO_MASK U_MASK(U_OTHER_PUNCTUATION)
923#define U_GC_SM_MASK U_MASK(U_MATH_SYMBOL)
925#define U_GC_SC_MASK U_MASK(U_CURRENCY_SYMBOL)
927#define U_GC_SK_MASK U_MASK(U_MODIFIER_SYMBOL)
929#define U_GC_SO_MASK U_MASK(U_OTHER_SYMBOL)
932#define U_GC_PI_MASK U_MASK(U_INITIAL_PUNCTUATION)
934#define U_GC_PF_MASK U_MASK(U_FINAL_PUNCTUATION)
939 (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK)
942#define U_GC_LC_MASK \
943 (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK)
946#define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK)
949#define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK)
952#define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK)
956 (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK)
960 (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \
961 U_GC_PI_MASK|U_GC_PF_MASK)
964#define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK)
1024#ifndef U_HIDE_DEPRECATED_API
1054#ifndef U_HIDE_DEPRECATED_API
1903#ifndef U_HIDE_DEPRECATED_API
1940#ifndef U_HIDE_DEPRECATED_API
1965#ifndef U_HIDE_DEPRECATED_API
1977#ifndef U_HIDE_DEPRECATED_API
2000 U_SHORT_PROPERTY_NAME,
2001 U_LONG_PROPERTY_NAME,
2002#ifndef U_HIDE_DEPRECATED_API
2042#ifndef U_HIDE_DEPRECATED_API
2072#ifndef U_HIDE_DEPRECATED_API
2096 U_JG_NO_JOINING_GROUP,
2205#ifndef U_HIDE_DEPRECATED_API
2256#ifndef U_HIDE_DEPRECATED_API
2288 U_WB_EXTENDNUMLET = 7,
2320#ifndef U_HIDE_DEPRECATED_API
2358 U_SB_SCONTINUE = 14,
2359#ifndef U_HIDE_DEPRECATED_API
2385 U_LB_ALPHABETIC = 2,
2386 U_LB_BREAK_BOTH = 3,
2387 U_LB_BREAK_AFTER = 4,
2388 U_LB_BREAK_BEFORE = 5,
2389 U_LB_MANDATORY_BREAK = 6,
2390 U_LB_CONTINGENT_BREAK = 7,
2391 U_LB_CLOSE_PUNCTUATION = 8,
2392 U_LB_COMBINING_MARK = 9,
2393 U_LB_CARRIAGE_RETURN = 10,
2394 U_LB_EXCLAMATION = 11,
2397 U_LB_IDEOGRAPHIC = 14,
2401 U_LB_INFIX_NUMERIC = 16,
2402 U_LB_LINE_FEED = 17,
2403 U_LB_NONSTARTER = 18,
2405 U_LB_OPEN_PUNCTUATION = 20,
2406 U_LB_POSTFIX_NUMERIC = 21,
2407 U_LB_PREFIX_NUMERIC = 22,
2408 U_LB_QUOTATION = 23,
2409 U_LB_COMPLEX_CONTEXT = 24,
2410 U_LB_SURROGATE = 25,
2412 U_LB_BREAK_SYMBOLS = 27,
2442#ifndef U_HIDE_DEPRECATED_API
2470#ifndef U_HIDE_DEPRECATED_API
2494 U_HST_NOT_APPLICABLE,
2497 U_HST_TRAILING_JAMO,
2500#ifndef U_HIDE_DEPRECATED_API
2950#define U_NO_NUMERIC_VALUE ((double)-123456789.)
3460#define U_GET_GC_MASK(c) U_MASK(u_charType(c))
3504#if !UCONFIG_NO_NORMALIZATION
3590 char *buffer, int32_t bufferLength,
3593#ifndef U_HIDE_DEPRECATED_API
3614 char *dest, int32_t destCapacity,
3725U_CAPI const char* U_EXPORT2
3798U_CAPI const char* U_EXPORT2
4148#if !UCONFIG_NO_NORMALIZATION
C API: Bit set option bit constants for various string and character processing functions.
U_CAPI UBool u_isupper(UChar32 c)
Determines whether the specified code point has the general category "Lu" (uppercase letter).
U_CAPI UBool u_isUAlphabetic(UChar32 c)
Check if a code point has the Alphabetic Unicode property.
UBlockCode
Constants for Unicode blocks, see the Unicode Data file Blocks.txt.
@ UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED
@ UBLOCK_COUNT
One more than the highest normal UBlockCode value.
@ UBLOCK_OPTICAL_CHARACTER_RECOGNITION
@ UBLOCK_LATIN_EXTENDED_G
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
@ UBLOCK_INSCRIPTIONAL_PAHLAVI
@ UBLOCK_YIJING_HEXAGRAM_SYMBOLS
@ UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS
@ UBLOCK_ARABIC_EXTENDED_B
@ UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS
@ UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION
@ UBLOCK_CURRENCY_SYMBOLS
@ UBLOCK_MISCELLANEOUS_SYMBOLS
@ UBLOCK_ANCIENT_GREEK_NUMBERS
@ UBLOCK_SUTTON_SIGNWRITING
@ UBLOCK_ANATOLIAN_HIEROGLYPHS
@ UBLOCK_SYMBOLS_FOR_LEGACY_COMPUTING
@ UBLOCK_SUPPLEMENTAL_PUNCTUATION
@ UBLOCK_MEROITIC_HIEROGLYPHS
@ UBLOCK_INDIC_SIYAQ_NUMBERS
@ UBLOCK_KAKTOVIK_NUMERALS
@ UBLOCK_COPTIC_EPACT_NUMBERS
@ UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS
@ UBLOCK_TAMIL_SUPPLEMENT
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C
@ UBLOCK_NYIAKENG_PUACHUE_HMONG
@ UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT
@ UBLOCK_ETHIOPIC_EXTENDED_B
@ UBLOCK_TRANSPORT_AND_MAP_SYMBOLS
@ UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS
@ UBLOCK_SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A
@ UBLOCK_PRIVATE_USE_AREA
Same as UBLOCK_PRIVATE_USE.
@ UBLOCK_SINHALA_ARCHAIC_NUMBERS
@ UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS
@ UBLOCK_KHITAN_SMALL_SCRIPT
@ UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT
@ UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION
@ UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT
@ UBLOCK_VARIATION_SELECTORS_SUPPLEMENT
@ UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED
@ UBLOCK_EGYPTIAN_HIEROGLYPHS
@ UBLOCK_TANGUT_COMPONENTS
@ UBLOCK_CHEROKEE_SUPPLEMENT
@ UBLOCK_LATIN_EXTENDED_A
@ UBLOCK_SUNDANESE_SUPPLEMENT
@ UBLOCK_CYRILLIC_EXTENDED_A
@ UBLOCK_HANGUL_JAMO_EXTENDED_B
@ UBLOCK_SUPPLEMENTAL_ARROWS_A
@ UBLOCK_DEVANAGARI_EXTENDED_A
@ UBLOCK_INSCRIPTIONAL_PARTHIAN
@ UBLOCK_ARABIC_EXTENDED_A
@ UBLOCK_CYRILLIC_EXTENDED_C
@ UBLOCK_OTTOMAN_SIYAQ_NUMBERS
@ UBLOCK_LINEAR_B_SYLLABARY
@ UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS
@ UBLOCK_SPACING_MODIFIER_LETTERS
@ UBLOCK_MEROITIC_CURSIVE
@ UBLOCK_CYRILLIC_EXTENDED_B
@ UBLOCK_GENERAL_PUNCTUATION
@ UBLOCK_MONGOLIAN_SUPPLEMENT
@ UBLOCK_MISCELLANEOUS_TECHNICAL
@ UBLOCK_TAI_XUAN_JING_SYMBOLS
@ UBLOCK_CONTROL_PICTURES
@ UBLOCK_GREEK
Unicode 3.2 renames this block to "Greek and Coptic".
@ UBLOCK_COUNTING_ROD_NUMERALS
@ UBLOCK_LATIN_EXTENDED_E
@ UBLOCK_LINEAR_B_IDEOGRAMS
@ UBLOCK_RUMI_NUMERAL_SYMBOLS
@ UBLOCK_HIGH_PRIVATE_USE_SURROGATES
@ UBLOCK_MEETEI_MAYEK_EXTENSIONS
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H
@ UBLOCK_COMMON_INDIC_NUMBER_FORMS
@ UBLOCK_ZANABAZAR_SQUARE
@ UBLOCK_ARABIC_EXTENDED_C
@ UBLOCK_ENCLOSED_ALPHANUMERICS
@ UBLOCK_COMBINING_HALF_MARKS
@ UBLOCK_GLAGOLITIC_SUPPLEMENT
@ UBLOCK_IMPERIAL_ARAMAIC
@ UBLOCK_BRAILLE_PATTERNS
@ UBLOCK_MATHEMATICAL_OPERATORS
@ UBLOCK_NO_BLOCK
New No_Block value in Unicode 4.
@ UBLOCK_TANGUT_SUPPLEMENT
@ UBLOCK_SMALL_FORM_VARIANTS
@ UBLOCK_GEORGIAN_EXTENDED
@ UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
@ UBLOCK_LATIN_EXTENDED_D
@ UBLOCK_LATIN_EXTENDED_ADDITIONAL
@ UBLOCK_EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D
@ UBLOCK_SHORTHAND_FORMAT_CONTROLS
@ UBLOCK_COMBINING_MARKS_FOR_SYMBOLS
Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols".
@ UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS
@ UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
@ UBLOCK_KATAKANA_PHONETIC_EXTENSIONS
@ UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS
@ UBLOCK_ETHIOPIC_EXTENDED
@ UBLOCK_PRIVATE_USE
Same as UBLOCK_PRIVATE_USE_AREA.
@ UBLOCK_GEORGIAN_SUPPLEMENT
@ UBLOCK_HANGUL_COMPATIBILITY_JAMO
@ UBLOCK_ARABIC_SUPPLEMENT
@ UBLOCK_HANGUL_SYLLABLES
@ UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS
@ UBLOCK_CJK_COMPATIBILITY
@ UBLOCK_CJK_RADICALS_SUPPLEMENT
@ UBLOCK_ARABIC_PRESENTATION_FORMS_B
@ UBLOCK_ARABIC_PRESENTATION_FORMS_A
@ UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT
@ UBLOCK_BOPOMOFO_EXTENDED
@ UBLOCK_IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION
@ UBLOCK_LATIN_EXTENDED_F
@ UBLOCK_CYPRIOT_SYLLABARY
@ UBLOCK_ETHIOPIC_SUPPLEMENT
@ UBLOCK_OLD_SOUTH_ARABIAN
@ UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B
@ UBLOCK_BAMUM_SUPPLEMENT
@ UBLOCK_CYRILLIC_SUPPLEMENT
@ UBLOCK_MYANMAR_EXTENDED_B
@ UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION
@ UBLOCK_SYRIAC_SUPPLEMENT
@ UBLOCK_ORNAMENTAL_DINGBATS
@ UBLOCK_OLD_NORTH_ARABIAN
@ UBLOCK_DEVANAGARI_EXTENDED
@ UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A
@ UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS
@ UBLOCK_SUPPLEMENTAL_ARROWS_C
@ UBLOCK_LATIN_EXTENDED_B
@ UBLOCK_MODIFIER_TONE_LETTERS
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS
@ UBLOCK_CJK_COMPATIBILITY_FORMS
@ UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS
@ UBLOCK_VEDIC_EXTENSIONS
@ UBLOCK_CYRILLIC_EXTENDED_D
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
@ UBLOCK_LATIN_1_SUPPLEMENT
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G
@ UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
@ UBLOCK_CAUCASIAN_ALBANIAN
@ UBLOCK_ALCHEMICAL_SYMBOLS
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F
@ UBLOCK_MYANMAR_EXTENDED_A
@ UBLOCK_EARLY_DYNASTIC_CUNEIFORM
@ UBLOCK_PHONETIC_EXTENSIONS
@ UBLOCK_GEOMETRIC_SHAPES
@ UBLOCK_LATIN_EXTENDED_C
@ UBLOCK_SUPPLEMENTAL_ARROWS_B
@ UBLOCK_ETHIOPIC_EXTENDED_A
@ UBLOCK_GEOMETRIC_SHAPES_EXTENDED
@ UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A
@ UBLOCK_ALPHABETIC_PRESENTATION_FORMS
@ UBLOCK_LETTERLIKE_SYMBOLS
@ UBLOCK_ZNAMENNY_MUSICAL_NOTATION
@ UBLOCK_CYRILLIC_SUPPLEMENTARY
Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
@ UBLOCK_HANGUL_JAMO_EXTENDED_A
@ UBLOCK_BYZANTINE_MUSICAL_SYMBOLS
@ UBLOCK_SMALL_KANA_EXTENSION
@ UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
@ UBLOCK_COMBINING_DIACRITICAL_MARKS
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E
@ UBLOCK_VARIATION_SELECTORS
U_CAPI UBool u_isblank(UChar32 c)
Determines whether the specified code point is a "blank" or "horizontal space", a character that visi...
U_CAPI UChar32 u_charFromName(UCharNameChoice nameChoice, const char *name, UErrorCode *pErrorCode)
Find a Unicode character by its name and return its code point value.
UCharNameChoice
Selector constants for u_charName().
@ U_CHAR_NAME_CHOICE_COUNT
One more than the highest normal UCharNameChoice value.
@ U_UNICODE_10_CHAR_NAME
The Unicode_1_Name property value which is of little practical value.
@ U_CHAR_NAME_ALIAS
Corrected name from NameAliases.txt.
@ U_EXTENDED_CHAR_NAME
Standard or synthetic character name.
@ U_UNICODE_CHAR_NAME
Unicode character name (Name property).
U_CAPI UBool u_isUWhiteSpace(UChar32 c)
Check if a code point has the White_Space Unicode property.
U_CAPI int32_t u_charName(UChar32 code, UCharNameChoice nameChoice, char *buffer, int32_t bufferLength, UErrorCode *pErrorCode)
Retrieve the name of a Unicode character.
U_CAPI int32_t u_getIntPropertyValue(UChar32 c, UProperty which)
Get the property value for an enumerated or integer Unicode property for a code point.
U_CAPI UBlockCode ublock_getCode(UChar32 c)
Returns the Unicode allocation block that contains the character.
U_CAPI UBool u_hasBinaryProperty(UChar32 c, UProperty which)
Check a binary Unicode property for a code point.
U_CAPI double u_getNumericValue(UChar32 c)
Get the numeric value for a Unicode code point as defined in the Unicode Character Database.
U_CAPI UChar32 u_toupper(UChar32 c)
The given character is mapped to its uppercase equivalent according to UnicodeData....
U_CAPI UProperty u_getPropertyEnum(const char *alias)
Return the UProperty enum for a given property name, as specified in the Unicode database file Proper...
UJoiningType
Joining Type constants.
@ U_JT_COUNT
One more than the highest normal UJoiningType value.
U_CAPI UBool u_isbase(UChar32 c)
Non-standard: Determines whether the specified code point is a base character.
U_CAPI UBool u_isdigit(UChar32 c)
Determines whether the specified code point is a digit character according to Java.
U_CAPI UBool u_isMirrored(UChar32 c)
Determines whether the code point has the Bidi_Mirrored property.
U_CAPI void u_charAge(UChar32 c, UVersionInfo versionArray)
Get the "age" of the code point.
U_CAPI UBool u_isspace(UChar32 c)
Determines if the specified character is a space character or not.
U_CAPI int32_t u_getIntPropertyMaxValue(UProperty which)
Get the maximum value for an enumerated/integer/binary Unicode property.
UIndicPositionalCategory
Indic Positional Category constants.
@ U_INPC_VISUAL_ORDER_LEFT
@ U_INPC_TOP_AND_BOTTOM_AND_RIGHT
@ U_INPC_TOP_AND_BOTTOM_AND_LEFT
@ U_INPC_TOP_AND_LEFT_AND_RIGHT
@ U_INPC_BOTTOM_AND_RIGHT
U_CAPI void u_getUnicodeVersion(UVersionInfo versionArray)
Gets the Unicode version information.
UCharDirection
This specifies the language directional property of a character set.
@ U_EUROPEAN_NUMBER_TERMINATOR
ET.
@ U_RIGHT_TO_LEFT_ARABIC
AL.
@ U_POP_DIRECTIONAL_ISOLATE
PDI.
@ U_COMMON_NUMBER_SEPARATOR
CS.
@ U_DIR_NON_SPACING_MARK
NSM.
@ U_FIRST_STRONG_ISOLATE
FSI.
@ U_POP_DIRECTIONAL_FORMAT
PDF.
@ U_CHAR_DIRECTION_COUNT
One more than the highest UCharDirection value.
@ U_WHITE_SPACE_NEUTRAL
WS.
@ U_RIGHT_TO_LEFT_OVERRIDE
RLO.
@ U_RIGHT_TO_LEFT_EMBEDDING
RLE.
@ U_EUROPEAN_NUMBER_SEPARATOR
ES.
@ U_LEFT_TO_RIGHT_ISOLATE
LRI.
@ U_LEFT_TO_RIGHT_OVERRIDE
LRO.
@ U_LEFT_TO_RIGHT_EMBEDDING
LRE.
@ U_RIGHT_TO_LEFT_ISOLATE
RLI.
UPropertyNameChoice
Selector constants for u_getPropertyName() and u_getPropertyValueName().
@ U_PROPERTY_NAME_CHOICE_COUNT
One more than the highest normal UPropertyNameChoice value.
UBool UEnumCharNamesFn(void *context, UChar32 code, UCharNameChoice nameChoice, const char *name, int32_t length)
Type of a callback function for u_enumCharNames() that gets called for each Unicode character with th...
U_CAPI UBool u_isUUppercase(UChar32 c)
Check if a code point has the Uppercase Unicode property.
U_CAPI int8_t u_charType(UChar32 c)
Returns the general category value for the code point.
U_CAPI UChar32 u_forDigit(int32_t digit, int8_t radix)
Determines the character representation for a specific digit in the specified radix.
U_CAPI UBool u_isprint(UChar32 c)
Determines whether the specified code point is a printable character.
ULineBreak
Line Break constants.
@ U_LB_CONDITIONAL_JAPANESE_STARTER
@ U_LB_INSEPARABLE
Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0.
@ U_LB_COUNT
One more than the highest normal ULineBreak value.
@ U_LB_REGIONAL_INDICATOR
int32_t u_getISOComment(UChar32 c, char *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Returns an empty string.
U_CAPI UBool u_islower(UChar32 c)
Determines whether the specified code point has the general category "Ll" (lowercase letter).
U_CAPI UBool u_isISOControl(UChar32 c)
Determines whether the specified code point is an ISO control code.
UCharCategory
Data for enumerated Unicode general category types.
@ U_GENERAL_OTHER_TYPES
Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNE...
@ U_PARAGRAPH_SEPARATOR
Zp.
@ U_INITIAL_PUNCTUATION
Pi.
@ U_UNASSIGNED
Non-category for unassigned and non-character code points.
@ U_COMBINING_SPACING_MARK
Mc.
@ U_CONNECTOR_PUNCTUATION
Pc.
@ U_CHAR_CATEGORY_COUNT
One higher than the last enum UCharCategory constant.
@ U_DECIMAL_DIGIT_NUMBER
Nd.
U_CAPI UBool u_isWhitespace(UChar32 c)
Determines if the specified code point is a whitespace character according to Java/ICU.
UVerticalOrientation
Vertical Orientation constants.
@ U_VO_TRANSFORMED_UPRIGHT
@ U_VO_TRANSFORMED_ROTATED
U_CAPI UBool u_isdefined(UChar32 c)
Determines whether the specified code point is "defined", which usually means that it is assigned a c...
UJoiningGroup
Joining Group constants.
@ U_JG_BURUSHASKI_YEH_BARREE
@ U_JG_HANIFI_ROHINGYA_PA
@ U_JG_MANICHAEAN_THAMEDH
@ U_JG_COUNT
One more than the highest normal UJoiningGroup value.
@ U_JG_MANICHAEAN_DHAMEDH
@ U_JG_MANICHAEAN_HUNDRED
@ U_JG_HANIFI_ROHINGYA_KINNA_YA
UHangulSyllableType
Hangul Syllable Type constants.
@ U_HST_COUNT
One more than the highest normal UHangulSyllableType value.
U_CAPI UChar32 u_getBidiPairedBracket(UChar32 c)
Maps the specified character to its paired bracket character.
U_CAPI UBool u_isIDStart(UChar32 c)
Determines if the specified character is permissible as the first character in an identifier accordin...
USentenceBreak
Sentence Break constants.
@ U_SB_COUNT
One more than the highest normal USentenceBreak value.
U_CAPI int32_t u_getPropertyValueEnum(UProperty property, const char *alias)
Return the property value integer for a given value name, as specified in the Unicode database file P...
UBool UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type)
Callback from u_enumCharTypes(), is called for each contiguous range of code points c (where start<=c...
UEastAsianWidth
East Asian Width constants.
@ U_EA_COUNT
One more than the highest normal UEastAsianWidth value.
U_CAPI UChar32 u_totitle(UChar32 c)
The given character is mapped to its titlecase equivalent according to UnicodeData....
U_CAPI UBool u_isULowercase(UChar32 c)
Check if a code point has the Lowercase Unicode property.
U_CAPI const UCPMap * u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode)
Returns an immutable UCPMap for an enumerated/catalog/int-valued property.
U_CAPI UBool u_isJavaIDPart(UChar32 c)
Determines if the specified character is permissible in a Java identifier.
U_CAPI UCharDirection u_charDirection(UChar32 c)
Returns the bidirectional category value for the code point, which is used in the Unicode bidirection...
U_CAPI UBool u_istitle(UChar32 c)
Determines whether the specified code point is a titlecase letter.
U_CAPI UBool u_iscntrl(UChar32 c)
Determines whether the specified code point is a control character (as defined by this function).
U_CAPI const char * u_getPropertyValueName(UProperty property, int32_t value, UPropertyNameChoice nameChoice)
Return the Unicode name for a given property value, as given in the Unicode database file PropertyVal...
UGraphemeClusterBreak
Grapheme Cluster Break constants.
@ U_GCB_REGIONAL_INDICATOR
@ U_GCB_COUNT
One more than the highest normal UGraphemeClusterBreak value.
U_CAPI const USet * u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode)
Returns a frozen USet for a binary property.
U_CAPI uint8_t u_getCombiningClass(UChar32 c)
Returns the combining class of the code point as specified in UnicodeData.txt.
U_CAPI void u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context)
Enumerate efficiently all code points with their Unicode general categories.
U_CAPI UBool u_isxdigit(UChar32 c)
Determines whether the specified code point is a hexadecimal digit.
U_CAPI void u_enumCharNames(UChar32 start, UChar32 limit, UEnumCharNamesFn *fn, void *context, UCharNameChoice nameChoice, UErrorCode *pErrorCode)
Enumerate all assigned Unicode characters between the start and limit code points (start inclusive,...
U_CAPI UChar32 u_charMirror(UChar32 c)
Maps the specified character to a "mirror-image" character.
U_CAPI UBool u_isJavaSpaceChar(UChar32 c)
Determine if the specified code point is a space character according to Java.
U_CAPI UBool u_isIDPart(UChar32 c)
Determines if the specified character is permissible in an identifier according to Java.
U_CAPI UChar32 u_foldCase(UChar32 c, uint32_t options)
The given character is mapped to its case folding equivalent according to UnicodeData....
U_CAPI UBool u_isgraph(UChar32 c)
Determines whether the specified code point is a "graphic" character (printable, excluding spaces).
U_CAPI int32_t u_charDigitValue(UChar32 c)
Returns the decimal digit value of a decimal digit character.
U_CAPI UBool u_isIDIgnorable(UChar32 c)
Determines if the specified character should be regarded as an ignorable character in an identifier,...
U_CAPI UBool u_ispunct(UChar32 c)
Determines whether the specified code point is a punctuation character.
UNumericType
Numeric Type constants.
@ U_NT_COUNT
One more than the highest normal UNumericType value.
UDecompositionType
Decomposition Type constants.
@ U_DT_COUNT
One more than the highest normal UDecompositionType value.
U_CAPI UBool u_stringHasBinaryProperty(const UChar *s, int32_t length, UProperty which)
Returns true if the property is true for the string.
U_CAPI int32_t u_digit(UChar32 ch, int8_t radix)
Returns the decimal digit value of the code point in the specified radix.
UProperty
Selection constants for Unicode properties.
@ UCHAR_GRAPHEME_CLUSTER_BREAK
Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1).
@ UCHAR_BIDI_PAIRED_BRACKET
String property Bidi_Paired_Bracket (new in Unicode 6.3).
@ UCHAR_ALPHABETIC
Binary property Alphabetic.
@ UCHAR_POSIX_GRAPH
Binary property graph (a C/POSIX character class).
@ UCHAR_RGI_EMOJI_TAG_SEQUENCE
Binary property of strings RGI_Emoji_Tag_Sequence.
@ UCHAR_RGI_EMOJI
Binary property of strings RGI_Emoji.
@ UCHAR_BLOCK
Enumerated property Block.
@ UCHAR_INVALID_CODE
Represents a nonexistent or invalid property or property value.
@ UCHAR_SEGMENT_STARTER
Binary Property Segment_Starter.
@ UCHAR_PREPENDED_CONCATENATION_MARK
Binary property Prepended_Concatenation_Mark.
@ UCHAR_REGIONAL_INDICATOR
Binary property Regional_Indicator.
@ UCHAR_OTHER_PROPERTY_START
First constant for Unicode properties with unusual value types.
@ UCHAR_S_TERM
Binary property STerm (new in Unicode 4.0.1).
@ UCHAR_WHITE_SPACE
Binary property White_Space.
@ UCHAR_CANONICAL_COMBINING_CLASS
Enumerated property Canonical_Combining_Class.
@ UCHAR_SOFT_DOTTED
Binary property Soft_Dotted (new in Unicode 3.2).
@ UCHAR_GRAPHEME_LINK
Binary property Grapheme_Link (new in Unicode 3.2).
@ UCHAR_PATTERN_SYNTAX
Binary property Pattern_Syntax (new in Unicode 4.1).
@ UCHAR_GRAPHEME_EXTEND
Binary property Grapheme_Extend (new in Unicode 3.2).
@ UCHAR_CASED
Binary property Cased.
@ UCHAR_XID_START
Binary property XID_Start.
@ UCHAR_RGI_EMOJI_ZWJ_SEQUENCE
Binary property of strings RGI_Emoji_ZWJ_Sequence.
@ UCHAR_VERTICAL_ORIENTATION
Enumerated property Vertical_Orientation.
@ UCHAR_NFKC_INERT
Binary property NFKC_Inert.
@ UCHAR_INT_LIMIT
One more than the last constant for enumerated/integer Unicode properties.
@ UCHAR_RGI_EMOJI_FLAG_SEQUENCE
Binary property of strings RGI_Emoji_Flag_Sequence.
@ UCHAR_OTHER_PROPERTY_LIMIT
One more than the last constant for Unicode properties with unusual value types.
@ UCHAR_PATTERN_WHITE_SPACE
Binary property Pattern_White_Space (new in Unicode 4.1).
@ UCHAR_VARIATION_SELECTOR
Binary property Variation_Selector (new in Unicode 4.0.1).
@ UCHAR_NUMERIC_VALUE
Double property Numeric_Value.
@ UCHAR_DOUBLE_START
First constant for double Unicode properties.
@ UCHAR_HEX_DIGIT
Binary property Hex_Digit.
@ UCHAR_SIMPLE_TITLECASE_MAPPING
String property Simple_Titlecase_Mapping.
@ UCHAR_TRAIL_CANONICAL_COMBINING_CLASS
Enumerated property Trail_Canonical_Combining_Class.
@ UCHAR_DIACRITIC
Binary property Diacritic.
@ UCHAR_XID_CONTINUE
Binary property XID_Continue.
@ UCHAR_NFKC_QUICK_CHECK
Enumerated property NFKC_Quick_Check.
@ UCHAR_HYPHEN
Binary property Hyphen.
@ UCHAR_RADICAL
Binary property Radical (new in Unicode 3.2).
@ UCHAR_BIDI_PAIRED_BRACKET_TYPE
Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
@ UCHAR_DASH
Binary property Dash.
@ UCHAR_ASCII_HEX_DIGIT
Binary property ASCII_Hex_Digit.
@ UCHAR_ID_CONTINUE
Binary property ID_Continue.
@ UCHAR_NFKD_INERT
Binary property NFKD_Inert.
@ UCHAR_BIDI_MIRRORING_GLYPH
String property Bidi_Mirroring_Glyph.
@ UCHAR_LEAD_CANONICAL_COMBINING_CLASS
Enumerated property Lead_Canonical_Combining_Class.
@ UCHAR_UNICODE_1_NAME
String property Unicode_1_Name.
@ UCHAR_BIDI_MIRRORED
Binary property Bidi_Mirrored.
@ UCHAR_CASE_IGNORABLE
Binary property Case_Ignorable.
@ UCHAR_EAST_ASIAN_WIDTH
Enumerated property East_Asian_Width.
@ UCHAR_STRING_LIMIT
One more than the last constant for string Unicode properties.
@ UCHAR_EMOJI_PRESENTATION
Binary property Emoji_Presentation.
@ UCHAR_GRAPHEME_BASE
Binary property Grapheme_Base (new in Unicode 3.2).
@ UCHAR_NFKD_QUICK_CHECK
Enumerated property NFKD_Quick_Check.
@ UCHAR_NAME
String property Name.
@ UCHAR_UPPERCASE
Binary property Uppercase.
@ UCHAR_CASE_FOLDING
String property Case_Folding.
@ UCHAR_INDIC_SYLLABIC_CATEGORY
Enumerated property Indic_Syllabic_Category.
@ UCHAR_MATH
Binary property Math.
@ UCHAR_NUMERIC_TYPE
Enumerated property Numeric_Type.
@ UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE
Binary property of strings RGI_Emoji_Modifier_Sequence.
@ UCHAR_ISO_COMMENT
Deprecated string property ISO_Comment.
@ UCHAR_SCRIPT
Enumerated property Script.
@ UCHAR_CHANGES_WHEN_TITLECASED
Binary property Changes_When_Titlecased.
@ UCHAR_EXTENDED_PICTOGRAPHIC
Binary property Extended_Pictographic.
@ UCHAR_DEPRECATED
Binary property Deprecated (new in Unicode 3.2).
@ UCHAR_CHANGES_WHEN_UPPERCASED
Binary property Changes_When_Uppercased.
@ UCHAR_CHANGES_WHEN_CASEFOLDED
Binary property Changes_When_Casefolded.
@ UCHAR_INDIC_POSITIONAL_CATEGORY
Enumerated property Indic_Positional_Category.
@ UCHAR_DEFAULT_IGNORABLE_CODE_POINT
Binary property Default_Ignorable_Code_Point (new in Unicode 3.2).
@ UCHAR_GENERAL_CATEGORY
Enumerated property General_Category.
@ UCHAR_INT_START
First constant for enumerated/integer Unicode properties.
@ UCHAR_EMOJI_MODIFIER
Binary property Emoji_Modifier.
@ UCHAR_NFD_QUICK_CHECK
Enumerated property NFD_Quick_Check.
@ UCHAR_IDS_BINARY_OPERATOR
Binary property IDS_Binary_Operator (new in Unicode 3.2).
@ UCHAR_BINARY_START
First constant for binary Unicode properties.
@ UCHAR_TERMINAL_PUNCTUATION
Binary property Terminal_Punctuation.
@ UCHAR_GENERAL_CATEGORY_MASK
Bitmask property General_Category_Mask.
@ UCHAR_MASK_START
First constant for bit-mask Unicode properties.
@ UCHAR_DECOMPOSITION_TYPE
Enumerated property Decomposition_Type.
@ UCHAR_TITLECASE_MAPPING
String property Titlecase_Mapping.
@ UCHAR_HANGUL_SYLLABLE_TYPE
Enumerated property Hangul_Syllable_Type, new in Unicode 4.
@ UCHAR_LINE_BREAK
Enumerated property Line_Break.
@ UCHAR_SIMPLE_UPPERCASE_MAPPING
String property Simple_Uppercase_Mapping.
@ UCHAR_POSIX_ALNUM
Binary property alnum (a C/POSIX character class).
@ UCHAR_JOINING_TYPE
Enumerated property Joining_Type.
@ UCHAR_EMOJI_KEYCAP_SEQUENCE
Binary property of strings Emoji_Keycap_Sequence.
@ UCHAR_QUOTATION_MARK
Binary property Quotation_Mark.
@ UCHAR_NFC_INERT
Binary property NFC_Inert.
@ UCHAR_LOWERCASE_MAPPING
String property Lowercase_Mapping.
@ UCHAR_SIMPLE_CASE_FOLDING
String property Simple_Case_Folding.
@ UCHAR_JOIN_CONTROL
Binary property Join_Control.
@ UCHAR_NONCHARACTER_CODE_POINT
Binary property Noncharacter_Code_Point.
@ UCHAR_BIDI_CONTROL
Binary property Bidi_Control.
@ UCHAR_CHANGES_WHEN_LOWERCASED
Binary property Changes_When_Lowercased.
@ UCHAR_BINARY_LIMIT
One more than the last constant for binary Unicode properties.
@ UCHAR_IDS_TRINARY_OPERATOR
Binary property IDS_Trinary_Operator (new in Unicode 3.2).
@ UCHAR_ID_START
Binary property ID_Start.
@ UCHAR_AGE
String property Age.
@ UCHAR_WORD_BREAK
Enumerated property Word_Break (new in Unicode 4.1).
@ UCHAR_DOUBLE_LIMIT
One more than the last constant for double Unicode properties.
@ UCHAR_EMOJI_MODIFIER_BASE
Binary property Emoji_Modifier_Base.
@ UCHAR_EMOJI_COMPONENT
Binary property Emoji_Component.
@ UCHAR_POSIX_BLANK
Binary property blank (a C/POSIX character class).
@ UCHAR_SIMPLE_LOWERCASE_MAPPING
String property Simple_Lowercase_Mapping.
@ UCHAR_NFD_INERT
Binary property NFD_Inert.
@ UCHAR_POSIX_PRINT
Binary property print (a C/POSIX character class).
@ UCHAR_SENTENCE_BREAK
Enumerated property Sentence_Break (new in Unicode 4.1).
@ UCHAR_IDEOGRAPHIC
Binary property Ideographic.
@ UCHAR_UNIFIED_IDEOGRAPH
Binary property Unified_Ideograph (new in Unicode 3.2).
@ UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED
Binary property Changes_When_NFKC_Casefolded.
@ UCHAR_NFC_QUICK_CHECK
Enumerated property NFC_Quick_Check.
@ UCHAR_CASE_SENSITIVE
Binary property Case_Sensitive.
@ UCHAR_UPPERCASE_MAPPING
String property Uppercase_Mapping.
@ UCHAR_BIDI_CLASS
Enumerated property Bidi_Class.
@ UCHAR_BASIC_EMOJI
Binary property of strings Basic_Emoji.
@ UCHAR_MASK_LIMIT
One more than the last constant for bit-mask Unicode properties.
@ UCHAR_JOINING_GROUP
Enumerated property Joining_Group.
@ UCHAR_LOGICAL_ORDER_EXCEPTION
Binary property Logical_Order_Exception (new in Unicode 3.2).
@ UCHAR_EXTENDER
Binary property Extender.
@ UCHAR_STRING_START
First constant for string Unicode properties.
@ UCHAR_SCRIPT_EXTENSIONS
Miscellaneous property Script_Extensions (new in Unicode 6.0).
@ UCHAR_FULL_COMPOSITION_EXCLUSION
Binary property Full_Composition_Exclusion.
@ UCHAR_EMOJI
Binary property Emoji.
@ UCHAR_LOWERCASE
Binary property Lowercase.
@ UCHAR_CHANGES_WHEN_CASEMAPPED
Binary property Changes_When_Casemapped.
@ UCHAR_POSIX_XDIGIT
Binary property xdigit (a C/POSIX character class).
U_CAPI int32_t u_getIntPropertyMinValue(UProperty which)
Get the minimum value for an enumerated/integer/binary Unicode property.
U_CAPI UBool u_isalnum(UChar32 c)
Determines whether the specified code point is an alphanumeric character (letter or digit) according ...
U_CAPI UBool u_isalpha(UChar32 c)
Determines whether the specified code point is a letter character.
U_CAPI UChar32 u_tolower(UChar32 c)
The given character is mapped to its lowercase equivalent according to UnicodeData....
U_CAPI int32_t u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Get the FC_NFKC_Closure property string for a character.
UIndicSyllabicCategory
Indic Syllabic Category constants.
@ U_INSC_INVISIBLE_STACKER
@ U_INSC_SYLLABLE_MODIFIER
@ U_INSC_MODIFYING_LETTER
@ U_INSC_REGISTER_SHIFTER
@ U_INSC_CONSONANT_KILLER
@ U_INSC_CONSONANT_HEAD_LETTER
@ U_INSC_BRAHMI_JOINING_NUMBER
@ U_INSC_CANTILLATION_MARK
@ U_INSC_VOWEL_INDEPENDENT
@ U_INSC_CONSONANT_MEDIAL
@ U_INSC_CONSONANT_WITH_STACKER
@ U_INSC_CONSONANT_PREFIXED
@ U_INSC_CONSONANT_SUCCEEDING_REPHA
@ U_INSC_CONSONANT_PLACEHOLDER
@ U_INSC_CONSONANT_PRECEDING_REPHA
@ U_INSC_CONSONANT_SUBJOINED
@ U_INSC_CONSONANT_INITIAL_POSTFIXED
UWordBreakValues
Word Break constants.
@ U_WB_REGIONAL_INDICATOR
@ U_WB_COUNT
One more than the highest normal UWordBreakValues value.
U_CAPI UBool u_isJavaIDStart(UChar32 c)
Determines if the specified character is permissible as the first character in a Java identifier.
UBidiPairedBracketType
Bidi Paired Bracket Type constants.
@ U_BPT_CLOSE
Close paired bracket.
@ U_BPT_COUNT
One more than the highest normal UBidiPairedBracketType value.
@ U_BPT_NONE
Not a paired bracket.
@ U_BPT_OPEN
Open paired bracket.
U_CAPI const char * u_getPropertyName(UProperty property, UPropertyNameChoice nameChoice)
Return the Unicode name for a given property, as given in the Unicode database file PropertyAliases....
This file defines an abstract map from Unicode code points to integer values.
struct UCPMap UCPMap
Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
#define U_DEPRECATED
This is used to declare a function as a deprecated public ICU C API
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
int8_t UBool
The ICU boolean type, a signed-byte integer.
#define U_CAPI
This is used to declare a function as a public ICU C API.
char16_t UChar
The base type for UTF-16 code units and pointers.
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
struct USet USet
USet is the C API type corresponding to C++ class UnicodeSet.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]
The binary form of a version on ICU APIs is an array of 4 uint8_t.