Base Framework
|
#include <base/string/Unicode.h>
Public Types | |
enum | MultibyteEncoding { UTF7, UTF8, UTF16, UTF16BE, UTF16LE, UTF32, UTF32BE, UTF32LE } |
enum | { ERROR_EMPTY = 0, ERROR_INCOMPLETE = -1, ERROR_BAD_ENCODING = -2, INVALID_UCS4_CHARACTER = -3, INVALID_UCS2_CHARACTER = -4 } |
enum | EncodingFlags { ADD_BOM = 1, EAT_BOM = 2, EXPECT_BOM = 4, ASSUME_NATIVE_BYTE_ORDER = 8, ASSUME_BE = 16, ASSUME_LE = 32 } |
Static Public Member Functions | |
static unsigned int | getMaximumNumberOfMultibytes (MultibyteEncoding encoding) noexcept |
static Literal | getMIMECharset (MultibyteEncoding encoding) noexcept |
static bool | isSurrogateCode (unsigned int value) noexcept |
static bool | isUCS4 (unsigned int value) noexcept |
static int | readUCS4 (const uint8 *src, const uint8 *end, ucs4 &ch) noexcept |
static int | readUCS4 (const uint8 *src, ucs4 &ch) noexcept |
static int | readUCS4 (const utf16 *src, ucs4 &ch) noexcept |
static int | readUCS4 (const char16_t *src, ucs4 &ch) noexcept |
static MemoryDiff | getUTF8StringLength (const uint8 *src, const uint8 *end) noexcept |
static MemoryDiff | getUTF8StringLength (const uint8 *src) noexcept |
static MemoryDiff | getUTF8StringLength (const uint8 *src, MemorySize size) noexcept |
static MemoryDiff | getStringLength (const utf16 *src) noexcept |
static MemoryDiff | getStringLength (const utf16 *src, MemorySize size) noexcept |
static MemoryDiff | getStringLength (const char16_t *src) noexcept |
static MemoryDiff | getStringLength (const char16_t *src, MemorySize size) noexcept |
static MemoryDiff | getStringLength (const wchar *src) noexcept |
static MemoryDiff | getStringLength (const wchar *src, MemorySize size) noexcept |
static MemorySize | getUTF8Bytes (ucs4 ch) noexcept |
static MemorySize | writeUTF8 (uint8 *dest, ucs4 ch) noexcept |
static MemorySize | getUTF16Words (ucs4 ch) noexcept |
static MemorySize | writeUTF16 (utf16 *dest, ucs4 ch) noexcept |
static MemoryDiff | UCS2ToUTF8 (uint8 *dest, const ucs2 *src, MemorySize size, unsigned int flags=0) noexcept |
static MemoryDiff | UTF16ToUTF8 (uint8 *dest, const utf16 *src, MemorySize size, unsigned int flags=0) noexcept |
static MemoryDiff | UTF16ToUTF8 (uint8 *dest, const char16_t *src, MemorySize size, unsigned int flags=0) noexcept |
static MemoryDiff | UCS4ToUCS2 (ucs2 *dest, const ucs4 *src, MemorySize size, unsigned int flags=0) noexcept |
static MemoryDiff | UCS2ToUCS4 (ucs4 *dest, const ucs2 *src, MemorySize size, unsigned int flags=0) noexcept |
static MemoryDiff | UCS4ToUTF8 (uint8 *dest, const ucs4 *src, MemorySize size, unsigned int flags=0) noexcept |
static MemoryDiff | UTF32ToUTF8 (uint8 *dest, const char32_t *src, MemorySize size, unsigned int flags=0) noexcept |
static MemoryDiff | UTF8ToUTF16 (utf16 *dest, const uint8 *src, MemorySize size, unsigned int flags=EAT_BOM) noexcept |
static MemoryDiff | UTF8ToUCS4 (ucs4 *dest, const uint8 *src, MemorySize size, unsigned int flags=EAT_BOM) noexcept |
static MemoryDiff | UTF8ToUCS4 (ucs4 *dest, const char *src, MemorySize size, unsigned int flags=EAT_BOM) noexcept |
static MemoryDiff | UCS4ToUTF16BE (uint8 *dest, const ucs4 *src, MemorySize size, unsigned int flags=ADD_BOM) noexcept |
static MemoryDiff | UCS4ToUTF16LE (uint8 *dest, const ucs4 *src, MemorySize size, unsigned int flags=ADD_BOM) noexcept |
static MemoryDiff | UTF16ToUCS4 (ucs4 *dest, const uint8 *src, MemorySize size, unsigned int flags=EAT_BOM) noexcept |
static MemoryDiff | UTF16ToUCS4 (ucs4 *dest, const utf16 *src, MemorySize size, unsigned int flags=0) noexcept |
static MemoryDiff | UTF16ToUCS4 (ucs4 *dest, const char16_t *src, MemorySize size, unsigned int flags=0) noexcept |
static MemoryDiff | UCS4ToUTF16 (utf16 *dest, const ucs4 *src, MemorySize size, unsigned int flags=0) noexcept |
static MemoryDiff | UCS4ToUTF16 (char16_t *dest, const ucs4 *src, MemorySize size, unsigned int flags=0) noexcept |
static MemoryDiff | UCS4ToUTF32BE (uint8 *dest, const ucs4 *src, MemorySize size, unsigned int flags=ADD_BOM) noexcept |
static MemoryDiff | UCS4ToUTF32LE (uint8 *dest, const ucs4 *src, MemorySize size, unsigned int flags=ADD_BOM) noexcept |
static MemoryDiff | UTF32LEToUCS4 (ucs4 *dest, const uint8 *src, MemorySize size, unsigned int flags=EAT_BOM) |
static MemoryDiff | UTF32BEToUCS4 (ucs4 *dest, const uint8 *src, MemorySize size, unsigned int flags=EAT_BOM) |
static MemoryDiff | UCS4ToWChar (wchar *dest, const ucs4 *src, MemorySize size) noexcept |
static MemoryDiff | WCharToUCS4 (ucs4 *dest, const wchar *src, MemorySize size) noexcept |
static MemoryDiff | UTF8ToWChar (wchar *dest, const uint8 *src, MemorySize size) noexcept |
static MemoryDiff | WCharToUTF8 (uint8 *dest, const wchar *src, MemorySize size) noexcept |
Static Public Attributes | |
static constexpr ucs4 | BOM = 0x0000feff |
static constexpr ucs4 | MAX = 0x10ffff |
static constexpr ucs4 | MAX_ISO = 0x7ffffff |
static constexpr ucs4 | BAD = 0xffffffff |
Unicode helper functions.
anonymous enum |
Encoding flags.
Multibyte encoding.
Enumerator | |
---|---|
UTF7 | Unicode transformation format (UTF-7). |
UTF8 | Unicode transformation format (UTF-8). |
UTF16 | Unicode transformation format (UTF-16). |
UTF16BE | Unicode transformation format (UTF-16) with big endian byte order. |
UTF16LE | Unicode transformation format (UTF-16) with little endian byte order. |
UTF32 | Unicode transformation format (UTF-32). |
UTF32BE | Unicode transformation format (UTF-32) with big endian byte order. |
UTF32LE | Unicode transformation format (UTF-32) with little endian byte order. |
|
inlinestaticnoexcept |
Returns the maximum number of bytes required to represent any UCS-4 character.
|
staticnoexcept |
Returns the MIME charset for the specified encoding.
encoding | The multibyte encoding. |
|
staticnoexcept |
Returns the number of characters.
|
staticnoexcept |
Returns the number of characters.
|
staticnoexcept |
Returns the number of characters.
|
staticnoexcept |
Returns the number of characters.
|
staticnoexcept |
Returns the number of characters.
|
staticnoexcept |
Returns the number of characters.
|
inlinestaticnoexcept |
Returns the number of UTF-16 words required to represent the given code.
|
inlinestaticnoexcept |
Returns the number of bytes required for the UTF-8 encoding of the given UCS4 character. Returns 0 if the UCS4 character is invalid.
|
staticnoexcept |
Validates whether the given null-terminated string uses valid UTF-8 encoding. Returns the number of characters if valid. Otherwise returns a negative status.
|
staticnoexcept |
Validates whether the given string uses valid UTF-8 encoding. Returns the number of characters if valid. Otherwise returns a negative status.
|
staticnoexcept |
Returns the number of characters.
|
inlinestaticnoexcept |
Returns true if the code is a reserved surrogate code.
|
inlinestaticnoexcept |
Returns true if the code is a valid UCS4 code.
|
inlinestaticnoexcept |
Reads a UCS4 character from a null-terminated UTF-16 sequence.
|
staticnoexcept |
Converts the UTF-8 bytes into a UCS4 character.
src | The start of the buffer. |
end | The end of the buffer. |
ch | The result character. |
|
staticnoexcept |
Reads a UCS4 character from a null-terminated UTF-8 sequence.
|
staticnoexcept |
Reads a UCS4 character from a null-terminated UTF-16 sequence.
|
staticnoexcept |
Low-level method which converts a UCS-2 encoded string to UCS-4 encoding. The destination buffer must have room for enough characters (guaranteed to not exceed size). The UCS-4 characters are restricted to values in the range 0x00000000-0x0010ffff. UCS-2 is deprecated.
dest | The destination buffer (may be nullptr). |
src | The UCS-2 encoded string. |
size | The number of characters in the UCS-2 encoded string. |
flags | The encoding flags. |
|
staticnoexcept |
Low-level method which converts a UCS-2 encoded string to UTF-8. A null-terminator is NOT appended to the string. The destination buffer must have room for enough bytes (guaranteed to not exceed (size + 1) * getMaximumNumberOfMultibytes(UTF8)).
dest | The destination buffer (may be nullptr). |
src | The UCS-2 encoded string. |
size | The number of characters in the UCS-2 encoded string. |
flags | The encoding flags. The default is 0. |
|
staticnoexcept |
Low-level method which converts a UCS-4 encoded string to UCS-2 encoding. The destination buffer must have room for enough characters (guaranteed to not exceed size). UCS-2 is deprecated.
dest | The destination buffer (may be nullptr). |
src | The UCS-4 encoded string. |
size | The number of characters in the UCS-4 encoded string. |
flags | The encoding flags. |
|
inlinestaticnoexcept |
Converts in-memory (no BOM) UCS-4 to UTF-16.
|
staticnoexcept |
Converts in-memory (no BOM) UCS-4 to UTF-16.
|
staticnoexcept |
Low-level method which converts a UCS-4 encoded string to UTF-16BE. A null-terminator is NOT appended to the string. The destination buffer must have room for enough bytes (guaranteed to not exceed (size + 1) * getMaximumNumberOfMultibytes(UTF16BE)).
dest | The destination buffer (may be nullptr). |
src | The UCS-4 encoded string. |
size | The number of characters in the UCS-4 encoded string. |
flags | The encoding flags. The default is ADD_BOM. |
|
staticnoexcept |
Low-level method which converts a UCS-4 encoded string to UTF-16LE. A null-terminator is NOT appended to the string. The destination buffer must have room for enough bytes (guaranteed to not exceed (size + 1) * getMaximumNumberOfMultibytes(UTF16LE)).
dest | The destination buffer (may be nullptr). |
src | The UCS-4 encoded string. |
size | The number of characters in the UCS-4 encoded string. |
flags | The encoding flags. The default is ADD_BOM. |
|
staticnoexcept |
Low-level method which converts a UCS-4 encoded string to UTF-32BE. A null-terminator is NOT appended to the string. The destination buffer must have room for enough bytes (guaranteed to not exceed (size + 1) * getMaximumNumberOfMultibytes(UTF32BE)).
dest | The destination buffer (may be nullptr). |
src | The UCS-4 encoded string. |
size | The number of characters in the UCS-4 encoded string. |
flags | The encoding flags. The default is ADD_BOM. |
|
staticnoexcept |
Low-level method which converts a UCS-4 encoded string to UTF-32LE. A null-terminator is NOT appended to the string. The destination buffer must have room for enough bytes (guaranteed to not exceed (size + 1) * getMaximumNumberOfMultibytes(UTF32LE)).
dest | The destination buffer (may be nullptr). |
src | The UCS-4 encoded string. |
size | The number of characters in the UCS-4 encoded string. |
flags | The encoding flags. The default is ADD_BOM. |
|
staticnoexcept |
Low-level method which converts a UCS-4 encoded string to UTF-8. A null-terminator is NOT appended to the string. The destination buffer must have room for enough bytes (guaranteed to not exceed (size + 1) * getMaximumNumberOfMultibytes(UTF8)).
dest | The destination buffer (may be nullptr). |
src | The UCS-4 encoded string. |
size | The number of characters in the UCS-4 encoded string. |
flags | The encoding flags. The default is 0. |
|
inlinestaticnoexcept |
Converts UCS-4 string to wchar string.
|
inlinestaticnoexcept |
Converts in-memory (no BOM) UTF-16 to UCS-4.
|
staticnoexcept |
Low-level method which converts a UTF-16 encoded string to UCS-4 encoding. The destination buffer must have room for enough characters (guaranteed to not exceed size). The UCS-4 characters are restricted to values in the range 0x00000000-0x0010ffff.
dest | The destination buffer (may be nullptr). |
src | The UTF-16 encoded string. |
size | The number of bytes in the UTF-16 encoded string. |
flags | The encoding flags. The default is EAT_BOM. |
|
staticnoexcept |
Converts in-memory (no BOM) UTF-16 to UCS-4.
|
inlinestaticnoexcept |
Low-level method which converts a UTF-16 encoded string to UTF-8. A null-terminator is NOT appended to the string. The destination buffer must have room for enough bytes (guaranteed to not exceed (size + 1) * getMaximumNumberOfMultibytes(UTF8)).
dest | The destination buffer (may be nullptr). |
src | The UTF16 encoded string. |
size | The number of characters in the UTF16 encoded string. |
flags | The encoding flags. The default is 0. |
|
staticnoexcept |
Low-level method which converts a UTF-16 encoded string to UTF-8. A null-terminator is NOT appended to the string. The destination buffer must have room for enough bytes (guaranteed to not exceed (size + 1) * getMaximumNumberOfMultibytes(UTF8)).
dest | The destination buffer (may be nullptr). |
src | The UTF16 encoded string. |
size | The number of characters in the UTF16 encoded string. |
flags | The encoding flags. The default is 0. |
|
static |
Low-level method which converts a UTF-32 encoded string to UCS-4 encoding. The destination buffer must have room for enough characters (guaranteed to not exceed size). See the technical report available at http://www.unicode.org/unicode/reports/tr19. The UCS-4 characters are restricted to values in the range 0x00000000-0x0010ffff.
dest | The destination buffer (may be nullptr). |
src | The UTF-32 encoded string. |
size | The number of bytes in the UTF-32 encoded string. |
flags | The encoding flags. The default is EAT_BOM. |
|
static |
Low-level method which converts a UTF-32 encoded string to UCS-4 encoding. The destination buffer must have room for enough characters (guaranteed to not exceed size). See the technical report available at http://www.unicode.org/unicode/reports/tr19. The UCS-4 characters are restricted to values in the range 0x00000000-0x0010ffff.
dest | The destination buffer (may be nullptr). |
src | The UTF-32 encoded string. |
size | The number of bytes in the UTF-32 encoded string. |
flags | The encoding flags. The default is EAT_BOM. |
|
inlinestaticnoexcept |
Low-level method which converts a UCS-4 encoded string to UTF-8. A null-terminator is NOT appended to the string. The destination buffer must have room for enough bytes (guaranteed to not exceed (size + 1) * getMaximumNumberOfMultibytes(UTF8)).
dest | The destination buffer (may be nullptr). |
src | The UCS-4 encoded string. |
size | The number of characters in the UCS-4 encoded string. |
flags | The encoding flags. The default is 0. |
|
inlinestaticnoexcept |
Low-level method which converts a UTF-8 encoded string to UCS-4 encoding. The destination buffer must have room for enough characters (guaranteed to not exceed size).
dest | The destination buffer (may be nullptr). |
src | The UTF-8 encoded string. |
size | The number of bytes in the UTF-8 encoded string. |
flags | The encoding flags. The default is EAT_BOM. |
|
staticnoexcept |
Low-level method which converts a UTF-8 encoded string to UCS-4 encoding. The destination buffer must have room for enough characters (guaranteed to not exceed size).
dest | The destination buffer (may be nullptr). |
src | The UTF-8 encoded string. |
size | The number of bytes in the UTF-8 encoded string. |
flags | The encoding flags. The default is EAT_BOM. |
|
staticnoexcept |
Low-level method which converts a UTF-8 encoded string to UTF-16 encoding. The destination buffer must have room for enough characters (guaranteed to not exceed size).
dest | The destination buffer (may be nullptr). |
src | The UTF-8 encoded string. |
size | The number of bytes in the UTF-8 encoded string. |
flags | The encoding flags. The default is EAT_BOM. |
|
inlinestaticnoexcept |
Converts UTF-8 string to wchar string.
|
inlinestaticnoexcept |
Converts wchar string to UCS-4 string.
|
inlinestaticnoexcept |
Converts wchar string to UTF-8 string.
|
inlinestaticnoexcept |
Writes the given code as UTF-16. The destination must have room for at least 2 words.
|
inlinestaticnoexcept |
|
staticconstexpr |
Invalid code.
|
staticconstexpr |
Specifies the byte order mark.
|
staticconstexpr |
Specifies the maximum valid UCS4 code (Unicode).
|
staticconstexpr |
Specifies the maximum code of the ISO/IEC 10646 standard.