Base Framework
Public Types | Static Public Member Functions | Static Public Attributes | List of all members
Unicode Class Reference

#include <base/string/Unicode.h>

Public Types

enum  MultibyteEncoding {
  UTF7, UTF8, UTF16, UTF16BE,
  UTF16LE, UTF32, UTF32BE, UTF32LE
}
 
enum  {
  ERROR_EMPTY = 0, ERROR_INCOMPLETE = -1, ERROR_BAD_ENCODING = -2, INVALID_UCS4_CHARACTER = -3,
  INVALID_UCS2_CHARACTER = -4
}
 
enum  EncodingFlags {
  ADD_BOM = 1, EAT_BOM = 2, EXPECT_BOM = 4, ASSUME_NATIVE_BYTE_ORDER = 8,
  ASSUME_BE = 16, ASSUME_LE = 32
}
 

Static Public Member Functions

static unsigned int getMaximumNumberOfMultibytes (MultibyteEncoding encoding) noexcept
 
static Literal getMIMECharset (MultibyteEncoding encoding) noexcept
 
static bool isSurrogateCode (unsigned int value) noexcept
 
static bool isUCS4 (unsigned int value) noexcept
 
static int readUCS4 (const uint8 *src, const uint8 *end, ucs4 &ch) noexcept
 
static int readUCS4 (const uint8 *src, ucs4 &ch) noexcept
 
static int readUCS4 (const utf16 *src, ucs4 &ch) noexcept
 
static int readUCS4 (const char16_t *src, ucs4 &ch) noexcept
 
static MemoryDiff getUTF8StringLength (const uint8 *src, const uint8 *end) noexcept
 
static MemoryDiff getUTF8StringLength (const uint8 *src) noexcept
 
static MemoryDiff getUTF8StringLength (const uint8 *src, MemorySize size) noexcept
 
static MemoryDiff getStringLength (const utf16 *src) noexcept
 
static MemoryDiff getStringLength (const utf16 *src, MemorySize size) noexcept
 
static MemoryDiff getStringLength (const char16_t *src) noexcept
 
static MemoryDiff getStringLength (const char16_t *src, MemorySize size) noexcept
 
static MemoryDiff getStringLength (const wchar *src) noexcept
 
static MemoryDiff getStringLength (const wchar *src, MemorySize size) noexcept
 
static MemorySize getUTF8Bytes (ucs4 ch) noexcept
 
static MemorySize writeUTF8 (uint8 *dest, ucs4 ch) noexcept
 
static MemorySize getUTF16Words (ucs4 ch) noexcept
 
static MemorySize writeUTF16 (utf16 *dest, ucs4 ch) noexcept
 
static MemoryDiff UCS2ToUTF8 (uint8 *dest, const ucs2 *src, MemorySize size, unsigned int flags=0) noexcept
 
static MemoryDiff UTF16ToUTF8 (uint8 *dest, const utf16 *src, MemorySize size, unsigned int flags=0) noexcept
 
static MemoryDiff UTF16ToUTF8 (uint8 *dest, const char16_t *src, MemorySize size, unsigned int flags=0) noexcept
 
static MemoryDiff UCS4ToUCS2 (ucs2 *dest, const ucs4 *src, MemorySize size, unsigned int flags=0) noexcept
 
static MemoryDiff UCS2ToUCS4 (ucs4 *dest, const ucs2 *src, MemorySize size, unsigned int flags=0) noexcept
 
static MemoryDiff UCS4ToUTF8 (uint8 *dest, const ucs4 *src, MemorySize size, unsigned int flags=0) noexcept
 
static MemoryDiff UTF32ToUTF8 (uint8 *dest, const char32_t *src, MemorySize size, unsigned int flags=0) noexcept
 
static MemoryDiff UTF8ToUTF16 (utf16 *dest, const uint8 *src, MemorySize size, unsigned int flags=EAT_BOM) noexcept
 
static MemoryDiff UTF8ToUCS4 (ucs4 *dest, const uint8 *src, MemorySize size, unsigned int flags=EAT_BOM) noexcept
 
static MemoryDiff UTF8ToUCS4 (ucs4 *dest, const char *src, MemorySize size, unsigned int flags=EAT_BOM) noexcept
 
static MemoryDiff UCS4ToUTF16BE (uint8 *dest, const ucs4 *src, MemorySize size, unsigned int flags=ADD_BOM) noexcept
 
static MemoryDiff UCS4ToUTF16LE (uint8 *dest, const ucs4 *src, MemorySize size, unsigned int flags=ADD_BOM) noexcept
 
static MemoryDiff UTF16ToUCS4 (ucs4 *dest, const uint8 *src, MemorySize size, unsigned int flags=EAT_BOM) noexcept
 
static MemoryDiff UTF16ToUCS4 (ucs4 *dest, const utf16 *src, MemorySize size, unsigned int flags=0) noexcept
 
static MemoryDiff UTF16ToUCS4 (ucs4 *dest, const char16_t *src, MemorySize size, unsigned int flags=0) noexcept
 
static MemoryDiff UCS4ToUTF16 (utf16 *dest, const ucs4 *src, MemorySize size, unsigned int flags=0) noexcept
 
static MemoryDiff UCS4ToUTF16 (char16_t *dest, const ucs4 *src, MemorySize size, unsigned int flags=0) noexcept
 
static MemoryDiff UCS4ToUTF32BE (uint8 *dest, const ucs4 *src, MemorySize size, unsigned int flags=ADD_BOM) noexcept
 
static MemoryDiff UCS4ToUTF32LE (uint8 *dest, const ucs4 *src, MemorySize size, unsigned int flags=ADD_BOM) noexcept
 
static MemoryDiff UTF32LEToUCS4 (ucs4 *dest, const uint8 *src, MemorySize size, unsigned int flags=EAT_BOM)
 
static MemoryDiff UTF32BEToUCS4 (ucs4 *dest, const uint8 *src, MemorySize size, unsigned int flags=EAT_BOM)
 
static MemoryDiff UCS4ToWChar (wchar *dest, const ucs4 *src, MemorySize size) noexcept
 
static MemoryDiff WCharToUCS4 (ucs4 *dest, const wchar *src, MemorySize size) noexcept
 
static MemoryDiff UTF8ToWChar (wchar *dest, const uint8 *src, MemorySize size) noexcept
 
static MemoryDiff WCharToUTF8 (uint8 *dest, const wchar *src, MemorySize size) noexcept
 

Static Public Attributes

static constexpr ucs4 BOM = 0x0000feff
 
static constexpr ucs4 MAX = 0x10ffff
 
static constexpr ucs4 MAX_ISO = 0x7ffffff
 
static constexpr ucs4 BAD = 0xffffffff
 

Detailed Description

Unicode helper functions.

Member Enumeration Documentation

◆ anonymous enum

anonymous enum
Enumerator
ERROR_EMPTY 

No codes to read.

ERROR_INCOMPLETE 

Missing code for character.

ERROR_BAD_ENCODING 

Invalid encoding.

INVALID_UCS4_CHARACTER 

Invalid UCS4 character.

INVALID_UCS2_CHARACTER 

Invalid UCS2 character.

◆ EncodingFlags

Encoding flags.

Enumerator
ADD_BOM 

Specifies that a BOM should be inserted when encoding to UTF.

EAT_BOM 

Specifies that the BOM should be skipped if present.

EXPECT_BOM 

Specifies that the BOM must be present.

ASSUME_NATIVE_BYTE_ORDER 

Specifies that the encoding is in native byte order.

ASSUME_BE 

Specifies that the encoding is in big endian byte order (ignored if ASSUME_NATIVE_BYTE_ORDER is set).

ASSUME_LE 

Specifies that the encoding is in little endian byte order (ignored if ASSUME_NATIVE_BYTE_ORDER or ASSUME_BE is set).

◆ MultibyteEncoding

Multibyte encoding.

Enumerator
UTF7 

Unicode transformation format (UTF-7).

UTF8 

Unicode transformation format (UTF-8).

UTF16 

Unicode transformation format (UTF-16).

UTF16BE 

Unicode transformation format (UTF-16) with big endian byte order.

UTF16LE 

Unicode transformation format (UTF-16) with little endian byte order.

UTF32 

Unicode transformation format (UTF-32).

UTF32BE 

Unicode transformation format (UTF-32) with big endian byte order.

UTF32LE 

Unicode transformation format (UTF-32) with little endian byte order.

Member Function Documentation

◆ getMaximumNumberOfMultibytes()

static unsigned int Unicode::getMaximumNumberOfMultibytes ( MultibyteEncoding  encoding)
inlinestaticnoexcept

Returns the maximum number of bytes required to represent any UCS-4 character in the specified encoding.

◆ getMIMECharset()

static Literal Unicode::getMIMECharset ( MultibyteEncoding  encoding)
staticnoexcept

Returns the MIME charset for the specified encoding.

Parameters
encoding: The multibyte encoding.

◆ getStringLength() [1/6]

static MemoryDiff Unicode::getStringLength ( const char16_t *  src)
staticnoexcept

Returns the number of characters.

◆ getStringLength() [2/6]

static MemoryDiff Unicode::getStringLength ( const char16_t *  src,
MemorySize  size 
)
staticnoexcept

Returns the number of characters.

◆ getStringLength() [3/6]

static MemoryDiff Unicode::getStringLength ( const utf16 *  src)
staticnoexcept

Returns the number of characters.

◆ getStringLength() [4/6]

static MemoryDiff Unicode::getStringLength ( const utf16 *  src,
MemorySize  size 
)
staticnoexcept

Returns the number of characters.

◆ getStringLength() [5/6]

static MemoryDiff Unicode::getStringLength ( const wchar *  src)
staticnoexcept

Returns the number of characters.

◆ getStringLength() [6/6]

static MemoryDiff Unicode::getStringLength ( const wchar *  src,
MemorySize  size 
)
staticnoexcept

Returns the number of characters.

◆ getUTF16Words()

static MemorySize Unicode::getUTF16Words ( ucs4  ch)
inlinestaticnoexcept

Returns the number of UTF-16 words required to represent the given code.

◆ getUTF8Bytes()

static MemorySize Unicode::getUTF8Bytes ( ucs4  ch)
inlinestaticnoexcept

Returns the number of bytes required for the UTF-8 encoding of the given UCS4 character. Returns 0 if the character is not a valid UCS4 character.

◆ getUTF8StringLength() [1/3]

static MemoryDiff Unicode::getUTF8StringLength ( const uint8 *  src)
staticnoexcept

Validates whether the given null-terminated string uses valid UTF-8 encoding. Returns the number of characters if valid. Otherwise returns a negative status.

◆ getUTF8StringLength() [2/3]

static MemoryDiff Unicode::getUTF8StringLength ( const uint8 *  src,
const uint8 *  end 
)
staticnoexcept

Validates whether the given string uses valid UTF-8 encoding. Returns the number of characters if valid. Otherwise returns a negative status.

◆ getUTF8StringLength() [3/3]

static MemoryDiff Unicode::getUTF8StringLength ( const uint8 *  src,
MemorySize  size 
)
staticnoexcept

Returns the number of characters.

◆ isSurrogateCode()

static bool Unicode::isSurrogateCode ( unsigned int  value)
inlinestaticnoexcept

Returns true if the code is a reserved surrogate code.

◆ isUCS4()

static bool Unicode::isUCS4 ( unsigned int  value)
inlinestaticnoexcept

Returns true if the code is a valid UCS4 code.

◆ readUCS4() [1/4]

static int Unicode::readUCS4 ( const char16_t *  src,
ucs4 &  ch 
)
inlinestaticnoexcept

Reads a UCS4 character from a null-terminated UTF-16 sequence.

Returns
Number of words read (not bytes).

◆ readUCS4() [2/4]

static int Unicode::readUCS4 ( const uint8 *  src,
const uint8 *  end,
ucs4 &  ch 
)
staticnoexcept

Converts the UTF-8 bytes into a UCS4 character.

Parameters
src: The start of the buffer.
end: The end of the buffer.
ch: The result character.
Returns
The number of bytes read. If negative, this is the error code.

◆ readUCS4() [3/4]

static int Unicode::readUCS4 ( const uint8 *  src,
ucs4 &  ch 
)
staticnoexcept

Reads a UCS4 character from a null-terminated UTF-8 sequence.

Returns
Number of bytes read.

◆ readUCS4() [4/4]

static int Unicode::readUCS4 ( const utf16 *  src,
ucs4 &  ch 
)
staticnoexcept

Reads a UCS4 character from a null-terminated UTF-16 sequence.

Returns
Number of words read (not bytes).

◆ UCS2ToUCS4()

static MemoryDiff Unicode::UCS2ToUCS4 ( ucs4 *  dest,
const ucs2 *  src,
MemorySize  size,
unsigned int  flags = 0 
)
staticnoexcept

Low-level method which converts a UCS-2 encoded string to UCS-4 encoding. The destination buffer must have room for enough characters (guaranteed not to exceed size). The UCS-4 characters are restricted to values in the range 0x00000000-0x0010ffff. UCS-2 is deprecated.

Parameters
dest: The destination buffer (may be nullptr).
src: The UCS-2 encoded string.
size: The number of characters in the UCS-2 encoded string.
flags: The encoding flags.
Returns
The number of characters in the UCS-4 encoded string. Negative error code on error.

◆ UCS2ToUTF8()

static MemoryDiff Unicode::UCS2ToUTF8 ( uint8 *  dest,
const ucs2 *  src,
MemorySize  size,
unsigned int  flags = 0 
)
staticnoexcept

Low-level method which converts a UCS-2 encoded string to UTF-8. A null-terminator is NOT appended to the string. The destination buffer must have room for enough bytes (guaranteed not to exceed (size + 1) * getMaximumNumberOfMultibytes(UTF8)).

Parameters
dest: The destination buffer (may be nullptr).
src: The UCS-2 encoded string.
size: The number of characters in the UCS-2 encoded string.
flags: The encoding flags. The default is 0.
Returns
The number of bytes occupied by the UTF-8 encoded string.

◆ UCS4ToUCS2()

static MemoryDiff Unicode::UCS4ToUCS2 ( ucs2 *  dest,
const ucs4 *  src,
MemorySize  size,
unsigned int  flags = 0 
)
staticnoexcept

Low-level method which converts a UCS-4 encoded string to UCS-2 encoding. The destination buffer must have room for enough characters (guaranteed not to exceed size). UCS-2 is deprecated.

Parameters
dest: The destination buffer (may be nullptr).
src: The UCS-4 encoded string.
size: The number of characters in the UCS-4 encoded string.
flags: The encoding flags.
Returns
The number of characters in the UCS-2 encoded string. Negative error code on error.

◆ UCS4ToUTF16() [1/2]

static MemoryDiff Unicode::UCS4ToUTF16 ( char16_t *  dest,
const ucs4 *  src,
MemorySize  size,
unsigned int  flags = 0 
)
inlinestaticnoexcept

Converts in-memory (no BOM) UCS-4 to UTF-16.

◆ UCS4ToUTF16() [2/2]

static MemoryDiff Unicode::UCS4ToUTF16 ( utf16 *  dest,
const ucs4 *  src,
MemorySize  size,
unsigned int  flags = 0 
)
staticnoexcept

Converts in-memory (no BOM) UCS-4 to UTF-16.

◆ UCS4ToUTF16BE()

static MemoryDiff Unicode::UCS4ToUTF16BE ( uint8 *  dest,
const ucs4 *  src,
MemorySize  size,
unsigned int  flags = ADD_BOM 
)
staticnoexcept

Low-level method which converts a UCS-4 encoded string to UTF-16BE. A null-terminator is NOT appended to the string. The destination buffer must have room for enough bytes (guaranteed not to exceed (size + 1) * getMaximumNumberOfMultibytes(UTF16BE)).

Parameters
dest: The destination buffer (may be nullptr).
src: The UCS-4 encoded string.
size: The number of characters in the UCS-4 encoded string.
flags: The encoding flags. The default is ADD_BOM.
Returns
The number of bytes occupied by the UTF-16BE encoded string. Negative error code on error.

◆ UCS4ToUTF16LE()

static MemoryDiff Unicode::UCS4ToUTF16LE ( uint8 *  dest,
const ucs4 *  src,
MemorySize  size,
unsigned int  flags = ADD_BOM 
)
staticnoexcept

Low-level method which converts a UCS-4 encoded string to UTF-16LE. A null-terminator is NOT appended to the string. The destination buffer must have room for enough bytes (guaranteed not to exceed (size + 1) * getMaximumNumberOfMultibytes(UTF16LE)).

Parameters
dest: The destination buffer (may be nullptr).
src: The UCS-4 encoded string.
size: The number of characters in the UCS-4 encoded string.
flags: The encoding flags. The default is ADD_BOM.
Returns
The number of bytes occupied by the UTF-16LE encoded string. Negative error code on error.

◆ UCS4ToUTF32BE()

static MemoryDiff Unicode::UCS4ToUTF32BE ( uint8 *  dest,
const ucs4 *  src,
MemorySize  size,
unsigned int  flags = ADD_BOM 
)
staticnoexcept

Low-level method which converts a UCS-4 encoded string to UTF-32BE. A null-terminator is NOT appended to the string. The destination buffer must have room for enough bytes (guaranteed not to exceed (size + 1) * getMaximumNumberOfMultibytes(UTF32BE)).

Parameters
dest: The destination buffer (may be nullptr).
src: The UCS-4 encoded string.
size: The number of characters in the UCS-4 encoded string.
flags: The encoding flags. The default is ADD_BOM.
Returns
The number of bytes occupied by the UTF-32BE encoded string. Negative error code on error.

◆ UCS4ToUTF32LE()

static MemoryDiff Unicode::UCS4ToUTF32LE ( uint8 *  dest,
const ucs4 *  src,
MemorySize  size,
unsigned int  flags = ADD_BOM 
)
staticnoexcept

Low-level method which converts a UCS-4 encoded string to UTF-32LE. A null-terminator is NOT appended to the string. The destination buffer must have room for enough bytes (guaranteed not to exceed (size + 1) * getMaximumNumberOfMultibytes(UTF32LE)).

Parameters
dest: The destination buffer (may be nullptr).
src: The UCS-4 encoded string.
size: The number of characters in the UCS-4 encoded string.
flags: The encoding flags. The default is ADD_BOM.
Returns
The number of bytes occupied by the UTF-32LE encoded string. Negative error code on error.

◆ UCS4ToUTF8()

static MemoryDiff Unicode::UCS4ToUTF8 ( uint8 *  dest,
const ucs4 *  src,
MemorySize  size,
unsigned int  flags = 0 
)
staticnoexcept

Low-level method which converts a UCS-4 encoded string to UTF-8. A null-terminator is NOT appended to the string. The destination buffer must have room for enough bytes (guaranteed not to exceed (size + 1) * getMaximumNumberOfMultibytes(UTF8)).

Parameters
dest: The destination buffer (may be nullptr).
src: The UCS-4 encoded string.
size: The number of characters in the UCS-4 encoded string.
flags: The encoding flags. The default is 0.
Returns
The number of bytes occupied by the UTF-8 encoded string.

◆ UCS4ToWChar()

static MemoryDiff Unicode::UCS4ToWChar ( wchar *  dest,
const ucs4 *  src,
MemorySize  size 
)
inlinestaticnoexcept

Converts a UCS-4 string to a wchar string.

◆ UTF16ToUCS4() [1/3]

static MemoryDiff Unicode::UTF16ToUCS4 ( ucs4 *  dest,
const char16_t *  src,
MemorySize  size,
unsigned int  flags = 0 
)
inlinestaticnoexcept

Converts in-memory (no BOM) UTF-16 to UCS-4.

◆ UTF16ToUCS4() [2/3]

static MemoryDiff Unicode::UTF16ToUCS4 ( ucs4 *  dest,
const uint8 *  src,
MemorySize  size,
unsigned int  flags = EAT_BOM 
)
staticnoexcept

Low-level method which converts a UTF-16 encoded string to UCS-4 encoding. The destination buffer must have room for enough characters (guaranteed not to exceed size). The UCS-4 characters are restricted to values in the range 0x00000000-0x0010ffff.

Parameters
dest: The destination buffer (may be nullptr).
src: The UTF-16 encoded string.
size: The number of bytes in the UTF-16 encoded string.
flags: The encoding flags. The default is EAT_BOM.
Returns
The number of characters in the UCS-4 encoded string.

◆ UTF16ToUCS4() [3/3]

static MemoryDiff Unicode::UTF16ToUCS4 ( ucs4 *  dest,
const utf16 *  src,
MemorySize  size,
unsigned int  flags = 0 
)
staticnoexcept

Converts in-memory (no BOM) UTF-16 to UCS-4.

◆ UTF16ToUTF8() [1/2]

static MemoryDiff Unicode::UTF16ToUTF8 ( uint8 *  dest,
const char16_t *  src,
MemorySize  size,
unsigned int  flags = 0 
)
inlinestaticnoexcept

Low-level method which converts a UTF-16 encoded string to UTF-8. A null-terminator is NOT appended to the string. The destination buffer must have room for enough bytes (guaranteed not to exceed (size + 1) * getMaximumNumberOfMultibytes(UTF8)).

Parameters
dest: The destination buffer (may be nullptr).
src: The UTF-16 encoded string.
size: The number of characters in the UTF-16 encoded string.
flags: The encoding flags. The default is 0.
Returns
The number of bytes occupied by the UTF-8 encoded string.

◆ UTF16ToUTF8() [2/2]

static MemoryDiff Unicode::UTF16ToUTF8 ( uint8 *  dest,
const utf16 *  src,
MemorySize  size,
unsigned int  flags = 0 
)
staticnoexcept

Low-level method which converts a UTF-16 encoded string to UTF-8. A null-terminator is NOT appended to the string. The destination buffer must have room for enough bytes (guaranteed not to exceed (size + 1) * getMaximumNumberOfMultibytes(UTF8)).

Parameters
dest: The destination buffer (may be nullptr).
src: The UTF-16 encoded string.
size: The number of characters in the UTF-16 encoded string.
flags: The encoding flags. The default is 0.
Returns
The number of bytes occupied by the UTF-8 encoded string.

◆ UTF32BEToUCS4()

static MemoryDiff Unicode::UTF32BEToUCS4 ( ucs4 *  dest,
const uint8 *  src,
MemorySize  size,
unsigned int  flags = EAT_BOM 
)
static

Low-level method which converts a UTF-32BE encoded string to UCS-4 encoding. The destination buffer must have room for enough characters (guaranteed not to exceed size). See the technical report available at http://www.unicode.org/unicode/reports/tr19. The UCS-4 characters are restricted to values in the range 0x00000000-0x0010ffff.

Parameters
dest: The destination buffer (may be nullptr).
src: The UTF-32BE encoded string.
size: The number of bytes in the UTF-32BE encoded string.
flags: The encoding flags. The default is EAT_BOM.
Returns
The number of characters in the UCS-4 encoded string.

◆ UTF32LEToUCS4()

static MemoryDiff Unicode::UTF32LEToUCS4 ( ucs4 *  dest,
const uint8 *  src,
MemorySize  size,
unsigned int  flags = EAT_BOM 
)
static

Low-level method which converts a UTF-32LE encoded string to UCS-4 encoding. The destination buffer must have room for enough characters (guaranteed not to exceed size). See the technical report available at http://www.unicode.org/unicode/reports/tr19. The UCS-4 characters are restricted to values in the range 0x00000000-0x0010ffff.

Parameters
dest: The destination buffer (may be nullptr).
src: The UTF-32LE encoded string.
size: The number of bytes in the UTF-32LE encoded string.
flags: The encoding flags. The default is EAT_BOM.
Returns
The number of characters in the UCS-4 encoded string.

◆ UTF32ToUTF8()

static MemoryDiff Unicode::UTF32ToUTF8 ( uint8 *  dest,
const char32_t *  src,
MemorySize  size,
unsigned int  flags = 0 
)
inlinestaticnoexcept

Low-level method which converts a UTF-32 (char32_t) encoded string to UTF-8. A null-terminator is NOT appended to the string. The destination buffer must have room for enough bytes (guaranteed not to exceed (size + 1) * getMaximumNumberOfMultibytes(UTF8)).

Parameters
dest: The destination buffer (may be nullptr).
src: The UTF-32 encoded string.
size: The number of characters in the UTF-32 encoded string.
flags: The encoding flags. The default is 0.
Returns
The number of bytes occupied by the UTF-8 encoded string.

◆ UTF8ToUCS4() [1/2]

static MemoryDiff Unicode::UTF8ToUCS4 ( ucs4 *  dest,
const char *  src,
MemorySize  size,
unsigned int  flags = EAT_BOM 
)
inlinestaticnoexcept

Low-level method which converts a UTF-8 encoded string to UCS-4 encoding. The destination buffer must have room for enough characters (guaranteed not to exceed size).

Parameters
dest: The destination buffer (may be nullptr).
src: The UTF-8 encoded string.
size: The number of bytes in the UTF-8 encoded string.
flags: The encoding flags. The default is EAT_BOM.
Returns
The number of characters in the UCS-4 encoded string.

◆ UTF8ToUCS4() [2/2]

static MemoryDiff Unicode::UTF8ToUCS4 ( ucs4 *  dest,
const uint8 *  src,
MemorySize  size,
unsigned int  flags = EAT_BOM 
)
staticnoexcept

Low-level method which converts a UTF-8 encoded string to UCS-4 encoding. The destination buffer must have room for enough characters (guaranteed not to exceed size).

Parameters
dest: The destination buffer (may be nullptr).
src: The UTF-8 encoded string.
size: The number of bytes in the UTF-8 encoded string.
flags: The encoding flags. The default is EAT_BOM.
Returns
The number of characters in the UCS-4 encoded string.

◆ UTF8ToUTF16()

static MemoryDiff Unicode::UTF8ToUTF16 ( utf16 *  dest,
const uint8 *  src,
MemorySize  size,
unsigned int  flags = EAT_BOM 
)
staticnoexcept

Low-level method which converts a UTF-8 encoded string to UTF-16 encoding. The destination buffer must have room for enough characters (guaranteed not to exceed size).

Parameters
dest: The destination buffer (may be nullptr).
src: The UTF-8 encoded string.
size: The number of bytes in the UTF-8 encoded string.
flags: The encoding flags. The default is EAT_BOM.
Returns
The number of words (not characters) in the UTF-16 encoded string.

◆ UTF8ToWChar()

static MemoryDiff Unicode::UTF8ToWChar ( wchar *  dest,
const uint8 *  src,
MemorySize  size 
)
inlinestaticnoexcept

Converts a UTF-8 string to a wchar string.

◆ WCharToUCS4()

static MemoryDiff Unicode::WCharToUCS4 ( ucs4 *  dest,
const wchar *  src,
MemorySize  size 
)
inlinestaticnoexcept

Converts a wchar string to a UCS-4 string.

◆ WCharToUTF8()

static MemoryDiff Unicode::WCharToUTF8 ( uint8 *  dest,
const wchar *  src,
MemorySize  size 
)
inlinestaticnoexcept

Converts a wchar string to a UTF-8 string.

◆ writeUTF16()

static MemorySize Unicode::writeUTF16 ( utf16 *  dest,
ucs4  ch 
)
inlinestaticnoexcept

Writes the given code as UTF-16. The destination must have room for a minimum of 2 words.

◆ writeUTF8()

static MemorySize Unicode::writeUTF8 ( uint8 *  dest,
ucs4  ch 
)
inlinestaticnoexcept

Converts a UCS4 character to UTF-8. Returns 0 if the character is not a valid UCS4 character. The buffer must have room for a minimum of 4 bytes for a Unicode code.

Member Data Documentation

◆ BAD

constexpr ucs4 Unicode::BAD = 0xffffffff
staticconstexpr

Invalid code.

◆ BOM

constexpr ucs4 Unicode::BOM = 0x0000feff
staticconstexpr

Specifies the byte order mark.

◆ MAX

constexpr ucs4 Unicode::MAX = 0x10ffff
staticconstexpr

Specifies the maximum valid UCS4 code (Unicode).

◆ MAX_ISO

constexpr ucs4 Unicode::MAX_ISO = 0x7ffffff
staticconstexpr

Specifies the maximum code according to the ISO/IEC 10646 standard.