/* Largest code point value that fits in a UTF-8 sequence of 1, 2, 3 and 4 bytes. */
#define UTF8_1BYTE_CODEMAX 0x7F
#define UTF8_2BYTE_CODEMAX 0x7FF
#define UTF8_3BYTE_CODEMAX 0xFFFF
#define UTF8_4BYTE_CODEMAX 0x10FFFF

/*
 * Masks selecting the marker bits of a byte.  A byte belongs to a class when
 * (byte & UTF8_xxx_MASK) == UTF8_xxx_PREFIX.  The nBYTE masks apply to the
 * lead byte of a sequence, the EXTBYTE mask to every trailing byte.
 */
#define UTF8_1BYTE_MASK 0x80
#define UTF8_2BYTE_MASK 0xE0
#define UTF8_3BYTE_MASK 0xF0
#define UTF8_4BYTE_MASK 0xF8
#define UTF8_EXTBYTE_MASK 0xC0

/* Marker-bit values expected after applying the matching mask above. */
#define UTF8_1BYTE_PREFIX 0x00
#define UTF8_2BYTE_PREFIX 0xC0
#define UTF8_3BYTE_PREFIX 0xE0
#define UTF8_4BYTE_PREFIX 0xF0
#define UTF8_EXTBYTE_PREFIX 0x80

/*
 * UTF8_IS_nBYTE(ptr): non-zero when the bytes starting at ptr carry the marker
 * bits of an n-byte UTF-8 sequence (lead byte plus n-1 trailing bytes).
 *
 * - Only the marker bits are inspected; the encoded value itself is not
 *   validated here.
 * - ptr is evaluated several times, so it must not have side effects.
 * - Trailing bytes are tested one by one, left to right, and the test stops at
 *   the first byte that is not a trailing byte.  A sequence cut short by a NUL
 *   terminator is therefore rejected without reading past the terminator.
 */
#define UTF8_IS_1BYTE(ptr) \
    (((ptr)[0] & UTF8_1BYTE_MASK) == UTF8_1BYTE_PREFIX)

#define UTF8_IS_2BYTE(ptr) \
    ((((ptr)[0] & UTF8_2BYTE_MASK) == UTF8_2BYTE_PREFIX) && \
     (((ptr)[1] & UTF8_EXTBYTE_MASK) == UTF8_EXTBYTE_PREFIX))

/*
 * Each trailing byte is checked on its own: OR-ing the bytes together before
 * masking would accept a sequence in which only one of them is a real
 * trailing byte and the others lie in 0x00-0x3F.
 */
#define UTF8_IS_3BYTE(ptr) \
    ((((ptr)[0] & UTF8_3BYTE_MASK) == UTF8_3BYTE_PREFIX) && \
     (((ptr)[1] & UTF8_EXTBYTE_MASK) == UTF8_EXTBYTE_PREFIX) && \
     (((ptr)[2] & UTF8_EXTBYTE_MASK) == UTF8_EXTBYTE_PREFIX))

#define UTF8_IS_4BYTE(ptr) \
    ((((ptr)[0] & UTF8_4BYTE_MASK) == UTF8_4BYTE_PREFIX) && \
     (((ptr)[1] & UTF8_EXTBYTE_MASK) == UTF8_EXTBYTE_PREFIX) && \
     (((ptr)[2] & UTF8_EXTBYTE_MASK) == UTF8_EXTBYTE_PREFIX) && \
     (((ptr)[3] & UTF8_EXTBYTE_MASK) == UTF8_EXTBYTE_PREFIX))
69static inline unsigned utf8CharToUnicode(
const char *ptr,
unsigned len)
75 code = (unsigned)(ptr[0] & (
char)~UTF8_1BYTE_MASK);
79 code = (((unsigned)(ptr[0] & (
char)~UTF8_2BYTE_MASK) << 6) |
84 code = (((unsigned)(ptr[0] & (
char)~UTF8_3BYTE_MASK) << 12) |
85 ((unsigned)(ptr[1] & (
char)~UTF8_EXTBYTE_MASK) << 6) |
90 code = (((unsigned)(ptr[0] & (
char)~UTF8_4BYTE_MASK) << 18) |
91 ((unsigned)(ptr[1] & (
char)~UTF8_EXTBYTE_MASK) << 12) |
92 ((unsigned)(ptr[2] & (
char)~UTF8_EXTBYTE_MASK) << 6) |
100static inline unsigned utf8CodeWidth(
unsigned code)
115static inline void unicodeToUtf8Char(
unsigned code,
unsigned char *ptr,
118 unsigned codeWidth = utf8CodeWidth(code);
120 if ((codeWidth == 1) && (len >= codeWidth))
124 else if ((codeWidth == 2) && (len >= codeWidth))
129 else if ((codeWidth == 3) && (len >= codeWidth))
135 else if ((codeWidth == 4) && (len >= codeWidth))
148 static int state_table[32] = {
149 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
150 1, 1, 1, 1, 1, 1, 1, 1,
157 static int mask_bytes[32] = {
158 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F,
159 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F,
160 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
161 0x1F, 0x1F, 0x1F, 0x1F,
167 static int next[5] = {
176 *codep =
byte & mask_bytes[
byte >> 3];
177 *state = state_table[
byte >> 3];
179 else if (*state > 0) {
180 *codep = (
byte & 0x3F) | (*codep << 6);
181 *state = next[*state];
unsigned int uint32_t
Definition acefiex.h:163
#define UTF8_IS_4BYTE(ptr)
Definition utf.h:65
#define UTF8_3BYTE_PREFIX
Definition utf.h:50
#define UTF8_2BYTE_PREFIX
Definition utf.h:49
#define UTF8_EXTBYTE_MASK
Definition utf.h:46
#define UTF8_4BYTE_PREFIX
Definition utf.h:51
#define UTF8_3BYTE_CODEMAX
Definition utf.h:39
#define UTF8_IS_2BYTE(ptr)
Definition utf.h:57
#define UTF8_1BYTE_CODEMAX
Definition utf.h:37
#define UTF8_ACCEPT
Definition utf.h:144
#define UTF8_IS_3BYTE(ptr)
Definition utf.h:61
#define UTF8_IS_1BYTE(ptr)
Definition utf.h:54
#define UTF8_4BYTE_CODEMAX
Definition utf.h:40
#define UTF8_EXTBYTE_PREFIX
Definition utf.h:52
#define UTF8_2BYTE_CODEMAX
Definition utf.h:38