44 static unsigned int min[] = {
58 DBGC ( utf8,
"UTF8 %p unexpected %02x\n", utf8,
byte );
72 DBGC ( utf8,
"UTF8 %p illegal %02x\n", utf8,
81 DBGC2 ( utf8,
"UTF8 %p accumulated %02x\n",
92 DBGC ( utf8,
"UTF8 %p unexpected %02x (partial %02x/%02x)\n",
94 ( ( 1 << shift ) - 1 ) );
114 DBGC ( utf8,
"UTF8 %p illegal %02x length %d\n",
#define UTF8_MAX_LEN
Maximum length of UTF-8 sequence.
#define UTF8_IS_CONTINUATION(byte)
Check for a continuation byte.
#define UTF8_CONTINUATION_MASK
Bit mask for data bits in a continuation byte.
unsigned int remaining
Number of remaining continuation bytes.
#define UTF8_INVALID
Invalid character returned when decoding fails.
#define UTF8_MIN_FOUR
Minimum legal value for four-byte UTF-8 sequence.
#define UTF8_CONTINUATION_BITS
Number of data bits in each continuation byte.
unsigned int min
Minimum legal character.
assert((readw(&hdr->flags) &(GTF_reading|GTF_writing))==0)
#define UTF8_HIGH_BIT
High bit of UTF-8 bytes.
#define UTF8_IS_ASCII(byte)
Check for an ASCII byte.
FILE_LICENCE(GPL2_OR_LATER_OR_UBDL)
#define UTF8_MIN_THREE
Minimum legal value for three-byte UTF-8 sequence.
A UTF-8 character accumulator.
unsigned int utf8_accumulate(struct utf8_accumulator *utf8, uint8_t byte)
Accumulate Unicode character from UTF-8 byte sequence.
#define UTF8_MIN_TWO
Minimum legal value for two-byte UTF-8 sequence.
unsigned int character
Character in progress.