iPXE
utf8.h
Go to the documentation of this file.
1 #ifndef _IPXE_UTF8_H
2 #define _IPXE_UTF8_H
3 
4 /** @file
5  *
6  * UTF-8 Unicode encoding
7  *
8  */
9 
10 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
11 
12 #include <stdint.h>
13 
/** Longest possible UTF-8 sequence, in bytes */
#define UTF8_MAX_LEN 4

/** Smallest character value that may legally use a two-byte sequence */
#define UTF8_MIN_TWO 0x80

/** Smallest character value that may legally use a three-byte sequence */
#define UTF8_MIN_THREE 0x800

/** Smallest character value that may legally use a four-byte sequence */
#define UTF8_MIN_FOUR 0x10000

/** Most significant bit of a UTF-8 byte (clear for ASCII bytes) */
#define UTF8_HIGH_BIT 0x80
28 
/** Number of data bits in each continuation byte */
#define UTF8_CONTINUATION_BITS 6

/** Bit mask for data bits in a continuation byte */
#define UTF8_CONTINUATION_MASK ( ( 1 << UTF8_CONTINUATION_BITS ) - 1 )

/** Non-data bits in a continuation byte */
#define UTF8_CONTINUATION 0x80

/** Check for a continuation byte
 *
 * Only the low eight bits of the argument are examined.  Without
 * this restriction, a byte held in a signed character type would be
 * sign-extended by the integer promotions and its non-data bits
 * could never compare equal to UTF8_CONTINUATION.
 *
 * @v byte		UTF-8 byte
 * @ret is_continuation	Byte is a continuation byte
 */
#define UTF8_IS_CONTINUATION( byte ) \
	( ( (byte) & ( 0xff & ~UTF8_CONTINUATION_MASK ) ) == UTF8_CONTINUATION )
45 
/** Test whether a byte is plain ASCII
 *
 * A byte is treated as ASCII when its high bit (UTF8_HIGH_BIT) is
 * clear.
 *
 * @v byte		UTF-8 byte
 * @ret is_ascii	Byte is an ASCII byte
 */
#define UTF8_IS_ASCII( byte ) ( ( (byte) & UTF8_HIGH_BIT ) == 0 )
52 
/** Character value returned when decoding fails
 *
 * 0xfffd is the Unicode REPLACEMENT CHARACTER (U+FFFD).
 */
#define UTF8_INVALID 0xfffd
55 
/** A UTF-8 character accumulator
 *
 * Passed by pointer to utf8_accumulate(), which is handed one UTF-8
 * byte per call.
 */
struct utf8_accumulator {
	/** Character in progress */
	unsigned int character;
	/** Number of remaining continuation bytes */
	unsigned int remaining;
	/** Minimum legal character */
	unsigned int min;
};
65 
/**
 * Accumulate Unicode character from UTF-8 byte sequence
 *
 * The implementation lives in utf8.c and is not visible from this
 * header.
 *
 * @v utf8		UTF-8 accumulator
 * @v byte		UTF-8 byte
 * @ret character	Unicode character value.  NOTE(review): the value
 *			returned for an incomplete or malformed sequence
 *			is not visible here (presumably UTF8_INVALID on a
 *			decoding failure) - confirm against utf8.c
 */
extern unsigned int
utf8_accumulate ( struct utf8_accumulator *utf8, uint8_t byte );
68 
69 #endif /* _IPXE_UTF8_H */
unsigned int remaining
Number of remaining continuation bytes.
Definition: utf8.h:61
unsigned int min
Minimum legal character.
Definition: utf8.h:63
unsigned char uint8_t
Definition: stdint.h:10
unsigned int utf8_accumulate(struct utf8_accumulator *utf8, uint8_t byte)
Accumulate Unicode character from UTF-8 byte sequence.
Definition: utf8.c:43
FILE_LICENCE(GPL2_OR_LATER_OR_UBDL)
A UTF-8 character accumulator.
Definition: utf8.h:57
unsigned int character
Character in progress.
Definition: utf8.h:59