iPXE
utf8.h
Go to the documentation of this file.
1 #ifndef _IPXE_UTF8_H
2 #define _IPXE_UTF8_H
3 
4 /** @file
5  *
6  * UTF-8 Unicode encoding
7  *
8  */
9 
10 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
11 FILE_SECBOOT ( PERMITTED );
12 
13 #include <stdint.h>
14 
/** Maximum length of UTF-8 sequence (in bytes) */
#define UTF8_MAX_LEN 4

/** Minimum legal value for two-byte UTF-8 sequence */
#define UTF8_MIN_TWO 0x80

/** Minimum legal value for three-byte UTF-8 sequence */
#define UTF8_MIN_THREE 0x800

/** Minimum legal value for four-byte UTF-8 sequence */
#define UTF8_MIN_FOUR 0x10000
26 
/** High bit of UTF-8 bytes
 *
 * This bit is clear in an ASCII byte (see UTF8_IS_ASCII()).
 */
#define UTF8_HIGH_BIT 0x80

/** Number of data bits in each continuation byte */
#define UTF8_CONTINUATION_BITS 6

/** Bit mask for data bits in a continuation byte */
#define UTF8_CONTINUATION_MASK ( ( 1 << UTF8_CONTINUATION_BITS ) - 1 )

/** Non-data bits in a continuation byte */
#define UTF8_CONTINUATION 0x80

/** Check for a continuation byte
 *
 * @v byte		UTF-8 byte
 * @ret is_continuation	Byte is a continuation byte
 *
 * Only the non-data bits within the low eight bits of the argument
 * are compared.  A byte held in a signed character type is
 * sign-extended when promoted to int; restricting the comparison to
 * the low octet gives the same answer for signed and unsigned
 * character types.
 */
#define UTF8_IS_CONTINUATION( byte ) \
	( ( (byte) & ( 0xff & ~UTF8_CONTINUATION_MASK ) ) == UTF8_CONTINUATION )

/** Check for an ASCII byte
 *
 * @v byte		UTF-8 byte
 * @ret is_ascii	Byte is an ASCII byte
 */
#define UTF8_IS_ASCII( byte ) ( ! ( (byte) & UTF8_HIGH_BIT ) )
53 
/** Invalid character returned when decoding fails
 *
 * This is U+FFFD, the Unicode replacement character.
 */
#define UTF8_INVALID 0xfffd
56 
/** A UTF-8 character accumulator */
struct utf8_accumulator {
	/** Character in progress */
	unsigned int character;
	/** Number of remaining continuation bytes */
	unsigned int remaining;
	/** Minimum legal character */
	unsigned int min;
};

/**
 * Accumulate Unicode character from UTF-8 byte sequence
 *
 * @v utf8		UTF-8 character accumulator
 * @v byte		UTF-8 byte
 * @ret character	Unicode character
 *
 * NOTE(review): the value returned while a sequence is still
 * incomplete is defined by the implementation in utf8.c and is not
 * visible from this header; confirm there before relying on it.
 */
extern unsigned int utf8_accumulate ( struct utf8_accumulator *utf8,
				      uint8_t byte );
69 
70 #endif /* _IPXE_UTF8_H */
FILE_SECBOOT(PERMITTED)
unsigned int remaining
Number of remaining continuation bytes.
Definition: utf8.h:62
unsigned int min
Minimum legal character.
Definition: utf8.h:64
unsigned char uint8_t
Definition: stdint.h:10
unsigned int utf8_accumulate(struct utf8_accumulator *utf8, uint8_t byte)
Accumulate Unicode character from UTF-8 byte sequence.
Definition: utf8.c:44
FILE_LICENCE(GPL2_OR_LATER_OR_UBDL)
A UTF-8 character accumulator.
Definition: utf8.h:58
unsigned int character
Character in progress.
Definition: utf8.h:60