iPXE
utf8.h
Go to the documentation of this file.
1#ifndef _IPXE_UTF8_H
2#define _IPXE_UTF8_H
3
4/** @file
5 *
6 * UTF-8 Unicode encoding
7 *
8 */
9
10FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
11FILE_SECBOOT ( PERMITTED );
12
13#include <stdint.h>
14
/** Longest possible UTF-8 sequence, in bytes */
#define UTF8_MAX_LEN 4

/** Smallest character value that may legally use a two-byte sequence */
#define UTF8_MIN_TWO 0x80

/** Smallest character value that may legally use a three-byte sequence */
#define UTF8_MIN_THREE 0x800

/** Smallest character value that may legally use a four-byte sequence */
#define UTF8_MIN_FOUR 0x10000

/** Most significant bit of a UTF-8 byte */
#define UTF8_HIGH_BIT 0x80

/** Count of data bits carried by each continuation byte */
#define UTF8_CONTINUATION_BITS 6

/** Mask selecting the data bits of a continuation byte */
#define UTF8_CONTINUATION_MASK ( ( 1 << UTF8_CONTINUATION_BITS ) - 1 )

/** Fixed (non-data) bit pattern of a continuation byte */
#define UTF8_CONTINUATION 0x80
38
/** Test whether a byte is a UTF-8 continuation byte
 *
 * The byte's non-data bits (everything outside UTF8_CONTINUATION_MASK)
 * must equal the UTF8_CONTINUATION pattern.
 *
 * @v byte		UTF-8 byte
 * @ret is_continuation	Byte is a continuation byte
 */
#define UTF8_IS_CONTINUATION( byte ) \
	( UTF8_CONTINUATION == ( (byte) & ~UTF8_CONTINUATION_MASK ) )
46
/** Test whether a byte is a plain ASCII byte
 *
 * A byte is treated as ASCII when UTF8_HIGH_BIT is clear.
 *
 * @v byte		UTF-8 byte
 * @ret is_ascii	Byte is an ASCII byte
 */
#define UTF8_IS_ASCII( byte ) ( ( (byte) & UTF8_HIGH_BIT ) == 0 )
53
/** Substitute character (U+FFFD) returned when decoding fails */
#define UTF8_INVALID 0xfffd
56
/** A UTF-8 character accumulator
 *
 * Holds the decoding state for a character that is being assembled
 * from successive UTF-8 bytes.
 */
struct utf8_accumulator {
	/** Character in progress */
	unsigned int character;
	/** Number of remaining continuation bytes */
	unsigned int remaining;
	/** Minimum legal character */
	unsigned int min;
};
66
/**
 * Accumulate Unicode character from UTF-8 byte sequence
 *
 * @v utf8		UTF-8 accumulator
 * @v byte		UTF-8 byte
 * @ret character	Result of accumulating this byte
 *
 * NOTE(review): the exact return-value convention is defined by the
 * implementation in utf8.c and is not visible from this header;
 * confirm there before relying on it.
 */
extern unsigned int
utf8_accumulate ( struct utf8_accumulator *utf8, uint8_t byte );
69
70#endif /* _IPXE_UTF8_H */
unsigned char uint8_t
Definition stdint.h:10
#define FILE_LICENCE(_licence)
Declare a particular licence as applying to a file.
Definition compiler.h:896
#define FILE_SECBOOT(_status)
Declare a file's UEFI Secure Boot permission status.
Definition compiler.h:926
A UTF-8 character accumulator.
Definition utf8.h:58
unsigned int remaining
Number of remaining continuation bytes.
Definition utf8.h:62
unsigned int min
Minimum legal character.
Definition utf8.h:64
unsigned int character
Character in progress.
Definition utf8.h:60
unsigned int utf8_accumulate(struct utf8_accumulator *utf8, uint8_t byte)
Accumulate Unicode character from UTF-8 byte sequence.
Definition utf8.c:44