iPXE
utf8_test.c File Reference

UTF-8 Unicode encoding tests. More...

#include <string.h>
#include <ipxe/utf8.h>
#include <ipxe/test.h>

Go to the source code of this file.

Data Structures

struct  utf8_accumulate_test
 A UTF-8 accumulation test. More...

Macros

#define DATA(...)
 Define inline data.
#define UTF8_ACCUMULATE(name, BYTES, EXPECTED)
 Define a UTF-8 accumulation test.
#define utf8_accumulate_ok(test)

Functions

 FILE_LICENCE (GPL2_OR_LATER_OR_UBDL)
 UTF8_ACCUMULATE (ascii, "Hello world!", DATA( 'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '!'))
 Basic ASCII test.
 UTF8_ACCUMULATE (multibyte, "Héllô wörld 🥳", DATA( 'H', 0, L'é', 'l', 'l', 0, L'ô', ' ', 'w', 0, L'ö', 'r', 'l', 'd', ' ', 0, 0, 0, 0x1f973))
 Multi-byte character test.
 UTF8_ACCUMULATE (stray_continuation, DATA( 'a', 0x81, 'b', 0xc3, 0x82, 0x83, 'c'), DATA( 'a', 0xfffd, 'b', 0, 0xc2, 0xfffd, 'c'))
 Stray continuation byte test.
 UTF8_ACCUMULATE (missing_continuation, DATA( 'a', 0xc3, 'b', 0xe1, 0x86, 0xc3, 0x89, 'c'), DATA( 'a', 0, 'b', 0, 0, 0, 0xc9, 'c'))
 Missing continuation byte test.
 UTF8_ACCUMULATE (illegal_two, DATA( 'a', 0xc2, 0x80, 'b', 0xc1, 0xbf, 'c', 0xc0, 0x80, 'd'), DATA( 'a', 0, 0x80, 'b', 0, 0xfffd, 'c', 0, 0xfffd, 'd'))
 Illegal two-byte sequence test.
 UTF8_ACCUMULATE (illegal_three, DATA( 'a', 0xe0, 0xa0, 0x80, 'b', 0xe0, 0x9f, 0xbf, 'c', 0xe0, 0x80, 0x80, 'd'), DATA( 'a', 0, 0, 0x800, 'b', 0, 0, 0xfffd, 'c', 0, 0, 0xfffd, 'd'))
 Illegal three-byte sequence test.
 UTF8_ACCUMULATE (illegal_four, DATA( 'a', 0xf0, 0x90, 0x80, 0x80, 'b', 0xf0, 0x8f, 0xbf, 0xbf, 'c', 0xf0, 0x80, 0x80, 0x80, 'd'), DATA( 'a', 0, 0, 0, 0x10000, 'b', 0, 0, 0, 0xfffd, 'c', 0, 0, 0, 0xfffd, 'd'))
 Illegal four-byte sequence test.
 UTF8_ACCUMULATE (illegal_length, DATA( 'a', 0xf8, 0xbf, 0xbf, 0xbf, 0xbf, 'b', 0xfc, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 'c', 0xfe, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 'd', 0xff, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 'e'), DATA( 'a', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'b', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'c', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'd', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'e'))
 Illegal overlength sequence test.
static void utf8_accumulate_okx (struct utf8_accumulate_test *test, const char *file, unsigned int line)
 Report UTF-8 accumulation test result.
static void utf8_test_exec (void)
 Perform UTF-8 self-test.

Variables

struct self_test utf8_test __self_test
 UTF-8 self-test.

Detailed Description

UTF-8 Unicode encoding tests.

Definition in file utf8_test.c.

Macro Definition Documentation

◆ DATA

#define DATA ( ...)
Value:
{ __VA_ARGS__ }

Define inline data.

Definition at line 50 of file utf8_test.c.

◆ UTF8_ACCUMULATE

#define UTF8_ACCUMULATE ( name,
BYTES,
EXPECTED )
Value:
static const char name ## _bytes[] = BYTES; \
static const unsigned int name ## _expected[] = EXPECTED; \
static struct utf8_accumulate_test name = { \
.bytes = name ## _bytes, \
.expected = name ## _expected, \
.len = ( sizeof ( name ## _expected ) / \
sizeof ( name ## _expected[0] ) ), \
};
const char * name
Definition ath9k_hw.c:1986
#define EXPECTED(...)
Define inline expected result point.
utf8_accumulate_test
A UTF-8 accumulation test.
Definition utf8_test.c:40

Define a UTF-8 accumulation test.

Definition at line 53 of file utf8_test.c.

53 #define UTF8_ACCUMULATE( name, BYTES, EXPECTED ) \
54 static const char name ## _bytes[] = BYTES; \
55 static const unsigned int name ## _expected[] = EXPECTED; \
56 static struct utf8_accumulate_test name = { \
57 .bytes = name ## _bytes, \
58 .expected = name ## _expected, \
59 .len = ( sizeof ( name ## _expected ) / \
60 sizeof ( name ## _expected[0] ) ), \
61 };

◆ utf8_accumulate_ok

#define utf8_accumulate_ok ( test)
Value:
utf8_accumulate_okx ( test, __FILE__, __LINE__ )
static int test
Definition epic100.c:73
static void utf8_accumulate_okx(struct utf8_accumulate_test *test, const char *file, unsigned int line)
Report UTF-8 accumulation test result.
Definition utf8_test.c:123

Definition at line 140 of file utf8_test.c.

140 #define utf8_accumulate_ok( test ) \
141 utf8_accumulate_okx ( test, __FILE__, __LINE__ )

Referenced by utf8_test_exec().

Function Documentation

◆ FILE_LICENCE()

FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL )

◆ UTF8_ACCUMULATE() [1/8]

UTF8_ACCUMULATE ( ascii ,
"Hello world!" ,
DATA('H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '!')  )

Basic ASCII test.

References DATA.

◆ UTF8_ACCUMULATE() [2/8]

UTF8_ACCUMULATE ( multibyte ,
"Héllô wörld 🥳" ,
DATA('H', 0, L'é', 'l', 'l', 0, L'ô', ' ', 'w', 0, L'ö', 'r', 'l', 'd', ' ', 0, 0, 0, 0x1f973)  )

Multi-byte character test.

References DATA.

◆ UTF8_ACCUMULATE() [3/8]

UTF8_ACCUMULATE ( stray_continuation ,
DATA('a', 0x81, 'b', 0xc3, 0x82, 0x83, 'c') ,
DATA('a', 0xfffd, 'b', 0, 0xc2, 0xfffd, 'c')  )

Stray continuation byte test.

References DATA.

◆ UTF8_ACCUMULATE() [4/8]

UTF8_ACCUMULATE ( missing_continuation ,
DATA('a', 0xc3, 'b', 0xe1, 0x86, 0xc3, 0x89, 'c') ,
DATA('a', 0, 'b', 0, 0, 0, 0xc9, 'c')  )

Missing continuation byte test.

References DATA.

◆ UTF8_ACCUMULATE() [5/8]

UTF8_ACCUMULATE ( illegal_two ,
DATA('a', 0xc2, 0x80, 'b', 0xc1, 0xbf, 'c', 0xc0, 0x80, 'd') ,
DATA('a', 0, 0x80, 'b', 0, 0xfffd, 'c', 0, 0xfffd, 'd')  )

Illegal two-byte sequence test.

References DATA.

◆ UTF8_ACCUMULATE() [6/8]

UTF8_ACCUMULATE ( illegal_three ,
DATA('a', 0xe0, 0xa0, 0x80, 'b', 0xe0, 0x9f, 0xbf, 'c', 0xe0, 0x80, 0x80, 'd') ,
DATA('a', 0, 0, 0x800, 'b', 0, 0, 0xfffd, 'c', 0, 0, 0xfffd, 'd')  )

Illegal three-byte sequence test.

References DATA.

◆ UTF8_ACCUMULATE() [7/8]

UTF8_ACCUMULATE ( illegal_four ,
DATA('a', 0xf0, 0x90, 0x80, 0x80, 'b', 0xf0, 0x8f, 0xbf, 0xbf, 'c', 0xf0, 0x80, 0x80, 0x80, 'd') ,
DATA('a', 0, 0, 0, 0x10000, 'b', 0, 0, 0, 0xfffd, 'c', 0, 0, 0, 0xfffd, 'd')  )

Illegal four-byte sequence test.

References DATA.

◆ UTF8_ACCUMULATE() [8/8]

UTF8_ACCUMULATE ( illegal_length ,
DATA('a', 0xf8, 0xbf, 0xbf, 0xbf, 0xbf, 'b', 0xfc, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 'c', 0xfe, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 'd', 0xff, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 'e') ,
DATA('a', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'b', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'c', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'd', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'e')  )

Illegal overlength sequence test.

References DATA.

◆ utf8_accumulate_okx()

void utf8_accumulate_okx ( struct utf8_accumulate_test * test,
const char * file,
unsigned int line )
static

Report UTF-8 accumulation test result.

Parameters
test: UTF-8 accumulation test
file: Test code file
line: Test code line

Definition at line 123 of file utf8_test.c.

124 {
125 struct utf8_accumulator utf8;
126 unsigned int character;
127 unsigned int i;
128
129 /* Initialise accumulator */
130 memset ( &utf8, 0, sizeof ( utf8 ) );
131
132 /* Test each byte in turn */
133 for ( i = 0 ; i < test->len ; i++ ) {
134 character = utf8_accumulate ( &utf8, test->bytes[i] );
135 DBGC ( test, "UTF8 byte %02x character %02x\n",
136 test->bytes[i], character );
137 okx ( character == test->expected[i], file, line );
138 }
139 }
#define DBGC(...)
Definition compiler.h:505
void * memset(void *dest, int character, size_t len) __nonnull
utf8_accumulator
A UTF-8 character accumulator.
Definition utf8.h:58
unsigned int character
Character in progress.
Definition utf8.h:60
#define okx(success, file, line)
Report test result.
Definition test.h:44
unsigned int utf8_accumulate(struct utf8_accumulator *utf8, uint8_t byte)
Accumulate Unicode character from UTF-8 byte sequence.
Definition utf8.c:44

References utf8_accumulator::character, DBGC, memset(), okx, test, and utf8_accumulate().

◆ utf8_test_exec()

void utf8_test_exec ( void )
static

Perform UTF-8 self-test.

Definition at line 147 of file utf8_test.c.

147 {
148
149 /* Accumulation tests */
150 utf8_accumulate_ok ( &ascii );
151 utf8_accumulate_ok ( &multibyte );
152 utf8_accumulate_ok ( &stray_continuation );
153 utf8_accumulate_ok ( &missing_continuation );
154 utf8_accumulate_ok ( &illegal_two );
155 utf8_accumulate_ok ( &illegal_three );
156 utf8_accumulate_ok ( &illegal_four );
157 utf8_accumulate_ok ( &illegal_length );
158 }
#define utf8_accumulate_ok(test)
Definition utf8_test.c:140

References utf8_accumulate_ok.

Variable Documentation

◆ __self_test

struct self_test utf8_test __self_test
Initial value:
= {
.name = "utf8",
.exec = utf8_test_exec,
}
static void utf8_test_exec(void)
Perform UTF-8 self-test.
Definition utf8_test.c:147

UTF-8 self-test.

Definition at line 161 of file utf8_test.c.

161 {
162 .name = "utf8",
163 .exec = utf8_test_exec,
164 };