iPXE
utf8_test.c
Go to the documentation of this file.
1/*
2 * Copyright (C) 2022 Michael Brown <mbrown@fensystems.co.uk>.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA.
18 *
19 * You can also choose to distribute this program under the terms of
20 * the Unmodified Binary Distribution Licence (as given in the file
21 * COPYING.UBDL), provided that you have satisfied its requirements.
22 */
23
24FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
25
26/** @file
27 *
28 * UTF-8 Unicode encoding tests
29 *
30 */
31
32/* Forcibly enable assertions */
33#undef NDEBUG
34
35#include <string.h>
36#include <ipxe/utf8.h>
37#include <ipxe/test.h>
38
/** A UTF-8 accumulation test */
struct utf8_accumulate_test {
	/** UTF-8 byte string */
	const char *bytes;
	/** Expected accumulator result after each byte of @c bytes */
	const unsigned int *expected;
	/** Number of entries in @c expected (and bytes consumed) */
	size_t len;
};
48
/**
 * Define inline data
 *
 * Wraps a comma-separated list in braces so that it can be passed as
 * a single macro argument and used as an array initialiser.
 */
#define DATA( ... ) { __VA_ARGS__ }
51
/**
 * Define a UTF-8 accumulation test
 *
 * @v name		Test name
 * @v BYTES		Input bytes (string literal or DATA() list)
 * @v EXPECTED		Expected accumulator result for each input byte
 *
 * The test length is taken from the number of entries in EXPECTED.
 */
#define UTF8_ACCUMULATE( name, BYTES, EXPECTED )			\
	static const char name ## _bytes[] = BYTES;			\
	static const unsigned int name ## _expected[] = EXPECTED;	\
	static struct utf8_accumulate_test name = {			\
		.len = ( sizeof ( name ## _expected ) /			\
			 sizeof ( *name ## _expected ) ),		\
		.expected = name ## _expected,				\
		.bytes = name ## _bytes,				\
	};
62
/**
 * Basic ASCII test
 *
 * Every input byte is a single-byte character, so the expected
 * result for each byte is that same character.
 */
UTF8_ACCUMULATE ( ascii, "Hello world!",
		  DATA ( 'H', 'e', 'l', 'l', 'o', ' ',
			 'w', 'o', 'r', 'l', 'd', '!' ) );
67
/**
 * Multi-byte character test
 *
 * The expected result is zero for every non-final byte of a
 * multi-byte sequence, and the completed character for the final
 * byte (two-byte sequences for the accented letters, a four-byte
 * sequence for U+1F973).
 */
UTF8_ACCUMULATE ( multibyte, "Héllô wörld 🥳",
		  DATA ( 'H', 0, L'é', 'l', 'l', 0, L'ô', ' ',
			 'w', 0, L'ö', 'r', 'l', 'd', ' ',
			 0, 0, 0, 0x1f973 ) );
73
/**
 * Stray continuation byte test
 *
 * A continuation byte with no sequence in progress (0x81, and the
 * 0x83 that follows the complete sequence 0xc3 0x82) is expected to
 * produce 0xfffd.  The complete sequence itself yields 0xc2.
 */
UTF8_ACCUMULATE ( stray_continuation,
		  DATA ( 'a', 0x81, 'b', 0xc3, 0x82, 0x83, 'c' ),
		  DATA ( 'a', 0xfffd, 'b', 0, 0xc2, 0xfffd, 'c' ) );
78
/**
 * Missing continuation byte test
 *
 * A sequence cut short by an ASCII byte (0xc3 then 'b') is expected
 * to yield zero for the lead byte and then the ASCII byte itself.  A
 * three-byte sequence cut short by a new lead byte (0xe1 0x86 then
 * 0xc3 0x89) is expected to yield zeros until the new two-byte
 * sequence completes as 0xc9.
 */
UTF8_ACCUMULATE ( missing_continuation,
		  DATA ( 'a', 0xc3, 'b', 0xe1, 0x86, 0xc3, 0x89, 'c' ),
		  DATA ( 'a', 0, 'b', 0, 0, 0, 0xc9, 'c' ) );
83
/**
 * Illegal two-byte sequence test
 *
 * 0xc2 0x80 is accepted as 0x80.  The sequences 0xc1 0xbf and
 * 0xc0 0x80 encode values that fit in a single byte, and are
 * expected to produce 0xfffd on their final byte.
 */
UTF8_ACCUMULATE ( illegal_two,
		  DATA ( 'a', 0xc2, 0x80, 'b', 0xc1, 0xbf, 'c', 0xc0, 0x80,
			 'd' ),
		  DATA ( 'a', 0, 0x80, 'b', 0, 0xfffd, 'c', 0, 0xfffd, 'd' ) );
89
/**
 * Illegal three-byte sequence test
 *
 * 0xe0 0xa0 0x80 is accepted as 0x800.  The sequences
 * 0xe0 0x9f 0xbf and 0xe0 0x80 0x80 encode values below 0x800, and
 * are expected to produce 0xfffd on their final byte.
 */
UTF8_ACCUMULATE ( illegal_three,
		  DATA ( 'a', 0xe0, 0xa0, 0x80, 'b', 0xe0, 0x9f, 0xbf, 'c',
			 0xe0, 0x80, 0x80, 'd' ),
		  DATA ( 'a', 0, 0, 0x800, 'b', 0, 0, 0xfffd, 'c',
			 0, 0, 0xfffd, 'd' ) );
96
/**
 * Illegal four-byte sequence test
 *
 * 0xf0 0x90 0x80 0x80 is accepted as 0x10000.  The sequences
 * 0xf0 0x8f 0xbf 0xbf and 0xf0 0x80 0x80 0x80 encode values below
 * 0x10000, and are expected to produce 0xfffd on their final byte.
 */
UTF8_ACCUMULATE ( illegal_four,
		  DATA ( 'a', 0xf0, 0x90, 0x80, 0x80, 'b', 0xf0, 0x8f, 0xbf,
			 0xbf, 'c', 0xf0, 0x80, 0x80, 0x80, 'd' ),
		  DATA ( 'a', 0, 0, 0, 0x10000, 'b', 0, 0, 0, 0xfffd, 'c',
			 0, 0, 0, 0xfffd, 'd' ) );
103
/**
 * Illegal overlength sequence test
 *
 * The lead bytes 0xf8, 0xfc, 0xfe and 0xff are each followed by a
 * run of 0xbf continuation bytes.  Every one of these bytes, lead
 * and continuation alike, is expected to produce 0xfffd.
 */
UTF8_ACCUMULATE ( illegal_length,
		  DATA ( 'a', 0xf8, 0xbf, 0xbf, 0xbf, 0xbf, 'b', 0xfc, 0xbf,
			 0xbf, 0xbf, 0xbf, 0xbf, 'c', 0xfe, 0xbf, 0xbf, 0xbf,
			 0xbf, 0xbf, 0xbf, 'd', 0xff, 0xbf, 0xbf, 0xbf, 0xbf,
			 0xbf, 0xbf, 0xbf, 'e' ),
		  DATA ( 'a', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'b',
			 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'c',
			 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
			 0xfffd, 'd', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
			 0xfffd, 0xfffd, 0xfffd, 'e' ) );
115
116/**
117 * Report UTF-8 accumulation test result
118 *
119 * @v test UTF-8 accumulation test
120 * @v file Test code file
121 * @v line Test code line
122 */
124 const char *file, unsigned int line ) {
125 struct utf8_accumulator utf8;
126 unsigned int character;
127 unsigned int i;
128
129 /* Initialise accumulator */
130 memset ( &utf8, 0, sizeof ( utf8 ) );
131
132 /* Test each byte in turn */
133 for ( i = 0 ; i < test->len ; i++ ) {
134 character = utf8_accumulate ( &utf8, test->bytes[i] );
135 DBGC ( test, "UTF8 byte %02x character %02x\n",
136 test->bytes[i], character );
137 okx ( character == test->expected[i], file, line );
138 }
139}
140#define utf8_accumulate_ok( test ) \
141 utf8_accumulate_okx ( test, __FILE__, __LINE__ )
142
/**
 * Perform UTF-8 self-test
 *
 * Runs every accumulation test defined in this file.  Each test is
 * invoked by its own utf8_accumulate_ok() statement, so that the
 * __LINE__ recorded for a failure identifies the individual test.
 */
static void utf8_test_exec ( void ) {

	/* Accumulation tests */
	utf8_accumulate_ok ( &ascii );
	utf8_accumulate_ok ( &multibyte );
	utf8_accumulate_ok ( &stray_continuation );
	utf8_accumulate_ok ( &missing_continuation );
	utf8_accumulate_ok ( &illegal_two );
	utf8_accumulate_ok ( &illegal_three );
	utf8_accumulate_ok ( &illegal_four );
	utf8_accumulate_ok ( &illegal_length );
}
159
160/** UTF-8 self-test */
161struct self_test utf8_test __self_test = {
162 .name = "utf8",
163 .exec = utf8_test_exec,
164};
#define DATA(...)
Define inline data.
Definition acpi_test.c:74
static int test
Definition epic100.c:73
#define DBGC(...)
Definition compiler.h:505
#define FILE_LICENCE(_licence)
Declare a particular licence as applying to a file.
Definition compiler.h:896
String functions.
void * memset(void *dest, int character, size_t len) __nonnull
A self-test set.
Definition test.h:15
A UTF-8 accumulation test.
Definition utf8_test.c:40
const unsigned int * expected
Expected character sequence.
Definition utf8_test.c:44
size_t len
Length.
Definition utf8_test.c:46
const char * bytes
UTF-8 byte string.
Definition utf8_test.c:42
A UTF-8 character accumulator.
Definition utf8.h:58
unsigned int character
Character in progress.
Definition utf8.h:60
Self-test infrastructure.
#define okx(success, file, line)
Report test result.
Definition test.h:44
#define __self_test
Declare a self-test.
Definition test.h:32
unsigned int utf8_accumulate(struct utf8_accumulator *utf8, uint8_t byte)
Accumulate Unicode character from UTF-8 byte sequence.
Definition utf8.c:44
UTF-8 Unicode encoding.
static void utf8_test_exec(void)
Perform UTF-8 self-test.
Definition utf8_test.c:147
#define utf8_accumulate_ok(test)
Definition utf8_test.c:140
#define UTF8_ACCUMULATE(name, BYTES, EXPECTED)
Define a UTF-8 accumulation test.
Definition utf8_test.c:53
static void utf8_accumulate_okx(struct utf8_accumulate_test *test, const char *file, unsigned int line)
Report UTF-8 accumulation test result.
Definition utf8_test.c:123