iPXE
utf8_test.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2022 Michael Brown <mbrown@fensystems.co.uk>.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License as
6  * published by the Free Software Foundation; either version 2 of the
7  * License, or any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17  * 02110-1301, USA.
18  *
19  * You can also choose to distribute this program under the terms of
20  * the Unmodified Binary Distribution Licence (as given in the file
21  * COPYING.UBDL), provided that you have satisfied its requirements.
22  */
23 
24 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
25 
26 /** @file
27  *
28  * UTF-8 Unicode encoding tests
29  *
30  */
31 
32 /* Forcibly enable assertions */
33 #undef NDEBUG
34 
35 #include <string.h>
36 #include <ipxe/utf8.h>
37 #include <ipxe/test.h>
38 
/**
 * A UTF-8 accumulation test
 *
 * Pairs a raw byte sequence with the value that is expected to be
 * reported after each byte of that sequence has been consumed.
 */
struct utf8_accumulate_test {
	/** UTF-8 byte string */
	const char *bytes;
	/** Expected character sequence (one entry per input byte) */
	const unsigned int *expected;
	/** Length (number of entries to be checked) */
	size_t len;
};
48 
/**
 * Define inline data
 *
 * Wraps a comma-separated list of values in braces, producing an
 * initialiser list that can be handed to another macro as a single
 * argument.
 */
#define DATA( ... ) { __VA_ARGS__ }
51 
/**
 * Define a UTF-8 accumulation test
 *
 * @v name		Test name
 * @v BYTES		UTF-8 byte string (string literal or DATA() list)
 * @v EXPECTED		Expected character sequence (DATA() list)
 *
 * The test length is the number of entries in the expected character
 * sequence.  The invocation must be terminated with a semicolon: the
 * macro does not supply its own, so that no empty declaration is left
 * behind at file scope.
 */
#define UTF8_ACCUMULATE( name, BYTES, EXPECTED ) \
	static const char name ## _bytes[] = BYTES; \
	static const unsigned int name ## _expected[] = EXPECTED; \
	static struct utf8_accumulate_test name = { \
		.bytes = name ## _bytes, \
		.expected = name ## _expected, \
		.len = ( sizeof ( name ## _expected ) / \
			 sizeof ( name ## _expected[0] ) ), \
	}
62 
63 /** Basic ASCII test */
64 UTF8_ACCUMULATE ( ascii, "Hello world!",
65  DATA ( 'H', 'e', 'l', 'l', 'o', ' ',
66  'w', 'o', 'r', 'l', 'd', '!' ) );
67 
68 /** Multi-byte character test */
69 UTF8_ACCUMULATE ( multibyte, "Héllô wörld 🥳",
70  DATA ( 'H', 0, L'é', 'l', 'l', 0, L'ô', ' ',
71  'w', 0, L'ö', 'r', 'l', 'd', ' ',
72  0, 0, 0, 0x1f973 ) );
73 
74 /** Stray continuation byte test */
75 UTF8_ACCUMULATE ( stray_continuation,
76  DATA ( 'a', 0x81, 'b', 0xc3, 0x82, 0x83, 'c' ),
77  DATA ( 'a', 0xfffd, 'b', 0, 0xc2, 0xfffd, 'c' ) );
78 
79 /** Missing continuation byte test */
80 UTF8_ACCUMULATE ( missing_continuation,
81  DATA ( 'a', 0xc3, 'b', 0xe1, 0x86, 0xc3, 0x89, 'c' ),
82  DATA ( 'a', 0, 'b', 0, 0, 0, 0xc9, 'c' ) );
83 
84 /** Illegal two-byte sequence test */
85 UTF8_ACCUMULATE ( illegal_two,
86  DATA ( 'a', 0xc2, 0x80, 'b', 0xc1, 0xbf, 'c', 0xc0, 0x80,
87  'd' ),
88  DATA ( 'a', 0, 0x80, 'b', 0, 0xfffd, 'c', 0, 0xfffd, 'd' ) );
89 
90 /** Illegal three-byte sequence test */
91 UTF8_ACCUMULATE ( illegal_three,
92  DATA ( 'a', 0xe0, 0xa0, 0x80, 'b', 0xe0, 0x9f, 0xbf, 'c',
93  0xe0, 0x80, 0x80, 'd' ),
94  DATA ( 'a', 0, 0, 0x800, 'b', 0, 0, 0xfffd, 'c',
95  0, 0, 0xfffd, 'd' ) );
96 
97 /** Illegal four-byte sequence test */
98 UTF8_ACCUMULATE ( illegal_four,
99  DATA ( 'a', 0xf0, 0x90, 0x80, 0x80, 'b', 0xf0, 0x8f, 0xbf,
100  0xbf, 'c', 0xf0, 0x80, 0x80, 0x80, 'd' ),
101  DATA ( 'a', 0, 0, 0, 0x10000, 'b', 0, 0, 0, 0xfffd, 'c',
102  0, 0, 0, 0xfffd, 'd' ) );
103 
104 /** Illegal overlength sequence test */
105 UTF8_ACCUMULATE ( illegal_length,
106  DATA ( 'a', 0xf8, 0xbf, 0xbf, 0xbf, 0xbf, 'b', 0xfc, 0xbf,
107  0xbf, 0xbf, 0xbf, 0xbf, 'c', 0xfe, 0xbf, 0xbf, 0xbf,
108  0xbf, 0xbf, 0xbf, 'd', 0xff, 0xbf, 0xbf, 0xbf, 0xbf,
109  0xbf, 0xbf, 0xbf, 'e' ),
110  DATA ( 'a', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'b',
111  0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'c',
112  0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
113  0xfffd, 'd', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
114  0xfffd, 0xfffd, 0xfffd, 'e' ) );
115 
116 /**
117  * Report UTF-8 accumulation test result
118  *
119  * @v test UTF-8 accumulation test
120  * @v file Test code file
121  * @v line Test code line
122  */
124  const char *file, unsigned int line ) {
125  struct utf8_accumulator utf8;
126  unsigned int character;
127  unsigned int i;
128 
129  /* Initialise accumulator */
130  memset ( &utf8, 0, sizeof ( utf8 ) );
131 
132  /* Test each byte in turn */
133  for ( i = 0 ; i < test->len ; i++ ) {
134  character = utf8_accumulate ( &utf8, test->bytes[i] );
135  DBGC ( test, "UTF8 byte %02x character %02x\n",
136  test->bytes[i], character );
137  okx ( character == test->expected[i], file, line );
138  }
139 }
/** Report UTF-8 accumulation test result, tagged with the calling file and line */
#define utf8_accumulate_ok( accumulate_test ) \
	utf8_accumulate_okx ( accumulate_test, __FILE__, __LINE__ )
142 
143 /**
144  * Perform UTF-8 self-test
145  *
146  */
147 static void utf8_test_exec ( void ) {
148 
149  /* Accumulation tests */
150  utf8_accumulate_ok ( &ascii );
151  utf8_accumulate_ok ( &multibyte );
152  utf8_accumulate_ok ( &stray_continuation );
153  utf8_accumulate_ok ( &missing_continuation );
154  utf8_accumulate_ok ( &illegal_two );
155  utf8_accumulate_ok ( &illegal_three );
156  utf8_accumulate_ok ( &illegal_four );
157  utf8_accumulate_ok ( &illegal_length );
158 }
159 
160 /** UTF-8 self-test */
161 struct self_test utf8_test __self_test = {
162  .name = "utf8",
163  .exec = utf8_test_exec,
164 };
#define utf8_accumulate_ok(test)
Definition: utf8_test.c:140
#define DBGC(...)
Definition: compiler.h:505
Self-test infrastructure.
const char * name
Test set name.
Definition: test.h:17
A UTF-8 accumulation test.
Definition: utf8_test.c:40
#define UTF8_ACCUMULATE(name, BYTES, EXPECTED)
Define a UTF-8 accumulation test.
Definition: utf8_test.c:53
A self-test set.
Definition: test.h:15
UTF-8 Unicode encoding.
const unsigned int * expected
Expected character sequence.
Definition: utf8_test.c:44
#define okx(success, file, line)
Report test result.
Definition: test.h:44
#define DATA(...)
Define inline data.
Definition: utf8_test.c:50
FILE_LICENCE(GPL2_OR_LATER_OR_UBDL)
size_t len
Length.
Definition: utf8_test.c:46
const char * bytes
UTF-8 byte string.
Definition: utf8_test.c:42
A UTF-8 character accumulator.
Definition: utf8.h:57
unsigned int utf8_accumulate(struct utf8_accumulator *utf8, uint8_t byte)
Accumulate Unicode character from UTF-8 byte sequence.
Definition: utf8.c:43
unsigned int character
Character in progress.
Definition: utf8.h:59
static void utf8_test_exec(void)
Perform UTF-8 self-test.
Definition: utf8_test.c:147
struct self_test utf8_test __self_test
UTF-8 self-test.
Definition: utf8_test.c:161
String functions.
static int test
Definition: epic100.c:73
void * memset(void *dest, int character, size_t len) __nonnull
static void utf8_accumulate_okx(struct utf8_accumulate_test *test, const char *file, unsigned int line)
Report UTF-8 accumulation test result.
Definition: utf8_test.c:123