iPXE
Data Structures | Macros | Functions | Variables
utf8_test.c File Reference

UTF-8 Unicode encoding tests. More...

#include <string.h>
#include <ipxe/utf8.h>
#include <ipxe/test.h>

Go to the source code of this file.

Data Structures

struct  utf8_accumulate_test
 A UTF-8 accumulation test. More...
 

Macros

#define DATA(...)   { __VA_ARGS__ }
 Define inline data. More...
 
#define UTF8_ACCUMULATE(name, BYTES, EXPECTED)
 Define a UTF-8 accumulation test. More...
 
#define utf8_accumulate_ok(test)   utf8_accumulate_okx ( test, __FILE__, __LINE__ )
 

Functions

 FILE_LICENCE (GPL2_OR_LATER_OR_UBDL)
 
 UTF8_ACCUMULATE (ascii, "Hello world!", DATA( 'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '!'))
 Basic ASCII test. More...
 
 UTF8_ACCUMULATE (multibyte, "Héllô wörld 🥳", DATA( 'H', 0, L'é', 'l', 'l', 0, L'ô', ' ', 'w', 0, L'ö', 'r', 'l', 'd', ' ', 0, 0, 0, 0x1f973))
 Multi-byte character test. More...
 
 UTF8_ACCUMULATE (stray_continuation, DATA( 'a', 0x81, 'b', 0xc3, 0x82, 0x83, 'c'), DATA( 'a', 0xfffd, 'b', 0, 0xc2, 0xfffd, 'c'))
 Stray continuation byte test. More...
 
 UTF8_ACCUMULATE (missing_continuation, DATA( 'a', 0xc3, 'b', 0xe1, 0x86, 0xc3, 0x89, 'c'), DATA( 'a', 0, 'b', 0, 0, 0, 0xc9, 'c'))
 Missing continuation byte test. More...
 
 UTF8_ACCUMULATE (illegal_two, DATA( 'a', 0xc2, 0x80, 'b', 0xc1, 0xbf, 'c', 0xc0, 0x80, 'd'), DATA( 'a', 0, 0x80, 'b', 0, 0xfffd, 'c', 0, 0xfffd, 'd'))
 Illegal two-byte sequence test. More...
 
 UTF8_ACCUMULATE (illegal_three, DATA( 'a', 0xe0, 0xa0, 0x80, 'b', 0xe0, 0x9f, 0xbf, 'c', 0xe0, 0x80, 0x80, 'd'), DATA( 'a', 0, 0, 0x800, 'b', 0, 0, 0xfffd, 'c', 0, 0, 0xfffd, 'd'))
 Illegal three-byte sequence test. More...
 
 UTF8_ACCUMULATE (illegal_four, DATA( 'a', 0xf0, 0x90, 0x80, 0x80, 'b', 0xf0, 0x8f, 0xbf, 0xbf, 'c', 0xf0, 0x80, 0x80, 0x80, 'd'), DATA( 'a', 0, 0, 0, 0x10000, 'b', 0, 0, 0, 0xfffd, 'c', 0, 0, 0, 0xfffd, 'd'))
 Illegal four-byte sequence test. More...
 
 UTF8_ACCUMULATE (illegal_length, DATA( 'a', 0xf8, 0xbf, 0xbf, 0xbf, 0xbf, 'b', 0xfc, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 'c', 0xfe, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 'd', 0xff, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 'e'), DATA( 'a', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'b', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'c', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'd', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'e'))
 Illegal overlength sequence test. More...
 
static void utf8_accumulate_okx (struct utf8_accumulate_test *test, const char *file, unsigned int line)
 Report UTF-8 accumulation test result. More...
 
static void utf8_test_exec (void)
 Perform UTF-8 self-test. More...
 

Variables

struct self_test utf8_test __self_test
 UTF-8 self-test. More...
 

Detailed Description

UTF-8 Unicode encoding tests.

Definition in file utf8_test.c.

Macro Definition Documentation

◆ DATA

#define DATA (   ...)    { __VA_ARGS__ }

Define inline data.

Definition at line 50 of file utf8_test.c.

◆ UTF8_ACCUMULATE

#define UTF8_ACCUMULATE (   name,
  BYTES,
  EXPECTED 
)
Value:
static const char name ## _bytes[] = BYTES; \
static const unsigned int name ## _expected[] = EXPECTED; \
static struct utf8_accumulate_test name = { \
.bytes = name ## _bytes, \
.expected = name ## _expected, \
.len = ( sizeof ( name ## _expected ) / \
sizeof ( name ## _expected[0] ) ), \
};
#define EXPECTED(...)
Define inline expected HMAC.
Definition: hmac_test.c:49
const char * name
Definition: ath9k_hw.c:1984
A UTF-8 accumulation test.
Definition: utf8_test.c:40

Define a UTF-8 accumulation test.

Definition at line 53 of file utf8_test.c.

◆ utf8_accumulate_ok

#define utf8_accumulate_ok (   test)    utf8_accumulate_okx ( test, __FILE__, __LINE__ )

Definition at line 140 of file utf8_test.c.

Function Documentation

◆ FILE_LICENCE()

FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL  )

◆ UTF8_ACCUMULATE() [1/8]

UTF8_ACCUMULATE ( ascii  ,
"Hello world!"  ,
DATA('H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '!')   
)

Basic ASCII test.

◆ UTF8_ACCUMULATE() [2/8]

UTF8_ACCUMULATE ( multibyte  ,
"Héllô wörld 🥳"  ,
DATA('H', 0, L'é', 'l', 'l', 0, L'ô', ' ', 'w', 0, L'ö', 'r', 'l', 'd', ' ', 0, 0, 0, 0x1f973)   
)

Multi-byte character test.

◆ UTF8_ACCUMULATE() [3/8]

UTF8_ACCUMULATE ( stray_continuation  ,
DATA('a', 0x81, 'b', 0xc3, 0x82, 0x83, 'c')  ,
DATA('a', 0xfffd, 'b', 0, 0xc2, 0xfffd, 'c')   
)

Stray continuation byte test.

◆ UTF8_ACCUMULATE() [4/8]

UTF8_ACCUMULATE ( missing_continuation  ,
DATA('a', 0xc3, 'b', 0xe1, 0x86, 0xc3, 0x89, 'c')  ,
DATA('a', 0, 'b', 0, 0, 0, 0xc9, 'c')   
)

Missing continuation byte test.

◆ UTF8_ACCUMULATE() [5/8]

UTF8_ACCUMULATE ( illegal_two  ,
DATA('a', 0xc2, 0x80, 'b', 0xc1, 0xbf, 'c', 0xc0, 0x80, 'd')  ,
DATA('a', 0, 0x80, 'b', 0, 0xfffd, 'c', 0, 0xfffd, 'd')   
)

Illegal two-byte sequence test.

◆ UTF8_ACCUMULATE() [6/8]

UTF8_ACCUMULATE ( illegal_three  ,
DATA('a', 0xe0, 0xa0, 0x80, 'b', 0xe0, 0x9f, 0xbf, 'c', 0xe0, 0x80, 0x80, 'd')  ,
DATA('a', 0, 0, 0x800, 'b', 0, 0, 0xfffd, 'c', 0, 0, 0xfffd, 'd')   
)

Illegal three-byte sequence test.

◆ UTF8_ACCUMULATE() [7/8]

UTF8_ACCUMULATE ( illegal_four  ,
DATA('a', 0xf0, 0x90, 0x80, 0x80, 'b', 0xf0, 0x8f, 0xbf, 0xbf, 'c', 0xf0, 0x80, 0x80, 0x80, 'd')  ,
DATA('a', 0, 0, 0, 0x10000, 'b', 0, 0, 0, 0xfffd, 'c', 0, 0, 0, 0xfffd, 'd')   
)

Illegal four-byte sequence test.

◆ UTF8_ACCUMULATE() [8/8]

UTF8_ACCUMULATE ( illegal_length  ,
DATA('a', 0xf8, 0xbf, 0xbf, 0xbf, 0xbf, 'b', 0xfc, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 'c', 0xfe, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 'd', 0xff, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 'e')  ,
DATA('a', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'b', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'c', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'd', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'e')   
)

Illegal overlength sequence test.

◆ utf8_accumulate_okx()

static void utf8_accumulate_okx ( struct utf8_accumulate_test *  test,
const char *  file,
unsigned int  line 
)
static

Report UTF-8 accumulation test result.

Parameters
test  UTF-8 accumulation test
file  Test code file
line  Test code line

Definition at line 123 of file utf8_test.c.

124  {
125  struct utf8_accumulator utf8;
126  unsigned int character;
127  unsigned int i;
128 
129  /* Initialise accumulator */
130  memset ( &utf8, 0, sizeof ( utf8 ) );
131 
132  /* Test each byte in turn */
133  for ( i = 0 ; i < test->len ; i++ ) {
134  character = utf8_accumulate ( &utf8, test->bytes[i] );
135  DBGC ( test, "UTF8 byte %02x character %02x\n",
136  test->bytes[i], character );
137  okx ( character == test->expected[i], file, line );
138  }
139 }
#define DBGC(...)
Definition: compiler.h:505
#define okx(success, file, line)
Report test result.
Definition: test.h:44
A UTF-8 character accumulator.
Definition: utf8.h:57
unsigned int utf8_accumulate(struct utf8_accumulator *utf8, uint8_t byte)
Accumulate Unicode character from UTF-8 byte sequence.
Definition: utf8.c:43
unsigned int character
Character in progress.
Definition: utf8.h:59
static int test
Definition: epic100.c:73
void * memset(void *dest, int character, size_t len) __nonnull

References utf8_accumulator::character, DBGC, memset(), okx, test, and utf8_accumulate().

◆ utf8_test_exec()

static void utf8_test_exec ( void  )
static

Perform UTF-8 self-test.

Definition at line 147 of file utf8_test.c.

147  {
148 
149  /* Accumulation tests */
150  utf8_accumulate_ok ( &ascii );
151  utf8_accumulate_ok ( &multibyte );
152  utf8_accumulate_ok ( &stray_continuation );
153  utf8_accumulate_ok ( &missing_continuation );
154  utf8_accumulate_ok ( &illegal_two );
155  utf8_accumulate_ok ( &illegal_three );
156  utf8_accumulate_ok ( &illegal_four );
157  utf8_accumulate_ok ( &illegal_length );
158 }
#define utf8_accumulate_ok(test)
Definition: utf8_test.c:140

References utf8_accumulate_ok.

Variable Documentation

◆ __self_test

struct self_test utf8_test __self_test
Initial value:
= {
.name = "utf8",
.exec = utf8_test_exec,
}
static void utf8_test_exec(void)
Perform UTF-8 self-test.
Definition: utf8_test.c:147

UTF-8 self-test.

Definition at line 161 of file utf8_test.c.