iPXE
string.h
Go to the documentation of this file.
1#ifndef X86_BITS_STRING_H
2#define X86_BITS_STRING_H
3
4/*
5 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or any later version.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 * 02110-1301, USA.
21 *
22 * You can also choose to distribute this program under the terms of
23 * the Unmodified Binary Distribution Licence (as given in the file
24 * COPYING.UBDL), provided that you have satisfied its requirements.
25 */
26
27FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
28FILE_SECBOOT ( PERMITTED );
29
30/** @file
31 *
32 * Optimised string operations
33 *
34 */
35
extern void * __memcpy ( void *dest, const void *src, size_t len );
extern void * __memcpy_reverse ( void *dest, const void *src, size_t len );

/**
 * Copy memory area (where length is a compile-time constant)
 *
 * @v dest		Destination address
 * @v src		Source address
 * @v len		Length
 * @ret dest		Destination address
 *
 * Lengths of 0-6 and 8 bytes are copied using plain integer
 * assignments.  Lengths of 26 bytes or more are passed to __memcpy().
 * All remaining lengths are copied using an unrolled sequence of
 * "movsl"/"movsw"/"movsb" instructions.
 */
static inline __attribute__ (( always_inline )) void *
__constant_memcpy ( void *dest, const void *src, size_t len ) {
	/* Views of the first eight bytes of each buffer.  The
	 * may_alias attribute allows these accesses regardless of the
	 * effective type of the underlying objects.
	 */
	union {
		uint32_t u32[2];
		uint16_t u16[4];
		uint8_t u8[8];
	} __attribute__ (( __may_alias__ )) *dest_u = dest;
	const union {
		uint32_t u32[2];
		uint16_t u16[4];
		uint8_t u8[8];
	} __attribute__ (( __may_alias__ )) *src_u = src;
	const void *esi;
	void *edi;

	switch ( len ) {
	case 0 : /* 0 bytes */
		return dest;
	/*
	 * Single-register moves; these are always better than a
	 * string operation.  We can clobber an arbitrary two
	 * registers (data, source, dest can re-use source register)
	 * instead of being restricted to esi and edi.  There's also a
	 * much greater potential for optimising with nearby code.
	 *
	 */
	case 1 : /* 4 bytes */
		dest_u->u8[0] = src_u->u8[0];
		return dest;
	case 2 : /* 6 bytes */
		dest_u->u16[0] = src_u->u16[0];
		return dest;
	case 4 : /* 4 bytes */
		dest_u->u32[0] = src_u->u32[0];
		return dest;
	/*
	 * Double-register moves; these are probably still a win.
	 *
	 */
	case 3 : /* 12 bytes */
		dest_u->u16[0] = src_u->u16[0];
		dest_u->u8[2] = src_u->u8[2];
		return dest;
	case 5 : /* 10 bytes */
		dest_u->u32[0] = src_u->u32[0];
		dest_u->u8[4] = src_u->u8[4];
		return dest;
	case 6 : /* 12 bytes */
		dest_u->u32[0] = src_u->u32[0];
		dest_u->u16[2] = src_u->u16[2];
		return dest;
	case 8 : /* 10 bytes */
		dest_u->u32[0] = src_u->u32[0];
		dest_u->u32[1] = src_u->u32[1];
		return dest;
	default :
		/* All other lengths use the string-operation paths below */
		break;
	}

	/* Even if we have to load up esi and edi ready for a string
	 * operation, we can sometimes save space by using multiple
	 * single-byte "movs" operations instead of loading up ecx and
	 * using "rep movsb".
	 *
	 * "load ecx, rep movsb" is 7 bytes, plus an average of 1 byte
	 * to allow for saving/restoring ecx 50% of the time.
	 *
	 * "movsl" and "movsb" are 1 byte each, "movsw" is two bytes.
	 * (In 16-bit mode, "movsl" is 2 bytes and "movsw" is 1 byte,
	 * but "movsl" moves twice as much data, so it balances out).
	 *
	 * The cutoff point therefore occurs around 26 bytes; the byte
	 * requirements for each method are:
	 *
	 * len		   16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
	 * #bytes (ecx)	    8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8
	 * #bytes (no ecx)  4  5  6  7  5  6  7  8  6  7  8  9  7  8  9 10
	 */

	esi = src;
	edi = dest;

	if ( len >= 26 )
		return __memcpy ( dest, src, len );

	/* Each statement below takes edi/esi as inputs and produces
	 * their updated values as outputs, so that the statements
	 * chain.  One "movsl" is emitted per whole dword in len (at
	 * most six, since len is below 26 here), followed by an
	 * optional "movsw" and "movsb" for the remainder.
	 */
	if ( len >= 6*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( len >= 5*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( len >= 4*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( len >= 3*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( len >= 2*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( len >= 1*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( ( len % 4 ) >= 2 )
		__asm__ __volatile__ ( "movsw" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( ( len % 2 ) >= 1 )
		__asm__ __volatile__ ( "movsb" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );

	return dest;
}
157
/**
 * Copy memory area
 *
 * @v dest		Destination address
 * @v src		Source address
 * @v len		Length
 * @ret dest		Destination address
 *
 * Dispatches to __constant_memcpy() when the compiler can prove that
 * the length is a constant, and to __memcpy() otherwise.
 */
static inline __attribute__ (( always_inline )) void *
memcpy ( void *dest, const void *src, size_t len ) {

	/* Length not known at compile time: use the generic routine */
	if ( ! __builtin_constant_p ( len ) )
		return __memcpy ( dest, src, len );

	return __constant_memcpy ( dest, src, len );
}
174
extern void * __memmove ( void *dest, const void *src, size_t len );

/**
 * Copy (possibly overlapping) memory area
 *
 * @v dest		Destination address
 * @v src		Source address
 * @v len		Length
 * @ret dest		Destination address
 *
 * When the distance between the two buffers is a compile-time
 * constant, the copy direction is selected here; otherwise the
 * decision is left to __memmove().
 */
static inline __attribute__ (( always_inline )) void *
memmove ( void *dest, const void *src, size_t len ) {
	ssize_t delta = ( dest - src );

	/* Distance unknown at compile time */
	if ( ! __builtin_constant_p ( delta ) )
		return __memmove ( dest, src, len );

	/* Destination at or below source: use memcpy() */
	if ( delta <= 0 )
		return memcpy ( dest, src, len );

	/* Destination above source: use the reverse copy */
	return __memcpy_reverse ( dest, src, len );
}
199
/**
 * Fill memory region
 *
 * @v dest		Destination address
 * @v fill		Fill pattern
 * @v len		Length
 * @ret dest		Destination address
 *
 * Implemented as a single "rep stosb" with the destination in the
 * "D" register, the length in the "c" register and the fill value in
 * the "a" register.
 */
static inline __attribute__ (( always_inline )) void *
__memset ( void *dest, int fill, size_t len ) {
	void *end_ptr;
	size_t remaining;

	/* The destination and count registers are modified by the
	 * instruction, so they are declared as (unused) outputs tied
	 * to the corresponding inputs.
	 */
	__asm__ __volatile__ ( "rep stosb"
			       : "=&D" ( end_ptr ), "=&c" ( remaining )
			       : "0" ( dest ), "1" ( len ), "a" ( fill )
			       : "memory" );

	/* The caller's original pointer is returned unchanged */
	return dest;
}
219
/**
 * Fill memory region with zero (where length is a compile-time constant)
 *
 * @v dest		Destination address
 * @v len		Length
 * @ret dest		Destination address
 *
 * Lengths of 0-6 and 8 bytes are cleared using plain integer
 * assignments.  Lengths of 26 bytes or more are passed to __memset().
 * All remaining lengths are cleared using an unrolled sequence of
 * "stosl"/"stosw"/"stosb" instructions.
 */
static inline __attribute__ (( always_inline )) void *
__constant_memset_zero ( void *dest, size_t len ) {
	/* View of the first eight bytes of the buffer.  The may_alias
	 * attribute allows these accesses regardless of the effective
	 * type of the underlying object.
	 */
	union {
		uint32_t u32[2];
		uint16_t u16[4];
		uint8_t u8[8];
	} __attribute__ (( __may_alias__ )) *dest_u = dest;
	void *edi;
	uint32_t eax;

	switch ( len ) {
	case 0 : /* 0 bytes */
		return dest;

	/* Single-register moves.  Almost certainly better than a
	 * string operation.  We can avoid clobbering any registers,
	 * we can reuse a zero that happens to already be in a
	 * register, and we can optimise away the code entirely if the
	 * memset() is used to clear a region which then gets
	 * immediately overwritten.
	 */
	case 1 : /* 3 bytes */
		dest_u->u8[0] = 0;
		return dest;
	case 2: /* 5 bytes */
		dest_u->u16[0] = 0;
		return dest;
	case 4: /* 6 bytes */
		dest_u->u32[0] = 0;
		return dest;

	/* Double-register moves.  Very probably better than a string
	 * operation.
	 */
	case 3 : /* 9 bytes */
		dest_u->u16[0] = 0;
		dest_u->u8[2] = 0;
		return dest;
	case 5 : /* 10 bytes */
		dest_u->u32[0] = 0;
		dest_u->u8[4] = 0;
		return dest;
	case 6 : /* 12 bytes */
		dest_u->u32[0] = 0;
		dest_u->u16[2] = 0;
		return dest;
	case 8 : /* 13 bytes */
		dest_u->u32[0] = 0;
		dest_u->u32[1] = 0;
		return dest;
	default :
		/* All other lengths use the string-operation paths below */
		break;
	}

	/* As with memcpy(), we can potentially save space by using
	 * multiple single-byte "stos" instructions instead of loading
	 * up ecx and using "rep stosb".
	 *
	 * "load ecx, rep stosb" is 7 bytes, plus an average of 1 byte
	 * to allow for saving/restoring ecx 50% of the time.
	 *
	 * "stosl" and "stosb" are 1 byte each, "stosw" is two bytes.
	 *
	 * The calculations are therefore the same as for memcpy(),
	 * giving a cutoff point of around 26 bytes.
	 */

	edi = dest;
	eax = 0;

	if ( len >= 26 )
		return __memset ( dest, 0, len );

	/* Each statement below takes edi/eax as inputs and produces
	 * them again as outputs, so that the statements chain.  One
	 * "stosl" is emitted per whole dword in len (at most six,
	 * since len is below 26 here), followed by an optional
	 * "stosw" and "stosb" for the remainder.
	 */
	if ( len >= 6*4 )
		__asm__ __volatile__ ( "stosl" : "=&D" ( edi ), "=&a" ( eax )
				       : "0" ( edi ), "1" ( eax ) : "memory" );
	if ( len >= 5*4 )
		__asm__ __volatile__ ( "stosl" : "=&D" ( edi ), "=&a" ( eax )
				       : "0" ( edi ), "1" ( eax ) : "memory" );
	if ( len >= 4*4 )
		__asm__ __volatile__ ( "stosl" : "=&D" ( edi ), "=&a" ( eax )
				       : "0" ( edi ), "1" ( eax ) : "memory" );
	if ( len >= 3*4 )
		__asm__ __volatile__ ( "stosl" : "=&D" ( edi ), "=&a" ( eax )
				       : "0" ( edi ), "1" ( eax ) : "memory" );
	if ( len >= 2*4 )
		__asm__ __volatile__ ( "stosl" : "=&D" ( edi ), "=&a" ( eax )
				       : "0" ( edi ), "1" ( eax ) : "memory" );
	if ( len >= 1*4 )
		__asm__ __volatile__ ( "stosl" : "=&D" ( edi ), "=&a" ( eax )
				       : "0" ( edi ), "1" ( eax ) : "memory" );
	if ( ( len % 4 ) >= 2 )
		__asm__ __volatile__ ( "stosw" : "=&D" ( edi ), "=&a" ( eax )
				       : "0" ( edi ), "1" ( eax ) : "memory" );
	if ( ( len % 2 ) >= 1 )
		__asm__ __volatile__ ( "stosb" : "=&D" ( edi ), "=&a" ( eax )
				       : "0" ( edi ), "1" ( eax ) : "memory" );

	return dest;
}
325
/**
 * Fill memory region
 *
 * @v dest		Destination address
 * @v fill		Fill pattern
 * @v len		Length
 * @ret dest		Destination address
 *
 * Uses __constant_memset_zero() when both the fill value and the
 * length are compile-time constants and the fill value is zero; uses
 * __memset() in every other case.
 */
static inline __attribute__ (( always_inline )) void *
memset ( void *dest, int fill, size_t len ) {

	/* Anything other than a constant-length zero fill goes to the
	 * generic routine.
	 */
	if ( ! ( __builtin_constant_p ( fill ) && ( fill == 0 ) &&
		 __builtin_constant_p ( len ) ) )
		return __memset ( dest, fill, len );

	return __constant_memset_zero ( dest, len );
}
344
345#endif /* X86_BITS_STRING_H */
unsigned short uint16_t
Definition stdint.h:11
unsigned int uint32_t
Definition stdint.h:12
unsigned char uint8_t
Definition stdint.h:10
signed long ssize_t
Definition stdint.h:7
void * discard_D
Definition bigint.h:32
long discard_c
Definition bigint.h:33
static int fill
Definition string.h:209
void * __memcpy_reverse(void *dest, const void *src, size_t len)
uint32_t eax
Definition string.h:235
void * __memcpy(void *dest, const void *src, size_t len)
const void * esi
Definition string.h:59
if(len >=6 *4) __asm__ __volatile__("movsl" if(len >=5 *4) __asm__ __volatile__("movsl" if(len >=4 *4) __asm__ __volatile__("movsl" if(len >=3 *4) __asm__ __volatile__("movsl" if(len >=2 *4) __asm__ __volatile__("movsl" if(len >=1 *4) __asm__ __volatile__("movsl" if((len % 4) >=2) __asm__ __volatile__("movsw" if((len % 2) >=1) __asm__ __volatile__("movsb" retur dest)
Definition string.h:151
void * edi
Definition string.h:60
__asm__ __volatile__("rep stosb" :"=&D"(discard_D), "=&c"(discard_c) :"0"(dest), "1"(len), "a"(fill) :"memory")
static const void * src
Definition string.h:48
uint16_t offset
Offset to command line.
Definition bzimage.h:3
ring len
Length.
Definition dwmac.h:226
#define FILE_LICENCE(_licence)
Declare a particular licence as applying to a file.
Definition compiler.h:896
#define FILE_SECBOOT(_status)
Declare a file's UEFI Secure Boot permission status.
Definition compiler.h:926
#define u8
Definition igbvf_osdep.h:40
#define __attribute__(x)
Definition compiler.h:10
void * memcpy(void *dest, const void *src, size_t len) __nonnull
void * memset(void *dest, int character, size_t len) __nonnull
void * memmove(void *dest, const void *src, size_t len) __nonnull
__asm__(".section \".rodata\", \"a\", " PROGBITS "\n\t" "\nprivate_key_data:\n\t" ".size private_key_data, ( . - private_key_data )\n\t" ".equ private_key_len, ( . - private_key_data )\n\t" ".previous\n\t")
#define u16
Definition vga.h:20
#define u32
Definition vga.h:21
void * __memmove(void *dest, const void *src, size_t len)
Copy (possibly overlapping) memory area.
Definition x86_string.c:118