/* iPXE: x86 optimised string operations (bits/string.h) */
1 #ifndef X86_BITS_STRING_H
2 #define X86_BITS_STRING_H
3 
4 /*
5  * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or any later version.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20  * 02110-1301, USA.
21  *
22  * You can also choose to distribute this program under the terms of
23  * the Unmodified Binary Distribution Licence (as given in the file
24  * COPYING.UBDL), provided that you have satisfied its requirements.
25  */
26 
27 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
28 FILE_SECBOOT ( PERMITTED );
29 
30 /** @file
31  *
32  * Optimised string operations
33  *
34  */
35 
36 extern void * __memcpy ( void *dest, const void *src, size_t len );
37 extern void * __memcpy_reverse ( void *dest, const void *src, size_t len );
38 
/**
 * Copy memory area (where length is a compile-time constant)
 *
 * @v dest		Destination address
 * @v src		Source address
 * @v len		Length (must be a compile-time constant)
 * @ret dest		Destination address
 *
 * Uses single- or double-register moves for small constant lengths,
 * unrolled "movs" for medium lengths, and falls back to __memcpy()
 * (rep movsb) beyond the ~26-byte cutoff described below.
 */
static inline __attribute__ (( always_inline )) void *
__constant_memcpy ( void *dest, const void *src, size_t len ) {
	union {
		uint32_t u32[2];
		uint16_t u16[4];
		uint8_t u8[8];
	} __attribute__ (( __may_alias__ )) *dest_u = dest;
	const union {
		uint32_t u32[2];
		uint16_t u16[4];
		uint8_t u8[8];
	} __attribute__ (( __may_alias__ )) *src_u = src;
	const void *esi;
	void *edi;

	switch ( len ) {
	case 0 : /* 0 bytes */
		return dest;
	/*
	 * Single-register moves; these are always better than a
	 * string operation.  We can clobber an arbitrary two
	 * registers (data, source, dest can re-use source register)
	 * instead of being restricted to esi and edi.  There's also a
	 * much greater potential for optimising with nearby code.
	 *
	 */
	case 1 : /* 4 bytes */
		dest_u->u8[0] = src_u->u8[0];
		return dest;
	case 2 : /* 6 bytes */
		dest_u->u16[0] = src_u->u16[0];
		return dest;
	case 4 : /* 4 bytes */
		dest_u->u32[0] = src_u->u32[0];
		return dest;
	/*
	 * Double-register moves; these are probably still a win.
	 *
	 */
	case 3 : /* 12 bytes */
		dest_u->u16[0] = src_u->u16[0];
		dest_u->u8[2] = src_u->u8[2];
		return dest;
	case 5 : /* 10 bytes */
		dest_u->u32[0] = src_u->u32[0];
		dest_u->u8[4] = src_u->u8[4];
		return dest;
	case 6 : /* 12 bytes */
		dest_u->u32[0] = src_u->u32[0];
		dest_u->u16[2] = src_u->u16[2];
		return dest;
	case 8 : /* 10 bytes */
		dest_u->u32[0] = src_u->u32[0];
		dest_u->u32[1] = src_u->u32[1];
		return dest;
	}

	/* Even if we have to load up esi and edi ready for a string
	 * operation, we can sometimes save space by using multiple
	 * single-byte "movs" operations instead of loading up ecx and
	 * using "rep movsb".
	 *
	 * "load ecx, rep movsb" is 7 bytes, plus an average of 1 byte
	 * to allow for saving/restoring ecx 50% of the time.
	 *
	 * "movsl" and "movsb" are 1 byte each, "movsw" is two bytes.
	 * (In 16-bit mode, "movsl" is 2 bytes and "movsw" is 1 byte,
	 * but "movsl" moves twice as much data, so it balances out).
	 *
	 * The cutoff point therefore occurs around 26 bytes; the byte
	 * requirements for each method are:
	 *
	 * len		  16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
	 * #bytes (ecx)	   8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8
	 * #bytes (no ecx) 4  5  6  7  5  6  7  8  6  7  8  9  7  8  9 10
	 */

	esi = src;
	edi = dest;

	if ( len >= 26 )
		return __memcpy ( dest, src, len );

	if ( len >= 6*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( len >= 5*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( len >= 4*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( len >= 3*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( len >= 2*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( len >= 1*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( ( len % 4 ) >= 2 )
		__asm__ __volatile__ ( "movsw" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( ( len % 2 ) >= 1 )
		__asm__ __volatile__ ( "movsb" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );

	return dest;
}
157 
/**
 * Copy memory area
 *
 * @v dest		Destination address
 * @v src		Source address
 * @v len		Length
 * @ret dest		Destination address
 *
 * Dispatches to the constant-length specialisation when the compiler
 * can prove @c len is a compile-time constant, and to the generic
 * __memcpy() otherwise.
 */
static inline __attribute__ (( always_inline )) void *
memcpy ( void *dest, const void *src, size_t len ) {
	return ( __builtin_constant_p ( len ) ?
		 __constant_memcpy ( dest, src, len ) :
		 __memcpy ( dest, src, len ) );
}
174 
175 extern void * __memmove ( void *dest, const void *src, size_t len );
176 
177 /**
178  * Copy (possibly overlapping) memory area
179  *
180  * @v dest Destination address
181  * @v src Source address
182  * @v len Length
183  * @ret dest Destination address
184  */
185 static inline __attribute__ (( always_inline )) void *
186 memmove ( void *dest, const void *src, size_t len ) {
187  ssize_t offset = ( dest - src );
188 
189  if ( __builtin_constant_p ( offset ) ) {
190  if ( offset <= 0 ) {
191  return memcpy ( dest, src, len );
192  } else {
193  return __memcpy_reverse ( dest, src, len );
194  }
195  } else {
196  return __memmove ( dest, src, len );
197  }
198 }
199 
/**
 * Fill memory region
 *
 * @v dest		Destination address
 * @v fill		Fill pattern (only the low byte is used)
 * @v len		Length
 * @ret dest		Destination address
 *
 * Implemented as a single "rep stosb"; the output operands exist only
 * to tell the compiler that the destination and count registers are
 * clobbered by the string operation.
 */
static inline __attribute__ (( always_inline )) void *
__memset ( void *dest, int fill, size_t len ) {
	void *clobbered_di;
	size_t clobbered_cx;

	__asm__ __volatile__ ( "rep stosb"
			       : "=&D" ( clobbered_di ),
				 "=&c" ( clobbered_cx )
			       : "0" ( dest ), "1" ( len ), "a" ( fill )
			       : "memory" );
	return dest;
}
219 
/**
 * Fill memory region with zero (where length is a compile-time constant)
 *
 * @v dest		Destination address
 * @v len		Length (must be a compile-time constant)
 * @ret dest		Destination address
 *
 * Mirrors __constant_memcpy(): plain stores for small constant
 * lengths, unrolled "stos" for medium lengths, and __memset()
 * (rep stosb) beyond the ~26-byte cutoff.
 */
static inline __attribute__ (( always_inline )) void *
__constant_memset_zero ( void *dest, size_t len ) {
	union {
		uint32_t u32[2];
		uint16_t u16[4];
		uint8_t u8[8];
	} __attribute__ (( __may_alias__ )) *dest_u = dest;
	void *edi;
	uint32_t eax;

	switch ( len ) {
	case 0 : /* 0 bytes */
		return dest;

	/* Single-register moves.  Almost certainly better than a
	 * string operation.  We can avoid clobbering any registers,
	 * we can reuse a zero that happens to already be in a
	 * register, and we can optimise away the code entirely if the
	 * memset() is used to clear a region which then gets
	 * immediately overwritten.
	 */
	case 1 : /* 3 bytes */
		dest_u->u8[0] = 0;
		return dest;
	case 2: /* 5 bytes */
		dest_u->u16[0] = 0;
		return dest;
	case 4: /* 6 bytes */
		dest_u->u32[0] = 0;
		return dest;

	/* Double-register moves.  Very probably better than a string
	 * operation.
	 */
	case 3 : /* 9 bytes */
		dest_u->u16[0] = 0;
		dest_u->u8[2] = 0;
		return dest;
	case 5 : /* 10 bytes */
		dest_u->u32[0] = 0;
		dest_u->u8[4] = 0;
		return dest;
	case 6 : /* 12 bytes */
		dest_u->u32[0] = 0;
		dest_u->u16[2] = 0;
		return dest;
	case 8 : /* 13 bytes */
		dest_u->u32[0] = 0;
		dest_u->u32[1] = 0;
		return dest;
	}

	/* As with memcpy(), we can potentially save space by using
	 * multiple single-byte "stos" instructions instead of loading
	 * up ecx and using "rep stosb".
	 *
	 * "load ecx, rep movsb" is 7 bytes, plus an average of 1 byte
	 * to allow for saving/restoring ecx 50% of the time.
	 *
	 * "stosl" and "stosb" are 1 byte each, "stosw" is two bytes.
	 *
	 * The calculations are therefore the same as for memcpy(),
	 * giving a cutoff point of around 26 bytes.
	 */

	edi = dest;
	eax = 0;

	if ( len >= 26 )
		return __memset ( dest, 0, len );

	if ( len >= 6*4 )
		__asm__ __volatile__ ( "stosl" : "=&D" ( edi ), "=&a" ( eax )
				       : "0" ( edi ), "1" ( eax ) : "memory" );
	if ( len >= 5*4 )
		__asm__ __volatile__ ( "stosl" : "=&D" ( edi ), "=&a" ( eax )
				       : "0" ( edi ), "1" ( eax ) : "memory" );
	if ( len >= 4*4 )
		__asm__ __volatile__ ( "stosl" : "=&D" ( edi ), "=&a" ( eax )
				       : "0" ( edi ), "1" ( eax ) : "memory" );
	if ( len >= 3*4 )
		__asm__ __volatile__ ( "stosl" : "=&D" ( edi ), "=&a" ( eax )
				       : "0" ( edi ), "1" ( eax ) : "memory" );
	if ( len >= 2*4 )
		__asm__ __volatile__ ( "stosl" : "=&D" ( edi ), "=&a" ( eax )
				       : "0" ( edi ), "1" ( eax ) : "memory" );
	if ( len >= 1*4 )
		__asm__ __volatile__ ( "stosl" : "=&D" ( edi ), "=&a" ( eax )
				       : "0" ( edi ), "1" ( eax ) : "memory" );
	if ( ( len % 4 ) >= 2 )
		__asm__ __volatile__ ( "stosw" : "=&D" ( edi ), "=&a" ( eax )
				       : "0" ( edi ), "1" ( eax ) : "memory" );
	if ( ( len % 2 ) >= 1 )
		__asm__ __volatile__ ( "stosb" : "=&D" ( edi ), "=&a" ( eax )
				       : "0" ( edi ), "1" ( eax ) : "memory" );

	return dest;
}
325 
/**
 * Fill memory region
 *
 * @v dest		Destination address
 * @v fill		Fill pattern
 * @v len		Length
 * @ret dest		Destination address
 *
 * A zero fill with a compile-time-constant length takes the
 * specialised zeroing path; everything else goes through the generic
 * "rep stosb" implementation.
 */
static inline __attribute__ (( always_inline )) void *
memset ( void *dest, int fill, size_t len ) {

	if ( __builtin_constant_p ( fill ) && ( fill == 0 ) &&
	     __builtin_constant_p ( len ) )
		return __constant_memset_zero ( dest, len );
	return __memset ( dest, fill, len );
}
344 
345 #endif /* X86_BITS_STRING_H */
uint16_t u16
Definition: stdint.h:22
unsigned short uint16_t
Definition: stdint.h:11
static const void size_t len
Definition: string.h:48
static __attribute__((always_inline)) void *__constant_memcpy(void *dest
Copy memory area (where length is a compile-time constant)
void * __memmove(void *dest, const void *src, size_t len)
Copy (possibly overlapping) memory area.
Definition: x86_string.c:118
const void * esi
Definition: string.h:59
void * memcpy(void *dest, const void *src, size_t len) __nonnull
static const void * src
Definition: string.h:48
void * __memcpy_reverse(void *dest, const void *src, size_t len)
__asm__ __volatile__("rep stosb" :"=&D"(discard_D), "=&c"(discard_c) :"0"(dest), "1"(len), "a"(fill) :"memory")
size_t discard_c
Definition: string.h:211
uint32_t eax
Definition: string.h:235
unsigned char uint8_t
Definition: stdint.h:10
void * discard_D
Definition: bigint.h:32
void * edi
Definition: string.h:60
unsigned int uint32_t
Definition: stdint.h:12
void * memmove(void *dest, const void *src, size_t len) __nonnull
__asm__(".section \".rodata\", \"a\", " PROGBITS "\n\t" "\nprivate_key_data:\n\t" ".size private_key_data, ( . - private_key_data )\n\t" ".equ private_key_len, ( . - private_key_data )\n\t" ".previous\n\t")
void * __memcpy(void *dest, const void *src, size_t len)
if(len >=6 *4) __asm__ __volatile__("movsl" if(len >=5 *4) __asm__ __volatile__("movsl" if(len >=4 *4) __asm__ __volatile__("movsl" if(len >=3 *4) __asm__ __volatile__("movsl" if(len >=2 *4) __asm__ __volatile__("movsl" if(len >=1 *4) __asm__ __volatile__("movsl" if((len % 4) >=2) __asm__ __volatile__("movsw" if((len % 2) >=1) __asm__ __volatile__("movsb" return dest
Definition: string.h:151
signed long ssize_t
Definition: stdint.h:7
uint16_t offset
Offset to command line.
Definition: bzimage.h:8
FILE_SECBOOT(PERMITTED)
static int fill
Definition: string.h:209
uint8_t u8
Definition: stdint.h:20
uint32_t u32
Definition: stdint.h:24
FILE_LICENCE(GPL2_OR_LATER_OR_UBDL)
void * memset(void *dest, int character, size_t len) __nonnull