iPXE
string.h
Go to the documentation of this file.
1 #ifndef X86_BITS_STRING_H
2 #define X86_BITS_STRING_H
3 
4 /*
5  * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or any later version.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20  * 02110-1301, USA.
21  *
22  * You can also choose to distribute this program under the terms of
23  * the Unmodified Binary Distribution Licence (as given in the file
24  * COPYING.UBDL), provided that you have satisfied its requirements.
25  */
26 
27 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
28 
29 /** @file
30  *
31  * Optimised string operations
32  *
33  */
34 
35 extern void * __memcpy ( void *dest, const void *src, size_t len );
36 extern void * __memcpy_reverse ( void *dest, const void *src, size_t len );
37 
38 /**
39  * Copy memory area (where length is a compile-time constant)
40  *
41  * @v dest Destination address
42  * @v src Source address
43  * @v len Length
44  * @ret dest Destination address
45  */
46 static inline __attribute__ (( always_inline )) void *
47 __constant_memcpy ( void *dest, const void *src, size_t len ) {
48  union {
49  uint32_t u32[2];
50  uint16_t u16[4];
51  uint8_t u8[8];
52  } __attribute__ (( __may_alias__ )) *dest_u = dest;
53  const union {
56  uint8_t u8[8];
57  } __attribute__ (( __may_alias__ )) *src_u = src;
58  const void *esi;
59  void *edi;
60 
61  switch ( len ) {
62  case 0 : /* 0 bytes */
63  return dest;
64  /*
65  * Single-register moves; these are always better than a
66  * string operation. We can clobber an arbitrary two
67  * registers (data, source, dest can re-use source register)
68  * instead of being restricted to esi and edi. There's also a
69  * much greater potential for optimising with nearby code.
70  *
71  */
72  case 1 : /* 4 bytes */
73  dest_u->u8[0] = src_u->u8[0];
74  return dest;
75  case 2 : /* 6 bytes */
76  dest_u->u16[0] = src_u->u16[0];
77  return dest;
78  case 4 : /* 4 bytes */
79  dest_u->u32[0] = src_u->u32[0];
80  return dest;
81  /*
82  * Double-register moves; these are probably still a win.
83  *
84  */
85  case 3 : /* 12 bytes */
86  dest_u->u16[0] = src_u->u16[0];
87  dest_u->u8[2] = src_u->u8[2];
88  return dest;
89  case 5 : /* 10 bytes */
90  dest_u->u32[0] = src_u->u32[0];
91  dest_u->u8[4] = src_u->u8[4];
92  return dest;
93  case 6 : /* 12 bytes */
94  dest_u->u32[0] = src_u->u32[0];
95  dest_u->u16[2] = src_u->u16[2];
96  return dest;
97  case 8 : /* 10 bytes */
98  dest_u->u32[0] = src_u->u32[0];
99  dest_u->u32[1] = src_u->u32[1];
100  return dest;
101  }
102 
103  /* Even if we have to load up esi and edi ready for a string
104  * operation, we can sometimes save space by using multiple
105  * single-byte "movs" operations instead of loading up ecx and
106  * using "rep movsb".
107  *
108  * "load ecx, rep movsb" is 7 bytes, plus an average of 1 byte
109  * to allow for saving/restoring ecx 50% of the time.
110  *
111  * "movsl" and "movsb" are 1 byte each, "movsw" is two bytes.
112  * (In 16-bit mode, "movsl" is 2 bytes and "movsw" is 1 byte,
113  * but "movsl" moves twice as much data, so it balances out).
114  *
115  * The cutoff point therefore occurs around 26 bytes; the byte
116  * requirements for each method are:
117  *
118  * len 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
119  * #bytes (ecx) 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
120  * #bytes (no ecx) 4 5 6 7 5 6 7 8 6 7 8 9 7 8 9 10
121  */
122 
123  esi = src;
124  edi = dest;
125 
126  if ( len >= 26 )
127  return __memcpy ( dest, src, len );
128 
129  if ( len >= 6*4 )
130  __asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
131  : "0" ( edi ), "1" ( esi ) : "memory" );
132  if ( len >= 5*4 )
133  __asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
134  : "0" ( edi ), "1" ( esi ) : "memory" );
135  if ( len >= 4*4 )
136  __asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
137  : "0" ( edi ), "1" ( esi ) : "memory" );
138  if ( len >= 3*4 )
139  __asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
140  : "0" ( edi ), "1" ( esi ) : "memory" );
141  if ( len >= 2*4 )
142  __asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
143  : "0" ( edi ), "1" ( esi ) : "memory" );
144  if ( len >= 1*4 )
145  __asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
146  : "0" ( edi ), "1" ( esi ) : "memory" );
147  if ( ( len % 4 ) >= 2 )
148  __asm__ __volatile__ ( "movsw" : "=&D" ( edi ), "=&S" ( esi )
149  : "0" ( edi ), "1" ( esi ) : "memory" );
150  if ( ( len % 2 ) >= 1 )
151  __asm__ __volatile__ ( "movsb" : "=&D" ( edi ), "=&S" ( esi )
152  : "0" ( edi ), "1" ( esi ) : "memory" );
153 
154  return dest;
155 }
156 
157 /**
158  * Copy memory area
159  *
160  * @v dest Destination address
161  * @v src Source address
162  * @v len Length
163  * @ret dest Destination address
164  */
165 static inline __attribute__ (( always_inline )) void *
166 memcpy ( void *dest, const void *src, size_t len ) {
167  if ( __builtin_constant_p ( len ) ) {
168  return __constant_memcpy ( dest, src, len );
169  } else {
170  return __memcpy ( dest, src, len );
171  }
172 }
173 
174 extern void * __memmove ( void *dest, const void *src, size_t len );
175 
176 /**
177  * Copy (possibly overlapping) memory area
178  *
179  * @v dest Destination address
180  * @v src Source address
181  * @v len Length
182  * @ret dest Destination address
183  */
184 static inline __attribute__ (( always_inline )) void *
185 memmove ( void *dest, const void *src, size_t len ) {
186  ssize_t offset = ( dest - src );
187 
188  if ( __builtin_constant_p ( offset ) ) {
189  if ( offset <= 0 ) {
190  return memcpy ( dest, src, len );
191  } else {
192  return __memcpy_reverse ( dest, src, len );
193  }
194  } else {
195  return __memmove ( dest, src, len );
196  }
197 }
198 
199 /**
200  * Fill memory region
201  *
202  * @v dest Destination address
203  * @v fill Fill pattern
204  * @v len Length
205  * @ret dest Destination address
206  */
207 static inline __attribute__ (( always_inline )) void *
208 __memset ( void *dest, int fill, size_t len ) {
209  void *discard_D;
210  size_t discard_c;
211 
212  __asm__ __volatile__ ( "rep stosb"
213  : "=&D" ( discard_D ), "=&c" ( discard_c )
214  : "0" ( dest ), "1" ( len ), "a" ( fill )
215  : "memory" );
216  return dest;
217 }
218 
219 /**
220  * Fill memory region with zero (where length is a compile-time constant)
221  *
222  * @v dest Destination address
223  * @v len Length
224  * @ret dest Destination address
225  */
226 static inline __attribute__ (( always_inline )) void *
227 __constant_memset_zero ( void *dest, size_t len ) {
228  union {
229  uint32_t u32[2];
230  uint16_t u16[4];
231  uint8_t u8[8];
232  } __attribute__ (( __may_alias__ )) *dest_u = dest;
233  void *edi;
235 
236  switch ( len ) {
237  case 0 : /* 0 bytes */
238  return dest;
239 
240  /* Single-register moves. Almost certainly better than a
241  * string operation. We can avoid clobbering any registers,
242  * we can reuse a zero that happens to already be in a
243  * register, and we can optimise away the code entirely if the
244  * memset() is used to clear a region which then gets
245  * immediately overwritten.
246  */
247  case 1 : /* 3 bytes */
248  dest_u->u8[0] = 0;
249  return dest;
250  case 2: /* 5 bytes */
251  dest_u->u16[0] = 0;
252  return dest;
253  case 4: /* 6 bytes */
254  dest_u->u32[0] = 0;
255  return dest;
256 
257  /* Double-register moves. Very probably better than a string
258  * operation.
259  */
260  case 3 : /* 9 bytes */
261  dest_u->u16[0] = 0;
262  dest_u->u8[2] = 0;
263  return dest;
264  case 5 : /* 10 bytes */
265  dest_u->u32[0] = 0;
266  dest_u->u8[4] = 0;
267  return dest;
268  case 6 : /* 12 bytes */
269  dest_u->u32[0] = 0;
270  dest_u->u16[2] = 0;
271  return dest;
272  case 8 : /* 13 bytes */
273  dest_u->u32[0] = 0;
274  dest_u->u32[1] = 0;
275  return dest;
276  }
277 
278  /* As with memcpy(), we can potentially save space by using
279  * multiple single-byte "stos" instructions instead of loading
280  * up ecx and using "rep stosb".
281  *
282  * "load ecx, rep movsb" is 7 bytes, plus an average of 1 byte
283  * to allow for saving/restoring ecx 50% of the time.
284  *
285  * "stosl" and "stosb" are 1 byte each, "stosw" is two bytes.
286  *
287  * The calculations are therefore the same as for memcpy(),
288  * giving a cutoff point of around 26 bytes.
289  */
290 
291  edi = dest;
292  eax = 0;
293 
294  if ( len >= 26 )
295  return __memset ( dest, 0, len );
296 
297  if ( len >= 6*4 )
298  __asm__ __volatile__ ( "stosl" : "=&D" ( edi ), "=&a" ( eax )
299  : "0" ( edi ), "1" ( eax ) : "memory" );
300  if ( len >= 5*4 )
301  __asm__ __volatile__ ( "stosl" : "=&D" ( edi ), "=&a" ( eax )
302  : "0" ( edi ), "1" ( eax ) : "memory" );
303  if ( len >= 4*4 )
304  __asm__ __volatile__ ( "stosl" : "=&D" ( edi ), "=&a" ( eax )
305  : "0" ( edi ), "1" ( eax ) : "memory" );
306  if ( len >= 3*4 )
307  __asm__ __volatile__ ( "stosl" : "=&D" ( edi ), "=&a" ( eax )
308  : "0" ( edi ), "1" ( eax ) : "memory" );
309  if ( len >= 2*4 )
310  __asm__ __volatile__ ( "stosl" : "=&D" ( edi ), "=&a" ( eax )
311  : "0" ( edi ), "1" ( eax ) : "memory" );
312  if ( len >= 1*4 )
313  __asm__ __volatile__ ( "stosl" : "=&D" ( edi ), "=&a" ( eax )
314  : "0" ( edi ), "1" ( eax ) : "memory" );
315  if ( ( len % 4 ) >= 2 )
316  __asm__ __volatile__ ( "stosw" : "=&D" ( edi ), "=&a" ( eax )
317  : "0" ( edi ), "1" ( eax ) : "memory" );
318  if ( ( len % 2 ) >= 1 )
319  __asm__ __volatile__ ( "stosb" : "=&D" ( edi ), "=&a" ( eax )
320  : "0" ( edi ), "1" ( eax ) : "memory" );
321 
322  return dest;
323 }
324 
325 /**
326  * Fill memory region
327  *
328  * @v dest Destination address
329  * @v fill Fill pattern
330  * @v len Length
331  * @ret dest Destination address
332  */
333 static inline __attribute__ (( always_inline )) void *
334 memset ( void *dest, int fill, size_t len ) {
335 
336  if ( __builtin_constant_p ( fill ) && ( fill == 0 ) &&
337  __builtin_constant_p ( len ) ) {
338  return __constant_memset_zero ( dest, len );
339  } else {
340  return __memset ( dest, fill, len );
341  }
342 }
343 
344 #endif /* X86_BITS_STRING_H */
uint16_t u16
Definition: stdint.h:21
unsigned short uint16_t
Definition: stdint.h:11
static const void size_t len
Definition: string.h:47
static __attribute__((always_inline)) void *__constant_memcpy(void *dest
Copy memory area (where length is a compile-time constant)
if(len >=6 *4) __asm__ __volatile__("movsl" if(len >=5 *4) __asm__ __volatile__("movsl" if(len >=4 *4) __asm__ __volatile__("movsl" if(len >=3 *4) __asm__ __volatile__("movsl" if(len >=2 *4) __asm__ __volatile__("movsl" if(len >=1 *4) __asm__ __volatile__("movsl" if((len % 4) >=2) __asm__ __volatile__("movsw" if((len % 2) >=1) __asm__ __volatile__("movsb" return dest
Definition: string.h:150
FILE_LICENCE(GPL2_OR_LATER_OR_UBDL)
void * __memmove(void *dest, const void *src, size_t len)
Copy (possibly overlapping) memory area.
Definition: x86_string.c:117
const void * esi
Definition: string.h:58
static const void * src
Definition: string.h:47
void * memcpy(void *dest, const void *src, size_t len) __nonnull
static userptr_t size_t offset
Offset of the first segment within the content.
Definition: deflate.h:259
void * __memcpy_reverse(void *dest, const void *src, size_t len)
__asm__ __volatile__("rep stosb" :"=&D"(discard_D), "=&c"(discard_c) :"0"(dest), "1"(len), "a"(fill) :"memory")
static int fill
Definition: string.h:208
size_t discard_c
Definition: string.h:210
uint32_t eax
Definition: string.h:234
unsigned char uint8_t
Definition: stdint.h:10
void * discard_D
Definition: bigint.h:31
void * edi
Definition: string.h:59
unsigned int uint32_t
Definition: stdint.h:12
void * memmove(void *dest, const void *src, size_t len) __nonnull
__asm__(".section \".rodata\", \"a\", " PROGBITS "\n\t" "\nprivate_key_data:\n\t" ".size private_key_data, ( . - private_key_data )\n\t" ".equ private_key_len, ( . - private_key_data )\n\t" ".previous\n\t")
void * __memcpy(void *dest, const void *src, size_t len)
signed long ssize_t
Definition: stdint.h:7
uint8_t u8
Definition: stdint.h:19
uint32_t u32
Definition: stdint.h:23
void * memset(void *dest, int character, size_t len) __nonnull