iPXE
x86_tcpip.c
Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2012 Michael Brown <mbrown@fensystems.co.uk>.
00003  *
00004  * This program is free software; you can redistribute it and/or
00005  * modify it under the terms of the GNU General Public License as
00006  * published by the Free Software Foundation; either version 2 of the
00007  * License, or (at your option) any later version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but
00010  * WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012  * General Public License for more details.
00013  *
00014  * You should have received a copy of the GNU General Public License
00015  * along with this program; if not, write to the Free Software
00016  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
00017  * 02110-1301, USA.
00018  *
00019  * You can also choose to distribute this program under the terms of
00020  * the Unmodified Binary Distribution Licence (as given in the file
00021  * COPYING.UBDL), provided that you have satisfied its requirements.
00022  */
00023 
00024 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
00025 
00026 /** @file
00027  *
00028  * TCP/IP checksum
00029  *
00030  */
00031 
00032 #include <limits.h>
00033 #include <ipxe/tcpip.h>
00034 
00035 extern char x86_tcpip_loop_end[]; /* Label defined inside the inline
                                         * assembly of tcpip_continue_chksum();
                                         * declared here so that its address
                                         * can be passed in as an asm operand.
                                         */
00036 
00037 /**
00038  * Calculate continued TCP/IP checksum
00039  *
       * The complement of @c partial is used as the starting sum.  The
       * data buffer is then added to it using add-with-carry
       * instructions (16-bit words for the unaligned head and tail,
       * native machine words for the aligned middle, and a single
       * trailing byte if the length is odd).  The result is folded down
       * to 16 bits and its complement is returned.
       *
00040  * @v partial           Checksum of already-summed data, in network byte order
00041  * @v data              Data buffer
00042  * @v len               Length of data buffer, in bytes
00043  * @ret cksum           Updated checksum, in network byte order
00044  */
00045 uint16_t tcpip_continue_chksum ( uint16_t partial, const void *data,
00046                                  size_t len ) {
              /* Running sum, seeded with the un-complemented partial
               * checksum (the complement is re-applied on return).
               */
00047         unsigned long sum = ( ( ~partial ) & 0xffff );
00048         unsigned long initial_word_count;
00049         unsigned long loop_count;
00050         unsigned long loop_partial_count;
00051         unsigned long final_word_count;
00052         unsigned long final_byte;
              /* Dummy output variables: these exist only so that the
               * registers modified by the assembly below can be declared
               * as outputs.  Their values are never read.
               */
00053         unsigned long discard_S;
00054         unsigned long discard_c;
00055         unsigned long discard_a;
00056         unsigned long discard_r1;
00057         unsigned long discard_r2;
00058 
00059         /* Calculate number of initial 16-bit words required to bring
00060          * the main loop into alignment.  (We don't care about the
00061          * speed for data aligned to less than 16 bits, since this
00062          * situation won't occur in practice.)
               *
               * The negated address masked with ( sizeof ( sum ) - 1 ) is
               * the number of bytes up to the next machine-word boundary;
               * shifting right by one converts that to 16-bit words.
               * This is done only when len >= sizeof ( sum ), so the
               * subtraction from len below cannot underflow.
00063          */
00064         if ( len >= sizeof ( sum ) ) {
00065                 initial_word_count = ( ( -( ( intptr_t ) data ) &
00066                                          ( sizeof ( sum ) - 1 ) ) >> 1 );
00067         } else {
00068                 initial_word_count = 0;
00069         }
00070         len -= ( initial_word_count * 2 );
00071 
00072         /* Calculate number of iterations of the main loop.  This loop
00073          * processes native machine words (32-bit or 64-bit), and is
00074          * unrolled 16 times.  We calculate an overall iteration
00075          * count, and a starting point for the first iteration.
               *
               * loop_count is the number of complete 16-word passes;
               * loop_partial_count is the number of leftover machine
               * words (0-15), handled by entering the unrolled loop
               * part-way through on the first pass.
00076          */
00077         loop_count = ( len / ( sizeof ( sum ) * 16 ) );
00078         loop_partial_count =
00079                 ( ( len % ( sizeof ( sum ) * 16 ) ) / sizeof ( sum ) );
00080 
00081         /* Calculate number of 16-bit words remaining after the main
00082          * loop completes.
00083          */
00084         final_word_count = ( ( len % sizeof ( sum ) ) / 2 );
00085 
00086         /* Calculate whether or not a final byte remains at the end */
00087         final_byte = ( len & 1 );
00088 
              /* NOTE(review): the assembly reads the buffer through %1 but
               * declares neither a memory input operand nor a "memory"
               * clobber - confirm that the compiler cannot move earlier
               * stores to the buffer past this statement.
               *
               * NOTE(review): x86_tcpip_loop_start and x86_tcpip_loop_end
               * are non-local assembler labels, so this statement
               * presumably must be emitted exactly once per object file -
               * confirm that the function is never inlined or cloned.
               *
               * NOTE(review): the fold step uses push/pop from within the
               * assembly; on 64-bit builds this presumably relies on the
               * stack red zone being disabled - confirm against the
               * build flags.
               */
00089         /* Calculate the checksum */
00090         __asm__ ( /* Calculate position at which to jump into the
00091                    * unrolled loop.
                         *
                         * %4 = loop_end - ( loop_partial_count * step size ),
                         * where the step size is the encoded length of one
                         * "lods;adc" pair, so that exactly
                         * loop_partial_count pairs execute on the first
                         * pass through the loop.
00092                    */
00093                   "imul $( -x86_tcpip_loop_step_size ), %4\n\t"
00094                   "add %5, %4\n\t"
00095 
00096                   /* Clear carry flag before starting checksumming */
                        /* This must come after the imul/add above, since
                         * those modify the flags.  From here until the
                         * final "adcw $0" the carry flag links every adc
                         * to the next; the lods, loop, mov, jmp, push and
                         * pop instructions in between leave it untouched.
                         */
00097                   "clc\n\t"
00098 
00099                   /* Checksum initial words */
                        /* The loop is entered at its "loop" instruction,
                         * which decrements the counter before testing it;
                         * the counter is therefore preloaded with
                         * initial_word_count + 1.
                         */
00100                   "jmp 2f\n\t"
00101                   "\n1:\n\t"
00102                   "lodsw\n\t"
00103                   "adcw %w2, %w0\n\t"
00104                   "\n2:\n\t"
00105                   "loop 1b\n\t"
00106 
00107                   /* Main "lods;adc" loop, unrolled x16 */
                        /* The counter is loaded with loop_count + 1: the
                         * first (partial) pass is entered at the computed
                         * address in %4, and is followed by loop_count
                         * complete passes.  The %z2 modifier selects the
                         * lods suffix matching the size of operand 2.
                         */
00108                   "mov %12, %3\n\t"
00109                   "jmp *%4\n\t"
00110                   "\nx86_tcpip_loop_start:\n\t"
00111                   "lods%z2\n\tadc %2, %0\n\t"
00112                   "lods%z2\n\tadc %2, %0\n\t"
00113                   "lods%z2\n\tadc %2, %0\n\t"
00114                   "lods%z2\n\tadc %2, %0\n\t"
00115                   "lods%z2\n\tadc %2, %0\n\t"
00116                   "lods%z2\n\tadc %2, %0\n\t"
00117                   "lods%z2\n\tadc %2, %0\n\t"
00118                   "lods%z2\n\tadc %2, %0\n\t"
00119                   "lods%z2\n\tadc %2, %0\n\t"
00120                   "lods%z2\n\tadc %2, %0\n\t"
00121                   "lods%z2\n\tadc %2, %0\n\t"
00122                   "lods%z2\n\tadc %2, %0\n\t"
00123                   "lods%z2\n\tadc %2, %0\n\t"
00124                   "lods%z2\n\tadc %2, %0\n\t"
00125                   "lods%z2\n\tadc %2, %0\n\t"
00126                   "lods%z2\n\tadc %2, %0\n\t"
00127                   "\nx86_tcpip_loop_end:\n\t"
00128                   "loop x86_tcpip_loop_start\n\t"
                        /* Size in bytes of one "lods;adc" pair: the total
                         * size of the unrolled body divided by 16.
                         */
00129                   ".equ x86_tcpip_loop_step_size, "
00130                   "  ( ( x86_tcpip_loop_end - x86_tcpip_loop_start ) >> 4 )\n\t"
00131 
00132                   /* Checksum remaining whole words */
                        /* Same structure as the initial-word loop, with
                         * the counter loaded from final_word_count + 1.
                         */
00133                   "mov %13, %3\n\t"
00134                   "jmp 2f\n\t"
00135                   "\n1:\n\t"
00136                   "lodsw\n\t"
00137                   "adcw %w2, %w0\n\t"
00138                   "\n2:\n\t"
00139                   "loop 1b\n\t"
00140 
00141                   /* Checksum final byte if applicable */
                        /* The counter is loaded with final_byte (0 or 1).
                         * "loop" decrements it and jumps when the result
                         * is non-zero, so the byte addition is skipped
                         * when final_byte is 0 and executed when it is 1.
                         * The byte is added into the low byte of the sum
                         * and the carry is propagated into the next byte.
                         */
00142                   "mov %14, %3\n\t"
00143                   "loop 1f\n\t"
00144                   "adcb (%1), %b0\n\t"
00145                   "adcb $0, %h0\n\t"
00146                   "\n1:\n\t"
00147 
00148                   /* Fold down to a uint16_t */
                        /* The full-width sum is pushed and then popped
                         * back 16 bits at a time: the first word replaces
                         * the low 16 bits of the sum and each subsequent
                         * word is added to it with carry.
                         */
00149                   "push %0\n\t"
00150                   "popw %w0\n\t"
00151                   "popw %w2\n\t"
00152                   "adcw %w2, %w0\n\t"
00153 #if ULONG_MAX > 0xffffffffUL /* 64-bit only */
00154                   "popw %w2\n\t"
00155                   "adcw %w2, %w0\n\t"
00156                   "popw %w2\n\t"
00157                   "adcw %w2, %w0\n\t"
00158 #endif /* 64-bit only */
00159 
00160                   /* Consume CF */
                        /* Two additions are needed: adding the outstanding
                         * carry may itself wrap the 16-bit sum and
                         * generate one further carry.
                         */
00161                   "adcw $0, %w0\n\t"
00162                   "adcw $0, %w0\n\t"
00163 
                        /* Operands:
                         *   %0  running sum ("Q": a register with an
                         *       addressable high byte, required for %h0)
                         *   %1  source pointer ("S"), initially data
                         *   %2  lods destination ("a"), initially 0
                         *   %3  loop counter ("c"), initially
                         *       initial_word_count + 1
                         *   %4  loop_partial_count, overwritten with the
                         *       computed loop entry address
                         *   %5  address of x86_tcpip_loop_end
                         *   %12 loop_count + 1
                         *   %13 final_word_count + 1
                         *   %14 final_byte
                         * All outputs are early-clobber since they are
                         * written before the last inputs are consumed.
                         */
00164                   : "=&Q" ( sum ), "=&S" ( discard_S ), "=&a" ( discard_a ),
00165                     "=&c" ( discard_c ), "=&r" ( discard_r1 ),
00166                     "=&r" ( discard_r2 )
00167                   : "0" ( sum ), "1" ( data ), "2" ( 0 ),
00168                     "3" ( initial_word_count + 1 ), "4" ( loop_partial_count ),
00169                     "5" ( x86_tcpip_loop_end ), "g" ( loop_count + 1 ),
00170                     "g" ( final_word_count + 1 ), "g" ( final_byte ) );
00171 
              /* Return the complement of the folded 16-bit sum */
00172         return ( ~sum & 0xffff );
00173 }