/*
 * Copyright (C) 2015 Michael Brown <mbrown@fensystems.co.uk>.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 *
 * You can also choose to distribute this program under the terms of
 * the Unmodified Binary Distribution Licence (as given in the file
 * COPYING.UBDL), provided that you have satisfied its requirements.
 */

FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );

/** @file
 *
 * AES algorithm
 *
 */

#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <byteswap.h>
#include <ipxe/rotate.h>
#include <ipxe/crypto.h>
#include <ipxe/ecb.h>
#include <ipxe/cbc.h>
#include <ipxe/aes.h>

/** AES strides
 *
 * These are the strides (modulo 16) used to walk through the AES
 * input state bytes in order of byte position after [Inv]ShiftRows.
 */
enum aes_stride {
        /** Input stride for ShiftRows
         *
         *    0 4 8 c
         *     \ \ \
         *    1 5 9 d
         *     \ \ \
         *    2 6 a e
         *     \ \ \
         *    3 7 b f
         */
        AES_STRIDE_SHIFTROWS = +5,
        /** Input stride for InvShiftRows
         *
         *    0 4 8 c
         *     / / /
         *    1 5 9 d
         *     / / /
         *    2 6 a e
         *     / / /
         *    3 7 b f
         */
        AES_STRIDE_INVSHIFTROWS = -3,
};
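
/* A worked illustration (not in the original source): with the
 * ShiftRows stride of +5, output offsets 0..15 read input bytes
 * 0,5,a,f,4,9,e,3,8,d,2,7,c,1,6,b (modulo 16), which is exactly the
 * byte order that ShiftRows produces for a column-major state.
 */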

/** A single AES lookup table entry
 *
 * This represents the product (in the Galois field GF(2^8)) of an
 * eight-byte vector multiplier with a single scalar multiplicand.
 *
 * The vector multipliers used for AES will be {1,1,1,3,2,1,1,3} for
 * MixColumns and {1,9,13,11,14,9,13,11} for InvMixColumns.  This
 * allows for the result of multiplying any single column of the
 * [Inv]MixColumns matrix by a scalar value to be obtained simply by
 * extracting the relevant four-byte subset from the lookup table
 * entry.
 *
 * For example, to find the result of multiplying the second column of
 * the MixColumns matrix by the scalar value 0x80:
 *
 * MixColumns column[0]: {                            2,    1,    1,    3 }
 * MixColumns column[1]: {                      3,    2,    1,    1       }
 * MixColumns column[2]: {                1,    3,    2,    1             }
 * MixColumns column[3]: {          1,    1,    3,    2                   }
 * Vector multiplier:    {    1,    1,    1,    3,    2,    1,    1,    3 }
 * Scalar multiplicand:    0x80
 * Lookup table entry:   { 0x80, 0x80, 0x80, 0x9b, 0x1b, 0x80, 0x80, 0x9b }
 *
 * The second column of the MixColumns matrix is {3,2,1,1}.  The
 * product of this column with the scalar value 0x80 can be obtained
 * by extracting the relevant four-byte subset of the lookup table
 * entry:
 *
 * MixColumns column[1]: {                      3,    2,    1,    1       }
 * Vector multiplier:    {    1,    1,    1,    3,    2,    1,    1,    3 }
 * Lookup table entry:   { 0x80, 0x80, 0x80, 0x9b, 0x1b, 0x80, 0x80, 0x9b }
 * Product:              {                   0x9b, 0x1b, 0x80, 0x80       }
 *
 * The column lookups require only seven bytes of the eight-byte
 * entry: the remaining (first) byte is used to hold the scalar
 * multiplicand itself (i.e. the first byte of the vector multiplier
 * is always chosen to be 1).
 */
union aes_table_entry {
        /** Viewed as an array of bytes */
        uint8_t byte[8];
} __attribute__ (( packed ));

/** An AES lookup table
 *
 * This represents the products (in the Galois field GF(2^8)) of a
 * constant eight-byte vector multiplier with all possible 256 scalar
 * multiplicands.
 *
 * The entries are indexed by the AES [Inv]SubBytes S-box output
 * values (denoted S(N)).  This allows for the result of multiplying
 * any single column of the [Inv]MixColumns matrix by S(N) to be
 * obtained simply by extracting the relevant four-byte subset from
 * the Nth table entry.  For example:
 *
 * Input byte (N):         0x3a
 * SubBytes output S(N):   0x80
 * MixColumns column[1]: {                      3,    2,    1,    1       }
 * Vector multiplier:    {    1,    1,    1,    3,    2,    1,    1,    3 }
 * Table entry[0x3a]:    { 0x80, 0x80, 0x80, 0x9b, 0x1b, 0x80, 0x80, 0x9b }
 * Product:              {                   0x9b, 0x1b, 0x80, 0x80       }
 *
 * Since the first byte of the eight-byte vector multiplier is always
 * chosen to be 1, the value of S(N) may be looked up by extracting
 * the first byte of the Nth table entry.
 */
struct aes_table {
        /** Table entries, indexed by S(N) */
        union aes_table_entry entry[256];
} __attribute__ (( aligned ( 8 ) ));

/** AES MixColumns lookup table */
static struct aes_table aes_mixcolumns;

/** AES InvMixColumns lookup table */
static struct aes_table aes_invmixcolumns;

/**
 * Multiply [Inv]MixColumns matrix column by scalar multiplicand
 *
 * @v entry             AES lookup table entry for scalar multiplicand
 * @v column            [Inv]MixColumns matrix column index
 * @ret product         Product of matrix column with scalar multiplicand
 */
static inline __attribute__ (( always_inline )) uint32_t
aes_entry_column ( const union aes_table_entry *entry, unsigned int column ) {
        const union {
                uint8_t byte;
                uint32_t column;
        } __attribute__ (( may_alias )) *product;

        /* Locate relevant four-byte subset */
        product = container_of ( &entry->byte[ 4 - column ],
                                 typeof ( *product ), byte );

        /* Extract this four-byte subset */
        return product->column;
}
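
/* A worked check (not in the original source): for column 1 the
 * four-byte subset starts at byte[3], so the documented table entry
 * { 0x80, 0x80, 0x80, 0x9b, 0x1b, 0x80, 0x80, 0x9b } yields the
 * product bytes { 0x9b, 0x1b, 0x80, 0x80 }, matching the example in
 * the lookup table documentation above.
 */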

/**
 * Multiply [Inv]MixColumns matrix column by S-boxed input byte
 *
 * @v table             AES lookup table
 * @v stride            AES row shift stride
 * @v in                AES input state
 * @v offset            Output byte offset (after [Inv]ShiftRows)
 * @ret product         Product of matrix column with S(input byte)
 *
 * Note that the specified offset is not the offset of the input byte;
 * it is the offset of the output byte which corresponds to the input
 * byte.  This output byte offset is used both to calculate the input
 * byte offset and to select the appropriate matrix column.
 *
 * With a compile-time constant offset, this function will optimise
 * down to a single "movzbl" (to extract the input byte) and will
 * generate a single x86 memory reference expression which can then be
 * used directly within a single "xorl" instruction.
 */
static inline __attribute__ (( always_inline )) uint32_t
aes_column ( const struct aes_table *table, size_t stride,
             const union aes_matrix *in, size_t offset ) {
        const union aes_table_entry *entry;
        unsigned int byte;

        /* Extract input byte corresponding to this output byte offset
         * (i.e. perform [Inv]ShiftRows).
         */
        byte = in->byte[ ( stride * offset ) & 0xf ];

        /* Locate lookup table entry for this input byte (i.e. perform
         * [Inv]SubBytes).
         */
        entry = &table->entry[byte];

        /* Multiply appropriate matrix column by this input byte
         * (i.e. perform [Inv]MixColumns).
         */
        return aes_entry_column ( entry, ( offset & 0x3 ) );
}

/**
 * Calculate intermediate round output column
 *
 * @v table             AES lookup table
 * @v stride            AES row shift stride
 * @v in                AES input state
 * @v key               AES round key
 * @v column            Column index
 * @ret output          Output column value
 */
static inline __attribute__ (( always_inline )) uint32_t
aes_output ( const struct aes_table *table, size_t stride,
             const union aes_matrix *in, const union aes_matrix *key,
             unsigned int column ) {
        size_t offset = ( column * 4 );

        /* Perform [Inv]ShiftRows, [Inv]SubBytes, [Inv]MixColumns, and
         * AddRoundKey for this column.  The loop is unrolled to allow
         * for the required compile-time constant optimisations.
         */
        return ( aes_column ( table, stride, in, ( offset + 0 ) ) ^
                 aes_column ( table, stride, in, ( offset + 1 ) ) ^
                 aes_column ( table, stride, in, ( offset + 2 ) ) ^
                 aes_column ( table, stride, in, ( offset + 3 ) ) ^
                 key->column[column] );
}

/**
 * Perform a single intermediate round
 *
 * @v table             AES lookup table
 * @v stride            AES row shift stride
 * @v in                AES input state
 * @v out               AES output state
 * @v key               AES round key
 */
static inline __attribute__ (( always_inline )) void
aes_round ( const struct aes_table *table, size_t stride,
            const union aes_matrix *in, union aes_matrix *out,
            const union aes_matrix *key ) {

        /* Perform [Inv]ShiftRows, [Inv]SubBytes, [Inv]MixColumns, and
         * AddRoundKey for all columns.  The loop is unrolled to allow
         * for the required compile-time constant optimisations.
         */
        out->column[0] = aes_output ( table, stride, in, key, 0 );
        out->column[1] = aes_output ( table, stride, in, key, 1 );
        out->column[2] = aes_output ( table, stride, in, key, 2 );
        out->column[3] = aes_output ( table, stride, in, key, 3 );
}

/**
 * Perform encryption intermediate rounds
 *
 * @v in                AES input state
 * @v out               AES output state
 * @v key               Round keys
 * @v rounds            Number of rounds (must be odd)
 *
 * This function is deliberately marked as non-inlinable to ensure
 * maximal availability of registers for GCC's register allocator,
 * which has a tendency to otherwise spill performance-critical
 * registers to the stack.
 */
static __attribute__ (( noinline )) void
aes_encrypt_rounds ( union aes_matrix *in, union aes_matrix *out,
                     const union aes_matrix *key, unsigned int rounds ) {
        union aes_matrix *tmp;

        /* Perform intermediate rounds */
        do {
                /* Perform one intermediate round */
                aes_round ( &aes_mixcolumns, AES_STRIDE_SHIFTROWS,
                            in, out, key++ );

                /* Swap input and output states for next round */
                tmp = in;
                in = out;
                out = tmp;

        } while ( --rounds );
}

/**
 * Perform decryption intermediate rounds
 *
 * @v in                AES input state
 * @v out               AES output state
 * @v key               Round keys
 * @v rounds            Number of rounds (must be odd)
 *
 * As with aes_encrypt_rounds(), this function is deliberately marked
 * as non-inlinable.
 *
 * This function could potentially use the same binary code as is used
 * for encryption.  To compensate for the difference between ShiftRows
 * and InvShiftRows, half of the input byte offsets would have to be
 * modifiable at runtime (half by an offset of +4/-4, half by an
 * offset of -4/+4 for ShiftRows/InvShiftRows).  This can be
 * accomplished in x86 assembly within the number of available
 * registers, but GCC's register allocator struggles to do so,
 * resulting in a significant performance decrease due to registers
 * being spilled to the stack.  We therefore use two separate but very
 * similar binary functions based on the same C source.
 */
static __attribute__ (( noinline )) void
aes_decrypt_rounds ( union aes_matrix *in, union aes_matrix *out,
                     const union aes_matrix *key, unsigned int rounds ) {
        union aes_matrix *tmp;

        /* Perform intermediate rounds */
        do {
                /* Perform one intermediate round */
                aes_round ( &aes_invmixcolumns, AES_STRIDE_INVSHIFTROWS,
                            in, out, key++ );

                /* Swap input and output states for next round */
                tmp = in;
                in = out;
                out = tmp;

        } while ( --rounds );
}

/**
 * Perform standalone AddRoundKey
 *
 * @v state             AES state
 * @v key               AES round key
 */
static inline __attribute__ (( always_inline )) void
aes_addroundkey ( union aes_matrix *state, const union aes_matrix *key ) {

        state->column[0] ^= key->column[0];
        state->column[1] ^= key->column[1];
        state->column[2] ^= key->column[2];
        state->column[3] ^= key->column[3];
}

/**
 * Perform final round
 *
 * @v table             AES lookup table
 * @v stride            AES row shift stride
 * @v in                AES input state
 * @v out               AES output state
 * @v key               AES round key
 */
static void aes_final ( const struct aes_table *table, size_t stride,
                        const union aes_matrix *in, union aes_matrix *out,
                        const union aes_matrix *key ) {
        const union aes_table_entry *entry;
        unsigned int byte;
        size_t out_offset;
        size_t in_offset;

        /* Perform [Inv]ShiftRows and [Inv]SubBytes */
        for ( out_offset = 0, in_offset = 0 ; out_offset < 16 ;
              out_offset++, in_offset = ( ( in_offset + stride ) & 0xf ) ) {

                /* Extract input byte (i.e. perform [Inv]ShiftRows) */
                byte = in->byte[in_offset];

                /* Locate lookup table entry for this input byte
                 * (i.e. perform [Inv]SubBytes).
                 */
                entry = &table->entry[byte];

                /* Store output byte */
                out->byte[out_offset] = entry->byte[0];
        }

        /* Perform AddRoundKey */
        aes_addroundkey ( out, key );
}

/**
 * Encrypt data
 *
 * @v ctx               Context
 * @v src               Data to encrypt
 * @v dst               Buffer for encrypted data
 * @v len               Length of data
 */
static void aes_encrypt ( void *ctx, const void *src, void *dst, size_t len ) {
        struct aes_context *aes = ctx;
        union aes_matrix buffer[2];
        union aes_matrix *in = &buffer[0];
        union aes_matrix *out = &buffer[1];
        unsigned int rounds = aes->rounds;

        /* Sanity check */
        assert ( len == sizeof ( *in ) );

        /* Initialise input state */
        memcpy ( in, src, sizeof ( *in ) );

        /* Perform initial round (AddRoundKey) */
        aes_addroundkey ( in, &aes->encrypt.key[0] );

        /* Perform intermediate rounds (ShiftRows, SubBytes,
         * MixColumns, AddRoundKey).
         */
        aes_encrypt_rounds ( in, out, &aes->encrypt.key[1], ( rounds - 2 ) );
        in = out;

        /* Perform final round (ShiftRows, SubBytes, AddRoundKey) */
        out = dst;
        aes_final ( &aes_mixcolumns, AES_STRIDE_SHIFTROWS, in, out,
                    &aes->encrypt.key[ rounds - 1 ] );
}

/**
 * Decrypt data
 *
 * @v ctx               Context
 * @v src               Data to decrypt
 * @v dst               Buffer for decrypted data
 * @v len               Length of data
 */
static void aes_decrypt ( void *ctx, const void *src, void *dst, size_t len ) {
        struct aes_context *aes = ctx;
        union aes_matrix buffer[2];
        union aes_matrix *in = &buffer[0];
        union aes_matrix *out = &buffer[1];
        unsigned int rounds = aes->rounds;

        /* Sanity check */
        assert ( len == sizeof ( *in ) );

        /* Initialise input state */
        memcpy ( in, src, sizeof ( *in ) );

        /* Perform initial round (AddRoundKey) */
        aes_addroundkey ( in, &aes->decrypt.key[0] );

        /* Perform intermediate rounds (InvShiftRows, InvSubBytes,
         * InvMixColumns, AddRoundKey).
         */
        aes_decrypt_rounds ( in, out, &aes->decrypt.key[1], ( rounds - 2 ) );
        in = out;

        /* Perform final round (InvShiftRows, InvSubBytes, AddRoundKey) */
        out = dst;
        aes_final ( &aes_invmixcolumns, AES_STRIDE_INVSHIFTROWS, in, out,
                    &aes->decrypt.key[ rounds - 1 ] );
}

/**
 * Multiply a polynomial by (x) modulo (x^8 + x^4 + x^3 + x + 1) in GF(2^8)
 *
 * @v poly              Polynomial to be multiplied
 * @ret result          Result
 */
static __attribute__ (( const )) unsigned int aes_double ( unsigned int poly ) {

        /* Multiply polynomial by (x), placing the resulting x^8
         * coefficient in the LSB (i.e. rotate byte left by one).
         */
        poly = rol8 ( poly, 1 );

        /* If coefficient of x^8 (in LSB) is non-zero, then reduce by
         * subtracting (x^8 + x^4 + x^3 + x + 1) in GF(2^8).
         */
        if ( poly & 0x01 ) {
                poly ^= 0x01; /* Subtract x^8 (currently in LSB) */
                poly ^= 0x1b; /* Subtract (x^4 + x^3 + x + 1) */
        }

        return poly;
}
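
/* Hand-checked examples (not in the original source): aes_double
 * ( 0x57 ) = 0xae, since the top bit is clear and no reduction is
 * needed, while aes_double ( 0x80 ) rotates to 0x01, clears the x^8
 * carry and reduces: 0x01 ^ 0x01 ^ 0x1b = 0x1b, i.e. {80} * {02} =
 * {1b} as in FIPS-197 section 4.2.
 */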

/**
 * Fill in MixColumns lookup table entry
 *
 * @v entry             AES lookup table entry for scalar multiplicand
 *
 * The MixColumns lookup table vector multiplier is {1,1,1,3,2,1,1,3}.
 */
static void aes_mixcolumns_entry ( union aes_table_entry *entry ) {
        unsigned int scalar_x_1;
        unsigned int scalar_x;
        unsigned int scalar;

        /* Retrieve scalar multiplicand */
        scalar = entry->byte[0];
        entry->byte[1] = scalar;
        entry->byte[2] = scalar;
        entry->byte[5] = scalar;
        entry->byte[6] = scalar;

        /* Calculate scalar multiplied by (x) */
        scalar_x = aes_double ( scalar );
        entry->byte[4] = scalar_x;

        /* Calculate scalar multiplied by (x + 1) */
        scalar_x_1 = ( scalar_x ^ scalar );
        entry->byte[3] = scalar_x_1;
        entry->byte[7] = scalar_x_1;
}
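
/* A worked check (not in the original source): for scalar
 * multiplicand 0x80 this yields scalar_x = {80}*{02} = 0x1b and
 * scalar_x_1 = 0x1b ^ 0x80 = 0x9b, reproducing the table entry
 * { 0x80, 0x80, 0x80, 0x9b, 0x1b, 0x80, 0x80, 0x9b } documented
 * above.
 */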

/**
 * Fill in InvMixColumns lookup table entry
 *
 * @v entry             AES lookup table entry for scalar multiplicand
 *
 * The InvMixColumns lookup table vector multiplier is {1,9,13,11,14,9,13,11}.
 */
static void aes_invmixcolumns_entry ( union aes_table_entry *entry ) {
        unsigned int scalar_x3_x2_x;
        unsigned int scalar_x3_x2_1;
        unsigned int scalar_x3_x2;
        unsigned int scalar_x3_x_1;
        unsigned int scalar_x3_1;
        unsigned int scalar_x3;
        unsigned int scalar_x2;
        unsigned int scalar_x;
        unsigned int scalar;

        /* Retrieve scalar multiplicand */
        scalar = entry->byte[0];

        /* Calculate scalar multiplied by (x) */
        scalar_x = aes_double ( scalar );

        /* Calculate scalar multiplied by (x^2) */
        scalar_x2 = aes_double ( scalar_x );

        /* Calculate scalar multiplied by (x^3) */
        scalar_x3 = aes_double ( scalar_x2 );

        /* Calculate scalar multiplied by (x^3 + 1) */
        scalar_x3_1 = ( scalar_x3 ^ scalar );
        entry->byte[1] = scalar_x3_1;
        entry->byte[5] = scalar_x3_1;

        /* Calculate scalar multiplied by (x^3 + x + 1) */
        scalar_x3_x_1 = ( scalar_x3_1 ^ scalar_x );
        entry->byte[3] = scalar_x3_x_1;
        entry->byte[7] = scalar_x3_x_1;

        /* Calculate scalar multiplied by (x^3 + x^2) */
        scalar_x3_x2 = ( scalar_x3 ^ scalar_x2 );

        /* Calculate scalar multiplied by (x^3 + x^2 + 1) */
        scalar_x3_x2_1 = ( scalar_x3_x2 ^ scalar );
        entry->byte[2] = scalar_x3_x2_1;
        entry->byte[6] = scalar_x3_x2_1;

        /* Calculate scalar multiplied by (x^3 + x^2 + x) */
        scalar_x3_x2_x = ( scalar_x3_x2 ^ scalar_x );
        entry->byte[4] = scalar_x3_x2_x;
}
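
/* A worked check (not in the original source): for scalar
 * multiplicand 0x01 the bytes computed above are 9, 13, 11, 14, 9,
 * 13, 11 in byte positions 1-7, reproducing the InvMixColumns vector
 * multiplier {1,9,13,11,14,9,13,11} quoted in the doc comment.
 */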

/**
 * Generate AES lookup tables
 *
 */
static void aes_generate ( void ) {
        union aes_table_entry *entry;
        union aes_table_entry *inventry;
        unsigned int poly = 0x01;
        unsigned int invpoly = 0x01;
        unsigned int transformed;
        unsigned int i;

        /* Iterate over non-zero values of GF(2^8) using generator (x + 1) */
        do {

                /* Multiply polynomial by (x + 1) */
                poly ^= aes_double ( poly );

                /* Divide inverse polynomial by (x + 1).  This code
                 * fragment is taken directly from the Wikipedia page
                 * on the Rijndael S-box.  An explanation of why it
                 * works would be greatly appreciated.
                 */
                invpoly ^= ( invpoly << 1 );
                invpoly ^= ( invpoly << 2 );
                invpoly ^= ( invpoly << 4 );
                if ( invpoly & 0x80 )
                        invpoly ^= 0x09;
                invpoly &= 0xff;

                /* Apply affine transformation */
                transformed = ( 0x63 ^ invpoly ^ rol8 ( invpoly, 1 ) ^
                                rol8 ( invpoly, 2 ) ^ rol8 ( invpoly, 3 ) ^
                                rol8 ( invpoly, 4 ) );

                /* Populate S-box (within MixColumns lookup table) */
                aes_mixcolumns.entry[poly].byte[0] = transformed;

        } while ( poly != 0x01 );

        /* Populate zeroth S-box entry (which has no inverse) */
        aes_mixcolumns.entry[0].byte[0] = 0x63;

        /* Fill in MixColumns and InvMixColumns lookup tables */
        for ( i = 0 ; i < 256 ; i++ ) {

                /* Fill in MixColumns lookup table entry */
                entry = &aes_mixcolumns.entry[i];
                aes_mixcolumns_entry ( entry );

                /* Populate inverse S-box (within InvMixColumns lookup table) */
                inventry = &aes_invmixcolumns.entry[ entry->byte[0] ];
                inventry->byte[0] = i;

                /* Fill in InvMixColumns lookup table entry */
                aes_invmixcolumns_entry ( inventry );
        }
}
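
/* Sanity values (standard FIPS-197 S-box entries, not in the original
 * source): after generation, aes_mixcolumns.entry[0x00].byte[0] is
 * 0x63, entry[0x01].byte[0] is 0x7c, and entry[0x3a].byte[0] is 0x80,
 * matching the S(0x3a) example in the lookup table documentation.
 */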

/**
 * Rotate key column
 *
 * @v column            Key column
 * @ret column          Updated key column
 */
static inline __attribute__ (( always_inline )) uint32_t
aes_key_rotate ( uint32_t column ) {

        return ( ( __BYTE_ORDER == __LITTLE_ENDIAN ) ?
                 ror32 ( column, 8 ) : rol32 ( column, 8 ) );
}

/**
 * Apply S-box to key column
 *
 * @v column            Key column
 * @ret column          Updated key column
 */
static uint32_t aes_key_sbox ( uint32_t column ) {
        unsigned int i;
        uint8_t byte;

        for ( i = 0 ; i < 4 ; i++ ) {
                byte = ( column & 0xff );
                byte = aes_mixcolumns.entry[byte].byte[0];
                column = ( ( column & ~0xff ) | byte );
                column = rol32 ( column, 8 );
        }
        return column;
}

/**
 * Apply schedule round constant to key column
 *
 * @v column            Key column
 * @v rcon              Round constant
 * @ret column          Updated key column
 */
static inline __attribute__ (( always_inline )) uint32_t
aes_key_rcon ( uint32_t column, unsigned int rcon ) {

        return ( ( __BYTE_ORDER == __LITTLE_ENDIAN ) ?
                 ( column ^ rcon ) : ( column ^ ( rcon << 24 ) ) );
}

/**
 * Set key
 *
 * @v ctx               Context
 * @v key               Key
 * @v keylen            Key length
 * @ret rc              Return status code
 */
static int aes_setkey ( void *ctx, const void *key, size_t keylen ) {
        struct aes_context *aes = ctx;
        union aes_matrix *enc;
        union aes_matrix *dec;
        union aes_matrix temp;
        union aes_matrix zero;
        unsigned int rcon = 0x01;
        unsigned int rounds;
        size_t offset = 0;
        uint32_t *prev;
        uint32_t *next;
        uint32_t *end;
        uint32_t tmp;

        /* Generate lookup tables, if not already done */
        if ( ! aes_mixcolumns.entry[0].byte[0] )
                aes_generate();

        /* Validate key length and calculate number of rounds (counting
         * the initial AddRoundKey as a round, i.e. the number of round
         * keys)
         */
        switch ( keylen ) {
        case ( 128 / 8 ) :
                rounds = 11;
                break;
        case ( 192 / 8 ) :
                rounds = 13;
                break;
        case ( 256 / 8 ) :
                rounds = 15;
                break;
        default:
                DBGC ( aes, "AES %p unsupported key length (%zd bits)\n",
                       aes, ( keylen * 8 ) );
                return -EINVAL;
        }
        aes->rounds = rounds;
        enc = aes->encrypt.key;
        end = enc[rounds].column;

        /* Copy raw key */
        memcpy ( enc, key, keylen );
        prev = enc->column;
        next = ( ( ( void * ) prev ) + keylen );
        tmp = next[-1];

        /* Construct expanded key */
        while ( next < end ) {

                /* If this is the first column of an expanded key
                 * block, or the middle column of an AES-256 key
                 * block, then apply the S-box.
                 */
                if ( ( offset == 0 ) || ( ( offset | keylen ) == 48 ) )
                        tmp = aes_key_sbox ( tmp );

                /* If this is the first column of an expanded key
                 * block then rotate and apply the round constant.
                 */
                if ( offset == 0 ) {
                        tmp = aes_key_rotate ( tmp );
                        tmp = aes_key_rcon ( tmp, rcon );
                        rcon = aes_double ( rcon );
                }

                /* XOR with previous key column */
                tmp ^= *prev;

                /* Store column */
                *next = tmp;

                /* Move to next column */
                offset += sizeof ( *next );
                if ( offset == keylen )
                        offset = 0;
                next++;
                prev++;
        }
        DBGC2 ( aes, "AES %p expanded %zd-bit key:\n", aes, ( keylen * 8 ) );
        DBGC2_HDA ( aes, 0, &aes->encrypt, ( rounds * sizeof ( *enc ) ) );

        /* Convert to decryption key */
        memset ( &zero, 0, sizeof ( zero ) );
        dec = &aes->decrypt.key[ rounds - 1 ];
        memcpy ( dec--, enc++, sizeof ( *dec ) );
        while ( dec > aes->decrypt.key ) {
                /* Perform InvMixColumns (by reusing the encryption
                 * final-round code to perform ShiftRows+SubBytes and
                 * reusing the decryption intermediate-round code to
                 * perform InvShiftRows+InvSubBytes+InvMixColumns, all
                 * with a zero encryption key).
                 */
                aes_final ( &aes_mixcolumns, AES_STRIDE_SHIFTROWS,
                            enc++, &temp, &zero );
                aes_decrypt_rounds ( &temp, dec--, &zero, 1 );
        }
        memcpy ( dec--, enc++, sizeof ( *dec ) );
        DBGC2 ( aes, "AES %p inverted %zd-bit key:\n", aes, ( keylen * 8 ) );
        DBGC2_HDA ( aes, 0, &aes->decrypt, ( rounds * sizeof ( *dec ) ) );

        return 0;
}
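
/* A hand-checked example of the expansion loop above (FIPS-197
 * appendix A.1, not in the original source): for the 128-bit key
 * 2b7e151628aed2a6abf7158809cf4f3c, the first derived column takes
 * 09cf4f3c through the S-box and rotation to 8a84eb01, applies the
 * round constant to give 8b84eb01, and XORs with the first raw key
 * column 2b7e1516 to store a0fafe17.
 */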

/**
 * Set initialisation vector
 *
 * @v ctx               Context
 * @v iv                Initialisation vector
 */
static void aes_setiv ( void *ctx __unused, const void *iv __unused ) {
        /* Nothing to do */
}

/** Basic AES algorithm */
struct cipher_algorithm aes_algorithm = {
        .name = "aes",
        .ctxsize = sizeof ( struct aes_context ),
        .blocksize = AES_BLOCKSIZE,
        .setkey = aes_setkey,
        .setiv = aes_setiv,
        .encrypt = aes_encrypt,
        .decrypt = aes_decrypt,
};
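
/* A minimal usage sketch (not part of the original file).  It assumes
 * the cipher_setkey() and cipher_encrypt() wrapper functions from
 * ipxe/crypto.h; the vectors are the FIPS-197 appendix B example, so
 * the expected ciphertext is 3925841d02dc09fbdc118597196a0b32.
 */
static void __attribute__ (( unused )) aes_example ( void ) {
        static const uint8_t key[16] = {
                0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6,
                0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c
        };
        static const uint8_t plaintext[16] = {
                0x32, 0x43, 0xf6, 0xa8, 0x88, 0x5a, 0x30, 0x8d,
                0x31, 0x31, 0x98, 0xa2, 0xe0, 0x37, 0x07, 0x34
        };
        struct aes_context ctx;
        uint8_t ciphertext[16];

        /* Expand the key, then encrypt a single 16-byte block */
        cipher_setkey ( &aes_algorithm, &ctx, key, sizeof ( key ) );
        cipher_encrypt ( &aes_algorithm, &ctx, plaintext, ciphertext,
                         sizeof ( ciphertext ) );
}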

/* AES in Electronic Codebook mode */
ECB_CIPHER ( aes_ecb, aes_ecb_algorithm,
             aes_algorithm, struct aes_context, AES_BLOCKSIZE );

/* AES in Cipher Block Chaining mode */
CBC_CIPHER ( aes_cbc, aes_cbc_algorithm,
             aes_algorithm, struct aes_context, AES_BLOCKSIZE );