iPXE
deflate.c
Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2014 Michael Brown <mbrown@fensystems.co.uk>.
00003  *
00004  * This program is free software; you can redistribute it and/or
00005  * modify it under the terms of the GNU General Public License as
00006  * published by the Free Software Foundation; either version 2 of the
00007  * License, or any later version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but
00010  * WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012  * General Public License for more details.
00013  *
00014  * You should have received a copy of the GNU General Public License
00015  * along with this program; if not, write to the Free Software
00016  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
00017  * 02110-1301, USA.
00018  *
00019  * You can also choose to distribute this program under the terms of
00020  * the Unmodified Binary Distribution Licence (as given in the file
00021  * COPYING.UBDL), provided that you have satisfied its requirements.
00022  */
00023 
00024 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
00025 
00026 #include <string.h>
00027 #include <strings.h>
00028 #include <errno.h>
00029 #include <assert.h>
00030 #include <ctype.h>
00031 #include <ipxe/uaccess.h>
00032 #include <ipxe/deflate.h>
00033 
00034 /** @file
00035  *
00036  * DEFLATE decompression algorithm
00037  *
00038  * This file implements the decompression half of the DEFLATE
00039  * algorithm specified in RFC 1951.
00040  *
00041  * Portions of this code are derived from wimboot's xca.c.
00042  *
00043  */
00044 
00045 /**
00046  * Byte reversal table
00047  *
00048  * For some insane reason, the DEFLATE format stores some values in
00049  * bit-reversed order.
00050  */
00051 static uint8_t deflate_reverse[256];
00052 
00053 /** Literal/length base values
00054  *
00055  * We include entries only for literal/length codes 257-284.  Code 285
00056  * does not fit the pattern (it represents a length of 258; following
00057  * the pattern from the earlier codes would give a length of 259), and
00058  * has no extra bits.  Codes 286-287 are invalid, but can occur.  We
00059  * treat any code greater than 284 as meaning "length 285, no extra
00060  * bits".
00061  */
00062 static uint8_t deflate_litlen_base[28];
00063 
00064 /** Distance base values
00065  *
00066  * We include entries for all possible codes 0-31, avoiding the need
00067  * to check for undefined codes 30 and 31 before performing the
00068  * lookup.  Codes 30 and 31 are never initialised, and will therefore
00069  * be treated as meaning "14 extra bits, base distance 0".
00070  */
00071 static uint16_t deflate_distance_base[32];
00072 
00073 /** Code length map */
00074 static uint8_t deflate_codelen_map[19] = {
00075         16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
00076 };
00077 
00078 /** Static Huffman alphabet length patterns */
00079 static struct deflate_static_length_pattern deflate_static_length_patterns[] = {
00080         /* Literal/length code lengths */
00081         { 0x88, ( ( ( 143 -   0 ) + 1 ) / 2 ) },
00082         { 0x99, ( ( ( 255 - 144 ) + 1 ) / 2 ) },
00083         { 0x77, ( ( ( 279 - 256 ) + 1 ) / 2 ) },
00084         { 0x88, ( ( ( 287 - 280 ) + 1 ) / 2 ) },
00085         /* Distance code lengths */
00086         { 0x55, ( ( (  31 -   0 ) + 1 ) / 2 ) },
00087         /* End marker */
00088         { 0, 0 }
00089 };
00090 
00091 /**
00092  * Transcribe binary value (for debugging)
00093  *
00094  * @v value             Value
00095  * @v bits              Length of value (in bits)
00096  * @ret string          Transcribed value
00097  */
00098 static const char * deflate_bin ( unsigned long value, unsigned int bits ) {
00099         static char buf[ ( 8 * sizeof ( value ) ) + 1 /* NUL */ ];
00100         char *out = buf;
00101 
00102         /* Sanity check */
00103         assert ( bits < sizeof ( buf ) );
00104 
00105         /* Transcribe value */
00106         while ( bits-- )
00107                 *(out++) = ( ( value & ( 1 << bits ) ) ? '1' : '0' );
00108         *out = '\0';
00109 
00110         return buf;
00111 }
00112 
00113 /**
00114  * Set Huffman symbol length
00115  *
00116  * @v deflate           Decompressor
00117  * @v index             Index within lengths
00118  * @v bits              Symbol length (in bits)
00119  */
00120 static void deflate_set_length ( struct deflate *deflate, unsigned int index,
00121                                  unsigned int bits ) {
00122 
00123         deflate->lengths[ index / 2 ] |= ( bits << ( 4 * ( index % 2 ) ) );
00124 }
00125 
00126 /**
00127  * Get Huffman symbol length
00128  *
00129  * @v deflate           Decompressor
00130  * @v index             Index within lengths
00131  * @ret bits            Symbol length (in bits)
00132  */
00133 static unsigned int deflate_length ( struct deflate *deflate,
00134                                      unsigned int index ) {
00135 
00136         return ( ( deflate->lengths[ index / 2 ] >> ( 4 * ( index % 2 ) ) )
00137                  & 0x0f );
00138 }
00139 
00140 /**
00141  * Determine Huffman alphabet name (for debugging)
00142  *
00143  * @v deflate           Decompressor
00144  * @v alphabet          Huffman alphabet
00145  * @ret name            Alphabet name
00146  */
00147 static const char * deflate_alphabet_name ( struct deflate *deflate,
00148                                             struct deflate_alphabet *alphabet ){
00149 
00150         if ( alphabet == &deflate->litlen ) {
00151                 return "litlen";
00152         } else if ( alphabet == &deflate->distance_codelen ) {
00153                 return "distance/codelen";
00154         } else {
00155                 return "<UNKNOWN>";
00156         }
00157 }
00158 
00159 /**
00160  * Dump Huffman alphabet (for debugging)
00161  *
00162  * @v deflate           Decompressor
00163  * @v alphabet          Huffman alphabet
00164  */
00165 static void deflate_dump_alphabet ( struct deflate *deflate,
00166                                     struct deflate_alphabet *alphabet ) {
00167         struct deflate_huf_symbols *huf_sym;
00168         unsigned int bits;
00169         unsigned int huf;
00170         unsigned int i;
00171 
00172         /* Do nothing unless debugging is enabled */
00173         if ( ! DBG_EXTRA )
00174                 return;
00175 
00176         /* Dump symbol table for each utilised length */
00177         for ( bits = 1 ; bits <= ( sizeof ( alphabet->huf ) /
00178                                    sizeof ( alphabet->huf[0] ) ) ; bits++ ) {
00179                 huf_sym = &alphabet->huf[ bits - 1 ];
00180                 if ( huf_sym->freq == 0 )
00181                         continue;
00182                 huf = ( huf_sym->start >> huf_sym->shift );
00183                 DBGC2 ( alphabet, "DEFLATE %p \"%s\" length %d start \"%s\" "
00184                         "freq %d:", deflate,
00185                         deflate_alphabet_name ( deflate, alphabet ), bits,
00186                         deflate_bin ( huf, huf_sym->bits ), huf_sym->freq );
00187                 for ( i = 0 ; i < huf_sym->freq ; i++ ) {
00188                         DBGC2 ( alphabet, " %03x",
00189                                 huf_sym->raw[ huf + i ] );
00190                 }
00191                 DBGC2 ( alphabet, "\n" );
00192         }
00193 
00194         /* Dump quick lookup table */
00195         DBGC2 ( alphabet, "DEFLATE %p \"%s\" quick lookup:", deflate,
00196                 deflate_alphabet_name ( deflate, alphabet ) );
00197         for ( i = 0 ; i < ( sizeof ( alphabet->lookup ) /
00198                             sizeof ( alphabet->lookup[0] ) ) ; i++ ) {
00199                 DBGC2 ( alphabet, " %d", ( alphabet->lookup[i] + 1 ) );
00200         }
00201         DBGC2 ( alphabet, "\n" );
00202 }
00203 
00204 /**
00205  * Construct Huffman alphabet
00206  *
00207  * @v deflate           Decompressor
00208  * @v alphabet          Huffman alphabet
00209  * @v count             Number of symbols
00210  * @v offset            Starting offset within length table
00211  * @ret rc              Return status code
00212  */
00213 static int deflate_alphabet ( struct deflate *deflate,
00214                               struct deflate_alphabet *alphabet,
00215                               unsigned int count, unsigned int offset ) {
00216         struct deflate_huf_symbols *huf_sym;
00217         unsigned int huf;
00218         unsigned int cum_freq;
00219         unsigned int bits;
00220         unsigned int raw;
00221         unsigned int adjustment;
00222         unsigned int prefix;
00223         int complete;
00224 
00225         /* Clear symbol table */
00226         memset ( alphabet->huf, 0, sizeof ( alphabet->huf ) );
00227 
00228         /* Count number of symbols with each Huffman-coded length */
00229         for ( raw = 0 ; raw < count ; raw++ ) {
00230                 bits = deflate_length ( deflate, ( raw + offset ) );
00231                 if ( bits )
00232                         alphabet->huf[ bits - 1 ].freq++;
00233         }
00234 
00235         /* Populate Huffman-coded symbol table */
00236         huf = 0;
00237         cum_freq = 0;
00238         for ( bits = 1 ; bits <= ( sizeof ( alphabet->huf ) /
00239                                    sizeof ( alphabet->huf[0] ) ) ; bits++ ) {
00240                 huf_sym = &alphabet->huf[ bits - 1 ];
00241                 huf_sym->bits = bits;
00242                 huf_sym->shift = ( 16 - bits );
00243                 huf_sym->start = ( huf << huf_sym->shift );
00244                 huf_sym->raw = &alphabet->raw[cum_freq];
00245                 huf += huf_sym->freq;
00246                 if ( huf > ( 1U << bits ) ) {
00247                         DBGC ( alphabet, "DEFLATE %p \"%s\" has too many "
00248                                "symbols with lengths <=%d\n", deflate,
00249                                deflate_alphabet_name ( deflate, alphabet ),
00250                                bits );
00251                         return -EINVAL;
00252                 }
00253                 huf <<= 1;
00254                 cum_freq += huf_sym->freq;
00255         }
00256         complete = ( huf == ( 1U << bits ) );
00257 
00258         /* Populate raw symbol table */
00259         for ( raw = 0 ; raw < count ; raw++ ) {
00260                 bits = deflate_length ( deflate, ( raw + offset ) );
00261                 if ( bits ) {
00262                         huf_sym = &alphabet->huf[ bits - 1 ];
00263                         *(huf_sym->raw++) = raw;
00264                 }
00265         }
00266 
00267         /* Adjust Huffman-coded symbol table raw pointers and populate
00268          * quick lookup table.
00269          */
00270         for ( bits = 1 ; bits <= ( sizeof ( alphabet->huf ) /
00271                                    sizeof ( alphabet->huf[0] ) ) ; bits++ ) {
00272                 huf_sym = &alphabet->huf[ bits - 1 ];
00273 
00274                 /* Adjust raw pointer */
00275                 huf_sym->raw -= huf_sym->freq; /* Reset to first symbol */
00276                 adjustment = ( huf_sym->start >> huf_sym->shift );
00277                 huf_sym->raw -= adjustment; /* Adjust for quick indexing */
00278 
00279                 /* Populate quick lookup table */
00280                 for ( prefix = ( huf_sym->start >> DEFLATE_HUFFMAN_QL_SHIFT ) ;
00281                       prefix < ( 1 << DEFLATE_HUFFMAN_QL_BITS ) ; prefix++ ) {
00282                         alphabet->lookup[prefix] = ( bits - 1 );
00283                 }
00284         }
00285 
00286         /* Dump alphabet (for debugging) */
00287         deflate_dump_alphabet ( deflate, alphabet );
00288 
00289         /* Check that there are no invalid codes */
00290         if ( ! complete ) {
00291                 DBGC ( alphabet, "DEFLATE %p \"%s\" is incomplete\n", deflate,
00292                        deflate_alphabet_name ( deflate, alphabet ) );
00293                 return -EINVAL;
00294         }
00295 
00296         return 0;
00297 }
00298 
00299 /**
00300  * Attempt to accumulate bits from input stream
00301  *
00302  * @v deflate           Decompressor
00303  * @v in                Compressed input data
00304  * @v target            Number of bits to accumulate
00305  * @ret excess          Number of excess bits accumulated (may be negative)
00306  */
00307 static int deflate_accumulate ( struct deflate *deflate,
00308                                 struct deflate_chunk *in,
00309                                 unsigned int target ) {
00310         uint8_t byte;
00311 
00312         while ( deflate->bits < target ) {
00313 
00314                 /* Check for end of input */
00315                 if ( in->offset >= in->len )
00316                         break;
00317 
00318                 /* Acquire byte from input */
00319                 copy_from_user ( &byte, in->data, in->offset++,
00320                                  sizeof ( byte ) );
00321                 deflate->accumulator = ( deflate->accumulator |
00322                                          ( byte << deflate->bits ) );
00323                 deflate->rotalumucca = ( deflate->rotalumucca |
00324                                          ( deflate_reverse[byte] <<
00325                                            ( 24 - deflate->bits ) ) );
00326                 deflate->bits += 8;
00327 
00328                 /* Sanity check */
00329                 assert ( deflate->bits <=
00330                          ( 8 * sizeof ( deflate->accumulator ) ) );
00331         }
00332 
00333         return ( deflate->bits - target );
00334 }
00335 
00336 /**
00337  * Consume accumulated bits from the input stream
00338  *
00339  * @v deflate           Decompressor
00340  * @v count             Number of accumulated bits to consume
00341  * @ret data            Consumed bits
00342  */
00343 static int deflate_consume ( struct deflate *deflate, unsigned int count ) {
00344         int data;
00345 
00346         /* Sanity check */
00347         assert ( count <= deflate->bits );
00348 
00349         /* Extract data and consume bits */
00350         data = ( deflate->accumulator & ( ( 1 << count ) - 1 ) );
00351         deflate->accumulator >>= count;
00352         deflate->rotalumucca <<= count;
00353         deflate->bits -= count;
00354 
00355         return data;
00356 }
00357 
00358 /**
00359  * Attempt to extract a fixed number of bits from input stream
00360  *
00361  * @v deflate           Decompressor
00362  * @v in                Compressed input data
00363  * @v target            Number of bits to extract
00364  * @ret data            Extracted bits (or negative if not yet accumulated)
00365  */
00366 static int deflate_extract ( struct deflate *deflate, struct deflate_chunk *in,
00367                              unsigned int target ) {
00368         int excess;
00369         int data;
00370 
00371         /* Return immediately if we are attempting to extract zero bits */
00372         if ( target == 0 )
00373                 return 0;
00374 
00375         /* Attempt to accumulate bits */
00376         excess = deflate_accumulate ( deflate, in, target );
00377         if ( excess < 0 )
00378                 return excess;
00379 
00380         /* Extract data and consume bits */
00381         data = deflate_consume ( deflate, target );
00382         DBGCP ( deflate, "DEFLATE %p extracted %s = %#x = %d\n", deflate,
00383                 deflate_bin ( data, target ), data, data );
00384 
00385         return data;
00386 }
00387 
00388 /**
00389  * Attempt to decode a Huffman-coded symbol from input stream
00390  *
00391  * @v deflate           Decompressor
00392  * @v in                Compressed input data
00393  * @v alphabet          Huffman alphabet
00394  * @ret code            Raw code (or negative if not yet accumulated)
00395  */
00396 static int deflate_decode ( struct deflate *deflate,
00397                             struct deflate_chunk *in,
00398                             struct deflate_alphabet *alphabet ) {
00399         struct deflate_huf_symbols *huf_sym;
00400         uint16_t huf;
00401         unsigned int lookup_index;
00402         int excess;
00403         unsigned int raw;
00404 
00405         /* Attempt to accumulate maximum required number of bits.
00406          * There may be fewer bits than this remaining in the stream,
00407          * even if the stream still contains some complete
00408          * Huffman-coded symbols.
00409          */
00410         deflate_accumulate ( deflate, in, DEFLATE_HUFFMAN_BITS );
00411 
00412         /* Normalise the bit-reversed accumulated value to 16 bits */
00413         huf = ( deflate->rotalumucca >> 16 );
00414 
00415         /* Find symbol set for this length */
00416         lookup_index = ( huf >> DEFLATE_HUFFMAN_QL_SHIFT );
00417         huf_sym = &alphabet->huf[ alphabet->lookup[ lookup_index ] ];
00418         while ( huf < huf_sym->start )
00419                 huf_sym--;
00420 
00421         /* Calculate number of excess bits, and return if not yet complete */
00422         excess = ( deflate->bits - huf_sym->bits );
00423         if ( excess < 0 )
00424                 return excess;
00425 
00426         /* Consume bits */
00427         deflate_consume ( deflate, huf_sym->bits );
00428 
00429         /* Look up raw symbol */
00430         raw = huf_sym->raw[ huf >> huf_sym->shift ];
00431         DBGCP ( deflate, "DEFLATE %p decoded %s = %#x = %d\n", deflate,
00432                 deflate_bin ( ( huf >> huf_sym->shift ), huf_sym->bits ),
00433                 raw, raw );
00434 
00435         return raw;
00436 }
00437 
00438 /**
00439  * Discard bits up to the next byte boundary
00440  *
00441  * @v deflate           Decompressor
00442  */
00443 static void deflate_discard_to_byte ( struct deflate *deflate ) {
00444 
00445         deflate_consume ( deflate, ( deflate->bits & 7 ) );
00446 }
00447 
00448 /**
00449  * Copy data to output buffer (if available)
00450  *
00451  * @v out               Output data buffer
00452  * @v start             Source data
00453  * @v offset            Starting offset within source data
00454  * @v len               Length to copy
00455  */
00456 static void deflate_copy ( struct deflate_chunk *out,
00457                            userptr_t start, size_t offset, size_t len ) {
00458         size_t out_offset = out->offset;
00459         size_t copy_len;
00460 
00461         /* Copy data one byte at a time, to allow for overlap */
00462         if ( out_offset < out->len ) {
00463                 copy_len = ( out->len - out_offset );
00464                 if ( copy_len > len )
00465                         copy_len = len;
00466                 while ( copy_len-- ) {
00467                         memcpy_user ( out->data, out_offset++,
00468                                       start, offset++, 1 );
00469                 }
00470         }
00471         out->offset += len;
00472 }
00473 
00474 /**
00475  * Inflate compressed data
00476  *
00477  * @v deflate           Decompressor
00478  * @v in                Compressed input data
00479  * @v out               Output data buffer
00480  * @ret rc              Return status code
00481  *
00482  * The caller can use deflate_finished() to determine whether a
00483  * successful return indicates that the decompressor is merely waiting
00484  * for more input.
00485  *
00486  * Data will not be written beyond the specified end of the output
00487  * data buffer, but the offset within the output data buffer will be
00488  * updated to reflect the amount that should have been written.  The
00489  * caller can use this to find the length of the decompressed data
00490  * before allocating the output data buffer.
00491  */
00492 int deflate_inflate ( struct deflate *deflate,
00493                       struct deflate_chunk *in,
00494                       struct deflate_chunk *out ) {
00495 
00496         /* This could be implemented more neatly if gcc offered a
00497          * means for enforcing tail recursion.
00498          */
00499         if ( deflate->resume ) {
00500                 goto *(deflate->resume);
00501         } else switch ( deflate->format ) {
00502                 case DEFLATE_RAW:       goto block_header;
00503                 case DEFLATE_ZLIB:      goto zlib_header;
00504                 default:                assert ( 0 );
00505         }
00506 
00507  zlib_header: {
00508                 int header;
00509                 int cm;
00510 
00511                 /* Extract header */
00512                 header = deflate_extract ( deflate, in, ZLIB_HEADER_BITS );
00513                 if ( header < 0 ) {
00514                         deflate->resume = &&zlib_header;
00515                         return 0;
00516                 }
00517 
00518                 /* Parse header */
00519                 cm = ( ( header >> ZLIB_HEADER_CM_LSB ) & ZLIB_HEADER_CM_MASK );
00520                 if ( cm != ZLIB_HEADER_CM_DEFLATE ) {
00521                         DBGC ( deflate, "DEFLATE %p unsupported ZLIB "
00522                                "compression method %d\n", deflate, cm );
00523                         return -ENOTSUP;
00524                 }
00525                 if ( header & ( 1 << ZLIB_HEADER_FDICT_BIT ) ) {
00526                         DBGC ( deflate, "DEFLATE %p unsupported ZLIB preset "
00527                                "dictionary\n", deflate );
00528                         return -ENOTSUP;
00529                 }
00530 
00531                 /* Process first block header */
00532                 goto block_header;
00533         }
00534 
00535  block_header: {
00536                 int header;
00537                 int bfinal;
00538                 int btype;
00539 
00540                 /* Extract block header */
00541                 header = deflate_extract ( deflate, in, DEFLATE_HEADER_BITS );
00542                 if ( header < 0 ) {
00543                         deflate->resume = &&block_header;
00544                         return 0;
00545                 }
00546 
00547                 /* Parse header */
00548                 deflate->header = header;
00549                 bfinal = ( header & ( 1 << DEFLATE_HEADER_BFINAL_BIT ) );
00550                 btype = ( header >> DEFLATE_HEADER_BTYPE_LSB );
00551                 DBGC ( deflate, "DEFLATE %p found %sblock type %#x\n",
00552                        deflate, ( bfinal ? "final " : "" ), btype );
00553                 switch ( btype ) {
00554                 case DEFLATE_HEADER_BTYPE_LITERAL:
00555                         goto literal_block;
00556                 case DEFLATE_HEADER_BTYPE_STATIC:
00557                         goto static_block;
00558                 case DEFLATE_HEADER_BTYPE_DYNAMIC:
00559                         goto dynamic_block;
00560                 default:
00561                         DBGC ( deflate, "DEFLATE %p unsupported block type "
00562                                "%#x\n", deflate, btype );
00563                         return -ENOTSUP;
00564                 }
00565         }
00566 
00567  literal_block: {
00568 
00569                 /* Discard any bits up to the next byte boundary */
00570                 deflate_discard_to_byte ( deflate );
00571         }
00572 
00573  literal_len: {
00574                 int len;
00575 
00576                 /* Extract LEN field */
00577                 len = deflate_extract ( deflate, in, DEFLATE_LITERAL_LEN_BITS );
00578                 if ( len < 0 ) {
00579                         deflate->resume = &&literal_len;
00580                         return 0;
00581                 }
00582 
00583                 /* Record length of literal data */
00584                 deflate->remaining = len;
00585                 DBGC2 ( deflate, "DEFLATE %p literal block length %#04zx\n",
00586                         deflate, deflate->remaining );
00587         }
00588 
00589  literal_nlen: {
00590                 int nlen;
00591 
00592                 /* Extract NLEN field */
00593                 nlen = deflate_extract ( deflate, in, DEFLATE_LITERAL_LEN_BITS);
00594                 if ( nlen < 0 ) {
00595                         deflate->resume = &&literal_nlen;
00596                         return 0;
00597                 }
00598 
00599                 /* Verify NLEN */
00600                 if ( ( ( deflate->remaining ^ ~nlen ) &
00601                        ( ( 1 << DEFLATE_LITERAL_LEN_BITS ) - 1 ) ) != 0 ) {
00602                         DBGC ( deflate, "DEFLATE %p invalid len/nlen "
00603                                "%#04zx/%#04x\n", deflate,
00604                                deflate->remaining, nlen );
00605                         return -EINVAL;
00606                 }
00607         }
00608 
00609  literal_data: {
00610                 size_t in_remaining;
00611                 size_t len;
00612 
00613                 /* Calculate available amount of literal data */
00614                 in_remaining = ( in->len - in->offset );
00615                 len = deflate->remaining;
00616                 if ( len > in_remaining )
00617                         len = in_remaining;
00618 
00619                 /* Copy data to output buffer */
00620                 deflate_copy ( out, in->data, in->offset, len );
00621 
00622                 /* Consume data from input buffer */
00623                 in->offset += len;
00624                 deflate->remaining -= len;
00625 
00626                 /* Finish processing if we are blocked */
00627                 if ( deflate->remaining ) {
00628                         deflate->resume = &&literal_data;
00629                         return 0;
00630                 }
00631 
00632                 /* Otherwise, finish block */
00633                 goto block_done;
00634         }
00635 
00636  static_block: {
00637                 struct deflate_static_length_pattern *pattern;
00638                 uint8_t *lengths = deflate->lengths;
00639 
00640                 /* Construct static Huffman lengths as per RFC 1950 */
00641                 for ( pattern = deflate_static_length_patterns ;
00642                       pattern->count ; pattern++ ) {
00643                         memset ( lengths, pattern->fill, pattern->count );
00644                         lengths += pattern->count;
00645                 }
00646                 deflate->litlen_count = 288;
00647                 deflate->distance_count = 32;
00648                 goto construct_alphabets;
00649         }
00650 
00651  dynamic_block:
00652 
00653  dynamic_header: {
00654                 int header;
00655                 unsigned int hlit;
00656                 unsigned int hdist;
00657                 unsigned int hclen;
00658 
00659                 /* Extract block header */
00660                 header = deflate_extract ( deflate, in, DEFLATE_DYNAMIC_BITS );
00661                 if ( header < 0 ) {
00662                         deflate->resume = &&dynamic_header;
00663                         return 0;
00664                 }
00665 
00666                 /* Parse header */
00667                 hlit = ( ( header >> DEFLATE_DYNAMIC_HLIT_LSB ) &
00668                          DEFLATE_DYNAMIC_HLIT_MASK );
00669                 hdist = ( ( header >> DEFLATE_DYNAMIC_HDIST_LSB ) &
00670                           DEFLATE_DYNAMIC_HDIST_MASK );
00671                 hclen = ( ( header >> DEFLATE_DYNAMIC_HCLEN_LSB ) &
00672                           DEFLATE_DYNAMIC_HCLEN_MASK );
00673                 deflate->litlen_count = ( hlit + 257 );
00674                 deflate->distance_count = ( hdist + 1 );
00675                 deflate->length_index = 0;
00676                 deflate->length_target = ( hclen + 4 );
00677                 DBGC2 ( deflate, "DEFLATE %p dynamic block %d codelen, %d "
00678                         "litlen, %d distance\n", deflate,
00679                         deflate->length_target, deflate->litlen_count,
00680                         deflate->distance_count );
00681 
00682                 /* Prepare for decoding code length code lengths */
00683                 memset ( &deflate->lengths, 0, sizeof ( deflate->lengths ) );
00684         }
00685 
00686  dynamic_codelen: {
00687                 int len;
00688                 unsigned int index;
00689                 int rc;
00690 
00691                 /* Extract all code lengths */
00692                 while ( deflate->length_index < deflate->length_target ) {
00693 
00694                         /* Extract code length length */
00695                         len = deflate_extract ( deflate, in,
00696                                                 DEFLATE_CODELEN_BITS );
00697                         if ( len < 0 ) {
00698                                 deflate->resume = &&dynamic_codelen;
00699                                 return 0;
00700                         }
00701 
00702                         /* Store code length */
00703                         index = deflate_codelen_map[deflate->length_index++];
00704                         deflate_set_length ( deflate, index, len );
00705                         DBGCP ( deflate, "DEFLATE %p codelen for %d is %d\n",
00706                                 deflate, index, len );
00707                 }
00708 
00709                 /* Generate code length alphabet */
00710                 if ( ( rc = deflate_alphabet ( deflate,
00711                                                &deflate->distance_codelen,
00712                                                ( DEFLATE_CODELEN_MAX_CODE + 1 ),
00713                                                0 ) ) != 0 )
00714                         return rc;
00715 
00716                 /* Prepare for decoding literal/length/distance code lengths */
00717                 memset ( &deflate->lengths, 0, sizeof ( deflate->lengths ) );
00718                 deflate->length_index = 0;
00719                 deflate->length_target = ( deflate->litlen_count +
00720                                            deflate->distance_count );
00721                 deflate->length = 0;
00722         }
00723 
00724  dynamic_litlen_distance: {
00725                 int len;
00726                 int index;
00727 
00728                 /* Decode literal/length/distance code length */
00729                 len = deflate_decode ( deflate, in, &deflate->distance_codelen);
00730                 if ( len < 0 ) {
00731                         deflate->resume = &&dynamic_litlen_distance;
00732                         return 0;
00733                 }
00734 
00735                 /* Prepare for extra bits */
00736                 if ( len < 16 ) {
00737                         deflate->length = len;
00738                         deflate->extra_bits = 0;
00739                         deflate->dup_len = 1;
00740                 } else {
00741                         static const uint8_t dup_len[3] = { 3, 3, 11 };
00742                         static const uint8_t extra_bits[3] = { 2, 3, 7 };
00743                         index = ( len - 16 );
00744                         deflate->dup_len = dup_len[index];
00745                         deflate->extra_bits = extra_bits[index];
00746                         if ( index )
00747                                 deflate->length = 0;
00748                 }
00749         }
00750 
00751  dynamic_litlen_distance_extra: {
00752                 int extra;
00753                 unsigned int dup_len;
00754 
00755                 /* Extract extra bits */
00756                 extra = deflate_extract ( deflate, in, deflate->extra_bits );
00757                 if ( extra < 0 ) {
00758                         deflate->resume = &&dynamic_litlen_distance_extra;
00759                         return 0;
00760                 }
00761 
00762                 /* Store code lengths */
00763                 dup_len = ( deflate->dup_len + extra );
00764                 while ( ( deflate->length_index < deflate->length_target ) &&
00765                         dup_len-- ) {
00766                         deflate_set_length ( deflate, deflate->length_index++,
00767                                              deflate->length );
00768                 }
00769 
00770                 /* Process next literal/length or distance code
00771                  * length, if more are required.
00772                  */
00773                 if ( deflate->length_index < deflate->length_target )
00774                         goto dynamic_litlen_distance;
00775 
00776                 /* Construct alphabets */
00777                 goto construct_alphabets;
00778         }
00779 
00780  construct_alphabets: {
00781                 unsigned int distance_offset = deflate->litlen_count;
00782                 unsigned int distance_count = deflate->distance_count;
00783                 int rc;
00784 
00785                 /* Generate literal/length alphabet */
00786                 if ( ( rc = deflate_alphabet ( deflate, &deflate->litlen,
00787                                                deflate->litlen_count, 0 ) ) !=0)
00788                         return rc;
00789 
00790                 /* Handle degenerate case of a single distance code
00791                  * (for which it is impossible to construct a valid,
00792                  * complete Huffman alphabet).  RFC 1951 states:
00793                  *
00794                  *   If only one distance code is used, it is encoded
00795                  *   using one bit, not zero bits; in this case there
00796                  *   is a single code length of one, with one unused
00797                  *   code.  One distance code of zero bits means that
00798                  *   there are no distance codes used at all (the data
00799                  *   is all literals).
00800                  *
00801                  * If we have only a single distance code, then we
00802                  * instead use two distance codes both with length 1.
00803                  * This results in a valid Huffman alphabet.  The code
00804                  * "0" will mean distance code 0 (which is either
00805                  * correct or irrelevant), and the code "1" will mean
00806                  * distance code 1 (which is always irrelevant).
00807                  */
00808                 if ( deflate->distance_count == 1 ) {
00809 
00810                         deflate->lengths[0] = 0x11;
00811                         distance_offset = 0;
00812                         distance_count = 2;
00813                 }
00814 
00815                 /* Generate distance alphabet */
00816                 if ( ( rc = deflate_alphabet ( deflate,
00817                                                &deflate->distance_codelen,
00818                                                distance_count,
00819                                                distance_offset ) ) != 0 )
00820                         return rc;
00821         }
00822 
00823  lzhuf_litlen: {
00824                 int code;
00825                 uint8_t byte;
00826                 unsigned int extra;
00827                 unsigned int bits;
00828 
00829                 /* Decode Huffman codes */
00830                 while ( 1 ) {
00831 
00832                         /* Decode Huffman code */
00833                         code = deflate_decode ( deflate, in, &deflate->litlen );
00834                         if ( code < 0 ) {
00835                                 deflate->resume = &&lzhuf_litlen;
00836                                 return 0;
00837                         }
00838 
00839                         /* Handle according to code type */
00840                         if ( code < DEFLATE_LITLEN_END ) {
00841 
00842                                 /* Literal value: copy to output buffer */
00843                                 byte = code;
00844                                 DBGCP ( deflate, "DEFLATE %p literal %#02x "
00845                                         "('%c')\n", deflate, byte,
00846                                         ( isprint ( byte ) ? byte : '.' ) );
00847                                 deflate_copy ( out, virt_to_user ( &byte ), 0,
00848                                                sizeof ( byte ) );
00849 
00850                         } else if ( code == DEFLATE_LITLEN_END ) {
00851 
00852                                 /* End of block */
00853                                 goto block_done;
00854 
00855                         } else {
00856 
00857                                 /* Length code: process extra bits */
00858                                 extra = ( code - DEFLATE_LITLEN_END - 1 );
00859                                 if ( extra < 28 ) {
00860                                         bits = ( extra / 4 );
00861                                         if ( bits )
00862                                                 bits--;
00863                                         deflate->extra_bits = bits;
00864                                         deflate->dup_len =
00865                                                 deflate_litlen_base[extra];
00866                                 } else {
00867                                         deflate->extra_bits = 0;
00868                                         deflate->dup_len = 258;
00869                                 }
00870                                 goto lzhuf_litlen_extra;
00871                         }
00872                 }
00873         }
00874 
00875  lzhuf_litlen_extra: {
00876                 int extra;
00877 
00878                 /* Extract extra bits */
00879                 extra = deflate_extract ( deflate, in, deflate->extra_bits );
00880                 if ( extra < 0 ) {
00881                         deflate->resume = &&lzhuf_litlen_extra;
00882                         return 0;
00883                 }
00884 
00885                 /* Update duplicate length */
00886                 deflate->dup_len += extra;
00887         }
00888 
00889  lzhuf_distance: {
00890                 int code;
00891                 unsigned int extra;
00892                 unsigned int bits;
00893 
00894                 /* Decode Huffman code */
00895                 code = deflate_decode ( deflate, in,
00896                                         &deflate->distance_codelen );
00897                 if ( code < 0 ) {
00898                         deflate->resume = &&lzhuf_distance;
00899                         return 0;
00900                 }
00901 
00902                 /* Process extra bits */
00903                 extra = code;
00904                 bits = ( extra / 2 );
00905                 if ( bits )
00906                         bits--;
00907                 deflate->extra_bits = bits;
00908                 deflate->dup_distance = deflate_distance_base[extra];
00909         }
00910 
00911  lzhuf_distance_extra: {
00912                 int extra;
00913                 size_t dup_len;
00914                 size_t dup_distance;
00915 
00916                 /* Extract extra bits */
00917                 extra = deflate_extract ( deflate, in, deflate->extra_bits );
00918                 if ( extra < 0 ) {
00919                         deflate->resume = &&lzhuf_distance_extra;
00920                         return 0;
00921                 }
00922 
00923                 /* Update duplicate distance */
00924                 dup_distance = ( deflate->dup_distance + extra );
00925                 dup_len = deflate->dup_len;
00926                 DBGCP ( deflate, "DEFLATE %p duplicate length %zd distance "
00927                         "%zd\n", deflate, dup_len, dup_distance );
00928 
00929                 /* Sanity check */
00930                 if ( dup_distance > out->offset ) {
00931                         DBGC ( deflate, "DEFLATE %p bad distance %zd (max "
00932                                "%zd)\n", deflate, dup_distance, out->offset );
00933                         return -EINVAL;
00934                 }
00935 
00936                 /* Copy data, allowing for overlap */
00937                 deflate_copy ( out, out->data, ( out->offset - dup_distance ),
00938                                dup_len );
00939 
00940                 /* Process next literal/length symbol */
00941                 goto lzhuf_litlen;
00942         }
00943 
00944  block_done: {
00945 
00946                 DBGCP ( deflate, "DEFLATE %p end of block\n", deflate );
00947 
00948                 /* If this was not the final block, process next block header */
00949                 if ( ! ( deflate->header & ( 1 << DEFLATE_HEADER_BFINAL_BIT ) ))
00950                         goto block_header;
00951 
00952                 /* Otherwise, process footer (if any) */
00953                 switch ( deflate->format ) {
00954                 case DEFLATE_RAW:       goto finished;
00955                 case DEFLATE_ZLIB:      goto zlib_footer;
00956                 default:                assert ( 0 );
00957                 }
00958         }
00959 
00960  zlib_footer: {
00961 
00962                 /* Discard any bits up to the next byte boundary */
00963                 deflate_discard_to_byte ( deflate );
00964         }
00965 
00966  zlib_adler32: {
00967                 int excess;
00968 
00969                 /* Accumulate the 32 bits of checksum.  We don't check
00970                  * the value, stop processing immediately afterwards,
00971                  * and so don't have to worry about the nasty corner
00972                  * cases involved in calling deflate_extract() to
00973                  * obtain a full 32 bits.
00974                  */
00975                 excess = deflate_accumulate ( deflate, in, ZLIB_ADLER32_BITS );
00976                 if ( excess < 0 ) {
00977                         deflate->resume = &&zlib_adler32;
00978                         return 0;
00979                 }
00980 
00981                 /* Finish processing */
00982                 goto finished;
00983         }
00984 
00985  finished: {
00986                 /* Mark as finished and terminate */
00987                 DBGCP ( deflate, "DEFLATE %p finished\n", deflate );
00988                 deflate->resume = NULL;
00989                 return 0;
00990         }
00991 }
00992 
00993 /**
00994  * Initialise decompressor
00995  *
00996  * @v deflate           Decompressor
00997  * @v format            Compression format code
00998  */
00999 void deflate_init ( struct deflate *deflate, enum deflate_format format ) {
01000         static int global_init_done;
01001         uint8_t i;
01002         uint8_t bit;
01003         uint8_t byte;
01004         unsigned int base;
01005         unsigned int bits;
01006 
01007         /* Perform global initialisation if required */
01008         if ( ! global_init_done ) {
01009 
01010                 /* Initialise byte reversal table */
01011                 for ( i = 255 ; i ; i-- ) {
01012                         for ( bit = 1, byte = 0 ; bit ; bit <<= 1 ) {
01013                                 byte <<= 1;
01014                                 if ( i & bit )
01015                                         byte |= 1;
01016                         }
01017                         deflate_reverse[i] = byte;
01018                 }
01019 
01020                 /* Initialise literal/length extra bits table */
01021                 base = 3;
01022                 for ( i = 0 ; i < 28 ; i++ ) {
01023                         bits = ( i / 4 );
01024                         if ( bits )
01025                                 bits--;
01026                         deflate_litlen_base[i] = base;
01027                         base += ( 1 << bits );
01028                 }
01029                 assert ( base == 259 ); /* sic */
01030 
01031                 /* Initialise distance extra bits table */
01032                 base = 1;
01033                 for ( i = 0 ; i < 30 ; i++ ) {
01034                         bits = ( i / 2 );
01035                         if ( bits )
01036                                 bits--;
01037                         deflate_distance_base[i] = base;
01038                         base += ( 1 << bits );
01039                 }
01040                 assert ( base == 32769 );
01041 
01042                 /* Record global initialisation as complete */
01043                 global_init_done = 1;
01044         }
01045 
01046         /* Initialise structure */
01047         memset ( deflate, 0, sizeof ( *deflate ) );
01048         deflate->format = format;
01049 }