/*
 * Copyright (C) 2014 Michael Brown .
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */

/**
 * @file
 *
 * LZX decompression
 *
 * This algorithm is derived jointly from the document "[MS-PATCH]:
 * LZX DELTA Compression and Decompression", available from
 *
 *     http://msdn.microsoft.com/en-us/library/cc483133.aspx
 *
 * and from the file lzx-decompress.c in the wimlib source code.
 *
 */

#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include "wimboot.h"
#include "huffman.h"
#include "lzx.h"

/**
 * Base positions, indexed by position slot
 *
 * Populated lazily on the first call to lzx_decompress().
 */
static unsigned int lzx_position_base[LZX_POSITION_SLOTS];

/**
 * Attempt to accumulate bits from LZX bitstream
 *
 * @v lzx		Decompressor
 * @v bits		Number of bits to accumulate
 * @ret norm_value	Accumulated value (normalised to 16 bits)
 *
 * Note that there may not be sufficient accumulated bits in the
 * bitstream; callers must check that sufficient bits are available
 * before using the value.
 */
static int lzx_accumulate ( struct lzx *lzx, unsigned int bits ) {
	const uint8_t *src;
	uint16_t word;

	/* Accumulate more bits if required */
	if ( ( lzx->bits < bits ) &&
	     ( lzx->input.offset < lzx->input.len ) ) {
		/* Fetch the next 16-bit word (in host byte order)
		 * via memcpy() so that no alignment of the input
		 * buffer is assumed.
		 */
		src = ( ( const uint8_t * ) lzx->input.data +
			lzx->input.offset );
		memcpy ( &word, src, sizeof ( word ) );
		lzx->input.offset += sizeof ( word );
		/* Widen to an unsigned 32-bit value before shifting,
		 * so that shifting into bit 31 is well defined.
		 */
		lzx->accumulator |= ( ( ( uint32_t ) word ) <<
				      ( 16 - lzx->bits ) );
		lzx->bits += 16;
	}

	return ( lzx->accumulator >> 16 );
}

/**
 * Consume accumulated bits from LZX bitstream
 *
 * @v lzx		Decompressor
 * @v bits		Number of bits to consume
 * @ret rc		Return status code
 */
static int lzx_consume ( struct lzx *lzx, unsigned int bits ) {

	/* Fail if insufficient bits are available */
	if ( lzx->bits < bits ) {
		DBG ( "LZX input overrun in %#zx/%#zx out %#zx)\n",
		      lzx->input.offset, lzx->input.len, lzx->output.offset );
		return -1;
	}

	/* Consume bits */
	lzx->accumulator <<= bits;
	lzx->bits -= bits;

	return 0;
}

/**
 * Get bits from LZX bitstream
 *
 * @v lzx		Decompressor
 * @v bits		Number of bits to fetch
 * @ret value		Value, or negative error
 */
static int lzx_getbits ( struct lzx *lzx, unsigned int bits ) {
	int norm_value;
	int rc;

	/* Accumulate more bits if required */
	norm_value = lzx_accumulate ( lzx, bits );

	/* Consume bits (fails if the accumulation fell short) */
	if ( ( rc = lzx_consume ( lzx, bits ) ) != 0 )
		return rc;

	/* Keep only the requested number of most significant bits */
	return ( norm_value >> ( 16 - bits ) );
}

/**
 * Align LZX bitstream for byte access
 *
 * @v lzx		Decompressor
 * @v bits		Minimum number of padding bits
 * @ret rc		Return status code
 */
static int lzx_align ( struct lzx *lzx, unsigned int bits ) {
	int pad;

	/* Get padding bits */
	pad = lzx_getbits ( lzx, bits );
	if ( pad < 0 )
		return pad;

	/* Consume all accumulated bits.  This cannot fail, since we
	 * consume exactly the number of bits currently held.
	 */
	lzx_consume ( lzx, lzx->bits );

	return 0;
}

/**
 * Get bytes from LZX bitstream
 *
 * @v lzx		Decompressor
 * @v data		Data buffer, or NULL
 * @v len		Length of data buffer
 * @ret rc		Return status code
 *
 * If no data buffer is provided then the bytes are skipped.
 */
static int lzx_getbytes ( struct lzx *lzx, void *data, size_t len ) {
	const uint8_t *src;

	/* Sanity check (written so that the addition cannot wrap) */
	if ( ( lzx->input.offset > lzx->input.len ) ||
	     ( len > ( lzx->input.len - lzx->input.offset ) ) ) {
		DBG ( "LZX input overrun in %#zx/%#zx out %#zx)\n",
		      lzx->input.offset, lzx->input.len, lzx->output.offset );
		return -1;
	}

	/* Copy data */
	if ( data ) {
		src = ( ( const uint8_t * ) lzx->input.data +
			lzx->input.offset );
		memcpy ( data, src, len );
	}
	lzx->input.offset += len;

	return 0;
}

/**
 * Decode LZX Huffman-coded symbol
 *
 * @v lzx		Decompressor
 * @v alphabet		Huffman alphabet
 * @ret raw		Raw symbol, or negative error
 */
static int lzx_decode ( struct lzx *lzx, struct huffman_alphabet *alphabet ) {
	struct huffman_symbols *sym;
	int huf;
	int rc;

	/* Accumulate sufficient bits */
	huf = lzx_accumulate ( lzx, HUFFMAN_BITS );
	if ( huf < 0 )
		return huf;

	/* Decode symbol */
	sym = huffman_sym ( alphabet, huf );

	/* Consume only the bits belonging to this symbol */
	if ( ( rc = lzx_consume ( lzx, huffman_len ( sym ) ) ) != 0 )
		return rc;

	return huffman_raw ( sym, huf );
}

/**
 * Generate Huffman alphabet from raw length table
 *
 * @v lzx		Decompressor
 * @v count		Number of symbols
 * @v bits		Length of each length (in bits)
 * @v lengths		Lengths table to fill in
 * @v alphabet		Huffman alphabet to fill in
 * @ret rc		Return status code
 */
static int lzx_raw_alphabet ( struct lzx *lzx, unsigned int count,
			      unsigned int bits, uint8_t *lengths,
			      struct huffman_alphabet *alphabet ) {
	unsigned int i;
	int len;
	int rc;

	/* Read lengths */
	for ( i = 0 ; i < count ; i++ ) {
		len = lzx_getbits ( lzx, bits );
		if ( len < 0 )
			return len;
		lengths[i] = len;
	}

	/* Generate Huffman alphabet */
	if ( ( rc = huffman_alphabet ( alphabet, lengths, count ) ) != 0 )
		return rc;

	return 0;
}

/**
 * Generate pretree
 *
 * @v lzx		Decompressor
 * @v count		Number of symbols
 * @v lengths		Lengths table to fill in
 * @ret rc		Return status code
 *
 * Reads a fresh pretree alphabet and then uses it to decode @c count
 * code lengths.  Each length is encoded as a delta (modulo 17)
 * against the value already held in the lengths table.
 */
static int lzx_pretree ( struct lzx *lzx, unsigned int count,
			 uint8_t *lengths ) {
	unsigned int i;
	unsigned int length;
	int dup = 0;
	int code;
	int rc;

	/* Generate pretree alphabet */
	if ( ( rc = lzx_raw_alphabet ( lzx, LZX_PRETREE_CODES,
				       LZX_PRETREE_BITS, lzx->pretree_lengths,
				       &lzx->pretree ) ) != 0 )
		return rc;

	/* Read lengths */
	for ( i = 0 ; i < count ; i++ ) {

		/* Duplicate previous length while a run is pending.
		 * A run can only have been started by an earlier
		 * iteration, so i is non-zero here.
		 */
		if ( dup ) {
			lengths[i] = lengths[ i - 1 ];
			dup--;
			continue;
		}

		/* Get next code */
		code = lzx_decode ( lzx, &lzx->pretree );
		if ( code < 0 )
			return code;

		/* Interpret code */
		if ( code <= 16 ) {
			/* Single length delta */
			length = ( ( lengths[i] - code + 17 ) % 17 );
		} else if ( code == 17 ) {
			/* Short run of zeros */
			length = 0;
			dup = lzx_getbits ( lzx, 4 );
			if ( dup < 0 )
				return dup;
			dup += 3;
		} else if ( code == 18 ) {
			/* Long run of zeros */
			length = 0;
			dup = lzx_getbits ( lzx, 5 );
			if ( dup < 0 )
				return dup;
			dup += 19;
		} else if ( code == 19 ) {
			/* Run of identical lengths */
			dup = lzx_getbits ( lzx, 1 );
			if ( dup < 0 )
				return dup;
			dup += 3;
			code = lzx_decode ( lzx, &lzx->pretree );
			if ( code < 0 )
				return code;
			/* The repeated value must be a length delta;
			 * a larger code could drive the modulo
			 * arithmetic below negative and store a
			 * garbage length.
			 */
			if ( code > 17 ) {
				DBG ( "Unrecognised pretree code %d\n", code );
				return -1;
			}
			length = ( ( lengths[i] - code + 17 ) % 17 );
		} else {
			DBG ( "Unrecognised pretree code %d\n", code );
			return -1;
		}
		lengths[i] = length;
	}

	/* Sanity check: a run must not extend beyond the table */
	if ( dup ) {
		DBG ( "Pretree duplicate overrun\n" );
		return -1;
	}

	return 0;
}

/**
 * Generate aligned offset Huffman alphabet
 *
 * @v lzx		Decompressor
 * @ret rc		Return status code
 */
static int lzx_alignoffset_alphabet ( struct lzx *lzx ) {
	int rc;

	/* Generate aligned offset alphabet */
	if ( ( rc = lzx_raw_alphabet ( lzx, LZX_ALIGNOFFSET_CODES,
				       LZX_ALIGNOFFSET_BITS,
				       lzx->alignoffset_lengths,
				       &lzx->alignoffset ) ) != 0 )
		return rc;

	return 0;
}

/**
 * Generate main Huffman alphabet
 *
 * @v lzx		Decompressor
 * @ret rc		Return status code
 */
static int lzx_main_alphabet ( struct lzx *lzx ) {
	int rc;

	/* Generate literal symbols pretree */
	if ( ( rc = lzx_pretree ( lzx, LZX_MAIN_LIT_CODES,
				  lzx->main_lengths.literals ) ) != 0 ) {
		DBG ( "Could not construct main literal pretree\n" );
		return rc;
	}

	/* Generate remaining symbols pretree */
	if ( ( rc = lzx_pretree ( lzx, ( LZX_MAIN_CODES - LZX_MAIN_LIT_CODES ),
				  lzx->main_lengths.remainder ) ) != 0 ) {
		DBG ( "Could not construct main remainder pretree\n" );
		return rc;
	}

	/* Generate Huffman alphabet.
	 *
	 * NOTE(review): this passes the literals table with the full
	 * LZX_MAIN_CODES count, so it presumably relies on the
	 * remainder table immediately following the literals table
	 * within the structure; confirm against lzx.h.
	 */
	if ( ( rc = huffman_alphabet ( &lzx->main, lzx->main_lengths.literals,
				       LZX_MAIN_CODES ) ) != 0 ) {
		DBG ( "Could not generate main alphabet\n" );
		return rc;
	}

	return 0;
}

/**
 * Generate length Huffman alphabet
 *
 * @v lzx		Decompressor
 * @ret rc		Return status code
 */
static int lzx_length_alphabet ( struct lzx *lzx ) {
	int rc;

	/* Generate pretree */
	if ( ( rc = lzx_pretree ( lzx, LZX_LENGTH_CODES,
				  lzx->length_lengths ) ) != 0 ) {
		DBG ( "Could not generate length pretree\n" );
		return rc;
	}

	/* Generate Huffman alphabet */
	if ( ( rc = huffman_alphabet ( &lzx->length, lzx->length_lengths,
				       LZX_LENGTH_CODES ) ) != 0 ) {
		DBG ( "Could not generate length alphabet\n" );
		return rc;
	}

	return 0;
}

/**
 * Process LZX block header
 *
 * @v lzx		Decompressor
 * @ret rc		Return status code
 *
 * Records the block type, sets the output threshold marking the end
 * of the block, and reads whatever per-block tables the block type
 * requires.
 */
static int lzx_block_header ( struct lzx *lzx ) {
	size_t block_len;
	int block_type;
	int default_len;
	int len_high;
	int len_low;
	int rc;

	/* Get block type */
	block_type = lzx_getbits ( lzx, LZX_BLOCK_TYPE_BITS );
	if ( block_type < 0 )
		return block_type;
	lzx->block_type = block_type;

	/* Check block length */
	default_len = lzx_getbits ( lzx, 1 );
	if ( default_len < 0 )
		return default_len;
	if ( default_len ) {
		block_len = LZX_DEFAULT_BLOCK_LEN;
	} else {
		/* Explicit 16-bit length, most significant byte first */
		len_high = lzx_getbits ( lzx, 8 );
		if ( len_high < 0 )
			return len_high;
		len_low = lzx_getbits ( lzx, 8 );
		if ( len_low < 0 )
			return len_low;
		block_len = ( ( len_high << 8 ) | len_low );
	}
	lzx->output.threshold = ( lzx->output.offset + block_len );

	/* Handle block type */
	switch ( block_type ) {
	case LZX_BLOCK_ALIGNOFFSET :
		/* Generated aligned offset alphabet */
		if ( ( rc = lzx_alignoffset_alphabet ( lzx ) ) != 0 )
			return rc;
		/* Fall through */
	case LZX_BLOCK_VERBATIM :
		/* Generate main alphabet */
		if ( ( rc = lzx_main_alphabet ( lzx ) ) != 0 )
			return rc;
		/* Generate lengths alphabet */
		if ( ( rc = lzx_length_alphabet ( lzx ) ) != 0 )
			return rc;
		break;
	case LZX_BLOCK_UNCOMPRESSED :
		/* Align input stream */
		if ( ( rc = lzx_align ( lzx, 1 ) ) != 0 )
			return rc;
		/* Read new repeated offsets (copied verbatim from
		 * the input stream into the repeated offset list)
		 */
		if ( ( rc = lzx_getbytes ( lzx, &lzx->repeated_offset,
					   sizeof ( lzx->repeated_offset )))!=0)
			return rc;
		break;
	default:
		DBG ( "Unrecognised block type %d\n", block_type );
		return -1;
	}

	return 0;
}

/**
 * Process uncompressed data
 *
 * @v lzx		Decompressor
 * @ret rc		Return status code
 *
 * Copies (or, if there is no output buffer, skips) the remainder of
 * the current block directly from the input stream.
 */
static int lzx_uncompressed ( struct lzx *lzx ) {
	void *data;
	size_t len;
	int rc;

	/* Copy bytes */
	data = ( lzx->output.data ?
		 ( lzx->output.data + lzx->output.offset ) : NULL );
	len = ( lzx->output.threshold - lzx->output.offset );
	if ( ( rc = lzx_getbytes ( lzx, data, len ) ) != 0 )
		return rc;

	/* Account for the bytes just produced, so that subsequent
	 * blocks are appended after them and the final decompressed
	 * length includes them.
	 */
	lzx->output.offset += len;

	/* Align input stream (the bitstream is read in 16-bit units) */
	if ( len % 2 )
		lzx->input.offset++;

	return 0;
}

/**
 * Process an LZX token
 *
 * @v lzx		Decompressor
 * @ret rc		Return status code
 *
 * Variable names are chosen to match the LZX specification
 * pseudo-code.
 */
static int lzx_token ( struct lzx *lzx ) {
	unsigned int length_header;
	unsigned int position_slot;
	unsigned int offset_bits;
	unsigned int i;
	size_t match_offset;
	size_t match_length;
	int verbatim_bits;
	int aligned_bits;
	int maindata;
	int length;
	const uint8_t *src;
	uint8_t *copy;

	/* Get maindata symbol */
	maindata = lzx_decode ( lzx, &lzx->main );
	if ( maindata < 0 )
		return maindata;

	/* Check for literals */
	if ( maindata < LZX_MAIN_LIT_CODES ) {
		if ( lzx->output.data )
			lzx->output.data[lzx->output.offset] = maindata;
		lzx->output.offset++;
		return 0;
	}
	maindata -= LZX_MAIN_LIT_CODES;

	/* Calculate the match length */
	length_header = ( maindata & 7 );
	if ( length_header == 7 ) {
		/* Length continues in the length alphabet */
		length = lzx_decode ( lzx, &lzx->length );
		if ( length < 0 )
			return length;
	} else {
		length = 0;
	}
	match_length = ( length_header + 2 + length );

	/* Calculate the position slot */
	position_slot = ( maindata >> 3 );
	if ( position_slot < LZX_REPEATED_OFFSETS ) {

		/* Repeated offset: swap chosen entry to the front */
		match_offset = lzx->repeated_offset[position_slot];
		lzx->repeated_offset[position_slot] = lzx->repeated_offset[0];
		lzx->repeated_offset[0] = match_offset;

	} else {

		/* Non-repeated offset */
		offset_bits = lzx_footer_bits ( position_slot );
		if ( ( lzx->block_type == LZX_BLOCK_ALIGNOFFSET ) &&
		     ( offset_bits >= 3 ) ) {
			/* Low three bits come from the aligned
			 * offset alphabet
			 */
			verbatim_bits = lzx_getbits ( lzx,
						      ( offset_bits - 3 ));
			if ( verbatim_bits < 0 )
				return verbatim_bits;
			verbatim_bits <<= 3;
			aligned_bits = lzx_decode ( lzx, &lzx->alignoffset );
			if ( aligned_bits < 0 )
				return aligned_bits;
		} else {
			verbatim_bits = lzx_getbits ( lzx, offset_bits );
			if ( verbatim_bits < 0 )
				return verbatim_bits;
			aligned_bits = 0;
		}
		match_offset = ( lzx_position_base[position_slot] +
				 verbatim_bits + aligned_bits - 2 );

		/* Update repeated offset list */
		for ( i = ( LZX_REPEATED_OFFSETS - 1 ) ; i > 0 ; i-- )
			lzx->repeated_offset[i] = lzx->repeated_offset[ i - 1 ];
		lzx->repeated_offset[0] = match_offset;
	}

	/* Copy data */
	if ( match_offset > lzx->output.offset ) {
		DBG ( "LZX match underrun out %#zx offset %#zx len %#zx\n",
		      lzx->output.offset, match_offset, match_length );
		return -1;
	}
	if ( lzx->output.data ) {
		copy = &lzx->output.data[lzx->output.offset];
		src = ( copy - match_offset );
		/* Must copy forwards one byte at a time: source and
		 * destination may overlap, in which case bytes
		 * written earlier in this loop are read back.
		 */
		for ( i = 0 ; i < match_length ; i++ )
			copy[i] = src[i];
	}
	lzx->output.offset += match_length;

	return 0;
}

/**
 * Translate E8 jump addresses
 *
 * @v lzx		Decompressor
 *
 * Scans the decompressed output for 0xe8 bytes and rewrites the
 * 32-bit operand that follows each one.  The final ten bytes of the
 * output are never examined.
 */
static void lzx_translate_jumps ( struct lzx *lzx ) {
	size_t offset;
	uint8_t *operand;
	int32_t target;

	/* Sanity check */
	if ( lzx->output.offset < 10 )
		return;

	/* Scan for jump instructions */
	for ( offset = 0 ; offset < ( lzx->output.offset - 10 ) ; offset++ ) {

		/* Check for jump instruction */
		if ( lzx->output.data[offset] != 0xe8 )
			continue;

		/* Fetch jump target via memcpy(), since the operand
		 * may lie at any alignment within the output buffer.
		 */
		operand = &lzx->output.data[ offset + 1 ];
		memcpy ( &target, operand, sizeof ( target ) );

		/* Translate jump target */
		if ( target >= 0 ) {
			if ( target < LZX_WIM_MAGIC_FILESIZE )
				target -= offset;
		} else {
			if ( target >= -( ( int32_t ) offset ) )
				target += LZX_WIM_MAGIC_FILESIZE;
		}
		memcpy ( operand, &target, sizeof ( target ) );

		/* Skip over the operand */
		offset += sizeof ( target );
	}
}

/**
 * Decompress LZX-compressed data
 *
 * @v data		Compressed data
 * @v len		Length of compressed data
 * @v buf		Decompression buffer, or NULL
 * @ret out_len		Length of decompressed data, or negative error
 *
 * If no decompression buffer is provided then the input is parsed
 * without storing any output, and only the decompressed length is
 * calculated.
 */
ssize_t lzx_decompress ( const void *data, size_t len, void *buf ) {
	struct lzx lzx;
	unsigned int i;
	int rc;

	/* Sanity check: the bitstream is read in 16-bit units */
	if ( len % 2 ) {
		DBG ( "LZX cannot handle odd-length input data\n" );
		return -1;
	}

	/* Initialise global state, if required.  The first entry
	 * keeps its static initial value of zero; a non-zero final
	 * entry indicates that the table has already been built.
	 */
	if ( ! lzx_position_base[ LZX_POSITION_SLOTS - 1 ] ) {
		for ( i = 1 ; i < LZX_POSITION_SLOTS ; i++ ) {
			lzx_position_base[i] =
				( lzx_position_base[i-1] +
				  ( 1 << lzx_footer_bits ( i - 1 ) ) );
		}
	}

	/* Initialise decompressor */
	memset ( &lzx, 0, sizeof ( lzx ) );
	lzx.input.data = data;
	lzx.input.len = len;
	lzx.output.data = buf;
	for ( i = 0 ; i < LZX_REPEATED_OFFSETS ; i++ )
		lzx.repeated_offset[i] = 1;

	/* Process blocks */
	while ( lzx.input.offset < lzx.input.len ) {

		/* Process block header */
		if ( ( rc = lzx_block_header ( &lzx ) ) != 0 )
			return rc;

		/* Process block contents */
		if ( lzx.block_type == LZX_BLOCK_UNCOMPRESSED ) {

			/* Copy uncompressed data */
			if ( ( rc = lzx_uncompressed ( &lzx ) ) != 0 )
				return rc;

		} else {

			/* Process token stream */
			while ( lzx.output.offset < lzx.output.threshold ) {
				if ( ( rc = lzx_token ( &lzx ) ) != 0 )
					return rc;
			}
		}
	}

	/* Postprocess to undo E8 jump compression */
	if ( lzx.output.data )
		lzx_translate_jumps ( &lzx );

	return lzx.output.offset;
}