From: Robert Pengelly Date: Wed, 1 Oct 2025 11:26:14 +0000 (+0100) Subject: Initial commit X-Git-Url: https://git.candlhat.org/?a=commitdiff_plain;h=4dbafc573af987e4650488327d01b5f0cfa2a686;p=unzip.git Initial commit --- 4dbafc573af987e4650488327d01b5f0cfa2a686 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..fdddb29 --- /dev/null +++ b/LICENSE @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. 
+ +For more information, please refer to <https://unlicense.org> diff --git a/Makefile.unix b/Makefile.unix new file mode 100644 index 0000000..2c97be1 --- /dev/null +++ b/Makefile.unix @@ -0,0 +1,27 @@ +#****************************************************************************** +# @file Makefile.unix +#****************************************************************************** +SRCDIR ?= $(CURDIR) +VPATH := $(SRCDIR) + +CC := gcc +CFLAGS := -D_FILE_OFFSET_BITS=64 -Wall -Werror -Wextra -std=c90 + +ifeq ($(OS), Windows_NT) +all: unzip.exe + +unzip.exe: unzip.c bitstream.c huffman.c inflate.c lib.c lz77.c report.c tables.c vector.c + + $(CC) $(CFLAGS) -o $@ $^ +else +all: unzip + +unzip: unzip.c bitstream.c huffman.c inflate.c lib.c lz77.c report.c tables.c vector.c + + $(CC) $(CFLAGS) -o $@ $^ +endif + +clean: + + if [ -f unzip.exe ]; then rm -rf unzip.exe; fi + if [ -f unzip ]; then rm -rf unzip; fi diff --git a/Makefile.w32 b/Makefile.w32 new file mode 100644 index 0000000..875d20c --- /dev/null +++ b/Makefile.w32 @@ -0,0 +1,18 @@ +#****************************************************************************** +# @file Makefile.w32 +#****************************************************************************** +SRCDIR ?= $(CURDIR) +VPATH := $(SRCDIR) + +CC := gcc +CFLAGS := -D_FILE_OFFSET_BITS=64 -Wall -Werror -Wextra -std=c90 + +all: unzip.exe + +clean: + + if exist unzip.exe ( del /q unzip.exe ) + if exist unzip ( del /q unzip ) + +unzip.exe: unzip.c bitstream.c huffman.c inflate.c lib.c lz77.c report.c tables.c vector.c + $(CC) $(CFLAGS) -o $@ $^ diff --git a/README.md b/README.md new file mode 100644 index 0000000..85a05d0 --- /dev/null +++ b/README.md @@ -0,0 +1,23 @@ +All source code is Public Domain. + +## Obtain the source code + + git clone https://git.candlhat.org/unzip.git + +## Building + + BSD: + + Make sure you have gcc and gmake installed then run gmake -f Makefile.unix. + + Linux: + + Make sure you have gcc and make installed then run make -f Makefile.unix. 
+ + macOS: + + Make sure you have xcode command line tools installed then run make -f Makefile.unix. + + Windows: + + Make sure you have mingw installed and the location within your PATH variable then run mingw32-make.exe -f Makefile.w32. diff --git a/bitstream.c b/bitstream.c new file mode 100644 index 0000000..18f8113 --- /dev/null +++ b/bitstream.c @@ -0,0 +1,86 @@ +/****************************************************************************** + * @file bitstream.c + *****************************************************************************/ +#include +#include +#include +#include + +#include "bitstream.h" +#include "lib.h" +#include "stdint.h" + +int istream_init (istream_t *is, unsigned char *s, uint64_t n) { + + is->end = (is->src = s) + n; + + is->bitpos = 0; + is->bitpos_end = n * 8; + + return 0; + +} + +int istream_advance (istream_t *is, uint64_t n) { + + assert (is->bitpos <= is->bitpos_end); + + if (is->bitpos_end - is->bitpos < n) { + return 0; + } + + is->bitpos += n; + return 1; + +} + +unsigned char *istream_byte_align (istream_t *is) { + + unsigned char *byte; + + assert (is->bitpos <= is->bitpos_end && "not past end of stream"); + is->bitpos = round_up (is->bitpos, 8); + + byte = is->src + (is->bitpos / 8); + assert (byte <= is->end); + + return byte; + +} + +uint64_t istream_bits (istream_t *is) { + + unsigned char *next = is->src + (is->bitpos / 8); /* Assigned outside assert so NDEBUG builds still set it. */ + uint64_t bits, i; + +#ifdef NO_LONG_LONG + int cnt = 4; +#else + int cnt = 8; +#endif + + assert (next <= is->end && "cannot read past end of stream"); + + if (is->end - next >= cnt) { + + /* Common case: read cnt bytes (4 or 8) in one go. */ + bits = array_to_integer (next, cnt, 0); + + } else { + + /* Read the available bits and zero-pad. 
*/ + bits = 0; + + for (i = 0; i < (uint64_t) (is->end - next); i++) { + bits |= (uint64_t) next[i] << (i * CHAR_BIT); + } + + } + + return bits >> (is->bitpos % 8); + +} + +uint64_t istream_bytes_read (istream_t *is) { + return round_up (is->bitpos, 8) / 8; +} diff --git a/bitstream.h b/bitstream.h new file mode 100644 index 0000000..49c2fe4 --- /dev/null +++ b/bitstream.h @@ -0,0 +1,34 @@ +/****************************************************************************** + * @file bitstream.h + *****************************************************************************/ +#ifndef _BITSTREAM_H +#define _BITSTREAM_H + +#include "stdint.h" + +/* Input bitstream. */ +typedef struct { + + unsigned char *src; /* Source bytes. */ + unsigned char *end; /* Past-the-end byte of src*/ + + uint64_t bitpos; /* Position of the next bit to read. */ + uint64_t bitpos_end; /* Position of past-the-end bit. */ + +} istream_t; + +#ifdef NO_LONG_LONG +# define ISTREAM_MIN_BITS (32 - 7) +#else +# define ISTREAM_MIN_BITS (64 - 7) +#endif + +unsigned char *istream_byte_align (istream_t *is); + +int istream_init (istream_t *is, unsigned char *s, uint64_t n); +int istream_advance (istream_t *is, uint64_t n); + +uint64_t istream_bits (istream_t *is); +uint64_t istream_bytes_read (istream_t *is); + +#endif /* _BITSTREAM_H */ diff --git a/huffman.c b/huffman.c new file mode 100644 index 0000000..319e9f3 --- /dev/null +++ b/huffman.c @@ -0,0 +1,168 @@ +/****************************************************************************** + * @file huffman.c + *****************************************************************************/ +#include +#include + +#include "huffman.h" +#include "lib.h" +#include "stdint.h" +#include "tables.h" + +static uint16_t reverse16 (uint16_t x, int n) { + + uint16_t reversed, lo, hi; + + assert (n > 0); + assert (n <= 16); + + lo = x & UCHAR_MAX; + hi = x >> CHAR_BIT; + + reversed = (uint16_t) ((reverse8_tbl[lo] << CHAR_BIT) | reverse8_tbl[hi]); + return reversed >> 
(16 - n); + +} + +static void table_insert (huffman_decoder_t *d, uint64_t sym, uint64_t len, uint16_t codeword) { + + uint16_t padding, index; + int pad_len; + + assert (len <= HUFFMAN_LOOKUP_TABLE_BITS); + + codeword = reverse16 (codeword, len); + pad_len = HUFFMAN_LOOKUP_TABLE_BITS - len; + + /* Pad the pad_len upper bits with all bit combinations. */ + for (padding = 0; padding < (1U << pad_len); padding++) { + + index = (uint16_t) (codeword | (padding << len)); + + d->table[index].sym = (uint16_t) sym; + d->table[index].len = (uint16_t) len; + + assert (d->table[index].sym == sym && "fits in bitfield"); + assert (d->table[index].len == len && "fits in bitfield"); + + } + +} + +int huffman_decoder_init (huffman_decoder_t *d, unsigned char *lengths, uint64_t n) { + + uint16_t count[MAX_HUFFMAN_BITS + 1] = { 0 }; + uint16_t code[MAX_HUFFMAN_BITS + 1]; + uint16_t sym_idx[MAX_HUFFMAN_BITS + 1]; + + uint64_t i, l; + uint32_t s; + + assert (n <= MAX_HUFFMAN_SYMBOLS); + d->num_syms = n; + + /* Zero-initialize the lookup table. */ + for (i = 0; i < sizeof (d->table) / sizeof (d->table[0]); i++) { + d->table[i].len = 0; + } + + /* Count the number of codewords of each length. */ + for (i = 0; i < n; i++) { + + assert (lengths[i] <= MAX_HUFFMAN_BITS); + count[lengths[i]]++; + + } + + count[0] = 0; /* Ignore zero-length codeword. */ + + /* Compute sentinel bits and offset first sym_idx for each length. */ + code[0] = 0; + sym_idx[0] = 0; + + for (l = 1; l <= MAX_HUFFMAN_BITS; l++) { + + /* First canonical codeword of this length. */ + code[l] = (uint16_t) ((code[l - 1] + count[l - 1]) << 1); + + if (count[l] != 0 && code[l] + count[l] - 1 > (1 << l) - 1) { + + /* The last codeword is longer than l bits. 
*/ + return 0; + + } + + s = (uint32_t) ((code[l] + count[l]) << (MAX_HUFFMAN_BITS - l)); + + d->sentinel_bits[l] = s; + assert (d->sentinel_bits[l] >= code[l] && "no overflow!"); + + sym_idx[l] = sym_idx[l - 1] + count[l - 1]; + d->offset_first_sym_idx[l] = sym_idx[l] - code[l]; + + } + + /* Build mapping from index to symbol and populate the loopup table. */ + for (i = 0; i < n; i++) { + + if ((l = lengths[i]) == 0) { + continue; + } + + d->syms[sym_idx[l]] = (uint16_t) i; + sym_idx[l]++; + + if (l <= HUFFMAN_LOOKUP_TABLE_BITS) { + + table_insert (d, i, l, code[l]); + code[l]++; + + } + + } + + return 1; + +} + +int huffman_decode (const huffman_decoder_t *d, uint16_t bits, uint64_t *num_used_bits) { + + uint64_t lookup_bits, sym_idx, l; + + /* First try the lookup table. */ + lookup_bits = lsb (bits, HUFFMAN_LOOKUP_TABLE_BITS); + assert (lookup_bits < sizeof (d->table) / sizeof (d->table[0])); + + if (d->table[lookup_bits].len != 0) { + + assert (d->table[lookup_bits].len <= HUFFMAN_LOOKUP_TABLE_BITS); + assert (d->table[lookup_bits].sym < d->num_syms); + + *num_used_bits = d->table[lookup_bits].len; + return d->table[lookup_bits].sym; + + } + + /* Then do canonical decoding with the bits in MSB-first order. 
*/ + bits = reverse16 (bits, MAX_HUFFMAN_BITS); + + for (l = HUFFMAN_LOOKUP_TABLE_BITS + 1; l <= MAX_HUFFMAN_BITS; l++) { + + if (bits < d->sentinel_bits[l]) { + + bits >>= MAX_HUFFMAN_BITS - l; + + sym_idx = (uint16_t) (d->offset_first_sym_idx[l] + bits); + assert (sym_idx < d->num_syms); + + *num_used_bits = l; + return d->syms[sym_idx]; + + } + + } + + *num_used_bits = 0; + return -1; + +} diff --git a/huffman.h b/huffman.h new file mode 100644 index 0000000..8a8c881 --- /dev/null +++ b/huffman.h @@ -0,0 +1,49 @@ +/****************************************************************************** + * @file huffman.h + *****************************************************************************/ +#ifndef _HUFFMAN_H +#define _HUFFMAN_H + +#include "stdint.h" + +#define MAX_HUFFMAN_SYMBOLS 288 /* Deflate uses max 288 symbols. */ +#define MAX_HUFFMAN_BITS 16 /* Implode uses max 16-bit codewords. */ +#define HUFFMAN_LOOKUP_TABLE_BITS 8 /* Seems a good trade-off. */ + +typedef struct { + + uint64_t num_syms; + + /* Lookup table for fast deocding of short codewords. */ + struct { + + uint16_t sym : 9; /* Wide enough to fit the max symbol mbr. */ + uint16_t len : 7; /* 0 means no symbol. */ + + } table[1U << HUFFMAN_LOOKUP_TABLE_BITS]; + + /* "Sentinel bits" value for each codeword length. */ + uint32_t sentinel_bits[MAX_HUFFMAN_BITS + 1]; + + /* First symbol index minus first codeword mod 2**16 for each length. */ + uint16_t offset_first_sym_idx[MAX_HUFFMAN_BITS + 1]; + + /* Map from symbol index to symbol. */ + uint16_t syms[MAX_HUFFMAN_SYMBOLS]; + +} huffman_decoder_t; + +/** + * Initialize huffman decoder d for a code defined by the n codeword lengths. + * Returns false if the codeword lengths do not correspond to a valid prefix code. + */ +int huffman_decoder_init (huffman_decoder_t *d, unsigned char *lengths, uint64_t n); + +/** + * Use the decoder d to decode a symbol from the LSB-first zero-padded bits. 
+ * Returns the decoded symbol number or -1 if no symbol could be decoded. + * *num_used_bits will be set to the number of bits used to decode the symbol, or zero if no symbol could be decoded. + */ +int huffman_decode (const huffman_decoder_t *d, uint16_t bits, uint64_t *num_used_bits); + +#endif /* _HUFFMAN_H */ diff --git a/inflate.c b/inflate.c new file mode 100644 index 0000000..eebdd2b --- /dev/null +++ b/inflate.c @@ -0,0 +1,531 @@ +/****************************************************************************** + * @file inflate.c + *****************************************************************************/ +#include +#include +#include + +#include "bitstream.h" +#include "huffman.h" +#include "lib.h" +#include "lz77.h" +#include "stdint.h" +#include "tables.h" +#include "unzip.h" + +#define LITLEN_TBL_OFFSET 257 + +#define LITLEN_MAX 285 +#define LITLEN_EOB 256 + +#define MAX_LEN 258 +#define MIN_LEN 3 + +#define DISTSYM_MAX 29 + +#define MAX_DISTANCE 32768 +#define MIN_DISTANCE 1 + +#define MAX_CODELEN_LENS 19 +#define MIN_CODELEN_LENS 4 + +#define MAX_DIST_LENS 32 +#define MIN_DIST_LENS 1 + +#define MAX_LITLEN_LENS 288 +#define MIN_LITLEN_LENS 257 + +#define CODELEN_MAX_LIT 15 + +#define CODELEN_COPY 16 +#define CODELEN_COPY_MAX 6 +#define CODELEN_COPY_MIN 3 + +#define CODELEN_ZEROS 17 +#define CODELEN_ZEROS_MAX 10 +#define CODELEN_ZEROS_MIN 3 + +#define CODELEN_ZEROS2_MAX 138 +#define CODELEN_ZEROS2 18 +#define CODELEN_ZEROS2_MIN 11 + +/* RFC 1951, 3.2.7 */ +static const int codelen_lengths_order[MAX_CODELEN_LENS] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; + +static inf_stat_t inf_block (istream_t *is, FILE *outfile, uint64_t dst_cap, uint64_t *dst_pos, huffman_decoder_t *litlen_dec, huffman_decoder_t *dist_dec) { + + uint64_t bits, used, dist, len; + uint16_t ebits; + + int litlen, distsym; + +#ifndef NO_LONG_LONG + uint64_t used_tot; +#endif + + for (;;) { + + /* Read a litlen symbol. 
*/ + bits = istream_bits (is); + + litlen = huffman_decode (litlen_dec, (uint16_t) bits, &used); + /*printf ("litlen: %d\n", litlen);*/ + +#ifdef NO_LONG_LONG + + if (!istream_advance (is, used)) { + return HWINF_ERR; + } + +#else + + bits >>= used; + used_tot = used; + +#endif + + if (litlen < 0 || litlen > LITLEN_MAX) { + + /* Failed to decode, or invalid symbol. */ + return HWINF_ERR; + + } else if (litlen <= UINT8_MAX) { + + /* Literal. */ +#ifndef NO_LONG_LONG + + if (!istream_advance (is, used_tot)) { + return HWINF_ERR; + } + +#endif + + if (*dst_pos == dst_cap) { + return HWINF_FULL; + } + + if (lz77_output_lit (outfile, (*dst_pos)++, (uint8_t) litlen)) { + return HWINF_ERR; + } + + continue; + + } else if (litlen == LITLEN_EOB) { + + /* End of block. */ + +#ifndef NO_LONG_LONG + + if (!istream_advance (is, used_tot)) { + return HWINF_ERR; + } + +#endif + + return HWINF_OK; + + } + + assert (litlen >= LITLEN_TBL_OFFSET && litlen <= LITLEN_MAX); + + /* It is a back reference. Figure out the length.*/ + len = litlen_tbl[litlen - LITLEN_TBL_OFFSET].base_len; + + if ((ebits = litlen_tbl[litlen - LITLEN_TBL_OFFSET].ebits) != 0) { + +#ifdef NO_LONG_LONG + bits = istream_bits (is); +#endif + + len += lsb (bits, ebits); + +#ifdef NO_LONG_LONG + + if (!istream_advance (is, ebits)) { + return HWINF_ERR; + } + +#else + + bits >>= ebits; + used_tot += ebits; + +#endif + + } + + assert (len >= MIN_LEN && len <= MAX_LEN); + + /* Get the distance. 
*/ +#ifdef NO_LONG_LONG + bits = istream_bits (is); +#endif + + distsym = huffman_decode (dist_dec, (uint16_t) bits, &used); + +#ifdef NO_LONG_LONG + + if (!istream_advance (is, used)) { + return HWINF_ERR; + } + +#else + + bits >>= used; + used_tot += used; + +#endif + + if (distsym < 0 || distsym > DISTSYM_MAX) { + return HWINF_ERR; + } + + dist = dist_tbl[distsym].base_dist; + + if ((ebits = dist_tbl[distsym].ebits) != 0) { + +#ifdef NO_LONG_LONG + bits = istream_bits (is); +#endif + + dist += lsb (bits, ebits); + +#ifdef NO_LONG_LONG + + if (!istream_advance (is, ebits)) { + return HWINF_ERR; + } + +#else + + bits >>= ebits; + used_tot += ebits; + +#endif + + } + + assert (dist >= MIN_DISTANCE && dist <= MAX_DISTANCE); + +#ifndef NO_LONG_LONG + + assert (used_tot <= ISTREAM_MIN_BITS); + + if (!istream_advance (is, used_tot)) { + return HWINF_ERR; + } + +#endif + + /* Bounds check and output the backref. */ + if (dist > *dst_pos) { + return HWINF_ERR; + } + +#ifdef NO_LONG_LONG + if (round_up (len, 4) <= dst_cap - *dst_pos) { +#else + if (round_up (len, 8) <= dst_cap - *dst_pos) { +#endif + + if (lz77_output_backref64 (outfile, *dst_pos, dist, len)) { + return HWINF_ERR; + } + + } else if (len <= dst_cap - *dst_pos) { + + if (lz77_output_backref (outfile, *dst_pos, dist, len)) { + return HWINF_ERR; + } + + } else { + return HWINF_FULL; + } + + (*dst_pos) += len; + + } + +} + +static inf_stat_t init_dyn_decoders (istream_t *is, huffman_decoder_t *litlen_dec, huffman_decoder_t *dist_dec) { + + uint64_t num_litlen_lens, num_dist_lens, num_codelen_lens; + uint64_t i, n, used; + + unsigned char code_lengths[MAX_LITLEN_LENS + MAX_DIST_LENS]; + unsigned char codelen_lengths[MAX_CODELEN_LENS]; + + huffman_decoder_t codelen_dec; + int sym; + + uint64_t bits = istream_bits (is); + + /* Number of litlen codeword lengths (5 bits + 257). 
*/ + num_litlen_lens = (uint64_t) (lsb (bits, 5) + MIN_LITLEN_LENS); + bits >>= 5; + + assert (num_litlen_lens <= MAX_LITLEN_LENS); + + /* Number of codeword lengths (5 bits + 1). */ + num_dist_lens = (uint64_t) (lsb (bits, 5) + MIN_DIST_LENS); + bits >>= 5; + + assert (num_dist_lens <= MAX_DIST_LENS); + + /* Number of code length lengths (4 bits + 4). */ + num_codelen_lens = (uint64_t) (lsb (bits, 4) + MIN_CODELEN_LENS); + bits >>= 4; + + assert (num_codelen_lens <= MAX_CODELEN_LENS); + + if (!istream_advance (is, 5 + 5 + 4)) { + return HWINF_ERR; + } + + /** + * Read the codelen codeword lengths (3 bits each) + * and initialize the codelen decoder. + */ + for (i = 0; i < num_codelen_lens; i++) { + + bits = istream_bits (is); + codelen_lengths[codelen_lengths_order[i]] = (unsigned char) lsb (bits, 3); + + if (!istream_advance (is, 3)) { + return HWINF_ERR; + } + + } + + for (; i < MAX_CODELEN_LENS; i++) { + codelen_lengths[codelen_lengths_order[i]] = 0; + } + + if (!huffman_decoder_init (&codelen_dec, codelen_lengths, MAX_CODELEN_LENS)) { + return HWINF_ERR; + } + + /* Read the litlen and dist codeword lengths. */ + i = 0; + + while (i < num_litlen_lens + num_dist_lens) { + + bits = istream_bits (is); + + sym = huffman_decode (&codelen_dec, (uint16_t) bits, &used); + bits >>= used; + + if (!istream_advance (is, used)) { + return HWINF_ERR; + } + + if (sym >= 0 && sym <= CODELEN_MAX_LIT) { + + /* A literal codeword length. */ + code_lengths[i++] = (unsigned char) sym; + + } else if (sym == CODELEN_COPY) { + + /* Copy the previous codeword length 3--6 times. */ + if (i < 1) { + return HWINF_ERR; /* No previous length. 
*/ + } + + /* 2 bits + 3 */ + n = (uint64_t) lsb (bits, 2) + CODELEN_COPY_MIN; + + if (!istream_advance (is, 2)) { + return HWINF_ERR; + } + + assert (n >= CODELEN_COPY_MIN && n <= CODELEN_COPY_MAX); + + if (i + n > num_litlen_lens + num_dist_lens) { + return HWINF_ERR; + } + + while (n--) { + + code_lengths[i] = code_lengths[i - 1]; + i++; + + } + + } else if (sym == CODELEN_ZEROS) { + + /* 3--10 zeros; 3 bits + 3 */ + n = (uint64_t) (lsb (bits, 3) + CODELEN_ZEROS_MIN); + + if (!istream_advance (is, 3)) { + return HWINF_ERR; + } + + assert (n >= CODELEN_ZEROS_MIN && n <= CODELEN_ZEROS_MAX); + + if (i + n > num_litlen_lens + num_dist_lens) { + return HWINF_ERR; + } + + while (n--) { code_lengths[i++] = 0; } + + } else if (sym == CODELEN_ZEROS2) { + + /* 11--138 zeros; 7 bits + 11 */ + n = (uint64_t) (lsb (bits, 7) + CODELEN_ZEROS2_MIN); + + if (!istream_advance (is, 7)) { + return HWINF_ERR; + } + + assert (n >= CODELEN_ZEROS2_MIN && n <= CODELEN_ZEROS2_MAX); + + if (i + n > num_litlen_lens + num_dist_lens) { + return HWINF_ERR; + } + + while (n--) { code_lengths[i++] = 0; } + + } else { + + /* Invalid symbol. 
*/ + return HWINF_ERR; + + } + + } + + if (!huffman_decoder_init (litlen_dec, &code_lengths[0], num_litlen_lens)) { + return HWINF_ERR; + } + + if (!huffman_decoder_init (dist_dec, &code_lengths[num_litlen_lens], num_dist_lens)) { + return HWINF_ERR; + } + + return HWINF_OK; + +} + +static inf_stat_t inf_dyn_block (istream_t *is, FILE *outfile, uint64_t dst_cap, uint64_t *dst_pos) { + + huffman_decoder_t litlen_dec, dist_dec; + inf_stat_t stat; + + if ((stat = init_dyn_decoders (is, &litlen_dec, &dist_dec)) != HWINF_OK) { + return stat; + } + + return inf_block (is, outfile, dst_cap, dst_pos, &litlen_dec, &dist_dec); + +} + +static inf_stat_t inf_fixed_block (istream_t *is, FILE *outfile, uint64_t dst_cap, uint64_t *dst_pos) { + + huffman_decoder_t litlen_dec, dist_dec; + + huffman_decoder_init (&litlen_dec, fixed_litlen_lengths, sizeof (fixed_litlen_lengths) / sizeof (fixed_litlen_lengths[0])); + huffman_decoder_init (&dist_dec, fixed_dist_lengths, sizeof (fixed_dist_lengths) / sizeof (fixed_dist_lengths[0])); + + return inf_block (is, outfile, dst_cap, dst_pos, &litlen_dec, &dist_dec); + +} + +static inf_stat_t inf_noncomp_block (istream_t *is, FILE *outfile, uint64_t dst_cap, uint64_t *dst_pos) { + + unsigned char *p; + uint16_t len, nlen; + + p = istream_byte_align (is); + + /* Read len and nlen (2 x 16 bits). */ + if (!istream_advance (is, 32)) { + return HWINF_ERR; /* Not enough input. */ + } + + len = array_to_integer (p, 2, 0), p += 2; + nlen = array_to_integer (p, 2, 0), p += 2; + + if (nlen != (~len & 0xffff)) { /* RFC 1951 3.2.4: NLEN is the one's complement of LEN. */ + return HWINF_ERR; + } + + if (!istream_advance (is, len * 8)) { + return HWINF_ERR; /* Not enough input. */ + } + + if (dst_cap - *dst_pos < len) { + return HWINF_ERR; /* Not enough room to output. */ + } + + if (fseek (outfile, *dst_pos, SEEK_SET) || fwrite (p, 1, len, outfile) != len) { /* Seek first: a back-reference copy may leave the file position past *dst_pos. */ + return HWINF_ERR; /* Something went wrong. 
*/ + } + + *dst_pos += len; + return HWINF_OK; + +} + +inf_stat_t hwinflate (unsigned char *src, uint64_t src_len, uint64_t *src_used, FILE *outfile, uint64_t dst_cap, uint64_t *dst_used) { + + inf_stat_t stat; + istream_t is; + + uint64_t dst_pos, bits; + int bfinal; + + if (istream_init (&is, src, src_len)) { + return HWINF_ERR; + } + + dst_pos = 0; + + do { + + bits = istream_bits (&is); + + if (!istream_advance (&is, 3)) { + return HWINF_ERR; + } + + bfinal = bits & 1; + bits >>= 1; + + switch (lsb (bits, 2)) { + + case 0: /* No compression. */ + + stat = inf_noncomp_block (&is, outfile, dst_cap, &dst_pos); + break; + + case 1: /* Compressed with fixed Huffman codes. */ + + stat = inf_fixed_block (&is, outfile, dst_cap, &dst_pos); + break; + + case 2: /* Compressed with "dynamic Huffman codes. */ + + stat = inf_dyn_block (&is, outfile, dst_cap, &dst_pos); + break; + + default: + + return HWINF_ERR; + + } + + if (stat != HWINF_OK) { + return stat; + } + + } while (!bfinal); + + *src_used = istream_bytes_read (&is); + + assert (dst_pos <= dst_cap); + *dst_used = dst_pos; + + return HWINF_OK; + +} diff --git a/lib.c b/lib.c new file mode 100755 index 0000000..491432c --- /dev/null +++ b/lib.c @@ -0,0 +1,452 @@ +/****************************************************************************** + * @file lib.c + *****************************************************************************/ +#include +#include +#include +#include +#include +#include + +#include "lib.h" +#include "report.h" +#include "stdint.h" +#include "unzip.h" +#include "vector.h" + +#define OPTION_COMMENT 0x0001 +#define OPTION_DIRECTORY 0x0002 +#define OPTION_EXCLUDE 0x0003 +#define OPTION_HELP 0x0004 +#define OPTION_LIST 0x0005 + +struct option { + + const char *name; + int index, flags; + +}; + +#define OPTION_NO_ARG 0x0001 +#define OPTION_HAS_ARG 0x0002 + +static struct option opts[] = { + + { "--directory", OPTION_DIRECTORY, OPTION_HAS_ARG }, + { "-d", OPTION_DIRECTORY, OPTION_HAS_ARG }, + + { 
"--comment", OPTION_COMMENT, OPTION_NO_ARG }, + { "-z", OPTION_COMMENT, OPTION_NO_ARG }, + + { "--exclude", OPTION_EXCLUDE, OPTION_NO_ARG }, + { "-x", OPTION_EXCLUDE, OPTION_NO_ARG }, + + { "--list", OPTION_LIST, OPTION_NO_ARG }, + { "-l", OPTION_LIST, OPTION_NO_ARG }, + + { "--help", OPTION_HELP, OPTION_NO_ARG }, + { 0, 0, 0 } + +}; + +static int strstart (const char *val, const char **str) { + + const char *p = val; + const char *q = *str; + + while (*p != '\0') { + + if (*p != *q) { + return 0; + } + + ++p; + ++q; + + } + + *str = q; + return 1; + +} + +static void print_help (void) { + + if (program_name) { + + fprintf (stderr, "Usage: %s [opts] file... [-x xlist]\n\n", program_name); + fprintf (stderr, "Options:\n\n"); + + fprintf (stderr, " -l List files -z Display archive comment only.\n"); + fprintf (stderr, "\n"); + + fprintf (stderr, " -d exdir Extract files into exdir.\n"); + fprintf (stderr, " -x xlist Exclude files that follow.\n"); + fprintf (stderr, "\n"); + + fprintf (stderr, " --help Show this help information then exit.\n"); + + } + + exit (EXIT_SUCCESS); + +} + +static void dynarray_add (void *ptab, long *nb_ptr, void *data) { + + int nb, nb_alloc; + void **pp; + + nb = *nb_ptr; + pp = *(void ***) ptab; + + if ((nb & (nb - 1)) == 0) { + + if (!nb) { + nb_alloc = 1; + } else { + nb_alloc = nb * 2; + } + + pp = xrealloc (pp, nb_alloc * sizeof (void *)); + *(void ***) ptab = pp; + + } + + pp[nb++] = data; + *nb_ptr = nb; + +} + +uint64_t array_to_integer (unsigned char *arr, int size, int bigendian) { + + uint64_t val = 0; + int i; + + if (bigendian) { + + int j; + + for (i = size, j = 0; i > 0; i--, j++) { + val |= (uint64_t) arr[j] << (CHAR_BIT * (i - 1)); + } + + } else { + + for (i = 0; i < size; i++) { + val |= (uint64_t) arr[i] << (CHAR_BIT * i); + } + + } + + return val; + +} + +uint64_t lsb (uint64_t x, uint64_t n) { + +#ifdef NO_LONG_LONG + + assert (n <= 31); + return x & (((uint32_t) 1 << n) - 1); + +#else + + assert (n <= 63); + return x 
& (((uint64_t) 1 << n) - 1); + +#endif + +} + +uint64_t round_up (uint64_t x, uint64_t m) { + + assert ((m & (m - 1)) == 0 && "m must be a power of two"); + return (x + m - 1) & (uint64_t) (-m); + +} + +int wild_compare (const char *wild, const char *s) { + + const char *cp = 0, *mp = 0; + + while (*s && (*wild != '*')) { + + if (*wild != *s) { + return 0; + } + + wild++; + s++; + + } + + while (*s) { + + if (*wild == '*') { + + if (!*++wild) { + return 1; + } + + mp = wild; + cp = s + 1; + + } else if (*wild == *s) { + + wild++; + s++; + + } else { + + wild = mp; + s = cp++; + + } + + } + + while (*wild == '*') { + wild++; + } + + return !*wild; + +} + +void parse_args (int argc, char **argv, int optind) { + + struct option *popt; + const char *optarg, *r; + + if (argc <= optind) { + print_help (); + } + + while (optind < argc) { + + r = argv[optind++]; + + if (r[0] != '-' || r[1] == '\0') { + + dynarray_add (&state->files, &state->nb_files, xstrdup (r)); + continue; + + } + + for (popt = opts; popt; popt++) { + + const char *p1 = popt->name; + const char *r1 = r; + + if (!p1) { + + report_at (program_name, 0, REPORT_ERROR, "invalid option -- '%s'", r); + exit (EXIT_FAILURE); + + } + + if (!strstart (p1, &r1)) { + continue; + } + + optarg = r1; + + if (popt->flags & OPTION_HAS_ARG) { + + if (*optarg == '\0') { + + if (optind >= argc) { + + report_at (program_name, 0, REPORT_ERROR, "argument to '%s' is missing", r); + exit (EXIT_FAILURE); + + } + + optarg = argv[optind++]; + + } + + } else if (*optarg != '\0') { + continue; + } + + break; + + } + + switch (popt->index) { + + case OPTION_COMMENT: { + + state->only_comment = 1; + break; + + } + + case OPTION_DIRECTORY: { + + uint64_t len, i; + + if (state->exdir) { + + report_at (program_name, 0, REPORT_ERROR, "-d option used more than once (only one exdir allowed)"); + exit (EXIT_FAILURE); + + } + + len = strlen (state->exdir = xstrdup (optarg)); + + for (i = 0; i < len; i++) { + +#if defined (unix) || defined 
(__unix) || defined (__unix__) || defined (__APPLE__) + + if (state->exdir[i] == '\\') { + ((char *) state->exdir)[i] = '/'; + } + +#elif defined (_WIN32) + + if (state->exdir[i] == '/') { + ((char *) state->exdir)[i] = '\\'; + } + +#endif + + } + + break; + + } + + case OPTION_EXCLUDE: { + + char *arg, *copy, *p; + int i; + + for (; optind < argc; optind++) { + + optarg = argv[optind]; /* The loop header advances optind; a second increment here skipped every other name. */ + + if (!optarg || !*optarg) { + continue; + } + + arg = (copy = xstrdup (optarg)); + + while (arg && *arg != '\0') { + + if (isspace ((unsigned char) *arg)) { + + while (*arg != '\0') { + + if (!isspace ((unsigned char) *arg)) { + break; + } + + arg++; + + } + + continue; + + } + + if ((p = strchr (arg, ' '))) { + *p++ = '\0'; + } + + for (i = 0; i < state->xlist.length; i++) { + + if (strcmp (state->xlist.data[i], arg) == 0) { + break; + } + + } + + if (i < state->xlist.length) { + + arg = p; + continue; + + } + + vec_push (&state->xlist, xstrdup (arg)); + arg = p; + + } + + free (copy); + + } + + break; + + } + + case OPTION_HELP: { + + print_help (); + break; + + } + + case OPTION_LIST: { + + state->list = 1; + break; + + } + + default: { + + report_at (program_name, 0, REPORT_ERROR, "unsupported option '%s'", r); + exit (EXIT_FAILURE); + + } + + } + + } + +} + +char *xstrdup (const char *str) { + + char *ptr = xmalloc (strlen (str) + 1); + strcpy (ptr, str); + + return ptr; + +} + +void *xmalloc (unsigned long size) { + + void *ptr = malloc (size); + + if (ptr == NULL && size) { + + report_at (program_name, 0, REPORT_ERROR, "memory full (malloc)"); + exit (EXIT_FAILURE); + + } + + if (ptr != NULL) { memset (ptr, 0, size); } /* malloc (0) may return NULL; never pass NULL to memset. */ + return ptr; + +} + +void *xrealloc (void *ptr, unsigned long size) { + + void *new_ptr = realloc (ptr, size); + + if (new_ptr == NULL && size) { + + report_at (program_name, 0, REPORT_ERROR, "memory full (realloc)"); + exit (EXIT_FAILURE); + + } + + return new_ptr; + +} diff --git a/lib.h b/lib.h new file mode 100755 index 0000000..6746a5e --- /dev/null +++ b/lib.h @@ -0,0 +1,21 @@ 
/**
 * Copy a back reference inside the output file: LEN bytes starting DIST
 * bytes behind DST_POS are written at DST_POS, several bytes per round trip.
 *
 * Each chunk is limited to the bytes still owed (so nothing is written past
 * DST_POS + LEN) and to DIST (so a chunk only ever reads bytes that are
 * already in the file, which makes self-overlapping references work too).
 *
 * Returns 0 on success and -1 on a seek/read/write failure or when DIST is 0.
 */
int lz77_output_backref64 (FILE *fp, uint64_t dst_pos, uint64_t dist, uint64_t len) {

    unsigned char buf[8];
    uint64_t chunk, inc, i;
    
    assert (len > 0);
    assert (dist <= dst_pos && "cannot reference before beginning of dst");
    
    /* A zero distance has no source bytes and would never make progress. */
    if (dist == 0) {
        return -1;
    }
    
#ifdef      NO_LONG_LONG
    inc = 4;
#else
    inc = 8;
#endif
    
    for (i = 0; i < len; i += chunk) {
    
        chunk = len - i;
        
        if (chunk > inc) { chunk = inc; }
        if (chunk > dist) { chunk = dist; }
        
        if (fseek (fp, dst_pos - dist + i, SEEK_SET)) {
            return -1;
        }
        
        /* The source range is fully written, so a short read is an error. */
        if (fread (buf, 1, chunk, fp) != chunk) {
            return -1;
        }
        
        if (fseek (fp, dst_pos + i, SEEK_SET)) {
            return -1;
        }
        
        if (fwrite (buf, 1, chunk, fp) != chunk) {
            return -1;
        }
    
    }
    
    return 0;

}
(_WIN32) + + HANDLE hStdError = GetStdHandle (STD_ERROR_HANDLE); + WORD wColor; + + if (OriginalConsoleColor == -1) { + + CONSOLE_SCREEN_BUFFER_INFO csbi; + + if (!GetConsoleScreenBufferInfo (hStdError, &csbi)) { + return; + } + + OriginalConsoleColor = csbi.wAttributes; + + } + + wColor = (OriginalConsoleColor & 0xF0) + (color & 0xF); + SetConsoleTextAttribute (hStdError, wColor); + +#else + + fprintf (stderr, "\033[%dm", color); + +#endif + +} +#endif + +static void output_message (const char *filename, unsigned long lineno, unsigned long idx, enum report_type type, const char *fmt, va_list ap) { + + if (filename) { + + if (lineno == 0 && idx == 0) { + fprintf (stderr, "%s: ", filename); + } else { + fprintf (stderr, "%s:", filename); + } + + } + + if (lineno > 0) { + + if (idx == 0) { + fprintf (stderr, "%lu: ", lineno); + } else { + fprintf (stderr, "%lu:", lineno); + } + + } + + if (idx > 0) { + fprintf (stderr, "%lu: ", idx); + } + + if (type == REPORT_ERROR || type == REPORT_FATAL_ERROR) { + +#ifndef __PDOS__ + set_console_color (COLOR_ERROR); +#endif + + if (type == REPORT_ERROR) { + fprintf (stderr, "error:"); + } else { + fprintf (stderr, "fatal error:"); + } + + } else if (type == REPORT_INTERNAL_ERROR) { + +#ifndef __PDOS__ + set_console_color (COLOR_INTERNAL_ERROR); +#endif + + fprintf (stderr, "internal error:"); + + } else if (type == REPORT_WARNING) { + +#ifndef __PDOS__ + set_console_color (COLOR_WARNING); +#endif + + fprintf (stderr, "warning:"); + + } + +#ifndef __PDOS__ + reset_console_color (); +#endif + + fprintf (stderr, " "); + vfprintf (stderr, fmt, ap); + fprintf (stderr, "\n"); + + if (type != REPORT_WARNING) { + ++errors; + } + +} + +unsigned long get_error_count (void) { + return errors; +} + +void report_at (const char *filename, unsigned long lineno, enum report_type type, const char *fmt, ...) 
/******************************************************************************
 * @file            stdint.h
 *
 * Minimal fixed-width integer typedefs for C90 builds.  The whole body is
 * skipped when any of the three guard macros below is already defined.
 *****************************************************************************/
#ifndef     _STDINT_H_INCLUDED
#ifndef     _STDINT_H
#ifndef     _STDINT_H_

#define     _STDINT_H_INCLUDED
#define     _STDINT_H
#define     _STDINT_H_

/* INT_MAX and ULONG_MAX drive the width selection below. */
#include    <limits.h>

typedef     signed char                 int8_t;
typedef     unsigned char               uint8_t;

typedef     signed short                int16_t;
typedef     unsigned short              uint16_t;

/* Use int for 32 bits when it is wider than 16 bits, otherwise long. */
#if     INT_MAX > 32767
typedef     signed int                  int32_t;
typedef     unsigned int                uint32_t;
#else
typedef     signed long                 int32_t;
typedef     unsigned long               uint32_t;
#endif

/*
 * long is used for the "64-bit" types when it is wider than 32 bits, or when
 * NO_LONG_LONG is defined (in which case they are only as wide as long).
 */
#if     defined (NO_LONG_LONG) || ULONG_MAX > 4294967295UL
typedef     signed long                 int64_t;
typedef     unsigned long               uint64_t;
#else
typedef     signed long long            int64_t;
typedef     unsigned long long          uint64_t;
#endif

#define     UINT8_MAX                   0xff

#endif      /* _STDINT_H_ */
#endif      /* _STDINT_H */
#endif      /* _STDINT_H_INCLUDED */
0xca, + /* 0x54 */ 0x2a, + /* 0x55 */ 0xaa, + /* 0x56 */ 0x6a, + /* 0x57 */ 0xea, + /* 0x58 */ 0x1a, + /* 0x59 */ 0x9a, + /* 0x5a */ 0x5a, + /* 0x5b */ 0xda, + /* 0x5c */ 0x3a, + /* 0x5d */ 0xba, + /* 0x5e */ 0x7a, + /* 0x5f */ 0xfa, + /* 0x60 */ 0x06, + /* 0x61 */ 0x86, + /* 0x62 */ 0x46, + /* 0x63 */ 0xc6, + /* 0x64 */ 0x26, + /* 0x65 */ 0xa6, + /* 0x66 */ 0x66, + /* 0x67 */ 0xe6, + /* 0x68 */ 0x16, + /* 0x69 */ 0x96, + /* 0x6a */ 0x56, + /* 0x6b */ 0xd6, + /* 0x6c */ 0x36, + /* 0x6d */ 0xb6, + /* 0x6e */ 0x76, + /* 0x6f */ 0xf6, + /* 0x70 */ 0x0e, + /* 0x71 */ 0x8e, + /* 0x72 */ 0x4e, + /* 0x73 */ 0xce, + /* 0x74 */ 0x2e, + /* 0x75 */ 0xae, + /* 0x76 */ 0x6e, + /* 0x77 */ 0xee, + /* 0x78 */ 0x1e, + /* 0x79 */ 0x9e, + /* 0x7a */ 0x5e, + /* 0x7b */ 0xde, + /* 0x7c */ 0x3e, + /* 0x7d */ 0xbe, + /* 0x7e */ 0x7e, + /* 0x7f */ 0xfe, + /* 0x80 */ 0x01, + /* 0x81 */ 0x81, + /* 0x82 */ 0x41, + /* 0x83 */ 0xc1, + /* 0x84 */ 0x21, + /* 0x85 */ 0xa1, + /* 0x86 */ 0x61, + /* 0x87 */ 0xe1, + /* 0x88 */ 0x11, + /* 0x89 */ 0x91, + /* 0x8a */ 0x51, + /* 0x8b */ 0xd1, + /* 0x8c */ 0x31, + /* 0x8d */ 0xb1, + /* 0x8e */ 0x71, + /* 0x8f */ 0xf1, + /* 0x90 */ 0x09, + /* 0x91 */ 0x89, + /* 0x92 */ 0x49, + /* 0x93 */ 0xc9, + /* 0x94 */ 0x29, + /* 0x95 */ 0xa9, + /* 0x96 */ 0x69, + /* 0x97 */ 0xe9, + /* 0x98 */ 0x19, + /* 0x99 */ 0x99, + /* 0x9a */ 0x59, + /* 0x9b */ 0xd9, + /* 0x9c */ 0x39, + /* 0x9d */ 0xb9, + /* 0x9e */ 0x79, + /* 0x9f */ 0xf9, + /* 0xa0 */ 0x05, + /* 0xa1 */ 0x85, + /* 0xa2 */ 0x45, + /* 0xa3 */ 0xc5, + /* 0xa4 */ 0x25, + /* 0xa5 */ 0xa5, + /* 0xa6 */ 0x65, + /* 0xa7 */ 0xe5, + /* 0xa8 */ 0x15, + /* 0xa9 */ 0x95, + /* 0xaa */ 0x55, + /* 0xab */ 0xd5, + /* 0xac */ 0x35, + /* 0xad */ 0xb5, + /* 0xae */ 0x75, + /* 0xaf */ 0xf5, + /* 0xb0 */ 0x0d, + /* 0xb1 */ 0x8d, + /* 0xb2 */ 0x4d, + /* 0xb3 */ 0xcd, + /* 0xb4 */ 0x2d, + /* 0xb5 */ 0xad, + /* 0xb6 */ 0x6d, + /* 0xb7 */ 0xed, + /* 0xb8 */ 0x1d, + /* 0xb9 */ 0x9d, + /* 0xba */ 0x5d, + /* 0xbb */ 0xdd, + /* 0xbc */ 
0x3d, + /* 0xbd */ 0xbd, + /* 0xbe */ 0x7d, + /* 0xbf */ 0xfd, + /* 0xc0 */ 0x03, + /* 0xc1 */ 0x83, + /* 0xc2 */ 0x43, + /* 0xc3 */ 0xc3, + /* 0xc4 */ 0x23, + /* 0xc5 */ 0xa3, + /* 0xc6 */ 0x63, + /* 0xc7 */ 0xe3, + /* 0xc8 */ 0x13, + /* 0xc9 */ 0x93, + /* 0xca */ 0x53, + /* 0xcb */ 0xd3, + /* 0xcc */ 0x33, + /* 0xcd */ 0xb3, + /* 0xce */ 0x73, + /* 0xcf */ 0xf3, + /* 0xd0 */ 0x0b, + /* 0xd1 */ 0x8b, + /* 0xd2 */ 0x4b, + /* 0xd3 */ 0xcb, + /* 0xd4 */ 0x2b, + /* 0xd5 */ 0xab, + /* 0xd6 */ 0x6b, + /* 0xd7 */ 0xeb, + /* 0xd8 */ 0x1b, + /* 0xd9 */ 0x9b, + /* 0xda */ 0x5b, + /* 0xdb */ 0xdb, + /* 0xdc */ 0x3b, + /* 0xdd */ 0xbb, + /* 0xde */ 0x7b, + /* 0xdf */ 0xfb, + /* 0xe0 */ 0x07, + /* 0xe1 */ 0x87, + /* 0xe2 */ 0x47, + /* 0xe3 */ 0xc7, + /* 0xe4 */ 0x27, + /* 0xe5 */ 0xa7, + /* 0xe6 */ 0x67, + /* 0xe7 */ 0xe7, + /* 0xe8 */ 0x17, + /* 0xe9 */ 0x97, + /* 0xea */ 0x57, + /* 0xeb */ 0xd7, + /* 0xec */ 0x37, + /* 0xed */ 0xb7, + /* 0xee */ 0x77, + /* 0xef */ 0xf7, + /* 0xf0 */ 0x0f, + /* 0xf1 */ 0x8f, + /* 0xf2 */ 0x4f, + /* 0xf3 */ 0xcf, + /* 0xf4 */ 0x2f, + /* 0xf5 */ 0xaf, + /* 0xf6 */ 0x6f, + /* 0xf7 */ 0xef, + /* 0xf8 */ 0x1f, + /* 0xf9 */ 0x9f, + /* 0xfa */ 0x5f, + /* 0xfb */ 0xdf, + /* 0xfc */ 0x3f, + /* 0xfd */ 0xbf, + /* 0xfe */ 0x7f, + /* 0xff */ 0xff, + +}; + +unsigned char fixed_litlen_lengths[288] = { + + /* 0 */ 8, + /* 1 */ 8, + /* 2 */ 8, + /* 3 */ 8, + /* 4 */ 8, + /* 5 */ 8, + /* 6 */ 8, + /* 7 */ 8, + /* 8 */ 8, + /* 9 */ 8, + /* 10 */ 8, + /* 11 */ 8, + /* 12 */ 8, + /* 13 */ 8, + /* 14 */ 8, + /* 15 */ 8, + /* 16 */ 8, + /* 17 */ 8, + /* 18 */ 8, + /* 19 */ 8, + /* 20 */ 8, + /* 21 */ 8, + /* 22 */ 8, + /* 23 */ 8, + /* 24 */ 8, + /* 25 */ 8, + /* 26 */ 8, + /* 27 */ 8, + /* 28 */ 8, + /* 29 */ 8, + /* 30 */ 8, + /* 31 */ 8, + /* 32 */ 8, + /* 33 */ 8, + /* 34 */ 8, + /* 35 */ 8, + /* 36 */ 8, + /* 37 */ 8, + /* 38 */ 8, + /* 39 */ 8, + /* 40 */ 8, + /* 41 */ 8, + /* 42 */ 8, + /* 43 */ 8, + /* 44 */ 8, + /* 45 */ 8, + /* 46 */ 8, + /* 47 */ 8, + 
/* 48 */ 8, + /* 49 */ 8, + /* 50 */ 8, + /* 51 */ 8, + /* 52 */ 8, + /* 53 */ 8, + /* 54 */ 8, + /* 55 */ 8, + /* 56 */ 8, + /* 57 */ 8, + /* 58 */ 8, + /* 59 */ 8, + /* 60 */ 8, + /* 61 */ 8, + /* 62 */ 8, + /* 63 */ 8, + /* 64 */ 8, + /* 65 */ 8, + /* 66 */ 8, + /* 67 */ 8, + /* 68 */ 8, + /* 69 */ 8, + /* 70 */ 8, + /* 71 */ 8, + /* 72 */ 8, + /* 73 */ 8, + /* 74 */ 8, + /* 75 */ 8, + /* 76 */ 8, + /* 77 */ 8, + /* 78 */ 8, + /* 79 */ 8, + /* 80 */ 8, + /* 81 */ 8, + /* 82 */ 8, + /* 83 */ 8, + /* 84 */ 8, + /* 85 */ 8, + /* 86 */ 8, + /* 87 */ 8, + /* 88 */ 8, + /* 89 */ 8, + /* 90 */ 8, + /* 91 */ 8, + /* 92 */ 8, + /* 93 */ 8, + /* 94 */ 8, + /* 95 */ 8, + /* 96 */ 8, + /* 97 */ 8, + /* 98 */ 8, + /* 99 */ 8, + /* 100 */ 8, + /* 101 */ 8, + /* 102 */ 8, + /* 103 */ 8, + /* 104 */ 8, + /* 105 */ 8, + /* 106 */ 8, + /* 107 */ 8, + /* 108 */ 8, + /* 109 */ 8, + /* 110 */ 8, + /* 111 */ 8, + /* 112 */ 8, + /* 113 */ 8, + /* 114 */ 8, + /* 115 */ 8, + /* 116 */ 8, + /* 117 */ 8, + /* 118 */ 8, + /* 119 */ 8, + /* 120 */ 8, + /* 121 */ 8, + /* 122 */ 8, + /* 123 */ 8, + /* 124 */ 8, + /* 125 */ 8, + /* 126 */ 8, + /* 127 */ 8, + /* 128 */ 8, + /* 129 */ 8, + /* 130 */ 8, + /* 131 */ 8, + /* 132 */ 8, + /* 133 */ 8, + /* 134 */ 8, + /* 135 */ 8, + /* 136 */ 8, + /* 137 */ 8, + /* 138 */ 8, + /* 139 */ 8, + /* 140 */ 8, + /* 141 */ 8, + /* 142 */ 8, + /* 143 */ 8, + /* 144 */ 9, + /* 145 */ 9, + /* 146 */ 9, + /* 147 */ 9, + /* 148 */ 9, + /* 149 */ 9, + /* 150 */ 9, + /* 151 */ 9, + /* 152 */ 9, + /* 153 */ 9, + /* 154 */ 9, + /* 155 */ 9, + /* 156 */ 9, + /* 157 */ 9, + /* 158 */ 9, + /* 159 */ 9, + /* 160 */ 9, + /* 161 */ 9, + /* 162 */ 9, + /* 163 */ 9, + /* 164 */ 9, + /* 165 */ 9, + /* 166 */ 9, + /* 167 */ 9, + /* 168 */ 9, + /* 169 */ 9, + /* 170 */ 9, + /* 171 */ 9, + /* 172 */ 9, + /* 173 */ 9, + /* 174 */ 9, + /* 175 */ 9, + /* 176 */ 9, + /* 177 */ 9, + /* 178 */ 9, + /* 179 */ 9, + /* 180 */ 9, + /* 181 */ 9, + /* 182 */ 9, + /* 183 */ 9, + /* 184 */ 
9, + /* 185 */ 9, + /* 186 */ 9, + /* 187 */ 9, + /* 188 */ 9, + /* 189 */ 9, + /* 190 */ 9, + /* 191 */ 9, + /* 192 */ 9, + /* 193 */ 9, + /* 194 */ 9, + /* 195 */ 9, + /* 196 */ 9, + /* 197 */ 9, + /* 198 */ 9, + /* 199 */ 9, + /* 200 */ 9, + /* 201 */ 9, + /* 202 */ 9, + /* 203 */ 9, + /* 204 */ 9, + /* 205 */ 9, + /* 206 */ 9, + /* 207 */ 9, + /* 208 */ 9, + /* 209 */ 9, + /* 210 */ 9, + /* 211 */ 9, + /* 212 */ 9, + /* 213 */ 9, + /* 214 */ 9, + /* 215 */ 9, + /* 216 */ 9, + /* 217 */ 9, + /* 218 */ 9, + /* 219 */ 9, + /* 220 */ 9, + /* 221 */ 9, + /* 222 */ 9, + /* 223 */ 9, + /* 224 */ 9, + /* 225 */ 9, + /* 226 */ 9, + /* 227 */ 9, + /* 228 */ 9, + /* 229 */ 9, + /* 230 */ 9, + /* 231 */ 9, + /* 232 */ 9, + /* 233 */ 9, + /* 234 */ 9, + /* 235 */ 9, + /* 236 */ 9, + /* 237 */ 9, + /* 238 */ 9, + /* 239 */ 9, + /* 240 */ 9, + /* 241 */ 9, + /* 242 */ 9, + /* 243 */ 9, + /* 244 */ 9, + /* 245 */ 9, + /* 246 */ 9, + /* 247 */ 9, + /* 248 */ 9, + /* 249 */ 9, + /* 250 */ 9, + /* 251 */ 9, + /* 252 */ 9, + /* 253 */ 9, + /* 254 */ 9, + /* 255 */ 9, + /* 256 */ 7, + /* 257 */ 7, + /* 258 */ 7, + /* 259 */ 7, + /* 260 */ 7, + /* 261 */ 7, + /* 262 */ 7, + /* 263 */ 7, + /* 264 */ 7, + /* 265 */ 7, + /* 266 */ 7, + /* 267 */ 7, + /* 268 */ 7, + /* 269 */ 7, + /* 270 */ 7, + /* 271 */ 7, + /* 272 */ 7, + /* 273 */ 7, + /* 274 */ 7, + /* 275 */ 7, + /* 276 */ 7, + /* 277 */ 7, + /* 278 */ 7, + /* 279 */ 7, + /* 280 */ 8, + /* 281 */ 8, + /* 282 */ 8, + /* 283 */ 8, + /* 284 */ 8, + /* 285 */ 8, + /* 286 */ 8, + /* 287 */ 8, + +}; + +unsigned char fixed_dist_lengths[32] = { + + /* 0 */ 5, + /* 1 */ 5, + /* 2 */ 5, + /* 3 */ 5, + /* 4 */ 5, + /* 5 */ 5, + /* 6 */ 5, + /* 7 */ 5, + /* 8 */ 5, + /* 9 */ 5, + /* 10 */ 5, + /* 11 */ 5, + /* 12 */ 5, + /* 13 */ 5, + /* 14 */ 5, + /* 15 */ 5, + /* 16 */ 5, + /* 17 */ 5, + /* 18 */ 5, + /* 19 */ 5, + /* 20 */ 5, + /* 21 */ 5, + /* 22 */ 5, + /* 23 */ 5, + /* 24 */ 5, + /* 25 */ 5, + /* 26 */ 5, + /* 27 */ 5, + /* 28 */ 5, + 
/**
 * Distance table, indexed by dist symbol value (0--29).  Each entry gives
 * the symbol's base distance followed by the number of extra bits
 * (struct dist_tbl_t: base_dist, ebits).
 */
struct dist_tbl_t dist_tbl[30] = {

    /*  0 */    { 1,        0   },
    /*  1 */    { 2,        0   },
    /*  2 */    { 3,        0   },
    /*  3 */    { 4,        0   },
    /*  4 */    { 5,        1   },
    /*  5 */    { 7,        1   },
    /*  6 */    { 9,        2   },
    /*  7 */    { 13,       2   },
    /*  8 */    { 17,       3   },
    /*  9 */    { 25,       3   },
    /* 10 */    { 33,       4   },
    /* 11 */    { 49,       4   },
    /* 12 */    { 65,       5   },
    /* 13 */    { 97,       5   },
    /* 14 */    { 129,      6   },
    /* 15 */    { 193,      6   },
    /* 16 */    { 257,      7   },
    /* 17 */    { 385,      7   },
    /* 18 */    { 513,      8   },
    /* 19 */    { 769,      8   },
    /* 20 */    { 1025,     9   },
    /* 21 */    { 1537,     9   },
    /* 22 */    { 2049,     10  },
    /* 23 */    { 3073,     10  },
    /* 24 */    { 4097,     11  },
    /* 25 */    { 6145,     11  },
    /* 26 */    { 8193,     12  },
    /* 27 */    { 12289,    12  },
    /* 28 */    { 16385,    13  },
    /* 29 */    { 24577,    13  },

};
/**
 * Create PATH and every missing parent directory (mode 0755).
 *
 * PATH is split in place at each '/', so it must point to writable storage
 * despite the const qualifier.  The separator is always put back before the
 * function returns, including on failure.
 *
 * Returns 0 on success (components that already exist are not an error) and
 * 1 when mkdir fails for any reason other than EEXIST.
 */
static int make_directory (const char *path) {

    char *p = (char *) path;
    int failed;
    
    while (p && *p != '\0') {
    
        /* Skip the separator(s) in front of the next component. */
        while (*p == '/') {
            p++;
        }
        
        if (*p == '\0') { break; }
        
        /* Temporarily end the string after the current component. */
        if ((p = strchr (p, '/'))) {
            *p = '\0';
        }
        
        failed = (mkdir (path, 0755) < 0 && errno != EEXIST);
        
        /* Restore the caller's string before deciding whether to bail out. */
        if (p) { *p = '/'; }
        
        if (failed) {
            return 1;
        }
    
    }
    
    return 0;

}
ERROR_ALREADY_EXISTS) { + return 1; + } + + } + + if (p) { *p = '\\'; } + + } + + return 0; + +} +#endif + +static FILE *fp = 0; + +struct eocdr { + + uint16_t disk_nbr; /* Number of this disk. */ + + uint16_t cd_start_disk; /* Nbr. of disk with start of the CD. */ + uint16_t disk_cd_entries; /* Nbr. of CD entries on this disk. */ + uint16_t cd_entries; /* Nbr. of Centeral Directory Entiries. */ + + uint32_t cd_size; /* Centeral Directory size in bytes. */ + uint32_t cd_offset; /* Centeral Directory file offset. */ + + uint16_t comment_len; /* Archive comment length. */ + unsigned char *comment; /* Archive comment. */ + +}; + +#define EOCDR_SIGNATURE 0x504B0506 +#define EOCDR_BASE_SIZE 22 + +#define MAX_BACK_OFFSET (1024 + 100) + +static int find_eocdr (struct eocdr *r) { + + unsigned long back_offset, length, signature; + + unsigned char *buf, *p; + int ret = 0; + + fseek (fp, 0, SEEK_END); + length = ftell (fp); + + for (back_offset = 0; back_offset <= MAX_BACK_OFFSET; back_offset++) { + + if (length < EOCDR_BASE_SIZE + back_offset) { + break; + } + + fseek (fp, length - EOCDR_BASE_SIZE - back_offset, SEEK_SET); + + if (!(buf = malloc (EOCDR_BASE_SIZE))) { + + ret = -1; + break; + + } + + p = buf; + + if ((fread (buf, 1, EOCDR_BASE_SIZE, fp)) != EOCDR_BASE_SIZE) { + + free ((void *) buf); + + ret = -1; + break; + + } + + signature = array_to_integer (p, 4, 1), p += 4; + + if (signature == EOCDR_SIGNATURE) { + + r->disk_nbr = array_to_integer (p, 2, 0), p += 2; + r->cd_start_disk = array_to_integer (p, 2, 0), p += 2; + r->disk_cd_entries = array_to_integer (p, 2, 0), p += 2; + r->cd_entries = array_to_integer (p, 2, 0), p += 2; + + r->cd_size = array_to_integer (p, 4, 0), p += 4; + r->cd_offset = array_to_integer (p, 4, 0), p += 4; + + if ((r->comment_len = array_to_integer (p, 2, 0)) > back_offset) { + + free ((void *) buf); + + ret = -1; + break; + + } + + free ((void *) buf); + + if (r->comment_len > 0) { + + if ((r->comment = malloc (r->comment_len + 1))) { + 
+ memset (r->comment, 0, r->comment_len + 1); + + if (fread (r->comment, 1, r->comment_len, fp) != r->comment_len) { + + ret = -1; + break; + + } + + } + + break; + + } + + break; + + } + + free ((void *) buf); + + } + + rewind (fp); + return ret; + +} + +struct cfh { + + uint16_t made_by_ver; /* Version made by. */ + uint16_t extrcat_ver; /* Version needed to extract. */ + uint16_t gp_flag; /* General-purpose bit flag. */ + uint16_t method; /* Compression method. */ + uint16_t mod_time; /* Modification time. */ + uint16_t mod_date; /* Modification date. */ + + uint32_t crc32; /* CRC-32 checkusm. */ + uint32_t comp_size; /* Compressed size. */ + uint32_t uncomp_size; /* Uncompressed size. */ + + uint16_t name_len; /* Filename length. */ + uint16_t extra_len; /* Extra data length. */ + uint16_t comment_len; /* Comment length. */ + uint16_t disk_nbr_start; /* Disk nbr. where file begins. */ + + uint16_t int_attrs; /* Internal file attributes. */ + uint32_t ext_attrs; /* External file attributes. */ + + uint32_t lfh_offset; /* Local File HEader offset. */ + + char *name; /* Filename. */ + char *extra; /* Extra data. */ + char *comment; /* File comment. 
*/ + +}; + +#define CFH_SIGNATURE 0x504B0102 +#define CFH_BASE_SIZE 46 + +static int read_cfh (struct cfh *cfh) { + + unsigned char buf[CFH_BASE_SIZE], *p = buf; + uint32_t signature; + + if (fread (buf, 1, CFH_BASE_SIZE, fp) != CFH_BASE_SIZE) { + return -1; + } + + signature = array_to_integer (buf, 4, 1), p += 4; + + if (signature != CFH_SIGNATURE) { + return -1; + } + + cfh->made_by_ver = array_to_integer (p, 2, 0), p += 2; + cfh->extrcat_ver = array_to_integer (p, 2, 0), p += 2; + cfh->gp_flag = array_to_integer (p, 2, 0), p += 2; + cfh->method = array_to_integer (p, 2, 0), p += 2; + cfh->mod_time = array_to_integer (p, 2, 0), p += 2; + cfh->mod_date = array_to_integer (p, 2, 0), p += 2; + cfh->crc32 = array_to_integer (p, 4, 0), p += 4; + cfh->comp_size = array_to_integer (p, 4, 0), p += 4; + cfh->uncomp_size = array_to_integer (p, 4, 0), p += 4; + cfh->name_len = array_to_integer (p, 2, 0), p += 2; + cfh->extra_len = array_to_integer (p, 2, 0), p += 2; + cfh->comment_len = array_to_integer (p, 2, 0), p += 2; + cfh->disk_nbr_start = array_to_integer (p, 2, 0), p += 2; + cfh->int_attrs = array_to_integer (p, 2, 0), p += 2; + cfh->ext_attrs = array_to_integer (p, 4, 0), p += 4; + cfh->lfh_offset = array_to_integer (p, 4, 0), p += 4; + + if ((unsigned char *) (p - CFH_BASE_SIZE) != buf) { + + printf ("FUCKKKK\n"); + return -1; + + } + + if ((cfh->name = malloc (cfh->name_len + 1))) { + + memset (cfh->name, 0, cfh->name_len + 1); + + if (fread (cfh->name, 1, cfh->name_len, fp) != cfh->name_len) { + + free (cfh->name); + return -1; + + } + + } + + if ((cfh->extra = malloc (cfh->extra_len + 1))) { + + memset (cfh->extra, 0, cfh->extra_len + 1); + + if (fread (cfh->extra, 1, cfh->extra_len, fp) != cfh->extra_len) { + + free (cfh->extra); + free (cfh->name); + + return -1; + + } + + } + + if ((cfh->comment = malloc (cfh->comment_len + 1))) { + + memset (cfh->comment, 0, cfh->comment_len + 1); + + if (fread (cfh->comment, 1, cfh->comment_len, fp) != cfh->comment_len) { 
+ + free (cfh->comment); + free (cfh->extra); + free (cfh->name); + + return -1; + + } + + } + + return 0; + +} + +static time_t dos2ctime (uint16_t dos_date, uint16_t dos_time) { + + struct tm tm = { 0 }; + + tm.tm_sec = (dos_time & 0x1f) * 2; /* Bits 0--4; Secs divided by 2. */ + tm.tm_min = (dos_time >> 5) & 0x3f; /* Bits 5--10; Minute. */ + tm.tm_hour = (dos_time >> 11); /* Bits 11--15; Hour (0--23). */ + + tm.tm_mday = (dos_date & 0x1f); /* Bits 0--4; Day (1--31). */ + tm.tm_mon = ((dos_date >> 5) & 0x0f) - 1; /* Bits 5--8; Month (1--12). */ + tm.tm_year = (dos_date >> 9) + 80; /* Bits 9--15; Year - 1980. */ + + tm.tm_isdst = -1; + return mktime (&tm); + +} + +static void display_comment (const char *path) { + + struct eocdr eocdr = { 0 }; + + if (find_eocdr (&eocdr)) { + + report_at (program_name, 0, REPORT_ERROR, "%s is not a valid ZIP file", path); + return; + + } + + printf ("Achive: %s\n", path); + + if (eocdr.comment) { + + printf ("%s\n", eocdr.comment); + free (eocdr.comment); + + } + +} + +static void list_zip (const char *path) { + + struct eocdr eocdr = { 0 }; + struct cfh cfh = { 0 }; + + char date[11], time[6]; + time_t ctime; + + unsigned long final_size = 0, total_files = 0, i; + + if (find_eocdr (&eocdr)) { + + report_at (program_name, 0, REPORT_ERROR, "%s is not a valid ZIP file", path); + return; + + } + + if (eocdr.disk_nbr != 0 || eocdr.cd_start_disk != 0 || eocdr.disk_cd_entries != eocdr.cd_entries) { + + report_at (program_name, 0, REPORT_INTERNAL_ERROR, "currently multi-volume archives aren't supported"); + return; + + } + + printf ("Achive: %s\n", path); + if (eocdr.comment) { printf ("%s\n", eocdr.comment); } + + fseek (fp, eocdr.cd_offset, SEEK_SET); + + printf (" Length Date Time Name \n"); + printf ("---------- ---------- ---------- ----------\n"); + + for (i = 0; i < eocdr.cd_entries; i++) { + + if (read_cfh (&cfh)) { + + report_at (program_name, 0, REPORT_ERROR, "failed to process Ceneral File Header"); + break; + + } + + if 
/**
 * Decide whether an archive member name is safe to create below the
 * extraction directory.
 *
 * NAME need not be NUL-terminated; only the first NAME_LEN bytes are read.
 *
 * Returns 1 when the name is acceptable and 0 when it is empty, starts with
 * '/', '\\' or '~', contains one of < > : " | ? *, contains "..", contains a
 * control character (byte value below 0x20), or ends with a space or a dot.
 */
static int is_relative (const char *name, uint64_t name_len) {

    uint64_t i = 0;
    char last;
    
    if (name_len < 1) {
        return 0;
    }
    
    if (name[0] == '/' || name[0] == '\\' || name[0] == '~') {
        return 0;
    }
    
    for (i = 0; i < name_len; i++) {
    
        switch (name[i]) {
        
            case '<':
            case '>':
            case ':':
            case '"':
            case '|':
            case '?':
            case '*':
            
                return 0;
            
            case '.':
            
                if (i + 1 < name_len && name[i + 1] == '.') {
                    return 0;
                }
                
                break;
            
            default:
            
                /*
                 * Compare as unsigned so that bytes >= 0x80 (e.g. UTF-8) are
                 * not mistaken for control characters where char is signed.
                 */
                if ((unsigned char) name[i] < ' ') {
                    return 0;
                }
                
                break;
        
        }
    
    }
    
    last = name[name_len - 1];
    
    if (last == ' ' || last == '.') {
        return 0;
    }
    
    return 1;

}
*/ + +}; + +#define LFH_SIGNATURE 0x504B0304 +#define LFH_BASE_SIZE 30 + +static int read_lfh (struct lfh *lfh) { + + unsigned char buf[CFH_BASE_SIZE], *p = buf; + uint32_t signature; + + if (fread (buf, 1, LFH_BASE_SIZE, fp) != LFH_BASE_SIZE) { + return -1; + } + + signature = array_to_integer (buf, 4, 1), p += 4; + + if (signature != LFH_SIGNATURE) { + return -1; + } + + lfh->extrcat_ver = array_to_integer (p, 2, 0), p += 2; + lfh->gp_flag = array_to_integer (p, 2, 0), p += 2; + lfh->method = array_to_integer (p, 2, 0), p += 2; + lfh->mod_time = array_to_integer (p, 2, 0), p += 2; + lfh->mod_date = array_to_integer (p, 2, 0), p += 2; + lfh->crc32 = array_to_integer (p, 4, 0), p += 4; + lfh->comp_size = array_to_integer (p, 4, 0), p += 4; + lfh->uncomp_size = array_to_integer (p, 4, 0), p += 4; + lfh->name_len = array_to_integer (p, 2, 0), p += 2; + lfh->extra_len = array_to_integer (p, 2, 0), p += 2; + + if ((unsigned char *) (p - LFH_BASE_SIZE) != buf) { + + printf ("FUCKKKK\n"); + return -1; + + } + + if ((lfh->name = malloc (lfh->name_len + 1))) { + + memset (lfh->name, 0, lfh->name_len + 1); + + if (fread (lfh->name, 1, lfh->name_len, fp) != lfh->name_len) { + + free (lfh->name); + return -1; + + } + + } + + if ((lfh->extra = malloc (lfh->extra_len))) { + + memset (lfh->extra, 0, lfh->extra_len); + + if (fread (lfh->extra, 1, lfh->extra_len, fp) != lfh->extra_len) { + + free (lfh->extra); + free (lfh->name); + + return -1; + + } + + } + + return 0; + +} + +static int validate_structs (struct cfh *cfh, struct lfh *lfh) { + + if (cfh->extrcat_ver != lfh->extrcat_ver) { + return -1; + } + + if (cfh->gp_flag != lfh->gp_flag) { + return -1; + } + + if (cfh->method != lfh->method) { + return -1; + } + + if (cfh->mod_date != lfh->mod_date) { + return -1; + } + + if (cfh->mod_time != lfh->mod_time) { + return -1; + } + + if (cfh->crc32 != lfh->crc32) { + return -1; + } + + if (cfh->comp_size != lfh->comp_size) { + return -1; + } + + if (cfh->uncomp_size != 
lfh->uncomp_size) { + return -1; + } + + if (cfh->name_len != lfh->name_len) { + return -1; + } + + if (!lfh->name || strcmp (lfh->name, cfh->name)) { + return -1; + } + + return 0; + +} + +static void extract_zip (const char *path) { + + struct eocdr eocdr = { 0 }; + + struct cfh cfh = { 0 }; + struct lfh lfh = { 0 }; + + char *temp; + int j; + + uint64_t src_used, dst_used; + uint16_t i; + +#if defined (unix) || defined (__unix) || defined (__unix__) || defined (__APPLE__) + + char ch = '/'; + struct stat sb; + +#elif defined (_WIN32) + + char ch = '\\'; + DWORD dwAttrib; + +#endif + + unsigned char *data; + uint64_t orig_offset; + + FILE *outfile; + + if (find_eocdr (&eocdr)) { + + report_at (program_name, 0, REPORT_ERROR, "%s is not a valid ZIP file", path); + return; + + } + + if (eocdr.disk_nbr != 0 || eocdr.cd_start_disk != 0 || eocdr.disk_cd_entries != eocdr.cd_entries) { + + report_at (program_name, 0, REPORT_INTERNAL_ERROR, "currently multi-volume archives aren't supported"); + return; + + } + + printf ("Archive: %s\n", path); + if (eocdr.comment) { printf ("%s\n", eocdr.comment); } + + fseek (fp, eocdr.cd_offset, SEEK_SET); + + for (i = 0; i < eocdr.cd_entries; i++) { + + if (read_cfh (&cfh)) { + + report_at (program_name, 0, REPORT_ERROR, "failed to process Ceneral File Header"); + break; + + } + + if (cfh.gp_flag & 1) { + + report_at (program_name, 0, REPORT_INTERNAL_ERROR, "currently encryption isn't supported"); + break; + + } + + if (!cfh.name) { + + report_at (program_name, 0, REPORT_INTERNAL_ERROR, "bad filename"); + break; + + } + + for (j = 0; j < state->xlist.length; j++) { + + if ((temp = xstrdup (state->xlist.data[j]))) { + + if (strchr (temp, '*')) { + + if (wild_compare (temp, cfh.name)) { + + free (temp); + break; + + } + + } else { + + if (strcmp (cfh.name, temp) == 0) { + + free (temp); + break; + + } + + } + + free (temp); + + } + + } + + if (j < state->xlist.length) { + + free (cfh.name); + + if (cfh.comment) { free (cfh.comment); } + 
if (cfh.extra) { free (cfh.extra); } + + memset (&cfh, 0, sizeof (cfh)); + continue; + + } + + if (cfh.ext_attrs & EXT_ATTR_DIR) { + + if (state->exdir) { + + if (!(temp = malloc (strlen (state->exdir) + 1 + cfh.name_len + 1))) { + + report_at (program_name, 0, REPORT_ERROR, "not enough free memory for name"); + break; + + } + + sprintf (temp, "%s%c%s", state->exdir, ch, cfh.name); + + if (!is_relative (temp + strlen (state->exdir) + 1, cfh.name_len)) { + + free (temp); + free (cfh.name); + + if (cfh.comment) { free (cfh.comment); } + if (cfh.extra) { free (cfh.extra); } + + memset (&cfh, 0, sizeof (cfh)); + continue; + + } + + } else { + + if (!(temp = malloc (cfh.name_len + 1))) { + + report_at (program_name, 0, REPORT_ERROR, "not enough free memory for name"); + free (cfh.name); + + if (cfh.comment) { free (cfh.comment); } + if (cfh.extra) { free (cfh.extra); } + + memset (&cfh, 0, sizeof (cfh)); + break; + + } + + sprintf (temp, "%s", cfh.name); + + } + + if (!is_relative (temp, cfh.name_len)) { + + free (temp); + free (cfh.name); + + if (cfh.comment) { free (cfh.comment); } + if (cfh.extra) { free (cfh.extra); } + + memset (&cfh, 0, sizeof (cfh)); + continue; + + } + + if (temp[strlen (temp) - 1] == '/') { + temp[strlen (temp) - 1] = '\0'; + } + +#if defined (unix) || defined (__unix) || defined (__unix__) || defined (__APPLE__) + + if (!stat (temp, &sb)) { + + if (S_ISDIR (sb.st_mode)) { + + free (temp); + free (cfh.name); + + if (cfh.comment) { free (cfh.comment); } + if (cfh.extra) { free (cfh.extra); } + + memset (&cfh, 0, sizeof (cfh)); + continue; + + } + + report_at (program_name, 0, REPORT_ERROR, "%s exists but is not a directory", temp); + + free (temp); + break; + + } + +#elif defined (_WIN32) + + dwAttrib = GetFileAttributes (temp); + + if (dwAttrib != INVALID_FILE_ATTRIBUTES) { + + if (dwAttrib & FILE_ATTRIBUTE_DIRECTORY) { + + free (temp); + free (cfh.name); + + if (cfh.comment) { free (cfh.comment); } + if (cfh.extra) { free (cfh.extra); } + + 
memset (&cfh, 0, sizeof (cfh)); + continue; + + } + + report_at (program_name, 0, REPORT_ERROR, "%s exists but is not a directory", temp); + + free (temp); + break; + + } + +#endif + + printf (" creating: %s%c\n", temp, ch); + + if (make_directory (temp)) { + + report_at (program_name, 0, REPORT_ERROR, "failed to create %s", state->exdir); + + free (temp); + break; + + } + + free (temp); + continue; + + } + + if (!is_relative (cfh.name, cfh.name_len)) { + continue; + } + + if (cfh.method != ZIP_DEFLATE) { + + report_at (program_name, 0, REPORT_ERROR, "currently only ZIP_DEFLATE is supported"); + break; + + } + + if (state->exdir) { + + if (!(temp = malloc (strlen (state->exdir) + 1 + cfh.name_len + 1))) { + + report_at (program_name, 0, REPORT_ERROR, "not enough free memory for name"); + break; + + } + + sprintf (temp, "%s%c%s", state->exdir, ch, cfh.name); + + } else { + + if (!(temp = malloc (cfh.name_len + 1))) { + + report_at (program_name, 0, REPORT_ERROR, "not enough free memory for name"); + break; + + } + + sprintf (temp, "%s", cfh.name); + + } + + orig_offset = ftell (fp); + + if (fseek (fp, cfh.lfh_offset, SEEK_SET)) { + + free (temp); + break; + + } + + if (read_lfh (&lfh)) { + + report_at (program_name, 0, REPORT_ERROR, "failed to process Local File Header"); + + fseek (fp, cfh.lfh_offset, SEEK_SET); + free (temp); + + break; + + } + + if (validate_structs (&cfh, &lfh)) { + + report_at (program_name, 0, REPORT_ERROR, "Centeral File Header and Local File Header mismatch"); + fseek (fp, cfh.lfh_offset, SEEK_SET); + + free (data); + free (temp); + + break; + + } + + if (!(data = malloc (cfh.comp_size))) { + + fseek (fp, cfh.lfh_offset, SEEK_SET); + free (temp); + + break; + + } + + if (fread (data, 1, lfh.comp_size, fp) != lfh.comp_size) { + + fseek (fp, cfh.lfh_offset, SEEK_SET); + + free (data); + free (temp); + + break; + + } + + if (fseek (fp, orig_offset, SEEK_SET)) { + + free (data); + free (temp); + + break; + + } + + if (cfh.method == ZIP_DEFLATE) 
{ + + if (!(outfile = fopen (temp, "w+b"))) { + + report_at (program_name, 0, REPORT_ERROR, "failed to open '%s' for writing", temp); + remove (temp); + + free (data); + free (temp); + + break; + + } + + printf (" inflating: %s\n", temp); + + if (hwinflate (data, lfh.comp_size, &src_used, outfile, lfh.uncomp_size, &dst_used) != HWINF_OK) { + + report_at (program_name, 0, REPORT_ERROR, "failed to extract %s", temp); + fclose (outfile); + + free (data); + remove (temp); + + break; + + } + + free (data); + fclose (outfile); + + if (src_used != lfh.comp_size || dst_used != lfh.uncomp_size) { + + report_at (program_name, 0, REPORT_ERROR, "%lld, %d, %lld, %d: failed to extract %s", + src_used, lfh.comp_size, dst_used, lfh.uncomp_size, temp); + + remove (temp); + break; + + } + + } + + if (lfh.extra) { free (lfh.extra); } + if (lfh.name) { free (lfh.name); } + + memset (&lfh, 0, sizeof (lfh)); + + free (cfh.name); + free (temp); + + if (cfh.comment) { free (cfh.comment); } + if (cfh.extra) { free (cfh.extra); } + + memset (&cfh, 0, sizeof (cfh)); + + } + + if (lfh.extra) { free (lfh.extra); } + if (lfh.name) { free (lfh.name); } + + if (cfh.comment) { free (cfh.comment); } + if (cfh.extra) { free (cfh.extra); } + if (cfh.name) { free (cfh.name); } + +} + +int main (int argc, char **argv) { + + long i; + + if (argc && *argv) { + + char *p; + program_name = *argv; + + if ((p = strrchr (program_name, '/')) || (p = strrchr (program_name, '\\'))) { + program_name = (p + 1); + } + + } + + state = xmalloc (sizeof (*state)); + parse_args (argc, argv, 1); + + if (state->nb_files == 0) { + + report_at (program_name, 0, REPORT_ERROR, "no input files provided"); + return EXIT_FAILURE; + + } + + if (state->exdir) { + + if (state->nb_files > 1) { + + report_at (program_name, 0, REPORT_ERROR, "only on file can be unzipped when exdir is specified"); + return EXIT_FAILURE; + + } + + } + + for (i = 0; i < state->nb_files; i++) { + + if (!(fp = fopen (state->files[i], "r+b"))) { + + 
report_at (program_name, 0, REPORT_ERROR, "failed to open '%s' for reading", state->files[i]); + continue; + + } + + if (state->list) { + + list_zip (state->files[i]); + + fclose (fp); + continue; + + } + + if (state->only_comment) { + + display_comment (state->files[i]); + + fclose (fp); + continue; + + } + + if (state->exdir) { + + if (make_directory (state->exdir)) { + + report_at (program_name, 0, REPORT_ERROR, "failed to create %s", state->exdir); + + fclose (fp); + return EXIT_FAILURE; + + } + + } + + extract_zip (state->files[i]); + fclose (fp); + + } + + return (get_error_count () > 0 ? EXIT_FAILURE : EXIT_SUCCESS); + +} diff --git a/unzip.h b/unzip.h new file mode 100755 index 0000000..15f44ff --- /dev/null +++ b/unzip.h @@ -0,0 +1,41 @@ +/****************************************************************************** + * @file unzip.h + *****************************************************************************/ +#ifndef _UNZIP_H +#define _UNZIP_H + +#include "stdint.h" +#include "vector.h" + +struct unzip_state { + + const char **files; + long nb_files; + + const char *exdir; + int only_comment, list; + + struct vector xlist; + +}; + +extern struct unzip_state *state; +extern const char *program_name; + +#define EXT_ATTR_DIR (1U << 4) +#define EXT_ATTR_ARC (1U << 5); + +#define ZIP_DEFLATE 8 + +typedef enum { + + HWINF_OK, /* Inflation was successful. */ + HWINF_FULL, /* Not enough room in the output buffer. */ + HWINF_ERR /* Error in the input data. 
*/ + +} inf_stat_t; + +#include +inf_stat_t hwinflate (unsigned char *src, uint64_t src_len, uint64_t *src_used, FILE *outfile, uint64_t dst_cap, uint64_t *dst_used); + +#endif /* _UNZIP_H */ diff --git a/vector.c b/vector.c new file mode 100755 index 0000000..723993a --- /dev/null +++ b/vector.c @@ -0,0 +1,54 @@ +/****************************************************************************** + * @file vector.c + *****************************************************************************/ +#include +#include + +#include "vector.h" + +extern void *xrealloc (void *__ptr, unsigned int __size); + +int vec_adjust (struct vector *vec, int length) { + + if (vec->capacity <= length) { + + if (vec->capacity == 0) { + vec->capacity = 16; + } else { + vec->capacity <<= 1; + } + + vec->data = xrealloc (vec->data, sizeof (*(vec->data)) * vec->capacity); + + } + + return 0; + +} + +void *vec_pop (struct vector *vec) { + + if (!vec || vec == NULL) { + return NULL; + } + + if (vec->length == 0) { + return NULL; + } + + return vec->data[--vec->length]; + +} + +int vec_push (struct vector *vec, void *elem) { + + int ret; + + if ((ret = vec_adjust (vec, vec->length)) != 0) { + return ret; + } + + vec->data[vec->length++] = elem; + return 0; + +} diff --git a/vector.h b/vector.h new file mode 100755 index 0000000..3da05e1 --- /dev/null +++ b/vector.h @@ -0,0 +1,19 @@ +/****************************************************************************** + * @file vector.h + *****************************************************************************/ +#ifndef _VECTOR_H +#define _VECTOR_H + +struct vector { + + void **data; + int capacity, length; + +}; + +int vec_adjust (struct vector *vec, int length); +int vec_push (struct vector *vec, void *elem); + +void *vec_pop (struct vector *vec); + +#endif /* _VECTOR_H */