From 4dbafc573af987e4650488327d01b5f0cfa2a686 Mon Sep 17 00:00:00 2001 From: Robert Pengelly Date: Wed, 1 Oct 2025 12:26:14 +0100 Subject: [PATCH] Initial commit --- LICENSE | 24 ++ Makefile.unix | 27 ++ Makefile.w32 | 18 + README.md | 23 + bitstream.c | 86 ++++ bitstream.h | 34 ++ huffman.c | 168 ++++++++ huffman.h | 49 +++ inflate.c | 531 +++++++++++++++++++++++ lib.c | 452 ++++++++++++++++++++ lib.h | 21 + lz77.c | 104 +++++ lz77.h | 14 + report.c | 150 +++++++ report.h | 29 ++ stdint.h | 40 ++ tables.c | 664 +++++++++++++++++++++++++++++ tables.h | 30 ++ unzip.c | 1115 +++++++++++++++++++++++++++++++++++++++++++++++++ unzip.h | 41 ++ vector.c | 54 +++ vector.h | 19 + 22 files changed, 3693 insertions(+) create mode 100644 LICENSE create mode 100644 Makefile.unix create mode 100644 Makefile.w32 create mode 100644 README.md create mode 100644 bitstream.c create mode 100644 bitstream.h create mode 100644 huffman.c create mode 100644 huffman.h create mode 100644 inflate.c create mode 100755 lib.c create mode 100755 lib.h create mode 100644 lz77.c create mode 100644 lz77.h create mode 100644 report.c create mode 100644 report.h create mode 100644 stdint.h create mode 100644 tables.c create mode 100644 tables.h create mode 100755 unzip.c create mode 100755 unzip.h create mode 100755 vector.c create mode 100755 vector.h diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..fdddb29 --- /dev/null +++ b/LICENSE @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. 
We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to diff --git a/Makefile.unix b/Makefile.unix new file mode 100644 index 0000000..2c97be1 --- /dev/null +++ b/Makefile.unix @@ -0,0 +1,27 @@ +#****************************************************************************** +# @file Makefile.unix +#****************************************************************************** +SRCDIR ?= $(CURDIR) +VPATH := $(SRCDIR) + +CC := gcc +CFLAGS := -D_FILE_OFFSET_BITS=64 -Wall -Werror -Wextra -std=c90 + +ifeq ($(OS), Windows_NT) +all: unzip.exe + +unzip.exe: unzip.c bitstream.c huffman.c inflate.c lib.c lz77.c report.c tables.c vector.c + + $(CC) $(CFLAGS) -o $@ $^ +else +all: unzip + +unzip: unzip.c bitstream.c huffman.c inflate.c lib.c lz77.c report.c tables.c vector.c + + $(CC) $(CFLAGS) -o $@ $^ +endif + +clean: + + if [ -f unzip.exe ]; then rm -rf unzip.exe; fi + if [ -f unzip ]; then rm -rf unzip; fi diff --git a/Makefile.w32 b/Makefile.w32 new file mode 100644 index 0000000..875d20c --- /dev/null +++ b/Makefile.w32 @@ -0,0 +1,18 @@ +#****************************************************************************** +# @file Makefile.w32 +#****************************************************************************** +SRCDIR ?= $(CURDIR) +VPATH := $(SRCDIR) + +CC := gcc +CFLAGS := 
-D_FILE_OFFSET_BITS=64 -Wall -Werror -Wextra -std=c90 + +all: unzip.exe + +clean: + + if exist unzip.exe ( del /q unzip.exe ) + if exist unzip ( del /q unzip ) + +unzip.exe: unzip.c bitstream.c huffman.c inflate.c lib.c lz77.c report.c tables.c vector.c + $(CC) $(CFLAGS) -o $@ $^ diff --git a/README.md b/README.md new file mode 100644 index 0000000..85a05d0 --- /dev/null +++ b/README.md @@ -0,0 +1,23 @@ +All source code is Public Domain. + +## Obtain the source code + + git clone https://git.candlhat.org/unzip.git + +## Building + + BSD: + + Make sure you have gcc and gmake installed then run gmake -f Makefile.unix. + + Linux: + + Make sure you have gcc and make installed then run make -f Makefile.unix. + + macOS: + + Make sure you have xcode command line tools installed then run make -f Makefile.unix. + + Windows: + + Make sure you have mingw installed and the location within your PATH variable then run mingw32-make.exe -f Makefile.w32. diff --git a/bitstream.c b/bitstream.c new file mode 100644 index 0000000..18f8113 --- /dev/null +++ b/bitstream.c @@ -0,0 +1,86 @@ +/****************************************************************************** + * @file bitstream.c + *****************************************************************************/ +#include +#include +#include +#include + +#include "bitstream.h" +#include "lib.h" +#include "stdint.h" + +int istream_init (istream_t *is, unsigned char *s, uint64_t n) { + + is->end = (is->src = s) + n; + + is->bitpos = 0; + is->bitpos_end = n * 8; + + return 0; + +} + +int istream_advance (istream_t *is, uint64_t n) { + + assert (is->bitpos <= is->bitpos_end); + + if (is->bitpos_end - is->bitpos < n) { + return 0; + } + + is->bitpos += n; + return 1; + +} + +unsigned char *istream_byte_align (istream_t *is) { + + unsigned char *byte; + + assert (is->bitpos <= is->bitpos_end && "not past end of stream"); + is->bitpos = round_up (is->bitpos, 8); + + byte = is->src + (is->bitpos / 8); + assert (byte <= is->end); + + 
return byte; + +} + +uint64_t istream_bits (istream_t *is) { + + unsigned char *next; + uint64_t bits, i; + +#ifdef NO_LONG_LONG + int cnt = 4; +#else + int cnt = 8; +#endif + + assert ((next = is->src + (is->bitpos / 8)) <= is->end && "cannot read past end of stream"); + + if (is->end - next >= cnt) { + + /* Common case: read cnt (4 or 8) bytes in one go. */ + bits = array_to_integer (next, cnt, 0); + + } else { + + /* Read the available bits and zero-pad. */ + bits = 0; + + for (i = 0; i < (uint64_t) (is->end - next); i++) { + bits |= (uint64_t) next[i] << (i * CHAR_BIT); + } + + } + + return bits >> (is->bitpos % 8); + +} + +uint64_t istream_bytes_read (istream_t *is) { + return round_up (is->bitpos, 8) / 8; +} diff --git a/bitstream.h b/bitstream.h new file mode 100644 index 0000000..49c2fe4 --- /dev/null +++ b/bitstream.h @@ -0,0 +1,34 @@ +/****************************************************************************** + * @file bitstream.h + *****************************************************************************/ +#ifndef _BITSTREAM_H +#define _BITSTREAM_H + +#include "stdint.h" + +/* Input bitstream. */ +typedef struct { + + unsigned char *src; /* Source bytes. */ + unsigned char *end; /* Past-the-end byte of src. */ + + uint64_t bitpos; /* Position of the next bit to read. */ + uint64_t bitpos_end; /* Position of past-the-end bit. 
*/ + +} istream_t; + +#ifdef NO_LONG_LONG +# define ISTREAM_MIN_BITS (32 - 7) +#else +# define ISTREAM_MIN_BITS (64 - 7) +#endif + +unsigned char *istream_byte_align (istream_t *is); + +int istream_init (istream_t *is, unsigned char *s, uint64_t n); +int istream_advance (istream_t *is, uint64_t n); + +uint64_t istream_bits (istream_t *is); +uint64_t istream_bytes_read (istream_t *is); + +#endif /* _BITSTREAM_H */ diff --git a/huffman.c b/huffman.c new file mode 100644 index 0000000..319e9f3 --- /dev/null +++ b/huffman.c @@ -0,0 +1,168 @@ +/****************************************************************************** + * @file huffman.c + *****************************************************************************/ +#include +#include + +#include "huffman.h" +#include "lib.h" +#include "stdint.h" +#include "tables.h" + +static uint16_t reverse16 (uint16_t x, int n) { + + uint16_t reversed, lo, hi; + + assert (n > 0); + assert (n <= 16); + + lo = x & UCHAR_MAX; + hi = x >> CHAR_BIT; + + reversed = (uint16_t) ((reverse8_tbl[lo] << CHAR_BIT) | reverse8_tbl[hi]); + return reversed >> (16 - n); + +} + +static void table_insert (huffman_decoder_t *d, uint64_t sym, uint64_t len, uint16_t codeword) { + + uint16_t padding, index; + int pad_len; + + assert (len <= HUFFMAN_LOOKUP_TABLE_BITS); + + codeword = reverse16 (codeword, len); + pad_len = HUFFMAN_LOOKUP_TABLE_BITS - len; + + /* Pad the pad_len upper bits with all bit combinations. 
*/ + for (padding = 0; padding < (1U << pad_len); padding++) { + + index = (uint16_t) (codeword | (padding << len)); + + d->table[index].sym = (uint16_t) sym; + d->table[index].len = (uint16_t) len; + + assert (d->table[index].sym == sym && "fits in bitfield"); + assert (d->table[index].len == len && "fits in bitfield"); + + } + +} + +int huffman_decoder_init (huffman_decoder_t *d, unsigned char *lengths, uint64_t n) { + + uint16_t count[MAX_HUFFMAN_BITS + 1] = { 0 }; + uint16_t code[MAX_HUFFMAN_BITS + 1]; + uint16_t sym_idx[MAX_HUFFMAN_BITS + 1]; + + uint64_t i, l; + uint32_t s; + + assert (n <= MAX_HUFFMAN_SYMBOLS); + d->num_syms = n; + + /* Zero-initialize the lookup table. */ + for (i = 0; i < sizeof (d->table) / sizeof (d->table[0]); i++) { + d->table[i].len = 0; + } + + /* Count the number of codewords of each length. */ + for (i = 0; i < n; i++) { + + assert (lengths[i] <= MAX_HUFFMAN_BITS); + count[lengths[i]]++; + + } + + count[0] = 0; /* Ignore zero-length codeword. */ + + /* Compute sentinel bits and offset first sym_idx for each length. */ + code[0] = 0; + sym_idx[0] = 0; + + for (l = 1; l <= MAX_HUFFMAN_BITS; l++) { + + /* First canonical codeword of this length. */ + code[l] = (uint16_t) ((code[l - 1] + count[l - 1]) << 1); + + if (count[l] != 0 && code[l] + count[l] - 1 > (1 << l) - 1) { + + /* The last codeword is longer than l bits. */ + return 0; + + } + + s = (uint32_t) ((code[l] + count[l]) << (MAX_HUFFMAN_BITS - l)); + + d->sentinel_bits[l] = s; + assert (d->sentinel_bits[l] >= code[l] && "no overflow!"); + + sym_idx[l] = sym_idx[l - 1] + count[l - 1]; + d->offset_first_sym_idx[l] = sym_idx[l] - code[l]; + + } + + /* Build mapping from index to symbol and populate the lookup table. 
*/ + for (i = 0; i < n; i++) { + + if ((l = lengths[i]) == 0) { + continue; + } + + d->syms[sym_idx[l]] = (uint16_t) i; + sym_idx[l]++; + + if (l <= HUFFMAN_LOOKUP_TABLE_BITS) { + + table_insert (d, i, l, code[l]); + code[l]++; + + } + + } + + return 1; + +} + +int huffman_decode (const huffman_decoder_t *d, uint16_t bits, uint64_t *num_used_bits) { + + uint64_t lookup_bits, sym_idx, l; + + /* First try the lookup table. */ + lookup_bits = lsb (bits, HUFFMAN_LOOKUP_TABLE_BITS); + assert (lookup_bits < sizeof (d->table) / sizeof (d->table[0])); + + if (d->table[lookup_bits].len != 0) { + + assert (d->table[lookup_bits].len <= HUFFMAN_LOOKUP_TABLE_BITS); + assert (d->table[lookup_bits].sym < d->num_syms); + + *num_used_bits = d->table[lookup_bits].len; + return d->table[lookup_bits].sym; + + } + + /* Then do canonical decoding with the bits in MSB-first order. */ + bits = reverse16 (bits, MAX_HUFFMAN_BITS); + + for (l = HUFFMAN_LOOKUP_TABLE_BITS + 1; l <= MAX_HUFFMAN_BITS; l++) { + + if (bits < d->sentinel_bits[l]) { + + bits >>= MAX_HUFFMAN_BITS - l; + + sym_idx = (uint16_t) (d->offset_first_sym_idx[l] + bits); + assert (sym_idx < d->num_syms); + + *num_used_bits = l; + return d->syms[sym_idx]; + + } + + } + + *num_used_bits = 0; + return -1; + +} diff --git a/huffman.h b/huffman.h new file mode 100644 index 0000000..8a8c881 --- /dev/null +++ b/huffman.h @@ -0,0 +1,49 @@ +/****************************************************************************** + * @file huffman.h + *****************************************************************************/ +#ifndef _HUFFMAN_H +#define _HUFFMAN_H + +#include "stdint.h" + +#define MAX_HUFFMAN_SYMBOLS 288 /* Deflate uses max 288 symbols. */ +#define MAX_HUFFMAN_BITS 16 /* Implode uses max 16-bit codewords. */ +#define HUFFMAN_LOOKUP_TABLE_BITS 8 /* Seems a good trade-off. */ + +typedef struct { + + uint64_t num_syms; + + /* Lookup table for fast decoding of short codewords. 
*/ + struct { + + uint16_t sym : 9; /* Wide enough to fit the max symbol number. */ + uint16_t len : 7; /* 0 means no symbol. */ + + } table[1U << HUFFMAN_LOOKUP_TABLE_BITS]; + + /* "Sentinel bits" value for each codeword length. */ + uint32_t sentinel_bits[MAX_HUFFMAN_BITS + 1]; + + /* First symbol index minus first codeword mod 2**16 for each length. */ + uint16_t offset_first_sym_idx[MAX_HUFFMAN_BITS + 1]; + + /* Map from symbol index to symbol. */ + uint16_t syms[MAX_HUFFMAN_SYMBOLS]; + +} huffman_decoder_t; + +/** + * Initialize huffman decoder d for a code defined by the n codeword lengths. + * Returns false if the codeword lengths do not correspond to a valid prefix code. + */ +int huffman_decoder_init (huffman_decoder_t *d, unsigned char *lengths, uint64_t n); + +/** + * Use the decoder d to decode a symbol from the LSB-first zero-padded bits. + * Returns the decoded symbol number or -1 if no symbol could be decoded. + * *num_used_bits will be set to the number of bits used to decode the symbol, or zero if no symbol could be decoded. 
+ */ +int huffman_decode (const huffman_decoder_t *d, uint16_t bits, uint64_t *num_used_bits); + +#endif /* _HUFFMAN_H */ diff --git a/inflate.c b/inflate.c new file mode 100644 index 0000000..eebdd2b --- /dev/null +++ b/inflate.c @@ -0,0 +1,531 @@ +/****************************************************************************** + * @file inflate.c + *****************************************************************************/ +#include +#include +#include + +#include "bitstream.h" +#include "huffman.h" +#include "lib.h" +#include "lz77.h" +#include "stdint.h" +#include "tables.h" +#include "unzip.h" + +#define LITLEN_TBL_OFFSET 257 + +#define LITLEN_MAX 285 +#define LITLEN_EOB 256 + +#define MAX_LEN 258 +#define MIN_LEN 3 + +#define DISTSYM_MAX 29 + +#define MAX_DISTANCE 32768 +#define MIN_DISTANCE 1 + +#define MAX_CODELEN_LENS 19 +#define MIN_CODELEN_LENS 4 + +#define MAX_DIST_LENS 32 +#define MIN_DIST_LENS 1 + +#define MAX_LITLEN_LENS 288 +#define MIN_LITLEN_LENS 257 + +#define CODELEN_MAX_LIT 15 + +#define CODELEN_COPY 16 +#define CODELEN_COPY_MAX 6 +#define CODELEN_COPY_MIN 3 + +#define CODELEN_ZEROS 17 +#define CODELEN_ZEROS_MAX 10 +#define CODELEN_ZEROS_MIN 3 + +#define CODELEN_ZEROS2_MAX 138 +#define CODELEN_ZEROS2 18 +#define CODELEN_ZEROS2_MIN 11 + +/* RFC 1951, 3.2.7 */ +static const int codelen_lengths_order[MAX_CODELEN_LENS] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; + +static inf_stat_t inf_block (istream_t *is, FILE *outfile, uint64_t dst_cap, uint64_t *dst_pos, huffman_decoder_t *litlen_dec, huffman_decoder_t *dist_dec) { + + uint64_t bits, used, dist, len; + uint16_t ebits; + + int litlen, distsym; + +#ifndef NO_LONG_LONG + uint64_t used_tot; +#endif + + for (;;) { + + /* Read a litlen symbol. 
*/ + bits = istream_bits (is); + + litlen = huffman_decode (litlen_dec, (uint16_t) bits, &used); + /*printf ("litlen: %d\n", litlen);*/ + +#ifdef NO_LONG_LONG + + if (!istream_advance (is, used)) { + return HWINF_ERR; + } + +#else + + bits >>= used; + used_tot = used; + +#endif + + if (litlen < 0 || litlen > LITLEN_MAX) { + + /* Failed to decode, or invalid symbol. */ + return HWINF_ERR; + + } else if (litlen <= UINT8_MAX) { + + /* Literal. */ +#ifndef NO_LONG_LONG + + if (!istream_advance (is, used_tot)) { + return HWINF_ERR; + } + +#endif + + if (*dst_pos == dst_cap) { + return HWINF_FULL; + } + + if (lz77_output_lit (outfile, (*dst_pos)++, (uint8_t) litlen)) { + return HWINF_ERR; + } + + continue; + + } else if (litlen == LITLEN_EOB) { + + /* End of block. */ + +#ifndef NO_LONG_LONG + + if (!istream_advance (is, used_tot)) { + return HWINF_ERR; + } + +#endif + + return HWINF_OK; + + } + + assert (litlen >= LITLEN_TBL_OFFSET && litlen <= LITLEN_MAX); + + /* It is a back reference. Figure out the length.*/ + len = litlen_tbl[litlen - LITLEN_TBL_OFFSET].base_len; + + if ((ebits = litlen_tbl[litlen - LITLEN_TBL_OFFSET].ebits) != 0) { + +#ifdef NO_LONG_LONG + bits = istream_bits (is); +#endif + + len += lsb (bits, ebits); + +#ifdef NO_LONG_LONG + + if (!istream_advance (is, ebits)) { + return HWINF_ERR; + } + +#else + + bits >>= ebits; + used_tot += ebits; + +#endif + + } + + assert (len >= MIN_LEN && len <= MAX_LEN); + + /* Get the distance. 
*/ +#ifdef NO_LONG_LONG + bits = istream_bits (is); +#endif + + distsym = huffman_decode (dist_dec, (uint16_t) bits, &used); + +#ifdef NO_LONG_LONG + + if (!istream_advance (is, used)) { + return HWINF_ERR; + } + +#else + + bits >>= used; + used_tot += used; + +#endif + + if (distsym < 0 || distsym > DISTSYM_MAX) { + return HWINF_ERR; + } + + dist = dist_tbl[distsym].base_dist; + + if ((ebits = dist_tbl[distsym].ebits) != 0) { + +#ifdef NO_LONG_LONG + bits = istream_bits (is); +#endif + + dist += lsb (bits, ebits); + +#ifdef NO_LONG_LONG + + if (!istream_advance (is, ebits)) { + return HWINF_ERR; + } + +#else + + bits >>= ebits; + used_tot += ebits; + +#endif + + } + + assert (dist >= MIN_DISTANCE && dist <= MAX_DISTANCE); + +#ifndef NO_LONG_LONG + + assert (used_tot <= ISTREAM_MIN_BITS); + + if (!istream_advance (is, used_tot)) { + return HWINF_ERR; + } + +#endif + + /* Bounds check and output the backref. */ + if (dist > *dst_pos) { + return HWINF_ERR; + } + +#ifdef NO_LONG_LONG + if (round_up (len, 4) <= dst_cap - *dst_pos) { +#else + if (round_up (len, 8) <= dst_cap - *dst_pos) { +#endif + + if (lz77_output_backref64 (outfile, *dst_pos, dist, len)) { + return HWINF_ERR; + } + + } else if (len <= dst_cap - *dst_pos) { + + if (lz77_output_backref (outfile, *dst_pos, dist, len)) { + return HWINF_ERR; + } + + } else { + return HWINF_FULL; + } + + (*dst_pos) += len; + + } + +} + +static inf_stat_t init_dyn_decoders (istream_t *is, huffman_decoder_t *litlen_dec, huffman_decoder_t *dist_dec) { + + uint64_t num_litlen_lens, num_dist_lens, num_codelen_lens; + uint64_t i, n, used; + + unsigned char code_lengths[MAX_LITLEN_LENS + MAX_DIST_LENS]; + unsigned char codelen_lengths[MAX_CODELEN_LENS]; + + huffman_decoder_t codelen_dec; + int sym; + + uint64_t bits = istream_bits (is); + + /* Number of litlen codeword lengths (5 bits + 257). 
*/ + num_litlen_lens = (uint64_t) (lsb (bits, 5) + MIN_LITLEN_LENS); + bits >>= 5; + + assert (num_litlen_lens <= MAX_LITLEN_LENS); + + /* Number of dist codeword lengths (5 bits + 1). */ + num_dist_lens = (uint64_t) (lsb (bits, 5) + MIN_DIST_LENS); + bits >>= 5; + + assert (num_dist_lens <= MAX_DIST_LENS); + + /* Number of codelen codeword lengths (4 bits + 4). */ + num_codelen_lens = (uint64_t) (lsb (bits, 4) + MIN_CODELEN_LENS); + bits >>= 4; + + assert (num_codelen_lens <= MAX_CODELEN_LENS); + + if (!istream_advance (is, 5 + 5 + 4)) { + return HWINF_ERR; + } + + /** + * Read the codelen codeword lengths (3 bits each) + * and initialize the codelen decoder. + */ + for (i = 0; i < num_codelen_lens; i++) { + + bits = istream_bits (is); + codelen_lengths[codelen_lengths_order[i]] = (unsigned char) lsb (bits, 3); + + if (!istream_advance (is, 3)) { + return HWINF_ERR; + } + + } + + for (; i < MAX_CODELEN_LENS; i++) { + codelen_lengths[codelen_lengths_order[i]] = 0; + } + + if (!huffman_decoder_init (&codelen_dec, codelen_lengths, MAX_CODELEN_LENS)) { + return HWINF_ERR; + } + + /* Read the litlen and dist codeword lengths. */ + i = 0; + + while (i < num_litlen_lens + num_dist_lens) { + + bits = istream_bits (is); + + sym = huffman_decode (&codelen_dec, (uint16_t) bits, &used); + bits >>= used; + + if (!istream_advance (is, used)) { + return HWINF_ERR; + } + + if (sym >= 0 && sym <= CODELEN_MAX_LIT) { + + /* A literal codeword length. */ + code_lengths[i++] = (unsigned char) sym; + + } else if (sym == CODELEN_COPY) { + + /* Copy the previous codeword length 3--6 times. */ + if (i < 1) { + return HWINF_ERR; /* No previous length. 
*/ + } + + /* 2 bits + 3 */ + n = (uint64_t) lsb (bits, 2) + CODELEN_COPY_MIN; + + if (!istream_advance (is, 2)) { + return HWINF_ERR; + } + + assert (n >= CODELEN_COPY_MIN && n <= CODELEN_COPY_MAX); + + if (i + n > num_litlen_lens + num_dist_lens) { + return HWINF_ERR; + } + + while (n--) { + + code_lengths[i] = code_lengths[i - 1]; + i++; + + } + + } else if (sym == CODELEN_ZEROS) { + + /* 3--10 zeros; 3 bits + 3 */ + n = (uint64_t) (lsb (bits, 3) + CODELEN_ZEROS_MIN); + + if (!istream_advance (is, 3)) { + return HWINF_ERR; + } + + assert (n >= CODELEN_ZEROS_MIN && n <= CODELEN_ZEROS_MAX); + + if (i + n > num_litlen_lens + num_dist_lens) { + return HWINF_ERR; + } + + while (n--) { code_lengths[i++] = 0; } + + } else if (sym == CODELEN_ZEROS2) { + + /* 11--138 zeros; 7 bits + 11 */ + n = (uint64_t) (lsb (bits, 7) + CODELEN_ZEROS2_MIN); + + if (!istream_advance (is, 7)) { + return HWINF_ERR; + } + + assert (n >= CODELEN_ZEROS2_MIN && n <= CODELEN_ZEROS2_MAX); + + if (i + n > num_litlen_lens + num_dist_lens) { + return HWINF_ERR; + } + + while (n--) { code_lengths[i++] = 0; } + + } else { + + /* Invalid symbol. 
*/ + return HWINF_ERR; + + } + + } + + if (!huffman_decoder_init (litlen_dec, &code_lengths[0], num_litlen_lens)) { + return HWINF_ERR; + } + + if (!huffman_decoder_init (dist_dec, &code_lengths[num_litlen_lens], num_dist_lens)) { + return HWINF_ERR; + } + + return HWINF_OK; + +} + +static inf_stat_t inf_dyn_block (istream_t *is, FILE *outfile, uint64_t dst_cap, uint64_t *dst_pos) { + + huffman_decoder_t litlen_dec, dist_dec; + inf_stat_t stat; + + if ((stat = init_dyn_decoders (is, &litlen_dec, &dist_dec)) != HWINF_OK) { + return stat; + } + + return inf_block (is, outfile, dst_cap, dst_pos, &litlen_dec, &dist_dec); + +} + +static inf_stat_t inf_fixed_block (istream_t *is, FILE *outfile, uint64_t dst_cap, uint64_t *dst_pos) { + + huffman_decoder_t litlen_dec, dist_dec; + + huffman_decoder_init (&litlen_dec, fixed_litlen_lengths, sizeof (fixed_litlen_lengths) / sizeof (fixed_litlen_lengths[0])); + huffman_decoder_init (&dist_dec, fixed_dist_lengths, sizeof (fixed_dist_lengths) / sizeof (fixed_dist_lengths[0])); + + return inf_block (is, outfile, dst_cap, dst_pos, &litlen_dec, &dist_dec); + +} + +static inf_stat_t inf_noncomp_block (istream_t *is, FILE *outfile, uint64_t dst_cap, uint64_t *dst_pos) { + + unsigned char *p; + uint16_t len, nlen; + + p = istream_byte_align (is); + + /* Read len and nlen (2 x 16 bits). */ + if (!istream_advance (is, 32)) { + return HWINF_ERR; /* Not enough input. */ + } + + len = array_to_integer (p, 2, 0), p += 2; + nlen = array_to_integer (p, 2, 0), p += 2; + + if (nlen != (-len & 0xffff)) { + return HWINF_ERR; + } + + if (!istream_advance (is, len * 8)) { + return HWINF_ERR; /* Not enough input. */ + } + + if (dst_cap - *dst_pos < len) { + return HWINF_ERR; /* Not enough room to output. */ + } + + if (fwrite (p, 1, len, outfile) != len) { + return HWINF_ERR; /* Something went wrong. 
*/ + } + + *dst_pos += len; + return HWINF_OK; + +} + +inf_stat_t hwinflate (unsigned char *src, uint64_t src_len, uint64_t *src_used, FILE *outfile, uint64_t dst_cap, uint64_t *dst_used) { + + inf_stat_t stat; + istream_t is; + + uint64_t dst_pos, bits; + int bfinal; + + if (istream_init (&is, src, src_len)) { + return HWINF_ERR; + } + + dst_pos = 0; + + do { + + bits = istream_bits (&is); + + if (!istream_advance (&is, 3)) { + return HWINF_ERR; + } + + bfinal = bits & 1; + bits >>= 1; + + switch (lsb (bits, 2)) { + + case 0: /* No compression. */ + + stat = inf_noncomp_block (&is, outfile, dst_cap, &dst_pos); + break; + + case 1: /* Compressed with fixed Huffman codes. */ + + stat = inf_fixed_block (&is, outfile, dst_cap, &dst_pos); + break; + + case 2: /* Compressed with dynamic Huffman codes. */ + + stat = inf_dyn_block (&is, outfile, dst_cap, &dst_pos); + break; + + default: + + return HWINF_ERR; + + } + + if (stat != HWINF_OK) { + return stat; + } + + } while (!bfinal); + + *src_used = istream_bytes_read (&is); + + assert (dst_pos <= dst_cap); + *dst_used = dst_pos; + + return HWINF_OK; + +} diff --git a/lib.c b/lib.c new file mode 100755 index 0000000..491432c --- /dev/null +++ b/lib.c @@ -0,0 +1,452 @@ +/****************************************************************************** + * @file lib.c + *****************************************************************************/ +#include +#include +#include +#include +#include +#include + +#include "lib.h" +#include "report.h" +#include "stdint.h" +#include "unzip.h" +#include "vector.h" + +#define OPTION_COMMENT 0x0001 +#define OPTION_DIRECTORY 0x0002 +#define OPTION_EXCLUDE 0x0003 +#define OPTION_HELP 0x0004 +#define OPTION_LIST 0x0005 + +struct option { + + const char *name; + int index, flags; + +}; + +#define OPTION_NO_ARG 0x0001 +#define OPTION_HAS_ARG 0x0002 + +static struct option opts[] = { + + { "--directory", OPTION_DIRECTORY, OPTION_HAS_ARG }, + { "-d", OPTION_DIRECTORY, OPTION_HAS_ARG }, + + { 
"--comment", OPTION_COMMENT, OPTION_NO_ARG }, + { "-z", OPTION_COMMENT, OPTION_NO_ARG }, + + { "--exclude", OPTION_EXCLUDE, OPTION_NO_ARG }, + { "-x", OPTION_EXCLUDE, OPTION_NO_ARG }, + + { "--list", OPTION_LIST, OPTION_NO_ARG }, + { "-l", OPTION_LIST, OPTION_NO_ARG }, + + { "--help", OPTION_HELP, OPTION_NO_ARG }, + { 0, 0, 0 } + +}; + +static int strstart (const char *val, const char **str) { + + const char *p = val; + const char *q = *str; + + while (*p != '\0') { + + if (*p != *q) { + return 0; + } + + ++p; + ++q; + + } + + *str = q; + return 1; + +} + +static void print_help (void) { + + if (program_name) { + + fprintf (stderr, "Usage: %s [opts] file... [-x xlist]\n\n", program_name); + fprintf (stderr, "Options:\n\n"); + + fprintf (stderr, " -l List files -z Display archive comment only.\n"); + fprintf (stderr, "\n"); + + fprintf (stderr, " -d exdir Extract files into exdir.\n"); + fprintf (stderr, " -x xlist Exclude files that follow.\n"); + fprintf (stderr, "\n"); + + fprintf (stderr, " --help Show this help information then exit.\n"); + + } + + exit (EXIT_SUCCESS); + +} + +static void dynarray_add (void *ptab, long *nb_ptr, void *data) { + + int nb, nb_alloc; + void **pp; + + nb = *nb_ptr; + pp = *(void ***) ptab; + + if ((nb & (nb - 1)) == 0) { + + if (!nb) { + nb_alloc = 1; + } else { + nb_alloc = nb * 2; + } + + pp = xrealloc (pp, nb_alloc * sizeof (void *)); + *(void ***) ptab = pp; + + } + + pp[nb++] = data; + *nb_ptr = nb; + +} + +uint64_t array_to_integer (unsigned char *arr, int size, int bigendian) { + + uint64_t val = 0; + int i; + + if (bigendian) { + + int j; + + for (i = size, j = 0; i > 0; i--, j++) { + val |= (uint64_t) arr[j] << (CHAR_BIT * (i - 1)); + } + + } else { + + for (i = 0; i < size; i++) { + val |= (uint64_t) arr[i] << (CHAR_BIT * i); + } + + } + + return val; + +} + +uint64_t lsb (uint64_t x, uint64_t n) { + +#ifdef NO_LONG_LONG + + assert (n <= 31); + return x & (((uint32_t) 1 << n) - 1); + +#else + + assert (n <= 63); + return x 
& (((uint64_t) 1 << n) - 1); + +#endif + +} + +uint64_t round_up (uint64_t x, uint64_t m) { + + assert ((m & (m - 1)) == 0 && "m must be a power of two"); + return (x + m - 1) & (uint64_t) (-m); + +} + +int wild_compare (const char *wild, const char *s) { + + const char *cp = 0, *mp = 0; + + while (*s && (*wild != '*')) { + + if (*wild != *s) { + return 0; + } + + wild++; + s++; + + } + + while (*s) { + + if (*wild == '*') { + + if (!*++wild) { + return 1; + } + + mp = wild; + cp = s + 1; + + } else if (*wild == *s) { + + wild++; + s++; + + } else { + + wild = mp; + s = cp++; + + } + + } + + while (*wild == '*') { + wild++; + } + + return !*wild; + +} + +void parse_args (int argc, char **argv, int optind) { + + struct option *popt; + const char *optarg, *r; + + if (argc <= optind) { + print_help (); + } + + while (optind < argc) { + + r = argv[optind++]; + + if (r[0] != '-' || r[1] == '\0') { + + dynarray_add (&state->files, &state->nb_files, xstrdup (r)); + continue; + + } + + for (popt = opts; popt; popt++) { + + const char *p1 = popt->name; + const char *r1 = r; + + if (!p1) { + + report_at (program_name, 0, REPORT_ERROR, "invalid option -- '%s'", r); + exit (EXIT_FAILURE); + + } + + if (!strstart (p1, &r1)) { + continue; + } + + optarg = r1; + + if (popt->flags & OPTION_HAS_ARG) { + + if (*optarg == '\0') { + + if (optind >= argc) { + + report_at (program_name, 0, REPORT_ERROR, "argument to '%s' is missing", r); + exit (EXIT_FAILURE); + + } + + optarg = argv[optind++]; + + } + + } else if (*optarg != '\0') { + continue; + } + + break; + + } + + switch (popt->index) { + + case OPTION_COMMENT: { + + state->only_comment = 1; + break; + + } + + case OPTION_DIRECTORY: { + + uint64_t len, i; + + if (state->exdir) { + + report_at (program_name, 0, REPORT_ERROR, "-d option used more than once (only one exdir allowed)"); + exit (EXIT_FAILURE); + + } + + len = strlen (state->exdir = xstrdup (optarg)); + + for (i = 0; i < len; i++) { + +#if defined (unix) || defined 
(__unix) || defined (__unix__) || defined (__APPLE__) + + if (state->exdir[i] == '\\') { + ((char *) state->exdir)[i] = '/'; + } + +#elif defined (_WIN32) + + if (state->exdir[i] == '/') { + ((char *) state->exdir)[i] = '\\'; + } + +#endif + + } + + break; + + } + + case OPTION_EXCLUDE: { + + char *arg, *copy, *p; + int i; + + for (; optind < argc; optind++) { + + optarg = argv[optind++]; + + if (!optarg || !*optarg) { + continue; + } + + arg = (copy = xstrdup (optarg)); + + while (arg && *arg != '\0') { + + if (isspace ((int) *arg)) { + + while (*arg != '\0') { + + if (!isspace ((int) *arg)) { + break; + } + + arg++; + + } + + continue; + + } + + if ((p = strchr (arg, ' '))) { + *p++ = '\0'; + } + + for (i = 0; i < state->xlist.length; i++) { + + if (strcmp (state->xlist.data[i], arg) == 0) { + break; + } + + } + + if (i < state->xlist.length) { + + arg = p; + continue; + + } + + vec_push (&state->xlist, xstrdup (arg)); + arg = p; + + } + + free (copy); + + } + + break; + + } + + case OPTION_HELP: { + + print_help (); + break; + + } + + case OPTION_LIST: { + + state->list = 1; + break; + + } + + default: { + + report_at (program_name, 0, REPORT_ERROR, "unsupported option '%s'", r); + exit (EXIT_FAILURE); + + } + + } + + } + +} + +char *xstrdup (const char *str) { + + char *ptr = xmalloc (strlen (str) + 1); + strcpy (ptr, str); + + return ptr; + +} + +void *xmalloc (unsigned long size) { + + void *ptr = malloc (size); + + if (ptr == NULL && size) { + + report_at (program_name, 0, REPORT_ERROR, "memory full (malloc)"); + exit (EXIT_FAILURE); + + } + + memset (ptr, 0, size); + return ptr; + +} + +void *xrealloc (void *ptr, unsigned long size) { + + void *new_ptr = realloc (ptr, size); + + if (new_ptr == NULL && size) { + + report_at (program_name, 0, REPORT_ERROR, "memory full (realloc)"); + exit (EXIT_FAILURE); + + } + + return new_ptr; + +} diff --git a/lib.h b/lib.h new file mode 100755 index 0000000..6746a5e --- /dev/null +++ b/lib.h @@ -0,0 +1,21 @@ 
/**
 * Write the literal byte lit at offset dst_pos of fp.
 *
 * Returns 0 on success, -1 if seeking or writing fails.
 */
int lz77_output_lit (FILE *fp, uint64_t dst_pos, unsigned char lit) {

    if (fseek (fp, dst_pos, SEEK_SET)) {
        return -1;
    }

    if (fwrite (&lit, 1, 1, fp) != 1) {
        return -1;
    }

    return 0;

}

/**
 * Copy len bytes starting dist bytes before dst_pos to dst_pos, one byte
 * at a time.
 *
 * Because every byte is written before the next one is read, the source
 * and destination ranges may overlap (len > dist), which repeats the last
 * dist bytes.  Returns 0 on success, -1 on any seek, read or write error.
 */
int lz77_output_backref (FILE *fp, uint64_t dst_pos, uint64_t dist, uint64_t len) {

    char byte;
    uint64_t i;

    assert (dist <= dst_pos && "cannot reference before beginning of dst");

    for (i = 0; i < len; i++) {

        if (fseek (fp, dst_pos - dist, SEEK_SET)) {
            return -1;
        }

        if (fread (&byte, 1, 1, fp) != 1) {
            return -1;
        }

        /* A seek is required between reading and writing the same stream. */
        if (fseek (fp, dst_pos, SEEK_SET)) {
            return -1;
        }

        if (fwrite (&byte, 1, 1, fp) != 1) {
            return -1;
        }

        dst_pos++;

    }

    return 0;

}

/**
 * Copy len bytes starting dist bytes before dst_pos to dst_pos, moving up
 * to sizeof (uint64_t) bytes per read/write pair.
 *
 * Self-overlapping references (len > dist) are delegated to
 * lz77_output_backref.  Exactly len bytes are written: the final chunk is
 * shortened so nothing lands beyond dst_pos + len.
 *
 * Returns 0 on success, -1 on any seek, read or write error.
 */
int lz77_output_backref64 (FILE *fp, uint64_t dst_pos, uint64_t dist, uint64_t len) {

    unsigned char chunk[sizeof (uint64_t)];

    uint64_t done = 0;
    size_t want;

    assert (len > 0);
    assert (dist <= dst_pos && "cannot reference before beginning of dst");

    if (len > dist) {

        /* Self-overlapping backref, fall back to byte-by-byte copy. */
        return lz77_output_backref (fp, dst_pos, dist, len);

    }

    while (done < len) {

        /* Never move more than what is left of the requested length. */
        want = sizeof (chunk);

        if (len - done < want) {
            want = (size_t) (len - done);
        }

        if (fseek (fp, dst_pos - dist + done, SEEK_SET)) {
            return -1;
        }

        /* The source lies wholly before dst_pos, so a short read is an error. */
        if (fread (chunk, 1, want, fp) != want) {
            return -1;
        }

        if (fseek (fp, dst_pos + done, SEEK_SET)) {
            return -1;
        }

        if (fwrite (chunk, 1, want, fp) != want) {
            return -1;
        }

        done += want;

    }

    return 0;

}
(_WIN32) + + HANDLE hStdError = GetStdHandle (STD_ERROR_HANDLE); + WORD wColor; + + if (OriginalConsoleColor == -1) { + + CONSOLE_SCREEN_BUFFER_INFO csbi; + + if (!GetConsoleScreenBufferInfo (hStdError, &csbi)) { + return; + } + + OriginalConsoleColor = csbi.wAttributes; + + } + + wColor = (OriginalConsoleColor & 0xF0) + (color & 0xF); + SetConsoleTextAttribute (hStdError, wColor); + +#else + + fprintf (stderr, "\033[%dm", color); + +#endif + +} +#endif + +static void output_message (const char *filename, unsigned long lineno, unsigned long idx, enum report_type type, const char *fmt, va_list ap) { + + if (filename) { + + if (lineno == 0 && idx == 0) { + fprintf (stderr, "%s: ", filename); + } else { + fprintf (stderr, "%s:", filename); + } + + } + + if (lineno > 0) { + + if (idx == 0) { + fprintf (stderr, "%lu: ", lineno); + } else { + fprintf (stderr, "%lu:", lineno); + } + + } + + if (idx > 0) { + fprintf (stderr, "%lu: ", idx); + } + + if (type == REPORT_ERROR || type == REPORT_FATAL_ERROR) { + +#ifndef __PDOS__ + set_console_color (COLOR_ERROR); +#endif + + if (type == REPORT_ERROR) { + fprintf (stderr, "error:"); + } else { + fprintf (stderr, "fatal error:"); + } + + } else if (type == REPORT_INTERNAL_ERROR) { + +#ifndef __PDOS__ + set_console_color (COLOR_INTERNAL_ERROR); +#endif + + fprintf (stderr, "internal error:"); + + } else if (type == REPORT_WARNING) { + +#ifndef __PDOS__ + set_console_color (COLOR_WARNING); +#endif + + fprintf (stderr, "warning:"); + + } + +#ifndef __PDOS__ + reset_console_color (); +#endif + + fprintf (stderr, " "); + vfprintf (stderr, fmt, ap); + fprintf (stderr, "\n"); + + if (type != REPORT_WARNING) { + ++errors; + } + +} + +unsigned long get_error_count (void) { + return errors; +} + +void report_at (const char *filename, unsigned long lineno, enum report_type type, const char *fmt, ...) 
{ + + va_list ap; + + va_start (ap, fmt); + output_message (filename, lineno, 0, type, fmt, ap); + va_end (ap); + +} diff --git a/report.h b/report.h new file mode 100644 index 0000000..8fc8758 --- /dev/null +++ b/report.h @@ -0,0 +1,29 @@ +/****************************************************************************** + * @file report.h + *****************************************************************************/ +#ifndef _REPORT_H +#define _REPORT_H + +enum report_type { + + REPORT_ERROR = 0, + REPORT_FATAL_ERROR, + REPORT_INTERNAL_ERROR, + REPORT_WARNING + +}; + +#if defined (_WIN32) +# define COLOR_ERROR 12 +# define COLOR_WARNING 13 +# define COLOR_INTERNAL_ERROR 19 +#else +# define COLOR_ERROR 91 +# define COLOR_INTERNAL_ERROR 94 +# define COLOR_WARNING 95 +#endif + +unsigned long get_error_count (void); +void report_at (const char *filename, unsigned long line_number, enum report_type type, const char *fmt, ...); + +#endif /* _REPORT_H */ diff --git a/stdint.h b/stdint.h new file mode 100644 index 0000000..ae821b9 --- /dev/null +++ b/stdint.h @@ -0,0 +1,40 @@ +/****************************************************************************** + * @file stdint.h + *****************************************************************************/ +#ifndef _STDINT_H_INCLUDED +#ifndef _STDINT_H +#ifndef _STDINT_H_ + +#define _STDINT_H_INCLUDED +#define _STDINT_H +#define _STDINT_H_ + +#include + +typedef signed char int8_t; +typedef unsigned char uint8_t; + +typedef signed short int16_t; +typedef unsigned short uint16_t; + +#if INT_MAX > 32767 +typedef signed int int32_t; +typedef unsigned int uint32_t; +#else +typedef signed long int32_t; +typedef unsigned long uint32_t; +#endif + +#if defined (NO_LONG_LONG) || ULONG_MAX > 4294967295UL +typedef signed long int64_t; +typedef unsigned long uint64_t; +#else +typedef signed long long int64_t; +typedef unsigned long long uint64_t; +#endif + +#define UINT8_MAX 0xff + +#endif /* _STDINT_H_ */ +#endif /* _STDINT_H */ 
+#endif /* _STDINT_H_INCLUDED */ diff --git a/tables.c b/tables.c new file mode 100644 index 0000000..b065708 --- /dev/null +++ b/tables.c @@ -0,0 +1,664 @@ +/****************************************************************************** + * @file tables.c + *****************************************************************************/ +#include "tables.h" + +const uint8_t reverse8_tbl[UINT8_MAX + 1] = { + + /* 0x00 */ 0x00, + /* 0x01 */ 0x80, + /* 0x02 */ 0x40, + /* 0x03 */ 0xc0, + /* 0x04 */ 0x20, + /* 0x05 */ 0xa0, + /* 0x06 */ 0x60, + /* 0x07 */ 0xe0, + /* 0x08 */ 0x10, + /* 0x09 */ 0x90, + /* 0x0a */ 0x50, + /* 0x0b */ 0xd0, + /* 0x0c */ 0x30, + /* 0x0d */ 0xb0, + /* 0x0e */ 0x70, + /* 0x0f */ 0xf0, + /* 0x10 */ 0x08, + /* 0x11 */ 0x88, + /* 0x12 */ 0x48, + /* 0x13 */ 0xc8, + /* 0x14 */ 0x28, + /* 0x15 */ 0xa8, + /* 0x16 */ 0x68, + /* 0x17 */ 0xe8, + /* 0x18 */ 0x18, + /* 0x19 */ 0x98, + /* 0x1a */ 0x58, + /* 0x1b */ 0xd8, + /* 0x1c */ 0x38, + /* 0x1d */ 0xb8, + /* 0x1e */ 0x78, + /* 0x1f */ 0xf8, + /* 0x20 */ 0x04, + /* 0x21 */ 0x84, + /* 0x22 */ 0x44, + /* 0x23 */ 0xc4, + /* 0x24 */ 0x24, + /* 0x25 */ 0xa4, + /* 0x26 */ 0x64, + /* 0x27 */ 0xe4, + /* 0x28 */ 0x14, + /* 0x29 */ 0x94, + /* 0x2a */ 0x54, + /* 0x2b */ 0xd4, + /* 0x2c */ 0x34, + /* 0x2d */ 0xb4, + /* 0x2e */ 0x74, + /* 0x2f */ 0xf4, + /* 0x30 */ 0x0c, + /* 0x31 */ 0x8c, + /* 0x32 */ 0x4c, + /* 0x33 */ 0xcc, + /* 0x34 */ 0x2c, + /* 0x35 */ 0xac, + /* 0x36 */ 0x6c, + /* 0x37 */ 0xec, + /* 0x38 */ 0x1c, + /* 0x39 */ 0x9c, + /* 0x3a */ 0x5c, + /* 0x3b */ 0xdc, + /* 0x3c */ 0x3c, + /* 0x3d */ 0xbc, + /* 0x3e */ 0x7c, + /* 0x3f */ 0xfc, + /* 0x40 */ 0x02, + /* 0x41 */ 0x82, + /* 0x42 */ 0x42, + /* 0x43 */ 0xc2, + /* 0x44 */ 0x22, + /* 0x45 */ 0xa2, + /* 0x46 */ 0x62, + /* 0x47 */ 0xe2, + /* 0x48 */ 0x12, + /* 0x49 */ 0x92, + /* 0x4a */ 0x52, + /* 0x4b */ 0xd2, + /* 0x4c */ 0x32, + /* 0x4d */ 0xb2, + /* 0x4e */ 0x72, + /* 0x4f */ 0xf2, + /* 0x50 */ 0x0a, + /* 0x51 */ 0x8a, + /* 0x52 */ 0x4a, + /* 0x53 */ 
0xca, + /* 0x54 */ 0x2a, + /* 0x55 */ 0xaa, + /* 0x56 */ 0x6a, + /* 0x57 */ 0xea, + /* 0x58 */ 0x1a, + /* 0x59 */ 0x9a, + /* 0x5a */ 0x5a, + /* 0x5b */ 0xda, + /* 0x5c */ 0x3a, + /* 0x5d */ 0xba, + /* 0x5e */ 0x7a, + /* 0x5f */ 0xfa, + /* 0x60 */ 0x06, + /* 0x61 */ 0x86, + /* 0x62 */ 0x46, + /* 0x63 */ 0xc6, + /* 0x64 */ 0x26, + /* 0x65 */ 0xa6, + /* 0x66 */ 0x66, + /* 0x67 */ 0xe6, + /* 0x68 */ 0x16, + /* 0x69 */ 0x96, + /* 0x6a */ 0x56, + /* 0x6b */ 0xd6, + /* 0x6c */ 0x36, + /* 0x6d */ 0xb6, + /* 0x6e */ 0x76, + /* 0x6f */ 0xf6, + /* 0x70 */ 0x0e, + /* 0x71 */ 0x8e, + /* 0x72 */ 0x4e, + /* 0x73 */ 0xce, + /* 0x74 */ 0x2e, + /* 0x75 */ 0xae, + /* 0x76 */ 0x6e, + /* 0x77 */ 0xee, + /* 0x78 */ 0x1e, + /* 0x79 */ 0x9e, + /* 0x7a */ 0x5e, + /* 0x7b */ 0xde, + /* 0x7c */ 0x3e, + /* 0x7d */ 0xbe, + /* 0x7e */ 0x7e, + /* 0x7f */ 0xfe, + /* 0x80 */ 0x01, + /* 0x81 */ 0x81, + /* 0x82 */ 0x41, + /* 0x83 */ 0xc1, + /* 0x84 */ 0x21, + /* 0x85 */ 0xa1, + /* 0x86 */ 0x61, + /* 0x87 */ 0xe1, + /* 0x88 */ 0x11, + /* 0x89 */ 0x91, + /* 0x8a */ 0x51, + /* 0x8b */ 0xd1, + /* 0x8c */ 0x31, + /* 0x8d */ 0xb1, + /* 0x8e */ 0x71, + /* 0x8f */ 0xf1, + /* 0x90 */ 0x09, + /* 0x91 */ 0x89, + /* 0x92 */ 0x49, + /* 0x93 */ 0xc9, + /* 0x94 */ 0x29, + /* 0x95 */ 0xa9, + /* 0x96 */ 0x69, + /* 0x97 */ 0xe9, + /* 0x98 */ 0x19, + /* 0x99 */ 0x99, + /* 0x9a */ 0x59, + /* 0x9b */ 0xd9, + /* 0x9c */ 0x39, + /* 0x9d */ 0xb9, + /* 0x9e */ 0x79, + /* 0x9f */ 0xf9, + /* 0xa0 */ 0x05, + /* 0xa1 */ 0x85, + /* 0xa2 */ 0x45, + /* 0xa3 */ 0xc5, + /* 0xa4 */ 0x25, + /* 0xa5 */ 0xa5, + /* 0xa6 */ 0x65, + /* 0xa7 */ 0xe5, + /* 0xa8 */ 0x15, + /* 0xa9 */ 0x95, + /* 0xaa */ 0x55, + /* 0xab */ 0xd5, + /* 0xac */ 0x35, + /* 0xad */ 0xb5, + /* 0xae */ 0x75, + /* 0xaf */ 0xf5, + /* 0xb0 */ 0x0d, + /* 0xb1 */ 0x8d, + /* 0xb2 */ 0x4d, + /* 0xb3 */ 0xcd, + /* 0xb4 */ 0x2d, + /* 0xb5 */ 0xad, + /* 0xb6 */ 0x6d, + /* 0xb7 */ 0xed, + /* 0xb8 */ 0x1d, + /* 0xb9 */ 0x9d, + /* 0xba */ 0x5d, + /* 0xbb */ 0xdd, + /* 0xbc */ 
0x3d, + /* 0xbd */ 0xbd, + /* 0xbe */ 0x7d, + /* 0xbf */ 0xfd, + /* 0xc0 */ 0x03, + /* 0xc1 */ 0x83, + /* 0xc2 */ 0x43, + /* 0xc3 */ 0xc3, + /* 0xc4 */ 0x23, + /* 0xc5 */ 0xa3, + /* 0xc6 */ 0x63, + /* 0xc7 */ 0xe3, + /* 0xc8 */ 0x13, + /* 0xc9 */ 0x93, + /* 0xca */ 0x53, + /* 0xcb */ 0xd3, + /* 0xcc */ 0x33, + /* 0xcd */ 0xb3, + /* 0xce */ 0x73, + /* 0xcf */ 0xf3, + /* 0xd0 */ 0x0b, + /* 0xd1 */ 0x8b, + /* 0xd2 */ 0x4b, + /* 0xd3 */ 0xcb, + /* 0xd4 */ 0x2b, + /* 0xd5 */ 0xab, + /* 0xd6 */ 0x6b, + /* 0xd7 */ 0xeb, + /* 0xd8 */ 0x1b, + /* 0xd9 */ 0x9b, + /* 0xda */ 0x5b, + /* 0xdb */ 0xdb, + /* 0xdc */ 0x3b, + /* 0xdd */ 0xbb, + /* 0xde */ 0x7b, + /* 0xdf */ 0xfb, + /* 0xe0 */ 0x07, + /* 0xe1 */ 0x87, + /* 0xe2 */ 0x47, + /* 0xe3 */ 0xc7, + /* 0xe4 */ 0x27, + /* 0xe5 */ 0xa7, + /* 0xe6 */ 0x67, + /* 0xe7 */ 0xe7, + /* 0xe8 */ 0x17, + /* 0xe9 */ 0x97, + /* 0xea */ 0x57, + /* 0xeb */ 0xd7, + /* 0xec */ 0x37, + /* 0xed */ 0xb7, + /* 0xee */ 0x77, + /* 0xef */ 0xf7, + /* 0xf0 */ 0x0f, + /* 0xf1 */ 0x8f, + /* 0xf2 */ 0x4f, + /* 0xf3 */ 0xcf, + /* 0xf4 */ 0x2f, + /* 0xf5 */ 0xaf, + /* 0xf6 */ 0x6f, + /* 0xf7 */ 0xef, + /* 0xf8 */ 0x1f, + /* 0xf9 */ 0x9f, + /* 0xfa */ 0x5f, + /* 0xfb */ 0xdf, + /* 0xfc */ 0x3f, + /* 0xfd */ 0xbf, + /* 0xfe */ 0x7f, + /* 0xff */ 0xff, + +}; + +unsigned char fixed_litlen_lengths[288] = { + + /* 0 */ 8, + /* 1 */ 8, + /* 2 */ 8, + /* 3 */ 8, + /* 4 */ 8, + /* 5 */ 8, + /* 6 */ 8, + /* 7 */ 8, + /* 8 */ 8, + /* 9 */ 8, + /* 10 */ 8, + /* 11 */ 8, + /* 12 */ 8, + /* 13 */ 8, + /* 14 */ 8, + /* 15 */ 8, + /* 16 */ 8, + /* 17 */ 8, + /* 18 */ 8, + /* 19 */ 8, + /* 20 */ 8, + /* 21 */ 8, + /* 22 */ 8, + /* 23 */ 8, + /* 24 */ 8, + /* 25 */ 8, + /* 26 */ 8, + /* 27 */ 8, + /* 28 */ 8, + /* 29 */ 8, + /* 30 */ 8, + /* 31 */ 8, + /* 32 */ 8, + /* 33 */ 8, + /* 34 */ 8, + /* 35 */ 8, + /* 36 */ 8, + /* 37 */ 8, + /* 38 */ 8, + /* 39 */ 8, + /* 40 */ 8, + /* 41 */ 8, + /* 42 */ 8, + /* 43 */ 8, + /* 44 */ 8, + /* 45 */ 8, + /* 46 */ 8, + /* 47 */ 8, + 
/* 48 */ 8, + /* 49 */ 8, + /* 50 */ 8, + /* 51 */ 8, + /* 52 */ 8, + /* 53 */ 8, + /* 54 */ 8, + /* 55 */ 8, + /* 56 */ 8, + /* 57 */ 8, + /* 58 */ 8, + /* 59 */ 8, + /* 60 */ 8, + /* 61 */ 8, + /* 62 */ 8, + /* 63 */ 8, + /* 64 */ 8, + /* 65 */ 8, + /* 66 */ 8, + /* 67 */ 8, + /* 68 */ 8, + /* 69 */ 8, + /* 70 */ 8, + /* 71 */ 8, + /* 72 */ 8, + /* 73 */ 8, + /* 74 */ 8, + /* 75 */ 8, + /* 76 */ 8, + /* 77 */ 8, + /* 78 */ 8, + /* 79 */ 8, + /* 80 */ 8, + /* 81 */ 8, + /* 82 */ 8, + /* 83 */ 8, + /* 84 */ 8, + /* 85 */ 8, + /* 86 */ 8, + /* 87 */ 8, + /* 88 */ 8, + /* 89 */ 8, + /* 90 */ 8, + /* 91 */ 8, + /* 92 */ 8, + /* 93 */ 8, + /* 94 */ 8, + /* 95 */ 8, + /* 96 */ 8, + /* 97 */ 8, + /* 98 */ 8, + /* 99 */ 8, + /* 100 */ 8, + /* 101 */ 8, + /* 102 */ 8, + /* 103 */ 8, + /* 104 */ 8, + /* 105 */ 8, + /* 106 */ 8, + /* 107 */ 8, + /* 108 */ 8, + /* 109 */ 8, + /* 110 */ 8, + /* 111 */ 8, + /* 112 */ 8, + /* 113 */ 8, + /* 114 */ 8, + /* 115 */ 8, + /* 116 */ 8, + /* 117 */ 8, + /* 118 */ 8, + /* 119 */ 8, + /* 120 */ 8, + /* 121 */ 8, + /* 122 */ 8, + /* 123 */ 8, + /* 124 */ 8, + /* 125 */ 8, + /* 126 */ 8, + /* 127 */ 8, + /* 128 */ 8, + /* 129 */ 8, + /* 130 */ 8, + /* 131 */ 8, + /* 132 */ 8, + /* 133 */ 8, + /* 134 */ 8, + /* 135 */ 8, + /* 136 */ 8, + /* 137 */ 8, + /* 138 */ 8, + /* 139 */ 8, + /* 140 */ 8, + /* 141 */ 8, + /* 142 */ 8, + /* 143 */ 8, + /* 144 */ 9, + /* 145 */ 9, + /* 146 */ 9, + /* 147 */ 9, + /* 148 */ 9, + /* 149 */ 9, + /* 150 */ 9, + /* 151 */ 9, + /* 152 */ 9, + /* 153 */ 9, + /* 154 */ 9, + /* 155 */ 9, + /* 156 */ 9, + /* 157 */ 9, + /* 158 */ 9, + /* 159 */ 9, + /* 160 */ 9, + /* 161 */ 9, + /* 162 */ 9, + /* 163 */ 9, + /* 164 */ 9, + /* 165 */ 9, + /* 166 */ 9, + /* 167 */ 9, + /* 168 */ 9, + /* 169 */ 9, + /* 170 */ 9, + /* 171 */ 9, + /* 172 */ 9, + /* 173 */ 9, + /* 174 */ 9, + /* 175 */ 9, + /* 176 */ 9, + /* 177 */ 9, + /* 178 */ 9, + /* 179 */ 9, + /* 180 */ 9, + /* 181 */ 9, + /* 182 */ 9, + /* 183 */ 9, + /* 184 */ 
9, + /* 185 */ 9, + /* 186 */ 9, + /* 187 */ 9, + /* 188 */ 9, + /* 189 */ 9, + /* 190 */ 9, + /* 191 */ 9, + /* 192 */ 9, + /* 193 */ 9, + /* 194 */ 9, + /* 195 */ 9, + /* 196 */ 9, + /* 197 */ 9, + /* 198 */ 9, + /* 199 */ 9, + /* 200 */ 9, + /* 201 */ 9, + /* 202 */ 9, + /* 203 */ 9, + /* 204 */ 9, + /* 205 */ 9, + /* 206 */ 9, + /* 207 */ 9, + /* 208 */ 9, + /* 209 */ 9, + /* 210 */ 9, + /* 211 */ 9, + /* 212 */ 9, + /* 213 */ 9, + /* 214 */ 9, + /* 215 */ 9, + /* 216 */ 9, + /* 217 */ 9, + /* 218 */ 9, + /* 219 */ 9, + /* 220 */ 9, + /* 221 */ 9, + /* 222 */ 9, + /* 223 */ 9, + /* 224 */ 9, + /* 225 */ 9, + /* 226 */ 9, + /* 227 */ 9, + /* 228 */ 9, + /* 229 */ 9, + /* 230 */ 9, + /* 231 */ 9, + /* 232 */ 9, + /* 233 */ 9, + /* 234 */ 9, + /* 235 */ 9, + /* 236 */ 9, + /* 237 */ 9, + /* 238 */ 9, + /* 239 */ 9, + /* 240 */ 9, + /* 241 */ 9, + /* 242 */ 9, + /* 243 */ 9, + /* 244 */ 9, + /* 245 */ 9, + /* 246 */ 9, + /* 247 */ 9, + /* 248 */ 9, + /* 249 */ 9, + /* 250 */ 9, + /* 251 */ 9, + /* 252 */ 9, + /* 253 */ 9, + /* 254 */ 9, + /* 255 */ 9, + /* 256 */ 7, + /* 257 */ 7, + /* 258 */ 7, + /* 259 */ 7, + /* 260 */ 7, + /* 261 */ 7, + /* 262 */ 7, + /* 263 */ 7, + /* 264 */ 7, + /* 265 */ 7, + /* 266 */ 7, + /* 267 */ 7, + /* 268 */ 7, + /* 269 */ 7, + /* 270 */ 7, + /* 271 */ 7, + /* 272 */ 7, + /* 273 */ 7, + /* 274 */ 7, + /* 275 */ 7, + /* 276 */ 7, + /* 277 */ 7, + /* 278 */ 7, + /* 279 */ 7, + /* 280 */ 8, + /* 281 */ 8, + /* 282 */ 8, + /* 283 */ 8, + /* 284 */ 8, + /* 285 */ 8, + /* 286 */ 8, + /* 287 */ 8, + +}; + +unsigned char fixed_dist_lengths[32] = { + + /* 0 */ 5, + /* 1 */ 5, + /* 2 */ 5, + /* 3 */ 5, + /* 4 */ 5, + /* 5 */ 5, + /* 6 */ 5, + /* 7 */ 5, + /* 8 */ 5, + /* 9 */ 5, + /* 10 */ 5, + /* 11 */ 5, + /* 12 */ 5, + /* 13 */ 5, + /* 14 */ 5, + /* 15 */ 5, + /* 16 */ 5, + /* 17 */ 5, + /* 18 */ 5, + /* 19 */ 5, + /* 20 */ 5, + /* 21 */ 5, + /* 22 */ 5, + /* 23 */ 5, + /* 24 */ 5, + /* 25 */ 5, + /* 26 */ 5, + /* 27 */ 5, + /* 28 */ 5, + 
/* 29 */ 5, + /* 30 */ 5, + /* 31 */ 5, + +}; + +struct litlen_tbl_t litlen_tbl[29] = { + + /* 257 */ { 3, 0 }, + /* 258 */ { 4, 0 }, + /* 259 */ { 5, 0 }, + /* 260 */ { 6, 0 }, + /* 261 */ { 7, 0 }, + /* 262 */ { 8, 0 }, + /* 263 */ { 9, 0 }, + /* 264 */ { 10, 0 }, + /* 265 */ { 11, 1 }, + /* 266 */ { 13, 1 }, + /* 267 */ { 15, 1 }, + /* 268 */ { 17, 1 }, + /* 269 */ { 19, 2 }, + /* 270 */ { 23, 2 }, + /* 271 */ { 27, 2 }, + /* 272 */ { 31, 2 }, + /* 273 */ { 35, 3 }, + /* 274 */ { 43, 3 }, + /* 275 */ { 51, 3 }, + /* 276 */ { 59, 3 }, + /* 277 */ { 67, 4 }, + /* 278 */ { 83, 4 }, + /* 279 */ { 99, 4 }, + /* 280 */ { 115, 4 }, + /* 281 */ { 131, 5 }, + /* 282 */ { 163, 5 }, + /* 283 */ { 195, 5 }, + /* 284 */ { 227, 5 }, + /* 285 */ { 258, 0 }, + +}; + +struct dist_tbl_t dist_tbl[30] = { + + /* 0 */ { 1, 0 }, + /* 1 */ { 2, 0 }, + /* 2 */ { 3, 0 }, + /* 3 */ { 4, 0 }, + /* 4 */ { 5, 1 }, + /* 5 */ { 7, 1 }, + /* 6 */ { 9, 2 }, + /* 7 */ { 13, 2 }, + /* 8 */ { 17, 3 }, + /* 9 */ { 25, 3 }, + /* 10 */ { 33, 4 }, + /* 11 */ { 49, 4 }, + /* 12 */ { 65, 5 }, + /* 13 */ { 97, 5 }, + /* 14 */ { 129, 6 }, + /* 15 */ { 193, 6 }, + /* 16 */ { 257, 7 }, + /* 17 */ { 385, 7 }, + /* 18 */ { 513, 8 }, + /* 19 */ { 769, 8 }, + /* 20 */ { 1025, 9 }, + /* 21 */ { 1537, 9 }, + /* 22 */ { 2049, 10 }, + /* 23 */ { 3073, 10 }, + /* 24 */ { 4097, 11 }, + /* 25 */ { 6145, 11 }, + /* 26 */ { 8193, 12 }, + /* 27 */ { 12289, 12 }, + /* 28 */ { 16385, 13 }, + /* 29 */ { 24577, 13 }, + +}; diff --git a/tables.h b/tables.h new file mode 100644 index 0000000..6fb6f9c --- /dev/null +++ b/tables.h @@ -0,0 +1,30 @@ +/****************************************************************************** + * @file tables.h + *****************************************************************************/ +#ifndef _TABLES_H +#define _TABLES_H + +#include "stdint.h" + +/* Element x contains the value of x with the bits in reverse order. 
/**
 * Create the directory path and every missing parent directory.
 *
 * Components are separated by '/'.  Each prefix of the path is passed to
 * mkdir with mode 0755; a prefix that already exists (EEXIST) is not
 * treated as an error.  The work is done on a private copy, so the
 * caller's string is never modified.
 *
 * Returns 0 on success (also for a NULL or empty path) and 1 if a
 * directory could not be created or the copy could not be allocated.
 */
static int make_directory (const char *path) {

    unsigned long size;
    char *copy, *p;

    int failed = 0;

    if (!path) {
        return 0;
    }

    size = strlen (path) + 1;

    if (!(copy = malloc (size))) {
        return 1;
    }

    memcpy (copy, path, size);
    p = copy;

    while (p && *p != '\0') {

        /* Skip separators, including the leading one of an absolute path. */
        while (*p == '/') {
            p++;
        }

        if (*p == '\0') { break; }

        /* Cut the copy after the current component so mkdir sees a prefix. */
        if ((p = strchr (p, '/'))) {
            *p = '\0';
        }

        if (mkdir (copy, 0755) < 0 && errno != EEXIST) {

            failed = 1;
            break;

        }

        /* Restore the separator; p == NULL means that was the last component. */
        if (p) { *p = '/'; }

    }

    free (copy);
    return failed;

}
ERROR_ALREADY_EXISTS) { + return 1; + } + + } + + if (p) { *p = '\\'; } + + } + + return 0; + +} +#endif + +static FILE *fp = 0; + +struct eocdr { + + uint16_t disk_nbr; /* Number of this disk. */ + + uint16_t cd_start_disk; /* Nbr. of disk with start of the CD. */ + uint16_t disk_cd_entries; /* Nbr. of CD entries on this disk. */ + uint16_t cd_entries; /* Nbr. of Centeral Directory Entiries. */ + + uint32_t cd_size; /* Centeral Directory size in bytes. */ + uint32_t cd_offset; /* Centeral Directory file offset. */ + + uint16_t comment_len; /* Archive comment length. */ + unsigned char *comment; /* Archive comment. */ + +}; + +#define EOCDR_SIGNATURE 0x504B0506 +#define EOCDR_BASE_SIZE 22 + +#define MAX_BACK_OFFSET (1024 + 100) + +static int find_eocdr (struct eocdr *r) { + + unsigned long back_offset, length, signature; + + unsigned char *buf, *p; + int ret = 0; + + fseek (fp, 0, SEEK_END); + length = ftell (fp); + + for (back_offset = 0; back_offset <= MAX_BACK_OFFSET; back_offset++) { + + if (length < EOCDR_BASE_SIZE + back_offset) { + break; + } + + fseek (fp, length - EOCDR_BASE_SIZE - back_offset, SEEK_SET); + + if (!(buf = malloc (EOCDR_BASE_SIZE))) { + + ret = -1; + break; + + } + + p = buf; + + if ((fread (buf, 1, EOCDR_BASE_SIZE, fp)) != EOCDR_BASE_SIZE) { + + free ((void *) buf); + + ret = -1; + break; + + } + + signature = array_to_integer (p, 4, 1), p += 4; + + if (signature == EOCDR_SIGNATURE) { + + r->disk_nbr = array_to_integer (p, 2, 0), p += 2; + r->cd_start_disk = array_to_integer (p, 2, 0), p += 2; + r->disk_cd_entries = array_to_integer (p, 2, 0), p += 2; + r->cd_entries = array_to_integer (p, 2, 0), p += 2; + + r->cd_size = array_to_integer (p, 4, 0), p += 4; + r->cd_offset = array_to_integer (p, 4, 0), p += 4; + + if ((r->comment_len = array_to_integer (p, 2, 0)) > back_offset) { + + free ((void *) buf); + + ret = -1; + break; + + } + + free ((void *) buf); + + if (r->comment_len > 0) { + + if ((r->comment = malloc (r->comment_len + 1))) { + 
+ memset (r->comment, 0, r->comment_len + 1); + + if (fread (r->comment, 1, r->comment_len, fp) != r->comment_len) { + + ret = -1; + break; + + } + + } + + break; + + } + + break; + + } + + free ((void *) buf); + + } + + rewind (fp); + return ret; + +} + +struct cfh { + + uint16_t made_by_ver; /* Version made by. */ + uint16_t extrcat_ver; /* Version needed to extract. */ + uint16_t gp_flag; /* General-purpose bit flag. */ + uint16_t method; /* Compression method. */ + uint16_t mod_time; /* Modification time. */ + uint16_t mod_date; /* Modification date. */ + + uint32_t crc32; /* CRC-32 checkusm. */ + uint32_t comp_size; /* Compressed size. */ + uint32_t uncomp_size; /* Uncompressed size. */ + + uint16_t name_len; /* Filename length. */ + uint16_t extra_len; /* Extra data length. */ + uint16_t comment_len; /* Comment length. */ + uint16_t disk_nbr_start; /* Disk nbr. where file begins. */ + + uint16_t int_attrs; /* Internal file attributes. */ + uint32_t ext_attrs; /* External file attributes. */ + + uint32_t lfh_offset; /* Local File HEader offset. */ + + char *name; /* Filename. */ + char *extra; /* Extra data. */ + char *comment; /* File comment. 
*/ + +}; + +#define CFH_SIGNATURE 0x504B0102 +#define CFH_BASE_SIZE 46 + +static int read_cfh (struct cfh *cfh) { + + unsigned char buf[CFH_BASE_SIZE], *p = buf; + uint32_t signature; + + if (fread (buf, 1, CFH_BASE_SIZE, fp) != CFH_BASE_SIZE) { + return -1; + } + + signature = array_to_integer (buf, 4, 1), p += 4; + + if (signature != CFH_SIGNATURE) { + return -1; + } + + cfh->made_by_ver = array_to_integer (p, 2, 0), p += 2; + cfh->extrcat_ver = array_to_integer (p, 2, 0), p += 2; + cfh->gp_flag = array_to_integer (p, 2, 0), p += 2; + cfh->method = array_to_integer (p, 2, 0), p += 2; + cfh->mod_time = array_to_integer (p, 2, 0), p += 2; + cfh->mod_date = array_to_integer (p, 2, 0), p += 2; + cfh->crc32 = array_to_integer (p, 4, 0), p += 4; + cfh->comp_size = array_to_integer (p, 4, 0), p += 4; + cfh->uncomp_size = array_to_integer (p, 4, 0), p += 4; + cfh->name_len = array_to_integer (p, 2, 0), p += 2; + cfh->extra_len = array_to_integer (p, 2, 0), p += 2; + cfh->comment_len = array_to_integer (p, 2, 0), p += 2; + cfh->disk_nbr_start = array_to_integer (p, 2, 0), p += 2; + cfh->int_attrs = array_to_integer (p, 2, 0), p += 2; + cfh->ext_attrs = array_to_integer (p, 4, 0), p += 4; + cfh->lfh_offset = array_to_integer (p, 4, 0), p += 4; + + if ((unsigned char *) (p - CFH_BASE_SIZE) != buf) { + + printf ("FUCKKKK\n"); + return -1; + + } + + if ((cfh->name = malloc (cfh->name_len + 1))) { + + memset (cfh->name, 0, cfh->name_len + 1); + + if (fread (cfh->name, 1, cfh->name_len, fp) != cfh->name_len) { + + free (cfh->name); + return -1; + + } + + } + + if ((cfh->extra = malloc (cfh->extra_len + 1))) { + + memset (cfh->extra, 0, cfh->extra_len + 1); + + if (fread (cfh->extra, 1, cfh->extra_len, fp) != cfh->extra_len) { + + free (cfh->extra); + free (cfh->name); + + return -1; + + } + + } + + if ((cfh->comment = malloc (cfh->comment_len + 1))) { + + memset (cfh->comment, 0, cfh->comment_len + 1); + + if (fread (cfh->comment, 1, cfh->comment_len, fp) != cfh->comment_len) { 
+ + free (cfh->comment); + free (cfh->extra); + free (cfh->name); + + return -1; + + } + + } + + return 0; + +} + +static time_t dos2ctime (uint16_t dos_date, uint16_t dos_time) { + + struct tm tm = { 0 }; + + tm.tm_sec = (dos_time & 0x1f) * 2; /* Bits 0--4; Secs divided by 2. */ + tm.tm_min = (dos_time >> 5) & 0x3f; /* Bits 5--10; Minute. */ + tm.tm_hour = (dos_time >> 11); /* Bits 11--15; Hour (0--23). */ + + tm.tm_mday = (dos_date & 0x1f); /* Bits 0--4; Day (1--31). */ + tm.tm_mon = ((dos_date >> 5) & 0x0f) - 1; /* Bits 5--8; Month (1--12). */ + tm.tm_year = (dos_date >> 9) + 80; /* Bits 9--15; Year - 1980. */ + + tm.tm_isdst = -1; + return mktime (&tm); + +} + +static void display_comment (const char *path) { + + struct eocdr eocdr = { 0 }; + + if (find_eocdr (&eocdr)) { + + report_at (program_name, 0, REPORT_ERROR, "%s is not a valid ZIP file", path); + return; + + } + + printf ("Achive: %s\n", path); + + if (eocdr.comment) { + + printf ("%s\n", eocdr.comment); + free (eocdr.comment); + + } + +} + +static void list_zip (const char *path) { + + struct eocdr eocdr = { 0 }; + struct cfh cfh = { 0 }; + + char date[11], time[6]; + time_t ctime; + + unsigned long final_size = 0, total_files = 0, i; + + if (find_eocdr (&eocdr)) { + + report_at (program_name, 0, REPORT_ERROR, "%s is not a valid ZIP file", path); + return; + + } + + if (eocdr.disk_nbr != 0 || eocdr.cd_start_disk != 0 || eocdr.disk_cd_entries != eocdr.cd_entries) { + + report_at (program_name, 0, REPORT_INTERNAL_ERROR, "currently multi-volume archives aren't supported"); + return; + + } + + printf ("Achive: %s\n", path); + if (eocdr.comment) { printf ("%s\n", eocdr.comment); } + + fseek (fp, eocdr.cd_offset, SEEK_SET); + + printf (" Length Date Time Name \n"); + printf ("---------- ---------- ---------- ----------\n"); + + for (i = 0; i < eocdr.cd_entries; i++) { + + if (read_cfh (&cfh)) { + + report_at (program_name, 0, REPORT_ERROR, "failed to process Ceneral File Header"); + break; + + } + + if 
/**
 * Decide whether an archive member name is a plain relative path that is
 * safe to create below the extraction directory.
 *
 * name does not need to be NUL-terminated; exactly name_len bytes are
 * examined.  The name is rejected (0 is returned) when it
 *
 *   - is empty,
 *   - starts with '/', '\\' or '~',
 *   - contains any of < > : " | ? * or a byte below 0x20,
 *   - contains two consecutive dots anywhere, or
 *   - ends with a space or a dot.
 *
 * Otherwise 1 is returned.
 */
static int is_relative (const char *name, uint64_t name_len) {

    unsigned char last;
    uint64_t i;

    if (name_len < 1) {
        return 0;
    }

    if (name[0] == '/' || name[0] == '\\' || name[0] == '~') {
        return 0;
    }

    for (i = 0; i < name_len; i++) {

        switch (name[i]) {

            case '<':
            case '>':
            case ':':
            case '"':
            case '|':
            case '?':
            case '*':

                return 0;

            case '.':

                if (i + 1 < name_len && name[i + 1] == '.') {
                    return 0;
                }

                break;

            default:

                /**
                 * Compare as unsigned so that bytes >= 0x80 (e.g. UTF-8)
                 * are not mistaken for control characters where plain
                 * char is signed.
                 */
                if ((unsigned char) name[i] < ' ') {
                    return 0;
                }

                break;

        }

    }

    last = (unsigned char) name[name_len - 1];

    if (last == ' ' || last == '.') {
        return 0;
    }

    return 1;

}
*/ + +}; + +#define LFH_SIGNATURE 0x504B0304 +#define LFH_BASE_SIZE 30 + +static int read_lfh (struct lfh *lfh) { + + unsigned char buf[CFH_BASE_SIZE], *p = buf; + uint32_t signature; + + if (fread (buf, 1, LFH_BASE_SIZE, fp) != LFH_BASE_SIZE) { + return -1; + } + + signature = array_to_integer (buf, 4, 1), p += 4; + + if (signature != LFH_SIGNATURE) { + return -1; + } + + lfh->extrcat_ver = array_to_integer (p, 2, 0), p += 2; + lfh->gp_flag = array_to_integer (p, 2, 0), p += 2; + lfh->method = array_to_integer (p, 2, 0), p += 2; + lfh->mod_time = array_to_integer (p, 2, 0), p += 2; + lfh->mod_date = array_to_integer (p, 2, 0), p += 2; + lfh->crc32 = array_to_integer (p, 4, 0), p += 4; + lfh->comp_size = array_to_integer (p, 4, 0), p += 4; + lfh->uncomp_size = array_to_integer (p, 4, 0), p += 4; + lfh->name_len = array_to_integer (p, 2, 0), p += 2; + lfh->extra_len = array_to_integer (p, 2, 0), p += 2; + + if ((unsigned char *) (p - LFH_BASE_SIZE) != buf) { + + printf ("FUCKKKK\n"); + return -1; + + } + + if ((lfh->name = malloc (lfh->name_len + 1))) { + + memset (lfh->name, 0, lfh->name_len + 1); + + if (fread (lfh->name, 1, lfh->name_len, fp) != lfh->name_len) { + + free (lfh->name); + return -1; + + } + + } + + if ((lfh->extra = malloc (lfh->extra_len))) { + + memset (lfh->extra, 0, lfh->extra_len); + + if (fread (lfh->extra, 1, lfh->extra_len, fp) != lfh->extra_len) { + + free (lfh->extra); + free (lfh->name); + + return -1; + + } + + } + + return 0; + +} + +static int validate_structs (struct cfh *cfh, struct lfh *lfh) { + + if (cfh->extrcat_ver != lfh->extrcat_ver) { + return -1; + } + + if (cfh->gp_flag != lfh->gp_flag) { + return -1; + } + + if (cfh->method != lfh->method) { + return -1; + } + + if (cfh->mod_date != lfh->mod_date) { + return -1; + } + + if (cfh->mod_time != lfh->mod_time) { + return -1; + } + + if (cfh->crc32 != lfh->crc32) { + return -1; + } + + if (cfh->comp_size != lfh->comp_size) { + return -1; + } + + if (cfh->uncomp_size != 
lfh->uncomp_size) {
+        return -1;
+    }
+
+    if (cfh->name_len != lfh->name_len) {
+        return -1;
+    }
+
+    if (!lfh->name || strcmp (lfh->name, cfh->name)) {
+        return -1;
+    }
+
+    return 0;
+
+}
+
+/**
+ * Frees everything one central-directory entry may have left allocated and
+ * resets the bookkeeping so that the next entry starts from a clean slate.
+ *
+ * free (NULL) is a no-op, so zeroed headers and NULL buffers are fine.
+ *
+ * @param cfh       Central header whose name, comment and extra are freed.
+ * @param lfh       Local header whose name and extra are freed.
+ * @param out_path  Output path buffer, freed and set to NULL.
+ * @param data      Compressed data buffer, freed and set to NULL.
+ */
+static void release_entry (struct cfh *cfh, struct lfh *lfh, char **out_path, unsigned char **data) {
+
+    free (lfh->extra);
+    free (lfh->name);
+    memset (lfh, 0, sizeof (*lfh));
+
+    free (cfh->comment);
+    free (cfh->extra);
+    free (cfh->name);
+    memset (cfh, 0, sizeof (*cfh));
+
+    free (*out_path);
+    *out_path = NULL;
+
+    free (*data);
+    *data = NULL;
+
+}
+
+/**
+ * Extracts the entries of the archive that is open on the global fp.
+ *
+ * Walks the central directory.  Entries matched by state->xlist or rejected
+ * by is_relative () are skipped, directory entries are created and file
+ * entries are inflated, below state->exdir when it is set.  Encrypted
+ * entries, multi-volume archives and methods other than ZIP_DEFLATE are
+ * reported and end the extraction.
+ *
+ * Nothing is returned: failures go through report_at () and main () bases
+ * the exit status on get_error_count ().
+ *
+ * @param path  Name of the archive, used in messages only.
+ */
+static void extract_zip (const char *path) {
+
+    struct eocdr eocdr = { 0 };
+
+    struct cfh cfh = { 0 };
+    struct lfh lfh = { 0 };
+
+    unsigned char *data = NULL;
+    char *temp = NULL, *pattern;
+
+    uint64_t src_used, dst_used;
+    size_t len;
+
+    long orig_offset;
+    int j, matched;
+
+    FILE *outfile;
+    uint16_t i;
+
+#if defined (unix) || defined (__unix) || defined (__unix__) || defined (__APPLE__)
+
+    char ch = '/';
+    struct stat sb;
+
+#elif defined (_WIN32)
+
+    char ch = '\\';
+    DWORD dwAttrib;
+
+#endif
+
+    if (find_eocdr (&eocdr)) {
+
+        report_at (program_name, 0, REPORT_ERROR, "%s is not a valid ZIP file", path);
+        return;
+
+    }
+
+    if (eocdr.disk_nbr != 0 || eocdr.cd_start_disk != 0 || eocdr.disk_cd_entries != eocdr.cd_entries) {
+
+        report_at (program_name, 0, REPORT_INTERNAL_ERROR, "currently multi-volume archives aren't supported");
+        return;
+
+    }
+
+    printf ("Archive: %s\n", path);
+    if (eocdr.comment) { printf ("%s\n", eocdr.comment); }
+
+    if (fseek (fp, eocdr.cd_offset, SEEK_SET)) {
+
+        report_at (program_name, 0, REPORT_ERROR, "failed to seek to the central directory of %s", path);
+        return;
+
+    }
+
+    for (i = 0; i < eocdr.cd_entries; i++) {
+
+        /* Every `continue` below leaves the buffers of the entry it skipped
+           or finished to this call; every `break` leaves them to the call
+           after the loop.  That keeps each exit path leak-free. */
+        release_entry (&cfh, &lfh, &temp, &data);
+
+        if (read_cfh (&cfh)) {
+
+            report_at (program_name, 0, REPORT_ERROR, "failed to process Central File Header");
+            break;
+
+        }
+
+        if (cfh.gp_flag & 1) {
+
+            report_at (program_name, 0, REPORT_INTERNAL_ERROR, "currently encryption isn't supported");
+            break;
+
+        }
+
+        if (!cfh.name) {
+
+            report_at (program_name, 0, REPORT_INTERNAL_ERROR, "bad filename");
+            break;
+
+        }
+
+        /* Leaves j < state->xlist.length when an exclusion matched. */
+        for (j = 0; j < state->xlist.length; j++) {
+
+            if (!(pattern = xstrdup (state->xlist.data[j]))) {
+                continue;
+            }
+
+            if (strchr (pattern, '*')) {
+                matched = (wild_compare (pattern, cfh.name) != 0);
+            } else {
+                matched = (strcmp (cfh.name, pattern) == 0);
+            }
+
+            free (pattern);
+
+            if (matched) {
+                break;
+            }
+
+        }
+
+        /* The name stored in the archive is what gets validated, for files
+           and directories alike.  Checking the exdir-prefixed path instead
+           would drop every directory entry when exdir fails the check. */
+        if (j < state->xlist.length || !is_relative (cfh.name, cfh.name_len)) {
+            continue;
+        }
+
+        if (state->exdir) {
+
+            if ((temp = malloc (strlen (state->exdir) + 1 + cfh.name_len + 1))) {
+                sprintf (temp, "%s%c%s", state->exdir, ch, cfh.name);
+            }
+
+        } else if ((temp = malloc (cfh.name_len + 1))) {
+            sprintf (temp, "%s", cfh.name);
+        }
+
+        if (!temp) {
+
+            report_at (program_name, 0, REPORT_ERROR, "not enough free memory for name");
+            break;
+
+        }
+
+        if (cfh.ext_attrs & EXT_ATTR_DIR) {
+
+            /* Drop the trailing '/'; an empty path must not index temp[-1]. */
+            len = strlen (temp);
+
+            if (len > 0 && temp[len - 1] == '/') {
+                temp[len - 1] = '\0';
+            }
+
+#if defined (unix) || defined (__unix) || defined (__unix__) || defined (__APPLE__)
+
+            if (!stat (temp, &sb)) {
+
+                if (!S_ISDIR (sb.st_mode)) {
+
+                    report_at (program_name, 0, REPORT_ERROR, "%s exists but is not a directory", temp);
+                    break;
+
+                }
+
+                /* The directory is already there. */
+                continue;
+
+            }
+
+#elif defined (_WIN32)
+
+            dwAttrib = GetFileAttributes (temp);
+
+            if (dwAttrib != INVALID_FILE_ATTRIBUTES) {
+
+                if (!(dwAttrib & FILE_ATTRIBUTE_DIRECTORY)) {
+
+                    report_at (program_name, 0, REPORT_ERROR, "%s exists but is not a directory", temp);
+                    break;
+
+                }
+
+                /* The directory is already there. */
+                continue;
+
+            }
+
+#endif
+
+            printf (" creating: %s%c\n", temp, ch);
+
+            if (make_directory (temp)) {
+
+                /* Name the directory that failed; state->exdir may be NULL. */
+                report_at (program_name, 0, REPORT_ERROR, "failed to create %s", temp);
+                break;
+
+            }
+
+            continue;
+
+        }
+
+        if (cfh.method != ZIP_DEFLATE) {
+
+            report_at (program_name, 0, REPORT_ERROR, "currently only ZIP_DEFLATE is supported");
+            break;
+
+        }
+
+        /* Remember where the next central header starts, then visit the
+           local header and the compressed data that follows it. */
+        if ((orig_offset = ftell (fp)) < 0 || fseek (fp, cfh.lfh_offset, SEEK_SET)) {
+
+            report_at (program_name, 0, REPORT_ERROR, "failed to seek to the Local File Header of %s", cfh.name);
+            break;
+
+        }
+
+        if (read_lfh (&lfh)) {
+
+            report_at (program_name, 0, REPORT_ERROR, "failed to process Local File Header");
+            break;
+
+        }
+
+        if (validate_structs (&cfh, &lfh)) {
+
+            report_at (program_name, 0, REPORT_ERROR, "Central File Header and Local File Header mismatch");
+            break;
+
+        }
+
+        /* Size the buffer with the same field that bounds the read below. */
+        if (!(data = malloc (lfh.comp_size))) {
+
+            report_at (program_name, 0, REPORT_ERROR, "not enough free memory to read %s", cfh.name);
+            break;
+
+        }
+
+        if (fread (data, 1, lfh.comp_size, fp) != lfh.comp_size) {
+
+            report_at (program_name, 0, REPORT_ERROR, "failed to read compressed data of %s", cfh.name);
+            break;
+
+        }
+
+        if (fseek (fp, orig_offset, SEEK_SET)) {
+
+            report_at (program_name, 0, REPORT_ERROR, "failed to seek back to the central directory of %s", path);
+            break;
+
+        }
+
+        if (!(outfile = fopen (temp, "w+b"))) {
+
+            /* Nothing was created here, so there is nothing to remove; an
+               existing file that cannot be opened is left alone. */
+            report_at (program_name, 0, REPORT_ERROR, "failed to open '%s' for writing", temp);
+            break;
+
+        }
+
+        printf (" inflating: %s\n", temp);
+
+        if (hwinflate (data, lfh.comp_size, &src_used, outfile, lfh.uncomp_size, &dst_used) != HWINF_OK) {
+
+            report_at (program_name, 0, REPORT_ERROR, "failed to extract %s", temp);
+
+            fclose (outfile);
+            remove (temp);
+
+            break;
+
+        }
+
+        free (data);
+        data = NULL;
+
+        /* fclose () flushes, so a full disk can still surface here. */
+        if (fclose (outfile)) {
+
+            report_at (program_name, 0, REPORT_ERROR, "failed to extract %s", temp);
+            remove (temp);
+
+            break;
+
+        }
+
+        if (src_used != lfh.comp_size || dst_used != lfh.uncomp_size) {
+
+            /* The casts make the arguments match %lu whatever the widths
+               of the project's own integer typedefs are. */
+            report_at (program_name, 0, REPORT_ERROR, "%lu, %lu, %lu, %lu: failed to extract %s",
+                (unsigned long) src_used, (unsigned long) lfh.comp_size,
+                (unsigned long) dst_used, (unsigned long) lfh.uncomp_size, temp);
+
+            remove (temp);
+            break;
+
+        }
+
+    }
+
+    release_entry (&cfh, &lfh, &temp, &data);
+
+}
+
+/**
+ * Entry point: parses the command line, then lists, shows the comment of or
+ * extracts every archive named on it.
+ *
+ * @return EXIT_FAILURE when no archive was named, when several archives are
+ *         combined with an extraction directory, when that directory cannot
+ *         be created or when get_error_count () is non-zero at the end;
+ *         EXIT_SUCCESS otherwise.
+ */
+int main (int argc, char **argv) {
+
+    long i;
+
+    if (argc && *argv) {
+
+        char *p;
+        program_name = *argv;
+
+        /* Keep only the part after the last path separator. */
+        if ((p = strrchr (program_name, '/')) || (p = strrchr (program_name, '\\'))) {
+            program_name = (p + 1);
+        }
+
+    }
+
+    state = xmalloc (sizeof (*state));
+    parse_args (argc, argv, 1);
+
+    if (state->nb_files == 0) {
+
+        report_at (program_name, 0, REPORT_ERROR, "no input files provided");
+        return EXIT_FAILURE;
+
+    }
+
+    if (state->exdir) {
+
+        if (state->nb_files > 1) {
+
+            report_at (program_name, 0, REPORT_ERROR, "only one file can be unzipped when exdir is specified");
+            return EXIT_FAILURE;
+
+        }
+
+    }
+
+    for (i = 0; i < state->nb_files; i++) {
+
+        /* Read-only: "r+b" would refuse archives that are not writable. */
+        if (!(fp = fopen (state->files[i], "rb"))) {
+
+            report_at (program_name, 0, REPORT_ERROR, "failed to open '%s' for reading", state->files[i]);
+            continue;
+
+        }
+
+        if (state->list) {
+
+            list_zip (state->files[i]);
+
+            fclose (fp);
+            continue;
+
+        }
+
+        if (state->only_comment) {
+
+            display_comment (state->files[i]);
+
+            fclose (fp);
+            continue;
+
+        }
+
+        if (state->exdir) {
+
+            if (make_directory (state->exdir)) {
+
+                report_at (program_name, 0, REPORT_ERROR, "failed to create %s", state->exdir);
+
+                fclose (fp);
+                return EXIT_FAILURE;
+
+            }
+
+        }
+
+        extract_zip (state->files[i]);
+        fclose (fp);
+
+    }
+
+    return (get_error_count () > 0 ? EXIT_FAILURE : EXIT_SUCCESS);
+
+}
diff --git a/unzip.h b/unzip.h
new file mode 100755
index 0000000..15f44ff
--- /dev/null
+++ b/unzip.h
@@ -0,0 +1,48 @@
+/******************************************************************************
+ * @file            unzip.h
+ *****************************************************************************/
+#ifndef _UNZIP_H
+#define _UNZIP_H
+
+#include "stdint.h"
+#include "vector.h"
+
+/* Inputs and options collected from the command line. */
+struct unzip_state {
+
+    const char **files;         /* Archives to process. */
+    long nb_files;              /* Number of entries in files. */
+
+    const char *exdir;          /* Extraction directory, NULL when not given. */
+    int only_comment, list;     /* Non-zero: only show the comment / only list. */
+
+    struct vector xlist;        /* Entry names or '*' patterns to skip. */
+
+};
+
+extern struct unzip_state *state;
+extern const char *program_name;
+
+/* Bits of an entry's external attributes.  No trailing ';', so that the
+   macros can be used inside expressions. */
+#define EXT_ATTR_DIR (1U << 4)
+#define EXT_ATTR_ARC (1U << 5)
+
+#define ZIP_DEFLATE 8
+
+typedef enum {
+
+    HWINF_OK,   /* Inflation was successful. */
+    HWINF_FULL, /* Not enough room in the output buffer. */
+    HWINF_ERR   /* Error in the input data. */
+
+} inf_stat_t;
+
+#include <stdio.h>
+
+/* Inflater entry point, defined outside this header.  extract_zip () passes
+   the compressed bytes and their length, the output file and the expected
+   output size, then compares *src_used and *dst_used with those sizes. */
+inf_stat_t hwinflate (unsigned char *src, uint64_t src_len, uint64_t *src_used, FILE *outfile, uint64_t dst_cap, uint64_t *dst_used);
+
+#endif /* _UNZIP_H */
diff --git a/vector.c b/vector.c
new file mode 100755
index 0000000..723993a
--- /dev/null
+++ b/vector.c
@@ -0,0 +1,90 @@
+/******************************************************************************
+ * @file            vector.c
+ *****************************************************************************/
+#include <limits.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "vector.h"
+
+/* Defined elsewhere in the project. */
+extern void *xrealloc (void *ptr, unsigned int size);
+
+/**
+ * Makes sure vec can hold more than length elements.
+ *
+ * The capacity starts at 16 and doubles until it exceeds length, so any
+ * requested length is honoured, not only the current length plus one.
+ *
+ * @return 0 on success, -1 when vec is NULL or the capacity cannot grow any
+ *         further without overflowing the size handed to xrealloc ().
+ */
+int vec_adjust (struct vector *vec, int length) {
+
+    int capacity;
+
+    if (!vec) {
+        return -1;
+    }
+
+    capacity = vec->capacity;
+
+    if (capacity > length) {
+        return 0;
+    }
+
+    while (capacity <= length) {
+
+        if (capacity <= 0) {
+            capacity = 16;
+        } else if (capacity > INT_MAX / 2 / (int) sizeof (*(vec->data))) {
+            return -1;
+        } else {
+            capacity <<= 1;
+        }
+
+    }
+
+    vec->data = xrealloc (vec->data, sizeof (*(vec->data)) * capacity);
+    vec->capacity = capacity;
+
+    return 0;
+
+}
+
+/**
+ * Removes and returns the last element.
+ *
+ * @return The element, or NULL when vec is NULL or empty.
+ */
+void *vec_pop (struct vector *vec) {
+
+    if (!vec || vec->length <= 0) {
+        return NULL;
+    }
+
+    return vec->data[--vec->length];
+
+}
+
+/**
+ * Appends elem, growing the storage first when it is full.
+ *
+ * @return 0 on success, -1 when vec is NULL, else the result of vec_adjust ().
+ */
+int vec_push (struct vector *vec, void *elem) {
+
+    int ret;
+
+    if (!vec) {
+        return -1;
+    }
+
+    if ((ret = vec_adjust (vec, vec->length)) != 0) {
+        return ret;
+    }
+
+    vec->data[vec->length++] = elem;
+    return 0;
+
+}
diff --git a/vector.h b/vector.h
new file mode 100755
index 0000000..3da05e1
--- /dev/null
+++ b/vector.h
@@ -0,0 +1,20 @@
+/******************************************************************************
+ * @file            vector.h
+ *****************************************************************************/
+#ifndef _VECTOR_H
+#define _VECTOR_H
+
+/* Growable array of untyped pointers; capacity 0 means nothing allocated yet. */
+struct vector {
+
+    void **data;            /* Element storage, (re)allocated by vec_adjust (). */
+    int capacity, length;   /* Allocated slots and slots in use. */
+
+};
+
+int vec_adjust (struct vector *vec, int length);
+int vec_push (struct vector *vec, void *elem);
+
+void *vec_pop (struct vector *vec);
+
+#endif /* _VECTOR_H */
-- 
2.34.1