• Main Page
  • Related Pages
  • Modules
  • Namespaces
  • Data Structures
  • Files
  • File List
  • Globals

contrib/zlib/contrib/blast/blast.c

00001 /* blast.c
00002  * Copyright (C) 2003 Mark Adler
00003  * For conditions of distribution and use, see copyright notice in blast.h
00004  * version 1.1, 16 Feb 2003
00005  *
00006  * blast.c decompresses data compressed by the PKWare Compression Library.
00007  * This function provides functionality similar to the explode() function of
00008  * the PKWare library, hence the name "blast".
00009  *
00010  * This decompressor is based on the excellent format description provided by
00011  * Ben Rudiak-Gould in comp.compression on August 13, 2001.  Interestingly, the
00012  * example Ben provided in the post is incorrect.  The distance 110001 should
00013  * instead be 111000.  When corrected, the example byte stream becomes:
00014  *
00015  *    00 04 82 24 25 8f 80 7f
00016  *
00017  * which decompresses to "AIAIAIAIAIAIA" (without the quotes).
00018  */
00019 
00020 /*
00021  * Change history:
00022  *
00023  * 1.0  12 Feb 2003     - First version
00024  * 1.1  16 Feb 2003     - Fixed distance check for > 4 GB uncompressed data
00025  */
00026 
00027 #include <setjmp.h>             /* for setjmp(), longjmp(), and jmp_buf */
00028 #include "blast.h"              /* prototype for blast() */
00029 
00030 #define local static            /* file-private linkage for the helper functions below */
00031 #define MAXBITS 13              /* maximum Huffman code length, in bits */
00032 #define MAXWIN 4096             /* size in bytes of the output buffer / sliding window */
00033 
00034 /* input and output state */
00035 struct state {
00036     /* input state */
00037     blast_in infun;             /* user input callback; a return of 0 is treated as end of input */
00038     void *inhow;                /* opaque pointer handed unchanged to infun() */
00039     unsigned char *in;          /* next unread input byte (pointer is set by infun()) */
00040     unsigned left;              /* number of unread input bytes available at in */
00041     int bitbuf;                 /* bit buffer: pending bits, next bit to use is bit 0 */
00042     int bitcnt;                 /* number of valid bits in bitbuf */
00043 
00044     /* longjmp() target used by bits() and decode() when the input runs out */
00045     jmp_buf env;
00046 
00047     /* output state */
00048     blast_out outfun;           /* user output callback; a nonzero return is treated as failure */
00049     void *outhow;               /* opaque pointer handed unchanged to outfun() */
00050     unsigned next;              /* index of next write location in out[]; flushed at MAXWIN */
00051     int first;                  /* nonzero until out[] has been flushed once; enables distance check */
00052     unsigned char out[MAXWIN];  /* output buffer, also the sliding window for copies */
00053 };
00054 
00055 /*
00056  * Return need bits from the input stream.  This always leaves fewer than
00057  * eight bits in the buffer.  bits() works properly for need == 0.
00058  *
00059  * Format notes:
00060  *
00061  * - Bits are stored in bytes from the least significant bit to the most
00062  *   significant bit.  Therefore bits are dropped from the bottom of the bit
00063  *   buffer, using shift right, and new bytes are appended to the top of the
00064  *   bit buffer, using shift left.
00065  */
00066 local int bits(struct state *s, int need) /* returns the next need bits, first-read bit in bit 0 */
00067 {
00068     int val;            /* bit accumulator: buffered bits plus newly loaded bytes */
00069 
00070     /* load at least need bits into val */
00071     val = s->bitbuf;
00072     while (s->bitcnt < need) {
00073         if (s->left == 0) {
00074             s->left = s->infun(s->inhow, &(s->in)); /* ask the user callback for more input */
00075             if (s->left == 0) longjmp(s->env, 1);       /* out of input: unwind to setjmp() in blast() */
00076         }
00077         val |= (int)(*(s->in)++) << s->bitcnt;          /* place next byte above the bits already held */
00078         s->left--;
00079         s->bitcnt += 8;
00080     }
00081 
00082     /* drop need bits and update buffer, always zero to seven bits left */
00083     s->bitbuf = val >> need;
00084     s->bitcnt -= need;
00085 
00086     /* return need bits, zeroing the bits above that */
00087     return val & ((1 << need) - 1);
00088 }
00089 
00090 /*
00091  * Huffman code decoding tables.  count[1..MAXBITS] is the number of symbols of
00092  * each length, which for a canonical code are stepped through in order.
00093  * symbol[] are the symbol values in canonical order, where the number of
00094  * entries is the sum of the counts in count[].  The decoding process can be
00095  * seen in the function decode() below.
00096  */
00097 struct huffman {
00098     short *count;       /* count[len] is the number of symbols whose code has length len */
00099     short *symbol;      /* symbols sorted by code length, then by symbol value */
00100 };
00101 
00102 /*
00103  * Decode a code from the stream s using huffman table h.  Return the symbol or
00104  * a negative value if there is an error.  If all of the lengths are zero, i.e.
00105  * an empty code, or if the code is incomplete and an invalid code is received,
00106  * then -9 is returned after reading MAXBITS bits.
00107  *
00108  * Format notes:
00109  *
00110  * - The codes as stored in the compressed data are bit-reversed relative to
00111  *   a simple integer ordering of codes of the same lengths.  Hence below the
00112  *   bits are pulled from the compressed data one at a time and used to
00113  *   build the code value reversed from what is in the stream in order to
00114  *   permit simple integer comparisons for decoding.
00115  *
00116  * - The first code for the shortest length is all ones.  Subsequent codes of
00117  *   the same length are simply integer decrements of the previous code.  When
00118  *   moving up a length, a one bit is appended to the code.  For a complete
00119  *   code, the last code of the longest length will be all zeros.  To support
00120  *   this ordering, the bits pulled during decoding are inverted to apply the
00121  *   more "natural" ordering starting with all zeros and incrementing.
00122  */
00123 local int decode(struct state *s, struct huffman *h) /* returns a symbol, or -9 if no code matched */
00124 {
00125     int len;            /* current number of bits in code */
00126     int code;           /* len bits being decoded (inverted, most recent bit lowest) */
00127     int first;          /* first code of length len */
00128     int count;          /* number of codes of length len */
00129     int index;          /* index of first code of length len in symbol table */
00130     int bitbuf;         /* local copy of the bits being consumed */
00131     int left;           /* bits left in bitbuf, or left to process in this pass */
00132     short *next;        /* walks h->count[] one length at a time */
00133 
00134     bitbuf = s->bitbuf; /* start with bits left over from earlier reads */
00135     left = s->bitcnt;
00136     code = first = index = 0;
00137     len = 1;
00138     next = h->count + 1; /* skip count[0], the tally of zero-length (unused) symbols */
00139     while (1) {
00140         while (left--) { /* consume the available bits one at a time */
00141             code |= (bitbuf & 1) ^ 1;   /* invert code */
00142             bitbuf >>= 1;
00143             count = *next++;
00144             if (code < first + count) { /* if length len, return symbol */
00145                 s->bitbuf = bitbuf;
00146                 s->bitcnt = (s->bitcnt - len) & 7; /* unused bits, modulo the whole bytes loaded here */
00147                 return h->symbol[index + (code - first)];
00148             }
00149             index += count;             /* else update for next length */
00150             first += count;
00151             first <<= 1;
00152             code <<= 1;
00153             len++;
00154         }
00155         left = (MAXBITS+1) - len; /* bits that may still be read before exceeding MAXBITS */
00156         if (left == 0) break; /* MAXBITS bits examined without a match */
00157         if (s->left == 0) {
00158             s->left = s->infun(s->inhow, &(s->in)); /* ask the user callback for more input */
00159             if (s->left == 0) longjmp(s->env, 1);       /* out of input */
00160         }
00161         bitbuf = *(s->in)++; /* refill from the next input byte */
00162         s->left--;
00163         if (left > 8) left = 8; /* a single byte supplies at most eight bits per pass */
00164     }
00165     return -9;                          /* ran out of codes */
00166 }
00167 
00168 /*
00169  * Given a list of repeated code lengths rep[0..n-1], where each byte is a
00170  * count (high four bits + 1) and a code length (low four bits), generate the
00171  * list of code lengths.  This compaction reduces the size of the object code.
00172  * Then given the list of code lengths length[0..n-1] representing a canonical
00173  * Huffman code for n symbols, construct the tables required to decode those
00174  * codes.  Those tables are the number of codes of each length, and the symbols
00175  * sorted by length, retaining their original order within each length.  The
00176  * return value is zero for a complete code set, negative for an over-
00177  * subscribed code set, and positive for an incomplete code set.  The tables
00178  * can be used if the return value is zero or positive, but they cannot be used
00179  * if the return value is negative.  If the return value is zero, it is not
00180  * possible for decode() using that table to return an error--any stream of
00181  * enough bits will resolve to a symbol.  If the return value is positive, then
00182  * it is possible for decode() using that table to return an error for received
00183  * codes past the end of the incomplete lengths.
00184  */
00185 local int construct(struct huffman *h, const unsigned char *rep, int n) /* n: bytes in rep[] */
00186 {
00187     int symbol;         /* current symbol when stepping through length[] */
00188     int len;            /* current length when stepping through h->count[] */
00189     int left;           /* repeat counter first, then number of possible codes left */
00190     short offs[MAXBITS+1];      /* offsets in symbol table for each length */
00191     short length[256];  /* code length per symbol (expansion of rep[] is not bounds-checked) */
00192 
00193     /* convert compact repeat counts into symbol bit length list */
00194     symbol = 0;
00195     do {
00196         len = *rep++;
00197         left = (len >> 4) + 1; /* repeat count: high four bits plus one */
00198         len &= 15; /* code length: low four bits */
00199         do {
00200             length[symbol++] = len;
00201         } while (--left);
00202     } while (--n); /* do/while form: n must be at least 1 on entry */
00203     n = symbol; /* from here on n is the number of symbols */
00204 
00205     /* count number of codes of each length */
00206     for (len = 0; len <= MAXBITS; len++)
00207         h->count[len] = 0;
00208     for (symbol = 0; symbol < n; symbol++)
00209         (h->count[length[symbol]])++;   /* assumes lengths are within bounds */
00210     if (h->count[0] == n)               /* no codes! */
00211         return 0;                       /* complete, but decode() will fail */
00212 
00213     /* check for an over-subscribed or incomplete set of lengths */
00214     left = 1;                           /* one possible code of zero length */
00215     for (len = 1; len <= MAXBITS; len++) {
00216         left <<= 1;                     /* one more bit, double codes left */
00217         left -= h->count[len];          /* deduct count from possible codes */
00218         if (left < 0) return left;      /* over-subscribed--return negative */
00219     }                                   /* left > 0 means incomplete */
00220 
00221     /* generate offsets into symbol table for each length for sorting */
00222     offs[1] = 0;
00223     for (len = 1; len < MAXBITS; len++)
00224         offs[len + 1] = offs[len] + h->count[len];
00225 
00226     /*
00227      * put symbols in table sorted by length, by symbol order within each
00228      * length
00229      */
00230     for (symbol = 0; symbol < n; symbol++)
00231         if (length[symbol] != 0) /* zero length means the symbol has no code */
00232             h->symbol[offs[length[symbol]]++] = symbol;
00233 
00234     /* return zero for complete set, positive for incomplete set */
00235     return left;
00236 }
00237 
00238 /*
00239  * Decode PKWare Compression Library stream.
00240  *
00241  * Format notes:
00242  *
00243  * - First byte is 0 if literals are uncoded or 1 if they are coded.  Second
00244  *   byte is 4, 5, or 6 for the number of extra bits in the distance code.
00245  *   This is the base-2 logarithm of the dictionary size minus six.
00246  *
00247  * - Compressed data is a combination of literals and length/distance pairs
00248  *   terminated by an end code.  Literals are either Huffman coded or
00249  *   uncoded bytes.  A length/distance pair is a coded length followed by a
00250  *   coded distance to represent a string that occurs earlier in the
00251  *   uncompressed data that occurs again at the current location.
00252  *
00253  * - A bit preceding a literal or length/distance pair indicates which comes
00254  *   next, 0 for literals, 1 for length/distance.
00255  *
00256  * - If literals are uncoded, then the next eight bits are the literal, in the
00257  *   normal bit order in the stream, i.e. no bit-reversal is needed. Similarly,
00258  *   no bit reversal is needed for either the length extra bits or the distance
00259  *   extra bits.
00260  *
00261  * - Literal bytes are simply written to the output.  A length/distance pair is
00262  *   an instruction to copy previously uncompressed bytes to the output.  The
00263  *   copy is from distance bytes back in the output stream, copying for length
00264  *   bytes.
00265  *
00266  * - Distances pointing before the beginning of the output data are not
00267  *   permitted.
00268  *
00269  * - Overlapped copies, where the length is greater than the distance, are
00270  *   allowed and common.  For example, a distance of one and a length of 518
00271  *   simply copies the last byte 518 times.  A distance of four and a length of
00272  *   twelve copies the last four bytes three times.  A simple forward copy
00273  *   ignoring whether the length is greater than the distance or not implements
00274  *   this correctly.
00275  */
00276 local int decomp(struct state *s) /* returns 0 at end code, 1 on output failure, <0 on bad data */
00277 {
00278     int lit;            /* true if literals are coded */
00279     int dict;           /* log2(dictionary size) - 6 */
00280     int symbol;         /* decoded symbol, extra bits for distance */
00281     int len;            /* length for copy */
00282     int dist;           /* distance for copy */
00283     int copy;           /* copy counter */
00284     unsigned char *from, *to;   /* copy pointers */
00285     static int virgin = 1;                              /* build tables once */
00286     static short litcnt[MAXBITS+1], litsym[256];        /* litcode memory */
00287     static short lencnt[MAXBITS+1], lensym[16];         /* lencode memory */
00288     static short distcnt[MAXBITS+1], distsym[64];       /* distcode memory */
00289     static struct huffman litcode = {litcnt, litsym};   /* literal code */
00290     static struct huffman lencode = {lencnt, lensym};   /* length code */
00291     static struct huffman distcode = {distcnt, distsym};/* distance code */
00292         /* bit lengths of literal codes */
00293     static const unsigned char litlen[] = {
00294         11, 124, 8, 7, 28, 7, 188, 13, 76, 4, 10, 8, 12, 10, 12, 10, 8, 23, 8,
00295         9, 7, 6, 7, 8, 7, 6, 55, 8, 23, 24, 12, 11, 7, 9, 11, 12, 6, 7, 22, 5,
00296         7, 24, 6, 11, 9, 6, 7, 22, 7, 11, 38, 7, 9, 8, 25, 11, 8, 11, 9, 12,
00297         8, 12, 5, 38, 5, 38, 5, 11, 7, 5, 6, 21, 6, 10, 53, 8, 7, 24, 10, 27,
00298         44, 253, 253, 253, 252, 252, 252, 13, 12, 45, 12, 45, 12, 61, 12, 45,
00299         44, 173};
00300         /* bit lengths of length codes 0..15 */
00301     static const unsigned char lenlen[] = {2, 35, 36, 53, 38, 23};
00302         /* bit lengths of distance codes 0..63 */
00303     static const unsigned char distlen[] = {2, 20, 53, 230, 247, 151, 248};
00304     static const short base[16] = {     /* base for length codes */
00305         3, 2, 4, 5, 6, 7, 8, 9, 10, 12, 16, 24, 40, 72, 136, 264};
00306     static const char extra[16] = {     /* extra bits for length codes */
00307         0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8};
00308 
00309     /* set up decoding tables (once--might not be thread-safe) */
00310     if (virgin) {
00311         construct(&litcode, litlen, sizeof(litlen));
00312         construct(&lencode, lenlen, sizeof(lenlen));
00313         construct(&distcode, distlen, sizeof(distlen));
00314         virgin = 0;
00315     }
00316 
00317     /* read header */
00318     lit = bits(s, 8);
00319     if (lit > 1) return -1; /* first header byte must be 0 or 1 */
00320     dict = bits(s, 8);
00321     if (dict < 4 || dict > 6) return -2; /* second header byte must be 4, 5, or 6 */
00322 
00323     /* decode literals and length/distance pairs */
00324     do {
00325         if (bits(s, 1)) { /* flag bit: 1 selects a length/distance pair, 0 a literal */
00326             /* get length */
00327             symbol = decode(s, &lencode);
00328             len = base[symbol] + bits(s, extra[symbol]);
00329             if (len == 519) break;              /* end code */
00330 
00331             /* get distance */
00332             symbol = len == 2 ? 2 : dict; /* count of uncoded low distance bits: 2 when len is 2 */
00333             dist = decode(s, &distcode) << symbol;
00334             dist += bits(s, symbol);
00335             dist++; /* stored value is one less than the actual distance */
00336             if (s->first && dist > s->next)
00337                 return -3;              /* distance too far back */
00338 
00339             /* copy length bytes from distance bytes back */
00340             do {
00341                 to = s->out + s->next;
00342                 from = to - dist;
00343                 copy = MAXWIN;
00344                 if (s->next < dist) { /* source lies before out[0]: wrap it to the window end */
00345                     from += copy;
00346                     copy = dist;
00347                 }
00348                 copy -= s->next; /* bytes until from or to reaches the end of out[] */
00349                 if (copy > len) copy = len;
00350                 len -= copy;
00351                 s->next += copy;
00352                 do {
00353                     *to++ = *from++; /* byte-by-byte forward copy so overlapping copies repeat data */
00354                 } while (--copy);
00355                 if (s->next == MAXWIN) { /* window full: hand it to the output callback */
00356                     if (s->outfun(s->outhow, s->out, s->next)) return 1;
00357                     s->next = 0;
00358                     s->first = 0; /* distance check is no longer applied after the first flush */
00359                 }
00360             } while (len != 0);
00361         }
00362         else {
00363             /* get literal and write it */
00364             symbol = lit ? decode(s, &litcode) : bits(s, 8);
00365             s->out[s->next++] = symbol;
00366             if (s->next == MAXWIN) { /* window full: hand it to the output callback */
00367                 if (s->outfun(s->outhow, s->out, s->next)) return 1;
00368                 s->next = 0;
00369                 s->first = 0; /* distance check is no longer applied after the first flush */
00370             }
00371         }
00372     } while (1);
00373     return 0;
00374 }
00375 
00376 /* See comments in blast.h */
00377 int blast(blast_in infun, void *inhow, blast_out outfun, void *outhow) /* 0 ok, 1 output error, 2 input ran out, <0 bad data */
00378 {
00379     struct state s;             /* input/output state */
00380     int err;                    /* return value */
00381 
00382     /* initialize input state */
00383     s.infun = infun;
00384     s.inhow = inhow;
00385     s.left = 0; /* no input yet; the first read calls infun() */
00386     s.bitbuf = 0;
00387     s.bitcnt = 0;
00388 
00389     /* initialize output state */
00390     s.outfun = outfun;
00391     s.outhow = outhow;
00392     s.next = 0;
00393     s.first = 1; /* check copy distances until the window has been flushed once */
00394 
00395     /* return if bits() or decode() tries to read past available input */
00396     if (setjmp(s.env) != 0)             /* if came back here via longjmp(), */
00397         err = 2;                        /*  then skip decomp(), return error */
00398     else
00399         err = decomp(&s);               /* decompress */
00400 
00401     /* write any leftover output and update the error code if needed */
00402     if (err != 1 && s.next && s.outfun(s.outhow, s.out, s.next) && err == 0)
00403         err = 1; /* final write failed and no earlier error was recorded */
00404     return err;
00405 }
00406 
00407 #ifdef TEST
00408 /* Example of how to use blast() */
00409 #include <stdio.h>
00410 #include <stdlib.h>
00411 
00412 #define CHUNK 16384 /* size in bytes of the read buffer used by inf() */
00413 
00414 local unsigned inf(void *how, unsigned char **buf) /* input callback: how is the FILE * to read */
00415 {
00416     static unsigned char hold[CHUNK]; /* read buffer, reused on every call */
00417 
00418     *buf = hold; /* tell the caller where the data is */
00419     return fread(hold, 1, CHUNK, (FILE *)how); /* bytes read; 0 signals no more input */
00420 }
00421 
00422 local int outf(void *how, unsigned char *buf, unsigned len) /* output callback: how is the FILE * to write */
00423 {
00424     return fwrite(buf, 1, len, (FILE *)how) != len; /* nonzero if fewer than len bytes were written */
00425 }
00426 
00427 /* Decompress a PKWare Compression Library stream from stdin to stdout */
00428 int main(void)
00429 {
00430     int ret, n; /* ret: blast() result; n: count of bytes still unread on stdin */
00431 
00432     /* decompress to stdout */
00433     ret = blast(inf, stdin, outf, stdout);
00434     if (ret != 0) fprintf(stderr, "blast error: %d\n", ret);
00435 
00436     /* see if there are any leftover bytes */
00437     n = 0;
00438     while (getchar() != EOF) n++; /* NOTE: bytes already read into inf()'s buffer are not counted */
00439     if (n) fprintf(stderr, "blast warning: %d unused bytes of input\n", n);
00440 
00441     /* return blast() error code */
00442     return ret;
00443 }
00444 #endif

Generated on Wed Oct 20 2010 11:12:17 for APBS by  doxygen 1.7.2