• Main Page
  • Related Pages
  • Modules
  • Namespaces
  • Data Structures
  • Files
  • File List
  • Globals

contrib/zlib/examples/gzjoin.c

00001 /* gzjoin -- command to join gzip files into one gzip file
00002 
00003   Copyright (C) 2004 Mark Adler, all rights reserved
00004   version 1.0, 11 Dec 2004
00005 
00006   This software is provided 'as-is', without any express or implied
00007   warranty.  In no event will the author be held liable for any damages
00008   arising from the use of this software.
00009 
00010   Permission is granted to anyone to use this software for any purpose,
00011   including commercial applications, and to alter it and redistribute it
00012   freely, subject to the following restrictions:
00013 
00014   1. The origin of this software must not be misrepresented; you must not
00015      claim that you wrote the original software. If you use this software
00016      in a product, an acknowledgment in the product documentation would be
00017      appreciated but is not required.
00018   2. Altered source versions must be plainly marked as such, and must not be
00019      misrepresented as being the original software.
00020   3. This notice may not be removed or altered from any source distribution.
00021 
00022   Mark Adler    madler@alumni.caltech.edu
00023  */
00024 
00025 /*
00026  * Change history:
00027  *
00028  * 1.0  11 Dec 2004     - First version
00029  * 1.1  12 Jun 2005     - Changed ssize_t to long for portability
00030  */
00031 
00032 /*
00033    gzjoin takes one or more gzip files on the command line and writes out a
00034    single gzip file that will uncompress to the concatenation of the
00035    uncompressed data from the individual gzip files.  gzjoin does this without
00036    having to recompress any of the data and without having to calculate a new
00037    crc32 for the concatenated uncompressed data.  gzjoin does however have to
00038    decompress all of the input data in order to find the bits in the compressed
00039    data that need to be modified to concatenate the streams.
00040 
00041    gzjoin does not do an integrity check on the input gzip files other than
00042    checking the gzip header and decompressing the compressed data.  They are
00043    otherwise assumed to be complete and correct.
00044 
00045    Each joint between gzip files removes at least 18 bytes of previous trailer
00046    and subsequent header, and inserts an average of about three bytes to the
00047    compressed data in order to connect the streams.  The output gzip file
00048    has a minimal ten-byte gzip header with no file name or modification time.
00049 
00050    This program was written to illustrate the use of the Z_BLOCK option of
00051    inflate() and the crc32_combine() function.  gzjoin will not compile with
00052    versions of zlib earlier than 1.2.3.
00053  */
00054 
00055 #include <stdio.h>      /* fputs(), fprintf(), fwrite(), putc() */
00056 #include <stdlib.h>     /* exit(), malloc(), free() */
00057 #include <fcntl.h>      /* open() */
00058 #include <unistd.h>     /* close(), read(), lseek() */
00059 #include "zlib.h"
00060     /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */
00061 
00062 #define local static
00063 
00064 /* exit with an error (return a value to allow use in an expression) */
00065 local int bail(char *why1, char *why2)
00066 {
00067     fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2);
00068     exit(1);
00069     return 0;
00070 }
00071 
00072 /* -- simple buffered file input with access to the buffer -- */
00073 
/* size in bytes of every input buffer; must be a power of two and fit in
   unsigned */
#define CHUNK 32768

/* state of one buffered input file */
typedef struct bin_s {
    char *name;             /* file name, kept only for error messages */
    int fd;                 /* file descriptor, -1 if the open failed */
    unsigned left;          /* number of unread bytes starting at next */
    unsigned char *next;    /* first unread byte, points into buf */
    unsigned char *buf;     /* allocated buffer of CHUNK bytes */
} bin;
00084 
00085 /* close a buffered file and free allocated memory */
00086 local void bclose(bin *in)
00087 {
00088     if (in != NULL) {
00089         if (in->fd != -1)
00090             close(in->fd);
00091         if (in->buf != NULL)
00092             free(in->buf);
00093         free(in);
00094     }
00095 }
00096 
00097 /* open a buffered file for input, return a pointer to type bin, or NULL on
00098    failure */
00099 local bin *bopen(char *name)
00100 {
00101     bin *in;
00102 
00103     in = malloc(sizeof(bin));
00104     if (in == NULL)
00105         return NULL;
00106     in->buf = malloc(CHUNK);
00107     in->fd = open(name, O_RDONLY, 0);
00108     if (in->buf == NULL || in->fd == -1) {
00109         bclose(in);
00110         return NULL;
00111     }
00112     in->left = 0;
00113     in->next = in->buf;
00114     in->name = name;
00115     return in;
00116 }
00117 
00118 /* load buffer from file, return -1 on read error, 0 or 1 on success, with
00119    1 indicating that end-of-file was reached */
00120 local int bload(bin *in)
00121 {
00122     long len;
00123 
00124     if (in == NULL)
00125         return -1;
00126     if (in->left != 0)
00127         return 0;
00128     in->next = in->buf;
00129     do {
00130         len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left);
00131         if (len < 0)
00132             return -1;
00133         in->left += (unsigned)len;
00134     } while (len != 0 && in->left < CHUNK);
00135     return len == 0 ? 1 : 0;
00136 }
00137 
/* Get the next byte from the buffered file in as an int, loading the buffer
   first if it is empty; bail if no byte is available.  The argument is
   parenthesized so that any pointer expression can be passed, but it is
   evaluated several times and therefore must not have side effects. */
#define bget(in) ((in)->left ? 0 : bload(in), \
                  (in)->left ? ((in)->left--, *((in)->next)++) : \
                    bail("unexpected end of file on ", (in)->name))
00142 
00143 /* get a four-byte little-endian unsigned integer from file */
00144 local unsigned long bget4(bin *in)
00145 {
00146     unsigned long val;
00147 
00148     val = bget(in);
00149     val += (unsigned long)(bget(in)) << 8;
00150     val += (unsigned long)(bget(in)) << 16;
00151     val += (unsigned long)(bget(in)) << 24;
00152     return val;
00153 }
00154 
00155 /* skip bytes in file */
00156 local void bskip(bin *in, unsigned skip)
00157 {
00158     /* check pointer */
00159     if (in == NULL)
00160         return;
00161 
00162     /* easy case -- skip bytes in buffer */
00163     if (skip <= in->left) {
00164         in->left -= skip;
00165         in->next += skip;
00166         return;
00167     }
00168 
00169     /* skip what's in buffer, discard buffer contents */
00170     skip -= in->left;
00171     in->left = 0;
00172 
00173     /* seek past multiples of CHUNK bytes */
00174     if (skip > CHUNK) {
00175         unsigned left;
00176 
00177         left = skip & (CHUNK - 1);
00178         if (left == 0) {
00179             /* exact number of chunks: seek all the way minus one byte to check
00180                for end-of-file with a read */
00181             lseek(in->fd, skip - 1, SEEK_CUR);
00182             if (read(in->fd, in->buf, 1) != 1)
00183                 bail("unexpected end of file on ", in->name);
00184             return;
00185         }
00186 
00187         /* skip the integral chunks, update skip with remainder */
00188         lseek(in->fd, skip - left, SEEK_CUR);
00189         skip = left;
00190     }
00191 
00192     /* read more input and skip remainder */
00193     bload(in);
00194     if (skip > in->left)
00195         bail("unexpected end of file on ", in->name);
00196     in->left -= skip;
00197     in->next += skip;
00198 }
00199 
00200 /* -- end of buffered input functions -- */
00201 
00202 /* skip the gzip header from file in */
00203 local void gzhead(bin *in)
00204 {
00205     int flags;
00206 
00207     /* verify gzip magic header and compression method */
00208     if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8)
00209         bail(in->name, " is not a valid gzip file");
00210 
00211     /* get and verify flags */
00212     flags = bget(in);
00213     if ((flags & 0xe0) != 0)
00214         bail("unknown reserved bits set in ", in->name);
00215 
00216     /* skip modification time, extra flags, and os */
00217     bskip(in, 6);
00218 
00219     /* skip extra field if present */
00220     if (flags & 4) {
00221         unsigned len;
00222 
00223         len = bget(in);
00224         len += (unsigned)(bget(in)) << 8;
00225         bskip(in, len);
00226     }
00227 
00228     /* skip file name if present */
00229     if (flags & 8)
00230         while (bget(in) != 0)
00231             ;
00232 
00233     /* skip comment if present */
00234     if (flags & 16)
00235         while (bget(in) != 0)
00236             ;
00237 
00238     /* skip header crc if present */
00239     if (flags & 2)
00240         bskip(in, 2);
00241 }
00242 
00243 /* write a four-byte little-endian unsigned integer to out */
00244 local void put4(unsigned long val, FILE *out)
00245 {
00246     putc(val & 0xff, out);
00247     putc((val >> 8) & 0xff, out);
00248     putc((val >> 16) & 0xff, out);
00249     putc((val >> 24) & 0xff, out);
00250 }
00251 
00252 /* Load up zlib stream from buffered input, bail if end of file */
00253 local void zpull(z_streamp strm, bin *in)
00254 {
00255     if (in->left == 0)
00256         bload(in);
00257     if (in->left == 0)
00258         bail("unexpected end of file on ", in->name);
00259     strm->avail_in = in->left;
00260     strm->next_in = in->next;
00261 }
00262 
00263 /* Write header for gzip file to out and initialize trailer. */
00264 local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out)
00265 {
00266     fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out);
00267     *crc = crc32(0L, Z_NULL, 0);
00268     *tot = 0;
00269 }
00270 
00271 /* Copy the compressed data from name, zeroing the last block bit of the last
00272    block if clr is true, and adding empty blocks as needed to get to a byte
00273    boundary.  If clr is false, then the last block becomes the last block of
00274    the output, and the gzip trailer is written.  crc and tot maintains the
00275    crc and length (modulo 2^32) of the output for the trailer.  The resulting
00276    gzip file is written to out.  gzinit() must be called before the first call
00277    of gzcopy() to write the gzip header and to initialize crc and tot. */
00278 local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot,
00279                   FILE *out)
00280 {
00281     int ret;                /* return value from zlib functions */
00282     int pos;                /* where the "last block" bit is in byte */
00283     int last;               /* true if processing the last block */
00284     bin *in;                /* buffered input file */
00285     unsigned char *start;   /* start of compressed data in buffer */
00286     unsigned char *junk;    /* buffer for uncompressed data -- discarded */
00287     z_off_t len;            /* length of uncompressed data (support > 4 GB) */
00288     z_stream strm;          /* zlib inflate stream */
00289 
00290     /* open gzip file and skip header */
00291     in = bopen(name);
00292     if (in == NULL)
00293         bail("could not open ", name);
00294     gzhead(in);
00295 
00296     /* allocate buffer for uncompressed data and initialize raw inflate
00297        stream */
00298     junk = malloc(CHUNK);
00299     strm.zalloc = Z_NULL;
00300     strm.zfree = Z_NULL;
00301     strm.opaque = Z_NULL;
00302     strm.avail_in = 0;
00303     strm.next_in = Z_NULL;
00304     ret = inflateInit2(&strm, -15);
00305     if (junk == NULL || ret != Z_OK)
00306         bail("out of memory", "");
00307 
00308     /* inflate and copy compressed data, clear last-block bit if requested */
00309     len = 0;
00310     zpull(&strm, in);
00311     start = strm.next_in;
00312     last = start[0] & 1;
00313     if (last && clr)
00314         start[0] &= ~1;
00315     strm.avail_out = 0;
00316     for (;;) {
00317         /* if input used and output done, write used input and get more */
00318         if (strm.avail_in == 0 && strm.avail_out != 0) {
00319             fwrite(start, 1, strm.next_in - start, out);
00320             start = in->buf;
00321             in->left = 0;
00322             zpull(&strm, in);
00323         }
00324 
00325         /* decompress -- return early when end-of-block reached */
00326         strm.avail_out = CHUNK;
00327         strm.next_out = junk;
00328         ret = inflate(&strm, Z_BLOCK);
00329         switch (ret) {
00330         case Z_MEM_ERROR:
00331             bail("out of memory", "");
00332         case Z_DATA_ERROR:
00333             bail("invalid compressed data in ", in->name);
00334         }
00335 
00336         /* update length of uncompressed data */
00337         len += CHUNK - strm.avail_out;
00338 
00339         /* check for block boundary (only get this when block copied out) */
00340         if (strm.data_type & 128) {
00341             /* if that was the last block, then done */
00342             if (last)
00343                 break;
00344 
00345             /* number of unused bits in last byte */
00346             pos = strm.data_type & 7;
00347 
00348             /* find the next last-block bit */
00349             if (pos != 0) {
00350                 /* next last-block bit is in last used byte */
00351                 pos = 0x100 >> pos;
00352                 last = strm.next_in[-1] & pos;
00353                 if (last && clr)
00354                     strm.next_in[-1] &= ~pos;
00355             }
00356             else {
00357                 /* next last-block bit is in next unused byte */
00358                 if (strm.avail_in == 0) {
00359                     /* don't have that byte yet -- get it */
00360                     fwrite(start, 1, strm.next_in - start, out);
00361                     start = in->buf;
00362                     in->left = 0;
00363                     zpull(&strm, in);
00364                 }
00365                 last = strm.next_in[0] & 1;
00366                 if (last && clr)
00367                     strm.next_in[0] &= ~1;
00368             }
00369         }
00370     }
00371 
00372     /* update buffer with unused input */
00373     in->left = strm.avail_in;
00374     in->next = strm.next_in;
00375 
00376     /* copy used input, write empty blocks to get to byte boundary */
00377     pos = strm.data_type & 7;
00378     fwrite(start, 1, in->next - start - 1, out);
00379     last = in->next[-1];
00380     if (pos == 0 || !clr)
00381         /* already at byte boundary, or last file: write last byte */
00382         putc(last, out);
00383     else {
00384         /* append empty blocks to last byte */
00385         last &= ((0x100 >> pos) - 1);       /* assure unused bits are zero */
00386         if (pos & 1) {
00387             /* odd -- append an empty stored block */
00388             putc(last, out);
00389             if (pos == 1)
00390                 putc(0, out);               /* two more bits in block header */
00391             fwrite("\0\0\xff\xff", 1, 4, out);
00392         }
00393         else {
00394             /* even -- append 1, 2, or 3 empty fixed blocks */
00395             switch (pos) {
00396             case 6:
00397                 putc(last | 8, out);
00398                 last = 0;
00399             case 4:
00400                 putc(last | 0x20, out);
00401                 last = 0;
00402             case 2:
00403                 putc(last | 0x80, out);
00404                 putc(0, out);
00405             }
00406         }
00407     }
00408 
00409     /* update crc and tot */
00410     *crc = crc32_combine(*crc, bget4(in), len);
00411     *tot += (unsigned long)len;
00412 
00413     /* clean up */
00414     inflateEnd(&strm);
00415     free(junk);
00416     bclose(in);
00417 
00418     /* write trailer if this is the last gzip file */
00419     if (!clr) {
00420         put4(*crc, out);
00421         put4(*tot, out);
00422     }
00423 }
00424 
/* Join the gzip files named on the command line and write the resulting
   single gzip file to stdout.  With no arguments, a usage message is printed
   on stderr.  Returns 0 on success; every failure, including a failure to
   write the output, ends the program through bail() with exit status 1. */
int main(int argc, char **argv)
{
    unsigned long crc, tot;     /* running crc and total uncompressed length */

    /* skip command name */
    argc--;
    argv++;

    /* show usage if no arguments */
    if (argc == 0) {
        fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n",
              stderr);
        return 0;
    }

    /* join gzip files on command line and write to stdout; argc is the
       number of files still to come, so it is zero (clr false) only for the
       last file */
    gzinit(&crc, &tot, stdout);
    while (argc--)
        gzcopy(*argv++, argc, &crc, &tot, stdout);

    /* flush here, while a failure can still be reported, instead of leaving
       it to exit processing where a write error would go unnoticed */
    if (fflush(stdout) != 0 || ferror(stdout))
        bail("could not write to ", "standard output");

    /* done */
    return 0;
}

Generated on Wed Oct 20 2010 11:12:17 for APBS by  doxygen 1.7.2