• Main Page
  • Related Pages
  • Modules
  • Namespaces
  • Data Structures
  • Files
  • File List
  • Globals

contrib/zlib/examples/gzappend.c

00001 /* gzappend -- command to append to a gzip file
00002 
00003   Copyright (C) 2003 Mark Adler, all rights reserved
00004   version 1.1, 4 Nov 2003
00005 
00006   This software is provided 'as-is', without any express or implied
00007   warranty.  In no event will the author be held liable for any damages
00008   arising from the use of this software.
00009 
00010   Permission is granted to anyone to use this software for any purpose,
00011   including commercial applications, and to alter it and redistribute it
00012   freely, subject to the following restrictions:
00013 
00014   1. The origin of this software must not be misrepresented; you must not
00015      claim that you wrote the original software. If you use this software
00016      in a product, an acknowledgment in the product documentation would be
00017      appreciated but is not required.
00018   2. Altered source versions must be plainly marked as such, and must not be
00019      misrepresented as being the original software.
00020   3. This notice may not be removed or altered from any source distribution.
00021 
00022   Mark Adler    madler@alumni.caltech.edu
00023  */
00024 
00025 /*
00026  * Change history:
00027  *
00028  * 1.0  19 Oct 2003     - First version
00029  * 1.1   4 Nov 2003     - Expand and clarify some comments and notes
00030  *                      - Add version and copyright to help
00031  *                      - Send help to stdout instead of stderr
00032  *                      - Add some preemptive typecasts
00033  *                      - Add L to constants in lseek() calls
00034  *                      - Remove some debugging information in error messages
00035  *                      - Use new data_type definition for zlib 1.2.1
00036  *                      - Simplify and unify file operations
00037  *                      - Finish off gzip file in gztack()
00038  *                      - Use deflatePrime() instead of adding empty blocks
00039  *                      - Keep gzip file clean on appended file read errors
00040  *                      - Use in-place rotate instead of auxiliary buffer
00041  *                        (Why you ask?  Because it was fun to write!)
00042  */
00043 
00044 /*
00045    gzappend takes a gzip file and appends to it, compressing files from the
00046    command line or data from stdin.  The gzip file is written to directly, to
00047    avoid copying that file, in case it's large.  Note that this results in the
00048    unfriendly behavior that if gzappend fails, the gzip file is corrupted.
00049 
00050    This program was written to illustrate the use of the new Z_BLOCK option of
00051    zlib 1.2.x's inflate() function.  This option returns from inflate() at each
00052    block boundary to facilitate locating and modifying the last block bit at
00053    the start of the final deflate block.  Also whether using Z_BLOCK or not,
00054    another required feature of zlib 1.2.x is that inflate() now provides the
00055    number of unused bits in the last input byte used.  gzappend will not work
00056    with versions of zlib earlier than 1.2.1.
00057 
00058    gzappend first decompresses the gzip file internally, discarding all but
00059    the last 32K of uncompressed data, and noting the location of the last block
00060    bit and the number of unused bits in the last byte of the compressed data.
00061    The gzip trailer containing the CRC-32 and length of the uncompressed data
00062    is verified.  This trailer will be later overwritten.
00063 
00064    Then the last block bit is cleared by seeking back in the file and rewriting
00065    the byte that contains it.  Seeking forward, the last byte of the compressed
00066    data is saved along with the number of unused bits to initialize deflate.
00067 
00068    A deflate process is initialized, using the last 32K of the uncompressed
00069    data from the gzip file to initialize the dictionary.  If the total
00070    uncompressed data was less than 32K, then all of it is used to initialize
00071    the dictionary.  The deflate output bit buffer is also initialized with the
00072    last bits from the original deflate stream.  From here on, the data to
00073    append is simply compressed using deflate, and written to the gzip file.
00074    When that is complete, the new CRC-32 and uncompressed length are written
00075    as the trailer of the gzip file.
00076  */
00077 
00078 #include <stdio.h>
00079 #include <stdlib.h>
00080 #include <string.h>
00081 #include <fcntl.h>
00082 #include <unistd.h>
00083 #include "zlib.h"
00084 
/* zlib-style shorthand: helpers private to this file are declared "local" */
#define local static

/* base-2 logarithm of the I/O buffer size, and that size in bytes */
#define LGCHUNK 14
#define CHUNK (1U << LGCHUNK)

/* size in bytes of the window of uncompressed data kept while scanning */
#define DSIZE (1U << 15)
00089 
/* Report a fatal error and terminate the program.

   Writes "gzappend error: " followed by msg1 and msg2 (concatenated with no
   separator) and a newline to stderr, then calls exit(1), so this function
   never returns.  Both strings are only read, so they are taken as pointers
   to const; callers may pass string literals or writable buffers alike. */
local void bye(const char *msg1, const char *msg2)
{
    fprintf(stderr, "gzappend error: %s%s\n", msg1, msg2);
    exit(1);
}
00096 
/* Greatest common divisor of a and b (gcd(x, 0) == gcd(0, x) == x).

   Subtractive form of Euclid's algorithm: on each pass the smaller value is
   doubled for as long as it still fits into the larger one, and the result
   is subtracted from the larger value.  That keeps the pass count low when
   one argument is much greater than the other, and the two values never
   have to be swapped.  When a == b the subtraction is applied to b. */
local unsigned gcd(unsigned a, unsigned b)
{
    while (a != 0 && b != 0) {
        unsigned *larger = a > b ? &a : &b;     /* value to be reduced */
        unsigned step = a > b ? b : a;          /* multiple of the smaller */

        /* the loop test guarantees 2 * step <= *larger, so no overflow */
        while (*larger - step >= step)
            step <<= 1;
        *larger -= step;
    }

    /* at least one of the two is zero here, so the sum is the other one */
    return a + b;
}
00119 
/* Rotate list[0..len-1] left by rot positions, in place.

   rot is reduced modulo len; lists shorter than two entries and rotations
   that reduce to zero are left untouched.  Rotations by one position in
   either direction are done with a single memmove().  Every other rotation
   is decomposed into gcd(len, rot) independent cycles, each of which is
   walked once, moving one entry at a time with a single byte of temporary
   storage. */
local void rotate(unsigned char *list, unsigned len, unsigned rot)
{
    unsigned char tmp;
    unsigned cycles;
    unsigned char *start, *last, *to, *from;

    /* normalize rot and handle degenerate cases */
    if (len < 2) return;
    if (rot >= len) rot %= len;
    if (rot == 0) return;

    /* pointer to last entry in list */
    last = list + (len - 1);

    /* do simple left shift by one -- source and destination overlap, so
       this has to be memmove(), not memcpy() */
    if (rot == 1) {
        tmp = *list;
        memmove(list, list + 1, len - 1);
        *last = tmp;
        return;
    }

    /* do simple right shift by one */
    if (rot == len - 1) {
        tmp = *last;
        memmove(list + 1, list, len - 1);
        *list = tmp;
        return;
    }

    /* otherwise do rotate as a set of cycles in place */
    cycles = gcd(len, rot);             /* number of cycles */
    do {
        start = from = list + cycles;   /* start index is arbitrary */
        tmp = *from;                    /* save entry to be overwritten */
        for (;;) {
            to = from;                  /* next step in cycle */

            /* go right rot positions, wrapping around the end of the list;
               the remaining distance is tested first so that no pointer
               outside of the list is ever formed */
            if ((unsigned)(last - from) < rot)
                from -= len - rot;
            else
                from += rot;
            if (from == start) break;   /* all but one shifted */
            *to = *from;                /* shift left */
        }
        *to = tmp;                      /* complete the circle */
    } while (--cycles);
}
00166 
/* state of a buffered reader over the gzip file */
typedef struct {
    /* descriptor the buffer is refilled from */
    int fd;

    /* base-2 logarithm of the buffer capacity: buf holds 1 << size bytes */
    int size;

    /* number of unread bytes remaining at next */
    unsigned left;

    /* start of the buffer */
    unsigned char *buf;

    /* next unread byte within the buffer */
    unsigned char *next;

    /* name of the file, used in error messages */
    char *name;
} file;
00176 
00177 /* reload buffer */
00178 local int readin(file *in)
00179 {
00180     int len;
00181 
00182     len = read(in->fd, in->buf, 1 << in->size);
00183     if (len == -1) bye("error reading ", in->name);
00184     in->left = (unsigned)len;
00185     in->next = in->buf;
00186     return len;
00187 }
00188 
00189 /* read from file in, exit if end-of-file */
00190 local int readmore(file *in)
00191 {
00192     if (readin(in) == 0) bye("unexpected end of ", in->name);
00193     return 0;
00194 }
00195 
/* Fetch the next byte from in, refilling the buffer first when it is empty
   (readmore() terminates the program at end of file).  The argument is
   evaluated several times, so it must not have side effects; it is
   parenthesized so that any pointer expression may be passed. */
#define read1(in) ((in)->left == 0 ? readmore(in) : 0, \
                   (in)->left--, *((in)->next)++)
00198 
00199 /* skip over n bytes of in */
00200 local void skip(file *in, unsigned n)
00201 {
00202     unsigned bypass;
00203 
00204     if (n > in->left) {
00205         n -= in->left;
00206         bypass = n & ~((1U << in->size) - 1);
00207         if (bypass) {
00208             if (lseek(in->fd, (off_t)bypass, SEEK_CUR) == -1)
00209                 bye("seeking ", in->name);
00210             n -= bypass;
00211         }
00212         readmore(in);
00213         if (n > in->left)
00214             bye("unexpected end of ", in->name);
00215     }
00216     in->left -= n;
00217     in->next += n;
00218 }
00219 
00220 /* read a four-byte unsigned integer, little-endian, from in */
00221 unsigned long read4(file *in)
00222 {
00223     unsigned long val;
00224 
00225     val = read1(in);
00226     val += (unsigned)read1(in) << 8;
00227     val += (unsigned long)read1(in) << 16;
00228     val += (unsigned long)read1(in) << 24;
00229     return val;
00230 }
00231 
00232 /* skip over gzip header */
00233 local void gzheader(file *in)
00234 {
00235     int flags;
00236     unsigned n;
00237 
00238     if (read1(in) != 31 || read1(in) != 139) bye(in->name, " not a gzip file");
00239     if (read1(in) != 8) bye("unknown compression method in", in->name);
00240     flags = read1(in);
00241     if (flags & 0xe0) bye("unknown header flags set in", in->name);
00242     skip(in, 6);
00243     if (flags & 4) {
00244         n = read1(in);
00245         n += (unsigned)(read1(in)) << 8;
00246         skip(in, n);
00247     }
00248     if (flags & 8) while (read1(in) != 0) ;
00249     if (flags & 16) while (read1(in) != 0) ;
00250     if (flags & 2) skip(in, 2);
00251 }
00252 
00253 /* decompress gzip file "name", return strm with a deflate stream ready to
00254    continue compression of the data in the gzip file, and return a file
00255    descriptor pointing to where to write the compressed data -- the deflate
00256    stream is initialized to compress using level "level" */
00257 local int gzscan(char *name, z_stream *strm, int level)
00258 {
00259     int ret, lastbit, left, full;
00260     unsigned have;
00261     unsigned long crc, tot;
00262     unsigned char *window;
00263     off_t lastoff, end;
00264     file gz;
00265 
00266     /* open gzip file */
00267     gz.name = name;
00268     gz.fd = open(name, O_RDWR, 0);
00269     if (gz.fd == -1) bye("cannot open ", name);
00270     gz.buf = malloc(CHUNK);
00271     if (gz.buf == NULL) bye("out of memory", "");
00272     gz.size = LGCHUNK;
00273     gz.left = 0;
00274 
00275     /* skip gzip header */
00276     gzheader(&gz);
00277 
00278     /* prepare to decompress */
00279     window = malloc(DSIZE);
00280     if (window == NULL) bye("out of memory", "");
00281     strm->zalloc = Z_NULL;
00282     strm->zfree = Z_NULL;
00283     strm->opaque = Z_NULL;
00284     ret = inflateInit2(strm, -15);
00285     if (ret != Z_OK) bye("out of memory", " or library mismatch");
00286 
00287     /* decompress the deflate stream, saving append information */
00288     lastbit = 0;
00289     lastoff = lseek(gz.fd, 0L, SEEK_CUR) - gz.left;
00290     left = 0;
00291     strm->avail_in = gz.left;
00292     strm->next_in = gz.next;
00293     crc = crc32(0L, Z_NULL, 0);
00294     have = full = 0;
00295     do {
00296         /* if needed, get more input */
00297         if (strm->avail_in == 0) {
00298             readmore(&gz);
00299             strm->avail_in = gz.left;
00300             strm->next_in = gz.next;
00301         }
00302 
00303         /* set up output to next available section of sliding window */
00304         strm->avail_out = DSIZE - have;
00305         strm->next_out = window + have;
00306 
00307         /* inflate and check for errors */
00308         ret = inflate(strm, Z_BLOCK);
00309         if (ret == Z_STREAM_ERROR) bye("internal stream error!", "");
00310         if (ret == Z_MEM_ERROR) bye("out of memory", "");
00311         if (ret == Z_DATA_ERROR)
00312             bye("invalid compressed data--format violated in", name);
00313 
00314         /* update crc and sliding window pointer */
00315         crc = crc32(crc, window + have, DSIZE - have - strm->avail_out);
00316         if (strm->avail_out)
00317             have = DSIZE - strm->avail_out;
00318         else {
00319             have = 0;
00320             full = 1;
00321         }
00322 
00323         /* process end of block */
00324         if (strm->data_type & 128) {
00325             if (strm->data_type & 64)
00326                 left = strm->data_type & 0x1f;
00327             else {
00328                 lastbit = strm->data_type & 0x1f;
00329                 lastoff = lseek(gz.fd, 0L, SEEK_CUR) - strm->avail_in;
00330             }
00331         }
00332     } while (ret != Z_STREAM_END);
00333     inflateEnd(strm);
00334     gz.left = strm->avail_in;
00335     gz.next = strm->next_in;
00336 
00337     /* save the location of the end of the compressed data */
00338     end = lseek(gz.fd, 0L, SEEK_CUR) - gz.left;
00339 
00340     /* check gzip trailer and save total for deflate */
00341     if (crc != read4(&gz))
00342         bye("invalid compressed data--crc mismatch in ", name);
00343     tot = strm->total_out;
00344     if ((tot & 0xffffffffUL) != read4(&gz))
00345         bye("invalid compressed data--length mismatch in", name);
00346 
00347     /* if not at end of file, warn */
00348     if (gz.left || readin(&gz))
00349         fprintf(stderr,
00350             "gzappend warning: junk at end of gzip file overwritten\n");
00351 
00352     /* clear last block bit */
00353     lseek(gz.fd, lastoff - (lastbit != 0), SEEK_SET);
00354     if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name);
00355     *gz.buf = (unsigned char)(*gz.buf ^ (1 << ((8 - lastbit) & 7)));
00356     lseek(gz.fd, -1L, SEEK_CUR);
00357     if (write(gz.fd, gz.buf, 1) != 1) bye("writing after seek to ", name);
00358 
00359     /* if window wrapped, build dictionary from window by rotating */
00360     if (full) {
00361         rotate(window, DSIZE, have);
00362         have = DSIZE;
00363     }
00364 
00365     /* set up deflate stream with window, crc, total_in, and leftover bits */
00366     ret = deflateInit2(strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY);
00367     if (ret != Z_OK) bye("out of memory", "");
00368     deflateSetDictionary(strm, window, have);
00369     strm->adler = crc;
00370     strm->total_in = tot;
00371     if (left) {
00372         lseek(gz.fd, --end, SEEK_SET);
00373         if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name);
00374         deflatePrime(strm, 8 - left, *gz.buf);
00375     }
00376     lseek(gz.fd, end, SEEK_SET);
00377 
00378     /* clean up and return */
00379     free(window);
00380     free(gz.buf);
00381     return gz.fd;
00382 }
00383 
00384 /* append file "name" to gzip file gd using deflate stream strm -- if last
00385    is true, then finish off the deflate stream at the end */
00386 local void gztack(char *name, int gd, z_stream *strm, int last)
00387 {
00388     int fd, len, ret;
00389     unsigned left;
00390     unsigned char *in, *out;
00391 
00392     /* open file to compress and append */
00393     fd = 0;
00394     if (name != NULL) {
00395         fd = open(name, O_RDONLY, 0);
00396         if (fd == -1)
00397             fprintf(stderr, "gzappend warning: %s not found, skipping ...\n",
00398                     name);
00399     }
00400 
00401     /* allocate buffers */
00402     in = fd == -1 ? NULL : malloc(CHUNK);
00403     out = malloc(CHUNK);
00404     if (out == NULL) bye("out of memory", "");
00405 
00406     /* compress input file and append to gzip file */
00407     do {
00408         /* get more input */
00409         len = fd == -1 ? 0 : read(fd, in, CHUNK);
00410         if (len == -1) {
00411             fprintf(stderr,
00412                     "gzappend warning: error reading %s, skipping rest ...\n",
00413                     name);
00414             len = 0;
00415         }
00416         strm->avail_in = (unsigned)len;
00417         strm->next_in = in;
00418         if (len) strm->adler = crc32(strm->adler, in, (unsigned)len);
00419 
00420         /* compress and write all available output */
00421         do {
00422             strm->avail_out = CHUNK;
00423             strm->next_out = out;
00424             ret = deflate(strm, last && len == 0 ? Z_FINISH : Z_NO_FLUSH);
00425             left = CHUNK - strm->avail_out;
00426             while (left) {
00427                 len = write(gd, out + CHUNK - strm->avail_out - left, left);
00428                 if (len == -1) bye("writing gzip file", "");
00429                 left -= (unsigned)len;
00430             }
00431         } while (strm->avail_out == 0 && ret != Z_STREAM_END);
00432     } while (len != 0);
00433 
00434     /* write trailer after last entry */
00435     if (last) {
00436         deflateEnd(strm);
00437         out[0] = (unsigned char)(strm->adler);
00438         out[1] = (unsigned char)(strm->adler >> 8);
00439         out[2] = (unsigned char)(strm->adler >> 16);
00440         out[3] = (unsigned char)(strm->adler >> 24);
00441         out[4] = (unsigned char)(strm->total_in);
00442         out[5] = (unsigned char)(strm->total_in >> 8);
00443         out[6] = (unsigned char)(strm->total_in >> 16);
00444         out[7] = (unsigned char)(strm->total_in >> 24);
00445         len = 8;
00446         do {
00447             ret = write(gd, out + 8 - len, len);
00448             if (ret == -1) bye("writing gzip file", "");
00449             len -= ret;
00450         } while (len);
00451         close(gd);
00452     }
00453 
00454     /* clean up and return */
00455     free(out);
00456     if (in != NULL) free(in);
00457     if (fd > 0) close(fd);
00458 }
00459 
00460 /* process the compression level option if present, scan the gzip file, and
00461    append the specified files, or append the data from stdin if no other file
00462    names are provided on the command line -- the gzip file must be writable
00463    and seekable */
00464 int main(int argc, char **argv)
00465 {
00466     int gd, level;
00467     z_stream strm;
00468 
00469     /* ignore command name */
00470     argv++;
00471 
00472     /* provide usage if no arguments */
00473     if (*argv == NULL) {
00474         printf("gzappend 1.1 (4 Nov 2003) Copyright (C) 2003 Mark Adler\n");
00475         printf(
00476             "usage: gzappend [-level] file.gz [ addthis [ andthis ... ]]\n");
00477         return 0;
00478     }
00479 
00480     /* set compression level */
00481     level = Z_DEFAULT_COMPRESSION;
00482     if (argv[0][0] == '-') {
00483         if (argv[0][1] < '0' || argv[0][1] > '9' || argv[0][2] != 0)
00484             bye("invalid compression level", "");
00485         level = argv[0][1] - '0';
00486         if (*++argv == NULL) bye("no gzip file name after options", "");
00487     }
00488 
00489     /* prepare to append to gzip file */
00490     gd = gzscan(*argv++, &strm, level);
00491 
00492     /* append files on command line, or from stdin if none */
00493     if (*argv == NULL)
00494         gztack(NULL, gd, &strm, 1);
00495     else
00496         do {
00497             gztack(*argv, gd, &strm, argv[1] == NULL);
00498         } while (*++argv != NULL);
00499     return 0;
00500 }

Generated on Wed Oct 20 2010 11:12:17 for APBS by  doxygen 1.7.2