/* enough.c -- determine the maximum size of inflate's Huffman code tables over
 * all possible valid and complete Huffman codes, subject to a length limit.
 * Copyright (C) 2007, 2008 Mark Adler
 * Version 1.3  17 February 2008  Mark Adler
 */

/* Version history:
   1.0   3 Jan 2007  First version (derived from codecount.c version 1.4)
   1.1   4 Jan 2007  Use faster incremental table usage computation
                     Prune examine() search on previously visited states
   1.2   5 Jan 2007  Comments clean up
                     As inflate does, decrease root for short codes
                     Refuse cases where inflate would increase root
   1.3  17 Feb 2008  Add argument for initial root table size
                     Fix bug for initial root table size == max - 1
                     Use a macro to compute the history index
 */

/*
   Examine all possible Huffman codes for a given number of symbols and a
   maximum code length in bits to determine the maximum table size for zlib's
   inflate.  Only complete Huffman codes are counted.

   Two codes are considered distinct if the vectors of the number of codes per
   length are not identical.  So permutations of the symbol assignments result
   in the same code for the counting, as do permutations of the assignments of
   the bit values to the codes (i.e. only canonical codes are counted).

   We build a code from shorter to longer lengths, determining how many symbols
   are coded at each length.  At each step, we have how many symbols remain to
   be coded, what the last code length used was, and how many bit patterns of
   that length remain unused.  Then we add one to the code length and double
   the number of unused patterns to graduate to the next code length.  We then
   assign all portions of the remaining symbols to that code length that
   preserve the properties of a correct and eventually complete code.  Those
   properties are: we cannot use more bit patterns than are available; and when
   all the symbols are used, there are exactly zero possible bit patterns
   remaining.

   The inflate Huffman decoding algorithm uses two-level lookup tables for
   speed.  There is a single first-level table to decode codes up to root bits
   in length (root == 9 in the current inflate implementation).  The table
   has 1 << root entries and is indexed by the next root bits of input.  Codes
   shorter than root bits have replicated table entries, so that the correct
   entry is pointed to regardless of the bits that follow the short code.  If
   the code is longer than root bits, then the table entry points to a second-
   level table.  The size of that table is determined by the longest code with
   that root-bit prefix.  If that longest code has length len, then the table
   has size 1 << (len - root), to index the remaining bits in that set of
   codes.  Each subsequent root-bit prefix then has its own sub-table.  The
   total number of table entries required by the code is calculated
   incrementally as the number of codes at each bit length is populated.  When
   all of the codes are shorter than root bits, then root is reduced to the
   longest code length, resulting in a single, smaller, one-level table.

   The inflate algorithm also provides for small values of root (relative to
   the log2 of the number of symbols), where the shortest code has more bits
   than root.  In that case, root is increased to the length of the shortest
   code.  This program, by design, does not handle that case, so it is verified
   that the number of symbols is less than 2^(root + 1).

   In order to speed up the examination (by about ten orders of magnitude for
   the default arguments), the intermediate states in the build-up of a code
   are remembered and previously visited branches are pruned.  The memory
   required for this will increase rapidly with the total number of symbols and
   the maximum code length in bits.  However this is a very small price to pay
   for the vast speedup.

   First, all of the possible Huffman codes are counted, and reachable
   intermediate states are noted by a non-zero count in a saved-results array.
   Second, the intermediate states that lead to (root + 1) bit or longer codes
   are used to look at all sub-codes from those junctures for their inflate
   memory usage.  (The amount of memory used is not affected by the number of
   codes of root bits or less in length.)  Third, the visited states in the
   construction of those sub-codes and the associated calculation of the table
   size is recalled in order to avoid recalculating from the same juncture.
   Beginning the code examination at (root + 1) bit codes, which is enabled by
   identifying the reachable nodes, accounts for about six of the orders of
   magnitude of improvement for the default arguments.  About another four
   orders of magnitude come from not revisiting previous states.  Out of
   approximately 2x10^16 possible Huffman codes, only about 2x10^6 sub-codes
   need to be examined to cover all of the possible table memory usage cases
   for the default arguments of 286 symbols limited to 15-bit codes.

   Note that an unsigned long long type is used for counting.  It is quite easy
   to exceed the capacity of an eight-byte integer with a large number of
   symbols and a large maximum code length, so multiple-precision arithmetic
   would need to replace the unsigned long long arithmetic in that case.  This
   program will abort if an overflow occurs.  The big_t type identifies where
   the counting takes place.

   An unsigned long long type is also used for calculating the number of
   possible codes remaining at the maximum length.  This limits the maximum
   code length to the number of bits in a long long minus the number of bits
   needed to represent the symbols in a flat code.  The code_t type identifies
   where the bit pattern counting takes place.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <errno.h>
#include <limits.h>

#define local static

/* special data types */
typedef unsigned long long big_t;   /* type for code counting */
typedef unsigned long long code_t;  /* type for bit pattern counting */
struct tab {                        /* type for been here check */
    size_t len;         /* length of bit vector in char's */
    unsigned char *vec; /* allocated bit vector (unsigned so that setting the
                           high bit of a byte is well defined) */
};

/* Value returned by count() to report that the count does not fit in a big_t.
   It is the all-ones value, written so that it has the type of big_t instead
   of relying on the conversion of a signed -1. */
#define BIG_OVERFLOW ((big_t)0 - 1)

/* The array for saving results, num[], is indexed with this triplet:

      syms: number of symbols remaining to code
      left: number of available bit patterns at length len
      len: number of bits in the codes currently being assigned

   Those indices are constrained thusly when saving results:

      syms: 3..totsym (totsym == total symbols to code)
      left: 2..syms - 1, but only the evens (so syms == 8 -> 2, 4, 6)
      len: 1..max - 1 (max == maximum code length in bits)

   syms == 2 is not saved since that immediately leads to a single code.  left
   must be even, since it represents the number of available bit patterns at
   the current length, which is double the number at the previous length.
   left ends at syms-1 since left == syms immediately results in a single code.
   (left > syms is not allowed since that would result in an incomplete code.)
   len is less than max, since the code completes immediately when len == max.

   The offset into the array is calculated for the three indices with the
   first one (syms) being outermost, and the last one (len) being innermost.
   We build the array with length max-1 lists for the len index, with syms-3
   of those for each symbol.  There are totsym-2 of those, with each one
   varying in length as a function of sym.  See the calculation of index in
   count() for the index, and the calculation of size in main() for the size
   of the array.

   For the deflate example of 286 symbols limited to 15-bit codes, the array
   has 284,284 entries, taking up 2.17 MB for an 8-byte big_t.  More than
   half of the space allocated for saved results is actually used -- not all
   possible triplets are reached in the generation of valid Huffman codes.
 */

/* The array for tracking visited states, done[], is itself indexed identically
   to the num[] array as described above for the (syms, left, len) triplet.
   Each element in the array is further indexed by the (mem, rem) doublet,
   where mem is the amount of inflate table space used so far, and rem is the
   remaining unused entries in the current inflate sub-table.  Each indexed
   element is simply one bit indicating whether the state has been visited or
   not.  Since the ranges for mem and rem are not known a priori, each bit
   vector is of a variable size, and grows as needed to accommodate the visited
   states.  mem and rem are used to calculate a single index in a triangular
   array.  Since the range of mem is expected in the default case to be about
   ten times larger than the range of rem, the array is skewed to reduce the
   memory usage, with eight times the range for mem than for rem.  See the
   calculations for offset and bit in beenhere() for the details.

   For the deflate example of 286 symbols limited to 15-bit codes, the bit
   vectors grow to total approximately 21 MB, in addition to the 4.3 MB done[]
   array itself.
 */

/* Globals to avoid propagating constants or constant pointers recursively */
local int max;          /* maximum allowed bit length for the codes */
local int root;         /* size of base code table in bits */
local int large;        /* largest code table so far */
local size_t size;      /* number of elements in num and done */
local int *code;        /* number of symbols assigned to each bit length */
local big_t *num;       /* saved results array for code counting */
local struct tab *done; /* states already evaluated array */

/* Index function for num[] and done[].  i is syms, j is left, and k is len.
   Every argument is parenthesized so that expressions such as root + 1 can be
   passed safely.  Uses the global max. */
#define INDEX(i,j,k) \
    (((size_t)(((i)-1)>>1)*(size_t)(((i)-2)>>1)+(size_t)((j)>>1)-1)* \
     (size_t)(max-1)+(size_t)(k)-1)

/* Free allocated space and reset the pointers so that a repeated call is
   harmless.  Uses globals code, num, done, and size. */
local void cleanup(void)
{
    size_t n;

    if (done != NULL) {
        for (n = 0; n < size; n++)
            if (done[n].len)
                free(done[n].vec);
        free(done);
        done = NULL;
    }
    free(num);                  /* free(NULL) is a no-op */
    num = NULL;
    free(code);
    code = NULL;
}

/* Return the number of possible Huffman codes using bit patterns of lengths
   len through max inclusive, coding syms symbols, with left bit patterns of
   length len unused -- return BIG_OVERFLOW if there is an overflow in the
   counting.  Keep a record of previous results in num to prevent repeating
   the same calculation.  Uses the globals max and num. */
local big_t count(int syms, int len, int left)
{
    big_t sum;          /* number of possible codes from this juncture */
    big_t got;          /* value returned from count() */
    int least;          /* least number of syms to use at this juncture */
    int most;           /* most number of syms to use at this juncture */
    int use;            /* number of bit patterns to use in next call */
    size_t index;       /* index of this case in *num */

    /* see if only one possible code */
    if (syms == left)
        return 1;

    /* note and verify the expected state */
    assert(syms > left && left > 0 && len < max);

    /* see if we've done this one already */
    index = INDEX(syms, left, len);
    got = num[index];
    if (got)
        return got;         /* we have -- return the saved result */

    /* we need to use at least this many bit patterns so that the code won't be
       incomplete at the next length (more bit patterns than symbols) */
    least = (left << 1) - syms;
    if (least < 0)
        least = 0;

    /* we can use at most this many bit patterns, lest there not be enough
       available for the remaining symbols at the maximum length (if there were
       no limit to the code length, this would become: most = left - 1) */
    most = (int)((((code_t)left << (max - len)) - (code_t)syms) /
                 (((code_t)1 << (max - len)) - 1));

    /* count all possible codes from this juncture and add them up */
    sum = 0;
    for (use = least; use <= most; use++) {
        got = count(syms - use, len + 1, (left - use) << 1);
        sum += got;
        if (got == BIG_OVERFLOW || sum < got)   /* overflow */
            return BIG_OVERFLOW;
    }

    /* verify that all recursive calls are productive */
    assert(sum != 0);

    /* save the result and return it */
    num[index] = sum;
    return sum;
}

/* Return true if we've been here before, set to true if not.  Set a bit in a
   bit vector to indicate visiting this state.  Each (syms,len,left) state
   has a variable size bit vector indexed by (mem,rem).  The bit vector is
   lengthened if needed to allow setting the (mem,rem) bit.  If memory cannot
   be obtained, everything is freed and the program exits with status 1.
   Uses the globals root and done. */
local int beenhere(int syms, int len, int left, int mem, int rem)
{
    size_t index;           /* index for this state's bit vector */
    size_t offset;          /* offset in this state's bit vector */
    unsigned char bit;      /* mask for this state's bit */
    size_t length;          /* length of the bit vector in bytes */
    unsigned char *vector;  /* new or enlarged bit vector */

    /* point to vector for (syms,left,len), bit in vector for (mem,rem) */
    index = INDEX(syms, left, len);
    mem -= 1 << root;
    offset = (size_t)((mem >> 3) + rem);
    offset = ((offset * (offset + 1)) >> 1) + (size_t)rem;
    bit = (unsigned char)(1 << (mem & 7));

    /* see if we've been here */
    length = done[index].len;
    if (offset < length && (done[index].vec[offset] & bit) != 0)
        return 1;       /* done this! */

    /* we haven't been here before -- set the bit to show we have now */

    /* see if we need to lengthen the vector in order to set the bit */
    if (length <= offset) {
        /* if we have one already, enlarge it, zero out the appended space */
        if (length) {
            do {
                length <<= 1;
            } while (length <= offset);
            vector = realloc(done[index].vec, length);
            if (vector != NULL)
                memset(vector + done[index].len, 0, length - done[index].len);
        }

        /* otherwise we need to make a new vector and zero it out */
        else {
            length = (size_t)1 << (len - root);
            while (length <= offset)
                length <<= 1;
            vector = calloc(length, sizeof(unsigned char));
        }

        /* in either case, bail if we can't get the memory (on a failed
           realloc() the old vector is still owned by done[] and is released
           by cleanup()) */
        if (vector == NULL) {
            fputs("abort: unable to allocate enough memory\n", stderr);
            cleanup();
            exit(1);
        }

        /* install the new vector */
        done[index].len = length;
        done[index].vec = vector;
    }

    /* set the bit */
    done[index].vec[offset] |= bit;
    return 0;
}

/* Examine all possible codes from the given node (syms, len, left).  Compute
   the amount of memory required to build inflate's decoding tables, where the
   number of code structures used so far is mem, and the number remaining in
   the current sub-table is rem.  Uses the globals max, code, root, large, and
   done. */
local void examine(int syms, int len, int left, int mem, int rem)
{
    int least;          /* least number of syms to use at this juncture */
    int most;           /* most number of syms to use at this juncture */
    int use;            /* number of bit patterns to use in next call */

    /* see if we have a complete code */
    if (syms == left) {
        /* set the last code entry */
        code[len] = left;

        /* complete computation of memory used by this code */
        while (rem < left) {
            left -= rem;
            rem = 1 << (len - root);
            mem += rem;
        }
        assert(rem == left);

        /* if this is a new maximum, show the entries used and the sub-code */
        if (mem > large) {
            large = mem;
            printf("max %d: ", mem);
            for (use = root + 1; use <= max; use++)
                if (code[use])
                    printf("%d[%d] ", code[use], use);
            putchar('\n');
            fflush(stdout);
        }

        /* remove entries as we drop back down in the recursion */
        code[len] = 0;
        return;
    }

    /* prune the tree if we can */
    if (beenhere(syms, len, left, mem, rem))
        return;

    /* we need to use at least this many bit patterns so that the code won't be
       incomplete at the next length (more bit patterns than symbols) */
    least = (left << 1) - syms;
    if (least < 0)
        least = 0;

    /* we can use at most this many bit patterns, lest there not be enough
       available for the remaining symbols at the maximum length (if there were
       no limit to the code length, this would become: most = left - 1) */
    most = (int)((((code_t)left << (max - len)) - (code_t)syms) /
                 (((code_t)1 << (max - len)) - 1));

    /* occupy least table spaces, creating new sub-tables as needed */
    use = least;
    while (rem < use) {
        use -= rem;
        rem = 1 << (len - root);
        mem += rem;
    }
    rem -= use;

    /* examine codes from here, updating table space as we go */
    for (use = least; use <= most; use++) {
        code[len] = use;
        examine(syms - use, len + 1, (left - use) << 1,
                mem + (rem ? 1 << (len - root) : 0), rem << 1);
        if (rem == 0) {
            rem = 1 << (len - root);
            mem += rem;
        }
        rem--;
    }

    /* remove entries as we drop back down in the recursion */
    code[len] = 0;
}

/* Look at all sub-codes starting with root + 1 bits.  Look at only the valid
   intermediate code states (syms, left, len).  For each completed code,
   calculate the amount of memory required by inflate to build the decoding
   tables.  Find the maximum amount of memory required and show the code that
   requires that maximum.  Uses the globals max, root, code, large, and num. */
local void enough(int syms)
{
    int n;              /* number of remaining symbols for this node */
    int left;           /* number of unused bit patterns at this length */
    size_t index;       /* index of this case in *num */

    /* clear code */
    for (n = 0; n <= max; n++)
        code[n] = 0;

    /* look at all (root + 1) bit and longer codes */
    large = 1 << root;              /* base table */
    if (root < max)                 /* otherwise, there's only a base table */
        for (n = 3; n <= syms; n++)
            for (left = 2; left < n; left += 2)
            {
                /* look at all reachable (root + 1) bit nodes, and the
                   resulting codes (complete at root + 2 or more) -- when
                   root + 1 == max, index is past the saved results and only
                   index - 1 may be used */
                index = INDEX(n, left, root + 1);
                if (root + 1 < max && num[index])       /* reachable node */
                    examine(n, root + 1, left, 1 << root, 0);

                /* also look at root bit codes with completions at root + 1
                   bits (not saved in num, since complete), just in case */
                if (num[index - 1] && n <= (left << 1))
                    examine((n - left) << 1, root + 1, (n - left) << 1,
                            1 << root, 0);
            }

    /* done */
    printf("done: maximum of %d table entries\n", large);
}

/* The command-line driver below is left out when ENOUGH_NO_MAIN is defined, so
   that the routines above can be compiled into another program (for example a
   test harness) that supplies its own main(). */
#ifndef ENOUGH_NO_MAIN

/* Parse a command-line argument as a decimal integer.  Return the value, or
   zero if the argument is not entirely a number or does not fit in an int.
   Zero is rejected by the range check applied to every argument in main(). */
local int getint(const char *str)
{
    char *end;          /* first character not consumed by strtol() */
    long val;           /* converted value */

    errno = 0;
    val = strtol(str, &end, 10);
    if (end == str || *end != '\0' || errno == ERANGE ||
        val < INT_MIN || val > INT_MAX)
        return 0;
    return (int)val;
}

/* Multiply *total by factor.  Return true on success, or false, leaving
   *total untouched, if the product would not fit in a size_t. */
local int scale(size_t *total, size_t factor)
{
    if (factor != 0 && *total > ((size_t)0 - 1) / factor)
        return 0;
    *total *= factor;
    return 1;
}

/*
   Examine and show the total number of possible Huffman codes for a given
   maximum number of symbols, initial root table size, and maximum code length
   in bits -- those are the command arguments in that order.  The default
   values are 286, 9, and 15 respectively, for the deflate literal/length code.
   The possible codes are counted for each number of coded symbols from two to
   the maximum.  The counts for each of those and the total number of codes are
   shown.  The maximum number of inflate table entries is then calculated
   across all possible codes.  Each new maximum number of table entries and the
   associated sub-code (starting at root + 1 == 10 bits) is shown.

   To count and examine Huffman codes that are not length-limited, provide a
   maximum length equal to the number of symbols minus one.

   For the deflate literal/length code, use "enough".  For the deflate distance
   code, use "enough 30 6".

   This uses the %llu printf format to print big_t numbers, which assumes that
   big_t is an unsigned long long.  If the big_t type is changed (for example
   to a multiple precision type), the method of printing will also need to be
   updated.

   Returns 0 on success, or 1 for invalid arguments, counting overflow, or a
   failure to allocate memory.
 */
int main(int argc, char **argv)
{
    int syms;           /* total number of symbols to code */
    int n;              /* number of symbols to code for this run */
    code_t word;        /* for counting the bits in a code_t */
    big_t got;          /* return value of count() */
    big_t sum;          /* accumulated number of codes over n */

    /* set up globals for cleanup() */
    code = NULL;
    num = NULL;
    done = NULL;

    /* get arguments -- default to the deflate literal/length code */
    syms = 286;
    root = 9;
    max = 15;
    if (argc > 1) {
        syms = getint(argv[1]);
        if (argc > 2) {
            root = getint(argv[2]);
            if (argc > 3)
                max = getint(argv[3]);
        }
    }
    if (argc > 4 || syms < 2 || root < 1 || max < 1) {
        fputs("invalid arguments, need: [sym >= 2 [root >= 1 [max >= 1]]]\n",
              stderr);
        return 1;
    }

    /* if not restricting the code length, the longest is syms - 1 */
    if (max > syms - 1)
        max = syms - 1;

    /* determine the number of bits in a code_t -- shift a single bit up one
       position at a time until it falls off the top, since shifting by the
       full width of the type in one step is undefined and in practice can
       loop forever */
    for (n = 0, word = 1; word; n++, word <<= 1)
        ;

    /* make sure that the calculation of most will not overflow */
    if (max > n || (code_t)(syms - 2) >= (((code_t)0 - 1) >> (max - 1))) {
        fputs("abort: code length too long for internal types\n", stderr);
        return 1;
    }

    /* reject impossible code requests */
    if ((code_t)(syms - 1) > ((code_t)1 << max) - 1) {
        fprintf(stderr, "%d symbols cannot be coded in %d bits\n",
                syms, max);
        return 1;
    }

    /* allocate code vector */
    code = calloc((size_t)max + 1, sizeof(int));
    if (code == NULL) {
        fputs("abort: unable to allocate enough memory\n", stderr);
        return 1;
    }

    /* determine size of saved results array, checking for overflows,
       allocate and clear the array (set all to zero with calloc()) */
    if (syms == 2)              /* iff max == 1 */
        num = NULL;             /* won't be saving any results */
    else {
        size = (size_t)(syms >> 1);
        if (!scale(&size, (size_t)((syms - 1) >> 1)) ||
            !scale(&size, (size_t)(max - 1)) ||
            size > ((size_t)0 - 1) / sizeof(big_t) ||
            (num = calloc(size, sizeof(big_t))) == NULL) {
            fputs("abort: unable to allocate enough memory\n", stderr);
            cleanup();
            return 1;
        }
    }

    /* count possible codes for all numbers of symbols, add up counts */
    sum = 0;
    for (n = 2; n <= syms; n++) {
        got = count(n, 1, 2);
        sum += got;
        if (got == BIG_OVERFLOW || sum < got) {     /* overflow */
            fputs("abort: can't count that high!\n", stderr);
            cleanup();
            return 1;
        }
        printf("%llu %d-codes\n", got, n);
    }
    printf("%llu total codes for 2 to %d symbols", sum, syms);
    if (max < syms - 1)
        printf(" (%d-bit length limit)\n", max);
    else
        puts(" (no length limit)");

    /* allocate and clear done array for beenhere() */
    if (syms == 2)
        done = NULL;
    else if (size > ((size_t)0 - 1) / sizeof(struct tab) ||
             (done = calloc(size, sizeof(struct tab))) == NULL) {
        fputs("abort: unable to allocate enough memory\n", stderr);
        cleanup();
        return 1;
    }

    /* find and show maximum inflate table usage */
    if (root > max)                 /* reduce root to max length */
        root = max;
    if ((code_t)syms < ((code_t)1 << (root + 1)))
        enough(syms);
    else
        puts("cannot handle minimum code lengths > root");

    /* done */
    cleanup();
    return 0;
}

#endif /* !ENOUGH_NO_MAIN */