Commit | Line | Data |
---|---|---|
64d06f2f | 1 | #ifndef lint |
a2de8fe3 | 2 | static char sccsid[] = "@(#)compress.c 5.2 (Berkeley) %G%"; |
64d06f2f KM |
3 | #endif not lint |
4 | ||
a2de8fe3 KM |
5 | #define min(a,b) ((a>b) ? b : a) |
6 | ||
64d06f2f | 7 | /* |
a2de8fe3 KM |
8 | * machine variants which require cc -Dmachine: pdp11, z8000, pcxt |
9 | */ | |
10 | ||
11 | /* Set USERMEM to the maximum amount of physical user memory available | |
64d06f2f KM |
12 | * in bytes. USERMEM is used to determine the maximum BITS that can be used |
13 | * for compression. If USERMEM is big enough, use fast compression algorithm. | |
14 | * | |
15 | * SACREDMEM is the amount of physical memory saved for others; compress | |
16 | * will hog the rest. | |
17 | */ | |
18 | #ifndef SACREDMEM | |
19 | #define SACREDMEM 0 | |
20 | #endif | |
21 | ||
a2de8fe3 KM |
22 | #ifndef USERMEM |
23 | # define USERMEM 750000 /* default user memory */ | |
24 | #endif | |
25 | ||
64d06f2f KM |
26 | #ifdef pdp11 |
27 | # define BITS 12 /* max bits/code for 16-bit machine */ | |
28 | # define NO_UCHAR /* also if "unsigned char" functions as signed char */ | |
a2de8fe3 | 29 | # define SHORT_INT /* ints are short */ |
64d06f2f | 30 | # undef USERMEM |
a2de8fe3 KM |
31 | #endif pdp11 |
32 | ||
33 | #ifdef z8000 | |
34 | # define BITS 12 | |
35 | # define SHORT_INT | |
36 | # undef vax /* weird preprocessor */ | |
37 | # undef USERMEM | |
38 | #endif z8000 | |
39 | ||
40 | #ifdef pcxt | |
41 | # define BITS 12 | |
42 | # define SHORT_INT | |
43 | # define SMALL_STACK /* at least for Latice C, sez Lauren */ | |
44 | # undef USERMEM | |
45 | #endif pcxt | |
46 | ||
64d06f2f KM |
47 | /* |
48 | * Define FBITS for machines with several MB of physical memory, to use | |
49 | * table lookup for (b <= FBITS). If FBITS is made too large, performance | |
50 | * will decrease due to increased swapping/paging. Since the program minus | |
51 | * the fast lookup table is about a half Meg, we can allocate the rest of | |
52 | * available physical memory to the fast lookup table. | |
53 | * | |
54 | * If FBITS is set to 12, a 2 MB array is allocated, but only 1 MB is | |
55 | * addressed for parity-free input (i.e. text). | |
56 | * | |
57 | * FBITS=10 yields 1/2 meg lookup table + 4K code memory | |
58 | * FBITS=11 yields 1 meg lookup table + 8K code memory | |
59 | * FBITS=12 yields 2 meg lookup table + 16K code memory | |
60 | * FBITS=13 yields 4 meg lookup table + 32K code memory | |
61 | * | |
62 | */ | |
63 | ||
64 | #ifdef USERMEM | |
65 | # if USERMEM >= (2621440+SACREDMEM) | |
66 | # if USERMEM >= (4718592+SACREDMEM) | |
67 | # define FBITS 13 | |
68 | # define PBITS 16 | |
69 | # else 2.5M <= USERMEM < 4.5M | |
70 | # define FBITS 12 | |
71 | # define PBITS 16 | |
72 | # endif USERMEM <=> 4.5M | |
73 | # else USERMEM < 2.5M | |
74 | # if USERMEM >= (1572864+SACREDMEM) | |
75 | # define FBITS 11 | |
76 | # define PBITS 16 | |
77 | # else USERMEM < 1.5M | |
78 | # if USERMEM >= (1048576+SACREDMEM) | |
79 | # define FBITS 10 | |
80 | # define PBITS 16 | |
81 | # else USERMEM < 1M | |
82 | # if USERMEM >= (631808+SACREDMEM) | |
83 | # define PBITS 16 | |
84 | # else | |
85 | # if USERMEM >= (329728+SACREDMEM) | |
86 | # define PBITS 15 | |
87 | # else | |
88 | # if USERMEM >= (178176+SACREDMEM) | |
89 | # define PBITS 14 | |
90 | # else | |
91 | # if USERMEM >= (99328+SACREDMEM) | |
92 | # define PBITS 13 | |
93 | # else | |
94 | # define PBITS 12 | |
95 | # endif | |
96 | # endif | |
97 | # endif | |
98 | # endif | |
99 | # undef USERMEM | |
100 | # endif USERMEM <=> 1M | |
101 | # endif USERMEM <=> 1.5M | |
102 | # endif USERMEM <=> 2.5M | |
103 | #endif USERMEM | |
104 | ||
105 | #ifdef PBITS /* Preferred BITS for this memory size */ | |
106 | # ifndef BITS | |
107 | # define BITS PBITS | |
108 | # endif BITS | |
109 | #endif PBITS | |
110 | ||
111 | #if BITS == 16 | |
112 | # define HSIZE 69001 /* 95% occupancy */ | |
113 | #endif | |
114 | #if BITS == 15 | |
115 | # define HSIZE 35023 /* 94% occupancy */ | |
116 | #endif | |
117 | #if BITS == 14 | |
118 | # define HSIZE 18013 /* 91% occupancy */ | |
119 | #endif | |
120 | #if BITS == 13 | |
121 | # define HSIZE 9001 /* 91% occupancy */ | |
122 | #endif | |
a2de8fe3 | 123 | #if BITS <= 12 |
64d06f2f KM |
124 | # define HSIZE 5003 /* 80% occupancy */ |
125 | #endif | |
64d06f2f KM |
126 | /* BITS < 9 will cause an error */ |
127 | ||
128 | /* | |
129 | * a code_int must be able to hold 2**BITS values of type int, and also -1 | |
130 | */ | |
131 | #if BITS > 15 | |
132 | typedef long int code_int; | |
133 | #else | |
134 | typedef int code_int; | |
135 | #endif | |
136 | ||
137 | #ifdef interdata | |
138 | typedef unsigned long int count_int; | |
139 | typedef unsigned short int count_short; | |
140 | #else | |
141 | typedef long int count_int; | |
142 | #endif | |
143 | ||
144 | #ifdef NO_UCHAR | |
145 | typedef char char_type; | |
146 | #else UCHAR | |
147 | typedef unsigned char char_type; | |
148 | #endif UCHAR | |
149 | char_type magic_header[] = { "\037\235" }; /* 1F 9D */ | |
150 | ||
151 | /* Defines for third byte of header */ | |
152 | #define BIT_MASK 0x1f | |
153 | #define BLOCK_MASK 0x80 | |
154 | /* Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is | |
155 | a fourth header byte (for expansion). | |
156 | */ | |
157 | #define INIT_BITS 9 /* initial number of bits/code */ | |
158 | ||
159 | /* | |
a2de8fe3 | 160 | * compress.c - File compression ala IEEE Computer, June 1984. |
64d06f2f KM |
161 | * |
162 | * Authors: Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas) | |
163 | * Jim McKie (decvax!mcvax!jim) | |
164 | * Steve Davies (decvax!vax135!petsd!peora!srd) | |
165 | * Ken Turkowski (decvax!decwrl!turtlevax!ken) | |
166 | * James A. Woods (decvax!ihnp4!ames!jaw) | |
167 | * Joe Orost (decvax!vax135!petsd!joe) | |
168 | * | |
a2de8fe3 | 169 | * $Header: compress.c,v 3.2 85/06/06 21:53:24 jaw Exp $ |
64d06f2f | 170 | * $Log: compress.c,v $ |
a2de8fe3 KM |
171 | * Revision 3.2 85/06/06 21:53:24 jaw |
172 | * Incorporate portability suggestions for Z8000, IBM PC/XT from mailing list. | |
173 | * Default to "quiet" output (no compression statistics). | |
174 | * | |
175 | * Revision 3.1 85/05/12 18:56:13 jaw | |
176 | * Integrate decompress() stack speedups (from early pointer mods by McKie). | |
177 | * Repair multi-file USERMEM gaffe. Unify 'force' flags to mimic semantics | |
178 | * of SVR2 'pack'. Streamline block-compress table clear logic. Increase | |
179 | * output byte count by magic number size. | |
180 | * | |
64d06f2f KM |
181 | * Revision 3.0 84/11/27 11:50:00 petsd!joe |
182 | * Set HSIZE depending on BITS. Set BITS depending on USERMEM. Unrolled | |
a2de8fe3 | 183 | * loops in clear routines. Added "-C" flag for 2.0 compatibility. Used |
64d06f2f KM |
184 | * unsigned compares on Perkin-Elmer. Fixed foreground check. |
185 | * | |
186 | * Revision 2.7 84/11/16 19:35:39 ames!jaw | |
187 | * Cache common hash codes based on input statistics; this improves | |
188 | * performance for low-density raster images. Pass on #ifdef bundle | |
189 | * from Turkowski. | |
190 | * | |
191 | * Revision 2.6 84/11/05 19:18:21 ames!jaw | |
192 | * Vary size of hash tables to reduce time for small files. | |
193 | * Tune PDP-11 hash function. | |
194 | * | |
195 | * Revision 2.5 84/10/30 20:15:14 ames!jaw | |
196 | * Junk chaining; replace with the simpler (and, on the VAX, faster) | |
197 | * double hashing, discussed within. Make block compression standard. | |
198 | * | |
199 | * Revision 2.4 84/10/16 11:11:11 ames!jaw | |
200 | * Introduce adaptive reset for block compression, to boost the rate | |
201 | * another several percent. (See mailing list notes.) | |
202 | * | |
203 | * Revision 2.3 84/09/22 22:00:00 petsd!joe | |
204 | * Implemented "-B" block compress. Implemented REVERSE sorting of tab_next. | |
205 | * Bug fix for last bits. Changed fwrite to putchar loop everywhere. | |
206 | * | |
207 | * Revision 2.2 84/09/18 14:12:21 ames!jaw | |
208 | * Fold in news changes, small machine typedef from thomas, | |
209 | * #ifdef interdata from joe. | |
210 | * | |
211 | * Revision 2.1 84/09/10 12:34:56 ames!jaw | |
212 | * Configured fast table lookup for 32-bit machines. | |
213 | * This cuts user time in half for b <= FBITS, and is useful for news batching | |
214 | * from VAX to PDP sites. Also sped up decompress() [fwrite->putc] and | |
215 | * added signal catcher [plus beef in writeerr()] to delete effluvia. | |
216 | * | |
217 | * Revision 2.0 84/08/28 22:00:00 petsd!joe | |
218 | * Add check for foreground before prompting user. Insert maxbits into | |
219 | * compressed file. Force file being uncompressed to end with ".Z". | |
220 | * Added "-c" flag and "zcat". Prepared for release. | |
221 | * | |
222 | * Revision 1.10 84/08/24 18:28:00 turtlevax!ken | |
223 | * Will only compress regular files (no directories), added a magic number | |
224 | * header (plus an undocumented -n flag to handle old files without headers), | |
225 | * added -f flag to force overwriting of possibly existing destination file, | |
226 | * otherwise the user is prompted for a response. Will tack on a .Z to a | |
227 | * filename if it doesn't have one when decompressing. Will only replace | |
228 | * file if it was compressed. | |
229 | * | |
230 | * Revision 1.9 84/08/16 17:28:00 turtlevax!ken | |
231 | * Removed scanargs(), getopt(), added .Z extension and unlimited number of | |
232 | * filenames to compress. Flags may be clustered (-Ddvb12) or separated | |
233 | * (-D -d -v -b 12), or combination thereof. Modes and other status is | |
234 | * copied with copystat(). -O bug for 4.2 seems to have disappeared with | |
235 | * 1.8. | |
236 | * | |
237 | * Revision 1.8 84/08/09 23:15:00 joe | |
238 | * Made it compatible with vax version, installed jim's fixes/enhancements | |
239 | * | |
240 | * Revision 1.6 84/08/01 22:08:00 joe | |
241 | * Sped up algorithm significantly by sorting the compress chain. | |
242 | * | |
243 | * Revision 1.5 84/07/13 13:11:00 srd | |
244 | * Added C version of vax asm routines. Changed structure to arrays to | |
245 | * save much memory. Do unsigned compares where possible (faster on | |
246 | * Perkin-Elmer) | |
247 | * | |
248 | * Revision 1.4 84/07/05 03:11:11 thomas | |
249 | * Clean up the code a little and lint it. (Lint complains about all | |
250 | * the regs used in the asm, but I'm not going to "fix" this.) | |
251 | * | |
252 | * Revision 1.3 84/07/05 02:06:54 thomas | |
253 | * Minor fixes. | |
254 | * | |
255 | * Revision 1.2 84/07/05 00:27:27 thomas | |
256 | * Add variable bit length output. | |
257 | * | |
258 | */ | |
259 | #ifndef lint | |
a2de8fe3 | 260 | static char rcs_ident[] = "$Header: compress.c,v 3.1 85/05/12 18:56:13 jaw Exp $"; |
64d06f2f KM |
261 | #endif !lint |
262 | ||
263 | #include <stdio.h> | |
264 | #include <ctype.h> | |
265 | #include <signal.h> | |
266 | #include <sys/types.h> | |
267 | #include <sys/stat.h> | |
268 | ||
269 | #define ARGVAL() (*++(*argv) || (--argc && *++argv)) | |
270 | ||
271 | int n_bits; /* number of bits/code */ | |
272 | int maxbits = BITS; /* user settable max # bits/code */ | |
273 | code_int maxcode; /* maximum code, given n_bits */ | |
274 | code_int maxmaxcode = 1 << BITS; /* should NEVER generate this code */ | |
275 | #ifdef COMPATIBLE /* But wrong! */ | |
276 | # define MAXCODE(n_bits) (1 << (n_bits) - 1) | |
277 | #else COMPATIBLE | |
278 | # define MAXCODE(n_bits) ((1 << (n_bits)) - 1) | |
279 | #endif COMPATIBLE | |
280 | ||
281 | /* | |
282 | * One code could conceivably represent (1<<BITS) characters, but | |
283 | * to get a code of length N requires an input string of at least | |
284 | * N*(N-1)/2 characters. With 5000 chars in the stack, an input | |
285 | * file would have to contain a 25Mb string of a single character. | |
286 | * This seems unlikely. | |
287 | */ | |
a2de8fe3 | 288 | #ifdef SHORT_INT |
64d06f2f | 289 | # define MAXSTACK 5000 /* size of output stack */ |
a2de8fe3 | 290 | #else !SHORT_INT |
64d06f2f | 291 | # define MAXSTACK 8000 /* size of output stack */ |
a2de8fe3 | 292 | #endif !SHORT_INT |
64d06f2f KM |
293 | |
294 | count_int htab [HSIZE]; | |
295 | unsigned short codetab [HSIZE]; | |
296 | code_int hsize = HSIZE; /* for dynamic table sizing */ | |
297 | count_int fsize; | |
298 | ||
299 | #define tab_prefix codetab /* prefix code for this entry */ | |
300 | char_type tab_suffix[1<<BITS]; /* last char in this entry */ | |
301 | ||
302 | #ifdef USERMEM | |
303 | short ftable [(1 << FBITS) * 256]; | |
304 | count_int fcodemem [1 << FBITS]; | |
305 | #endif USERMEM | |
306 | ||
307 | code_int free_ent = 0; /* first unused entry */ | |
308 | int exit_stat = 0; | |
309 | ||
310 | code_int getcode(); | |
311 | ||
312 | Usage() { | |
313 | #ifdef DEBUG | |
a2de8fe3 | 314 | fprintf(stderr,"Usage: compress [-dDVfc] [-b maxbits] [file ...]\n"); |
64d06f2f KM |
315 | } |
316 | int debug = 0; | |
317 | #else DEBUG | |
a2de8fe3 | 318 | fprintf(stderr,"Usage: compress [-dfvc] [-b maxbits] [file ...]\n"); |
64d06f2f KM |
319 | } |
320 | #endif DEBUG | |
a2de8fe3 | 321 | int nomagic = 0; /* Use a 3-byte magic number header, unless old file */ |
64d06f2f | 322 | int zcat_flg = 0; /* Write output on stdout, suppress messages */ |
a2de8fe3 | 323 | int quiet = 1; /* don't tell me about compression */ |
64d06f2f KM |
324 | |
325 | /* | |
326 | * block compression parameters -- after all codes are used up, | |
327 | * and compression rate changes, start over. | |
328 | */ | |
329 | int block_compress = BLOCK_MASK; | |
330 | int clear_flg = 0; | |
331 | double ratio = 0.0; /* compression ratio for last block */ | |
332 | #define CHECK_GAP 10000 /* ratio check interval */ | |
333 | count_int checkpoint = CHECK_GAP; | |
334 | /* | |
335 | * the next two codes should not be changed lightly, as they must not | |
336 | * lie within the contiguous general code space. | |
337 | */ | |
338 | #define FIRST 257 /* first free entry */ | |
339 | #define CLEAR 256 /* table clear output code */ | |
340 | ||
341 | int force = 0; | |
342 | char ofname [100]; | |
343 | #ifdef DEBUG | |
344 | int verbose = 0; | |
345 | #endif DEBUG | |
346 | int (*bgnd_flag)(); | |
347 | ||
a2de8fe3 KM |
348 | int nfiles = 0; /* number of files processed */ |
349 | ||
64d06f2f KM |
350 | /***************************************************************** |
351 | * TAG( main ) | |
352 | * | |
353 | * Algorithm from "A Technique for High Performance Data Compression", | |
354 | * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19. | |
355 | * | |
a2de8fe3 | 356 | * Usage: compress [-dfvc] [-b bits] [file ...] |
64d06f2f KM |
357 | * Inputs: |
358 | * -d: If given, decompression is done instead. | |
359 | * | |
360 | * -c: Write output on stdout, don't remove original. | |
361 | * | |
362 | * -b: Parameter limits the max number of bits/code. | |
363 | * | |
364 | * -f: Forces output file to be generated, even if one already | |
a2de8fe3 KM |
365 | * exists, and even if no space is saved by compressing. |
366 | * If -f is not used, the user will be prompted if stdin is | |
367 | * a tty, otherwise, the output file will not be overwritten. | |
64d06f2f | 368 | * |
a2de8fe3 | 369 | * -v: Write compression statistics |
64d06f2f KM |
370 | * |
371 | * file ...: Files to be compressed. If none specified, stdin | |
372 | * is used. | |
373 | * Outputs: | |
374 | * file.Z: Compressed form of file with same mode, owner, and utimes | |
375 | * or stdout (if stdin used as input) | |
376 | * | |
377 | * Assumptions: | |
378 | * When filenames are given, replaces with the compressed version | |
a2de8fe3 | 379 | * (.Z suffix) only if the file decreases in size. |
64d06f2f KM |
380 | * Algorithm: |
381 | * Modified Lempel-Ziv method (LZW). Basically finds common | |
382 | * substrings and replaces them with a variable size code. This is | |
383 | * deterministic, and can be done on the fly. Thus, the decompression | |
a2de8fe3 | 384 | * procedure needs no input table, but tracks the way the table was built. |
64d06f2f | 385 | */ |
64d06f2f KM |
386 | main( argc, argv ) |
387 | register int argc; char **argv; | |
388 | { | |
389 | int do_decomp = 0; | |
390 | int overwrite = 0; /* Do not overwrite unless given -f flag */ | |
391 | char tempname[100]; | |
392 | char **filelist, **fileptr; | |
a2de8fe3 | 393 | char *cp, *rindex(), *malloc(); |
64d06f2f KM |
394 | struct stat statbuf; |
395 | extern onintr(); | |
396 | ||
397 | ||
398 | if ( (bgnd_flag = signal ( SIGINT, SIG_IGN )) != SIG_IGN ) | |
399 | signal ( SIGINT, onintr ); | |
400 | ||
401 | #ifdef COMPATIBLE | |
402 | nomagic = 1; /* Original didn't have a magic number */ | |
403 | #endif COMPATIBLE | |
404 | ||
405 | filelist = fileptr = (char **)(malloc(argc * sizeof(*argv))); | |
406 | *filelist = NULL; | |
407 | ||
408 | if((cp = rindex(argv[0], '/')) != 0) { | |
409 | cp++; | |
410 | } else { | |
411 | cp = argv[0]; | |
412 | } | |
413 | if(strcmp(cp, "uncompress") == 0) { | |
414 | do_decomp = 1; | |
415 | } else if(strcmp(cp, "zcat") == 0) { | |
416 | do_decomp = 1; | |
417 | zcat_flg = 1; | |
418 | } | |
419 | ||
420 | #ifdef BSD4_2 | |
421 | /* 4.2BSD dependent - take it out if not */ | |
422 | setlinebuf( stderr ); | |
423 | #endif BSD4_2 | |
424 | ||
425 | /* Argument Processing | |
426 | * All flags are optional. | |
427 | * -D => debug | |
a2de8fe3 | 428 | * -V => debug verbose |
64d06f2f | 429 | * -d => do_decomp |
a2de8fe3 | 430 | * -v => unquiet |
64d06f2f KM |
431 | * -f => force overwrite of output file |
432 | * -n => no header: useful to uncompress old files | |
433 | * -b maxbits => maxbits. If -b is specified, then maxbits MUST be | |
434 | * given also. | |
435 | * -c => cat all output to stdout | |
a2de8fe3 | 436 | * -C => generate output compatible with compress 2.0. |
64d06f2f KM |
437 | * if a string is left, must be an input filename. |
438 | */ | |
439 | for (argc--, argv++; argc > 0; argc--, argv++) { | |
440 | if (**argv == '-') { /* A flag argument */ | |
441 | while (*++(*argv)) { /* Process all flags in this arg */ | |
442 | switch (**argv) { | |
443 | #ifdef DEBUG | |
444 | case 'D': | |
445 | debug = 1; | |
446 | break; | |
a2de8fe3 | 447 | case 'V': |
64d06f2f KM |
448 | verbose = 1; |
449 | break; | |
450 | #endif DEBUG | |
a2de8fe3 KM |
451 | case 'v': |
452 | quiet = 0; | |
453 | break; | |
64d06f2f KM |
454 | case 'd': |
455 | do_decomp = 1; | |
456 | break; | |
457 | case 'f': | |
a2de8fe3 | 458 | case 'F': |
64d06f2f | 459 | overwrite = 1; |
a2de8fe3 | 460 | force = 1; |
64d06f2f KM |
461 | break; |
462 | case 'n': | |
463 | nomagic = 1; | |
464 | break; | |
465 | case 'C': | |
466 | block_compress = 0; | |
467 | break; | |
468 | case 'b': | |
469 | if (!ARGVAL()) { | |
470 | fprintf(stderr, "Missing maxbits\n"); | |
471 | Usage(); | |
472 | exit(1); | |
473 | } | |
474 | maxbits = atoi(*argv); | |
475 | goto nextarg; | |
476 | case 'c': | |
477 | zcat_flg = 1; | |
478 | break; | |
479 | case 'q': | |
480 | quiet = 1; | |
481 | break; | |
64d06f2f KM |
482 | default: |
483 | fprintf(stderr, "Unknown flag: '%c'; ", **argv); | |
484 | Usage(); | |
485 | exit(1); | |
486 | } | |
487 | } | |
488 | } | |
489 | else { /* Input file name */ | |
490 | *fileptr++ = *argv; /* Build input file list */ | |
491 | *fileptr = NULL; | |
492 | /* goto nextarg; */ | |
493 | } | |
494 | nextarg: continue; | |
495 | } | |
496 | ||
497 | if(maxbits < INIT_BITS) maxbits = INIT_BITS; | |
498 | if (maxbits > BITS) maxbits = BITS; | |
499 | maxmaxcode = 1 << maxbits; | |
500 | ||
501 | if (*filelist != NULL) { | |
502 | for (fileptr = filelist; *fileptr; fileptr++) { | |
503 | exit_stat = 0; | |
504 | if (do_decomp != 0) { /* DECOMPRESSION */ | |
505 | /* Check for .Z suffix */ | |
506 | if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") != 0) { | |
507 | /* No .Z: tack one on */ | |
508 | strcpy(tempname, *fileptr); | |
509 | strcat(tempname, ".Z"); | |
510 | *fileptr = tempname; | |
511 | } | |
512 | /* Open input file */ | |
513 | if ((freopen(*fileptr, "r", stdin)) == NULL) { | |
514 | perror(*fileptr); continue; | |
515 | } | |
516 | /* Check the magic number */ | |
517 | if (nomagic == 0) { | |
518 | if ((getchar() != (magic_header[0] & 0xFF)) | |
519 | || (getchar() != (magic_header[1] & 0xFF))) { | |
520 | fprintf(stderr, "%s: not in compressed format\n", | |
521 | *fileptr); | |
522 | continue; | |
523 | } | |
524 | maxbits = getchar(); /* set -b from file */ | |
525 | block_compress = maxbits & BLOCK_MASK; | |
526 | maxbits &= BIT_MASK; | |
527 | maxmaxcode = 1 << maxbits; | |
528 | if(maxbits > BITS) { | |
529 | fprintf(stderr, | |
530 | "%s: compressed with %d bits, can only handle %d bits\n", | |
531 | *fileptr, maxbits, BITS); | |
532 | continue; | |
533 | } | |
534 | } | |
535 | /* Generate output filename */ | |
536 | strcpy(ofname, *fileptr); | |
537 | ofname[strlen(*fileptr) - 2] = '\0'; /* Strip off .Z */ | |
538 | } else { /* COMPRESSION */ | |
539 | if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") == 0) { | |
a2de8fe3 | 540 | fprintf(stderr, "%s: already has .Z suffix -- no change\n", |
64d06f2f KM |
541 | *fileptr); |
542 | continue; | |
543 | } | |
544 | /* Open input file */ | |
545 | if ((freopen(*fileptr, "r", stdin)) == NULL) { | |
546 | perror(*fileptr); continue; | |
547 | } | |
548 | stat ( *fileptr, &statbuf ); | |
549 | fsize = (long) statbuf.st_size; | |
550 | /* | |
551 | * tune hash table size for small files -- ad hoc | |
552 | */ | |
a2de8fe3 | 553 | hsize = HSIZE; |
64d06f2f | 554 | if ( fsize < (1 << 12) ) |
a2de8fe3 | 555 | hsize = min ( 5003, HSIZE ); |
64d06f2f | 556 | else if ( fsize < (1 << 13) ) |
a2de8fe3 | 557 | hsize = min ( 9001, HSIZE ); |
64d06f2f | 558 | else if ( fsize < (1 << 14) ) |
a2de8fe3 | 559 | hsize = min ( 18013, HSIZE ); |
64d06f2f | 560 | else if ( fsize < (1 << 15) ) |
a2de8fe3 | 561 | hsize = min ( 35023, HSIZE ); |
64d06f2f | 562 | else if ( fsize < 47000 ) |
a2de8fe3 KM |
563 | hsize = min ( 50021, HSIZE ); |
564 | ||
64d06f2f KM |
565 | /* Generate output filename */ |
566 | strcpy(ofname, *fileptr); | |
567 | #ifndef BSD4_2 /* Short filenames */ | |
568 | if ((cp=rindex(ofname,'/')) != NULL) cp++; | |
569 | else cp = ofname; | |
570 | if (strlen(cp) > 12) { | |
571 | fprintf(stderr,"%s: filename too long to tack on .Z\n",cp); | |
572 | continue; | |
573 | } | |
574 | #endif BSD4_2 /* Long filenames allowed */ | |
575 | strcat(ofname, ".Z"); | |
576 | } | |
577 | /* Check for overwrite of existing file */ | |
578 | if (overwrite == 0 && zcat_flg == 0) { | |
579 | if (stat(ofname, &statbuf) == 0) { | |
580 | char response[2]; | |
581 | response[0] = 'n'; | |
582 | fprintf(stderr, "%s already exists;", ofname); | |
583 | if (foreground()) { | |
a2de8fe3 | 584 | fprintf(stderr, " do you wish to overwrite %s (y or n)? ", |
64d06f2f KM |
585 | ofname); |
586 | fflush(stderr); | |
587 | read(2, response, 2); | |
588 | while (response[1] != '\n') { | |
589 | if (read(2, response+1, 1) < 0) { /* Ack! */ | |
590 | perror("stderr"); break; | |
591 | } | |
592 | } | |
593 | } | |
594 | if (response[0] != 'y') { | |
595 | fprintf(stderr, "\tnot overwritten\n"); | |
596 | continue; | |
597 | } | |
598 | } | |
599 | } | |
600 | if(zcat_flg == 0) { /* Open output file */ | |
601 | if (freopen(ofname, "w", stdout) == NULL) { | |
602 | perror(ofname); | |
603 | continue; | |
604 | } | |
605 | if(!quiet) | |
606 | fprintf(stderr, "%s: ", *fileptr); | |
607 | } | |
608 | ||
609 | /* Actually do the compression/decompression */ | |
610 | if (do_decomp == 0) compress(); | |
611 | #ifndef DEBUG | |
612 | else decompress(); | |
613 | #else DEBUG | |
614 | else if (debug == 0) decompress(); | |
615 | else printcodes(); | |
616 | if (verbose) dump_tab(); | |
617 | #endif DEBUG | |
618 | if(zcat_flg == 0) { | |
619 | copystat(*fileptr, ofname); /* Copy stats */ | |
620 | if(exit_stat || (!quiet)) | |
621 | putc('\n', stderr); | |
622 | } | |
623 | } | |
624 | } else { /* Standard input */ | |
625 | if (do_decomp == 0) { | |
626 | compress(); | |
627 | if(!quiet) | |
628 | putc('\n', stderr); | |
629 | } else { | |
630 | /* Check the magic number */ | |
631 | if (nomagic == 0) { | |
632 | if ((getchar()!=(magic_header[0] & 0xFF)) | |
633 | || (getchar()!=(magic_header[1] & 0xFF))) { | |
634 | fprintf(stderr, "stdin: not in compressed format\n"); | |
635 | exit(1); | |
636 | } | |
637 | maxbits = getchar(); /* set -b from file */ | |
638 | block_compress = maxbits & BLOCK_MASK; | |
639 | maxbits &= BIT_MASK; | |
640 | maxmaxcode = 1 << maxbits; | |
641 | fsize = 100000; /* assume stdin large for USERMEM */ | |
642 | if(maxbits > BITS) { | |
643 | fprintf(stderr, | |
644 | "stdin: compressed with %d bits, can only handle %d bits\n", | |
645 | maxbits, BITS); | |
646 | exit(1); | |
647 | } | |
648 | } | |
649 | #ifndef DEBUG | |
650 | decompress(); | |
651 | #else DEBUG | |
652 | if (debug == 0) decompress(); | |
653 | else printcodes(); | |
654 | if (verbose) dump_tab(); | |
655 | #endif DEBUG | |
656 | } | |
657 | } | |
658 | exit(exit_stat); | |
659 | } | |
660 | ||
661 | static int offset; | |
662 | long int in_count = 1; /* length of input */ | |
663 | long int bytes_out; /* length of compressed output */ | |
664 | long int out_count = 0; /* # of codes output (for debugging) */ | |
665 | ||
666 | #define HOG_CHECK ((count_int) 2000) /* Number of chars to read b4 check */ | |
667 | #define MAX_CACHE ((count_int) 1<<BITS) /* Next line is this constant too */ | |
668 | unsigned short hashcache [1<<BITS]; /* common hash short circuit cache */ | |
669 | count_int cfreq [256]; /* character counts */ | |
670 | #ifndef vax | |
671 | char chog; /* most common character from input */ | |
672 | # define CHOG ' ' /* Assume space is most frequent */ | |
673 | #else | |
674 | int chog; /* char arith slow on VAX */ | |
675 | # define CHOG (int) ' ' /* Assume space is most frequent */ | |
676 | #endif | |
677 | int cstat_flg = 0; /* on after determining char hog */ | |
678 | ||
679 | /* | |
680 | * compress stdin to stdout | |
681 | * | |
682 | * Algorithm: on large machines, for maxbits <= FBITS, use fast direct table | |
683 | * lookup on the prefix code / next character combination. For smaller code | |
684 | * size, use open addressing modular division double hashing (no chaining), ala | |
685 | * Knuth vol. 3, sec. 6.4 Algorithm D, along with G. Knott's relatively-prime | |
686 | * secondary probe. Do block compression with an adaptive reset, whereby the | |
687 | * code table is cleared when the compression ratio decreases, but after the | |
688 | * table fills. The variable-length output codes are re-sized at this point, | |
689 | * and a special CLEAR code is generated for the decompressor. For the | |
690 | * megamemory version, the sparse array is cleared indirectly through a | |
691 | * "shadow" output code history. Late additions: for the hashing code, | |
692 | * construct the table according to file size for noticeable speed improvement | |
693 | * on small files. Also detect and cache codes associated with the most | |
694 | * common character to bypass hash calculation on these codes (a characteristic | |
695 | * of highly-compressable raster images). Please direct questions about this | |
696 | * implementation to ames!jaw. | |
697 | */ | |
698 | ||
699 | ||
700 | compress() { | |
701 | register long fcode; | |
702 | register code_int i = 0; | |
703 | register int c; | |
704 | register code_int ent; | |
705 | register int disp; | |
706 | register code_int hsize_reg; | |
707 | ||
708 | #ifndef COMPATIBLE | |
709 | if (nomagic == 0) { | |
710 | putchar(magic_header[0]); putchar(magic_header[1]); | |
711 | putchar((char)(maxbits | block_compress)); | |
712 | } | |
713 | #endif COMPATIBLE | |
714 | ||
715 | offset = 0; | |
a2de8fe3 | 716 | bytes_out = 3; /* includes 3-byte header mojo */ |
64d06f2f KM |
717 | out_count = 0; |
718 | clear_flg = 0; | |
719 | ratio = 0.0; | |
720 | in_count = 1; | |
721 | checkpoint = CHECK_GAP; | |
722 | maxcode = MAXCODE(n_bits = INIT_BITS); | |
723 | free_ent = ((block_compress) ? FIRST : 256 ); | |
724 | ent = getchar (); | |
725 | ||
726 | #ifdef USERMEM | |
727 | if ( maxbits <= FBITS && (fsize >= 30000) ) { /* use hashing on small files */ | |
728 | ||
a2de8fe3 KM |
729 | if ( nfiles++ > 0 ) /* clear table between files */ |
730 | cl_sparse (); | |
731 | ||
64d06f2f KM |
732 | while ( (c = getchar()) != (unsigned) EOF ) { |
733 | in_count++; | |
734 | fcode = (long) (((long) c << maxbits) + ent); | |
a2de8fe3 | 735 | if ( ftable [fcode] != 0 ) /* test for code in "string" table */ |
64d06f2f KM |
736 | ent = ftable [fcode]; |
737 | else { | |
738 | output ( (code_int) ent ); | |
739 | out_count++; | |
740 | ent = c; | |
741 | if ( free_ent >= maxmaxcode ) { | |
742 | if ( (count_int)in_count < checkpoint || (!block_compress) ) | |
743 | continue; | |
744 | else { | |
a2de8fe3 | 745 | cl_block (); |
64d06f2f KM |
746 | i = 0; |
747 | } | |
748 | } else { /* put code in table */ | |
749 | ftable [fcode] = (short) free_ent++; | |
a2de8fe3 | 750 | fcodemem [i++] = fcode; /* memorize for block compression */ |
64d06f2f KM |
751 | } |
752 | } | |
753 | } | |
754 | goto fin; | |
755 | } | |
756 | #endif USERMEM | |
757 | ||
758 | chog = CHOG; /* assumed character for the hog */ | |
759 | cstat_flg = 0; | |
760 | hsize_reg = hsize; | |
a2de8fe3 | 761 | cl_hash( (count_int) hsize_reg); /* clear hash tables */ |
64d06f2f KM |
762 | |
763 | while ( (c = getchar()) != (unsigned) EOF ) { | |
764 | in_count++; | |
765 | if ( cstat_flg == 0 ) { | |
766 | cfreq [c]++; /* gather frequencies at start of input */ | |
767 | if ( (count_int)in_count > HOG_CHECK ) { | |
768 | cstat_flg = 1; | |
769 | chog = hogtally(); /* compute char hog */ | |
770 | if(chog != CHOG) /* fixup for wrong assumption */ | |
a2de8fe3 | 771 | cl_cache( (count_int) free_ent ); |
64d06f2f KM |
772 | } |
773 | } | |
774 | if ( c == chog ) | |
775 | if ( (i = hashcache [ent]) ) { /* cache -> code */ | |
776 | ent = i; | |
777 | continue; | |
778 | } | |
779 | fcode = (long) (((long) c << maxbits) + ent); | |
a2de8fe3 | 780 | #ifdef SHORT_INT |
64d06f2f | 781 | i = (((c + 12347) * ent) & 077777) % HSIZE; /* avoid 'lrem' call */ |
a2de8fe3 | 782 | #else !SHORT_INT |
64d06f2f | 783 | i = fcode % hsize_reg; /* division hashing */ |
a2de8fe3 | 784 | #endif SHORT_INT |
64d06f2f KM |
785 | |
786 | if ( htab [i] == fcode ) { | |
787 | ent = codetab [i]; | |
788 | continue; | |
789 | } else if ( (long)htab [i] < 0 ) /* empty slot */ | |
790 | goto nomatch; | |
791 | disp = hsize_reg - i; /* secondary hash (G. Knott) */ | |
792 | if ( i == 0 ) | |
793 | disp = 1; | |
794 | probe: | |
795 | if ( (i -= disp) < 0 ) | |
796 | i += hsize_reg; | |
797 | ||
798 | if ( htab [i] == fcode ) { | |
799 | ent = codetab [i]; | |
800 | continue; | |
801 | } | |
802 | if ( (long)htab [i] > 0 ) | |
803 | goto probe; | |
804 | nomatch: | |
805 | output ( (code_int) ent ); | |
806 | out_count++; | |
807 | #ifdef interdata | |
808 | if ( (unsigned) free_ent < (unsigned) maxmaxcode) { | |
809 | #else | |
810 | if ( free_ent < maxmaxcode ) { | |
811 | #endif | |
812 | if ( c == chog ) /* code -> cache */ | |
813 | hashcache [ent] = free_ent; | |
814 | /* code -> hashtable */ | |
815 | codetab [i] = free_ent++; | |
816 | htab [i] = fcode; | |
817 | } | |
818 | else if ( (count_int)in_count >= checkpoint && block_compress ) | |
a2de8fe3 | 819 | cl_block (); |
64d06f2f KM |
820 | ent = c; |
821 | } | |
822 | fin: | |
823 | /* | |
824 | * Put out the final code. | |
825 | */ | |
826 | output( (code_int)ent ); | |
827 | out_count++; | |
828 | output( (code_int)-1 ); | |
829 | ||
830 | /* | |
831 | * Print out stats on stderr | |
832 | */ | |
833 | if(zcat_flg == 0 && !quiet) { | |
834 | #ifdef DEBUG | |
835 | fprintf( stderr, | |
836 | "%ld chars in, %ld codes (%ld bytes) out, compression factor %g\n", | |
837 | in_count, out_count, bytes_out, | |
838 | (double)in_count / (double)bytes_out ); | |
839 | fprintf( stderr, "\tCompression as in compact: %5.2f%%\n", | |
840 | 100.0 * ( in_count - bytes_out ) / (double) in_count ); | |
841 | fprintf( stderr, "\tLargest code was %d (%d bits)\n", free_ent - 1, n_bits ); | |
842 | #else DEBUG | |
843 | fprintf( stderr, "Compression: %5.2f%%", | |
844 | 100.0 * ( in_count - bytes_out ) / (double) in_count ); | |
845 | #endif DEBUG | |
846 | } | |
847 | if(bytes_out > in_count) /* exit(2) if no savings */ | |
848 | exit_stat = 2; | |
849 | return; | |
850 | } | |
851 | ||
852 | /***************************************************************** | |
853 | * TAG( output ) | |
854 | * | |
855 | * Output the given code. | |
856 | * Inputs: | |
857 | * code: A n_bits-bit integer. If == -1, then EOF. This assumes | |
858 | * that n_bits =< (long)wordsize - 1. | |
859 | * Outputs: | |
860 | * Outputs code to the file. | |
861 | * Assumptions: | |
862 | * Chars are 8 bits long. | |
863 | * Algorithm: | |
864 | * Maintain a BITS character long buffer (so that 8 codes will | |
865 | * fit in it exactly). Use the VAX insv instruction to insert each | |
866 | * code in turn. When the buffer fills up empty it and start over. | |
867 | */ | |
868 | ||
869 | static char buf[BITS]; | |
870 | ||
871 | #ifndef vax | |
872 | char_type lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00}; | |
873 | char_type rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; | |
874 | #endif !vax | |
875 | ||
876 | output( code ) | |
877 | code_int code; | |
878 | { | |
879 | #ifdef DEBUG | |
880 | static int col = 0; | |
881 | #endif DEBUG | |
882 | ||
883 | /* | |
884 | * On the VAX, it is important to have the register declarations | |
885 | * in exactly the order given, or the asm will break. | |
886 | */ | |
887 | register int r_off = offset, bits= n_bits; | |
888 | register char * bp = buf; | |
889 | ||
890 | if ( code >= 0 ) { | |
891 | #ifdef vax | |
a2de8fe3 | 892 | /* VAX DEPENDENT!! Implementation on other machines may be difficult. |
64d06f2f KM |
893 | * |
894 | * Translation: Insert BITS bits from the argument starting at | |
895 | * offset bits from the beginning of buf. | |
896 | */ | |
897 | 0; /* C compiler bug ?? */ | |
898 | asm( "insv 4(ap),r11,r10,(r9)" ); | |
899 | #else not a vax | |
a2de8fe3 KM |
900 | /* |
901 | * byte/bit numbering on the VAX is simulated by the following code | |
902 | */ | |
64d06f2f KM |
903 | /* |
904 | * Get to the first byte. | |
905 | */ | |
906 | bp += (r_off >> 3); | |
907 | r_off &= 7; | |
908 | /* | |
909 | * Since code is always >= 8 bits, only need to mask the first | |
910 | * hunk on the left. | |
911 | */ | |
912 | *bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off]; | |
913 | bp++; | |
914 | bits -= (8 - r_off); | |
915 | code >>= 8 - r_off; | |
916 | /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ | |
917 | if ( bits >= 8 ) { | |
918 | *bp++ = code; | |
919 | code >>= 8; | |
920 | bits -= 8; | |
921 | } | |
922 | /* Last bits. */ | |
923 | if(bits) | |
924 | *bp = code; | |
925 | #endif vax | |
926 | offset += n_bits; | |
927 | if ( offset == (n_bits << 3) ) { | |
928 | bp = buf; | |
929 | bits = n_bits; | |
930 | bytes_out += bits; | |
931 | do | |
932 | putchar(*bp++); | |
933 | while(--bits); | |
934 | if (ferror(stdout)) | |
935 | writeerr(); | |
936 | offset = 0; | |
937 | } | |
938 | #ifdef DEBUG | |
939 | if ( verbose ) | |
940 | fprintf( stderr, "%5d%c", code, | |
941 | (col+=6) >= 74 ? (col = 0, '\n') : ' ' ); | |
942 | #endif DEBUG | |
943 | ||
944 | /* | |
945 | * If the next entry is going to be too big for the code size, | |
946 | * then increase it, if possible. | |
947 | */ | |
948 | if ( free_ent > maxcode || (clear_flg > 0)) | |
949 | { | |
950 | /* | |
951 | * Write the whole buffer, because the input side won't | |
952 | * discover the size increase until after it has read it. | |
953 | */ | |
954 | if ( offset > 0 ) { | |
955 | if( fwrite( buf, 1, n_bits, stdout ) != n_bits) | |
956 | writeerr(); | |
957 | bytes_out += n_bits; | |
958 | } | |
959 | offset = 0; | |
960 | ||
961 | if ( clear_flg ) { | |
962 | maxcode = MAXCODE (n_bits = INIT_BITS); | |
963 | clear_flg = 0; | |
964 | } | |
965 | else { | |
966 | n_bits++; | |
967 | if ( n_bits == maxbits ) | |
968 | maxcode = maxmaxcode; | |
969 | else | |
970 | maxcode = MAXCODE(n_bits); | |
971 | } | |
972 | #ifdef DEBUG | |
973 | if ( debug ) { | |
974 | fprintf( stderr, "\nChange to %d bits\n", n_bits ); | |
975 | col = 0; | |
976 | } | |
977 | #endif DEBUG | |
978 | } | |
979 | } else { | |
980 | /* | |
981 | * At EOF, write the rest of the buffer. | |
982 | */ | |
983 | if ( offset > 0 ) | |
984 | fwrite( buf, 1, (offset + 7) / 8, stdout ); | |
985 | bytes_out += (offset + 7) / 8; | |
986 | offset = 0; | |
987 | fflush( stdout ); | |
988 | #ifdef DEBUG | |
989 | if ( verbose ) | |
990 | fprintf( stderr, "\n" ); | |
991 | #endif DEBUG | |
992 | if( ferror( stdout ) ) | |
993 | writeerr(); | |
994 | } | |
995 | } | |
996 | ||
a2de8fe3 KM |
997 | #ifdef SMALL_STACK |
998 | char_type stack[MAXSTACK]; | |
999 | #endif | |
1000 | ||
64d06f2f | 1001 | decompress() { |
a2de8fe3 | 1002 | register char_type *stackp; |
64d06f2f | 1003 | register int finchar; |
a2de8fe3 KM |
1004 | register code_int code, oldcode, incode; |
1005 | ||
1006 | #ifndef SMALL_STACK | |
1007 | char_type stack[MAXSTACK]; | |
1008 | #endif | |
64d06f2f KM |
1009 | |
1010 | /* | |
1011 | * As above, initialize the first 256 entries in the table. | |
1012 | */ | |
1013 | maxcode = MAXCODE(n_bits = INIT_BITS); | |
1014 | for ( code = 255; code >= 0; code-- ) { | |
1015 | tab_prefix[code] = 0; | |
1016 | tab_suffix[code] = (char_type)code; | |
1017 | } | |
1018 | free_ent = ((block_compress) ? FIRST : 256 ); | |
1019 | ||
1020 | finchar = oldcode = getcode(); | |
1021 | putchar( (char)finchar ); /* first code must be 8 bits = char */ | |
a2de8fe3 | 1022 | stackp = &stack[0]; |
64d06f2f KM |
1023 | |
1024 | while ( (code = getcode()) != -1 ) { | |
1025 | ||
1026 | if ( (code == CLEAR) && block_compress ) { | |
a2de8fe3 KM |
1027 | for ( code = 255; code >= 0; code-- ) |
1028 | tab_prefix[code] = 0; | |
64d06f2f KM |
1029 | clear_flg = 1; |
1030 | free_ent = FIRST - 1; | |
1031 | if ( (code = getcode ()) == -1 ) /* O, untimely death! */ | |
1032 | break; | |
1033 | } | |
1034 | incode = code; | |
1035 | /* | |
1036 | * Special case for KwKwK string. | |
1037 | */ | |
1038 | if ( code >= free_ent ) { | |
a2de8fe3 | 1039 | *stackp++ = finchar; |
64d06f2f KM |
1040 | code = oldcode; |
1041 | } | |
1042 | ||
1043 | /* | |
1044 | * Generate output characters in reverse order | |
1045 | */ | |
1046 | #ifdef interdata | |
1047 | while ( ((unsigned long)code) >= ((unsigned long)256) ) { | |
1048 | #else !interdata | |
1049 | while ( code >= 256 ) { | |
1050 | #endif interdata | |
a2de8fe3 | 1051 | *stackp++ = tab_suffix[code]; |
64d06f2f KM |
1052 | code = tab_prefix[code]; |
1053 | } | |
a2de8fe3 | 1054 | *stackp++ = finchar = tab_suffix[code]; |
64d06f2f KM |
1055 | |
1056 | /* | |
1057 | * And put them out in forward order | |
1058 | */ | |
a2de8fe3 KM |
1059 | while ( --stackp > &stack[0] ) |
1060 | putchar ( *stackp ); | |
1061 | putchar ( *stackp ); | |
64d06f2f KM |
1062 | if (ferror(stdout)) |
1063 | writeerr ( ); | |
64d06f2f KM |
1064 | |
1065 | /* | |
1066 | * Generate the new entry. | |
1067 | */ | |
1068 | if ( (code=free_ent) < maxmaxcode ) { | |
1069 | tab_prefix[code] = (unsigned short)oldcode; | |
1070 | tab_suffix[code] = finchar; | |
1071 | free_ent = code+1; | |
1072 | } | |
1073 | /* | |
1074 | * Remember previous code. | |
1075 | */ | |
1076 | oldcode = incode; | |
1077 | } | |
1078 | fflush( stdout ); | |
1079 | if(ferror(stdout)) | |
1080 | writeerr(); | |
1081 | } | |
1082 | ||
64d06f2f KM |
1083 | /***************************************************************** |
1084 | * TAG( getcode ) | |
1085 | * | |
1086 | * Read one code from the standard input. If EOF, return -1. | |
1087 | * Inputs: | |
1088 | * stdin | |
1089 | * Outputs: | |
1090 | * code or -1 is returned. | |
1091 | */ | |
1092 | ||
1093 | code_int | |
1094 | getcode() { | |
1095 | /* | |
1096 | * On the VAX, it is important to have the register declarations | |
1097 | * in exactly the order given, or the asm will break. | |
1098 | */ | |
1099 | register code_int code; | |
1100 | static int offset = 0, size = 0; | |
1101 | static char_type buf[BITS]; | |
1102 | register int r_off, bits; | |
1103 | register char_type *bp = buf; | |
1104 | ||
1105 | if ( clear_flg > 0 || offset >= size || free_ent > maxcode ) { | |
1106 | /* | |
1107 | * If the next entry will be too big for the current code | |
1108 | * size, then we must increase the size. This implies reading | |
1109 | * a new buffer full, too. | |
1110 | */ | |
1111 | if ( free_ent > maxcode ) { | |
1112 | n_bits++; | |
1113 | if ( n_bits == maxbits ) | |
1114 | maxcode = maxmaxcode; /* won't get any bigger now */ | |
1115 | else | |
1116 | maxcode = MAXCODE(n_bits); | |
1117 | } | |
1118 | if ( clear_flg > 0) { | |
1119 | maxcode = MAXCODE (n_bits = INIT_BITS); | |
1120 | clear_flg = 0; | |
1121 | } | |
1122 | size = fread( buf, 1, n_bits, stdin ); | |
1123 | if ( size <= 0 ) | |
1124 | return -1; /* end of file */ | |
1125 | offset = 0; | |
1126 | /* Round size down to integral number of codes */ | |
1127 | size = (size << 3) - (n_bits - 1); | |
1128 | } | |
1129 | r_off = offset; | |
1130 | bits = n_bits; | |
1131 | #ifdef vax | |
1132 | asm( "extzv r10,r9,(r8),r11" ); | |
1133 | #else not a vax | |
1134 | /* | |
1135 | * Get to the first byte. | |
1136 | */ | |
1137 | bp += (r_off >> 3); | |
1138 | r_off &= 7; | |
1139 | /* Get first part (low order bits) */ | |
1140 | #ifdef NO_UCHAR | |
1141 | code = ((*bp++ >> r_off) & rmask[8 - r_off]) & 0xff; | |
1142 | #else NO_UCHAR | |
1143 | code = (*bp++ >> r_off); | |
1144 | #endif NO_UCHAR | |
1145 | bits -= (8 - r_off); | |
1146 | r_off = 8 - r_off; /* now, offset into code word */ | |
1147 | /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ | |
1148 | if ( bits >= 8 ) { | |
1149 | #ifdef NO_UCHAR | |
1150 | code |= (*bp++ & 0xff) << r_off; | |
1151 | #else NO_UCHAR | |
1152 | code |= *bp++ << r_off; | |
1153 | #endif NO_UCHAR | |
1154 | r_off += 8; | |
1155 | bits -= 8; | |
1156 | } | |
1157 | /* high order bits. */ | |
1158 | code |= (*bp & rmask[bits]) << r_off; | |
1159 | #endif vax | |
1160 | offset += n_bits; | |
1161 | ||
1162 | return code; | |
1163 | } | |
1164 | ||
1165 | char * | |
1166 | rindex(s, c) /* For those who don't have it in libc.a */ | |
1167 | register char *s, c; | |
1168 | { | |
1169 | char *p; | |
1170 | for (p = NULL; *s; s++) | |
1171 | if (*s == c) | |
1172 | p = s; | |
1173 | return(p); | |
1174 | } | |
1175 | ||
1176 | #ifdef DEBUG | |
1177 | printcodes() | |
1178 | { | |
1179 | /* | |
a2de8fe3 | 1180 | * Just print out codes from input file. For debugging. |
64d06f2f KM |
1181 | */ |
1182 | code_int code; | |
1183 | int col = 0, bits; | |
1184 | ||
1185 | bits = n_bits = INIT_BITS; | |
1186 | maxcode = MAXCODE(n_bits); | |
1187 | free_ent = ((block_compress) ? FIRST : 256 ); | |
1188 | while ( ( code = getcode() ) >= 0 ) { | |
1189 | if ( (code == CLEAR) && block_compress ) { | |
1190 | free_ent = FIRST - 1; | |
1191 | clear_flg = 1; | |
1192 | } | |
1193 | else if ( free_ent < maxmaxcode ) | |
1194 | free_ent++; | |
1195 | if ( bits != n_bits ) { | |
1196 | fprintf(stderr, "\nChange to %d bits\n", n_bits ); | |
1197 | bits = n_bits; | |
1198 | col = 0; | |
1199 | } | |
1200 | fprintf(stderr, "%5d%c", code, (col+=6) >= 74 ? (col = 0, '\n') : ' ' ); | |
1201 | } | |
1202 | putc( '\n', stderr ); | |
1203 | exit( 0 ); | |
1204 | } | |
1205 | ||
1206 | dump_tab() /* dump string table */ | |
1207 | { | |
1208 | register int i; | |
1209 | register ent; | |
1210 | char stack[4 * MAXSTACK]; /* \nnn makes it 4 times bigger */ | |
1211 | int stack_top = 4 * MAXSTACK; | |
1212 | ||
1213 | for ( i = 0; i < free_ent; i++ ) { | |
1214 | ent = i; | |
1215 | if ( isascii(tab_suffix[ent]) && isprint(tab_suffix[ent]) ) | |
1216 | fprintf( stderr, "%5d: %5d/'%c' \"", | |
1217 | ent, tab_prefix[ent], tab_suffix[ent] ); | |
1218 | else | |
1219 | fprintf( stderr, "%5d: %5d/\\%03o \"", | |
1220 | ent, tab_prefix[ent], tab_suffix[ent] ); | |
1221 | stack[--stack_top] = '\n'; | |
1222 | stack[--stack_top] = '"'; | |
1223 | for ( ; ent != NULL; | |
1224 | ent = (ent >= FIRST ? tab_prefix[ent] : NULL) ) { | |
1225 | if ( isascii(tab_suffix[ent]) && isprint(tab_suffix[ent]) ) | |
1226 | stack[--stack_top] = tab_suffix[ent]; | |
1227 | else { | |
1228 | switch( tab_suffix[ent] ) { | |
1229 | case '\n': stack[--stack_top] = 'n'; break; | |
1230 | case '\t': stack[--stack_top] = 't'; break; | |
1231 | case '\b': stack[--stack_top] = 'b'; break; | |
1232 | case '\f': stack[--stack_top] = 'f'; break; | |
1233 | case '\r': stack[--stack_top] = 'r'; break; | |
1234 | default: | |
1235 | stack[--stack_top] = '0' + tab_suffix[ent] % 8; | |
1236 | stack[--stack_top] = '0' + (tab_suffix[ent] / 8) % 8; | |
1237 | stack[--stack_top] = '0' + tab_suffix[ent] / 64; | |
1238 | break; | |
1239 | } | |
1240 | stack[--stack_top] = '\\'; | |
1241 | } | |
1242 | } | |
1243 | fwrite( &stack[stack_top], 1, 4 * MAXSTACK - stack_top, stderr ); | |
1244 | stack_top = 4 * MAXSTACK; | |
1245 | } | |
1246 | } | |
1247 | #endif DEBUG | |
1248 | ||
1249 | /***************************************************************** | |
1250 | * TAG( writeerr ) | |
1251 | * | |
1252 | * Exits with a message. We only check for write errors often enough | |
1253 | * to avoid a lot of "file system full" messages, not on every write. | |
1254 | * ferror() check after fflush will catch any others (I trust). | |
1255 | * | |
1256 | */ | |
1257 | ||
1258 | writeerr() | |
1259 | { | |
1260 | perror ( ofname ); | |
1261 | unlink ( ofname ); | |
1262 | exit ( 1 ); | |
1263 | } | |
1264 | ||
1265 | copystat(ifname, ofname) | |
1266 | char *ifname, *ofname; | |
1267 | { | |
1268 | struct stat statbuf; | |
1269 | int mode; | |
1270 | time_t timep[2]; | |
1271 | ||
1272 | fclose(stdout); | |
1273 | if (stat(ifname, &statbuf)) { /* Get stat on input file */ | |
1274 | perror(ifname); | |
1275 | return; | |
1276 | } | |
1277 | if ((statbuf.st_mode & S_IFMT/*0170000*/) != S_IFREG/*0100000*/) { | |
1278 | if(quiet) | |
a2de8fe3 | 1279 | fprintf(stderr, "%s: ", ifname); |
64d06f2f KM |
1280 | fprintf(stderr, " -- not a regular file: unchanged"); |
1281 | exit_stat = 1; | |
1282 | } else if (statbuf.st_nlink > 1) { | |
1283 | if(quiet) | |
a2de8fe3 | 1284 | fprintf(stderr, "%s: ", ifname); |
64d06f2f KM |
1285 | fprintf(stderr, " -- has %d other links: unchanged", |
1286 | statbuf.st_nlink - 1); | |
1287 | exit_stat = 1; | |
a2de8fe3 | 1288 | } else if (exit_stat == 2 && (!force) && !quiet) { /* No compression: remove file.Z */ |
64d06f2f KM |
1289 | fprintf(stderr, " -- file unchanged"); |
1290 | } else { /* ***** Successful Compression ***** */ | |
1291 | exit_stat = 0; | |
1292 | mode = statbuf.st_mode & 07777; | |
1293 | if (chmod(ofname, mode)) /* Copy modes */ | |
1294 | perror(ofname); | |
1295 | chown(ofname, statbuf.st_uid, statbuf.st_gid); /* Copy ownership */ | |
1296 | timep[0] = statbuf.st_atime; | |
1297 | timep[1] = statbuf.st_mtime; | |
1298 | utime(ofname, timep); /* Update last accessed and modified times */ | |
1299 | if (unlink(ifname)) /* Remove input file */ | |
1300 | perror(ifname); | |
1301 | if(!quiet) | |
1302 | fprintf(stderr, " -- replaced with %s", ofname); | |
1303 | return; /* Successful return */ | |
1304 | } | |
1305 | ||
1306 | /* Unsuccessful return -- one of the tests failed */ | |
1307 | if (unlink(ofname)) | |
1308 | perror(ofname); | |
1309 | } | |
1310 | /* | |
1311 | * This routine returns 1 if we are running in the foreground and stderr | |
1312 | * is a tty. | |
1313 | */ | |
1314 | foreground() | |
1315 | { | |
1316 | if(bgnd_flag) { /* background? */ | |
1317 | return(0); | |
1318 | } else { /* foreground */ | |
1319 | if(isatty(2)) { /* and stderr is a tty */ | |
1320 | return(1); | |
1321 | } else { | |
1322 | return(0); | |
1323 | } | |
1324 | } | |
1325 | } | |
1326 | ||
1327 | onintr ( ) | |
1328 | { | |
1329 | unlink ( ofname ); | |
1330 | exit ( 1 ); | |
1331 | } | |
1332 | ||
a2de8fe3 | 1333 | cl_block () /* table clear for block compress */ |
64d06f2f | 1334 | { |
64d06f2f KM |
1335 | #ifdef DEBUG |
1336 | if(debug) | |
1337 | fprintf ( stderr, "count: %ld ratio: %f\n", in_count, | |
1338 | (double) in_count / (double) bytes_out ); | |
1339 | #endif DEBUG | |
1340 | ||
1341 | checkpoint = in_count + CHECK_GAP; | |
1342 | if ( (double) in_count / (double) bytes_out > ratio ) | |
1343 | ratio = (double) in_count / (double) bytes_out; | |
1344 | else { | |
1345 | ratio = 0.0; | |
1346 | #ifdef USERMEM | |
1347 | if ( maxbits <= FBITS ) /* sparse array clear */ | |
a2de8fe3 | 1348 | cl_sparse (); |
64d06f2f KM |
1349 | else |
1350 | #endif USERMEM /* hash table clear */ | |
1351 | { | |
a2de8fe3 | 1352 | cl_hash ( (count_int) hsize ); |
64d06f2f KM |
1353 | } |
1354 | free_ent = FIRST; | |
1355 | clear_flg = 1; | |
1356 | output ( (code_int) CLEAR ); | |
1357 | #ifdef DEBUG | |
1358 | if(debug) | |
1359 | fprintf ( stderr, "clear\n" ); | |
1360 | #endif DEBUG | |
1361 | } | |
1362 | } | |
1363 | ||
a2de8fe3 | 1364 | cl_cache ( n ) /* clear hash cache */ |
64d06f2f KM |
1365 | register count_int n; /* clear at least this many entries */ |
1366 | { | |
1367 | register count_int i; | |
1368 | register unsigned short *hash_p; | |
1369 | register unsigned short zero = 0; | |
64d06f2f | 1370 | |
a2de8fe3 | 1371 | if ( nfiles++ == 0 ) /* no clear needed if first time */ |
64d06f2f KM |
1372 | return; |
1373 | n = (n+15) & (-16); | |
1374 | hash_p = hashcache + n; | |
a2de8fe3 | 1375 | for ( i = n; i > 0; i -=16 ) { /* can do BSD bzero(3) or Sys V memset(3) */ |
64d06f2f KM |
1376 | *(hash_p-16) = zero; |
1377 | *(hash_p-15) = zero; | |
1378 | *(hash_p-14) = zero; | |
1379 | *(hash_p-13) = zero; | |
1380 | *(hash_p-12) = zero; | |
1381 | *(hash_p-11) = zero; | |
1382 | *(hash_p-10) = zero; | |
1383 | *(hash_p-9) = zero; | |
1384 | *(hash_p-8) = zero; | |
1385 | *(hash_p-7) = zero; | |
1386 | *(hash_p-6) = zero; | |
1387 | *(hash_p-5) = zero; | |
1388 | *(hash_p-4) = zero; | |
1389 | *(hash_p-3) = zero; | |
1390 | *(hash_p-2) = zero; | |
1391 | *(hash_p-1) = zero; | |
1392 | hash_p -= 16; | |
1393 | } | |
1394 | } | |
1395 | ||
1396 | hogtally () /* compute character code hog */ | |
1397 | { | |
1398 | register int i, most; | |
1399 | ||
1400 | for ( i = most = 0; i < 256; i++ ) | |
1401 | if ( cfreq [i] >= cfreq [most] ) | |
1402 | most = i; | |
1403 | return ( most ); | |
1404 | } | |
1405 | ||
a2de8fe3 KM |
1406 | cl_hash(hsize) /* clear hash cache, re-init code table */ |
1407 | register count_int hsize; | |
64d06f2f KM |
1408 | { |
1409 | register count_int *htab_p = htab+hsize; | |
a2de8fe3 | 1410 | register count_int i; |
64d06f2f KM |
1411 | register long m1 = -1; |
1412 | ||
a2de8fe3 | 1413 | cl_cache( min((count_int)hsize, MAX_CACHE) ); |
64d06f2f KM |
1414 | |
1415 | i = hsize - 16; | |
1416 | do { | |
1417 | *(htab_p-16) = m1; | |
1418 | *(htab_p-15) = m1; | |
1419 | *(htab_p-14) = m1; | |
1420 | *(htab_p-13) = m1; | |
1421 | *(htab_p-12) = m1; | |
1422 | *(htab_p-11) = m1; | |
1423 | *(htab_p-10) = m1; | |
1424 | *(htab_p-9) = m1; | |
1425 | *(htab_p-8) = m1; | |
1426 | *(htab_p-7) = m1; | |
1427 | *(htab_p-6) = m1; | |
1428 | *(htab_p-5) = m1; | |
1429 | *(htab_p-4) = m1; | |
1430 | *(htab_p-3) = m1; | |
1431 | *(htab_p-2) = m1; | |
1432 | *(htab_p-1) = m1; | |
1433 | htab_p -= 16; | |
1434 | } while ((i -= 16) >= 0); | |
1435 | for ( i += 16; i > 0; i-- ) | |
1436 | *--htab_p = m1; | |
1437 | } | |
a2de8fe3 KM |
1438 | |
1439 | #ifdef USERMEM | |
1440 | cl_sparse ( ) /* clear sparse table indirectly thru "shadow" array */ | |
1441 | { | |
1442 | register code_int i; | |
1443 | ||
1444 | for ( i = (1 << maxbits) - 1; i >= 0; i-- ) | |
1445 | ftable [fcodemem [i]] = 0; | |
1446 | } | |
1447 | #endif USERMEM |