Commit | Line | Data |
---|---|---|
64d06f2f | 1 | #ifndef lint |
c8f1fe84 | 2 | static char sccsid[] = "@(#)compress.c @(#)compress.c 5.9 (Berkeley) %G%"; |
64d06f2f KM |
3 | #endif not lint |
4 | ||
4eeb10d4 JL |
5 | /* |
6 | * Compress - data compression program | |
7 | */ | |
a2de8fe3 KM |
8 | #define min(a,b) ((a>b) ? b : a) |
9 | ||
64d06f2f | 10 | /* |
a2de8fe3 KM |
11 | * machine variants which require cc -Dmachine: pdp11, z8000, pcxt |
12 | */ | |
13 | ||
4eeb10d4 JL |
14 | /* |
15 | * Set USERMEM to the maximum amount of physical user memory available | |
64d06f2f | 16 | * in bytes. USERMEM is used to determine the maximum BITS that can be used |
4eeb10d4 | 17 | * for compression. |
64d06f2f KM |
18 | * |
19 | * SACREDMEM is the amount of physical memory saved for others; compress | |
20 | * will hog the rest. | |
21 | */ | |
22 | #ifndef SACREDMEM | |
23 | #define SACREDMEM 0 | |
24 | #endif | |
25 | ||
a2de8fe3 | 26 | #ifndef USERMEM |
4eeb10d4 JL |
27 | # define USERMEM 450000 /* default user memory */ |
28 | #endif | |
29 | ||
30 | #ifdef interdata /* (Perkin-Elmer) */ | |
31 | #define SIGNED_COMPARE_SLOW /* signed compare is slower than unsigned */ | |
a2de8fe3 KM |
32 | #endif |
33 | ||
64d06f2f KM |
34 | #ifdef pdp11 |
35 | # define BITS 12 /* max bits/code for 16-bit machine */ | |
36 | # define NO_UCHAR /* also if "unsigned char" functions as signed char */ | |
37 | # undef USERMEM | |
4eeb10d4 | 38 | #endif /* pdp11 */ /* don't forget to compile with -i */ |
a2de8fe3 KM |
39 | |
40 | #ifdef z8000 | |
41 | # define BITS 12 | |
a2de8fe3 KM |
42 | # undef vax /* weird preprocessor */ |
43 | # undef USERMEM | |
4eeb10d4 | 44 | #endif /* z8000 */ |
a2de8fe3 KM |
45 | |
46 | #ifdef pcxt | |
47 | # define BITS 12 | |
a2de8fe3 | 48 | # undef USERMEM |
4eeb10d4 | 49 | #endif /* pcxt */ |
64d06f2f KM |
50 | |
51 | #ifdef USERMEM | |
4eeb10d4 JL |
52 | # if USERMEM >= (433484+SACREDMEM) |
53 | # define PBITS 16 | |
54 | # else | |
55 | # if USERMEM >= (229600+SACREDMEM) | |
56 | # define PBITS 15 | |
57 | # else | |
58 | # if USERMEM >= (127536+SACREDMEM) | |
59 | # define PBITS 14 | |
60 | # else | |
61 | # if USERMEM >= (73464+SACREDMEM) | |
62 | # define PBITS 13 | |
64d06f2f | 63 | # else |
4eeb10d4 | 64 | # define PBITS 12 |
64d06f2f | 65 | # endif |
4eeb10d4 JL |
66 | # endif |
67 | # endif | |
68 | # endif | |
69 | # undef USERMEM | |
70 | #endif /* USERMEM */ | |
64d06f2f KM |
71 | |
72 | #ifdef PBITS /* Preferred BITS for this memory size */ | |
73 | # ifndef BITS | |
74 | # define BITS PBITS | |
75 | # endif BITS | |
4eeb10d4 | 76 | #endif /* PBITS */ |
64d06f2f KM |
77 | |
78 | #if BITS == 16 | |
79 | # define HSIZE 69001 /* 95% occupancy */ | |
80 | #endif | |
81 | #if BITS == 15 | |
82 | # define HSIZE 35023 /* 94% occupancy */ | |
83 | #endif | |
84 | #if BITS == 14 | |
85 | # define HSIZE 18013 /* 91% occupancy */ | |
86 | #endif | |
87 | #if BITS == 13 | |
88 | # define HSIZE 9001 /* 91% occupancy */ | |
89 | #endif | |
a2de8fe3 | 90 | #if BITS <= 12 |
64d06f2f KM |
91 | # define HSIZE 5003 /* 80% occupancy */ |
92 | #endif | |
4eeb10d4 JL |
93 | |
94 | #ifdef M_XENIX /* Stupid compiler can't handle arrays with */ | |
95 | # if BITS == 16 /* more than 65535 bytes - so we fake it */ | |
96 | # define XENIX_16 | |
97 | # else | |
98 | # if BITS > 13 /* Code only handles BITS = 12, 13, or 16 */ | |
99 | # define BITS 13 | |
100 | # endif | |
101 | # endif | |
102 | #endif | |
64d06f2f KM |
103 | |
104 | /* | |
105 | * a code_int must be able to hold 2**BITS values of type int, and also -1 | |
106 | */ | |
107 | #if BITS > 15 | |
108 | typedef long int code_int; | |
109 | #else | |
110 | typedef int code_int; | |
111 | #endif | |
112 | ||
4eeb10d4 | 113 | #ifdef SIGNED_COMPARE_SLOW |
64d06f2f KM |
114 | typedef unsigned long int count_int; |
115 | typedef unsigned short int count_short; | |
116 | #else | |
117 | typedef long int count_int; | |
118 | #endif | |
119 | ||
120 | #ifdef NO_UCHAR | |
121 | typedef char char_type; | |
4eeb10d4 | 122 | #else |
64d06f2f | 123 | typedef unsigned char char_type; |
4eeb10d4 | 124 | #endif /* UCHAR */ |
64d06f2f KM |
125 | char_type magic_header[] = { "\037\235" }; /* 1F 9D */ |
126 | ||
127 | /* Defines for third byte of header */ | |
128 | #define BIT_MASK 0x1f | |
129 | #define BLOCK_MASK 0x80 | |
130 | /* Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is | |
131 | a fourth header byte (for expansion). | |
132 | */ | |
133 | #define INIT_BITS 9 /* initial number of bits/code */ | |
134 | ||
135 | /* | |
a2de8fe3 | 136 | * compress.c - File compression ala IEEE Computer, June 1984. |
64d06f2f | 137 | * |
c8f1fe84 | 138 | * Authors: Spencer W. Thomas (decvax!utah-cs!thomas) |
64d06f2f KM |
139 | * Jim McKie (decvax!mcvax!jim) |
140 | * Steve Davies (decvax!vax135!petsd!peora!srd) | |
141 | * Ken Turkowski (decvax!decwrl!turtlevax!ken) | |
142 | * James A. Woods (decvax!ihnp4!ames!jaw) | |
143 | * Joe Orost (decvax!vax135!petsd!joe) | |
144 | * | |
4eeb10d4 | 145 | * $Header: compress.c,v 4.0 85/07/30 12:50:00 joe Release $ |
64d06f2f | 146 | * $Log: compress.c,v $ |
4eeb10d4 JL |
147 | * Revision 4.0 85/07/30 12:50:00 joe |
148 | * Removed ferror() calls in output routine on every output except first. | |
149 | * Prepared for release to the world. | |
150 | * | |
151 | * Revision 3.6 85/07/04 01:22:21 joe | |
152 | * Remove much wasted storage by overlaying hash table with the tables | |
153 | * used by decompress: tab_suffix[1<<BITS], stack[8000]. Updated USERMEM | |
154 | * computations. Fixed dump_tab() DEBUG routine. | |
155 | * | |
156 | * Revision 3.5 85/06/30 20:47:21 jaw | |
157 | * Change hash function to use exclusive-or. Rip out hash cache. These | |
158 | * speedups render the megamemory version defunct, for now. Make decoder | |
159 | * stack global. Parts of the RCS trunks 2.7, 2.6, and 2.1 no longer apply. | |
160 | * | |
161 | * Revision 3.4 85/06/27 12:00:00 ken | |
162 | * Get rid of all floating-point calculations by doing all compression ratio | |
163 | * calculations in fixed point. | |
164 | * | |
165 | * Revision 3.3 85/06/24 21:53:24 joe | |
166 | * Incorporate portability suggestion for M_XENIX. Got rid of text on #else | |
167 | * and #endif lines. Cleaned up #ifdefs for vax and interdata. | |
168 | * | |
a2de8fe3 KM |
169 | * Revision 3.2 85/06/06 21:53:24 jaw |
170 | * Incorporate portability suggestions for Z8000, IBM PC/XT from mailing list. | |
171 | * Default to "quiet" output (no compression statistics). | |
172 | * | |
173 | * Revision 3.1 85/05/12 18:56:13 jaw | |
174 | * Integrate decompress() stack speedups (from early pointer mods by McKie). | |
175 | * Repair multi-file USERMEM gaffe. Unify 'force' flags to mimic semantics | |
176 | * of SVR2 'pack'. Streamline block-compress table clear logic. Increase | |
177 | * output byte count by magic number size. | |
178 | * | |
64d06f2f KM |
179 | * Revision 3.0 84/11/27 11:50:00 petsd!joe |
180 | * Set HSIZE depending on BITS. Set BITS depending on USERMEM. Unrolled | |
a2de8fe3 | 181 | * loops in clear routines. Added "-C" flag for 2.0 compatibility. Used |
64d06f2f KM |
182 | * unsigned compares on Perkin-Elmer. Fixed foreground check. |
183 | * | |
184 | * Revision 2.7 84/11/16 19:35:39 ames!jaw | |
185 | * Cache common hash codes based on input statistics; this improves | |
186 | * performance for low-density raster images. Pass on #ifdef bundle | |
187 | * from Turkowski. | |
188 | * | |
189 | * Revision 2.6 84/11/05 19:18:21 ames!jaw | |
190 | * Vary size of hash tables to reduce time for small files. | |
191 | * Tune PDP-11 hash function. | |
192 | * | |
193 | * Revision 2.5 84/10/30 20:15:14 ames!jaw | |
194 | * Junk chaining; replace with the simpler (and, on the VAX, faster) | |
195 | * double hashing, discussed within. Make block compression standard. | |
196 | * | |
197 | * Revision 2.4 84/10/16 11:11:11 ames!jaw | |
198 | * Introduce adaptive reset for block compression, to boost the rate | |
199 | * another several percent. (See mailing list notes.) | |
200 | * | |
201 | * Revision 2.3 84/09/22 22:00:00 petsd!joe | |
202 | * Implemented "-B" block compress. Implemented REVERSE sorting of tab_next. | |
203 | * Bug fix for last bits. Changed fwrite to putchar loop everywhere. | |
204 | * | |
205 | * Revision 2.2 84/09/18 14:12:21 ames!jaw | |
206 | * Fold in news changes, small machine typedef from thomas, | |
207 | * #ifdef interdata from joe. | |
208 | * | |
209 | * Revision 2.1 84/09/10 12:34:56 ames!jaw | |
210 | * Configured fast table lookup for 32-bit machines. | |
211 | * This cuts user time in half for b <= FBITS, and is useful for news batching | |
212 | * from VAX to PDP sites. Also sped up decompress() [fwrite->putc] and | |
213 | * added signal catcher [plus beef in writeerr()] to delete effluvia. | |
214 | * | |
215 | * Revision 2.0 84/08/28 22:00:00 petsd!joe | |
216 | * Add check for foreground before prompting user. Insert maxbits into | |
217 | * compressed file. Force file being uncompressed to end with ".Z". | |
218 | * Added "-c" flag and "zcat". Prepared for release. | |
219 | * | |
220 | * Revision 1.10 84/08/24 18:28:00 turtlevax!ken | |
221 | * Will only compress regular files (no directories), added a magic number | |
222 | * header (plus an undocumented -n flag to handle old files without headers), | |
223 | * added -f flag to force overwriting of possibly existing destination file, | |
224 | * otherwise the user is prompted for a response. Will tack on a .Z to a | |
225 | * filename if it doesn't have one when decompressing. Will only replace | |
226 | * file if it was compressed. | |
227 | * | |
228 | * Revision 1.9 84/08/16 17:28:00 turtlevax!ken | |
229 | * Removed scanargs(), getopt(), added .Z extension and unlimited number of | |
230 | * filenames to compress. Flags may be clustered (-Ddvb12) or separated | |
231 | * (-D -d -v -b 12), or combination thereof. Modes and other status is | |
232 | * copied with copystat(). -O bug for 4.2 seems to have disappeared with | |
233 | * 1.8. | |
234 | * | |
235 | * Revision 1.8 84/08/09 23:15:00 joe | |
236 | * Made it compatible with vax version, installed jim's fixes/enhancements | |
237 | * | |
238 | * Revision 1.6 84/08/01 22:08:00 joe | |
239 | * Sped up algorithm significantly by sorting the compress chain. | |
240 | * | |
241 | * Revision 1.5 84/07/13 13:11:00 srd | |
242 | * Added C version of vax asm routines. Changed structure to arrays to | |
243 | * save much memory. Do unsigned compares where possible (faster on | |
244 | * Perkin-Elmer) | |
245 | * | |
246 | * Revision 1.4 84/07/05 03:11:11 thomas | |
247 | * Clean up the code a little and lint it. (Lint complains about all | |
248 | * the regs used in the asm, but I'm not going to "fix" this.) | |
249 | * | |
250 | * Revision 1.3 84/07/05 02:06:54 thomas | |
251 | * Minor fixes. | |
252 | * | |
253 | * Revision 1.2 84/07/05 00:27:27 thomas | |
254 | * Add variable bit length output. | |
255 | * | |
256 | */ | |
4eeb10d4 | 257 | static char rcs_ident[] = "$Header: compress.c,v 4.0 85/07/30 12:50:00 joe Release $"; |
64d06f2f KM |
258 | |
259 | #include <stdio.h> | |
260 | #include <ctype.h> | |
261 | #include <signal.h> | |
262 | #include <sys/types.h> | |
263 | #include <sys/stat.h> | |
c8f1fe84 JL |
264 | #ifdef notdef |
265 | #include <sys/ioctl.h> | |
266 | #endif | |
64d06f2f KM |
267 | |
268 | #define ARGVAL() (*++(*argv) || (--argc && *++argv)) | |
269 | ||
270 | int n_bits; /* number of bits/code */ | |
271 | int maxbits = BITS; /* user settable max # bits/code */ | |
272 | code_int maxcode; /* maximum code, given n_bits */ | |
273 | code_int maxmaxcode = 1 << BITS; /* should NEVER generate this code */ | |
274 | #ifdef COMPATIBLE /* But wrong! */ | |
275 | # define MAXCODE(n_bits) (1 << (n_bits) - 1) | |
4eeb10d4 | 276 | #else |
64d06f2f | 277 | # define MAXCODE(n_bits) ((1 << (n_bits)) - 1) |
4eeb10d4 JL |
278 | #endif /* COMPATIBLE */ |
279 | ||
280 | #ifdef XENIX_16 | |
281 | count_int htab0[8192]; | |
282 | count_int htab1[8192]; | |
283 | count_int htab2[8192]; | |
284 | count_int htab3[8192]; | |
285 | count_int htab4[8192]; | |
286 | count_int htab5[8192]; | |
287 | count_int htab6[8192]; | |
288 | count_int htab7[8192]; | |
289 | count_int htab8[HSIZE-65536]; | |
290 | count_int * htab[9] = { | |
291 | htab0, htab1, htab2, htab3, htab4, htab5, htab6, htab7, htab8 }; | |
292 | ||
293 | #define htabof(i) (htab[(i) >> 13][(i) & 0x1fff]) | |
294 | unsigned short code0tab[16384]; | |
295 | unsigned short code1tab[16384]; | |
296 | unsigned short code2tab[16384]; | |
297 | unsigned short code3tab[16384]; | |
298 | unsigned short code4tab[16384]; | |
299 | unsigned short * codetab[5] = { | |
300 | code0tab, code1tab, code2tab, code3tab, code4tab }; | |
301 | ||
302 | #define codetabof(i) (codetab[(i) >> 14][(i) & 0x3fff]) | |
303 | ||
304 | #else /* Normal machine */ | |
c8f1fe84 JL |
305 | |
306 | #ifdef sel /* gould base register braindamage */ | |
307 | /*NOBASE*/ | |
308 | count_int htab [HSIZE]; | |
309 | unsigned short codetab [HSIZE]; | |
310 | /*NOBASE*/ | |
311 | #else | |
64d06f2f KM |
312 | count_int htab [HSIZE]; |
313 | unsigned short codetab [HSIZE]; | |
c8f1fe84 JL |
314 | #endif sel |
315 | ||
4eeb10d4 JL |
316 | #define htabof(i) htab[i] |
317 | #define codetabof(i) codetab[i] | |
318 | #endif /* XENIX_16 */ | |
64d06f2f KM |
319 | code_int hsize = HSIZE; /* for dynamic table sizing */ |
320 | count_int fsize; | |
321 | ||
4eeb10d4 JL |
322 | /* |
323 | * To save much memory, we overlay the table used by compress() with those | |
324 | * used by decompress(). The tab_prefix table is the same size and type | |
325 | * as the codetab. The tab_suffix table needs 2**BITS characters. We | |
326 | * get this from the beginning of htab. The output stack uses the rest | |
327 | * of htab, and contains characters. There is plenty of room for any | |
328 | * possible stack (stack used to be 8000 characters). | |
329 | */ | |
64d06f2f | 330 | |
4eeb10d4 JL |
331 | #define tab_prefixof(i) codetabof(i) |
332 | #ifdef XENIX_16 | |
333 | # define tab_suffixof(i) ((char_type *)htab[(i)>>15])[(i) & 0x7fff] | |
334 | # define de_stack ((char_type *)(htab2)) | |
335 | #else /* Normal machine */ | |
336 | # define tab_suffixof(i) ((char_type *)(htab))[i] | |
337 | # define de_stack ((char_type *)&tab_suffixof(1<<BITS)) | |
338 | #endif /* XENIX_16 */ | |
64d06f2f KM |
339 | |
340 | code_int free_ent = 0; /* first unused entry */ | |
c8f1fe84 JL |
341 | int exit_stat = 0; /* per-file status */ |
342 | int perm_stat = 0; /* permanent status */ | |
64d06f2f KM |
343 | |
344 | code_int getcode(); | |
345 | ||
346 | Usage() { | |
347 | #ifdef DEBUG | |
a2de8fe3 | 348 | fprintf(stderr,"Usage: compress [-dDVfc] [-b maxbits] [file ...]\n"); |
64d06f2f KM |
349 | } |
350 | int debug = 0; | |
4eeb10d4 | 351 | #else |
bffb9eb4 | 352 | fprintf(stderr,"Usage: compress [-fvc] [-b maxbits] [file ...]\n"); |
64d06f2f | 353 | } |
4eeb10d4 | 354 | #endif /* DEBUG */ |
a2de8fe3 | 355 | int nomagic = 0; /* Use a 3-byte magic number header, unless old file */ |
64d06f2f | 356 | int zcat_flg = 0; /* Write output on stdout, suppress messages */ |
be5873af | 357 | int precious = 1; /* Don't unlink output file on interrupt */ |
a2de8fe3 | 358 | int quiet = 1; /* don't tell me about compression */ |
64d06f2f KM |
359 | |
360 | /* | |
361 | * block compression parameters -- after all codes are used up, | |
362 | * and compression rate changes, start over. | |
363 | */ | |
364 | int block_compress = BLOCK_MASK; | |
365 | int clear_flg = 0; | |
4eeb10d4 | 366 | long int ratio = 0; |
64d06f2f KM |
367 | #define CHECK_GAP 10000 /* ratio check interval */ |
368 | count_int checkpoint = CHECK_GAP; | |
369 | /* | |
370 | * the next two codes should not be changed lightly, as they must not | |
371 | * lie within the contiguous general code space. | |
372 | */ | |
373 | #define FIRST 257 /* first free entry */ | |
374 | #define CLEAR 256 /* table clear output code */ | |
375 | ||
376 | int force = 0; | |
377 | char ofname [100]; | |
378 | #ifdef DEBUG | |
379 | int verbose = 0; | |
4eeb10d4 | 380 | #endif /* DEBUG */ |
c8f1fe84 JL |
381 | int (*oldint)(); |
382 | int bgnd_flag; | |
64d06f2f | 383 | |
4eeb10d4 | 384 | int do_decomp = 0; |
a2de8fe3 | 385 | |
64d06f2f KM |
386 | /***************************************************************** |
387 | * TAG( main ) | |
388 | * | |
389 | * Algorithm from "A Technique for High Performance Data Compression", | |
390 | * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19. | |
391 | * | |
a2de8fe3 | 392 | * Usage: compress [-dfvc] [-b bits] [file ...] |
64d06f2f KM |
393 | * Inputs: |
394 | * -d: If given, decompression is done instead. | |
395 | * | |
396 | * -c: Write output on stdout, don't remove original. | |
397 | * | |
398 | * -b: Parameter limits the max number of bits/code. | |
399 | * | |
400 | * -f: Forces output file to be generated, even if one already | |
a2de8fe3 KM |
401 | * exists, and even if no space is saved by compressing. |
402 | * If -f is not used, the user will be prompted if stdin is | |
403 | * a tty, otherwise, the output file will not be overwritten. | |
64d06f2f | 404 | * |
a2de8fe3 | 405 | * -v: Write compression statistics |
64d06f2f KM |
406 | * |
407 | * file ...: Files to be compressed. If none specified, stdin | |
408 | * is used. | |
409 | * Outputs: | |
410 | * file.Z: Compressed form of file with same mode, owner, and utimes | |
411 | * or stdout (if stdin used as input) | |
412 | * | |
413 | * Assumptions: | |
414 | * When filenames are given, replaces with the compressed version | |
a2de8fe3 | 415 | * (.Z suffix) only if the file decreases in size. |
64d06f2f KM |
416 | * Algorithm: |
417 | * Modified Lempel-Ziv method (LZW). Basically finds common | |
418 | * substrings and replaces them with a variable size code. This is | |
419 | * deterministic, and can be done on the fly. Thus, the decompression | |
a2de8fe3 | 420 | * procedure needs no input table, but tracks the way the table was built. |
64d06f2f | 421 | */ |
4eeb10d4 | 422 | |
64d06f2f KM |
423 | main( argc, argv ) |
424 | register int argc; char **argv; | |
425 | { | |
64d06f2f KM |
426 | int overwrite = 0; /* Do not overwrite unless given -f flag */ |
427 | char tempname[100]; | |
428 | char **filelist, **fileptr; | |
a2de8fe3 | 429 | char *cp, *rindex(), *malloc(); |
64d06f2f | 430 | struct stat statbuf; |
4eeb10d4 | 431 | extern onintr(), oops(); |
64d06f2f | 432 | |
c8f1fe84 JL |
433 | /* This bg check only works for sh. */ |
434 | if ( (oldint = signal ( SIGINT, SIG_IGN )) != SIG_IGN ) { | |
64d06f2f | 435 | signal ( SIGINT, onintr ); |
4eeb10d4 JL |
436 | signal ( SIGSEGV, oops ); |
437 | } | |
c8f1fe84 JL |
438 | bgnd_flag = oldint != SIG_DFL; |
439 | #ifdef notdef /* This works for csh but we don't want it. */ | |
440 | { int tgrp; | |
441 | if (bgnd_flag == 0 && ioctl(2, TIOCGPGRP, (char *)&tgrp) == 0 && | |
442 | getpgrp(0) != tgrp) | |
443 | bgnd_flag = 1; | |
444 | } | |
445 | #endif | |
446 | ||
64d06f2f KM |
447 | #ifdef COMPATIBLE |
448 | nomagic = 1; /* Original didn't have a magic number */ | |
4eeb10d4 | 449 | #endif /* COMPATIBLE */ |
64d06f2f KM |
450 | |
451 | filelist = fileptr = (char **)(malloc(argc * sizeof(*argv))); | |
452 | *filelist = NULL; | |
453 | ||
454 | if((cp = rindex(argv[0], '/')) != 0) { | |
455 | cp++; | |
456 | } else { | |
457 | cp = argv[0]; | |
458 | } | |
459 | if(strcmp(cp, "uncompress") == 0) { | |
460 | do_decomp = 1; | |
461 | } else if(strcmp(cp, "zcat") == 0) { | |
462 | do_decomp = 1; | |
463 | zcat_flg = 1; | |
464 | } | |
465 | ||
466 | #ifdef BSD4_2 | |
467 | /* 4.2BSD dependent - take it out if not */ | |
468 | setlinebuf( stderr ); | |
4eeb10d4 | 469 | #endif /* BSD4_2 */ |
64d06f2f KM |
470 | |
471 | /* Argument Processing | |
472 | * All flags are optional. | |
473 | * -D => debug | |
4eeb10d4 | 474 | * -V => print Version; debug verbose |
64d06f2f | 475 | * -d => do_decomp |
a2de8fe3 | 476 | * -v => unquiet |
64d06f2f KM |
477 | * -f => force overwrite of output file |
478 | * -n => no header: useful to uncompress old files | |
479 | * -b maxbits => maxbits. If -b is specified, then maxbits MUST be | |
480 | * given also. | |
481 | * -c => cat all output to stdout | |
a2de8fe3 | 482 | * -C => generate output compatible with compress 2.0. |
64d06f2f KM |
483 | * if a string is left, must be an input filename. |
484 | */ | |
485 | for (argc--, argv++; argc > 0; argc--, argv++) { | |
486 | if (**argv == '-') { /* A flag argument */ | |
487 | while (*++(*argv)) { /* Process all flags in this arg */ | |
488 | switch (**argv) { | |
489 | #ifdef DEBUG | |
490 | case 'D': | |
491 | debug = 1; | |
492 | break; | |
a2de8fe3 | 493 | case 'V': |
64d06f2f | 494 | verbose = 1; |
4eeb10d4 JL |
495 | version(); |
496 | break; | |
497 | #else | |
498 | case 'V': | |
499 | version(); | |
64d06f2f | 500 | break; |
4eeb10d4 | 501 | #endif /* DEBUG */ |
a2de8fe3 KM |
502 | case 'v': |
503 | quiet = 0; | |
504 | break; | |
64d06f2f KM |
505 | case 'd': |
506 | do_decomp = 1; | |
507 | break; | |
508 | case 'f': | |
a2de8fe3 | 509 | case 'F': |
64d06f2f | 510 | overwrite = 1; |
a2de8fe3 | 511 | force = 1; |
64d06f2f KM |
512 | break; |
513 | case 'n': | |
514 | nomagic = 1; | |
515 | break; | |
516 | case 'C': | |
517 | block_compress = 0; | |
518 | break; | |
519 | case 'b': | |
520 | if (!ARGVAL()) { | |
521 | fprintf(stderr, "Missing maxbits\n"); | |
522 | Usage(); | |
523 | exit(1); | |
524 | } | |
525 | maxbits = atoi(*argv); | |
526 | goto nextarg; | |
527 | case 'c': | |
528 | zcat_flg = 1; | |
529 | break; | |
530 | case 'q': | |
531 | quiet = 1; | |
532 | break; | |
64d06f2f KM |
533 | default: |
534 | fprintf(stderr, "Unknown flag: '%c'; ", **argv); | |
535 | Usage(); | |
536 | exit(1); | |
537 | } | |
538 | } | |
539 | } | |
540 | else { /* Input file name */ | |
541 | *fileptr++ = *argv; /* Build input file list */ | |
542 | *fileptr = NULL; | |
4eeb10d4 | 543 | /* process nextarg; */ |
64d06f2f KM |
544 | } |
545 | nextarg: continue; | |
546 | } | |
547 | ||
548 | if(maxbits < INIT_BITS) maxbits = INIT_BITS; | |
549 | if (maxbits > BITS) maxbits = BITS; | |
550 | maxmaxcode = 1 << maxbits; | |
551 | ||
552 | if (*filelist != NULL) { | |
553 | for (fileptr = filelist; *fileptr; fileptr++) { | |
554 | exit_stat = 0; | |
c8f1fe84 | 555 | if (do_decomp) { /* DECOMPRESSION */ |
64d06f2f KM |
556 | /* Check for .Z suffix */ |
557 | if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") != 0) { | |
558 | /* No .Z: tack one on */ | |
559 | strcpy(tempname, *fileptr); | |
560 | strcat(tempname, ".Z"); | |
561 | *fileptr = tempname; | |
562 | } | |
563 | /* Open input file */ | |
564 | if ((freopen(*fileptr, "r", stdin)) == NULL) { | |
c8f1fe84 JL |
565 | perror(*fileptr); |
566 | perm_stat = 1; | |
567 | continue; | |
64d06f2f KM |
568 | } |
569 | /* Check the magic number */ | |
570 | if (nomagic == 0) { | |
571 | if ((getchar() != (magic_header[0] & 0xFF)) | |
572 | || (getchar() != (magic_header[1] & 0xFF))) { | |
573 | fprintf(stderr, "%s: not in compressed format\n", | |
574 | *fileptr); | |
575 | continue; | |
576 | } | |
577 | maxbits = getchar(); /* set -b from file */ | |
578 | block_compress = maxbits & BLOCK_MASK; | |
579 | maxbits &= BIT_MASK; | |
580 | maxmaxcode = 1 << maxbits; | |
581 | if(maxbits > BITS) { | |
582 | fprintf(stderr, | |
583 | "%s: compressed with %d bits, can only handle %d bits\n", | |
584 | *fileptr, maxbits, BITS); | |
585 | continue; | |
586 | } | |
587 | } | |
588 | /* Generate output filename */ | |
589 | strcpy(ofname, *fileptr); | |
590 | ofname[strlen(*fileptr) - 2] = '\0'; /* Strip off .Z */ | |
591 | } else { /* COMPRESSION */ | |
592 | if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") == 0) { | |
a2de8fe3 | 593 | fprintf(stderr, "%s: already has .Z suffix -- no change\n", |
64d06f2f KM |
594 | *fileptr); |
595 | continue; | |
596 | } | |
597 | /* Open input file */ | |
598 | if ((freopen(*fileptr, "r", stdin)) == NULL) { | |
c8f1fe84 JL |
599 | perror(*fileptr); |
600 | perm_stat = 1; | |
601 | continue; | |
64d06f2f KM |
602 | } |
603 | stat ( *fileptr, &statbuf ); | |
604 | fsize = (long) statbuf.st_size; | |
605 | /* | |
4eeb10d4 JL |
606 | * tune hash table size for small files -- ad hoc, |
607 | * but the sizes match earlier #defines, which | |
608 | * serve as upper bounds on the number of output codes. | |
64d06f2f | 609 | */ |
a2de8fe3 | 610 | hsize = HSIZE; |
64d06f2f | 611 | if ( fsize < (1 << 12) ) |
a2de8fe3 | 612 | hsize = min ( 5003, HSIZE ); |
64d06f2f | 613 | else if ( fsize < (1 << 13) ) |
a2de8fe3 | 614 | hsize = min ( 9001, HSIZE ); |
64d06f2f | 615 | else if ( fsize < (1 << 14) ) |
a2de8fe3 | 616 | hsize = min ( 18013, HSIZE ); |
64d06f2f | 617 | else if ( fsize < (1 << 15) ) |
a2de8fe3 | 618 | hsize = min ( 35023, HSIZE ); |
64d06f2f | 619 | else if ( fsize < 47000 ) |
a2de8fe3 KM |
620 | hsize = min ( 50021, HSIZE ); |
621 | ||
64d06f2f KM |
622 | /* Generate output filename */ |
623 | strcpy(ofname, *fileptr); | |
624 | #ifndef BSD4_2 /* Short filenames */ | |
625 | if ((cp=rindex(ofname,'/')) != NULL) cp++; | |
626 | else cp = ofname; | |
627 | if (strlen(cp) > 12) { | |
628 | fprintf(stderr,"%s: filename too long to tack on .Z\n",cp); | |
629 | continue; | |
630 | } | |
4eeb10d4 | 631 | #endif /* BSD4_2 Long filenames allowed */ |
64d06f2f KM |
632 | strcat(ofname, ".Z"); |
633 | } | |
634 | /* Check for overwrite of existing file */ | |
635 | if (overwrite == 0 && zcat_flg == 0) { | |
636 | if (stat(ofname, &statbuf) == 0) { | |
637 | char response[2]; | |
638 | response[0] = 'n'; | |
639 | fprintf(stderr, "%s already exists;", ofname); | |
c8f1fe84 | 640 | if (bgnd_flag == 0 && isatty(2)) { |
a2de8fe3 | 641 | fprintf(stderr, " do you wish to overwrite %s (y or n)? ", |
64d06f2f KM |
642 | ofname); |
643 | fflush(stderr); | |
644 | read(2, response, 2); | |
645 | while (response[1] != '\n') { | |
646 | if (read(2, response+1, 1) < 0) { /* Ack! */ | |
647 | perror("stderr"); break; | |
648 | } | |
649 | } | |
650 | } | |
651 | if (response[0] != 'y') { | |
652 | fprintf(stderr, "\tnot overwritten\n"); | |
653 | continue; | |
654 | } | |
655 | } | |
656 | } | |
657 | if(zcat_flg == 0) { /* Open output file */ | |
658 | if (freopen(ofname, "w", stdout) == NULL) { | |
659 | perror(ofname); | |
c8f1fe84 | 660 | perm_stat = 1; |
64d06f2f KM |
661 | continue; |
662 | } | |
be5873af | 663 | precious = 0; |
64d06f2f KM |
664 | if(!quiet) |
665 | fprintf(stderr, "%s: ", *fileptr); | |
666 | } | |
667 | ||
668 | /* Actually do the compression/decompression */ | |
669 | if (do_decomp == 0) compress(); | |
670 | #ifndef DEBUG | |
671 | else decompress(); | |
4eeb10d4 | 672 | #else |
64d06f2f KM |
673 | else if (debug == 0) decompress(); |
674 | else printcodes(); | |
675 | if (verbose) dump_tab(); | |
4eeb10d4 | 676 | #endif /* DEBUG */ |
64d06f2f KM |
677 | if(zcat_flg == 0) { |
678 | copystat(*fileptr, ofname); /* Copy stats */ | |
be5873af | 679 | precious = 1; |
4eeb10d4 | 680 | if((exit_stat == 1) || (!quiet)) |
64d06f2f KM |
681 | putc('\n', stderr); |
682 | } | |
683 | } | |
684 | } else { /* Standard input */ | |
685 | if (do_decomp == 0) { | |
686 | compress(); | |
4eeb10d4 JL |
687 | #ifdef DEBUG |
688 | if(verbose) dump_tab(); | |
689 | #endif /* DEBUG */ | |
64d06f2f KM |
690 | if(!quiet) |
691 | putc('\n', stderr); | |
692 | } else { | |
693 | /* Check the magic number */ | |
694 | if (nomagic == 0) { | |
695 | if ((getchar()!=(magic_header[0] & 0xFF)) | |
696 | || (getchar()!=(magic_header[1] & 0xFF))) { | |
697 | fprintf(stderr, "stdin: not in compressed format\n"); | |
698 | exit(1); | |
699 | } | |
700 | maxbits = getchar(); /* set -b from file */ | |
701 | block_compress = maxbits & BLOCK_MASK; | |
702 | maxbits &= BIT_MASK; | |
703 | maxmaxcode = 1 << maxbits; | |
704 | fsize = 100000; /* assume stdin large for USERMEM */ | |
705 | if(maxbits > BITS) { | |
706 | fprintf(stderr, | |
707 | "stdin: compressed with %d bits, can only handle %d bits\n", | |
708 | maxbits, BITS); | |
709 | exit(1); | |
710 | } | |
711 | } | |
712 | #ifndef DEBUG | |
713 | decompress(); | |
4eeb10d4 | 714 | #else |
64d06f2f KM |
715 | if (debug == 0) decompress(); |
716 | else printcodes(); | |
717 | if (verbose) dump_tab(); | |
4eeb10d4 | 718 | #endif /* DEBUG */ |
64d06f2f KM |
719 | } |
720 | } | |
c8f1fe84 | 721 | exit(perm_stat ? perm_stat : exit_stat); |
64d06f2f KM |
722 | } |
723 | ||
724 | static int offset; | |
725 | long int in_count = 1; /* length of input */ | |
726 | long int bytes_out; /* length of compressed output */ | |
727 | long int out_count = 0; /* # of codes output (for debugging) */ | |
728 | ||
64d06f2f KM |
729 | /* |
730 | * compress stdin to stdout | |
731 | * | |
4eeb10d4 JL |
732 | * Algorithm: use open addressing double hashing (no chaining) on the |
733 | * prefix code / next character combination. We do a variant of Knuth's | |
734 | * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime | |
735 | * secondary probe. Here, the modular division first probe is gives way | |
736 | * to a faster exclusive-or manipulation. Also do block compression with | |
737 | * an adaptive reset, whereby the code table is cleared when the compression | |
738 | * ratio decreases, but after the table fills. The variable-length output | |
739 | * codes are re-sized at this point, and a special CLEAR code is generated | |
740 | * for the decompressor. Late addition: construct the table according to | |
741 | * file size for noticeable speed improvement on small files. Please direct | |
742 | * questions about this implementation to ames!jaw. | |
64d06f2f KM |
743 | */ |
744 | ||
64d06f2f KM |
745 | compress() { |
746 | register long fcode; | |
747 | register code_int i = 0; | |
748 | register int c; | |
749 | register code_int ent; | |
4eeb10d4 JL |
750 | #ifdef XENIX_16 |
751 | register code_int disp; | |
752 | #else /* Normal machine */ | |
64d06f2f | 753 | register int disp; |
4eeb10d4 | 754 | #endif |
64d06f2f | 755 | register code_int hsize_reg; |
4eeb10d4 | 756 | register int hshift; |
64d06f2f KM |
757 | |
758 | #ifndef COMPATIBLE | |
759 | if (nomagic == 0) { | |
760 | putchar(magic_header[0]); putchar(magic_header[1]); | |
761 | putchar((char)(maxbits | block_compress)); | |
4eeb10d4 JL |
762 | if(ferror(stdout)) |
763 | writeerr(); | |
64d06f2f | 764 | } |
4eeb10d4 | 765 | #endif /* COMPATIBLE */ |
64d06f2f KM |
766 | |
767 | offset = 0; | |
a2de8fe3 | 768 | bytes_out = 3; /* includes 3-byte header mojo */ |
64d06f2f KM |
769 | out_count = 0; |
770 | clear_flg = 0; | |
4eeb10d4 | 771 | ratio = 0; |
64d06f2f KM |
772 | in_count = 1; |
773 | checkpoint = CHECK_GAP; | |
774 | maxcode = MAXCODE(n_bits = INIT_BITS); | |
775 | free_ent = ((block_compress) ? FIRST : 256 ); | |
64d06f2f | 776 | |
4eeb10d4 | 777 | ent = getchar (); |
a2de8fe3 | 778 | |
4eeb10d4 JL |
779 | hshift = 0; |
780 | for ( fcode = (long) hsize; fcode < 65536L; fcode *= 2L ) | |
781 | hshift++; | |
782 | hshift = 8 - hshift; /* set hash code range bound */ | |
64d06f2f | 783 | |
64d06f2f | 784 | hsize_reg = hsize; |
4eeb10d4 | 785 | cl_hash( (count_int) hsize_reg); /* clear hash table */ |
64d06f2f | 786 | |
4eeb10d4 | 787 | #ifdef SIGNED_COMPARE_SLOW |
64d06f2f | 788 | while ( (c = getchar()) != (unsigned) EOF ) { |
4eeb10d4 JL |
789 | #else |
790 | while ( (c = getchar()) != EOF ) { | |
791 | #endif | |
64d06f2f | 792 | in_count++; |
64d06f2f | 793 | fcode = (long) (((long) c << maxbits) + ent); |
4eeb10d4 JL |
794 | i = ((c << hshift) ^ ent); /* xor hashing */ |
795 | ||
796 | if ( htabof (i) == fcode ) { | |
797 | ent = codetabof (i); | |
64d06f2f | 798 | continue; |
4eeb10d4 | 799 | } else if ( (long)htabof (i) < 0 ) /* empty slot */ |
64d06f2f | 800 | goto nomatch; |
4eeb10d4 | 801 | disp = hsize_reg - i; /* secondary hash (after G. Knott) */ |
64d06f2f KM |
802 | if ( i == 0 ) |
803 | disp = 1; | |
804 | probe: | |
805 | if ( (i -= disp) < 0 ) | |
806 | i += hsize_reg; | |
807 | ||
4eeb10d4 JL |
808 | if ( htabof (i) == fcode ) { |
809 | ent = codetabof (i); | |
64d06f2f KM |
810 | continue; |
811 | } | |
4eeb10d4 | 812 | if ( (long)htabof (i) > 0 ) |
64d06f2f KM |
813 | goto probe; |
814 | nomatch: | |
815 | output ( (code_int) ent ); | |
816 | out_count++; | |
4eeb10d4 JL |
817 | ent = c; |
818 | #ifdef SIGNED_COMPARE_SLOW | |
64d06f2f KM |
819 | if ( (unsigned) free_ent < (unsigned) maxmaxcode) { |
820 | #else | |
821 | if ( free_ent < maxmaxcode ) { | |
822 | #endif | |
4eeb10d4 JL |
823 | codetabof (i) = free_ent++; /* code -> hashtable */ |
824 | htabof (i) = fcode; | |
64d06f2f KM |
825 | } |
826 | else if ( (count_int)in_count >= checkpoint && block_compress ) | |
a2de8fe3 | 827 | cl_block (); |
64d06f2f | 828 | } |
64d06f2f KM |
829 | /* |
830 | * Put out the final code. | |
831 | */ | |
832 | output( (code_int)ent ); | |
833 | out_count++; | |
834 | output( (code_int)-1 ); | |
835 | ||
836 | /* | |
837 | * Print out stats on stderr | |
838 | */ | |
839 | if(zcat_flg == 0 && !quiet) { | |
840 | #ifdef DEBUG | |
841 | fprintf( stderr, | |
4eeb10d4 JL |
842 | "%ld chars in, %ld codes (%ld bytes) out, compression factor: ", |
843 | in_count, out_count, bytes_out ); | |
844 | prratio( stderr, in_count, bytes_out ); | |
845 | fprintf( stderr, "\n"); | |
846 | fprintf( stderr, "\tCompression as in compact: " ); | |
847 | prratio( stderr, in_count-bytes_out, in_count ); | |
848 | fprintf( stderr, "\n"); | |
849 | fprintf( stderr, "\tLargest code (of last block) was %d (%d bits)\n", | |
850 | free_ent - 1, n_bits ); | |
851 | #else /* !DEBUG */ | |
852 | fprintf( stderr, "Compression: " ); | |
853 | prratio( stderr, in_count-bytes_out, in_count ); | |
854 | #endif /* DEBUG */ | |
64d06f2f KM |
855 | } |
856 | if(bytes_out > in_count) /* exit(2) if no savings */ | |
857 | exit_stat = 2; | |
858 | return; | |
859 | } | |
860 | ||
861 | /***************************************************************** | |
862 | * TAG( output ) | |
863 | * | |
864 | * Output the given code. | |
865 | * Inputs: | |
866 | * code: A n_bits-bit integer. If == -1, then EOF. This assumes | |
867 | * that n_bits =< (long)wordsize - 1. | |
868 | * Outputs: | |
869 | * Outputs code to the file. | |
870 | * Assumptions: | |
871 | * Chars are 8 bits long. | |
872 | * Algorithm: | |
873 | * Maintain a BITS character long buffer (so that 8 codes will | |
874 | * fit in it exactly). Use the VAX insv instruction to insert each | |
875 | * code in turn. When the buffer fills up empty it and start over. | |
876 | */ | |
877 | ||
878 | static char buf[BITS]; | |
879 | ||
880 | #ifndef vax | |
881 | char_type lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00}; | |
882 | char_type rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; | |
4eeb10d4 | 883 | #endif /* vax */ |
64d06f2f KM |
884 | |
885 | output( code ) | |
886 | code_int code; | |
887 | { | |
888 | #ifdef DEBUG | |
889 | static int col = 0; | |
4eeb10d4 | 890 | #endif /* DEBUG */ |
64d06f2f KM |
891 | |
892 | /* | |
893 | * On the VAX, it is important to have the register declarations | |
894 | * in exactly the order given, or the asm will break. | |
895 | */ | |
896 | register int r_off = offset, bits= n_bits; | |
897 | register char * bp = buf; | |
898 | ||
4eeb10d4 JL |
899 | #ifdef DEBUG |
900 | if ( verbose ) | |
901 | fprintf( stderr, "%5d%c", code, | |
902 | (col+=6) >= 74 ? (col = 0, '\n') : ' ' ); | |
903 | #endif /* DEBUG */ | |
64d06f2f KM |
904 | if ( code >= 0 ) { |
905 | #ifdef vax | |
4eeb10d4 | 906 | /* VAX DEPENDENT!! Implementation on other machines is below. |
64d06f2f KM |
907 | * |
908 | * Translation: Insert BITS bits from the argument starting at | |
909 | * offset bits from the beginning of buf. | |
910 | */ | |
4eeb10d4 | 911 | 0; /* Work around for pcc -O bug with asm and if stmt */ |
64d06f2f | 912 | asm( "insv 4(ap),r11,r10,(r9)" ); |
4eeb10d4 | 913 | #else /* not a vax */ |
a2de8fe3 KM |
914 | /* |
915 | * byte/bit numbering on the VAX is simulated by the following code | |
916 | */ | |
64d06f2f KM |
917 | /* |
918 | * Get to the first byte. | |
919 | */ | |
920 | bp += (r_off >> 3); | |
921 | r_off &= 7; | |
922 | /* | |
923 | * Since code is always >= 8 bits, only need to mask the first | |
924 | * hunk on the left. | |
925 | */ | |
926 | *bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off]; | |
927 | bp++; | |
928 | bits -= (8 - r_off); | |
929 | code >>= 8 - r_off; | |
930 | /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ | |
931 | if ( bits >= 8 ) { | |
932 | *bp++ = code; | |
933 | code >>= 8; | |
934 | bits -= 8; | |
935 | } | |
936 | /* Last bits. */ | |
937 | if(bits) | |
938 | *bp = code; | |
4eeb10d4 | 939 | #endif /* vax */ |
64d06f2f KM |
940 | offset += n_bits; |
941 | if ( offset == (n_bits << 3) ) { | |
942 | bp = buf; | |
943 | bits = n_bits; | |
944 | bytes_out += bits; | |
945 | do | |
946 | putchar(*bp++); | |
947 | while(--bits); | |
64d06f2f KM |
948 | offset = 0; |
949 | } | |
64d06f2f KM |
950 | |
951 | /* | |
952 | * If the next entry is going to be too big for the code size, | |
953 | * then increase it, if possible. | |
954 | */ | |
955 | if ( free_ent > maxcode || (clear_flg > 0)) | |
956 | { | |
957 | /* | |
958 | * Write the whole buffer, because the input side won't | |
959 | * discover the size increase until after it has read it. | |
960 | */ | |
961 | if ( offset > 0 ) { | |
962 | if( fwrite( buf, 1, n_bits, stdout ) != n_bits) | |
963 | writeerr(); | |
964 | bytes_out += n_bits; | |
965 | } | |
966 | offset = 0; | |
967 | ||
968 | if ( clear_flg ) { | |
969 | maxcode = MAXCODE (n_bits = INIT_BITS); | |
970 | clear_flg = 0; | |
971 | } | |
972 | else { | |
973 | n_bits++; | |
974 | if ( n_bits == maxbits ) | |
975 | maxcode = maxmaxcode; | |
976 | else | |
977 | maxcode = MAXCODE(n_bits); | |
978 | } | |
979 | #ifdef DEBUG | |
980 | if ( debug ) { | |
981 | fprintf( stderr, "\nChange to %d bits\n", n_bits ); | |
982 | col = 0; | |
983 | } | |
4eeb10d4 | 984 | #endif /* DEBUG */ |
64d06f2f KM |
985 | } |
986 | } else { | |
987 | /* | |
988 | * At EOF, write the rest of the buffer. | |
989 | */ | |
990 | if ( offset > 0 ) | |
991 | fwrite( buf, 1, (offset + 7) / 8, stdout ); | |
992 | bytes_out += (offset + 7) / 8; | |
993 | offset = 0; | |
994 | fflush( stdout ); | |
995 | #ifdef DEBUG | |
996 | if ( verbose ) | |
997 | fprintf( stderr, "\n" ); | |
4eeb10d4 | 998 | #endif /* DEBUG */ |
64d06f2f KM |
999 | if( ferror( stdout ) ) |
1000 | writeerr(); | |
1001 | } | |
1002 | } | |
1003 | ||
4eeb10d4 JL |
1004 | /* |
1005 | * Decompress stdin to stdout. This routine adapts to the codes in the | |
1006 | * file building the "string" table on-the-fly; requiring no table to | |
1007 | * be stored in the compressed file. The tables used herein are shared | |
1008 | * with those of the compress() routine. See the definitions above. | |
1009 | */ | |
a2de8fe3 | 1010 | |
64d06f2f | 1011 | decompress() { |
a2de8fe3 | 1012 | register char_type *stackp; |
64d06f2f | 1013 | register int finchar; |
a2de8fe3 KM |
1014 | register code_int code, oldcode, incode; |
1015 | ||
64d06f2f KM |
1016 | /* |
1017 | * As above, initialize the first 256 entries in the table. | |
1018 | */ | |
1019 | maxcode = MAXCODE(n_bits = INIT_BITS); | |
1020 | for ( code = 255; code >= 0; code-- ) { | |
4eeb10d4 JL |
1021 | tab_prefixof(code) = 0; |
1022 | tab_suffixof(code) = (char_type)code; | |
64d06f2f KM |
1023 | } |
1024 | free_ent = ((block_compress) ? FIRST : 256 ); | |
1025 | ||
1026 | finchar = oldcode = getcode(); | |
4eeb10d4 JL |
1027 | if(oldcode == -1) /* EOF already? */ |
1028 | return; /* Get out of here */ | |
64d06f2f | 1029 | putchar( (char)finchar ); /* first code must be 8 bits = char */ |
4eeb10d4 JL |
1030 | if(ferror(stdout)) /* Crash if can't write */ |
1031 | writeerr(); | |
1032 | stackp = de_stack; | |
64d06f2f | 1033 | |
4eeb10d4 | 1034 | while ( (code = getcode()) > -1 ) { |
64d06f2f KM |
1035 | |
1036 | if ( (code == CLEAR) && block_compress ) { | |
a2de8fe3 | 1037 | for ( code = 255; code >= 0; code-- ) |
4eeb10d4 | 1038 | tab_prefixof(code) = 0; |
64d06f2f KM |
1039 | clear_flg = 1; |
1040 | free_ent = FIRST - 1; | |
1041 | if ( (code = getcode ()) == -1 ) /* O, untimely death! */ | |
1042 | break; | |
1043 | } | |
1044 | incode = code; | |
1045 | /* | |
1046 | * Special case for KwKwK string. | |
1047 | */ | |
1048 | if ( code >= free_ent ) { | |
a2de8fe3 | 1049 | *stackp++ = finchar; |
64d06f2f KM |
1050 | code = oldcode; |
1051 | } | |
1052 | ||
1053 | /* | |
1054 | * Generate output characters in reverse order | |
1055 | */ | |
4eeb10d4 | 1056 | #ifdef SIGNED_COMPARE_SLOW |
64d06f2f | 1057 | while ( ((unsigned long)code) >= ((unsigned long)256) ) { |
4eeb10d4 | 1058 | #else |
64d06f2f | 1059 | while ( code >= 256 ) { |
4eeb10d4 JL |
1060 | #endif |
1061 | *stackp++ = tab_suffixof(code); | |
1062 | code = tab_prefixof(code); | |
64d06f2f | 1063 | } |
4eeb10d4 | 1064 | *stackp++ = finchar = tab_suffixof(code); |
64d06f2f KM |
1065 | |
1066 | /* | |
1067 | * And put them out in forward order | |
1068 | */ | |
4eeb10d4 JL |
1069 | do |
1070 | putchar ( *--stackp ); | |
1071 | while ( stackp > de_stack ); | |
64d06f2f KM |
1072 | |
1073 | /* | |
1074 | * Generate the new entry. | |
1075 | */ | |
1076 | if ( (code=free_ent) < maxmaxcode ) { | |
4eeb10d4 JL |
1077 | tab_prefixof(code) = (unsigned short)oldcode; |
1078 | tab_suffixof(code) = finchar; | |
64d06f2f KM |
1079 | free_ent = code+1; |
1080 | } | |
1081 | /* | |
1082 | * Remember previous code. | |
1083 | */ | |
1084 | oldcode = incode; | |
1085 | } | |
1086 | fflush( stdout ); | |
1087 | if(ferror(stdout)) | |
1088 | writeerr(); | |
1089 | } | |
1090 | ||
64d06f2f KM |
1091 | /***************************************************************** |
1092 | * TAG( getcode ) | |
1093 | * | |
1094 | * Read one code from the standard input. If EOF, return -1. | |
1095 | * Inputs: | |
1096 | * stdin | |
1097 | * Outputs: | |
1098 | * code or -1 is returned. | |
1099 | */ | |
1100 | ||
1101 | code_int | |
1102 | getcode() { | |
1103 | /* | |
1104 | * On the VAX, it is important to have the register declarations | |
1105 | * in exactly the order given, or the asm will break. | |
1106 | */ | |
1107 | register code_int code; | |
1108 | static int offset = 0, size = 0; | |
1109 | static char_type buf[BITS]; | |
1110 | register int r_off, bits; | |
1111 | register char_type *bp = buf; | |
1112 | ||
1113 | if ( clear_flg > 0 || offset >= size || free_ent > maxcode ) { | |
1114 | /* | |
1115 | * If the next entry will be too big for the current code | |
1116 | * size, then we must increase the size. This implies reading | |
1117 | * a new buffer full, too. | |
1118 | */ | |
1119 | if ( free_ent > maxcode ) { | |
1120 | n_bits++; | |
1121 | if ( n_bits == maxbits ) | |
1122 | maxcode = maxmaxcode; /* won't get any bigger now */ | |
1123 | else | |
1124 | maxcode = MAXCODE(n_bits); | |
1125 | } | |
1126 | if ( clear_flg > 0) { | |
1127 | maxcode = MAXCODE (n_bits = INIT_BITS); | |
1128 | clear_flg = 0; | |
1129 | } | |
1130 | size = fread( buf, 1, n_bits, stdin ); | |
1131 | if ( size <= 0 ) | |
1132 | return -1; /* end of file */ | |
1133 | offset = 0; | |
1134 | /* Round size down to integral number of codes */ | |
1135 | size = (size << 3) - (n_bits - 1); | |
1136 | } | |
1137 | r_off = offset; | |
1138 | bits = n_bits; | |
1139 | #ifdef vax | |
1140 | asm( "extzv r10,r9,(r8),r11" ); | |
4eeb10d4 | 1141 | #else /* not a vax */ |
64d06f2f KM |
1142 | /* |
1143 | * Get to the first byte. | |
1144 | */ | |
1145 | bp += (r_off >> 3); | |
1146 | r_off &= 7; | |
1147 | /* Get first part (low order bits) */ | |
1148 | #ifdef NO_UCHAR | |
1149 | code = ((*bp++ >> r_off) & rmask[8 - r_off]) & 0xff; | |
4eeb10d4 | 1150 | #else |
64d06f2f | 1151 | code = (*bp++ >> r_off); |
4eeb10d4 | 1152 | #endif /* NO_UCHAR */ |
64d06f2f KM |
1153 | bits -= (8 - r_off); |
1154 | r_off = 8 - r_off; /* now, offset into code word */ | |
1155 | /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ | |
1156 | if ( bits >= 8 ) { | |
1157 | #ifdef NO_UCHAR | |
1158 | code |= (*bp++ & 0xff) << r_off; | |
4eeb10d4 | 1159 | #else |
64d06f2f | 1160 | code |= *bp++ << r_off; |
4eeb10d4 | 1161 | #endif /* NO_UCHAR */ |
64d06f2f KM |
1162 | r_off += 8; |
1163 | bits -= 8; | |
1164 | } | |
1165 | /* high order bits. */ | |
1166 | code |= (*bp & rmask[bits]) << r_off; | |
4eeb10d4 | 1167 | #endif /* vax */ |
64d06f2f KM |
1168 | offset += n_bits; |
1169 | ||
1170 | return code; | |
1171 | } | |
1172 | ||
1173 | char * | |
1174 | rindex(s, c) /* For those who don't have it in libc.a */ | |
1175 | register char *s, c; | |
1176 | { | |
1177 | char *p; | |
1178 | for (p = NULL; *s; s++) | |
1179 | if (*s == c) | |
1180 | p = s; | |
1181 | return(p); | |
1182 | } | |
1183 | ||
1184 | #ifdef DEBUG | |
1185 | printcodes() | |
1186 | { | |
1187 | /* | |
a2de8fe3 | 1188 | * Just print out codes from input file. For debugging. |
64d06f2f KM |
1189 | */ |
1190 | code_int code; | |
1191 | int col = 0, bits; | |
1192 | ||
1193 | bits = n_bits = INIT_BITS; | |
1194 | maxcode = MAXCODE(n_bits); | |
1195 | free_ent = ((block_compress) ? FIRST : 256 ); | |
1196 | while ( ( code = getcode() ) >= 0 ) { | |
1197 | if ( (code == CLEAR) && block_compress ) { | |
1198 | free_ent = FIRST - 1; | |
1199 | clear_flg = 1; | |
1200 | } | |
1201 | else if ( free_ent < maxmaxcode ) | |
1202 | free_ent++; | |
1203 | if ( bits != n_bits ) { | |
1204 | fprintf(stderr, "\nChange to %d bits\n", n_bits ); | |
1205 | bits = n_bits; | |
1206 | col = 0; | |
1207 | } | |
1208 | fprintf(stderr, "%5d%c", code, (col+=6) >= 74 ? (col = 0, '\n') : ' ' ); | |
1209 | } | |
1210 | putc( '\n', stderr ); | |
1211 | exit( 0 ); | |
1212 | } | |
1213 | ||
4eeb10d4 JL |
1214 | code_int sorttab[1<<BITS]; /* sorted pointers into htab */ |
1215 | ||
64d06f2f KM |
1216 | dump_tab() /* dump string table */ |
1217 | { | |
4eeb10d4 | 1218 | register int i, first; |
64d06f2f | 1219 | register ent; |
4eeb10d4 JL |
1220 | #define STACK_SIZE 15000 |
1221 | int stack_top = STACK_SIZE; | |
1222 | register c; | |
1223 | ||
1224 | if(do_decomp == 0) { /* compressing */ | |
1225 | register int flag = 1; | |
1226 | ||
1227 | for(i=0; i<hsize; i++) { /* build sort pointers */ | |
1228 | if((long)htabof(i) >= 0) { | |
1229 | sorttab[codetabof(i)] = i; | |
64d06f2f | 1230 | } |
64d06f2f | 1231 | } |
4eeb10d4 JL |
1232 | first = block_compress ? FIRST : 256; |
1233 | for(i = first; i < free_ent; i++) { | |
1234 | fprintf(stderr, "%5d: \"", i); | |
1235 | de_stack[--stack_top] = '\n'; | |
1236 | de_stack[--stack_top] = '"'; | |
1237 | stack_top = in_stack((htabof(sorttab[i])>>maxbits)&0xff, | |
1238 | stack_top); | |
1239 | for(ent=htabof(sorttab[i]) & ((1<<maxbits)-1); | |
1240 | ent > 256; | |
1241 | ent=htabof(sorttab[ent]) & ((1<<maxbits)-1)) { | |
1242 | stack_top = in_stack(htabof(sorttab[ent]) >> maxbits, | |
1243 | stack_top); | |
1244 | } | |
1245 | stack_top = in_stack(ent, stack_top); | |
1246 | fwrite( &de_stack[stack_top], 1, STACK_SIZE-stack_top, stderr); | |
1247 | stack_top = STACK_SIZE; | |
1248 | } | |
1249 | } else if(!debug) { /* decompressing */ | |
1250 | ||
1251 | for ( i = 0; i < free_ent; i++ ) { | |
1252 | ent = i; | |
1253 | c = tab_suffixof(ent); | |
1254 | if ( isascii(c) && isprint(c) ) | |
1255 | fprintf( stderr, "%5d: %5d/'%c' \"", | |
1256 | ent, tab_prefixof(ent), c ); | |
1257 | else | |
1258 | fprintf( stderr, "%5d: %5d/\\%03o \"", | |
1259 | ent, tab_prefixof(ent), c ); | |
1260 | de_stack[--stack_top] = '\n'; | |
1261 | de_stack[--stack_top] = '"'; | |
1262 | for ( ; ent != NULL; | |
1263 | ent = (ent >= FIRST ? tab_prefixof(ent) : NULL) ) { | |
1264 | stack_top = in_stack(tab_suffixof(ent), stack_top); | |
1265 | } | |
1266 | fwrite( &de_stack[stack_top], 1, STACK_SIZE - stack_top, stderr ); | |
1267 | stack_top = STACK_SIZE; | |
1268 | } | |
64d06f2f KM |
1269 | } |
1270 | } | |
64d06f2f | 1271 | |
4eeb10d4 JL |
1272 | int |
1273 | in_stack(c, stack_top) | |
1274 | register c, stack_top; | |
1275 | { | |
1276 | if ( (isascii(c) && isprint(c) && c != '\\') || c == ' ' ) { | |
1277 | de_stack[--stack_top] = c; | |
1278 | } else { | |
1279 | switch( c ) { | |
1280 | case '\n': de_stack[--stack_top] = 'n'; break; | |
1281 | case '\t': de_stack[--stack_top] = 't'; break; | |
1282 | case '\b': de_stack[--stack_top] = 'b'; break; | |
1283 | case '\f': de_stack[--stack_top] = 'f'; break; | |
1284 | case '\r': de_stack[--stack_top] = 'r'; break; | |
1285 | case '\\': de_stack[--stack_top] = '\\'; break; | |
1286 | default: | |
1287 | de_stack[--stack_top] = '0' + c % 8; | |
1288 | de_stack[--stack_top] = '0' + (c / 8) % 8; | |
1289 | de_stack[--stack_top] = '0' + c / 64; | |
1290 | break; | |
1291 | } | |
1292 | de_stack[--stack_top] = '\\'; | |
1293 | } | |
1294 | return stack_top; | |
1295 | } | |
1296 | #endif /* DEBUG */ | |
64d06f2f KM |
1297 | |
1298 | writeerr() | |
1299 | { | |
1300 | perror ( ofname ); | |
1301 | unlink ( ofname ); | |
1302 | exit ( 1 ); | |
1303 | } | |
1304 | ||
1305 | copystat(ifname, ofname) | |
1306 | char *ifname, *ofname; | |
1307 | { | |
1308 | struct stat statbuf; | |
1309 | int mode; | |
1310 | time_t timep[2]; | |
1311 | ||
1312 | fclose(stdout); | |
1313 | if (stat(ifname, &statbuf)) { /* Get stat on input file */ | |
1314 | perror(ifname); | |
1315 | return; | |
1316 | } | |
1317 | if ((statbuf.st_mode & S_IFMT/*0170000*/) != S_IFREG/*0100000*/) { | |
1318 | if(quiet) | |
a2de8fe3 | 1319 | fprintf(stderr, "%s: ", ifname); |
64d06f2f KM |
1320 | fprintf(stderr, " -- not a regular file: unchanged"); |
1321 | exit_stat = 1; | |
c8f1fe84 | 1322 | perm_stat = 1; |
64d06f2f KM |
1323 | } else if (statbuf.st_nlink > 1) { |
1324 | if(quiet) | |
a2de8fe3 | 1325 | fprintf(stderr, "%s: ", ifname); |
64d06f2f KM |
1326 | fprintf(stderr, " -- has %d other links: unchanged", |
1327 | statbuf.st_nlink - 1); | |
1328 | exit_stat = 1; | |
c8f1fe84 | 1329 | perm_stat = 1; |
4eeb10d4 JL |
1330 | } else if (exit_stat == 2 && (!force)) { /* No compression: remove file.Z */ |
1331 | if(!quiet) | |
1332 | fprintf(stderr, " -- file unchanged"); | |
64d06f2f KM |
1333 | } else { /* ***** Successful Compression ***** */ |
1334 | exit_stat = 0; | |
1335 | mode = statbuf.st_mode & 07777; | |
1336 | if (chmod(ofname, mode)) /* Copy modes */ | |
1337 | perror(ofname); | |
1338 | chown(ofname, statbuf.st_uid, statbuf.st_gid); /* Copy ownership */ | |
1339 | timep[0] = statbuf.st_atime; | |
1340 | timep[1] = statbuf.st_mtime; | |
1341 | utime(ofname, timep); /* Update last accessed and modified times */ | |
1342 | if (unlink(ifname)) /* Remove input file */ | |
1343 | perror(ifname); | |
1344 | if(!quiet) | |
1345 | fprintf(stderr, " -- replaced with %s", ofname); | |
1346 | return; /* Successful return */ | |
1347 | } | |
1348 | ||
1349 | /* Unsuccessful return -- one of the tests failed */ | |
1350 | if (unlink(ofname)) | |
1351 | perror(ofname); | |
1352 | } | |
64d06f2f KM |
1353 | |
1354 | onintr ( ) | |
1355 | { | |
be5873af | 1356 | if (!precious) |
ea90d69f | 1357 | unlink ( ofname ); |
4eeb10d4 JL |
1358 | exit ( 1 ); |
1359 | } | |
1360 | ||
1361 | oops ( ) /* wild pointer -- assume bad input */ | |
1362 | { | |
c8f1fe84 | 1363 | if ( do_decomp ) |
4eeb10d4 JL |
1364 | fprintf ( stderr, "uncompress: corrupt input\n" ); |
1365 | unlink ( ofname ); | |
64d06f2f KM |
1366 | exit ( 1 ); |
1367 | } | |
1368 | ||
a2de8fe3 | 1369 | cl_block () /* table clear for block compress */ |
64d06f2f | 1370 | { |
4eeb10d4 | 1371 | register long int rat; |
64d06f2f KM |
1372 | |
1373 | checkpoint = in_count + CHECK_GAP; | |
4eeb10d4 JL |
1374 | #ifdef DEBUG |
1375 | if ( debug ) { | |
1376 | fprintf ( stderr, "count: %ld, ratio: ", in_count ); | |
1377 | prratio ( stderr, in_count, bytes_out ); | |
1378 | fprintf ( stderr, "\n"); | |
1379 | } | |
1380 | #endif /* DEBUG */ | |
1381 | ||
1382 | if(in_count > 0x007fffff) { /* shift will overflow */ | |
1383 | rat = bytes_out >> 8; | |
1384 | if(rat == 0) { /* Don't divide by zero */ | |
1385 | rat = 0x7fffffff; | |
1386 | } else { | |
1387 | rat = in_count / rat; | |
64d06f2f | 1388 | } |
4eeb10d4 JL |
1389 | } else { |
1390 | rat = (in_count << 8) / bytes_out; /* 8 fractional bits */ | |
1391 | } | |
1392 | if ( rat > ratio ) { | |
1393 | ratio = rat; | |
1394 | } else { | |
1395 | ratio = 0; | |
1396 | #ifdef DEBUG | |
1397 | if(verbose) | |
1398 | dump_tab(); /* dump string table */ | |
1399 | #endif | |
1400 | cl_hash ( (count_int) hsize ); | |
64d06f2f KM |
1401 | free_ent = FIRST; |
1402 | clear_flg = 1; | |
1403 | output ( (code_int) CLEAR ); | |
1404 | #ifdef DEBUG | |
1405 | if(debug) | |
1406 | fprintf ( stderr, "clear\n" ); | |
4eeb10d4 | 1407 | #endif /* DEBUG */ |
64d06f2f KM |
1408 | } |
1409 | } | |
1410 | ||
4eeb10d4 | 1411 | cl_hash(hsize) /* reset code table */ |
a2de8fe3 | 1412 | register count_int hsize; |
64d06f2f | 1413 | { |
4eeb10d4 | 1414 | #ifndef XENIX_16 /* Normal machine */ |
64d06f2f | 1415 | register count_int *htab_p = htab+hsize; |
4eeb10d4 JL |
1416 | #else |
1417 | register j; | |
1418 | register long k = hsize; | |
1419 | register count_int *htab_p; | |
1420 | #endif | |
dad1b88c | 1421 | register long i; |
64d06f2f KM |
1422 | register long m1 = -1; |
1423 | ||
4eeb10d4 JL |
1424 | #ifdef XENIX_16 |
1425 | for(j=0; j<=8 && k>=0; j++,k-=8192) { | |
1426 | i = 8192; | |
1427 | if(k < 8192) { | |
1428 | i = k; | |
1429 | } | |
1430 | htab_p = &(htab[j][i]); | |
1431 | i -= 16; | |
1432 | if(i > 0) { | |
1433 | #else | |
64d06f2f | 1434 | i = hsize - 16; |
4eeb10d4 JL |
1435 | #endif |
1436 | do { /* might use Sys V memset(3) here */ | |
64d06f2f KM |
1437 | *(htab_p-16) = m1; |
1438 | *(htab_p-15) = m1; | |
1439 | *(htab_p-14) = m1; | |
1440 | *(htab_p-13) = m1; | |
1441 | *(htab_p-12) = m1; | |
1442 | *(htab_p-11) = m1; | |
1443 | *(htab_p-10) = m1; | |
1444 | *(htab_p-9) = m1; | |
1445 | *(htab_p-8) = m1; | |
1446 | *(htab_p-7) = m1; | |
1447 | *(htab_p-6) = m1; | |
1448 | *(htab_p-5) = m1; | |
1449 | *(htab_p-4) = m1; | |
1450 | *(htab_p-3) = m1; | |
1451 | *(htab_p-2) = m1; | |
1452 | *(htab_p-1) = m1; | |
1453 | htab_p -= 16; | |
1454 | } while ((i -= 16) >= 0); | |
4eeb10d4 JL |
1455 | #ifdef XENIX_16 |
1456 | } | |
1457 | } | |
1458 | #endif | |
64d06f2f KM |
1459 | for ( i += 16; i > 0; i-- ) |
1460 | *--htab_p = m1; | |
1461 | } | |
a2de8fe3 | 1462 | |
4eeb10d4 JL |
1463 | prratio(stream, num, den) |
1464 | FILE *stream; | |
1465 | long int num, den; | |
a2de8fe3 | 1466 | { |
4eeb10d4 JL |
1467 | register int q; /* Doesn't need to be long */ |
1468 | ||
1469 | if(num > 214748L) { /* 2147483647/10000 */ | |
1470 | q = num / (den / 10000L); | |
1471 | } else { | |
1472 | q = 10000L * num / den; /* Long calculations, though */ | |
1473 | } | |
1474 | if (q < 0) { | |
1475 | putc('-', stream); | |
1476 | q = -q; | |
1477 | } | |
1478 | fprintf(stream, "%d.%02d%%", q / 100, q % 100); | |
1479 | } | |
a2de8fe3 | 1480 | |
4eeb10d4 JL |
1481 | version() |
1482 | { | |
c8f1fe84 | 1483 | fprintf(stderr, "%s, Berkeley 5.9 %G%\n", rcs_ident); |
4eeb10d4 JL |
1484 | fprintf(stderr, "Options: "); |
1485 | #ifdef vax | |
1486 | fprintf(stderr, "vax, "); | |
1487 | #endif | |
1488 | #ifdef NO_UCHAR | |
1489 | fprintf(stderr, "NO_UCHAR, "); | |
1490 | #endif | |
1491 | #ifdef SIGNED_COMPARE_SLOW | |
1492 | fprintf(stderr, "SIGNED_COMPARE_SLOW, "); | |
1493 | #endif | |
1494 | #ifdef XENIX_16 | |
1495 | fprintf(stderr, "XENIX_16, "); | |
1496 | #endif | |
1497 | #ifdef COMPATIBLE | |
1498 | fprintf(stderr, "COMPATIBLE, "); | |
1499 | #endif | |
1500 | #ifdef DEBUG | |
1501 | fprintf(stderr, "DEBUG, "); | |
1502 | #endif | |
1503 | #ifdef BSD4_2 | |
1504 | fprintf(stderr, "BSD4_2, "); | |
1505 | #endif | |
1506 | fprintf(stderr, "BITS = %d\n", BITS); | |
a2de8fe3 | 1507 | } |