arrange for a useful error message if the mailer fork fails
[unix-history] / usr / src / usr.bin / compress / compress.c
CommitLineData
64d06f2f 1#ifndef lint
a2de8fe3 2static char sccsid[] = "@(#)compress.c 5.2 (Berkeley) %G%";
64d06f2f
KM
3#endif not lint
4
a2de8fe3
KM
5#define min(a,b) ((a>b) ? b : a)
6
64d06f2f 7/*
a2de8fe3
KM
8 * machine variants which require cc -Dmachine: pdp11, z8000, pcxt
9 */
10
11/* Set USERMEM to the maximum amount of physical user memory available
64d06f2f
KM
12 * in bytes. USERMEM is used to determine the maximum BITS that can be used
13 * for compression. If USERMEM is big enough, use fast compression algorithm.
14 *
15 * SACREDMEM is the amount of physical memory saved for others; compress
16 * will hog the rest.
17 */
18#ifndef SACREDMEM
19#define SACREDMEM 0
20#endif
21
a2de8fe3
KM
22#ifndef USERMEM
23# define USERMEM 750000 /* default user memory */
24#endif
25
64d06f2f
KM
26#ifdef pdp11
27# define BITS 12 /* max bits/code for 16-bit machine */
28# define NO_UCHAR /* also if "unsigned char" functions as signed char */
a2de8fe3 29# define SHORT_INT /* ints are short */
64d06f2f 30# undef USERMEM
a2de8fe3
KM
31#endif pdp11
32
33#ifdef z8000
34# define BITS 12
35# define SHORT_INT
36# undef vax /* weird preprocessor */
37# undef USERMEM
38#endif z8000
39
40#ifdef pcxt
41# define BITS 12
42# define SHORT_INT
43# define SMALL_STACK /* at least for Latice C, sez Lauren */
44# undef USERMEM
45#endif pcxt
46
64d06f2f
KM
47/*
48 * Define FBITS for machines with several MB of physical memory, to use
49 * table lookup for (b <= FBITS). If FBITS is made too large, performance
50 * will decrease due to increased swapping/paging. Since the program minus
51 * the fast lookup table is about a half Meg, we can allocate the rest of
52 * available physical memory to the fast lookup table.
53 *
54 * If FBITS is set to 12, a 2 MB array is allocated, but only 1 MB is
55 * addressed for parity-free input (i.e. text).
56 *
57 * FBITS=10 yields 1/2 meg lookup table + 4K code memory
58 * FBITS=11 yields 1 meg lookup table + 8K code memory
59 * FBITS=12 yields 2 meg lookup table + 16K code memory
60 * FBITS=13 yields 4 meg lookup table + 32K code memory
61 *
62 */
63
64#ifdef USERMEM
65# if USERMEM >= (2621440+SACREDMEM)
66# if USERMEM >= (4718592+SACREDMEM)
67# define FBITS 13
68# define PBITS 16
69# else 2.5M <= USERMEM < 4.5M
70# define FBITS 12
71# define PBITS 16
72# endif USERMEM <=> 4.5M
73# else USERMEM < 2.5M
74# if USERMEM >= (1572864+SACREDMEM)
75# define FBITS 11
76# define PBITS 16
77# else USERMEM < 1.5M
78# if USERMEM >= (1048576+SACREDMEM)
79# define FBITS 10
80# define PBITS 16
81# else USERMEM < 1M
82# if USERMEM >= (631808+SACREDMEM)
83# define PBITS 16
84# else
85# if USERMEM >= (329728+SACREDMEM)
86# define PBITS 15
87# else
88# if USERMEM >= (178176+SACREDMEM)
89# define PBITS 14
90# else
91# if USERMEM >= (99328+SACREDMEM)
92# define PBITS 13
93# else
94# define PBITS 12
95# endif
96# endif
97# endif
98# endif
99# undef USERMEM
100# endif USERMEM <=> 1M
101# endif USERMEM <=> 1.5M
102# endif USERMEM <=> 2.5M
103#endif USERMEM
104
105#ifdef PBITS /* Preferred BITS for this memory size */
106# ifndef BITS
107# define BITS PBITS
108# endif BITS
109#endif PBITS
110
111#if BITS == 16
112# define HSIZE 69001 /* 95% occupancy */
113#endif
114#if BITS == 15
115# define HSIZE 35023 /* 94% occupancy */
116#endif
117#if BITS == 14
118# define HSIZE 18013 /* 91% occupancy */
119#endif
120#if BITS == 13
121# define HSIZE 9001 /* 91% occupancy */
122#endif
a2de8fe3 123#if BITS <= 12
64d06f2f
KM
124# define HSIZE 5003 /* 80% occupancy */
125#endif
64d06f2f
KM
126/* BITS < 9 will cause an error */
127
128/*
129 * a code_int must be able to hold 2**BITS values of type int, and also -1
130 */
131#if BITS > 15
132typedef long int code_int;
133#else
134typedef int code_int;
135#endif
136
137#ifdef interdata
138typedef unsigned long int count_int;
139typedef unsigned short int count_short;
140#else
141typedef long int count_int;
142#endif
143
144#ifdef NO_UCHAR
145 typedef char char_type;
146#else UCHAR
147 typedef unsigned char char_type;
148#endif UCHAR
149char_type magic_header[] = { "\037\235" }; /* 1F 9D */
150
151/* Defines for third byte of header */
152#define BIT_MASK 0x1f
153#define BLOCK_MASK 0x80
154/* Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is
155 a fourth header byte (for expansion).
156*/
157#define INIT_BITS 9 /* initial number of bits/code */
158
159/*
a2de8fe3 160 * compress.c - File compression ala IEEE Computer, June 1984.
64d06f2f
KM
161 *
162 * Authors: Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas)
163 * Jim McKie (decvax!mcvax!jim)
164 * Steve Davies (decvax!vax135!petsd!peora!srd)
165 * Ken Turkowski (decvax!decwrl!turtlevax!ken)
166 * James A. Woods (decvax!ihnp4!ames!jaw)
167 * Joe Orost (decvax!vax135!petsd!joe)
168 *
a2de8fe3 169 * $Header: compress.c,v 3.2 85/06/06 21:53:24 jaw Exp $
64d06f2f 170 * $Log: compress.c,v $
a2de8fe3
KM
171 * Revision 3.2 85/06/06 21:53:24 jaw
172 * Incorporate portability suggestions for Z8000, IBM PC/XT from mailing list.
173 * Default to "quiet" output (no compression statistics).
174 *
175 * Revision 3.1 85/05/12 18:56:13 jaw
176 * Integrate decompress() stack speedups (from early pointer mods by McKie).
177 * Repair multi-file USERMEM gaffe. Unify 'force' flags to mimic semantics
178 * of SVR2 'pack'. Streamline block-compress table clear logic. Increase
179 * output byte count by magic number size.
180 *
64d06f2f
KM
181 * Revision 3.0 84/11/27 11:50:00 petsd!joe
182 * Set HSIZE depending on BITS. Set BITS depending on USERMEM. Unrolled
a2de8fe3 183 * loops in clear routines. Added "-C" flag for 2.0 compatibility. Used
64d06f2f
KM
184 * unsigned compares on Perkin-Elmer. Fixed foreground check.
185 *
186 * Revision 2.7 84/11/16 19:35:39 ames!jaw
187 * Cache common hash codes based on input statistics; this improves
188 * performance for low-density raster images. Pass on #ifdef bundle
189 * from Turkowski.
190 *
191 * Revision 2.6 84/11/05 19:18:21 ames!jaw
192 * Vary size of hash tables to reduce time for small files.
193 * Tune PDP-11 hash function.
194 *
195 * Revision 2.5 84/10/30 20:15:14 ames!jaw
196 * Junk chaining; replace with the simpler (and, on the VAX, faster)
197 * double hashing, discussed within. Make block compression standard.
198 *
199 * Revision 2.4 84/10/16 11:11:11 ames!jaw
200 * Introduce adaptive reset for block compression, to boost the rate
201 * another several percent. (See mailing list notes.)
202 *
203 * Revision 2.3 84/09/22 22:00:00 petsd!joe
204 * Implemented "-B" block compress. Implemented REVERSE sorting of tab_next.
205 * Bug fix for last bits. Changed fwrite to putchar loop everywhere.
206 *
207 * Revision 2.2 84/09/18 14:12:21 ames!jaw
208 * Fold in news changes, small machine typedef from thomas,
209 * #ifdef interdata from joe.
210 *
211 * Revision 2.1 84/09/10 12:34:56 ames!jaw
212 * Configured fast table lookup for 32-bit machines.
213 * This cuts user time in half for b <= FBITS, and is useful for news batching
214 * from VAX to PDP sites. Also sped up decompress() [fwrite->putc] and
215 * added signal catcher [plus beef in writeerr()] to delete effluvia.
216 *
217 * Revision 2.0 84/08/28 22:00:00 petsd!joe
218 * Add check for foreground before prompting user. Insert maxbits into
219 * compressed file. Force file being uncompressed to end with ".Z".
220 * Added "-c" flag and "zcat". Prepared for release.
221 *
222 * Revision 1.10 84/08/24 18:28:00 turtlevax!ken
223 * Will only compress regular files (no directories), added a magic number
224 * header (plus an undocumented -n flag to handle old files without headers),
225 * added -f flag to force overwriting of possibly existing destination file,
226 * otherwise the user is prompted for a response. Will tack on a .Z to a
227 * filename if it doesn't have one when decompressing. Will only replace
228 * file if it was compressed.
229 *
230 * Revision 1.9 84/08/16 17:28:00 turtlevax!ken
231 * Removed scanargs(), getopt(), added .Z extension and unlimited number of
232 * filenames to compress. Flags may be clustered (-Ddvb12) or separated
233 * (-D -d -v -b 12), or combination thereof. Modes and other status is
234 * copied with copystat(). -O bug for 4.2 seems to have disappeared with
235 * 1.8.
236 *
237 * Revision 1.8 84/08/09 23:15:00 joe
238 * Made it compatible with vax version, installed jim's fixes/enhancements
239 *
240 * Revision 1.6 84/08/01 22:08:00 joe
241 * Sped up algorithm significantly by sorting the compress chain.
242 *
243 * Revision 1.5 84/07/13 13:11:00 srd
244 * Added C version of vax asm routines. Changed structure to arrays to
245 * save much memory. Do unsigned compares where possible (faster on
246 * Perkin-Elmer)
247 *
248 * Revision 1.4 84/07/05 03:11:11 thomas
249 * Clean up the code a little and lint it. (Lint complains about all
250 * the regs used in the asm, but I'm not going to "fix" this.)
251 *
252 * Revision 1.3 84/07/05 02:06:54 thomas
253 * Minor fixes.
254 *
255 * Revision 1.2 84/07/05 00:27:27 thomas
256 * Add variable bit length output.
257 *
258 */
259#ifndef lint
a2de8fe3 260static char rcs_ident[] = "$Header: compress.c,v 3.1 85/05/12 18:56:13 jaw Exp $";
64d06f2f
KM
261#endif !lint
262
263#include <stdio.h>
264#include <ctype.h>
265#include <signal.h>
266#include <sys/types.h>
267#include <sys/stat.h>
268
269#define ARGVAL() (*++(*argv) || (--argc && *++argv))
270
271int n_bits; /* number of bits/code */
272int maxbits = BITS; /* user settable max # bits/code */
273code_int maxcode; /* maximum code, given n_bits */
274code_int maxmaxcode = 1 << BITS; /* should NEVER generate this code */
275#ifdef COMPATIBLE /* But wrong! */
276# define MAXCODE(n_bits) (1 << (n_bits) - 1)
277#else COMPATIBLE
278# define MAXCODE(n_bits) ((1 << (n_bits)) - 1)
279#endif COMPATIBLE
280
281/*
282 * One code could conceivably represent (1<<BITS) characters, but
283 * to get a code of length N requires an input string of at least
284 * N*(N-1)/2 characters. With 5000 chars in the stack, an input
285 * file would have to contain a 25Mb string of a single character.
286 * This seems unlikely.
287 */
a2de8fe3 288#ifdef SHORT_INT
64d06f2f 289# define MAXSTACK 5000 /* size of output stack */
a2de8fe3 290#else !SHORT_INT
64d06f2f 291# define MAXSTACK 8000 /* size of output stack */
a2de8fe3 292#endif !SHORT_INT
64d06f2f
KM
293
294count_int htab [HSIZE];
295unsigned short codetab [HSIZE];
296code_int hsize = HSIZE; /* for dynamic table sizing */
297count_int fsize;
298
299#define tab_prefix codetab /* prefix code for this entry */
300char_type tab_suffix[1<<BITS]; /* last char in this entry */
301
302#ifdef USERMEM
303short ftable [(1 << FBITS) * 256];
304count_int fcodemem [1 << FBITS];
305#endif USERMEM
306
307code_int free_ent = 0; /* first unused entry */
308int exit_stat = 0;
309
310code_int getcode();
311
312Usage() {
313#ifdef DEBUG
a2de8fe3 314fprintf(stderr,"Usage: compress [-dDVfc] [-b maxbits] [file ...]\n");
64d06f2f
KM
315}
316int debug = 0;
317#else DEBUG
a2de8fe3 318fprintf(stderr,"Usage: compress [-dfvc] [-b maxbits] [file ...]\n");
64d06f2f
KM
319}
320#endif DEBUG
a2de8fe3 321int nomagic = 0; /* Use a 3-byte magic number header, unless old file */
64d06f2f 322int zcat_flg = 0; /* Write output on stdout, suppress messages */
a2de8fe3 323int quiet = 1; /* don't tell me about compression */
64d06f2f
KM
324
325/*
326 * block compression parameters -- after all codes are used up,
327 * and compression rate changes, start over.
328 */
329int block_compress = BLOCK_MASK;
330int clear_flg = 0;
331double ratio = 0.0; /* compression ratio for last block */
332#define CHECK_GAP 10000 /* ratio check interval */
333count_int checkpoint = CHECK_GAP;
334/*
335 * the next two codes should not be changed lightly, as they must not
336 * lie within the contiguous general code space.
337 */
338#define FIRST 257 /* first free entry */
339#define CLEAR 256 /* table clear output code */
340
341int force = 0;
342char ofname [100];
343#ifdef DEBUG
344int verbose = 0;
345#endif DEBUG
346int (*bgnd_flag)();
347
a2de8fe3
KM
348int nfiles = 0; /* number of files processed */
349
64d06f2f
KM
350/*****************************************************************
351 * TAG( main )
352 *
353 * Algorithm from "A Technique for High Performance Data Compression",
354 * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19.
355 *
a2de8fe3 356 * Usage: compress [-dfvc] [-b bits] [file ...]
64d06f2f
KM
357 * Inputs:
358 * -d: If given, decompression is done instead.
359 *
360 * -c: Write output on stdout, don't remove original.
361 *
362 * -b: Parameter limits the max number of bits/code.
363 *
364 * -f: Forces output file to be generated, even if one already
a2de8fe3
KM
365 * exists, and even if no space is saved by compressing.
366 * If -f is not used, the user will be prompted if stdin is
367 * a tty, otherwise, the output file will not be overwritten.
64d06f2f 368 *
a2de8fe3 369 * -v: Write compression statistics
64d06f2f
KM
370 *
371 * file ...: Files to be compressed. If none specified, stdin
372 * is used.
373 * Outputs:
374 * file.Z: Compressed form of file with same mode, owner, and utimes
375 * or stdout (if stdin used as input)
376 *
377 * Assumptions:
378 * When filenames are given, replaces with the compressed version
a2de8fe3 379 * (.Z suffix) only if the file decreases in size.
64d06f2f
KM
380 * Algorithm:
381 * Modified Lempel-Ziv method (LZW). Basically finds common
382 * substrings and replaces them with a variable size code. This is
383 * deterministic, and can be done on the fly. Thus, the decompression
a2de8fe3 384 * procedure needs no input table, but tracks the way the table was built.
64d06f2f 385 */
64d06f2f
KM
386main( argc, argv )
387register int argc; char **argv;
388{
389 int do_decomp = 0;
390 int overwrite = 0; /* Do not overwrite unless given -f flag */
391 char tempname[100];
392 char **filelist, **fileptr;
a2de8fe3 393 char *cp, *rindex(), *malloc();
64d06f2f
KM
394 struct stat statbuf;
395 extern onintr();
396
397
398 if ( (bgnd_flag = signal ( SIGINT, SIG_IGN )) != SIG_IGN )
399 signal ( SIGINT, onintr );
400
401#ifdef COMPATIBLE
402 nomagic = 1; /* Original didn't have a magic number */
403#endif COMPATIBLE
404
405 filelist = fileptr = (char **)(malloc(argc * sizeof(*argv)));
406 *filelist = NULL;
407
408 if((cp = rindex(argv[0], '/')) != 0) {
409 cp++;
410 } else {
411 cp = argv[0];
412 }
413 if(strcmp(cp, "uncompress") == 0) {
414 do_decomp = 1;
415 } else if(strcmp(cp, "zcat") == 0) {
416 do_decomp = 1;
417 zcat_flg = 1;
418 }
419
420#ifdef BSD4_2
421 /* 4.2BSD dependent - take it out if not */
422 setlinebuf( stderr );
423#endif BSD4_2
424
425 /* Argument Processing
426 * All flags are optional.
427 * -D => debug
a2de8fe3 428 * -V => debug verbose
64d06f2f 429 * -d => do_decomp
a2de8fe3 430 * -v => unquiet
64d06f2f
KM
431 * -f => force overwrite of output file
432 * -n => no header: useful to uncompress old files
433 * -b maxbits => maxbits. If -b is specified, then maxbits MUST be
434 * given also.
435 * -c => cat all output to stdout
a2de8fe3 436 * -C => generate output compatible with compress 2.0.
64d06f2f
KM
437 * if a string is left, must be an input filename.
438 */
439 for (argc--, argv++; argc > 0; argc--, argv++) {
440 if (**argv == '-') { /* A flag argument */
441 while (*++(*argv)) { /* Process all flags in this arg */
442 switch (**argv) {
443#ifdef DEBUG
444 case 'D':
445 debug = 1;
446 break;
a2de8fe3 447 case 'V':
64d06f2f
KM
448 verbose = 1;
449 break;
450#endif DEBUG
a2de8fe3
KM
451 case 'v':
452 quiet = 0;
453 break;
64d06f2f
KM
454 case 'd':
455 do_decomp = 1;
456 break;
457 case 'f':
a2de8fe3 458 case 'F':
64d06f2f 459 overwrite = 1;
a2de8fe3 460 force = 1;
64d06f2f
KM
461 break;
462 case 'n':
463 nomagic = 1;
464 break;
465 case 'C':
466 block_compress = 0;
467 break;
468 case 'b':
469 if (!ARGVAL()) {
470 fprintf(stderr, "Missing maxbits\n");
471 Usage();
472 exit(1);
473 }
474 maxbits = atoi(*argv);
475 goto nextarg;
476 case 'c':
477 zcat_flg = 1;
478 break;
479 case 'q':
480 quiet = 1;
481 break;
64d06f2f
KM
482 default:
483 fprintf(stderr, "Unknown flag: '%c'; ", **argv);
484 Usage();
485 exit(1);
486 }
487 }
488 }
489 else { /* Input file name */
490 *fileptr++ = *argv; /* Build input file list */
491 *fileptr = NULL;
492 /* goto nextarg; */
493 }
494 nextarg: continue;
495 }
496
497 if(maxbits < INIT_BITS) maxbits = INIT_BITS;
498 if (maxbits > BITS) maxbits = BITS;
499 maxmaxcode = 1 << maxbits;
500
501 if (*filelist != NULL) {
502 for (fileptr = filelist; *fileptr; fileptr++) {
503 exit_stat = 0;
504 if (do_decomp != 0) { /* DECOMPRESSION */
505 /* Check for .Z suffix */
506 if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") != 0) {
507 /* No .Z: tack one on */
508 strcpy(tempname, *fileptr);
509 strcat(tempname, ".Z");
510 *fileptr = tempname;
511 }
512 /* Open input file */
513 if ((freopen(*fileptr, "r", stdin)) == NULL) {
514 perror(*fileptr); continue;
515 }
516 /* Check the magic number */
517 if (nomagic == 0) {
518 if ((getchar() != (magic_header[0] & 0xFF))
519 || (getchar() != (magic_header[1] & 0xFF))) {
520 fprintf(stderr, "%s: not in compressed format\n",
521 *fileptr);
522 continue;
523 }
524 maxbits = getchar(); /* set -b from file */
525 block_compress = maxbits & BLOCK_MASK;
526 maxbits &= BIT_MASK;
527 maxmaxcode = 1 << maxbits;
528 if(maxbits > BITS) {
529 fprintf(stderr,
530 "%s: compressed with %d bits, can only handle %d bits\n",
531 *fileptr, maxbits, BITS);
532 continue;
533 }
534 }
535 /* Generate output filename */
536 strcpy(ofname, *fileptr);
537 ofname[strlen(*fileptr) - 2] = '\0'; /* Strip off .Z */
538 } else { /* COMPRESSION */
539 if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") == 0) {
a2de8fe3 540 fprintf(stderr, "%s: already has .Z suffix -- no change\n",
64d06f2f
KM
541 *fileptr);
542 continue;
543 }
544 /* Open input file */
545 if ((freopen(*fileptr, "r", stdin)) == NULL) {
546 perror(*fileptr); continue;
547 }
548 stat ( *fileptr, &statbuf );
549 fsize = (long) statbuf.st_size;
550 /*
551 * tune hash table size for small files -- ad hoc
552 */
a2de8fe3 553 hsize = HSIZE;
64d06f2f 554 if ( fsize < (1 << 12) )
a2de8fe3 555 hsize = min ( 5003, HSIZE );
64d06f2f 556 else if ( fsize < (1 << 13) )
a2de8fe3 557 hsize = min ( 9001, HSIZE );
64d06f2f 558 else if ( fsize < (1 << 14) )
a2de8fe3 559 hsize = min ( 18013, HSIZE );
64d06f2f 560 else if ( fsize < (1 << 15) )
a2de8fe3 561 hsize = min ( 35023, HSIZE );
64d06f2f 562 else if ( fsize < 47000 )
a2de8fe3
KM
563 hsize = min ( 50021, HSIZE );
564
64d06f2f
KM
565 /* Generate output filename */
566 strcpy(ofname, *fileptr);
567#ifndef BSD4_2 /* Short filenames */
568 if ((cp=rindex(ofname,'/')) != NULL) cp++;
569 else cp = ofname;
570 if (strlen(cp) > 12) {
571 fprintf(stderr,"%s: filename too long to tack on .Z\n",cp);
572 continue;
573 }
574#endif BSD4_2 /* Long filenames allowed */
575 strcat(ofname, ".Z");
576 }
577 /* Check for overwrite of existing file */
578 if (overwrite == 0 && zcat_flg == 0) {
579 if (stat(ofname, &statbuf) == 0) {
580 char response[2];
581 response[0] = 'n';
582 fprintf(stderr, "%s already exists;", ofname);
583 if (foreground()) {
a2de8fe3 584 fprintf(stderr, " do you wish to overwrite %s (y or n)? ",
64d06f2f
KM
585 ofname);
586 fflush(stderr);
587 read(2, response, 2);
588 while (response[1] != '\n') {
589 if (read(2, response+1, 1) < 0) { /* Ack! */
590 perror("stderr"); break;
591 }
592 }
593 }
594 if (response[0] != 'y') {
595 fprintf(stderr, "\tnot overwritten\n");
596 continue;
597 }
598 }
599 }
600 if(zcat_flg == 0) { /* Open output file */
601 if (freopen(ofname, "w", stdout) == NULL) {
602 perror(ofname);
603 continue;
604 }
605 if(!quiet)
606 fprintf(stderr, "%s: ", *fileptr);
607 }
608
609 /* Actually do the compression/decompression */
610 if (do_decomp == 0) compress();
611#ifndef DEBUG
612 else decompress();
613#else DEBUG
614 else if (debug == 0) decompress();
615 else printcodes();
616 if (verbose) dump_tab();
617#endif DEBUG
618 if(zcat_flg == 0) {
619 copystat(*fileptr, ofname); /* Copy stats */
620 if(exit_stat || (!quiet))
621 putc('\n', stderr);
622 }
623 }
624 } else { /* Standard input */
625 if (do_decomp == 0) {
626 compress();
627 if(!quiet)
628 putc('\n', stderr);
629 } else {
630 /* Check the magic number */
631 if (nomagic == 0) {
632 if ((getchar()!=(magic_header[0] & 0xFF))
633 || (getchar()!=(magic_header[1] & 0xFF))) {
634 fprintf(stderr, "stdin: not in compressed format\n");
635 exit(1);
636 }
637 maxbits = getchar(); /* set -b from file */
638 block_compress = maxbits & BLOCK_MASK;
639 maxbits &= BIT_MASK;
640 maxmaxcode = 1 << maxbits;
641 fsize = 100000; /* assume stdin large for USERMEM */
642 if(maxbits > BITS) {
643 fprintf(stderr,
644 "stdin: compressed with %d bits, can only handle %d bits\n",
645 maxbits, BITS);
646 exit(1);
647 }
648 }
649#ifndef DEBUG
650 decompress();
651#else DEBUG
652 if (debug == 0) decompress();
653 else printcodes();
654 if (verbose) dump_tab();
655#endif DEBUG
656 }
657 }
658 exit(exit_stat);
659}
660
661static int offset;
662long int in_count = 1; /* length of input */
663long int bytes_out; /* length of compressed output */
664long int out_count = 0; /* # of codes output (for debugging) */
665
666#define HOG_CHECK ((count_int) 2000) /* Number of chars to read b4 check */
667#define MAX_CACHE ((count_int) 1<<BITS) /* Next line is this constant too */
668unsigned short hashcache [1<<BITS]; /* common hash short circuit cache */
669count_int cfreq [256]; /* character counts */
670#ifndef vax
671 char chog; /* most common character from input */
672# define CHOG ' ' /* Assume space is most frequent */
673#else
674 int chog; /* char arith slow on VAX */
675# define CHOG (int) ' ' /* Assume space is most frequent */
676#endif
677int cstat_flg = 0; /* on after determining char hog */
678
679/*
680 * compress stdin to stdout
681 *
682 * Algorithm: on large machines, for maxbits <= FBITS, use fast direct table
683 * lookup on the prefix code / next character combination. For smaller code
684 * size, use open addressing modular division double hashing (no chaining), ala
685 * Knuth vol. 3, sec. 6.4 Algorithm D, along with G. Knott's relatively-prime
686 * secondary probe. Do block compression with an adaptive reset, whereby the
687 * code table is cleared when the compression ratio decreases, but after the
688 * table fills. The variable-length output codes are re-sized at this point,
689 * and a special CLEAR code is generated for the decompressor. For the
690 * megamemory version, the sparse array is cleared indirectly through a
691 * "shadow" output code history. Late additions: for the hashing code,
692 * construct the table according to file size for noticeable speed improvement
693 * on small files. Also detect and cache codes associated with the most
694 * common character to bypass hash calculation on these codes (a characteristic
695 * of highly-compressable raster images). Please direct questions about this
696 * implementation to ames!jaw.
697 */
698
699
700compress() {
701 register long fcode;
702 register code_int i = 0;
703 register int c;
704 register code_int ent;
705 register int disp;
706 register code_int hsize_reg;
707
708#ifndef COMPATIBLE
709 if (nomagic == 0) {
710 putchar(magic_header[0]); putchar(magic_header[1]);
711 putchar((char)(maxbits | block_compress));
712 }
713#endif COMPATIBLE
714
715 offset = 0;
a2de8fe3 716 bytes_out = 3; /* includes 3-byte header mojo */
64d06f2f
KM
717 out_count = 0;
718 clear_flg = 0;
719 ratio = 0.0;
720 in_count = 1;
721 checkpoint = CHECK_GAP;
722 maxcode = MAXCODE(n_bits = INIT_BITS);
723 free_ent = ((block_compress) ? FIRST : 256 );
724 ent = getchar ();
725
726#ifdef USERMEM
727if ( maxbits <= FBITS && (fsize >= 30000) ) { /* use hashing on small files */
728
a2de8fe3
KM
729 if ( nfiles++ > 0 ) /* clear table between files */
730 cl_sparse ();
731
64d06f2f
KM
732 while ( (c = getchar()) != (unsigned) EOF ) {
733 in_count++;
734 fcode = (long) (((long) c << maxbits) + ent);
a2de8fe3 735 if ( ftable [fcode] != 0 ) /* test for code in "string" table */
64d06f2f
KM
736 ent = ftable [fcode];
737 else {
738 output ( (code_int) ent );
739 out_count++;
740 ent = c;
741 if ( free_ent >= maxmaxcode ) {
742 if ( (count_int)in_count < checkpoint || (!block_compress) )
743 continue;
744 else {
a2de8fe3 745 cl_block ();
64d06f2f
KM
746 i = 0;
747 }
748 } else { /* put code in table */
749 ftable [fcode] = (short) free_ent++;
a2de8fe3 750 fcodemem [i++] = fcode; /* memorize for block compression */
64d06f2f
KM
751 }
752 }
753 }
754 goto fin;
755}
756#endif USERMEM
757
758 chog = CHOG; /* assumed character for the hog */
759 cstat_flg = 0;
760 hsize_reg = hsize;
a2de8fe3 761 cl_hash( (count_int) hsize_reg); /* clear hash tables */
64d06f2f
KM
762
763 while ( (c = getchar()) != (unsigned) EOF ) {
764 in_count++;
765 if ( cstat_flg == 0 ) {
766 cfreq [c]++; /* gather frequencies at start of input */
767 if ( (count_int)in_count > HOG_CHECK ) {
768 cstat_flg = 1;
769 chog = hogtally(); /* compute char hog */
770 if(chog != CHOG) /* fixup for wrong assumption */
a2de8fe3 771 cl_cache( (count_int) free_ent );
64d06f2f
KM
772 }
773 }
774 if ( c == chog )
775 if ( (i = hashcache [ent]) ) { /* cache -> code */
776 ent = i;
777 continue;
778 }
779 fcode = (long) (((long) c << maxbits) + ent);
a2de8fe3 780#ifdef SHORT_INT
64d06f2f 781 i = (((c + 12347) * ent) & 077777) % HSIZE; /* avoid 'lrem' call */
a2de8fe3 782#else !SHORT_INT
64d06f2f 783 i = fcode % hsize_reg; /* division hashing */
a2de8fe3 784#endif SHORT_INT
64d06f2f
KM
785
786 if ( htab [i] == fcode ) {
787 ent = codetab [i];
788 continue;
789 } else if ( (long)htab [i] < 0 ) /* empty slot */
790 goto nomatch;
791 disp = hsize_reg - i; /* secondary hash (G. Knott) */
792 if ( i == 0 )
793 disp = 1;
794probe:
795 if ( (i -= disp) < 0 )
796 i += hsize_reg;
797
798 if ( htab [i] == fcode ) {
799 ent = codetab [i];
800 continue;
801 }
802 if ( (long)htab [i] > 0 )
803 goto probe;
804nomatch:
805 output ( (code_int) ent );
806 out_count++;
807#ifdef interdata
808 if ( (unsigned) free_ent < (unsigned) maxmaxcode) {
809#else
810 if ( free_ent < maxmaxcode ) {
811#endif
812 if ( c == chog ) /* code -> cache */
813 hashcache [ent] = free_ent;
814 /* code -> hashtable */
815 codetab [i] = free_ent++;
816 htab [i] = fcode;
817 }
818 else if ( (count_int)in_count >= checkpoint && block_compress )
a2de8fe3 819 cl_block ();
64d06f2f
KM
820 ent = c;
821 }
822fin:
823 /*
824 * Put out the final code.
825 */
826 output( (code_int)ent );
827 out_count++;
828 output( (code_int)-1 );
829
830 /*
831 * Print out stats on stderr
832 */
833 if(zcat_flg == 0 && !quiet) {
834#ifdef DEBUG
835 fprintf( stderr,
836 "%ld chars in, %ld codes (%ld bytes) out, compression factor %g\n",
837 in_count, out_count, bytes_out,
838 (double)in_count / (double)bytes_out );
839 fprintf( stderr, "\tCompression as in compact: %5.2f%%\n",
840 100.0 * ( in_count - bytes_out ) / (double) in_count );
841 fprintf( stderr, "\tLargest code was %d (%d bits)\n", free_ent - 1, n_bits );
842#else DEBUG
843 fprintf( stderr, "Compression: %5.2f%%",
844 100.0 * ( in_count - bytes_out ) / (double) in_count );
845#endif DEBUG
846 }
847 if(bytes_out > in_count) /* exit(2) if no savings */
848 exit_stat = 2;
849 return;
850}
851
852/*****************************************************************
853 * TAG( output )
854 *
855 * Output the given code.
856 * Inputs:
857 * code: A n_bits-bit integer. If == -1, then EOF. This assumes
858 * that n_bits =< (long)wordsize - 1.
859 * Outputs:
860 * Outputs code to the file.
861 * Assumptions:
862 * Chars are 8 bits long.
863 * Algorithm:
864 * Maintain a BITS character long buffer (so that 8 codes will
865 * fit in it exactly). Use the VAX insv instruction to insert each
866 * code in turn. When the buffer fills up empty it and start over.
867 */
868
869static char buf[BITS];
870
871#ifndef vax
872char_type lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00};
873char_type rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
874#endif !vax
875
876output( code )
877code_int code;
878{
879#ifdef DEBUG
880 static int col = 0;
881#endif DEBUG
882
883 /*
884 * On the VAX, it is important to have the register declarations
885 * in exactly the order given, or the asm will break.
886 */
887 register int r_off = offset, bits= n_bits;
888 register char * bp = buf;
889
890 if ( code >= 0 ) {
891#ifdef vax
a2de8fe3 892 /* VAX DEPENDENT!! Implementation on other machines may be difficult.
64d06f2f
KM
893 *
894 * Translation: Insert BITS bits from the argument starting at
895 * offset bits from the beginning of buf.
896 */
897 0; /* C compiler bug ?? */
898 asm( "insv 4(ap),r11,r10,(r9)" );
899#else not a vax
a2de8fe3
KM
900/*
901 * byte/bit numbering on the VAX is simulated by the following code
902 */
64d06f2f
KM
903 /*
904 * Get to the first byte.
905 */
906 bp += (r_off >> 3);
907 r_off &= 7;
908 /*
909 * Since code is always >= 8 bits, only need to mask the first
910 * hunk on the left.
911 */
912 *bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off];
913 bp++;
914 bits -= (8 - r_off);
915 code >>= 8 - r_off;
916 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
917 if ( bits >= 8 ) {
918 *bp++ = code;
919 code >>= 8;
920 bits -= 8;
921 }
922 /* Last bits. */
923 if(bits)
924 *bp = code;
925#endif vax
926 offset += n_bits;
927 if ( offset == (n_bits << 3) ) {
928 bp = buf;
929 bits = n_bits;
930 bytes_out += bits;
931 do
932 putchar(*bp++);
933 while(--bits);
934 if (ferror(stdout))
935 writeerr();
936 offset = 0;
937 }
938#ifdef DEBUG
939 if ( verbose )
940 fprintf( stderr, "%5d%c", code,
941 (col+=6) >= 74 ? (col = 0, '\n') : ' ' );
942#endif DEBUG
943
944 /*
945 * If the next entry is going to be too big for the code size,
946 * then increase it, if possible.
947 */
948 if ( free_ent > maxcode || (clear_flg > 0))
949 {
950 /*
951 * Write the whole buffer, because the input side won't
952 * discover the size increase until after it has read it.
953 */
954 if ( offset > 0 ) {
955 if( fwrite( buf, 1, n_bits, stdout ) != n_bits)
956 writeerr();
957 bytes_out += n_bits;
958 }
959 offset = 0;
960
961 if ( clear_flg ) {
962 maxcode = MAXCODE (n_bits = INIT_BITS);
963 clear_flg = 0;
964 }
965 else {
966 n_bits++;
967 if ( n_bits == maxbits )
968 maxcode = maxmaxcode;
969 else
970 maxcode = MAXCODE(n_bits);
971 }
972#ifdef DEBUG
973 if ( debug ) {
974 fprintf( stderr, "\nChange to %d bits\n", n_bits );
975 col = 0;
976 }
977#endif DEBUG
978 }
979 } else {
980 /*
981 * At EOF, write the rest of the buffer.
982 */
983 if ( offset > 0 )
984 fwrite( buf, 1, (offset + 7) / 8, stdout );
985 bytes_out += (offset + 7) / 8;
986 offset = 0;
987 fflush( stdout );
988#ifdef DEBUG
989 if ( verbose )
990 fprintf( stderr, "\n" );
991#endif DEBUG
992 if( ferror( stdout ) )
993 writeerr();
994 }
995}
996
a2de8fe3
KM
997#ifdef SMALL_STACK
998 char_type stack[MAXSTACK];
999#endif
1000
64d06f2f 1001decompress() {
a2de8fe3 1002 register char_type *stackp;
64d06f2f 1003 register int finchar;
a2de8fe3
KM
1004 register code_int code, oldcode, incode;
1005
1006#ifndef SMALL_STACK
1007 char_type stack[MAXSTACK];
1008#endif
64d06f2f
KM
1009
1010 /*
1011 * As above, initialize the first 256 entries in the table.
1012 */
1013 maxcode = MAXCODE(n_bits = INIT_BITS);
1014 for ( code = 255; code >= 0; code-- ) {
1015 tab_prefix[code] = 0;
1016 tab_suffix[code] = (char_type)code;
1017 }
1018 free_ent = ((block_compress) ? FIRST : 256 );
1019
1020 finchar = oldcode = getcode();
1021 putchar( (char)finchar ); /* first code must be 8 bits = char */
a2de8fe3 1022 stackp = &stack[0];
64d06f2f
KM
1023
1024 while ( (code = getcode()) != -1 ) {
1025
1026 if ( (code == CLEAR) && block_compress ) {
a2de8fe3
KM
1027 for ( code = 255; code >= 0; code-- )
1028 tab_prefix[code] = 0;
64d06f2f
KM
1029 clear_flg = 1;
1030 free_ent = FIRST - 1;
1031 if ( (code = getcode ()) == -1 ) /* O, untimely death! */
1032 break;
1033 }
1034 incode = code;
1035 /*
1036 * Special case for KwKwK string.
1037 */
1038 if ( code >= free_ent ) {
a2de8fe3 1039 *stackp++ = finchar;
64d06f2f
KM
1040 code = oldcode;
1041 }
1042
1043 /*
1044 * Generate output characters in reverse order
1045 */
1046#ifdef interdata
1047 while ( ((unsigned long)code) >= ((unsigned long)256) ) {
1048#else !interdata
1049 while ( code >= 256 ) {
1050#endif interdata
a2de8fe3 1051 *stackp++ = tab_suffix[code];
64d06f2f
KM
1052 code = tab_prefix[code];
1053 }
a2de8fe3 1054 *stackp++ = finchar = tab_suffix[code];
64d06f2f
KM
1055
1056 /*
1057 * And put them out in forward order
1058 */
a2de8fe3
KM
1059 while ( --stackp > &stack[0] )
1060 putchar ( *stackp );
1061 putchar ( *stackp );
64d06f2f
KM
1062 if (ferror(stdout))
1063 writeerr ( );
64d06f2f
KM
1064
1065 /*
1066 * Generate the new entry.
1067 */
1068 if ( (code=free_ent) < maxmaxcode ) {
1069 tab_prefix[code] = (unsigned short)oldcode;
1070 tab_suffix[code] = finchar;
1071 free_ent = code+1;
1072 }
1073 /*
1074 * Remember previous code.
1075 */
1076 oldcode = incode;
1077 }
1078 fflush( stdout );
1079 if(ferror(stdout))
1080 writeerr();
1081}
1082
64d06f2f
KM
1083/*****************************************************************
1084 * TAG( getcode )
1085 *
1086 * Read one code from the standard input. If EOF, return -1.
1087 * Inputs:
1088 * stdin
1089 * Outputs:
1090 * code or -1 is returned.
1091 */
1092
1093code_int
1094getcode() {
1095 /*
1096 * On the VAX, it is important to have the register declarations
1097 * in exactly the order given, or the asm will break.
1098 */
1099 register code_int code;
1100 static int offset = 0, size = 0;
1101 static char_type buf[BITS];
1102 register int r_off, bits;
1103 register char_type *bp = buf;
1104
1105 if ( clear_flg > 0 || offset >= size || free_ent > maxcode ) {
1106 /*
1107 * If the next entry will be too big for the current code
1108 * size, then we must increase the size. This implies reading
1109 * a new buffer full, too.
1110 */
1111 if ( free_ent > maxcode ) {
1112 n_bits++;
1113 if ( n_bits == maxbits )
1114 maxcode = maxmaxcode; /* won't get any bigger now */
1115 else
1116 maxcode = MAXCODE(n_bits);
1117 }
1118 if ( clear_flg > 0) {
1119 maxcode = MAXCODE (n_bits = INIT_BITS);
1120 clear_flg = 0;
1121 }
1122 size = fread( buf, 1, n_bits, stdin );
1123 if ( size <= 0 )
1124 return -1; /* end of file */
1125 offset = 0;
1126 /* Round size down to integral number of codes */
1127 size = (size << 3) - (n_bits - 1);
1128 }
1129 r_off = offset;
1130 bits = n_bits;
1131#ifdef vax
1132 asm( "extzv r10,r9,(r8),r11" );
1133#else not a vax
1134 /*
1135 * Get to the first byte.
1136 */
1137 bp += (r_off >> 3);
1138 r_off &= 7;
1139 /* Get first part (low order bits) */
1140#ifdef NO_UCHAR
1141 code = ((*bp++ >> r_off) & rmask[8 - r_off]) & 0xff;
1142#else NO_UCHAR
1143 code = (*bp++ >> r_off);
1144#endif NO_UCHAR
1145 bits -= (8 - r_off);
1146 r_off = 8 - r_off; /* now, offset into code word */
1147 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
1148 if ( bits >= 8 ) {
1149#ifdef NO_UCHAR
1150 code |= (*bp++ & 0xff) << r_off;
1151#else NO_UCHAR
1152 code |= *bp++ << r_off;
1153#endif NO_UCHAR
1154 r_off += 8;
1155 bits -= 8;
1156 }
1157 /* high order bits. */
1158 code |= (*bp & rmask[bits]) << r_off;
1159#endif vax
1160 offset += n_bits;
1161
1162 return code;
1163}
1164
1165char *
1166rindex(s, c) /* For those who don't have it in libc.a */
1167register char *s, c;
1168{
1169 char *p;
1170 for (p = NULL; *s; s++)
1171 if (*s == c)
1172 p = s;
1173 return(p);
1174}
1175
1176#ifdef DEBUG
1177printcodes()
1178{
1179 /*
a2de8fe3 1180 * Just print out codes from input file. For debugging.
64d06f2f
KM
1181 */
1182 code_int code;
1183 int col = 0, bits;
1184
1185 bits = n_bits = INIT_BITS;
1186 maxcode = MAXCODE(n_bits);
1187 free_ent = ((block_compress) ? FIRST : 256 );
1188 while ( ( code = getcode() ) >= 0 ) {
1189 if ( (code == CLEAR) && block_compress ) {
1190 free_ent = FIRST - 1;
1191 clear_flg = 1;
1192 }
1193 else if ( free_ent < maxmaxcode )
1194 free_ent++;
1195 if ( bits != n_bits ) {
1196 fprintf(stderr, "\nChange to %d bits\n", n_bits );
1197 bits = n_bits;
1198 col = 0;
1199 }
1200 fprintf(stderr, "%5d%c", code, (col+=6) >= 74 ? (col = 0, '\n') : ' ' );
1201 }
1202 putc( '\n', stderr );
1203 exit( 0 );
1204}
1205
1206dump_tab() /* dump string table */
1207{
1208 register int i;
1209 register ent;
1210 char stack[4 * MAXSTACK]; /* \nnn makes it 4 times bigger */
1211 int stack_top = 4 * MAXSTACK;
1212
1213 for ( i = 0; i < free_ent; i++ ) {
1214 ent = i;
1215 if ( isascii(tab_suffix[ent]) && isprint(tab_suffix[ent]) )
1216 fprintf( stderr, "%5d: %5d/'%c' \"",
1217 ent, tab_prefix[ent], tab_suffix[ent] );
1218 else
1219 fprintf( stderr, "%5d: %5d/\\%03o \"",
1220 ent, tab_prefix[ent], tab_suffix[ent] );
1221 stack[--stack_top] = '\n';
1222 stack[--stack_top] = '"';
1223 for ( ; ent != NULL;
1224 ent = (ent >= FIRST ? tab_prefix[ent] : NULL) ) {
1225 if ( isascii(tab_suffix[ent]) && isprint(tab_suffix[ent]) )
1226 stack[--stack_top] = tab_suffix[ent];
1227 else {
1228 switch( tab_suffix[ent] ) {
1229 case '\n': stack[--stack_top] = 'n'; break;
1230 case '\t': stack[--stack_top] = 't'; break;
1231 case '\b': stack[--stack_top] = 'b'; break;
1232 case '\f': stack[--stack_top] = 'f'; break;
1233 case '\r': stack[--stack_top] = 'r'; break;
1234 default:
1235 stack[--stack_top] = '0' + tab_suffix[ent] % 8;
1236 stack[--stack_top] = '0' + (tab_suffix[ent] / 8) % 8;
1237 stack[--stack_top] = '0' + tab_suffix[ent] / 64;
1238 break;
1239 }
1240 stack[--stack_top] = '\\';
1241 }
1242 }
1243 fwrite( &stack[stack_top], 1, 4 * MAXSTACK - stack_top, stderr );
1244 stack_top = 4 * MAXSTACK;
1245 }
1246}
1247#endif DEBUG
1248
1249/*****************************************************************
1250 * TAG( writeerr )
1251 *
1252 * Exits with a message. We only check for write errors often enough
1253 * to avoid a lot of "file system full" messages, not on every write.
1254 * ferror() check after fflush will catch any others (I trust).
1255 *
1256 */
1257
1258writeerr()
1259{
1260 perror ( ofname );
1261 unlink ( ofname );
1262 exit ( 1 );
1263}
1264
1265copystat(ifname, ofname)
1266char *ifname, *ofname;
1267{
1268 struct stat statbuf;
1269 int mode;
1270 time_t timep[2];
1271
1272 fclose(stdout);
1273 if (stat(ifname, &statbuf)) { /* Get stat on input file */
1274 perror(ifname);
1275 return;
1276 }
1277 if ((statbuf.st_mode & S_IFMT/*0170000*/) != S_IFREG/*0100000*/) {
1278 if(quiet)
a2de8fe3 1279 fprintf(stderr, "%s: ", ifname);
64d06f2f
KM
1280 fprintf(stderr, " -- not a regular file: unchanged");
1281 exit_stat = 1;
1282 } else if (statbuf.st_nlink > 1) {
1283 if(quiet)
a2de8fe3 1284 fprintf(stderr, "%s: ", ifname);
64d06f2f
KM
1285 fprintf(stderr, " -- has %d other links: unchanged",
1286 statbuf.st_nlink - 1);
1287 exit_stat = 1;
a2de8fe3 1288 } else if (exit_stat == 2 && (!force) && !quiet) { /* No compression: remove file.Z */
64d06f2f
KM
1289 fprintf(stderr, " -- file unchanged");
1290 } else { /* ***** Successful Compression ***** */
1291 exit_stat = 0;
1292 mode = statbuf.st_mode & 07777;
1293 if (chmod(ofname, mode)) /* Copy modes */
1294 perror(ofname);
1295 chown(ofname, statbuf.st_uid, statbuf.st_gid); /* Copy ownership */
1296 timep[0] = statbuf.st_atime;
1297 timep[1] = statbuf.st_mtime;
1298 utime(ofname, timep); /* Update last accessed and modified times */
1299 if (unlink(ifname)) /* Remove input file */
1300 perror(ifname);
1301 if(!quiet)
1302 fprintf(stderr, " -- replaced with %s", ofname);
1303 return; /* Successful return */
1304 }
1305
1306 /* Unsuccessful return -- one of the tests failed */
1307 if (unlink(ofname))
1308 perror(ofname);
1309}
1310/*
1311 * This routine returns 1 if we are running in the foreground and stderr
1312 * is a tty.
1313 */
1314foreground()
1315{
1316 if(bgnd_flag) { /* background? */
1317 return(0);
1318 } else { /* foreground */
1319 if(isatty(2)) { /* and stderr is a tty */
1320 return(1);
1321 } else {
1322 return(0);
1323 }
1324 }
1325}
1326
1327onintr ( )
1328{
1329 unlink ( ofname );
1330 exit ( 1 );
1331}
1332
a2de8fe3 1333cl_block () /* table clear for block compress */
64d06f2f 1334{
64d06f2f
KM
1335#ifdef DEBUG
1336 if(debug)
1337 fprintf ( stderr, "count: %ld ratio: %f\n", in_count,
1338 (double) in_count / (double) bytes_out );
1339#endif DEBUG
1340
1341 checkpoint = in_count + CHECK_GAP;
1342 if ( (double) in_count / (double) bytes_out > ratio )
1343 ratio = (double) in_count / (double) bytes_out;
1344 else {
1345 ratio = 0.0;
1346#ifdef USERMEM
1347 if ( maxbits <= FBITS ) /* sparse array clear */
a2de8fe3 1348 cl_sparse ();
64d06f2f
KM
1349 else
1350#endif USERMEM /* hash table clear */
1351 {
a2de8fe3 1352 cl_hash ( (count_int) hsize );
64d06f2f
KM
1353 }
1354 free_ent = FIRST;
1355 clear_flg = 1;
1356 output ( (code_int) CLEAR );
1357#ifdef DEBUG
1358 if(debug)
1359 fprintf ( stderr, "clear\n" );
1360#endif DEBUG
1361 }
1362}
1363
a2de8fe3 1364cl_cache ( n ) /* clear hash cache */
64d06f2f
KM
1365 register count_int n; /* clear at least this many entries */
1366{
1367 register count_int i;
1368 register unsigned short *hash_p;
1369 register unsigned short zero = 0;
64d06f2f 1370
a2de8fe3 1371 if ( nfiles++ == 0 ) /* no clear needed if first time */
64d06f2f
KM
1372 return;
1373 n = (n+15) & (-16);
1374 hash_p = hashcache + n;
a2de8fe3 1375 for ( i = n; i > 0; i -=16 ) { /* can do BSD bzero(3) or Sys V memset(3) */
64d06f2f
KM
1376 *(hash_p-16) = zero;
1377 *(hash_p-15) = zero;
1378 *(hash_p-14) = zero;
1379 *(hash_p-13) = zero;
1380 *(hash_p-12) = zero;
1381 *(hash_p-11) = zero;
1382 *(hash_p-10) = zero;
1383 *(hash_p-9) = zero;
1384 *(hash_p-8) = zero;
1385 *(hash_p-7) = zero;
1386 *(hash_p-6) = zero;
1387 *(hash_p-5) = zero;
1388 *(hash_p-4) = zero;
1389 *(hash_p-3) = zero;
1390 *(hash_p-2) = zero;
1391 *(hash_p-1) = zero;
1392 hash_p -= 16;
1393 }
1394}
1395
1396hogtally () /* compute character code hog */
1397{
1398 register int i, most;
1399
1400 for ( i = most = 0; i < 256; i++ )
1401 if ( cfreq [i] >= cfreq [most] )
1402 most = i;
1403 return ( most );
1404}
1405
a2de8fe3
KM
1406cl_hash(hsize) /* clear hash cache, re-init code table */
1407 register count_int hsize;
64d06f2f
KM
1408{
1409 register count_int *htab_p = htab+hsize;
a2de8fe3 1410 register count_int i;
64d06f2f
KM
1411 register long m1 = -1;
1412
a2de8fe3 1413 cl_cache( min((count_int)hsize, MAX_CACHE) );
64d06f2f
KM
1414
1415 i = hsize - 16;
1416 do {
1417 *(htab_p-16) = m1;
1418 *(htab_p-15) = m1;
1419 *(htab_p-14) = m1;
1420 *(htab_p-13) = m1;
1421 *(htab_p-12) = m1;
1422 *(htab_p-11) = m1;
1423 *(htab_p-10) = m1;
1424 *(htab_p-9) = m1;
1425 *(htab_p-8) = m1;
1426 *(htab_p-7) = m1;
1427 *(htab_p-6) = m1;
1428 *(htab_p-5) = m1;
1429 *(htab_p-4) = m1;
1430 *(htab_p-3) = m1;
1431 *(htab_p-2) = m1;
1432 *(htab_p-1) = m1;
1433 htab_p -= 16;
1434 } while ((i -= 16) >= 0);
1435 for ( i += 16; i > 0; i-- )
1436 *--htab_p = m1;
1437}
a2de8fe3
KM
1438
1439#ifdef USERMEM
1440cl_sparse ( ) /* clear sparse table indirectly thru "shadow" array */
1441{
1442 register code_int i;
1443
1444 for ( i = (1 << maxbits) - 1; i >= 0; i-- )
1445 ftable [fcodemem [i]] = 0;
1446}
1447#endif USERMEM