port to new kvm
[unix-history] / usr / src / usr.bin / window / compress.c
CommitLineData
49373f41
EW
1/*
2 * Copyright (c) 1989 Regents of the University of California.
3 * All rights reserved.
4 *
3dd3a9e5
KB
5 * This code is derived from software contributed to Berkeley by
6 * Edward Wang at The University of California, Berkeley.
7 *
87f529ec 8 * %sccs.include.redist.c%
49373f41
EW
9 */
10
11#ifndef lint
af14aa25 12static char sccsid[] = "@(#)compress.c 3.6 (Berkeley) %G%";
49373f41
EW
13#endif /* not lint */
14
15#include "ww.h"
49373f41
EW
16#include "tt.h"
17
17f96a0f
EW
18 /* special */
19#include <stdio.h>
20int cc_trace = 0;
21FILE *cc_trace_fp;
22
0ab446d2
EW
23 /* tunable parameters */
24
17f96a0f
EW
25int cc_reverse = 1;
26int cc_sort = 0;
27int cc_chop = 0;
0ab446d2 28
17f96a0f
EW
29int cc_token_max = 8; /* <= TOKEN_MAX */
30int cc_token_min = 2; /* > tt.tt_put_token_cost */
31int cc_npass0 = 1;
32int cc_npass1 = 1;
0ab446d2 33
17f96a0f 34int cc_bufsize = 1024 * 3; /* XXX, or 80 * 24 * 2 */
0ab446d2 35
17f96a0f 36int cc_ntoken = 8192;
0ab446d2 37
17f96a0f
EW
38#define cc_weight XXX
39#ifndef cc_weight
40int cc_weight = 0;
0ab446d2
EW
41#endif
42
43#define TOKEN_MAX 16
44
17f96a0f 45struct cc {
49373f41 46 char string[TOKEN_MAX];
0ab446d2
EW
47 char length;
48 char flag;
17f96a0f 49#ifndef cc_weight
0ab446d2
EW
50 short weight;
51#endif
52 long time; /* time last seen */
53 short bcount; /* count in this buffer */
54 short ccount; /* count in compression */
55 short places; /* places in the buffer */
56 short code; /* token code */
17f96a0f
EW
57 struct cc *qforw, *qback;
58 struct cc *hforw, **hback;
49373f41
EW
59};
60
17f96a0f
EW
61short cc_thresholds[TOKEN_MAX + 1];
62#define thresh(length) (cc_thresholds[length])
0ab446d2 63#define threshp(code, count, length) \
17f96a0f 64 ((code) >= 0 || (short) (count) >= cc_thresholds[length])
0ab446d2 65
17f96a0f
EW
66#ifndef cc_weight
67short cc_wthresholds[TOKEN_MAX + 1];
68#define wthresh(length) (cc_wthresholds[length])
69#define wthreshp(weight, length) ((short) (weight) >= cc_wthresholds[length])
0ab446d2
EW
70#else
71#define wthreshp(weight, length) (0)
72#endif
73
17f96a0f
EW
74#ifndef cc_weight
75short cc_wlimits[TOKEN_MAX + 1];
76#define wlimit(length) (cc_wlimits[length])
0ab446d2
EW
77#endif
78
79#define put_token_score(length) ((length) - tt.tt_put_token_cost)
80
17f96a0f 81int cc_score_adjustments[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
0ab446d2
EW
82#define score_adjust(score, p) \
83 do { \
84 int length = (p)->length; \
85 int ccount = (p)->ccount; \
86 if (threshp((p)->code, ccount, length) || \
87 wthreshp((p)->weight, length)) /* XXX */ \
88 (score) -= length - tt.tt_put_token_cost; \
89 else \
17f96a0f 90 (score) += cc_score_adjustments[length][ccount]; \
0ab446d2
EW
91 } while (0)
92
17f96a0f 93int cc_initial_scores[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
0ab446d2 94
17f96a0f 95struct cc cc_q0a, cc_q0b, cc_q1a, cc_q1b;
0ab446d2 96
17f96a0f 97#define qinsert(p1, p2) \
0ab446d2 98 do { \
17f96a0f
EW
99 register struct cc *forw = (p1)->qforw; \
100 register struct cc *back = (p1)->qback; \
0ab446d2
EW
101 back->qforw = forw; \
102 forw->qback = back; \
17f96a0f
EW
103 forw = (p2)->qforw; \
104 (p1)->qforw = forw; \
105 forw->qback = (p1); \
106 (p2)->qforw = (p1); \
107 (p1)->qback = (p2); \
0ab446d2
EW
108 } while (0)
109
17f96a0f 110#define qinsertq(q, p) \
0ab446d2 111 ((q)->qforw == (q) ? 0 : \
17f96a0f
EW
112 ((q)->qback->qforw = (p)->qforw, \
113 (p)->qforw->qback = (q)->qback, \
114 (q)->qforw->qback = (p), \
115 (p)->qforw = (q)->qforw, \
0ab446d2
EW
116 (q)->qforw = (q), \
117 (q)->qback = (q)))
118
119#define H (14)
120#define HSIZE (1 << H)
17f96a0f
EW
121#define hash(h, c) ((((h) >> H - 8 | (h) << 8) ^ (c)) & HSIZE - 1)
122
123char *cc_buffer;
124struct cc **cc_output; /* the output array */
125short *cc_places[TOKEN_MAX + 1];
126short *cc_hashcodes; /* for computing hashcodes */
127struct cc **cc_htab; /* the hash table */
128struct cc **cc_tokens; /* holds all the active tokens */
129struct cc_undo {
130 struct cc **pos;
131 struct cc *val;
132} *cc_undo;
133
134long cc_time, cc_time0;
135
136char *cc_tt_ob, *cc_tt_obe;
137
138ccinit()
49373f41 139{
0ab446d2 140 register i, j;
17f96a0f 141 register struct cc *p;
49373f41 142
17f96a0f
EW
143 if (tt.tt_token_max > cc_token_max)
144 tt.tt_token_max = cc_token_max;
145 if (tt.tt_token_min < cc_token_min)
146 tt.tt_token_min = cc_token_min;
0ab446d2 147 if (tt.tt_token_min > tt.tt_token_max) {
49373f41 148 tt.tt_ntoken = 0;
0ab446d2 149 return 0;
49373f41 150 }
17f96a0f
EW
151 if (tt.tt_ntoken > cc_ntoken / 2) /* not likely */
152 tt.tt_ntoken = cc_ntoken / 2;
0ab446d2 153#define C(x) (sizeof (x) / sizeof *(x))
17f96a0f 154 for (i = 0; i < C(cc_thresholds); i++) {
0ab446d2
EW
155 int h = i - tt.tt_put_token_cost;
156 if (h > 0)
17f96a0f 157 cc_thresholds[i] =
0ab446d2
EW
158 (tt.tt_set_token_cost + 1 + h - 1) / h + 1;
159 else
17f96a0f 160 cc_thresholds[i] = 0;
0ab446d2 161 }
17f96a0f
EW
162 for (i = 0; i < C(cc_score_adjustments); i++) {
163 int t = cc_thresholds[i];
164 for (j = 0; j < C(*cc_score_adjustments); j++) {
0ab446d2 165 if (j >= t)
17f96a0f 166 cc_score_adjustments[i][j] =
0ab446d2
EW
167 - (i - tt.tt_put_token_cost);
168 else if (j < t - 1)
17f96a0f 169 cc_score_adjustments[i][j] = 0;
0ab446d2
EW
170 else
171 /*
172 * cost now is
173 * length * (ccount + 1) a
174 * cost before was
175 * set-token-cost + length +
176 * ccount * put-token-cost b
177 * the score adjustment is (b - a)
178 */
17f96a0f 179 cc_score_adjustments[i][j] =
0ab446d2
EW
180 tt.tt_set_token_cost + i +
181 j * tt.tt_put_token_cost -
182 i * (j + 1);
183 if (j >= t)
17f96a0f 184 cc_initial_scores[i][j] = 0;
0ab446d2
EW
185 else
186 /*
187 * - (set-token-cost +
188 * (length - put-token-cost) -
189 * (length - put-token-cost) * ccount)
190 */
17f96a0f 191 cc_initial_scores[i][j] =
0ab446d2
EW
192 - (tt.tt_set_token_cost +
193 (i - tt.tt_put_token_cost) -
194 (i - tt.tt_put_token_cost) * j);
195 }
196 }
17f96a0f
EW
197#ifndef cc_weight
198 for (i = 1; i < C(cc_wthresholds); i++) {
199 cc_wthresholds[i] =
0ab446d2
EW
200 ((tt.tt_set_token_cost + tt.tt_put_token_cost) / i +
201 i / 5 + 1) *
17f96a0f
EW
202 cc_weight + 1;
203 cc_wlimits[i] = cc_wthresholds[i] + cc_weight;
0ab446d2
EW
204 }
205#endif
206#undef C
17f96a0f
EW
207 if ((cc_output = (struct cc **)
208 malloc((unsigned) cc_bufsize * sizeof *cc_output)) == 0)
0ab446d2 209 goto nomem;
17f96a0f
EW
210 if ((cc_hashcodes = (short *)
211 malloc((unsigned) cc_bufsize * sizeof *cc_hashcodes)) == 0)
0ab446d2 212 goto nomem;
17f96a0f 213 if ((cc_htab = (struct cc **) malloc(HSIZE * sizeof *cc_htab)) == 0)
49373f41 214 goto nomem;
17f96a0f 215 if ((cc_tokens = (struct cc **)
0ab446d2 216 malloc((unsigned)
17f96a0f
EW
217 (cc_ntoken + tt.tt_token_max - tt.tt_token_min + 1) *
218 sizeof *cc_tokens)) == 0)
49373f41 219 goto nomem;
17f96a0f
EW
220 if ((cc_undo = (struct cc_undo *)
221 malloc((unsigned) cc_bufsize * sizeof *cc_undo)) == 0)
0ab446d2
EW
222 goto nomem;
223 for (i = tt.tt_token_min; i <= tt.tt_token_max; i++)
17f96a0f
EW
224 if ((cc_places[i] = (short *)
225 malloc((unsigned) cc_bufsize * sizeof **cc_places)) == 0)
0ab446d2 226 goto nomem;
17f96a0f
EW
227 cc_q0a.qforw = cc_q0a.qback = &cc_q0a;
228 cc_q0b.qforw = cc_q0b.qback = &cc_q0b;
229 cc_q1a.qforw = cc_q1a.qback = &cc_q1a;
230 cc_q1b.qforw = cc_q1b.qback = &cc_q1b;
231 if ((p = (struct cc *) malloc((unsigned) cc_ntoken * sizeof *p)) == 0)
0ab446d2
EW
232 goto nomem;
233 for (i = 0; i < tt.tt_ntoken; i++) {
234 p->code = i;
235 p->time = -1;
17f96a0f
EW
236 p->qback = cc_q0a.qback;
237 p->qforw = &cc_q0a;
0ab446d2 238 p->qback->qforw = p;
17f96a0f 239 cc_q0a.qback = p;
0ab446d2 240 p++;
49373f41 241 }
17f96a0f 242 for (; i < cc_ntoken; i++) {
0ab446d2
EW
243 p->code = -1;
244 p->time = -1;
17f96a0f
EW
245 p->qback = cc_q1a.qback;
246 p->qforw = &cc_q1a;
0ab446d2 247 p->qback->qforw = p;
17f96a0f 248 cc_q1a.qback = p;
0ab446d2 249 p++;
49373f41 250 }
17f96a0f
EW
251 cc_tt_ob = tt_ob;
252 cc_tt_obe = tt_obe;
253 if ((cc_buffer = malloc((unsigned) cc_bufsize)) == 0)
254 goto nomem;
49373f41
EW
255 return 0;
256nomem:
257 wwerrno = WWE_NOMEM;
258 return -1;
259}
260
17f96a0f 261ccstart()
49373f41 262{
17f96a0f
EW
263 register struct cc *p;
264 int ccflush();
265
266 (*tt.tt_flush)();
267 tt_obp = tt_ob = cc_buffer;
268 tt_obe = tt_ob + cc_bufsize;
269 tt.tt_flush = ccflush;
270 bzero((char *) cc_htab, HSIZE * sizeof *cc_htab);
271 for (p = cc_q0a.qforw; p != &cc_q0a; p = p->qforw)
0ab446d2 272 p->hback = 0;
17f96a0f 273 for (p = cc_q1a.qforw; p != &cc_q1a; p = p->qforw)
0ab446d2 274 p->hback = 0;
17f96a0f
EW
275 if (cc_trace)
276 cc_trace_fp = fopen("window-trace", "a");
49373f41
EW
277}
278
17f96a0f 279ccend()
49373f41 280{
17f96a0f
EW
281 int ttflush();
282
283 (*tt.tt_flush)();
284 tt_obp = tt_ob = cc_tt_ob;
285 tt_obe = cc_tt_obe;
286 tt.tt_flush = ttflush;
287 if (cc_trace_fp != NULL) {
288 (void) fclose(cc_trace_fp);
289 cc_trace_fp = NULL;
290 }
291}
292
293ccflush()
294{
295 int bufsize = tt_obp - tt_ob;
0ab446d2 296 int n;
17f96a0f 297 int ttflush();
0ab446d2 298
17f96a0f
EW
299 if (tt_ob != cc_buffer)
300 abort();
301 if (cc_trace_fp != NULL) {
302 (void) fwrite(tt_ob, 1, bufsize, cc_trace_fp);
303 putc(-1, cc_trace_fp);
304 }
305 if (bufsize < tt.tt_token_min) {
306 ttflush();
0ab446d2 307 return;
0ab446d2 308 }
17f96a0f
EW
309 tt_obp = tt_ob = cc_tt_ob;
310 tt_obe = cc_tt_obe;
311 tt.tt_flush = ttflush;
312 cc_time0 = cc_time;
313 cc_time += bufsize;
314 n = cc_sweep_phase(cc_buffer, bufsize, cc_tokens);
315 cc_compress_phase(cc_output, bufsize, cc_tokens, n);
316 cc_output_phase(cc_buffer, cc_output, bufsize);
317 ttflush();
318 tt_obp = tt_ob = cc_buffer;
319 tt_obe = cc_buffer + cc_bufsize;
320 tt.tt_flush = ccflush;
0ab446d2
EW
321}
322
17f96a0f 323cc_sweep_phase(buffer, bufsize, tokens)
0ab446d2 324 char *buffer;
17f96a0f 325 struct cc **tokens;
0ab446d2 326{
17f96a0f 327 register struct cc **pp = tokens;
0ab446d2
EW
328 register i, n;
329#ifdef STATS
330 int nn, ii;
331#endif
332
333#ifdef STATS
17f96a0f
EW
334 if (verbose >= 0)
335 time_begin();
0ab446d2
EW
336 if (verbose > 0)
337 printf("Sweep:");
338#endif
17f96a0f 339 cc_sweep0(buffer, bufsize, tt.tt_token_min - 1);
0ab446d2 340#ifdef STATS
17f96a0f 341 ntoken_stat = 0;
0ab446d2
EW
342 nn = 0;
343 ii = 0;
344#endif
345 for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) {
346#ifdef STATS
347 if (verbose > 0) {
348 if (ii > 7) {
349 printf("\n ");
350 ii = 0;
351 }
352 ii++;
353 printf(" (%d", i);
354 (void) fflush(stdout);
355 }
356#endif
17f96a0f 357 n = cc_sweep(buffer, bufsize, pp, i);
0ab446d2
EW
358 pp += n;
359#ifdef STATS
360 if (verbose > 0) {
361 if (--n > 0) {
362 printf(" %d", n);
363 nn += n;
364 }
365 putchar(')');
366 }
367#endif
368 }
17f96a0f 369 qinsertq(&cc_q1b, &cc_q1a);
0ab446d2
EW
370#ifdef STATS
371 if (verbose > 0)
372 printf("\n %d tokens, %d candidates\n",
17f96a0f
EW
373 ntoken_stat, nn);
374 if (verbose >= 0)
375 time_end();
0ab446d2
EW
376#endif
377 return pp - tokens;
49373f41
EW
378}
379
17f96a0f 380cc_sweep0(buffer, n, length)
0ab446d2 381 char *buffer;
49373f41 382{
17f96a0f
EW
383 register char *p;
384 register short *hc;
0ab446d2 385 register i;
17f96a0f
EW
386 register short c;
387 register short pc = tt.tt_padc;
0ab446d2 388
17f96a0f
EW
389 /* n and length are at least 1 */
390 p = buffer++;
391 hc = cc_hashcodes;
392 i = n;
393 do {
394 if ((*hc++ = *p++) == pc)
395 hc[-1] = -1;
396 } while (--i);
397 while (--length) {
398 p = buffer++;
399 hc = cc_hashcodes;
400 for (i = n--; --i;) {
401 if ((c = *p++) == pc || *hc < 0)
402 c = -1;
403 else
404 c = hash(*hc, c);
405 *hc++ = c;
0ab446d2 406 }
17f96a0f 407 }
0ab446d2
EW
408}
409
17f96a0f 410cc_sweep(buffer, bufsize, tokens, length)
0ab446d2 411 char *buffer;
17f96a0f 412 struct cc **tokens;
0ab446d2
EW
413 register length;
414{
17f96a0f 415 register struct cc *p;
0ab446d2 416 register char *cp;
49373f41 417 register i;
0ab446d2 418 short *hc;
17f96a0f
EW
419 short *places = cc_places[length];
420 struct cc **pp = tokens;
0ab446d2 421 short threshold = thresh(length);
17f96a0f 422#ifndef cc_weight
0ab446d2
EW
423 short wthreshold = wthresh(length);
424 short limit = wlimit(length);
425#endif
426 int time;
17f96a0f 427 short pc = tt.tt_padc;
49373f41 428
0ab446d2
EW
429 i = length - 1;
430 bufsize -= i;
431 cp = buffer + i;
17f96a0f
EW
432 hc = cc_hashcodes;
433 time = cc_time0;
0ab446d2 434 for (i = 0; i < bufsize; i++, time++) {
17f96a0f 435 struct cc **h;
0ab446d2
EW
436
437 {
438 register short *hc1 = hc;
17f96a0f
EW
439 register short c = *cp++;
440 register short hh;
441 if ((hh = *hc1) < 0 || c == pc) {
0ab446d2
EW
442 *hc1++ = -1;
443 hc = hc1;
0ab446d2 444 continue;
49373f41 445 }
17f96a0f
EW
446 h = cc_htab + (*hc1++ = hash(hh, c));
447 hc = hc1;
0ab446d2
EW
448 }
449 for (p = *h; p != 0; p = p->hforw)
450 if (p->length == (char) length) {
451 register char *p1 = p->string;
452 register char *p2 = cp - length;
453 register n = length;
454 do
455 if (*p1++ != *p2++)
456 goto fail;
457 while (--n);
458 break;
459 fail:;
460 }
461 if (p == 0) {
17f96a0f
EW
462 p = cc_q1a.qback;
463 if (p == &cc_q1a ||
464 p->time >= cc_time0 && p->length == (char) length)
0ab446d2
EW
465 continue;
466 if (p->hback != 0)
467 if ((*p->hback = p->hforw) != 0)
468 p->hforw->hback = p->hback;
469 {
470 register char *p1 = p->string;
471 register char *p2 = cp - length;
472 register n = length;
473 do
474 *p1++ = *p2++;
475 while (--n);
476 }
477 p->length = length;
17f96a0f
EW
478#ifndef cc_weight
479 p->weight = cc_weight;
0ab446d2
EW
480#endif
481 p->time = time;
482 p->bcount = 1;
483 p->ccount = 0;
484 p->flag = 0;
485 if ((p->hforw = *h) != 0)
486 p->hforw->hback = &p->hforw;
487 *h = p;
488 p->hback = h;
17f96a0f 489 qinsert(p, &cc_q1a);
0ab446d2
EW
490 places[i] = -1;
491 p->places = i;
492#ifdef STATS
17f96a0f 493 ntoken_stat++;
0ab446d2 494#endif
17f96a0f
EW
495 } else if (p->time < cc_time0) {
496#ifndef cc_weight
0ab446d2 497 if ((p->weight += p->time - time) < 0)
17f96a0f
EW
498 p->weight = cc_weight;
499 else if ((p->weight += cc_weight) > limit)
0ab446d2
EW
500 p->weight = limit;
501#endif
502 p->time = time;
503 p->bcount = 1;
504 p->ccount = 0;
505 if (p->code >= 0) {
506 p->flag = 1;
507 *pp++ = p;
508 } else
17f96a0f 509#ifndef cc_weight
0ab446d2
EW
510 if (p->weight >= wthreshold) {
511 p->flag = 1;
512 *pp++ = p;
17f96a0f 513 qinsert(p, &cc_q1b);
0ab446d2
EW
514 } else
515#endif
516 {
517 p->flag = 0;
17f96a0f 518 qinsert(p, &cc_q1a);
0ab446d2
EW
519 }
520 places[i] = -1;
521 p->places = i;
522#ifdef STATS
17f96a0f 523 ntoken_stat++;
0ab446d2
EW
524#endif
525 } else if (p->time + length > time) {
49373f41 526 /*
0ab446d2
EW
527 * overlapping token, don't count as two and
528 * don't update time, but do adjust weight to offset
529 * the difference
49373f41 530 */
17f96a0f
EW
531#ifndef cc_weight
532 if (cc_weight != 0) { /* XXX */
0ab446d2
EW
533 p->weight += time - p->time;
534 if (!p->flag && p->weight >= wthreshold) {
535 p->flag = 1;
536 *pp++ = p;
17f96a0f 537 qinsert(p, &cc_q1b);
0ab446d2
EW
538 }
539 }
540#endif
541 places[i] = p->places;
542 p->places = i;
543 } else {
17f96a0f 544#ifndef cc_weight
0ab446d2 545 if ((p->weight += p->time - time) < 0)
17f96a0f
EW
546 p->weight = cc_weight;
547 else if ((p->weight += cc_weight) > limit)
0ab446d2
EW
548 p->weight = limit;
549#endif
550 p->time = time;
551 p->bcount++;
552 if (!p->flag &&
553 /* code must be < 0 if flag false here */
554 (p->bcount >= threshold
17f96a0f 555#ifndef cc_weight
0ab446d2
EW
556 || p->weight >= wthreshold
557#endif
558 )) {
559 p->flag = 1;
560 *pp++ = p;
17f96a0f 561 qinsert(p, &cc_q1b);
0ab446d2
EW
562 }
563 places[i] = p->places;
564 p->places = i;
49373f41 565 }
49373f41 566 }
0ab446d2
EW
567 if ((i = pp - tokens) > 0) {
568 *pp = 0;
17f96a0f
EW
569 if (cc_reverse)
570 cc_sweep_reverse(tokens, places);
571 if (cc_sort && i > 1) {
572 int cc_token_compare();
0ab446d2 573 qsort((char *) tokens, i, sizeof *tokens,
17f96a0f 574 cc_token_compare);
0ab446d2 575 }
17f96a0f
EW
576 if (cc_chop) {
577 if ((i = i * cc_chop / 100) == 0)
0ab446d2
EW
578 i = 1;
579 tokens[i] = 0;
580 }
581 i++;
582 }
583 return i;
584}
585
17f96a0f
EW
586cc_sweep_reverse(pp, places)
587 register struct cc **pp;
0ab446d2
EW
588 register short *places;
589{
17f96a0f 590 register struct cc *p;
0ab446d2
EW
591 register short front, back, t;
592
593 while ((p = *pp++) != 0) {
594 back = -1;
595 t = p->places;
596 /* the list is never empty */
597 do {
598 front = places[t];
599 places[t] = back;
600 back = t;
601 } while ((t = front) >= 0);
602 p->places = back;
603 }
604}
605
17f96a0f
EW
606cc_compress_phase(output, bufsize, tokens, ntoken)
607 struct cc **output;
608 struct cc **tokens;
0ab446d2
EW
609{
610 register i;
611
612 bzero((char *) output, bufsize * sizeof *output);
17f96a0f
EW
613 for (i = 0; i < cc_npass0; i++)
614 cc_compress_phase1(output, tokens, ntoken, 0);
615 for (i = 0; i < cc_npass1; i++)
616 cc_compress_phase1(output, tokens, ntoken, 1);
617 cc_compress_cleanup(output, bufsize);
0ab446d2
EW
618}
619
17f96a0f
EW
620cc_compress_phase1(output, tokens, ntoken, flag)
621 register struct cc **output;
622 struct cc **tokens;
0ab446d2 623{
17f96a0f
EW
624 register struct cc **pp;
625#ifdef STATS
af14aa25 626 register int i = 0;
17f96a0f
EW
627 int nt = 0, cc = 0, nc = 0;
628#endif
0ab446d2
EW
629
630#ifdef STATS
17f96a0f
EW
631 if (verbose >= 0)
632 time_begin();
0ab446d2
EW
633 if (verbose > 0)
634 printf("Compress:");
635#endif
636 pp = tokens;
637 while (pp < tokens + ntoken) {
638#ifdef STATS
639 if (verbose > 0) {
17f96a0f
EW
640 ntoken_stat = 0;
641 ccount_stat = 0;
642 ncover_stat = 0;
0ab446d2
EW
643 if (i > 2) {
644 printf("\n ");
645 i = 0;
646 }
647 i++;
648 printf(" (%d", (*pp)->length);
649 (void) fflush(stdout);
650 }
651#endif
17f96a0f 652 pp += cc_compress(output, pp, flag);
0ab446d2 653#ifdef STATS
17f96a0f
EW
654 if (verbose > 0) {
655 printf(" %dt %du %dc)", ntoken_stat, ccount_stat,
656 ncover_stat);
657 nt += ntoken_stat;
658 cc += ccount_stat;
659 nc += ncover_stat;
660 }
0ab446d2
EW
661#endif
662 }
663#ifdef STATS
664 if (verbose > 0)
17f96a0f
EW
665 printf("\n total: (%dt %du %dc)\n", nt, cc, nc);
666 if (verbose >= 0)
667 time_end();
0ab446d2
EW
668#endif
669}
670
17f96a0f
EW
671cc_compress_cleanup(output, bufsize)
672 register struct cc **output;
0ab446d2 673{
17f96a0f 674 register struct cc **end;
0ab446d2
EW
675
676 /* the previous output phase may have been interrupted */
17f96a0f 677 qinsertq(&cc_q0b, &cc_q0a);
0ab446d2 678 for (end = output + bufsize; output < end;) {
17f96a0f 679 register struct cc *p;
0ab446d2
EW
680 register length;
681 if ((p = *output) == 0) {
682 output++;
683 continue;
684 }
685 length = p->length;
686 if (!p->flag) {
687 } else if (p->code >= 0) {
17f96a0f 688 qinsert(p, &cc_q0b);
0ab446d2
EW
689 p->flag = 0;
690 } else if (p->ccount == 0) {
691 *output = 0;
692 } else if (p->ccount >= thresh(length)
17f96a0f 693#ifndef cc_weight
0ab446d2
EW
694 || wthreshp(p->weight, length)
695#endif
696 ) {
697 p->flag = 0;
698 } else {
699 p->ccount = 0;
700 *output = 0;
701 }
702 output += length;
703 }
704}
705
17f96a0f
EW
706cc_compress(output, tokens, flag)
707 struct cc **output;
708 struct cc **tokens;
0ab446d2
EW
709 char flag;
710{
17f96a0f
EW
711 struct cc **pp = tokens;
712 register struct cc *p = *pp++;
0ab446d2
EW
713 int length = p->length;
714 int threshold = thresh(length);
17f96a0f 715#ifndef cc_weight
0ab446d2
EW
716 short wthreshold = wthresh(length);
717#endif
17f96a0f
EW
718 short *places = cc_places[length];
719 int *initial_scores = cc_initial_scores[length];
0ab446d2
EW
720 int initial_score0 = put_token_score(length);
721
722 do {
723 int score;
17f96a0f 724 register struct cc_undo *undop;
0ab446d2
EW
725 int ccount;
726#ifdef STATS
727 int ncover;
728#endif
729 int i;
730
731 ccount = p->ccount;
732 if ((short) ccount >= p->bcount)
733 continue;
734 if (p->code >= 0 || ccount >= threshold)
735 score = 0;
17f96a0f 736#ifndef cc_weight
0ab446d2
EW
737 else if (p->weight >= wthreshold)
738 /* allow one fewer match than normal */
739 /* XXX, should adjust for ccount */
740 score = - tt.tt_set_token_cost;
741#endif
742 else
743 score = initial_scores[ccount];
17f96a0f 744 undop = cc_undo;
0ab446d2
EW
745#ifdef STATS
746 ncover = 0;
747#endif
748 for (i = p->places; i >= 0; i = places[i]) {
17f96a0f
EW
749 register struct cc **jp;
750 register struct cc *x;
751 register struct cc **ip = output + i;
0ab446d2 752 register score0 = initial_score0;
17f96a0f
EW
753 struct cc **iip = ip + length;
754 struct cc_undo *undop1 = undop;
0ab446d2
EW
755
756 if ((x = *(jp = ip)) != 0)
757 goto z;
758 while (--jp >= output)
759 if ((x = *jp) != 0) {
760 if (jp + x->length > ip)
761 goto z;
762 break;
763 }
764 jp = ip + 1;
765 while (jp < iip) {
766 if ((x = *jp) == 0) {
767 jp++;
768 continue;
769 }
770 z:
771 if (x == p)
772 goto undo;
773#ifdef STATS
774 ncover++;
775#endif
776 undop->pos = jp;
777 undop->val = x;
778 undop++;
779 *jp = 0;
780 x->ccount--;
781 score_adjust(score0, x);
782 if (score0 < 0 && flag)
783 goto undo;
784 jp += x->length;
785 }
786 undop->pos = ip;
787 undop->val = 0;
788 undop++;
789 *ip = p;
790 ccount++;
791 score += score0;
792 continue;
793 undo:
794 while (--undop >= undop1)
795 if (*undop->pos = x = undop->val)
796 x->ccount++;
797 undop++;
798 }
799 if (score > 0) {
800#ifdef STATS
17f96a0f
EW
801 ccount_stat += ccount - p->ccount;
802 ntoken_stat++;
803 ncover_stat += ncover;
0ab446d2
EW
804#endif
805 p->ccount = ccount;
806 } else {
17f96a0f 807 register struct cc_undo *u = cc_undo;
0ab446d2 808 while (--undop >= u) {
17f96a0f 809 register struct cc *x;
0ab446d2
EW
810 if (*undop->pos = x = undop->val)
811 x->ccount++;
812 }
813 }
814 } while ((p = *pp++) != 0);
815 return pp - tokens;
49373f41
EW
816}
817
17f96a0f
EW
818cc_output_phase(buffer, output, bufsize)
819 register char *buffer;
820 register struct cc **output;
821 register bufsize;
49373f41
EW
822{
823 register i;
17f96a0f 824 register struct cc *p, *p1;
49373f41 825
17f96a0f 826 for (i = 0; i < bufsize;) {
0ab446d2 827 if ((p = output[i]) == 0) {
17f96a0f 828 ttputc(buffer[i]);
49373f41 829 i++;
0ab446d2
EW
830 } else if (p->code >= 0) {
831 if (--p->ccount == 0)
17f96a0f 832 qinsert(p, &cc_q0a);
0ab446d2 833 (*tt.tt_put_token)(p->code, p->string, p->length);
49373f41 834 wwntokuse++;
0ab446d2
EW
835 wwntoksave += put_token_score(p->length);
836 i += p->length;
17f96a0f 837 } else if ((p1 = cc_q0a.qback) != &cc_q0a) {
0ab446d2
EW
838 p->code = p1->code;
839 p1->code = -1;
17f96a0f 840 qinsert(p1, &cc_q1a);
0ab446d2 841 if (--p->ccount == 0)
17f96a0f 842 qinsert(p, &cc_q0a);
0ab446d2 843 else
17f96a0f 844 qinsert(p, &cc_q0b);
0ab446d2 845 (*tt.tt_set_token)(p->code, p->string, p->length);
49373f41
EW
846 wwntokdef++;
847 wwntoksave -= tt.tt_set_token_cost;
0ab446d2 848 i += p->length;
49373f41 849 } else {
0ab446d2 850 p->ccount--;
17f96a0f 851 ttwrite(p->string, p->length);
0ab446d2
EW
852 wwntokbad++;
853 i += p->length;
49373f41
EW
854 }
855 }
17f96a0f 856 wwntokc += bufsize;
49373f41 857}
0ab446d2 858
17f96a0f
EW
859cc_token_compare(p1, p2)
860 struct cc **p1, **p2;
0ab446d2
EW
861{
862 return (*p2)->bcount - (*p1)->bcount;
863}