* Copyright (c) 1989 Regents of the University of California.
* This code is derived from software contributed to Berkeley by
* Edward Wang at The University of California, Berkeley.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
static char sccsid
[] = "@(#)compress.c 3.6 (Berkeley) 8/12/90";
int cc_token_max
= 8; /* <= TOKEN_MAX */
int cc_token_min
= 2; /* > tt.tt_put_token_cost */
int cc_bufsize
= 1024 * 3; /* XXX, or 80 * 24 * 2 */
long time
; /* time last seen */
short bcount
; /* count in this buffer */
short ccount
; /* count in compression */
short places
; /* places in the buffer */
short code
; /* token code */
struct cc
*qforw
, *qback
;
struct cc
*hforw
, **hback
;
short cc_thresholds
[TOKEN_MAX
+ 1];
#define thresh(length) (cc_thresholds[length])
#define threshp(code, count, length) \
((code) >= 0 || (short) (count) >= cc_thresholds[length])
short cc_wthresholds
[TOKEN_MAX
+ 1];
#define wthresh(length) (cc_wthresholds[length])
#define wthreshp(weight, length) ((short) (weight) >= cc_wthresholds[length])
#define wthreshp(weight, length) (0)
short cc_wlimits
[TOKEN_MAX
+ 1];
#define wlimit(length) (cc_wlimits[length])
#define put_token_score(length) ((length) - tt.tt_put_token_cost)
int cc_score_adjustments
[TOKEN_MAX
+ 1][8]; /* XXX, 8 > max of cc_thresholds */
#define score_adjust(score, p) \
int length = (p)->length; \
int ccount = (p)->ccount; \
if (threshp((p)->code, ccount, length) || \
wthreshp((p)->weight, length)) /* XXX */ \
(score) -= length - tt.tt_put_token_cost; \
(score) += cc_score_adjustments[length][ccount]; \
int cc_initial_scores
[TOKEN_MAX
+ 1][8]; /* XXX, 8 > max of cc_thresholds */
struct cc cc_q0a
, cc_q0b
, cc_q1a
, cc_q1b
;
#define qinsert(p1, p2) \
register struct cc *forw = (p1)->qforw; \
register struct cc *back = (p1)->qback; \
((q)->qforw == (q) ? 0 : \
((q)->qback->qforw = (p)->qforw, \
(p)->qforw->qback = (q)->qback, \
(q)->qforw->qback = (p), \
(p)->qforw = (q)->qforw, \
#define hash(h, c) ((((h) >> H - 8 | (h) << 8) ^ (c)) & HSIZE - 1)
struct cc
**cc_output
; /* the output array */
short *cc_places
[TOKEN_MAX
+ 1];
short *cc_hashcodes
; /* for computing hashcodes */
struct cc
**cc_htab
; /* the hash table */
struct cc
**cc_tokens
; /* holds all the active tokens */
char *cc_tt_ob
, *cc_tt_obe
;
if (tt
.tt_token_max
> cc_token_max
)
tt
.tt_token_max
= cc_token_max
;
if (tt
.tt_token_min
< cc_token_min
)
tt
.tt_token_min
= cc_token_min
;
if (tt
.tt_token_min
> tt
.tt_token_max
) {
if (tt
.tt_ntoken
> cc_ntoken
/ 2) /* not likely */
tt
.tt_ntoken
= cc_ntoken
/ 2;
#define C(x) (sizeof (x) / sizeof *(x))
for (i
= 0; i
< C(cc_thresholds
); i
++) {
int h
= i
- tt
.tt_put_token_cost
;
(tt
.tt_set_token_cost
+ 1 + h
- 1) / h
+ 1;
for (i
= 0; i
< C(cc_score_adjustments
); i
++) {
int t
= cc_thresholds
[i
];
for (j
= 0; j
< C(*cc_score_adjustments
); j
++) {
cc_score_adjustments
[i
][j
] =
- (i
- tt
.tt_put_token_cost
);
cc_score_adjustments
[i
][j
] = 0;
* length * (ccount + 1) a
* set-token-cost + length +
* ccount * put-token-cost b
* the score adjustment is (b - a)
cc_score_adjustments
[i
][j
] =
tt
.tt_set_token_cost
+ i
+
j
* tt
.tt_put_token_cost
-
cc_initial_scores
[i
][j
] = 0;
* (length - put-token-cost) -
* (length - put-token-cost) * ccount)
cc_initial_scores
[i
][j
] =
- (tt
.tt_set_token_cost
+
(i
- tt
.tt_put_token_cost
) -
(i
- tt
.tt_put_token_cost
) * j
);
for (i
= 1; i
< C(cc_wthresholds
); i
++) {
((tt
.tt_set_token_cost
+ tt
.tt_put_token_cost
) / i
+
cc_wlimits
[i
] = cc_wthresholds
[i
] + cc_weight
;
if ((cc_output
= (struct cc
**)
malloc((unsigned) cc_bufsize
* sizeof *cc_output
)) == 0)
if ((cc_hashcodes
= (short *)
malloc((unsigned) cc_bufsize
* sizeof *cc_hashcodes
)) == 0)
if ((cc_htab
= (struct cc
**) malloc(HSIZE
* sizeof *cc_htab
)) == 0)
if ((cc_tokens
= (struct cc
**)
(cc_ntoken
+ tt
.tt_token_max
- tt
.tt_token_min
+ 1) *
sizeof *cc_tokens
)) == 0)
if ((cc_undo
= (struct cc_undo
*)
malloc((unsigned) cc_bufsize
* sizeof *cc_undo
)) == 0)
for (i
= tt
.tt_token_min
; i
<= tt
.tt_token_max
; i
++)
if ((cc_places
[i
] = (short *)
malloc((unsigned) cc_bufsize
* sizeof **cc_places
)) == 0)
cc_q0a
.qforw
= cc_q0a
.qback
= &cc_q0a
;
cc_q0b
.qforw
= cc_q0b
.qback
= &cc_q0b
;
cc_q1a
.qforw
= cc_q1a
.qback
= &cc_q1a
;
cc_q1b
.qforw
= cc_q1b
.qback
= &cc_q1b
;
if ((p
= (struct cc
*) malloc((unsigned) cc_ntoken
* sizeof *p
)) == 0)
for (i
= 0; i
< tt
.tt_ntoken
; i
++) {
for (; i
< cc_ntoken
; i
++) {
if ((cc_buffer
= malloc((unsigned) cc_bufsize
)) == 0)
tt_obp
= tt_ob
= cc_buffer
;
tt_obe
= tt_ob
+ cc_bufsize
;
bzero((char *) cc_htab
, HSIZE
* sizeof *cc_htab
);
for (p
= cc_q0a
.qforw
; p
!= &cc_q0a
; p
= p
->qforw
)
for (p
= cc_q1a
.qforw
; p
!= &cc_q1a
; p
= p
->qforw
)
cc_trace_fp
= fopen("window-trace", "a");
tt_obp
= tt_ob
= cc_tt_ob
;
if (cc_trace_fp
!= NULL
) {
(void) fclose(cc_trace_fp
);
int bufsize
= tt_obp
- tt_ob
;
if (cc_trace_fp
!= NULL
) {
(void) fwrite(tt_ob
, 1, bufsize
, cc_trace_fp
);
if (bufsize
< tt
.tt_token_min
) {
tt_obp
= tt_ob
= cc_tt_ob
;
n
= cc_sweep_phase(cc_buffer
, bufsize
, cc_tokens
);
cc_compress_phase(cc_output
, bufsize
, cc_tokens
, n
);
cc_output_phase(cc_buffer
, cc_output
, bufsize
);
tt_obp
= tt_ob
= cc_buffer
;
tt_obe
= cc_buffer
+ cc_bufsize
;
cc_sweep_phase(buffer
, bufsize
, tokens
)
register struct cc
**pp
= tokens
;
cc_sweep0(buffer
, bufsize
, tt
.tt_token_min
- 1);
for (i
= tt
.tt_token_min
; i
<= tt
.tt_token_max
; i
++) {
n
= cc_sweep(buffer
, bufsize
, pp
, i
);
qinsertq(&cc_q1b
, &cc_q1a
);
printf("\n %d tokens, %d candidates\n",
cc_sweep0(buffer
, n
, length
)
register short pc
= tt
.tt_padc
;
/* n and length are at least 1 */
if ((*hc
++ = *p
++) == pc
)
if ((c
= *p
++) == pc
|| *hc
< 0)
cc_sweep(buffer
, bufsize
, tokens
, length
)
short *places
= cc_places
[length
];
short threshold
= thresh(length
);
short wthreshold
= wthresh(length
);
short limit
= wlimit(length
);
for (i
= 0; i
< bufsize
; i
++, time
++) {
register short *hc1
= hc
;
register short c
= *cp
++;
if ((hh
= *hc1
) < 0 || c
== pc
) {
h
= cc_htab
+ (*hc1
++ = hash(hh
, c
));
for (p
= *h
; p
!= 0; p
= p
->hforw
)
if (p
->length
== (char) length
) {
register char *p1
= p
->string
;
register char *p2
= cp
- length
;
p
->time
>= cc_time0
&& p
->length
== (char) length
)
if ((*p
->hback
= p
->hforw
) != 0)
p
->hforw
->hback
= p
->hback
;
register char *p1
= p
->string
;
register char *p2
= cp
- length
;
if ((p
->hforw
= *h
) != 0)
p
->hforw
->hback
= &p
->hforw
;
} else if (p
->time
< cc_time0
) {
if ((p
->weight
+= p
->time
- time
) < 0)
else if ((p
->weight
+= cc_weight
) > limit
)
if (p
->weight
>= wthreshold
) {
} else if (p
->time
+ length
> time
) {
* overlapping token, don't count as two and
* don't update time, but do adjust weight to offset
if (cc_weight
!= 0) { /* XXX */
p
->weight
+= time
- p
->time
;
if (!p
->flag
&& p
->weight
>= wthreshold
) {
if ((p
->weight
+= p
->time
- time
) < 0)
else if ((p
->weight
+= cc_weight
) > limit
)
/* code must be < 0 if flag false here */
|| p
->weight
>= wthreshold
if ((i
= pp
- tokens
) > 0) {
cc_sweep_reverse(tokens
, places
);
qsort((char *) tokens
, i
, sizeof *tokens
,
if ((i
= i
* cc_chop
/ 100) == 0)
cc_sweep_reverse(pp
, places
)
register short front
, back
, t
;
while ((p
= *pp
++) != 0) {
/* the list is never empty */
} while ((t
= front
) >= 0);
cc_compress_phase(output
, bufsize
, tokens
, ntoken
)
bzero((char *) output
, bufsize
* sizeof *output
);
for (i
= 0; i
< cc_npass0
; i
++)
cc_compress_phase1(output
, tokens
, ntoken
, 0);
for (i
= 0; i
< cc_npass1
; i
++)
cc_compress_phase1(output
, tokens
, ntoken
, 1);
cc_compress_cleanup(output
, bufsize
);
cc_compress_phase1(output
, tokens
, ntoken
, flag
)
register struct cc
**output
;
int nt
= 0, cc
= 0, nc
= 0;
while (pp
< tokens
+ ntoken
) {
printf(" (%d", (*pp
)->length
);
pp
+= cc_compress(output
, pp
, flag
);
printf(" %dt %du %dc)", ntoken_stat
, ccount_stat
,
printf("\n total: (%dt %du %dc)\n", nt
, cc
, nc
);
cc_compress_cleanup(output
, bufsize
)
register struct cc
**output
;
register struct cc
**end
;
/* the previous output phase may have been interrupted */
qinsertq(&cc_q0b
, &cc_q0a
);
for (end
= output
+ bufsize
; output
< end
;) {
if ((p
= *output
) == 0) {
} else if (p
->code
>= 0) {
} else if (p
->ccount
== 0) {
} else if (p
->ccount
>= thresh(length
)
|| wthreshp(p
->weight
, length
)
cc_compress(output
, tokens
, flag
)
register struct cc
*p
= *pp
++;
int threshold
= thresh(length
);
short wthreshold
= wthresh(length
);
short *places
= cc_places
[length
];
int *initial_scores
= cc_initial_scores
[length
];
int initial_score0
= put_token_score(length
);
register struct cc_undo
*undop
;
if ((short) ccount
>= p
->bcount
)
if (p
->code
>= 0 || ccount
>= threshold
)
else if (p
->weight
>= wthreshold
)
/* allow one fewer match than normal */
/* XXX, should adjust for ccount */
score
= - tt
.tt_set_token_cost
;
score
= initial_scores
[ccount
];
for (i
= p
->places
; i
>= 0; i
= places
[i
]) {
register struct cc
**ip
= output
+ i
;
register score0
= initial_score0
;
struct cc
**iip
= ip
+ length
;
struct cc_undo
*undop1
= undop
;
if ((x
= *(jp
= ip
)) != 0)
while (--undop
>= undop1
)
if (*undop
->pos
= x
= undop
->val
)
ccount_stat
+= ccount
- p
->ccount
;
register struct cc_undo
*u
= cc_undo
;
if (*undop
->pos
= x
= undop
->val
)
} while ((p
= *pp
++) != 0);
cc_output_phase(buffer
, output
, bufsize
)
register struct cc
**output
;
register struct cc
*p
, *p1
;
for (i
= 0; i
< bufsize
;) {
if ((p
= output
[i
]) == 0) {
} else if (p
->code
>= 0) {
(*tt
.tt_put_token
)(p
->code
, p
->string
, p
->length
);
wwntoksave
+= put_token_score(p
->length
);
} else if ((p1
= cc_q0a
.qback
) != &cc_q0a
) {
(*tt
.tt_set_token
)(p
->code
, p
->string
, p
->length
);
wwntoksave
-= tt
.tt_set_token_cost
;
ttwrite(p
->string
, p
->length
);
return (*p2
)->bcount
- (*p1
)->bcount
;