Commit | Line | Data |
---|---|---|
3013fe88 NW |
1 | /* unlzw.c -- decompress files in LZW format. |
2 | * The code in this file is directly derived from the public domain 'compress' | |
3 | * written by Spencer Thomas, Joe Orost, James Woods, Jim McKie, Steve Davies, | |
4 | * Ken Turkowski, Dave Mack and Peter Jannesen. | |
5 | * | |
6 | * This is a temporary version which will be rewritten in some future version | |
7 | * to accommodate in-memory decompression. | |
8 | */ | |
9 | ||
e32dd11d | 10 | #ifdef RCSID |
caed0dfe | 11 | static char rcsid[] = "$Id: unlzw.c,v 0.15 1993/06/10 13:28:35 jloup Exp $"; |
3013fe88 NW |
12 | #endif |
13 | ||
3013fe88 NW |
14 | #include <sys/types.h> |
15 | ||
16 | #include "tailor.h" | |
17 | ||
18 | #ifdef HAVE_UNISTD_H | |
19 | # include <unistd.h> | |
20 | #endif | |
21 | #ifndef NO_FCNTL_H | |
22 | # include <fcntl.h> | |
23 | #endif | |
24 | ||
25 | #include "gzip.h" | |
26 | #include "lzw.h" | |
27 | ||
28 | typedef unsigned char char_type; | |
29 | typedef long code_int; | |
30 | typedef unsigned long count_int; | |
31 | typedef unsigned short count_short; | |
32 | typedef unsigned long cmp_code_int; | |
33 | ||
34 | #define MAXCODE(n) (1L << (n)) | |
35 | ||
36 | #ifndef REGISTERS | |
37 | # define REGISTERS 2 | |
38 | #endif | |
39 | #define REG1 | |
40 | #define REG2 | |
41 | #define REG3 | |
42 | #define REG4 | |
43 | #define REG5 | |
44 | #define REG6 | |
45 | #define REG7 | |
46 | #define REG8 | |
47 | #define REG9 | |
48 | #define REG10 | |
49 | #define REG11 | |
50 | #define REG12 | |
51 | #define REG13 | |
52 | #define REG14 | |
53 | #define REG15 | |
54 | #define REG16 | |
55 | #if REGISTERS >= 1 | |
56 | # undef REG1 | |
57 | # define REG1 register | |
58 | #endif | |
59 | #if REGISTERS >= 2 | |
60 | # undef REG2 | |
61 | # define REG2 register | |
62 | #endif | |
63 | #if REGISTERS >= 3 | |
64 | # undef REG3 | |
65 | # define REG3 register | |
66 | #endif | |
67 | #if REGISTERS >= 4 | |
68 | # undef REG4 | |
69 | # define REG4 register | |
70 | #endif | |
71 | #if REGISTERS >= 5 | |
72 | # undef REG5 | |
73 | # define REG5 register | |
74 | #endif | |
75 | #if REGISTERS >= 6 | |
76 | # undef REG6 | |
77 | # define REG6 register | |
78 | #endif | |
79 | #if REGISTERS >= 7 | |
80 | # undef REG7 | |
81 | # define REG7 register | |
82 | #endif | |
83 | #if REGISTERS >= 8 | |
84 | # undef REG8 | |
85 | # define REG8 register | |
86 | #endif | |
87 | #if REGISTERS >= 9 | |
88 | # undef REG9 | |
89 | # define REG9 register | |
90 | #endif | |
91 | #if REGISTERS >= 10 | |
92 | # undef REG10 | |
93 | # define REG10 register | |
94 | #endif | |
95 | #if REGISTERS >= 11 | |
96 | # undef REG11 | |
97 | # define REG11 register | |
98 | #endif | |
99 | #if REGISTERS >= 12 | |
100 | # undef REG12 | |
101 | # define REG12 register | |
102 | #endif | |
103 | #if REGISTERS >= 13 | |
104 | # undef REG13 | |
105 | # define REG13 register | |
106 | #endif | |
107 | #if REGISTERS >= 14 | |
108 | # undef REG14 | |
109 | # define REG14 register | |
110 | #endif | |
111 | #if REGISTERS >= 15 | |
112 | # undef REG15 | |
113 | # define REG15 register | |
114 | #endif | |
115 | #if REGISTERS >= 16 | |
116 | # undef REG16 | |
117 | # define REG16 register | |
118 | #endif | |
119 | ||
120 | #ifndef BYTEORDER | |
121 | # define BYTEORDER 0000 | |
122 | #endif | |
123 | ||
124 | #ifndef NOALLIGN | |
125 | # define NOALLIGN 0 | |
126 | #endif | |
127 | ||
128 | ||
129 | union bytes { | |
130 | long word; | |
131 | struct { | |
132 | #if BYTEORDER == 4321 | |
133 | char_type b1; | |
134 | char_type b2; | |
135 | char_type b3; | |
136 | char_type b4; | |
137 | #else | |
138 | #if BYTEORDER == 1234 | |
139 | char_type b4; | |
140 | char_type b3; | |
141 | char_type b2; | |
142 | char_type b1; | |
143 | #else | |
144 | # undef BYTEORDER | |
145 | int dummy; | |
146 | #endif | |
147 | #endif | |
148 | } bytes; | |
149 | }; | |
150 | ||
151 | #if BYTEORDER == 4321 && NOALLIGN == 1 | |
152 | # define input(b,o,c,n,m){ \ | |
153 | (c) = (*(long *)(&(b)[(o)>>3])>>((o)&0x7))&(m); \ | |
154 | (o) += (n); \ | |
155 | } | |
156 | #else | |
157 | # define input(b,o,c,n,m){ \ | |
158 | REG1 char_type *p = &(b)[(o)>>3]; \ | |
159 | (c) = ((((long)(p[0]))|((long)(p[1])<<8)| \ | |
160 | ((long)(p[2])<<16))>>((o)&0x7))&(m); \ | |
161 | (o) += (n); \ | |
162 | } | |
163 | #endif | |
164 | ||
165 | #ifndef MAXSEG_64K | |
166 | /* DECLARE(ush, tab_prefix, (1<<BITS)); -- prefix code */ | |
167 | # define tab_prefixof(i) tab_prefix[i] | |
168 | # define clear_tab_prefixof() memzero(tab_prefix, 256); | |
169 | #else | |
170 | /* DECLARE(ush, tab_prefix0, (1<<(BITS-1)); -- prefix for even codes */ | |
171 | /* DECLARE(ush, tab_prefix1, (1<<(BITS-1)); -- prefix for odd codes */ | |
172 | ush *tab_prefix[2]; | |
173 | # define tab_prefixof(i) tab_prefix[(i)&1][(i)>>1] | |
174 | # define clear_tab_prefixof() \ | |
175 | memzero(tab_prefix0, 128), \ | |
176 | memzero(tab_prefix1, 128); | |
177 | #endif | |
178 | #define de_stack ((char_type *)(&d_buf[DIST_BUFSIZE-1])) | |
179 | #define tab_suffixof(i) tab_suffix[i] | |
180 | ||
181 | int block_mode = BLOCK_MODE; /* block compress mode -C compatible with 2.0 */ | |
182 | ||
183 | /* ============================================================================ | |
184 | * Decompress in to out. This routine adapts to the codes in the | |
185 | * file building the "string" table on-the-fly; requiring no table to | |
186 | * be stored in the compressed file. | |
187 | * IN assertions: the buffer inbuf contains already the beginning of | |
188 | * the compressed data, from offsets iptr to insize-1 included. | |
189 | * The magic header has already been checked and skipped. | |
190 | * bytes_in and bytes_out have been initialized. | |
191 | */ | |
192 | int unlzw(in, out) | |
193 | int in, out; /* input and output file descriptors */ | |
194 | { | |
195 | REG2 char_type *stackp; | |
196 | REG3 code_int code; | |
197 | REG4 int finchar; | |
198 | REG5 code_int oldcode; | |
199 | REG6 code_int incode; | |
200 | REG7 long inbits; | |
201 | REG8 long posbits; | |
202 | REG9 int outpos; | |
203 | /* REG10 int insize; (global) */ | |
204 | REG11 unsigned bitmask; | |
205 | REG12 code_int free_ent; | |
206 | REG13 code_int maxcode; | |
207 | REG14 code_int maxmaxcode; | |
208 | REG15 int n_bits; | |
209 | REG16 int rsize; | |
210 | ||
211 | #ifdef MAXSEG_64K | |
212 | tab_prefix[0] = tab_prefix0; | |
213 | tab_prefix[1] = tab_prefix1; | |
214 | #endif | |
215 | maxbits = get_byte(); | |
216 | block_mode = maxbits & BLOCK_MODE; | |
217 | if ((maxbits & LZW_RESERVED) != 0) { | |
218 | WARN((stderr, "\n%s: %s: warning, unknown flags 0x%x\n", | |
219 | progname, ifname, maxbits & LZW_RESERVED)); | |
220 | } | |
221 | maxbits &= BIT_MASK; | |
222 | maxmaxcode = MAXCODE(maxbits); | |
223 | ||
224 | if (maxbits > BITS) { | |
225 | fprintf(stderr, | |
226 | "\n%s: %s: compressed with %d bits, can only handle %d bits\n", | |
227 | progname, ifname, maxbits, BITS); | |
228 | exit_code = ERROR; | |
229 | return ERROR; | |
230 | } | |
231 | rsize = insize; | |
232 | maxcode = MAXCODE(n_bits = INIT_BITS)-1; | |
233 | bitmask = (1<<n_bits)-1; | |
234 | oldcode = -1; | |
235 | finchar = 0; | |
236 | outpos = 0; | |
237 | posbits = inptr<<3; | |
238 | ||
239 | free_ent = ((block_mode) ? FIRST : 256); | |
240 | ||
241 | clear_tab_prefixof(); /* Initialize the first 256 entries in the table. */ | |
242 | ||
243 | for (code = 255 ; code >= 0 ; --code) { | |
244 | tab_suffixof(code) = (char_type)code; | |
245 | } | |
246 | do { | |
247 | REG1 int i; | |
248 | int e; | |
249 | int o; | |
250 | ||
251 | resetbuf: | |
252 | e = insize-(o = (posbits>>3)); | |
253 | ||
254 | for (i = 0 ; i < e ; ++i) { | |
255 | inbuf[i] = inbuf[i+o]; | |
256 | } | |
257 | insize = e; | |
258 | posbits = 0; | |
259 | ||
260 | if (insize < INBUF_EXTRA) { | |
261 | if ((rsize = read(in, (char*)inbuf+insize, INBUFSIZ)) == EOF) { | |
262 | read_error(); | |
263 | } | |
264 | insize += rsize; | |
caed0dfe | 265 | bytes_in += (ulg)rsize; |
3013fe88 NW |
266 | } |
267 | inbits = ((rsize != 0) ? ((long)insize - insize%n_bits)<<3 : | |
268 | ((long)insize<<3)-(n_bits-1)); | |
269 | ||
270 | while (inbits > posbits) { | |
271 | if (free_ent > maxcode) { | |
272 | posbits = ((posbits-1) + | |
273 | ((n_bits<<3)-(posbits-1+(n_bits<<3))%(n_bits<<3))); | |
274 | ++n_bits; | |
275 | if (n_bits == maxbits) { | |
276 | maxcode = maxmaxcode; | |
277 | } else { | |
278 | maxcode = MAXCODE(n_bits)-1; | |
279 | } | |
280 | bitmask = (1<<n_bits)-1; | |
281 | goto resetbuf; | |
282 | } | |
283 | input(inbuf,posbits,code,n_bits,bitmask); | |
e32dd11d NW |
284 | Tracev((stderr, "%d ", code)); |
285 | ||
3013fe88 | 286 | if (oldcode == -1) { |
e32dd11d | 287 | if (code >= 256) error("corrupt input."); |
3013fe88 NW |
288 | outbuf[outpos++] = (char_type)(finchar = (int)(oldcode=code)); |
289 | continue; | |
290 | } | |
291 | if (code == CLEAR && block_mode) { | |
292 | clear_tab_prefixof(); | |
293 | free_ent = FIRST - 1; | |
294 | posbits = ((posbits-1) + | |
295 | ((n_bits<<3)-(posbits-1+(n_bits<<3))%(n_bits<<3))); | |
296 | maxcode = MAXCODE(n_bits = INIT_BITS)-1; | |
297 | bitmask = (1<<n_bits)-1; | |
298 | goto resetbuf; | |
299 | } | |
300 | incode = code; | |
301 | stackp = de_stack; | |
302 | ||
303 | if (code >= free_ent) { /* Special case for KwKwK string. */ | |
304 | if (code > free_ent) { | |
305 | #ifdef DEBUG | |
306 | char_type *p; | |
307 | ||
308 | posbits -= n_bits; | |
309 | p = &inbuf[posbits>>3]; | |
310 | fprintf(stderr, | |
311 | "code:%ld free_ent:%ld n_bits:%d insize:%u\n", | |
312 | code, free_ent, n_bits, insize); | |
313 | fprintf(stderr, | |
314 | "posbits:%ld inbuf:%02X %02X %02X %02X %02X\n", | |
315 | posbits, p[-1],p[0],p[1],p[2],p[3]); | |
316 | #endif | |
317 | if (!test && outpos > 0) { | |
caed0dfe NW |
318 | write_buf(out, (char*)outbuf, outpos); |
319 | bytes_out += (ulg)outpos; | |
3013fe88 | 320 | } |
e32dd11d NW |
321 | error(to_stdout ? "corrupt input." : |
322 | "corrupt input. Use zcat to recover some data."); | |
3013fe88 NW |
323 | } |
324 | *--stackp = (char_type)finchar; | |
325 | code = oldcode; | |
326 | } | |
327 | ||
328 | while ((cmp_code_int)code >= (cmp_code_int)256) { | |
329 | /* Generate output characters in reverse order */ | |
330 | *--stackp = tab_suffixof(code); | |
331 | code = tab_prefixof(code); | |
332 | } | |
333 | *--stackp = (char_type)(finchar = tab_suffixof(code)); | |
334 | ||
335 | /* And put them out in forward order */ | |
336 | { | |
337 | REG1 int i; | |
338 | ||
339 | if (outpos+(i = (de_stack-stackp)) >= OUTBUFSIZ) { | |
340 | do { | |
341 | if (i > OUTBUFSIZ-outpos) i = OUTBUFSIZ-outpos; | |
342 | ||
343 | if (i > 0) { | |
344 | memcpy(outbuf+outpos, stackp, i); | |
345 | outpos += i; | |
346 | } | |
347 | if (outpos >= OUTBUFSIZ) { | |
caed0dfe NW |
348 | if (!test) { |
349 | write_buf(out, (char*)outbuf, outpos); | |
350 | bytes_out += (ulg)outpos; | |
351 | } | |
3013fe88 NW |
352 | outpos = 0; |
353 | } | |
354 | stackp+= i; | |
355 | } while ((i = (de_stack-stackp)) > 0); | |
356 | } else { | |
357 | memcpy(outbuf+outpos, stackp, i); | |
358 | outpos += i; | |
359 | } | |
360 | } | |
361 | ||
362 | if ((code = free_ent) < maxmaxcode) { /* Generate the new entry. */ | |
363 | ||
364 | tab_prefixof(code) = (unsigned short)oldcode; | |
365 | tab_suffixof(code) = (char_type)finchar; | |
366 | free_ent = code+1; | |
367 | } | |
368 | oldcode = incode; /* Remember previous code. */ | |
369 | } | |
3013fe88 NW |
370 | } while (rsize != 0); |
371 | ||
caed0dfe NW |
372 | if (!test && outpos > 0) { |
373 | write_buf(out, (char*)outbuf, outpos); | |
374 | bytes_out += (ulg)outpos; | |
375 | } | |
3013fe88 NW |
376 | return OK; |
377 | } |