Commit | Line | Data |
---|---|---|
3013fe88 NW |
1 | /* match.s -- optional optimized asm version of longest match in deflate.c |
2 | * Copyright (C) 1992-1993 Jean-loup Gailly | |
3 | * This is free software; you can redistribute it and/or modify it under the | |
4 | * terms of the GNU General Public License, see the file COPYING. | |
5 | * | |
6 | * The 68020 version has been written by Francesco Potorti` <pot@cnuce.cnr.it> | |
7 | * with adaptations by Carsten Steger <stegerc@informatik.tu-muenchen.de>, | |
8 | * Andreas Schwab <schwab@lamothe.informatik.uni-dortmund.de> and | |
9 | * Kristoffer Eriksson <ske@pkmab.se> | |
10 | */ | |
11 | ||
caed0dfe | 12 | /* $Id: match.S,v 0.14 1993/06/11 18:33:24 jloup Exp $ */ |
3013fe88 NW |
13 | |
14 | /* Preprocess with -DNO_UNDERLINE if your C compiler does not prefix | |
15 | * external symbols with an underline character '_'. | |
16 | */ | |
17 | #ifdef NO_UNDERLINE | |
18 | # define _prev prev | |
19 | # define _window window | |
20 | # define _match_start match_start | |
21 | # define _prev_length prev_length | |
22 | # define _good_match good_match | |
23 | # define _nice_match nice_match | |
24 | # define _strstart strstart | |
25 | # define _max_chain_length max_chain_length | |
26 | ||
27 | # define _match_init match_init | |
28 | # define _longest_match longest_match | |
29 | #endif | |
30 | ||
31 | #ifdef DYN_ALLOC | |
32 | error: DYN_ALLOC not yet supported in match.s | |
33 | #endif | |
34 | ||
35 | #if defined(i386) || defined(_I386) | |
36 | ||
37 | /* This version is for 386 Unix or OS/2 in 32 bit mode. | |
38 | * Warning: it uses the AT&T syntax: mov source,dest | |
39 | * This file is only optional. If you want to force the C version, | |
40 | * add -DNO_ASM to CFLAGS in Makefile and set OBJA to an empty string. | |
41 | * If you have reduced WSIZE in gzip.h, then change its value below. | |
42 | * This version assumes static allocation of the arrays (-DDYN_ALLOC not used). | |
43 | */ | |
44 | ||
45 | .file "match.S" | |
46 | ||
caed0dfe NW |
47 | #define MAX_MATCH 258 |
48 | #define MAX_MATCH2 $128 /* MAX_MATCH/2-1 */ | |
3013fe88 | 49 | #define MIN_MATCH 3 |
caed0dfe NW |
50 | #define WSIZE $32768 |
51 | #define MAX_DIST WSIZE - MAX_MATCH - MIN_MATCH - 1 | |
3013fe88 NW |
52 | |
53 | .globl _match_init | |
54 | .globl _longest_match | |
55 | ||
56 | .text | |
57 | ||
58 | _match_init: | |
59 | ret | |
60 | ||
61 | /*----------------------------------------------------------------------- | |
62 | * Set match_start to the longest match starting at the given string and | |
63 | * return its length. Matches shorter or equal to prev_length are discarded, | |
64 | * in which case the result is equal to prev_length and match_start is | |
65 | * garbage. | |
66 | * IN assertions: cur_match is the head of the hash chain for the current | |
67 | * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1 | |
68 | */ | |
69 | ||
70 | _longest_match: /* int longest_match(cur_match) */ | |
71 | ||
72 | #define cur_match 20(%esp) | |
73 | /* return address */ /* esp+16 */ | |
74 | push %ebp /* esp+12 */ | |
75 | push %edi /* esp+8 */ | |
76 | push %esi /* esp+4 */ | |
77 | push %ebx /* esp */ | |
78 | ||
79 | /* | |
80 | * match equ esi | |
81 | * scan equ edi | |
82 | * chain_length equ ebp | |
83 | * best_len equ ebx | |
84 | * limit equ edx | |
85 | */ | |
86 | mov cur_match,%esi | |
87 | mov _max_chain_length,%ebp /* chain_length = max_chain_length */ | |
88 | mov _strstart,%edi | |
89 | mov %edi,%edx | |
caed0dfe | 90 | sub MAX_DIST,%edx /* limit = strstart-MAX_DIST */ |
3013fe88 NW |
91 | jae limit_ok |
92 | sub %edx,%edx /* limit = NIL */ | |
93 | limit_ok: | |
caed0dfe | 94 | add $2+_window,%edi /* edi = offset(window+strstart+2) */ |
3013fe88 NW |
95 | mov _prev_length,%ebx /* best_len = prev_length */ |
96 | movw -3(%ebx,%edi),%ax /* ax = scan[best_len-1..best_len] */ | |
97 | movw -2(%edi),%cx /* cx = scan[0..1] */ | |
98 | cmp _good_match,%ebx /* do we have a good match already? */ | |
99 | jb do_scan | |
100 | shr $2,%ebp /* chain_length >>= 2 */ | |
101 | jmp do_scan | |
102 | ||
103 | .align 4 | |
104 | long_loop: | |
105 | /* at this point, edi == scan+2, esi == cur_match */ | |
106 | movw -3(%ebx,%edi),%ax /* ax = scan[best_len-1..best_len] */ | |
107 | movw -2(%edi),%cx /* cx = scan[0..1] */ | |
108 | short_loop: | |
109 | /* | |
110 | * at this point, di == scan+2, si == cur_match, | |
111 | * ax = scan[best_len-1..best_len] and cx = scan[0..1] | |
112 | */ | |
caed0dfe | 113 | and WSIZE-1, %esi |
3013fe88 NW |
114 | movw _prev(%esi,%esi),%si /* cur_match = prev[cur_match] */ |
115 | /* top word of esi is still 0 */ | |
116 | cmp %edx,%esi /* cur_match <= limit ? */ | |
117 | jbe the_end | |
118 | dec %ebp /* --chain_length */ | |
119 | jz the_end | |
120 | do_scan: | |
121 | cmpw _window-1(%ebx,%esi),%ax/* check match at best_len-1 */ | |
122 | jne short_loop | |
123 | cmpw _window(%esi),%cx /* check min_match_length match */ | |
124 | jne short_loop | |
125 | ||
126 | lea _window+2(%esi),%esi /* si = match */ | |
127 | mov %edi,%eax /* ax = scan+2 */ | |
caed0dfe | 128 | mov MAX_MATCH2,%ecx /* scan for at most MAX_MATCH bytes */ |
3013fe88 NW |
129 | rep; cmpsw /* loop until mismatch */ |
130 | je maxmatch /* match of length MAX_MATCH? */ | |
131 | mismatch: | |
132 | movb -2(%edi),%cl /* mismatch on first or second byte? */ | |
133 | subb -2(%esi),%cl /* cl = 0 if first bytes equal */ | |
134 | xchg %edi,%eax /* edi = scan+2, eax = end of scan */ | |
135 | sub %edi,%eax /* eax = len */ | |
136 | sub %eax,%esi /* esi = cur_match + 2 + offset(window) */ | |
caed0dfe | 137 | sub $2+_window,%esi /* esi = cur_match */ |
3013fe88 NW |
138 | subb $1,%cl /* set carry if cl == 0 (cannot use DEC) */ |
139 | adc $0,%eax /* eax = carry ? len+1 : len */ | |
140 | cmp %ebx,%eax /* len > best_len ? */ | |
141 | jle long_loop | |
142 | mov %esi,_match_start /* match_start = cur_match */ | |
143 | mov %eax,%ebx /* ebx = best_len = len */ | |
144 | cmp _nice_match,%eax /* len >= nice_match ? */ | |
145 | jl long_loop | |
146 | the_end: | |
147 | mov %ebx,%eax /* result = eax = best_len */ | |
148 | pop %ebx | |
149 | pop %esi | |
150 | pop %edi | |
151 | pop %ebp | |
152 | ret | |
153 | maxmatch: | |
154 | cmpsb | |
155 | jmp mismatch | |
156 | ||
157 | #else | |
158 | ||
159 | /* ======================== 680x0 version ================================= */ | |
160 | ||
161 | #if defined(m68k)||defined(mc68k)||defined(__mc68000__)||defined(__MC68000__) | |
162 | # ifndef mc68000 | |
163 | # define mc68000 | |
164 | # endif | |
165 | #endif | |
166 | ||
167 | #if defined(__mc68020__) || defined(__MC68020__) || defined(sysV68) | |
168 | # ifndef mc68020 | |
169 | # define mc68020 | |
170 | # endif | |
171 | #endif | |
172 | ||
173 | #if defined(mc68020) || defined(mc68000) | |
174 | ||
175 | #if (defined(mc68020) || defined(NeXT)) && !defined(UNALIGNED_OK) | |
176 | # define UNALIGNED_OK | |
177 | #endif | |
178 | ||
179 | #ifdef sysV68 /* Try Motorola Delta style */ | |
180 | ||
181 | # define GLOBAL(symbol) global symbol | |
182 | # define TEXT text | |
183 | # define FILE(filename) file filename | |
184 | # define invert_maybe(src,dst) dst,src | |
185 | # define imm(data) &data | |
186 | # define reg(register) %register | |
187 | ||
188 | # define addl add.l | |
189 | # define addql addq.l | |
190 | # define blos blo.b | |
191 | # define bhis bhi.b | |
192 | # define bras bra.b | |
193 | # define clrl clr.l | |
194 | # define cmpmb cmpm.b | |
195 | # define cmpw cmp.w | |
196 | # define cmpl cmp.l | |
197 | # define lslw lsl.w | |
198 | # define lsrl lsr.l | |
199 | # define movel move.l | |
200 | # define movew move.w | |
201 | # define moveb move.b | |
202 | # define moveml movem.l | |
203 | # define subl sub.l | |
204 | # define subw sub.w | |
205 | # define subql subq.l | |
206 | ||
207 | # define IndBase(bd,An) (bd,An) | |
208 | # define IndBaseNdxl(bd,An,Xn) (bd,An,Xn.l) | |
209 | # define IndBaseNdxw(bd,An,Xn) (bd,An,Xn.w) | |
210 | # define predec(An) -(An) | |
211 | # define postinc(An) (An)+ | |
212 | ||
213 | #else /* default style (Sun 3, NeXT, Amiga, Atari) */ | |
214 | ||
215 | # define GLOBAL(symbol) .globl symbol | |
216 | # define TEXT .text | |
217 | # define FILE(filename) .even | |
218 | # define invert_maybe(src,dst) src,dst | |
219 | # if defined(sun) || defined(mc68k) | |
220 | # define imm(data) #data | |
221 | # else | |
222 | # define imm(data) \#data | |
223 | # endif | |
224 | # define reg(register) register | |
225 | ||
226 | # define blos bcss | |
227 | # if defined(sun) || defined(mc68k) | |
228 | # define movel movl | |
229 | # define movew movw | |
230 | # define moveb movb | |
231 | # endif | |
232 | # define IndBase(bd,An) An@(bd) | |
233 | # define IndBaseNdxl(bd,An,Xn) An@(bd,Xn:l) | |
234 | # define IndBaseNdxw(bd,An,Xn) An@(bd,Xn:w) | |
235 | # define predec(An) An@- | |
236 | # define postinc(An) An@+ | |
237 | ||
238 | #endif /* styles */ | |
239 | ||
240 | #define Best_Len reg(d0) /* unsigned */ | |
241 | #define Cur_Match reg(d1) /* Ipos */ | |
242 | #define Loop_Counter reg(d2) /* int */ | |
243 | #define Scan_Start reg(d3) /* unsigned short */ | |
244 | #define Scan_End reg(d4) /* unsigned short */ | |
245 | #define Limit reg(d5) /* IPos */ | |
246 | #define Chain_Length reg(d6) /* unsigned */ | |
247 | #define Scan_Test reg(d7) | |
248 | #define Scan reg(a0) /* *uch */ | |
249 | #define Match reg(a1) /* *uch */ | |
250 | #define Prev_Address reg(a2) /* *Pos */ | |
251 | #define Scan_Ini reg(a3) /* *uch */ | |
252 | #define Match_Ini reg(a4) /* *uch */ | |
253 | #define Stack_Pointer reg(sp) | |
254 | ||
255 | #define MAX_MATCH 258 | |
256 | #define MIN_MATCH 3 | |
257 | #define WSIZE 32768 | |
258 | #define MAX_DIST (WSIZE - MAX_MATCH - MIN_MATCH - 1) | |
259 | ||
260 | GLOBAL (_match_init) | |
261 | GLOBAL (_longest_match) | |
262 | ||
263 | TEXT | |
264 | ||
265 | FILE ("match.S") | |
266 | ||
267 | _match_init: | |
268 | rts | |
269 | ||
270 | /*----------------------------------------------------------------------- | |
271 | * Set match_start to the longest match starting at the given string and | |
272 | * return its length. Matches shorter or equal to prev_length are discarded, | |
273 | * in which case the result is equal to prev_length and match_start is | |
274 | * garbage. | |
275 | * IN assertions: cur_match is the head of the hash chain for the current | |
276 | * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1 | |
277 | */ | |
278 | ||
279 | /* int longest_match (cur_match) */ | |
280 | ||
281 | #ifdef UNALIGNED_OK | |
282 | # define pushreg 15928 /* d2-d6/a2-a4 */ | |
283 | # define popreg 7292 | |
284 | #else | |
285 | # define pushreg 16184 /* d2-d7/a2-a4 */ | |
286 | # define popreg 7420 | |
287 | #endif | |
288 | ||
289 | _longest_match: | |
290 | movel IndBase(4,Stack_Pointer),Cur_Match | |
291 | moveml imm(pushreg),predec(Stack_Pointer) | |
292 | movel _max_chain_length,Chain_Length | |
293 | movel _prev_length,Best_Len | |
294 | movel imm(_prev),Prev_Address | |
295 | movel imm(_window+MIN_MATCH),Match_Ini | |
296 | movel _strstart,Limit | |
297 | movel Match_Ini,Scan_Ini | |
298 | addl Limit,Scan_Ini | |
299 | subw imm(MAX_DIST),Limit | |
300 | bhis L__limit_ok | |
301 | clrl Limit | |
302 | L__limit_ok: | |
303 | cmpl invert_maybe(_good_match,Best_Len) | |
304 | blos L__length_ok | |
305 | lsrl imm(2),Chain_Length | |
306 | L__length_ok: | |
307 | subql imm(1),Chain_Length | |
308 | #ifdef UNALIGNED_OK | |
309 | movew IndBase(-MIN_MATCH,Scan_Ini),Scan_Start | |
310 | movew IndBaseNdxw(-MIN_MATCH-1,Scan_Ini,Best_Len),Scan_End | |
311 | #else | |
312 | moveb IndBase(-MIN_MATCH,Scan_Ini),Scan_Start | |
313 | lslw imm(8),Scan_Start | |
314 | moveb IndBase(-MIN_MATCH+1,Scan_Ini),Scan_Start | |
315 | moveb IndBaseNdxw(-MIN_MATCH-1,Scan_Ini,Best_Len),Scan_End | |
316 | lslw imm(8),Scan_End | |
317 | moveb IndBaseNdxw(-MIN_MATCH,Scan_Ini,Best_Len),Scan_End | |
318 | #endif | |
319 | bras L__do_scan | |
320 | ||
321 | L__long_loop: | |
322 | #ifdef UNALIGNED_OK | |
323 | movew IndBaseNdxw(-MIN_MATCH-1,Scan_Ini,Best_Len),Scan_End | |
324 | #else | |
325 | moveb IndBaseNdxw(-MIN_MATCH-1,Scan_Ini,Best_Len),Scan_End | |
326 | lslw imm(8),Scan_End | |
327 | moveb IndBaseNdxw(-MIN_MATCH,Scan_Ini,Best_Len),Scan_End | |
328 | #endif | |
329 | ||
330 | L__short_loop: | |
331 | lslw imm(1),Cur_Match | |
332 | movew IndBaseNdxl(0,Prev_Address,Cur_Match),Cur_Match | |
333 | cmpw invert_maybe(Limit,Cur_Match) | |
334 | dbls Chain_Length,L__do_scan | |
335 | bras L__return | |
336 | ||
337 | L__do_scan: | |
338 | movel Match_Ini,Match | |
339 | addl Cur_Match,Match | |
340 | #ifdef UNALIGNED_OK | |
341 | cmpw invert_maybe(IndBaseNdxw(-MIN_MATCH-1,Match,Best_Len),Scan_End) | |
342 | bne L__short_loop | |
343 | cmpw invert_maybe(IndBase(-MIN_MATCH,Match),Scan_Start) | |
344 | bne L__short_loop | |
345 | #else | |
346 | moveb IndBaseNdxw(-MIN_MATCH-1,Match,Best_Len),Scan_Test | |
347 | lslw imm(8),Scan_Test | |
348 | moveb IndBaseNdxw(-MIN_MATCH,Match,Best_Len),Scan_Test | |
349 | cmpw invert_maybe(Scan_Test,Scan_End) | |
350 | bne L__short_loop | |
351 | moveb IndBase(-MIN_MATCH,Match),Scan_Test | |
352 | lslw imm(8),Scan_Test | |
353 | moveb IndBase(-MIN_MATCH+1,Match),Scan_Test | |
354 | cmpw invert_maybe(Scan_Test,Scan_Start) | |
355 | bne L__short_loop | |
356 | #endif | |
357 | ||
358 | movew imm((MAX_MATCH-MIN_MATCH+1)-1),Loop_Counter | |
359 | movel Scan_Ini,Scan | |
360 | L__scan_loop: | |
361 | cmpmb postinc(Match),postinc(Scan) | |
362 | dbne Loop_Counter,L__scan_loop | |
363 | ||
364 | subl Scan_Ini,Scan | |
365 | addql imm(MIN_MATCH-1),Scan | |
366 | cmpl invert_maybe(Best_Len,Scan) | |
367 | bls L__short_loop | |
368 | movel Scan,Best_Len | |
369 | movel Cur_Match,_match_start | |
370 | cmpl invert_maybe(_nice_match,Best_Len) | |
371 | blos L__long_loop | |
372 | L__return: | |
373 | moveml postinc(Stack_Pointer),imm(popreg) | |
374 | rts | |
375 | ||
376 | #else | |
377 | error: this asm version is for 386 or 680x0 only | |
378 | #endif /* mc68000 || mc68020 */ | |
379 | #endif /* i386 || _I386 */ |