Commit | Line | Data |
---|---|---|
86041518 WJ |
1 | /* tblcmp - table compression routines */ |
2 | ||
3 | /*- | |
4 | * Copyright (c) 1990 The Regents of the University of California. | |
5 | * All rights reserved. | |
6 | * | |
7 | * This code is derived from software contributed to Berkeley by | |
8 | * Vern Paxson of Lawrence Berkeley Laboratory. | |
9 | * | |
10 | * The United States Government has rights in this work pursuant | |
11 | * to contract no. DE-AC03-76SF00098 between the United States | |
12 | * Department of Energy and the University of California. | |
13 | * | |
14 | * Redistribution and use in source and binary forms, with or without | |
15 | * modification, are permitted provided that the following conditions | |
16 | * are met: | |
17 | * 1. Redistributions of source code must retain the above copyright | |
18 | * notice, this list of conditions and the following disclaimer. | |
19 | * 2. Redistributions in binary form must reproduce the above copyright | |
20 | * notice, this list of conditions and the following disclaimer in the | |
21 | * documentation and/or other materials provided with the distribution. | |
22 | * 3. All advertising materials mentioning features or use of this software | |
23 | * must display the following acknowledgement: | |
24 | * This product includes software developed by the University of | |
25 | * California, Berkeley and its contributors. | |
26 | * 4. Neither the name of the University nor the names of its contributors | |
27 | * may be used to endorse or promote products derived from this software | |
28 | * without specific prior written permission. | |
29 | * | |
30 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
31 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
32 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
33 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
34 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
35 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
36 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
37 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
38 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
39 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
40 | * SUCH DAMAGE. | |
41 | */ | |
42 | ||
43 | #ifndef lint | |
44 | static char sccsid[] = "@(#)tblcmp.c 5.2 (Berkeley) 6/18/90"; | |
45 | #endif /* not lint */ | |
46 | ||
47 | #include "flexdef.h" | |
48 | ||
49 | /* declarations for functions that have forward references */ | |
50 | ||
51 | void mkentry PROTO((register int*, int, int, int, int)); | |
52 | void mkprot PROTO((int[], int, int)); | |
53 | void mktemplate PROTO((int[], int, int)); | |
54 | void mv2front PROTO((int)); | |
55 | int tbldiff PROTO((int[], int, int[])); | |
56 | ||
57 | ||
58 | /* bldtbl - build table entries for dfa state | |
59 | * | |
60 | * synopsis | |
61 | * int state[numecs], statenum, totaltrans, comstate, comfreq; | |
62 | * bldtbl( state, statenum, totaltrans, comstate, comfreq ); | |
63 | * | |
64 | * State is the statenum'th dfa state. It is indexed by equivalence class and | |
65 | * gives the number of the state to enter for a given equivalence class. | |
66 | * totaltrans is the total number of transitions out of the state. Comstate | |
67 | * is that state which is the destination of the most transitions out of State. | |
68 | * Comfreq is how many transitions there are out of State to Comstate. | |
69 | * | |
70 | * A note on terminology: | |
71 | * "protos" are transition tables which have a high probability of | |
72 | * either being redundant (a state processed later will have an identical | |
73 | * transition table) or nearly redundant (a state processed later will have | |
74 | * many of the same out-transitions). A "most recently used" queue of | |
75 | * protos is kept around with the hope that most states will find a proto | |
76 | * which is similar enough to be usable, and therefore compacting the | |
77 | * output tables. | |
78 | * "templates" are a special type of proto. If a transition table is | |
79 | * homogeneous or nearly homogeneous (all transitions go to the same | |
80 | * destination) then the odds are good that future states will also go | |
81 | * to the same destination state on basically the same character set. | |
82 | * These homogeneous states are so common when dealing with large rule | |
83 | * sets that they merit special attention. If the transition table were | |
84 | * simply made into a proto, then (typically) each subsequent, similar | |
85 | * state will differ from the proto for two out-transitions. One of these | |
86 | * out-transitions will be that character on which the proto does not go | |
87 | * to the common destination, and one will be that character on which the | |
88 | * state does not go to the common destination. Templates, on the other | |
89 | * hand, go to the common state on EVERY transition character, and therefore | |
90 | * cost only one difference. | |
91 | */ | |
92 | ||
93 | void bldtbl( state, statenum, totaltrans, comstate, comfreq ) | |
94 | int state[], statenum, totaltrans, comstate, comfreq; | |
95 | ||
96 | { | |
97 | int extptr, extrct[2][CSIZE + 1]; | |
98 | int mindiff, minprot, i, d; | |
99 | int checkcom; | |
100 | ||
101 | /* If extptr is 0 then the first array of extrct holds the result of the | |
102 | * "best difference" to date, which is those transitions which occur in | |
103 | * "state" but not in the proto which, to date, has the fewest differences | |
104 | * between itself and "state". If extptr is 1 then the second array of | |
105 | * extrct hold the best difference. The two arrays are toggled | |
106 | * between so that the best difference to date can be kept around and | |
107 | * also a difference just created by checking against a candidate "best" | |
108 | * proto. | |
109 | */ | |
110 | ||
111 | extptr = 0; | |
112 | ||
113 | /* if the state has too few out-transitions, don't bother trying to | |
114 | * compact its tables | |
115 | */ | |
116 | ||
117 | if ( (totaltrans * 100) < (numecs * PROTO_SIZE_PERCENTAGE) ) | |
118 | mkentry( state, numecs, statenum, JAMSTATE, totaltrans ); | |
119 | ||
120 | else | |
121 | { | |
122 | /* checkcom is true if we should only check "state" against | |
123 | * protos which have the same "comstate" value | |
124 | */ | |
125 | ||
126 | checkcom = comfreq * 100 > totaltrans * CHECK_COM_PERCENTAGE; | |
127 | ||
128 | minprot = firstprot; | |
129 | mindiff = totaltrans; | |
130 | ||
131 | if ( checkcom ) | |
132 | { | |
133 | /* find first proto which has the same "comstate" */ | |
134 | for ( i = firstprot; i != NIL; i = protnext[i] ) | |
135 | if ( protcomst[i] == comstate ) | |
136 | { | |
137 | minprot = i; | |
138 | mindiff = tbldiff( state, minprot, extrct[extptr] ); | |
139 | break; | |
140 | } | |
141 | } | |
142 | ||
143 | else | |
144 | { | |
145 | /* since we've decided that the most common destination out | |
146 | * of "state" does not occur with a high enough frequency, | |
147 | * we set the "comstate" to zero, assuring that if this state | |
148 | * is entered into the proto list, it will not be considered | |
149 | * a template. | |
150 | */ | |
151 | comstate = 0; | |
152 | ||
153 | if ( firstprot != NIL ) | |
154 | { | |
155 | minprot = firstprot; | |
156 | mindiff = tbldiff( state, minprot, extrct[extptr] ); | |
157 | } | |
158 | } | |
159 | ||
160 | /* we now have the first interesting proto in "minprot". If | |
161 | * it matches within the tolerances set for the first proto, | |
162 | * we don't want to bother scanning the rest of the proto list | |
163 | * to see if we have any other reasonable matches. | |
164 | */ | |
165 | ||
166 | if ( mindiff * 100 > totaltrans * FIRST_MATCH_DIFF_PERCENTAGE ) | |
167 | { /* not a good enough match. Scan the rest of the protos */ | |
168 | for ( i = minprot; i != NIL; i = protnext[i] ) | |
169 | { | |
170 | d = tbldiff( state, i, extrct[1 - extptr] ); | |
171 | if ( d < mindiff ) | |
172 | { | |
173 | extptr = 1 - extptr; | |
174 | mindiff = d; | |
175 | minprot = i; | |
176 | } | |
177 | } | |
178 | } | |
179 | ||
180 | /* check if the proto we've decided on as our best bet is close | |
181 | * enough to the state we want to match to be usable | |
182 | */ | |
183 | ||
184 | if ( mindiff * 100 > totaltrans * ACCEPTABLE_DIFF_PERCENTAGE ) | |
185 | { | |
186 | /* no good. If the state is homogeneous enough, we make a | |
187 | * template out of it. Otherwise, we make a proto. | |
188 | */ | |
189 | ||
190 | if ( comfreq * 100 >= totaltrans * TEMPLATE_SAME_PERCENTAGE ) | |
191 | mktemplate( state, statenum, comstate ); | |
192 | ||
193 | else | |
194 | { | |
195 | mkprot( state, statenum, comstate ); | |
196 | mkentry( state, numecs, statenum, JAMSTATE, totaltrans ); | |
197 | } | |
198 | } | |
199 | ||
200 | else | |
201 | { /* use the proto */ | |
202 | mkentry( extrct[extptr], numecs, statenum, | |
203 | prottbl[minprot], mindiff ); | |
204 | ||
205 | /* if this state was sufficiently different from the proto | |
206 | * we built it from, make it, too, a proto | |
207 | */ | |
208 | ||
209 | if ( mindiff * 100 >= totaltrans * NEW_PROTO_DIFF_PERCENTAGE ) | |
210 | mkprot( state, statenum, comstate ); | |
211 | ||
212 | /* since mkprot added a new proto to the proto queue, it's possible | |
213 | * that "minprot" is no longer on the proto queue (if it happened | |
214 | * to have been the last entry, it would have been bumped off). | |
215 | * If it's not there, then the new proto took its physical place | |
216 | * (though logically the new proto is at the beginning of the | |
217 | * queue), so in that case the following call will do nothing. | |
218 | */ | |
219 | ||
220 | mv2front( minprot ); | |
221 | } | |
222 | } | |
223 | } | |
224 | ||
225 | ||
226 | /* cmptmps - compress template table entries | |
227 | * | |
228 | * synopsis | |
229 | * cmptmps(); | |
230 | * | |
231 | * template tables are compressed by using the 'template equivalence | |
232 | * classes', which are collections of transition character equivalence | |
233 | * classes which always appear together in templates - really meta-equivalence | |
234 | * classes. until this point, the tables for templates have been stored | |
235 | * up at the top end of the nxt array; they will now be compressed and have | |
236 | * table entries made for them. | |
237 | */ | |
238 | ||
239 | void cmptmps() | |
240 | ||
241 | { | |
242 | int tmpstorage[CSIZE + 1]; | |
243 | register int *tmp = tmpstorage, i, j; | |
244 | int totaltrans, trans; | |
245 | ||
246 | peakpairs = numtemps * numecs + tblend; | |
247 | ||
248 | if ( usemecs ) | |
249 | { | |
250 | /* create equivalence classes base on data gathered on template | |
251 | * transitions | |
252 | */ | |
253 | ||
254 | nummecs = cre8ecs( tecfwd, tecbck, numecs ); | |
255 | } | |
256 | ||
257 | else | |
258 | nummecs = numecs; | |
259 | ||
260 | if ( lastdfa + numtemps + 1 >= current_max_dfas ) | |
261 | increase_max_dfas(); | |
262 | ||
263 | /* loop through each template */ | |
264 | ||
265 | for ( i = 1; i <= numtemps; ++i ) | |
266 | { | |
267 | totaltrans = 0; /* number of non-jam transitions out of this template */ | |
268 | ||
269 | for ( j = 1; j <= numecs; ++j ) | |
270 | { | |
271 | trans = tnxt[numecs * i + j]; | |
272 | ||
273 | if ( usemecs ) | |
274 | { | |
275 | /* the absolute value of tecbck is the meta-equivalence class | |
276 | * of a given equivalence class, as set up by cre8ecs | |
277 | */ | |
278 | if ( tecbck[j] > 0 ) | |
279 | { | |
280 | tmp[tecbck[j]] = trans; | |
281 | ||
282 | if ( trans > 0 ) | |
283 | ++totaltrans; | |
284 | } | |
285 | } | |
286 | ||
287 | else | |
288 | { | |
289 | tmp[j] = trans; | |
290 | ||
291 | if ( trans > 0 ) | |
292 | ++totaltrans; | |
293 | } | |
294 | } | |
295 | ||
296 | /* it is assumed (in a rather subtle way) in the skeleton that | |
297 | * if we're using meta-equivalence classes, the def[] entry for | |
298 | * all templates is the jam template, i.e., templates never default | |
299 | * to other non-jam table entries (e.g., another template) | |
300 | */ | |
301 | ||
302 | /* leave room for the jam-state after the last real state */ | |
303 | mkentry( tmp, nummecs, lastdfa + i + 1, JAMSTATE, totaltrans ); | |
304 | } | |
305 | } | |
306 | ||
307 | ||
308 | ||
309 | /* expand_nxt_chk - expand the next check arrays */ | |
310 | ||
311 | void expand_nxt_chk() | |
312 | ||
313 | { | |
314 | register int old_max = current_max_xpairs; | |
315 | ||
316 | current_max_xpairs += MAX_XPAIRS_INCREMENT; | |
317 | ||
318 | ++num_reallocs; | |
319 | ||
320 | nxt = reallocate_integer_array( nxt, current_max_xpairs ); | |
321 | chk = reallocate_integer_array( chk, current_max_xpairs ); | |
322 | ||
323 | bzero( (char *) (chk + old_max), | |
324 | MAX_XPAIRS_INCREMENT * sizeof( int ) / sizeof( char ) ); | |
325 | } | |
326 | ||
327 | ||
328 | /* find_table_space - finds a space in the table for a state to be placed | |
329 | * | |
330 | * synopsis | |
331 | * int *state, numtrans, block_start; | |
332 | * int find_table_space(); | |
333 | * | |
334 | * block_start = find_table_space( state, numtrans ); | |
335 | * | |
336 | * State is the state to be added to the full speed transition table. | |
337 | * Numtrans is the number of out-transitions for the state. | |
338 | * | |
339 | * find_table_space() returns the position of the start of the first block (in | |
340 | * chk) able to accommodate the state | |
341 | * | |
342 | * In determining if a state will or will not fit, find_table_space() must take | |
343 | * into account the fact that an end-of-buffer state will be added at [0], | |
344 | * and an action number will be added in [-1]. | |
345 | */ | |
346 | ||
347 | int find_table_space( state, numtrans ) | |
348 | int *state, numtrans; | |
349 | ||
350 | { | |
351 | /* firstfree is the position of the first possible occurrence of two | |
352 | * consecutive unused records in the chk and nxt arrays | |
353 | */ | |
354 | register int i; | |
355 | register int *state_ptr, *chk_ptr; | |
356 | register int *ptr_to_last_entry_in_state; | |
357 | ||
358 | /* if there are too many out-transitions, put the state at the end of | |
359 | * nxt and chk | |
360 | */ | |
361 | if ( numtrans > MAX_XTIONS_FULL_INTERIOR_FIT ) | |
362 | { | |
363 | /* if table is empty, return the first available spot in chk/nxt, | |
364 | * which should be 1 | |
365 | */ | |
366 | if ( tblend < 2 ) | |
367 | return ( 1 ); | |
368 | ||
369 | i = tblend - numecs; /* start searching for table space near the | |
370 | * end of chk/nxt arrays | |
371 | */ | |
372 | } | |
373 | ||
374 | else | |
375 | i = firstfree; /* start searching for table space from the | |
376 | * beginning (skipping only the elements | |
377 | * which will definitely not hold the new | |
378 | * state) | |
379 | */ | |
380 | ||
381 | while ( 1 ) /* loops until a space is found */ | |
382 | { | |
383 | if ( i + numecs > current_max_xpairs ) | |
384 | expand_nxt_chk(); | |
385 | ||
386 | /* loops until space for end-of-buffer and action number are found */ | |
387 | while ( 1 ) | |
388 | { | |
389 | if ( chk[i - 1] == 0 ) /* check for action number space */ | |
390 | { | |
391 | if ( chk[i] == 0 ) /* check for end-of-buffer space */ | |
392 | break; | |
393 | ||
394 | else | |
395 | i += 2; /* since i != 0, there is no use checking to | |
396 | * see if (++i) - 1 == 0, because that's the | |
397 | * same as i == 0, so we skip a space | |
398 | */ | |
399 | } | |
400 | ||
401 | else | |
402 | ++i; | |
403 | ||
404 | if ( i + numecs > current_max_xpairs ) | |
405 | expand_nxt_chk(); | |
406 | } | |
407 | ||
408 | /* if we started search from the beginning, store the new firstfree for | |
409 | * the next call of find_table_space() | |
410 | */ | |
411 | if ( numtrans <= MAX_XTIONS_FULL_INTERIOR_FIT ) | |
412 | firstfree = i + 1; | |
413 | ||
414 | /* check to see if all elements in chk (and therefore nxt) that are | |
415 | * needed for the new state have not yet been taken | |
416 | */ | |
417 | ||
418 | state_ptr = &state[1]; | |
419 | ptr_to_last_entry_in_state = &chk[i + numecs + 1]; | |
420 | ||
421 | for ( chk_ptr = &chk[i + 1]; chk_ptr != ptr_to_last_entry_in_state; | |
422 | ++chk_ptr ) | |
423 | if ( *(state_ptr++) != 0 && *chk_ptr != 0 ) | |
424 | break; | |
425 | ||
426 | if ( chk_ptr == ptr_to_last_entry_in_state ) | |
427 | return ( i ); | |
428 | ||
429 | else | |
430 | ++i; | |
431 | } | |
432 | } | |
433 | ||
434 | ||
435 | /* inittbl - initialize transition tables | |
436 | * | |
437 | * synopsis | |
438 | * inittbl(); | |
439 | * | |
440 | * Initializes "firstfree" to be one beyond the end of the table. Initializes | |
441 | * all "chk" entries to be zero. Note that templates are built in their | |
442 | * own tbase/tdef tables. They are shifted down to be contiguous | |
443 | * with the non-template entries during table generation. | |
444 | */ | |
445 | void inittbl() | |
446 | ||
447 | { | |
448 | register int i; | |
449 | ||
450 | bzero( (char *) chk, current_max_xpairs * sizeof( int ) / sizeof( char ) ); | |
451 | ||
452 | tblend = 0; | |
453 | firstfree = tblend + 1; | |
454 | numtemps = 0; | |
455 | ||
456 | if ( usemecs ) | |
457 | { | |
458 | /* set up doubly-linked meta-equivalence classes | |
459 | * these are sets of equivalence classes which all have identical | |
460 | * transitions out of TEMPLATES | |
461 | */ | |
462 | ||
463 | tecbck[1] = NIL; | |
464 | ||
465 | for ( i = 2; i <= numecs; ++i ) | |
466 | { | |
467 | tecbck[i] = i - 1; | |
468 | tecfwd[i - 1] = i; | |
469 | } | |
470 | ||
471 | tecfwd[numecs] = NIL; | |
472 | } | |
473 | } | |
474 | ||
475 | ||
476 | /* mkdeftbl - make the default, "jam" table entries | |
477 | * | |
478 | * synopsis | |
479 | * mkdeftbl(); | |
480 | */ | |
481 | ||
482 | void mkdeftbl() | |
483 | ||
484 | { | |
485 | int i; | |
486 | ||
487 | jamstate = lastdfa + 1; | |
488 | ||
489 | ++tblend; /* room for transition on end-of-buffer character */ | |
490 | ||
491 | if ( tblend + numecs > current_max_xpairs ) | |
492 | expand_nxt_chk(); | |
493 | ||
494 | /* add in default end-of-buffer transition */ | |
495 | nxt[tblend] = end_of_buffer_state; | |
496 | chk[tblend] = jamstate; | |
497 | ||
498 | for ( i = 1; i <= numecs; ++i ) | |
499 | { | |
500 | nxt[tblend + i] = 0; | |
501 | chk[tblend + i] = jamstate; | |
502 | } | |
503 | ||
504 | jambase = tblend; | |
505 | ||
506 | base[jamstate] = jambase; | |
507 | def[jamstate] = 0; | |
508 | ||
509 | tblend += numecs; | |
510 | ++numtemps; | |
511 | } | |
512 | ||
513 | ||
514 | /* mkentry - create base/def and nxt/chk entries for transition array | |
515 | * | |
516 | * synopsis | |
517 | * int state[numchars + 1], numchars, statenum, deflink, totaltrans; | |
518 | * mkentry( state, numchars, statenum, deflink, totaltrans ); | |
519 | * | |
520 | * "state" is a transition array "numchars" characters in size, "statenum" | |
521 | * is the offset to be used into the base/def tables, and "deflink" is the | |
522 | * entry to put in the "def" table entry. If "deflink" is equal to | |
523 | * "JAMSTATE", then no attempt will be made to fit zero entries of "state" | |
524 | * (i.e., jam entries) into the table. It is assumed that by linking to | |
525 | * "JAMSTATE" they will be taken care of. In any case, entries in "state" | |
526 | * marking transitions to "SAME_TRANS" are treated as though they will be | |
527 | * taken care of by whereever "deflink" points. "totaltrans" is the total | |
528 | * number of transitions out of the state. If it is below a certain threshold, | |
529 | * the tables are searched for an interior spot that will accommodate the | |
530 | * state array. | |
531 | */ | |
532 | ||
533 | void mkentry( state, numchars, statenum, deflink, totaltrans ) | |
534 | register int *state; | |
535 | int numchars, statenum, deflink, totaltrans; | |
536 | ||
537 | { | |
538 | register int minec, maxec, i, baseaddr; | |
539 | int tblbase, tbllast; | |
540 | ||
541 | if ( totaltrans == 0 ) | |
542 | { /* there are no out-transitions */ | |
543 | if ( deflink == JAMSTATE ) | |
544 | base[statenum] = JAMSTATE; | |
545 | else | |
546 | base[statenum] = 0; | |
547 | ||
548 | def[statenum] = deflink; | |
549 | return; | |
550 | } | |
551 | ||
552 | for ( minec = 1; minec <= numchars; ++minec ) | |
553 | { | |
554 | if ( state[minec] != SAME_TRANS ) | |
555 | if ( state[minec] != 0 || deflink != JAMSTATE ) | |
556 | break; | |
557 | } | |
558 | ||
559 | if ( totaltrans == 1 ) | |
560 | { | |
561 | /* there's only one out-transition. Save it for later to fill | |
562 | * in holes in the tables. | |
563 | */ | |
564 | stack1( statenum, minec, state[minec], deflink ); | |
565 | return; | |
566 | } | |
567 | ||
568 | for ( maxec = numchars; maxec > 0; --maxec ) | |
569 | { | |
570 | if ( state[maxec] != SAME_TRANS ) | |
571 | if ( state[maxec] != 0 || deflink != JAMSTATE ) | |
572 | break; | |
573 | } | |
574 | ||
575 | /* Whether we try to fit the state table in the middle of the table | |
576 | * entries we have already generated, or if we just take the state | |
577 | * table at the end of the nxt/chk tables, we must make sure that we | |
578 | * have a valid base address (i.e., non-negative). Note that not only are | |
579 | * negative base addresses dangerous at run-time (because indexing the | |
580 | * next array with one and a low-valued character might generate an | |
581 | * array-out-of-bounds error message), but at compile-time negative | |
582 | * base addresses denote TEMPLATES. | |
583 | */ | |
584 | ||
585 | /* find the first transition of state that we need to worry about. */ | |
586 | if ( totaltrans * 100 <= numchars * INTERIOR_FIT_PERCENTAGE ) | |
587 | { /* attempt to squeeze it into the middle of the tabls */ | |
588 | baseaddr = firstfree; | |
589 | ||
590 | while ( baseaddr < minec ) | |
591 | { | |
592 | /* using baseaddr would result in a negative base address below | |
593 | * find the next free slot | |
594 | */ | |
595 | for ( ++baseaddr; chk[baseaddr] != 0; ++baseaddr ) | |
596 | ; | |
597 | } | |
598 | ||
599 | if ( baseaddr + maxec - minec >= current_max_xpairs ) | |
600 | expand_nxt_chk(); | |
601 | ||
602 | for ( i = minec; i <= maxec; ++i ) | |
603 | if ( state[i] != SAME_TRANS ) | |
604 | if ( state[i] != 0 || deflink != JAMSTATE ) | |
605 | if ( chk[baseaddr + i - minec] != 0 ) | |
606 | { /* baseaddr unsuitable - find another */ | |
607 | for ( ++baseaddr; | |
608 | baseaddr < current_max_xpairs && | |
609 | chk[baseaddr] != 0; | |
610 | ++baseaddr ) | |
611 | ; | |
612 | ||
613 | if ( baseaddr + maxec - minec >= current_max_xpairs ) | |
614 | expand_nxt_chk(); | |
615 | ||
616 | /* reset the loop counter so we'll start all | |
617 | * over again next time it's incremented | |
618 | */ | |
619 | ||
620 | i = minec - 1; | |
621 | } | |
622 | } | |
623 | ||
624 | else | |
625 | { | |
626 | /* ensure that the base address we eventually generate is | |
627 | * non-negative | |
628 | */ | |
629 | baseaddr = max( tblend + 1, minec ); | |
630 | } | |
631 | ||
632 | tblbase = baseaddr - minec; | |
633 | tbllast = tblbase + maxec; | |
634 | ||
635 | if ( tbllast >= current_max_xpairs ) | |
636 | expand_nxt_chk(); | |
637 | ||
638 | base[statenum] = tblbase; | |
639 | def[statenum] = deflink; | |
640 | ||
641 | for ( i = minec; i <= maxec; ++i ) | |
642 | if ( state[i] != SAME_TRANS ) | |
643 | if ( state[i] != 0 || deflink != JAMSTATE ) | |
644 | { | |
645 | nxt[tblbase + i] = state[i]; | |
646 | chk[tblbase + i] = statenum; | |
647 | } | |
648 | ||
649 | if ( baseaddr == firstfree ) | |
650 | /* find next free slot in tables */ | |
651 | for ( ++firstfree; chk[firstfree] != 0; ++firstfree ) | |
652 | ; | |
653 | ||
654 | tblend = max( tblend, tbllast ); | |
655 | } | |
656 | ||
657 | ||
658 | /* mk1tbl - create table entries for a state (or state fragment) which | |
659 | * has only one out-transition | |
660 | * | |
661 | * synopsis | |
662 | * int state, sym, onenxt, onedef; | |
663 | * mk1tbl( state, sym, onenxt, onedef ); | |
664 | */ | |
665 | ||
666 | void mk1tbl( state, sym, onenxt, onedef ) | |
667 | int state, sym, onenxt, onedef; | |
668 | ||
669 | { | |
670 | if ( firstfree < sym ) | |
671 | firstfree = sym; | |
672 | ||
673 | while ( chk[firstfree] != 0 ) | |
674 | if ( ++firstfree >= current_max_xpairs ) | |
675 | expand_nxt_chk(); | |
676 | ||
677 | base[state] = firstfree - sym; | |
678 | def[state] = onedef; | |
679 | chk[firstfree] = state; | |
680 | nxt[firstfree] = onenxt; | |
681 | ||
682 | if ( firstfree > tblend ) | |
683 | { | |
684 | tblend = firstfree++; | |
685 | ||
686 | if ( firstfree >= current_max_xpairs ) | |
687 | expand_nxt_chk(); | |
688 | } | |
689 | } | |
690 | ||
691 | ||
692 | /* mkprot - create new proto entry | |
693 | * | |
694 | * synopsis | |
695 | * int state[], statenum, comstate; | |
696 | * mkprot( state, statenum, comstate ); | |
697 | */ | |
698 | ||
699 | void mkprot( state, statenum, comstate ) | |
700 | int state[], statenum, comstate; | |
701 | ||
702 | { | |
703 | int i, slot, tblbase; | |
704 | ||
705 | if ( ++numprots >= MSP || numecs * numprots >= PROT_SAVE_SIZE ) | |
706 | { | |
707 | /* gotta make room for the new proto by dropping last entry in | |
708 | * the queue | |
709 | */ | |
710 | slot = lastprot; | |
711 | lastprot = protprev[lastprot]; | |
712 | protnext[lastprot] = NIL; | |
713 | } | |
714 | ||
715 | else | |
716 | slot = numprots; | |
717 | ||
718 | protnext[slot] = firstprot; | |
719 | ||
720 | if ( firstprot != NIL ) | |
721 | protprev[firstprot] = slot; | |
722 | ||
723 | firstprot = slot; | |
724 | prottbl[slot] = statenum; | |
725 | protcomst[slot] = comstate; | |
726 | ||
727 | /* copy state into save area so it can be compared with rapidly */ | |
728 | tblbase = numecs * (slot - 1); | |
729 | ||
730 | for ( i = 1; i <= numecs; ++i ) | |
731 | protsave[tblbase + i] = state[i]; | |
732 | } | |
733 | ||
734 | ||
735 | /* mktemplate - create a template entry based on a state, and connect the state | |
736 | * to it | |
737 | * | |
738 | * synopsis | |
739 | * int state[], statenum, comstate, totaltrans; | |
740 | * mktemplate( state, statenum, comstate, totaltrans ); | |
741 | */ | |
742 | ||
743 | void mktemplate( state, statenum, comstate ) | |
744 | int state[], statenum, comstate; | |
745 | ||
746 | { | |
747 | int i, numdiff, tmpbase, tmp[CSIZE + 1]; | |
748 | Char transset[CSIZE + 1]; | |
749 | int tsptr; | |
750 | ||
751 | ++numtemps; | |
752 | ||
753 | tsptr = 0; | |
754 | ||
755 | /* calculate where we will temporarily store the transition table | |
756 | * of the template in the tnxt[] array. The final transition table | |
757 | * gets created by cmptmps() | |
758 | */ | |
759 | ||
760 | tmpbase = numtemps * numecs; | |
761 | ||
762 | if ( tmpbase + numecs >= current_max_template_xpairs ) | |
763 | { | |
764 | current_max_template_xpairs += MAX_TEMPLATE_XPAIRS_INCREMENT; | |
765 | ||
766 | ++num_reallocs; | |
767 | ||
768 | tnxt = reallocate_integer_array( tnxt, current_max_template_xpairs ); | |
769 | } | |
770 | ||
771 | for ( i = 1; i <= numecs; ++i ) | |
772 | if ( state[i] == 0 ) | |
773 | tnxt[tmpbase + i] = 0; | |
774 | else | |
775 | { | |
776 | transset[tsptr++] = i; | |
777 | tnxt[tmpbase + i] = comstate; | |
778 | } | |
779 | ||
780 | if ( usemecs ) | |
781 | mkeccl( transset, tsptr, tecfwd, tecbck, numecs, 0 ); | |
782 | ||
783 | mkprot( tnxt + tmpbase, -numtemps, comstate ); | |
784 | ||
785 | /* we rely on the fact that mkprot adds things to the beginning | |
786 | * of the proto queue | |
787 | */ | |
788 | ||
789 | numdiff = tbldiff( state, firstprot, tmp ); | |
790 | mkentry( tmp, numecs, statenum, -numtemps, numdiff ); | |
791 | } | |
792 | ||
793 | ||
794 | /* mv2front - move proto queue element to front of queue | |
795 | * | |
796 | * synopsis | |
797 | * int qelm; | |
798 | * mv2front( qelm ); | |
799 | */ | |
800 | ||
801 | void mv2front( qelm ) | |
802 | int qelm; | |
803 | ||
804 | { | |
805 | if ( firstprot != qelm ) | |
806 | { | |
807 | if ( qelm == lastprot ) | |
808 | lastprot = protprev[lastprot]; | |
809 | ||
810 | protnext[protprev[qelm]] = protnext[qelm]; | |
811 | ||
812 | if ( protnext[qelm] != NIL ) | |
813 | protprev[protnext[qelm]] = protprev[qelm]; | |
814 | ||
815 | protprev[qelm] = NIL; | |
816 | protnext[qelm] = firstprot; | |
817 | protprev[firstprot] = qelm; | |
818 | firstprot = qelm; | |
819 | } | |
820 | } | |
821 | ||
822 | ||
823 | /* place_state - place a state into full speed transition table | |
824 | * | |
825 | * synopsis | |
826 | * int *state, statenum, transnum; | |
827 | * place_state( state, statenum, transnum ); | |
828 | * | |
829 | * State is the statenum'th state. It is indexed by equivalence class and | |
830 | * gives the number of the state to enter for a given equivalence class. | |
831 | * Transnum is the number of out-transitions for the state. | |
832 | */ | |
833 | ||
834 | void place_state( state, statenum, transnum ) | |
835 | int *state, statenum, transnum; | |
836 | ||
837 | { | |
838 | register int i; | |
839 | register int *state_ptr; | |
840 | int position = find_table_space( state, transnum ); | |
841 | ||
842 | /* base is the table of start positions */ | |
843 | base[statenum] = position; | |
844 | ||
845 | /* put in action number marker; this non-zero number makes sure that | |
846 | * find_table_space() knows that this position in chk/nxt is taken | |
847 | * and should not be used for another accepting number in another state | |
848 | */ | |
849 | chk[position - 1] = 1; | |
850 | ||
851 | /* put in end-of-buffer marker; this is for the same purposes as above */ | |
852 | chk[position] = 1; | |
853 | ||
854 | /* place the state into chk and nxt */ | |
855 | state_ptr = &state[1]; | |
856 | ||
857 | for ( i = 1; i <= numecs; ++i, ++state_ptr ) | |
858 | if ( *state_ptr != 0 ) | |
859 | { | |
860 | chk[position + i] = i; | |
861 | nxt[position + i] = *state_ptr; | |
862 | } | |
863 | ||
864 | if ( position + numecs > tblend ) | |
865 | tblend = position + numecs; | |
866 | } | |
867 | ||
868 | ||
869 | /* stack1 - save states with only one out-transition to be processed later | |
870 | * | |
871 | * synopsis | |
872 | * int statenum, sym, nextstate, deflink; | |
873 | * stack1( statenum, sym, nextstate, deflink ); | |
874 | * | |
875 | * if there's room for another state one the "one-transition" stack, the | |
876 | * state is pushed onto it, to be processed later by mk1tbl. If there's | |
877 | * no room, we process the sucker right now. | |
878 | */ | |
879 | ||
880 | void stack1( statenum, sym, nextstate, deflink ) | |
881 | int statenum, sym, nextstate, deflink; | |
882 | ||
883 | { | |
884 | if ( onesp >= ONE_STACK_SIZE - 1 ) | |
885 | mk1tbl( statenum, sym, nextstate, deflink ); | |
886 | ||
887 | else | |
888 | { | |
889 | ++onesp; | |
890 | onestate[onesp] = statenum; | |
891 | onesym[onesp] = sym; | |
892 | onenext[onesp] = nextstate; | |
893 | onedef[onesp] = deflink; | |
894 | } | |
895 | } | |
896 | ||
897 | ||
898 | /* tbldiff - compute differences between two state tables | |
899 | * | |
900 | * synopsis | |
901 | * int state[], pr, ext[]; | |
902 | * int tbldiff, numdifferences; | |
903 | * numdifferences = tbldiff( state, pr, ext ) | |
904 | * | |
905 | * "state" is the state array which is to be extracted from the pr'th | |
906 | * proto. "pr" is both the number of the proto we are extracting from | |
907 | * and an index into the save area where we can find the proto's complete | |
908 | * state table. Each entry in "state" which differs from the corresponding | |
909 | * entry of "pr" will appear in "ext". | |
910 | * Entries which are the same in both "state" and "pr" will be marked | |
911 | * as transitions to "SAME_TRANS" in "ext". The total number of differences | |
912 | * between "state" and "pr" is returned as function value. Note that this | |
913 | * number is "numecs" minus the number of "SAME_TRANS" entries in "ext". | |
914 | */ | |
915 | ||
916 | int tbldiff( state, pr, ext ) | |
917 | int state[], pr, ext[]; | |
918 | ||
919 | { | |
920 | register int i, *sp = state, *ep = ext, *protp; | |
921 | register int numdiff = 0; | |
922 | ||
923 | protp = &protsave[numecs * (pr - 1)]; | |
924 | ||
925 | for ( i = numecs; i > 0; --i ) | |
926 | { | |
927 | if ( *++protp == *++sp ) | |
928 | *++ep = SAME_TRANS; | |
929 | else | |
930 | { | |
931 | *++ep = *sp; | |
932 | ++numdiff; | |
933 | } | |
934 | } | |
935 | ||
936 | return ( numdiff ); | |
937 | } |