Commit | Line | Data |
---|---|---|
15637ed4 RG |
1 | /* tblcmp - table compression routines */ |
2 | ||
3 | /*- | |
4 | * Copyright (c) 1990 The Regents of the University of California. | |
5 | * All rights reserved. | |
6 | * | |
7 | * This code is derived from software contributed to Berkeley by | |
7d684d5e | 8 | * Vern Paxson. |
15637ed4 RG |
9 | * |
10 | * The United States Government has rights in this work pursuant | |
11 | * to contract no. DE-AC03-76SF00098 between the United States | |
12 | * Department of Energy and the University of California. | |
13 | * | |
7d684d5e NW |
14 | * Redistribution and use in source and binary forms are permitted provided |
15 | * that: (1) source distributions retain this entire copyright notice and | |
16 | * comment, and (2) distributions including binaries display the following | |
17 | * acknowledgement: ``This product includes software developed by the | |
18 | * University of California, Berkeley and its contributors'' in the | |
19 | * documentation or other materials provided with the distribution and in | |
20 | * all advertising materials mentioning features or use of this software. | |
21 | * Neither the name of the University nor the names of its contributors may | |
22 | * be used to endorse or promote products derived from this software without | |
23 | * specific prior written permission. | |
24 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED | |
25 | * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF | |
26 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. | |
15637ed4 RG |
27 | */ |
28 | ||
29 | #ifndef lint | |
7d684d5e NW |
30 | static char rcsid[] = |
31 | "@(#) $Header: /usr/fsys/odin/a/vern/flex/RCS/tblcmp.c,v 2.5 90/06/27 23:48:38 vern Exp $ (LBL)"; | |
32 | #endif | |
15637ed4 RG |
33 | |
34 | #include "flexdef.h" | |
35 | ||
7d684d5e | 36 | |
15637ed4 RG |
37 | /* declarations for functions that have forward references */ |
38 | ||
39 | void mkentry PROTO((register int*, int, int, int, int)); | |
40 | void mkprot PROTO((int[], int, int)); | |
41 | void mktemplate PROTO((int[], int, int)); | |
42 | void mv2front PROTO((int)); | |
43 | int tbldiff PROTO((int[], int, int[])); | |
44 | ||
45 | ||
46 | /* bldtbl - build table entries for dfa state | |
47 | * | |
48 | * synopsis | |
49 | * int state[numecs], statenum, totaltrans, comstate, comfreq; | |
50 | * bldtbl( state, statenum, totaltrans, comstate, comfreq ); | |
51 | * | |
52 | * State is the statenum'th dfa state. It is indexed by equivalence class and | |
53 | * gives the number of the state to enter for a given equivalence class. | |
54 | * totaltrans is the total number of transitions out of the state. Comstate | |
55 | * is that state which is the destination of the most transitions out of State. | |
56 | * Comfreq is how many transitions there are out of State to Comstate. | |
57 | * | |
58 | * A note on terminology: | |
59 | * "protos" are transition tables which have a high probability of | |
60 | * either being redundant (a state processed later will have an identical | |
61 | * transition table) or nearly redundant (a state processed later will have | |
62 | * many of the same out-transitions). A "most recently used" queue of | |
63 | * protos is kept around with the hope that most states will find a proto | |
64 | * which is similar enough to be usable, and therefore compacting the | |
65 | * output tables. | |
66 | * "templates" are a special type of proto. If a transition table is | |
67 | * homogeneous or nearly homogeneous (all transitions go to the same | |
68 | * destination) then the odds are good that future states will also go | |
69 | * to the same destination state on basically the same character set. | |
70 | * These homogeneous states are so common when dealing with large rule | |
71 | * sets that they merit special attention. If the transition table were | |
72 | * simply made into a proto, then (typically) each subsequent, similar | |
73 | * state will differ from the proto for two out-transitions. One of these | |
74 | * out-transitions will be that character on which the proto does not go | |
75 | * to the common destination, and one will be that character on which the | |
76 | * state does not go to the common destination. Templates, on the other | |
77 | * hand, go to the common state on EVERY transition character, and therefore | |
78 | * cost only one difference. | |
79 | */ | |
80 | ||
81 | void bldtbl( state, statenum, totaltrans, comstate, comfreq ) | |
82 | int state[], statenum, totaltrans, comstate, comfreq; | |
83 | ||
84 | { | |
85 | int extptr, extrct[2][CSIZE + 1]; | |
86 | int mindiff, minprot, i, d; | |
87 | int checkcom; | |
88 | ||
89 | /* If extptr is 0 then the first array of extrct holds the result of the | |
90 | * "best difference" to date, which is those transitions which occur in | |
91 | * "state" but not in the proto which, to date, has the fewest differences | |
92 | * between itself and "state". If extptr is 1 then the second array of | |
93 | * extrct hold the best difference. The two arrays are toggled | |
94 | * between so that the best difference to date can be kept around and | |
95 | * also a difference just created by checking against a candidate "best" | |
96 | * proto. | |
97 | */ | |
98 | ||
99 | extptr = 0; | |
100 | ||
101 | /* if the state has too few out-transitions, don't bother trying to | |
102 | * compact its tables | |
103 | */ | |
104 | ||
105 | if ( (totaltrans * 100) < (numecs * PROTO_SIZE_PERCENTAGE) ) | |
106 | mkentry( state, numecs, statenum, JAMSTATE, totaltrans ); | |
107 | ||
108 | else | |
109 | { | |
110 | /* checkcom is true if we should only check "state" against | |
111 | * protos which have the same "comstate" value | |
112 | */ | |
113 | ||
114 | checkcom = comfreq * 100 > totaltrans * CHECK_COM_PERCENTAGE; | |
115 | ||
116 | minprot = firstprot; | |
117 | mindiff = totaltrans; | |
118 | ||
119 | if ( checkcom ) | |
120 | { | |
121 | /* find first proto which has the same "comstate" */ | |
122 | for ( i = firstprot; i != NIL; i = protnext[i] ) | |
123 | if ( protcomst[i] == comstate ) | |
124 | { | |
125 | minprot = i; | |
126 | mindiff = tbldiff( state, minprot, extrct[extptr] ); | |
127 | break; | |
128 | } | |
129 | } | |
130 | ||
131 | else | |
132 | { | |
133 | /* since we've decided that the most common destination out | |
134 | * of "state" does not occur with a high enough frequency, | |
135 | * we set the "comstate" to zero, assuring that if this state | |
136 | * is entered into the proto list, it will not be considered | |
137 | * a template. | |
138 | */ | |
139 | comstate = 0; | |
140 | ||
141 | if ( firstprot != NIL ) | |
142 | { | |
143 | minprot = firstprot; | |
144 | mindiff = tbldiff( state, minprot, extrct[extptr] ); | |
145 | } | |
146 | } | |
147 | ||
148 | /* we now have the first interesting proto in "minprot". If | |
149 | * it matches within the tolerances set for the first proto, | |
150 | * we don't want to bother scanning the rest of the proto list | |
151 | * to see if we have any other reasonable matches. | |
152 | */ | |
153 | ||
154 | if ( mindiff * 100 > totaltrans * FIRST_MATCH_DIFF_PERCENTAGE ) | |
155 | { /* not a good enough match. Scan the rest of the protos */ | |
156 | for ( i = minprot; i != NIL; i = protnext[i] ) | |
157 | { | |
158 | d = tbldiff( state, i, extrct[1 - extptr] ); | |
159 | if ( d < mindiff ) | |
160 | { | |
161 | extptr = 1 - extptr; | |
162 | mindiff = d; | |
163 | minprot = i; | |
164 | } | |
165 | } | |
166 | } | |
167 | ||
168 | /* check if the proto we've decided on as our best bet is close | |
169 | * enough to the state we want to match to be usable | |
170 | */ | |
171 | ||
172 | if ( mindiff * 100 > totaltrans * ACCEPTABLE_DIFF_PERCENTAGE ) | |
173 | { | |
174 | /* no good. If the state is homogeneous enough, we make a | |
175 | * template out of it. Otherwise, we make a proto. | |
176 | */ | |
177 | ||
178 | if ( comfreq * 100 >= totaltrans * TEMPLATE_SAME_PERCENTAGE ) | |
179 | mktemplate( state, statenum, comstate ); | |
180 | ||
181 | else | |
182 | { | |
183 | mkprot( state, statenum, comstate ); | |
184 | mkentry( state, numecs, statenum, JAMSTATE, totaltrans ); | |
185 | } | |
186 | } | |
187 | ||
188 | else | |
189 | { /* use the proto */ | |
190 | mkentry( extrct[extptr], numecs, statenum, | |
191 | prottbl[minprot], mindiff ); | |
192 | ||
193 | /* if this state was sufficiently different from the proto | |
194 | * we built it from, make it, too, a proto | |
195 | */ | |
196 | ||
197 | if ( mindiff * 100 >= totaltrans * NEW_PROTO_DIFF_PERCENTAGE ) | |
198 | mkprot( state, statenum, comstate ); | |
199 | ||
200 | /* since mkprot added a new proto to the proto queue, it's possible | |
201 | * that "minprot" is no longer on the proto queue (if it happened | |
202 | * to have been the last entry, it would have been bumped off). | |
203 | * If it's not there, then the new proto took its physical place | |
204 | * (though logically the new proto is at the beginning of the | |
205 | * queue), so in that case the following call will do nothing. | |
206 | */ | |
207 | ||
208 | mv2front( minprot ); | |
209 | } | |
210 | } | |
211 | } | |
212 | ||
213 | ||
214 | /* cmptmps - compress template table entries | |
215 | * | |
216 | * synopsis | |
217 | * cmptmps(); | |
218 | * | |
219 | * template tables are compressed by using the 'template equivalence | |
220 | * classes', which are collections of transition character equivalence | |
221 | * classes which always appear together in templates - really meta-equivalence | |
222 | * classes. until this point, the tables for templates have been stored | |
223 | * up at the top end of the nxt array; they will now be compressed and have | |
224 | * table entries made for them. | |
225 | */ | |
226 | ||
227 | void cmptmps() | |
228 | ||
229 | { | |
230 | int tmpstorage[CSIZE + 1]; | |
231 | register int *tmp = tmpstorage, i, j; | |
232 | int totaltrans, trans; | |
233 | ||
234 | peakpairs = numtemps * numecs + tblend; | |
235 | ||
236 | if ( usemecs ) | |
237 | { | |
238 | /* create equivalence classes base on data gathered on template | |
239 | * transitions | |
240 | */ | |
241 | ||
242 | nummecs = cre8ecs( tecfwd, tecbck, numecs ); | |
243 | } | |
244 | ||
245 | else | |
246 | nummecs = numecs; | |
247 | ||
248 | if ( lastdfa + numtemps + 1 >= current_max_dfas ) | |
249 | increase_max_dfas(); | |
250 | ||
251 | /* loop through each template */ | |
252 | ||
253 | for ( i = 1; i <= numtemps; ++i ) | |
254 | { | |
255 | totaltrans = 0; /* number of non-jam transitions out of this template */ | |
256 | ||
257 | for ( j = 1; j <= numecs; ++j ) | |
258 | { | |
259 | trans = tnxt[numecs * i + j]; | |
260 | ||
261 | if ( usemecs ) | |
262 | { | |
263 | /* the absolute value of tecbck is the meta-equivalence class | |
264 | * of a given equivalence class, as set up by cre8ecs | |
265 | */ | |
266 | if ( tecbck[j] > 0 ) | |
267 | { | |
268 | tmp[tecbck[j]] = trans; | |
269 | ||
270 | if ( trans > 0 ) | |
271 | ++totaltrans; | |
272 | } | |
273 | } | |
274 | ||
275 | else | |
276 | { | |
277 | tmp[j] = trans; | |
278 | ||
279 | if ( trans > 0 ) | |
280 | ++totaltrans; | |
281 | } | |
282 | } | |
283 | ||
284 | /* it is assumed (in a rather subtle way) in the skeleton that | |
285 | * if we're using meta-equivalence classes, the def[] entry for | |
286 | * all templates is the jam template, i.e., templates never default | |
287 | * to other non-jam table entries (e.g., another template) | |
288 | */ | |
289 | ||
290 | /* leave room for the jam-state after the last real state */ | |
291 | mkentry( tmp, nummecs, lastdfa + i + 1, JAMSTATE, totaltrans ); | |
292 | } | |
293 | } | |
294 | ||
295 | ||
296 | ||
297 | /* expand_nxt_chk - expand the next check arrays */ | |
298 | ||
299 | void expand_nxt_chk() | |
300 | ||
301 | { | |
302 | register int old_max = current_max_xpairs; | |
303 | ||
304 | current_max_xpairs += MAX_XPAIRS_INCREMENT; | |
305 | ||
306 | ++num_reallocs; | |
307 | ||
308 | nxt = reallocate_integer_array( nxt, current_max_xpairs ); | |
309 | chk = reallocate_integer_array( chk, current_max_xpairs ); | |
310 | ||
311 | bzero( (char *) (chk + old_max), | |
312 | MAX_XPAIRS_INCREMENT * sizeof( int ) / sizeof( char ) ); | |
313 | } | |
314 | ||
315 | ||
316 | /* find_table_space - finds a space in the table for a state to be placed | |
317 | * | |
318 | * synopsis | |
319 | * int *state, numtrans, block_start; | |
320 | * int find_table_space(); | |
321 | * | |
322 | * block_start = find_table_space( state, numtrans ); | |
323 | * | |
324 | * State is the state to be added to the full speed transition table. | |
325 | * Numtrans is the number of out-transitions for the state. | |
326 | * | |
327 | * find_table_space() returns the position of the start of the first block (in | |
328 | * chk) able to accommodate the state | |
329 | * | |
330 | * In determining if a state will or will not fit, find_table_space() must take | |
331 | * into account the fact that an end-of-buffer state will be added at [0], | |
332 | * and an action number will be added in [-1]. | |
333 | */ | |
334 | ||
335 | int find_table_space( state, numtrans ) | |
336 | int *state, numtrans; | |
337 | ||
338 | { | |
339 | /* firstfree is the position of the first possible occurrence of two | |
340 | * consecutive unused records in the chk and nxt arrays | |
341 | */ | |
342 | register int i; | |
343 | register int *state_ptr, *chk_ptr; | |
344 | register int *ptr_to_last_entry_in_state; | |
345 | ||
346 | /* if there are too many out-transitions, put the state at the end of | |
347 | * nxt and chk | |
348 | */ | |
349 | if ( numtrans > MAX_XTIONS_FULL_INTERIOR_FIT ) | |
350 | { | |
351 | /* if table is empty, return the first available spot in chk/nxt, | |
352 | * which should be 1 | |
353 | */ | |
354 | if ( tblend < 2 ) | |
355 | return ( 1 ); | |
356 | ||
357 | i = tblend - numecs; /* start searching for table space near the | |
358 | * end of chk/nxt arrays | |
359 | */ | |
360 | } | |
361 | ||
362 | else | |
363 | i = firstfree; /* start searching for table space from the | |
364 | * beginning (skipping only the elements | |
365 | * which will definitely not hold the new | |
366 | * state) | |
367 | */ | |
368 | ||
369 | while ( 1 ) /* loops until a space is found */ | |
370 | { | |
7d684d5e | 371 | if ( i + numecs >= current_max_xpairs ) |
15637ed4 RG |
372 | expand_nxt_chk(); |
373 | ||
374 | /* loops until space for end-of-buffer and action number are found */ | |
375 | while ( 1 ) | |
376 | { | |
377 | if ( chk[i - 1] == 0 ) /* check for action number space */ | |
378 | { | |
379 | if ( chk[i] == 0 ) /* check for end-of-buffer space */ | |
380 | break; | |
381 | ||
382 | else | |
383 | i += 2; /* since i != 0, there is no use checking to | |
384 | * see if (++i) - 1 == 0, because that's the | |
385 | * same as i == 0, so we skip a space | |
386 | */ | |
387 | } | |
388 | ||
389 | else | |
390 | ++i; | |
391 | ||
7d684d5e | 392 | if ( i + numecs >= current_max_xpairs ) |
15637ed4 RG |
393 | expand_nxt_chk(); |
394 | } | |
395 | ||
396 | /* if we started search from the beginning, store the new firstfree for | |
397 | * the next call of find_table_space() | |
398 | */ | |
399 | if ( numtrans <= MAX_XTIONS_FULL_INTERIOR_FIT ) | |
400 | firstfree = i + 1; | |
401 | ||
402 | /* check to see if all elements in chk (and therefore nxt) that are | |
403 | * needed for the new state have not yet been taken | |
404 | */ | |
405 | ||
406 | state_ptr = &state[1]; | |
407 | ptr_to_last_entry_in_state = &chk[i + numecs + 1]; | |
408 | ||
409 | for ( chk_ptr = &chk[i + 1]; chk_ptr != ptr_to_last_entry_in_state; | |
410 | ++chk_ptr ) | |
411 | if ( *(state_ptr++) != 0 && *chk_ptr != 0 ) | |
412 | break; | |
413 | ||
414 | if ( chk_ptr == ptr_to_last_entry_in_state ) | |
415 | return ( i ); | |
416 | ||
417 | else | |
418 | ++i; | |
419 | } | |
420 | } | |
421 | ||
422 | ||
423 | /* inittbl - initialize transition tables | |
424 | * | |
425 | * synopsis | |
426 | * inittbl(); | |
427 | * | |
428 | * Initializes "firstfree" to be one beyond the end of the table. Initializes | |
429 | * all "chk" entries to be zero. Note that templates are built in their | |
430 | * own tbase/tdef tables. They are shifted down to be contiguous | |
431 | * with the non-template entries during table generation. | |
432 | */ | |
433 | void inittbl() | |
434 | ||
435 | { | |
436 | register int i; | |
437 | ||
438 | bzero( (char *) chk, current_max_xpairs * sizeof( int ) / sizeof( char ) ); | |
439 | ||
440 | tblend = 0; | |
441 | firstfree = tblend + 1; | |
442 | numtemps = 0; | |
443 | ||
444 | if ( usemecs ) | |
445 | { | |
446 | /* set up doubly-linked meta-equivalence classes | |
447 | * these are sets of equivalence classes which all have identical | |
448 | * transitions out of TEMPLATES | |
449 | */ | |
450 | ||
451 | tecbck[1] = NIL; | |
452 | ||
453 | for ( i = 2; i <= numecs; ++i ) | |
454 | { | |
455 | tecbck[i] = i - 1; | |
456 | tecfwd[i - 1] = i; | |
457 | } | |
458 | ||
459 | tecfwd[numecs] = NIL; | |
460 | } | |
461 | } | |
462 | ||
463 | ||
464 | /* mkdeftbl - make the default, "jam" table entries | |
465 | * | |
466 | * synopsis | |
467 | * mkdeftbl(); | |
468 | */ | |
469 | ||
470 | void mkdeftbl() | |
471 | ||
472 | { | |
473 | int i; | |
474 | ||
475 | jamstate = lastdfa + 1; | |
476 | ||
477 | ++tblend; /* room for transition on end-of-buffer character */ | |
478 | ||
7d684d5e | 479 | if ( tblend + numecs >= current_max_xpairs ) |
15637ed4 RG |
480 | expand_nxt_chk(); |
481 | ||
482 | /* add in default end-of-buffer transition */ | |
483 | nxt[tblend] = end_of_buffer_state; | |
484 | chk[tblend] = jamstate; | |
485 | ||
486 | for ( i = 1; i <= numecs; ++i ) | |
487 | { | |
488 | nxt[tblend + i] = 0; | |
489 | chk[tblend + i] = jamstate; | |
490 | } | |
491 | ||
492 | jambase = tblend; | |
493 | ||
494 | base[jamstate] = jambase; | |
495 | def[jamstate] = 0; | |
496 | ||
497 | tblend += numecs; | |
498 | ++numtemps; | |
499 | } | |
500 | ||
501 | ||
502 | /* mkentry - create base/def and nxt/chk entries for transition array | |
503 | * | |
504 | * synopsis | |
505 | * int state[numchars + 1], numchars, statenum, deflink, totaltrans; | |
506 | * mkentry( state, numchars, statenum, deflink, totaltrans ); | |
507 | * | |
508 | * "state" is a transition array "numchars" characters in size, "statenum" | |
509 | * is the offset to be used into the base/def tables, and "deflink" is the | |
510 | * entry to put in the "def" table entry. If "deflink" is equal to | |
511 | * "JAMSTATE", then no attempt will be made to fit zero entries of "state" | |
512 | * (i.e., jam entries) into the table. It is assumed that by linking to | |
513 | * "JAMSTATE" they will be taken care of. In any case, entries in "state" | |
514 | * marking transitions to "SAME_TRANS" are treated as though they will be | |
515 | * taken care of by whereever "deflink" points. "totaltrans" is the total | |
516 | * number of transitions out of the state. If it is below a certain threshold, | |
517 | * the tables are searched for an interior spot that will accommodate the | |
518 | * state array. | |
519 | */ | |
520 | ||
521 | void mkentry( state, numchars, statenum, deflink, totaltrans ) | |
522 | register int *state; | |
523 | int numchars, statenum, deflink, totaltrans; | |
524 | ||
525 | { | |
526 | register int minec, maxec, i, baseaddr; | |
527 | int tblbase, tbllast; | |
528 | ||
529 | if ( totaltrans == 0 ) | |
530 | { /* there are no out-transitions */ | |
531 | if ( deflink == JAMSTATE ) | |
532 | base[statenum] = JAMSTATE; | |
533 | else | |
534 | base[statenum] = 0; | |
535 | ||
536 | def[statenum] = deflink; | |
537 | return; | |
538 | } | |
539 | ||
540 | for ( minec = 1; minec <= numchars; ++minec ) | |
541 | { | |
542 | if ( state[minec] != SAME_TRANS ) | |
543 | if ( state[minec] != 0 || deflink != JAMSTATE ) | |
544 | break; | |
545 | } | |
546 | ||
547 | if ( totaltrans == 1 ) | |
548 | { | |
549 | /* there's only one out-transition. Save it for later to fill | |
550 | * in holes in the tables. | |
551 | */ | |
552 | stack1( statenum, minec, state[minec], deflink ); | |
553 | return; | |
554 | } | |
555 | ||
556 | for ( maxec = numchars; maxec > 0; --maxec ) | |
557 | { | |
558 | if ( state[maxec] != SAME_TRANS ) | |
559 | if ( state[maxec] != 0 || deflink != JAMSTATE ) | |
560 | break; | |
561 | } | |
562 | ||
563 | /* Whether we try to fit the state table in the middle of the table | |
564 | * entries we have already generated, or if we just take the state | |
565 | * table at the end of the nxt/chk tables, we must make sure that we | |
566 | * have a valid base address (i.e., non-negative). Note that not only are | |
567 | * negative base addresses dangerous at run-time (because indexing the | |
568 | * next array with one and a low-valued character might generate an | |
569 | * array-out-of-bounds error message), but at compile-time negative | |
570 | * base addresses denote TEMPLATES. | |
571 | */ | |
572 | ||
573 | /* find the first transition of state that we need to worry about. */ | |
574 | if ( totaltrans * 100 <= numchars * INTERIOR_FIT_PERCENTAGE ) | |
575 | { /* attempt to squeeze it into the middle of the tabls */ | |
576 | baseaddr = firstfree; | |
577 | ||
578 | while ( baseaddr < minec ) | |
579 | { | |
580 | /* using baseaddr would result in a negative base address below | |
581 | * find the next free slot | |
582 | */ | |
583 | for ( ++baseaddr; chk[baseaddr] != 0; ++baseaddr ) | |
584 | ; | |
585 | } | |
586 | ||
7d684d5e | 587 | if ( baseaddr + maxec - minec + 1 >= current_max_xpairs ) |
15637ed4 RG |
588 | expand_nxt_chk(); |
589 | ||
590 | for ( i = minec; i <= maxec; ++i ) | |
591 | if ( state[i] != SAME_TRANS ) | |
592 | if ( state[i] != 0 || deflink != JAMSTATE ) | |
593 | if ( chk[baseaddr + i - minec] != 0 ) | |
594 | { /* baseaddr unsuitable - find another */ | |
595 | for ( ++baseaddr; | |
596 | baseaddr < current_max_xpairs && | |
597 | chk[baseaddr] != 0; | |
598 | ++baseaddr ) | |
599 | ; | |
600 | ||
7d684d5e NW |
601 | if ( baseaddr + maxec - minec + 1 >= |
602 | current_max_xpairs ) | |
15637ed4 RG |
603 | expand_nxt_chk(); |
604 | ||
605 | /* reset the loop counter so we'll start all | |
606 | * over again next time it's incremented | |
607 | */ | |
608 | ||
609 | i = minec - 1; | |
610 | } | |
611 | } | |
612 | ||
613 | else | |
614 | { | |
615 | /* ensure that the base address we eventually generate is | |
616 | * non-negative | |
617 | */ | |
618 | baseaddr = max( tblend + 1, minec ); | |
619 | } | |
620 | ||
621 | tblbase = baseaddr - minec; | |
622 | tbllast = tblbase + maxec; | |
623 | ||
7d684d5e | 624 | if ( tbllast + 1 >= current_max_xpairs ) |
15637ed4 RG |
625 | expand_nxt_chk(); |
626 | ||
627 | base[statenum] = tblbase; | |
628 | def[statenum] = deflink; | |
629 | ||
630 | for ( i = minec; i <= maxec; ++i ) | |
631 | if ( state[i] != SAME_TRANS ) | |
632 | if ( state[i] != 0 || deflink != JAMSTATE ) | |
633 | { | |
634 | nxt[tblbase + i] = state[i]; | |
635 | chk[tblbase + i] = statenum; | |
636 | } | |
637 | ||
638 | if ( baseaddr == firstfree ) | |
639 | /* find next free slot in tables */ | |
640 | for ( ++firstfree; chk[firstfree] != 0; ++firstfree ) | |
641 | ; | |
642 | ||
643 | tblend = max( tblend, tbllast ); | |
644 | } | |
645 | ||
646 | ||
647 | /* mk1tbl - create table entries for a state (or state fragment) which | |
648 | * has only one out-transition | |
649 | * | |
650 | * synopsis | |
651 | * int state, sym, onenxt, onedef; | |
652 | * mk1tbl( state, sym, onenxt, onedef ); | |
653 | */ | |
654 | ||
655 | void mk1tbl( state, sym, onenxt, onedef ) | |
656 | int state, sym, onenxt, onedef; | |
657 | ||
658 | { | |
659 | if ( firstfree < sym ) | |
660 | firstfree = sym; | |
661 | ||
662 | while ( chk[firstfree] != 0 ) | |
663 | if ( ++firstfree >= current_max_xpairs ) | |
664 | expand_nxt_chk(); | |
665 | ||
666 | base[state] = firstfree - sym; | |
667 | def[state] = onedef; | |
668 | chk[firstfree] = state; | |
669 | nxt[firstfree] = onenxt; | |
670 | ||
671 | if ( firstfree > tblend ) | |
672 | { | |
673 | tblend = firstfree++; | |
674 | ||
675 | if ( firstfree >= current_max_xpairs ) | |
676 | expand_nxt_chk(); | |
677 | } | |
678 | } | |
679 | ||
680 | ||
681 | /* mkprot - create new proto entry | |
682 | * | |
683 | * synopsis | |
684 | * int state[], statenum, comstate; | |
685 | * mkprot( state, statenum, comstate ); | |
686 | */ | |
687 | ||
688 | void mkprot( state, statenum, comstate ) | |
689 | int state[], statenum, comstate; | |
690 | ||
691 | { | |
692 | int i, slot, tblbase; | |
693 | ||
694 | if ( ++numprots >= MSP || numecs * numprots >= PROT_SAVE_SIZE ) | |
695 | { | |
696 | /* gotta make room for the new proto by dropping last entry in | |
697 | * the queue | |
698 | */ | |
699 | slot = lastprot; | |
700 | lastprot = protprev[lastprot]; | |
701 | protnext[lastprot] = NIL; | |
702 | } | |
703 | ||
704 | else | |
705 | slot = numprots; | |
706 | ||
707 | protnext[slot] = firstprot; | |
708 | ||
709 | if ( firstprot != NIL ) | |
710 | protprev[firstprot] = slot; | |
711 | ||
712 | firstprot = slot; | |
713 | prottbl[slot] = statenum; | |
714 | protcomst[slot] = comstate; | |
715 | ||
716 | /* copy state into save area so it can be compared with rapidly */ | |
717 | tblbase = numecs * (slot - 1); | |
718 | ||
719 | for ( i = 1; i <= numecs; ++i ) | |
720 | protsave[tblbase + i] = state[i]; | |
721 | } | |
722 | ||
723 | ||
724 | /* mktemplate - create a template entry based on a state, and connect the state | |
725 | * to it | |
726 | * | |
727 | * synopsis | |
728 | * int state[], statenum, comstate, totaltrans; | |
729 | * mktemplate( state, statenum, comstate, totaltrans ); | |
730 | */ | |
731 | ||
732 | void mktemplate( state, statenum, comstate ) | |
733 | int state[], statenum, comstate; | |
734 | ||
735 | { | |
736 | int i, numdiff, tmpbase, tmp[CSIZE + 1]; | |
737 | Char transset[CSIZE + 1]; | |
738 | int tsptr; | |
739 | ||
740 | ++numtemps; | |
741 | ||
742 | tsptr = 0; | |
743 | ||
744 | /* calculate where we will temporarily store the transition table | |
745 | * of the template in the tnxt[] array. The final transition table | |
746 | * gets created by cmptmps() | |
747 | */ | |
748 | ||
749 | tmpbase = numtemps * numecs; | |
750 | ||
751 | if ( tmpbase + numecs >= current_max_template_xpairs ) | |
752 | { | |
753 | current_max_template_xpairs += MAX_TEMPLATE_XPAIRS_INCREMENT; | |
754 | ||
755 | ++num_reallocs; | |
756 | ||
757 | tnxt = reallocate_integer_array( tnxt, current_max_template_xpairs ); | |
758 | } | |
759 | ||
760 | for ( i = 1; i <= numecs; ++i ) | |
761 | if ( state[i] == 0 ) | |
762 | tnxt[tmpbase + i] = 0; | |
763 | else | |
764 | { | |
765 | transset[tsptr++] = i; | |
766 | tnxt[tmpbase + i] = comstate; | |
767 | } | |
768 | ||
769 | if ( usemecs ) | |
770 | mkeccl( transset, tsptr, tecfwd, tecbck, numecs, 0 ); | |
771 | ||
772 | mkprot( tnxt + tmpbase, -numtemps, comstate ); | |
773 | ||
774 | /* we rely on the fact that mkprot adds things to the beginning | |
775 | * of the proto queue | |
776 | */ | |
777 | ||
778 | numdiff = tbldiff( state, firstprot, tmp ); | |
779 | mkentry( tmp, numecs, statenum, -numtemps, numdiff ); | |
780 | } | |
781 | ||
782 | ||
783 | /* mv2front - move proto queue element to front of queue | |
784 | * | |
785 | * synopsis | |
786 | * int qelm; | |
787 | * mv2front( qelm ); | |
788 | */ | |
789 | ||
790 | void mv2front( qelm ) | |
791 | int qelm; | |
792 | ||
793 | { | |
794 | if ( firstprot != qelm ) | |
795 | { | |
796 | if ( qelm == lastprot ) | |
797 | lastprot = protprev[lastprot]; | |
798 | ||
799 | protnext[protprev[qelm]] = protnext[qelm]; | |
800 | ||
801 | if ( protnext[qelm] != NIL ) | |
802 | protprev[protnext[qelm]] = protprev[qelm]; | |
803 | ||
804 | protprev[qelm] = NIL; | |
805 | protnext[qelm] = firstprot; | |
806 | protprev[firstprot] = qelm; | |
807 | firstprot = qelm; | |
808 | } | |
809 | } | |
810 | ||
811 | ||
812 | /* place_state - place a state into full speed transition table | |
813 | * | |
814 | * synopsis | |
815 | * int *state, statenum, transnum; | |
816 | * place_state( state, statenum, transnum ); | |
817 | * | |
818 | * State is the statenum'th state. It is indexed by equivalence class and | |
819 | * gives the number of the state to enter for a given equivalence class. | |
820 | * Transnum is the number of out-transitions for the state. | |
821 | */ | |
822 | ||
823 | void place_state( state, statenum, transnum ) | |
824 | int *state, statenum, transnum; | |
825 | ||
826 | { | |
827 | register int i; | |
828 | register int *state_ptr; | |
829 | int position = find_table_space( state, transnum ); | |
830 | ||
831 | /* base is the table of start positions */ | |
832 | base[statenum] = position; | |
833 | ||
834 | /* put in action number marker; this non-zero number makes sure that | |
835 | * find_table_space() knows that this position in chk/nxt is taken | |
836 | * and should not be used for another accepting number in another state | |
837 | */ | |
838 | chk[position - 1] = 1; | |
839 | ||
840 | /* put in end-of-buffer marker; this is for the same purposes as above */ | |
841 | chk[position] = 1; | |
842 | ||
843 | /* place the state into chk and nxt */ | |
844 | state_ptr = &state[1]; | |
845 | ||
846 | for ( i = 1; i <= numecs; ++i, ++state_ptr ) | |
847 | if ( *state_ptr != 0 ) | |
848 | { | |
849 | chk[position + i] = i; | |
850 | nxt[position + i] = *state_ptr; | |
851 | } | |
852 | ||
853 | if ( position + numecs > tblend ) | |
854 | tblend = position + numecs; | |
855 | } | |
856 | ||
857 | ||
858 | /* stack1 - save states with only one out-transition to be processed later | |
859 | * | |
860 | * synopsis | |
861 | * int statenum, sym, nextstate, deflink; | |
862 | * stack1( statenum, sym, nextstate, deflink ); | |
863 | * | |
864 | * if there's room for another state one the "one-transition" stack, the | |
865 | * state is pushed onto it, to be processed later by mk1tbl. If there's | |
866 | * no room, we process the sucker right now. | |
867 | */ | |
868 | ||
869 | void stack1( statenum, sym, nextstate, deflink ) | |
870 | int statenum, sym, nextstate, deflink; | |
871 | ||
872 | { | |
873 | if ( onesp >= ONE_STACK_SIZE - 1 ) | |
874 | mk1tbl( statenum, sym, nextstate, deflink ); | |
875 | ||
876 | else | |
877 | { | |
878 | ++onesp; | |
879 | onestate[onesp] = statenum; | |
880 | onesym[onesp] = sym; | |
881 | onenext[onesp] = nextstate; | |
882 | onedef[onesp] = deflink; | |
883 | } | |
884 | } | |
885 | ||
886 | ||
887 | /* tbldiff - compute differences between two state tables | |
888 | * | |
889 | * synopsis | |
890 | * int state[], pr, ext[]; | |
891 | * int tbldiff, numdifferences; | |
892 | * numdifferences = tbldiff( state, pr, ext ) | |
893 | * | |
894 | * "state" is the state array which is to be extracted from the pr'th | |
895 | * proto. "pr" is both the number of the proto we are extracting from | |
896 | * and an index into the save area where we can find the proto's complete | |
897 | * state table. Each entry in "state" which differs from the corresponding | |
898 | * entry of "pr" will appear in "ext". | |
899 | * Entries which are the same in both "state" and "pr" will be marked | |
900 | * as transitions to "SAME_TRANS" in "ext". The total number of differences | |
901 | * between "state" and "pr" is returned as function value. Note that this | |
902 | * number is "numecs" minus the number of "SAME_TRANS" entries in "ext". | |
903 | */ | |
904 | ||
905 | int tbldiff( state, pr, ext ) | |
906 | int state[], pr, ext[]; | |
907 | ||
908 | { | |
909 | register int i, *sp = state, *ep = ext, *protp; | |
910 | register int numdiff = 0; | |
911 | ||
912 | protp = &protsave[numecs * (pr - 1)]; | |
913 | ||
914 | for ( i = numecs; i > 0; --i ) | |
915 | { | |
916 | if ( *++protp == *++sp ) | |
917 | *++ep = SAME_TRANS; | |
918 | else | |
919 | { | |
920 | *++ep = *sp; | |
921 | ++numdiff; | |
922 | } | |
923 | } | |
924 | ||
925 | return ( numdiff ); | |
926 | } |