Commit | Line | Data |
---|---|---|
7d684d5e | 1 | |
15637ed4 RG |
2 | /* parse.y - parser for flex input */ |
3 | ||
4 | %token CHAR NUMBER SECTEND SCDECL XSCDECL WHITESPACE NAME PREVCCL EOF_OP | |
5 | ||
6 | %{ | |
15637ed4 RG |
7 | /*- |
8 | * Copyright (c) 1990 The Regents of the University of California. | |
9 | * All rights reserved. | |
10 | * | |
11 | * This code is derived from software contributed to Berkeley by | |
7d684d5e | 12 | * Vern Paxson. |
15637ed4 RG |
13 | * |
14 | * The United States Government has rights in this work pursuant | |
15 | * to contract no. DE-AC03-76SF00098 between the United States | |
16 | * Department of Energy and the University of California. | |
17 | * | |
7d684d5e NW |
18 | * Redistribution and use in source and binary forms are permitted provided |
19 | * that: (1) source distributions retain this entire copyright notice and | |
20 | * comment, and (2) distributions including binaries display the following | |
21 | * acknowledgement: ``This product includes software developed by the | |
22 | * University of California, Berkeley and its contributors'' in the | |
23 | * documentation or other materials provided with the distribution and in | |
24 | * all advertising materials mentioning features or use of this software. | |
25 | * Neither the name of the University nor the names of its contributors may | |
26 | * be used to endorse or promote products derived from this software without | |
27 | * specific prior written permission. | |
28 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED | |
29 | * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF | |
30 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. | |
15637ed4 RG |
31 | */ |
32 | ||
33 | #ifndef lint | |
7d684d5e NW |
34 | static char rcsid[] = |
35 | "@(#) $Header: /usr/fsys/odin/a/vern/flex/RCS/parse.y,v 2.7 90/06/27 23:48:31 vern Exp $ (LBL)"; | |
36 | #endif | |
15637ed4 RG |
37 | |
38 | #include "flexdef.h" | |
39 | ||
40 | int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, actvp, rulelen; | |
41 | int trlcontxt, xcluflg, cclsorted, varlength, variable_trail_rule; | |
42 | Char clower(); | |
7d684d5e NW |
43 | void build_eof_action(); |
44 | void yyerror(); | |
15637ed4 RG |
45 | |
46 | static int madeany = false; /* whether we've made the '.' character class */ | |
47 | int previous_continued_action; /* whether the previous rule's action was '|' */ | |
48 | ||
49 | %} | |
50 | ||
51 | %% | |
52 | goal : initlex sect1 sect1end sect2 initforrule | |
53 | { /* add default rule */ | |
54 | int def_rule; | |
55 | ||
56 | pat = cclinit(); | |
57 | cclnegate( pat ); | |
58 | ||
59 | def_rule = mkstate( -pat ); | |
60 | ||
61 | finish_rule( def_rule, false, 0, 0 ); | |
62 | ||
63 | for ( i = 1; i <= lastsc; ++i ) | |
64 | scset[i] = mkbranch( scset[i], def_rule ); | |
65 | ||
66 | if ( spprdflt ) | |
67 | fputs( "YY_FATAL_ERROR( \"flex scanner jammed\" )", | |
68 | temp_action_file ); | |
69 | else | |
70 | fputs( "ECHO", temp_action_file ); | |
71 | ||
72 | fputs( ";\n\tYY_BREAK\n", temp_action_file ); | |
73 | } | |
74 | ; | |
75 | ||
76 | initlex : | |
77 | { | |
78 | /* initialize for processing rules */ | |
79 | ||
80 | /* create default DFA start condition */ | |
81 | scinstal( "INITIAL", false ); | |
82 | } | |
83 | ; | |
84 | ||
85 | sect1 : sect1 startconddecl WHITESPACE namelist1 '\n' | |
86 | | | |
87 | | error '\n' | |
88 | { synerr( "unknown error processing section 1" ); } | |
89 | ; | |
90 | ||
91 | sect1end : SECTEND | |
92 | ; | |
93 | ||
94 | startconddecl : SCDECL | |
95 | { | |
96 | /* these productions are separate from the s1object | |
97 | * rule because the semantics must be done before | |
98 | * we parse the remainder of an s1object | |
99 | */ | |
100 | ||
101 | xcluflg = false; | |
102 | } | |
103 | ||
104 | | XSCDECL | |
105 | { xcluflg = true; } | |
106 | ; | |
107 | ||
108 | namelist1 : namelist1 WHITESPACE NAME | |
109 | { scinstal( nmstr, xcluflg ); } | |
110 | ||
111 | | NAME | |
112 | { scinstal( nmstr, xcluflg ); } | |
113 | ||
114 | | error | |
115 | { synerr( "bad start condition list" ); } | |
116 | ; | |
117 | ||
118 | sect2 : sect2 initforrule flexrule '\n' | |
119 | | | |
120 | ; | |
121 | ||
122 | initforrule : | |
123 | { | |
124 | /* initialize for a parse of one rule */ | |
125 | trlcontxt = variable_trail_rule = varlength = false; | |
126 | trailcnt = headcnt = rulelen = 0; | |
127 | current_state_type = STATE_NORMAL; | |
128 | previous_continued_action = continued_action; | |
129 | new_rule(); | |
130 | } | |
131 | ; | |
132 | ||
133 | flexrule : scon '^' rule | |
134 | { | |
135 | pat = $3; | |
136 | finish_rule( pat, variable_trail_rule, | |
137 | headcnt, trailcnt ); | |
138 | ||
139 | for ( i = 1; i <= actvp; ++i ) | |
140 | scbol[actvsc[i]] = | |
141 | mkbranch( scbol[actvsc[i]], pat ); | |
142 | ||
143 | if ( ! bol_needed ) | |
144 | { | |
145 | bol_needed = true; | |
146 | ||
147 | if ( performance_report ) | |
148 | pinpoint_message( | |
149 | "'^' operator results in sub-optimal performance" ); | |
150 | } | |
151 | } | |
152 | ||
153 | | scon rule | |
154 | { | |
155 | pat = $2; | |
156 | finish_rule( pat, variable_trail_rule, | |
157 | headcnt, trailcnt ); | |
158 | ||
159 | for ( i = 1; i <= actvp; ++i ) | |
160 | scset[actvsc[i]] = | |
161 | mkbranch( scset[actvsc[i]], pat ); | |
162 | } | |
163 | ||
164 | | '^' rule | |
165 | { | |
166 | pat = $2; | |
167 | finish_rule( pat, variable_trail_rule, | |
168 | headcnt, trailcnt ); | |
169 | ||
170 | /* add to all non-exclusive start conditions, | |
171 | * including the default (0) start condition | |
172 | */ | |
173 | ||
174 | for ( i = 1; i <= lastsc; ++i ) | |
175 | if ( ! scxclu[i] ) | |
176 | scbol[i] = mkbranch( scbol[i], pat ); | |
177 | ||
178 | if ( ! bol_needed ) | |
179 | { | |
180 | bol_needed = true; | |
181 | ||
182 | if ( performance_report ) | |
183 | pinpoint_message( | |
184 | "'^' operator results in sub-optimal performance" ); | |
185 | } | |
186 | } | |
187 | ||
188 | | rule | |
189 | { | |
190 | pat = $1; | |
191 | finish_rule( pat, variable_trail_rule, | |
192 | headcnt, trailcnt ); | |
193 | ||
194 | for ( i = 1; i <= lastsc; ++i ) | |
195 | if ( ! scxclu[i] ) | |
196 | scset[i] = mkbranch( scset[i], pat ); | |
197 | } | |
198 | ||
199 | | scon EOF_OP | |
200 | { build_eof_action(); } | |
201 | ||
202 | | EOF_OP | |
203 | { | |
204 | /* this EOF applies to all start conditions | |
205 | * which don't already have EOF actions | |
206 | */ | |
207 | actvp = 0; | |
208 | ||
209 | for ( i = 1; i <= lastsc; ++i ) | |
210 | if ( ! sceof[i] ) | |
211 | actvsc[++actvp] = i; | |
212 | ||
213 | if ( actvp == 0 ) | |
214 | pinpoint_message( | |
215 | "warning - all start conditions already have <<EOF>> rules" ); | |
216 | ||
217 | else | |
218 | build_eof_action(); | |
219 | } | |
220 | ||
221 | | error | |
222 | { synerr( "unrecognized rule" ); } | |
223 | ; | |
224 | ||
225 | scon : '<' namelist2 '>' | |
226 | ; | |
227 | ||
228 | namelist2 : namelist2 ',' NAME | |
229 | { | |
230 | if ( (scnum = sclookup( nmstr )) == 0 ) | |
231 | format_pinpoint_message( | |
232 | "undeclared start condition %s", nmstr ); | |
233 | ||
234 | else | |
235 | actvsc[++actvp] = scnum; | |
236 | } | |
237 | ||
238 | | NAME | |
239 | { | |
240 | if ( (scnum = sclookup( nmstr )) == 0 ) | |
241 | format_pinpoint_message( | |
242 | "undeclared start condition %s", nmstr ); | |
243 | else | |
244 | actvsc[actvp = 1] = scnum; | |
245 | } | |
246 | ||
247 | | error | |
248 | { synerr( "bad start condition list" ); } | |
249 | ; | |
250 | ||
251 | rule : re2 re | |
252 | { | |
253 | if ( transchar[lastst[$2]] != SYM_EPSILON ) | |
254 | /* provide final transition \now/ so it | |
255 | * will be marked as a trailing context | |
256 | * state | |
257 | */ | |
258 | $2 = link_machines( $2, mkstate( SYM_EPSILON ) ); | |
259 | ||
260 | mark_beginning_as_normal( $2 ); | |
261 | current_state_type = STATE_NORMAL; | |
262 | ||
263 | if ( previous_continued_action ) | |
264 | { | |
265 | /* we need to treat this as variable trailing | |
266 | * context so that the backup does not happen | |
267 | * in the action but before the action switch | |
268 | * statement. If the backup happens in the | |
269 | * action, then the rules "falling into" this | |
270 | * one's action will *also* do the backup, | |
271 | * erroneously. | |
272 | */ | |
273 | if ( ! varlength || headcnt != 0 ) | |
274 | { | |
275 | fprintf( stderr, | |
276 | "%s: warning - trailing context rule at line %d made variable because\n", | |
277 | program_name, linenum ); | |
278 | fprintf( stderr, | |
279 | " of preceding '|' action\n" ); | |
280 | } | |
281 | ||
282 | /* mark as variable */ | |
283 | varlength = true; | |
284 | headcnt = 0; | |
285 | } | |
286 | ||
287 | if ( varlength && headcnt == 0 ) | |
288 | { /* variable trailing context rule */ | |
289 | /* mark the first part of the rule as the accepting | |
290 | * "head" part of a trailing context rule | |
291 | */ | |
292 | /* by the way, we didn't do this at the beginning | |
293 | * of this production because back then | |
294 | * current_state_type was set up for a trail | |
295 | * rule, and add_accept() can create a new | |
296 | * state ... | |
297 | */ | |
298 | add_accept( $1, num_rules | YY_TRAILING_HEAD_MASK ); | |
299 | variable_trail_rule = true; | |
300 | } | |
301 | ||
302 | else | |
303 | trailcnt = rulelen; | |
304 | ||
305 | $$ = link_machines( $1, $2 ); | |
306 | } | |
307 | ||
308 | | re2 re '$' | |
309 | { synerr( "trailing context used twice" ); } | |
310 | ||
311 | | re '$' | |
312 | { | |
313 | if ( trlcontxt ) | |
314 | { | |
315 | synerr( "trailing context used twice" ); | |
316 | $$ = mkstate( SYM_EPSILON ); | |
317 | } | |
318 | ||
319 | else if ( previous_continued_action ) | |
320 | { | |
321 | /* see the comment in the rule for "re2 re" | |
322 | * above | |
323 | */ | |
324 | if ( ! varlength || headcnt != 0 ) | |
325 | { | |
326 | fprintf( stderr, | |
327 | "%s: warning - trailing context rule at line %d made variable because\n", | |
328 | program_name, linenum ); | |
329 | fprintf( stderr, | |
330 | " of preceding '|' action\n" ); | |
331 | } | |
332 | ||
333 | /* mark as variable */ | |
334 | varlength = true; | |
335 | headcnt = 0; | |
336 | } | |
337 | ||
7d684d5e NW |
338 | if ( varlength && headcnt == 0 ) |
339 | { | |
340 | /* again, see the comment in the rule for "re2 re" | |
341 | * above | |
342 | */ | |
343 | add_accept( $1, num_rules | YY_TRAILING_HEAD_MASK ); | |
344 | variable_trail_rule = true; | |
345 | } | |
15637ed4 | 346 | |
7d684d5e NW |
347 | else |
348 | { | |
349 | if ( ! varlength ) | |
350 | headcnt = rulelen; | |
15637ed4 | 351 | |
7d684d5e NW |
352 | ++rulelen; |
353 | trailcnt = 1; | |
354 | } | |
355 | ||
356 | trlcontxt = true; | |
15637ed4 RG |
357 | |
358 | eps = mkstate( SYM_EPSILON ); | |
359 | $$ = link_machines( $1, | |
360 | link_machines( eps, mkstate( '\n' ) ) ); | |
361 | } | |
362 | ||
363 | | re | |
364 | { | |
365 | $$ = $1; | |
366 | ||
367 | if ( trlcontxt ) | |
368 | { | |
369 | if ( varlength && headcnt == 0 ) | |
370 | /* both head and trail are variable-length */ | |
371 | variable_trail_rule = true; | |
372 | else | |
373 | trailcnt = rulelen; | |
374 | } | |
375 | } | |
376 | ; | |
377 | ||
378 | ||
379 | re : re '|' series | |
380 | { | |
381 | varlength = true; | |
382 | $$ = mkor( $1, $3 ); | |
383 | } | |
384 | ||
385 | | series | |
386 | { $$ = $1; } | |
387 | ; | |
388 | ||
389 | ||
390 | re2 : re '/' | |
391 | { | |
392 | /* this rule is written separately so | |
393 | * the reduction will occur before the trailing | |
394 | * series is parsed | |
395 | */ | |
396 | ||
397 | if ( trlcontxt ) | |
398 | synerr( "trailing context used twice" ); | |
399 | else | |
400 | trlcontxt = true; | |
401 | ||
402 | if ( varlength ) | |
403 | /* we hope the trailing context is fixed-length */ | |
404 | varlength = false; | |
405 | else | |
406 | headcnt = rulelen; | |
407 | ||
408 | rulelen = 0; | |
409 | ||
410 | current_state_type = STATE_TRAILING_CONTEXT; | |
411 | $$ = $1; | |
412 | } | |
413 | ; | |
414 | ||
415 | series : series singleton | |
416 | { | |
417 | /* this is where concatenation of adjacent patterns | |
418 | * gets done | |
419 | */ | |
420 | $$ = link_machines( $1, $2 ); | |
421 | } | |
422 | ||
423 | | singleton | |
424 | { $$ = $1; } | |
425 | ; | |
426 | ||
427 | singleton : singleton '*' | |
428 | { | |
429 | varlength = true; | |
430 | ||
431 | $$ = mkclos( $1 ); | |
432 | } | |
433 | ||
434 | | singleton '+' | |
435 | { | |
436 | varlength = true; | |
437 | ||
438 | $$ = mkposcl( $1 ); | |
439 | } | |
440 | ||
441 | | singleton '?' | |
442 | { | |
443 | varlength = true; | |
444 | ||
445 | $$ = mkopt( $1 ); | |
446 | } | |
447 | ||
448 | | singleton '{' NUMBER ',' NUMBER '}' | |
449 | { | |
450 | varlength = true; | |
451 | ||
452 | if ( $3 > $5 || $3 < 0 ) | |
453 | { | |
454 | synerr( "bad iteration values" ); | |
455 | $$ = $1; | |
456 | } | |
457 | else | |
458 | { | |
459 | if ( $3 == 0 ) | |
7d684d5e NW |
460 | { |
461 | if ( $5 <= 0 ) | |
462 | { | |
463 | synerr( "bad iteration values" ); | |
464 | $$ = $1; | |
465 | } | |
466 | else | |
467 | $$ = mkopt( mkrep( $1, 1, $5 ) ); | |
468 | } | |
15637ed4 RG |
469 | else |
470 | $$ = mkrep( $1, $3, $5 ); | |
471 | } | |
472 | } | |
473 | ||
474 | | singleton '{' NUMBER ',' '}' | |
475 | { | |
476 | varlength = true; | |
477 | ||
478 | if ( $3 <= 0 ) | |
479 | { | |
480 | synerr( "iteration value must be positive" ); | |
481 | $$ = $1; | |
482 | } | |
483 | ||
484 | else | |
485 | $$ = mkrep( $1, $3, INFINITY ); | |
486 | } | |
487 | ||
488 | | singleton '{' NUMBER '}' | |
489 | { | |
490 | /* the singleton could be something like "(foo)", | |
491 | * in which case we have no idea what its length | |
492 | * is, so we punt here. | |
493 | */ | |
494 | varlength = true; | |
495 | ||
496 | if ( $3 <= 0 ) | |
497 | { | |
498 | synerr( "iteration value must be positive" ); | |
499 | $$ = $1; | |
500 | } | |
501 | ||
502 | else | |
503 | $$ = link_machines( $1, copysingl( $1, $3 - 1 ) ); | |
504 | } | |
505 | ||
506 | | '.' | |
507 | { | |
508 | if ( ! madeany ) | |
509 | { | |
510 | /* create the '.' character class */ | |
511 | anyccl = cclinit(); | |
512 | ccladd( anyccl, '\n' ); | |
513 | cclnegate( anyccl ); | |
514 | ||
515 | if ( useecs ) | |
516 | mkeccl( ccltbl + cclmap[anyccl], | |
517 | ccllen[anyccl], nextecm, | |
518 | ecgroup, csize, csize ); | |
519 | ||
520 | madeany = true; | |
521 | } | |
522 | ||
523 | ++rulelen; | |
524 | ||
525 | $$ = mkstate( -anyccl ); | |
526 | } | |
527 | ||
528 | | fullccl | |
529 | { | |
530 | if ( ! cclsorted ) | |
531 | /* sort characters for fast searching. We use a | |
532 | * shell sort since this list could be large. | |
533 | */ | |
534 | cshell( ccltbl + cclmap[$1], ccllen[$1], true ); | |
535 | ||
536 | if ( useecs ) | |
537 | mkeccl( ccltbl + cclmap[$1], ccllen[$1], | |
538 | nextecm, ecgroup, csize, csize ); | |
539 | ||
540 | ++rulelen; | |
541 | ||
542 | $$ = mkstate( -$1 ); | |
543 | } | |
544 | ||
545 | | PREVCCL | |
546 | { | |
547 | ++rulelen; | |
548 | ||
549 | $$ = mkstate( -$1 ); | |
550 | } | |
551 | ||
552 | | '"' string '"' | |
553 | { $$ = $2; } | |
554 | ||
555 | | '(' re ')' | |
556 | { $$ = $2; } | |
557 | ||
558 | | CHAR | |
559 | { | |
560 | ++rulelen; | |
561 | ||
562 | if ( caseins && $1 >= 'A' && $1 <= 'Z' ) | |
563 | $1 = clower( $1 ); | |
564 | ||
565 | $$ = mkstate( $1 ); | |
566 | } | |
567 | ; | |
568 | ||
569 | fullccl : '[' ccl ']' | |
570 | { $$ = $2; } | |
571 | ||
572 | | '[' '^' ccl ']' | |
573 | { | |
574 | /* *Sigh* - to be compatible Unix lex, negated ccls | |
575 | * match newlines | |
576 | */ | |
577 | #ifdef NOTDEF | |
578 | ccladd( $3, '\n' ); /* negated ccls don't match '\n' */ | |
579 | cclsorted = false; /* because we added the newline */ | |
580 | #endif | |
581 | cclnegate( $3 ); | |
582 | $$ = $3; | |
583 | } | |
584 | ; | |
585 | ||
586 | ccl : ccl CHAR '-' CHAR | |
587 | { | |
588 | if ( $2 > $4 ) | |
589 | synerr( "negative range in character class" ); | |
590 | ||
591 | else | |
592 | { | |
593 | if ( caseins ) | |
594 | { | |
595 | if ( $2 >= 'A' && $2 <= 'Z' ) | |
596 | $2 = clower( $2 ); | |
597 | if ( $4 >= 'A' && $4 <= 'Z' ) | |
598 | $4 = clower( $4 ); | |
599 | } | |
600 | ||
601 | for ( i = $2; i <= $4; ++i ) | |
602 | ccladd( $1, i ); | |
603 | ||
604 | /* keep track if this ccl is staying in alphabetical | |
605 | * order | |
606 | */ | |
607 | cclsorted = cclsorted && ($2 > lastchar); | |
608 | lastchar = $4; | |
609 | } | |
610 | ||
611 | $$ = $1; | |
612 | } | |
613 | ||
614 | | ccl CHAR | |
615 | { | |
616 | if ( caseins ) | |
617 | if ( $2 >= 'A' && $2 <= 'Z' ) | |
618 | $2 = clower( $2 ); | |
619 | ||
620 | ccladd( $1, $2 ); | |
621 | cclsorted = cclsorted && ($2 > lastchar); | |
622 | lastchar = $2; | |
623 | $$ = $1; | |
624 | } | |
625 | ||
626 | | | |
627 | { | |
628 | cclsorted = true; | |
629 | lastchar = 0; | |
630 | $$ = cclinit(); | |
631 | } | |
632 | ; | |
633 | ||
634 | string : string CHAR | |
635 | { | |
636 | if ( caseins ) | |
637 | if ( $2 >= 'A' && $2 <= 'Z' ) | |
638 | $2 = clower( $2 ); | |
639 | ||
640 | ++rulelen; | |
641 | ||
642 | $$ = link_machines( $1, mkstate( $2 ) ); | |
643 | } | |
644 | ||
645 | | | |
646 | { $$ = mkstate( SYM_EPSILON ); } | |
647 | ; | |
648 | ||
649 | %% | |
650 | ||
651 | ||
652 | /* build_eof_action - build the "<<EOF>>" action for the active start | |
653 | * conditions | |
654 | */ | |
655 | ||
656 | void build_eof_action() | |
657 | ||
658 | { | |
659 | register int i; | |
660 | ||
661 | for ( i = 1; i <= actvp; ++i ) | |
662 | { | |
663 | if ( sceof[actvsc[i]] ) | |
664 | format_pinpoint_message( | |
665 | "multiple <<EOF>> rules for start condition %s", | |
666 | scname[actvsc[i]] ); | |
667 | ||
668 | else | |
669 | { | |
670 | sceof[actvsc[i]] = true; | |
671 | fprintf( temp_action_file, "case YY_STATE_EOF(%s):\n", | |
672 | scname[actvsc[i]] ); | |
673 | } | |
674 | } | |
675 | ||
676 | line_directive_out( temp_action_file ); | |
677 | } | |
678 | ||
679 | ||
680 | /* synerr - report a syntax error */ | |
681 | ||
682 | void synerr( str ) | |
683 | char str[]; | |
684 | ||
685 | { | |
686 | syntaxerror = true; | |
687 | pinpoint_message( str ); | |
688 | } | |
689 | ||
690 | ||
691 | /* format_pinpoint_message - write out a message formatted with one string, | |
692 | * pinpointing its location | |
693 | */ | |
694 | ||
695 | void format_pinpoint_message( msg, arg ) | |
696 | char msg[], arg[]; | |
697 | ||
698 | { | |
699 | char errmsg[MAXLINE]; | |
700 | ||
701 | (void) sprintf( errmsg, msg, arg ); | |
702 | pinpoint_message( errmsg ); | |
703 | } | |
704 | ||
705 | ||
706 | /* pinpoint_message - write out a message, pinpointing its location */ | |
707 | ||
708 | void pinpoint_message( str ) | |
709 | char str[]; | |
710 | ||
711 | { | |
712 | fprintf( stderr, "\"%s\", line %d: %s\n", infilename, linenum, str ); | |
713 | } | |
714 | ||
715 | ||
716 | /* yyerror - eat up an error message from the parser; | |
717 | * currently, messages are ignore | |
718 | */ | |
719 | ||
720 | void yyerror( msg ) | |
721 | char msg[]; | |
722 | ||
723 | { | |
724 | } |