Added
[unix-history] / usr.bin / lex / parse.y
CommitLineData
7d684d5e 1
15637ed4
RG
2/* parse.y - parser for flex input */
3
4%token CHAR NUMBER SECTEND SCDECL XSCDECL WHITESPACE NAME PREVCCL EOF_OP
5
6%{
15637ed4
RG
7/*-
8 * Copyright (c) 1990 The Regents of the University of California.
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
7d684d5e 12 * Vern Paxson.
15637ed4
RG
13 *
14 * The United States Government has rights in this work pursuant
15 * to contract no. DE-AC03-76SF00098 between the United States
16 * Department of Energy and the University of California.
17 *
7d684d5e
NW
18 * Redistribution and use in source and binary forms are permitted provided
19 * that: (1) source distributions retain this entire copyright notice and
20 * comment, and (2) distributions including binaries display the following
21 * acknowledgement: ``This product includes software developed by the
22 * University of California, Berkeley and its contributors'' in the
23 * documentation or other materials provided with the distribution and in
24 * all advertising materials mentioning features or use of this software.
25 * Neither the name of the University nor the names of its contributors may
26 * be used to endorse or promote products derived from this software without
27 * specific prior written permission.
28 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
29 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
30 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
15637ed4
RG
31 */
32
33#ifndef lint
7d684d5e
NW
34static char rcsid[] =
35 "@(#) $Header: /usr/fsys/odin/a/vern/flex/RCS/parse.y,v 2.7 90/06/27 23:48:31 vern Exp $ (LBL)";
36#endif
15637ed4
RG
37
38#include "flexdef.h"
39
40int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, actvp, rulelen;
41int trlcontxt, xcluflg, cclsorted, varlength, variable_trail_rule;
42Char clower();
7d684d5e
NW
43void build_eof_action();
44void yyerror();
15637ed4
RG
45
46static int madeany = false; /* whether we've made the '.' character class */
47int previous_continued_action; /* whether the previous rule's action was '|' */
48
49%}
50
51%%
52goal : initlex sect1 sect1end sect2 initforrule
53 { /* add default rule */
54 int def_rule;
55
56 pat = cclinit();
57 cclnegate( pat );
58
59 def_rule = mkstate( -pat );
60
61 finish_rule( def_rule, false, 0, 0 );
62
63 for ( i = 1; i <= lastsc; ++i )
64 scset[i] = mkbranch( scset[i], def_rule );
65
66 if ( spprdflt )
67 fputs( "YY_FATAL_ERROR( \"flex scanner jammed\" )",
68 temp_action_file );
69 else
70 fputs( "ECHO", temp_action_file );
71
72 fputs( ";\n\tYY_BREAK\n", temp_action_file );
73 }
74 ;
75
76initlex :
77 {
78 /* initialize for processing rules */
79
80 /* create default DFA start condition */
81 scinstal( "INITIAL", false );
82 }
83 ;
84
85sect1 : sect1 startconddecl WHITESPACE namelist1 '\n'
86 |
87 | error '\n'
88 { synerr( "unknown error processing section 1" ); }
89 ;
90
91sect1end : SECTEND
92 ;
93
94startconddecl : SCDECL
95 {
96 /* these productions are separate from the s1object
97 * rule because the semantics must be done before
98 * we parse the remainder of an s1object
99 */
100
101 xcluflg = false;
102 }
103
104 | XSCDECL
105 { xcluflg = true; }
106 ;
107
108namelist1 : namelist1 WHITESPACE NAME
109 { scinstal( nmstr, xcluflg ); }
110
111 | NAME
112 { scinstal( nmstr, xcluflg ); }
113
114 | error
115 { synerr( "bad start condition list" ); }
116 ;
117
118sect2 : sect2 initforrule flexrule '\n'
119 |
120 ;
121
122initforrule :
123 {
124 /* initialize for a parse of one rule */
125 trlcontxt = variable_trail_rule = varlength = false;
126 trailcnt = headcnt = rulelen = 0;
127 current_state_type = STATE_NORMAL;
128 previous_continued_action = continued_action;
129 new_rule();
130 }
131 ;
132
133flexrule : scon '^' rule
134 {
135 pat = $3;
136 finish_rule( pat, variable_trail_rule,
137 headcnt, trailcnt );
138
139 for ( i = 1; i <= actvp; ++i )
140 scbol[actvsc[i]] =
141 mkbranch( scbol[actvsc[i]], pat );
142
143 if ( ! bol_needed )
144 {
145 bol_needed = true;
146
147 if ( performance_report )
148 pinpoint_message(
149 "'^' operator results in sub-optimal performance" );
150 }
151 }
152
153 | scon rule
154 {
155 pat = $2;
156 finish_rule( pat, variable_trail_rule,
157 headcnt, trailcnt );
158
159 for ( i = 1; i <= actvp; ++i )
160 scset[actvsc[i]] =
161 mkbranch( scset[actvsc[i]], pat );
162 }
163
164 | '^' rule
165 {
166 pat = $2;
167 finish_rule( pat, variable_trail_rule,
168 headcnt, trailcnt );
169
170 /* add to all non-exclusive start conditions,
171 * including the default (0) start condition
172 */
173
174 for ( i = 1; i <= lastsc; ++i )
175 if ( ! scxclu[i] )
176 scbol[i] = mkbranch( scbol[i], pat );
177
178 if ( ! bol_needed )
179 {
180 bol_needed = true;
181
182 if ( performance_report )
183 pinpoint_message(
184 "'^' operator results in sub-optimal performance" );
185 }
186 }
187
188 | rule
189 {
190 pat = $1;
191 finish_rule( pat, variable_trail_rule,
192 headcnt, trailcnt );
193
194 for ( i = 1; i <= lastsc; ++i )
195 if ( ! scxclu[i] )
196 scset[i] = mkbranch( scset[i], pat );
197 }
198
199 | scon EOF_OP
200 { build_eof_action(); }
201
202 | EOF_OP
203 {
204 /* this EOF applies to all start conditions
205 * which don't already have EOF actions
206 */
207 actvp = 0;
208
209 for ( i = 1; i <= lastsc; ++i )
210 if ( ! sceof[i] )
211 actvsc[++actvp] = i;
212
213 if ( actvp == 0 )
214 pinpoint_message(
215 "warning - all start conditions already have <<EOF>> rules" );
216
217 else
218 build_eof_action();
219 }
220
221 | error
222 { synerr( "unrecognized rule" ); }
223 ;
224
225scon : '<' namelist2 '>'
226 ;
227
228namelist2 : namelist2 ',' NAME
229 {
230 if ( (scnum = sclookup( nmstr )) == 0 )
231 format_pinpoint_message(
232 "undeclared start condition %s", nmstr );
233
234 else
235 actvsc[++actvp] = scnum;
236 }
237
238 | NAME
239 {
240 if ( (scnum = sclookup( nmstr )) == 0 )
241 format_pinpoint_message(
242 "undeclared start condition %s", nmstr );
243 else
244 actvsc[actvp = 1] = scnum;
245 }
246
247 | error
248 { synerr( "bad start condition list" ); }
249 ;
250
251rule : re2 re
252 {
253 if ( transchar[lastst[$2]] != SYM_EPSILON )
254 /* provide final transition \now/ so it
255 * will be marked as a trailing context
256 * state
257 */
258 $2 = link_machines( $2, mkstate( SYM_EPSILON ) );
259
260 mark_beginning_as_normal( $2 );
261 current_state_type = STATE_NORMAL;
262
263 if ( previous_continued_action )
264 {
265 /* we need to treat this as variable trailing
266 * context so that the backup does not happen
267 * in the action but before the action switch
268 * statement. If the backup happens in the
269 * action, then the rules "falling into" this
270 * one's action will *also* do the backup,
271 * erroneously.
272 */
273 if ( ! varlength || headcnt != 0 )
274 {
275 fprintf( stderr,
276 "%s: warning - trailing context rule at line %d made variable because\n",
277 program_name, linenum );
278 fprintf( stderr,
279 " of preceding '|' action\n" );
280 }
281
282 /* mark as variable */
283 varlength = true;
284 headcnt = 0;
285 }
286
287 if ( varlength && headcnt == 0 )
288 { /* variable trailing context rule */
289 /* mark the first part of the rule as the accepting
290 * "head" part of a trailing context rule
291 */
292 /* by the way, we didn't do this at the beginning
293 * of this production because back then
294 * current_state_type was set up for a trail
295 * rule, and add_accept() can create a new
296 * state ...
297 */
298 add_accept( $1, num_rules | YY_TRAILING_HEAD_MASK );
299 variable_trail_rule = true;
300 }
301
302 else
303 trailcnt = rulelen;
304
305 $$ = link_machines( $1, $2 );
306 }
307
308 | re2 re '$'
309 { synerr( "trailing context used twice" ); }
310
311 | re '$'
312 {
313 if ( trlcontxt )
314 {
315 synerr( "trailing context used twice" );
316 $$ = mkstate( SYM_EPSILON );
317 }
318
319 else if ( previous_continued_action )
320 {
321 /* see the comment in the rule for "re2 re"
322 * above
323 */
324 if ( ! varlength || headcnt != 0 )
325 {
326 fprintf( stderr,
327 "%s: warning - trailing context rule at line %d made variable because\n",
328 program_name, linenum );
329 fprintf( stderr,
330 " of preceding '|' action\n" );
331 }
332
333 /* mark as variable */
334 varlength = true;
335 headcnt = 0;
336 }
337
7d684d5e
NW
338 if ( varlength && headcnt == 0 )
339 {
340 /* again, see the comment in the rule for "re2 re"
341 * above
342 */
343 add_accept( $1, num_rules | YY_TRAILING_HEAD_MASK );
344 variable_trail_rule = true;
345 }
15637ed4 346
7d684d5e
NW
347 else
348 {
349 if ( ! varlength )
350 headcnt = rulelen;
15637ed4 351
7d684d5e
NW
352 ++rulelen;
353 trailcnt = 1;
354 }
355
356 trlcontxt = true;
15637ed4
RG
357
358 eps = mkstate( SYM_EPSILON );
359 $$ = link_machines( $1,
360 link_machines( eps, mkstate( '\n' ) ) );
361 }
362
363 | re
364 {
365 $$ = $1;
366
367 if ( trlcontxt )
368 {
369 if ( varlength && headcnt == 0 )
370 /* both head and trail are variable-length */
371 variable_trail_rule = true;
372 else
373 trailcnt = rulelen;
374 }
375 }
376 ;
377
378
379re : re '|' series
380 {
381 varlength = true;
382 $$ = mkor( $1, $3 );
383 }
384
385 | series
386 { $$ = $1; }
387 ;
388
389
390re2 : re '/'
391 {
392 /* this rule is written separately so
393 * the reduction will occur before the trailing
394 * series is parsed
395 */
396
397 if ( trlcontxt )
398 synerr( "trailing context used twice" );
399 else
400 trlcontxt = true;
401
402 if ( varlength )
403 /* we hope the trailing context is fixed-length */
404 varlength = false;
405 else
406 headcnt = rulelen;
407
408 rulelen = 0;
409
410 current_state_type = STATE_TRAILING_CONTEXT;
411 $$ = $1;
412 }
413 ;
414
415series : series singleton
416 {
417 /* this is where concatenation of adjacent patterns
418 * gets done
419 */
420 $$ = link_machines( $1, $2 );
421 }
422
423 | singleton
424 { $$ = $1; }
425 ;
426
427singleton : singleton '*'
428 {
429 varlength = true;
430
431 $$ = mkclos( $1 );
432 }
433
434 | singleton '+'
435 {
436 varlength = true;
437
438 $$ = mkposcl( $1 );
439 }
440
441 | singleton '?'
442 {
443 varlength = true;
444
445 $$ = mkopt( $1 );
446 }
447
448 | singleton '{' NUMBER ',' NUMBER '}'
449 {
450 varlength = true;
451
452 if ( $3 > $5 || $3 < 0 )
453 {
454 synerr( "bad iteration values" );
455 $$ = $1;
456 }
457 else
458 {
459 if ( $3 == 0 )
7d684d5e
NW
460 {
461 if ( $5 <= 0 )
462 {
463 synerr( "bad iteration values" );
464 $$ = $1;
465 }
466 else
467 $$ = mkopt( mkrep( $1, 1, $5 ) );
468 }
15637ed4
RG
469 else
470 $$ = mkrep( $1, $3, $5 );
471 }
472 }
473
474 | singleton '{' NUMBER ',' '}'
475 {
476 varlength = true;
477
478 if ( $3 <= 0 )
479 {
480 synerr( "iteration value must be positive" );
481 $$ = $1;
482 }
483
484 else
485 $$ = mkrep( $1, $3, INFINITY );
486 }
487
488 | singleton '{' NUMBER '}'
489 {
490 /* the singleton could be something like "(foo)",
491 * in which case we have no idea what its length
492 * is, so we punt here.
493 */
494 varlength = true;
495
496 if ( $3 <= 0 )
497 {
498 synerr( "iteration value must be positive" );
499 $$ = $1;
500 }
501
502 else
503 $$ = link_machines( $1, copysingl( $1, $3 - 1 ) );
504 }
505
506 | '.'
507 {
508 if ( ! madeany )
509 {
510 /* create the '.' character class */
511 anyccl = cclinit();
512 ccladd( anyccl, '\n' );
513 cclnegate( anyccl );
514
515 if ( useecs )
516 mkeccl( ccltbl + cclmap[anyccl],
517 ccllen[anyccl], nextecm,
518 ecgroup, csize, csize );
519
520 madeany = true;
521 }
522
523 ++rulelen;
524
525 $$ = mkstate( -anyccl );
526 }
527
528 | fullccl
529 {
530 if ( ! cclsorted )
531 /* sort characters for fast searching. We use a
532 * shell sort since this list could be large.
533 */
534 cshell( ccltbl + cclmap[$1], ccllen[$1], true );
535
536 if ( useecs )
537 mkeccl( ccltbl + cclmap[$1], ccllen[$1],
538 nextecm, ecgroup, csize, csize );
539
540 ++rulelen;
541
542 $$ = mkstate( -$1 );
543 }
544
545 | PREVCCL
546 {
547 ++rulelen;
548
549 $$ = mkstate( -$1 );
550 }
551
552 | '"' string '"'
553 { $$ = $2; }
554
555 | '(' re ')'
556 { $$ = $2; }
557
558 | CHAR
559 {
560 ++rulelen;
561
562 if ( caseins && $1 >= 'A' && $1 <= 'Z' )
563 $1 = clower( $1 );
564
565 $$ = mkstate( $1 );
566 }
567 ;
568
569fullccl : '[' ccl ']'
570 { $$ = $2; }
571
572 | '[' '^' ccl ']'
573 {
574 /* *Sigh* - to be compatible Unix lex, negated ccls
575 * match newlines
576 */
577#ifdef NOTDEF
578 ccladd( $3, '\n' ); /* negated ccls don't match '\n' */
579 cclsorted = false; /* because we added the newline */
580#endif
581 cclnegate( $3 );
582 $$ = $3;
583 }
584 ;
585
586ccl : ccl CHAR '-' CHAR
587 {
588 if ( $2 > $4 )
589 synerr( "negative range in character class" );
590
591 else
592 {
593 if ( caseins )
594 {
595 if ( $2 >= 'A' && $2 <= 'Z' )
596 $2 = clower( $2 );
597 if ( $4 >= 'A' && $4 <= 'Z' )
598 $4 = clower( $4 );
599 }
600
601 for ( i = $2; i <= $4; ++i )
602 ccladd( $1, i );
603
604 /* keep track if this ccl is staying in alphabetical
605 * order
606 */
607 cclsorted = cclsorted && ($2 > lastchar);
608 lastchar = $4;
609 }
610
611 $$ = $1;
612 }
613
614 | ccl CHAR
615 {
616 if ( caseins )
617 if ( $2 >= 'A' && $2 <= 'Z' )
618 $2 = clower( $2 );
619
620 ccladd( $1, $2 );
621 cclsorted = cclsorted && ($2 > lastchar);
622 lastchar = $2;
623 $$ = $1;
624 }
625
626 |
627 {
628 cclsorted = true;
629 lastchar = 0;
630 $$ = cclinit();
631 }
632 ;
633
634string : string CHAR
635 {
636 if ( caseins )
637 if ( $2 >= 'A' && $2 <= 'Z' )
638 $2 = clower( $2 );
639
640 ++rulelen;
641
642 $$ = link_machines( $1, mkstate( $2 ) );
643 }
644
645 |
646 { $$ = mkstate( SYM_EPSILON ); }
647 ;
648
649%%
650
651
652/* build_eof_action - build the "<<EOF>>" action for the active start
653 * conditions
654 */
655
656void build_eof_action()
657
658 {
659 register int i;
660
661 for ( i = 1; i <= actvp; ++i )
662 {
663 if ( sceof[actvsc[i]] )
664 format_pinpoint_message(
665 "multiple <<EOF>> rules for start condition %s",
666 scname[actvsc[i]] );
667
668 else
669 {
670 sceof[actvsc[i]] = true;
671 fprintf( temp_action_file, "case YY_STATE_EOF(%s):\n",
672 scname[actvsc[i]] );
673 }
674 }
675
676 line_directive_out( temp_action_file );
677 }
678
679
680/* synerr - report a syntax error */
681
682void synerr( str )
683char str[];
684
685 {
686 syntaxerror = true;
687 pinpoint_message( str );
688 }
689
690
691/* format_pinpoint_message - write out a message formatted with one string,
692 * pinpointing its location
693 */
694
695void format_pinpoint_message( msg, arg )
696char msg[], arg[];
697
698 {
699 char errmsg[MAXLINE];
700
701 (void) sprintf( errmsg, msg, arg );
702 pinpoint_message( errmsg );
703 }
704
705
706/* pinpoint_message - write out a message, pinpointing its location */
707
708void pinpoint_message( str )
709char str[];
710
711 {
712 fprintf( stderr, "\"%s\", line %d: %s\n", infilename, linenum, str );
713 }
714
715
716/* yyerror - eat up an error message from the parser;
717 * currently, messages are ignore
718 */
719
720void yyerror( msg )
721char msg[];
722
723 {
724 }