386BSD 0.1 development
[unix-history] / usr / src / usr.bin / lex / parse.y
/* parse.y - parser for flex input */
%token CHAR NUMBER SECTEND SCDECL XSCDECL WHITESPACE NAME PREVCCL EOF_OP
%{
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Vern Paxson of Lawrence Berkeley Laboratory.
*
* The United States Government has rights in this work pursuant
* to contract no. DE-AC03-76SF00098 between the United States
* Department of Energy and the University of California.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef lint
static char sccsid[] = "@(#)parse.y 5.2 (Berkeley) 6/18/90";
#endif /* not lint */
#include "flexdef.h"
int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, actvp, rulelen;
int trlcontxt, xcluflg, cclsorted, varlength, variable_trail_rule;
Char clower();
static int madeany = false; /* whether we've made the '.' character class */
int previous_continued_action; /* whether the previous rule's action was '|' */
%}
%%
goal : initlex sect1 sect1end sect2 initforrule
{ /* add default rule */
int def_rule;
pat = cclinit();
cclnegate( pat );
def_rule = mkstate( -pat );
finish_rule( def_rule, false, 0, 0 );
for ( i = 1; i <= lastsc; ++i )
scset[i] = mkbranch( scset[i], def_rule );
if ( spprdflt )
fputs( "YY_FATAL_ERROR( \"flex scanner jammed\" )",
temp_action_file );
else
fputs( "ECHO", temp_action_file );
fputs( ";\n\tYY_BREAK\n", temp_action_file );
}
;
initlex :
{
/* initialize for processing rules */
/* create default DFA start condition */
scinstal( "INITIAL", false );
}
;
sect1 : sect1 startconddecl WHITESPACE namelist1 '\n'
|
| error '\n'
{ synerr( "unknown error processing section 1" ); }
;
sect1end : SECTEND
;
startconddecl : SCDECL
{
/* these productions are separate from the s1object
* rule because the semantics must be done before
* we parse the remainder of an s1object
*/
xcluflg = false;
}
| XSCDECL
{ xcluflg = true; }
;
namelist1 : namelist1 WHITESPACE NAME
{ scinstal( nmstr, xcluflg ); }
| NAME
{ scinstal( nmstr, xcluflg ); }
| error
{ synerr( "bad start condition list" ); }
;
sect2 : sect2 initforrule flexrule '\n'
|
;
initforrule :
{
/* initialize for a parse of one rule */
trlcontxt = variable_trail_rule = varlength = false;
trailcnt = headcnt = rulelen = 0;
current_state_type = STATE_NORMAL;
previous_continued_action = continued_action;
new_rule();
}
;
flexrule : scon '^' rule
{
pat = $3;
finish_rule( pat, variable_trail_rule,
headcnt, trailcnt );
for ( i = 1; i <= actvp; ++i )
scbol[actvsc[i]] =
mkbranch( scbol[actvsc[i]], pat );
if ( ! bol_needed )
{
bol_needed = true;
if ( performance_report )
pinpoint_message(
"'^' operator results in sub-optimal performance" );
}
}
| scon rule
{
pat = $2;
finish_rule( pat, variable_trail_rule,
headcnt, trailcnt );
for ( i = 1; i <= actvp; ++i )
scset[actvsc[i]] =
mkbranch( scset[actvsc[i]], pat );
}
| '^' rule
{
pat = $2;
finish_rule( pat, variable_trail_rule,
headcnt, trailcnt );
/* add to all non-exclusive start conditions,
* including the default (0) start condition
*/
for ( i = 1; i <= lastsc; ++i )
if ( ! scxclu[i] )
scbol[i] = mkbranch( scbol[i], pat );
if ( ! bol_needed )
{
bol_needed = true;
if ( performance_report )
pinpoint_message(
"'^' operator results in sub-optimal performance" );
}
}
| rule
{
pat = $1;
finish_rule( pat, variable_trail_rule,
headcnt, trailcnt );
for ( i = 1; i <= lastsc; ++i )
if ( ! scxclu[i] )
scset[i] = mkbranch( scset[i], pat );
}
| scon EOF_OP
{ build_eof_action(); }
| EOF_OP
{
/* this EOF applies to all start conditions
* which don't already have EOF actions
*/
actvp = 0;
for ( i = 1; i <= lastsc; ++i )
if ( ! sceof[i] )
actvsc[++actvp] = i;
if ( actvp == 0 )
pinpoint_message(
"warning - all start conditions already have <<EOF>> rules" );
else
build_eof_action();
}
| error
{ synerr( "unrecognized rule" ); }
;
scon : '<' namelist2 '>'
;
namelist2 : namelist2 ',' NAME
{
if ( (scnum = sclookup( nmstr )) == 0 )
format_pinpoint_message(
"undeclared start condition %s", nmstr );
else
actvsc[++actvp] = scnum;
}
| NAME
{
if ( (scnum = sclookup( nmstr )) == 0 )
format_pinpoint_message(
"undeclared start condition %s", nmstr );
else
actvsc[actvp = 1] = scnum;
}
| error
{ synerr( "bad start condition list" ); }
;
rule : re2 re
{
if ( transchar[lastst[$2]] != SYM_EPSILON )
/* provide final transition \now/ so it
* will be marked as a trailing context
* state
*/
$2 = link_machines( $2, mkstate( SYM_EPSILON ) );
mark_beginning_as_normal( $2 );
current_state_type = STATE_NORMAL;
if ( previous_continued_action )
{
/* we need to treat this as variable trailing
* context so that the backup does not happen
* in the action but before the action switch
* statement. If the backup happens in the
* action, then the rules "falling into" this
* one's action will *also* do the backup,
* erroneously.
*/
if ( ! varlength || headcnt != 0 )
{
fprintf( stderr,
"%s: warning - trailing context rule at line %d made variable because\n",
program_name, linenum );
fprintf( stderr,
" of preceding '|' action\n" );
}
/* mark as variable */
varlength = true;
headcnt = 0;
}
if ( varlength && headcnt == 0 )
{ /* variable trailing context rule */
/* mark the first part of the rule as the accepting
* "head" part of a trailing context rule
*/
/* by the way, we didn't do this at the beginning
* of this production because back then
* current_state_type was set up for a trail
* rule, and add_accept() can create a new
* state ...
*/
add_accept( $1, num_rules | YY_TRAILING_HEAD_MASK );
variable_trail_rule = true;
}
else
trailcnt = rulelen;
$$ = link_machines( $1, $2 );
}
| re2 re '$'
{ synerr( "trailing context used twice" ); }
| re '$'
{
if ( trlcontxt )
{
synerr( "trailing context used twice" );
$$ = mkstate( SYM_EPSILON );
}
else if ( previous_continued_action )
{
/* see the comment in the rule for "re2 re"
* above
*/
if ( ! varlength || headcnt != 0 )
{
fprintf( stderr,
"%s: warning - trailing context rule at line %d made variable because\n",
program_name, linenum );
fprintf( stderr,
" of preceding '|' action\n" );
}
/* mark as variable */
varlength = true;
headcnt = 0;
}
trlcontxt = true;
if ( ! varlength )
headcnt = rulelen;
++rulelen;
trailcnt = 1;
eps = mkstate( SYM_EPSILON );
$$ = link_machines( $1,
link_machines( eps, mkstate( '\n' ) ) );
}
| re
{
$$ = $1;
if ( trlcontxt )
{
if ( varlength && headcnt == 0 )
/* both head and trail are variable-length */
variable_trail_rule = true;
else
trailcnt = rulelen;
}
}
;
re : re '|' series
{
varlength = true;
$$ = mkor( $1, $3 );
}
| series
{ $$ = $1; }
;
re2 : re '/'
{
/* this rule is written separately so
* the reduction will occur before the trailing
* series is parsed
*/
if ( trlcontxt )
synerr( "trailing context used twice" );
else
trlcontxt = true;
if ( varlength )
/* we hope the trailing context is fixed-length */
varlength = false;
else
headcnt = rulelen;
rulelen = 0;
current_state_type = STATE_TRAILING_CONTEXT;
$$ = $1;
}
;
series : series singleton
{
/* this is where concatenation of adjacent patterns
* gets done
*/
$$ = link_machines( $1, $2 );
}
| singleton
{ $$ = $1; }
;
singleton : singleton '*'
{
varlength = true;
$$ = mkclos( $1 );
}
| singleton '+'
{
varlength = true;
$$ = mkposcl( $1 );
}
| singleton '?'
{
varlength = true;
$$ = mkopt( $1 );
}
| singleton '{' NUMBER ',' NUMBER '}'
{
varlength = true;
if ( $3 > $5 || $3 < 0 )
{
synerr( "bad iteration values" );
$$ = $1;
}
else
{
if ( $3 == 0 )
$$ = mkopt( mkrep( $1, $3, $5 ) );
else
$$ = mkrep( $1, $3, $5 );
}
}
| singleton '{' NUMBER ',' '}'
{
varlength = true;
if ( $3 <= 0 )
{
synerr( "iteration value must be positive" );
$$ = $1;
}
else
$$ = mkrep( $1, $3, INFINITY );
}
| singleton '{' NUMBER '}'
{
/* the singleton could be something like "(foo)",
* in which case we have no idea what its length
* is, so we punt here.
*/
varlength = true;
if ( $3 <= 0 )
{
synerr( "iteration value must be positive" );
$$ = $1;
}
else
$$ = link_machines( $1, copysingl( $1, $3 - 1 ) );
}
| '.'
{
if ( ! madeany )
{
/* create the '.' character class */
anyccl = cclinit();
ccladd( anyccl, '\n' );
cclnegate( anyccl );
if ( useecs )
mkeccl( ccltbl + cclmap[anyccl],
ccllen[anyccl], nextecm,
ecgroup, csize, csize );
madeany = true;
}
++rulelen;
$$ = mkstate( -anyccl );
}
| fullccl
{
if ( ! cclsorted )
/* sort characters for fast searching. We use a
* shell sort since this list could be large.
*/
cshell( ccltbl + cclmap[$1], ccllen[$1], true );
if ( useecs )
mkeccl( ccltbl + cclmap[$1], ccllen[$1],
nextecm, ecgroup, csize, csize );
++rulelen;
$$ = mkstate( -$1 );
}
| PREVCCL
{
++rulelen;
$$ = mkstate( -$1 );
}
| '"' string '"'
{ $$ = $2; }
| '(' re ')'
{ $$ = $2; }
| CHAR
{
++rulelen;
if ( caseins && $1 >= 'A' && $1 <= 'Z' )
$1 = clower( $1 );
$$ = mkstate( $1 );
}
;
fullccl : '[' ccl ']'
{ $$ = $2; }
| '[' '^' ccl ']'
{
/* *Sigh* - to be compatible Unix lex, negated ccls
* match newlines
*/
#ifdef NOTDEF
ccladd( $3, '\n' ); /* negated ccls don't match '\n' */
cclsorted = false; /* because we added the newline */
#endif
cclnegate( $3 );
$$ = $3;
}
;
ccl : ccl CHAR '-' CHAR
{
if ( $2 > $4 )
synerr( "negative range in character class" );
else
{
if ( caseins )
{
if ( $2 >= 'A' && $2 <= 'Z' )
$2 = clower( $2 );
if ( $4 >= 'A' && $4 <= 'Z' )
$4 = clower( $4 );
}
for ( i = $2; i <= $4; ++i )
ccladd( $1, i );
/* keep track if this ccl is staying in alphabetical
* order
*/
cclsorted = cclsorted && ($2 > lastchar);
lastchar = $4;
}
$$ = $1;
}
| ccl CHAR
{
if ( caseins )
if ( $2 >= 'A' && $2 <= 'Z' )
$2 = clower( $2 );
ccladd( $1, $2 );
cclsorted = cclsorted && ($2 > lastchar);
lastchar = $2;
$$ = $1;
}
|
{
cclsorted = true;
lastchar = 0;
$$ = cclinit();
}
;
string : string CHAR
{
if ( caseins )
if ( $2 >= 'A' && $2 <= 'Z' )
$2 = clower( $2 );
++rulelen;
$$ = link_machines( $1, mkstate( $2 ) );
}
|
{ $$ = mkstate( SYM_EPSILON ); }
;
%%
/* build_eof_action - build the "<<EOF>>" action for the active start
* conditions
*/
void build_eof_action()
{
register int i;
for ( i = 1; i <= actvp; ++i )
{
if ( sceof[actvsc[i]] )
format_pinpoint_message(
"multiple <<EOF>> rules for start condition %s",
scname[actvsc[i]] );
else
{
sceof[actvsc[i]] = true;
fprintf( temp_action_file, "case YY_STATE_EOF(%s):\n",
scname[actvsc[i]] );
}
}
line_directive_out( temp_action_file );
}
/* synerr - report a syntax error */
void synerr( str )
char str[];
{
syntaxerror = true;
pinpoint_message( str );
}
/* format_pinpoint_message - write out a message formatted with one string,
* pinpointing its location
*/
void format_pinpoint_message( msg, arg )
char msg[], arg[];
{
char errmsg[MAXLINE];
(void) sprintf( errmsg, msg, arg );
pinpoint_message( errmsg );
}
/* pinpoint_message - write out a message, pinpointing its location */
void pinpoint_message( str )
char str[];
{
fprintf( stderr, "\"%s\", line %d: %s\n", infilename, linenum, str );
}
/* yyerror - eat up an error message from the parser;
* currently, messages are ignore
*/
void yyerror( msg )
char msg[];
{
}