[unix-history] / usr / doc / yacc / ssc

.SH
Appendix C: An Advanced Example
.PP
This Appendix gives an example of a grammar using some
of the advanced features discussed in Section 10.
The desk calculator example in Appendix A is
modified to provide a desk calculator that
does floating point interval arithmetic.
The calculator understands floating point
constants, the arithmetic operations +, \-, *, /,
unary \-, and = (assignment), and has 26 floating
point variables, ``a'' through ``z''.
Moreover, it also understands
.I intervals ,
written
.DS
	( x , y )
.DE
where
.I x
is less than or equal to
.I y .
There are 26 interval valued variables ``A'' through ``Z''
that may also be used.
The usage is similar to that in Appendix A; assignments
return no value, and print nothing, while expressions print
the (floating or interval) value.
.PP
This example explores a number of interesting features
of Yacc and C.
Intervals are represented by a structure, consisting of the
left and right endpoint values, stored as
.I double 's.
This structure is given a type name, INTERVAL, by using
.I typedef .
The Yacc value stack can also contain floating point scalars, and
integers (used to index into the arrays holding the variable values).
Notice that this entire strategy depends strongly on being able to
assign structures and unions in C.
In fact, many of the actions call functions that return structures
as well.
.PP
It is also worth noting the use of YYERROR to handle error conditions:
division by an interval containing 0, and an interval presented in
the wrong order.
In effect, the error recovery mechanism of Yacc is used to throw away the
rest of the offending line.
.PP
In addition to the mixing of types on the value stack,
this grammar also demonstrates an interesting use of syntax to
keep track of the type (e.g. scalar or interval) of intermediate
expressions.
Note that a scalar can be automatically promoted to an interval if
the context demands an interval value.
This causes a large number of conflicts when the grammar is run through
Yacc: 18 Shift/Reduce and 26 Reduce/Reduce.
The problem can be seen by looking at the two input lines:
.DS
	2.5 + ( 3.5 \- 4. )
.DE
and
.DS
	2.5 + ( 3.5 , 4. )
.DE
Notice that the 2.5 is to be used in an interval valued expression
in the second example, but this fact is not known until
the ``,'' is read; by this time, 2.5 is finished, and the parser cannot go back
and change its mind.
More generally, it might be necessary to look ahead an arbitrary number of
tokens to decide whether to convert a scalar to an interval.
This problem is evaded by having two rules for each binary interval
valued operator: one when the left operand is a scalar, and one when
the left operand is an interval.
In the second case, the right operand must be an interval,
so the conversion will be applied automatically.
Despite this evasion, there are still many cases where the
conversion may be applied or not, leading to the above
conflicts.
They are resolved by listing the rules that yield scalars first
in the specification file; in this way, the conflicts will
be resolved in the direction of keeping scalar
valued expressions scalar valued until they are forced to become
intervals.
.PP
This way of handling multiple types is very instructive, but not very general.
If there were many kinds of expression types, instead of just two,
the number of rules needed would increase dramatically, and the conflicts
even more dramatically.
Thus, while this example is instructive, it is better practice in a
more normal programming language environment to
keep the type information as part of the value, and not as part
of the grammar.
.PP
Finally, a word about the lexical analysis.
The only unusual feature is the treatment of floating point constants.
The C library routine
.I atof
is used to do the actual conversion from a character string
to a double precision value.
If the lexical analyzer detects an error,
it responds by returning a token that
is illegal in the grammar, provoking a syntax error
in the parser, and thence error recovery.
.DS L

%{

#  include  <stdio.h>
#  include  <ctype.h>

typedef  struct  interval  {
	double  lo,  hi;
	}  INTERVAL;

INTERVAL  vmul(),  vdiv();

double  atof();

double  dreg[ 26 ];
INTERVAL  vreg[ 26 ];

%}

%start    lines

%union    {
	int  ival;
	double  dval;
	INTERVAL  vval;
	}

%token  <ival>  DREG  VREG	/*  indices  into  dreg,  vreg  arrays  */

%token  <dval>  CONST		/*  floating  point  constant  */

%type  <dval>  dexp		/*  expression  */

%type  <vval>  vexp		/*  interval  expression  */

	/*  precedence  information  about  the  operators  */

%left	\'+\'  \'\-\'
%left	\'*\'  \'/\'
%left	UMINUS        /*  precedence  for  unary  minus  */

%%

lines	:	/*  empty  */
	|	lines  line
	;

line	:	dexp  \'\en\'
			{	printf(  "%15.8f\en",  $1  );  }
	|	vexp  \'\en\'
			{	printf(  "(%15.8f  ,  %15.8f  )\en",  $1.lo,  $1.hi  );  }
	|	DREG  \'=\'  dexp  \'\en\'
			{	dreg[$1]  =  $3;  }
	|	VREG  \'=\'  vexp  \'\en\'
			{	vreg[$1]  =  $3;  }
	|	error  \'\en\'
			{	yyerrok;  }
	;

dexp	:	CONST
	|	DREG
			{	$$  =  dreg[$1];  }
	|	dexp  \'+\'  dexp
			{	$$  =  $1  +  $3;  }
	|	dexp  \'\-\'  dexp
			{	$$  =  $1  \-  $3;  }
	|	dexp  \'*\'  dexp
			{	$$  =  $1  *  $3;  }
	|	dexp  \'/\'  dexp
			{	$$  =  $1  /  $3;  }
	|	\'\-\'  dexp	%prec  UMINUS
			{	$$  =  \- $2;  }
	|	\'(\'  dexp  \')\'
			{	$$  =  $2;  }
	;

vexp	:	dexp
			{	$$.hi  =  $$.lo  =  $1;  }
	|	\'(\'  dexp  \',\'  dexp  \')\'
			{
			$$.lo  =  $2;
			$$.hi  =  $4;
			if(  $$.lo  >  $$.hi  ){
				printf(  "interval  out  of  order\en"  );
				YYERROR;
				}
			}
	|	VREG
			{	$$  =  vreg[$1];    }
	|	vexp  \'+\'  vexp
			{	$$.hi  =  $1.hi  +  $3.hi;
				$$.lo  =  $1.lo  +  $3.lo;    }
	|	dexp  \'+\'  vexp
			{	$$.hi  =  $1  +  $3.hi;
				$$.lo  =  $1  +  $3.lo;    }
	|	vexp  \'\-\'  vexp
			{	$$.hi  =  $1.hi  \-  $3.lo;
				$$.lo  =  $1.lo  \-  $3.hi;    }
	|	dexp  \'\-\'  vexp
			{	$$.hi  =  $1  \-  $3.lo;
				$$.lo  =  $1  \-  $3.hi;    }
	|	vexp  \'*\'  vexp
			{	$$  =  vmul(  $1.lo,  $1.hi,  $3  );  }
	|	dexp  \'*\'  vexp
			{	$$  =  vmul(  $1,  $1,  $3  );  }
	|	vexp  \'/\'  vexp
			{	if(  dcheck(  $3  )  )  YYERROR;
				$$  =  vdiv(  $1.lo,  $1.hi,  $3  );  }
	|	dexp  \'/\'  vexp
			{	if(  dcheck(  $3  )  )  YYERROR;
				$$  =  vdiv(  $1,  $1,  $3  );  }
	|	\'\-\'  vexp	%prec  UMINUS
			{	$$.hi  =  \-$2.lo;    $$.lo  =  \-$2.hi;    }
	|	\'(\'  vexp  \')\'
			{	$$  =  $2;  }
	;

%%

#  define  BSZ  50        /*  buffer  size  for  floating  point  numbers  */

	/*  lexical  analysis  */

yylex(){
	register  c;

	while(  (c=getchar())  ==  \' \'  ){  /*  skip  over  blanks  */  }

	if(  isupper(  c  )  ){
		yylval.ival  =  c  \-  \'A\';
		return(  VREG  );
		}
	if(  islower(  c  )  ){
		yylval.ival  =  c  \-  \'a\';
		return(  DREG  );
		}

	if(  isdigit(  c  )  ||  c==\'.\'  ){
		/*  gobble  up  digits,  points,  exponents  */

		char  buf[BSZ+1],  *cp  =  buf;
		int  dot  =  0,  exp  =  0;

		for(  ;  (cp\-buf)<BSZ  ;  ++cp,c=getchar()  ){

			*cp  =  c;
			if(  isdigit(  c  )  )  continue;
			if(  c  ==  \'.\'  ){
				if(  dot++  ||  exp  )  return(  \'.\'  );    /*  will  cause  syntax  error  */
				continue;
				}

			if(  c  ==  \'e\'  ){
				if(  exp++  )  return(  \'e\'  );    /*  will  cause  syntax  error  */
				continue;
				}

			/*  end  of  number  */
			break;
			}
		*cp  =  \'\e0\';
		if(  (cp\-buf)  >=  BSZ  )  printf(  "constant  too  long:  truncated\en"  );
		else  ungetc(  c,  stdin  );    /*  push  back  last  char  read  */
		yylval.dval  =  atof(  buf  );
		return(  CONST  );
		}
	return(  c  );
	}

INTERVAL  hilo(  a,  b,  c,  d  )  double  a,  b,  c,  d;  {
	/*  returns  the  smallest  interval  containing  a,  b,  c,  and  d  */
	/*  used  by  *,  /  routines  */
	INTERVAL  v;

	if(  a>b  )  {  v.hi  =  a;    v.lo  =  b;  }
	else  {  v.hi  =  b;    v.lo  =  a;  }

	if(  c>d  )  {
		if(  c>v.hi  )  v.hi  =  c;
		if(  d<v.lo  )  v.lo  =  d;
		}
	else  {
		if(  d>v.hi  )  v.hi  =  d;
		if(  c<v.lo  )  v.lo  =  c;
		}
	return(  v  );
	}

INTERVAL  vmul(  a,  b,  v  )  double  a,  b;    INTERVAL  v;  {
	return(  hilo(  a*v.hi,  a*v.lo,  b*v.hi,  b*v.lo  )  );
	}

dcheck(  v  )  INTERVAL  v;  {
	if(  v.hi  >=  0.  &&  v.lo  <=  0.  ){
		printf(  "divisor  interval  contains  0.\en"  );
		return(  1  );
		}
	return(  0  );
	}

INTERVAL  vdiv(  a,  b,  v  )  double  a,  b;    INTERVAL  v;  {
	return(  hilo(  a/v.hi,  a/v.lo,  b/v.hi,  b/v.lo  )  );
	}
.DE
.bp
Commit	Line	Data
2240a03d TL	1	.SH
	2	Appendix C: An Advanced Example
	3	.PP
	4	This Appendix gives an example of a grammar using some
	5	of the advanced features discussed in Section 10.
	6	The desk calculator example in Appendix A is
	7	modified to provide a desk calculator that
	8	does floating point interval arithmetic.
	9	The calculator understands floating point
	10	constants, the arithmetic operations +, \-, *, /,
	11	unary \-, and = (assignment), and has 26 floating
	12	point variables, ``a'' through ``z''.
	13	Moreover, it also understands
	14	.I intervals ,
	15	written
	16	.DS
	17	( x , y )
	18	.DE
	19	where
	20	.I x
	21	is less than or equal to
	22	.I y .
	23	There are 26 interval valued variables ``A'' through ``Z''
	24	that may also be used.
	25	The usage is similar to that in Appendix A; assignments
	26	return no value, and print nothing, while expressions print
	27	the (floating or interval) value.
	28	.PP
	29	This example explores a number of interesting features
	30	of Yacc and C.
	31	Intervals are represented by a structure, consisting of the
	32	left and right endpoint values, stored as
	33	.I double 's.
	34	This structure is given a type name, INTERVAL, by using
	35	.I typedef .
	36	The Yacc value stack can also contain floating point scalars, and
	37	integers (used to index into the arrays holding the variable values).
	38	Notice that this entire strategy depends strongly on being able to
	39	assign structures and unions in C.
	40	In fact, many of the actions call functions that return structures
	41	as well.
	42	.PP
	43	It is also worth noting the use of YYERROR to handle error conditions:
	44	division by an interval containing 0, and an interval presented in
	45	the wrong order.
	46	In effect, the error recovery mechanism of Yacc is used to throw away the
	47	rest of the offending line.
	48	.PP
	49	In addition to the mixing of types on the value stack,
	50	this grammar also demonstrates an interesting use of syntax to
	51	keep track of the type (e.g. scalar or interval) of intermediate
	52	expressions.
	53	Note that a scalar can be automatically promoted to an interval if
	54	the context demands an interval value.
	55	This causes a large number of conflicts when the grammar is run through
	56	Yacc: 18 Shift/Reduce and 26 Reduce/Reduce.
	57	The problem can be seen by looking at the two input lines:
	58	.DS
	59	2.5 + ( 3.5 \- 4. )
	60	.DE
	61	and
	62	.DS
	63	2.5 + ( 3.5 , 4. )
	64	.DE
65	Notice that the 2.5 is to be used in an interval valued expression
66	in the second example, but this fact is not known until
67	the ``,'' is read; by this time, 2.5 is finished, and the parser cannot go back
68	and change its mind.
69	More generally, it might be necessary to look ahead an arbitrary number of
70	tokens to decide whether to convert a scalar to an interval.
71	This problem is evaded by having two rules for each binary interval
72	valued operator: one when the left operand is a scalar, and one when
73	the left operand is an interval.
74	In the second case, the right operand must be an interval,
75	so the conversion will be applied automatically.
76	Despite this evasion, there are still many cases where the
77	conversion may be applied or not, leading to the above
78	conflicts.
79	They are resolved by listing the rules that yield scalars first
80	in the specification file; in this way, the conflicts will
81	be resolved in the direction of keeping scalar
82	valued expressions scalar valued until they are forced to become
83	intervals.
84	.PP
85	This way of handling multiple types is very instructive, but not very general.
86	If there were many kinds of expression types, instead of just two,
87	the number of rules needed would increase dramatically, and the conflicts
88	even more dramatically.
89	Thus, while this example is instructive, it is better practice in a
90	more normal programming language environment to
91	keep the type information as part of the value, and not as part
92	of the grammar.
93	.PP
94	Finally, a word about the lexical analysis.
95	The only unusual feature is the treatment of floating point constants.
96	The C library routine
97	.I atof
98	is used to do the actual conversion from a character string
99	to a double precision value.
100	If the lexical analyzer detects an error,
101	it responds by returning a token that
102	is illegal in the grammar, provoking a syntax error
103	in the parser, and thence error recovery.
104	.DS L
105
106	%{
107
108	# include <stdio.h>
109	# include <ctype.h>
110
111	typedef struct interval {
112	double lo, hi;
113	} INTERVAL;
114
115	INTERVAL vmul(), vdiv();
116
117	double atof();
118
119	double dreg[ 26 ];
120	INTERVAL vreg[ 26 ];
121
122	%}
123
124	%start lines
125
126	%union {
127	int ival;
128	double dval;
129	INTERVAL vval;
130	}
131
132	%token <ival> DREG VREG /* indices into dreg, vreg arrays */
133
134	%token <dval> CONST /* floating point constant */
135
136	%type <dval> dexp /* expression */
137
138	%type <vval> vexp /* interval expression */
139
140	/* precedence information about the operators */
141
142	%left \'+\' \'\-\'
143	%left \'*\' \'/\'
144	%left UMINUS /* precedence for unary minus */
145
146	%%
147
148	lines : /* empty */
149	\| lines line
150	;
151
152	line : dexp \'\en\'
153	{ printf( "%15.8f\en", $1 ); }
154	\| vexp \'\en\'
155	{ printf( "(%15.8f , %15.8f )\en", $1.lo, $1.hi ); }
156	\| DREG \'=\' dexp \'\en\'
157	{ dreg[$1] = $3; }
158	\| VREG \'=\' vexp \'\en\'
159	{ vreg[$1] = $3; }
160	\| error \'\en\'
161	{ yyerrok; }
162	;
163
164	dexp : CONST
165	\| DREG
166	{ $$ = dreg[$1]; }
167	\| dexp \'+\' dexp
168	{ $$ = $1 + $3; }
169	\| dexp \'\-\' dexp
170	{ $$ = $1 \- $3; }
171	\| dexp \'*\' dexp
172	{ $$ = $1 * $3; }
173	\| dexp \'/\' dexp
174	{ $$ = $1 / $3; }
175	\| \'\-\' dexp %prec UMINUS
176	{ $$ = \- $2; }
177	\| \'(\' dexp \')\'
178	{ $$ = $2; }
179	;
180
181	vexp : dexp
182	{ $$.hi = $$.lo = $1; }
183	\| \'(\' dexp \',\' dexp \')\'
184	{
185	$$.lo = $2;
186	$$.hi = $4;
187	if( $$.lo > $$.hi ){
188	printf( "interval out of order\en" );
189	YYERROR;
190	}
191	}
192	\| VREG
193	{ $$ = vreg[$1]; }
194	\| vexp \'+\' vexp
195	{ $$.hi = $1.hi + $3.hi;
196	$$.lo = $1.lo + $3.lo; }
197	\| dexp \'+\' vexp
198	{ $$.hi = $1 + $3.hi;
199	$$.lo = $1 + $3.lo; }
200	\| vexp \'\-\' vexp
201	{ $$.hi = $1.hi \- $3.lo;
202	$$.lo = $1.lo \- $3.hi; }
203	\| dexp \'\-\' vexp
204	{ $$.hi = $1 \- $3.lo;
205	$$.lo = $1 \- $3.hi; }
206	\| vexp \'*\' vexp
207	{ $$ = vmul( $1.lo, $1.hi, $3 ); }
208	\| dexp \'*\' vexp
209	{ $$ = vmul( $1, $1, $3 ); }
210	\| vexp \'/\' vexp
211	{ if( dcheck( $3 ) ) YYERROR;
212	$$ = vdiv( $1.lo, $1.hi, $3 ); }
213	\| dexp \'/\' vexp
214	{ if( dcheck( $3 ) ) YYERROR;
215	$$ = vdiv( $1, $1, $3 ); }
216	\| \'\-\' vexp %prec UMINUS
217	{ $$.hi = \-$2.lo; $$.lo = \-$2.hi; }
218	\| \'(\' vexp \')\'
219	{ $$ = $2; }
220	;
221
222	%%
223
224	# define BSZ 50 /* buffer size for floating point numbers */
225
226	/* lexical analysis */
227
228	yylex(){
229	register c;
230
231	while( (c=getchar()) == \' \' ){ /* skip over blanks */ }
232
233	if( isupper( c ) ){
234	yylval.ival = c \- \'A\';
235	return( VREG );
236	}
237	if( islower( c ) ){
238	yylval.ival = c \- \'a\';
239	return( DREG );
240	}
241
242	if( isdigit( c ) \|\| c==\'.\' ){
243	/* gobble up digits, points, exponents */
244
245	char buf[BSZ+1], *cp = buf;
246	int dot = 0, exp = 0;
247
248	for( ; (cp\-buf)<BSZ ; ++cp,c=getchar() ){
249
250	*cp = c;
251	if( isdigit( c ) ) continue;
252	if( c == \'.\' ){
253	if( dot++ \|\| exp ) return( \'.\' ); /* will cause syntax error */
254	continue;
255	}
256
257	if( c == \'e\' ){
258	if( exp++ ) return( \'e\' ); /* will cause syntax error */
259	continue;
260	}
261
262	/* end of number */
263	break;
264	}
265	*cp = \'\e0\';
266	if( (cp\-buf) >= BSZ ) printf( "constant too long: truncated\en" );
267	else ungetc( c, stdin ); /* push back last char read */
268	yylval.dval = atof( buf );
269	return( CONST );
270	}
271	return( c );
272	}
273
274	INTERVAL hilo( a, b, c, d ) double a, b, c, d; {
275	/* returns the smallest interval containing a, b, c, and d */
276	/* used by , / routines /
277	INTERVAL v;
278
279	if( a>b ) { v.hi = a; v.lo = b; }
280	else { v.hi = b; v.lo = a; }
281
282	if( c>d ) {
283	if( c>v.hi ) v.hi = c;
284	if( d<v.lo ) v.lo = d;
285	}
286	else {
287	if( d>v.hi ) v.hi = d;
288	if( c<v.lo ) v.lo = c;
289	}
290	return( v );
291	}
292
293	INTERVAL vmul( a, b, v ) double a, b; INTERVAL v; {
294	return( hilo( av.hi, av.lo, bv.hi, bv.lo ) );
295	}
296
297	dcheck( v ) INTERVAL v; {
298	if( v.hi >= 0. && v.lo <= 0. ){
299	printf( "divisor interval contains 0.\en" );
300	return( 1 );
301	}
302	return( 0 );
303	}
304
305	INTERVAL vdiv( a, b, v ) double a, b; INTERVAL v; {
306	return( hilo( a/v.hi, a/v.lo, b/v.hi, b/v.lo ) );
307	}
308	.DE
309	.bp