Commit | Line | Data |
---|---|---|
3e1b6cd1 KM |
1 | /* |
2 | * Copyright (c) 1979 Regents of the University of California. | |
3 | * All rights reserved. The Berkeley software License Agreement | |
4 | * specifies the terms and conditions for redistribution. | |
5 | */ | |
6 | ||
7 | #ifndef lint | |
8 | static char sccsid[] = "@(#)ey2.c 5.1 (Berkeley) %G%"; | |
9 | #endif not lint | |
10 | ||
11 | # include "ey.h" | |
12 | # define IDENTIFIER 257 | |
13 | # define MARK 258 | |
14 | # define TERM 259 | |
15 | # define LEFT 260 | |
16 | # define BINARY 261 | |
17 | # define RIGHT 262 | |
18 | # define PREC 263 | |
19 | # define LCURLY 264 | |
20 | # define C_IDENTIFIER 265 /* name followed by colon */ | |
21 | # define NUMBER 266 | |
22 | ||
23 | FILE *copen(); | |
24 | ||
25 | setup(argc,argv) int argc; char *argv[]; | |
26 | { int i,j,lev,t; | |
27 | int c; | |
28 | ||
29 | foutput = stdout; | |
30 | i = 1; | |
31 | while( argc >= 2 && argv[1][0] == '-' ) { | |
32 | while( *++(argv[1]) ){ | |
33 | switch( *argv[1] ){ | |
34 | case 'v': | |
35 | case 'V': | |
36 | foutput = copen("y.output", 'w' ); | |
37 | if( foutput == 0 ) error( "cannot open y.output"); | |
38 | continue; | |
39 | case 'o': | |
40 | case 'O': | |
41 | oflag = 1; | |
42 | continue; | |
43 | case 'r': | |
44 | case 'R': | |
45 | oflag = 1; | |
46 | rflag = 1; | |
47 | continue; | |
48 | default: error( "illegal option: %c", *argv[1]); | |
49 | } | |
50 | } | |
51 | argv++; | |
52 | argc--; | |
53 | } | |
54 | ||
55 | ftable = copen( oflag ? "yacc.tmp" : "y.tab.c" , 'w' ); | |
56 | if( ftable==0 ) error( "cannot open table file" ); | |
57 | if( argc > 1 ) { cin = copen( argv[1], 'r' ); | |
58 | if( cin == 0 ) error( "cannot open input" ); | |
59 | } | |
60 | settab(); | |
61 | fprintf( cout , "#\n"); | |
62 | ctokn = "$end"; | |
63 | defin(0); /* eof */ | |
64 | extval = 0400; /* beginning of assigned values */ | |
65 | ctokn = "error"; | |
66 | defin(0); | |
67 | ctokn = "$accept"; | |
68 | defin(1); | |
69 | mem=mem0; | |
70 | cnamp = cnames; | |
71 | lev=0; | |
72 | i=0; | |
73 | ||
74 | while( ( t = gettok() ) != EOF ) { | |
75 | switch( t ){ | |
76 | case IDENTIFIER: j = chfind(0); | |
77 | trmlev[j] = lev; | |
78 | continue; | |
79 | case ',': | |
80 | case ';': continue; | |
81 | case TERM: lev=0; continue; | |
82 | case LEFT: lev=(++i<<3)|01; continue; | |
83 | case BINARY: lev=(++i<<3)|02; continue; | |
84 | case RIGHT: lev=(++i<<3)|03; continue; | |
85 | case MARK: | |
86 | defout(); | |
87 | if( rflag ){ /* RATFOR */ | |
88 | fprintf( cout , "define yyerrok yyerrf = 0\n" ); | |
89 | fprintf( cout , "define yyclearin yychar = -1\n" ); | |
90 | fprintf( cout , "subroutine yyactr(yyprdn)\n"); | |
91 | fprintf( cout , "common/yycomn/yylval,yyval,yypv,yyvalv(150)\n" ); | |
92 | fprintf( cout , "common/yylcom/yychar,yyerrf,yydebu\n" ); | |
93 | fprintf( cout , "integer yychar, yyerrf, yydebu\n" ); | |
94 | fprintf( cout , "integer yyprdn,yyval,yylval,yypv,yyvalv\n" ); | |
95 | } | |
96 | else { | |
97 | fprintf( cout , "#define yyclearin yychar = -1\n" ); | |
98 | fprintf( cout , "#define yyerrok yyerrflag = 0\n" ); | |
99 | fprintf( cout , "extern int yychar, yyerrflag;\n" ); | |
100 | fprintf( cout , "\nint yyval 0;\nint *yypv;\nint yylval 0;"); | |
101 | fprintf( cout , "\nyyactr(__np__){\n"); | |
102 | } | |
103 | break; | |
104 | case LCURLY: defout(); | |
105 | cpycode(); | |
106 | continue; | |
107 | case NUMBER: | |
108 | trmset[j].value = numbval; | |
109 | if( j < ndefout && j>2 ) | |
110 | error("please define type # of %s earlier", trmset[j].name ); | |
111 | continue; | |
112 | default: error("bad precedence syntax, input %d", t ); | |
113 | } | |
114 | break; | |
115 | } | |
116 | prdptr[0]=mem; | |
117 | /* added production */ | |
118 | *mem++ = NTBASE; | |
119 | *mem++ = NTBASE+1; | |
120 | *mem++ = 1; | |
121 | *mem++ = 0; | |
122 | prdptr[1]=mem; | |
123 | i=0; | |
124 | ||
125 | /* i is 0 when a rule can begin, 1 otherwise */ | |
126 | ||
127 | for(;;) switch( t=gettok() ) { | |
128 | case C_IDENTIFIER: if( mem == prdptr[1] ) { /* first time */ | |
129 | if( rflag ){ | |
130 | fprintf( cout , "goto 1000\n" ); | |
131 | } | |
132 | else fprintf( cout , "\nswitch(__np__){\n"); | |
133 | } | |
134 | if( i != 0 ) error( "previous rule not terminated" ); | |
135 | *mem = chfind(1); | |
136 | if( *mem < NTBASE )error( "token illegal on lhs of grammar rule" ); | |
137 | i=1; | |
138 | ++mem; | |
139 | continue; | |
140 | case IDENTIFIER: | |
141 | *mem=chfind(1); | |
142 | if(*mem < NTBASE)levprd[nprod]=trmlev[*mem]; | |
143 | mem++; | |
144 | if(i==0) error("missing :"); | |
145 | continue; | |
146 | case '=': levprd[nprod] |= 04; | |
147 | if( i==0 ) error("semicolon preceeds action"); | |
148 | fprintf( cout , rflag?"\n%d ":"\ncase %d:", nprod ); | |
149 | cpyact(); | |
150 | fprintf( cout , rflag ? " return" : " break;" ); | |
151 | case '|': | |
152 | case ';': if(i){ | |
153 | *mem++ = -nprod; | |
154 | prdptr[++nprod] = mem; | |
155 | levprd[nprod]=0; | |
156 | i=0;} | |
157 | if (t=='|'){i=1;*mem++ = *prdptr[nprod-1];} | |
158 | continue; | |
159 | case 0: /* End Of File */ | |
160 | case EOF: | |
161 | case MARK: if( i != 0 ) error( "rule not terminated before %%%% or EOF" ); | |
162 | settab(); | |
163 | finact(); | |
164 | /* copy the programs which follow the rules */ | |
165 | if( t == MARK ){ | |
166 | while (( c=fgetc( cin)) != EOF ) fputc(c,cout); | |
167 | } | |
168 | return; | |
169 | case PREC: | |
170 | if( i==0 ) error( "%%prec must appear inside rule" ); | |
171 | if( gettok()!=IDENTIFIER)error("illegal %%prec syntax" ); | |
172 | j=chfind(2); | |
173 | if(j>=NTBASE)error("nonterminal %s illegal after %%prec", nontrst[j-NTBASE].name); | |
174 | levprd[nprod]=trmlev[j]; | |
175 | continue; | |
176 | case LCURLY: | |
177 | if( i!=0 ) error( "%%{ appears within a rule" ); | |
178 | cpycode(); | |
179 | continue; | |
180 | default: error( "syntax error, input %d", t ); | |
181 | } | |
182 | } | |
183 | ||
184 | finact(){ | |
185 | /* finish action routine */ | |
186 | register i; | |
187 | ||
188 | if( rflag ){ | |
189 | ||
190 | fprintf( cout , "\n1000 goto(" ); | |
191 | for( i=1; i<nprod; ++i ){ | |
192 | fprintf( cout , "%d,", (levprd[i]&04)==0?999:i ); | |
193 | } | |
194 | fprintf( cout , "999),yyprdn\n" ); | |
195 | fprintf( cout , "999 return\nend\n" ); | |
196 | fprintf( cout , "define YYERRCODE %d\n", trmset[2].value ); | |
197 | } | |
198 | else { | |
199 | fprintf( cout , "\n}\n}\n" ); | |
200 | fprintf( cout , "int yyerrval %d;\n", trmset[2].value ); | |
201 | } | |
202 | } | |
203 | defin(t) { | |
204 | /* define ctokn to be a terminal if t=0 | |
205 | or a nonterminal if t=1 */ | |
206 | char *cp,*p; | |
207 | int c; | |
208 | ||
209 | ||
210 | if (t) { | |
211 | if( ++nnonter >= ntlim ) error("too many nonterminals, limit %d",ntlim); | |
212 | nontrst[nnonter].name = ctokn; | |
213 | return( NTBASE + nnonter ); | |
214 | } | |
215 | else { | |
216 | if( ++nterms >= tlim ) error("too many terminals, limit %d",tlim ); | |
217 | trmset[nterms].name = ctokn; | |
218 | if( ctokn[0]==' ' && ctokn[2]=='\0' ) /* single character literal */ | |
219 | trmset[nterms].value = ctokn[1]; | |
220 | else if ( ctokn[0]==' ' && ctokn[1]=='\\' ) { /* escape sequence */ | |
221 | if( ctokn[3] == '\0' ){ /* single character escape sequence */ | |
222 | switch ( ctokn[2] ){ | |
223 | /* character which is escaped */ | |
224 | case 'n': trmset[nterms].value = '\n'; break; | |
225 | case 'r': trmset[nterms].value = '\r'; break; | |
226 | case 'b': trmset[nterms].value = '\b'; break; | |
227 | case 't': trmset[nterms].value = '\t'; break; | |
228 | case '\'': trmset[nterms].value = '\''; break; | |
229 | case '"': trmset[nterms].value = '"'; break; | |
230 | case '\\': trmset[nterms].value = '\\'; break; | |
231 | default: error( "invalid escape" ); | |
232 | } | |
233 | } | |
234 | else if( ctokn[2] <= '7' && ctokn[2]>='0' ){ /* \nnn sequence */ | |
235 | if( ctokn[3]<'0' || ctokn[3] > '7' || ctokn[4]<'0' || | |
236 | ctokn[4]>'7' || ctokn[5] != '\0' ) error("illegal \\nnn construction" ); | |
237 | trmset[nterms].value = 64*(ctokn[2]-'0')+8*(ctokn[3]-'0')+ctokn[4]-'0'; | |
238 | if( trmset[nterms].value == 0 ) error( "'\\000' is illegal" ); | |
239 | } | |
240 | } | |
241 | else { | |
242 | trmset[nterms].value = extval++; | |
243 | ||
244 | } | |
245 | trmlev[nterms] = 0; | |
246 | return( nterms ); | |
247 | } | |
248 | } | |
249 | ||
250 | defout(){ /* write out the defines (at the end of the declaration section) */ | |
251 | ||
252 | _REGISTER int i, c; | |
253 | _REGISTER char *cp; | |
254 | ||
255 | for( i=ndefout; i<=nterms; ++i ){ | |
256 | ||
257 | cp = trmset[i].name; | |
258 | if( *cp == ' ' ) ++cp; /* literals */ | |
259 | ||
260 | for( ; (c= *cp)!='\0'; ++cp ){ | |
261 | ||
262 | if( c>='a' && c<='z' || | |
263 | c>='A' && c<='Z' || | |
264 | c>='0' && c<='9' || | |
265 | c=='_' ) ; /* VOID */ | |
266 | else goto nodef; | |
267 | } | |
268 | ||
269 | /* define it */ | |
270 | ||
271 | fprintf( cout , "%c define %s %d\n", rflag?' ':'#', trmset[i].name, trmset[i].value ); | |
272 | ||
273 | nodef: ; | |
274 | } | |
275 | ||
276 | ndefout = nterms+1; | |
277 | ||
278 | } | |
279 | ||
280 | chstash( c ){ | |
281 | /* put character away into cnames */ | |
282 | if( cnamp >= &cnames[cnamsz] ) error("too many characters in id's and literals" ); | |
283 | else *cnamp++ = c; | |
284 | } | |
285 | ||
286 | int gettok() { | |
287 | int j, base; | |
288 | static int peekline; /* number of '\n' seen in lookahead */ | |
289 | auto int c, match, reserve; | |
290 | ||
291 | begin: | |
292 | reserve = 0; | |
293 | if( peekc>=0 ) { | |
294 | c = peekc; | |
295 | lineno += peekline; | |
296 | peekc = -1; | |
297 | peekline = 0; | |
298 | } | |
299 | else c = fgetc( cin); | |
300 | while( c==' ' || c=='\n' || c=='\t' || c == '\014'){ | |
301 | if( c == '\n' ) ++lineno; | |
302 | c=fgetc( cin); | |
303 | } | |
304 | if (c=='/') | |
305 | {if (fgetc( cin)!='*')error("illegal /"); | |
306 | c=fgetc( cin); | |
307 | while(c != EOF) { | |
308 | if( c == '\n' ) ++lineno; | |
309 | if (c=='*') | |
310 | {if((c=fgetc( cin))=='/')break;} | |
311 | else c=fgetc( cin);} | |
312 | if (!c) return(0); | |
313 | goto begin;} | |
314 | j=0; | |
315 | switch(c){ | |
316 | case '"': | |
317 | case '\'': match = c; | |
318 | ctokn = cnamp; | |
319 | chstash( ' ' ); | |
320 | while(1){ | |
321 | c = fgetc( cin); | |
322 | if( c == '\n' || c == '\0' ) | |
323 | error("illegal or missing ' or \""); | |
324 | if( c == '\\' ){ | |
325 | c = fgetc( cin); | |
326 | chstash( '\\' ); | |
327 | } | |
328 | else if( c == match ) break; | |
329 | chstash( c ); | |
330 | } | |
331 | break; | |
332 | case '%': | |
333 | case '\\': switch(c=fgetc( cin)) | |
334 | {case '0': return(TERM); | |
335 | case '<': return(LEFT); | |
336 | case '2': return(BINARY); | |
337 | case '>': return(RIGHT); | |
338 | case '%': | |
339 | case '\\': return(MARK); | |
340 | case '=': return(PREC); | |
341 | case '{': return(LCURLY); | |
342 | default: reserve = 1; | |
343 | } | |
344 | default: if( c >= '0' && c <= '9' ){ /* number */ | |
345 | numbval = c-'0' ; | |
346 | base = (c=='0') ? 8 : 10 ; | |
347 | for( c=fgetc( cin); c>='0' && c<='9'; c=fgetc( cin) ){ | |
348 | numbval = numbval*base + c - '0'; | |
349 | } | |
350 | peekc = c; | |
351 | return(NUMBER); | |
352 | } | |
353 | else if( (c>='a'&&c<='z')||(c>='A'&&c<='Z')||c=='_'||c=='.'||c=='$'){ | |
354 | ctokn = cnamp; | |
355 | while( (c>='a'&&c<='z') || | |
356 | (c>='A'&&c<='Z') || | |
357 | (c>='0'&&c<='9') || | |
358 | c=='_' || c=='.' || c=='$' ) { | |
359 | chstash( c ); | |
360 | if( peekc>=0 ) { c = peekc; peekc = -1; } | |
361 | else c = fgetc( cin); | |
362 | } | |
363 | } | |
364 | else return(c); | |
365 | ||
366 | peekc=c; | |
367 | } | |
368 | chstash( '\0' ); | |
369 | ||
370 | if( reserve ){ /* find a reserved word */ | |
371 | if( compare("term")) return( TERM ); | |
372 | if( compare("TERM")) return( TERM ); | |
373 | if( compare("token")) return( TERM ); | |
374 | if( compare("TOKEN")) return( TERM ); | |
375 | if( compare("left")) return( LEFT ); | |
376 | if( compare("LEFT")) return( LEFT ); | |
377 | if( compare("nonassoc")) return( BINARY ); | |
378 | if( compare("NONASSOC")) return( BINARY ); | |
379 | if( compare("binary")) return( BINARY ); | |
380 | if( compare("BINARY")) return( BINARY ); | |
381 | if( compare("right")) return( RIGHT ); | |
382 | if( compare("RIGHT")) return( RIGHT ); | |
383 | if( compare("prec")) return( PREC ); | |
384 | if( compare("PREC")) return( PREC ); | |
385 | error("invalid escape, or illegal reserved word: %s", ctokn ); | |
386 | } | |
387 | ||
388 | /* look ahead to distinguish IDENTIFIER from C_IDENTIFIER */ | |
389 | ||
390 | look: | |
391 | while( peekc==' ' || peekc=='\t' || peekc == '\n' || peekc == '\014' ) | |
392 | { | |
393 | if( peekc == '\n' ) ++peekline; | |
394 | peekc = fgetc( cin); | |
395 | } | |
396 | ||
397 | if( peekc != ':' ) return( IDENTIFIER ); | |
398 | peekc = -1; | |
399 | lineno += peekline; | |
400 | peekline = 0; | |
401 | return( C_IDENTIFIER ); | |
402 | } | |
403 | chfind(t) | |
404 | ||
405 | { int i,j; | |
406 | ||
407 | if (ctokn[0]==' ')t=0; | |
408 | for(i=1;i<=nterms;i++) | |
409 | if(compare(trmset[i].name)){ | |
410 | cnamp = ctokn; | |
411 | return( i ); | |
412 | } | |
413 | for(i=1;i<=nnonter;i++) | |
414 | if(compare(nontrst[i].name)) { | |
415 | cnamp = ctokn; | |
416 | return( i+NTBASE ); | |
417 | } | |
418 | /* cannot find name */ | |
419 | if( t>1 && ctokn[0] != ' ' ) | |
420 | error( "%s should have been defined earlier", ctokn ); | |
421 | return( defin( t ) ); | |
422 | } | |
423 | ||
424 | cpycode(){ /* copies code between \{ and \} */ | |
425 | ||
426 | int c; | |
427 | c = fgetc( cin); | |
428 | if( c == '\n' ) { | |
429 | c = fgetc( cin); | |
430 | lineno++; | |
431 | } | |
432 | while( c != EOF ){ | |
433 | if( c=='\\' ) | |
434 | if( (c=fgetc( cin)) == '}' ) return; | |
435 | else fputc('\\',cout); | |
436 | if( c=='%' ) | |
437 | if( (c=fgetc( cin)) == '}' ) return; | |
438 | else fputc('%',cout); | |
439 | fputc( c, cout ); | |
440 | if( c == '\n' ) ++lineno; | |
441 | c = fgetc( cin); | |
442 | } | |
443 | error("eof before %%}"); | |
444 | } | |
445 | ||
446 | cpyact(){ /* copy C action to the next ; or closing } */ | |
447 | int brac, c, match, *i, j, s; | |
448 | ||
449 | brac = 0; | |
450 | ||
451 | loop: | |
452 | c = fgetc( cin); | |
453 | swt: | |
454 | switch( c ){ | |
455 | ||
456 | case ';': | |
457 | if( brac == 0 ){ | |
458 | fputc( c, cout ); | |
459 | return; | |
460 | } | |
461 | goto lcopy; | |
462 | ||
463 | case '{': | |
464 | brac++; | |
465 | goto lcopy; | |
466 | ||
467 | case '$': | |
468 | s = 1; | |
469 | c = fgetc( cin); | |
470 | if( c == '$' ){ | |
471 | fprintf( cout , "yyval"); | |
472 | goto loop; | |
473 | } | |
474 | if( c == '-' ){ | |
475 | s = -s; | |
476 | c = fgetc( cin); | |
477 | } | |
478 | if( c>='0' && c <= '9' ){ | |
479 | j=0; | |
480 | while( c>='0' && c<= '9' ){ | |
481 | j= j*10+c-'0'; | |
482 | c = fgetc( cin); | |
483 | } | |
484 | if( rflag ) fprintf( cout , "yyvalv(yypv%c%d)", s==1?'+':'-', j ); | |
485 | else fprintf( cout , "yypv[%d]", s*j ); | |
486 | goto swt; | |
487 | } | |
488 | fputc( '$' , cout); | |
489 | if( s<0 ) fputc('-', cout); | |
490 | goto swt; | |
491 | ||
492 | case '}': | |
493 | brac--; | |
494 | if( brac == 0 ){ | |
495 | fputc( c , cout); | |
496 | return; | |
497 | } | |
498 | goto lcopy; | |
499 | ||
500 | case '/': /* look for comments */ | |
501 | fputc( c ,cout); | |
502 | c = fgetc( cin); | |
503 | if( c != '*' ) goto swt; | |
504 | ||
505 | /* it really is a comment */ | |
506 | ||
507 | fputc( c , cout); | |
508 | while( (c=fgetc( cin)) != EOF ){ | |
509 | if( c=='*' ){ | |
510 | fputc( c , cout); | |
511 | if( (c=fgetc( cin)) == '/' ) goto lcopy; | |
512 | } | |
513 | fputc( c , cout); | |
514 | } | |
515 | error( "EOF inside comment" ); | |
516 | ||
517 | case '\'': /* character constant */ | |
518 | match = '\''; | |
519 | goto string; | |
520 | ||
521 | case '"': /* character string */ | |
522 | match = '"'; | |
523 | ||
524 | string: | |
525 | ||
526 | fputc( c , cout); | |
527 | while( (c=fgetc( cin)) != EOF ){ | |
528 | ||
529 | if( c=='\\' ){ | |
530 | fputc( c , cout); | |
531 | c=fgetc( cin); | |
532 | } | |
533 | else if( c==match ) goto lcopy; | |
534 | fputc( c , cout); | |
535 | } | |
536 | error( "EOF in string or character constant" ); | |
537 | ||
538 | case '\0': | |
539 | error("action does not terminate"); | |
540 | case '\n': ++lineno; | |
541 | goto lcopy; | |
542 | ||
543 | } | |
544 | ||
545 | lcopy: | |
546 | fputc( c , cout); | |
547 | goto loop; | |
548 | } |