Research V7 development
[unix-history] / usr / src / cmd / lex / parser.y
CommitLineData
c81a83b8
ML
/* Terminals returned by yylex(). */
%token CHAR CCL NCCL STR
%token DELIM SCON ITER NEWE NULLS

/*
 * Operator precedence, lowest first.
 * CAT is never returned by yylex(); it only names the precedence of
 * juxtaposition (see "r r %prec CAT" in the rules).
 */
%left SCON '/' NEWE
%left '|'
%left '$' '^'
%left CHAR CCL NCCL '(' '.' STR NULLS
%left ITER
%left CAT
%left '*' '+' '?'

%{
#include "ldefs.c"
%}
%%
%{
/* scratch variables used by the rule actions below */
int i, j, k, g;
char *p;
%}
/* acc: first rule of the grammar; under DEBUG it dumps section two once
   the whole input has been reduced. */
acc
    : lexinput
        {
#ifdef DEBUG
            if (debug)
                sect2dump();
#endif
        }
    ;
/*
 * lexinput: definitions, a delimiter, optionally the rules, and an
 * optional closing delimiter.
 */
lexinput
    : defns delim prods end
    | defns delim end
        {
            /* no rules at all: call phead2() unless funcflag says it
               has already been done */
            if (!funcflag)
                phead2();
            funcflag = TRUE;
        }
    | error
        {
#ifdef DEBUG
            if (debug) {
                sect1dump();
                sect2dump();
            }
#endif
        }
    ;
/* end: the delimiter closing the rules section is optional. */
end
    : delim
    | /* empty */
    ;
/*
 * defns: zero or more definitions, each delivered by yylex() as two STR
 * tokens (name, then translation).
 *
 * Both strings are copied back to back into the buffer at dp; def[dptr]
 * points at the name and subs[dptr] at the translation.  The entry after
 * the last one is kept zero in both tables.
 *
 * NOTE(review): the "Definitions too long" test runs after both scopy()
 * calls, so an overrun is detected only after it has happened.
 */
defns
    : defns STR STR
        {
            scopy($2, dp);
            def[dptr] = dp;
            /* "+=" rather than the obsolete "=+", which current
               compilers parse as "dp = +(...)" */
            dp += slength($2) + 1;
            scopy($3, dp);
            subs[dptr++] = dp;
            if (dptr >= DEFSIZE)
                error("Too many definitions");
            dp += slength($3) + 1;
            if (dp >= dchar + DEFCHAR)
                error("Definitions too long");
            subs[dptr] = def[dptr] = 0;    /* for lookup - require ending null */
        }
    | /* empty */
    ;
/* delim: a DELIM token; each one advances the section counter. */
delim
    : DELIM
        {
#ifdef DEBUG
            if (debug && sect == DEFSECTION)
                sect1dump();
#endif
            sect++;
        }
    ;
/* prods: one or more rules, chained left to right under RNEWE nodes. */
prods
    : prods pr
        {
            $$ = mn2(RNEWE, $1, $2);
        }
    | pr
        {
            $$ = $1;
        }
    ;
/*
 * pr: one rule, i.e. a regular expression terminated by NEWE.
 * The expression is concatenated with a final node carrying casecount;
 * S1FINAL is used instead of FINAL when divflg was set while parsing it.
 * divflg is then cleared and casecount advanced for the next rule.
 */
pr
    : r NEWE
        {
            i = mn1(divflg == TRUE ? S1FINAL : FINAL, casecount);
            $$ = mn2(RCAT, $1, i);
            divflg = FALSE;
            casecount++;
        }
    | error NEWE
        {
#ifdef DEBUG
            if (debug)
                sect2dump();
#endif
        }
    ;
/*
 * r: a regular expression.  Each alternative builds parse-tree nodes with
 * mn0/mn1/mn2 and yields the index of the resulting node in $$.
 */
r
    : CHAR
        {
            $$ = mn0($1);
        }
    | STR
        {
            /* chain the characters of the string under RSTR nodes */
            p = $1;
            i = mn0(*p++);
            while (*p)
                i = mn2(RSTR, i, *p++);
            $$ = i;
        }
    | '.'
        {
            symbol['\n'] = 0;
            if (psave == FALSE) {
                /* first '.': store the class "every character from 1 to
                   NCH-1 except newline" at ccptr and remember it in psave */
                p = ccptr;
                psave = ccptr;
                for (i = 1; i < '\n'; i++) {
                    symbol[i] = 1;
                    *ccptr++ = i;
                }
                for (i = '\n' + 1; i < NCH; i++) {
                    symbol[i] = 1;
                    *ccptr++ = i;
                }
                *ccptr++ = 0;
                if (ccptr > ccl + CCLSIZE)
                    error("Too many large character classes");
            }
            else
                p = psave;        /* later '.': reuse the stored class */
            $$ = mn1(RCCL, p);
            cclinter(1);
        }
    | CCL
        {
            $$ = mn1(RCCL, $1);
        }
    | NCCL
        {
            $$ = mn1(RNCCL, $1);
        }
    | r '*'
        {
            $$ = mn1(STAR, $1);
        }
    | r '+'
        {
            $$ = mn1(PLUS, $1);
        }
    | r '?'
        {
            $$ = mn1(QUEST, $1);
        }
    | r '|' r
        {
            $$ = mn2(BAR, $1, $3);
        }
    | r r %prec CAT
        {
            $$ = mn2(RCAT, $1, $2);
        }
    | r '/' r
        {
            if (!divflg) {
                j = mn1(S2FINAL, -casecount);
                i = mn2(RCAT, $1, j);
                $$ = mn2(DIV, i, $3);
            }
            else {
                /* a second '/' (or a '$' earlier) in the same rule:
                   treat it as plain concatenation */
                $$ = mn2(RCAT, $1, $3);
                warning("Extra slash removed");
            }
            divflg = TRUE;
        }
    | r ITER ',' ITER '}'
        {
            /* r{m,n}: put the bounds in ascending order first */
            if ($2 > $4) {
                i = $2;
                $2 = $4;
                $4 = i;
            }
            if ($4 <= 0)
                /* nothing is assigned to $$ here */
                warning("Iteration range must be positive");
            else {
                if ($2 <= 0) {
                    /*
                     * Lower bound of zero: the empty string must be
                     * accepted too.  Seed the alternation with r?
                     * (zero or one occurrence) and continue from two.
                     * Previously the seed was r itself, so r{0,n}
                     * demanded at least one occurrence.
                     */
                    j = mn1(QUEST, $1);
                    $2 = 1;
                }
                else {
                    /* exactly $2 copies of r */
                    j = $1;
                    for (k = 2; k <= $2; k++)
                        j = mn2(RCAT, j, dupl($1));
                }
                /* add one alternative for every longer count up to $4 */
                for (i = $2 + 1; i <= $4; i++) {
                    g = dupl($1);
                    for (k = 2; k <= i; k++)
                        g = mn2(RCAT, g, dupl($1));
                    j = mn2(BAR, j, g);
                }
                $$ = j;
            }
        }
    | r ITER '}'
        {
            /* r{n}: exactly n copies */
            if ($2 < 0)
                warning("Can't have negative iteration");
            else if ($2 == 0)
                $$ = mn0(RNULLS);
            else {
                j = $1;
                for (k = 2; k <= $2; k++)
                    j = mn2(RCAT, j, dupl($1));
                $$ = j;
            }
        }
    | r ITER ',' '}'
        {
            /* from n to infinity */
            if ($2 < 0)
                warning("Can't have negative iteration");
            else if ($2 == 0)
                $$ = mn1(STAR, $1);
            else if ($2 == 1)
                $$ = mn1(PLUS, $1);
            else {        /* >= 2 iterations minimum */
                /* n-1 copies followed by one-or-more */
                j = $1;
                for (k = 2; k < $2; k++)
                    j = mn2(RCAT, j, dupl($1));
                k = mn1(PLUS, dupl($1));
                $$ = mn2(RCAT, j, k);
            }
        }
    | SCON r
        {
            $$ = mn2(RSCON, $2, $1);
        }
    | '^' r
        {
            $$ = mn1(CARAT, $2);
        }
    | r '$'
        {
            /* r$ is built like r/\n */
            i = mn0('\n');
            if (!divflg) {
                j = mn1(S2FINAL, -casecount);
                k = mn2(RCAT, $1, j);
                $$ = mn2(DIV, k, i);
            }
            else
                $$ = mn2(RCAT, $1, i);
            divflg = TRUE;
        }
    | '(' r ')'
        {
            $$ = $2;
        }
    | NULLS
        {
            $$ = mn0(RNULLS);
        }
    ;
%%
/*
 * yylex - scanner for lex's own input; supplies tokens to the grammar above.
 *
 * What it does depends on the global sect:
 *   DEFSECTION   reads whole lines with getl().  Lines starting with '%' are
 *                requests (%%, %p %n %e %o %a %k, %t, %r/%c, %{ ... %}, %s);
 *                lines starting with blank or tab are copied to fout; any
 *                other line is a definition, returned as two STR tokens on
 *                successive calls (name, then translation).
 *   RULESECTION  reads characters with gch() and returns the tokens of one
 *                regular expression at a time; actions are copied out by
 *                cpyact().
 *   otherwise    section three: calls ptail(), copies the remaining lines
 *                to fout and returns 0.
 *
 * The token value is left in yylval.  Every return passes the token code
 * through freturn().
 */
yylex(){
    register char *p;
    register int c, i;
    char *t, *xp;
    int n, j, k, x;
    static int sectbegin;
    static char token[TOKENSIZE];
    static int iter;

# ifdef DEBUG
    yylval = 0;
# endif

    if(sect == DEFSECTION) {        /* definitions section */
        while(!eof) {
            if(prev == '\n'){        /* next char is at beginning of line */
                getl(p=buf);
                switch(*p){
                case '%':
                    switch(c= *(p+1)){
                    case '%':
                        lgate();
                        if(!ratfor)fprintf(fout,"# ");
                        fprintf(fout,"define YYNEWLINE %d\n",ctable['\n']);
                        if(!ratfor)fprintf(fout,"yylex(){\nint nstr; extern int yyprevious;\n");
                        sectbegin = TRUE;
                        /* probe: check that a single request covering all five
                           parse-tree arrays can be met, then release it */
                        i = treesize*(sizeof(*name)+sizeof(*left)+
                            sizeof(*right)+sizeof(*nullstr)+sizeof(*parent))+ALITTLEEXTRA;
                        /* hold the result in a pointer; it used to pass through
                           the int c, which loses bits where pointers are wider */
                        p = myalloc(i,1);
                        if(p == 0)
                            error("Too little core for parse tree");
                        cfree(p,i,1);
                        name = myalloc(treesize,sizeof(*name));
                        left = myalloc(treesize,sizeof(*left));
                        right = myalloc(treesize,sizeof(*right));
                        nullstr = myalloc(treesize,sizeof(*nullstr));
                        parent = myalloc(treesize,sizeof(*parent));
                        if(name == 0 || left == 0 || right == 0 || parent == 0 || nullstr == 0)
                            error("Too little core for parse tree");
                        return(freturn(DELIM));
                    case 'p': case 'P':    /* has overridden number of positions */
                        while(*p && !digit(*p))p++;
                        maxpos = siconv(p);
# ifdef DEBUG
                        if (debug) printf("positions (%%p) now %d\n",maxpos);
# endif
                        if(report == 2)report = 1;
                        continue;
                    case 'n': case 'N':    /* has overridden number of states */
                        while(*p && !digit(*p))p++;
                        nstates = siconv(p);
# ifdef DEBUG
                        if(debug)printf( " no. states (%%n) now %d\n",nstates);
# endif
                        if(report == 2)report = 1;
                        continue;
                    case 'e': case 'E':    /* has overridden number of tree nodes */
                        while(*p && !digit(*p))p++;
                        treesize = siconv(p);
# ifdef DEBUG
                        if (debug) printf("treesize (%%e) now %d\n",treesize);
# endif
                        if(report == 2)report = 1;
                        continue;
                    case 'o': case 'O':    /* sets outsize */
                        while (*p && !digit(*p))p++;
                        outsize = siconv(p);
                        if (report ==2) report=1;
                        continue;
                    case 'a': case 'A':    /* has overridden number of transitions */
                        while(*p && !digit(*p))p++;
                        if(report == 2)report = 1;
                        ntrans = siconv(p);
# ifdef DEBUG
                        if (debug)printf("N. trans (%%a) now %d\n",ntrans);
# endif
                        continue;
                    case 'k': case 'K':    /* overridden packed char classes */
                        while (*p && !digit(*p))p++;
                        if (report==2) report=1;
                        cfree(pchar, pchlen, sizeof(*pchar));
                        pchlen = siconv(p);
# ifdef DEBUG
                        if (debug) printf( "Size classes (%%k) now %d\n",pchlen);
# endif
                        pchar=pcptr=myalloc(pchlen, sizeof(*pchar));
                        continue;
                    case 't': case 'T':    /* character set specifier */
                        ZCH = atoi(p+2);
                        if (ZCH < NCH) ZCH = NCH;
                        if (ZCH > 2*NCH) error("ch table needs redeclaration");
                        chset = TRUE;
                        for(i = 0; i<ZCH; i++)
                            ctable[i] = 0;
                        /* each line up to the closing %T: a number, then the
                           characters that are to map to that number */
                        while(getl(p) && scomp(p,"%T") != 0 && scomp(p,"%t") != 0){
                            if((n = siconv(p)) <= 0 || n > ZCH){
                                warning("Character value %d out of range",n);
                                continue;
                            }
                            while(!space(*p) && *p) p++;
                            while(space(*p)) p++;
                            t = p;
                            while(*t){
                                c = ctrans(&t);
                                if(ctable[c]){
                                    if (printable(c))
                                        warning("Character '%c' used twice",c);
                                    else
                                        warning("Character %o used twice",c);
                                }
                                else ctable[c] = n;
                                t++;
                            }
                            p = buf;
                        }
                        {
                            /* give every character still unmapped a value */
                            char chused[2*NCH]; int kr;
                            for(i=0; i<ZCH; i++)
                                chused[i]=0;
                            for(i=0; i<NCH; i++)
                                chused[ctable[i]]=1;
                            for(kr=i=1; i<NCH; i++)
                                if (ctable[i]==0)
                                {
                                    /* NOTE(review): this skips UNUSED values and
                                       stops on one already in use (and is not
                                       bounded by ZCH); the test looks inverted.
                                       Left unchanged - confirm intent. */
                                    while (chused[kr] == 0)
                                        kr++;
                                    ctable[i]=kr;
                                    chused[kr]=1;
                                }
                        }
                        lgate();
                        continue;
                    case 'r': case 'R':
                        c = 'r';
                        /* fall through */
                    case 'c': case 'C':
                        if(lgatflg)
                            error("Too late for language specifier");
                        ratfor = (c == 'r');
                        continue;
                    case '{':    /* copy lines up to %} straight to the output */
                        lgate();
                        while(getl(p) && scomp(p,"%}") != 0)
                            fprintf(fout, "%s\n",p);
                        if(p[0] == '%') continue;
                        /* NOTE(review): control would fall into the %s case if
                           error() returned; presumably it does not - confirm. */
                        error("Premature eof");
                    case 's': case 'S':    /* start conditions */
                        lgate();
                        while(*p && index(*p," \t,") < 0) p++;    /* skip the %s word */
                        n = TRUE;
                        while(n){
                            while(*p && index(*p," \t,") >= 0) p++;
                            t = p;
                            while(*p && index(*p," \t,") < 0)p++;
                            if(!*p) n = FALSE;
                            *p++ = 0;
                            if (*t == 0) continue;
                            i = sptr*2;
                            if(!ratfor)fprintf(fout,"# ");
                            fprintf(fout,"define %s %d\n",t,i);
                            scopy(t,sp);
                            sname[sptr++] = sp;
                            sname[sptr] = 0;    /* required by lookup */
                            if(sptr >= STARTSIZE)
                                error("Too many start conditions");
                            /* "+=" rather than the obsolete "=+" */
                            sp += slength(sp) + 1;
                            if(sp >= schar+STARTCHAR)
                                error("Start conditions too long");
                        }
                        continue;
                    default:
                        warning("Invalid request %s",p);
                        continue;
                    }    /* end of switch after seeing '%' */
                case ' ': case '\t':    /* must be code */
                    lgate();
                    fprintf(fout, "%s\n",p);
                    continue;
                default:        /* definition */
                    while(*p && !space(*p)) p++;
                    if(*p == 0)
                        continue;
                    /* cut the line after the name; prev is now not '\n', so
                       the next call takes the branch below and returns the
                       translation found at bptr */
                    prev = *p;
                    *p = 0;
                    bptr = p+1;
                    yylval = buf;
                    if(digit(buf[0]))
                        warning("Substitution strings may not begin with digits");
                    return(freturn(STR));
                }
            }
            /* still sect 1, but prev != '\n' */
            else {
                p = bptr;
                while(*p && space(*p)) p++;
                if(*p == 0)
                    warning("No translation given - null string assumed");
                scopy(p,token);
                yylval = token;
                prev = '\n';
                return(freturn(STR));
            }
        }
        /* end of section one processing */
    }
    else if(sect == RULESECTION){        /* rules and actions */
        while(!eof){
            switch(c=gch()){
            case '\0':
                return(freturn(0));
            case '\n':
                if(prev == '\n') continue;
                x = NEWE;
                break;
            case ' ':
            case '\t':
                if(sectbegin == TRUE){
                    /* nothing returned yet in this section: copy the code out
                       without attaching it to a case */
                    cpyact();
                    while((c=gch()) && c != '\n');
                    continue;
                }
                if(!funcflag)phead2();
                funcflag = TRUE;
                if(ratfor)fprintf(fout,"%d\n",30000+casecount);
                else fprintf(fout,"case %d:\n",casecount);
                if(cpyact()){
                    if(ratfor)fprintf(fout,"goto 30997\n");
                    else fprintf(fout,"break;\n");
                }
                while((c=gch()) && c != '\n');
                if(peek == ' ' || peek == '\t' || sectbegin == TRUE){
                    warning("Executable statements should occur right after %%");
                    continue;
                }
                x = NEWE;
                break;
            case '%':
                if(prev != '\n') goto character;
                if(peek == '{'){    /* included code */
                    getl(buf);
                    while(!eof && getl(buf) && scomp("%}",buf) != 0)
                        fprintf(fout,"%s\n",buf);
                    continue;
                }
                if(peek == '%'){
                    c = gch();
                    c = gch();
                    x = DELIM;
                    break;
                }
                goto character;
            case '|':
                if(peek == ' ' || peek == '\t' || peek == '\n'){
                    /* '|' in place of an action: emit only the case label, so
                       this rule runs into the next rule's action */
                    if(ratfor)fprintf(fout,"%d\n",30000+casecount++);
                    else fprintf(fout,"case %d:\n",casecount++);
                    continue;
                }
                x = '|';
                break;
            case '$':
                if(peek == '\n' || peek == ' ' || peek == '\t' || peek == '|' || peek == '/'){
                    x = c;
                    break;
                }
                goto character;
            case '^':
                if(prev != '\n' && scon != TRUE) goto character;    /* valid only at line begin */
                x = c;
                break;
            case '?':
            case '+':
            case '.':
            case '*':
            case '(':
            case ')':
            case ',':
            case '/':
                x = c;
                break;
            case '}':
                iter = FALSE;
                x = c;
                break;
            case '{':    /* either iteration or definition */
                if(digit(c=gch())){    /* iteration */
                    iter = TRUE;
                ieval:
                    /* also entered by goto from "character" below for the
                       second number of {m,n} */
                    i = 0;
                    while(digit(c)){
                        /* digits that do not fit in token[] are dropped */
                        if(i < TOKENSIZE-1)
                            token[i++] = c;
                        c = gch();
                    }
                    token[i] = 0;
                    yylval = siconv(token);
                    munput('c',c);
                    x = ITER;
                    break;
                }
                else {        /* definition */
                    i = 0;
                    while(c && c!='}'){
                        /* an over-long name is truncated rather than written
                           past token[]; lookup() then fails and warns */
                        if(i < TOKENSIZE-1)
                            token[i++] = c;
                        c = gch();
                    }
                    token[i] = 0;
                    i = lookup(token,def);
                    if(i < 0)
                        warning("Definition %s not found",token);
                    else
                        munput('s',subs[i]);    /* rescan the translation */
                    continue;
                }
            case '<':        /* start condition ? */
                if(prev != '\n')        /* not at line begin, not start */
                    goto character;
                t = slptr;
                do {
                    i = 0;
                    c = gch();
                    while(c != ',' && c && c != '>'){
                        /* keep within token[]; over-long names are truncated */
                        if(i < TOKENSIZE-1)
                            token[i++] = c;
                        c = gch();
                    }
                    token[i] = 0;
                    if(i == 0)
                        goto character;
                    i = lookup(token,sname);
                    if(i < 0) {
                        warning("Undefined start condition %s",token);
                        continue;
                    }
                    *slptr++ = i+1;
                } while(c && c != '>');
                *slptr++ = 0;
                /* check if previous value re-usable */
                for (xp=slist; xp<t; )
                {
                    if (strcmp(xp, t)==0)
                        break;
                    while (*xp++);
                }
                if (xp<t)
                {
                    /* re-use previous pointer to string */
                    slptr=t;
                    t=xp;
                }
                if(slptr > slist+STARTSIZE)        /* note not packed ! */
                    error("Too many start conditions used");
                yylval = t;
                x = SCON;
                break;
            case '"':
                i = 0;
                while((c=gch()) && c != '"' && c != '\n'){
                    if(c == '\\') c = usescape(c=gch());
                    token[i++] = c;
                    /* ">=": with ">" one more character was stored at
                       token[TOKENSIZE], past the end, before this fired */
                    if(i >= TOKENSIZE){
                        warning("String too long");
                        i = TOKENSIZE-1;
                        break;
                    }
                }
                if(c == '\n') {
                    yyline--;
                    warning("Non-terminated string");
                    yyline++;
                }
                token[i] = 0;
                if(i == 0)x = NULLS;
                else if(i == 1){
                    yylval = token[0];
                    x = CHAR;
                }
                else {
                    yylval = token;
                    x = STR;
                }
                break;
            case '[':
                for(i=1;i<NCH;i++) symbol[i] = 0;
                x = CCL;
                if((c = gch()) == '^'){
                    x = NCCL;
                    c = gch();
                }
                while(c != ']' && c){
                    if(c == '\\') c = usescape(c=gch());
                    symbol[c] = 1;
                    j = c;
                    if((c=gch()) == '-' && peek != ']'){    /* range specified */
                        c = gch();
                        if(c == '\\') c = usescape(c=gch());
                        k = c;
                        if(j > k) {
                            n = j;
                            j = k;
                            k = n;
                        }
                        if(!(('A' <= j && k <= 'Z') ||
                             ('a' <= j && k <= 'z') ||
                             ('0' <= j && k <= '9')))
                            warning("Non-portable Character Class");
                        for(n=j+1;n<=k;n++)
                            symbol[n] = 1;        /* implementation dependent */
                        c = gch();
                    }
                }
                /* try to pack ccl's */
                i = 0;
                for(j=0;j<NCH;j++)
                    if(symbol[j])token[i++] = j;
                token[i] = 0;
                p = ccptr;
                if(optim){
                    p = ccl;
                    while(p <ccptr && scomp(token,p) != 0)p++;
                }
                if(p < ccptr)    /* found it */
                    yylval = p;
                else {
                    yylval = ccptr;
                    scopy(token,ccptr);
                    /* "+=" rather than the obsolete "=+" */
                    ccptr += slength(token) + 1;
                    if(ccptr >= ccl+CCLSIZE)
                        error("Too many large character classes");
                }
                cclinter(x==CCL);
                break;
            case '\\':
                c = usescape(c=gch());
                /* fall through: the escaped character is ordinary text */
            default:
            character:
                if(iter){    /* second part of an iteration */
                    iter = FALSE;
                    if('0' <= c && c <= '9')
                        goto ieval;
                }
                if(alpha(peek)){
                    i = 0;
                    yylval = token;
                    token[i++] = c;
                    /* stop when token[] is full; the rest of the run comes
                       back as a further token on the next call */
                    while(alpha(peek) && i < TOKENSIZE-1)
                        token[i++] = gch();
                    /* a postfix operator applies to the last character only,
                       so hand that character back */
                    if(peek == '?' || peek == '*' || peek == '+')
                        munput('c',token[--i]);
                    token[i] = 0;
                    if(i == 1){
                        yylval = token[0];
                        x = CHAR;
                    }
                    else x = STR;
                }
                else {
                    yylval = c;
                    x = CHAR;
                }
            }
            scon = FALSE;
            if(x == SCON)scon = TRUE;
            sectbegin = FALSE;
            return(freturn(x));
        }
    }
    /* section three */
    ptail();
# ifdef DEBUG
    if(debug)
        fprintf(fout,"\n/*this comes from section three - debug */\n");
# endif
    while(getl(buf) && !eof)
        fprintf(fout,"%s\n",buf);
    return(freturn(0));
}
/* end of yylex */
# ifdef DEBUG
/*
 * freturn - trace the token yylex() is about to return (DEBUG builds).
 *
 * When yydebug is set, prints the token code i (through allprint() if it
 * is below NCH, otherwise as a decimal number) followed by yylval, shown
 * with strpt() for STR/CCL/NCCL, allprint() for CHAR and as a decimal
 * number for anything else.  Always returns i unchanged.
 */
freturn(i)
    int i;
{
    if(!yydebug)
        return(i);

    printf("now return ");
    if(i >= NCH)
        printf("%d",i);
    else
        allprint(i);

    printf(" yylval = ");
    if(i == STR || i == CCL || i == NCCL)
        strpt(yylval);
    else if(i == CHAR)
        allprint(yylval);
    else
        printf("%d",yylval);
    putchar('\n');

    return(i);
}
# endif