From 337dae54280f373ee24a4c323385b555376280bc Mon Sep 17 00:00:00 2001 From: Tom London Date: Sun, 5 Nov 1978 23:40:20 -0500 Subject: [PATCH] Bell 32V development Work on file usr/src/cmd/yacc/y3.c Work on file usr/src/cmd/yacc/y2.c Work on file usr/src/cmd/yacc/dextern Work on file usr/src/cmd/yacc/y1.c Work on file usr/src/cmd/yacc/yaccdiffs Work on file usr/src/cmd/yacc/yaccnews Work on file usr/src/cmd/yacc/y4.c Work on file usr/src/cmd/yacc/yaccpar Co-Authored-By: John Reiser Synthesized-from: 32v --- usr/src/cmd/yacc/dextern | 259 +++++++++++ usr/src/cmd/yacc/y1.c | 653 ++++++++++++++++++++++++++++ usr/src/cmd/yacc/y2.c | 859 +++++++++++++++++++++++++++++++++++++ usr/src/cmd/yacc/y3.c | 404 +++++++++++++++++ usr/src/cmd/yacc/y4.c | 325 ++++++++++++++ usr/src/cmd/yacc/yaccdiffs | 198 +++++++++ usr/src/cmd/yacc/yaccnews | 149 +++++++ usr/src/cmd/yacc/yaccpar | 149 +++++++ 8 files changed, 2996 insertions(+) create mode 100644 usr/src/cmd/yacc/dextern create mode 100644 usr/src/cmd/yacc/y1.c create mode 100644 usr/src/cmd/yacc/y2.c create mode 100644 usr/src/cmd/yacc/y3.c create mode 100644 usr/src/cmd/yacc/y4.c create mode 100644 usr/src/cmd/yacc/yaccdiffs create mode 100644 usr/src/cmd/yacc/yaccnews create mode 100644 usr/src/cmd/yacc/yaccpar diff --git a/usr/src/cmd/yacc/dextern b/usr/src/cmd/yacc/dextern new file mode 100644 index 0000000000..dca3b2e975 --- /dev/null +++ b/usr/src/cmd/yacc/dextern @@ -0,0 +1,259 @@ +# include +# include +# include "files" + + /* MANIFEST CONSTANT DEFINITIONS */ + + /* base of nonterminal internal numbers */ +# define NTBASE 010000 + + /* internal codes for error and accept actions */ + +# define ERRCODE 8190 +# define ACCEPTCODE 8191 + + /* sizes and limits */ + +# ifdef HUGE +# define ACTSIZE 12000 +# define MEMSIZE 12000 +# define NSTATES 750 +# define NTERMS 127 +# define NPROD 600 +# define NNONTERM 300 +# define TEMPSIZE 1200 +# define CNAMSZ 5000 +# define LSETSIZE 600 +# define WSETSIZE 350 +# endif + +# ifdef MEDIUM +# define ACTSIZE 4000 +# define MEMSIZE 5200 +# define NSTATES 600 +# define NTERMS 127 +# define NPROD 400 +# define NNONTERM 200 +# define TEMPSIZE 800 +# define CNAMSZ 4000 +# define LSETSIZE 450 +# define WSETSIZE 250 +# endif + +# define NAMESIZE 50 +# define NTYPES 63 + +# ifdef WORD32 +# define TBITSET ((32+NTERMS)/32) + + /* bit packing macros (may be machine dependent) */ +# define BIT(a,i) ((a)[(i)>>5] & (1<<((i)&037))) +# define SETBIT(a,i) ((a)[(i)>>5] |= (1<<((i)&037))) + + /* number of words needed to hold n+1 bits */ +# define NWORDS(n) (((n)+32)/32) + +# else + +# define TBITSET ((16+NTERMS)/16) + + /* bit packing macros (may be machine dependent) */ +# define BIT(a,i) ((a)[(i)>>4] & (1<<((i)&017))) +# define SETBIT(a,i) ((a)[(i)>>4] |= (1<<((i)&017))) + + /* number of words needed to hold n+1 bits */ +# define NWORDS(n) (((n)+16)/16) +# endif + + /* relationships which must hold: + TBITSET ints must hold NTERMS+1 bits... + WSETSIZE >= NNONTERM + LSETSIZE >= NNONTERM + TEMPSIZE >= NTERMS + NNONTERMs + 1 + TEMPSIZE >= NSTATES + */ + + /* associativities */ + +# define NOASC 0 /* no assoc. */ +# define LASC 1 /* left assoc. */ +# define RASC 2 /* right assoc. */ +# define BASC 3 /* binary assoc. */ + + /* flags for state generation */ + +# define DONE 0 +# define MUSTDO 1 +# define MUSTLOOKAHEAD 2 + + /* flags for a rule having an action, and being reduced */ + +# define ACTFLAG 04 +# define REDFLAG 010 + + /* output parser flags */ +# define YYFLAG1 (-1000) + + /* macros for getting associativity and precedence levels */ + +# define ASSOC(i) ((i)&03) +# define PLEVEL(i) (((i)>>4)&077) +# define TYPE(i) ((i>>10)&077) + + /* macros for setting associativity and precedence levels */ + +# define SETASC(i,j) i|=j +# define SETPLEV(i,j) i |= (j<<4) +# define SETTYPE(i,j) i |= (j<<10) + + /* looping macros */ + +# define TLOOP(i) for(i=1;i<=ntokens;++i) +# define NTLOOP(i) for(i=0;i<=nnonter;++i) +# define PLOOP(s,i) for(i=s;i0 ; ++p ) ; + p = prdptr[-*p]; + q = chcopy( sarr, nontrst[*p-NTBASE].name ); + q = chcopy( q, " : " ); + + for(;;){ + *q++ = ++p==pp ? '_' : ' '; + *q = '\0'; + if((i = *p) <= 0) break; + q = chcopy( q, symnam(i) ); + if( q> &sarr[ISIZE-30] ) error( "item too big" ); + } + + if( (i = *pp) < 0 ){ /* an item calling for a reduction */ + q = chcopy( q, " (" ); + sprintf( q, "%d)", -i ); + } + + return( sarr ); + } + +char *symnam(i){ /* return a pointer to the name of symbol i */ + char *cp; + + cp = (i>=NTBASE) ? nontrst[i-NTBASE].name : tokset[i].name ; + if( *cp == ' ' ) ++cp; + return( cp ); + } + +struct wset *zzcwp = wsets; +int zzgoent = 0; +int zzgobest = 0; +int zzacent = 0; +int zzexcp = 0; +int zzclose = 0; +int zzsrconf = 0; +int * zzmemsz = mem0; +int zzrrconf = 0; + +summary(){ /* output the summary on the tty */ + + if( foutput!=NULL ){ + fprintf( foutput, "\n%d/%d terminals, %d/%d nonterminals\n", ntokens, NTERMS, + nnonter, NNONTERM ); + fprintf( foutput, "%d/%d grammar rules, %d/%d states\n", nprod, NPROD, nstate, NSTATES ); + fprintf( foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n", zzsrconf, zzrrconf ); + fprintf( foutput, "%d/%d working sets used\n", zzcwp-wsets, WSETSIZE ); + fprintf( foutput, "memory: states,etc. %d/%d, parser %d/%d\n", zzmemsz-mem0, MEMSIZE, + memp-amem, ACTSIZE ); + fprintf( foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE ); + fprintf( foutput, "%d extra closures\n", zzclose - 2*nstate ); + fprintf( foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp ); + fprintf( foutput, "%d goto entries\n", zzgoent ); + fprintf( foutput, "%d entries saved by goto default\n", zzgobest ); + } + if( zzsrconf!=0 || zzrrconf!=0 ){ + fprintf( stdout,"\nconflicts: "); + if( zzsrconf )fprintf( stdout, "%d shift/reduce" , zzsrconf ); + if( zzsrconf && zzrrconf )fprintf( stdout, ", " ); + if( zzrrconf )fprintf( stdout, "%d reduce/reduce" , zzrrconf ); + fprintf( stdout, "\n" ); + } + + fclose( ftemp ); + if( fdefine != NULL ) fclose( fdefine ); + } + +/* VARARGS1 */ +error(s,a1) char *s; { /* write out error comment */ + + ++nerrors; + fprintf( stderr, "\n fatal error: "); + fprintf( stderr, s,a1); + fprintf( stderr, ", line %d\n", lineno ); + if( !fatfl ) return; + summary(); + exit(1); + } + +aryfil( v, n, c ) int *v,n,c; { /* set elements 0 through n-1 to c */ + int i; + for( i=0; ilset; + if( pp == 0 ) fprintf( foutput, "\tNULL"); + else { + fprintf( foutput, " { " ); + TLOOP(j) { + if( BIT(pp,j) ) fprintf( foutput, "%s ", symnam(j) ); + } + fprintf( foutput, "}" ); + } + } + +cpres(){ /* compute an array with the beginnings of productions yielding given nonterminals + The array pres points to these lists */ + /* the array pyield has the lists: the total size is only NPROD+1 */ + register **pmem; + register c, j, i; + static int * pyield[NPROD]; + + pmem = pyield; + + NTLOOP(i){ + c = i+NTBASE; + pres[i] = pmem; + fatfl = 0; /* make undefined symbols nonfatal */ + PLOOP(0,j){ + if (*prdptr[j] == c) *pmem++ = prdptr[j]+1; + } + if(pres[i] == pmem){ + error("nonterminal %s not defined!", nontrst[i].name); + } + } + pres[i] = pmem; + fatfl = 1; + if( nerrors ){ + summary(); + exit(1); + } + if( pmem != &pyield[nprod] ) error( "internal Yacc error: pyield %d", pmem-&pyield[nprod] ); + } + +int indebug = 0; +cpfir() { + /* compute an array with the first of nonterminals */ + register *p, **s, i, **t, ch, changes; + + zzcwp = &wsets[nnonter]; + NTLOOP(i){ + aryfil( wsets[i].ws.lset, tbitset, 0 ); + t = pres[i+1]; + for( s=pres[i]; s 0 ; ++p ) { + if( ch < NTBASE ) { + SETBIT( wsets[i].ws.lset, ch ); + break; + } + else if( !pempty[ch-NTBASE] ) break; + } + } + } + + /* now, reflect transitivity */ + + changes = 1; + while( changes ){ + changes = 0; + NTLOOP(i){ + t = pres[i+1]; + for( s=pres[i]; s= 0; ++p ) { + changes |= setunion( wsets[i].ws.lset, wsets[ch].ws.lset ); + if( !pempty[ch] ) break; + } + } + } + } + + NTLOOP(i) pfirst[i] = flset( &wsets[i].ws ); + if( !indebug ) return; + if( (foutput!=NULL) ){ + NTLOOP(i) { + fprintf( foutput, "\n%s: ", nontrst[i].name ); + prlook( pfirst[i] ); + fprintf( foutput, " %d\n", pempty[i] ); + } + } + } + +state(c){ /* sorts last state,and sees if it equals earlier ones. returns state number */ + int size1,size2; + register i; + struct item *p1, *p2, *k, *l, *q1, *q2; + p1 = pstate[nstate]; + p2 = pstate[nstate+1]; + if(p1==p2) return(0); /* null state */ + /* sort the items */ + for(k=p2-1;k>p1;k--) { /* make k the biggest */ + for(l=k-1;l>=p1;--l)if( l->pitem > k->pitem ){ + int *s; + struct looksets *ss; + s = k->pitem; + k->pitem = l->pitem; + l->pitem = s; + ss = k->look; + k->look = l->look; + l->look = ss; + } + } + size1 = p2 - p1; /* size of state */ + + for( i= (c>=NTBASE)?ntstates[c-NTBASE]:tstates[c]; i != 0; i = mstates[i] ) { + /* get ith state */ + q1 = pstate[i]; + q2 = pstate[i+1]; + size2 = q2 - q1; + if (size1 != size2) continue; + k=p1; + for(l=q1;lpitem != k->pitem ) break; + ++k; + } + if (l != q2) continue; + /* found it */ + pstate[nstate+1] = pstate[nstate]; /* delete last state */ + /* fix up lookaheads */ + if( nolook ) return(i); + for( l=q1,k=p1; llook->lset[s]; + if( setunion( clset.lset, k->look->lset ) ) { + tystate[i] = MUSTDO; + /* register the new set */ + l->look = flset( &clset ); + } + } + return (i); + } + /* state is new */ + if( nolook ) error( "yacc state/nolook error" ); + pstate[nstate+2] = p2; + if(nstate+1 >= NSTATES) error("too many states" ); + if( c >= NTBASE ){ + mstates[ nstate ] = ntstates[ c-NTBASE ]; + ntstates[ c-NTBASE ] = nstate; + } + else { + mstates[ nstate ] = tstates[ c ]; + tstates[ c ] = nstate; + } + tystate[nstate]=MUSTDO; + return(nstate++); + } + +int pidebug = 0; /* debugging flag for putitem */ +putitem( ptr, lptr ) int *ptr; struct looksets *lptr; { + register struct item *j; + + if( pidebug && (foutput!=NULL) ) { + fprintf( foutput, "putitem(%s), state %d\n", writem(ptr), nstate ); + } + j = pstate[nstate+1]; + j->pitem = ptr; + if( !nolook ) j->look = flset( lptr ); + pstate[nstate+1] = ++j; + if( (int *)j > zzmemsz ){ + zzmemsz = (int *)j; + if( zzmemsz >= &mem0[MEMSIZE] ) error( "out of state space" ); + } + } + +cempty(){ /* mark nonterminals which derive the empty string */ + /* also, look for nonterminals which don't derive any token strings */ + +# define EMPTY 1 +# define WHOKNOWS 0 +# define OK 1 + + register i, *p; + + /* first, use the array pempty to detect productions that can never be reduced */ + /* set pempty to WHONOWS */ + aryfil( pempty, nnonter+1, WHOKNOWS ); + + /* now, look at productions, marking nonterminals which derive something */ + + more: + PLOOP(0,i){ + if( pempty[ *prdptr[i] - NTBASE ] ) continue; + for( p=prdptr[i]+1; *p>=0; ++p ){ + if( *p>=NTBASE && pempty[ *p-NTBASE ] == WHOKNOWS ) break; + } + if( *p < 0 ){ /* production can be derived */ + pempty[ *prdptr[i]-NTBASE ] = OK; + goto more; + } + } + + /* now, look at the nonterminals, to see if they are all OK */ + + NTLOOP(i){ + /* the added production rises or falls as the start symbol ... */ + if( i == 0 ) continue; + if( pempty[ i ] != OK ) { + fatfl = 0; + error( "nonterminal %s never derives any token string", nontrst[i].name ); + } + } + + if( nerrors ){ + summary(); + exit(1); + } + + /* now, compute the pempty array, to see which nonterminals derive the empty string */ + + /* set pempty to WHOKNOWS */ + + aryfil( pempty, nnonter+1, WHOKNOWS ); + + /* loop as long as we keep finding empty nonterminals */ + +again: + PLOOP(1,i){ + if( pempty[ *prdptr[i]-NTBASE ]==WHOKNOWS ){ /* not known to be empty */ + for( p=prdptr[i]+1; *p>=NTBASE && pempty[*p-NTBASE]==EMPTY ; ++p ) ; + if( *p < 0 ){ /* we have a nontrivially empty nonterminal */ + pempty[*prdptr[i]-NTBASE] = EMPTY; + goto again; /* got one ... try for another */ + } + } + } + + } + +int gsdebug = 0; +stagen(){ /* generate the states */ + + int i, j; + register c; + register struct wset *p, *q; + + /* initialize */ + + nstate = 0; + /* THIS IS FUNNY from the standpoint of portability */ + /* it represents the magic moment when the mem0 array, which has + /* been holding the productions, starts to hold item pointers, of a + /* different type... */ + /* someday, alloc should be used to allocate all this stuff... for now, we + /* accept that if pointers don't fit in integers, there is a problem... */ + + pstate[0] = pstate[1] = (struct item *)mem; + aryfil( clset.lset, tbitset, 0 ); + putitem( prdptr[0]+1, &clset ); + tystate[0] = MUSTDO; + nstate = 1; + pstate[2] = pstate[1]; + + aryfil( amem, ACTSIZE, 0 ); + + /* now, the main state generation loop */ + + more: + SLOOP(i){ + if( tystate[i] != MUSTDO ) continue; + tystate[i] = DONE; + aryfil( temp1, nnonter+1, 0 ); + /* take state i, close it, and do gotos */ + closure(i); + WSLOOP(wsets,p){ /* generate goto's */ + if( p->flag ) continue; + p->flag = 1; + c = *(p->pitem); + if( c <= 1 ) { + if( pstate[i+1]-pstate[i] <= p-wsets ) tystate[i] = MUSTLOOKAHEAD; + continue; + } + /* do a goto on c */ + WSLOOP(p,q){ + if( c == *(q->pitem) ){ /* this item contributes to the goto */ + putitem( q->pitem + 1, &q->ws ); + q->flag = 1; + } + } + if( c < NTBASE ) { + state(c); /* register new state */ + } + else { + temp1[c-NTBASE] = state(c); + } + } + if( gsdebug && (foutput!=NULL) ){ + fprintf( foutput, "%d: ", i ); + NTLOOP(j) { + if( temp1[j] ) fprintf( foutput, "%s %d, ", nontrst[j].name, temp1[j] ); + } + fprintf( foutput, "\n"); + } + indgo[i] = apack( &temp1[1], nnonter-1 ) - 1; + goto more; /* we have done one goto; do some more */ + } + /* no more to do... stop */ + } + +int cldebug = 0; /* debugging flag for closure */ +closure(i){ /* generate the closure of state i */ + + int c, ch, work, k; + register struct wset *u, *v; + int *pi; + int **s, **t; + struct item *q; + register struct item *p; + + ++zzclose; + + /* first, copy kernel of state i to wsets */ + + cwp = wsets; + ITMLOOP(i,p,q){ + cwp->pitem = p->pitem; + cwp->flag = 1; /* this item must get closed */ + SETLOOP(k) cwp->ws.lset[k] = p->look->lset[k]; + WSBUMP(cwp); + } + + /* now, go through the loop, closing each item */ + + work = 1; + while( work ){ + work = 0; + WSLOOP(wsets,u){ + + if( u->flag == 0 ) continue; + c = *(u->pitem); /* dot is before c */ + + if( c < NTBASE ){ + u->flag = 0; + continue; /* only interesting case is where . is before nonterminal */ + } + + /* compute the lookahead */ + aryfil( clset.lset, tbitset, 0 ); + + /* find items involving c */ + + WSLOOP(u,v){ + if( v->flag == 1 && *(pi=v->pitem) == c ){ + v->flag = 0; + if( nolook ) continue; + while( (ch= *++pi)>0 ){ + if( ch < NTBASE ){ /* terminal symbol */ + SETBIT( clset.lset, ch ); + break; + } + /* nonterminal symbol */ + setunion( clset.lset, pfirst[ch-NTBASE]->lset ); + if( !pempty[ch-NTBASE] ) break; + } + if( ch<=0 ) setunion( clset.lset, v->ws.lset ); + } + } + + /* now loop over productions derived from c */ + + c -= NTBASE; /* c is now nonterminal number */ + + t = pres[c+1]; + for( s=pres[c]; spitem == *s ){ /* yes, it is there */ + if( nolook ) goto nexts; + if( setunion( v->ws.lset, clset.lset ) ) v->flag = work = 1; + goto nexts; + } + } + + /* not there; make a new entry */ + if( cwp-wsets+1 >= WSETSIZE ) error( "working set overflow" ); + cwp->pitem = *s; + cwp->flag = 1; + if( !nolook ){ + work = 1; + SETLOOP(k) cwp->ws.lset[k] = clset.lset[k]; + } + WSBUMP(cwp); + + nexts: ; + } + + } + } + + /* have computed closure; flags are reset; return */ + + if( cwp > zzcwp ) zzcwp = cwp; + if( cldebug && (foutput!=NULL) ){ + fprintf( foutput, "\nState %d, nolook = %d\n", i, nolook ); + WSLOOP(wsets,u){ + if( u->flag ) fprintf( foutput, "flag set!\n"); + u->flag = 0; + fprintf( foutput, "\t%s", writem(u->pitem)); + prlook( &u->ws ); + fprintf( foutput, "\n" ); + } + } + } + +struct looksets *flset( p ) struct looksets *p; { + /* decide if the lookahead set pointed to by p is known */ + /* return pointer to a perminent location for the set */ + + register struct looksets *q; + int j, *w; + register *u, *v; + + for( q = &lkst[nlset]; q-- > lkst; ){ + u = p->lset; + v = q->lset; + w = & v[tbitset]; + while( v= LSETSIZE )error("too many lookahead sets" ); + SETLOOP(j){ + q->lset[j] = p->lset[j]; + } + return( q ); + } diff --git a/usr/src/cmd/yacc/y2.c b/usr/src/cmd/yacc/y2.c new file mode 100644 index 0000000000..a625896a5f --- /dev/null +++ b/usr/src/cmd/yacc/y2.c @@ -0,0 +1,859 @@ +# include "dextern" +# define IDENTIFIER 257 +# define MARK 258 +# define TERM 259 +# define LEFT 260 +# define RIGHT 261 +# define BINARY 262 +# define PREC 263 +# define LCURLY 264 +# define C_IDENTIFIER 265 /* name followed by colon */ +# define NUMBER 266 +# define START 267 +# define TYPEDEF 268 +# define TYPENAME 269 +# define UNION 270 +# define ENDFILE 0 + + /* communication variables between various I/O routines */ + +char *infile; /* input file name */ +int numbval; /* value of an input number */ +char tokname[NAMESIZE]; /* input token name */ + + /* storage of names */ + +char cnames[CNAMSZ]; /* place where token and nonterminal names are stored */ +int cnamsz = CNAMSZ; /* size of cnames */ +char * cnamp = cnames; /* place where next name is to be put in */ +int ndefout = 3; /* number of defined symbols output */ + + /* storage of types */ +int ntypes; /* number of types defined */ +char * typeset[NTYPES]; /* pointers to type tags */ + + /* symbol tables for tokens and nonterminals */ + +int ntokens = 0; +struct toksymb tokset[NTERMS]; +int toklev[NTERMS]; +int nnonter = -1; +struct ntsymb nontrst[NNONTERM]; +int start; /* start symbol */ + + /* assigned token type values */ +int extval = 0; + + /* input and output file descriptors */ + +FILE * finput; /* yacc input file */ +FILE * faction; /* file for saving actions */ +FILE * fdefine; /* file for # defines */ +FILE * ftable; /* y.tab.c file */ +FILE * ftemp; /* tempfile to pass 2 */ +FILE * foutput; /* y.output file */ + + /* storage for grammar rules */ + +int mem0[MEMSIZE] ; /* production storage */ +int *mem = mem0; +int nprod= 1; /* number of productions */ +int *prdptr[NPROD]; /* pointers to descriptions of productions */ +int levprd[NPROD] ; /* precedence levels for the productions */ + + +setup(argc,argv) int argc; char *argv[]; +{ int i,j,lev,t, ty; + int c; + int *p; + char actname[8]; + + foutput = NULL; + fdefine = NULL; + i = 1; + while( argc >= 2 && argv[1][0] == '-' ) { + while( *++(argv[1]) ){ + switch( *argv[1] ){ + case 'v': + case 'V': + foutput = fopen(FILEU, "w" ); + if( foutput == NULL ) error( "cannot open y.output" ); + continue; + case 'D': + case 'd': + fdefine = fopen( FILED, "w" ); + continue; + case 'o': + case 'O': + fprintf( stderr, "`o' flag now default in yacc\n" ); + continue; + + case 'r': + case 'R': + error( "Ratfor Yacc is dead: sorry...\n" ); + + default: + error( "illegal option: %c", *argv[1]); + } + } + argv++; + argc--; + } + + ftable = fopen( OFILE, "w" ); + if( ftable == NULL ) error( "cannot open table file" ); + + ftemp = fopen( TEMPNAME, "w" ); + faction = fopen( ACTNAME, "w" ); + if( ftemp==NULL || faction==NULL ) error( "cannot open temp file" ); + + if( argc < 2 || ((finput=fopen( infile=argv[1], "r" )) == NULL ) ){ + error( "cannot open input file" ); + } + + cnamp = cnames; + defin(0,"$end"); + extval = 0400; + defin(0,"error"); + defin(1,"$accept"); + mem=mem0; + lev = 0; + ty = 0; + i=0; + + /* sorry -- no yacc parser here..... + we must bootstrap somehow... */ + + for( t=gettok(); t!=MARK && t!= ENDFILE; ){ + switch( t ){ + + case ';': + t = gettok(); + break; + + case START: + if( (t=gettok()) != IDENTIFIER ){ + error( "bad %%start construction" ); + } + start = chfind(1,tokname); + t = gettok(); + continue; + + case TYPEDEF: + if( (t=gettok()) != TYPENAME ) error( "bad syntax in %%type" ); + ty = numbval; + for(;;){ + t = gettok(); + switch( t ){ + + case IDENTIFIER: + if( (t=chfind( 1, tokname ) ) < NTBASE ) { + j = TYPE( toklev[t] ); + if( j!= 0 && j != ty ){ + error( "type redeclaration of token %s", + tokset[t].name ); + } + else SETTYPE( toklev[t],ty); + } + else { + j = nontrst[t-NTBASE].tvalue; + if( j != 0 && j != ty ){ + error( "type redeclaration of nonterminal %s", + nontrst[t-NTBASE].name ); + } + else nontrst[t-NTBASE].tvalue = ty; + } + case ',': + continue; + + case ';': + t = gettok(); + break; + default: + break; + } + break; + } + continue; + + case UNION: + /* copy the union declaration to the output */ + cpyunion(); + t = gettok(); + continue; + + case LEFT: + case BINARY: + case RIGHT: + ++i; + case TERM: + lev = t-TERM; /* nonzero means new prec. and assoc. */ + ty = 0; + + /* get identifiers so defined */ + + t = gettok(); + if( t == TYPENAME ){ /* there is a type defined */ + ty = numbval; + t = gettok(); + } + + for(;;) { + switch( t ){ + + case ',': + t = gettok(); + continue; + + case ';': + break; + + case IDENTIFIER: + j = chfind(0,tokname); + if( lev ){ + if( ASSOC(toklev[j]) ) error( "redeclaration of precedence of %s", tokname ); + SETASC(toklev[j],lev); + SETPLEV(toklev[j],i); + } + if( ty ){ + if( TYPE(toklev[j]) ) error( "redeclaration of type of %s", tokname ); + SETTYPE(toklev[j],ty); + } + if( (t=gettok()) == NUMBER ){ + tokset[j].value = numbval; + if( j < ndefout && j>2 ){ + error( "please define type number of %s earlier", + tokset[j].name ); + } + t=gettok(); + } + continue; + + } + + break; + } + + continue; + + case LCURLY: + defout(); + cpycode(); + t = gettok(); + continue; + + default: + error( "syntax error" ); + + } + + } + + if( t == ENDFILE ){ + error( "unexpected EOF before %%" ); + } + + /* t is MARK */ + + defout(); + + fprintf( ftable, "#define yyclearin yychar = -1\n" ); + fprintf( ftable, "#define yyerrok yyerrflag = 0\n" ); + fprintf( ftable, "extern int yychar;\nextern short yyerrflag;\n" ); + fprintf( ftable, "#ifndef YYMAXDEPTH\n#define YYMAXDEPTH 150\n#endif\n" ); + if( !ntypes ) fprintf( ftable, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n" ); + fprintf( ftable, "YYSTYPE yylval, yyval;\n" ); + + prdptr[0]=mem; + /* added production */ + *mem++ = NTBASE; + *mem++ = start; /* if start is 0, we will overwrite with the lhs of the first rule */ + *mem++ = 1; + *mem++ = 0; + prdptr[1]=mem; + + while( (t=gettok()) == LCURLY ) cpycode(); + + if( t != C_IDENTIFIER ) error( "bad syntax on first rule" ); + + if( !start ) prdptr[0][1] = chfind(1,tokname); + + /* read rules */ + + while( t!=MARK && t!=ENDFILE ){ + + /* process a rule */ + + if( t == '|' ){ + *mem++ = *prdptr[nprod-1]; + } + else if( t == C_IDENTIFIER ){ + *mem = chfind(1,tokname); + if( *mem < NTBASE ) error( "token illegal on LHS of grammar rule" ); + ++mem; + } + else error( "illegal rule: missing semicolon or | ?" ); + + /* read rule body */ + + + t = gettok(); + more_rule: + while( t == IDENTIFIER ) { + *mem = chfind(1,tokname); + if( *mem=NTBASE)error("nonterminal %s illegal after %%prec", nontrst[j-NTBASE].name); + levprd[nprod]=toklev[j]; + t = gettok(); + } + + if( t == '=' ){ + levprd[nprod] |= ACTFLAG; + fprintf( faction, "\ncase %d:", nprod ); + cpyact( mem-prdptr[nprod]-1 ); + fprintf( faction, " break;" ); + if( (t=gettok()) == IDENTIFIER ){ + /* action within rule... */ + + sprintf( actname, "$$%d", nprod ); + j = chfind(1,actname); /* make it a nonterminal */ + + /* the current rule will become rule number nprod+1 */ + /* move the contents down, and make room for the null */ + + for( p=mem; p>=prdptr[nprod]; --p ) p[2] = *p; + mem += 2; + + /* enter null production for action */ + + p = prdptr[nprod]; + + *p++ = j; + *p++ = -nprod; + + /* update the production information */ + + levprd[nprod+1] = levprd[nprod] & ~ACTFLAG; + levprd[nprod] = ACTFLAG; + + if( ++nprod >= NPROD ) error( "more than %d rules", NPROD ); + prdptr[nprod] = p; + + /* make the action appear in the original rule */ + *mem++ = j; + + /* get some more of the rule */ + + goto more_rule; + } + + } + + while( t == ';' ) t = gettok(); + + *mem++ = -nprod; + + /* check that default action is reasonable */ + + if( ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].tvalue ){ + /* no explicit action, LHS has value */ + register tempty; + tempty = prdptr[nprod][1]; + if( tempty < 0 ) error( "must return a value, since LHS has a type" ); + else if( tempty >= NTBASE ) tempty = nontrst[tempty-NTBASE].tvalue; + else tempty = TYPE( toklev[tempty] ); + if( tempty != nontrst[*prdptr[nprod]-NTBASE].tvalue ){ + error( "default action causes potential type clash" ); + } + } + + if( ++nprod >= NPROD ) error( "more than %d rules", NPROD ); + prdptr[nprod] = mem; + levprd[nprod]=0; + + } + + /* end of all rules */ + + finact(); + if( t == MARK ){ + fprintf( ftable, "\n# line %d \"%s\"\n", lineno, infile ); + while( (c=getc(finput)) != EOF ) putc( c, ftable ); + } + fclose( finput ); + } + +finact(){ + /* finish action routine */ + + fclose(faction); + + fprintf( ftable, "# define YYERRCODE %d\n", tokset[2].value ); + + } + +defin( t, s ) register char *s; { +/* define s to be a terminal if t=0 + or a nonterminal if t=1 */ + + register val; + + if (t) { + if( ++nnonter >= NNONTERM ) error("too many nonterminals, limit %d",NNONTERM); + nontrst[nnonter].name = cstash(s); + return( NTBASE + nnonter ); + } + /* must be a token */ + if( ++ntokens >= NTERMS ) error("too many terminals, limit %d",NTERMS ); + tokset[ntokens].name = cstash(s); + + /* establish value for token */ + + if( s[0]==' ' && s[2]=='\0' ) /* single character literal */ + val = s[1]; + else if ( s[0]==' ' && s[1]=='\\' ) { /* escape sequence */ + if( s[3] == '\0' ){ /* single character escape sequence */ + switch ( s[2] ){ + /* character which is escaped */ + case 'n': val = '\n'; break; + case 'r': val = '\r'; break; + case 'b': val = '\b'; break; + case 't': val = '\t'; break; + case 'f': val = '\f'; break; + case '\'': val = '\''; break; + case '"': val = '"'; break; + case '\\': val = '\\'; break; + default: error( "invalid escape" ); + } + } + else if( s[2] <= '7' && s[2]>='0' ){ /* \nnn sequence */ + if( s[3]<'0' || s[3] > '7' || s[4]<'0' || + s[4]>'7' || s[5] != '\0' ) error("illegal \\nnn construction" ); + val = 64*s[2] + 8*s[3] + s[4] - 73*'0'; + if( val == 0 ) error( "'\\000' is illegal" ); + } + } + else { + val = extval++; + } + tokset[ntokens].value = val; + toklev[ntokens] = 0; + return( ntokens ); + } + +defout(){ /* write out the defines (at the end of the declaration section) */ + + register int i, c; + register char *cp; + + for( i=ndefout; i<=ntokens; ++i ){ + + cp = tokset[i].name; + if( *cp == ' ' ) ++cp; /* literals */ + + for( ; (c= *cp)!='\0'; ++cp ){ + + if( islower(c) || isupper(c) || isdigit(c) || c=='_' ); /* VOID */ + else goto nodef; + } + + fprintf( ftable, "# define %s %d\n", tokset[i].name, tokset[i].value ); + if( fdefine != NULL ) fprintf( fdefine, "# define %s %d\n", tokset[i].name, tokset[i].value ); + + nodef: ; + } + + ndefout = ntokens+1; + + } + +char * +cstash( s ) register char *s; { + char *temp; + + temp = cnamp; + do { + if( cnamp >= &cnames[cnamsz] ) error("too many characters in id's and literals" ); + else *cnamp++ = *s; + } while ( *s++ ); + return( temp ); + } + +gettok() { + register i, base; + static int peekline; /* number of '\n' seen in lookahead */ + register c, match, reserve; + +begin: + reserve = 0; + lineno += peekline; + peekline = 0; + c = getc(finput); + while( c==' ' || c=='\n' || c=='\t' || c=='\f' ){ + if( c == '\n' ) ++lineno; + c=getc(finput); + } + if( c == '/' ){ /* skip comment */ + lineno += skipcom(); + goto begin; + } + + switch(c){ + + case EOF: + return(ENDFILE); + case '{': + ungetc( c, finput ); + return( '=' ); /* action ... */ + case '<': /* get, and look up, a type name (union member name) */ + i = 0; + while( (c=getc(finput)) != '>' && c>=0 && c!= '\n' ){ + tokname[i] = c; + if( ++i >= NAMESIZE ) --i; + } + if( c != '>' ) error( "unterminated < ... > clause" ); + tokname[i] = '\0'; + for( i=1; i<=ntypes; ++i ){ + if( !strcmp( typeset[i], tokname ) ){ + numbval = i; + return( TYPENAME ); + } + } + typeset[numbval = ++ntypes] = cstash( tokname ); + return( TYPENAME ); + + case '"': + case '\'': + match = c; + tokname[0] = ' '; + i = 1; + for(;;){ + c = getc(finput); + if( c == '\n' || c == EOF ) + error("illegal or missing ' or \"" ); + if( c == '\\' ){ + c = getc(finput); + tokname[i] = '\\'; + if( ++i >= NAMESIZE ) --i; + } + else if( c == match ) break; + tokname[i] = c; + if( ++i >= NAMESIZE ) --i; + } + break; + + case '%': + case '\\': + + switch(c=getc(finput)) { + + case '0': return(TERM); + case '<': return(LEFT); + case '2': return(BINARY); + case '>': return(RIGHT); + case '%': + case '\\': return(MARK); + case '=': return(PREC); + case '{': return(LCURLY); + default: reserve = 1; + } + + default: + + if( isdigit(c) ){ /* number */ + numbval = c-'0' ; + base = (c=='0') ? 8 : 10 ; + for( c=getc(finput); isdigit(c) ; c=getc(finput) ){ + numbval = numbval*base + c - '0'; + } + ungetc( c, finput ); + return(NUMBER); + } + else if( islower(c) || isupper(c) || c=='_' || c=='.' || c=='$' ){ + i = 0; + while( islower(c) || isupper(c) || isdigit(c) || c=='_' || c=='.' || c=='$' ){ + tokname[i] = c; + if( reserve && isupper(c) ) tokname[i] += 'a'-'A'; + if( ++i >= NAMESIZE ) --i; + c = getc(finput); + } + } + else return(c); + + ungetc( c, finput ); + } + + tokname[i] = '\0'; + + if( reserve ){ /* find a reserved word */ + if( !strcmp(tokname,"term")) return( TERM ); + if( !strcmp(tokname,"token")) return( TERM ); + if( !strcmp(tokname,"left")) return( LEFT ); + if( !strcmp(tokname,"nonassoc")) return( BINARY ); + if( !strcmp(tokname,"binary")) return( BINARY ); + if( !strcmp(tokname,"right")) return( RIGHT ); + if( !strcmp(tokname,"prec")) return( PREC ); + if( !strcmp(tokname,"start")) return( START ); + if( !strcmp(tokname,"type")) return( TYPEDEF ); + if( !strcmp(tokname,"union")) return( UNION ); + error("invalid escape, or illegal reserved word: %s", tokname ); + } + + /* look ahead to distinguish IDENTIFIER from C_IDENTIFIER */ + + c = getc(finput); + while( c==' ' || c=='\t'|| c=='\n' || c=='\f' || c== '/' ) { + if( c == '\n' ) ++peekline; + else if( c == '/' ){ /* look for comments */ + peekline += skipcom(); + } + c = getc(finput); + } + if( c == ':' ) return( C_IDENTIFIER ); + ungetc( c, finput ); + return( IDENTIFIER ); +} + +fdtype( t ){ /* determine the type of a symbol */ + register v; + if( t >= NTBASE ) v = nontrst[t-NTBASE].tvalue; + else v = TYPE( toklev[t] ); + if( v <= 0 ) error( "must specify type for %s", (t>=NTBASE)?nontrst[t-NTBASE].name: + tokset[t].name ); + return( v ); + } + +chfind( t, s ) register char *s; { + int i; + + if (s[0]==' ')t=0; + TLOOP(i){ + if(!strcmp(s,tokset[i].name)){ + return( i ); + } + } + NTLOOP(i){ + if(!strcmp(s,nontrst[i].name)) { + return( i+NTBASE ); + } + } + /* cannot find name */ + if( t>1 ) + error( "%s should have been defined earlier", s ); + return( defin( t, s ) ); + } + +cpyunion(){ + /* copy the union declaration to the output, and the define file if present */ + + int level, c; + fprintf( ftable, "\n# line %d \"%s\"\n", lineno, infile ); + fprintf( ftable, "typedef union " ); + if( fdefine ) fprintf( fdefine, "\ntypedef union " ); + + level = 0; + for(;;){ + if( (c=getc(finput)) < 0 ) error( "EOF encountered while processing %%union" ); + putc( c, ftable ); + if( fdefine ) putc( c, fdefine ); + + switch( c ){ + + case '\n': + ++lineno; + break; + + case '{': + ++level; + break; + + case '}': + --level; + if( level == 0 ) { /* we are finished copying */ + fprintf( ftable, " YYSTYPE;\n" ); + if( fdefine ) fprintf( fdefine, " YYSTYPE;\nextern YYSTYPE yylval;\n" ); + return; + } + } + } + } + +cpycode(){ /* copies code between \{ and \} */ + + int c; + c = getc(finput); + if( c == '\n' ) { + c = getc(finput); + lineno++; + } + fprintf( ftable, "\n# line %d \"%s\"\n", lineno, infile ); + while( c>=0 ){ + if( c=='\\' ) + if( (c=getc(finput)) == '}' ) return; + else putc('\\', ftable ); + if( c=='%' ) + if( (c=getc(finput)) == '}' ) return; + else putc('%', ftable ); + putc( c , ftable ); + if( c == '\n' ) ++lineno; + c = getc(finput); + } + error("eof before %%}" ); + } + +skipcom(){ /* skip over comments */ + register c, i=0; /* i is the number of lines skipped */ + + /* skipcom is called after reading a / */ + + if( getc(finput) != '*' ) error( "illegal comment" ); + c = getc(finput); + while( c != EOF ){ + while( c == '*' ){ + if( (c=getc(finput)) == '/' ) return( i ); + } + if( c == '\n' ) ++i; + c = getc(finput); + } + error( "EOF inside comment" ); + /* NOTREACHED */ + } + +cpyact(offset){ /* copy C action to the next ; or closing } */ + int brac, c, match, j, s, tok; + + fprintf( faction, "\n# line %d \"%s\"\n", lineno, infile ); + + brac = 0; + +loop: + c = getc(finput); +swt: + switch( c ){ + +case ';': + if( brac == 0 ){ + putc( c , faction ); + return; + } + goto lcopy; + +case '{': + brac++; + goto lcopy; + +case '$': + s = 1; + tok = -1; + c = getc(finput); + if( c == '<' ){ /* type description */ + ungetc( c, finput ); + if( gettok() != TYPENAME ) error( "bad syntax on $ clause" ); + tok = numbval; + c = getc(finput); + } + if( c == '$' ){ + fprintf( faction, "yyval"); + if( ntypes ){ /* put out the proper tag... */ + if( tok < 0 ) tok = fdtype( *prdptr[nprod] ); + fprintf( faction, ".%s", typeset[tok] ); + } + goto loop; + } + if( c == '-' ){ + s = -s; + c = getc(finput); + } + if( isdigit(c) ){ + j=0; + while( isdigit(c) ){ + j= j*10+c-'0'; + c = getc(finput); + } + + j = j*s - offset; + if( j > 0 ){ + error( "Illegal use of $%d", j+offset ); + } + + fprintf( faction, "yypvt[-%d]", -j ); + if( ntypes ){ /* put out the proper tag */ + if( j+offset <= 0 && tok < 0 ) error( "must specify type of $%d", j+offset ); + if( tok < 0 ) tok = fdtype( prdptr[nprod][j+offset] ); + fprintf( faction, ".%s", typeset[tok] ); + } + goto swt; + } + putc( '$' , faction ); + if( s<0 ) putc('-', faction ); + goto swt; + +case '}': + if( --brac ) goto lcopy; + putc( c, faction ); + return; + + +case '/': /* look for comments */ + putc( c , faction ); + c = getc(finput); + if( c != '*' ) goto swt; + + /* it really is a comment */ + + putc( c , faction ); + c = getc(finput); + while( c != EOF ){ + while( c=='*' ){ + putc( c , faction ); + if( (c=getc(finput)) == '/' ) goto lcopy; + } + putc( c , faction ); + if( c == '\n' )++lineno; + c = getc(finput); + } + error( "EOF inside comment" ); + +case '\'': /* character constant */ + match = '\''; + goto string; + +case '"': /* character string */ + match = '"'; + + string: + + putc( c , faction ); + while( c=getc(finput) ){ + + if( c=='\\' ){ + putc( c , faction ); + c=getc(finput); + if( c == '\n' ) ++lineno; + } + else if( c==match ) goto lcopy; + else if( c=='\n' ) error( "newline in string or char. const." ); + putc( c , faction ); + } + error( "EOF in string or character constant" ); + +case EOF: + error("action does not terminate" ); + +case '\n': ++lineno; + goto lcopy; + + } + +lcopy: + putc( c , faction ); + goto loop; + } diff --git a/usr/src/cmd/yacc/y3.c b/usr/src/cmd/yacc/y3.c new file mode 100644 index 0000000000..7e793734e8 --- /dev/null +++ b/usr/src/cmd/yacc/y3.c @@ -0,0 +1,404 @@ +# include "dextern" + + /* important local variables */ +int lastred; /* the number of the last reduction of a state */ +int defact[NSTATES]; /* the default actions of states */ + +output(){ /* print the output for the states */ + + int i, k, c; + register struct wset *u, *v; + + fprintf( ftable, "short yyexca[] ={\n" ); + + SLOOP(i) { /* output the stuff for state i */ + nolook = !(tystate[i]==MUSTLOOKAHEAD); + closure(i); + /* output actions */ + nolook = 1; + aryfil( temp1, ntokens+nnonter+1, 0 ); + WSLOOP(wsets,u){ + c = *( u->pitem ); + if( c>1 && cpitem) ) putitem( v->pitem+1, (struct looksets *)0 ); + } + temp1[c] = state(c); + } + else if( c > NTBASE && temp1[ (c -= NTBASE) + ntokens ] == 0 ){ + temp1[ c+ntokens ] = amem[indgo[i]+c]; + } + } + + if( i == 1 ) temp1[1] = ACCEPTCODE; + + /* now, we have the shifts; look at the reductions */ + + lastred = 0; + WSLOOP(wsets,u){ + c = *( u->pitem ); + if( c<=0 ){ /* reduction */ + lastred = -c; + TLOOP(k){ + if( BIT(u->ws.lset,k) ){ + if( temp1[k] == 0 ) temp1[k] = c; + else if( temp1[k]<0 ){ /* reduce/reduce conflict */ + if( foutput!=NULL ) + fprintf( foutput, + "\n%d: reduce/reduce conflict (red'ns %d and %d ) on %s", + i, -temp1[k], lastred, symnam(k) ); + if( -temp1[k] > lastred ) temp1[k] = -lastred; + ++zzrrconf; + } + else { /* potential shift/reduce conflict */ + precftn( lastred, k, i ); + } + } + } + } + } + wract(i); + } + + fprintf( ftable, "\t};\n" ); + + wdef( "YYNPROD", nprod ); + + } + +int pkdebug = 0; +apack(p, n ) int *p;{ /* pack state i from temp1 into amem */ + int off; + register *pp, *qq, *rr; + int *q, *r; + + /* we don't need to worry about checking because we + we will only look entries known to be there... */ + + /* eliminate leading and trailing 0's */ + + q = p+n; + for( pp=p,off=0 ; *pp==0 && pp<=q; ++pp,--off ) /* VOID */ ; + if( pp > q ) return(0); /* no actions */ + p = pp; + + /* now, find a place for the elements from p to q, inclusive */ + + r = &amem[ACTSIZE-1]; + for( rr=amem; rr<=r; ++rr,++off ){ /* try rr */ + for( qq=rr,pp=p ; pp<=q ; ++pp,++qq){ + if( *pp != 0 ){ + if( *pp != *qq && *qq != 0 ) goto nextk; + } + } + + /* we have found an acceptable k */ + + if( pkdebug && foutput!=NULL ) fprintf( foutput, "off = %d, k = %d\n", off, rr-amem ); + + for( qq=rr,pp=p; pp<=q; ++pp,++qq ){ + if( *pp ){ + if( qq > r ) error( "action table overflow" ); + if( qq>memp ) memp = qq; + *qq = *pp; + } + } + if( pkdebug && foutput!=NULL ){ + for( pp=amem; pp<= memp; pp+=10 ){ + fprintf( foutput, "\t"); + for( qq=pp; qq<=pp+9; ++qq ) fprintf( foutput, "%d ", *qq ); + fprintf( foutput, "\n"); + } + } + return( off ); + + nextk: ; + } + error("no space in action table" ); + /* NOTREACHED */ + } + +go2out(){ /* output the gotos for the nontermninals */ + int i, j, k, best, count, cbest, times; + + fprintf( ftemp, "$\n" ); /* mark begining of gotos */ + + for( i=1; i<=nnonter; ++i ) { + go2gen(i); + + /* find the best one to make default */ + + best = -1; + times = 0; + + for( j=0; j<=nstate; ++j ){ /* is j the most frequent */ + if( tystate[j] == 0 ) continue; + if( tystate[j] == best ) continue; + + /* is tystate[j] the most frequent */ + + count = 0; + cbest = tystate[j]; + + for( k=j; k<=nstate; ++k ) if( tystate[k]==cbest ) ++count; + + if( count > times ){ + best = cbest; + times = count; + } + } + + /* best is now the default entry */ + + zzgobest += (times-1); + for( j=0; j<=nstate; ++j ){ + if( tystate[j] != 0 && tystate[j]!=best ){ + fprintf( ftemp, "%d,%d,", j, tystate[j] ); + zzgoent += 1; + } + } + + /* now, the default */ + + zzgoent += 1; + fprintf( ftemp, "%d\n", best ); + + } + + + + } + +int g2debug = 0; +go2gen(c){ /* output the gotos for nonterminal c */ + + int i, work, cc; + struct item *p, *q; + + + /* first, find nonterminals with gotos on c */ + + aryfil( temp1, nnonter+1, 0 ); + temp1[c] = 1; + + work = 1; + while( work ){ + work = 0; + PLOOP(0,i){ + if( (cc=prdptr[i][1]-NTBASE) >= 0 ){ /* cc is a nonterminal */ + if( temp1[cc] != 0 ){ /* cc has a goto on c */ + cc = *prdptr[i]-NTBASE; /* thus, the left side of production i does too */ + if( temp1[cc] == 0 ){ + work = 1; + temp1[cc] = 1; + } + } + } + } + } + + /* now, we have temp1[c] = 1 if a goto on c in closure of cc */ + + if( g2debug && foutput!=NULL ){ + fprintf( foutput, "%s: gotos on ", nontrst[c].name ); + NTLOOP(i) if( temp1[i] ) fprintf( foutput, "%s ", nontrst[i].name); + fprintf( foutput, "\n"); + } + + /* now, go through and put gotos into tystate */ + + aryfil( tystate, nstate, 0 ); + SLOOP(i){ + ITMLOOP(i,p,q){ + if( (cc= *p->pitem) >= NTBASE ){ + if( temp1[cc -= NTBASE] ){ /* goto on c is possible */ + tystate[i] = amem[indgo[i]+c]; + break; + } + } + } + } + } + +precftn(r,t,s){ /* decide a shift/reduce conflict by precedence. + /* r is a rule number, t a token number */ + /* the conflict is in state s */ + /* temp1[t] is changed to reflect the action */ + + int lp,lt, action; + + lp = levprd[r]; + lt = toklev[t]; + if( PLEVEL(lt) == 0 || PLEVEL(lp) == 0 ) { + /* conflict */ + if( foutput != NULL ) fprintf( foutput, "\n%d: shift/reduce conflict (shift %d, red'n %d) on %s", + s, temp1[t], r, symnam(t) ); + ++zzsrconf; + return; + } + if( PLEVEL(lt) == PLEVEL(lp) ) action = ASSOC(lt); + else if( PLEVEL(lt) > PLEVEL(lp) ) action = RASC; /* shift */ + else action = LASC; /* reduce */ + + switch( action ){ + + case BASC: /* error action */ + temp1[t] = ERRCODE; + return; + + case LASC: /* reduce */ + temp1[t] = -r; + return; + + } + } + +wract(i){ /* output state i */ + /* temp1 has the actions, lastred the default */ + int p, p0, p1; + int ntimes, tred, count, j; + int flag; + + /* find the best choice for lastred */ + + lastred = 0; + ntimes = 0; + TLOOP(j){ + if( temp1[j] >= 0 ) continue; + if( temp1[j]+lastred == 0 ) continue; + /* count the number of appearances of temp1[j] */ + count = 0; + tred = -temp1[j]; + levprd[tred] |= REDFLAG; + TLOOP(p){ + if( temp1[p]+tred == 0 ) ++count; + } + if( count >ntimes ){ + lastred = tred; + ntimes = count; + } + } + + /* for error recovery, arrange that, if there is a shift on the + /* error recovery token, `error', that the default be the error action */ + if( temp1[1] > 0 ) lastred = 0; + + /* clear out entries in temp1 which equal lastred */ + TLOOP(p) if( temp1[p]+lastred == 0 )temp1[p]=0; + + wrstate(i); + defact[i] = lastred; + + flag = 0; + TLOOP(p0){ + if( (p1=temp1[p0])!=0 ) { + if( p1 < 0 ){ + p1 = -p1; + goto exc; + } + else if( p1 == ACCEPTCODE ) { + p1 = -1; + goto exc; + } + else if( p1 == ERRCODE ) { + p1 = 0; + goto exc; + exc: + if( flag++ == 0 ) fprintf( ftable, "-1, %d,\n", i ); + fprintf( ftable, "\t%d, %d,\n", tokset[p0].value, p1 ); + ++zzexcp; + } + else { + fprintf( ftemp, "%d,%d,", tokset[p0].value, p1 ); + ++zzacent; + } + } + } + if( flag ) { + defact[i] = -2; + fprintf( ftable, "\t-2, %d,\n", lastred ); + } + fprintf( ftemp, "\n" ); + return; + } + +wrstate(i){ /* writes state i */ + register j0,j1; + register struct item *pp, *qq; + register struct wset *u; + + if( foutput == NULL ) return; + fprintf( foutput, "\nstate %d\n",i); + ITMLOOP(i,pp,qq) fprintf( foutput, "\t%s\n", writem(pp->pitem)); + if( tystate[i] == MUSTLOOKAHEAD ){ + /* print out empty productions in closure */ + WSLOOP( wsets+(pstate[i+1]-pstate[i]), u ){ + if( *(u->pitem) < 0 ) fprintf( foutput, "\t%s\n", writem(u->pitem) ); + } + } + + /* check for state equal to another */ + + TLOOP(j0) if( (j1=temp1[j0]) != 0 ){ + fprintf( foutput, "\n\t%s ", symnam(j0) ); + if( j1>0 ){ /* shift, error, or accept */ + if( j1 == ACCEPTCODE ) fprintf( foutput, "accept" ); + else if( j1 == ERRCODE ) fprintf( foutput, "error" ); + else fprintf( foutput, "shift %d", j1 ); + } + else fprintf( foutput, "reduce %d",-j1 ); + } + + /* output the final production */ + + if( lastred ) fprintf( foutput, "\n\t. reduce %d\n\n", lastred ); + else fprintf( foutput, "\n\t. error\n\n" ); + + /* now, output nonterminal actions */ + + j1 = ntokens; + for( j0 = 1; j0 <= nnonter; ++j0 ){ + if( temp1[++j1] ) fprintf( foutput, "\t%s goto %d\n", symnam( j0+NTBASE), temp1[j1] ); + } + + } + +wdef( s, n ) char *s; { /* output a definition of s to the value n */ + fprintf( ftable, "# define %s %d\n", s, n ); + } + +warray( s, v, n ) char *s; int *v, n; { + + register i; + + fprintf( ftable, "short %s[]={\n", s ); + for( i=0; i j ) j = *p; + if( *p < k ) k = *p; + } + if( k <= j ){ /* nontrivial situation */ + /* temporarily, kill this for compatibility + j -= k; /* j is now the range */ + if( k > maxoff ) maxoff = k; + } + greed[i] = (yypact[i+1]-yypact[i]) + 2*j; + if( j > maxspr ) maxspr = j; + } + + /* initialize ggreed table */ + + for( i=1; i<=nnonter; ++i ){ + ggreed[i] = 1; + j = 0; + /* minimum entry index is always 0 */ + q = mem + yypgo[i+1] -1; + for( p = mem+yypgo[i]; p j ) j = *p; + } + ggreed[i] = ggreed[i] + 2*j; + if( j > maxoff ) maxoff = j; + } + + + /* now, prepare to put the shift actions into the a array */ + + for( i=0; i1 ) fprintf( ftable, "State %d: null\n", i ); + pa[i] = YYFLAG1; + } + + while( (i = nxti()) != NOMORE ) { + if( i >= 0 ) stin(i); + else gin(-i); + + } + + if( adb>2 ){ /* print a array */ + for( p=a; p <= maxa; p += 10){ + fprintf( ftable, "%4d ", p-a ); + for( i=0; i<10; ++i ) fprintf( ftable, "%4d ", p[i] ); + fprintf( ftable, "\n" ); + } + } + /* write out the output appropriate to the language */ + + aoutput(); + + osummary(); + ZAPFILE(TEMPNAME); + } + +gin(i){ + + register *p, *r, *s, *q1, *q2; + + /* enter gotos on nonterminal i into array a */ + + ggreed[i] = 0; + + q2 = mem+ yypgo[i+1] - 1; + q1 = mem + yypgo[i]; + + /* now, find a place for it */ + + for( p=a; p < &a[ACTSIZE]; ++p ){ + if( *p ) continue; + for( r=q1; r maxa ){ + if( (maxa=s) > &a[ACTSIZE] ) error( "a array overflow" ); + } + } + /* we have found a spot */ + + *p = *q2; + if( p > maxa ){ + if( (maxa=p) > &a[ACTSIZE] ) error( "a array overflow" ); + } + for( r=q1; r1 ) fprintf( ftable, "Nonterminal %d, entry at %d\n" , i, pgo[i] ); + goto nextgi; + + nextgp: ; + } + + error( "cannot place goto %d\n", i ); + + nextgi: ; + } + +stin(i){ + register *r, *s, n, flag, j, *q1, *q2; + + greed[i] = 0; + + /* enter state i into the a array */ + + q2 = mem+yypact[i+1]; + q1 = mem+yypact[i]; + /* find an acceptable place */ + + for( n= -maxoff; n1 ) fprintf( ftable, "State %d: entry at %d equals state %d\n", + i, n, j ); + return; + } + goto nextn; /* we have some disagreement */ + } + } + + for( r = q1; r < q2; r += 2 ){ + if( (s = *r + n + a ) >= &a[ACTSIZE] ) error( "out of space in optimizer a array" ); + if( s > maxa ) maxa = s; + if( *s != 0 && *s != r[1] ) error( "clobber of a array, pos'n %d, by %d", s-a, r[1] ); + *s = r[1]; + } + pa[i] = n; + if( adb>1 ) fprintf( ftable, "State %d: entry at %d\n", i, pa[i] ); + return; + + nextn: ; + } + + error( "Error; failure to place state %d\n", i ); + + } + +nxti(){ /* finds the next i */ + register i, max, maxi; + + max = 0; + + for( i=1; i<= nnonter; ++i ) if( ggreed[i] >= max ){ + max = ggreed[i]; + maxi = -i; + } + + for( i=0; i= max ){ + max = greed[i]; + maxi = i; + } + + if( nxdb ) fprintf( ftable, "nxti = %d, max = %d\n", maxi, max ); + if( max==0 ) return( NOMORE ); + else return( maxi ); + } + +osummary(){ + /* write summary */ + + register i, *p; + + if( foutput == NULL ) return; + i=0; + for( p=maxa; p>=a; --p ) { + if( *p == 0 ) ++i; + } + + fprintf( foutput, "Optimizer space used: input %d/%d, output %d/%d\n", + pmem-mem+1, MEMSIZE, maxa-a+1, ACTSIZE ); + fprintf( foutput, "%d table entries, %d zero\n", (maxa-a)+1, i ); + fprintf( foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff ); + + } + +aoutput(){ /* this version is for C */ + + + /* write out the optimized parser */ + + fprintf( ftable, "# define YYLAST %d\n", maxa-a+1 ); + + arout( "yyact", a, (maxa-a)+1 ); + arout( "yypact", pa, nstate ); + arout( "yypgo", pgo, nnonter+1 ); + + } + +arout( s, v, n ) char *s; int *v, n; { + + register i; + + fprintf( ftable, "short %s[]={\n", s ); + for( i=0; i &mem[MEMSIZE] ) error( "out of space" ); + return( c ); + + } diff --git a/usr/src/cmd/yacc/yaccdiffs b/usr/src/cmd/yacc/yaccdiffs new file mode 100644 index 0000000000..e5f7f4fda6 --- /dev/null +++ b/usr/src/cmd/yacc/yaccdiffs @@ -0,0 +1,198 @@ + + + + + + + + + + Yacc Differences + + + + + +This document gives a short list of differences between the +new Yacc and earlier Yaccs. + +_B_u_g_s _F_i_x_e_d + +1. There was a bug which caused Yacc to silently steal + away in the night if an action had mismatched '' in it; + this is fixed. + +2. A number of table size overflow conditions used to be + checked incorrectly or not at all; this is now better. + +3. A bug which suppressed the printing of some rules with + empty RHS's on the y.output file has been fixed. + +_S_p_e_e_d_u_p_s, _S_h_r_i_n_k_s, _a_n_d _D_i_d_d_l_e_s + +1. The old optimizer (-o) flag is now the default in Yacc. + At the same time, the Yacc process itself has been sped + up; the result is that Yacc takes about the same or + slightly longer on short inputs, but is much faster on + long inputs. + +2. The optimized parsers produced by Yacc are likely to be + 2-3 times faster and 1-2k bytes smaller than the old + ones, for medium/large grammars. The time to parse is + now essentially independent of the grammar size; it + used to grow as the size of the grammar did. + +3. The y.output file has been considerably reformatted, to + make it easier to read. The old "goto" table is gone; + the goto's for nonterminal symbols are now printed in + the states where they occur. Rules which can be + reduced in a state have their rule number printed after + them, in (). This makes it much easier to interpret + the "reduce n" actions. The message "same as n" has + been removed; duplicate states are in fact duplicated, + saving shuffling and cross-referencing. + +4. Various table sizes are somewhat bigger. + +5. The form feed character, and the construction '\f', are + now recognized; form feed is ignored (=whitespace) on + input. + + + + + January 14, 1977 + + + + + + - 2 - + + + +6. The arrays "yysterm" and "yysnter" are no longer pro- + duced on output; they were little used, and took up a + surprising amount of space in the parser. + +7. Rules in the input which are not reduced are now com- + plained about; they may represent unreachable parts of + the grammar, botched precedence, or duplicate rules. + As with conflicts, a summary complaint, "n rules not + reduced", appears at the terminal; more information is + on the y.output file. + +_N_e_w _F_e_a_t_u_r_e_s + +1. The actions are now copied into the middle of the + parser, rather than being gathered into a separate rou- + tine. It's faster. Also, you can return a value from + yyparse (and stop parsing...) by saying `return(x);' in + an action. There are macros which simulate various + interesting parsing actions: + + YYERROR causes the parser to behave as if a syntax + error had been encountered (i.e., do error recovery) + YYACCEPT causes a return from yyparse with a value of 0 + YYABORT causes a return from yyparse with a value of 1 + + +2. The repositioning of the actions may cause scope prob- + lems for some people who include lexical analyzers in + funny places. This can probably be avoided by using + another new feature: the `-d' option. Invoking Yacc + with the -d option causes the #defines generated by + Yacc to be written out onto a file called "y.tab.h", + (as well as on the "y.tab.c" file). This can then be + included as desired in lexical analyzers, etc. + +3. Actions are now permitted within rules; for such + actions, $$, $1, $2, etc. continue to have their usual + meanings. An error message is returned if any $n + refers to a value lying to the right of the action in + the rule. These internal actions are assumed to return + a value, which is accessed through the $n mechanism. + In the y.output file, the actions are referred to by + created nonterminal names of the form $$nnn. All + actions within rules are assumed to be distinct. If + some actions are the same, Yacc might report + reduce/reduce conflicts which could be resolved by + explicitly identifying identical actions; does anyone + have a good idea for a syntax to do this? The = sign + may now be omitted in action constructions of the form + ={ ... }. + + + + + + + January 14, 1977 + + + + + + - 3 - + + + +4. As a result of the rearrangement of rules, people who + thought they knew what $1 really turned into, and wrote + programs which referred to yypv[1], etc., are in trou- + ble. See Steve Johnson if you are really suffering. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + January 14, 1977 + + diff --git a/usr/src/cmd/yacc/yaccnews b/usr/src/cmd/yacc/yaccnews new file mode 100644 index 0000000000..547c81e41b --- /dev/null +++ b/usr/src/cmd/yacc/yaccnews @@ -0,0 +1,149 @@ +5/18/78 +A new version of Yacc has been installed which contains some new +features relating to error recovery, detection of funny conditions in the +grammar, and strong typing. Existing grammars should continue to work, +with the possible exception of somewhat better error recovery behavior. +More details follow: + +*** Ratfor and EFL Yacc are dead. Long live C! + +*** The y.tab.c file now uses the # line feature to reflect + most error conditions in actions, etc., back to the yacc source + file, rather than the y.tab.c file. As always with such features, + lookahead may cause the line number to be one too large + occasionally. + +*** The error recovery algorithm has been changed to cause the + parser never to reduce on a state where there is a shift + on the special token `error'. This has the effect of causing + the error recovery action to take place somewhat closer to the + location of the error than previously. It does not affect the + behavior of the parser in the absence of errors. The parse + tables may be 1-2% larger as a result of this change. + +*** Yacc now detects the existence of nonterminals in the grammar + which can never derive any strings of tokens (even the empty string). + The simplest example is the grammar: + %% + s : s 'a' ; + Here, one must reduce `s' in order to reduce `s': the + parser would always report error. If such nonterminals are + present, Yacc reports all such, then terminates. + +*** There is a new reserved word, %start. When used in the declarations + section, it may be used to declare the start symbol of the grammar. + If %start does not appear, the start symbol is, as at present, the + first nonterminal symbol encountered. + +*** Yacc produced parsers are notorious for producing many many + comments from lint. The problem is the value stack of the + parser, which typically may contain integers, pointers, and + possibly even floating point, etc., values. The lack + of tight specification of this stack leads to potential + nonportability, and considerable loss of the diagnostic power + of lint. Thus, some new features have been added which make use + of the new structure and union facilities of C. In effect, + the user of Yacc may `honestly' declare the value stack, as + well as the lexical interface variable, yylval, to be unions + of all the types desired. Yacc will keep track of the types + declared for all terminals and nonterminals, and automatically + insert the appropriate union tag for all constructions such + as $1, $$, etc. It is up to the user to supply the appropriate + union declaration, and to declare the type of all the terminal + and nonterminal symbols which will have values. If the type + declaration feature is used at all, it must be used correctly; + if it is not used, the default values are integers, as at present. + The new type declaration features are described below: + +*** There is a new keyword, %union. A construction such as + %union { + int inttag; + float floattag; + struct mumble *ptrtag; + } + can be used, in the declarations section, to declare + the type of the yacc stack. The declaration is + effectively copied to the y.tab.c file, and, if the -d + option is present, to the y.tab.h file as well. The + declaration is used to declare the typedef YYSTYPE, which is the + type of the value stack. If the -d option is present, + the declaration + extern YYSTYPE yylval; + is also placed onto the y.tab.h file. Note that the lexical + analyzer must be changed to use the appropriate union tag when + assigning values. It is not necessary that the %union + mechanism be used, as long as there is a union type YYSTYPE + defined in the declarations section. + +*** The %token, %left, %right, and %nonassoc declarations now + accept a union tag, enclosed in angle brackets (<...>), immediately + after the keyword. All tokens mentioned in that declaration are + taken to have the appropriate type. + +*** There is a new keyword, %type, also followed by a union tag + in angle brackets, which may be used in the declarations section to + declare nonterminal symbols to have a particular type. + + In both cases, whenever a $$ or $n is encountered in an action, + the appropriate union tag is supplied by Yacc. Once any type is + declared, it is an error to use a $$ or $n whose type is unknown. + It is also illegal to have a grammar rule whose LHS has a type, + but the rule has no action and the default action { $$ = $1; } + would be inapplicable because $1 had a different type. + +*** There are occasional times when the type of something is + not known (for example, when an action within a rule returns a + value). In this case, the $$ and $n syntax is extended + to permit the declaration of the type: the syntax is + $$ + and + $n + respectively. This rather strange syntax is necessitated by the + need to distinguish the <> surrounding the tag from the < and > + operators of C in the action. It is anticipated that the usage + will be rare. + +*** As always, report gripes, bugs, suggestions to SCJ *** + +12/01/76 +A newer version of Yacc has been installed which copies the actions directly +into the parser, rather than gathering them into a separate routine. +The advantages include +1. It's faster +2. You can return a value from yyparse (and stop parsing...) by + saying `return(x);' in an action +3. There are macros which simulate various interesting parsing + actions: + YYERROR causes the parser to behave as if a syntax + error had been encountered (i.e., do error recovery) + YYACCEPT causes a return from yyparse with a value of 0 + YYABORT causes a return from yyparse with a value of 1 + +The repositioning of the actions may cause scope problems +for some people who include lexical analyzers in funny places. +This can probably be avoided by using another +new feature: the `-d' option. +Invoking Yacc with the -d option causes the #defines +generated by Yacc to be written out onto a file +called "y.tab.h". This can then be included as desired +in lexical analyzers, etc. + +11/28/76 +A new version of Yacc has been installed which permits actions within +rules. For such actions, $$ and $1, $2, etc. continue to have their +usual meanings. An error message is returned if any $n refers to +a value lying to the right of the action in the rule. + +These internal actions are assumed to return a value, which is accessed +through the $n mechanism. + +In the y.output file, the actions are referred to by created nonterminal +names of the form $$nnn. + +All actions within rules are assumed to be distinct. If some actions +are the same, Yacc might report reduce/reduce conflicts which could +be resolved by explicitly identifying identical actions; does anyone +have a good idea for a syntax to do this? + +In the new Yacc, the = sign may now be omitted in action constructions +of the form ={ ... } diff --git a/usr/src/cmd/yacc/yaccpar b/usr/src/cmd/yacc/yaccpar new file mode 100644 index 0000000000..daa01c4f5b --- /dev/null +++ b/usr/src/cmd/yacc/yaccpar @@ -0,0 +1,149 @@ +# +# define YYFLAG -1000 +# define YYERROR goto yyerrlab +# define YYACCEPT return(0) +# define YYABORT return(1) + +/* parser for yacc output */ + +#ifdef YYDEBUG +int yydebug = 0; /* 1 for debugging */ +#endif +YYSTYPE yyv[YYMAXDEPTH]; /* where the values are stored */ +int yychar = -1; /* current input token number */ +int yynerrs = 0; /* number of errors */ +short yyerrflag = 0; /* error recovery flag */ + +yyparse() { + + short yys[YYMAXDEPTH]; + short yyj, yym; + register YYSTYPE *yypvt; + register short yystate, *yyps, yyn; + register YYSTYPE *yypv; + register short *yyxi; + + yystate = 0; + yychar = -1; + yynerrs = 0; + yyerrflag = 0; + yyps= &yys[-1]; + yypv= &yyv[-1]; + + yystack: /* put a state and value onto the stack */ + +#ifdef YYDEBUG + if( yydebug ) printf( "state %d, char 0%o\n", yystate, yychar ); +#endif + if( ++yyps> &yys[YYMAXDEPTH] ) { yyerror( "yacc stack overflow" ); return(1); } + *yyps = yystate; + ++yypv; + *yypv = yyval; + + yynewstate: + + yyn = yypact[yystate]; + + if( yyn<= YYFLAG ) goto yydefault; /* simple state */ + + if( yychar<0 ) if( (yychar=yylex())<0 ) yychar=0; + if( (yyn += yychar)<0 || yyn >= YYLAST ) goto yydefault; + + if( yychk[ yyn=yyact[ yyn ] ] == yychar ){ /* valid shift */ + yychar = -1; + yyval = yylval; + yystate = yyn; + if( yyerrflag > 0 ) --yyerrflag; + goto yystack; + } + + yydefault: + /* default state action */ + + if( (yyn=yydef[yystate]) == -2 ) { + if( yychar<0 ) if( (yychar=yylex())<0 ) yychar = 0; + /* look through exception table */ + + for( yyxi=yyexca; (*yyxi!= (-1)) || (yyxi[1]!=yystate) ; yyxi += 2 ) ; /* VOID */ + + while( *(yyxi+=2) >= 0 ){ + if( *yyxi == yychar ) break; + } + if( (yyn = yyxi[1]) < 0 ) return(0); /* accept */ + } + + if( yyn == 0 ){ /* error */ + /* error ... attempt to resume parsing */ + + switch( yyerrflag ){ + + case 0: /* brand new error */ + + yyerror( "syntax error" ); + yyerrlab: + ++yynerrs; + + case 1: + case 2: /* incompletely recovered error ... try again */ + + yyerrflag = 3; + + /* find a state where "error" is a legal shift action */ + + while ( yyps >= yys ) { + yyn = yypact[*yyps] + YYERRCODE; + if( yyn>= 0 && yyn < YYLAST && yychk[yyact[yyn]] == YYERRCODE ){ + yystate = yyact[yyn]; /* simulate a shift of "error" */ + goto yystack; + } + yyn = yypact[*yyps]; + + /* the current yyps has no shift onn "error", pop stack */ + +#ifdef YYDEBUG + if( yydebug ) printf( "error recovery pops state %d, uncovers %d\n", *yyps, yyps[-1] ); +#endif + --yyps; + --yypv; + } + + /* there is no state on the stack with an error shift ... abort */ + + yyabort: + return(1); + + + case 3: /* no shift yet; clobber input char */ + +#ifdef YYDEBUG + if( yydebug ) printf( "error recovery discards char %d\n", yychar ); +#endif + + if( yychar == 0 ) goto yyabort; /* don't discard EOF, quit */ + yychar = -1; + goto yynewstate; /* try again in the same state */ + + } + + } + + /* reduction by production yyn */ + +#ifdef YYDEBUG + if( yydebug ) printf("reduce %d\n",yyn); +#endif + yyps -= yyr2[yyn]; + yypvt = yypv; + yypv -= yyr2[yyn]; + yyval = yypv[1]; + yym=yyn; + /* consult goto table to find next state */ + yyn = yyr1[yyn]; + yyj = yypgo[yyn] + *yyps + 1; + if( yyj>=YYLAST || yychk[ yystate = yyact[yyj] ] != -yyn ) yystate = yyact[yypgo[yyn]]; + switch(yym){ + $A + } + goto yystack; /* stack new state and value */ + + } -- 2.20.1