BSD 4 release
[unix-history] / usr / src / cmd / pc0 / yyrecover.c
/* Copyright (c) 1979 Regents of the University of California */
static char sccsid[] = "@(#)yyrecover.c 1.1 8/27/80";
#include "whoami.h"
#include "0.h"
#include "yy.h"
/*
* Very simplified version of Graham-Rhodes error recovery
* method for LALR parsers. Backward move is embodied in
* default reductions of the yacc parser until an error condition
* is reached. Forward move is over a small number of input tokens
* and cannot "condense". The basic corrections are:
*
* 1) Delete the input token.
*
* 2) Replace the current input with a legal input.
*
* 3) Insert a legal token.
*
* All corrections are weighted, considered only if they allow
* at least two shifts, and the cost of a correction increases if
* it allows shifting over only a part of the lookahead.
*
* Another error situation is that which occurs when an identifier "fails"
* a reduction because it is not the required "class".
* In this case, we also consider replacing this identifier, which has
* already been shifted over, with an identifier of the correct class.
*
* Another correction performed here is unique symbol insertion.
* If the current state admits only one input, and no other alternative
* correction presents itself, then that symbol will be inserted.
* There is a danger in this of looping, and it is handled
* by counting true shifts over input (see below).
*
*
* A final class of corrections, considered only when the error
* occurred immediately after a shift over a terminal, involves
* the three basic corrections above, but with the point of error
* considered to be before this terminal was shifted over, effectively
* "unreading" this terminal. This is a feeble attempt at elimination
* of the left-right bias and because "if" has a low weight and some
* statements are quite simple i.e.
*
* cse ch of ...
*
* we can get a small number of errors. The major deficiency of
* this is that we back up only one token, and that the forward
* move is over a small number of tokens, often not enough to really
* tell what the input should be, e.g. in
*
* a[i] > a[i - 1] ...
*
* In such cases a bad identifier (misspelled keyword) or omitted
* keyword will be change or inserted as "if" as it has the lowest cost.
* This is not terribly bad, as "if"s are most common.
* This also allows the correction of other errors.
*
* This recovery depends on the default reductions which delay
* noticing the error until the parse reaches a state where the
* relevant "alternatives" are visible. Note that it does not
* consider tokens which will cause reductions before being
* shifted over. This requires the grammar to be written in a
* certain way for the recovery to work correctly.
* In some sense, also, the recovery suffers because we have
* LALR(1) tables rather than LR(1) tables, e.g. in
*
* if rec.field < rec2,field2 then
*/
\f
/*
* Definitions of possible corrective actions
*/
#define CPANIC 0
#define CDELETE 1
#define CREPLACE 2
#define CINSERT 3
#define CUNIQUE 4
#define CCHIDENT 5
/*
* Multiplicative cost factors for corrective actions.
*
* When an error occurs we take YCSIZ - 1 look-ahead tokens.
* If a correction being considered will shift over only part of
* that look-ahead, it is not completely discarded, but rather
* "weighted", its cost being multiplied by a weighting factor.
* For a correction to be considered its weighted cost must be less
* than CLIMIT.
*
* Non-weighted costs are considered:
*
* LOW <= 3
* MEDIUM 4,5
* HIGH >= 6
*
* CURRENT WEIGHTING STRATEGY: Aug 20, 1977
*
* For all kinds of corrections we demand shifts over two symbols.
* Corrections have high weight even after two symbol
* shifts because the costs for deleting and inserting symbols are actually
* quite low; we do not want to change weighty symbols
* on inconclusive evidence.
*
* The weights are the same after the third look ahead.
* This prevents later, unrelated errors from causing "funny"
* biases of the weights toward one type of correction.
*
* Current look ahead is 5 symbols.
*/
/*** CLIMIT is defined in yy.h for yycosts ***/
#define CPRLIMIT 50
#define CCHIDCOST 3
char insmult[8] = {INFINITY, INFINITY, INFINITY, 15, 8, 6, 3, 1};
char repmult[7] = {INFINITY, INFINITY, INFINITY, 8, 6, 3, 1};
char delmult[6] = {INFINITY, INFINITY, INFINITY, 6, 3, 1};
\f
#define NOCHAR -1
#define Eprintf if (errtrace) printf
#define Tprintf if (testtrace) printf
/*
* Action arrays of the parser needed here
*/
int yyact[], yypact[], *yypv;
/*
* Yytips is the tip of the stack when using
* the function loccor to check for local
* syntactic correctness. As we don't want
* to copy the whole parser stack, but want
* to simulate parser moves, we "split"
* the parser stack and keep the tip here.
*/
#define YYTIPSIZ 16
int yytips[YYTIPSIZ], yytipct;
int yytipv[YYTIPSIZ];
/*
* The array YC saves the lookahead tokens for the
* forward moves.
* Yccnt is the number of tokens in the YC array.
*/
#define YCSIZ 6
int yCcnt;
struct yytok YC0[YCSIZ + 1];
struct yytok *YC;
/*
* YCps gives the top of stack at
* the point of error.
*/
bool yyunique 1;
STATIC unsigned yyTshifts;
\f
/*
* Cact is the corrective action we have decided on
* so far, ccost its cost, and cchar the associated token.
* Cflag tells if the correction is over the previous input token.
*/
int cact, ccost, cchar, cflag;
/*
* ACtok holds the token under
* consideration when examining
* the lookaheads in a state.
*/
struct yytok ACtok;
#define acchar ACtok.Yychar
#define aclval ACtok.Yylval
/*
* Make a correction to the current stack which has
* top of stack pointer Ps.
*/
yyrecover(Ps0, idfail)
int *Ps0, idfail;
{
register int c, i;
int yyrwant, yyrhave;
#ifdef PI
Recovery = 1;
#endif
YC = &YC0[1];
#ifdef DEBUG
if (errtrace) {
setpfx('p');
yerror("Point of error");
printf("States %d %d ...", Ps0[0], Ps0[-1]);
if (idfail)
printf(" [Idfail]");
pchr('\n');
printf("Input %s%s", tokname(&Y , 0)
, tokname(&Y , 1));
}
#endif
/*
* We first save the current input token
* and its associated semantic information.
*/
if (yychar < 0)
yychar = yylex();
copy(&YC[0], &Y, sizeof Y);
/*
* Set the default action and cost
*/
cact = CPANIC, ccost = CLIMIT, cflag = 0;
/*
* Peek ahead
*/
for (yCcnt = 1; yCcnt < YCSIZ; yCcnt++) {
yychar = yylex();
copy(&YC[yCcnt], &Y, sizeof YC[0]);
#ifdef DEBUG
Eprintf(" | %s%s", tokname(&YC[yCcnt] , 0 )
, tokname(&YC[yCcnt] , 1 ));
#endif
}
#ifdef DEBUG
Eprintf("\n");
#endif
/*
* If we are here because a reduction failed, try
* correcting that.
*/
if (idfail) {
/*
* Save the particulars about
* the kind of identifier we want/have.
*/
yyrwant = yyidwant;
yyrhave = yyidhave;
#ifdef DEBUG
Tprintf(" Try Replace %s identifier with %s identifier cost=%d\n",
classes[yyidhave], classes[yyidwant], CCHIDCOST);
#endif
/*
* Save the semantics of the ID on the
* stack, and null them out to free
* up the reduction in question.
*/
i = yypv[0];
yypv[0] = nullsem(YID);
c = correct(NOCHAR, 0, CCHIDCOST, &repmult[2], Ps0, yypv);
yypv[0] = i;
#ifdef DEBUG
if (c < CPRLIMIT || fulltrace)
Eprintf("Cost %2d Replace %s identifier with %s identifier\n", c, classes[yyrhave], classes[yyrwant]);
#endif
if (c < ccost)
cact = CCHIDENT, ccost = c, cchar = YID;
}
/*
* First try correcting the state we are in
*/
trystate(Ps0, yypv, 0, &insmult[1], &delmult[1], &repmult[1]);
/*
* Now, if we just shifted over a terminal, try
* correcting it.
*/
if (OY.Yychar != -1 && OY.Yylval != nullsem(OY.Yychar)) {
YC--;
copy(&YC[0], &OY, sizeof YC[0]);
trystate(Ps0 - 1, yypv - 1, 1, insmult, delmult, repmult);
if (cflag == 0)
YC++;
else {
yypv--;
#ifdef PXP
yypw--;
#endif
Ps0--;
yCcnt++;
}
}
/*
* Restoring the first look ahead into
* the scanner token allows the error message
* routine to print the error message with the text
* of the correct line.
*/
copy(&Y, &YC[0], sizeof Y);
/*
* Unique symbol insertion.
*
* If there was no reasonable correction found,
* but only one input to the parser is acceptable
* we report that, and try it.
*
* Special precautions here to prevent looping.
* The number of true inputs shifted over at the point
* of the last unique insertion is recorded in the
* variable yyTshifts. If this is not less than
* the current number in yytshifts, we do not insert.
* Thus, after one unique insertion, no more unique
* insertions will be made until an input is shifted
* over. This guarantees termination.
*/
if (cact == CPANIC && !idfail) {
register int *ap;
ap = &yyact[yypact[*Ps0 + 1]];
if (*ap == -ERROR)
ap =+ 2;
if (ap[0] <= 0 && ap[2] > 0) {
cchar = -ap[0];
if (cchar == YEOF)
yyexeof();
if (cchar != ERROR && yyTshifts < yytshifts) {
cact = CUNIQUE;
#ifdef DEBUG
Eprintf("Unique symbol %s%s\n"
, charname(cchar , 0 )
, charname(cchar , 1 ));
#endif
/*
* Note that the inserted symbol
* will not be counted as a true input
* (i.e. the "yytshifts--" below)
* so that a true shift will be needed
* to make yytshifts > yyTshifts.
*/
yyTshifts = yytshifts;
}
}
}
/*
* Set up to perform the correction.
* Build a token appropriate for replacement
* or insertion in the yytok structure ACchar
* having the attributes of the input at the
* point of error.
*/
copy(&ACtok, &YC[0], sizeof ACtok);
acchar = cchar;
aclval = nullsem(acchar);
if (aclval != NIL)
recovered();
switch (cact) {
/*
* Panic, just restore the
* lookahead and return.
*/
case CPANIC:
setpfx('E');
if (idfail) {
copy(&Y, &OY, sizeof Y);
if (yyrhave == NIL) {
#ifdef PI
if (yybaduse(yypv[0], yyeline, ISUNDEF) == NIL)
#endif
yerror("Undefined identifier");
} else {
yerror("Improper %s identifier", classes[yyrhave]);
#ifdef PI
yybaduse(yypv[0], yyeline, NIL);
#endif
}
/*
* Suppress message from panic routine
*/
yyshifts = 1;
}
i = 0;
/* Note that on one path we dont touch yyshifts ! */
break;
/*
* Delete the input.
* Mark this as a shift over true input.
* Restore the lookahead starting at
* the second token.
*/
case CDELETE:
if (ccost != 0)
yerror("Deleted %s%s", tokname(&YC[0] , 0 )
, tokname(&YC[0] , 1 ));
yytshifts++;
i = 1;
yyshifts = 0;
break;
/*
* Replace the input with a new token.
*/
case CREPLACE:
if (acchar == YEOF)
yyexeof();
if (acchar == YEND)
aclval = NIL;
yerror("Replaced %s%s with a %s%s",
tokname(&YC[0] , 0 ),
tokname(&YC[0] , 1 ),
tokname(&ACtok , 0 ),
tokname(&ACtok , 1 ));
copy(&YC[0], &ACtok, sizeof YC[0]);
i = 0;
yyshifts = 0;
break;
/*
* Insert a token.
* Don't count this token as a true input shift.
* For inserted "end"s pas.y is responsible
* for the error message later so suppress it.
* Restore all the lookahead.
*/
case CINSERT:
if (acchar == YEOF)
yyexeof();
if (acchar != YEND)
yerror("Inserted %s%s",
tokname(&ACtok , 0 ),
tokname(&ACtok , 1 ));
yytshifts--;
i = 0;
yyshifts = 0;
break;
/*
* Make a unique symbol correction.
* Like an insertion but a different message.
*/
case CUNIQUE:
setpfx('E');
yerror("Expected %s%s",
tokname(&ACtok , 0 ),
tokname(&ACtok , 1 ));
yytshifts--;
i = 0;
if (ccost == 0 || yyunique)
yyshifts = 0;
else
yyshifts = -1;
break;
/*
* Change an identifier's type
* to make it work.
*/
case CCHIDENT:
copy(&Y, &OY, sizeof Y);
#ifdef PI
i = 1 << yyrwant;
#endif
if (yyrhave == NIL) {
yerror("Undefined %s", classes[yyrwant]);
#ifdef PI
i =| ISUNDEF;
#endif
} else
yerror("Replaced %s id with a %s id", classes[yyrhave], classes[yyrwant]);
#ifdef PI
yybaduse(yypv[0], yyeline, i);
#endif
yypv[0] = nullsem(YID);
i = 0;
yyshifts = 0;
break;
}
/*
* Restore the desired portion of the lookahead,
* and possibly the inserted or unique inserted token.
*/
for (yCcnt--; yCcnt >= i; yCcnt--)
unyylex(&YC[yCcnt]);
if (cact == CINSERT || cact == CUNIQUE)
unyylex(&ACtok);
/*
* Put the scanner back in sync.
*/
yychar = yylex();
/*
* We succeeded if we didn't "panic".
*/
Recovery = 0;
Ps = Ps0;
return (cact != CPANIC);
}
yyexeof()
{
yerror("End-of-file expected - QUIT");
pexit(ERRS);
}
yyunexeof()
{
yerror("Unexpected end-of-file - QUIT");
pexit(ERRS);
}
\f
/*
* Try corrections with the state at Ps0.
* Flag is 0 if this is the top of stack state,
* 1 if it is the state below.
*/
trystate(Ps0, Pv0, flag, insmult, delmult, repmult)
int *Ps0, *Pv0, flag;
char *insmult, *delmult, *repmult;
{
/*
* C is a working cost, ap a pointer into the action
* table for looking at feasible alternatives.
*/
register int c, *ap;
int i, *actions;
#ifdef DEBUG
Eprintf("Trying state %d\n", *Ps0);
#endif
/*
* Try deletion.
* Correct returns a cost.
*/
#ifdef DEBUG
Tprintf(" Try Delete %s%s cost=%d\n",
tokname(&YC[0] , 0 ),
tokname(&YC[0] , 1 ),
delcost(YC[0].Yychar));
#endif
c = delcost(YC[0].Yychar);
#ifndef DEBUG
if (c < ccost) {
#endif
c = correct(NOCHAR, 1, c, delmult, Ps0, Pv0);
#ifdef DEBUG
if (c < CPRLIMIT || fulltrace)
Eprintf("Cost %2d Delete %s%s\n", c,
tokname(&YC[0] , 0 ),
tokname(&YC[0] , 1 ));
#endif
if (c < ccost)
cact = CDELETE, ccost = c, cflag = flag;
#ifndef DEBUG
}
#endif
/*
* Look at the inputs to this state
* which will cause parse action shift.
*/
aclval = NIL;
ap = &yyact[yypact[*Ps0 + 1]];
/*
* Skip action on error to
* detect true unique inputs.
* Error action is always first.
*/
if (*ap == -ERROR)
ap=+ 2;
/*
* Loop through the test actions
* for this state.
*/
for (actions = ap; *ap <= 0; ap =+ 2) {
/*
* Extract the token of this action
*/
acchar = -*ap;
/*
* Try insertion
*/
#ifdef DEBUG
Tprintf(" Try Insert %s%s cost=%d\n"
, charname(acchar , 0 )
, charname(acchar , 1 )
, inscost(acchar));
#endif
c = inscost(acchar, YC[0].Yychar);
#ifndef DEBUG
if (c < ccost) {
#endif
if (c == 0) {
c = correct(acchar, 0, 1, insmult + 1, Ps0, Pv0);
#ifdef DEBUG
Eprintf("Cost %2d Freebie %s%s\n", c
, charname(acchar , 0 )
, charname(acchar , 1 ));
#endif
if (c < ccost)
cact = CUNIQUE, ccost = 0, cchar = acchar, cflag = flag;
} else {
c = correct(acchar, 0, c, insmult, Ps0, Pv0);
#ifdef DEBUG
if (c < CPRLIMIT || fulltrace)
Eprintf("Cost %2d Insert %s%s\n", c
, charname(acchar , 0 )
, charname(acchar , 1 ));
#endif
if (c < ccost)
cact = CINSERT, ccost = c, cchar = acchar, cflag = flag;
}
#ifndef DEBUG
}
#endif
/*
* Try replacement
*/
#ifdef DEBUG
Tprintf(" Try Replace %s%s with %s%s cost=%d\n",
tokname(&YC[0] , 0 ),
tokname(&YC[0] , 1 ),
charname(acchar , 0 ),
charname(acchar , 1 ),
repcost(YC[0].Yychar, acchar));
#endif
c = repcost(YC[0].Yychar, acchar);
#ifndef DEBUG
if (c < ccost) {
#endif
c = correct(acchar, 1, repcost(YC[0].Yychar, acchar), repmult, Ps0, Pv0);
#ifdef DEBUG
if (c < CPRLIMIT || fulltrace)
Eprintf("Cost %2d Replace %s%s with %s%s\n",
c,
tokname(&YC[0] , 0 ),
tokname(&YC[0] , 1 ),
tokname(&ACtok , 0 ),
tokname(&ACtok , 1 ));
#endif
if (c < ccost)
cact = CREPLACE, ccost = c, cchar = acchar, cflag = flag;
#ifndef DEBUG
}
#endif
}
}
\f
int *yCpv;
char yyredfail;
/*
* The ntok structure is used to build a
* scanner structure for tokens inserted
* from the argument "fchar" to "correct" below.
*/
static struct yytok ntok;
/*
* Compute the cost of a correction
* C is the base cost for it.
* Fchar is the first input character from
* the current state, NOCHAR if none.
* The rest of the inputs come from the array
* YC, starting at origin and continuing to the
* last character there, YC[yCcnt - 1].Yychar.
*
* The cost returned is INFINITE if this correction
* allows no shifts, otherwise is weighted based
* on the number of shifts this allows against the
* maximum number possible with the available lookahead.
*/
correct(fchar, origin, c, multvec, Ps0, Pv0)
register int fchar, c;
int origin;
char *multvec;
int *Ps0, *Pv0;
{
register char *mv;
/*
* Ps is the top of the parse stack after the most
* recent local correctness check. Loccor returns
* NIL when we cannot shift.
*/
register int *ps;
yyredfail = 0;
/*
* Initialize the tip parse and semantic stacks.
*/
ps = Ps0;
yytips[0] = *ps;
ps--;
yytipv[0] = Pv0[0];
yCpv = Pv0 - 1;
yytipct = 1;
/*
* Shift while possible.
* Adjust cost as necessary.
*/
mv = multvec;
do {
if (fchar != NOCHAR) {
copy(&ntok, &YC[0], sizeof ntok);
ntok.Yychar = fchar, ntok.Yylval = nullsem(fchar);
fchar = NOCHAR;
ps = loccor(ps, &ntok);
} else
ps = loccor(ps, &YC[origin++]);
if (ps == NIL) {
if (yyredfail && mv > multvec)
mv--;
c =* *mv;
break;
}
mv++;
} while (*mv != 1);
return (c);
}
\f
extern int yygo[], yypgo[], yyr1[], yyr2[];
/*
* Local syntactic correctness check.
* The arguments to this routine are a
* top of stack pointer, ps, and an input
* token tok. Also, implicitly, the contents
* of the yytips array which contains the tip
* of the stack, and into which the new top
* state on the stack will be placed if we shift.
*
* If we succeed, we return a new top of stack
* pointer, else we return NIL.
*/
loccor(ps, ntok)
int *ps;
struct yytok *ntok;
{
register int *p, n;
register int nchar;
int i;
if (ps == NIL)
return (NIL);
nchar = ntok->Yychar;
yyeline = ntok->Yyeline;
#ifdef DEBUG
Tprintf(" Stack ");
for (i = yytipct - 1; i >= 0; i--)
Tprintf("%d ", yytips[i]);
Tprintf("| %d, Input %s%s\n", *ps
, charname(nchar , 0 )
, charname(nchar , 1 ));
#endif
/*
* As in the yacc parser yyparse,
* p traces through the action list
* and "n" is the information associated
* with the action.
*/
newstate:
p = &yyact[ yypact[yytips[yytipct - 1]+1] ];
actn:
/*
* Search the parse actions table
* for something useful to do.
* While n is non-positive, it is the
* arithmetic inverse of the token to be tested.
* This allows a fast check.
*/
while ((n = *p++) <= 0)
if ((n =+ nchar) != 0)
p++;
switch (n >> 12) {
/*
* SHIFT
*/
case 2:
n =& 07777;
yyredfail = 0;
if (nchar == YID)
yyredfail++;
if (yytipct == YYTIPSIZ) {
tipover:
#ifdef DEBUG
Tprintf("\tTIP OVFLO\n");
#endif
return (NIL);
}
yytips[yytipct] = n;
yytipv[yytipct] = ntok->Yylval;
yytipct++;
#ifdef DEBUG
Tprintf("\tShift to state %d\n", n);
#endif
return (ps);
/*
* REDUCE
*/
case 3:
n =& 07777;
if (yyEactr(n, yytipv[yytipct - 1]) == 0) {
#ifdef DEBUG
Tprintf("\tYyEactr objects: have %s id, want %s id\n", classes[yyidhave], classes[yyidwant]);
#endif
return (NIL);
}
yyredfail = 0;
i = yyr2[n];
#ifdef DEBUG
Tprintf("\tReduce, length %d,", i);
#endif
if (i > yytipct) {
i =- yytipct;
yytipct = 0;
ps =- i;
yCpv =- i;
} else
yytipct =- i;
if (yytipct >= YYTIPSIZ)
goto tipover;
/*
* Use goto table to find next state
*/
p = &yygo[yypgo[yyr1[n]]];
i = yytipct ? yytips[yytipct - 1] : *ps;
while (*p != i && *p >= 0)
p =+ 2;
#ifdef DEBUG
Tprintf(" new state %d\n", p[1]);
#endif
yytips[yytipct] = p[1];
yytipct++;
goto newstate;
/*
* ACCEPT
*/
case 4:
#ifdef DEBUG
Tprintf("\tAccept\n");
#endif
return (ps);
/*
* ERROR
*/
case 1:
#ifdef DEBUG
Tprintf("\tError\n");
#endif
return (0);
}
panic("loccor");
}