usr/src/cmd/pc0/yyrecover.c

/* Copyright (c) 1979 Regents of the University of California */

static  char sccsid[] = "@(#)yyrecover.c 1.1 8/27/80";

#include "whoami.h"
#include "0.h"
#include "yy.h"

/*
 * Very simplified version of Graham-Rhodes error recovery
 * method for LALR parsers.  Backward move is embodied in
 * default reductions of the yacc parser until an error condition
 * is reached.  Forward move is over a small number of input tokens
 * and cannot "condense".  The basic corrections are:
 *
 *      1) Delete the input token.
 *
 *      2) Replace the current input with a legal input.
 *
 *      3) Insert a legal token.
 *
 * All corrections are weighted, considered only if they allow
 * at least two shifts, and the cost of a correction increases if
 * it allows shifting over only a part of the lookahead.
 *
 * Another error situation is that which occurs when an identifier "fails"
 * a reduction because it is not the required "class".
 * In this case, we also consider replacing this identifier, which has
 * already been shifted over, with an identifier of the correct class.
 *
 * Another correction performed here is unique symbol insertion.
 * If the current state admits only one input, and no other alternative
 * correction presents itself, then that symbol will be inserted.
 * There is a danger in this of looping, and it is handled
 * by counting true shifts over input (see below).
 *
 *
 * A final class of corrections, considered only when the error
 * occurred immediately after a shift over a terminal, involves
 * the three basic corrections above, but with the point of error
 * considered to be before this terminal was shifted over, effectively
 * "unreading" this terminal.  This is a feeble attempt at elimination
 * of the left-right bias and because "if" has a low weight and some
 * statements are quite simple i.e.
 *
 *      cse ch of ...
 *
 * we can get a small number of errors.  The major deficiency of
 * this is that we back up only one token, and that the forward
 * move is over a small number of tokens, often not enough to really
 * tell what the input should be, e.g. in
 *
 *      a[i] > a[i - 1] ...
 *
 * In such cases a bad identifier (misspelled keyword) or omitted
 * keyword will be change or inserted as "if" as it has the lowest cost.
 * This is not terribly bad, as "if"s are most common.
 * This also allows the correction of other errors.
 *
 * This recovery depends on the default reductions which delay
 * noticing the error until the parse reaches a state where the
 * relevant "alternatives" are visible.  Note that it does not
 * consider tokens which will cause reductions before being
 * shifted over.  This requires the grammar to be written in a
 * certain way for the recovery to work correctly.
 * In some sense, also, the recovery suffers because we have
 * LALR(1) tables rather than LR(1) tables, e.g. in
 *
 *      if rec.field < rec2,field2 then
 */
\f
/*
 * Definitions of possible corrective actions
 */
#define CPANIC          0
#define CDELETE         1
#define CREPLACE        2
#define CINSERT         3
#define CUNIQUE         4
#define CCHIDENT        5

/*
 * Multiplicative cost factors for corrective actions.
 *
 * When an error occurs we take YCSIZ - 1 look-ahead tokens.
 * If a correction being considered will shift over only part of
 * that look-ahead, it is not completely discarded, but rather
 * "weighted", its cost being multiplied by a weighting factor.
 * For a correction to be considered its weighted cost must be less
 * than CLIMIT.
 *
 * Non-weighted costs are considered:
 *
 *      LOW     <= 3
 *      MEDIUM  4,5
 *      HIGH    >= 6
 *
 * CURRENT WEIGHTING STRATEGY: Aug 20, 1977
 *
 * For all kinds of corrections we demand shifts over two symbols.
 * Corrections have high weight even after two symbol
 * shifts because the costs for deleting and inserting symbols are actually
 * quite low; we do not want to change weighty symbols
 * on inconclusive evidence.
 *
 * The weights are the same after the third look ahead.
 * This prevents later, unrelated errors from causing "funny"
 * biases of the weights toward one type of correction.
 *
 * Current look ahead is 5 symbols.
 */

/*** CLIMIT is defined in yy.h for yycosts ***/
#define CPRLIMIT        50
#define CCHIDCOST       3

char    insmult[8]      = {INFINITY, INFINITY, INFINITY, 15, 8, 6, 3, 1};
char    repmult[7]      = {INFINITY, INFINITY, INFINITY, 8, 6, 3, 1};
char    delmult[6]      = {INFINITY, INFINITY, INFINITY, 6, 3, 1};
\f
#define NOCHAR  -1

#define Eprintf if (errtrace) printf
#define Tprintf if (testtrace) printf

/*
 * Action arrays of the parser needed here
 */
int     yyact[], yypact[], *yypv;

/*
 * Yytips is the tip of the stack when using
 * the function loccor to check for local
 * syntactic correctness. As we don't want
 * to copy the whole parser stack, but want
 * to simulate parser moves, we "split"
 * the parser stack and keep the tip here.
 */
#define YYTIPSIZ 16
int     yytips[YYTIPSIZ], yytipct;
int     yytipv[YYTIPSIZ];

/*
 * The array YC saves the lookahead tokens for the
 * forward moves.
 * Yccnt is the number of tokens in the YC array.
 */
#define YCSIZ   6

int     yCcnt;
struct  yytok YC0[YCSIZ + 1];
struct  yytok *YC;

/*
 * YCps gives the top of stack at
 * the point of error.
 */

bool    yyunique        1;

STATIC  unsigned yyTshifts;
\f
/*
 * Cact is the corrective action we have decided on
 * so far, ccost its cost, and cchar the associated token.
 * Cflag tells if the correction is over the previous input token.
 */
int     cact, ccost, cchar, cflag;

/*
 * ACtok holds the token under
 * consideration when examining
 * the lookaheads in a state.
 */
struct  yytok ACtok;

#define acchar  ACtok.Yychar
#define aclval  ACtok.Yylval

/*
 * Make a correction to the current stack which has
 * top of stack pointer Ps.
 */
yyrecover(Ps0, idfail)
        int *Ps0, idfail;
{
        register int c, i;
        int yyrwant, yyrhave;

#ifdef PI
        Recovery = 1;
#endif

        YC = &YC0[1];
#ifdef DEBUG
        if (errtrace) {
                setpfx('p');
                yerror("Point of error");
                printf("States %d %d ...", Ps0[0], Ps0[-1]);
                if (idfail)
                        printf(" [Idfail]");
                pchr('\n');
                printf("Input %s%s", tokname(&Y , 0)
                                   , tokname(&Y , 1));
        }

#endif
        /*
         * We first save the current input token
         * and its associated semantic information.
         */
        if (yychar < 0)
                yychar = yylex();
        copy(&YC[0], &Y, sizeof Y);

        /*
         * Set the default action and cost
         */
        cact = CPANIC, ccost = CLIMIT, cflag = 0;

        /*
         * Peek ahead
         */
        for (yCcnt = 1; yCcnt < YCSIZ; yCcnt++) {
                yychar = yylex();
                copy(&YC[yCcnt], &Y, sizeof YC[0]);
#ifdef DEBUG
                Eprintf(" | %s%s", tokname(&YC[yCcnt] , 0 )
                                 , tokname(&YC[yCcnt] , 1 ));
#endif
        }
#ifdef DEBUG
        Eprintf("\n");
#endif

        /*
         * If we are here because a reduction failed, try
         * correcting that.
         */
        if (idfail) {
                /*
                 * Save the particulars about
                 * the kind of identifier we want/have.
                 */
                yyrwant = yyidwant;
                yyrhave = yyidhave;
#ifdef DEBUG
                Tprintf("  Try Replace %s identifier with %s identifier cost=%d\n",
                    classes[yyidhave], classes[yyidwant], CCHIDCOST);
#endif

                /*
                 * Save the semantics of the ID on the
                 * stack, and null them out to free
                 * up the reduction in question.
                 */
                i = yypv[0];
                yypv[0] = nullsem(YID);
                c = correct(NOCHAR, 0, CCHIDCOST, &repmult[2], Ps0, yypv);
                yypv[0] = i;
#ifdef DEBUG
                if (c < CPRLIMIT || fulltrace)
                        Eprintf("Cost %2d Replace %s identifier with %s identifier\n", c, classes[yyrhave], classes[yyrwant]);
#endif
                if (c < ccost)
                        cact = CCHIDENT, ccost = c, cchar = YID;
        }

        /*
         * First try correcting the state we are in
         */
        trystate(Ps0, yypv, 0, &insmult[1], &delmult[1], &repmult[1]);

        /*
         * Now, if we just shifted over a terminal, try
         * correcting it.
         */
        if (OY.Yychar != -1 && OY.Yylval != nullsem(OY.Yychar)) {
                YC--;
                copy(&YC[0], &OY, sizeof YC[0]);
                trystate(Ps0 - 1, yypv - 1, 1, insmult, delmult, repmult);
                if (cflag == 0)
                        YC++;
                else {
                        yypv--;
#ifdef PXP
                        yypw--;
#endif
                        Ps0--;
                        yCcnt++;
                }
        }

        /*
         * Restoring the first look ahead into
         * the scanner token allows the error message
         * routine to print the error message with the text
         * of the correct line.
         */
        copy(&Y, &YC[0], sizeof Y);

        /*
         * Unique symbol insertion.
         *
         * If there was no reasonable correction found,
         * but only one input to the parser is acceptable
         * we report that, and try it.
         *
         * Special precautions here to prevent looping.
         * The number of true inputs shifted over at the point
         * of the last unique insertion is recorded in the
         * variable yyTshifts.  If this is not less than
         * the current number in yytshifts, we do not insert.
         * Thus, after one unique insertion, no more unique
         * insertions will be made until an input is shifted
         * over.  This guarantees termination.
         */
        if (cact == CPANIC && !idfail) {
                register int *ap;

                ap = &yyact[yypact[*Ps0 + 1]];
                if (*ap == -ERROR)
                        ap =+ 2;
                if (ap[0] <= 0 && ap[2] > 0) {
                        cchar = -ap[0];
                        if (cchar == YEOF)
                                yyexeof();
                        if (cchar != ERROR && yyTshifts < yytshifts) {
                                cact = CUNIQUE;
#ifdef DEBUG
                                Eprintf("Unique symbol %s%s\n"
                                        , charname(cchar , 0 )
                                        , charname(cchar , 1 ));
#endif
                                /*
                                 * Note that the inserted symbol
                                 * will not be counted as a true input
                                 * (i.e. the "yytshifts--" below)
                                 * so that a true shift will be needed
                                 * to make yytshifts > yyTshifts.
                                 */
                                yyTshifts = yytshifts;
                        }
                }
        }

        /*
         * Set up to perform the correction.
         * Build a token appropriate for replacement
         * or insertion in the yytok structure ACchar
         * having the attributes of the input at the
         * point of error.
         */
        copy(&ACtok, &YC[0], sizeof ACtok);
        acchar = cchar;
        aclval = nullsem(acchar);
        if (aclval != NIL)
                recovered();
        switch (cact) {
                /*
                 * Panic, just restore the
                 * lookahead and return.
                 */
                case CPANIC:
                        setpfx('E');
                        if (idfail) {
                                copy(&Y, &OY, sizeof Y);
                                if (yyrhave == NIL) {
#ifdef PI
                                        if (yybaduse(yypv[0], yyeline, ISUNDEF) == NIL)
#endif
                                                yerror("Undefined identifier");
                                } else {
                                        yerror("Improper %s identifier", classes[yyrhave]);
#ifdef PI
                                        yybaduse(yypv[0], yyeline, NIL);
#endif
                                }
                                /*
                                 * Suppress message from panic routine
                                 */
                                yyshifts = 1;
                        }
                        i = 0;
                        /* Note that on one path we dont touch yyshifts ! */
                        break;
                /*
                 * Delete the input.
                 * Mark this as a shift over true input.
                 * Restore the lookahead starting at
                 * the second token.
                 */
                case CDELETE:
                        if (ccost != 0)
                                yerror("Deleted %s%s", tokname(&YC[0] , 0 )
                                                     , tokname(&YC[0] , 1 ));
                        yytshifts++;
                        i = 1;
                        yyshifts = 0;
                        break;
                /*
                 * Replace the input with a new token.
                 */
                case CREPLACE:
                        if (acchar == YEOF)
                                yyexeof();
                        if (acchar == YEND)
                                aclval = NIL;
                        yerror("Replaced %s%s with a %s%s",
                            tokname(&YC[0] , 0 ),
                            tokname(&YC[0] , 1 ),
                            tokname(&ACtok , 0 ),
                            tokname(&ACtok , 1 ));
                        copy(&YC[0], &ACtok, sizeof YC[0]);
                        i = 0;
                        yyshifts = 0;
                        break;
                /*
                 * Insert a token.
                 * Don't count this token as a true input shift.
                 * For inserted "end"s pas.y is responsible
                 * for the error message later so suppress it.
                 * Restore all the lookahead.
                 */
                case CINSERT:
                        if (acchar == YEOF)
                                yyexeof();
                        if (acchar != YEND)
                                yerror("Inserted %s%s",
                                        tokname(&ACtok , 0 ),
                                        tokname(&ACtok , 1 ));
                        yytshifts--;
                        i = 0;
                        yyshifts = 0;
                        break;
                /*
                 * Make a unique symbol correction.
                 * Like an insertion but a different message.
                 */
                case CUNIQUE:
                        setpfx('E');
                        yerror("Expected %s%s",
                                tokname(&ACtok , 0 ),
                                tokname(&ACtok , 1 ));
                        yytshifts--;
                        i = 0;
                        if (ccost == 0 || yyunique)
                                yyshifts = 0;
                        else
                                yyshifts = -1;
                        break;
                /*
                 * Change an identifier's type
                 * to make it work.
                 */
                case CCHIDENT:
                        copy(&Y, &OY, sizeof Y);
#ifdef PI
                        i = 1 << yyrwant;
#endif
                        if (yyrhave == NIL) {
                                yerror("Undefined %s", classes[yyrwant]);
#ifdef PI
                                i =| ISUNDEF;
#endif
                        } else
                                yerror("Replaced %s id with a %s id", classes[yyrhave], classes[yyrwant]);
#ifdef PI
                        yybaduse(yypv[0], yyeline, i);
#endif
                        yypv[0] = nullsem(YID);
                        i = 0;
                        yyshifts = 0;
                        break;
        }

        /*
         * Restore the desired portion of the lookahead,
         * and possibly the inserted or unique inserted token.
         */
        for (yCcnt--; yCcnt >= i; yCcnt--)
                unyylex(&YC[yCcnt]);
        if (cact == CINSERT || cact == CUNIQUE)
                unyylex(&ACtok);

        /*
         * Put the scanner back in sync.
         */
        yychar = yylex();

        /*
         * We succeeded if we didn't "panic".
         */
        Recovery = 0;
        Ps = Ps0;
        return (cact != CPANIC);
}

yyexeof()
{

        yerror("End-of-file expected - QUIT");
        pexit(ERRS);
}

yyunexeof()
{

        yerror("Unexpected end-of-file - QUIT");
        pexit(ERRS);
}
\f
/*
 * Try corrections with the state at Ps0.
 * Flag is 0 if this is the top of stack state,
 * 1 if it is the state below.
 */
trystate(Ps0, Pv0, flag, insmult, delmult, repmult)
        int *Ps0, *Pv0, flag;
        char *insmult, *delmult, *repmult;
{
        /*
         * C is a working cost, ap a pointer into the action
         * table for looking at feasible alternatives.
         */
        register int c, *ap;
        int i, *actions;

#ifdef DEBUG
        Eprintf("Trying state %d\n", *Ps0);
#endif
        /*
         * Try deletion.
         * Correct returns a cost.
         */
#ifdef DEBUG
        Tprintf("  Try Delete %s%s cost=%d\n",
                tokname(&YC[0] , 0 ),
                tokname(&YC[0] , 1 ),
                delcost(YC[0].Yychar));
#endif
        c = delcost(YC[0].Yychar);
#ifndef DEBUG
        if (c < ccost) {
#endif
                c = correct(NOCHAR, 1, c, delmult, Ps0, Pv0);
#ifdef DEBUG
                if (c < CPRLIMIT || fulltrace)
                        Eprintf("Cost %2d Delete %s%s\n", c,
                                tokname(&YC[0] , 0 ),
                                tokname(&YC[0] , 1 ));
#endif
                if (c < ccost)
                        cact = CDELETE, ccost = c, cflag = flag;
#ifndef DEBUG
        }
#endif

        /*
         * Look at the inputs to this state
         * which will cause parse action shift.
         */
        aclval = NIL;
        ap = &yyact[yypact[*Ps0 + 1]];

        /*
         * Skip action on error to
         * detect true unique inputs.
         * Error action is always first.
         */
        if (*ap == -ERROR)
                ap=+ 2;

        /*
         * Loop through the test actions
         * for this state.
         */
        for (actions = ap; *ap <= 0; ap =+ 2) {
                /*
                 * Extract the token of this action
                 */
                acchar = -*ap;

                /*
                 * Try insertion
                 */
#ifdef DEBUG
                Tprintf("  Try Insert %s%s cost=%d\n"
                        , charname(acchar , 0 )
                        , charname(acchar , 1 )
                        , inscost(acchar));
#endif
                c = inscost(acchar, YC[0].Yychar);
#ifndef DEBUG
                if (c < ccost) {
#endif
                        if (c == 0) {
                                c = correct(acchar, 0, 1, insmult + 1, Ps0, Pv0);
#ifdef DEBUG
                                Eprintf("Cost %2d Freebie %s%s\n", c
                                        , charname(acchar , 0 )
                                        , charname(acchar , 1 ));
#endif
                                if (c < ccost)
                                        cact = CUNIQUE, ccost = 0, cchar = acchar, cflag = flag;
                        } else {
                                c = correct(acchar, 0, c, insmult, Ps0, Pv0);
#ifdef DEBUG
                                if (c < CPRLIMIT || fulltrace)
                                        Eprintf("Cost %2d Insert %s%s\n", c
                                                , charname(acchar , 0 )
                                                , charname(acchar , 1 ));
#endif
                                if (c < ccost)
                                        cact = CINSERT, ccost = c, cchar = acchar, cflag = flag;
                        }
#ifndef DEBUG
                }
#endif

                /*
                 * Try replacement
                 */
#ifdef DEBUG
                Tprintf("  Try Replace %s%s with %s%s cost=%d\n",
                    tokname(&YC[0] , 0 ),
                    tokname(&YC[0] , 1 ),
                    charname(acchar , 0 ),
                    charname(acchar , 1 ),
                    repcost(YC[0].Yychar, acchar));
#endif
                c = repcost(YC[0].Yychar, acchar);
#ifndef DEBUG
                if (c < ccost) {
#endif
                        c = correct(acchar, 1, repcost(YC[0].Yychar, acchar), repmult, Ps0, Pv0);
#ifdef DEBUG
                        if (c < CPRLIMIT || fulltrace)
                                Eprintf("Cost %2d Replace %s%s with %s%s\n",
                                        c,
                                        tokname(&YC[0] , 0 ),
                                        tokname(&YC[0] , 1 ),
                                        tokname(&ACtok , 0 ),
                                        tokname(&ACtok , 1 ));
#endif
                        if (c < ccost)
                                cact = CREPLACE, ccost = c, cchar = acchar, cflag = flag;
#ifndef DEBUG
                }
#endif
        }
}
\f
int     *yCpv;
char    yyredfail;

/*
 * The ntok structure is used to build a
 * scanner structure for tokens inserted
 * from the argument "fchar" to "correct" below.
 */
static  struct yytok ntok;

/*
 * Compute the cost of a correction
 * C is the base cost for it.
 * Fchar is the first input character from
 * the current state, NOCHAR if none.
 * The rest of the inputs come from the array
 * YC, starting at origin and continuing to the
 * last character there, YC[yCcnt - 1].Yychar.
 *
 * The cost returned is INFINITE if this correction
 * allows no shifts, otherwise is weighted based
 * on the number of shifts this allows against the
 * maximum number possible with the available lookahead.
 */
correct(fchar, origin, c, multvec, Ps0, Pv0)
        register int fchar, c;
        int origin;
        char *multvec;
        int *Ps0, *Pv0;
{
        register char *mv;

        /*
         * Ps is the top of the parse stack after the most
         * recent local correctness check.  Loccor returns
         * NIL when we cannot shift.
         */
        register int *ps;

        yyredfail = 0;
        /*
         * Initialize the tip parse and semantic stacks.
         */
        ps = Ps0;
        yytips[0] = *ps;
        ps--;
        yytipv[0] = Pv0[0];
        yCpv = Pv0 - 1;
        yytipct = 1;

        /*
         * Shift while possible.
         * Adjust cost as necessary.
         */
        mv = multvec;
        do {
                if (fchar != NOCHAR) {
                        copy(&ntok, &YC[0], sizeof ntok);
                        ntok.Yychar = fchar, ntok.Yylval = nullsem(fchar);
                        fchar = NOCHAR;
                        ps = loccor(ps, &ntok);
                } else
                        ps = loccor(ps, &YC[origin++]);
                if (ps == NIL) {
                        if (yyredfail && mv > multvec)
                                mv--;
                        c =* *mv;
                        break;
                }
                mv++;
        } while (*mv != 1);
        return (c);
}
\f
extern  int yygo[], yypgo[], yyr1[], yyr2[];
/*
 * Local syntactic correctness check.
 * The arguments to this routine are a
 * top of stack pointer, ps, and an input
 * token tok.  Also, implicitly, the contents
 * of the yytips array which contains the tip
 * of the stack, and into which the new top
 * state on the stack will be placed if we shift.
 *
 * If we succeed, we return a new top of stack
 * pointer, else we return NIL.
 */
loccor(ps, ntok)
        int *ps;
        struct yytok *ntok;
{
        register int *p, n;
        register int nchar;
        int i;

        if (ps == NIL)
                return (NIL);
        nchar = ntok->Yychar;
        yyeline = ntok->Yyeline;
#ifdef DEBUG
        Tprintf("    Stack ");
        for (i = yytipct - 1; i >= 0; i--)
                Tprintf("%d ", yytips[i]);
        Tprintf("| %d, Input %s%s\n", *ps
                , charname(nchar , 0 )
                , charname(nchar , 1 ));
#endif
        /*
         * As in the yacc parser yyparse,
         * p traces through the action list
         * and "n" is the information associated
         * with the action.
         */
newstate:
        p = &yyact[ yypact[yytips[yytipct - 1]+1] ];

actn:
        /*
         * Search the parse actions table
         * for something useful to do.
         * While n is non-positive, it is the
         * arithmetic inverse of the token to be tested.
         * This allows a fast check.
         */
        while ((n = *p++) <= 0)
                if ((n =+ nchar) != 0)
                        p++;
        switch (n >> 12) {
                /*
                 * SHIFT
                 */
                case 2:
                        n =& 07777;
                        yyredfail = 0;
                        if (nchar == YID)
                                yyredfail++;
                        if (yytipct == YYTIPSIZ) {
tipover:
#ifdef DEBUG
                                Tprintf("\tTIP OVFLO\n");
#endif
                                return (NIL);
                        }
                        yytips[yytipct] = n;
                        yytipv[yytipct] = ntok->Yylval;
                        yytipct++;
#ifdef DEBUG
                        Tprintf("\tShift to state %d\n", n);
#endif
                        return (ps);
                /*
                 * REDUCE
                 */
                case 3:
                        n =& 07777;
                        if (yyEactr(n, yytipv[yytipct - 1]) == 0) {
#ifdef DEBUG
                                Tprintf("\tYyEactr objects: have %s id, want %s id\n", classes[yyidhave], classes[yyidwant]);
#endif
                                return (NIL);
                        }
                        yyredfail = 0;
                        i = yyr2[n];
#ifdef DEBUG
                        Tprintf("\tReduce, length %d,", i);
#endif
                        if (i > yytipct) {
                                i =- yytipct;
                                yytipct = 0;
                                ps =- i;
                                yCpv =- i;
                        } else
                                yytipct =- i;
                        if (yytipct >= YYTIPSIZ)
                                goto tipover;
                        /*
                         * Use goto table to find next state
                         */
                        p = &yygo[yypgo[yyr1[n]]];
                        i = yytipct ? yytips[yytipct - 1] : *ps;
                        while (*p != i && *p >= 0)
                                p =+ 2;
#ifdef DEBUG
                        Tprintf(" new state %d\n", p[1]);
#endif
                        yytips[yytipct] = p[1];
                        yytipct++;
                        goto newstate;
                /*
                 * ACCEPT
                 */
                case 4:
#ifdef DEBUG
                        Tprintf("\tAccept\n");
#endif
                        return (ps);
                /*
                 * ERROR
                 */
                case 1:
#ifdef DEBUG
                        Tprintf("\tError\n");
#endif
                        return (0);
        }
        panic("loccor");
}