/* Copyright (c) 1979 Regents of the University of California */
static char sccsid
[] = "@(#)yyrecover.c 1.1 8/27/80";
* Very simplified version of Graham-Rhodes error recovery
* method for LALR parsers. Backward move is embodied in
* default reductions of the yacc parser until an error condition
* is reached. Forward move is over a small number of input tokens
* and cannot "condense". The basic corrections are:
* 1) Delete the input token.
* 2) Replace the current input with a legal input.
* 3) Insert a legal token.
* All corrections are weighted, considered only if they allow
* at least two shifts, and the cost of a correction increases if
* it allows shifting over only a part of the lookahead.
* Another error situation is that which occurs when an identifier "fails"
* a reduction because it is not the required "class".
* In this case, we also consider replacing this identifier, which has
* already been shifted over, with an identifier of the correct class.
* Another correction performed here is unique symbol insertion.
* If the current state admits only one input, and no other alternative
* correction presents itself, then that symbol will be inserted.
* There is a danger in this of looping, and it is handled
* by counting true shifts over input (see below).
* A final class of corrections, considered only when the error
* occurred immediately after a shift over a terminal, involves
* the three basic corrections above, but with the point of error
* considered to be before this terminal was shifted over, effectively
* "unreading" this terminal. This is a feeble attempt at elimination
* of the left-right bias and because "if" has a low weight and some
* statements are quite simple i.e.
* we can get a small number of errors. The major deficiency of
* this is that we back up only one token, and that the forward
* move is over a small number of tokens, often not enough to really
* tell what the input should be, e.g. in
* In such cases a bad identifier (misspelled keyword) or omitted
* keyword will be change or inserted as "if" as it has the lowest cost.
* This is not terribly bad, as "if"s are most common.
* This also allows the correction of other errors.
* This recovery depends on the default reductions which delay
* noticing the error until the parse reaches a state where the
* relevant "alternatives" are visible. Note that it does not
* consider tokens which will cause reductions before being
* shifted over. This requires the grammar to be written in a
* certain way for the recovery to work correctly.
* In some sense, also, the recovery suffers because we have
* LALR(1) tables rather than LR(1) tables, e.g. in
* if rec.field < rec2,field2 then
* Definitions of possible corrective actions
* Multiplicative cost factors for corrective actions.
* When an error occurs we take YCSIZ - 1 look-ahead tokens.
* If a correction being considered will shift over only part of
* that look-ahead, it is not completely discarded, but rather
* "weighted", its cost being multiplied by a weighting factor.
* For a correction to be considered its weighted cost must be less
* Non-weighted costs are considered:
* CURRENT WEIGHTING STRATEGY: Aug 20, 1977
* For all kinds of corrections we demand shifts over two symbols.
* Corrections have high weight even after two symbol
* shifts because the costs for deleting and inserting symbols are actually
* quite low; we do not want to change weighty symbols
* on inconclusive evidence.
* The weights are the same after the third look ahead.
* This prevents later, unrelated errors from causing "funny"
* biases of the weights toward one type of correction.
* Current look ahead is 5 symbols.
/*** CLIMIT is defined in yy.h for yycosts ***/
char insmult
[8] = {INFINITY
, INFINITY
, INFINITY
, 15, 8, 6, 3, 1};
char repmult
[7] = {INFINITY
, INFINITY
, INFINITY
, 8, 6, 3, 1};
char delmult
[6] = {INFINITY
, INFINITY
, INFINITY
, 6, 3, 1};
#define Eprintf if (errtrace) printf
#define Tprintf if (testtrace) printf
* Action arrays of the parser needed here
int yyact
[], yypact
[], *yypv
;
* Yytips is the tip of the stack when using
* the function loccor to check for local
* syntactic correctness. As we don't want
* to copy the whole parser stack, but want
* to simulate parser moves, we "split"
* the parser stack and keep the tip here.
int yytips
[YYTIPSIZ
], yytipct
;
* The array YC saves the lookahead tokens for the
* Yccnt is the number of tokens in the YC array.
struct yytok YC0
[YCSIZ
+ 1];
* YCps gives the top of stack at
STATIC
unsigned yyTshifts
;
* Cact is the corrective action we have decided on
* so far, ccost its cost, and cchar the associated token.
* Cflag tells if the correction is over the previous input token.
int cact
, ccost
, cchar
, cflag
;
* ACtok holds the token under
* consideration when examining
* the lookaheads in a state.
#define acchar ACtok.Yychar
#define aclval ACtok.Yylval
* Make a correction to the current stack which has
* top of stack pointer Ps.
yerror("Point of error");
printf("States %d %d ...", Ps0
[0], Ps0
[-1]);
printf("Input %s%s", tokname(&Y
, 0)
* We first save the current input token
* and its associated semantic information.
copy(&YC
[0], &Y
, sizeof Y
);
* Set the default action and cost
cact
= CPANIC
, ccost
= CLIMIT
, cflag
= 0;
for (yCcnt
= 1; yCcnt
< YCSIZ
; yCcnt
++) {
copy(&YC
[yCcnt
], &Y
, sizeof YC
[0]);
Eprintf(" | %s%s", tokname(&YC
[yCcnt
] , 0 )
, tokname(&YC
[yCcnt
] , 1 ));
* If we are here because a reduction failed, try
* Save the particulars about
* the kind of identifier we want/have.
Tprintf(" Try Replace %s identifier with %s identifier cost=%d\n",
classes
[yyidhave
], classes
[yyidwant
], CCHIDCOST
);
* Save the semantics of the ID on the
* stack, and null them out to free
* up the reduction in question.
c
= correct(NOCHAR
, 0, CCHIDCOST
, &repmult
[2], Ps0
, yypv
);
if (c
< CPRLIMIT
|| fulltrace
)
Eprintf("Cost %2d Replace %s identifier with %s identifier\n", c
, classes
[yyrhave
], classes
[yyrwant
]);
cact
= CCHIDENT
, ccost
= c
, cchar
= YID
;
* First try correcting the state we are in
trystate(Ps0
, yypv
, 0, &insmult
[1], &delmult
[1], &repmult
[1]);
* Now, if we just shifted over a terminal, try
if (OY
.Yychar
!= -1 && OY
.Yylval
!= nullsem(OY
.Yychar
)) {
copy(&YC
[0], &OY
, sizeof YC
[0]);
trystate(Ps0
- 1, yypv
- 1, 1, insmult
, delmult
, repmult
);
* Restoring the first look ahead into
* the scanner token allows the error message
* routine to print the error message with the text
copy(&Y
, &YC
[0], sizeof Y
);
* Unique symbol insertion.
* If there was no reasonable correction found,
* but only one input to the parser is acceptable
* we report that, and try it.
* Special precautions here to prevent looping.
* The number of true inputs shifted over at the point
* of the last unique insertion is recorded in the
* variable yyTshifts. If this is not less than
* the current number in yytshifts, we do not insert.
* Thus, after one unique insertion, no more unique
* insertions will be made until an input is shifted
* over. This guarantees termination.
if (cact
== CPANIC
&& !idfail
) {
ap
= &yyact
[yypact
[*Ps0
+ 1]];
if (ap
[0] <= 0 && ap
[2] > 0) {
if (cchar
!= ERROR
&& yyTshifts
< yytshifts
) {
Eprintf("Unique symbol %s%s\n"
* Note that the inserted symbol
* will not be counted as a true input
* (i.e. the "yytshifts--" below)
* so that a true shift will be needed
* to make yytshifts > yyTshifts.
* Set up to perform the correction.
* Build a token appropriate for replacement
* or insertion in the yytok structure ACchar
* having the attributes of the input at the
copy(&ACtok
, &YC
[0], sizeof ACtok
);
aclval
= nullsem(acchar
);
* Panic, just restore the
if (yybaduse(yypv
[0], yyeline
, ISUNDEF
) == NIL
)
yerror("Undefined identifier");
yerror("Improper %s identifier", classes
[yyrhave
]);
yybaduse(yypv
[0], yyeline
, NIL
);
* Suppress message from panic routine
/* Note that on one path we dont touch yyshifts ! */
* Mark this as a shift over true input.
* Restore the lookahead starting at
yerror("Deleted %s%s", tokname(&YC
[0] , 0 )
* Replace the input with a new token.
yerror("Replaced %s%s with a %s%s",
copy(&YC
[0], &ACtok
, sizeof YC
[0]);
* Don't count this token as a true input shift.
* For inserted "end"s pas.y is responsible
* for the error message later so suppress it.
* Restore all the lookahead.
* Make a unique symbol correction.
* Like an insertion but a different message.
if (ccost
== 0 || yyunique
)
* Change an identifier's type
yerror("Undefined %s", classes
[yyrwant
]);
yerror("Replaced %s id with a %s id", classes
[yyrhave
], classes
[yyrwant
]);
yybaduse(yypv
[0], yyeline
, i
);
* Restore the desired portion of the lookahead,
* and possibly the inserted or unique inserted token.
for (yCcnt
--; yCcnt
>= i
; yCcnt
--)
if (cact
== CINSERT
|| cact
== CUNIQUE
)
* Put the scanner back in sync.
* We succeeded if we didn't "panic".
yerror("End-of-file expected - QUIT");
yerror("Unexpected end-of-file - QUIT");
* Try corrections with the state at Ps0.
* Flag is 0 if this is the top of stack state,
* 1 if it is the state below.
trystate(Ps0
, Pv0
, flag
, insmult
, delmult
, repmult
)
char *insmult
, *delmult
, *repmult
;
* C is a working cost, ap a pointer into the action
* table for looking at feasible alternatives.
Eprintf("Trying state %d\n", *Ps0
);
* Correct returns a cost.
Tprintf(" Try Delete %s%s cost=%d\n",
c
= delcost(YC
[0].Yychar
);
c
= correct(NOCHAR
, 1, c
, delmult
, Ps0
, Pv0
);
if (c
< CPRLIMIT
|| fulltrace
)
Eprintf("Cost %2d Delete %s%s\n", c
,
cact
= CDELETE
, ccost
= c
, cflag
= flag
;
* Look at the inputs to this state
* which will cause parse action shift.
ap
= &yyact
[yypact
[*Ps0
+ 1]];
* Skip action on error to
* detect true unique inputs.
* Error action is always first.
* Loop through the test actions
for (actions
= ap
; *ap
<= 0; ap
=+ 2) {
* Extract the token of this action
Tprintf(" Try Insert %s%s cost=%d\n"
c
= inscost(acchar
, YC
[0].Yychar
);
c
= correct(acchar
, 0, 1, insmult
+ 1, Ps0
, Pv0
);
Eprintf("Cost %2d Freebie %s%s\n", c
, charname(acchar
, 1 ));
cact
= CUNIQUE
, ccost
= 0, cchar
= acchar
, cflag
= flag
;
c
= correct(acchar
, 0, c
, insmult
, Ps0
, Pv0
);
if (c
< CPRLIMIT
|| fulltrace
)
Eprintf("Cost %2d Insert %s%s\n", c
, charname(acchar
, 1 ));
cact
= CINSERT
, ccost
= c
, cchar
= acchar
, cflag
= flag
;
Tprintf(" Try Replace %s%s with %s%s cost=%d\n",
repcost(YC
[0].Yychar
, acchar
));
c
= repcost(YC
[0].Yychar
, acchar
);
c
= correct(acchar
, 1, repcost(YC
[0].Yychar
, acchar
), repmult
, Ps0
, Pv0
);
if (c
< CPRLIMIT
|| fulltrace
)
Eprintf("Cost %2d Replace %s%s with %s%s\n",
cact
= CREPLACE
, ccost
= c
, cchar
= acchar
, cflag
= flag
;
* The ntok structure is used to build a
* scanner structure for tokens inserted
* from the argument "fchar" to "correct" below.
static struct yytok ntok
;
* Compute the cost of a correction
* C is the base cost for it.
* Fchar is the first input character from
* the current state, NOCHAR if none.
* The rest of the inputs come from the array
* YC, starting at origin and continuing to the
* last character there, YC[yCcnt - 1].Yychar.
* The cost returned is INFINITE if this correction
* allows no shifts, otherwise is weighted based
* on the number of shifts this allows against the
* maximum number possible with the available lookahead.
correct(fchar
, origin
, c
, multvec
, Ps0
, Pv0
)
* Ps is the top of the parse stack after the most
* recent local correctness check. Loccor returns
* NIL when we cannot shift.
* Initialize the tip parse and semantic stacks.
* Adjust cost as necessary.
copy(&ntok
, &YC
[0], sizeof ntok
);
ntok
.Yychar
= fchar
, ntok
.Yylval
= nullsem(fchar
);
ps
= loccor(ps
, &YC
[origin
++]);
if (yyredfail
&& mv
> multvec
)
extern int yygo
[], yypgo
[], yyr1
[], yyr2
[];
* Local syntactic correctness check.
* The arguments to this routine are a
* top of stack pointer, ps, and an input
* token tok. Also, implicitly, the contents
* of the yytips array which contains the tip
* of the stack, and into which the new top
* state on the stack will be placed if we shift.
* If we succeed, we return a new top of stack
* pointer, else we return NIL.
for (i
= yytipct
- 1; i
>= 0; i
--)
Tprintf("%d ", yytips
[i
]);
Tprintf("| %d, Input %s%s\n", *ps
* As in the yacc parser yyparse,
* p traces through the action list
* and "n" is the information associated
p
= &yyact
[ yypact
[yytips
[yytipct
- 1]+1] ];
* Search the parse actions table
* for something useful to do.
* While n is non-positive, it is the
* arithmetic inverse of the token to be tested.
* This allows a fast check.
if (yytipct
== YYTIPSIZ
) {
Tprintf("\tTIP OVFLO\n");
yytipv
[yytipct
] = ntok
->Yylval
;
Tprintf("\tShift to state %d\n", n
);
if (yyEactr(n
, yytipv
[yytipct
- 1]) == 0) {
Tprintf("\tYyEactr objects: have %s id, want %s id\n", classes
[yyidhave
], classes
[yyidwant
]);
Tprintf("\tReduce, length %d,", i
);
* Use goto table to find next state
p
= &yygo
[yypgo
[yyr1
[n
]]];
i
= yytipct
? yytips
[yytipct
- 1] : *ps
;
while (*p
!= i
&& *p
>= 0)
Tprintf(" new state %d\n", p
[1]);