* Copyright (c) 1991 The Regents of the University of California.
* This code is derived from software contributed to Berkeley by
* %sccs.include.redist.c%
/* SCCS identification string for parser.c. */
static char sccsid[] = "@(#)parser.c 5.9 (Berkeley) %G%";
#include "expand.h" /* defines rmescapes() */
#include "redir.h" /* defines copyfd() */
/* values returned by readtoken */
/*
 * Bookkeeping for one here document: a link to the next entry, the
 * redirection node it belongs to, its end-of-input marker, and the
 * tab-stripping flag.
 * NOTE(review): the braces that delimit the body of struct heredoc are not
 * present in this copy of the file -- confirm against the original source.
 */
struct heredoc
*next
; /* next here document in list */
union node
*here
; /* redirection node */
char *eofmark
; /* string indicating end of input */
int striptabs
; /* if set, strip leading tabs */
/* Global parser state. */
struct heredoc *heredoclist;	/* here documents that still have to be read */
int parsebackquote;		/* nonzero while we are inside backquotes */
int doprompt;			/* when set, the user is prompted */
int needprompt;			/* true if interactive and at start of line */
int lasttoken;			/* the token most recently read */
MKINIT int tokpushback;		/* last token pushed back */
char *wordtext;			/* text of the last word readtoken returned */
MKINIT int checkkwd;		/* 1 == check for kwds, 2 == also eat newlines */
struct nodelist *backquotelist;	/* cmds in backquotes for the last word token */
int quoteflag;			/* set if (part of) last token was quoted */
int startlinno;			/* line number on which the last token started */
#define GDB_HACK 1 /* avoid local declarations which gdb can't handle */

/*
 * Pre-encoded word text: a CTLVAR byte, a VSNORMAL|VSQUOTE type byte, the
 * name '@', and a '=' byte.
 * NOTE(review): appears to stand for a quoted "$@" and to be used as the
 * narg text in the for-loop parsing code -- confirm.
 */
static const char argvars[5] = {
	CTLVAR, VSNORMAL | VSQUOTE, '@', '=', '\0'
};
/*
 * Characters that select a substitution type. The substitution parsing
 * code computes the subtype as (offset of the character in this string)
 * plus VSNORMAL.
 */
static const char types[] = "}-+?=";
/* Forward declarations for routines used in this file. */

/* Routines that return a parse-tree node. */
STATIC union node *list __P((int));
STATIC union node *andor __P((void));
STATIC union node *pipeline __P((void));
STATIC union node *command __P((void));
STATIC union node *simplecmd __P((void));

/* Redirection and here-document helpers. */
STATIC void parsefname __P((void));
STATIC void parseheredoc __P((void));

/* Token reading. */
STATIC int readtoken __P((void));
STATIC int readtoken1 __P((int, char const *, char *, int));
STATIC void attyline __P((void));
STATIC int noexpand __P((char *));

/* Syntax error reporting and prompting. */
STATIC void synexpect __P((int));
STATIC void synerror __P((char *));
STATIC void setprompt __P((int));
* Read and parse a command. Returns NEOF on end of file. (NULL is a
* valid parse tree indicating a blank line.)
union node
*n1
, *n2
, *n3
;
if (nlflag
== 0 && tokendlist
[peektoken()])
if (n1
->type
== NCMD
|| n1
->type
== NPIPE
) {
} else if (n1
->type
== NREDIR
) {
n3
= (union node
*)stalloc(sizeof (struct nredir
));
n3
->nredir
.redirect
= NULL
;
if (readtoken() == TNL
) {
if (tokendlist
[peektoken()])
n3
= (union node
*)stalloc(sizeof (struct nbinary
));
pungetc(); /* push back EOF on input */
union node
*n1
, *n2
, *n3
;
if ((t
= readtoken()) == TAND
) {
n3
= (union node
*)stalloc(sizeof (struct nbinary
));
union node
*n1
, *pipenode
, *notnode
;
struct nodelist
*lp
, *prev
;
TRACE(("pipeline: entered\n"));
while (readtoken() == TNOT
) {
TRACE(("pipeline: TNOT recognized\n"));
if (readtoken() == TPIPE
) {
pipenode
= (union node
*)stalloc(sizeof (struct npipe
));
pipenode
->npipe
.backgnd
= 0;
lp
= (struct nodelist
*)stalloc(sizeof (struct nodelist
));
pipenode
->npipe
.cmdlist
= lp
;
lp
= (struct nodelist
*)stalloc(sizeof (struct nodelist
));
} while (readtoken() == TPIPE
);
notnode
= (union node
*)stalloc(sizeof (struct nnot
));
union node
*redir
, **rpp
;
n1
= (union node
*)stalloc(sizeof (struct nif
));
if (readtoken() != TTHEN
)
n1
->nif
.ifpart
= list(0);
while (readtoken() == TELIF
) {
n2
->nif
.elsepart
= (union node
*)stalloc(sizeof (struct nif
));
if (readtoken() != TTHEN
)
n2
->nif
.ifpart
= list(0);
n2
->nif
.elsepart
= list(0);
n1
= (union node
*)stalloc(sizeof (struct nbinary
));
n1
->type
= (lasttoken
== TWHILE
)? NWHILE
: NUNTIL
;
n1
->nbinary
.ch1
= list(0);
if ((got
=readtoken()) != TDO
) {
TRACE(("expecting DO got %s %s\n", tokname
[got
], got
== TWORD
? wordtext
: ""));
n1
->nbinary
.ch2
= list(0);
if (readtoken() != TDONE
)
if (readtoken() != TWORD
|| quoteflag
|| ! goodname(wordtext
))
synerror("Bad for loop variable");
n1
= (union node
*)stalloc(sizeof (struct nfor
));
if (readtoken() == TWORD
&& ! quoteflag
&& equal(wordtext
, "in")) {
while (readtoken() == TWORD
) {
n2
= (union node
*)stalloc(sizeof (struct narg
));
n2
->narg
.text
= wordtext
;
n2
->narg
.backquote
= backquotelist
;
static const char argvars
[5] = {CTLVAR
, VSNORMAL
|VSQUOTE
,
n2
= (union node
*)stalloc(sizeof (struct narg
));
n2
->narg
.text
= (char *)argvars
;
n2
->narg
.backquote
= NULL
;
if (lasttoken
!= TNL
&& lasttoken
!= TSEMI
)
if ((t
= readtoken()) == TDO
)
n1
= (union node
*)stalloc(sizeof (struct ncase
));
if (readtoken() != TWORD
)
n1
->ncase
.expr
= n2
= (union node
*)stalloc(sizeof (struct narg
));
n2
->narg
.text
= wordtext
;
n2
->narg
.backquote
= backquotelist
;
while (readtoken() == TNL
);
if (lasttoken
!= TWORD
|| ! equal(wordtext
, "in"))
synerror("expecting \"in\"");
while (checkkwd
= 2, readtoken() == TWORD
) {
*cpp
= cp
= (union node
*)stalloc(sizeof (struct nclist
));
app
= &cp
->nclist
.pattern
;
*app
= ap
= (union node
*)stalloc(sizeof (struct narg
));
ap
->narg
.text
= wordtext
;
ap
->narg
.backquote
= backquotelist
;
if (readtoken() != TPIPE
)
if (readtoken() != TWORD
)
cp
->nclist
.body
= list(0);
if ((t
= readtoken()) == TESAC
)
n1
= (union node
*)stalloc(sizeof (struct nredir
));
n1
->nredir
.redirect
= NULL
;
/* Now check for redirection which may follow command */
while (readtoken() == TREDIR
) {
if (n1
->type
!= NSUBSHELL
) {
n2
= (union node
*)stalloc(sizeof (struct nredir
));
n1
->nredir
.redirect
= redir
;
union node
*redir
, **rpp
;
if (readtoken() == TWORD
) {
n
= (union node
*)stalloc(sizeof (struct narg
));
n
->narg
.backquote
= backquotelist
;
} else if (lasttoken
== TREDIR
) {
parsefname(); /* read name of redirection file */
} else if (lasttoken
== TLP
&& app
== &args
->narg
.next
if (! goodname(n
->narg
.text
))
synerror("Bad function name");
n
->narg
.next
= command();
n
= (union node
*)stalloc(sizeof (struct ncmd
));
n
->ncmd
.redirect
= redir
;
union node
*n
= redirnode
;
if (readtoken() != TWORD
)
struct heredoc
*here
= heredoc
;
TRACE(("Here document %d\n", n
->type
));
while (*wordtext
== '\t')
if (! noexpand(wordtext
) || (i
= strlen(wordtext
)) == 0 || i
> EOFMARKLEN
)
synerror("Illegal eof marker for << redirection");
here
->eofmark
= wordtext
;
for (p
= heredoclist
; p
->next
; p
= p
->next
);
} else if (n
->type
== NTOFD
|| n
->type
== NFROMFD
) {
if (is_digit(wordtext
[0]))
n
->ndup
.dupfd
= digit_val(wordtext
[0]);
else if (wordtext
[0] == '-')
if (wordtext
[1] != '\0') {
synerror("Bad fd number");
n
->nfile
.fname
= (union node
*)stalloc(sizeof (struct narg
));
n
->narg
.backquote
= backquotelist
;
* Input any here documents.
heredoclist
= here
->next
;
readtoken1(pgetc(), here
->here
->type
== NHERE
? SQSYNTAX
: DQSYNTAX
,
here
->eofmark
, here
->striptabs
);
n
= (union node
*)stalloc(sizeof (struct narg
));
n
->narg
.backquote
= backquotelist
;
here
->here
->nhere
.doc
= n
;
STATIC
int xxreadtoken();
int savecheckkwd
= checkkwd
;
int alreadyseen
= tokpushback
;
* check for keywords and aliases
if (t
== TWORD
&& !quoteflag
) {
register char * const *pp
, *s
;
for (pp
= parsekwd
; *pp
; pp
++) {
if (**pp
== *wordtext
&& equal(*pp
, wordtext
)) {
lasttoken
= t
= pp
- parsekwd
+ KWDOFFSET
;
TRACE(("keyword %s recognized\n", tokname
[t
]));
if (ap
= lookupalias(wordtext
, 1)) {
pushstring(ap
->val
, strlen(ap
->val
), ap
);
TRACE(("token %s %s\n", tokname
[t
], t
== TWORD
? wordtext
: ""));
TRACE(("reread token %s %s\n", tokname
[t
], t
== TWORD
? wordtext
: ""));
* Read the next input token.
* If the token is a word, we set backquotelist to the list of cmds in
* backquotes. We set quoteflag to true if any part of the word was
* If the token is TREDIR, then we set redirnode to a structure containing
* In all cases, the variable startlinno is set to the number of the line
* on which the token starts.
* [Change comment: here documents and internal procedures]
* [Readtoken shouldn't have any arguments. Perhaps we should make the
* word parsing code into a separate routine. In this case, readtoken
* doesn't need to have any internal procedures, but parseword does.
* We could also make parseoperator in essence the main routine, and
* have parseword (readtoken1?) handle both words and redirection.]
/*
 * Record `token' in lasttoken and return it from the enclosing function.
 * The argument is parenthesized so that whatever expression is passed in
 * is assigned as a single unit.
 */
#define RETURN(token) return lasttoken = (token)
for (;;) { /* until token or start of word found */
if (c
== ' ' || c
== '\t')
continue; /* quick check for white space first */
while ((c
= pgetc()) != '\n' && c
!= PEOF
);
return readtoken1(c
, BASESYNTAX
, (char *)NULL
, 0);
* If eofmark is NULL, read a word or a redirection symbol. If eofmark
* is not NULL, read a here document. In the latter case, eofmark is the
* word which marks the end of the document and striptabs is true if
* leading tabs should be stripped from the document. The argument firstc
* is the first character of the input token or document.
* Because C does not have internal subroutines, I have simulated them
* using goto's to implement the subroutine linkage. The following macros
* will run code that appears at the end of readtoken1.
/*
 * Each macro below jumps to a code label and also defines the label that
 * the code jumps back to when it is finished. Because every return label
 * has a fixed name, each macro can be expanded only once per function.
 */
/* Jump to checkend; execution resumes at checkend_return. */
#define CHECKEND() {goto checkend; checkend_return:;}
/* Jump to parseredir; execution resumes at parseredir_return. */
#define PARSEREDIR() {goto parseredir; parseredir_return:;}
/* Jump to parsesub; execution resumes at parsesub_return. */
#define PARSESUB() {goto parsesub; parsesub_return:;}
/* Both backquote macros share parsebackq; oldstyle is set to 1 or 0 first. */
#define PARSEBACKQOLD() {oldstyle = 1; goto parsebackq; parsebackq_oldreturn:;}
#define PARSEBACKQNEW() {oldstyle = 0; goto parsebackq; parsebackq_newreturn:;}
/* Jump to parsearith; execution resumes at parsearith_return. */
#define PARSEARITH() {goto parsearith; parsearith_return:;}
readtoken1(firstc
, syntax
, eofmark
, striptabs
)
char line
[EOFMARKLEN
+ 1];
int varnest
; /* levels of variables expansion */
int arinest
; /* levels of arithmetic expansion */
int parenlevel
; /* levels of parens in arithmetic */
char const *prevsyntax
; /* syntax before arithmetic */
loop
: { /* for each line, until end of word */
if (c
== '\034' && doprompt
&& attyset() && ! equal(termval(), "emacs")) {
if (syntax
== BASESYNTAX
)
CHECKEND(); /* set c to PEOF if at end of here document */
for (;;) { /* until end of line or end of word */
CHECKSTRSPACE(3, out
); /* permit 3 calls to USTPUTC */
if (parsebackquote
&& c
== '\\') {
c
= pgetc(); /* XXX - compat with old /bin/sh */
if (c
!= '\\' && c
!= '`' && c
!= '$') {
if (syntax
== BASESYNTAX
)
goto endword
; /* exit outer loop */
goto loop
; /* continue outer loop */
if (eofmark
== NULL
|| dblquote
)
case CBACK
: /* backslash */
if (dblquote
&& c
!= '\\' && c
!= '`' && c
!= '$'
&& (c
!= '"' || eofmark
!= NULL
))
PARSESUB(); /* parse substitution */
case CLP
: /* '(' in arithmetic */
case CRP
: /* ')' in arithmetic */
* (don't 2nd guess - no error)
if (parsebackquote
&& syntax
== BASESYNTAX
) {
return lasttoken
= TENDBQUOTE
;
goto endword
; /* exit outer loop */
goto endword
; /* exit outer loop */
goto endword
; /* exit outer loop */
synerror("Missing '))'");
if (syntax
!= BASESYNTAX
&& eofmark
== NULL
)
synerror("Unterminated quoted string");
len
= out
- stackblock();
if ((c
== '>' || c
== '<')
&& (*out
== '\0' || is_digit(*out
))) {
return lasttoken
= TREDIR
;
return lasttoken
= TWORD
;
/* end of readtoken routine */
* Check to see whether we are at the end of the here document. When this
* is called, c is set to the first character of the next input line. If
* we are at the end of the here document, this routine sets c to PEOF.
if (pfgets(line
, sizeof line
) != NULL
) {
for (q
= eofmark
+ 1 ; *q
&& *p
== *q
; p
++, q
++);
if (*p
== '\n' && *q
== '\0') {
pushstring(line
, strlen(line
), NULL
);
* Parse a redirection operator. The variable "out" points to a string
* specifying the fd to be redirected. The variable "c" contains the
* first character of the redirection operator.
np
= (union node
*)stalloc(sizeof (struct nfile
));
if (sizeof (struct nfile
) != sizeof (struct nhere
)) {
np
= (union node
*)stalloc(sizeof (struct nhere
));
heredoc
= (struct heredoc
*)stalloc(sizeof (struct heredoc
));
if ((c
= pgetc()) == '-') {
np
->nfile
.fd
= digit_val(fd
);
* Parse a substitution. At this point, we have read the dollar sign
static const char types
[] = "}-+?=";
if (c
!= '(' && c
!= '{' && !is_name(c
) && !is_special(c
)) {
} else if (c
== '(') { /* $(command) or $((arith)) */
typeloc
= out
- stackblock();
badsub
: synerror("Bad substitution");
subtype
= p
- types
+ VSNORMAL
;
*(stackblock() + typeloc
) = subtype
| flags
;
* Called to parse command substitutions. The variable oldstyle is zero if
* the command is enclosed inside $(...) and nonzero for the old backquote
* form; nlpp is a pointer to the head of the linked list of commands
* (passed by reference), and savelen is the number of characters on the
* top of the stack which must be preserved.
struct jmploc
*volatile savehandler
;
savepbq
= parsebackquote
;
if (setjmp(jmploc
.loc
)) {
longjmp(handler
->loc
, 1);
savelen
= out
- stackblock();
bcopy(stackblock(), str
, savelen
);
*nlpp
= (struct nodelist
*)stalloc(sizeof (struct nodelist
));
parsebackquote
= oldstyle
;
t
= oldstyle
? TENDBQUOTE
: TRP
;
while (stackblocksize() <= savelen
)
bcopy(str
, out
, savelen
);
parsebackquote
= savepbq
;
USTPUTC(CTLBACKQ
| CTLQUOTE
, out
);
goto parsebackq_oldreturn
;
goto parsebackq_newreturn
;
* Parse an arithmetic expansion (indicate start of one and set state)
* we collapse embedded arithmetic expansions to
* parentheses, which should be equivalent
* Returns true if the text contains nothing to expand (no dollar signs
while ((c
= *p
++) != '\0') {
else if (BASESYNTAX
[c
] == CCTL
)
* Return true if the argument is a legal variable name (a letter or
* underscore followed by zero or more letters, underscores, and digits).
* Called when an unexpected token is read during the parse. The argument
* is the token that is expected, or -1 if more than one type of token can
fmtstr(msg
, 64, "%s unexpected (expecting %s)",
tokname
[lasttoken
], tokname
[token
]);
fmtstr(msg
, 64, "%s unexpected", tokname
[lasttoken
]);
outfmt(&errout
, "%s: %d: ", commandname
, startlinno
);
outfmt(&errout
, "Syntax error: %s\n", msg
);
out2str(getprompt(NULL
));
* called by editline -- any expansions to the prompt
return "<internal prompt error>";