* Copyright (c) 1991 The Regents of the University of California.
* This code is derived from software contributed to Berkeley by
* %sccs.include.redist.c%
/*
 * SCCS identification string for this file.  The "@(#)" prefix is the
 * marker that the what(1) utility scans for; %G% is an SCCS keyword
 * that has not been expanded in this copy.
 */
static char sccsid
[] = "@(#)parser.c 5.2 (Berkeley) %G%";
#include "expand.h" /* defines rmescapes() */
#include "redir.h" /* defines copyfd() */
/* values returned by readtoken */
/*
 * Bookkeeping for one here document that still has to be read.
 * Entries are chained through `next`; the head of the chain is
 * heredoclist.
 */
struct heredoc
*next
; /* next here document in list */
union node
*here
; /* redirection node; its nhere.doc field receives the parsed document */
char *eofmark
; /* string indicating end of input (taken from wordtext by the parser) */
int striptabs
; /* if set, strip leading tabs */
/*
 * Parser state shared by the routines in this file.
 */
struct heredoc
*heredoclist
; /* list of here documents to read */
int parsebackquote
; /* nonzero if we are inside backquotes; saved and restored around command-substitution parsing */
int doprompt
; /* if set, prompt the user */
int needprompt
; /* true if interactive and at start of line */
int lasttoken
; /* last token read; assigned by the RETURN macro and by the keyword check */
MKINIT
int tokpushback
; /* last token pushed back */
char *wordtext
; /* text of last word returned by readtoken */
struct nodelist
*backquotelist
; /* list of commands found in backquotes within the last word read */
int quoteflag
; /* set if (part of) last token was quoted */
int startlinno
; /* line # where last token started */
/*
 * With GDB_HACK defined, the two tables below are declared at file
 * scope.  The same declarations also appear further down inside the
 * routines that use them.
 * NOTE(review): presumably the two copies sit under opposite
 * preprocessor conditionals that are not visible here -- confirm.
 */
#define GDB_HACK 1 /* avoid local declarations which gdb can't handle */
/*
 * Pre-encoded word text: a CTLVAR marker, the flag byte
 * VSNORMAL|VSQUOTE, then the characters '@' and '=' and a terminating
 * NUL.  The for-loop parsing code stores this as narg.text.
 * NOTE(review): presumably the default word list (a quoted "$@") used
 * when a for loop has no "in" clause -- confirm.
 */
static const char argvars
[5] = {CTLVAR
, VSNORMAL
|VSQUOTE
, '@', '=', '\0'};
/*
 * Characters recognised after a variable name in a substitution.  The
 * substitution parser derives the subtype code from a position in this
 * string: (pointer into types) - types + VSNORMAL.
 */
static const char types
[] = "}-+?=";
/*
 * Forward declarations, in prototype form, of the parsing routines
 * private to this file.  list, andor, pipeline, command and simplecmd
 * each return a parse-tree node (union node *); the remaining routines
 * are tokenizer and error-reporting helpers.
 */
STATIC
union node
*list(int);
STATIC
union node
*andor(void);
STATIC
union node
*pipeline(void);
STATIC
union node
*command(void);
STATIC
union node
*simplecmd(void);
STATIC
void parsefname(void);
STATIC
void parseheredoc(void);
STATIC
void checkkwd(void);
STATIC
int readtoken(void);
STATIC
int readtoken1(int, char const *, char *, int);
STATIC
void attyline(void);
STATIC
int noexpand(char *);
STATIC
void synexpect(int);
STATIC
void synerror(char *);
/*
 * The same parsing routines declared without parameter lists
 * (old-style declarations).
 * NOTE(review): presumably the alternative branch of a conditional for
 * compilers without prototype support; the preprocessor lines are not
 * visible here -- confirm.
 */
STATIC
union node
*list();
STATIC
union node
*andor();
STATIC
union node
*pipeline();
STATIC
union node
*command();
STATIC
union node
*simplecmd();
STATIC
void parsefname();
STATIC
void parseheredoc();
/*
 * putprompt is available both as a file-local function and as a macro
 * that simply writes the string with out2str().
 * NOTE(review): presumably these two forms are alternatives selected by
 * a conditional that is not visible here -- confirm.
 */
STATIC
void putprompt(char *);
#define putprompt(s) out2str(s)
* Read and parse a command. Returns NEOF on end of file. (NULL is a
* valid parse tree indicating a blank line.)
if ((t
= readtoken()) == TEOF
)
union node
*n1
, *n2
, *n3
;
if (nlflag
== 0 && tokendlist
[lasttoken
])
if (n1
->type
== NCMD
|| n1
->type
== NPIPE
) {
} else if (n1
->type
== NREDIR
) {
n3
= (union node
*)stalloc(sizeof (struct nredir
));
n3
->nredir
.redirect
= NULL
;
if (readtoken() == TNL
) {
if (tokendlist
[lasttoken
])
n3
= (union node
*)stalloc(sizeof (struct nbinary
));
pungetc(); /* push back EOF on input */
union node
*n1
, *n2
, *n3
;
if ((t
= readtoken()) == TAND
) {
n3
= (union node
*)stalloc(sizeof (struct nbinary
));
union node
*n1
, *pipenode
;
struct nodelist
*lp
, *prev
;
if (readtoken() == TPIPE
) {
pipenode
= (union node
*)stalloc(sizeof (struct npipe
));
pipenode
->npipe
.backgnd
= 0;
lp
= (struct nodelist
*)stalloc(sizeof (struct nodelist
));
pipenode
->npipe
.cmdlist
= lp
;
lp
= (struct nodelist
*)stalloc(sizeof (struct nodelist
));
} while (readtoken() == TPIPE
);
union node
*redir
, **rpp
;
n1
= (union node
*)stalloc(sizeof (struct nif
));
if (readtoken() != TTHEN
)
n1
->nif
.ifpart
= list(0);
while (readtoken() == TELIF
) {
n2
->nif
.elsepart
= (union node
*)stalloc(sizeof (struct nif
));
if (readtoken() != TTHEN
)
n2
->nif
.ifpart
= list(0);
n2
->nif
.elsepart
= list(0);
n1
= (union node
*)stalloc(sizeof (struct nbinary
));
n1
->type
= (lasttoken
== TWHILE
)? NWHILE
: NUNTIL
;
n1
->nbinary
.ch1
= list(0);
n1
->nbinary
.ch2
= list(0);
if (readtoken() != TDONE
)
if (readtoken() != TWORD
|| quoteflag
|| ! goodname(wordtext
))
synerror("Bad for loop variable");
n1
= (union node
*)stalloc(sizeof (struct nfor
));
if (readtoken() == TWORD
&& ! quoteflag
&& equal(wordtext
, "in")) {
while (readtoken() == TWORD
) {
n2
= (union node
*)stalloc(sizeof (struct narg
));
n2
->narg
.text
= wordtext
;
n2
->narg
.backquote
= backquotelist
;
static const char argvars
[5] = {CTLVAR
, VSNORMAL
|VSQUOTE
,
n2
= (union node
*)stalloc(sizeof (struct narg
));
n2
->narg
.text
= (char *)argvars
;
n2
->narg
.backquote
= NULL
;
if (lasttoken
!= TNL
&& lasttoken
!= TSEMI
)
if ((t
= readtoken()) == TDO
)
n1
= (union node
*)stalloc(sizeof (struct ncase
));
if (readtoken() != TWORD
)
n1
->ncase
.expr
= n2
= (union node
*)stalloc(sizeof (struct narg
));
n2
->narg
.text
= wordtext
;
n2
->narg
.backquote
= backquotelist
;
while (readtoken() == TNL
);
if (lasttoken
!= TWORD
|| ! equal(wordtext
, "in"))
synerror("expecting \"in\"");
while (checkkwd(), readtoken() == TWORD
) {
*cpp
= cp
= (union node
*)stalloc(sizeof (struct nclist
));
app
= &cp
->nclist
.pattern
;
*app
= ap
= (union node
*)stalloc(sizeof (struct narg
));
ap
->narg
.text
= wordtext
;
ap
->narg
.backquote
= backquotelist
;
if (readtoken() != TPIPE
)
if (readtoken() != TWORD
)
cp
->nclist
.body
= list(0);
if ((t
= readtoken()) == TESAC
)
n1
= (union node
*)stalloc(sizeof (struct nredir
));
n1
->nredir
.redirect
= NULL
;
/* Now check for redirection which may follow command */
while (readtoken() == TREDIR
) {
if (n1
->type
!= NSUBSHELL
) {
n2
= (union node
*)stalloc(sizeof (struct nredir
));
n1
->nredir
.redirect
= redir
;
union node
*redir
, **rpp
;
if (readtoken() == TWORD
) {
n
= (union node
*)stalloc(sizeof (struct narg
));
n
->narg
.backquote
= backquotelist
;
} else if (lasttoken
== TREDIR
) {
parsefname(); /* read name of redirection file */
} else if (lasttoken
== TLP
&& app
== &args
->narg
.next
if (! goodname(n
->narg
.text
))
synerror("Bad function name");
n
->narg
.next
= command();
n
= (union node
*)stalloc(sizeof (struct ncmd
));
n
->ncmd
.redirect
= redir
;
union node
*n
= redirnode
;
if (readtoken() != TWORD
)
struct heredoc
*here
= heredoc
;
TRACE(("Here document %d\n", n
->type
));
while (*wordtext
== '\t')
if (! noexpand(wordtext
) || (i
= strlen(wordtext
)) == 0 || i
> EOFMARKLEN
)
synerror("Illegal eof marker for << redirection");
here
->eofmark
= wordtext
;
for (p
= heredoclist
; p
->next
; p
= p
->next
);
} else if (n
->type
== NTOFD
|| n
->type
== NFROMFD
) {
if (is_digit(wordtext
[0]))
n
->ndup
.dupfd
= digit_val(wordtext
[0]);
else if (wordtext
[0] == '-')
if (wordtext
[1] != '\0') {
synerror("Bad fd number");
n
->nfile
.fname
= (union node
*)stalloc(sizeof (struct narg
));
n
->narg
.backquote
= backquotelist
;
* Input any here documents.
heredoclist
= here
->next
;
readtoken1(pgetc(), here
->here
->type
== NHERE
? SQSYNTAX
: DQSYNTAX
,
here
->eofmark
, here
->striptabs
);
n
= (union node
*)stalloc(sizeof (struct narg
));
n
->narg
.backquote
= backquotelist
;
here
->here
->nhere
.doc
= n
;
* This routine is called to tell readtoken that we are at the beginning
* of a command, so newlines should be ignored and keywords should be
* checked for. We munge things here rather than setting a flag for
* readtoken.
register char *const *pp
;
while ((t
= readtoken()) == TNL
)
if (t
== TWORD
&& quoteflag
== 0) {
for (pp
= parsekwd
; *pp
; pp
++) {
if (**pp
== *wordtext
&& equal(*pp
, wordtext
)) {
lasttoken
= pp
- parsekwd
+ KWDOFFSET
;
/*
 * Forward declaration (no prototype) of xxreadtoken, which returns an
 * int token code.
 * NOTE(review): presumably the low-level tokenizer that readtoken
 * wraps -- confirm against the definition.
 */
STATIC
int xxreadtoken();
TRACE(("token %s %s\n", tokname
[t
], t
== TWORD
? wordtext
: ""));
* Read the next input token.
* If the token is a word, we set backquotelist to the list of cmds in
* backquotes. We set quoteflag to true if any part of the word was
* quoted.
* If the token is TREDIR, then we set redirnode to a structure containing
* the redirection.
* In all cases, the variable startlinno is set to the number of the line
* on which the token starts.
* [Change comment: here documents and internal procedures]
* [Readtoken shouldn't have any arguments. Perhaps we should make the
* word parsing code into a separate routine. In this case, readtoken
* doesn't need to have any internal procedures, but parseword does.
* We could also make parseoperator in essence the main routine, and
* have parseword (readtoken1?) handle both words and redirection.]
/*
 * Return `token` from the enclosing function, recording it in lasttoken
 * on the way out.  The argument is used unparenthesized, so pass a
 * simple expression.
 */
#define RETURN(token) return lasttoken = token
for (;;) { /* until token or start of word found */
if (c
== ' ' || c
== '\t')
continue; /* quick check for white space first */
while ((c
= pgetc()) != '\n' && c
!= PEOF
);
return readtoken1(c
, BASESYNTAX
, (char *)NULL
, 0);
* If eofmark is NULL, read a word or a redirection symbol. If eofmark
* is not NULL, read a here document. In the latter case, eofmark is the
* word which marks the end of the document and striptabs is true if
* leading tabs should be stripped from the document. The argument firstc
* is the first character of the input token or document.
* Because C does not have internal subroutines, I have simulated them
* using goto's to implement the subroutine linkage. The following macros
* will run code that appears at the end of readtoken1.
/*
 * Each macro below "calls" a code section by jumping to its label and
 * supplies the label that the section jumps back to when it is done.
 * Because the return labels are fixed, each macro can be expanded at
 * only one place in the function.
 */
/* Jump to the checkend section; it resumes at checkend_return. */
#define CHECKEND() {goto checkend; checkend_return:;}
/* Jump to the parseredir section; it resumes at parseredir_return. */
#define PARSEREDIR() {goto parseredir; parseredir_return:;}
/* Jump to the parsesub section; it resumes at parsesub_return. */
#define PARSESUB() {goto parsesub; parsesub_return:;}
/* Jump to parsebackq with oldstyle set to 1; resumes at parsebackq_oldreturn. */
#define PARSEBACKQOLD() {oldstyle = 1; goto parsebackq; parsebackq_oldreturn:;}
/* Jump to parsebackq with oldstyle set to 0; resumes at parsebackq_newreturn. */
#define PARSEBACKQNEW() {oldstyle = 0; goto parsebackq; parsebackq_newreturn:;}
readtoken1(firstc
, syntax
, eofmark
, striptabs
)
char line
[EOFMARKLEN
+ 1];
loop
: { /* for each line, until end of word */
if (c
== '\034' && doprompt
&& attyset() && ! equal(termval(), "emacs")) {
if (syntax
== BASESYNTAX
)
CHECKEND(); /* set c to PEOF if at end of here document */
for (;;) { /* until end of line or end of word */
CHECKSTRSPACE(3, out
); /* permit 3 calls to USTPUTC */
if (syntax
== BASESYNTAX
)
goto endword
; /* exit outer loop */
goto loop
; /* continue outer loop */
if (eofmark
== NULL
|| dblquote
)
case CBACK
: /* backslash */
if (dblquote
&& c
!= '\\' && c
!= '`' && c
!= '$'
&& (c
!= '"' || eofmark
!= NULL
))
PARSESUB(); /* parse substitution */
if (parsebackquote
&& syntax
== BASESYNTAX
) {
return lasttoken
= TENDBQUOTE
;
goto endword
; /* exit outer loop */
goto endword
; /* exit outer loop */
goto endword
; /* exit outer loop */
if (syntax
!= BASESYNTAX
&& eofmark
== NULL
)
synerror("Unterminated quoted string");
len
= out
- stackblock();
if ((c
== '>' || c
== '<')
&& (*out
== '\0' || is_digit(*out
))) {
return lasttoken
= TREDIR
;
return lasttoken
= TWORD
;
/* end of readtoken routine */
* Check to see whether we are at the end of the here document. When this
* is called, c is set to the first character of the next input line. If
* we are at the end of the here document, this routine sets c to PEOF.
if (pfgets(line
, sizeof line
) != NULL
) {
for (q
= eofmark
+ 1 ; *q
&& *p
== *q
; p
++, q
++);
if (*p
== '\n' && *q
== '\0') {
ppushback(line
, strlen(line
));
* Parse a redirection operator. The variable "out" points to a string
* specifying the fd to be redirected. The variable "c" contains the
* first character of the redirection operator.
np
= (union node
*)stalloc(sizeof (struct nfile
));
if (sizeof (struct nfile
) != sizeof (struct nhere
)) {
np
= (union node
*)stalloc(sizeof (struct nhere
));
heredoc
= (struct heredoc
*)stalloc(sizeof (struct heredoc
));
if ((c
= pgetc()) == '-') {
np
->nfile
.fd
= digit_val(fd
);
* Parse a substitution. At this point, we have read the dollar sign
* and nothing else.
static const char types
[] = "}-+?=";
if (c
!= '(' && c
!= '{' && !is_name(c
) && !is_special(c
)) {
} else if (c
== '(') { /* $(command) */
typeloc
= out
- stackblock();
badsub
: synerror("Bad substitution");
subtype
= p
- types
+ VSNORMAL
;
*(stackblock() + typeloc
) = subtype
| flags
;
* Called to parse command substitutions. The variable oldstyle is zero if
* the command is enclosed inside $(...) and nonzero for the old backquote
* form; nlpp is a pointer to the head of the linked list of commands
* (passed by reference), and savelen is the number of characters on the
* top of the stack which must be preserved.
struct jmploc
*volatile savehandler
;
savepbq
= parsebackquote
;
if (setjmp(jmploc
.loc
)) {
savelen
= out
- stackblock();
bcopy(stackblock(), str
, savelen
);
*nlpp
= (struct nodelist
*)stalloc(sizeof (struct nodelist
));
parsebackquote
= oldstyle
;
t
= oldstyle
? TENDBQUOTE
: TRP
;
while (stackblocksize() <= savelen
)
bcopy(str
, out
, savelen
);
parsebackquote
= savepbq
;
USTPUTC(CTLBACKQ
+ dblquote
, out
);
goto parsebackq_oldreturn
;
goto parsebackq_newreturn
;
* Called to process a command generated by atty. We execute the line,
* and catch any errors that occur so they don't propagate outside of
* this routine.
struct jmploc
*volatile savehandler
;
if (pfgets(line
, sizeof line
) == NULL
)
return; /* "can't happen" */
if (setjmp(jmploc
.loc
)) {
if (exception
== EXERROR
)
* Output a prompt for atty. We output the prompt as part of the
* appropriate escape sequence.
if (attyset() && ! equal(termval(), "emacs")) {
if ((unsigned)(*p
- ' ') <= '~' - ' ')
* Returns true if the text contains nothing to expand (no dollar signs
* or backquotes).
while ((c
= *p
++) != '\0') {
else if (BASESYNTAX
[c
] == CCTL
)
* Return true if the argument is a legal variable name (a letter or
* underscore followed by zero or more letters, underscores, and digits).
* Called when an unexpected token is read during the parse. The argument
* is the token that is expected, or -1 if more than one type of token can
* occur at this point.
fmtstr(msg
, 64, "%s unexpected (expecting %s)",
tokname
[lasttoken
], tokname
[token
]);
fmtstr(msg
, 64, "%s unexpected", tokname
[lasttoken
]);
outfmt(&errout
, "%s: %d: ", commandname
, startlinno
);
outfmt(&errout
, "Syntax error: %s\n", msg
);