[unix-history] / usr / src / contrib / rc-1.4 / lex.c

/* lex.c: rc's lexical analyzer */

#include "rc.h"
#include "y.tab.h"

#include <limits.h>

/*
	Special characters (i.e., "non-word") in rc:
		\t \n # ; & | ^ $ = ~ ` ' { } @ ! ( ) < > \

	The lexical analyzer is fairly straightforward. The only really
	unclean part concerns backslash continuation and "double
	backslashes". A backslash followed by a newline is treated as a
	space, otherwise backslash is not a special character (i.e.,
	it can be part of a word).  This introduces a host of unwanted
	special cases. In our case, \ cannot be a word character, since
	we wish to read in all word characters in a tight loop.

	Note: to save the trouble of declaring these arrays with TRUEs
	and FALSEs, I am assuming that FALSE = 0, TRUE = 1. (and so is
	it declared in rc.h)
*/

/*
   Sizing of the scanner's word buffer (realbuf). The buffer starts at BUFSIZE bytes,
   yylex() doubles it whenever a word fills it, and inityy() shrinks it back to BUFSIZE
   once it has grown past BUFMAX.
*/
#define BUFSIZE ((size_t) 1000)	/*	malloc hates power of 2 buffers? */
#define BUFMAX (8 * BUFSIZE)	/* 	How big the buffer can get before we re-allocate the
					space at BUFSIZE again. Premature optimization? Maybe.
				*/

typedef enum wordstates {
	NW, RW, KW /* "nonword", "realword", "keyword" */
} wordstates;

static void getpair(int);

int lineno;

/*
   nw: "non-word" lookup table for ordinary words, indexed by (unsigned char) c.
   A 1 marks a character that terminates a word. The flagged bytes are:
	NUL  \t  \n  space  !  #  $  &  '  (  )  ;  <  =  >  @  \  ^  `  {  |  }  ~
   Every other byte, including all bytes >= 128, is a word character.
   Each row below covers 32 consecutive character codes.
*/
const char nw[] = {
	1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

/*
   dnw: "non-word" lookup table used while scanning a variable name, i.e. for the
   word that follows a '$'. Indexed by (unsigned char) c; a 1 marks a character
   that terminates the name. Only '*', the digits, the ASCII letters and '_' are 0
   (allowed in a name); every other byte, including all bytes >= 128, is 1.
   Each row below covers 32 consecutive character codes.
*/
const char dnw[] = {
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};

/* Scanner state shared by the functions in this file. */
static size_t bufsize = BUFSIZE;	/* current capacity of realbuf, in bytes */
static char *realbuf = NULL;		/* word buffer; allocated in inityy(), grown in yylex() */
static bool newline = FALSE;		/* a newline was just returned; the next yylex() prints prompt2 */
static bool errset = FALSE;		/* set by scanerror(); the next yylex() returns '\n' once */
static bool prerror = FALSE;		/* set by scanerror(); the next yyerror() prints nothing */
static wordstates w = NW;		/* kind of the most recently scanned token */
static int fd_left, fd_right;		/* descriptors parsed by getpair() */

/*
   Free-caret insertion, for use inside yylex() only (it uses the local c and returns
   from the enclosing function). If the previous token was a word or keyword, the
   character just read is pushed back, w is reset, and '^' is returned in its place.
*/
#define checkfreecaret {if (w != NW) { w = NW; ugchar(c); return '^'; }}

/*
   Special values for fd_left/fd_right: UNSET means no descriptor was given,
   CLOSED means the form "[n=]" was used.
*/
enum filedescriptors {
	UNSET = -9, CLOSED = -1
};

/*
   yylex: scan the next token from the input (read with gchar()) and return its
   token code to the parser. The text of a WORD and the details of a PIPE,
   REDIR/SREDIR or DUP are handed back through yylval.

   State carried from one call to the next:
	w	 kind of the previous token (NW/RW/KW); decides whether a free
		 caret is returned first and whether '(' becomes SUB
	dollar	 set when a '$', $# or $^ was just scanned, so that the following
		 word is scanned with the stricter variable-name table dnw
	errset	 set by scanerror(); the next call returns a single '\n'
	newline	 set when a newline was returned; the next call issues prompt2

   Returns END at end of input, HUH after a scan error, a named token code, or
   the character itself for single-character tokens.
*/
extern int yylex() {
	static bool dollar = FALSE;
	bool saw_meta = FALSE;
	int c;
	size_t i;			/* The purpose of all these local assignments is to	*/
	const char *meta;		/* allow optimizing compilers like gcc to load these	*/
	char *buf = realbuf;		/* values into registers. On a sparc this is a		*/
	YYSTYPE *y = &yylval;		/* win, in code size *and* execution time		*/
	if (errset) {
		errset = FALSE;
		return '\n';
	}
	/* rc variable-names may contain only alnum, '*' and '_', so use dnw if we are scanning one. */
	meta = (dollar ? dnw : nw);
	dollar = FALSE;
	if (newline) {
		--lineno; /* slight space optimization; print_prompt2() always increments lineno */
		print_prompt2();
		newline = FALSE;
	}
	/* Skip blanks; whitespace also cancels any pending free caret. */
top:	while ((c = gchar()) == ' ' || c == '\t')
		w = NW;
	if (c == EOF)
		return END;
	if (!meta[(unsigned char) c]) {	/* it's a word or keyword. */
		checkfreecaret;
		w = RW;
		i = 0;
		/* Tight loop over word characters; the buffer is doubled whenever it fills up. */
	read:	do {
			buf[i++] = c;
			if (c == '?' || c == '[' || c == '*')
				saw_meta = TRUE;
			if (i >= bufsize)
				buf = realbuf = erealloc(buf, bufsize *= 2);
		} while ((c = gchar()) != EOF && !meta[(unsigned char) c]);
		/* The word stopped at a non-word character; a backslash needs special treatment. */
		while (c == '\\') {
			if ((c = gchar()) == '\n') {
				print_prompt2();
				c = ' '; /* Pretend a space was read */
				break;
			} else {
				/* The "case '\\'" branch of the switch below also jumps here, with i == 0. */
	bs:			if (meta != dnw) { /* all words but varnames may have a bslash */
					buf[i++] = '\\';
					if (i >= bufsize)
						buf = realbuf = erealloc(buf, bufsize *= 2);
					/*
					   EOF must be excluded before the table lookup: cast to
					   unsigned char it would index a word-character slot of nw
					   and be stored in the word as a bogus byte.
					*/
					if (c != EOF && !meta[(unsigned char) c])
						goto read;
				} else {
					ugchar(c);
					c = '\\';
					break;
				}
			}
		}
		ugchar(c);
		buf[i] = '\0';
		/* Assume a keyword until the comparisons below fail. */
		w = KW;
		if (i == 2) {
			if (*buf == 'i' && buf[1] == 'f') return IF;
			if (*buf == 'f' && buf[1] == 'n') return FN;
			if (*buf == 'i' && buf[1] == 'n') return IN;
		}
		if (streq(buf, "for")) return FOR;
		if (streq(buf, "else")) return ELSE;
		if (streq(buf, "switch")) return SWITCH;
		if (streq(buf, "while")) return WHILE;
		if (streq(buf, "case")) return CASE;
		w = RW;
		y->word.w = ncpy(buf);
		if (saw_meta) {
			char *r, *s;

			/* word.m parallels word.w: 1 where the character is one of ? [ *, else 0 */
			y->word.m = nalloc(strlen(buf) + 1);
			for (r = buf, s = y->word.m; *r != '\0'; r++, s++)
				*s = (*r == '?' || *r == '[' || *r == '*');
		} else {
			y->word.m = NULL;
		}
		return WORD;
	}
	/* These tokens can follow a word directly, so a free caret may be due first. */
	if (c == '`' || c == '!' || c == '@' || c == '~' || c == '$' || c == '\'') {
		checkfreecaret;
		if (c == '!' || c == '@' || c == '~')
			w = KW;
	}
	switch (c) {
	case '\0':
		pr_error("warning: null character ignored");
		goto top;
	case '!':
		return BANG;
	case '@':
		return SUBSHELL;
	case '~':
		return TWIDDLE;
	case '`':
		c = gchar();
		if (c == '`')
			return BACKBACK;
		ugchar(c);
		return '`';
	case '$':
		dollar = TRUE; /* the next call scans a variable name */
		c = gchar();
		if (c == '#')
			return COUNT;
		if (c == '^')
			return FLAT;
		ugchar(c);
		return '$';
	case '\'':
		w = RW;
		i = 0;
		/*
		   The first pass stores the opening quote itself as buf[0].
		   NOTE(review): presumably the grammar uses that leading quote to
		   recognise quoted words -- confirm before changing.
		*/
		do {
			buf[i++] = c;
			if (c == '\n')
				print_prompt2();
			if (c == EOF) {
				w = NW;
				scanerror("eof in quoted string");
				return HUH;
			}
			if (i >= bufsize)
				buf = realbuf = erealloc(buf, bufsize *= 2);
		} while ((c = gchar()) != '\'' || (c = gchar()) == '\''); /* quote "'" thus: 'how''s it going?' */
		ugchar(c);
		buf[i] = '\0';
		y->word.w = ncpy(buf);
		y->word.m = NULL;
		return WORD;
	case '\\':
		if ((c = gchar()) == '\n') {
			print_prompt2();
			goto top; /* Pretend it was just another space. */
		}
		/* A word that begins with a backslash: restore c for the caret check, then join the word code. */
		ugchar(c);
		c = '\\';
		checkfreecaret;
		c = gchar();
		i = 0;
		goto bs;
	case '(':
		if (w == RW) /* SUB's happen only after real words, not keywords, so if () and while () work */
			c = SUB;
		w = NW;
		return c;
	case '#':
		while ((c = gchar()) != '\n') /* skip comment until newline */
			if (c == EOF)
				return END;
		/* FALLTHROUGH */
	case '\n':
		lineno++;
		newline = TRUE;
		/* FALLTHROUGH */
	case ';':
	case '^':
	case ')':
	case '=':
	case '{': case '}':
		w = NW;
		return c;
	case '&':
		w = NW;
		c = gchar();
		if (c == '&')
			return ANDAND;
		ugchar(c);
		return '&';
	case '|':
		w = NW;
		c = gchar();
		if (c == '|')
			return OROR;
		getpair(c);
		if (errset)
			return HUH;
		if ((y->pipe.left = fd_left) == UNSET)
			y->pipe.left = 1;				/* default to fd 1 */
		if ((y->pipe.right = fd_right) == UNSET)
			y->pipe.right = 0;				/* default to fd 0 */
		if (y->pipe.right == CLOSED) {
			scanerror("expected digit after '='");		/* can't close a pipe */
			return HUH;
		}
		return PIPE;
	case '>':
		c = gchar();
		if (c == '>') {
			c = gchar();
			y->redir.type = rAppend;
		} else
			y->redir.type = rCreate;
		y->redir.fd = 1;
		goto common;
	case '<':
		c = gchar();
		if (c == '<') {
			c = gchar();
			if (c == '<') {
				c = gchar();
				y->redir.type = rHerestring;
			} else {
				y->redir.type = rHeredoc;
			}
		} else
			y->redir.type = rFrom;
		y->redir.fd = 0;
		/* Shared tail of '<' and '>': c holds the character after the operator. */
	common:
		w = NW;
		getpair(c);
		if (errset)
			return HUH;
		if (fd_right == UNSET) { /* redirection, not dup */
			if (fd_left != UNSET) {
				y->redir.fd = fd_left;
				return SREDIR;
			}
			return (y->redir.type == rFrom || y->redir.type == rCreate) ? REDIR : SREDIR;
		} else { /* dup; recast yylval */
			y->dup.type = y->redir.type;
			y->dup.left = fd_left;
			y->dup.right = fd_right;
			return DUP;
		}
	default:
		w = NW;
		return c; /* don't know what it is, let yacc barf on it */
	}
}

/*
   yyerror: print the parser's error message s on file descriptor 2.

   If scanerror() has already reported this error (prerror is set), the flag is
   cleared and nothing is printed. In interactive mode only the message is shown.
   Otherwise the message is prefixed with the line number and followed by a
   description of where the scanner was: the contents of the word buffer if the
   last token was a word or keyword, else a rendering of the last character read.
*/
extern void yyerror(const char *s) {
	char *where;

	/* don't print "syntax error" if there's a more informative scanerror */
	if (prerror) {
		prerror = FALSE;
		return;
	}
	if (interactive) {
		fprint(2, "%s\n", s);
		return;
	}
	if (w != NW) {
		where = realbuf;
	} else if (last == EOF) {
		where = "eof";
	} else if (last == '\n') {
		where = "end of line";
	} else {
		/* printable ASCII is shown quoted, anything else as its decimal code */
		where = nprint((last >= 32 && last <= 126) ? "'%c'" : "(decimal %d)", last);
	}
	/* lineno is one line too high when the newline has just been read */
	fprint(2, "line %d: %s near %s\n", (last == '\n') ? lineno - 1 : lineno, s, where);
}

/*
   scanerror: report an error detected by the scanner itself.

   The message is printed through yyerror() before prerror is raised, so it is this
   message that appears and the parser's follow-up yyerror() call that is silenced.
   errset makes the next yylex() call return '\n'.
*/
extern void scanerror(char *s) {
	flushu(); /* per the original note: flush up to newline */
	yyerror(s);
	prerror = TRUE;
	errset = TRUE;
}

/*
   inityy: reset the scanner state and make sure the word buffer exists.

   Clears the newline flag and the word state, resets hq, allocates realbuf on
   first use, and replaces a buffer that has grown beyond BUFMAX with a fresh one
   of BUFSIZE bytes.
*/
extern void inityy() {
	newline = FALSE;
	w = NW;
	hq = NULL; /* NOTE(review): hq is declared elsewhere; presumably the here-document queue -- confirm */

	if (realbuf == NULL) {
		realbuf = ealloc(bufsize);
	} else if (bufsize > BUFMAX) {
		/* return memory to the system if the buffer got too large */
		efree(realbuf);
		bufsize = BUFSIZE;
		realbuf = ealloc(bufsize);
	}
}

/*
   print_prompt2: account for one more input line and, when running
   interactively, write prompt2 to file descriptor 2.
*/
extern void print_prompt2() {
	++lineno;
	if (!interactive)
		return;
	fprint(2, "%s", prompt2);
}

/*
   Scan in a pair of integers for redirections like >[2=1]. CLOSED represents a closed file
   descriptor (i.e., >[2=]) and UNSET represents an undesignated file descriptor (e.g.,
   >[2] is represented as (2,UNSET)).

   On entry c is the character that followed the pipe or redirection operator. If it is not
   '[' it is pushed back and both descriptors stay UNSET. The results are left in fd_left
   and fd_right. Malformed input is reported through scanerror(), which sets errset for the
   caller to test.

   This function makes use of unsigned compares to make range tests in one compare operation:
   (unsigned int) (ch - '0') <= 9 holds exactly when ch is a decimal digit.

   A descriptor number that would not fit in an int is rejected with a scan error rather
   than being allowed to overflow.
*/

static void getpair(int c) {
	int n;
	fd_left = fd_right = UNSET;
	if (c != '[') {
		ugchar(c);
		return;
	}
	if ((unsigned int) (n = gchar() - '0') > 9) {
		scanerror("expected digit after '['");
		return;
	}
	while ((unsigned int) (c = gchar() - '0') <= 9) {
		if (n > (INT_MAX - c) / 10) { /* n * 10 + c would overflow */
			scanerror("file descriptor too large");
			return;
		}
		n = n * 10 + c;
	}
	fd_left = n;
	c += '0'; /* undo the digit bias so c is the terminating character again */
	switch (c) {
	default:
		scanerror("expected '=' or ']' after digit");
		return;
	case ']':
		return;
	case '=':
		if ((unsigned int) (n = gchar() - '0') > 9) {
			/* not a digit: only an immediate ']' (the "[n=]" form) is acceptable */
			if (n != ']' - '0') {
				scanerror("expected digit or ']' after '='");
				return;
			}
			fd_right = CLOSED;
		} else {
			while ((unsigned int) (c = gchar() - '0') <= 9) {
				if (n > (INT_MAX - c) / 10) { /* n * 10 + c would overflow */
					scanerror("file descriptor too large");
					return;
				}
				n = n * 10 + c;
			}
			if (c != ']' - '0') {
				scanerror("expected ']' after digit");
				return;
			}
			fd_right = n;
		}
	}
}
Commit	Line	Data
95fa4dd9 C	1	/* lex.c: rc's lexical analyzer */
	2
	3	#include "rc.h"
	4	#include "y.tab.h"
	5
	6	/*
	7	Special characters (i.e., "non-word") in rc:
	8	\t \n # ; & \| ^ $ = ~ ` ' { } @ ! ( ) < > \
	9
	10	The lexical analyzer is fairly straightforward. The only really
	11	unclean part concerns backslash continuation and "double
	12	backslashes". A backslash followed by a newline is treated as a
	13	space, otherwise backslash is not a special characeter (i.e.,
	14	it can be part of a word). This introduces a host of unwanted
	15	special cases. In our case, \ cannot be a word character, since
	16	we wish to read in all word characters in a tight loop.
	17
	18	Note: to save the trouble of declaring these arrays with TRUEs
	19	and FALSEs, I am assuming that FALSE = 0, TRUE = 1. (and so is
	20	it declared in rc.h)
	21	*/
	22
	23	#define BUFSIZE ((size_t) 1000) /* malloc hates power of 2 buffers? */
	24	#define BUFMAX (8 * BUFSIZE) /* How big the buffer can get before we re-allocate the
	25	space at BUFSIZE again. Premature optimization? Maybe.
	26	*/
	27
	28	typedef enum wordstates {
	29	NW, RW, KW /* "nonword", "realword", "keyword" */
	30	} wordstates;
	31
	32	static void getpair(int);
	33
	34	int lineno;
	35
	36	const char nw[] = {
	37	1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	38	1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
	39	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
	40	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
	41	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	42	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	43	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	44	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	45	};
	46
	47	const char dnw[] = {
	48	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	49	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
	50	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
	51	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
	52	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	53	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	54	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	55	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	56	};
	57
	58	static size_t bufsize = BUFSIZE;
	59	static char *realbuf = NULL;
	60	static bool newline = FALSE;
	61	static bool errset = FALSE;
	62	static bool prerror = FALSE;
	63	static wordstates w = NW;
	64	static int fd_left, fd_right;
65
66	#define checkfreecaret {if (w != NW) { w = NW; ugchar(c); return '^'; }}
67
68	enum filedescriptors {
69	UNSET = -9, CLOSED = -1
70	};
71
72	extern int yylex() {
73	static bool dollar = FALSE;
74	bool saw_meta = FALSE;
75	int c;
76	size_t i; /* The purpose of all these local assignments is to */
77	const char meta; / allow optimizing compilers like gcc to load these */
78	char buf = realbuf; / values into registers. On a sparc this is a */
79	YYSTYPE y = &yylval; / win, in code size and execution time */
80	if (errset) {
81	errset = FALSE;
82	return '\n';
83	}
84	/* rc variable-names may contain only alnum, '' and '_', so use dnw if we are scanning one. /
85	meta = (dollar ? dnw : nw);
86	dollar = FALSE;
87	if (newline) {
88	--lineno; /* slight space optimization; print_prompt2() always increments lineno */
89	print_prompt2();
90	newline = FALSE;
91	}
92	top: while ((c = gchar()) == ' ' \|\| c == '\t')
93	w = NW;
94	if (c == EOF)
95	return END;
96	if (!meta[(unsigned char) c]) { /* it's a word or keyword. */
97	checkfreecaret;
98	w = RW;
99	i = 0;
100	read: do {
101	buf[i++] = c;
102	if (c == '?' \|\| c == '[' \|\| c == '*')
103	saw_meta = TRUE;
104	if (i >= bufsize)
105	buf = realbuf = erealloc(buf, bufsize *= 2);
106	} while ((c = gchar()) != EOF && !meta[(unsigned char) c]);
107	while (c == '\\') {
108	if ((c = gchar()) == '\n') {
109	print_prompt2();
110	c = ' '; /* Pretend a space was read */
111	break;
112	} else {
113	bs: if (meta != dnw) { /* all words but varnames may have a bslash */
114	buf[i++] = '\\';
115	if (i >= bufsize)
116	buf = realbuf = erealloc(buf, bufsize *= 2);
117	if (!meta[(unsigned char) c])
118	goto read;
119	} else {
120	ugchar(c);
121	c = '\\';
122	break;
123	}
124	}
125	}
126	ugchar(c);
127	buf[i] = '\0';
128	w = KW;
129	if (i == 2) {
130	if (*buf == 'i' && buf[1] == 'f') return IF;
131	if (*buf == 'f' && buf[1] == 'n') return FN;
132	if (*buf == 'i' && buf[1] == 'n') return IN;
133	}
134	if (streq(buf, "for")) return FOR;
135	if (streq(buf, "else")) return ELSE;
136	if (streq(buf, "switch")) return SWITCH;
137	if (streq(buf, "while")) return WHILE;
138	if (streq(buf, "case")) return CASE;
139	w = RW;
140	y->word.w = ncpy(buf);
141	if (saw_meta) {
142	char r, s;
143
144	y->word.m = nalloc(strlen(buf) + 1);
145	for (r = buf, s = y->word.m; *r != '\0'; r++, s++)
146	s = (r == '?' \|\| r == '[' \|\| r == '*');
147	} else {
148	y->word.m = NULL;
149	}
150	return WORD;
151	}
152	if (c == '`' \|\| c == '!' \|\| c == '@' \|\| c == '~' \|\| c == '$' \|\| c == '\'') {
153	checkfreecaret;
154	if (c == '!' \|\| c == '@' \|\| c == '~')
155	w = KW;
156	}
157	switch (c) {
158	case '\0':
159	pr_error("warning: null character ignored");
160	goto top;
161	case '!':
162	return BANG;
163	case '@':
164	return SUBSHELL;
165	case '~':
166	return TWIDDLE;
167	case '`':
168	c = gchar();
169	if (c == '`')
170	return BACKBACK;
171	ugchar(c);
172	return '`';
173	case '$':
174	dollar = TRUE;
175	c = gchar();
176	if (c == '#')
177	return COUNT;
178	if (c == '^')
179	return FLAT;
180	ugchar(c);
181	return '$';
182	case '\'':
183	w = RW;
184	i = 0;
185	do {
186	buf[i++] = c;
187	if (c == '\n')
188	print_prompt2();
189	if (c == EOF) {
190	w = NW;
191	scanerror("eof in quoted string");
192	return HUH;
193	}
194	if (i >= bufsize)
195	buf = realbuf = erealloc(buf, bufsize *= 2);
196	} while ((c = gchar()) != '\'' \|\| (c = gchar()) == '\''); /* quote "'" thus: 'how''s it going?' */
197	ugchar(c);
198	buf[i] = '\0';
199	y->word.w = ncpy(buf);
200	y->word.m = NULL;
201	return WORD;
202	case '\\':
203	if ((c = gchar()) == '\n') {
204	print_prompt2();
205	goto top; /* Pretend it was just another space. */
206	}
207	ugchar(c);
208	c = '\\';
209	checkfreecaret;
210	c = gchar();
211	i = 0;
212	goto bs;
213	case '(':
214	if (w == RW) /* SUB's happen only after real words, not keyowrds, so if () and while () work */
215	c = SUB;
216	w = NW;
217	return c;
218	case '#':
219	while ((c = gchar()) != '\n') /* skip comment until newline */
220	if (c == EOF)
221	return END;
222	/* FALLTHROUGH */
223	case '\n':
224	lineno++;
225	newline = TRUE;
226	/* FALLTHROUGH */
227	case ';':
228	case '^':
229	case ')':
230	case '=':
231	case '{': case '}':
232	w = NW;
233	return c;
234	case '&':
235	w = NW;
236	c = gchar();
237	if (c == '&')
238	return ANDAND;
239	ugchar(c);
240	return '&';
241	case '\|':
242	w = NW;
243	c = gchar();
244	if (c == '\|')
245	return OROR;
246	getpair(c);
247	if (errset)
248	return HUH;
249	if ((y->pipe.left = fd_left) == UNSET)
250	y->pipe.left = 1; /* default to fd 1 */
251	if ((y->pipe.right = fd_right) == UNSET)
252	y->pipe.right = 0; /* default to fd 0 */
253	if (y->pipe.right == CLOSED) {
254	scanerror("expected digit after '='"); /* can't close a pipe */
255	return HUH;
256	}
257	return PIPE;
258	case '>':
259	c = gchar();
260	if (c == '>') {
261	c = gchar();
262	y->redir.type = rAppend;
263	} else
264	y->redir.type = rCreate;
265	y->redir.fd = 1;
266	goto common;
267	case '<':
268	c = gchar();
269	if (c == '<') {
270	c = gchar();
271	if (c == '<') {
272	c = gchar();
273	y->redir.type = rHerestring;
274	} else {
275	y->redir.type = rHeredoc;
276	}
277	} else
278	y->redir.type = rFrom;
279	y->redir.fd = 0;
280	common:
281	w = NW;
282	getpair(c);
283	if (errset)
284	return HUH;
285	if (fd_right == UNSET) { /* redirection, not dup */
286	if (fd_left != UNSET) {
287	y->redir.fd = fd_left;
288	return SREDIR;
289	}
290	return (y->redir.type == rFrom \|\| y->redir.type == rCreate) ? REDIR : SREDIR;
291	} else { /* dup; recast yylval */
292	y->dup.type = y->redir.type;
293	y->dup.left = fd_left;
294	y->dup.right = fd_right;
295	return DUP;
296	}
297	default:
298	w = NW;
299	return c; /* don't know what it is, let yacc barf on it */
300	}
301	}
302
303	extern void yyerror(const char *s) {
304	char *tok;
305	if (prerror) { /* don't print "syntax error" if there's a more informative scanerror */
306	prerror = FALSE;
307	return;
308	}
309	if (!interactive) {
310	if (w != NW)
311	tok = realbuf;
312	else if (last == EOF)
313	tok = "eof";
314	else if (last == '\n')
315	tok = "end of line";
316	else
317	tok = nprint((last < 32 \|\| last > 126) ? "(decimal %d)" : "'%c'", last);
318	fprint(2, "line %d: %s near %s\n", lineno - (last == '\n'), s, tok);
319	} else
320	fprint(2, "%s\n", s);
321	}
322
323	extern void scanerror(char *s) {
324	flushu(); /* flush upto newline */
325	yyerror(s);
326	errset = prerror = TRUE;
327	}
328
329	extern void inityy() {
330	newline = FALSE;
331	w = NW;
332	hq = NULL;
333	/* return memory to the system if the buffer got too large */
334	if (bufsize > BUFMAX && realbuf != NULL) {
335	efree(realbuf);
336	bufsize = BUFSIZE;
337	realbuf = ealloc(bufsize);
338	} else if (realbuf == NULL)
339	realbuf = ealloc(bufsize);
340	}
341
342	extern void print_prompt2() {
343	lineno++;
344	if (interactive)
345	fprint(2, "%s", prompt2);
346	}
347
348	/*
349	Scan in a pair of integers for redirections like >[2=1]. CLOSED represents a closed file
350	descriptor (i.e., >[2=]) and UNSET represents an undesignated file descriptor (e.g.,
351	>[2] is represented as (2,UNSET).
352
353	This function makes use of unsigned compares to make range tests in one compare operation.
354	*/
355
356	static void getpair(int c) {
357	int n;
358	fd_left = fd_right = UNSET;
359	if (c != '[') {
360	ugchar(c);
361	return;
362	}
363	if ((unsigned int) (n = gchar() - '0') > 9) {
364	scanerror("expected digit after '['");
365	return;
366	}
367	while ((unsigned int) (c = gchar() - '0') <= 9)
368	n = n * 10 + c;
369	fd_left = n;
370	c += '0';
371	switch (c) {
372	default:
373	scanerror("expected '=' or ']' after digit");
374	return;
375	case ']':
376	return;
377	case '=':
378	if ((unsigned int) (n = gchar() - '0') > 9) {
379	if (n != ']' - '0') {
380	scanerror("expected digit or ']' after '='");
381	return;
382	}
383	fd_right = CLOSED;
384	} else {
385	while ((unsigned int) (c = gchar() - '0') <= 9)
386	n = n * 10 + c;
387	if (c != ']' - '0') {
388	scanerror("expected ']' after digit");
389	return;
390	}
391	fd_right = n;
392	}
393	}
394	}