From 83bed650a0e475dc7861c00a019b312852cc50b7 Mon Sep 17 00:00:00 2001
From: CSRG <csrg@ucbvax.Berkeley.EDU>
Date: Thu, 29 Apr 1993 21:53:14 -0800
Subject: [PATCH] BSD 4_4_Lite2 development Work on file
 usr/src/contrib/rc-1.4/lex.c

Synthesized-from: CSRG/cd3/4.4BSD-Lite2
---
 usr/src/contrib/rc-1.4/lex.c | 394 +++++++++++++++++++++++++++++++++++
 1 file changed, 394 insertions(+)
 create mode 100644 usr/src/contrib/rc-1.4/lex.c

diff --git a/usr/src/contrib/rc-1.4/lex.c b/usr/src/contrib/rc-1.4/lex.c
new file mode 100644
index 0000000000..c283f67f99
--- /dev/null
+++ b/usr/src/contrib/rc-1.4/lex.c
@@ -0,0 +1,394 @@
+/* lex.c: rc's lexical analyzer */
+
+#include "rc.h"
+#include "y.tab.h"
+
+/*
+	Special characters (i.e., "non-word") in rc:
+		\t \n # ; & | ^ $ = ~ ` ' { } @ ! ( ) < > \
+
+	The lexical analyzer is fairly straightforward. The only really
+	unclean part concerns backslash continuation and "double
+	backslashes". A backslash followed by a newline is treated as a
+	space, otherwise backslash is not a special character (i.e.,
+	it can be part of a word).  This introduces a host of unwanted
+	special cases. In our case, \ cannot be a word character, since
+	we wish to read in all word characters in a tight loop.
+
+	Note: to save the trouble of declaring these arrays with TRUEs
+	and FALSEs, I am assuming that FALSE = 0 and TRUE = 1 (which is
+	how they are declared in rc.h).
+*/
+
+#define BUFSIZE ((size_t) 1000)	/* initial size of the token buffer; malloc hates power of 2 buffers? */
+#define BUFMAX (8 * BUFSIZE)	/* 	How big the buffer can get before inityy() re-allocates the
+					space at BUFSIZE again. Premature optimization? Maybe.
+				*/
+
+typedef enum wordstates {	/* kind of token most recently scanned; kept in the file-scope variable w */
+	NW, RW, KW /* "nonword", "realword", "keyword" */
+} wordstates;
+
+static void getpair(int);	/* scans an optional [n], [n=m] or [n=] suffix into fd_left/fd_right */
+
+int lineno;	/* line counter: bumped on each newline token and by print_prompt2(); printed by yyerror() */
+
+const char nw[] = {	/* "non-word" table: nw[c] != 0 means byte c ends an ordinary word; yylex indexes it by unsigned char */
+	1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x00-0x1f: NUL, tab, newline */
+	1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,	/* 0x20-0x3f: space ! # $ & ' ( ) ; < = > */
+	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,	/* 0x40-0x5f: @, backslash, ^ */
+	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,	/* 0x60-0x7f: ` { | } ~ */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x80-0xff: every byte is a word character */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+const char dnw[] = {	/* "dollar non-word" table, used right after a '$' token: only alnum, '*' and '_' are 0 (allowed) */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* 0x00-0x1f: nothing allowed */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,	/* 0x20-0x3f: '*' and '0'-'9' allowed */
+	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,	/* 0x40-0x5f: 'A'-'Z' and '_' allowed */
+	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,	/* 0x60-0x7f: 'a'-'z' allowed */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* 0x80-0xff: nothing allowed */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+};
+
+static size_t bufsize = BUFSIZE;	/* current allocated size of realbuf; doubled by yylex when a token fills it */
+static char *realbuf = NULL;		/* token text buffer; allocated in inityy(), also used by yyerror() as the "near" text */
+static bool newline = FALSE;		/* set when yylex returns a newline token; makes the next call invoke print_prompt2() */
+static bool errset = FALSE;		/* set by scanerror(); makes the next yylex call return '\n' */
+static bool prerror = FALSE;		/* set by scanerror(); makes the next yyerror call stay silent */
+static wordstates w = NW;		/* word state left by the previous token; drives free carets and SUB detection */
+static int fd_left, fd_right;		/* results of the most recent getpair() call */
+
+#define checkfreecaret {if (w != NW) { w = NW; ugchar(c); return '^'; }}	/* after a word/keyword: push c back and return an implicit '^' */
+
+enum filedescriptors {	/* sentinel values for fd_left/fd_right, both outside the range of fds getpair() can parse */
+	UNSET = -9, CLOSED = -1
+};
+
+extern int yylex() {	/* scan and return the next token; word, pipe, redir and dup values are stored through yylval */
+	static bool dollar = FALSE;	/* TRUE when the previous call returned '$', COUNT or FLAT */
+	bool saw_meta = FALSE;		/* the current word contains '?', '[' or '*' */
+	int c;
+	size_t i;			/* The purpose of all these local assignments is to	*/
+	const char *meta;		/* allow optimizing compilers like gcc to load these	*/
+	char *buf = realbuf;		/* values into registers. On a sparc this is a		*/
+	YYSTYPE *y = &yylval;		/* win, in code size *and* execution time		*/
+	if (errset) {	/* scanerror() fired during the previous call: answer with a newline token */
+		errset = FALSE;
+		return '\n';
+	}
+	/* rc variable-names may contain only alnum, '*' and '_', so use dnw if we are scanning one. */
+	meta = (dollar ? dnw : nw);
+	dollar = FALSE;
+	if (newline) {	/* the previous call returned a newline token */
+		--lineno; /* slight space optimization; print_prompt2() always increments lineno */
+		print_prompt2();
+		newline = FALSE;
+	}
+top:	while ((c = gchar()) == ' ' || c == '\t')	/* skip blanks; a blank also cancels any pending free caret */
+		w = NW;
+	if (c == EOF)
+		return END;
+	if (!meta[(unsigned char) c]) {	/* it's a word or keyword. */
+		checkfreecaret;
+		w = RW;
+		i = 0;
+	read:	do {	/* tight loop over word characters; the buffer doubles whenever it fills up */
+			buf[i++] = c;
+			if (c == '?' || c == '[' || c == '*')
+				saw_meta = TRUE;
+			if (i >= bufsize)
+				buf = realbuf = erealloc(buf, bufsize *= 2);
+		} while ((c = gchar()) != EOF && !meta[(unsigned char) c]);
+		while (c == '\\') {	/* the word stopped at a backslash: continuation line or literal backslash */
+			if ((c = gchar()) == '\n') {
+				print_prompt2();
+				c = ' '; /* Pretend a space was read */
+				break;
+			} else {
+	bs:			if (meta != dnw) { /* all words but varnames may have a bslash */
+					buf[i++] = '\\';
+					if (i >= bufsize)
+						buf = realbuf = erealloc(buf, bufsize *= 2);
+					if (c != EOF && !meta[(unsigned char) c])	/* EOF must never be appended as a word byte */
+						goto read;
+				} else {	/* scanning a variable name: leave the backslash for the next token */
+					ugchar(c);
+					c = '\\';
+					break;
+				}
+			}
+		}
+		ugchar(c);	/* push back whatever ended the word */
+		buf[i] = '\0';
+		w = KW;	/* assume a keyword until the comparisons below fail */
+		if (i == 2) {
+			if (*buf == 'i' && buf[1] == 'f') return IF;
+			if (*buf == 'f' && buf[1] == 'n') return FN;
+			if (*buf == 'i' && buf[1] == 'n') return IN;
+		}
+		if (streq(buf, "for")) return FOR;
+		if (streq(buf, "else")) return ELSE;
+		if (streq(buf, "switch")) return SWITCH;
+		if (streq(buf, "while")) return WHILE;
+		if (streq(buf, "case")) return CASE;
+		w = RW;
+		y->word.w = ncpy(buf);
+		if (saw_meta) {	/* build a parallel map flagging each '?', '[' and '*' in the word */
+			char *r, *s;
+
+			y->word.m = nalloc(strlen(buf) + 1);
+			for (r = buf, s = y->word.m; *r != '\0'; r++, s++)
+				*s = (*r == '?' || *r == '[' || *r == '*');
+		} else {
+			y->word.m = NULL;
+		}
+		return WORD;
+	}
+	if (c == '`' || c == '!' || c == '@' || c == '~' || c == '$' || c == '\'') {	/* tokens that can adjoin a preceding word */
+		checkfreecaret;
+		if (c == '!' || c == '@' || c == '~')
+			w = KW;
+	}
+	switch (c) {
+	case '\0':
+		pr_error("warning: null character ignored");
+		goto top;
+	case '!':
+		return BANG;
+	case '@':
+		return SUBSHELL;
+	case '~':
+		return TWIDDLE;
+	case '`':
+		c = gchar();
+		if (c == '`')
+			return BACKBACK;
+		ugchar(c);
+		return '`';
+	case '$':
+		dollar = TRUE;	/* the next call scans a variable name using dnw */
+		c = gchar();
+		if (c == '#')
+			return COUNT;
+		if (c == '^')
+			return FLAT;
+		ugchar(c);
+		return '$';
+	case '\'':
+		w = RW;
+		i = 0;
+		do {	/* the first pass stores the opening quote itself in buf[0]; presumably the parser keys on it (verify in parse.y) */
+			buf[i++] = c;
+			if (c == '\n')
+				print_prompt2();
+			if (c == EOF) {
+				w = NW;
+				scanerror("eof in quoted string");
+				return HUH;
+			}
+			if (i >= bufsize)
+				buf = realbuf = erealloc(buf, bufsize *= 2);
+		} while ((c = gchar()) != '\'' || (c = gchar()) == '\''); /* quote "'" thus: 'how''s it going?' */
+		ugchar(c);	/* c is the character after the closing quote */
+		buf[i] = '\0';
+		y->word.w = ncpy(buf);
+		y->word.m = NULL;
+		return WORD;
+	case '\\':
+		if ((c = gchar()) == '\n') {
+			print_prompt2();
+			goto top; /* Pretend it was just another space. */
+		}
+		ugchar(c);	/* restore the input so checkfreecaret pushes back the backslash, not its follower */
+		c = '\\';
+		checkfreecaret;
+		c = gchar();
+		i = 0;
+		goto bs;	/* start a word that begins with a backslash */
+	case '(':
+		if (w == RW) /* SUB's happen only after real words, not keywords, so if () and while () work */
+			c = SUB;
+		w = NW;
+		return c;
+	case '#':
+		while ((c = gchar()) != '\n') /* skip comment until newline */
+			if (c == EOF)
+				return END;
+		/* FALLTHROUGH */
+	case '\n':
+		lineno++;
+		newline = TRUE;
+		/* FALLTHROUGH */
+	case ';':
+	case '^':
+	case ')':
+	case '=':
+	case '{': case '}':
+		w = NW;
+		return c;
+	case '&':
+		w = NW;
+		c = gchar();
+		if (c == '&')
+			return ANDAND;
+		ugchar(c);
+		return '&';
+	case '|':
+		w = NW;
+		c = gchar();
+		if (c == '|')
+			return OROR;
+		getpair(c);	/* optional |[left=right] */
+		if (errset)
+			return HUH;
+		if ((y->pipe.left = fd_left) == UNSET)
+			y->pipe.left = 1;				/* default to fd 1 */
+		if ((y->pipe.right = fd_right) == UNSET)
+			y->pipe.right = 0;				/* default to fd 0 */
+		if (y->pipe.right == CLOSED) {
+			scanerror("expected digit after '='");		/* can't close a pipe */
+			return HUH;
+		}
+		return PIPE;
+	case '>':
+		c = gchar();
+		if (c == '>') {
+			c = gchar();
+			y->redir.type = rAppend;
+		} else
+			y->redir.type = rCreate;
+		y->redir.fd = 1;
+		goto common;
+	case '<':
+		c = gchar();
+		if (c == '<') {
+			c = gchar();
+			if (c == '<') {
+				c = gchar();
+				y->redir.type = rHerestring;
+			} else {
+				y->redir.type = rHeredoc;
+			}
+		} else
+			y->redir.type = rFrom;
+		y->redir.fd = 0;
+	common:	/* shared tail of '<' and '>': c is the first character after the operator */
+		w = NW;
+		getpair(c);
+		if (errset)
+			return HUH;
+		if (fd_right == UNSET) { /* redirection, not dup */
+			if (fd_left != UNSET) {
+				y->redir.fd = fd_left;
+				return SREDIR;
+			}
+			return (y->redir.type == rFrom || y->redir.type == rCreate) ? REDIR : SREDIR;
+		} else { /* dup; recast yylval */
+			y->dup.type = y->redir.type;
+			y->dup.left = fd_left;
+			y->dup.right = fd_right;
+			return DUP;
+		}
+	default:
+		w = NW;
+		return c; /* don't know what it is, let yacc barf on it */
+	}
+}
+
+extern void yyerror(const char *s) {
+	char *tok;
+	if (prerror) { /* don't print "syntax error" if there's a more informative scanerror */
+		prerror = FALSE;
+		return;
+	}
+	if (!interactive) {
+		if (w != NW)
+			tok = realbuf;
+		else if (last == EOF)
+			tok = "eof";
+		else if (last == '\n')
+			tok = "end of line";
+		else
+			tok = nprint((last < 32 || last > 126) ? "(decimal %d)" : "'%c'", last);
+		fprint(2, "line %d: %s near %s\n", lineno - (last == '\n'), s, tok);
+	} else
+		fprint(2, "%s\n", s);
+}
+
+extern void scanerror(char *s) {	/* report a lexical error detected by the scanner itself */
+	flushu(); /* flush up to newline */
+	yyerror(s);	/* must run before prerror is set, or this message would be suppressed */
+	errset = prerror = TRUE;	/* next yylex() returns '\n'; next yyerror() prints nothing */
+}
+
+extern void inityy() {	/* reset the scanner state and make sure the token buffer exists */
+	hq = NULL;
+	w = NW;
+	newline = FALSE;
+	if (realbuf != NULL && bufsize > BUFMAX) {	/* buffer grew too large: give the memory back */
+		efree(realbuf);
+		realbuf = NULL;
+		bufsize = BUFSIZE;
+	}
+	if (realbuf == NULL)
+		realbuf = ealloc(bufsize);
+}
+
+extern void print_prompt2() {	/* count one more input line; show prompt2 on fd 2 when interactive */
+	++lineno;
+	if (!interactive) return;
+	fprint(2, "%s", prompt2);
+}
+
+/*
+   Scan in a pair of integers for redirections like >[2=1]. CLOSED represents a closed file
+   descriptor (i.e., >[2=]) and UNSET represents an undesignated file descriptor (e.g.,
+   >[2] is represented as (2,UNSET)).
+
+   This function makes use of unsigned compares to make range tests in one compare operation.
+*/
+
+static void getpair(int c) {	/* c: the character right after the operator; results go to fd_left and fd_right */
+	int n;
+	fd_left = fd_right = UNSET;
+	if (c != '[') {	/* no bracket suffix: push c back and leave both fds UNSET */
+		ugchar(c);
+		return;
+	}
+	if ((unsigned int) (n = gchar() - '0') > 9) {	/* unsigned compare rejects anything outside '0'..'9', including negative values */
+		scanerror("expected digit after '['");
+		return;
+	}
+	while ((unsigned int) (c = gchar() - '0') <= 9)
+		n = n * 10 + c;	/* NOTE(review): no overflow check for very long digit strings */
+	fd_left = n;
+	c += '0';	/* undo the bias: c is again the character that ended the number */
+	switch (c) {
+	default:
+		scanerror("expected '=' or ']' after digit");
+		return;
+	case ']':	/* "[n]": fd_right stays UNSET */
+		return;
+	case '=':
+		if ((unsigned int) (n = gchar() - '0') > 9) {
+			if (n != ']' - '0') {	/* n still holds (char - '0'), so compare against the biased ']' */
+				scanerror("expected digit or ']' after '='");
+				return;
+			}
+			fd_right = CLOSED;	/* "[n=]" */
+		} else {
+			while ((unsigned int) (c = gchar() - '0') <= 9)
+				n = n * 10 + c;	/* NOTE(review): same unchecked accumulation as above */
+			if (c != ']' - '0') {	/* c is still biased by '0' here */
+				scanerror("expected ']' after digit");
+				return;
+			}
+			fd_right = n;	/* "[n=m]" */
+		}
+	}
+}
-- 
2.20.1