From 83bed650a0e475dc7861c00a019b312852cc50b7 Mon Sep 17 00:00:00 2001 From: CSRG Date: Thu, 29 Apr 1993 21:53:14 -0800 Subject: [PATCH] BSD 4_4_Lite2 development Work on file usr/src/contrib/rc-1.4/lex.c Synthesized-from: CSRG/cd3/4.4BSD-Lite2 --- usr/src/contrib/rc-1.4/lex.c | 394 +++++++++++++++++++++++++++++++++++ 1 file changed, 394 insertions(+) create mode 100644 usr/src/contrib/rc-1.4/lex.c diff --git a/usr/src/contrib/rc-1.4/lex.c b/usr/src/contrib/rc-1.4/lex.c new file mode 100644 index 0000000000..c283f67f99 --- /dev/null +++ b/usr/src/contrib/rc-1.4/lex.c @@ -0,0 +1,394 @@ +/* lex.c: rc's lexical analyzer */ + +#include "rc.h" +#include "y.tab.h" + +/* + Special characters (i.e., "non-word") in rc: + \t \n # ; & | ^ $ = ~ ` ' { } @ ! ( ) < > \ + + The lexical analyzer is fairly straightforward. The only really + unclean part concerns backslash continuation and "double + backslashes". A backslash followed by a newline is treated as a + space, otherwise backslash is not a special characeter (i.e., + it can be part of a word). This introduces a host of unwanted + special cases. In our case, \ cannot be a word character, since + we wish to read in all word characters in a tight loop. + + Note: to save the trouble of declaring these arrays with TRUEs + and FALSEs, I am assuming that FALSE = 0, TRUE = 1. (and so is + it declared in rc.h) +*/ + +#define BUFSIZE ((size_t) 1000) /* malloc hates power of 2 buffers? */ +#define BUFMAX (8 * BUFSIZE) /* How big the buffer can get before we re-allocate the + space at BUFSIZE again. Premature optimization? Maybe. + */ + +typedef enum wordstates { + NW, RW, KW /* "nonword", "realword", "keyword" */ +} wordstates; + +static void getpair(int); + +int lineno; + +const char nw[] = { + 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +const char dnw[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 +}; + +static size_t bufsize = BUFSIZE; +static char *realbuf = NULL; +static bool newline = FALSE; +static bool errset = FALSE; +static bool prerror = FALSE; +static wordstates w = NW; +static int fd_left, fd_right; + +#define checkfreecaret {if (w != NW) { w = NW; ugchar(c); return '^'; }} + +enum filedescriptors { + UNSET = -9, CLOSED = -1 +}; + +extern int yylex() { + static bool dollar = FALSE; + bool saw_meta = FALSE; + int c; + size_t i; /* The purpose of all these local assignments is to */ + const char *meta; /* allow optimizing compilers like gcc to load these */ + char *buf = realbuf; /* values into registers. On a sparc this is a */ + YYSTYPE *y = &yylval; /* win, in code size *and* execution time */ + if (errset) { + errset = FALSE; + return '\n'; + } + /* rc variable-names may contain only alnum, '*' and '_', so use dnw if we are scanning one. */ + meta = (dollar ? dnw : nw); + dollar = FALSE; + if (newline) { + --lineno; /* slight space optimization; print_prompt2() always increments lineno */ + print_prompt2(); + newline = FALSE; + } +top: while ((c = gchar()) == ' ' || c == '\t') + w = NW; + if (c == EOF) + return END; + if (!meta[(unsigned char) c]) { /* it's a word or keyword. */ + checkfreecaret; + w = RW; + i = 0; + read: do { + buf[i++] = c; + if (c == '?' || c == '[' || c == '*') + saw_meta = TRUE; + if (i >= bufsize) + buf = realbuf = erealloc(buf, bufsize *= 2); + } while ((c = gchar()) != EOF && !meta[(unsigned char) c]); + while (c == '\\') { + if ((c = gchar()) == '\n') { + print_prompt2(); + c = ' '; /* Pretend a space was read */ + break; + } else { + bs: if (meta != dnw) { /* all words but varnames may have a bslash */ + buf[i++] = '\\'; + if (i >= bufsize) + buf = realbuf = erealloc(buf, bufsize *= 2); + if (!meta[(unsigned char) c]) + goto read; + } else { + ugchar(c); + c = '\\'; + break; + } + } + } + ugchar(c); + buf[i] = '\0'; + w = KW; + if (i == 2) { + if (*buf == 'i' && buf[1] == 'f') return IF; + if (*buf == 'f' && buf[1] == 'n') return FN; + if (*buf == 'i' && buf[1] == 'n') return IN; + } + if (streq(buf, "for")) return FOR; + if (streq(buf, "else")) return ELSE; + if (streq(buf, "switch")) return SWITCH; + if (streq(buf, "while")) return WHILE; + if (streq(buf, "case")) return CASE; + w = RW; + y->word.w = ncpy(buf); + if (saw_meta) { + char *r, *s; + + y->word.m = nalloc(strlen(buf) + 1); + for (r = buf, s = y->word.m; *r != '\0'; r++, s++) + *s = (*r == '?' || *r == '[' || *r == '*'); + } else { + y->word.m = NULL; + } + return WORD; + } + if (c == '`' || c == '!' || c == '@' || c == '~' || c == '$' || c == '\'') { + checkfreecaret; + if (c == '!' || c == '@' || c == '~') + w = KW; + } + switch (c) { + case '\0': + pr_error("warning: null character ignored"); + goto top; + case '!': + return BANG; + case '@': + return SUBSHELL; + case '~': + return TWIDDLE; + case '`': + c = gchar(); + if (c == '`') + return BACKBACK; + ugchar(c); + return '`'; + case '$': + dollar = TRUE; + c = gchar(); + if (c == '#') + return COUNT; + if (c == '^') + return FLAT; + ugchar(c); + return '$'; + case '\'': + w = RW; + i = 0; + do { + buf[i++] = c; + if (c == '\n') + print_prompt2(); + if (c == EOF) { + w = NW; + scanerror("eof in quoted string"); + return HUH; + } + if (i >= bufsize) + buf = realbuf = erealloc(buf, bufsize *= 2); + } while ((c = gchar()) != '\'' || (c = gchar()) == '\''); /* quote "'" thus: 'how''s it going?' */ + ugchar(c); + buf[i] = '\0'; + y->word.w = ncpy(buf); + y->word.m = NULL; + return WORD; + case '\\': + if ((c = gchar()) == '\n') { + print_prompt2(); + goto top; /* Pretend it was just another space. */ + } + ugchar(c); + c = '\\'; + checkfreecaret; + c = gchar(); + i = 0; + goto bs; + case '(': + if (w == RW) /* SUB's happen only after real words, not keyowrds, so if () and while () work */ + c = SUB; + w = NW; + return c; + case '#': + while ((c = gchar()) != '\n') /* skip comment until newline */ + if (c == EOF) + return END; + /* FALLTHROUGH */ + case '\n': + lineno++; + newline = TRUE; + /* FALLTHROUGH */ + case ';': + case '^': + case ')': + case '=': + case '{': case '}': + w = NW; + return c; + case '&': + w = NW; + c = gchar(); + if (c == '&') + return ANDAND; + ugchar(c); + return '&'; + case '|': + w = NW; + c = gchar(); + if (c == '|') + return OROR; + getpair(c); + if (errset) + return HUH; + if ((y->pipe.left = fd_left) == UNSET) + y->pipe.left = 1; /* default to fd 1 */ + if ((y->pipe.right = fd_right) == UNSET) + y->pipe.right = 0; /* default to fd 0 */ + if (y->pipe.right == CLOSED) { + scanerror("expected digit after '='"); /* can't close a pipe */ + return HUH; + } + return PIPE; + case '>': + c = gchar(); + if (c == '>') { + c = gchar(); + y->redir.type = rAppend; + } else + y->redir.type = rCreate; + y->redir.fd = 1; + goto common; + case '<': + c = gchar(); + if (c == '<') { + c = gchar(); + if (c == '<') { + c = gchar(); + y->redir.type = rHerestring; + } else { + y->redir.type = rHeredoc; + } + } else + y->redir.type = rFrom; + y->redir.fd = 0; + common: + w = NW; + getpair(c); + if (errset) + return HUH; + if (fd_right == UNSET) { /* redirection, not dup */ + if (fd_left != UNSET) { + y->redir.fd = fd_left; + return SREDIR; + } + return (y->redir.type == rFrom || y->redir.type == rCreate) ? REDIR : SREDIR; + } else { /* dup; recast yylval */ + y->dup.type = y->redir.type; + y->dup.left = fd_left; + y->dup.right = fd_right; + return DUP; + } + default: + w = NW; + return c; /* don't know what it is, let yacc barf on it */ + } +} + +extern void yyerror(const char *s) { + char *tok; + if (prerror) { /* don't print "syntax error" if there's a more informative scanerror */ + prerror = FALSE; + return; + } + if (!interactive) { + if (w != NW) + tok = realbuf; + else if (last == EOF) + tok = "eof"; + else if (last == '\n') + tok = "end of line"; + else + tok = nprint((last < 32 || last > 126) ? "(decimal %d)" : "'%c'", last); + fprint(2, "line %d: %s near %s\n", lineno - (last == '\n'), s, tok); + } else + fprint(2, "%s\n", s); +} + +extern void scanerror(char *s) { + flushu(); /* flush upto newline */ + yyerror(s); + errset = prerror = TRUE; +} + +extern void inityy() { + newline = FALSE; + w = NW; + hq = NULL; + /* return memory to the system if the buffer got too large */ + if (bufsize > BUFMAX && realbuf != NULL) { + efree(realbuf); + bufsize = BUFSIZE; + realbuf = ealloc(bufsize); + } else if (realbuf == NULL) + realbuf = ealloc(bufsize); +} + +extern void print_prompt2() { + lineno++; + if (interactive) + fprint(2, "%s", prompt2); +} + +/* + Scan in a pair of integers for redirections like >[2=1]. CLOSED represents a closed file + descriptor (i.e., >[2=]) and UNSET represents an undesignated file descriptor (e.g., + >[2] is represented as (2,UNSET). + + This function makes use of unsigned compares to make range tests in one compare operation. +*/ + +static void getpair(int c) { + int n; + fd_left = fd_right = UNSET; + if (c != '[') { + ugchar(c); + return; + } + if ((unsigned int) (n = gchar() - '0') > 9) { + scanerror("expected digit after '['"); + return; + } + while ((unsigned int) (c = gchar() - '0') <= 9) + n = n * 10 + c; + fd_left = n; + c += '0'; + switch (c) { + default: + scanerror("expected '=' or ']' after digit"); + return; + case ']': + return; + case '=': + if ((unsigned int) (n = gchar() - '0') > 9) { + if (n != ']' - '0') { + scanerror("expected digit or ']' after '='"); + return; + } + fd_right = CLOSED; + } else { + while ((unsigned int) (c = gchar() - '0') <= 9) + n = n * 10 + c; + if (c != ']' - '0') { + scanerror("expected ']' after digit"); + return; + } + fd_right = n; + } + } +} -- 2.20.1