BSD 4_4_Lite1 development
[unix-history] / usr / src / contrib / rc-1.4 / lex.c
CommitLineData
95fa4dd9
C
1/* lex.c: rc's lexical analyzer */
2
#include "rc.h"
#include "y.tab.h"

#include <limits.h>
5
6/*
7 Special characters (i.e., "non-word") in rc:
8 \t \n # ; & | ^ $ = ~ ` ' { } @ ! ( ) < > \
9
10 The lexical analyzer is fairly straightforward. The only really
11 unclean part concerns backslash continuation and "double
12 backslashes". A backslash followed by a newline is treated as a
13 space, otherwise backslash is not a special character (i.e.,
14 it can be part of a word). This introduces a host of unwanted
15 special cases. In our case, \ cannot be a word character, since
16 we wish to read in all word characters in a tight loop.
17
18 Note: to save the trouble of declaring these arrays with TRUEs
19 and FALSEs, I am assuming that FALSE = 0, TRUE = 1. (and so is
20 it declared in rc.h)
21*/
22
/* Initial size of the token buffer (malloc hates power of 2 buffers?). */
#define BUFSIZE ((size_t) 1000)

/*
   How big the buffer can get before we re-allocate the space at
   BUFSIZE again. Premature optimization? Maybe.
*/
#define BUFMAX (8 * BUFSIZE)
27
/* Classification of the token most recently returned by the scanner. */
typedef enum wordstates {
	NW,	/* "nonword" */
	RW,	/* "realword" */
	KW	/* "keyword" */
} wordstates;
31
/* Scans the optional "[n]" / "[n=m]" / "[n=]" suffix of a redirection or pipe. */
static void getpair(int c);

/* Current input line number; bumped by print_prompt2() and on newline tokens. */
int lineno;
35
/*
   nw[c] is 1 when character c is a "non-word" character, i.e. it ends a
   word: NUL, space and the special characters
	\t \n # ; & | ^ $ = ~ ` ' { } @ ! ( ) < > \
   Laid out sixteen entries per row; the comment gives the row's first code.
*/
const char nw[] = {
	/* 0x00 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
	/* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
	/* 0x40 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
	/* 0x60 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xf0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
46
/*
   dnw[c] is the "non-word" table used while scanning a variable name
   after '$': only alphanumerics, '*' and '_' are 0 (name characters);
   every other code is 1. Sixteen entries per row; the comment gives the
   row's first code.
*/
const char dnw[] = {
	/* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
	/* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
	/* 0x40 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
	/* 0x60 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
	/* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xc0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xd0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xe0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xf0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
57
58static size_t bufsize = BUFSIZE;
59static char *realbuf = NULL;
60static bool newline = FALSE;
61static bool errset = FALSE;
62static bool prerror = FALSE;
63static wordstates w = NW;
64static int fd_left, fd_right;
65
/*
   If the previous token was a word or keyword (w != NW), the token that
   is about to be scanned must be joined to it: reset w, push the current
   character c back with ugchar() and make the enclosing function return
   an implicit '^'. Expects locals/globals named `w` and `c` in scope.
   Wrapped in do { } while (0) so that `checkfreecaret;` is one statement
   and stays safe in front of an `else`.
*/
#define checkfreecaret do { if (w != NW) { w = NW; ugchar(c); return '^'; } } while (0)
67
/* Sentinel values that getpair() stores in fd_left / fd_right. */
enum filedescriptors {
	UNSET = -9,	/* no descriptor was written at that position */
	CLOSED = -1	/* descriptor explicitly closed, as in >[2=] */
};
71
72extern int yylex() {
73 static bool dollar = FALSE;
74 bool saw_meta = FALSE;
75 int c;
76 size_t i; /* The purpose of all these local assignments is to */
77 const char *meta; /* allow optimizing compilers like gcc to load these */
78 char *buf = realbuf; /* values into registers. On a sparc this is a */
79 YYSTYPE *y = &yylval; /* win, in code size *and* execution time */
80 if (errset) {
81 errset = FALSE;
82 return '\n';
83 }
84 /* rc variable-names may contain only alnum, '*' and '_', so use dnw if we are scanning one. */
85 meta = (dollar ? dnw : nw);
86 dollar = FALSE;
87 if (newline) {
88 --lineno; /* slight space optimization; print_prompt2() always increments lineno */
89 print_prompt2();
90 newline = FALSE;
91 }
92top: while ((c = gchar()) == ' ' || c == '\t')
93 w = NW;
94 if (c == EOF)
95 return END;
96 if (!meta[(unsigned char) c]) { /* it's a word or keyword. */
97 checkfreecaret;
98 w = RW;
99 i = 0;
100 read: do {
101 buf[i++] = c;
102 if (c == '?' || c == '[' || c == '*')
103 saw_meta = TRUE;
104 if (i >= bufsize)
105 buf = realbuf = erealloc(buf, bufsize *= 2);
106 } while ((c = gchar()) != EOF && !meta[(unsigned char) c]);
107 while (c == '\\') {
108 if ((c = gchar()) == '\n') {
109 print_prompt2();
110 c = ' '; /* Pretend a space was read */
111 break;
112 } else {
113 bs: if (meta != dnw) { /* all words but varnames may have a bslash */
114 buf[i++] = '\\';
115 if (i >= bufsize)
116 buf = realbuf = erealloc(buf, bufsize *= 2);
117 if (!meta[(unsigned char) c])
118 goto read;
119 } else {
120 ugchar(c);
121 c = '\\';
122 break;
123 }
124 }
125 }
126 ugchar(c);
127 buf[i] = '\0';
128 w = KW;
129 if (i == 2) {
130 if (*buf == 'i' && buf[1] == 'f') return IF;
131 if (*buf == 'f' && buf[1] == 'n') return FN;
132 if (*buf == 'i' && buf[1] == 'n') return IN;
133 }
134 if (streq(buf, "for")) return FOR;
135 if (streq(buf, "else")) return ELSE;
136 if (streq(buf, "switch")) return SWITCH;
137 if (streq(buf, "while")) return WHILE;
138 if (streq(buf, "case")) return CASE;
139 w = RW;
140 y->word.w = ncpy(buf);
141 if (saw_meta) {
142 char *r, *s;
143
144 y->word.m = nalloc(strlen(buf) + 1);
145 for (r = buf, s = y->word.m; *r != '\0'; r++, s++)
146 *s = (*r == '?' || *r == '[' || *r == '*');
147 } else {
148 y->word.m = NULL;
149 }
150 return WORD;
151 }
152 if (c == '`' || c == '!' || c == '@' || c == '~' || c == '$' || c == '\'') {
153 checkfreecaret;
154 if (c == '!' || c == '@' || c == '~')
155 w = KW;
156 }
157 switch (c) {
158 case '\0':
159 pr_error("warning: null character ignored");
160 goto top;
161 case '!':
162 return BANG;
163 case '@':
164 return SUBSHELL;
165 case '~':
166 return TWIDDLE;
167 case '`':
168 c = gchar();
169 if (c == '`')
170 return BACKBACK;
171 ugchar(c);
172 return '`';
173 case '$':
174 dollar = TRUE;
175 c = gchar();
176 if (c == '#')
177 return COUNT;
178 if (c == '^')
179 return FLAT;
180 ugchar(c);
181 return '$';
182 case '\'':
183 w = RW;
184 i = 0;
185 do {
186 buf[i++] = c;
187 if (c == '\n')
188 print_prompt2();
189 if (c == EOF) {
190 w = NW;
191 scanerror("eof in quoted string");
192 return HUH;
193 }
194 if (i >= bufsize)
195 buf = realbuf = erealloc(buf, bufsize *= 2);
196 } while ((c = gchar()) != '\'' || (c = gchar()) == '\''); /* quote "'" thus: 'how''s it going?' */
197 ugchar(c);
198 buf[i] = '\0';
199 y->word.w = ncpy(buf);
200 y->word.m = NULL;
201 return WORD;
202 case '\\':
203 if ((c = gchar()) == '\n') {
204 print_prompt2();
205 goto top; /* Pretend it was just another space. */
206 }
207 ugchar(c);
208 c = '\\';
209 checkfreecaret;
210 c = gchar();
211 i = 0;
212 goto bs;
213 case '(':
214 if (w == RW) /* SUB's happen only after real words, not keyowrds, so if () and while () work */
215 c = SUB;
216 w = NW;
217 return c;
218 case '#':
219 while ((c = gchar()) != '\n') /* skip comment until newline */
220 if (c == EOF)
221 return END;
222 /* FALLTHROUGH */
223 case '\n':
224 lineno++;
225 newline = TRUE;
226 /* FALLTHROUGH */
227 case ';':
228 case '^':
229 case ')':
230 case '=':
231 case '{': case '}':
232 w = NW;
233 return c;
234 case '&':
235 w = NW;
236 c = gchar();
237 if (c == '&')
238 return ANDAND;
239 ugchar(c);
240 return '&';
241 case '|':
242 w = NW;
243 c = gchar();
244 if (c == '|')
245 return OROR;
246 getpair(c);
247 if (errset)
248 return HUH;
249 if ((y->pipe.left = fd_left) == UNSET)
250 y->pipe.left = 1; /* default to fd 1 */
251 if ((y->pipe.right = fd_right) == UNSET)
252 y->pipe.right = 0; /* default to fd 0 */
253 if (y->pipe.right == CLOSED) {
254 scanerror("expected digit after '='"); /* can't close a pipe */
255 return HUH;
256 }
257 return PIPE;
258 case '>':
259 c = gchar();
260 if (c == '>') {
261 c = gchar();
262 y->redir.type = rAppend;
263 } else
264 y->redir.type = rCreate;
265 y->redir.fd = 1;
266 goto common;
267 case '<':
268 c = gchar();
269 if (c == '<') {
270 c = gchar();
271 if (c == '<') {
272 c = gchar();
273 y->redir.type = rHerestring;
274 } else {
275 y->redir.type = rHeredoc;
276 }
277 } else
278 y->redir.type = rFrom;
279 y->redir.fd = 0;
280 common:
281 w = NW;
282 getpair(c);
283 if (errset)
284 return HUH;
285 if (fd_right == UNSET) { /* redirection, not dup */
286 if (fd_left != UNSET) {
287 y->redir.fd = fd_left;
288 return SREDIR;
289 }
290 return (y->redir.type == rFrom || y->redir.type == rCreate) ? REDIR : SREDIR;
291 } else { /* dup; recast yylval */
292 y->dup.type = y->redir.type;
293 y->dup.left = fd_left;
294 y->dup.right = fd_right;
295 return DUP;
296 }
297 default:
298 w = NW;
299 return c; /* don't know what it is, let yacc barf on it */
300 }
301}
302
303extern void yyerror(const char *s) {
304 char *tok;
305 if (prerror) { /* don't print "syntax error" if there's a more informative scanerror */
306 prerror = FALSE;
307 return;
308 }
309 if (!interactive) {
310 if (w != NW)
311 tok = realbuf;
312 else if (last == EOF)
313 tok = "eof";
314 else if (last == '\n')
315 tok = "end of line";
316 else
317 tok = nprint((last < 32 || last > 126) ? "(decimal %d)" : "'%c'", last);
318 fprint(2, "line %d: %s near %s\n", lineno - (last == '\n'), s, tok);
319 } else
320 fprint(2, "%s\n", s);
321}
322
323extern void scanerror(char *s) {
324 flushu(); /* flush upto newline */
325 yyerror(s);
326 errset = prerror = TRUE;
327}
328
329extern void inityy() {
330 newline = FALSE;
331 w = NW;
332 hq = NULL;
333 /* return memory to the system if the buffer got too large */
334 if (bufsize > BUFMAX && realbuf != NULL) {
335 efree(realbuf);
336 bufsize = BUFSIZE;
337 realbuf = ealloc(bufsize);
338 } else if (realbuf == NULL)
339 realbuf = ealloc(bufsize);
340}
341
342extern void print_prompt2() {
343 lineno++;
344 if (interactive)
345 fprint(2, "%s", prompt2);
346}
347
348/*
349 Scan in a pair of integers for redirections like >[2=1]. CLOSED represents a closed file
350 descriptor (i.e., >[2=]) and UNSET represents an undesignated file descriptor (e.g.,
351 >[2] is represented as (2,UNSET).
352
353 This function makes use of unsigned compares to make range tests in one compare operation.
354*/
355
356static void getpair(int c) {
357 int n;
358 fd_left = fd_right = UNSET;
359 if (c != '[') {
360 ugchar(c);
361 return;
362 }
363 if ((unsigned int) (n = gchar() - '0') > 9) {
364 scanerror("expected digit after '['");
365 return;
366 }
367 while ((unsigned int) (c = gchar() - '0') <= 9)
368 n = n * 10 + c;
369 fd_left = n;
370 c += '0';
371 switch (c) {
372 default:
373 scanerror("expected '=' or ']' after digit");
374 return;
375 case ']':
376 return;
377 case '=':
378 if ((unsigned int) (n = gchar() - '0') > 9) {
379 if (n != ']' - '0') {
380 scanerror("expected digit or ']' after '='");
381 return;
382 }
383 fd_right = CLOSED;
384 } else {
385 while ((unsigned int) (c = gchar() - '0') <= 9)
386 n = n * 10 + c;
387 if (c != ']' - '0') {
388 scanerror("expected ']' after digit");
389 return;
390 }
391 fd_right = n;
392 }
393 }
394}