add pathnames.h
[unix-history] / usr / src / usr.bin / window / scanner.c
CommitLineData
60de5df9 1/*
46e9ea25
KB
2 * Copyright (c) 1983 Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms are permitted
5e8b0e60
KB
6 * provided that the above copyright notice and this paragraph are
7 * duplicated in all such forms and that any documentation,
8 * advertising materials, and other materials related to such
9 * distribution and use acknowledge that the software was developed
10 * by the University of California, Berkeley. The name of the
11 * University may not be used to endorse or promote products derived
12 * from this software without specific prior written permission.
13 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
14 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
15 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
60de5df9
EW
16 */
17
46e9ea25 18#ifndef lint
5e8b0e60 19static char sccsid[] = "@(#)scanner.c 3.11 (Berkeley) %G%";
46e9ea25
KB
20#endif /* not lint */
21
5b3d0ab2
EW
22#include <stdio.h>
23#include "value.h"
24#include "token.h"
25#include "context.h"
26#include "string.h"
27
28s_getc()
29{
30 register c;
31
32 switch (cx.x_type) {
33 case X_FILE:
34 c = getc(cx.x_fp);
35 if (cx.x_bol && c != EOF) {
36 cx.x_bol = 0;
37 cx.x_lineno++;
38 }
39 if (c == '\n')
40 cx.x_bol = 1;
41 return c;
42 case X_BUF:
43 if (*cx.x_bufp != 0)
44 return *cx.x_bufp++ & 0xff;
45 else
46 return EOF;
47 }
48 /*NOTREACHED*/
49}
50
51s_ungetc(c)
52{
53 if (c == EOF)
54 return EOF;
55 switch (cx.x_type) {
56 case X_FILE:
57 cx.x_bol = 0;
58 return ungetc(c, cx.x_fp);
59 case X_BUF:
60 if (cx.x_bufp > cx.x_buf)
61 return *--cx.x_bufp = c;
62 else
63 return EOF;
64 }
65 /*NOTREACHED*/
66}
67
68s_gettok()
69{
70 char buf[100];
71 register char *p = buf;
72 register c;
73 register state = 0;
5b3d0ab2
EW
74
75loop:
76 c = s_getc();
77 switch (state) {
fa064f47 78 case 0:
d70e12f1
EW
79 switch (c) {
80 case ' ':
81 case '\t':
82 break;
5b3d0ab2
EW
83 case '\n':
84 case ';':
85 cx.x_token = T_EOL;
86 state = -1;
87 break;
88 case '#':
fa064f47 89 state = 1;
5b3d0ab2
EW
90 break;
91 case EOF:
92 cx.x_token = T_EOF;
93 state = -1;
94 break;
95 case 'a': case 'b': case 'c': case 'd': case 'e':
96 case 'f': case 'g': case 'h': case 'i': case 'j':
97 case 'k': case 'l': case 'm': case 'n': case 'o':
98 case 'p': case 'q': case 'r': case 's': case 't':
99 case 'u': case 'v': case 'w': case 'x': case 'y':
100 case 'z':
101 case 'A': case 'B': case 'C': case 'D': case 'E':
102 case 'F': case 'G': case 'H': case 'I': case 'J':
103 case 'K': case 'L': case 'M': case 'N': case 'O':
104 case 'P': case 'Q': case 'R': case 'S': case 'T':
105 case 'U': case 'V': case 'W': case 'X': case 'Y':
106 case 'Z':
c1428031 107 case '_': case '.':
5b3d0ab2
EW
108 *p++ = c;
109 state = 2;
110 break;
111 case '"':
5b3d0ab2
EW
112 state = 3;
113 break;
fa064f47
EW
114 case '\'':
115 state = 4;
116 break;
5b3d0ab2 117 case '\\':
d70e12f1
EW
118 switch (c = s_gettok1()) {
119 case -1:
120 break;
121 case -2:
122 state = 0;
123 break;
124 default:
125 *p++ = c;
126 state = 2;
127 }
5b3d0ab2
EW
128 break;
129 case '0':
b58ec5e7 130 cx.x_val.v_num = 0;
5b3d0ab2
EW
131 state = 10;
132 break;
133 case '1': case '2': case '3': case '4':
134 case '5': case '6': case '7': case '8': case '9':
135 cx.x_val.v_num = c - '0';
136 state = 11;
137 break;
138 case '>':
139 state = 20;
140 break;
141 case '<':
142 state = 21;
143 break;
144 case '=':
145 state = 22;
146 break;
147 case '!':
148 state = 23;
149 break;
150 case '&':
151 state = 24;
152 break;
153 case '|':
154 state = 25;
155 break;
5119bdf8
EW
156 case '$':
157 state = 26;
158 break;
5b3d0ab2
EW
159 case '~':
160 cx.x_token = T_COMP;
161 state = -1;
162 break;
163 case '+':
164 cx.x_token = T_PLUS;
165 state = -1;
166 break;
167 case '-':
168 cx.x_token = T_MINUS;
169 state = -1;
170 break;
171 case '*':
172 cx.x_token = T_MUL;
173 state = -1;
174 break;
175 case '/':
176 cx.x_token = T_DIV;
177 state = -1;
178 break;
179 case '%':
180 cx.x_token = T_MOD;
181 state = -1;
182 break;
183 case '^':
184 cx.x_token = T_XOR;
185 state = -1;
186 break;
187 case '(':
188 cx.x_token = T_LP;
189 state = -1;
190 break;
191 case ')':
192 cx.x_token = T_RP;
193 state = -1;
194 break;
5b3d0ab2
EW
195 case ',':
196 cx.x_token = T_COMMA;
197 state = -1;
198 break;
199 case '?':
200 cx.x_token = T_QUEST;
201 state = -1;
202 break;
203 case ':':
204 cx.x_token = T_COLON;
205 state = -1;
206 break;
92acfdcb
EW
207 case '[':
208 cx.x_token = T_LB;
209 state = -1;
210 break;
211 case ']':
212 cx.x_token = T_RB;
213 state = -1;
214 break;
5b3d0ab2
EW
215 default:
216 cx.x_val.v_num = c;
217 cx.x_token = T_CHAR;
218 state = -1;
219 break;
220 }
221 break;
fa064f47
EW
222 case 1: /* got # */
223 if (c == '\n' || c == EOF) {
224 (void) s_ungetc(c);
225 state = 0;
226 }
227 break;
5b3d0ab2
EW
228 case 2: /* unquoted string */
229 switch (c) {
230 case 'a': case 'b': case 'c': case 'd': case 'e':
231 case 'f': case 'g': case 'h': case 'i': case 'j':
232 case 'k': case 'l': case 'm': case 'n': case 'o':
233 case 'p': case 'q': case 'r': case 's': case 't':
234 case 'u': case 'v': case 'w': case 'x': case 'y':
235 case 'z':
236 case 'A': case 'B': case 'C': case 'D': case 'E':
237 case 'F': case 'G': case 'H': case 'I': case 'J':
238 case 'K': case 'L': case 'M': case 'N': case 'O':
239 case 'P': case 'Q': case 'R': case 'S': case 'T':
240 case 'U': case 'V': case 'W': case 'X': case 'Y':
241 case 'Z':
c1428031 242 case '_': case '.':
5b3d0ab2
EW
243 case '0': case '1': case '2': case '3': case '4':
244 case '5': case '6': case '7': case '8': case '9':
245 if (p < buf + sizeof buf - 1)
246 *p++ = c;
247 break;
248 case '"':
5b3d0ab2
EW
249 state = 3;
250 break;
fa064f47
EW
251 case '\'':
252 state = 4;
253 break;
5b3d0ab2 254 case '\\':
d70e12f1
EW
255 switch (c = s_gettok1()) {
256 case -2:
257 (void) s_ungetc(' ');
258 case -1:
259 break;
260 default:
261 if (p < buf + sizeof buf - 1)
262 *p++ = c;
263 }
5b3d0ab2
EW
264 break;
265 default:
266 (void) s_ungetc(c);
267 case EOF:
268 *p = 0;
269 cx.x_token = T_STR;
270 switch (*buf) {
271 case 'i':
272 if (buf[1] == 'f' && buf[2] == 0)
273 cx.x_token = T_IF;
274 break;
275 case 't':
fa064f47
EW
276 if (buf[1] == 'h' && buf[2] == 'e'
277 && buf[3] == 'n' && buf[4] == 0)
5b3d0ab2
EW
278 cx.x_token = T_THEN;
279 break;
280 case 'e':
fa064f47
EW
281 if (buf[1] == 'n' && buf[2] == 'd'
282 && buf[3] == 'i' && buf[4] == 'f'
283 && buf[5] == 0)
284 cx.x_token = T_ENDIF;
285 else if (buf[1] == 'l' && buf[2] == 's')
286 if (buf[3] == 'i' && buf[4] == 'f'
287 && buf[5] == 0)
5b3d0ab2 288 cx.x_token = T_ELSIF;
fa064f47
EW
289 else if (buf[3] == 'e' && buf[4] == 0)
290 cx.x_token = T_ELSE;
5b3d0ab2
EW
291 break;
292 }
fa064f47
EW
293 if (cx.x_token == T_STR
294 && (cx.x_val.v_str = str_cpy(buf)) == 0) {
295 p_memerror();
296 cx.x_token = T_EOF;
297 }
5b3d0ab2
EW
298 state = -1;
299 break;
300 }
301 break;
fa064f47 302 case 3: /* " quoted string */
5b3d0ab2
EW
303 switch (c) {
304 case '\n':
305 (void) s_ungetc(c);
306 case EOF:
fa064f47 307 case '"':
5b3d0ab2
EW
308 state = 2;
309 break;
310 case '\\':
d70e12f1
EW
311 switch (c = s_gettok1()) {
312 case -1:
313 case -2: /* newlines are invisible */
314 break;
315 default:
316 if (p < buf + sizeof buf - 1)
317 *p++ = c;
318 }
5b3d0ab2
EW
319 break;
320 default:
fa064f47 321 if (p < buf + sizeof buf - 1)
5b3d0ab2
EW
322 *p++ = c;
323 break;
324 }
325 break;
fa064f47
EW
326 case 4: /* ' quoted string */
327 switch (c) {
328 case '\n':
5b3d0ab2 329 (void) s_ungetc(c);
fa064f47
EW
330 case EOF:
331 case '\'':
332 state = 2;
333 break;
334 case '\\':
335 switch (c = s_gettok1()) {
336 case -1:
337 case -2: /* newlines are invisible */
338 break;
339 default:
340 if (p < buf + sizeof buf - 1)
341 *p++ = c;
342 }
343 break;
344 default:
345 if (p < buf + sizeof buf - 1)
346 *p++ = c;
347 break;
5b3d0ab2
EW
348 }
349 break;
350 case 10: /* got 0 */
351 switch (c) {
352 case 'x':
353 case 'X':
354 cx.x_val.v_num = 0;
355 state = 12;
356 break;
357 case '0': case '1': case '2': case '3': case '4':
358 case '5': case '6': case '7':
359 cx.x_val.v_num = c - '0';
360 state = 13;
361 break;
362 case '8': case '9':
363 cx.x_val.v_num = c - '0';
364 state = 11;
365 break;
366 default:
367 (void) s_ungetc(c);
368 state = -1;
369 cx.x_token = T_NUM;
370 }
371 break;
372 case 11: /* decimal number */
373 switch (c) {
374 case '0': case '1': case '2': case '3': case '4':
375 case '5': case '6': case '7': case '8': case '9':
376 cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0';
377 break;
378 default:
379 (void) s_ungetc(c);
380 state = -1;
381 cx.x_token = T_NUM;
382 }
383 break;
384 case 12: /* hex number */
385 switch (c) {
386 case '0': case '1': case '2': case '3': case '4':
387 case '5': case '6': case '7': case '8': case '9':
388 cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0';
389 break;
390 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
391 cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10;
392 break;
393 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
394 cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10;
395 break;
396 default:
397 (void) s_ungetc(c);
398 state = -1;
399 cx.x_token = T_NUM;
400 }
401 break;
402 case 13: /* octal number */
403 switch (c) {
404 case '0': case '1': case '2': case '3': case '4':
405 case '5': case '6': case '7':
406 cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0';
407 break;
408 default:
409 (void) s_ungetc(c);
410 state = -1;
411 cx.x_token = T_NUM;
412 }
413 break;
414 case 20: /* got > */
415 switch (c) {
416 case '=':
417 cx.x_token = T_GE;
418 state = -1;
419 break;
420 case '>':
421 cx.x_token = T_RS;
422 state = -1;
423 break;
424 default:
425 (void) s_ungetc(c);
426 cx.x_token = T_GT;
427 state = -1;
428 }
429 break;
430 case 21: /* got < */
431 switch (c) {
432 case '=':
433 cx.x_token = T_LE;
434 state = -1;
435 break;
436 case '<':
437 cx.x_token = T_LS;
438 state = -1;
439 break;
440 default:
441 (void) s_ungetc(c);
442 cx.x_token = T_LT;
443 state = -1;
444 }
445 break;
446 case 22: /* got = */
447 switch (c) {
448 case '=':
449 cx.x_token = T_EQ;
450 state = -1;
451 break;
452 default:
453 (void) s_ungetc(c);
454 cx.x_token = T_ASSIGN;
455 state = -1;
456 }
457 break;
458 case 23: /* got ! */
459 switch (c) {
460 case '=':
461 cx.x_token = T_NE;
462 state = -1;
463 break;
464 default:
465 (void) s_ungetc(c);
466 cx.x_token = T_NOT;
467 state = -1;
468 }
469 break;
5119bdf8 470 case 24: /* got & */
5b3d0ab2
EW
471 switch (c) {
472 case '&':
473 cx.x_token = T_ANDAND;
474 state = -1;
475 break;
476 default:
477 (void) s_ungetc(c);
478 cx.x_token = T_AND;
479 state = -1;
480 }
481 break;
5119bdf8 482 case 25: /* got | */
5b3d0ab2
EW
483 switch (c) {
484 case '|':
485 cx.x_token = T_OROR;
486 state = -1;
487 break;
488 default:
489 (void) s_ungetc(c);
490 cx.x_token = T_OR;
491 state = -1;
492 }
493 break;
5119bdf8
EW
494 case 26: /* got $ */
495 switch (c) {
496 case '?':
497 cx.x_token = T_DQ;
498 state = -1;
499 break;
500 default:
501 (void) s_ungetc(c);
502 cx.x_token = T_DOLLAR;
503 state = -1;
504 }
505 break;
5b3d0ab2
EW
506 default:
507 abort();
508 }
509 if (state >= 0)
510 goto loop;
511 return cx.x_token;
512}
d70e12f1
EW
513
/*
 * Decode the character(s) following a backslash that the caller has
 * already consumed.
 *
 * Returns
 *	-1	if the input ended right after the backslash,
 *	-2	if the backslash was followed by a newline,
 *	the control character for \b, \f, \n, \r and \t,
 *	the numeric value of an escape of one to three octal digits,
 *	and otherwise the character itself.
 *
 * The first character after an octal escape that is not an octal digit
 * is pushed back with s_ungetc().
 */
int
s_gettok1()
{
	register int ch;
	register int value;
	register int ndigits;

	ch = s_getc();			/* got \ */
	if (ch == EOF)
		return -1;
	if (ch == '\n')
		return -2;

	switch (ch) {
	case 'b':
		return '\b';
	case 'f':
		return '\f';
	case 'n':
		return '\n';
	case 'r':
		return '\r';
	case 't':
		return '\t';
	}

	if (ch < '0' || ch > '7')
		return ch;		/* any other character stands for itself */

	/* octal escape: the first digit is in hand, take up to two more */
	value = ch - '0';
	for (ndigits = 1; ndigits < 3; ndigits++) {
		ch = s_getc();
		if (ch < '0' || ch > '7') {
			(void) s_ungetc(ch);
			break;
		}
		value = value * 8 + ch - '0';
	}
	return value;
}
554}