Commit | Line | Data |
---|---|---|
60de5df9 | 1 | /* |
46e9ea25 KB |
2 | * Copyright (c) 1983 Regents of the University of California. |
3 | * All rights reserved. | |
4 | * | |
5 | * Redistribution and use in source and binary forms are permitted | |
5e8b0e60 KB |
6 | * provided that the above copyright notice and this paragraph are |
7 | * duplicated in all such forms and that any documentation, | |
8 | * advertising materials, and other materials related to such | |
9 | * distribution and use acknowledge that the software was developed | |
10 | * by the University of California, Berkeley. The name of the | |
11 | * University may not be used to endorse or promote products derived | |
12 | * from this software without specific prior written permission. | |
13 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR | |
14 | * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED | |
15 | * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. | |
60de5df9 EW |
16 | */ |
17 | ||
46e9ea25 | 18 | #ifndef lint |
5e8b0e60 | 19 | static char sccsid[] = "@(#)scanner.c 3.11 (Berkeley) %G%"; |
46e9ea25 KB |
20 | #endif /* not lint */ |
21 | ||
5b3d0ab2 EW |
22 | #include <stdio.h> |
23 | #include "value.h" | |
24 | #include "token.h" | |
25 | #include "context.h" | |
26 | #include "string.h" | |
27 | ||
28 | s_getc() | |
29 | { | |
30 | register c; | |
31 | ||
32 | switch (cx.x_type) { | |
33 | case X_FILE: | |
34 | c = getc(cx.x_fp); | |
35 | if (cx.x_bol && c != EOF) { | |
36 | cx.x_bol = 0; | |
37 | cx.x_lineno++; | |
38 | } | |
39 | if (c == '\n') | |
40 | cx.x_bol = 1; | |
41 | return c; | |
42 | case X_BUF: | |
43 | if (*cx.x_bufp != 0) | |
44 | return *cx.x_bufp++ & 0xff; | |
45 | else | |
46 | return EOF; | |
47 | } | |
48 | /*NOTREACHED*/ | |
49 | } | |
50 | ||
51 | s_ungetc(c) | |
52 | { | |
53 | if (c == EOF) | |
54 | return EOF; | |
55 | switch (cx.x_type) { | |
56 | case X_FILE: | |
57 | cx.x_bol = 0; | |
58 | return ungetc(c, cx.x_fp); | |
59 | case X_BUF: | |
60 | if (cx.x_bufp > cx.x_buf) | |
61 | return *--cx.x_bufp = c; | |
62 | else | |
63 | return EOF; | |
64 | } | |
65 | /*NOTREACHED*/ | |
66 | } | |
67 | ||
68 | s_gettok() | |
69 | { | |
70 | char buf[100]; | |
71 | register char *p = buf; | |
72 | register c; | |
73 | register state = 0; | |
5b3d0ab2 EW |
74 | |
75 | loop: | |
76 | c = s_getc(); | |
77 | switch (state) { | |
fa064f47 | 78 | case 0: |
d70e12f1 EW |
79 | switch (c) { |
80 | case ' ': | |
81 | case '\t': | |
82 | break; | |
5b3d0ab2 EW |
83 | case '\n': |
84 | case ';': | |
85 | cx.x_token = T_EOL; | |
86 | state = -1; | |
87 | break; | |
88 | case '#': | |
fa064f47 | 89 | state = 1; |
5b3d0ab2 EW |
90 | break; |
91 | case EOF: | |
92 | cx.x_token = T_EOF; | |
93 | state = -1; | |
94 | break; | |
95 | case 'a': case 'b': case 'c': case 'd': case 'e': | |
96 | case 'f': case 'g': case 'h': case 'i': case 'j': | |
97 | case 'k': case 'l': case 'm': case 'n': case 'o': | |
98 | case 'p': case 'q': case 'r': case 's': case 't': | |
99 | case 'u': case 'v': case 'w': case 'x': case 'y': | |
100 | case 'z': | |
101 | case 'A': case 'B': case 'C': case 'D': case 'E': | |
102 | case 'F': case 'G': case 'H': case 'I': case 'J': | |
103 | case 'K': case 'L': case 'M': case 'N': case 'O': | |
104 | case 'P': case 'Q': case 'R': case 'S': case 'T': | |
105 | case 'U': case 'V': case 'W': case 'X': case 'Y': | |
106 | case 'Z': | |
c1428031 | 107 | case '_': case '.': |
5b3d0ab2 EW |
108 | *p++ = c; |
109 | state = 2; | |
110 | break; | |
111 | case '"': | |
5b3d0ab2 EW |
112 | state = 3; |
113 | break; | |
fa064f47 EW |
114 | case '\'': |
115 | state = 4; | |
116 | break; | |
5b3d0ab2 | 117 | case '\\': |
d70e12f1 EW |
118 | switch (c = s_gettok1()) { |
119 | case -1: | |
120 | break; | |
121 | case -2: | |
122 | state = 0; | |
123 | break; | |
124 | default: | |
125 | *p++ = c; | |
126 | state = 2; | |
127 | } | |
5b3d0ab2 EW |
128 | break; |
129 | case '0': | |
b58ec5e7 | 130 | cx.x_val.v_num = 0; |
5b3d0ab2 EW |
131 | state = 10; |
132 | break; | |
133 | case '1': case '2': case '3': case '4': | |
134 | case '5': case '6': case '7': case '8': case '9': | |
135 | cx.x_val.v_num = c - '0'; | |
136 | state = 11; | |
137 | break; | |
138 | case '>': | |
139 | state = 20; | |
140 | break; | |
141 | case '<': | |
142 | state = 21; | |
143 | break; | |
144 | case '=': | |
145 | state = 22; | |
146 | break; | |
147 | case '!': | |
148 | state = 23; | |
149 | break; | |
150 | case '&': | |
151 | state = 24; | |
152 | break; | |
153 | case '|': | |
154 | state = 25; | |
155 | break; | |
5119bdf8 EW |
156 | case '$': |
157 | state = 26; | |
158 | break; | |
5b3d0ab2 EW |
159 | case '~': |
160 | cx.x_token = T_COMP; | |
161 | state = -1; | |
162 | break; | |
163 | case '+': | |
164 | cx.x_token = T_PLUS; | |
165 | state = -1; | |
166 | break; | |
167 | case '-': | |
168 | cx.x_token = T_MINUS; | |
169 | state = -1; | |
170 | break; | |
171 | case '*': | |
172 | cx.x_token = T_MUL; | |
173 | state = -1; | |
174 | break; | |
175 | case '/': | |
176 | cx.x_token = T_DIV; | |
177 | state = -1; | |
178 | break; | |
179 | case '%': | |
180 | cx.x_token = T_MOD; | |
181 | state = -1; | |
182 | break; | |
183 | case '^': | |
184 | cx.x_token = T_XOR; | |
185 | state = -1; | |
186 | break; | |
187 | case '(': | |
188 | cx.x_token = T_LP; | |
189 | state = -1; | |
190 | break; | |
191 | case ')': | |
192 | cx.x_token = T_RP; | |
193 | state = -1; | |
194 | break; | |
5b3d0ab2 EW |
195 | case ',': |
196 | cx.x_token = T_COMMA; | |
197 | state = -1; | |
198 | break; | |
199 | case '?': | |
200 | cx.x_token = T_QUEST; | |
201 | state = -1; | |
202 | break; | |
203 | case ':': | |
204 | cx.x_token = T_COLON; | |
205 | state = -1; | |
206 | break; | |
92acfdcb EW |
207 | case '[': |
208 | cx.x_token = T_LB; | |
209 | state = -1; | |
210 | break; | |
211 | case ']': | |
212 | cx.x_token = T_RB; | |
213 | state = -1; | |
214 | break; | |
5b3d0ab2 EW |
215 | default: |
216 | cx.x_val.v_num = c; | |
217 | cx.x_token = T_CHAR; | |
218 | state = -1; | |
219 | break; | |
220 | } | |
221 | break; | |
fa064f47 EW |
222 | case 1: /* got # */ |
223 | if (c == '\n' || c == EOF) { | |
224 | (void) s_ungetc(c); | |
225 | state = 0; | |
226 | } | |
227 | break; | |
5b3d0ab2 EW |
228 | case 2: /* unquoted string */ |
229 | switch (c) { | |
230 | case 'a': case 'b': case 'c': case 'd': case 'e': | |
231 | case 'f': case 'g': case 'h': case 'i': case 'j': | |
232 | case 'k': case 'l': case 'm': case 'n': case 'o': | |
233 | case 'p': case 'q': case 'r': case 's': case 't': | |
234 | case 'u': case 'v': case 'w': case 'x': case 'y': | |
235 | case 'z': | |
236 | case 'A': case 'B': case 'C': case 'D': case 'E': | |
237 | case 'F': case 'G': case 'H': case 'I': case 'J': | |
238 | case 'K': case 'L': case 'M': case 'N': case 'O': | |
239 | case 'P': case 'Q': case 'R': case 'S': case 'T': | |
240 | case 'U': case 'V': case 'W': case 'X': case 'Y': | |
241 | case 'Z': | |
c1428031 | 242 | case '_': case '.': |
5b3d0ab2 EW |
243 | case '0': case '1': case '2': case '3': case '4': |
244 | case '5': case '6': case '7': case '8': case '9': | |
245 | if (p < buf + sizeof buf - 1) | |
246 | *p++ = c; | |
247 | break; | |
248 | case '"': | |
5b3d0ab2 EW |
249 | state = 3; |
250 | break; | |
fa064f47 EW |
251 | case '\'': |
252 | state = 4; | |
253 | break; | |
5b3d0ab2 | 254 | case '\\': |
d70e12f1 EW |
255 | switch (c = s_gettok1()) { |
256 | case -2: | |
257 | (void) s_ungetc(' '); | |
258 | case -1: | |
259 | break; | |
260 | default: | |
261 | if (p < buf + sizeof buf - 1) | |
262 | *p++ = c; | |
263 | } | |
5b3d0ab2 EW |
264 | break; |
265 | default: | |
266 | (void) s_ungetc(c); | |
267 | case EOF: | |
268 | *p = 0; | |
269 | cx.x_token = T_STR; | |
270 | switch (*buf) { | |
271 | case 'i': | |
272 | if (buf[1] == 'f' && buf[2] == 0) | |
273 | cx.x_token = T_IF; | |
274 | break; | |
275 | case 't': | |
fa064f47 EW |
276 | if (buf[1] == 'h' && buf[2] == 'e' |
277 | && buf[3] == 'n' && buf[4] == 0) | |
5b3d0ab2 EW |
278 | cx.x_token = T_THEN; |
279 | break; | |
280 | case 'e': | |
fa064f47 EW |
281 | if (buf[1] == 'n' && buf[2] == 'd' |
282 | && buf[3] == 'i' && buf[4] == 'f' | |
283 | && buf[5] == 0) | |
284 | cx.x_token = T_ENDIF; | |
285 | else if (buf[1] == 'l' && buf[2] == 's') | |
286 | if (buf[3] == 'i' && buf[4] == 'f' | |
287 | && buf[5] == 0) | |
5b3d0ab2 | 288 | cx.x_token = T_ELSIF; |
fa064f47 EW |
289 | else if (buf[3] == 'e' && buf[4] == 0) |
290 | cx.x_token = T_ELSE; | |
5b3d0ab2 EW |
291 | break; |
292 | } | |
fa064f47 EW |
293 | if (cx.x_token == T_STR |
294 | && (cx.x_val.v_str = str_cpy(buf)) == 0) { | |
295 | p_memerror(); | |
296 | cx.x_token = T_EOF; | |
297 | } | |
5b3d0ab2 EW |
298 | state = -1; |
299 | break; | |
300 | } | |
301 | break; | |
fa064f47 | 302 | case 3: /* " quoted string */ |
5b3d0ab2 EW |
303 | switch (c) { |
304 | case '\n': | |
305 | (void) s_ungetc(c); | |
306 | case EOF: | |
fa064f47 | 307 | case '"': |
5b3d0ab2 EW |
308 | state = 2; |
309 | break; | |
310 | case '\\': | |
d70e12f1 EW |
311 | switch (c = s_gettok1()) { |
312 | case -1: | |
313 | case -2: /* newlines are invisible */ | |
314 | break; | |
315 | default: | |
316 | if (p < buf + sizeof buf - 1) | |
317 | *p++ = c; | |
318 | } | |
5b3d0ab2 EW |
319 | break; |
320 | default: | |
fa064f47 | 321 | if (p < buf + sizeof buf - 1) |
5b3d0ab2 EW |
322 | *p++ = c; |
323 | break; | |
324 | } | |
325 | break; | |
fa064f47 EW |
326 | case 4: /* ' quoted string */ |
327 | switch (c) { | |
328 | case '\n': | |
5b3d0ab2 | 329 | (void) s_ungetc(c); |
fa064f47 EW |
330 | case EOF: |
331 | case '\'': | |
332 | state = 2; | |
333 | break; | |
334 | case '\\': | |
335 | switch (c = s_gettok1()) { | |
336 | case -1: | |
337 | case -2: /* newlines are invisible */ | |
338 | break; | |
339 | default: | |
340 | if (p < buf + sizeof buf - 1) | |
341 | *p++ = c; | |
342 | } | |
343 | break; | |
344 | default: | |
345 | if (p < buf + sizeof buf - 1) | |
346 | *p++ = c; | |
347 | break; | |
5b3d0ab2 EW |
348 | } |
349 | break; | |
350 | case 10: /* got 0 */ | |
351 | switch (c) { | |
352 | case 'x': | |
353 | case 'X': | |
354 | cx.x_val.v_num = 0; | |
355 | state = 12; | |
356 | break; | |
357 | case '0': case '1': case '2': case '3': case '4': | |
358 | case '5': case '6': case '7': | |
359 | cx.x_val.v_num = c - '0'; | |
360 | state = 13; | |
361 | break; | |
362 | case '8': case '9': | |
363 | cx.x_val.v_num = c - '0'; | |
364 | state = 11; | |
365 | break; | |
366 | default: | |
367 | (void) s_ungetc(c); | |
368 | state = -1; | |
369 | cx.x_token = T_NUM; | |
370 | } | |
371 | break; | |
372 | case 11: /* decimal number */ | |
373 | switch (c) { | |
374 | case '0': case '1': case '2': case '3': case '4': | |
375 | case '5': case '6': case '7': case '8': case '9': | |
376 | cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0'; | |
377 | break; | |
378 | default: | |
379 | (void) s_ungetc(c); | |
380 | state = -1; | |
381 | cx.x_token = T_NUM; | |
382 | } | |
383 | break; | |
384 | case 12: /* hex number */ | |
385 | switch (c) { | |
386 | case '0': case '1': case '2': case '3': case '4': | |
387 | case '5': case '6': case '7': case '8': case '9': | |
388 | cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0'; | |
389 | break; | |
390 | case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': | |
391 | cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10; | |
392 | break; | |
393 | case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': | |
394 | cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10; | |
395 | break; | |
396 | default: | |
397 | (void) s_ungetc(c); | |
398 | state = -1; | |
399 | cx.x_token = T_NUM; | |
400 | } | |
401 | break; | |
402 | case 13: /* octal number */ | |
403 | switch (c) { | |
404 | case '0': case '1': case '2': case '3': case '4': | |
405 | case '5': case '6': case '7': | |
406 | cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0'; | |
407 | break; | |
408 | default: | |
409 | (void) s_ungetc(c); | |
410 | state = -1; | |
411 | cx.x_token = T_NUM; | |
412 | } | |
413 | break; | |
414 | case 20: /* got > */ | |
415 | switch (c) { | |
416 | case '=': | |
417 | cx.x_token = T_GE; | |
418 | state = -1; | |
419 | break; | |
420 | case '>': | |
421 | cx.x_token = T_RS; | |
422 | state = -1; | |
423 | break; | |
424 | default: | |
425 | (void) s_ungetc(c); | |
426 | cx.x_token = T_GT; | |
427 | state = -1; | |
428 | } | |
429 | break; | |
430 | case 21: /* got < */ | |
431 | switch (c) { | |
432 | case '=': | |
433 | cx.x_token = T_LE; | |
434 | state = -1; | |
435 | break; | |
436 | case '<': | |
437 | cx.x_token = T_LS; | |
438 | state = -1; | |
439 | break; | |
440 | default: | |
441 | (void) s_ungetc(c); | |
442 | cx.x_token = T_LT; | |
443 | state = -1; | |
444 | } | |
445 | break; | |
446 | case 22: /* got = */ | |
447 | switch (c) { | |
448 | case '=': | |
449 | cx.x_token = T_EQ; | |
450 | state = -1; | |
451 | break; | |
452 | default: | |
453 | (void) s_ungetc(c); | |
454 | cx.x_token = T_ASSIGN; | |
455 | state = -1; | |
456 | } | |
457 | break; | |
458 | case 23: /* got ! */ | |
459 | switch (c) { | |
460 | case '=': | |
461 | cx.x_token = T_NE; | |
462 | state = -1; | |
463 | break; | |
464 | default: | |
465 | (void) s_ungetc(c); | |
466 | cx.x_token = T_NOT; | |
467 | state = -1; | |
468 | } | |
469 | break; | |
5119bdf8 | 470 | case 24: /* got & */ |
5b3d0ab2 EW |
471 | switch (c) { |
472 | case '&': | |
473 | cx.x_token = T_ANDAND; | |
474 | state = -1; | |
475 | break; | |
476 | default: | |
477 | (void) s_ungetc(c); | |
478 | cx.x_token = T_AND; | |
479 | state = -1; | |
480 | } | |
481 | break; | |
5119bdf8 | 482 | case 25: /* got | */ |
5b3d0ab2 EW |
483 | switch (c) { |
484 | case '|': | |
485 | cx.x_token = T_OROR; | |
486 | state = -1; | |
487 | break; | |
488 | default: | |
489 | (void) s_ungetc(c); | |
490 | cx.x_token = T_OR; | |
491 | state = -1; | |
492 | } | |
493 | break; | |
5119bdf8 EW |
494 | case 26: /* got $ */ |
495 | switch (c) { | |
496 | case '?': | |
497 | cx.x_token = T_DQ; | |
498 | state = -1; | |
499 | break; | |
500 | default: | |
501 | (void) s_ungetc(c); | |
502 | cx.x_token = T_DOLLAR; | |
503 | state = -1; | |
504 | } | |
505 | break; | |
5b3d0ab2 EW |
506 | default: |
507 | abort(); | |
508 | } | |
509 | if (state >= 0) | |
510 | goto loop; | |
511 | return cx.x_token; | |
512 | } | |
d70e12f1 EW |
513 | |
/*
 * Decode one escape sequence; the introducing backslash has already
 * been read by the caller.
 *
 * Returns
 *	-1	if the input ended right after the backslash,
 *	-2	if the backslash was followed by a newline,
 *	the control character for \b \f \n \r \t,
 *	the value of one to three octal digits for \[0-7]...,
 *	and otherwise the escaped character itself.
 */
int
s_gettok1()
{
	register int ch;
	register int value;
	register int ndigits;

	ch = s_getc();			/* got \ */
	if (ch == EOF)
		return -1;
	if (ch == '\n')
		return -2;
	switch (ch) {
	case 'b':
		return '\b';
	case 'f':
		return '\f';
	case 'n':
		return '\n';
	case 'r':
		return '\r';
	case 't':
		return '\t';
	}
	if (ch < '0' || ch > '7')
		return ch;

	/* octal escape: at most three digits, the first is in hand */
	value = ch - '0';
	for (ndigits = 1; ndigits < 3; ndigits++) {
		ch = s_getc();
		if (ch < '0' || ch > '7') {
			/* not part of the escape; leave it for the caller */
			(void) s_ungetc(ch);
			break;
		}
		value = value * 8 + ch - '0';
	}
	return value;
}