Commit | Line | Data |
---|---|---|
5b3d0ab2 | 1 | #ifndef lint |
60de5df9 | 2 | static char sccsid[] = "@(#)scanner.c 3.8 %G%"; |
5b3d0ab2 EW |
3 | #endif |
4 | ||
60de5df9 EW |
5 | /* |
6 | * Copyright (c) 1983 Regents of the University of California, | |
7 | * All rights reserved. Redistribution permitted subject to | |
8 | * the terms of the Berkeley Software License Agreement. | |
9 | */ | |
10 | ||
5b3d0ab2 EW |
11 | #include <stdio.h> |
12 | #include "value.h" | |
13 | #include "token.h" | |
14 | #include "context.h" | |
15 | #include "string.h" | |
16 | ||
17 | s_getc() | |
18 | { | |
19 | register c; | |
20 | ||
21 | switch (cx.x_type) { | |
22 | case X_FILE: | |
23 | c = getc(cx.x_fp); | |
24 | if (cx.x_bol && c != EOF) { | |
25 | cx.x_bol = 0; | |
26 | cx.x_lineno++; | |
27 | } | |
28 | if (c == '\n') | |
29 | cx.x_bol = 1; | |
30 | return c; | |
31 | case X_BUF: | |
32 | if (*cx.x_bufp != 0) | |
33 | return *cx.x_bufp++ & 0xff; | |
34 | else | |
35 | return EOF; | |
36 | } | |
37 | /*NOTREACHED*/ | |
38 | } | |
39 | ||
40 | s_ungetc(c) | |
41 | { | |
42 | if (c == EOF) | |
43 | return EOF; | |
44 | switch (cx.x_type) { | |
45 | case X_FILE: | |
46 | cx.x_bol = 0; | |
47 | return ungetc(c, cx.x_fp); | |
48 | case X_BUF: | |
49 | if (cx.x_bufp > cx.x_buf) | |
50 | return *--cx.x_bufp = c; | |
51 | else | |
52 | return EOF; | |
53 | } | |
54 | /*NOTREACHED*/ | |
55 | } | |
56 | ||
57 | s_gettok() | |
58 | { | |
59 | char buf[100]; | |
60 | register char *p = buf; | |
61 | register c; | |
62 | register state = 0; | |
5b3d0ab2 EW |
63 | |
64 | loop: | |
65 | c = s_getc(); | |
66 | switch (state) { | |
fa064f47 | 67 | case 0: |
d70e12f1 EW |
68 | switch (c) { |
69 | case ' ': | |
70 | case '\t': | |
71 | break; | |
5b3d0ab2 EW |
72 | case '\n': |
73 | case ';': | |
74 | cx.x_token = T_EOL; | |
75 | state = -1; | |
76 | break; | |
77 | case '#': | |
fa064f47 | 78 | state = 1; |
5b3d0ab2 EW |
79 | break; |
80 | case EOF: | |
81 | cx.x_token = T_EOF; | |
82 | state = -1; | |
83 | break; | |
84 | case 'a': case 'b': case 'c': case 'd': case 'e': | |
85 | case 'f': case 'g': case 'h': case 'i': case 'j': | |
86 | case 'k': case 'l': case 'm': case 'n': case 'o': | |
87 | case 'p': case 'q': case 'r': case 's': case 't': | |
88 | case 'u': case 'v': case 'w': case 'x': case 'y': | |
89 | case 'z': | |
90 | case 'A': case 'B': case 'C': case 'D': case 'E': | |
91 | case 'F': case 'G': case 'H': case 'I': case 'J': | |
92 | case 'K': case 'L': case 'M': case 'N': case 'O': | |
93 | case 'P': case 'Q': case 'R': case 'S': case 'T': | |
94 | case 'U': case 'V': case 'W': case 'X': case 'Y': | |
95 | case 'Z': | |
c1428031 | 96 | case '_': case '.': |
5b3d0ab2 EW |
97 | *p++ = c; |
98 | state = 2; | |
99 | break; | |
100 | case '"': | |
5b3d0ab2 EW |
101 | state = 3; |
102 | break; | |
fa064f47 EW |
103 | case '\'': |
104 | state = 4; | |
105 | break; | |
5b3d0ab2 | 106 | case '\\': |
d70e12f1 EW |
107 | switch (c = s_gettok1()) { |
108 | case -1: | |
109 | break; | |
110 | case -2: | |
111 | state = 0; | |
112 | break; | |
113 | default: | |
114 | *p++ = c; | |
115 | state = 2; | |
116 | } | |
5b3d0ab2 EW |
117 | break; |
118 | case '0': | |
b58ec5e7 | 119 | cx.x_val.v_num = 0; |
5b3d0ab2 EW |
120 | state = 10; |
121 | break; | |
122 | case '1': case '2': case '3': case '4': | |
123 | case '5': case '6': case '7': case '8': case '9': | |
124 | cx.x_val.v_num = c - '0'; | |
125 | state = 11; | |
126 | break; | |
127 | case '>': | |
128 | state = 20; | |
129 | break; | |
130 | case '<': | |
131 | state = 21; | |
132 | break; | |
133 | case '=': | |
134 | state = 22; | |
135 | break; | |
136 | case '!': | |
137 | state = 23; | |
138 | break; | |
139 | case '&': | |
140 | state = 24; | |
141 | break; | |
142 | case '|': | |
143 | state = 25; | |
144 | break; | |
5119bdf8 EW |
145 | case '$': |
146 | state = 26; | |
147 | break; | |
5b3d0ab2 EW |
148 | case '~': |
149 | cx.x_token = T_COMP; | |
150 | state = -1; | |
151 | break; | |
152 | case '+': | |
153 | cx.x_token = T_PLUS; | |
154 | state = -1; | |
155 | break; | |
156 | case '-': | |
157 | cx.x_token = T_MINUS; | |
158 | state = -1; | |
159 | break; | |
160 | case '*': | |
161 | cx.x_token = T_MUL; | |
162 | state = -1; | |
163 | break; | |
164 | case '/': | |
165 | cx.x_token = T_DIV; | |
166 | state = -1; | |
167 | break; | |
168 | case '%': | |
169 | cx.x_token = T_MOD; | |
170 | state = -1; | |
171 | break; | |
172 | case '^': | |
173 | cx.x_token = T_XOR; | |
174 | state = -1; | |
175 | break; | |
176 | case '(': | |
177 | cx.x_token = T_LP; | |
178 | state = -1; | |
179 | break; | |
180 | case ')': | |
181 | cx.x_token = T_RP; | |
182 | state = -1; | |
183 | break; | |
5b3d0ab2 EW |
184 | case ',': |
185 | cx.x_token = T_COMMA; | |
186 | state = -1; | |
187 | break; | |
188 | case '?': | |
189 | cx.x_token = T_QUEST; | |
190 | state = -1; | |
191 | break; | |
192 | case ':': | |
193 | cx.x_token = T_COLON; | |
194 | state = -1; | |
195 | break; | |
196 | default: | |
197 | cx.x_val.v_num = c; | |
198 | cx.x_token = T_CHAR; | |
199 | state = -1; | |
200 | break; | |
201 | } | |
202 | break; | |
fa064f47 EW |
203 | case 1: /* got # */ |
204 | if (c == '\n' || c == EOF) { | |
205 | (void) s_ungetc(c); | |
206 | state = 0; | |
207 | } | |
208 | break; | |
5b3d0ab2 EW |
209 | case 2: /* unquoted string */ |
210 | switch (c) { | |
211 | case 'a': case 'b': case 'c': case 'd': case 'e': | |
212 | case 'f': case 'g': case 'h': case 'i': case 'j': | |
213 | case 'k': case 'l': case 'm': case 'n': case 'o': | |
214 | case 'p': case 'q': case 'r': case 's': case 't': | |
215 | case 'u': case 'v': case 'w': case 'x': case 'y': | |
216 | case 'z': | |
217 | case 'A': case 'B': case 'C': case 'D': case 'E': | |
218 | case 'F': case 'G': case 'H': case 'I': case 'J': | |
219 | case 'K': case 'L': case 'M': case 'N': case 'O': | |
220 | case 'P': case 'Q': case 'R': case 'S': case 'T': | |
221 | case 'U': case 'V': case 'W': case 'X': case 'Y': | |
222 | case 'Z': | |
c1428031 | 223 | case '_': case '.': |
5b3d0ab2 EW |
224 | case '0': case '1': case '2': case '3': case '4': |
225 | case '5': case '6': case '7': case '8': case '9': | |
226 | if (p < buf + sizeof buf - 1) | |
227 | *p++ = c; | |
228 | break; | |
229 | case '"': | |
5b3d0ab2 EW |
230 | state = 3; |
231 | break; | |
fa064f47 EW |
232 | case '\'': |
233 | state = 4; | |
234 | break; | |
5b3d0ab2 | 235 | case '\\': |
d70e12f1 EW |
236 | switch (c = s_gettok1()) { |
237 | case -2: | |
238 | (void) s_ungetc(' '); | |
239 | case -1: | |
240 | break; | |
241 | default: | |
242 | if (p < buf + sizeof buf - 1) | |
243 | *p++ = c; | |
244 | } | |
5b3d0ab2 EW |
245 | break; |
246 | default: | |
247 | (void) s_ungetc(c); | |
248 | case EOF: | |
249 | *p = 0; | |
250 | cx.x_token = T_STR; | |
251 | switch (*buf) { | |
252 | case 'i': | |
253 | if (buf[1] == 'f' && buf[2] == 0) | |
254 | cx.x_token = T_IF; | |
255 | break; | |
256 | case 't': | |
fa064f47 EW |
257 | if (buf[1] == 'h' && buf[2] == 'e' |
258 | && buf[3] == 'n' && buf[4] == 0) | |
5b3d0ab2 EW |
259 | cx.x_token = T_THEN; |
260 | break; | |
261 | case 'e': | |
fa064f47 EW |
262 | if (buf[1] == 'n' && buf[2] == 'd' |
263 | && buf[3] == 'i' && buf[4] == 'f' | |
264 | && buf[5] == 0) | |
265 | cx.x_token = T_ENDIF; | |
266 | else if (buf[1] == 'l' && buf[2] == 's') | |
267 | if (buf[3] == 'i' && buf[4] == 'f' | |
268 | && buf[5] == 0) | |
5b3d0ab2 | 269 | cx.x_token = T_ELSIF; |
fa064f47 EW |
270 | else if (buf[3] == 'e' && buf[4] == 0) |
271 | cx.x_token = T_ELSE; | |
5b3d0ab2 EW |
272 | break; |
273 | } | |
fa064f47 EW |
274 | if (cx.x_token == T_STR |
275 | && (cx.x_val.v_str = str_cpy(buf)) == 0) { | |
276 | p_memerror(); | |
277 | cx.x_token = T_EOF; | |
278 | } | |
5b3d0ab2 EW |
279 | state = -1; |
280 | break; | |
281 | } | |
282 | break; | |
fa064f47 | 283 | case 3: /* " quoted string */ |
5b3d0ab2 EW |
284 | switch (c) { |
285 | case '\n': | |
286 | (void) s_ungetc(c); | |
287 | case EOF: | |
fa064f47 | 288 | case '"': |
5b3d0ab2 EW |
289 | state = 2; |
290 | break; | |
291 | case '\\': | |
d70e12f1 EW |
292 | switch (c = s_gettok1()) { |
293 | case -1: | |
294 | case -2: /* newlines are invisible */ | |
295 | break; | |
296 | default: | |
297 | if (p < buf + sizeof buf - 1) | |
298 | *p++ = c; | |
299 | } | |
5b3d0ab2 EW |
300 | break; |
301 | default: | |
fa064f47 | 302 | if (p < buf + sizeof buf - 1) |
5b3d0ab2 EW |
303 | *p++ = c; |
304 | break; | |
305 | } | |
306 | break; | |
fa064f47 EW |
307 | case 4: /* ' quoted string */ |
308 | switch (c) { | |
309 | case '\n': | |
5b3d0ab2 | 310 | (void) s_ungetc(c); |
fa064f47 EW |
311 | case EOF: |
312 | case '\'': | |
313 | state = 2; | |
314 | break; | |
315 | case '\\': | |
316 | switch (c = s_gettok1()) { | |
317 | case -1: | |
318 | case -2: /* newlines are invisible */ | |
319 | break; | |
320 | default: | |
321 | if (p < buf + sizeof buf - 1) | |
322 | *p++ = c; | |
323 | } | |
324 | break; | |
325 | default: | |
326 | if (p < buf + sizeof buf - 1) | |
327 | *p++ = c; | |
328 | break; | |
5b3d0ab2 EW |
329 | } |
330 | break; | |
331 | case 10: /* got 0 */ | |
332 | switch (c) { | |
333 | case 'x': | |
334 | case 'X': | |
335 | cx.x_val.v_num = 0; | |
336 | state = 12; | |
337 | break; | |
338 | case '0': case '1': case '2': case '3': case '4': | |
339 | case '5': case '6': case '7': | |
340 | cx.x_val.v_num = c - '0'; | |
341 | state = 13; | |
342 | break; | |
343 | case '8': case '9': | |
344 | cx.x_val.v_num = c - '0'; | |
345 | state = 11; | |
346 | break; | |
347 | default: | |
348 | (void) s_ungetc(c); | |
349 | state = -1; | |
350 | cx.x_token = T_NUM; | |
351 | } | |
352 | break; | |
353 | case 11: /* decimal number */ | |
354 | switch (c) { | |
355 | case '0': case '1': case '2': case '3': case '4': | |
356 | case '5': case '6': case '7': case '8': case '9': | |
357 | cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0'; | |
358 | break; | |
359 | default: | |
360 | (void) s_ungetc(c); | |
361 | state = -1; | |
362 | cx.x_token = T_NUM; | |
363 | } | |
364 | break; | |
365 | case 12: /* hex number */ | |
366 | switch (c) { | |
367 | case '0': case '1': case '2': case '3': case '4': | |
368 | case '5': case '6': case '7': case '8': case '9': | |
369 | cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0'; | |
370 | break; | |
371 | case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': | |
372 | cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10; | |
373 | break; | |
374 | case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': | |
375 | cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10; | |
376 | break; | |
377 | default: | |
378 | (void) s_ungetc(c); | |
379 | state = -1; | |
380 | cx.x_token = T_NUM; | |
381 | } | |
382 | break; | |
383 | case 13: /* octal number */ | |
384 | switch (c) { | |
385 | case '0': case '1': case '2': case '3': case '4': | |
386 | case '5': case '6': case '7': | |
387 | cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0'; | |
388 | break; | |
389 | default: | |
390 | (void) s_ungetc(c); | |
391 | state = -1; | |
392 | cx.x_token = T_NUM; | |
393 | } | |
394 | break; | |
395 | case 20: /* got > */ | |
396 | switch (c) { | |
397 | case '=': | |
398 | cx.x_token = T_GE; | |
399 | state = -1; | |
400 | break; | |
401 | case '>': | |
402 | cx.x_token = T_RS; | |
403 | state = -1; | |
404 | break; | |
405 | default: | |
406 | (void) s_ungetc(c); | |
407 | cx.x_token = T_GT; | |
408 | state = -1; | |
409 | } | |
410 | break; | |
411 | case 21: /* got < */ | |
412 | switch (c) { | |
413 | case '=': | |
414 | cx.x_token = T_LE; | |
415 | state = -1; | |
416 | break; | |
417 | case '<': | |
418 | cx.x_token = T_LS; | |
419 | state = -1; | |
420 | break; | |
421 | default: | |
422 | (void) s_ungetc(c); | |
423 | cx.x_token = T_LT; | |
424 | state = -1; | |
425 | } | |
426 | break; | |
427 | case 22: /* got = */ | |
428 | switch (c) { | |
429 | case '=': | |
430 | cx.x_token = T_EQ; | |
431 | state = -1; | |
432 | break; | |
433 | default: | |
434 | (void) s_ungetc(c); | |
435 | cx.x_token = T_ASSIGN; | |
436 | state = -1; | |
437 | } | |
438 | break; | |
439 | case 23: /* got ! */ | |
440 | switch (c) { | |
441 | case '=': | |
442 | cx.x_token = T_NE; | |
443 | state = -1; | |
444 | break; | |
445 | default: | |
446 | (void) s_ungetc(c); | |
447 | cx.x_token = T_NOT; | |
448 | state = -1; | |
449 | } | |
450 | break; | |
5119bdf8 | 451 | case 24: /* got & */ |
5b3d0ab2 EW |
452 | switch (c) { |
453 | case '&': | |
454 | cx.x_token = T_ANDAND; | |
455 | state = -1; | |
456 | break; | |
457 | default: | |
458 | (void) s_ungetc(c); | |
459 | cx.x_token = T_AND; | |
460 | state = -1; | |
461 | } | |
462 | break; | |
5119bdf8 | 463 | case 25: /* got | */ |
5b3d0ab2 EW |
464 | switch (c) { |
465 | case '|': | |
466 | cx.x_token = T_OROR; | |
467 | state = -1; | |
468 | break; | |
469 | default: | |
470 | (void) s_ungetc(c); | |
471 | cx.x_token = T_OR; | |
472 | state = -1; | |
473 | } | |
474 | break; | |
5119bdf8 EW |
475 | case 26: /* got $ */ |
476 | switch (c) { | |
477 | case '?': | |
478 | cx.x_token = T_DQ; | |
479 | state = -1; | |
480 | break; | |
481 | default: | |
482 | (void) s_ungetc(c); | |
483 | cx.x_token = T_DOLLAR; | |
484 | state = -1; | |
485 | } | |
486 | break; | |
5b3d0ab2 EW |
487 | default: |
488 | abort(); | |
489 | } | |
490 | if (state >= 0) | |
491 | goto loop; | |
492 | return cx.x_token; | |
493 | } | |
d70e12f1 EW |
494 | |
/*
 * Decode the remainder of a backslash escape; the caller has already
 * consumed the backslash itself.
 *
 * Returns:
 *	-1	end of input right after the backslash
 *	-2	the backslash was followed by a newline
 *	else	the escaped character: \b \f \n \r \t map to the usual
 *		control characters, \ followed by one to three octal
 *		digits yields that value, and any other character
 *		stands for itself.
 */
s_gettok1()
{
	register ch;
	register val;
	register ndigits;

	ch = s_getc();
	if (ch == EOF)
		return -1;
	if (ch == '\n')
		return -2;
	if (ch < '0' || ch > '7') {
		switch (ch) {
		case 'b':
			return '\b';
		case 'f':
			return '\f';
		case 'n':
			return '\n';
		case 'r':
			return '\r';
		case 't':
			return '\t';
		}
		return ch;
	}

	/* octal escape: first digit in hand, accept up to two more */
	val = ch - '0';
	for (ndigits = 1; ndigits < 3; ndigits++) {
		ch = s_getc();
		if (ch < '0' || ch > '7') {
			/* not part of the escape; leave it for the scanner */
			(void) s_ungetc(ch);
			break;
		}
		val = val * 8 + ch - '0';
	}
	return val;
}