Commit | Line | Data |
---|---|---|
5b3d0ab2 | 1 | #ifndef lint |
c1428031 | 2 | static char *sccsid = "@(#)scanner.c 3.3 83/12/12"; |
5b3d0ab2 EW |
3 | #endif |
4 | ||
5 | #include <stdio.h> | |
6 | #include "value.h" | |
7 | #include "token.h" | |
8 | #include "context.h" | |
9 | #include "string.h" | |
10 | ||
11 | s_getc() | |
12 | { | |
13 | register c; | |
14 | ||
15 | switch (cx.x_type) { | |
16 | case X_FILE: | |
17 | c = getc(cx.x_fp); | |
18 | if (cx.x_bol && c != EOF) { | |
19 | cx.x_bol = 0; | |
20 | cx.x_lineno++; | |
21 | } | |
22 | if (c == '\n') | |
23 | cx.x_bol = 1; | |
24 | return c; | |
25 | case X_BUF: | |
26 | if (*cx.x_bufp != 0) | |
27 | return *cx.x_bufp++ & 0xff; | |
28 | else | |
29 | return EOF; | |
30 | } | |
31 | /*NOTREACHED*/ | |
32 | } | |
33 | ||
34 | s_ungetc(c) | |
35 | { | |
36 | if (c == EOF) | |
37 | return EOF; | |
38 | switch (cx.x_type) { | |
39 | case X_FILE: | |
40 | cx.x_bol = 0; | |
41 | return ungetc(c, cx.x_fp); | |
42 | case X_BUF: | |
43 | if (cx.x_bufp > cx.x_buf) | |
44 | return *--cx.x_bufp = c; | |
45 | else | |
46 | return EOF; | |
47 | } | |
48 | /*NOTREACHED*/ | |
49 | } | |
50 | ||
51 | s_gettok() | |
52 | { | |
53 | char buf[100]; | |
54 | register char *p = buf; | |
55 | register c; | |
56 | register state = 0; | |
57 | char quote = 0; | |
58 | ||
59 | loop: | |
60 | c = s_getc(); | |
61 | switch (state) { | |
62 | case 0: /* blank skipping */ | |
63 | if (c != ' ' && c != '\t') { | |
64 | (void) s_ungetc(c); | |
65 | state = 1; | |
66 | } | |
67 | break; | |
68 | case 1: /* beginning of token */ | |
69 | switch (c) { | |
70 | case '\n': | |
71 | case ';': | |
72 | cx.x_token = T_EOL; | |
73 | state = -1; | |
74 | break; | |
75 | case '#': | |
76 | state = 7; | |
77 | break; | |
78 | case EOF: | |
79 | cx.x_token = T_EOF; | |
80 | state = -1; | |
81 | break; | |
82 | case 'a': case 'b': case 'c': case 'd': case 'e': | |
83 | case 'f': case 'g': case 'h': case 'i': case 'j': | |
84 | case 'k': case 'l': case 'm': case 'n': case 'o': | |
85 | case 'p': case 'q': case 'r': case 's': case 't': | |
86 | case 'u': case 'v': case 'w': case 'x': case 'y': | |
87 | case 'z': | |
88 | case 'A': case 'B': case 'C': case 'D': case 'E': | |
89 | case 'F': case 'G': case 'H': case 'I': case 'J': | |
90 | case 'K': case 'L': case 'M': case 'N': case 'O': | |
91 | case 'P': case 'Q': case 'R': case 'S': case 'T': | |
92 | case 'U': case 'V': case 'W': case 'X': case 'Y': | |
93 | case 'Z': | |
c1428031 | 94 | case '_': case '.': |
5b3d0ab2 EW |
95 | *p++ = c; |
96 | state = 2; | |
97 | break; | |
98 | case '"': | |
99 | case '\'': | |
100 | quote = c; | |
101 | state = 3; | |
102 | break; | |
103 | case '\\': | |
104 | state = 4; | |
105 | break; | |
106 | case '0': | |
b58ec5e7 | 107 | cx.x_val.v_num = 0; |
5b3d0ab2 EW |
108 | state = 10; |
109 | break; | |
110 | case '1': case '2': case '3': case '4': | |
111 | case '5': case '6': case '7': case '8': case '9': | |
112 | cx.x_val.v_num = c - '0'; | |
113 | state = 11; | |
114 | break; | |
115 | case '>': | |
116 | state = 20; | |
117 | break; | |
118 | case '<': | |
119 | state = 21; | |
120 | break; | |
121 | case '=': | |
122 | state = 22; | |
123 | break; | |
124 | case '!': | |
125 | state = 23; | |
126 | break; | |
127 | case '&': | |
128 | state = 24; | |
129 | break; | |
130 | case '|': | |
131 | state = 25; | |
132 | break; | |
133 | case '~': | |
134 | cx.x_token = T_COMP; | |
135 | state = -1; | |
136 | break; | |
137 | case '+': | |
138 | cx.x_token = T_PLUS; | |
139 | state = -1; | |
140 | break; | |
141 | case '-': | |
142 | cx.x_token = T_MINUS; | |
143 | state = -1; | |
144 | break; | |
145 | case '*': | |
146 | cx.x_token = T_MUL; | |
147 | state = -1; | |
148 | break; | |
149 | case '/': | |
150 | cx.x_token = T_DIV; | |
151 | state = -1; | |
152 | break; | |
153 | case '%': | |
154 | cx.x_token = T_MOD; | |
155 | state = -1; | |
156 | break; | |
157 | case '^': | |
158 | cx.x_token = T_XOR; | |
159 | state = -1; | |
160 | break; | |
161 | case '(': | |
162 | cx.x_token = T_LP; | |
163 | state = -1; | |
164 | break; | |
165 | case ')': | |
166 | cx.x_token = T_RP; | |
167 | state = -1; | |
168 | break; | |
169 | case '$': | |
170 | cx.x_token = T_DOLLAR; | |
171 | state = -1; | |
172 | break; | |
173 | case ',': | |
174 | cx.x_token = T_COMMA; | |
175 | state = -1; | |
176 | break; | |
177 | case '?': | |
178 | cx.x_token = T_QUEST; | |
179 | state = -1; | |
180 | break; | |
181 | case ':': | |
182 | cx.x_token = T_COLON; | |
183 | state = -1; | |
184 | break; | |
185 | default: | |
186 | cx.x_val.v_num = c; | |
187 | cx.x_token = T_CHAR; | |
188 | state = -1; | |
189 | break; | |
190 | } | |
191 | break; | |
192 | case 2: /* unquoted string */ | |
193 | switch (c) { | |
194 | case 'a': case 'b': case 'c': case 'd': case 'e': | |
195 | case 'f': case 'g': case 'h': case 'i': case 'j': | |
196 | case 'k': case 'l': case 'm': case 'n': case 'o': | |
197 | case 'p': case 'q': case 'r': case 's': case 't': | |
198 | case 'u': case 'v': case 'w': case 'x': case 'y': | |
199 | case 'z': | |
200 | case 'A': case 'B': case 'C': case 'D': case 'E': | |
201 | case 'F': case 'G': case 'H': case 'I': case 'J': | |
202 | case 'K': case 'L': case 'M': case 'N': case 'O': | |
203 | case 'P': case 'Q': case 'R': case 'S': case 'T': | |
204 | case 'U': case 'V': case 'W': case 'X': case 'Y': | |
205 | case 'Z': | |
c1428031 | 206 | case '_': case '.': |
5b3d0ab2 EW |
207 | case '0': case '1': case '2': case '3': case '4': |
208 | case '5': case '6': case '7': case '8': case '9': | |
209 | if (p < buf + sizeof buf - 1) | |
210 | *p++ = c; | |
211 | break; | |
212 | case '"': | |
213 | case '\'': | |
214 | quote = c; | |
215 | state = 3; | |
216 | break; | |
217 | case '\\': | |
218 | state = 4; | |
219 | break; | |
220 | default: | |
221 | (void) s_ungetc(c); | |
222 | case EOF: | |
223 | *p = 0; | |
224 | cx.x_token = T_STR; | |
225 | switch (*buf) { | |
226 | case 'i': | |
227 | if (buf[1] == 'f' && buf[2] == 0) | |
228 | cx.x_token = T_IF; | |
229 | break; | |
230 | case 't': | |
231 | if (strcmp(buf, "then") == 0) | |
232 | cx.x_token = T_THEN; | |
233 | break; | |
234 | case 'e': | |
235 | switch (buf[1]) { | |
236 | case 'n': | |
237 | if (strcmp(buf, "endif") == 0) | |
238 | cx.x_token = T_ENDIF; | |
239 | break; | |
240 | case 'l': | |
241 | if (strcmp(buf, "else") == 0) | |
242 | cx.x_token = T_ELSE; | |
243 | if (strcmp(buf, "elsif") == 0) | |
244 | cx.x_token = T_ELSIF; | |
245 | break; | |
246 | } | |
247 | break; | |
248 | } | |
249 | if (cx.x_token == T_STR) | |
250 | if ((cx.x_val.v_str = str_cpy(buf)) == 0) { | |
251 | p_memerror(); | |
252 | cx.x_token = T_EOF; | |
253 | } | |
254 | state = -1; | |
255 | break; | |
256 | } | |
257 | break; | |
258 | case 3: /* quoted string */ | |
259 | switch (c) { | |
260 | case '\n': | |
261 | (void) s_ungetc(c); | |
262 | case EOF: | |
263 | state = 2; | |
264 | break; | |
265 | case '\\': | |
266 | state = 4; | |
267 | break; | |
268 | default: | |
269 | if (c == quote) { | |
270 | quote = 0; | |
271 | state = 2; | |
272 | } else if (p < buf + sizeof buf - 1) | |
273 | *p++ = c; | |
274 | break; | |
275 | } | |
276 | break; | |
277 | case 4: /* got \ */ | |
278 | switch (c) { | |
279 | case EOF: | |
280 | state = 2; | |
281 | break; | |
282 | case '0': case '1': case '2': case '3': case '4': | |
283 | case '5': case '6': case '7': | |
284 | if (p < buf + sizeof buf - 1) | |
285 | *p = c - '0'; | |
286 | state = 5; | |
287 | break; | |
288 | case 'b': | |
289 | c = '\b'; | |
290 | goto foo; | |
291 | case 'f': | |
292 | c = '\f'; | |
293 | goto foo; | |
294 | case 'n': | |
295 | c = '\n'; | |
296 | goto foo; | |
297 | case 'r': | |
298 | c = '\r'; | |
299 | goto foo; | |
300 | case 't': | |
301 | c = '\t'; | |
302 | foo: | |
303 | default: | |
304 | if (p < buf + sizeof buf - 1) | |
305 | *p++ = c; | |
306 | case '\n': /* swallow the \n */ | |
307 | state = quote == 0 ? 2 : 3; | |
308 | } | |
309 | break; | |
310 | case 5: /* got \[0-7] */ | |
311 | if (c >= '0' && c <= '7') { | |
312 | *p = *p * 8 + c - '0'; | |
313 | state = 6; | |
314 | } else { | |
315 | (void) s_ungetc(c); | |
316 | p++; | |
317 | state = quote == 0 ? 2 : 3; | |
318 | } | |
319 | break; | |
320 | case 6: /* got \[0-7][0-7] */ | |
321 | if (c >= '0' && c <= '7') | |
322 | *p = *p * 8 + c - '0'; | |
323 | else | |
324 | (void) s_ungetc(c); | |
325 | p++; | |
326 | state = quote == 0 ? 2 : 3; | |
327 | break; | |
328 | case 7: /* got # */ | |
329 | if (c == '\n' || c == EOF) { | |
330 | (void) s_ungetc(c); | |
331 | state = 1; | |
332 | } | |
333 | break; | |
334 | case 10: /* got 0 */ | |
335 | switch (c) { | |
336 | case 'x': | |
337 | case 'X': | |
338 | cx.x_val.v_num = 0; | |
339 | state = 12; | |
340 | break; | |
341 | case '0': case '1': case '2': case '3': case '4': | |
342 | case '5': case '6': case '7': | |
343 | cx.x_val.v_num = c - '0'; | |
344 | state = 13; | |
345 | break; | |
346 | case '8': case '9': | |
347 | cx.x_val.v_num = c - '0'; | |
348 | state = 11; | |
349 | break; | |
350 | default: | |
351 | (void) s_ungetc(c); | |
352 | state = -1; | |
353 | cx.x_token = T_NUM; | |
354 | } | |
355 | break; | |
356 | case 11: /* decimal number */ | |
357 | switch (c) { | |
358 | case '0': case '1': case '2': case '3': case '4': | |
359 | case '5': case '6': case '7': case '8': case '9': | |
360 | cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0'; | |
361 | break; | |
362 | default: | |
363 | (void) s_ungetc(c); | |
364 | state = -1; | |
365 | cx.x_token = T_NUM; | |
366 | } | |
367 | break; | |
368 | case 12: /* hex number */ | |
369 | switch (c) { | |
370 | case '0': case '1': case '2': case '3': case '4': | |
371 | case '5': case '6': case '7': case '8': case '9': | |
372 | cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0'; | |
373 | break; | |
374 | case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': | |
375 | cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10; | |
376 | break; | |
377 | case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': | |
378 | cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10; | |
379 | break; | |
380 | default: | |
381 | (void) s_ungetc(c); | |
382 | state = -1; | |
383 | cx.x_token = T_NUM; | |
384 | } | |
385 | break; | |
386 | case 13: /* octal number */ | |
387 | switch (c) { | |
388 | case '0': case '1': case '2': case '3': case '4': | |
389 | case '5': case '6': case '7': | |
390 | cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0'; | |
391 | break; | |
392 | default: | |
393 | (void) s_ungetc(c); | |
394 | state = -1; | |
395 | cx.x_token = T_NUM; | |
396 | } | |
397 | break; | |
398 | case 20: /* got > */ | |
399 | switch (c) { | |
400 | case '=': | |
401 | cx.x_token = T_GE; | |
402 | state = -1; | |
403 | break; | |
404 | case '>': | |
405 | cx.x_token = T_RS; | |
406 | state = -1; | |
407 | break; | |
408 | default: | |
409 | (void) s_ungetc(c); | |
410 | cx.x_token = T_GT; | |
411 | state = -1; | |
412 | } | |
413 | break; | |
414 | case 21: /* got < */ | |
415 | switch (c) { | |
416 | case '=': | |
417 | cx.x_token = T_LE; | |
418 | state = -1; | |
419 | break; | |
420 | case '<': | |
421 | cx.x_token = T_LS; | |
422 | state = -1; | |
423 | break; | |
424 | default: | |
425 | (void) s_ungetc(c); | |
426 | cx.x_token = T_LT; | |
427 | state = -1; | |
428 | } | |
429 | break; | |
430 | case 22: /* got = */ | |
431 | switch (c) { | |
432 | case '=': | |
433 | cx.x_token = T_EQ; | |
434 | state = -1; | |
435 | break; | |
436 | default: | |
437 | (void) s_ungetc(c); | |
438 | cx.x_token = T_ASSIGN; | |
439 | state = -1; | |
440 | } | |
441 | break; | |
442 | case 23: /* got ! */ | |
443 | switch (c) { | |
444 | case '=': | |
445 | cx.x_token = T_NE; | |
446 | state = -1; | |
447 | break; | |
448 | default: | |
449 | (void) s_ungetc(c); | |
450 | cx.x_token = T_NOT; | |
451 | state = -1; | |
452 | } | |
453 | break; | |
454 | case 24: /* and & */ | |
455 | switch (c) { | |
456 | case '&': | |
457 | cx.x_token = T_ANDAND; | |
458 | state = -1; | |
459 | break; | |
460 | default: | |
461 | (void) s_ungetc(c); | |
462 | cx.x_token = T_AND; | |
463 | state = -1; | |
464 | } | |
465 | break; | |
466 | case 25: /* and | */ | |
467 | switch (c) { | |
468 | case '|': | |
469 | cx.x_token = T_OROR; | |
470 | state = -1; | |
471 | break; | |
472 | default: | |
473 | (void) s_ungetc(c); | |
474 | cx.x_token = T_OR; | |
475 | state = -1; | |
476 | } | |
477 | break; | |
478 | default: | |
479 | abort(); | |
480 | } | |
481 | if (state >= 0) | |
482 | goto loop; | |
483 | return cx.x_token; | |
484 | } |