Commit | Line | Data |
---|---|---|
b76d33f6 ML |
1 | /* Copyright (c) 1982 Regents of the University of California */ |
2 | ||
a2d98673 | 3 | static char sccsid[] = "@(#)scanner.c 1.8 %G%"; |
b76d33f6 ML |
4 | |
5 | /* | |
6 | * Debugger scanner. | |
7 | */ | |
8 | ||
9 | #include "defs.h" | |
10 | #include "scanner.h" | |
11 | #include "main.h" | |
12 | #include "keywords.h" | |
13 | #include "tree.h" | |
14 | #include "symbols.h" | |
15 | #include "names.h" | |
16 | #include "y.tab.h" | |
17 | ||
18 | #ifndef public | |
19 | typedef int Token; | |
20 | #endif | |
21 | ||
22 | public String initfile = ".dbxinit"; | |
23 | ||
24 | typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass; | |
25 | ||
26 | private Charclass class[256 + 1]; | |
27 | private Charclass *lexclass = class + 1; | |
28 | ||
29 | #define isdigit(c) (lexclass[c] == NUM) | |
30 | #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM) | |
31 | #define ishexdigit(c) ( \ | |
32 | isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \ | |
33 | ) | |
34 | ||
35 | #define MAXLINESIZE 1024 | |
36 | ||
37 | private File in; | |
38 | private Char linebuf[MAXLINESIZE]; | |
39 | private Char *curchar; | |
40 | ||
41 | #define MAXINCLDEPTH 10 | |
42 | ||
43 | private struct { | |
44 | File savefile; | |
45 | Filename savefn; | |
46 | int savelineno; | |
47 | } inclinfo[MAXINCLDEPTH]; | |
48 | ||
49 | private unsigned int curinclindex; | |
50 | ||
b76d33f6 ML |
51 | private Token getident(); |
52 | private Token getnum(); | |
53 | private Token getstring(); | |
54 | private Boolean eofinput(); | |
55 | private Char charcon(); | |
56 | private Char charlookup(); | |
57 | ||
58 | private enterlexclass(class, s) | |
59 | Charclass class; | |
60 | String s; | |
61 | { | |
62 | register char *p; | |
63 | ||
64 | for (p = s; *p != '\0'; p++) { | |
65 | lexclass[*p] = class; | |
66 | } | |
67 | } | |
68 | ||
69 | public scanner_init() | |
70 | { | |
71 | register Integer i; | |
72 | ||
73 | for (i = 0; i < 257; i++) { | |
74 | class[i] = OTHER; | |
75 | } | |
76 | enterlexclass(WHITE, " \t"); | |
77 | enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz"); | |
78 | enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"); | |
79 | enterlexclass(NUM, "0123456789"); | |
80 | in = stdin; | |
81 | errfilename = nil; | |
82 | errlineno = 0; | |
83 | curchar = linebuf; | |
84 | linebuf[0] = '\0'; | |
b76d33f6 ML |
85 | } |
86 | ||
87 | /* | |
88 | * Read a single token. | |
89 | * | |
90 | * Input is line buffered. | |
91 | * | |
92 | * There are two "modes" of operation: one as in a compiler, | |
93 | * and one for reading shell-like syntax. | |
94 | */ | |
95 | ||
96 | private Boolean shellmode; | |
97 | ||
98 | public Token yylex() | |
99 | { | |
100 | register int c; | |
101 | register char *p; | |
102 | register Token t; | |
103 | String line; | |
104 | ||
105 | p = curchar; | |
106 | if (*p == '\0') { | |
107 | do { | |
108 | if (isterm(in)) { | |
a2d98673 | 109 | printf("(%s) ", cmdname); |
5c0d33e7 | 110 | fflush(stdout); |
b76d33f6 ML |
111 | } |
112 | line = fgets(linebuf, MAXLINESIZE, in); | |
113 | } while (line == nil and not eofinput()); | |
114 | if (line == nil) { | |
115 | c = EOF; | |
116 | } else { | |
117 | p = linebuf; | |
118 | while (lexclass[*p] == WHITE) { | |
119 | p++; | |
120 | } | |
121 | shellmode = false; | |
122 | } | |
123 | } else { | |
124 | while (lexclass[*p] == WHITE) { | |
125 | p++; | |
126 | } | |
127 | } | |
128 | curchar = p; | |
129 | c = *p; | |
130 | if (lexclass[c] == ALPHA) { | |
131 | t = getident(); | |
132 | } else if (lexclass[c] == NUM) { | |
4ff5dcd5 ML |
133 | if (shellmode) { |
134 | t = getident(); | |
135 | } else { | |
136 | t = getnum(); | |
137 | } | |
b76d33f6 ML |
138 | } else { |
139 | ++curchar; | |
140 | switch (c) { | |
141 | case '\n': | |
142 | t = '\n'; | |
143 | if (errlineno != 0) { | |
144 | errlineno++; | |
145 | } | |
146 | break; | |
147 | ||
148 | case '"': | |
149 | case '\'': | |
150 | t = getstring(); | |
151 | break; | |
152 | ||
153 | case '.': | |
154 | if (shellmode) { | |
155 | --curchar; | |
156 | t = getident(); | |
157 | } else if (isdigit(*curchar)) { | |
158 | --curchar; | |
159 | t = getnum(); | |
160 | } else { | |
161 | t = '.'; | |
162 | } | |
163 | break; | |
164 | ||
165 | case '<': | |
166 | if (not shellmode and *curchar == '<') { | |
167 | ++curchar; | |
168 | t = LFORMER; | |
169 | } else { | |
170 | t = '<'; | |
171 | } | |
172 | break; | |
173 | ||
174 | case '>': | |
175 | if (not shellmode and *curchar == '>') { | |
176 | ++curchar; | |
177 | t = RFORMER; | |
178 | } else { | |
179 | t = '>'; | |
180 | } | |
181 | break; | |
182 | ||
183 | case '#': | |
184 | if (*curchar == '^') { | |
185 | ++curchar; | |
186 | t = ABSTRACTION; | |
187 | } else { | |
188 | t = '#'; | |
189 | } | |
190 | break; | |
191 | ||
192 | case '-': | |
193 | if (shellmode) { | |
194 | --curchar; | |
195 | t = getident(); | |
196 | } else if (*curchar == '>') { | |
197 | ++curchar; | |
198 | t = ARROW; | |
199 | } else { | |
200 | t = '-'; | |
201 | } | |
202 | break; | |
203 | ||
204 | case EOF: | |
205 | t = 0; | |
206 | break; | |
207 | ||
208 | default: | |
209 | if (shellmode and index("!&*()[]", c) == nil) { | |
210 | --curchar; | |
211 | t = getident(); | |
212 | } else { | |
213 | t = c; | |
214 | } | |
215 | break; | |
216 | } | |
217 | } | |
218 | # ifdef LEXDEBUG | |
219 | if (lexdebug) { | |
220 | fprintf(stderr, "yylex returns "); | |
221 | print_token(stderr, t); | |
222 | fprintf(stderr, "\n"); | |
223 | } | |
224 | # endif | |
225 | return t; | |
226 | } | |
227 | ||
228 | /* | |
229 | * Parser error handling. | |
230 | */ | |
231 | ||
232 | public yyerror(s) | |
233 | String s; | |
234 | { | |
235 | register Char *p, *tokenbegin, *tokenend; | |
236 | register Integer len; | |
237 | ||
238 | if (streq(s, "syntax error")) { | |
239 | beginerrmsg(); | |
240 | tokenend = curchar - 1; | |
241 | tokenbegin = tokenend; | |
242 | while (lexclass[*tokenbegin] != WHITE and tokenbegin > &linebuf[0]) { | |
243 | --tokenbegin; | |
244 | } | |
245 | len = tokenend - tokenbegin + 1; | |
246 | p = tokenbegin; | |
247 | if (p > &linebuf[0]) { | |
248 | while (lexclass[*p] == WHITE and p > &linebuf[0]) { | |
249 | --p; | |
250 | } | |
251 | } | |
252 | if (p == &linebuf[0]) { | |
253 | fprintf(stderr, "unrecognized command \"%.*s\"", len, tokenbegin); | |
254 | } else { | |
255 | fprintf(stderr, "syntax error"); | |
256 | if (len != 0) { | |
257 | fprintf(stderr, " on \"%.*s\"", len, tokenbegin); | |
258 | } | |
259 | } | |
260 | enderrmsg(); | |
261 | } else { | |
262 | error(s); | |
263 | } | |
264 | } | |
265 | ||
266 | /* | |
267 | * Eat the current line. | |
268 | */ | |
269 | ||
270 | public gobble() | |
271 | { | |
272 | curchar = linebuf; | |
273 | linebuf[0] = '\0'; | |
274 | } | |
275 | ||
276 | /* | |
277 | * Scan an identifier and check to see if it's a keyword. | |
278 | */ | |
279 | ||
280 | private Token getident() | |
281 | { | |
282 | char buf[256]; | |
283 | register Char *p, *q; | |
284 | register Token t; | |
285 | ||
286 | p = curchar; | |
287 | q = buf; | |
288 | if (shellmode) { | |
289 | do { | |
290 | *q++ = *p++; | |
291 | } while (index(" \t\n!&<>*[]()", *p) == nil); | |
292 | } else { | |
293 | do { | |
294 | *q++ = *p++; | |
295 | } while (isalnum(*p)); | |
296 | } | |
297 | curchar = p; | |
298 | *q = '\0'; | |
299 | yylval.y_name = identname(buf, false); | |
300 | if (not shellmode) { | |
301 | t = findkeyword(yylval.y_name); | |
302 | if (t == nil) { | |
303 | t = NAME; | |
304 | } | |
305 | } else { | |
306 | t = NAME; | |
307 | } | |
308 | return t; | |
309 | } | |
310 | ||
311 | /* | |
312 | * Scan a number. | |
313 | */ | |
314 | ||
315 | private Token getnum() | |
316 | { | |
317 | char buf[256]; | |
318 | register Char *p, *q; | |
319 | register Token t; | |
320 | Integer base; | |
321 | ||
322 | p = curchar; | |
323 | q = buf; | |
324 | if (*p == '0') { | |
325 | if (*(p+1) == 'x') { | |
326 | p += 2; | |
327 | base = 16; | |
328 | } else { | |
329 | base = 8; | |
330 | } | |
331 | } else { | |
332 | base = 10; | |
333 | } | |
334 | if (base == 16) { | |
335 | do { | |
336 | *q++ = *p++; | |
337 | } while (ishexdigit(*p)); | |
338 | } else { | |
339 | do { | |
340 | *q++ = *p++; | |
341 | } while (isdigit(*p)); | |
342 | } | |
343 | if (*p == '.') { | |
344 | do { | |
345 | *q++ = *p++; | |
346 | } while (isdigit(*p)); | |
347 | if (*p == 'e' or *p == 'E') { | |
348 | p++; | |
349 | if (*p == '+' or *p == '-' or isdigit(*p)) { | |
350 | *q++ = 'e'; | |
351 | do { | |
352 | *q++ = *p++; | |
353 | } while (isdigit(*p)); | |
354 | } | |
355 | } | |
356 | *q = '\0'; | |
357 | yylval.y_real = atof(buf); | |
358 | t = REAL; | |
359 | } else { | |
360 | *q = '\0'; | |
361 | switch (base) { | |
362 | case 10: | |
363 | yylval.y_int = atol(buf); | |
364 | break; | |
365 | ||
366 | case 8: | |
367 | yylval.y_int = octal(buf); | |
368 | break; | |
369 | ||
370 | case 16: | |
371 | yylval.y_int = hex(buf); | |
372 | break; | |
373 | ||
374 | default: | |
375 | badcaseval(base); | |
376 | } | |
377 | t = INT; | |
378 | } | |
379 | curchar = p; | |
380 | return t; | |
381 | } | |
382 | ||
383 | /* | |
384 | * Convert a string of octal digits to an integer. | |
385 | */ | |
386 | ||
387 | private int octal(s) | |
388 | String s; | |
389 | { | |
390 | register Char *p; | |
391 | register Integer n; | |
392 | ||
393 | n = 0; | |
394 | for (p = s; *p != '\0'; p++) { | |
395 | n = 8*n + (*p - '0'); | |
396 | } | |
397 | return n; | |
398 | } | |
399 | ||
400 | /* | |
401 | * Convert a string of hexadecimal digits to an integer. | |
402 | */ | |
403 | ||
404 | private int hex(s) | |
405 | String s; | |
406 | { | |
407 | register Char *p; | |
408 | register Integer n; | |
409 | ||
410 | n = 0; | |
411 | for (p = s; *p != '\0'; p++) { | |
412 | n *= 16; | |
413 | if (*p >= 'a' and *p <= 'f') { | |
414 | n += (*p - 'a' + 10); | |
415 | } else if (*p >= 'A' and *p <= 'F') { | |
416 | n += (*p - 'A' + 10); | |
417 | } else { | |
418 | n += (*p - '0'); | |
419 | } | |
420 | } | |
421 | return n; | |
422 | } | |
423 | ||
424 | /* | |
425 | * Scan a string. | |
426 | */ | |
427 | ||
428 | private Token getstring() | |
429 | { | |
430 | char buf[256]; | |
431 | register Char *p, *q; | |
432 | Boolean endofstring; | |
433 | ||
434 | p = curchar; | |
435 | q = buf; | |
436 | endofstring = false; | |
437 | while (not endofstring) { | |
438 | if (*p == '\n' or *p == '\0') { | |
439 | error("non-terminated string"); | |
440 | endofstring = true; | |
73ad9ebb ML |
441 | } else if (*p == '"' or *p == '\'') { |
442 | if (*(p+1) != *p) { | |
b76d33f6 ML |
443 | endofstring = true; |
444 | } else { | |
445 | *q++ = *p; | |
446 | } | |
447 | } else { | |
9267e4d8 | 448 | curchar = p; |
c37ad836 ML |
449 | *q++ = charcon(p); |
450 | p = curchar; | |
b76d33f6 ML |
451 | } |
452 | p++; | |
453 | } | |
454 | curchar = p; | |
455 | *q = '\0'; | |
456 | yylval.y_string = strdup(buf); | |
457 | return STRING; | |
458 | } | |
459 | ||
460 | /* | |
461 | * Process a character constant. | |
462 | * Watch out for backslashes. | |
463 | */ | |
464 | ||
c37ad836 ML |
465 | private Char charcon(p) |
466 | char *p; | |
b76d33f6 | 467 | { |
c37ad836 | 468 | char c, buf[10], *q; |
b76d33f6 | 469 | |
c37ad836 ML |
470 | if (*p == '\\') { |
471 | ++p; | |
b76d33f6 ML |
472 | if (*p != '\\') { |
473 | q = buf; | |
474 | do { | |
475 | *q++ = *p++; | |
c37ad836 | 476 | } while (*p != '\\' and *p != '\'' and *p != '\n' and *p != '\0'); |
b76d33f6 ML |
477 | *q = '\0'; |
478 | if (isdigit(buf[0])) { | |
479 | c = (Char) octal(buf); | |
480 | } else { | |
481 | c = charlookup(buf); | |
482 | } | |
c37ad836 | 483 | curchar = p - 1; |
b76d33f6 ML |
484 | } else { |
485 | c = '\\'; | |
486 | } | |
487 | } else { | |
c37ad836 | 488 | c = *p; |
b76d33f6 ML |
489 | } |
490 | return c; | |
491 | } | |
492 | ||
493 | /* | |
494 | * Do a lookup for a ASCII character name. | |
495 | */ | |
496 | ||
497 | private String ascii[] = { | |
498 | "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL", | |
499 | "BS", "HT", "NL", "VT", "NP", "CR", "SO", "SI", | |
500 | "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", | |
501 | "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US", | |
502 | "SP", nil | |
503 | }; | |
504 | ||
505 | private char charlookup(s) | |
506 | String s; | |
507 | { | |
508 | register int i; | |
509 | ||
510 | for (i = 0; ascii[i] != NULL; i++) { | |
511 | if (streq(s, ascii[i])) { | |
512 | return i; | |
513 | } | |
514 | } | |
515 | if (streq(s, "DEL")) { | |
516 | return 0177; | |
517 | } | |
518 | error("unknown ascii name \"%s\"", s); | |
519 | return '?'; | |
520 | } | |
521 | ||
522 | /* | |
523 | * Input file management routines. | |
524 | */ | |
525 | ||
526 | public setinput(filename) | |
527 | Filename filename; | |
528 | { | |
529 | File f; | |
530 | ||
531 | f = fopen(filename, "r"); | |
532 | if (f == nil) { | |
533 | error("can't open %s", filename); | |
534 | } else { | |
535 | if (curinclindex >= MAXINCLDEPTH) { | |
536 | error("unreasonable input nesting on \"%s\"", filename); | |
537 | } | |
538 | inclinfo[curinclindex].savefile = in; | |
539 | inclinfo[curinclindex].savefn = errfilename; | |
540 | inclinfo[curinclindex].savelineno = errlineno; | |
541 | curinclindex++; | |
542 | in = f; | |
543 | errfilename = filename; | |
544 | errlineno = 1; | |
545 | } | |
546 | } | |
547 | ||
548 | private Boolean eofinput() | |
549 | { | |
550 | register Boolean b; | |
551 | ||
552 | if (curinclindex == 0) { | |
553 | if (isterm(in)) { | |
554 | putchar('\n'); | |
a2d98673 | 555 | clearerr(in); |
b76d33f6 ML |
556 | b = false; |
557 | } else { | |
558 | b = true; | |
559 | } | |
560 | } else { | |
561 | fclose(in); | |
562 | --curinclindex; | |
563 | in = inclinfo[curinclindex].savefile; | |
564 | errfilename = inclinfo[curinclindex].savefn; | |
565 | errlineno = inclinfo[curinclindex].savelineno; | |
566 | b = false; | |
567 | } | |
568 | return b; | |
569 | } | |
570 | ||
571 | /* | |
572 | * Pop the current input. Return whether successful. | |
573 | */ | |
574 | ||
575 | public Boolean popinput() | |
576 | { | |
577 | Boolean b; | |
578 | ||
579 | if (curinclindex == 0) { | |
580 | b = false; | |
581 | } else { | |
582 | b = (Boolean) (not eofinput()); | |
583 | } | |
584 | return b; | |
585 | } | |
586 | ||
587 | /* | |
588 | * Return whether we are currently reading from standard input. | |
589 | */ | |
590 | ||
591 | public Boolean isstdin() | |
592 | { | |
593 | return (Boolean) (in == stdin); | |
594 | } | |
595 | ||
596 | /* | |
597 | * Send the current line to the shell. | |
598 | */ | |
599 | ||
600 | public shellline() | |
601 | { | |
602 | register char *p; | |
603 | ||
604 | p = curchar; | |
605 | while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) { | |
606 | ++p; | |
607 | } | |
608 | shell(p); | |
609 | if (*p == '\0' and isterm(in)) { | |
610 | putchar('\n'); | |
611 | } | |
612 | erecover(); | |
613 | } | |
614 | ||
615 | /* | |
616 | * Read the rest of the current line in "shell mode". | |
617 | */ | |
618 | ||
619 | public beginshellmode() | |
620 | { | |
621 | shellmode = true; | |
622 | } | |
623 | ||
624 | /* | |
625 | * Print out a token for debugging. | |
626 | */ | |
627 | ||
628 | public print_token(f, t) | |
629 | File f; | |
630 | Token t; | |
631 | { | |
632 | if (t == '\n') { | |
633 | fprintf(f, "char '\\n'"); | |
634 | } else if (t == EOF) { | |
635 | fprintf(f, "EOF"); | |
636 | } else if (t < 256) { | |
637 | fprintf(f, "char '%c'", t); | |
638 | } else { | |
639 | fprintf(f, "\"%s\"", keywdstring(t)); | |
640 | } | |
641 | } |