Commit | Line | Data |
---|---|---|
b76d33f6 ML |
1 | /* Copyright (c) 1982 Regents of the University of California */ |
2 | ||
550fe947 | 3 | static char sccsid[] = "@(#)scanner.c 1.2 %G%"; |
b76d33f6 ML |
4 | |
5 | /* | |
6 | * Debugger scanner. | |
7 | */ | |
8 | ||
9 | #include "defs.h" | |
10 | #include "scanner.h" | |
11 | #include "main.h" | |
12 | #include "keywords.h" | |
13 | #include "tree.h" | |
14 | #include "symbols.h" | |
15 | #include "names.h" | |
16 | #include "y.tab.h" | |
17 | ||
18 | #ifndef public | |
19 | typedef int Token; | |
20 | #endif | |
21 | ||
22 | public String initfile = ".dbxinit"; | |
23 | ||
24 | typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass; | |
25 | ||
26 | private Charclass class[256 + 1]; | |
27 | private Charclass *lexclass = class + 1; | |
28 | ||
/*
 * Character tests driven by the lexclass table.
 *
 * The argument is always a character taken from the line buffer.  It is
 * converted to unsigned char before indexing so that bytes with the high
 * bit set cannot produce a negative index on machines where plain char is
 * signed.  Arguments are parenthesized; note that ishexdigit and isalnum
 * evaluate their argument more than once.
 */
#define isdigit(c) (lexclass[(unsigned char) (c)] == NUM)
#define isalnum(c) ( \
    lexclass[(unsigned char) (c)] == ALPHA or \
    lexclass[(unsigned char) (c)] == NUM \
)
#define ishexdigit(c) ( \
    isdigit(c) or ((c) >= 'a' and (c) <= 'f') or ((c) >= 'A' and (c) <= 'F') \
)
34 | ||
35 | #define MAXLINESIZE 1024 | |
36 | ||
37 | private File in; | |
38 | private Char linebuf[MAXLINESIZE]; | |
39 | private Char *curchar; | |
40 | ||
41 | #define MAXINCLDEPTH 10 | |
42 | ||
43 | private struct { | |
44 | File savefile; | |
45 | Filename savefn; | |
46 | int savelineno; | |
47 | } inclinfo[MAXINCLDEPTH]; | |
48 | ||
49 | private unsigned int curinclindex; | |
50 | ||
51 | private Boolean firsttoken = true; | |
52 | private Boolean firstinit = true; | |
53 | ||
54 | private Token getident(); | |
55 | private Token getnum(); | |
56 | private Token getstring(); | |
57 | private Boolean eofinput(); | |
58 | private Char charcon(); | |
59 | private Char charlookup(); | |
60 | ||
61 | private enterlexclass(class, s) | |
62 | Charclass class; | |
63 | String s; | |
64 | { | |
65 | register char *p; | |
66 | ||
67 | for (p = s; *p != '\0'; p++) { | |
68 | lexclass[*p] = class; | |
69 | } | |
70 | } | |
71 | ||
72 | public scanner_init() | |
73 | { | |
74 | register Integer i; | |
75 | ||
76 | for (i = 0; i < 257; i++) { | |
77 | class[i] = OTHER; | |
78 | } | |
79 | enterlexclass(WHITE, " \t"); | |
80 | enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz"); | |
81 | enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"); | |
82 | enterlexclass(NUM, "0123456789"); | |
83 | in = stdin; | |
84 | errfilename = nil; | |
85 | errlineno = 0; | |
86 | curchar = linebuf; | |
87 | linebuf[0] = '\0'; | |
88 | if (runfirst) { | |
89 | firstinit = false; | |
90 | firsttoken = false; | |
91 | } else if (firstinit and isterm(in)) { | |
92 | firstinit = false; | |
93 | printf("> "); | |
94 | fflush(stdout); | |
95 | } | |
96 | } | |
97 | ||
98 | /* | |
99 | * Read a single token. | |
100 | * | |
101 | * Input is line buffered. | |
102 | * | |
103 | * There are two "modes" of operation: one as in a compiler, | |
104 | * and one for reading shell-like syntax. | |
105 | */ | |
106 | ||
107 | private Boolean shellmode; | |
108 | ||
109 | public Token yylex() | |
110 | { | |
111 | register int c; | |
112 | register char *p; | |
113 | register Token t; | |
114 | String line; | |
115 | ||
116 | p = curchar; | |
117 | if (*p == '\0') { | |
118 | do { | |
119 | if (isterm(in)) { | |
120 | if (firsttoken) { | |
121 | firsttoken = false; | |
122 | } else { | |
123 | printf("> "); | |
124 | fflush(stdout); | |
125 | } | |
126 | } | |
127 | line = fgets(linebuf, MAXLINESIZE, in); | |
128 | } while (line == nil and not eofinput()); | |
129 | if (line == nil) { | |
130 | c = EOF; | |
131 | } else { | |
132 | p = linebuf; | |
133 | while (lexclass[*p] == WHITE) { | |
134 | p++; | |
135 | } | |
136 | shellmode = false; | |
137 | } | |
138 | } else { | |
139 | while (lexclass[*p] == WHITE) { | |
140 | p++; | |
141 | } | |
142 | } | |
143 | curchar = p; | |
144 | c = *p; | |
145 | if (lexclass[c] == ALPHA) { | |
146 | t = getident(); | |
147 | } else if (lexclass[c] == NUM) { | |
148 | t = getnum(); | |
149 | } else { | |
150 | ++curchar; | |
151 | switch (c) { | |
152 | case '\n': | |
153 | t = '\n'; | |
154 | if (errlineno != 0) { | |
155 | errlineno++; | |
156 | } | |
157 | break; | |
158 | ||
159 | case '"': | |
160 | case '\'': | |
161 | t = getstring(); | |
162 | break; | |
163 | ||
164 | case '.': | |
165 | if (shellmode) { | |
166 | --curchar; | |
167 | t = getident(); | |
168 | } else if (isdigit(*curchar)) { | |
169 | --curchar; | |
170 | t = getnum(); | |
171 | } else { | |
172 | t = '.'; | |
173 | } | |
174 | break; | |
175 | ||
176 | case '<': | |
177 | if (not shellmode and *curchar == '<') { | |
178 | ++curchar; | |
179 | t = LFORMER; | |
180 | } else { | |
181 | t = '<'; | |
182 | } | |
183 | break; | |
184 | ||
185 | case '>': | |
186 | if (not shellmode and *curchar == '>') { | |
187 | ++curchar; | |
188 | t = RFORMER; | |
189 | } else { | |
190 | t = '>'; | |
191 | } | |
192 | break; | |
193 | ||
194 | case '#': | |
195 | if (*curchar == '^') { | |
196 | ++curchar; | |
197 | t = ABSTRACTION; | |
198 | } else { | |
199 | t = '#'; | |
200 | } | |
201 | break; | |
202 | ||
203 | case '-': | |
204 | if (shellmode) { | |
205 | --curchar; | |
206 | t = getident(); | |
207 | } else if (*curchar == '>') { | |
208 | ++curchar; | |
209 | t = ARROW; | |
210 | } else { | |
211 | t = '-'; | |
212 | } | |
213 | break; | |
214 | ||
215 | case EOF: | |
216 | t = 0; | |
217 | break; | |
218 | ||
219 | default: | |
220 | if (shellmode and index("!&*()[]", c) == nil) { | |
221 | --curchar; | |
222 | t = getident(); | |
223 | } else { | |
224 | t = c; | |
225 | } | |
226 | break; | |
227 | } | |
228 | } | |
229 | # ifdef LEXDEBUG | |
230 | if (lexdebug) { | |
231 | fprintf(stderr, "yylex returns "); | |
232 | print_token(stderr, t); | |
233 | fprintf(stderr, "\n"); | |
234 | } | |
235 | # endif | |
236 | return t; | |
237 | } | |
238 | ||
239 | /* | |
240 | * Parser error handling. | |
241 | */ | |
242 | ||
243 | public yyerror(s) | |
244 | String s; | |
245 | { | |
246 | register Char *p, *tokenbegin, *tokenend; | |
247 | register Integer len; | |
248 | ||
249 | if (streq(s, "syntax error")) { | |
250 | beginerrmsg(); | |
251 | tokenend = curchar - 1; | |
252 | tokenbegin = tokenend; | |
253 | while (lexclass[*tokenbegin] != WHITE and tokenbegin > &linebuf[0]) { | |
254 | --tokenbegin; | |
255 | } | |
256 | len = tokenend - tokenbegin + 1; | |
257 | p = tokenbegin; | |
258 | if (p > &linebuf[0]) { | |
259 | while (lexclass[*p] == WHITE and p > &linebuf[0]) { | |
260 | --p; | |
261 | } | |
262 | } | |
263 | if (p == &linebuf[0]) { | |
264 | fprintf(stderr, "unrecognized command \"%.*s\"", len, tokenbegin); | |
265 | } else { | |
266 | fprintf(stderr, "syntax error"); | |
267 | if (len != 0) { | |
268 | fprintf(stderr, " on \"%.*s\"", len, tokenbegin); | |
269 | } | |
270 | } | |
271 | enderrmsg(); | |
272 | } else { | |
273 | error(s); | |
274 | } | |
275 | } | |
276 | ||
277 | /* | |
278 | * Eat the current line. | |
279 | */ | |
280 | ||
281 | public gobble() | |
282 | { | |
283 | curchar = linebuf; | |
284 | linebuf[0] = '\0'; | |
285 | } | |
286 | ||
287 | /* | |
288 | * Scan an identifier and check to see if it's a keyword. | |
289 | */ | |
290 | ||
291 | private Token getident() | |
292 | { | |
293 | char buf[256]; | |
294 | register Char *p, *q; | |
295 | register Token t; | |
296 | ||
297 | p = curchar; | |
298 | q = buf; | |
299 | if (shellmode) { | |
300 | do { | |
301 | *q++ = *p++; | |
302 | } while (index(" \t\n!&<>*[]()", *p) == nil); | |
303 | } else { | |
304 | do { | |
305 | *q++ = *p++; | |
306 | } while (isalnum(*p)); | |
307 | } | |
308 | curchar = p; | |
309 | *q = '\0'; | |
310 | yylval.y_name = identname(buf, false); | |
311 | if (not shellmode) { | |
312 | t = findkeyword(yylval.y_name); | |
313 | if (t == nil) { | |
314 | t = NAME; | |
315 | } | |
316 | } else { | |
317 | t = NAME; | |
318 | } | |
319 | return t; | |
320 | } | |
321 | ||
322 | /* | |
323 | * Scan a number. | |
324 | */ | |
325 | ||
326 | private Token getnum() | |
327 | { | |
328 | char buf[256]; | |
329 | register Char *p, *q; | |
330 | register Token t; | |
331 | Integer base; | |
332 | ||
333 | p = curchar; | |
334 | q = buf; | |
335 | if (*p == '0') { | |
336 | if (*(p+1) == 'x') { | |
337 | p += 2; | |
338 | base = 16; | |
339 | } else { | |
340 | base = 8; | |
341 | } | |
342 | } else { | |
343 | base = 10; | |
344 | } | |
345 | if (base == 16) { | |
346 | do { | |
347 | *q++ = *p++; | |
348 | } while (ishexdigit(*p)); | |
349 | } else { | |
350 | do { | |
351 | *q++ = *p++; | |
352 | } while (isdigit(*p)); | |
353 | } | |
354 | if (*p == '.') { | |
355 | do { | |
356 | *q++ = *p++; | |
357 | } while (isdigit(*p)); | |
358 | if (*p == 'e' or *p == 'E') { | |
359 | p++; | |
360 | if (*p == '+' or *p == '-' or isdigit(*p)) { | |
361 | *q++ = 'e'; | |
362 | do { | |
363 | *q++ = *p++; | |
364 | } while (isdigit(*p)); | |
365 | } | |
366 | } | |
367 | *q = '\0'; | |
368 | yylval.y_real = atof(buf); | |
369 | t = REAL; | |
370 | } else { | |
371 | *q = '\0'; | |
372 | switch (base) { | |
373 | case 10: | |
374 | yylval.y_int = atol(buf); | |
375 | break; | |
376 | ||
377 | case 8: | |
378 | yylval.y_int = octal(buf); | |
379 | break; | |
380 | ||
381 | case 16: | |
382 | yylval.y_int = hex(buf); | |
383 | break; | |
384 | ||
385 | default: | |
386 | badcaseval(base); | |
387 | } | |
388 | t = INT; | |
389 | } | |
390 | curchar = p; | |
391 | return t; | |
392 | } | |
393 | ||
394 | /* | |
395 | * Convert a string of octal digits to an integer. | |
396 | */ | |
397 | ||
398 | private int octal(s) | |
399 | String s; | |
400 | { | |
401 | register Char *p; | |
402 | register Integer n; | |
403 | ||
404 | n = 0; | |
405 | for (p = s; *p != '\0'; p++) { | |
406 | n = 8*n + (*p - '0'); | |
407 | } | |
408 | return n; | |
409 | } | |
410 | ||
411 | /* | |
412 | * Convert a string of hexadecimal digits to an integer. | |
413 | */ | |
414 | ||
415 | private int hex(s) | |
416 | String s; | |
417 | { | |
418 | register Char *p; | |
419 | register Integer n; | |
420 | ||
421 | n = 0; | |
422 | for (p = s; *p != '\0'; p++) { | |
423 | n *= 16; | |
424 | if (*p >= 'a' and *p <= 'f') { | |
425 | n += (*p - 'a' + 10); | |
426 | } else if (*p >= 'A' and *p <= 'F') { | |
427 | n += (*p - 'A' + 10); | |
428 | } else { | |
429 | n += (*p - '0'); | |
430 | } | |
431 | } | |
432 | return n; | |
433 | } | |
434 | ||
435 | /* | |
436 | * Scan a string. | |
437 | */ | |
438 | ||
439 | private Token getstring() | |
440 | { | |
441 | char buf[256]; | |
442 | register Char *p, *q; | |
443 | Boolean endofstring; | |
444 | ||
445 | p = curchar; | |
446 | q = buf; | |
447 | endofstring = false; | |
448 | while (not endofstring) { | |
449 | if (*p == '\n' or *p == '\0') { | |
450 | error("non-terminated string"); | |
451 | endofstring = true; | |
452 | } else if (*p == '"') { | |
453 | if (*(p+1) != '"') { | |
454 | endofstring = true; | |
455 | } else { | |
456 | *q++ = *p; | |
457 | } | |
458 | } else { | |
459 | *q++ = charcon(*p); | |
460 | } | |
461 | p++; | |
462 | } | |
463 | curchar = p; | |
464 | *q = '\0'; | |
465 | yylval.y_string = strdup(buf); | |
466 | return STRING; | |
467 | } | |
468 | ||
469 | /* | |
470 | * Process a character constant. | |
471 | * Watch out for backslashes. | |
472 | */ | |
473 | ||
474 | private Char charcon(ch) | |
475 | Char ch; | |
476 | { | |
477 | Char c, buf[10], *p, *q; | |
478 | ||
479 | p = curchar; | |
480 | if (ch == '\\') { | |
481 | if (*p != '\\') { | |
482 | q = buf; | |
483 | do { | |
484 | *q++ = *p++; | |
485 | } while (*p != '\\' and *p != '\n' and *p != '\0'); | |
486 | if (*p != '\\') { | |
487 | ungetc(*p, in); | |
488 | error("non-terminated character constant"); | |
489 | } | |
490 | *q = '\0'; | |
491 | if (isdigit(buf[0])) { | |
492 | c = (Char) octal(buf); | |
493 | } else { | |
494 | c = charlookup(buf); | |
495 | } | |
496 | curchar = p; | |
497 | } else { | |
498 | c = '\\'; | |
499 | } | |
500 | } else { | |
501 | c = ch; | |
502 | } | |
503 | return c; | |
504 | } | |
505 | ||
506 | /* | |
507 | * Do a lookup for a ASCII character name. | |
508 | */ | |
509 | ||
510 | private String ascii[] = { | |
511 | "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL", | |
512 | "BS", "HT", "NL", "VT", "NP", "CR", "SO", "SI", | |
513 | "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", | |
514 | "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US", | |
515 | "SP", nil | |
516 | }; | |
517 | ||
518 | private char charlookup(s) | |
519 | String s; | |
520 | { | |
521 | register int i; | |
522 | ||
523 | for (i = 0; ascii[i] != NULL; i++) { | |
524 | if (streq(s, ascii[i])) { | |
525 | return i; | |
526 | } | |
527 | } | |
528 | if (streq(s, "DEL")) { | |
529 | return 0177; | |
530 | } | |
531 | error("unknown ascii name \"%s\"", s); | |
532 | return '?'; | |
533 | } | |
534 | ||
535 | /* | |
536 | * Input file management routines. | |
537 | */ | |
538 | ||
539 | public setinput(filename) | |
540 | Filename filename; | |
541 | { | |
542 | File f; | |
543 | ||
544 | f = fopen(filename, "r"); | |
545 | if (f == nil) { | |
546 | error("can't open %s", filename); | |
547 | } else { | |
548 | if (curinclindex >= MAXINCLDEPTH) { | |
549 | error("unreasonable input nesting on \"%s\"", filename); | |
550 | } | |
551 | inclinfo[curinclindex].savefile = in; | |
552 | inclinfo[curinclindex].savefn = errfilename; | |
553 | inclinfo[curinclindex].savelineno = errlineno; | |
554 | curinclindex++; | |
555 | in = f; | |
556 | errfilename = filename; | |
557 | errlineno = 1; | |
558 | } | |
559 | } | |
560 | ||
561 | private Boolean eofinput() | |
562 | { | |
563 | register Boolean b; | |
564 | ||
565 | if (curinclindex == 0) { | |
566 | if (isterm(in)) { | |
567 | putchar('\n'); | |
568 | b = false; | |
569 | } else { | |
570 | b = true; | |
571 | } | |
572 | } else { | |
573 | fclose(in); | |
574 | --curinclindex; | |
575 | in = inclinfo[curinclindex].savefile; | |
576 | errfilename = inclinfo[curinclindex].savefn; | |
577 | errlineno = inclinfo[curinclindex].savelineno; | |
578 | b = false; | |
579 | } | |
580 | return b; | |
581 | } | |
582 | ||
583 | /* | |
584 | * Pop the current input. Return whether successful. | |
585 | */ | |
586 | ||
587 | public Boolean popinput() | |
588 | { | |
589 | Boolean b; | |
590 | ||
591 | if (curinclindex == 0) { | |
592 | b = false; | |
593 | } else { | |
594 | b = (Boolean) (not eofinput()); | |
595 | } | |
596 | return b; | |
597 | } | |
598 | ||
599 | /* | |
600 | * Return whether we are currently reading from standard input. | |
601 | */ | |
602 | ||
603 | public Boolean isstdin() | |
604 | { | |
605 | return (Boolean) (in == stdin); | |
606 | } | |
607 | ||
608 | /* | |
609 | * Send the current line to the shell. | |
610 | */ | |
611 | ||
612 | public shellline() | |
613 | { | |
614 | register char *p; | |
615 | ||
616 | p = curchar; | |
617 | while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) { | |
618 | ++p; | |
619 | } | |
620 | shell(p); | |
621 | if (*p == '\0' and isterm(in)) { | |
622 | putchar('\n'); | |
623 | } | |
624 | erecover(); | |
625 | } | |
626 | ||
627 | /* | |
628 | * Read the rest of the current line in "shell mode". | |
629 | */ | |
630 | ||
631 | public beginshellmode() | |
632 | { | |
633 | shellmode = true; | |
634 | } | |
635 | ||
636 | /* | |
637 | * Print out a token for debugging. | |
638 | */ | |
639 | ||
640 | public print_token(f, t) | |
641 | File f; | |
642 | Token t; | |
643 | { | |
644 | if (t == '\n') { | |
645 | fprintf(f, "char '\\n'"); | |
646 | } else if (t == EOF) { | |
647 | fprintf(f, "EOF"); | |
648 | } else if (t < 256) { | |
649 | fprintf(f, "char '%c'", t); | |
650 | } else { | |
651 | fprintf(f, "\"%s\"", keywdstring(t)); | |
652 | } | |
653 | } |