Commit | Line | Data |
---|---|---|
b76d33f6 ML |
1 | /* Copyright (c) 1982 Regents of the University of California */ |
2 | ||
4ff5dcd5 | 3 | static char sccsid[] = "@(#)scanner.c 1.6 %G%"; |
b76d33f6 ML |
4 | |
5 | /* | |
6 | * Debugger scanner. | |
7 | */ | |
8 | ||
9 | #include "defs.h" | |
10 | #include "scanner.h" | |
11 | #include "main.h" | |
12 | #include "keywords.h" | |
13 | #include "tree.h" | |
14 | #include "symbols.h" | |
15 | #include "names.h" | |
16 | #include "y.tab.h" | |
17 | ||
18 | #ifndef public | |
19 | typedef int Token; | |
20 | #endif | |
21 | ||
22 | public String initfile = ".dbxinit"; | |
23 | ||
24 | typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass; | |
25 | ||
26 | private Charclass class[256 + 1]; | |
27 | private Charclass *lexclass = class + 1; | |
28 | ||
29 | #define isdigit(c) (lexclass[c] == NUM) | |
30 | #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM) | |
31 | #define ishexdigit(c) ( \ | |
32 | isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \ | |
33 | ) | |
34 | ||
35 | #define MAXLINESIZE 1024 | |
36 | ||
37 | private File in; | |
38 | private Char linebuf[MAXLINESIZE]; | |
39 | private Char *curchar; | |
40 | ||
41 | #define MAXINCLDEPTH 10 | |
42 | ||
43 | private struct { | |
44 | File savefile; | |
45 | Filename savefn; | |
46 | int savelineno; | |
47 | } inclinfo[MAXINCLDEPTH]; | |
48 | ||
49 | private unsigned int curinclindex; | |
50 | ||
b76d33f6 ML |
51 | private Token getident(); |
52 | private Token getnum(); | |
53 | private Token getstring(); | |
54 | private Boolean eofinput(); | |
55 | private Char charcon(); | |
56 | private Char charlookup(); | |
57 | ||
58 | private enterlexclass(class, s) | |
59 | Charclass class; | |
60 | String s; | |
61 | { | |
62 | register char *p; | |
63 | ||
64 | for (p = s; *p != '\0'; p++) { | |
65 | lexclass[*p] = class; | |
66 | } | |
67 | } | |
68 | ||
69 | public scanner_init() | |
70 | { | |
71 | register Integer i; | |
72 | ||
73 | for (i = 0; i < 257; i++) { | |
74 | class[i] = OTHER; | |
75 | } | |
76 | enterlexclass(WHITE, " \t"); | |
77 | enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz"); | |
78 | enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"); | |
79 | enterlexclass(NUM, "0123456789"); | |
80 | in = stdin; | |
81 | errfilename = nil; | |
82 | errlineno = 0; | |
83 | curchar = linebuf; | |
84 | linebuf[0] = '\0'; | |
b76d33f6 ML |
85 | } |
86 | ||
87 | /* | |
88 | * Read a single token. | |
89 | * | |
90 | * Input is line buffered. | |
91 | * | |
92 | * There are two "modes" of operation: one as in a compiler, | |
93 | * and one for reading shell-like syntax. | |
94 | */ | |
95 | ||
96 | private Boolean shellmode; | |
97 | ||
98 | public Token yylex() | |
99 | { | |
100 | register int c; | |
101 | register char *p; | |
102 | register Token t; | |
103 | String line; | |
104 | ||
105 | p = curchar; | |
106 | if (*p == '\0') { | |
107 | do { | |
108 | if (isterm(in)) { | |
5c0d33e7 ML |
109 | printf("> "); |
110 | fflush(stdout); | |
b76d33f6 ML |
111 | } |
112 | line = fgets(linebuf, MAXLINESIZE, in); | |
113 | } while (line == nil and not eofinput()); | |
114 | if (line == nil) { | |
115 | c = EOF; | |
116 | } else { | |
117 | p = linebuf; | |
118 | while (lexclass[*p] == WHITE) { | |
119 | p++; | |
120 | } | |
121 | shellmode = false; | |
122 | } | |
123 | } else { | |
124 | while (lexclass[*p] == WHITE) { | |
125 | p++; | |
126 | } | |
127 | } | |
128 | curchar = p; | |
129 | c = *p; | |
130 | if (lexclass[c] == ALPHA) { | |
131 | t = getident(); | |
132 | } else if (lexclass[c] == NUM) { | |
4ff5dcd5 ML |
133 | if (shellmode) { |
134 | t = getident(); | |
135 | } else { | |
136 | t = getnum(); | |
137 | } | |
b76d33f6 ML |
138 | } else { |
139 | ++curchar; | |
140 | switch (c) { | |
141 | case '\n': | |
142 | t = '\n'; | |
143 | if (errlineno != 0) { | |
144 | errlineno++; | |
145 | } | |
146 | break; | |
147 | ||
148 | case '"': | |
149 | case '\'': | |
150 | t = getstring(); | |
151 | break; | |
152 | ||
153 | case '.': | |
154 | if (shellmode) { | |
155 | --curchar; | |
156 | t = getident(); | |
157 | } else if (isdigit(*curchar)) { | |
158 | --curchar; | |
159 | t = getnum(); | |
160 | } else { | |
161 | t = '.'; | |
162 | } | |
163 | break; | |
164 | ||
165 | case '<': | |
166 | if (not shellmode and *curchar == '<') { | |
167 | ++curchar; | |
168 | t = LFORMER; | |
169 | } else { | |
170 | t = '<'; | |
171 | } | |
172 | break; | |
173 | ||
174 | case '>': | |
175 | if (not shellmode and *curchar == '>') { | |
176 | ++curchar; | |
177 | t = RFORMER; | |
178 | } else { | |
179 | t = '>'; | |
180 | } | |
181 | break; | |
182 | ||
183 | case '#': | |
184 | if (*curchar == '^') { | |
185 | ++curchar; | |
186 | t = ABSTRACTION; | |
187 | } else { | |
188 | t = '#'; | |
189 | } | |
190 | break; | |
191 | ||
192 | case '-': | |
193 | if (shellmode) { | |
194 | --curchar; | |
195 | t = getident(); | |
196 | } else if (*curchar == '>') { | |
197 | ++curchar; | |
198 | t = ARROW; | |
199 | } else { | |
200 | t = '-'; | |
201 | } | |
202 | break; | |
203 | ||
204 | case EOF: | |
205 | t = 0; | |
206 | break; | |
207 | ||
208 | default: | |
209 | if (shellmode and index("!&*()[]", c) == nil) { | |
210 | --curchar; | |
211 | t = getident(); | |
212 | } else { | |
213 | t = c; | |
214 | } | |
215 | break; | |
216 | } | |
217 | } | |
218 | # ifdef LEXDEBUG | |
219 | if (lexdebug) { | |
220 | fprintf(stderr, "yylex returns "); | |
221 | print_token(stderr, t); | |
222 | fprintf(stderr, "\n"); | |
223 | } | |
224 | # endif | |
225 | return t; | |
226 | } | |
227 | ||
228 | /* | |
229 | * Parser error handling. | |
230 | */ | |
231 | ||
232 | public yyerror(s) | |
233 | String s; | |
234 | { | |
235 | register Char *p, *tokenbegin, *tokenend; | |
236 | register Integer len; | |
237 | ||
238 | if (streq(s, "syntax error")) { | |
239 | beginerrmsg(); | |
240 | tokenend = curchar - 1; | |
241 | tokenbegin = tokenend; | |
242 | while (lexclass[*tokenbegin] != WHITE and tokenbegin > &linebuf[0]) { | |
243 | --tokenbegin; | |
244 | } | |
245 | len = tokenend - tokenbegin + 1; | |
246 | p = tokenbegin; | |
247 | if (p > &linebuf[0]) { | |
248 | while (lexclass[*p] == WHITE and p > &linebuf[0]) { | |
249 | --p; | |
250 | } | |
251 | } | |
252 | if (p == &linebuf[0]) { | |
253 | fprintf(stderr, "unrecognized command \"%.*s\"", len, tokenbegin); | |
254 | } else { | |
255 | fprintf(stderr, "syntax error"); | |
256 | if (len != 0) { | |
257 | fprintf(stderr, " on \"%.*s\"", len, tokenbegin); | |
258 | } | |
259 | } | |
260 | enderrmsg(); | |
261 | } else { | |
262 | error(s); | |
263 | } | |
264 | } | |
265 | ||
266 | /* | |
267 | * Eat the current line. | |
268 | */ | |
269 | ||
270 | public gobble() | |
271 | { | |
272 | curchar = linebuf; | |
273 | linebuf[0] = '\0'; | |
274 | } | |
275 | ||
276 | /* | |
277 | * Scan an identifier and check to see if it's a keyword. | |
278 | */ | |
279 | ||
280 | private Token getident() | |
281 | { | |
282 | char buf[256]; | |
283 | register Char *p, *q; | |
284 | register Token t; | |
285 | ||
286 | p = curchar; | |
287 | q = buf; | |
288 | if (shellmode) { | |
289 | do { | |
290 | *q++ = *p++; | |
291 | } while (index(" \t\n!&<>*[]()", *p) == nil); | |
292 | } else { | |
293 | do { | |
294 | *q++ = *p++; | |
295 | } while (isalnum(*p)); | |
296 | } | |
297 | curchar = p; | |
298 | *q = '\0'; | |
299 | yylval.y_name = identname(buf, false); | |
300 | if (not shellmode) { | |
301 | t = findkeyword(yylval.y_name); | |
302 | if (t == nil) { | |
303 | t = NAME; | |
304 | } | |
305 | } else { | |
306 | t = NAME; | |
307 | } | |
308 | return t; | |
309 | } | |
310 | ||
311 | /* | |
312 | * Scan a number. | |
313 | */ | |
314 | ||
315 | private Token getnum() | |
316 | { | |
317 | char buf[256]; | |
318 | register Char *p, *q; | |
319 | register Token t; | |
320 | Integer base; | |
321 | ||
322 | p = curchar; | |
323 | q = buf; | |
324 | if (*p == '0') { | |
325 | if (*(p+1) == 'x') { | |
326 | p += 2; | |
327 | base = 16; | |
328 | } else { | |
329 | base = 8; | |
330 | } | |
331 | } else { | |
332 | base = 10; | |
333 | } | |
334 | if (base == 16) { | |
335 | do { | |
336 | *q++ = *p++; | |
337 | } while (ishexdigit(*p)); | |
338 | } else { | |
339 | do { | |
340 | *q++ = *p++; | |
341 | } while (isdigit(*p)); | |
342 | } | |
343 | if (*p == '.') { | |
344 | do { | |
345 | *q++ = *p++; | |
346 | } while (isdigit(*p)); | |
347 | if (*p == 'e' or *p == 'E') { | |
348 | p++; | |
349 | if (*p == '+' or *p == '-' or isdigit(*p)) { | |
350 | *q++ = 'e'; | |
351 | do { | |
352 | *q++ = *p++; | |
353 | } while (isdigit(*p)); | |
354 | } | |
355 | } | |
356 | *q = '\0'; | |
357 | yylval.y_real = atof(buf); | |
358 | t = REAL; | |
359 | } else { | |
360 | *q = '\0'; | |
361 | switch (base) { | |
362 | case 10: | |
363 | yylval.y_int = atol(buf); | |
364 | break; | |
365 | ||
366 | case 8: | |
367 | yylval.y_int = octal(buf); | |
368 | break; | |
369 | ||
370 | case 16: | |
371 | yylval.y_int = hex(buf); | |
372 | break; | |
373 | ||
374 | default: | |
375 | badcaseval(base); | |
376 | } | |
377 | t = INT; | |
378 | } | |
379 | curchar = p; | |
380 | return t; | |
381 | } | |
382 | ||
383 | /* | |
384 | * Convert a string of octal digits to an integer. | |
385 | */ | |
386 | ||
387 | private int octal(s) | |
388 | String s; | |
389 | { | |
390 | register Char *p; | |
391 | register Integer n; | |
392 | ||
393 | n = 0; | |
394 | for (p = s; *p != '\0'; p++) { | |
395 | n = 8*n + (*p - '0'); | |
396 | } | |
397 | return n; | |
398 | } | |
399 | ||
400 | /* | |
401 | * Convert a string of hexadecimal digits to an integer. | |
402 | */ | |
403 | ||
404 | private int hex(s) | |
405 | String s; | |
406 | { | |
407 | register Char *p; | |
408 | register Integer n; | |
409 | ||
410 | n = 0; | |
411 | for (p = s; *p != '\0'; p++) { | |
412 | n *= 16; | |
413 | if (*p >= 'a' and *p <= 'f') { | |
414 | n += (*p - 'a' + 10); | |
415 | } else if (*p >= 'A' and *p <= 'F') { | |
416 | n += (*p - 'A' + 10); | |
417 | } else { | |
418 | n += (*p - '0'); | |
419 | } | |
420 | } | |
421 | return n; | |
422 | } | |
423 | ||
424 | /* | |
425 | * Scan a string. | |
426 | */ | |
427 | ||
428 | private Token getstring() | |
429 | { | |
430 | char buf[256]; | |
431 | register Char *p, *q; | |
432 | Boolean endofstring; | |
433 | ||
434 | p = curchar; | |
435 | q = buf; | |
436 | endofstring = false; | |
437 | while (not endofstring) { | |
438 | if (*p == '\n' or *p == '\0') { | |
439 | error("non-terminated string"); | |
440 | endofstring = true; | |
73ad9ebb ML |
441 | } else if (*p == '"' or *p == '\'') { |
442 | if (*(p+1) != *p) { | |
b76d33f6 ML |
443 | endofstring = true; |
444 | } else { | |
445 | *q++ = *p; | |
446 | } | |
447 | } else { | |
c37ad836 ML |
448 | *q++ = charcon(p); |
449 | p = curchar; | |
b76d33f6 ML |
450 | } |
451 | p++; | |
452 | } | |
453 | curchar = p; | |
454 | *q = '\0'; | |
455 | yylval.y_string = strdup(buf); | |
456 | return STRING; | |
457 | } | |
458 | ||
459 | /* | |
460 | * Process a character constant. | |
461 | * Watch out for backslashes. | |
462 | */ | |
463 | ||
c37ad836 ML |
464 | private Char charcon(p) |
465 | char *p; | |
b76d33f6 | 466 | { |
c37ad836 | 467 | char c, buf[10], *q; |
b76d33f6 | 468 | |
c37ad836 ML |
469 | if (*p == '\\') { |
470 | ++p; | |
b76d33f6 ML |
471 | if (*p != '\\') { |
472 | q = buf; | |
473 | do { | |
474 | *q++ = *p++; | |
c37ad836 | 475 | } while (*p != '\\' and *p != '\'' and *p != '\n' and *p != '\0'); |
b76d33f6 ML |
476 | *q = '\0'; |
477 | if (isdigit(buf[0])) { | |
478 | c = (Char) octal(buf); | |
479 | } else { | |
480 | c = charlookup(buf); | |
481 | } | |
c37ad836 | 482 | curchar = p - 1; |
b76d33f6 ML |
483 | } else { |
484 | c = '\\'; | |
485 | } | |
486 | } else { | |
c37ad836 | 487 | c = *p; |
b76d33f6 ML |
488 | } |
489 | return c; | |
490 | } | |
491 | ||
492 | /* | |
493 | * Do a lookup for an ASCII character name. | |
494 | */ | |
495 | ||
496 | private String ascii[] = { | |
497 | "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL", | |
498 | "BS", "HT", "NL", "VT", "NP", "CR", "SO", "SI", | |
499 | "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", | |
500 | "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US", | |
501 | "SP", nil | |
502 | }; | |
503 | ||
504 | private char charlookup(s) | |
505 | String s; | |
506 | { | |
507 | register int i; | |
508 | ||
509 | for (i = 0; ascii[i] != NULL; i++) { | |
510 | if (streq(s, ascii[i])) { | |
511 | return i; | |
512 | } | |
513 | } | |
514 | if (streq(s, "DEL")) { | |
515 | return 0177; | |
516 | } | |
517 | error("unknown ascii name \"%s\"", s); | |
518 | return '?'; | |
519 | } | |
520 | ||
521 | /* | |
522 | * Input file management routines. | |
523 | */ | |
524 | ||
525 | public setinput(filename) | |
526 | Filename filename; | |
527 | { | |
528 | File f; | |
529 | ||
530 | f = fopen(filename, "r"); | |
531 | if (f == nil) { | |
532 | error("can't open %s", filename); | |
533 | } else { | |
534 | if (curinclindex >= MAXINCLDEPTH) { | |
535 | error("unreasonable input nesting on \"%s\"", filename); | |
536 | } | |
537 | inclinfo[curinclindex].savefile = in; | |
538 | inclinfo[curinclindex].savefn = errfilename; | |
539 | inclinfo[curinclindex].savelineno = errlineno; | |
540 | curinclindex++; | |
541 | in = f; | |
542 | errfilename = filename; | |
543 | errlineno = 1; | |
544 | } | |
545 | } | |
546 | ||
547 | private Boolean eofinput() | |
548 | { | |
549 | register Boolean b; | |
550 | ||
551 | if (curinclindex == 0) { | |
552 | if (isterm(in)) { | |
553 | putchar('\n'); | |
554 | b = false; | |
555 | } else { | |
556 | b = true; | |
557 | } | |
558 | } else { | |
559 | fclose(in); | |
560 | --curinclindex; | |
561 | in = inclinfo[curinclindex].savefile; | |
562 | errfilename = inclinfo[curinclindex].savefn; | |
563 | errlineno = inclinfo[curinclindex].savelineno; | |
564 | b = false; | |
565 | } | |
566 | return b; | |
567 | } | |
568 | ||
569 | /* | |
570 | * Pop the current input. Return whether successful. | |
571 | */ | |
572 | ||
573 | public Boolean popinput() | |
574 | { | |
575 | Boolean b; | |
576 | ||
577 | if (curinclindex == 0) { | |
578 | b = false; | |
579 | } else { | |
580 | b = (Boolean) (not eofinput()); | |
581 | } | |
582 | return b; | |
583 | } | |
584 | ||
585 | /* | |
586 | * Return whether we are currently reading from standard input. | |
587 | */ | |
588 | ||
589 | public Boolean isstdin() | |
590 | { | |
591 | return (Boolean) (in == stdin); | |
592 | } | |
593 | ||
594 | /* | |
595 | * Send the current line to the shell. | |
596 | */ | |
597 | ||
598 | public shellline() | |
599 | { | |
600 | register char *p; | |
601 | ||
602 | p = curchar; | |
603 | while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) { | |
604 | ++p; | |
605 | } | |
606 | shell(p); | |
607 | if (*p == '\0' and isterm(in)) { | |
608 | putchar('\n'); | |
609 | } | |
610 | erecover(); | |
611 | } | |
612 | ||
613 | /* | |
614 | * Read the rest of the current line in "shell mode". | |
615 | */ | |
616 | ||
617 | public beginshellmode() | |
618 | { | |
619 | shellmode = true; | |
620 | } | |
621 | ||
622 | /* | |
623 | * Print out a token for debugging. | |
624 | */ | |
625 | ||
626 | public print_token(f, t) | |
627 | File f; | |
628 | Token t; | |
629 | { | |
630 | if (t == '\n') { | |
631 | fprintf(f, "char '\\n'"); | |
632 | } else if (t == EOF) { | |
633 | fprintf(f, "EOF"); | |
634 | } else if (t < 256) { | |
635 | fprintf(f, "char '%c'", t); | |
636 | } else { | |
637 | fprintf(f, "\"%s\"", keywdstring(t)); | |
638 | } | |
639 | } |