Commit | Line | Data |
---|---|---|
b76d33f6 ML |
1 | /* Copyright (c) 1982 Regents of the University of California */ |
2 | ||
e1f4dbca | 3 | static char sccsid[] = "@(#)scanner.c 1.9 (Berkeley) %G%"; |
b76d33f6 ML |
4 | |
5 | /* | |
6 | * Debugger scanner. | |
7 | */ | |
8 | ||
9 | #include "defs.h" | |
10 | #include "scanner.h" | |
11 | #include "main.h" | |
12 | #include "keywords.h" | |
13 | #include "tree.h" | |
14 | #include "symbols.h" | |
15 | #include "names.h" | |
16 | #include "y.tab.h" | |
17 | ||
18 | #ifndef public | |
19 | typedef int Token; | |
20 | #endif | |
21 | ||
22 | public String initfile = ".dbxinit"; | |
23 | ||
24 | typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass; | |
25 | ||
26 | private Charclass class[256 + 1]; | |
27 | private Charclass *lexclass = class + 1; | |
28 | ||
29 | #define isdigit(c) (lexclass[c] == NUM) | |
30 | #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM) | |
31 | #define ishexdigit(c) ( \ | |
32 | isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \ | |
33 | ) | |
34 | ||
35 | #define MAXLINESIZE 1024 | |
36 | ||
37 | private File in; | |
38 | private Char linebuf[MAXLINESIZE]; | |
2fd0f574 | 39 | private Char *curchar, *prevchar; |
b76d33f6 ML |
40 | |
41 | #define MAXINCLDEPTH 10 | |
42 | ||
43 | private struct { | |
44 | File savefile; | |
45 | Filename savefn; | |
46 | int savelineno; | |
47 | } inclinfo[MAXINCLDEPTH]; | |
48 | ||
49 | private unsigned int curinclindex; | |
50 | ||
b76d33f6 ML |
51 | private Token getident(); |
52 | private Token getnum(); | |
53 | private Token getstring(); | |
54 | private Boolean eofinput(); | |
55 | private Char charcon(); | |
56 | private Char charlookup(); | |
57 | ||
58 | private enterlexclass(class, s) | |
59 | Charclass class; | |
60 | String s; | |
61 | { | |
62 | register char *p; | |
63 | ||
64 | for (p = s; *p != '\0'; p++) { | |
65 | lexclass[*p] = class; | |
66 | } | |
67 | } | |
68 | ||
69 | public scanner_init() | |
70 | { | |
71 | register Integer i; | |
72 | ||
73 | for (i = 0; i < 257; i++) { | |
74 | class[i] = OTHER; | |
75 | } | |
76 | enterlexclass(WHITE, " \t"); | |
77 | enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz"); | |
78 | enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"); | |
79 | enterlexclass(NUM, "0123456789"); | |
80 | in = stdin; | |
81 | errfilename = nil; | |
82 | errlineno = 0; | |
83 | curchar = linebuf; | |
84 | linebuf[0] = '\0'; | |
b76d33f6 ML |
85 | } |
86 | ||
87 | /* | |
88 | * Read a single token. | |
89 | * | |
90 | * Input is line buffered. | |
91 | * | |
92 | * There are two "modes" of operation: one as in a compiler, | |
93 | * and one for reading shell-like syntax. | |
94 | */ | |
95 | ||
96 | private Boolean shellmode; | |
97 | ||
98 | public Token yylex() | |
99 | { | |
100 | register int c; | |
101 | register char *p; | |
102 | register Token t; | |
103 | String line; | |
104 | ||
105 | p = curchar; | |
106 | if (*p == '\0') { | |
107 | do { | |
108 | if (isterm(in)) { | |
a2d98673 | 109 | printf("(%s) ", cmdname); |
5c0d33e7 | 110 | fflush(stdout); |
b76d33f6 ML |
111 | } |
112 | line = fgets(linebuf, MAXLINESIZE, in); | |
113 | } while (line == nil and not eofinput()); | |
114 | if (line == nil) { | |
115 | c = EOF; | |
116 | } else { | |
117 | p = linebuf; | |
118 | while (lexclass[*p] == WHITE) { | |
119 | p++; | |
120 | } | |
121 | shellmode = false; | |
122 | } | |
123 | } else { | |
124 | while (lexclass[*p] == WHITE) { | |
125 | p++; | |
126 | } | |
127 | } | |
128 | curchar = p; | |
2fd0f574 | 129 | prevchar = curchar; |
b76d33f6 ML |
130 | c = *p; |
131 | if (lexclass[c] == ALPHA) { | |
132 | t = getident(); | |
133 | } else if (lexclass[c] == NUM) { | |
4ff5dcd5 ML |
134 | if (shellmode) { |
135 | t = getident(); | |
136 | } else { | |
137 | t = getnum(); | |
138 | } | |
b76d33f6 ML |
139 | } else { |
140 | ++curchar; | |
141 | switch (c) { | |
142 | case '\n': | |
143 | t = '\n'; | |
144 | if (errlineno != 0) { | |
145 | errlineno++; | |
146 | } | |
147 | break; | |
148 | ||
149 | case '"': | |
150 | case '\'': | |
151 | t = getstring(); | |
152 | break; | |
153 | ||
154 | case '.': | |
155 | if (shellmode) { | |
156 | --curchar; | |
157 | t = getident(); | |
158 | } else if (isdigit(*curchar)) { | |
159 | --curchar; | |
160 | t = getnum(); | |
161 | } else { | |
162 | t = '.'; | |
163 | } | |
164 | break; | |
165 | ||
166 | case '<': | |
167 | if (not shellmode and *curchar == '<') { | |
168 | ++curchar; | |
169 | t = LFORMER; | |
170 | } else { | |
171 | t = '<'; | |
172 | } | |
173 | break; | |
174 | ||
175 | case '>': | |
176 | if (not shellmode and *curchar == '>') { | |
177 | ++curchar; | |
178 | t = RFORMER; | |
179 | } else { | |
180 | t = '>'; | |
181 | } | |
182 | break; | |
183 | ||
184 | case '#': | |
185 | if (*curchar == '^') { | |
186 | ++curchar; | |
187 | t = ABSTRACTION; | |
188 | } else { | |
189 | t = '#'; | |
190 | } | |
191 | break; | |
192 | ||
193 | case '-': | |
194 | if (shellmode) { | |
195 | --curchar; | |
196 | t = getident(); | |
197 | } else if (*curchar == '>') { | |
198 | ++curchar; | |
199 | t = ARROW; | |
200 | } else { | |
201 | t = '-'; | |
202 | } | |
203 | break; | |
204 | ||
205 | case EOF: | |
206 | t = 0; | |
207 | break; | |
208 | ||
209 | default: | |
210 | if (shellmode and index("!&*()[]", c) == nil) { | |
211 | --curchar; | |
212 | t = getident(); | |
213 | } else { | |
214 | t = c; | |
215 | } | |
216 | break; | |
217 | } | |
218 | } | |
219 | # ifdef LEXDEBUG | |
220 | if (lexdebug) { | |
221 | fprintf(stderr, "yylex returns "); | |
222 | print_token(stderr, t); | |
223 | fprintf(stderr, "\n"); | |
224 | } | |
225 | # endif | |
226 | return t; | |
227 | } | |
228 | ||
229 | /* | |
230 | * Parser error handling. | |
231 | */ | |
232 | ||
233 | public yyerror(s) | |
234 | String s; | |
235 | { | |
2fd0f574 SL |
236 | register char *p; |
237 | register integer start; | |
b76d33f6 ML |
238 | |
239 | if (streq(s, "syntax error")) { | |
240 | beginerrmsg(); | |
2fd0f574 SL |
241 | p = prevchar; |
242 | start = p - &linebuf[0]; | |
b76d33f6 ML |
243 | if (p > &linebuf[0]) { |
244 | while (lexclass[*p] == WHITE and p > &linebuf[0]) { | |
245 | --p; | |
246 | } | |
247 | } | |
2fd0f574 SL |
248 | fprintf(stderr, "%s", linebuf); |
249 | if (start != 0) { | |
250 | fprintf(stderr, "%*c", start, ' '); | |
251 | } | |
b76d33f6 | 252 | if (p == &linebuf[0]) { |
2fd0f574 | 253 | fprintf(stderr, "^ unrecognized command"); |
b76d33f6 | 254 | } else { |
2fd0f574 | 255 | fprintf(stderr, "^ syntax error"); |
b76d33f6 ML |
256 | } |
257 | enderrmsg(); | |
258 | } else { | |
259 | error(s); | |
260 | } | |
261 | } | |
262 | ||
263 | /* | |
264 | * Eat the current line. | |
265 | */ | |
266 | ||
267 | public gobble() | |
268 | { | |
269 | curchar = linebuf; | |
270 | linebuf[0] = '\0'; | |
271 | } | |
272 | ||
273 | /* | |
274 | * Scan an identifier and check to see if it's a keyword. | |
275 | */ | |
276 | ||
277 | private Token getident() | |
278 | { | |
279 | char buf[256]; | |
280 | register Char *p, *q; | |
281 | register Token t; | |
282 | ||
283 | p = curchar; | |
284 | q = buf; | |
285 | if (shellmode) { | |
286 | do { | |
287 | *q++ = *p++; | |
2fd0f574 | 288 | } while (index(" \t\n!&<>*[]()'\"", *p) == nil); |
b76d33f6 ML |
289 | } else { |
290 | do { | |
291 | *q++ = *p++; | |
292 | } while (isalnum(*p)); | |
293 | } | |
294 | curchar = p; | |
295 | *q = '\0'; | |
296 | yylval.y_name = identname(buf, false); | |
297 | if (not shellmode) { | |
298 | t = findkeyword(yylval.y_name); | |
299 | if (t == nil) { | |
300 | t = NAME; | |
301 | } | |
302 | } else { | |
303 | t = NAME; | |
304 | } | |
305 | return t; | |
306 | } | |
307 | ||
308 | /* | |
309 | * Scan a number. | |
310 | */ | |
311 | ||
312 | private Token getnum() | |
313 | { | |
314 | char buf[256]; | |
315 | register Char *p, *q; | |
316 | register Token t; | |
317 | Integer base; | |
318 | ||
319 | p = curchar; | |
320 | q = buf; | |
321 | if (*p == '0') { | |
322 | if (*(p+1) == 'x') { | |
323 | p += 2; | |
324 | base = 16; | |
325 | } else { | |
326 | base = 8; | |
327 | } | |
328 | } else { | |
329 | base = 10; | |
330 | } | |
331 | if (base == 16) { | |
332 | do { | |
333 | *q++ = *p++; | |
334 | } while (ishexdigit(*p)); | |
335 | } else { | |
336 | do { | |
337 | *q++ = *p++; | |
338 | } while (isdigit(*p)); | |
339 | } | |
340 | if (*p == '.') { | |
341 | do { | |
342 | *q++ = *p++; | |
343 | } while (isdigit(*p)); | |
344 | if (*p == 'e' or *p == 'E') { | |
345 | p++; | |
346 | if (*p == '+' or *p == '-' or isdigit(*p)) { | |
347 | *q++ = 'e'; | |
348 | do { | |
349 | *q++ = *p++; | |
350 | } while (isdigit(*p)); | |
351 | } | |
352 | } | |
353 | *q = '\0'; | |
354 | yylval.y_real = atof(buf); | |
355 | t = REAL; | |
356 | } else { | |
357 | *q = '\0'; | |
358 | switch (base) { | |
359 | case 10: | |
360 | yylval.y_int = atol(buf); | |
361 | break; | |
362 | ||
363 | case 8: | |
364 | yylval.y_int = octal(buf); | |
365 | break; | |
366 | ||
367 | case 16: | |
368 | yylval.y_int = hex(buf); | |
369 | break; | |
370 | ||
371 | default: | |
372 | badcaseval(base); | |
373 | } | |
374 | t = INT; | |
375 | } | |
376 | curchar = p; | |
377 | return t; | |
378 | } | |
379 | ||
380 | /* | |
381 | * Convert a string of octal digits to an integer. | |
382 | */ | |
383 | ||
384 | private int octal(s) | |
385 | String s; | |
386 | { | |
387 | register Char *p; | |
388 | register Integer n; | |
389 | ||
390 | n = 0; | |
391 | for (p = s; *p != '\0'; p++) { | |
392 | n = 8*n + (*p - '0'); | |
393 | } | |
394 | return n; | |
395 | } | |
396 | ||
397 | /* | |
398 | * Convert a string of hexadecimal digits to an integer. | |
399 | */ | |
400 | ||
401 | private int hex(s) | |
402 | String s; | |
403 | { | |
404 | register Char *p; | |
405 | register Integer n; | |
406 | ||
407 | n = 0; | |
408 | for (p = s; *p != '\0'; p++) { | |
409 | n *= 16; | |
410 | if (*p >= 'a' and *p <= 'f') { | |
411 | n += (*p - 'a' + 10); | |
412 | } else if (*p >= 'A' and *p <= 'F') { | |
413 | n += (*p - 'A' + 10); | |
414 | } else { | |
415 | n += (*p - '0'); | |
416 | } | |
417 | } | |
418 | return n; | |
419 | } | |
420 | ||
421 | /* | |
422 | * Scan a string. | |
423 | */ | |
424 | ||
425 | private Token getstring() | |
426 | { | |
427 | char buf[256]; | |
428 | register Char *p, *q; | |
429 | Boolean endofstring; | |
430 | ||
431 | p = curchar; | |
432 | q = buf; | |
433 | endofstring = false; | |
434 | while (not endofstring) { | |
435 | if (*p == '\n' or *p == '\0') { | |
436 | error("non-terminated string"); | |
437 | endofstring = true; | |
73ad9ebb ML |
438 | } else if (*p == '"' or *p == '\'') { |
439 | if (*(p+1) != *p) { | |
b76d33f6 ML |
440 | endofstring = true; |
441 | } else { | |
442 | *q++ = *p; | |
443 | } | |
444 | } else { | |
9267e4d8 | 445 | curchar = p; |
c37ad836 ML |
446 | *q++ = charcon(p); |
447 | p = curchar; | |
b76d33f6 ML |
448 | } |
449 | p++; | |
450 | } | |
451 | curchar = p; | |
452 | *q = '\0'; | |
453 | yylval.y_string = strdup(buf); | |
454 | return STRING; | |
455 | } | |
456 | ||
457 | /* | |
458 | * Process a character constant. | |
459 | * Watch out for backslashes. | |
460 | */ | |
461 | ||
c37ad836 ML |
462 | private Char charcon(p) |
463 | char *p; | |
b76d33f6 | 464 | { |
c37ad836 | 465 | char c, buf[10], *q; |
b76d33f6 | 466 | |
c37ad836 ML |
467 | if (*p == '\\') { |
468 | ++p; | |
b76d33f6 ML |
469 | if (*p != '\\') { |
470 | q = buf; | |
471 | do { | |
472 | *q++ = *p++; | |
c37ad836 | 473 | } while (*p != '\\' and *p != '\'' and *p != '\n' and *p != '\0'); |
b76d33f6 ML |
474 | *q = '\0'; |
475 | if (isdigit(buf[0])) { | |
476 | c = (Char) octal(buf); | |
477 | } else { | |
478 | c = charlookup(buf); | |
479 | } | |
c37ad836 | 480 | curchar = p - 1; |
b76d33f6 ML |
481 | } else { |
482 | c = '\\'; | |
483 | } | |
484 | } else { | |
c37ad836 | 485 | c = *p; |
b76d33f6 ML |
486 | } |
487 | return c; | |
488 | } | |
489 | ||
490 | /* | |
491 | * Do a lookup for an ASCII character name. | |
492 | */ | |
493 | ||
494 | private String ascii[] = { | |
495 | "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL", | |
496 | "BS", "HT", "NL", "VT", "NP", "CR", "SO", "SI", | |
497 | "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", | |
498 | "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US", | |
499 | "SP", nil | |
500 | }; | |
501 | ||
502 | private char charlookup(s) | |
503 | String s; | |
504 | { | |
505 | register int i; | |
506 | ||
507 | for (i = 0; ascii[i] != NULL; i++) { | |
508 | if (streq(s, ascii[i])) { | |
509 | return i; | |
510 | } | |
511 | } | |
512 | if (streq(s, "DEL")) { | |
513 | return 0177; | |
514 | } | |
515 | error("unknown ascii name \"%s\"", s); | |
516 | return '?'; | |
517 | } | |
518 | ||
519 | /* | |
520 | * Input file management routines. | |
521 | */ | |
522 | ||
523 | public setinput(filename) | |
524 | Filename filename; | |
525 | { | |
526 | File f; | |
527 | ||
528 | f = fopen(filename, "r"); | |
529 | if (f == nil) { | |
530 | error("can't open %s", filename); | |
531 | } else { | |
532 | if (curinclindex >= MAXINCLDEPTH) { | |
533 | error("unreasonable input nesting on \"%s\"", filename); | |
534 | } | |
535 | inclinfo[curinclindex].savefile = in; | |
536 | inclinfo[curinclindex].savefn = errfilename; | |
537 | inclinfo[curinclindex].savelineno = errlineno; | |
538 | curinclindex++; | |
539 | in = f; | |
540 | errfilename = filename; | |
541 | errlineno = 1; | |
542 | } | |
543 | } | |
544 | ||
545 | private Boolean eofinput() | |
546 | { | |
547 | register Boolean b; | |
548 | ||
549 | if (curinclindex == 0) { | |
550 | if (isterm(in)) { | |
551 | putchar('\n'); | |
a2d98673 | 552 | clearerr(in); |
b76d33f6 ML |
553 | b = false; |
554 | } else { | |
555 | b = true; | |
556 | } | |
557 | } else { | |
558 | fclose(in); | |
559 | --curinclindex; | |
560 | in = inclinfo[curinclindex].savefile; | |
561 | errfilename = inclinfo[curinclindex].savefn; | |
562 | errlineno = inclinfo[curinclindex].savelineno; | |
563 | b = false; | |
564 | } | |
565 | return b; | |
566 | } | |
567 | ||
568 | /* | |
569 | * Pop the current input. Return whether successful. | |
570 | */ | |
571 | ||
572 | public Boolean popinput() | |
573 | { | |
574 | Boolean b; | |
575 | ||
576 | if (curinclindex == 0) { | |
577 | b = false; | |
578 | } else { | |
579 | b = (Boolean) (not eofinput()); | |
580 | } | |
581 | return b; | |
582 | } | |
583 | ||
584 | /* | |
585 | * Return whether we are currently reading from standard input. | |
586 | */ | |
587 | ||
588 | public Boolean isstdin() | |
589 | { | |
590 | return (Boolean) (in == stdin); | |
591 | } | |
592 | ||
593 | /* | |
594 | * Send the current line to the shell. | |
595 | */ | |
596 | ||
597 | public shellline() | |
598 | { | |
599 | register char *p; | |
600 | ||
601 | p = curchar; | |
602 | while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) { | |
603 | ++p; | |
604 | } | |
605 | shell(p); | |
606 | if (*p == '\0' and isterm(in)) { | |
607 | putchar('\n'); | |
608 | } | |
609 | erecover(); | |
610 | } | |
611 | ||
612 | /* | |
613 | * Read the rest of the current line in "shell mode". | |
614 | */ | |
615 | ||
616 | public beginshellmode() | |
617 | { | |
618 | shellmode = true; | |
619 | } | |
620 | ||
621 | /* | |
622 | * Print out a token for debugging. | |
623 | */ | |
624 | ||
625 | public print_token(f, t) | |
626 | File f; | |
627 | Token t; | |
628 | { | |
629 | if (t == '\n') { | |
630 | fprintf(f, "char '\\n'"); | |
631 | } else if (t == EOF) { | |
632 | fprintf(f, "EOF"); | |
633 | } else if (t < 256) { | |
634 | fprintf(f, "char '%c'", t); | |
635 | } else { | |
636 | fprintf(f, "\"%s\"", keywdstring(t)); | |
637 | } | |
638 | } |