Commit | Line | Data |
---|---|---|
b76d33f6 ML |
1 | /* Copyright (c) 1982 Regents of the University of California */ |
2 | ||
5c0d33e7 | 3 | static char sccsid[] = "@(#)scanner.c 1.5 %G%"; |
b76d33f6 ML |
4 | |
5 | /* | |
6 | * Debugger scanner. | |
7 | */ | |
8 | ||
9 | #include "defs.h" | |
10 | #include "scanner.h" | |
11 | #include "main.h" | |
12 | #include "keywords.h" | |
13 | #include "tree.h" | |
14 | #include "symbols.h" | |
15 | #include "names.h" | |
16 | #include "y.tab.h" | |
17 | ||
18 | #ifndef public | |
19 | typedef int Token; | |
20 | #endif | |
21 | ||
22 | public String initfile = ".dbxinit"; | |
23 | ||
24 | typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass; | |
25 | ||
26 | private Charclass class[256 + 1]; | |
27 | private Charclass *lexclass = class + 1; | |
28 | ||
29 | #define isdigit(c) (lexclass[c] == NUM) | |
30 | #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM) | |
31 | #define ishexdigit(c) ( \ | |
32 | isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \ | |
33 | ) | |
34 | ||
35 | #define MAXLINESIZE 1024 | |
36 | ||
37 | private File in; | |
38 | private Char linebuf[MAXLINESIZE]; | |
39 | private Char *curchar; | |
40 | ||
41 | #define MAXINCLDEPTH 10 | |
42 | ||
43 | private struct { | |
44 | File savefile; | |
45 | Filename savefn; | |
46 | int savelineno; | |
47 | } inclinfo[MAXINCLDEPTH]; | |
48 | ||
49 | private unsigned int curinclindex; | |
50 | ||
b76d33f6 ML |
51 | private Token getident(); |
52 | private Token getnum(); | |
53 | private Token getstring(); | |
54 | private Boolean eofinput(); | |
55 | private Char charcon(); | |
56 | private Char charlookup(); | |
57 | ||
58 | private enterlexclass(class, s) | |
59 | Charclass class; | |
60 | String s; | |
61 | { | |
62 | register char *p; | |
63 | ||
64 | for (p = s; *p != '\0'; p++) { | |
65 | lexclass[*p] = class; | |
66 | } | |
67 | } | |
68 | ||
69 | public scanner_init() | |
70 | { | |
71 | register Integer i; | |
72 | ||
73 | for (i = 0; i < 257; i++) { | |
74 | class[i] = OTHER; | |
75 | } | |
76 | enterlexclass(WHITE, " \t"); | |
77 | enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz"); | |
78 | enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"); | |
79 | enterlexclass(NUM, "0123456789"); | |
80 | in = stdin; | |
81 | errfilename = nil; | |
82 | errlineno = 0; | |
83 | curchar = linebuf; | |
84 | linebuf[0] = '\0'; | |
b76d33f6 ML |
85 | } |
86 | ||
87 | /* | |
88 | * Read a single token. | |
89 | * | |
90 | * Input is line buffered. | |
91 | * | |
92 | * There are two "modes" of operation: one as in a compiler, | |
93 | * and one for reading shell-like syntax. | |
94 | */ | |
95 | ||
96 | private Boolean shellmode; | |
97 | ||
98 | public Token yylex() | |
99 | { | |
100 | register int c; | |
101 | register char *p; | |
102 | register Token t; | |
103 | String line; | |
104 | ||
105 | p = curchar; | |
106 | if (*p == '\0') { | |
107 | do { | |
108 | if (isterm(in)) { | |
5c0d33e7 ML |
109 | printf("> "); |
110 | fflush(stdout); | |
b76d33f6 ML |
111 | } |
112 | line = fgets(linebuf, MAXLINESIZE, in); | |
113 | } while (line == nil and not eofinput()); | |
114 | if (line == nil) { | |
115 | c = EOF; | |
116 | } else { | |
117 | p = linebuf; | |
118 | while (lexclass[*p] == WHITE) { | |
119 | p++; | |
120 | } | |
121 | shellmode = false; | |
122 | } | |
123 | } else { | |
124 | while (lexclass[*p] == WHITE) { | |
125 | p++; | |
126 | } | |
127 | } | |
128 | curchar = p; | |
129 | c = *p; | |
130 | if (lexclass[c] == ALPHA) { | |
131 | t = getident(); | |
132 | } else if (lexclass[c] == NUM) { | |
133 | t = getnum(); | |
134 | } else { | |
135 | ++curchar; | |
136 | switch (c) { | |
137 | case '\n': | |
138 | t = '\n'; | |
139 | if (errlineno != 0) { | |
140 | errlineno++; | |
141 | } | |
142 | break; | |
143 | ||
144 | case '"': | |
145 | case '\'': | |
146 | t = getstring(); | |
147 | break; | |
148 | ||
149 | case '.': | |
150 | if (shellmode) { | |
151 | --curchar; | |
152 | t = getident(); | |
153 | } else if (isdigit(*curchar)) { | |
154 | --curchar; | |
155 | t = getnum(); | |
156 | } else { | |
157 | t = '.'; | |
158 | } | |
159 | break; | |
160 | ||
161 | case '<': | |
162 | if (not shellmode and *curchar == '<') { | |
163 | ++curchar; | |
164 | t = LFORMER; | |
165 | } else { | |
166 | t = '<'; | |
167 | } | |
168 | break; | |
169 | ||
170 | case '>': | |
171 | if (not shellmode and *curchar == '>') { | |
172 | ++curchar; | |
173 | t = RFORMER; | |
174 | } else { | |
175 | t = '>'; | |
176 | } | |
177 | break; | |
178 | ||
179 | case '#': | |
180 | if (*curchar == '^') { | |
181 | ++curchar; | |
182 | t = ABSTRACTION; | |
183 | } else { | |
184 | t = '#'; | |
185 | } | |
186 | break; | |
187 | ||
188 | case '-': | |
189 | if (shellmode) { | |
190 | --curchar; | |
191 | t = getident(); | |
192 | } else if (*curchar == '>') { | |
193 | ++curchar; | |
194 | t = ARROW; | |
195 | } else { | |
196 | t = '-'; | |
197 | } | |
198 | break; | |
199 | ||
200 | case EOF: | |
201 | t = 0; | |
202 | break; | |
203 | ||
204 | default: | |
205 | if (shellmode and index("!&*()[]", c) == nil) { | |
206 | --curchar; | |
207 | t = getident(); | |
208 | } else { | |
209 | t = c; | |
210 | } | |
211 | break; | |
212 | } | |
213 | } | |
214 | # ifdef LEXDEBUG | |
215 | if (lexdebug) { | |
216 | fprintf(stderr, "yylex returns "); | |
217 | print_token(stderr, t); | |
218 | fprintf(stderr, "\n"); | |
219 | } | |
220 | # endif | |
221 | return t; | |
222 | } | |
223 | ||
224 | /* | |
225 | * Parser error handling. | |
226 | */ | |
227 | ||
228 | public yyerror(s) | |
229 | String s; | |
230 | { | |
231 | register Char *p, *tokenbegin, *tokenend; | |
232 | register Integer len; | |
233 | ||
234 | if (streq(s, "syntax error")) { | |
235 | beginerrmsg(); | |
236 | tokenend = curchar - 1; | |
237 | tokenbegin = tokenend; | |
238 | while (lexclass[*tokenbegin] != WHITE and tokenbegin > &linebuf[0]) { | |
239 | --tokenbegin; | |
240 | } | |
241 | len = tokenend - tokenbegin + 1; | |
242 | p = tokenbegin; | |
243 | if (p > &linebuf[0]) { | |
244 | while (lexclass[*p] == WHITE and p > &linebuf[0]) { | |
245 | --p; | |
246 | } | |
247 | } | |
248 | if (p == &linebuf[0]) { | |
249 | fprintf(stderr, "unrecognized command \"%.*s\"", len, tokenbegin); | |
250 | } else { | |
251 | fprintf(stderr, "syntax error"); | |
252 | if (len != 0) { | |
253 | fprintf(stderr, " on \"%.*s\"", len, tokenbegin); | |
254 | } | |
255 | } | |
256 | enderrmsg(); | |
257 | } else { | |
258 | error(s); | |
259 | } | |
260 | } | |
261 | ||
262 | /* | |
263 | * Eat the current line. | |
264 | */ | |
265 | ||
266 | public gobble() | |
267 | { | |
268 | curchar = linebuf; | |
269 | linebuf[0] = '\0'; | |
270 | } | |
271 | ||
272 | /* | |
273 | * Scan an identifier and check to see if it's a keyword. | |
274 | */ | |
275 | ||
276 | private Token getident() | |
277 | { | |
278 | char buf[256]; | |
279 | register Char *p, *q; | |
280 | register Token t; | |
281 | ||
282 | p = curchar; | |
283 | q = buf; | |
284 | if (shellmode) { | |
285 | do { | |
286 | *q++ = *p++; | |
287 | } while (index(" \t\n!&<>*[]()", *p) == nil); | |
288 | } else { | |
289 | do { | |
290 | *q++ = *p++; | |
291 | } while (isalnum(*p)); | |
292 | } | |
293 | curchar = p; | |
294 | *q = '\0'; | |
295 | yylval.y_name = identname(buf, false); | |
296 | if (not shellmode) { | |
297 | t = findkeyword(yylval.y_name); | |
298 | if (t == nil) { | |
299 | t = NAME; | |
300 | } | |
301 | } else { | |
302 | t = NAME; | |
303 | } | |
304 | return t; | |
305 | } | |
306 | ||
307 | /* | |
308 | * Scan a number. | |
309 | */ | |
310 | ||
311 | private Token getnum() | |
312 | { | |
313 | char buf[256]; | |
314 | register Char *p, *q; | |
315 | register Token t; | |
316 | Integer base; | |
317 | ||
318 | p = curchar; | |
319 | q = buf; | |
320 | if (*p == '0') { | |
321 | if (*(p+1) == 'x') { | |
322 | p += 2; | |
323 | base = 16; | |
324 | } else { | |
325 | base = 8; | |
326 | } | |
327 | } else { | |
328 | base = 10; | |
329 | } | |
330 | if (base == 16) { | |
331 | do { | |
332 | *q++ = *p++; | |
333 | } while (ishexdigit(*p)); | |
334 | } else { | |
335 | do { | |
336 | *q++ = *p++; | |
337 | } while (isdigit(*p)); | |
338 | } | |
339 | if (*p == '.') { | |
340 | do { | |
341 | *q++ = *p++; | |
342 | } while (isdigit(*p)); | |
343 | if (*p == 'e' or *p == 'E') { | |
344 | p++; | |
345 | if (*p == '+' or *p == '-' or isdigit(*p)) { | |
346 | *q++ = 'e'; | |
347 | do { | |
348 | *q++ = *p++; | |
349 | } while (isdigit(*p)); | |
350 | } | |
351 | } | |
352 | *q = '\0'; | |
353 | yylval.y_real = atof(buf); | |
354 | t = REAL; | |
355 | } else { | |
356 | *q = '\0'; | |
357 | switch (base) { | |
358 | case 10: | |
359 | yylval.y_int = atol(buf); | |
360 | break; | |
361 | ||
362 | case 8: | |
363 | yylval.y_int = octal(buf); | |
364 | break; | |
365 | ||
366 | case 16: | |
367 | yylval.y_int = hex(buf); | |
368 | break; | |
369 | ||
370 | default: | |
371 | badcaseval(base); | |
372 | } | |
373 | t = INT; | |
374 | } | |
375 | curchar = p; | |
376 | return t; | |
377 | } | |
378 | ||
379 | /* | |
380 | * Convert a string of octal digits to an integer. | |
381 | */ | |
382 | ||
383 | private int octal(s) | |
384 | String s; | |
385 | { | |
386 | register Char *p; | |
387 | register Integer n; | |
388 | ||
389 | n = 0; | |
390 | for (p = s; *p != '\0'; p++) { | |
391 | n = 8*n + (*p - '0'); | |
392 | } | |
393 | return n; | |
394 | } | |
395 | ||
396 | /* | |
397 | * Convert a string of hexadecimal digits to an integer. | |
398 | */ | |
399 | ||
400 | private int hex(s) | |
401 | String s; | |
402 | { | |
403 | register Char *p; | |
404 | register Integer n; | |
405 | ||
406 | n = 0; | |
407 | for (p = s; *p != '\0'; p++) { | |
408 | n *= 16; | |
409 | if (*p >= 'a' and *p <= 'f') { | |
410 | n += (*p - 'a' + 10); | |
411 | } else if (*p >= 'A' and *p <= 'F') { | |
412 | n += (*p - 'A' + 10); | |
413 | } else { | |
414 | n += (*p - '0'); | |
415 | } | |
416 | } | |
417 | return n; | |
418 | } | |
419 | ||
420 | /* | |
421 | * Scan a string. | |
422 | */ | |
423 | ||
424 | private Token getstring() | |
425 | { | |
426 | char buf[256]; | |
427 | register Char *p, *q; | |
428 | Boolean endofstring; | |
429 | ||
430 | p = curchar; | |
431 | q = buf; | |
432 | endofstring = false; | |
433 | while (not endofstring) { | |
434 | if (*p == '\n' or *p == '\0') { | |
435 | error("non-terminated string"); | |
436 | endofstring = true; | |
73ad9ebb ML |
437 | } else if (*p == '"' or *p == '\'') { |
438 | if (*(p+1) != *p) { | |
b76d33f6 ML |
439 | endofstring = true; |
440 | } else { | |
441 | *q++ = *p; | |
442 | } | |
443 | } else { | |
c37ad836 ML |
444 | *q++ = charcon(p); |
445 | p = curchar; | |
b76d33f6 ML |
446 | } |
447 | p++; | |
448 | } | |
449 | curchar = p; | |
450 | *q = '\0'; | |
451 | yylval.y_string = strdup(buf); | |
452 | return STRING; | |
453 | } | |
454 | ||
455 | /* | |
456 | * Process a character constant. | |
457 | * Watch out for backslashes. | |
458 | */ | |
459 | ||
c37ad836 ML |
460 | private Char charcon(p) |
461 | char *p; | |
b76d33f6 | 462 | { |
c37ad836 | 463 | char c, buf[10], *q; |
b76d33f6 | 464 | |
c37ad836 ML |
465 | if (*p == '\\') { |
466 | ++p; | |
b76d33f6 ML |
467 | if (*p != '\\') { |
468 | q = buf; | |
469 | do { | |
470 | *q++ = *p++; | |
c37ad836 | 471 | } while (*p != '\\' and *p != '\'' and *p != '\n' and *p != '\0'); |
b76d33f6 ML |
472 | *q = '\0'; |
473 | if (isdigit(buf[0])) { | |
474 | c = (Char) octal(buf); | |
475 | } else { | |
476 | c = charlookup(buf); | |
477 | } | |
c37ad836 | 478 | curchar = p - 1; |
b76d33f6 ML |
479 | } else { |
480 | c = '\\'; | |
481 | } | |
482 | } else { | |
c37ad836 | 483 | c = *p; |
b76d33f6 ML |
484 | } |
485 | return c; | |
486 | } | |
487 | ||
488 | /* | |
489 | * Do a lookup for a ASCII character name. | |
490 | */ | |
491 | ||
492 | private String ascii[] = { | |
493 | "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL", | |
494 | "BS", "HT", "NL", "VT", "NP", "CR", "SO", "SI", | |
495 | "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", | |
496 | "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US", | |
497 | "SP", nil | |
498 | }; | |
499 | ||
500 | private char charlookup(s) | |
501 | String s; | |
502 | { | |
503 | register int i; | |
504 | ||
505 | for (i = 0; ascii[i] != NULL; i++) { | |
506 | if (streq(s, ascii[i])) { | |
507 | return i; | |
508 | } | |
509 | } | |
510 | if (streq(s, "DEL")) { | |
511 | return 0177; | |
512 | } | |
513 | error("unknown ascii name \"%s\"", s); | |
514 | return '?'; | |
515 | } | |
516 | ||
517 | /* | |
518 | * Input file management routines. | |
519 | */ | |
520 | ||
521 | public setinput(filename) | |
522 | Filename filename; | |
523 | { | |
524 | File f; | |
525 | ||
526 | f = fopen(filename, "r"); | |
527 | if (f == nil) { | |
528 | error("can't open %s", filename); | |
529 | } else { | |
530 | if (curinclindex >= MAXINCLDEPTH) { | |
531 | error("unreasonable input nesting on \"%s\"", filename); | |
532 | } | |
533 | inclinfo[curinclindex].savefile = in; | |
534 | inclinfo[curinclindex].savefn = errfilename; | |
535 | inclinfo[curinclindex].savelineno = errlineno; | |
536 | curinclindex++; | |
537 | in = f; | |
538 | errfilename = filename; | |
539 | errlineno = 1; | |
540 | } | |
541 | } | |
542 | ||
543 | private Boolean eofinput() | |
544 | { | |
545 | register Boolean b; | |
546 | ||
547 | if (curinclindex == 0) { | |
548 | if (isterm(in)) { | |
549 | putchar('\n'); | |
550 | b = false; | |
551 | } else { | |
552 | b = true; | |
553 | } | |
554 | } else { | |
555 | fclose(in); | |
556 | --curinclindex; | |
557 | in = inclinfo[curinclindex].savefile; | |
558 | errfilename = inclinfo[curinclindex].savefn; | |
559 | errlineno = inclinfo[curinclindex].savelineno; | |
560 | b = false; | |
561 | } | |
562 | return b; | |
563 | } | |
564 | ||
565 | /* | |
566 | * Pop the current input. Return whether successful. | |
567 | */ | |
568 | ||
569 | public Boolean popinput() | |
570 | { | |
571 | Boolean b; | |
572 | ||
573 | if (curinclindex == 0) { | |
574 | b = false; | |
575 | } else { | |
576 | b = (Boolean) (not eofinput()); | |
577 | } | |
578 | return b; | |
579 | } | |
580 | ||
581 | /* | |
582 | * Return whether we are currently reading from standard input. | |
583 | */ | |
584 | ||
585 | public Boolean isstdin() | |
586 | { | |
587 | return (Boolean) (in == stdin); | |
588 | } | |
589 | ||
590 | /* | |
591 | * Send the current line to the shell. | |
592 | */ | |
593 | ||
594 | public shellline() | |
595 | { | |
596 | register char *p; | |
597 | ||
598 | p = curchar; | |
599 | while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) { | |
600 | ++p; | |
601 | } | |
602 | shell(p); | |
603 | if (*p == '\0' and isterm(in)) { | |
604 | putchar('\n'); | |
605 | } | |
606 | erecover(); | |
607 | } | |
608 | ||
609 | /* | |
610 | * Read the rest of the current line in "shell mode". | |
611 | */ | |
612 | ||
613 | public beginshellmode() | |
614 | { | |
615 | shellmode = true; | |
616 | } | |
617 | ||
618 | /* | |
619 | * Print out a token for debugging. | |
620 | */ | |
621 | ||
622 | public print_token(f, t) | |
623 | File f; | |
624 | Token t; | |
625 | { | |
626 | if (t == '\n') { | |
627 | fprintf(f, "char '\\n'"); | |
628 | } else if (t == EOF) { | |
629 | fprintf(f, "EOF"); | |
630 | } else if (t < 256) { | |
631 | fprintf(f, "char '%c'", t); | |
632 | } else { | |
633 | fprintf(f, "\"%s\"", keywdstring(t)); | |
634 | } | |
635 | } |