date and time created 83/08/05 13:35:04 by sam
[unix-history] / usr / src / old / dbx / scanner.c
CommitLineData
b76d33f6
ML
1/* Copyright (c) 1982 Regents of the University of California */
2
a2d98673 3static char sccsid[] = "@(#)scanner.c 1.8 %G%";
b76d33f6
ML
4
5/*
6 * Debugger scanner.
7 */
8
9#include "defs.h"
10#include "scanner.h"
11#include "main.h"
12#include "keywords.h"
13#include "tree.h"
14#include "symbols.h"
15#include "names.h"
16#include "y.tab.h"
17
18#ifndef public
19typedef int Token;
20#endif
21
22public String initfile = ".dbxinit";
23
24typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass;
25
26private Charclass class[256 + 1];
27private Charclass *lexclass = class + 1;
28
29#define isdigit(c) (lexclass[c] == NUM)
30#define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM)
31#define ishexdigit(c) ( \
32 isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \
33)
34
35#define MAXLINESIZE 1024
36
37private File in;
38private Char linebuf[MAXLINESIZE];
39private Char *curchar;
40
41#define MAXINCLDEPTH 10
42
43private struct {
44 File savefile;
45 Filename savefn;
46 int savelineno;
47} inclinfo[MAXINCLDEPTH];
48
49private unsigned int curinclindex;
50
b76d33f6
ML
51private Token getident();
52private Token getnum();
53private Token getstring();
54private Boolean eofinput();
55private Char charcon();
56private Char charlookup();
57
58private enterlexclass(class, s)
59Charclass class;
60String s;
61{
62 register char *p;
63
64 for (p = s; *p != '\0'; p++) {
65 lexclass[*p] = class;
66 }
67}
68
69public scanner_init()
70{
71 register Integer i;
72
73 for (i = 0; i < 257; i++) {
74 class[i] = OTHER;
75 }
76 enterlexclass(WHITE, " \t");
77 enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz");
78 enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$");
79 enterlexclass(NUM, "0123456789");
80 in = stdin;
81 errfilename = nil;
82 errlineno = 0;
83 curchar = linebuf;
84 linebuf[0] = '\0';
b76d33f6
ML
85}
86
87/*
88 * Read a single token.
89 *
90 * Input is line buffered.
91 *
92 * There are two "modes" of operation: one as in a compiler,
93 * and one for reading shell-like syntax.
94 */
95
96private Boolean shellmode;
97
98public Token yylex()
99{
100 register int c;
101 register char *p;
102 register Token t;
103 String line;
104
105 p = curchar;
106 if (*p == '\0') {
107 do {
108 if (isterm(in)) {
a2d98673 109 printf("(%s) ", cmdname);
5c0d33e7 110 fflush(stdout);
b76d33f6
ML
111 }
112 line = fgets(linebuf, MAXLINESIZE, in);
113 } while (line == nil and not eofinput());
114 if (line == nil) {
115 c = EOF;
116 } else {
117 p = linebuf;
118 while (lexclass[*p] == WHITE) {
119 p++;
120 }
121 shellmode = false;
122 }
123 } else {
124 while (lexclass[*p] == WHITE) {
125 p++;
126 }
127 }
128 curchar = p;
129 c = *p;
130 if (lexclass[c] == ALPHA) {
131 t = getident();
132 } else if (lexclass[c] == NUM) {
4ff5dcd5
ML
133 if (shellmode) {
134 t = getident();
135 } else {
136 t = getnum();
137 }
b76d33f6
ML
138 } else {
139 ++curchar;
140 switch (c) {
141 case '\n':
142 t = '\n';
143 if (errlineno != 0) {
144 errlineno++;
145 }
146 break;
147
148 case '"':
149 case '\'':
150 t = getstring();
151 break;
152
153 case '.':
154 if (shellmode) {
155 --curchar;
156 t = getident();
157 } else if (isdigit(*curchar)) {
158 --curchar;
159 t = getnum();
160 } else {
161 t = '.';
162 }
163 break;
164
165 case '<':
166 if (not shellmode and *curchar == '<') {
167 ++curchar;
168 t = LFORMER;
169 } else {
170 t = '<';
171 }
172 break;
173
174 case '>':
175 if (not shellmode and *curchar == '>') {
176 ++curchar;
177 t = RFORMER;
178 } else {
179 t = '>';
180 }
181 break;
182
183 case '#':
184 if (*curchar == '^') {
185 ++curchar;
186 t = ABSTRACTION;
187 } else {
188 t = '#';
189 }
190 break;
191
192 case '-':
193 if (shellmode) {
194 --curchar;
195 t = getident();
196 } else if (*curchar == '>') {
197 ++curchar;
198 t = ARROW;
199 } else {
200 t = '-';
201 }
202 break;
203
204 case EOF:
205 t = 0;
206 break;
207
208 default:
209 if (shellmode and index("!&*()[]", c) == nil) {
210 --curchar;
211 t = getident();
212 } else {
213 t = c;
214 }
215 break;
216 }
217 }
218# ifdef LEXDEBUG
219 if (lexdebug) {
220 fprintf(stderr, "yylex returns ");
221 print_token(stderr, t);
222 fprintf(stderr, "\n");
223 }
224# endif
225 return t;
226}
227
228/*
229 * Parser error handling.
230 */
231
232public yyerror(s)
233String s;
234{
235 register Char *p, *tokenbegin, *tokenend;
236 register Integer len;
237
238 if (streq(s, "syntax error")) {
239 beginerrmsg();
240 tokenend = curchar - 1;
241 tokenbegin = tokenend;
242 while (lexclass[*tokenbegin] != WHITE and tokenbegin > &linebuf[0]) {
243 --tokenbegin;
244 }
245 len = tokenend - tokenbegin + 1;
246 p = tokenbegin;
247 if (p > &linebuf[0]) {
248 while (lexclass[*p] == WHITE and p > &linebuf[0]) {
249 --p;
250 }
251 }
252 if (p == &linebuf[0]) {
253 fprintf(stderr, "unrecognized command \"%.*s\"", len, tokenbegin);
254 } else {
255 fprintf(stderr, "syntax error");
256 if (len != 0) {
257 fprintf(stderr, " on \"%.*s\"", len, tokenbegin);
258 }
259 }
260 enderrmsg();
261 } else {
262 error(s);
263 }
264}
265
266/*
267 * Eat the current line.
268 */
269
270public gobble()
271{
272 curchar = linebuf;
273 linebuf[0] = '\0';
274}
275
276/*
277 * Scan an identifier and check to see if it's a keyword.
278 */
279
280private Token getident()
281{
282 char buf[256];
283 register Char *p, *q;
284 register Token t;
285
286 p = curchar;
287 q = buf;
288 if (shellmode) {
289 do {
290 *q++ = *p++;
291 } while (index(" \t\n!&<>*[]()", *p) == nil);
292 } else {
293 do {
294 *q++ = *p++;
295 } while (isalnum(*p));
296 }
297 curchar = p;
298 *q = '\0';
299 yylval.y_name = identname(buf, false);
300 if (not shellmode) {
301 t = findkeyword(yylval.y_name);
302 if (t == nil) {
303 t = NAME;
304 }
305 } else {
306 t = NAME;
307 }
308 return t;
309}
310
311/*
312 * Scan a number.
313 */
314
315private Token getnum()
316{
317 char buf[256];
318 register Char *p, *q;
319 register Token t;
320 Integer base;
321
322 p = curchar;
323 q = buf;
324 if (*p == '0') {
325 if (*(p+1) == 'x') {
326 p += 2;
327 base = 16;
328 } else {
329 base = 8;
330 }
331 } else {
332 base = 10;
333 }
334 if (base == 16) {
335 do {
336 *q++ = *p++;
337 } while (ishexdigit(*p));
338 } else {
339 do {
340 *q++ = *p++;
341 } while (isdigit(*p));
342 }
343 if (*p == '.') {
344 do {
345 *q++ = *p++;
346 } while (isdigit(*p));
347 if (*p == 'e' or *p == 'E') {
348 p++;
349 if (*p == '+' or *p == '-' or isdigit(*p)) {
350 *q++ = 'e';
351 do {
352 *q++ = *p++;
353 } while (isdigit(*p));
354 }
355 }
356 *q = '\0';
357 yylval.y_real = atof(buf);
358 t = REAL;
359 } else {
360 *q = '\0';
361 switch (base) {
362 case 10:
363 yylval.y_int = atol(buf);
364 break;
365
366 case 8:
367 yylval.y_int = octal(buf);
368 break;
369
370 case 16:
371 yylval.y_int = hex(buf);
372 break;
373
374 default:
375 badcaseval(base);
376 }
377 t = INT;
378 }
379 curchar = p;
380 return t;
381}
382
383/*
384 * Convert a string of octal digits to an integer.
385 */
386
387private int octal(s)
388String s;
389{
390 register Char *p;
391 register Integer n;
392
393 n = 0;
394 for (p = s; *p != '\0'; p++) {
395 n = 8*n + (*p - '0');
396 }
397 return n;
398}
399
400/*
401 * Convert a string of hexadecimal digits to an integer.
402 */
403
404private int hex(s)
405String s;
406{
407 register Char *p;
408 register Integer n;
409
410 n = 0;
411 for (p = s; *p != '\0'; p++) {
412 n *= 16;
413 if (*p >= 'a' and *p <= 'f') {
414 n += (*p - 'a' + 10);
415 } else if (*p >= 'A' and *p <= 'F') {
416 n += (*p - 'A' + 10);
417 } else {
418 n += (*p - '0');
419 }
420 }
421 return n;
422}
423
424/*
425 * Scan a string.
426 */
427
428private Token getstring()
429{
430 char buf[256];
431 register Char *p, *q;
432 Boolean endofstring;
433
434 p = curchar;
435 q = buf;
436 endofstring = false;
437 while (not endofstring) {
438 if (*p == '\n' or *p == '\0') {
439 error("non-terminated string");
440 endofstring = true;
73ad9ebb
ML
441 } else if (*p == '"' or *p == '\'') {
442 if (*(p+1) != *p) {
b76d33f6
ML
443 endofstring = true;
444 } else {
445 *q++ = *p;
446 }
447 } else {
9267e4d8 448 curchar = p;
c37ad836
ML
449 *q++ = charcon(p);
450 p = curchar;
b76d33f6
ML
451 }
452 p++;
453 }
454 curchar = p;
455 *q = '\0';
456 yylval.y_string = strdup(buf);
457 return STRING;
458}
459
460/*
461 * Process a character constant.
462 * Watch out for backslashes.
463 */
464
c37ad836
ML
465private Char charcon(p)
466char *p;
b76d33f6 467{
c37ad836 468 char c, buf[10], *q;
b76d33f6 469
c37ad836
ML
470 if (*p == '\\') {
471 ++p;
b76d33f6
ML
472 if (*p != '\\') {
473 q = buf;
474 do {
475 *q++ = *p++;
c37ad836 476 } while (*p != '\\' and *p != '\'' and *p != '\n' and *p != '\0');
b76d33f6
ML
477 *q = '\0';
478 if (isdigit(buf[0])) {
479 c = (Char) octal(buf);
480 } else {
481 c = charlookup(buf);
482 }
c37ad836 483 curchar = p - 1;
b76d33f6
ML
484 } else {
485 c = '\\';
486 }
487 } else {
c37ad836 488 c = *p;
b76d33f6
ML
489 }
490 return c;
491}
492
493/*
494 * Do a lookup for a ASCII character name.
495 */
496
497private String ascii[] = {
498 "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
499 "BS", "HT", "NL", "VT", "NP", "CR", "SO", "SI",
500 "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
501 "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US",
502 "SP", nil
503};
504
505private char charlookup(s)
506String s;
507{
508 register int i;
509
510 for (i = 0; ascii[i] != NULL; i++) {
511 if (streq(s, ascii[i])) {
512 return i;
513 }
514 }
515 if (streq(s, "DEL")) {
516 return 0177;
517 }
518 error("unknown ascii name \"%s\"", s);
519 return '?';
520}
521
522/*
523 * Input file management routines.
524 */
525
526public setinput(filename)
527Filename filename;
528{
529 File f;
530
531 f = fopen(filename, "r");
532 if (f == nil) {
533 error("can't open %s", filename);
534 } else {
535 if (curinclindex >= MAXINCLDEPTH) {
536 error("unreasonable input nesting on \"%s\"", filename);
537 }
538 inclinfo[curinclindex].savefile = in;
539 inclinfo[curinclindex].savefn = errfilename;
540 inclinfo[curinclindex].savelineno = errlineno;
541 curinclindex++;
542 in = f;
543 errfilename = filename;
544 errlineno = 1;
545 }
546}
547
548private Boolean eofinput()
549{
550 register Boolean b;
551
552 if (curinclindex == 0) {
553 if (isterm(in)) {
554 putchar('\n');
a2d98673 555 clearerr(in);
b76d33f6
ML
556 b = false;
557 } else {
558 b = true;
559 }
560 } else {
561 fclose(in);
562 --curinclindex;
563 in = inclinfo[curinclindex].savefile;
564 errfilename = inclinfo[curinclindex].savefn;
565 errlineno = inclinfo[curinclindex].savelineno;
566 b = false;
567 }
568 return b;
569}
570
571/*
572 * Pop the current input. Return whether successful.
573 */
574
575public Boolean popinput()
576{
577 Boolean b;
578
579 if (curinclindex == 0) {
580 b = false;
581 } else {
582 b = (Boolean) (not eofinput());
583 }
584 return b;
585}
586
587/*
588 * Return whether we are currently reading from standard input.
589 */
590
591public Boolean isstdin()
592{
593 return (Boolean) (in == stdin);
594}
595
596/*
597 * Send the current line to the shell.
598 */
599
600public shellline()
601{
602 register char *p;
603
604 p = curchar;
605 while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) {
606 ++p;
607 }
608 shell(p);
609 if (*p == '\0' and isterm(in)) {
610 putchar('\n');
611 }
612 erecover();
613}
614
615/*
616 * Read the rest of the current line in "shell mode".
617 */
618
619public beginshellmode()
620{
621 shellmode = true;
622}
623
624/*
625 * Print out a token for debugging.
626 */
627
628public print_token(f, t)
629File f;
630Token t;
631{
632 if (t == '\n') {
633 fprintf(f, "char '\\n'");
634 } else if (t == EOF) {
635 fprintf(f, "EOF");
636 } else if (t < 256) {
637 fprintf(f, "char '%c'", t);
638 } else {
639 fprintf(f, "\"%s\"", keywdstring(t));
640 }
641}