new ADDS entries from ralph@arpa, and trs80II from marshall
[unix-history] / usr / src / old / dbx / scanner.c
CommitLineData
b76d33f6
ML
1/* Copyright (c) 1982 Regents of the University of California */
2
4ff5dcd5 3static char sccsid[] = "@(#)scanner.c 1.6 %G%";
b76d33f6
ML
4
5/*
6 * Debugger scanner.
7 */
8
9#include "defs.h"
10#include "scanner.h"
11#include "main.h"
12#include "keywords.h"
13#include "tree.h"
14#include "symbols.h"
15#include "names.h"
16#include "y.tab.h"
17
18#ifndef public
19typedef int Token;
20#endif
21
22public String initfile = ".dbxinit";
23
24typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass;
25
26private Charclass class[256 + 1];
27private Charclass *lexclass = class + 1;
28
29#define isdigit(c) (lexclass[c] == NUM)
30#define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM)
31#define ishexdigit(c) ( \
32 isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \
33)
34
35#define MAXLINESIZE 1024
36
37private File in;
38private Char linebuf[MAXLINESIZE];
39private Char *curchar;
40
41#define MAXINCLDEPTH 10
42
43private struct {
44 File savefile;
45 Filename savefn;
46 int savelineno;
47} inclinfo[MAXINCLDEPTH];
48
49private unsigned int curinclindex;
50
b76d33f6
ML
51private Token getident();
52private Token getnum();
53private Token getstring();
54private Boolean eofinput();
55private Char charcon();
56private Char charlookup();
57
58private enterlexclass(class, s)
59Charclass class;
60String s;
61{
62 register char *p;
63
64 for (p = s; *p != '\0'; p++) {
65 lexclass[*p] = class;
66 }
67}
68
69public scanner_init()
70{
71 register Integer i;
72
73 for (i = 0; i < 257; i++) {
74 class[i] = OTHER;
75 }
76 enterlexclass(WHITE, " \t");
77 enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz");
78 enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$");
79 enterlexclass(NUM, "0123456789");
80 in = stdin;
81 errfilename = nil;
82 errlineno = 0;
83 curchar = linebuf;
84 linebuf[0] = '\0';
b76d33f6
ML
85}
86
87/*
88 * Read a single token.
89 *
90 * Input is line buffered.
91 *
92 * There are two "modes" of operation: one as in a compiler,
93 * and one for reading shell-like syntax.
94 */
95
96private Boolean shellmode;
97
98public Token yylex()
99{
100 register int c;
101 register char *p;
102 register Token t;
103 String line;
104
105 p = curchar;
106 if (*p == '\0') {
107 do {
108 if (isterm(in)) {
5c0d33e7
ML
109 printf("> ");
110 fflush(stdout);
b76d33f6
ML
111 }
112 line = fgets(linebuf, MAXLINESIZE, in);
113 } while (line == nil and not eofinput());
114 if (line == nil) {
115 c = EOF;
116 } else {
117 p = linebuf;
118 while (lexclass[*p] == WHITE) {
119 p++;
120 }
121 shellmode = false;
122 }
123 } else {
124 while (lexclass[*p] == WHITE) {
125 p++;
126 }
127 }
128 curchar = p;
129 c = *p;
130 if (lexclass[c] == ALPHA) {
131 t = getident();
132 } else if (lexclass[c] == NUM) {
4ff5dcd5
ML
133 if (shellmode) {
134 t = getident();
135 } else {
136 t = getnum();
137 }
b76d33f6
ML
138 } else {
139 ++curchar;
140 switch (c) {
141 case '\n':
142 t = '\n';
143 if (errlineno != 0) {
144 errlineno++;
145 }
146 break;
147
148 case '"':
149 case '\'':
150 t = getstring();
151 break;
152
153 case '.':
154 if (shellmode) {
155 --curchar;
156 t = getident();
157 } else if (isdigit(*curchar)) {
158 --curchar;
159 t = getnum();
160 } else {
161 t = '.';
162 }
163 break;
164
165 case '<':
166 if (not shellmode and *curchar == '<') {
167 ++curchar;
168 t = LFORMER;
169 } else {
170 t = '<';
171 }
172 break;
173
174 case '>':
175 if (not shellmode and *curchar == '>') {
176 ++curchar;
177 t = RFORMER;
178 } else {
179 t = '>';
180 }
181 break;
182
183 case '#':
184 if (*curchar == '^') {
185 ++curchar;
186 t = ABSTRACTION;
187 } else {
188 t = '#';
189 }
190 break;
191
192 case '-':
193 if (shellmode) {
194 --curchar;
195 t = getident();
196 } else if (*curchar == '>') {
197 ++curchar;
198 t = ARROW;
199 } else {
200 t = '-';
201 }
202 break;
203
204 case EOF:
205 t = 0;
206 break;
207
208 default:
209 if (shellmode and index("!&*()[]", c) == nil) {
210 --curchar;
211 t = getident();
212 } else {
213 t = c;
214 }
215 break;
216 }
217 }
218# ifdef LEXDEBUG
219 if (lexdebug) {
220 fprintf(stderr, "yylex returns ");
221 print_token(stderr, t);
222 fprintf(stderr, "\n");
223 }
224# endif
225 return t;
226}
227
228/*
229 * Parser error handling.
230 */
231
232public yyerror(s)
233String s;
234{
235 register Char *p, *tokenbegin, *tokenend;
236 register Integer len;
237
238 if (streq(s, "syntax error")) {
239 beginerrmsg();
240 tokenend = curchar - 1;
241 tokenbegin = tokenend;
242 while (lexclass[*tokenbegin] != WHITE and tokenbegin > &linebuf[0]) {
243 --tokenbegin;
244 }
245 len = tokenend - tokenbegin + 1;
246 p = tokenbegin;
247 if (p > &linebuf[0]) {
248 while (lexclass[*p] == WHITE and p > &linebuf[0]) {
249 --p;
250 }
251 }
252 if (p == &linebuf[0]) {
253 fprintf(stderr, "unrecognized command \"%.*s\"", len, tokenbegin);
254 } else {
255 fprintf(stderr, "syntax error");
256 if (len != 0) {
257 fprintf(stderr, " on \"%.*s\"", len, tokenbegin);
258 }
259 }
260 enderrmsg();
261 } else {
262 error(s);
263 }
264}
265
266/*
267 * Eat the current line.
268 */
269
270public gobble()
271{
272 curchar = linebuf;
273 linebuf[0] = '\0';
274}
275
276/*
277 * Scan an identifier and check to see if it's a keyword.
278 */
279
280private Token getident()
281{
282 char buf[256];
283 register Char *p, *q;
284 register Token t;
285
286 p = curchar;
287 q = buf;
288 if (shellmode) {
289 do {
290 *q++ = *p++;
291 } while (index(" \t\n!&<>*[]()", *p) == nil);
292 } else {
293 do {
294 *q++ = *p++;
295 } while (isalnum(*p));
296 }
297 curchar = p;
298 *q = '\0';
299 yylval.y_name = identname(buf, false);
300 if (not shellmode) {
301 t = findkeyword(yylval.y_name);
302 if (t == nil) {
303 t = NAME;
304 }
305 } else {
306 t = NAME;
307 }
308 return t;
309}
310
311/*
312 * Scan a number.
313 */
314
315private Token getnum()
316{
317 char buf[256];
318 register Char *p, *q;
319 register Token t;
320 Integer base;
321
322 p = curchar;
323 q = buf;
324 if (*p == '0') {
325 if (*(p+1) == 'x') {
326 p += 2;
327 base = 16;
328 } else {
329 base = 8;
330 }
331 } else {
332 base = 10;
333 }
334 if (base == 16) {
335 do {
336 *q++ = *p++;
337 } while (ishexdigit(*p));
338 } else {
339 do {
340 *q++ = *p++;
341 } while (isdigit(*p));
342 }
343 if (*p == '.') {
344 do {
345 *q++ = *p++;
346 } while (isdigit(*p));
347 if (*p == 'e' or *p == 'E') {
348 p++;
349 if (*p == '+' or *p == '-' or isdigit(*p)) {
350 *q++ = 'e';
351 do {
352 *q++ = *p++;
353 } while (isdigit(*p));
354 }
355 }
356 *q = '\0';
357 yylval.y_real = atof(buf);
358 t = REAL;
359 } else {
360 *q = '\0';
361 switch (base) {
362 case 10:
363 yylval.y_int = atol(buf);
364 break;
365
366 case 8:
367 yylval.y_int = octal(buf);
368 break;
369
370 case 16:
371 yylval.y_int = hex(buf);
372 break;
373
374 default:
375 badcaseval(base);
376 }
377 t = INT;
378 }
379 curchar = p;
380 return t;
381}
382
383/*
384 * Convert a string of octal digits to an integer.
385 */
386
387private int octal(s)
388String s;
389{
390 register Char *p;
391 register Integer n;
392
393 n = 0;
394 for (p = s; *p != '\0'; p++) {
395 n = 8*n + (*p - '0');
396 }
397 return n;
398}
399
400/*
401 * Convert a string of hexadecimal digits to an integer.
402 */
403
404private int hex(s)
405String s;
406{
407 register Char *p;
408 register Integer n;
409
410 n = 0;
411 for (p = s; *p != '\0'; p++) {
412 n *= 16;
413 if (*p >= 'a' and *p <= 'f') {
414 n += (*p - 'a' + 10);
415 } else if (*p >= 'A' and *p <= 'F') {
416 n += (*p - 'A' + 10);
417 } else {
418 n += (*p - '0');
419 }
420 }
421 return n;
422}
423
424/*
425 * Scan a string.
426 */
427
428private Token getstring()
429{
430 char buf[256];
431 register Char *p, *q;
432 Boolean endofstring;
433
434 p = curchar;
435 q = buf;
436 endofstring = false;
437 while (not endofstring) {
438 if (*p == '\n' or *p == '\0') {
439 error("non-terminated string");
440 endofstring = true;
73ad9ebb
ML
441 } else if (*p == '"' or *p == '\'') {
442 if (*(p+1) != *p) {
b76d33f6
ML
443 endofstring = true;
444 } else {
445 *q++ = *p;
446 }
447 } else {
c37ad836
ML
448 *q++ = charcon(p);
449 p = curchar;
b76d33f6
ML
450 }
451 p++;
452 }
453 curchar = p;
454 *q = '\0';
455 yylval.y_string = strdup(buf);
456 return STRING;
457}
458
459/*
460 * Process a character constant.
461 * Watch out for backslashes.
462 */
463
c37ad836
ML
464private Char charcon(p)
465char *p;
b76d33f6 466{
c37ad836 467 char c, buf[10], *q;
b76d33f6 468
c37ad836
ML
469 if (*p == '\\') {
470 ++p;
b76d33f6
ML
471 if (*p != '\\') {
472 q = buf;
473 do {
474 *q++ = *p++;
c37ad836 475 } while (*p != '\\' and *p != '\'' and *p != '\n' and *p != '\0');
b76d33f6
ML
476 *q = '\0';
477 if (isdigit(buf[0])) {
478 c = (Char) octal(buf);
479 } else {
480 c = charlookup(buf);
481 }
c37ad836 482 curchar = p - 1;
b76d33f6
ML
483 } else {
484 c = '\\';
485 }
486 } else {
c37ad836 487 c = *p;
b76d33f6
ML
488 }
489 return c;
490}
491
492/*
493 * Do a lookup for a ASCII character name.
494 */
495
496private String ascii[] = {
497 "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
498 "BS", "HT", "NL", "VT", "NP", "CR", "SO", "SI",
499 "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
500 "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US",
501 "SP", nil
502};
503
504private char charlookup(s)
505String s;
506{
507 register int i;
508
509 for (i = 0; ascii[i] != NULL; i++) {
510 if (streq(s, ascii[i])) {
511 return i;
512 }
513 }
514 if (streq(s, "DEL")) {
515 return 0177;
516 }
517 error("unknown ascii name \"%s\"", s);
518 return '?';
519}
520
521/*
522 * Input file management routines.
523 */
524
525public setinput(filename)
526Filename filename;
527{
528 File f;
529
530 f = fopen(filename, "r");
531 if (f == nil) {
532 error("can't open %s", filename);
533 } else {
534 if (curinclindex >= MAXINCLDEPTH) {
535 error("unreasonable input nesting on \"%s\"", filename);
536 }
537 inclinfo[curinclindex].savefile = in;
538 inclinfo[curinclindex].savefn = errfilename;
539 inclinfo[curinclindex].savelineno = errlineno;
540 curinclindex++;
541 in = f;
542 errfilename = filename;
543 errlineno = 1;
544 }
545}
546
547private Boolean eofinput()
548{
549 register Boolean b;
550
551 if (curinclindex == 0) {
552 if (isterm(in)) {
553 putchar('\n');
554 b = false;
555 } else {
556 b = true;
557 }
558 } else {
559 fclose(in);
560 --curinclindex;
561 in = inclinfo[curinclindex].savefile;
562 errfilename = inclinfo[curinclindex].savefn;
563 errlineno = inclinfo[curinclindex].savelineno;
564 b = false;
565 }
566 return b;
567}
568
569/*
570 * Pop the current input. Return whether successful.
571 */
572
573public Boolean popinput()
574{
575 Boolean b;
576
577 if (curinclindex == 0) {
578 b = false;
579 } else {
580 b = (Boolean) (not eofinput());
581 }
582 return b;
583}
584
585/*
586 * Return whether we are currently reading from standard input.
587 */
588
589public Boolean isstdin()
590{
591 return (Boolean) (in == stdin);
592}
593
594/*
595 * Send the current line to the shell.
596 */
597
598public shellline()
599{
600 register char *p;
601
602 p = curchar;
603 while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) {
604 ++p;
605 }
606 shell(p);
607 if (*p == '\0' and isterm(in)) {
608 putchar('\n');
609 }
610 erecover();
611}
612
613/*
614 * Read the rest of the current line in "shell mode".
615 */
616
617public beginshellmode()
618{
619 shellmode = true;
620}
621
622/*
623 * Print out a token for debugging.
624 */
625
626public print_token(f, t)
627File f;
628Token t;
629{
630 if (t == '\n') {
631 fprintf(f, "char '\\n'");
632 } else if (t == EOF) {
633 fprintf(f, "EOF");
634 } else if (t < 256) {
635 fprintf(f, "char '%c'", t);
636 } else {
637 fprintf(f, "\"%s\"", keywdstring(t));
638 }
639}