date and time created 82/12/15 04:08:41 by linton
[unix-history] / usr / src / old / dbx / scanner.c
CommitLineData
b76d33f6
ML
1/* Copyright (c) 1982 Regents of the University of California */
2
3static char sccsid[] = "@(#)@(#)scanner.c 1.1 %G%";
4
5/*
6 * Debugger scanner.
7 */
8
9#include "defs.h"
10#include "scanner.h"
11#include "main.h"
12#include "keywords.h"
13#include "tree.h"
14#include "symbols.h"
15#include "names.h"
16#include "y.tab.h"
17
/*
 * Token is the type returned by yylex and the get* scanning routines.
 * NOTE(review): the "#ifndef public" guard presumably hides this typedef
 * from a header-generation pass that defines "public" -- confirm against
 * the build tools.
 */
#ifndef public
typedef int Token;
#endif

/*
 * Name of the start-up command file.
 * NOTE(review): not read anywhere in this file; presumably consumed by
 * the caller that invokes setinput -- confirm.
 */
public String initfile = ".dbxinit";
23
24typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass;
25
26private Charclass class[256 + 1];
27private Charclass *lexclass = class + 1;
28
29#define isdigit(c) (lexclass[c] == NUM)
30#define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM)
31#define ishexdigit(c) ( \
32 isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \
33)
34
/* Size of the line buffer handed to fgets, including the terminator. */
#define MAXLINESIZE 1024

/*
 * Current input state: the stream being read, the line most recently
 * read from it, and the position of the next unscanned character.
 */
private File in;
private Char linebuf[MAXLINESIZE];
private Char *curchar;

/* Maximum number of nested input files that setinput will stack. */
#define MAXINCLDEPTH 10

/*
 * Stack of suspended inputs.  setinput saves the current stream,
 * error file name and error line number here before switching to a
 * new file; eofinput restores them when that file is exhausted.
 */
private struct {
    File savefile;
    Filename savefn;
    int savelineno;
} inclinfo[MAXINCLDEPTH];

/* Number of entries currently saved in inclinfo. */
private unsigned int curinclindex;

/*
 * firsttoken suppresses the prompt before the very first line read by
 * yylex; firstinit lets scanner_init print the initial prompt only once.
 */
private Boolean firsttoken = true;
private Boolean firstinit = true;

/* Forward declarations for routines used before their definitions. */
private Token getident();
private Token getnum();
private Token getstring();
private Boolean eofinput();
private Char charcon();
private Char charlookup();
60
61private enterlexclass(class, s)
62Charclass class;
63String s;
64{
65 register char *p;
66
67 for (p = s; *p != '\0'; p++) {
68 lexclass[*p] = class;
69 }
70}
71
72public scanner_init()
73{
74 register Integer i;
75
76 for (i = 0; i < 257; i++) {
77 class[i] = OTHER;
78 }
79 enterlexclass(WHITE, " \t");
80 enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz");
81 enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$");
82 enterlexclass(NUM, "0123456789");
83 in = stdin;
84 errfilename = nil;
85 errlineno = 0;
86 curchar = linebuf;
87 linebuf[0] = '\0';
88 if (runfirst) {
89 firstinit = false;
90 firsttoken = false;
91 } else if (firstinit and isterm(in)) {
92 firstinit = false;
93 printf("> ");
94 fflush(stdout);
95 }
96}
97
98/*
99 * Read a single token.
100 *
101 * Input is line buffered.
102 *
103 * There are two "modes" of operation: one as in a compiler,
104 * and one for reading shell-like syntax.
105 */
106
107private Boolean shellmode;
108
109public Token yylex()
110{
111 register int c;
112 register char *p;
113 register Token t;
114 String line;
115
116 p = curchar;
117 if (*p == '\0') {
118 do {
119 if (isterm(in)) {
120 if (firsttoken) {
121 firsttoken = false;
122 } else {
123 printf("> ");
124 fflush(stdout);
125 }
126 }
127 line = fgets(linebuf, MAXLINESIZE, in);
128 } while (line == nil and not eofinput());
129 if (line == nil) {
130 c = EOF;
131 } else {
132 p = linebuf;
133 while (lexclass[*p] == WHITE) {
134 p++;
135 }
136 shellmode = false;
137 }
138 } else {
139 while (lexclass[*p] == WHITE) {
140 p++;
141 }
142 }
143 curchar = p;
144 c = *p;
145 if (lexclass[c] == ALPHA) {
146 t = getident();
147 } else if (lexclass[c] == NUM) {
148 t = getnum();
149 } else {
150 ++curchar;
151 switch (c) {
152 case '\n':
153 t = '\n';
154 if (errlineno != 0) {
155 errlineno++;
156 }
157 break;
158
159 case '"':
160 case '\'':
161 t = getstring();
162 break;
163
164 case '.':
165 if (shellmode) {
166 --curchar;
167 t = getident();
168 } else if (isdigit(*curchar)) {
169 --curchar;
170 t = getnum();
171 } else {
172 t = '.';
173 }
174 break;
175
176 case '<':
177 if (not shellmode and *curchar == '<') {
178 ++curchar;
179 t = LFORMER;
180 } else {
181 t = '<';
182 }
183 break;
184
185 case '>':
186 if (not shellmode and *curchar == '>') {
187 ++curchar;
188 t = RFORMER;
189 } else {
190 t = '>';
191 }
192 break;
193
194 case '#':
195 if (*curchar == '^') {
196 ++curchar;
197 t = ABSTRACTION;
198 } else {
199 t = '#';
200 }
201 break;
202
203 case '-':
204 if (shellmode) {
205 --curchar;
206 t = getident();
207 } else if (*curchar == '>') {
208 ++curchar;
209 t = ARROW;
210 } else {
211 t = '-';
212 }
213 break;
214
215 case EOF:
216 t = 0;
217 break;
218
219 default:
220 if (shellmode and index("!&*()[]", c) == nil) {
221 --curchar;
222 t = getident();
223 } else {
224 t = c;
225 }
226 break;
227 }
228 }
229# ifdef LEXDEBUG
230 if (lexdebug) {
231 fprintf(stderr, "yylex returns ");
232 print_token(stderr, t);
233 fprintf(stderr, "\n");
234 }
235# endif
236 return t;
237}
238
239/*
240 * Parser error handling.
241 */
242
243public yyerror(s)
244String s;
245{
246 register Char *p, *tokenbegin, *tokenend;
247 register Integer len;
248
249 if (streq(s, "syntax error")) {
250 beginerrmsg();
251 tokenend = curchar - 1;
252 tokenbegin = tokenend;
253 while (lexclass[*tokenbegin] != WHITE and tokenbegin > &linebuf[0]) {
254 --tokenbegin;
255 }
256 len = tokenend - tokenbegin + 1;
257 p = tokenbegin;
258 if (p > &linebuf[0]) {
259 while (lexclass[*p] == WHITE and p > &linebuf[0]) {
260 --p;
261 }
262 }
263 if (p == &linebuf[0]) {
264 fprintf(stderr, "unrecognized command \"%.*s\"", len, tokenbegin);
265 } else {
266 fprintf(stderr, "syntax error");
267 if (len != 0) {
268 fprintf(stderr, " on \"%.*s\"", len, tokenbegin);
269 }
270 }
271 enderrmsg();
272 } else {
273 error(s);
274 }
275}
276
277/*
278 * Eat the current line.
279 */
280
281public gobble()
282{
283 curchar = linebuf;
284 linebuf[0] = '\0';
285}
286
287/*
288 * Scan an identifier and check to see if it's a keyword.
289 */
290
291private Token getident()
292{
293 char buf[256];
294 register Char *p, *q;
295 register Token t;
296
297 p = curchar;
298 q = buf;
299 if (shellmode) {
300 do {
301 *q++ = *p++;
302 } while (index(" \t\n!&<>*[]()", *p) == nil);
303 } else {
304 do {
305 *q++ = *p++;
306 } while (isalnum(*p));
307 }
308 curchar = p;
309 *q = '\0';
310 yylval.y_name = identname(buf, false);
311 if (not shellmode) {
312 t = findkeyword(yylval.y_name);
313 if (t == nil) {
314 t = NAME;
315 }
316 } else {
317 t = NAME;
318 }
319 return t;
320}
321
322/*
323 * Scan a number.
324 */
325
326private Token getnum()
327{
328 char buf[256];
329 register Char *p, *q;
330 register Token t;
331 Integer base;
332
333 p = curchar;
334 q = buf;
335 if (*p == '0') {
336 if (*(p+1) == 'x') {
337 p += 2;
338 base = 16;
339 } else {
340 base = 8;
341 }
342 } else {
343 base = 10;
344 }
345 if (base == 16) {
346 do {
347 *q++ = *p++;
348 } while (ishexdigit(*p));
349 } else {
350 do {
351 *q++ = *p++;
352 } while (isdigit(*p));
353 }
354 if (*p == '.') {
355 do {
356 *q++ = *p++;
357 } while (isdigit(*p));
358 if (*p == 'e' or *p == 'E') {
359 p++;
360 if (*p == '+' or *p == '-' or isdigit(*p)) {
361 *q++ = 'e';
362 do {
363 *q++ = *p++;
364 } while (isdigit(*p));
365 }
366 }
367 *q = '\0';
368 yylval.y_real = atof(buf);
369 t = REAL;
370 } else {
371 *q = '\0';
372 switch (base) {
373 case 10:
374 yylval.y_int = atol(buf);
375 break;
376
377 case 8:
378 yylval.y_int = octal(buf);
379 break;
380
381 case 16:
382 yylval.y_int = hex(buf);
383 break;
384
385 default:
386 badcaseval(base);
387 }
388 t = INT;
389 }
390 curchar = p;
391 return t;
392}
393
394/*
395 * Convert a string of octal digits to an integer.
396 */
397
398private int octal(s)
399String s;
400{
401 register Char *p;
402 register Integer n;
403
404 n = 0;
405 for (p = s; *p != '\0'; p++) {
406 n = 8*n + (*p - '0');
407 }
408 return n;
409}
410
411/*
412 * Convert a string of hexadecimal digits to an integer.
413 */
414
415private int hex(s)
416String s;
417{
418 register Char *p;
419 register Integer n;
420
421 n = 0;
422 for (p = s; *p != '\0'; p++) {
423 n *= 16;
424 if (*p >= 'a' and *p <= 'f') {
425 n += (*p - 'a' + 10);
426 } else if (*p >= 'A' and *p <= 'F') {
427 n += (*p - 'A' + 10);
428 } else {
429 n += (*p - '0');
430 }
431 }
432 return n;
433}
434
435/*
436 * Scan a string.
437 */
438
439private Token getstring()
440{
441 char buf[256];
442 register Char *p, *q;
443 Boolean endofstring;
444
445 p = curchar;
446 q = buf;
447 endofstring = false;
448 while (not endofstring) {
449 if (*p == '\n' or *p == '\0') {
450 error("non-terminated string");
451 endofstring = true;
452 } else if (*p == '"') {
453 if (*(p+1) != '"') {
454 endofstring = true;
455 } else {
456 *q++ = *p;
457 }
458 } else {
459 *q++ = charcon(*p);
460 }
461 p++;
462 }
463 curchar = p;
464 *q = '\0';
465 yylval.y_string = strdup(buf);
466 return STRING;
467}
468
469/*
470 * Process a character constant.
471 * Watch out for backslashes.
472 */
473
474private Char charcon(ch)
475Char ch;
476{
477 Char c, buf[10], *p, *q;
478
479 p = curchar;
480 if (ch == '\\') {
481 if (*p != '\\') {
482 q = buf;
483 do {
484 *q++ = *p++;
485 } while (*p != '\\' and *p != '\n' and *p != '\0');
486 if (*p != '\\') {
487 ungetc(*p, in);
488 error("non-terminated character constant");
489 }
490 *q = '\0';
491 if (isdigit(buf[0])) {
492 c = (Char) octal(buf);
493 } else {
494 c = charlookup(buf);
495 }
496 curchar = p;
497 } else {
498 c = '\\';
499 }
500 } else {
501 c = ch;
502 }
503 return c;
504}
505
506/*
507 * Do a lookup for a ASCII character name.
508 */
509
510private String ascii[] = {
511 "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
512 "BS", "HT", "NL", "VT", "NP", "CR", "SO", "SI",
513 "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
514 "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US",
515 "SP", nil
516};
517
518private char charlookup(s)
519String s;
520{
521 register int i;
522
523 for (i = 0; ascii[i] != NULL; i++) {
524 if (streq(s, ascii[i])) {
525 return i;
526 }
527 }
528 if (streq(s, "DEL")) {
529 return 0177;
530 }
531 error("unknown ascii name \"%s\"", s);
532 return '?';
533}
534
535/*
536 * Input file management routines.
537 */
538
539public setinput(filename)
540Filename filename;
541{
542 File f;
543
544 f = fopen(filename, "r");
545 if (f == nil) {
546 error("can't open %s", filename);
547 } else {
548 if (curinclindex >= MAXINCLDEPTH) {
549 error("unreasonable input nesting on \"%s\"", filename);
550 }
551 inclinfo[curinclindex].savefile = in;
552 inclinfo[curinclindex].savefn = errfilename;
553 inclinfo[curinclindex].savelineno = errlineno;
554 curinclindex++;
555 in = f;
556 errfilename = filename;
557 errlineno = 1;
558 }
559}
560
561private Boolean eofinput()
562{
563 register Boolean b;
564
565 if (curinclindex == 0) {
566 if (isterm(in)) {
567 putchar('\n');
568 b = false;
569 } else {
570 b = true;
571 }
572 } else {
573 fclose(in);
574 --curinclindex;
575 in = inclinfo[curinclindex].savefile;
576 errfilename = inclinfo[curinclindex].savefn;
577 errlineno = inclinfo[curinclindex].savelineno;
578 b = false;
579 }
580 return b;
581}
582
583/*
584 * Pop the current input. Return whether successful.
585 */
586
587public Boolean popinput()
588{
589 Boolean b;
590
591 if (curinclindex == 0) {
592 b = false;
593 } else {
594 b = (Boolean) (not eofinput());
595 }
596 return b;
597}
598
599/*
600 * Return whether we are currently reading from standard input.
601 */
602
603public Boolean isstdin()
604{
605 return (Boolean) (in == stdin);
606}
607
608/*
609 * Send the current line to the shell.
610 */
611
612public shellline()
613{
614 register char *p;
615
616 p = curchar;
617 while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) {
618 ++p;
619 }
620 shell(p);
621 if (*p == '\0' and isterm(in)) {
622 putchar('\n');
623 }
624 erecover();
625}
626
627/*
628 * Read the rest of the current line in "shell mode".
629 */
630
631public beginshellmode()
632{
633 shellmode = true;
634}
635
636/*
637 * Print out a token for debugging.
638 */
639
640public print_token(f, t)
641File f;
642Token t;
643{
644 if (t == '\n') {
645 fprintf(f, "char '\\n'");
646 } else if (t == EOF) {
647 fprintf(f, "EOF");
648 } else if (t < 256) {
649 fprintf(f, "char '%c'", t);
650 } else {
651 fprintf(f, "\"%s\"", keywdstring(t));
652 }
653}