no copyrights on Makefile's
[unix-history] / usr / src / old / dbx / scanner.c
CommitLineData
2a24676e 1/*
8a90f3aa
KB
2 * Copyright (c) 1983 The Regents of the University of California.
3 * All rights reserved.
4 *
6ecf3d85 5 * %sccs.include.redist.c%
2a24676e 6 */
b76d33f6 7
2a24676e 8#ifndef lint
6ecf3d85 9static char sccsid[] = "@(#)scanner.c 5.3 (Berkeley) %G%";
8a90f3aa 10#endif /* not lint */
b76d33f6
ML
11
12/*
13 * Debugger scanner.
14 */
15
16#include "defs.h"
17#include "scanner.h"
18#include "main.h"
19#include "keywords.h"
20#include "tree.h"
21#include "symbols.h"
22#include "names.h"
23#include "y.tab.h"
24
25#ifndef public
26typedef int Token;
b76d33f6 27
0022c355
ML
28#define MAXLINESIZE 10240
29
30#endif
b76d33f6 31
f7adfe8e 32public String initfile = ".dbxinit";
b76d33f6 33
0022c355
ML
34typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass;
35
36private Charclass class[256 + 1];
37private Charclass *lexclass = class + 1;
38
39#define isdigit(c) (lexclass[c] == NUM)
40#define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM)
41#define ishexdigit(c) ( \
42 isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \
43)
44
45public boolean chkalias;
46public char scanner_linebuf[MAXLINESIZE];
47
48private File in;
49private char *curchar, *prevchar;
50
51#define MAXINCLDEPTH 10
52
53private struct {
54 File savefile;
55 Filename savefn;
56 int savelineno;
57} inclinfo[MAXINCLDEPTH];
58
59private unsigned int curinclindex;
60
b76d33f6
ML
61private Token getident();
62private Token getnum();
63private Token getstring();
0022c355
ML
64private Boolean eofinput();
65private char charcon();
b76d33f6 66
0022c355
ML
67private enterlexclass(class, s)
68Charclass class;
69String s;
70{
71 register char *p;
72
73 for (p = s; *p != '\0'; p++) {
74 lexclass[*p] = class;
75 }
76}
b76d33f6
ML
77
78public scanner_init()
79{
0022c355
ML
80 register Integer i;
81
82 for (i = 0; i < 257; i++) {
83 class[i] = OTHER;
84 }
85 enterlexclass(WHITE, " \t");
86 enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz");
87 enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$");
88 enterlexclass(NUM, "0123456789");
89 in = stdin;
90 errfilename = nil;
91 errlineno = 0;
92 curchar = scanner_linebuf;
93 scanner_linebuf[0] = '\0';
94 chkalias = true;
b76d33f6
ML
95}
96
97/*
98 * Read a single token.
0022c355
ML
99 *
100 * The input is line buffered. Tokens cannot cross line boundaries.
101 *
b76d33f6 102 * There are two "modes" of operation: one as in a compiler,
0022c355
ML
103 * and one for reading shell-like syntax. In the first mode
104 * there is the additional choice of doing alias processing.
b76d33f6 105 */
0022c355
ML
106
107private Boolean shellmode;
108
b76d33f6
ML
109public Token yylex()
110{
0022c355
ML
111 register int c;
112 register char *p;
113 register Token t;
114 String line;
115 integer n;
116
117 p = curchar;
118 if (*p == '\0') {
119 do {
120 if (isterm(in)) {
121 printf("(%s) ", cmdname);
122 }
123 fflush(stdout);
124 line = fgets(scanner_linebuf, MAXLINESIZE, in);
125 } while (line == nil and not eofinput());
126 if (line == nil) {
127 c = EOF;
128 } else {
129 p = scanner_linebuf;
130 while (lexclass[*p] == WHITE) {
131 p++;
132 }
133 shellmode = false;
b76d33f6 134 }
0022c355
ML
135 chkalias = true;
136 } else {
137 while (lexclass[*p] == WHITE) {
138 p++;
b76d33f6 139 }
0022c355
ML
140 }
141 curchar = p;
142 prevchar = curchar;
143 c = *p;
144 if (lexclass[c] == ALPHA) {
145 t = getident(chkalias);
146 } else if (lexclass[c] == NUM) {
147 if (shellmode) {
148 t = getident(chkalias);
149 } else {
150 t = getnum();
4ff5dcd5 151 }
0022c355
ML
152 } else {
153 ++curchar;
b76d33f6 154 switch (c) {
0022c355 155 case '\n':
b76d33f6 156 t = '\n';
0022c355
ML
157 if (errlineno != 0) {
158 errlineno++;
b76d33f6
ML
159 }
160 break;
161
0022c355
ML
162 case '"':
163 case '\'':
f7adfe8e 164 t = getstring(c);
b76d33f6
ML
165 break;
166
0022c355 167 case '.':
b76d33f6 168 if (shellmode) {
0022c355
ML
169 --curchar;
170 t = getident(chkalias);
171 } else if (isdigit(*curchar)) {
172 --curchar;
173 t = getnum();
174 } else {
175 t = '.';
b76d33f6 176 }
b76d33f6
ML
177 break;
178
0022c355
ML
179 case '-':
180 if (shellmode) {
181 --curchar;
182 t = getident(chkalias);
183 } else if (*curchar == '>') {
184 ++curchar;
185 t = ARROW;
186 } else {
187 t = '-';
188 }
b76d33f6
ML
189 break;
190
0022c355
ML
191 case '#':
192 if (not isterm(in)) {
193 *p = '\0';
194 curchar = p;
195 t = '\n';
196 ++errlineno;
197 } else {
198 t = '#';
199 }
b76d33f6
ML
200 break;
201
0022c355
ML
202 case '\\':
203 if (*(p+1) == '\n') {
204 n = MAXLINESIZE - (p - &scanner_linebuf[0]);
205 if (n > 1) {
206 if (fgets(p, n, in) == nil) {
207 t = 0;
208 } else {
209 curchar = p;
210 t = yylex();
211 }
212 } else {
213 t = '\\';
214 }
215 } else {
216 t = '\\';
b76d33f6
ML
217 }
218 break;
219
0022c355 220 case EOF:
b76d33f6
ML
221 t = 0;
222 break;
223
0022c355
ML
224 default:
225 if (shellmode and index("!&*<>()[]", c) == nil) {
226 --curchar;
227 t = getident(chkalias);
228 } else {
229 t = c;
230 }
b76d33f6
ML
231 break;
232 }
0022c355
ML
233 }
234 chkalias = false;
235# ifdef LEXDEBUG
b76d33f6 236 if (lexdebug) {
0022c355
ML
237 fprintf(stderr, "yylex returns ");
238 print_token(stderr, t);
239 fprintf(stderr, "\n");
b76d33f6 240 }
0022c355
ML
241# endif
242 return t;
b76d33f6
ML
243}
244
245/*
0022c355
ML
246 * Put the given string before the current character
247 * in the current line, thus inserting it into the input stream.
b76d33f6 248 */
f7adfe8e 249
0022c355
ML
250public insertinput (s)
251String s;
b76d33f6 252{
0022c355
ML
253 register char *p, *q;
254 int need, avail, shift;
255
256 q = s;
257 need = strlen(q);
258 avail = curchar - &scanner_linebuf[0];
259 if (need <= avail) {
260 curchar = &scanner_linebuf[avail - need];
261 p = curchar;
262 while (*q != '\0') {
263 *p++ = *q++;
f7adfe8e 264 }
0022c355
ML
265 } else {
266 p = curchar;
267 while (*p != '\0') {
268 ++p;
b76d33f6 269 }
0022c355
ML
270 shift = need - avail;
271 if (p + shift >= &scanner_linebuf[MAXLINESIZE]) {
272 error("alias expansion too large");
b76d33f6 273 }
0022c355
ML
274 for (;;) {
275 *(p + shift) = *p;
276 if (p == curchar) {
b76d33f6 277 break;
0022c355
ML
278 }
279 --p;
280 }
281 p = &scanner_linebuf[0];
282 while (*q != '\0') {
283 *p++ = *q++;
b76d33f6 284 }
0022c355
ML
285 curchar = &scanner_linebuf[0];
286 }
b76d33f6
ML
287}
288
289/*
0022c355 290 * Get the actuals for a macro call.
b76d33f6 291 */
b76d33f6 292
0022c355
ML
293private String movetochar (str, c)
294String str;
295char c;
b76d33f6 296{
0022c355
ML
297 register char *p;
298
299 while (*p != c) {
300 if (*p == '\0') {
301 error("missing ')' in macro call");
302 } else if (*p == ')') {
303 error("not enough parameters in macro call");
304 } else if (*p == ',') {
305 error("too many parameters in macro call");
b76d33f6 306 }
0022c355
ML
307 ++p;
308 }
309 return p;
b76d33f6
ML
310}
311
0022c355
ML
312private String *getactuals (n)
313integer n;
b76d33f6 314{
0022c355
ML
315 String *a;
316 register char *p;
317 int i;
318
319 a = newarr(String, n);
320 p = curchar;
321 while (*p != '(') {
322 if (lexclass[*p] != WHITE) {
323 error("missing actuals for macro");
b76d33f6 324 }
0022c355
ML
325 ++p;
326 }
327 ++p;
328 for (i = 0; i < n - 1; i++) {
329 a[i] = p;
330 p = movetochar(p, ',');
331 *p = '\0';
332 ++p;
333 }
334 a[n-1] = p;
335 p = movetochar(p, ')');
336 *p = '\0';
337 curchar = p + 1;
338 return a;
b76d33f6
ML
339}
340
341/*
0022c355
ML
342 * Do command macro expansion, assuming curchar points to the beginning
343 * of the actuals, and we are not in shell mode.
b76d33f6 344 */
0022c355
ML
345
346private expand (pl, str)
347List pl;
348String str;
b76d33f6 349{
0022c355
ML
350 char buf[4096], namebuf[100];
351 register char *p, *q, *r;
352 String *actual;
353 Name n;
354 integer i;
355 boolean match;
356
357 if (pl == nil) {
358 insertinput(str);
359 } else {
360 actual = getactuals(list_size(pl));
361 p = buf;
362 q = str;
363 while (*q != '\0') {
364 if (p >= &buf[4096]) {
365 error("alias expansion too large");
366 }
367 if (lexclass[*q] == ALPHA) {
368 r = namebuf;
369 do {
370 *r++ = *q++;
371 } while (isalnum(*q));
372 *r = '\0';
373 i = 0;
374 match = false;
375 foreach(Name, n, pl)
376 if (streq(ident(n), namebuf)) {
377 match = true;
378 break;
379 }
380 ++i;
381 endfor
382 if (match) {
383 r = actual[i];
384 } else {
385 r = namebuf;
f7adfe8e 386 }
0022c355
ML
387 while (*r != '\0') {
388 *p++ = *r++;
389 }
390 } else {
391 *p++ = *q++;
392 }
b76d33f6 393 }
0022c355
ML
394 *p = '\0';
395 insertinput(buf);
396 }
b76d33f6
ML
397}
398
399/*
f7adfe8e 400 * Parser error handling.
b76d33f6 401 */
0022c355 402
f7adfe8e 403public yyerror(s)
b76d33f6
ML
404String s;
405{
0022c355
ML
406 register char *p;
407 register integer start;
408
409 if (streq(s, "syntax error")) {
410 beginerrmsg();
411 p = prevchar;
412 start = p - &scanner_linebuf[0];
413 if (p > &scanner_linebuf[0]) {
414 while (lexclass[*p] == WHITE and p > &scanner_linebuf[0]) {
415 --p;
416 }
417 }
418 fprintf(stderr, "%s", scanner_linebuf);
419 if (start != 0) {
420 fprintf(stderr, "%*c", start, ' ');
421 }
422 if (p == &scanner_linebuf[0]) {
423 fprintf(stderr, "^ unrecognized command");
424 } else {
425 fprintf(stderr, "^ syntax error");
b76d33f6 426 }
0022c355
ML
427 enderrmsg();
428 } else {
f7adfe8e 429 error(s);
0022c355 430 }
b76d33f6
ML
431}
432
433/*
f7adfe8e 434 * Eat the current line.
b76d33f6 435 */
f7adfe8e 436
0022c355 437public gobble ()
f7adfe8e 438{
0022c355
ML
439 curchar = scanner_linebuf;
440 scanner_linebuf[0] = '\0';
f7adfe8e 441}
b76d33f6 442
f7adfe8e 443/*
0022c355
ML
444 * Scan an identifier.
445 *
446 * If chkalias is true, check first to see if it's an alias.
447 * Otherwise, check to see if it's a keyword.
f7adfe8e 448 */
0022c355
ML
449
450private Token getident (chkalias)
451boolean chkalias;
b76d33f6 452{
0022c355
ML
453 char buf[1024];
454 register char *p, *q;
455 register Token t;
456 List pl;
457 String str;
458
459 p = curchar;
460 q = buf;
461 if (shellmode) {
462 do {
463 *q++ = *p++;
464 } while (index(" \t\n!&<>*[]()'\"", *p) == nil);
465 } else {
466 do {
467 *q++ = *p++;
468 } while (isalnum(*p));
469 }
470 curchar = p;
471 *q = '\0';
472 yylval.y_name = identname(buf, false);
473 if (chkalias) {
474 if (findalias(yylval.y_name, &pl, &str)) {
475 expand(pl, str);
476 while (lexclass[*curchar] == WHITE) {
477 ++curchar;
478 }
479 if (pl == nil) {
480 t = getident(false);
481 } else {
482 t = getident(true);
483 }
484 } else if (shellmode) {
485 t = NAME;
486 } else {
487 t = findkeyword(yylval.y_name, NAME);
b76d33f6 488 }
0022c355
ML
489 } else if (shellmode) {
490 t = NAME;
491 } else {
492 t = findkeyword(yylval.y_name, NAME);
493 }
494 return t;
b76d33f6
ML
495}
496
f7adfe8e 497/*
0022c355 498 * Scan a number.
f7adfe8e 499 */
0022c355
ML
500
501private Token getnum()
b76d33f6 502{
0022c355
ML
503 char buf[1024];
504 register Char *p, *q;
505 register Token t;
506 Integer base;
507
508 p = curchar;
509 q = buf;
510 if (*p == '0') {
511 if (*(p+1) == 'x') {
512 p += 2;
513 base = 16;
514 } else if (*(p+1) == 't') {
515 base = 10;
516 } else if (varIsSet("$hexin")) {
517 base = 16;
518 } else {
519 base = 8;
520 }
521 } else if (varIsSet("$hexin")) {
522 base = 16;
523 } else if (varIsSet("$octin")) {
524 base = 8;
525 } else {
526 base = 10;
527 }
528 if (base == 16) {
529 do {
530 *q++ = *p++;
531 } while (ishexdigit(*p));
532 } else {
533 do {
534 *q++ = *p++;
535 } while (isdigit(*p));
536 }
537 if (*p == '.') {
538 do {
539 *q++ = *p++;
540 } while (isdigit(*p));
541 if (*p == 'e' or *p == 'E') {
542 p++;
543 if (*p == '+' or *p == '-' or isdigit(*p)) {
544 *q++ = 'e';
545 do {
546 *q++ = *p++;
547 } while (isdigit(*p));
548 }
549 }
550 *q = '\0';
551 yylval.y_real = atof(buf);
552 t = REAL;
553 } else {
554 *q = '\0';
555 switch (base) {
556 case 10:
557 yylval.y_int = atol(buf);
558 break;
b76d33f6 559
0022c355
ML
560 case 8:
561 yylval.y_int = octal(buf);
562 break;
563
564 case 16:
565 yylval.y_int = hex(buf);
566 break;
567
568 default:
569 badcaseval(base);
570 }
571 t = INT;
572 }
573 curchar = p;
574 return t;
b76d33f6
ML
575}
576
577/*
0022c355 578 * Convert a string of octal digits to an integer.
b76d33f6 579 */
f7adfe8e 580
0022c355
ML
581private int octal(s)
582String s;
f7adfe8e 583{
0022c355
ML
584 register Char *p;
585 register Integer n;
586
587 n = 0;
588 for (p = s; *p != '\0'; p++) {
589 n = 8*n + (*p - '0');
590 }
591 return n;
f7adfe8e
SL
592}
593
0022c355
ML
594/*
595 * Convert a string of hexadecimal digits to an integer.
596 */
f7adfe8e 597
0022c355
ML
598private int hex(s)
599String s;
f7adfe8e 600{
0022c355
ML
601 register Char *p;
602 register Integer n;
603
604 n = 0;
605 for (p = s; *p != '\0'; p++) {
606 n *= 16;
607 if (*p >= 'a' and *p <= 'f') {
608 n += (*p - 'a' + 10);
609 } else if (*p >= 'A' and *p <= 'F') {
610 n += (*p - 'A' + 10);
611 } else {
612 n += (*p - '0');
613 }
614 }
615 return n;
b76d33f6
ML
616}
617
618/*
0022c355 619 * Scan a string.
b76d33f6 620 */
f7adfe8e 621
0022c355
ML
622private Token getstring (quote)
623char quote;
624{
625 register char *p, *q;
626 char buf[MAXLINESIZE];
627 boolean endofstring;
628 Token t;
629
630 p = curchar;
631 q = buf;
632 endofstring = false;
633 while (not endofstring) {
634 if (*p == '\\' and *(p+1) == '\n') {
635 if (fgets(scanner_linebuf, MAXLINESIZE, in) == nil) {
636 error("non-terminated string");
637 }
638 p = &scanner_linebuf[0] - 1;
639 } else if (*p == '\n' or *p == '\0') {
640 error("non-terminated string");
641 endofstring = true;
642 } else if (*p == quote) {
643 endofstring = true;
644 } else {
645 curchar = p;
646 *q++ = charcon(p);
647 p = curchar;
f7adfe8e 648 }
0022c355
ML
649 p++;
650 }
651 curchar = p;
652 *q = '\0';
653 if (quote == '\'' and buf[1] == '\0') {
654 yylval.y_char = buf[0];
655 t = CHAR;
656 } else {
657 yylval.y_string = strdup(buf);
658 t = STRING;
659 }
660 return t;
f7adfe8e
SL
661}
662
0022c355
ML
663/*
664 * Process a character constant.
665 * Watch out for backslashes.
666 */
f7adfe8e 667
0022c355
ML
668private char charcon (s)
669String s;
670{
671 register char *p, *q;
672 char c, buf[10];
673
674 p = s;
675 if (*p == '\\') {
676 ++p;
677 switch (*p) {
678 case '\\':
679 c = '\\';
f7adfe8e
SL
680 break;
681
0022c355
ML
682 case 'n':
683 c = '\n';
f7adfe8e
SL
684 break;
685
0022c355
ML
686 case 'r':
687 c = '\r';
688 break;
f7adfe8e 689
0022c355
ML
690 case 't':
691 c = '\t';
f7adfe8e
SL
692 break;
693
0022c355
ML
694 case '\'':
695 case '"':
696 c = *p;
f7adfe8e
SL
697 break;
698
0022c355
ML
699 default:
700 if (isdigit(*p)) {
701 q = buf;
702 do {
703 *q++ = *p++;
704 } while (isdigit(*p));
705 *q = '\0';
706 c = (char) octal(buf);
707 }
708 --p;
709 break;
f7adfe8e 710 }
0022c355
ML
711 curchar = p;
712 } else {
713 c = *p;
714 }
715 return c;
b76d33f6
ML
716}
717
718/*
0022c355 719 * Input file management routines.
b76d33f6 720 */
f7adfe8e 721
0022c355
ML
722public setinput(filename)
723Filename filename;
724{
725 File f;
726
727 f = fopen(filename, "r");
728 if (f == nil) {
729 error("can't open %s", filename);
730 } else {
731 if (curinclindex >= MAXINCLDEPTH) {
732 error("unreasonable input nesting on \"%s\"", filename);
f7adfe8e 733 }
0022c355
ML
734 inclinfo[curinclindex].savefile = in;
735 inclinfo[curinclindex].savefn = errfilename;
736 inclinfo[curinclindex].savelineno = errlineno;
737 curinclindex++;
738 in = f;
739 errfilename = filename;
740 errlineno = 1;
741 }
f7adfe8e
SL
742}
743
0022c355 744private Boolean eofinput()
b76d33f6 745{
0022c355 746 register Boolean b;
f7adfe8e 747
0022c355
ML
748 if (curinclindex == 0) {
749 if (isterm(in)) {
750 putchar('\n');
751 clearerr(in);
752 b = false;
753 } else {
754 b = true;
755 }
756 } else {
757 fclose(in);
758 --curinclindex;
759 in = inclinfo[curinclindex].savefile;
760 errfilename = inclinfo[curinclindex].savefn;
761 errlineno = inclinfo[curinclindex].savelineno;
762 b = false;
763 }
764 return b;
b76d33f6
ML
765}
766
767/*
0022c355 768 * Pop the current input. Return whether successful.
b76d33f6 769 */
f7adfe8e 770
0022c355
ML
771public Boolean popinput()
772{
773 Boolean b;
774
775 if (curinclindex == 0) {
776 b = false;
777 } else {
778 b = (Boolean) (not eofinput());
779 }
780 return b;
b76d33f6
ML
781}
782
783/*
f7adfe8e 784 * Return whether we are currently reading from standard input.
b76d33f6 785 */
0022c355 786
f7adfe8e
SL
787public Boolean isstdin()
788{
0022c355
ML
789 return (Boolean) (in == stdin);
790}
b76d33f6 791
0022c355
ML
792/*
793 * Send the current line to the shell.
794 */
795
796public shellline()
797{
798 register char *p;
799
800 p = curchar;
801 while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) {
802 ++p;
803 }
804 shell(p);
805 if (*p == '\0' and isterm(in)) {
806 putchar('\n');
807 }
808 erecover();
f7adfe8e
SL
809}
810
0022c355
ML
811/*
812 * Read the rest of the current line in "shell mode".
813 */
814
815public beginshellmode()
b76d33f6 816{
0022c355
ML
817 shellmode = true;
818}
f7adfe8e 819
0022c355
ML
820/*
821 * Print out a token for debugging.
822 */
823
824public print_token(f, t)
825File f;
826Token t;
827{
828 if (t == '\n') {
829 fprintf(f, "char '\\n'");
830 } else if (t == EOF) {
831 fprintf(f, "EOF");
832 } else if (t < 256) {
833 fprintf(f, "char '%c'", t);
834 } else {
835 fprintf(f, "\"%s\"", keywdstring(t));
836 }
b76d33f6 837}