cleanup, add manual pages
[unix-history] / usr / src / old / dbx / scanner.c
CommitLineData
2a24676e
DF
1/*
2 * Copyright (c) 1983 Regents of the University of California.
3 * All rights reserved. The Berkeley software License Agreement
4 * specifies the terms and conditions for redistribution.
5 */
b76d33f6 6
2a24676e
DF
/*
 * Version identification strings embedded in the object file.
 * The SCCS string is omitted when the file is processed by lint.
 */
#ifndef lint
static char sccsid[] = "@(#)scanner.c 5.1 (Berkeley) %G%";
#endif /* not lint */

static char rcsid[] = "$Header: scanner.c,v 1.5 84/12/26 10:42:05 linton Exp $";
b76d33f6
ML
12
13/*
14 * Debugger scanner.
15 */
16
17#include "defs.h"
18#include "scanner.h"
19#include "main.h"
20#include "keywords.h"
21#include "tree.h"
22#include "symbols.h"
23#include "names.h"
24#include "y.tab.h"
25
26#ifndef public
27typedef int Token;
b76d33f6 28
0022c355
ML
29#define MAXLINESIZE 10240
30
31#endif
b76d33f6 32
f7adfe8e 33public String initfile = ".dbxinit";
b76d33f6 34
0022c355
ML
35typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass;
36
37private Charclass class[256 + 1];
38private Charclass *lexclass = class + 1;
39
40#define isdigit(c) (lexclass[c] == NUM)
41#define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM)
42#define ishexdigit(c) ( \
43 isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \
44)
45
46public boolean chkalias;
47public char scanner_linebuf[MAXLINESIZE];
48
49private File in;
50private char *curchar, *prevchar;
51
52#define MAXINCLDEPTH 10
53
54private struct {
55 File savefile;
56 Filename savefn;
57 int savelineno;
58} inclinfo[MAXINCLDEPTH];
59
60private unsigned int curinclindex;
61
b76d33f6
ML
62private Token getident();
63private Token getnum();
64private Token getstring();
0022c355
ML
65private Boolean eofinput();
66private char charcon();
b76d33f6 67
0022c355
ML
68private enterlexclass(class, s)
69Charclass class;
70String s;
71{
72 register char *p;
73
74 for (p = s; *p != '\0'; p++) {
75 lexclass[*p] = class;
76 }
77}
b76d33f6
ML
78
79public scanner_init()
80{
0022c355
ML
81 register Integer i;
82
83 for (i = 0; i < 257; i++) {
84 class[i] = OTHER;
85 }
86 enterlexclass(WHITE, " \t");
87 enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz");
88 enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$");
89 enterlexclass(NUM, "0123456789");
90 in = stdin;
91 errfilename = nil;
92 errlineno = 0;
93 curchar = scanner_linebuf;
94 scanner_linebuf[0] = '\0';
95 chkalias = true;
b76d33f6
ML
96}
97
98/*
99 * Read a single token.
0022c355
ML
100 *
101 * The input is line buffered. Tokens cannot cross line boundaries.
102 *
b76d33f6 103 * There are two "modes" of operation: one as in a compiler,
0022c355
ML
104 * and one for reading shell-like syntax. In the first mode
105 * there is the additional choice of doing alias processing.
b76d33f6 106 */
0022c355
ML
107
108private Boolean shellmode;
109
b76d33f6
ML
110public Token yylex()
111{
0022c355
ML
112 register int c;
113 register char *p;
114 register Token t;
115 String line;
116 integer n;
117
118 p = curchar;
119 if (*p == '\0') {
120 do {
121 if (isterm(in)) {
122 printf("(%s) ", cmdname);
123 }
124 fflush(stdout);
125 line = fgets(scanner_linebuf, MAXLINESIZE, in);
126 } while (line == nil and not eofinput());
127 if (line == nil) {
128 c = EOF;
129 } else {
130 p = scanner_linebuf;
131 while (lexclass[*p] == WHITE) {
132 p++;
133 }
134 shellmode = false;
b76d33f6 135 }
0022c355
ML
136 chkalias = true;
137 } else {
138 while (lexclass[*p] == WHITE) {
139 p++;
b76d33f6 140 }
0022c355
ML
141 }
142 curchar = p;
143 prevchar = curchar;
144 c = *p;
145 if (lexclass[c] == ALPHA) {
146 t = getident(chkalias);
147 } else if (lexclass[c] == NUM) {
148 if (shellmode) {
149 t = getident(chkalias);
150 } else {
151 t = getnum();
4ff5dcd5 152 }
0022c355
ML
153 } else {
154 ++curchar;
b76d33f6 155 switch (c) {
0022c355 156 case '\n':
b76d33f6 157 t = '\n';
0022c355
ML
158 if (errlineno != 0) {
159 errlineno++;
b76d33f6
ML
160 }
161 break;
162
0022c355
ML
163 case '"':
164 case '\'':
f7adfe8e 165 t = getstring(c);
b76d33f6
ML
166 break;
167
0022c355 168 case '.':
b76d33f6 169 if (shellmode) {
0022c355
ML
170 --curchar;
171 t = getident(chkalias);
172 } else if (isdigit(*curchar)) {
173 --curchar;
174 t = getnum();
175 } else {
176 t = '.';
b76d33f6 177 }
b76d33f6
ML
178 break;
179
0022c355
ML
180 case '-':
181 if (shellmode) {
182 --curchar;
183 t = getident(chkalias);
184 } else if (*curchar == '>') {
185 ++curchar;
186 t = ARROW;
187 } else {
188 t = '-';
189 }
b76d33f6
ML
190 break;
191
0022c355
ML
192 case '#':
193 if (not isterm(in)) {
194 *p = '\0';
195 curchar = p;
196 t = '\n';
197 ++errlineno;
198 } else {
199 t = '#';
200 }
b76d33f6
ML
201 break;
202
0022c355
ML
203 case '\\':
204 if (*(p+1) == '\n') {
205 n = MAXLINESIZE - (p - &scanner_linebuf[0]);
206 if (n > 1) {
207 if (fgets(p, n, in) == nil) {
208 t = 0;
209 } else {
210 curchar = p;
211 t = yylex();
212 }
213 } else {
214 t = '\\';
215 }
216 } else {
217 t = '\\';
b76d33f6
ML
218 }
219 break;
220
0022c355 221 case EOF:
b76d33f6
ML
222 t = 0;
223 break;
224
0022c355
ML
225 default:
226 if (shellmode and index("!&*<>()[]", c) == nil) {
227 --curchar;
228 t = getident(chkalias);
229 } else {
230 t = c;
231 }
b76d33f6
ML
232 break;
233 }
0022c355
ML
234 }
235 chkalias = false;
236# ifdef LEXDEBUG
b76d33f6 237 if (lexdebug) {
0022c355
ML
238 fprintf(stderr, "yylex returns ");
239 print_token(stderr, t);
240 fprintf(stderr, "\n");
b76d33f6 241 }
0022c355
ML
242# endif
243 return t;
b76d33f6
ML
244}
245
246/*
0022c355
ML
247 * Put the given string before the current character
248 * in the current line, thus inserting it into the input stream.
b76d33f6 249 */
f7adfe8e 250
0022c355
ML
251public insertinput (s)
252String s;
b76d33f6 253{
0022c355
ML
254 register char *p, *q;
255 int need, avail, shift;
256
257 q = s;
258 need = strlen(q);
259 avail = curchar - &scanner_linebuf[0];
260 if (need <= avail) {
261 curchar = &scanner_linebuf[avail - need];
262 p = curchar;
263 while (*q != '\0') {
264 *p++ = *q++;
f7adfe8e 265 }
0022c355
ML
266 } else {
267 p = curchar;
268 while (*p != '\0') {
269 ++p;
b76d33f6 270 }
0022c355
ML
271 shift = need - avail;
272 if (p + shift >= &scanner_linebuf[MAXLINESIZE]) {
273 error("alias expansion too large");
b76d33f6 274 }
0022c355
ML
275 for (;;) {
276 *(p + shift) = *p;
277 if (p == curchar) {
b76d33f6 278 break;
0022c355
ML
279 }
280 --p;
281 }
282 p = &scanner_linebuf[0];
283 while (*q != '\0') {
284 *p++ = *q++;
b76d33f6 285 }
0022c355
ML
286 curchar = &scanner_linebuf[0];
287 }
b76d33f6
ML
288}
289
290/*
0022c355 291 * Get the actuals for a macro call.
b76d33f6 292 */
b76d33f6 293
0022c355
ML
294private String movetochar (str, c)
295String str;
296char c;
b76d33f6 297{
0022c355
ML
298 register char *p;
299
300 while (*p != c) {
301 if (*p == '\0') {
302 error("missing ')' in macro call");
303 } else if (*p == ')') {
304 error("not enough parameters in macro call");
305 } else if (*p == ',') {
306 error("too many parameters in macro call");
b76d33f6 307 }
0022c355
ML
308 ++p;
309 }
310 return p;
b76d33f6
ML
311}
312
0022c355
ML
313private String *getactuals (n)
314integer n;
b76d33f6 315{
0022c355
ML
316 String *a;
317 register char *p;
318 int i;
319
320 a = newarr(String, n);
321 p = curchar;
322 while (*p != '(') {
323 if (lexclass[*p] != WHITE) {
324 error("missing actuals for macro");
b76d33f6 325 }
0022c355
ML
326 ++p;
327 }
328 ++p;
329 for (i = 0; i < n - 1; i++) {
330 a[i] = p;
331 p = movetochar(p, ',');
332 *p = '\0';
333 ++p;
334 }
335 a[n-1] = p;
336 p = movetochar(p, ')');
337 *p = '\0';
338 curchar = p + 1;
339 return a;
b76d33f6
ML
340}
341
342/*
0022c355
ML
343 * Do command macro expansion, assuming curchar points to the beginning
344 * of the actuals, and we are not in shell mode.
b76d33f6 345 */
0022c355
ML
346
347private expand (pl, str)
348List pl;
349String str;
b76d33f6 350{
0022c355
ML
351 char buf[4096], namebuf[100];
352 register char *p, *q, *r;
353 String *actual;
354 Name n;
355 integer i;
356 boolean match;
357
358 if (pl == nil) {
359 insertinput(str);
360 } else {
361 actual = getactuals(list_size(pl));
362 p = buf;
363 q = str;
364 while (*q != '\0') {
365 if (p >= &buf[4096]) {
366 error("alias expansion too large");
367 }
368 if (lexclass[*q] == ALPHA) {
369 r = namebuf;
370 do {
371 *r++ = *q++;
372 } while (isalnum(*q));
373 *r = '\0';
374 i = 0;
375 match = false;
376 foreach(Name, n, pl)
377 if (streq(ident(n), namebuf)) {
378 match = true;
379 break;
380 }
381 ++i;
382 endfor
383 if (match) {
384 r = actual[i];
385 } else {
386 r = namebuf;
f7adfe8e 387 }
0022c355
ML
388 while (*r != '\0') {
389 *p++ = *r++;
390 }
391 } else {
392 *p++ = *q++;
393 }
b76d33f6 394 }
0022c355
ML
395 *p = '\0';
396 insertinput(buf);
397 }
b76d33f6
ML
398}
399
400/*
f7adfe8e 401 * Parser error handling.
b76d33f6 402 */
0022c355 403
f7adfe8e 404public yyerror(s)
b76d33f6
ML
405String s;
406{
0022c355
ML
407 register char *p;
408 register integer start;
409
410 if (streq(s, "syntax error")) {
411 beginerrmsg();
412 p = prevchar;
413 start = p - &scanner_linebuf[0];
414 if (p > &scanner_linebuf[0]) {
415 while (lexclass[*p] == WHITE and p > &scanner_linebuf[0]) {
416 --p;
417 }
418 }
419 fprintf(stderr, "%s", scanner_linebuf);
420 if (start != 0) {
421 fprintf(stderr, "%*c", start, ' ');
422 }
423 if (p == &scanner_linebuf[0]) {
424 fprintf(stderr, "^ unrecognized command");
425 } else {
426 fprintf(stderr, "^ syntax error");
b76d33f6 427 }
0022c355
ML
428 enderrmsg();
429 } else {
f7adfe8e 430 error(s);
0022c355 431 }
b76d33f6
ML
432}
433
434/*
f7adfe8e 435 * Eat the current line.
b76d33f6 436 */
f7adfe8e 437
0022c355 438public gobble ()
f7adfe8e 439{
0022c355
ML
440 curchar = scanner_linebuf;
441 scanner_linebuf[0] = '\0';
f7adfe8e 442}
b76d33f6 443
f7adfe8e 444/*
0022c355
ML
445 * Scan an identifier.
446 *
447 * If chkalias is true, check first to see if it's an alias.
448 * Otherwise, check to see if it's a keyword.
f7adfe8e 449 */
0022c355
ML
450
451private Token getident (chkalias)
452boolean chkalias;
b76d33f6 453{
0022c355
ML
454 char buf[1024];
455 register char *p, *q;
456 register Token t;
457 List pl;
458 String str;
459
460 p = curchar;
461 q = buf;
462 if (shellmode) {
463 do {
464 *q++ = *p++;
465 } while (index(" \t\n!&<>*[]()'\"", *p) == nil);
466 } else {
467 do {
468 *q++ = *p++;
469 } while (isalnum(*p));
470 }
471 curchar = p;
472 *q = '\0';
473 yylval.y_name = identname(buf, false);
474 if (chkalias) {
475 if (findalias(yylval.y_name, &pl, &str)) {
476 expand(pl, str);
477 while (lexclass[*curchar] == WHITE) {
478 ++curchar;
479 }
480 if (pl == nil) {
481 t = getident(false);
482 } else {
483 t = getident(true);
484 }
485 } else if (shellmode) {
486 t = NAME;
487 } else {
488 t = findkeyword(yylval.y_name, NAME);
b76d33f6 489 }
0022c355
ML
490 } else if (shellmode) {
491 t = NAME;
492 } else {
493 t = findkeyword(yylval.y_name, NAME);
494 }
495 return t;
b76d33f6
ML
496}
497
f7adfe8e 498/*
0022c355 499 * Scan a number.
f7adfe8e 500 */
0022c355
ML
501
502private Token getnum()
b76d33f6 503{
0022c355
ML
504 char buf[1024];
505 register Char *p, *q;
506 register Token t;
507 Integer base;
508
509 p = curchar;
510 q = buf;
511 if (*p == '0') {
512 if (*(p+1) == 'x') {
513 p += 2;
514 base = 16;
515 } else if (*(p+1) == 't') {
516 base = 10;
517 } else if (varIsSet("$hexin")) {
518 base = 16;
519 } else {
520 base = 8;
521 }
522 } else if (varIsSet("$hexin")) {
523 base = 16;
524 } else if (varIsSet("$octin")) {
525 base = 8;
526 } else {
527 base = 10;
528 }
529 if (base == 16) {
530 do {
531 *q++ = *p++;
532 } while (ishexdigit(*p));
533 } else {
534 do {
535 *q++ = *p++;
536 } while (isdigit(*p));
537 }
538 if (*p == '.') {
539 do {
540 *q++ = *p++;
541 } while (isdigit(*p));
542 if (*p == 'e' or *p == 'E') {
543 p++;
544 if (*p == '+' or *p == '-' or isdigit(*p)) {
545 *q++ = 'e';
546 do {
547 *q++ = *p++;
548 } while (isdigit(*p));
549 }
550 }
551 *q = '\0';
552 yylval.y_real = atof(buf);
553 t = REAL;
554 } else {
555 *q = '\0';
556 switch (base) {
557 case 10:
558 yylval.y_int = atol(buf);
559 break;
b76d33f6 560
0022c355
ML
561 case 8:
562 yylval.y_int = octal(buf);
563 break;
564
565 case 16:
566 yylval.y_int = hex(buf);
567 break;
568
569 default:
570 badcaseval(base);
571 }
572 t = INT;
573 }
574 curchar = p;
575 return t;
b76d33f6
ML
576}
577
578/*
0022c355 579 * Convert a string of octal digits to an integer.
b76d33f6 580 */
f7adfe8e 581
0022c355
ML
582private int octal(s)
583String s;
f7adfe8e 584{
0022c355
ML
585 register Char *p;
586 register Integer n;
587
588 n = 0;
589 for (p = s; *p != '\0'; p++) {
590 n = 8*n + (*p - '0');
591 }
592 return n;
f7adfe8e
SL
593}
594
0022c355
ML
595/*
596 * Convert a string of hexadecimal digits to an integer.
597 */
f7adfe8e 598
0022c355
ML
599private int hex(s)
600String s;
f7adfe8e 601{
0022c355
ML
602 register Char *p;
603 register Integer n;
604
605 n = 0;
606 for (p = s; *p != '\0'; p++) {
607 n *= 16;
608 if (*p >= 'a' and *p <= 'f') {
609 n += (*p - 'a' + 10);
610 } else if (*p >= 'A' and *p <= 'F') {
611 n += (*p - 'A' + 10);
612 } else {
613 n += (*p - '0');
614 }
615 }
616 return n;
b76d33f6
ML
617}
618
619/*
0022c355 620 * Scan a string.
b76d33f6 621 */
f7adfe8e 622
0022c355
ML
623private Token getstring (quote)
624char quote;
625{
626 register char *p, *q;
627 char buf[MAXLINESIZE];
628 boolean endofstring;
629 Token t;
630
631 p = curchar;
632 q = buf;
633 endofstring = false;
634 while (not endofstring) {
635 if (*p == '\\' and *(p+1) == '\n') {
636 if (fgets(scanner_linebuf, MAXLINESIZE, in) == nil) {
637 error("non-terminated string");
638 }
639 p = &scanner_linebuf[0] - 1;
640 } else if (*p == '\n' or *p == '\0') {
641 error("non-terminated string");
642 endofstring = true;
643 } else if (*p == quote) {
644 endofstring = true;
645 } else {
646 curchar = p;
647 *q++ = charcon(p);
648 p = curchar;
f7adfe8e 649 }
0022c355
ML
650 p++;
651 }
652 curchar = p;
653 *q = '\0';
654 if (quote == '\'' and buf[1] == '\0') {
655 yylval.y_char = buf[0];
656 t = CHAR;
657 } else {
658 yylval.y_string = strdup(buf);
659 t = STRING;
660 }
661 return t;
f7adfe8e
SL
662}
663
0022c355
ML
664/*
665 * Process a character constant.
666 * Watch out for backslashes.
667 */
f7adfe8e 668
0022c355
ML
669private char charcon (s)
670String s;
671{
672 register char *p, *q;
673 char c, buf[10];
674
675 p = s;
676 if (*p == '\\') {
677 ++p;
678 switch (*p) {
679 case '\\':
680 c = '\\';
f7adfe8e
SL
681 break;
682
0022c355
ML
683 case 'n':
684 c = '\n';
f7adfe8e
SL
685 break;
686
0022c355
ML
687 case 'r':
688 c = '\r';
689 break;
f7adfe8e 690
0022c355
ML
691 case 't':
692 c = '\t';
f7adfe8e
SL
693 break;
694
0022c355
ML
695 case '\'':
696 case '"':
697 c = *p;
f7adfe8e
SL
698 break;
699
0022c355
ML
700 default:
701 if (isdigit(*p)) {
702 q = buf;
703 do {
704 *q++ = *p++;
705 } while (isdigit(*p));
706 *q = '\0';
707 c = (char) octal(buf);
708 }
709 --p;
710 break;
f7adfe8e 711 }
0022c355
ML
712 curchar = p;
713 } else {
714 c = *p;
715 }
716 return c;
b76d33f6
ML
717}
718
719/*
0022c355 720 * Input file management routines.
b76d33f6 721 */
f7adfe8e 722
0022c355
ML
723public setinput(filename)
724Filename filename;
725{
726 File f;
727
728 f = fopen(filename, "r");
729 if (f == nil) {
730 error("can't open %s", filename);
731 } else {
732 if (curinclindex >= MAXINCLDEPTH) {
733 error("unreasonable input nesting on \"%s\"", filename);
f7adfe8e 734 }
0022c355
ML
735 inclinfo[curinclindex].savefile = in;
736 inclinfo[curinclindex].savefn = errfilename;
737 inclinfo[curinclindex].savelineno = errlineno;
738 curinclindex++;
739 in = f;
740 errfilename = filename;
741 errlineno = 1;
742 }
f7adfe8e
SL
743}
744
0022c355 745private Boolean eofinput()
b76d33f6 746{
0022c355 747 register Boolean b;
f7adfe8e 748
0022c355
ML
749 if (curinclindex == 0) {
750 if (isterm(in)) {
751 putchar('\n');
752 clearerr(in);
753 b = false;
754 } else {
755 b = true;
756 }
757 } else {
758 fclose(in);
759 --curinclindex;
760 in = inclinfo[curinclindex].savefile;
761 errfilename = inclinfo[curinclindex].savefn;
762 errlineno = inclinfo[curinclindex].savelineno;
763 b = false;
764 }
765 return b;
b76d33f6
ML
766}
767
768/*
0022c355 769 * Pop the current input. Return whether successful.
b76d33f6 770 */
f7adfe8e 771
0022c355
ML
772public Boolean popinput()
773{
774 Boolean b;
775
776 if (curinclindex == 0) {
777 b = false;
778 } else {
779 b = (Boolean) (not eofinput());
780 }
781 return b;
b76d33f6
ML
782}
783
784/*
f7adfe8e 785 * Return whether we are currently reading from standard input.
b76d33f6 786 */
0022c355 787
f7adfe8e
SL
788public Boolean isstdin()
789{
0022c355
ML
790 return (Boolean) (in == stdin);
791}
b76d33f6 792
0022c355
ML
793/*
794 * Send the current line to the shell.
795 */
796
797public shellline()
798{
799 register char *p;
800
801 p = curchar;
802 while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) {
803 ++p;
804 }
805 shell(p);
806 if (*p == '\0' and isterm(in)) {
807 putchar('\n');
808 }
809 erecover();
f7adfe8e
SL
810}
811
0022c355
ML
812/*
813 * Read the rest of the current line in "shell mode".
814 */
815
816public beginshellmode()
b76d33f6 817{
0022c355
ML
818 shellmode = true;
819}
f7adfe8e 820
0022c355
ML
821/*
822 * Print out a token for debugging.
823 */
824
825public print_token(f, t)
826File f;
827Token t;
828{
829 if (t == '\n') {
830 fprintf(f, "char '\\n'");
831 } else if (t == EOF) {
832 fprintf(f, "EOF");
833 } else if (t < 256) {
834 fprintf(f, "char '%c'", t);
835 } else {
836 fprintf(f, "\"%s\"", keywdstring(t));
837 }
b76d33f6 838}