This commit was generated by cvs2svn to track changes on a CVS vendor
[unix-history] / usr.bin / yacc / reader.c
CommitLineData
15637ed4
RG
1/*
2 * Copyright (c) 1989 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Robert Paul Corbett.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37#ifndef lint
38static char sccsid[] = "@(#)reader.c 5.7 (Berkeley) 1/20/91";
39#endif /* not lint */
40
41#include "defs.h"
42
/* The line size must be a positive integer.  One hundred was chosen */
/* because few lines in Yacc input grammars exceed 100 characters.  */
/* Note that if a line exceeds LINESIZE characters, the line buffer */
/* will be expanded to accommodate it.                              */
47
48#define LINESIZE 100
49
50char *cache;
51int cinc, cache_size;
52
53int ntags, tagmax;
54char **tag_table;
55
56char saw_eof, unionized;
57char *cptr, *line;
58int linesize;
59
60bucket *goal;
61int prec;
62int gensym;
63char last_was_action;
64
65int maxitems;
66bucket **pitem;
67
68int maxrules;
69bucket **plhs;
70
71int name_pool_size;
72char *name_pool;
73
74char line_format[] = "#line %d \"%s\"\n";
75
76
77cachec(c)
78int c;
79{
80 assert(cinc >= 0);
81 if (cinc >= cache_size)
82 {
83 cache_size += 256;
84 cache = REALLOC(cache, cache_size);
85 if (cache == 0) no_space();
86 }
87 cache[cinc] = c;
88 ++cinc;
89}
90
91
92get_line()
93{
94 register FILE *f = input_file;
95 register int c;
96 register int i;
97
98 if (saw_eof || (c = getc(f)) == EOF)
99 {
100 if (line) { FREE(line); line = 0; }
101 cptr = 0;
102 saw_eof = 1;
103 return;
104 }
105
106 if (line == 0 || linesize != (LINESIZE + 1))
107 {
108 if (line) FREE(line);
109 linesize = LINESIZE + 1;
110 line = MALLOC(linesize);
111 if (line == 0) no_space();
112 }
113
114 i = 0;
115 ++lineno;
116 for (;;)
117 {
118 line[i] = c;
119 if (c == '\n') { cptr = line; return; }
120 if (++i >= linesize)
121 {
122 linesize += LINESIZE;
123 line = REALLOC(line, linesize);
124 if (line == 0) no_space();
125 }
126 c = getc(f);
127 if (c == EOF)
128 {
129 line[i] = '\n';
130 saw_eof = 1;
131 cptr = line;
132 return;
133 }
134 }
135}
136
137
138char *
139dup_line()
140{
141 register char *p, *s, *t;
142
143 if (line == 0) return (0);
144 s = line;
145 while (*s != '\n') ++s;
146 p = MALLOC(s - line + 1);
147 if (p == 0) no_space();
148
149 s = line;
150 t = p;
151 while ((*t++ = *s++) != '\n') continue;
152 return (p);
153}
154
155
156skip_comment()
157{
158 register char *s;
159
160 int st_lineno = lineno;
161 char *st_line = dup_line();
162 char *st_cptr = st_line + (cptr - line);
163
164 s = cptr + 2;
165 for (;;)
166 {
167 if (*s == '*' && s[1] == '/')
168 {
169 cptr = s + 2;
170 FREE(st_line);
171 return;
172 }
173 if (*s == '\n')
174 {
175 get_line();
176 if (line == 0)
177 unterminated_comment(st_lineno, st_line, st_cptr);
178 s = cptr;
179 }
180 else
181 ++s;
182 }
183}
184
185
186int
187nextc()
188{
189 register char *s;
190
191 if (line == 0)
192 {
193 get_line();
194 if (line == 0)
195 return (EOF);
196 }
197
198 s = cptr;
199 for (;;)
200 {
201 switch (*s)
202 {
203 case '\n':
204 get_line();
205 if (line == 0) return (EOF);
206 s = cptr;
207 break;
208
209 case ' ':
210 case '\t':
211 case '\f':
212 case '\r':
213 case '\v':
214 case ',':
215 case ';':
216 ++s;
217 break;
218
219 case '\\':
220 cptr = s;
221 return ('%');
222
223 case '/':
224 if (s[1] == '*')
225 {
226 cptr = s;
227 skip_comment();
228 s = cptr;
229 break;
230 }
231 else if (s[1] == '/')
232 {
233 get_line();
234 if (line == 0) return (EOF);
235 s = cptr;
236 break;
237 }
238 /* fall through */
239
240 default:
241 cptr = s;
242 return (*s);
243 }
244 }
245}
246
247
248int
249keyword()
250{
251 register int c;
252 char *t_cptr = cptr;
253
254 c = *++cptr;
255 if (isalpha(c))
256 {
257 cinc = 0;
258 for (;;)
259 {
260 if (isalpha(c))
261 {
262 if (isupper(c)) c = tolower(c);
263 cachec(c);
264 }
265 else if (isdigit(c) || c == '_' || c == '.' || c == '$')
266 cachec(c);
267 else
268 break;
269 c = *++cptr;
270 }
271 cachec(NUL);
272
273 if (strcmp(cache, "token") == 0 || strcmp(cache, "term") == 0)
274 return (TOKEN);
275 if (strcmp(cache, "type") == 0)
276 return (TYPE);
277 if (strcmp(cache, "left") == 0)
278 return (LEFT);
279 if (strcmp(cache, "right") == 0)
280 return (RIGHT);
281 if (strcmp(cache, "nonassoc") == 0 || strcmp(cache, "binary") == 0)
282 return (NONASSOC);
283 if (strcmp(cache, "start") == 0)
284 return (START);
285 if (strcmp(cache, "union") == 0)
286 return (UNION);
287 if (strcmp(cache, "ident") == 0)
288 return (IDENT);
289 }
290 else
291 {
292 ++cptr;
293 if (c == '{')
294 return (TEXT);
295 if (c == '%' || c == '\\')
296 return (MARK);
297 if (c == '<')
298 return (LEFT);
299 if (c == '>')
300 return (RIGHT);
301 if (c == '0')
302 return (TOKEN);
303 if (c == '2')
304 return (NONASSOC);
305 }
306 syntax_error(lineno, line, t_cptr);
307 /*NOTREACHED*/
308}
309
310
311copy_ident()
312{
313 register int c;
314 register FILE *f = output_file;
315
316 c = nextc();
317 if (c == EOF) unexpected_EOF();
318 if (c != '"') syntax_error(lineno, line, cptr);
319 ++outline;
320 fprintf(f, "#ident \"");
321 for (;;)
322 {
323 c = *++cptr;
324 if (c == '\n')
325 {
326 fprintf(f, "\"\n");
327 return;
328 }
329 putc(c, f);
330 if (c == '"')
331 {
332 putc('\n', f);
333 ++cptr;
334 return;
335 }
336 }
337}
338
339
340copy_text()
341{
342 register int c;
343 int quote;
344 register FILE *f = text_file;
345 int need_newline = 0;
346 int t_lineno = lineno;
347 char *t_line = dup_line();
348 char *t_cptr = t_line + (cptr - line - 2);
349
350 if (*cptr == '\n')
351 {
352 get_line();
353 if (line == 0)
354 unterminated_text(t_lineno, t_line, t_cptr);
355 }
356 if (!lflag) fprintf(f, line_format, lineno, input_file_name);
357
358loop:
359 c = *cptr++;
360 switch (c)
361 {
362 case '\n':
363 next_line:
364 putc('\n', f);
365 need_newline = 0;
366 get_line();
367 if (line) goto loop;
368 unterminated_text(t_lineno, t_line, t_cptr);
369
370 case '\'':
371 case '"':
372 {
373 int s_lineno = lineno;
374 char *s_line = dup_line();
375 char *s_cptr = s_line + (cptr - line - 1);
376
377 quote = c;
378 putc(c, f);
379 for (;;)
380 {
381 c = *cptr++;
382 putc(c, f);
383 if (c == quote)
384 {
385 need_newline = 1;
386 FREE(s_line);
387 goto loop;
388 }
389 if (c == '\n')
390 unterminated_string(s_lineno, s_line, s_cptr);
391 if (c == '\\')
392 {
393 c = *cptr++;
394 putc(c, f);
395 if (c == '\n')
396 {
397 get_line();
398 if (line == 0)
399 unterminated_string(s_lineno, s_line, s_cptr);
400 }
401 }
402 }
403 }
404
405 case '/':
406 putc(c, f);
407 need_newline = 1;
408 c = *cptr;
409 if (c == '/')
410 {
411 putc('*', f);
412 while ((c = *++cptr) != '\n')
413 {
414 if (c == '*' && cptr[1] == '/')
415 fprintf(f, "* ");
416 else
417 putc(c, f);
418 }
419 fprintf(f, "*/");
420 goto next_line;
421 }
422 if (c == '*')
423 {
424 int c_lineno = lineno;
425 char *c_line = dup_line();
426 char *c_cptr = c_line + (cptr - line - 1);
427
428 putc('*', f);
429 ++cptr;
430 for (;;)
431 {
432 c = *cptr++;
433 putc(c, f);
434 if (c == '*' && *cptr == '/')
435 {
436 putc('/', f);
437 ++cptr;
438 FREE(c_line);
439 goto loop;
440 }
441 if (c == '\n')
442 {
443 get_line();
444 if (line == 0)
445 unterminated_comment(c_lineno, c_line, c_cptr);
446 }
447 }
448 }
449 need_newline = 1;
450 goto loop;
451
452 case '%':
453 case '\\':
454 if (*cptr == '}')
455 {
456 if (need_newline) putc('\n', f);
457 ++cptr;
458 FREE(t_line);
459 return;
460 }
461 /* fall through */
462
463 default:
464 putc(c, f);
465 need_newline = 1;
466 goto loop;
467 }
468}
469
470
471copy_union()
472{
473 register int c;
474 int quote;
475 int depth;
476 int u_lineno = lineno;
477 char *u_line = dup_line();
478 char *u_cptr = u_line + (cptr - line - 6);
479
480 if (unionized) over_unionized(cptr - 6);
481 unionized = 1;
482
483 if (!lflag)
484 fprintf(text_file, line_format, lineno, input_file_name);
485
486 fprintf(text_file, "typedef union");
487 if (dflag) fprintf(union_file, "typedef union");
488
489 depth = 0;
490loop:
491 c = *cptr++;
492 putc(c, text_file);
493 if (dflag) putc(c, union_file);
494 switch (c)
495 {
496 case '\n':
497 next_line:
498 get_line();
499 if (line == 0) unterminated_union(u_lineno, u_line, u_cptr);
500 goto loop;
501
502 case '{':
503 ++depth;
504 goto loop;
505
506 case '}':
507 if (--depth == 0)
508 {
509 fprintf(text_file, " YYSTYPE;\n");
510 FREE(u_line);
511 return;
512 }
513 goto loop;
514
515 case '\'':
516 case '"':
517 {
518 int s_lineno = lineno;
519 char *s_line = dup_line();
520 char *s_cptr = s_line + (cptr - line - 1);
521
522 quote = c;
523 for (;;)
524 {
525 c = *cptr++;
526 putc(c, text_file);
527 if (dflag) putc(c, union_file);
528 if (c == quote)
529 {
530 FREE(s_line);
531 goto loop;
532 }
533 if (c == '\n')
534 unterminated_string(s_lineno, s_line, s_cptr);
535 if (c == '\\')
536 {
537 c = *cptr++;
538 putc(c, text_file);
539 if (dflag) putc(c, union_file);
540 if (c == '\n')
541 {
542 get_line();
543 if (line == 0)
544 unterminated_string(s_lineno, s_line, s_cptr);
545 }
546 }
547 }
548 }
549
550 case '/':
551 c = *cptr;
552 if (c == '/')
553 {
554 putc('*', text_file);
555 if (dflag) putc('*', union_file);
556 while ((c = *++cptr) != '\n')
557 {
558 if (c == '*' && cptr[1] == '/')
559 {
560 fprintf(text_file, "* ");
561 if (dflag) fprintf(union_file, "* ");
562 }
563 else
564 {
565 putc(c, text_file);
566 if (dflag) putc(c, union_file);
567 }
568 }
569 fprintf(text_file, "*/\n");
570 if (dflag) fprintf(union_file, "*/\n");
571 goto next_line;
572 }
573 if (c == '*')
574 {
575 int c_lineno = lineno;
576 char *c_line = dup_line();
577 char *c_cptr = c_line + (cptr - line - 1);
578
579 putc('*', text_file);
580 if (dflag) putc('*', union_file);
581 ++cptr;
582 for (;;)
583 {
584 c = *cptr++;
585 putc(c, text_file);
586 if (dflag) putc(c, union_file);
587 if (c == '*' && *cptr == '/')
588 {
589 putc('/', text_file);
590 if (dflag) putc('/', union_file);
591 ++cptr;
592 FREE(c_line);
593 goto loop;
594 }
595 if (c == '\n')
596 {
597 get_line();
598 if (line == 0)
599 unterminated_comment(c_lineno, c_line, c_cptr);
600 }
601 }
602 }
603 goto loop;
604
605 default:
606 goto loop;
607 }
608}
609
610
/*
 * Return the numeric value (0-15) of the hexadecimal digit c, accepting
 * both upper- and lower-case letters, or -1 if c is not a hex digit.
 */
int
hexval(c)
int c;
{
    if ('0' <= c && c <= '9')
        return (c - '0');
    if ('A' <= c && c <= 'F')
        return (10 + (c - 'A'));
    if ('a' <= c && c <= 'f')
        return (10 + (c - 'a'));
    return (-1);
}
623
624
625bucket *
626get_literal()
627{
628 register int c, quote;
629 register int i;
630 register int n;
631 register char *s;
632 register bucket *bp;
633 int s_lineno = lineno;
634 char *s_line = dup_line();
635 char *s_cptr = s_line + (cptr - line);
636
637 quote = *cptr++;
638 cinc = 0;
639 for (;;)
640 {
641 c = *cptr++;
642 if (c == quote) break;
643 if (c == '\n') unterminated_string(s_lineno, s_line, s_cptr);
644 if (c == '\\')
645 {
646 char *c_cptr = cptr - 1;
647
648 c = *cptr++;
649 switch (c)
650 {
651 case '\n':
652 get_line();
653 if (line == 0) unterminated_string(s_lineno, s_line, s_cptr);
654 continue;
655
656 case '0': case '1': case '2': case '3':
657 case '4': case '5': case '6': case '7':
658 n = c - '0';
659 c = *cptr;
660 if (IS_OCTAL(c))
661 {
662 n = (n << 3) + (c - '0');
663 c = *++cptr;
664 if (IS_OCTAL(c))
665 {
666 n = (n << 3) + (c - '0');
667 ++cptr;
668 }
669 }
670 if (n > MAXCHAR) illegal_character(c_cptr);
671 c = n;
672 break;
673
674 case 'x':
675 c = *cptr++;
676 n = hexval(c);
677 if (n < 0 || n >= 16)
678 illegal_character(c_cptr);
679 for (;;)
680 {
681 c = *cptr;
682 i = hexval(c);
683 if (i < 0 || i >= 16) break;
684 ++cptr;
685 n = (n << 4) + i;
686 if (n > MAXCHAR) illegal_character(c_cptr);
687 }
688 c = n;
689 break;
690
691 case 'a': c = 7; break;
692 case 'b': c = '\b'; break;
693 case 'f': c = '\f'; break;
694 case 'n': c = '\n'; break;
695 case 'r': c = '\r'; break;
696 case 't': c = '\t'; break;
697 case 'v': c = '\v'; break;
698 }
699 }
700 cachec(c);
701 }
702 FREE(s_line);
703
704 n = cinc;
705 s = MALLOC(n);
706 if (s == 0) no_space();
707
708 for (i = 0; i < n; ++i)
709 s[i] = cache[i];
710
711 cinc = 0;
712 if (n == 1)
713 cachec('\'');
714 else
715 cachec('"');
716
717 for (i = 0; i < n; ++i)
718 {
719 c = ((unsigned char *)s)[i];
720 if (c == '\\' || c == cache[0])
721 {
722 cachec('\\');
723 cachec(c);
724 }
725 else if (isprint(c))
726 cachec(c);
727 else
728 {
729 cachec('\\');
730 switch (c)
731 {
732 case 7: cachec('a'); break;
733 case '\b': cachec('b'); break;
734 case '\f': cachec('f'); break;
735 case '\n': cachec('n'); break;
736 case '\r': cachec('r'); break;
737 case '\t': cachec('t'); break;
738 case '\v': cachec('v'); break;
739 default:
740 cachec(((c >> 6) & 7) + '0');
741 cachec(((c >> 3) & 7) + '0');
742 cachec((c & 7) + '0');
743 break;
744 }
745 }
746 }
747
748 if (n == 1)
749 cachec('\'');
750 else
751 cachec('"');
752
753 cachec(NUL);
754 bp = lookup(cache);
755 bp->class = TERM;
756 if (n == 1 && bp->value == UNDEFINED)
757 bp->value = *(unsigned char *)s;
758 FREE(s);
759
760 return (bp);
761}
762
763
/*
 * Return 1 if name is one of the reserved symbol names -- ".",
 * "$accept", "$end", or "$$" followed by one or more digits -- and
 * 0 otherwise.
 */
int
is_reserved(name)
char *name;
{
    char *p;

    if (!strcmp(name, "."))
        return (1);
    if (!strcmp(name, "$accept"))
        return (1);
    if (!strcmp(name, "$end"))
        return (1);

    if (name[0] != '$' || name[1] != '$' || !isdigit(name[2]))
        return (0);

    /* "$$<digit>..." is reserved only when digits run to the end. */
    for (p = name + 3; isdigit(*p); ++p)
        continue;
    return (*p == '\0' ? 1 : 0);
}
784
785
786bucket *
787get_name()
788{
789 register int c;
790
791 cinc = 0;
792 for (c = *cptr; IS_IDENT(c); c = *++cptr)
793 cachec(c);
794 cachec(NUL);
795
796 if (is_reserved(cache)) used_reserved(cache);
797
798 return (lookup(cache));
799}
800
801
802int
803get_number()
804{
805 register int c;
806 register int n;
807
808 n = 0;
809 for (c = *cptr; isdigit(c); c = *++cptr)
810 n = 10*n + (c - '0');
811
812 return (n);
813}
814
815
816char *
817get_tag()
818{
819 register int c;
820 register int i;
821 register char *s;
822 int t_lineno = lineno;
823 char *t_line = dup_line();
824 char *t_cptr = t_line + (cptr - line);
825
826 ++cptr;
827 c = nextc();
828 if (c == EOF) unexpected_EOF();
829 if (!isalpha(c) && c != '_' && c != '$')
830 illegal_tag(t_lineno, t_line, t_cptr);
831
832 cinc = 0;
833 do { cachec(c); c = *++cptr; } while (IS_IDENT(c));
834 cachec(NUL);
835
836 c = nextc();
837 if (c == EOF) unexpected_EOF();
838 if (c != '>')
839 illegal_tag(t_lineno, t_line, t_cptr);
840 ++cptr;
841
842 for (i = 0; i < ntags; ++i)
843 {
844 if (strcmp(cache, tag_table[i]) == 0)
845 return (tag_table[i]);
846 }
847
848 if (ntags >= tagmax)
849 {
850 tagmax += 16;
851 tag_table = (char **)
852 (tag_table ? REALLOC(tag_table, tagmax*sizeof(char *))
853 : MALLOC(tagmax*sizeof(char *)));
854 if (tag_table == 0) no_space();
855 }
856
857 s = MALLOC(cinc);
858 if (s == 0) no_space();
859 strcpy(s, cache);
860 tag_table[ntags] = s;
861 ++ntags;
862 FREE(t_line);
863 return (s);
864}
865
866
867declare_tokens(assoc)
868int assoc;
869{
870 register int c;
871 register bucket *bp;
872 int value;
873 char *tag = 0;
874
875 if (assoc != TOKEN) ++prec;
876
877 c = nextc();
878 if (c == EOF) unexpected_EOF();
879 if (c == '<')
880 {
881 tag = get_tag();
882 c = nextc();
883 if (c == EOF) unexpected_EOF();
884 }
885
886 for (;;)
887 {
888 if (isalpha(c) || c == '_' || c == '.' || c == '$')
889 bp = get_name();
890 else if (c == '\'' || c == '"')
891 bp = get_literal();
892 else
893 return;
894
895 if (bp == goal) tokenized_start(bp->name);
896 bp->class = TERM;
897
898 if (tag)
899 {
900 if (bp->tag && tag != bp->tag)
901 retyped_warning(bp->name);
902 bp->tag = tag;
903 }
904
905 if (assoc != TOKEN)
906 {
907 if (bp->prec && prec != bp->prec)
908 reprec_warning(bp->name);
909 bp->assoc = assoc;
910 bp->prec = prec;
911 }
912
913 c = nextc();
914 if (c == EOF) unexpected_EOF();
915 value = UNDEFINED;
916 if (isdigit(c))
917 {
918 value = get_number();
919 if (bp->value != UNDEFINED && value != bp->value)
920 revalued_warning(bp->name);
921 bp->value = value;
922 c = nextc();
923 if (c == EOF) unexpected_EOF();
924 }
925 }
926}
927
928
929declare_types()
930{
931 register int c;
932 register bucket *bp;
933 char *tag;
934
935 c = nextc();
936 if (c == EOF) unexpected_EOF();
937 if (c != '<') syntax_error(lineno, line, cptr);
938 tag = get_tag();
939
940 for (;;)
941 {
942 c = nextc();
943 if (isalpha(c) || c == '_' || c == '.' || c == '$')
944 bp = get_name();
945 else if (c == '\'' || c == '"')
946 bp = get_literal();
947 else
948 return;
949
950 if (bp->tag && tag != bp->tag)
951 retyped_warning(bp->name);
952 bp->tag = tag;
953 }
954}
955
956
957declare_start()
958{
959 register int c;
960 register bucket *bp;
961
962 c = nextc();
963 if (c == EOF) unexpected_EOF();
964 if (!isalpha(c) && c != '_' && c != '.' && c != '$')
965 syntax_error(lineno, line, cptr);
966 bp = get_name();
967 if (bp->class == TERM)
968 terminal_start(bp->name);
969 if (goal && goal != bp)
970 restarted_warning();
971 goal = bp;
972}
973
974
975read_declarations()
976{
977 register int c, k;
978
979 cache_size = 256;
980 cache = MALLOC(cache_size);
981 if (cache == 0) no_space();
982
983 for (;;)
984 {
985 c = nextc();
986 if (c == EOF) unexpected_EOF();
987 if (c != '%') syntax_error(lineno, line, cptr);
988 switch (k = keyword())
989 {
990 case MARK:
991 return;
992
993 case IDENT:
994 copy_ident();
995 break;
996
997 case TEXT:
998 copy_text();
999 break;
1000
1001 case UNION:
1002 copy_union();
1003 break;
1004
1005 case TOKEN:
1006 case LEFT:
1007 case RIGHT:
1008 case NONASSOC:
1009 declare_tokens(k);
1010 break;
1011
1012 case TYPE:
1013 declare_types();
1014 break;
1015
1016 case START:
1017 declare_start();
1018 break;
1019 }
1020 }
1021}
1022
1023
1024initialize_grammar()
1025{
1026 nitems = 4;
1027 maxitems = 300;
1028 pitem = (bucket **) MALLOC(maxitems*sizeof(bucket *));
1029 if (pitem == 0) no_space();
1030 pitem[0] = 0;
1031 pitem[1] = 0;
1032 pitem[2] = 0;
1033 pitem[3] = 0;
1034
1035 nrules = 3;
1036 maxrules = 100;
1037 plhs = (bucket **) MALLOC(maxrules*sizeof(bucket *));
1038 if (plhs == 0) no_space();
1039 plhs[0] = 0;
1040 plhs[1] = 0;
1041 plhs[2] = 0;
1042 rprec = (short *) MALLOC(maxrules*sizeof(short));
1043 if (rprec == 0) no_space();
1044 rprec[0] = 0;
1045 rprec[1] = 0;
1046 rprec[2] = 0;
1047 rassoc = (char *) MALLOC(maxrules*sizeof(char));
1048 if (rassoc == 0) no_space();
1049 rassoc[0] = TOKEN;
1050 rassoc[1] = TOKEN;
1051 rassoc[2] = TOKEN;
1052}
1053
1054
1055expand_items()
1056{
1057 maxitems += 300;
1058 pitem = (bucket **) REALLOC(pitem, maxitems*sizeof(bucket *));
1059 if (pitem == 0) no_space();
1060}
1061
1062
1063expand_rules()
1064{
1065 maxrules += 100;
1066 plhs = (bucket **) REALLOC(plhs, maxrules*sizeof(bucket *));
1067 if (plhs == 0) no_space();
1068 rprec = (short *) REALLOC(rprec, maxrules*sizeof(short));
1069 if (rprec == 0) no_space();
1070 rassoc = (char *) REALLOC(rassoc, maxrules*sizeof(char));
1071 if (rassoc == 0) no_space();
1072}
1073
1074
1075advance_to_start()
1076{
1077 register int c;
1078 register bucket *bp;
1079 char *s_cptr;
1080 int s_lineno;
1081
1082 for (;;)
1083 {
1084 c = nextc();
1085 if (c != '%') break;
1086 s_cptr = cptr;
1087 switch (keyword())
1088 {
1089 case MARK:
1090 no_grammar();
1091
1092 case TEXT:
1093 copy_text();
1094 break;
1095
1096 case START:
1097 declare_start();
1098 break;
1099
1100 default:
1101 syntax_error(lineno, line, s_cptr);
1102 }
1103 }
1104
1105 c = nextc();
1106 if (!isalpha(c) && c != '_' && c != '.' && c != '_')
1107 syntax_error(lineno, line, cptr);
1108 bp = get_name();
1109 if (goal == 0)
1110 {
1111 if (bp->class == TERM)
1112 terminal_start(bp->name);
1113 goal = bp;
1114 }
1115
1116 s_lineno = lineno;
1117 c = nextc();
1118 if (c == EOF) unexpected_EOF();
1119 if (c != ':') syntax_error(lineno, line, cptr);
1120 start_rule(bp, s_lineno);
1121 ++cptr;
1122}
1123
1124
1125start_rule(bp, s_lineno)
1126register bucket *bp;
1127int s_lineno;
1128{
1129 if (bp->class == TERM)
1130 terminal_lhs(s_lineno);
1131 bp->class = NONTERM;
1132 if (nrules >= maxrules)
1133 expand_rules();
1134 plhs[nrules] = bp;
1135 rprec[nrules] = UNDEFINED;
1136 rassoc[nrules] = TOKEN;
1137}
1138
1139
1140end_rule()
1141{
1142 register int i;
1143
1144 if (!last_was_action && plhs[nrules]->tag)
1145 {
1146 for (i = nitems - 1; pitem[i]; --i) continue;
1147 if (pitem[i+1] == 0 || pitem[i+1]->tag != plhs[nrules]->tag)
1148 default_action_warning();
1149 }
1150
1151 last_was_action = 0;
1152 if (nitems >= maxitems) expand_items();
1153 pitem[nitems] = 0;
1154 ++nitems;
1155 ++nrules;
1156}
1157
1158
1159insert_empty_rule()
1160{
1161 register bucket *bp, **bpp;
1162
1163 assert(cache);
1164 sprintf(cache, "$$%d", ++gensym);
1165 bp = make_bucket(cache);
1166 last_symbol->next = bp;
1167 last_symbol = bp;
1168 bp->tag = plhs[nrules]->tag;
1169 bp->class = NONTERM;
1170
1171 if ((nitems += 2) > maxitems)
1172 expand_items();
1173 bpp = pitem + nitems - 1;
1174 *bpp-- = bp;
1175 while (bpp[0] = bpp[-1]) --bpp;
1176
1177 if (++nrules >= maxrules)
1178 expand_rules();
1179 plhs[nrules] = plhs[nrules-1];
1180 plhs[nrules-1] = bp;
1181 rprec[nrules] = rprec[nrules-1];
1182 rprec[nrules-1] = 0;
1183 rassoc[nrules] = rassoc[nrules-1];
1184 rassoc[nrules-1] = TOKEN;
1185}
1186
1187
1188add_symbol()
1189{
1190 register int c;
1191 register bucket *bp;
1192 int s_lineno = lineno;
1193
1194 c = *cptr;
1195 if (c == '\'' || c == '"')
1196 bp = get_literal();
1197 else
1198 bp = get_name();
1199
1200 c = nextc();
1201 if (c == ':')
1202 {
1203 end_rule();
1204 start_rule(bp, s_lineno);
1205 ++cptr;
1206 return;
1207 }
1208
1209 if (last_was_action)
1210 insert_empty_rule();
1211 last_was_action = 0;
1212
1213 if (++nitems > maxitems)
1214 expand_items();
1215 pitem[nitems-1] = bp;
1216}
1217
1218
1219copy_action()
1220{
1221 register int c;
1222 register int i, n;
1223 int depth;
1224 int quote;
1225 char *tag;
1226 register FILE *f = action_file;
1227 int a_lineno = lineno;
1228 char *a_line = dup_line();
1229 char *a_cptr = a_line + (cptr - line);
1230
1231 if (last_was_action)
1232 insert_empty_rule();
1233 last_was_action = 1;
1234
1235 fprintf(f, "case %d:\n", nrules - 2);
1236 if (!lflag)
1237 fprintf(f, line_format, lineno, input_file_name);
1238 if (*cptr == '=') ++cptr;
1239
1240 n = 0;
1241 for (i = nitems - 1; pitem[i]; --i) ++n;
1242
1243 depth = 0;
1244loop:
1245 c = *cptr;
1246 if (c == '$')
1247 {
1248 if (cptr[1] == '<')
1249 {
1250 int d_lineno = lineno;
1251 char *d_line = dup_line();
1252 char *d_cptr = d_line + (cptr - line);
1253
1254 ++cptr;
1255 tag = get_tag();
1256 c = *cptr;
1257 if (c == '$')
1258 {
1259 fprintf(f, "yyval.%s", tag);
1260 ++cptr;
1261 FREE(d_line);
1262 goto loop;
1263 }
1264 else if (isdigit(c))
1265 {
1266 i = get_number();
1267 if (i > n) dollar_warning(d_lineno, i);
1268 fprintf(f, "yyvsp[%d].%s", i - n, tag);
1269 FREE(d_line);
1270 goto loop;
1271 }
1272 else if (c == '-' && isdigit(cptr[1]))
1273 {
1274 ++cptr;
1275 i = -get_number() - n;
1276 fprintf(f, "yyvsp[%d].%s", i, tag);
1277 FREE(d_line);
1278 goto loop;
1279 }
1280 else
1281 dollar_error(d_lineno, d_line, d_cptr);
1282 }
1283 else if (cptr[1] == '$')
1284 {
1285 if (ntags)
1286 {
1287 tag = plhs[nrules]->tag;
1288 if (tag == 0) untyped_lhs();
1289 fprintf(f, "yyval.%s", tag);
1290 }
1291 else
1292 fprintf(f, "yyval");
1293 cptr += 2;
1294 goto loop;
1295 }
1296 else if (isdigit(cptr[1]))
1297 {
1298 ++cptr;
1299 i = get_number();
1300 if (ntags)
1301 {
1302 if (i <= 0 || i > n)
1303 unknown_rhs(i);
1304 tag = pitem[nitems + i - n - 1]->tag;
1305 if (tag == 0) untyped_rhs(i, pitem[nitems + i - n - 1]->name);
1306 fprintf(f, "yyvsp[%d].%s", i - n, tag);
1307 }
1308 else
1309 {
1310 if (i > n)
1311 dollar_warning(lineno, i);
1312 fprintf(f, "yyvsp[%d]", i - n);
1313 }
1314 goto loop;
1315 }
1316 else if (cptr[1] == '-')
1317 {
1318 cptr += 2;
1319 i = get_number();
1320 if (ntags)
1321 unknown_rhs(-i);
1322 fprintf(f, "yyvsp[%d]", -i - n);
1323 goto loop;
1324 }
1325 }
1326 if (isalpha(c) || c == '_' || c == '$')
1327 {
1328 do
1329 {
1330 putc(c, f);
1331 c = *++cptr;
1332 } while (isalnum(c) || c == '_' || c == '$');
1333 goto loop;
1334 }
1335 putc(c, f);
1336 ++cptr;
1337 switch (c)
1338 {
1339 case '\n':
1340 next_line:
1341 get_line();
1342 if (line) goto loop;
1343 unterminated_action(a_lineno, a_line, a_cptr);
1344
1345 case ';':
1346 if (depth > 0) goto loop;
1347 fprintf(f, "\nbreak;\n");
1348 return;
1349
1350 case '{':
1351 ++depth;
1352 goto loop;
1353
1354 case '}':
1355 if (--depth > 0) goto loop;
1356 fprintf(f, "\nbreak;\n");
1357 return;
1358
1359 case '\'':
1360 case '"':
1361 {
1362 int s_lineno = lineno;
1363 char *s_line = dup_line();
1364 char *s_cptr = s_line + (cptr - line - 1);
1365
1366 quote = c;
1367 for (;;)
1368 {
1369 c = *cptr++;
1370 putc(c, f);
1371 if (c == quote)
1372 {
1373 FREE(s_line);
1374 goto loop;
1375 }
1376 if (c == '\n')
1377 unterminated_string(s_lineno, s_line, s_cptr);
1378 if (c == '\\')
1379 {
1380 c = *cptr++;
1381 putc(c, f);
1382 if (c == '\n')
1383 {
1384 get_line();
1385 if (line == 0)
1386 unterminated_string(s_lineno, s_line, s_cptr);
1387 }
1388 }
1389 }
1390 }
1391
1392 case '/':
1393 c = *cptr;
1394 if (c == '/')
1395 {
1396 putc('*', f);
1397 while ((c = *++cptr) != '\n')
1398 {
1399 if (c == '*' && cptr[1] == '/')
1400 fprintf(f, "* ");
1401 else
1402 putc(c, f);
1403 }
1404 fprintf(f, "*/\n");
1405 goto next_line;
1406 }
1407 if (c == '*')
1408 {
1409 int c_lineno = lineno;
1410 char *c_line = dup_line();
1411 char *c_cptr = c_line + (cptr - line - 1);
1412
1413 putc('*', f);
1414 ++cptr;
1415 for (;;)
1416 {
1417 c = *cptr++;
1418 putc(c, f);
1419 if (c == '*' && *cptr == '/')
1420 {
1421 putc('/', f);
1422 ++cptr;
1423 FREE(c_line);
1424 goto loop;
1425 }
1426 if (c == '\n')
1427 {
1428 get_line();
1429 if (line == 0)
1430 unterminated_comment(c_lineno, c_line, c_cptr);
1431 }
1432 }
1433 }
1434 goto loop;
1435
1436 default:
1437 goto loop;
1438 }
1439}
1440
1441
1442int
1443mark_symbol()
1444{
1445 register int c;
1446 register bucket *bp;
1447
1448 c = cptr[1];
1449 if (c == '%' || c == '\\')
1450 {
1451 cptr += 2;
1452 return (1);
1453 }
1454
1455 if (c == '=')
1456 cptr += 2;
1457 else if ((c == 'p' || c == 'P') &&
1458 ((c = cptr[2]) == 'r' || c == 'R') &&
1459 ((c = cptr[3]) == 'e' || c == 'E') &&
1460 ((c = cptr[4]) == 'c' || c == 'C') &&
1461 ((c = cptr[5], !IS_IDENT(c))))
1462 cptr += 5;
1463 else
1464 syntax_error(lineno, line, cptr);
1465
1466 c = nextc();
1467 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1468 bp = get_name();
1469 else if (c == '\'' || c == '"')
1470 bp = get_literal();
1471 else
1472 {
1473 syntax_error(lineno, line, cptr);
1474 /*NOTREACHED*/
1475 }
1476
1477 if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules])
1478 prec_redeclared();
1479
1480 rprec[nrules] = bp->prec;
1481 rassoc[nrules] = bp->assoc;
1482 return (0);
1483}
1484
1485
1486read_grammar()
1487{
1488 register int c;
1489
1490 initialize_grammar();
1491 advance_to_start();
1492
1493 for (;;)
1494 {
1495 c = nextc();
1496 if (c == EOF) break;
1497 if (isalpha(c) || c == '_' || c == '.' || c == '$' || c == '\'' ||
1498 c == '"')
1499 add_symbol();
1500 else if (c == '{' || c == '=')
1501 copy_action();
1502 else if (c == '|')
1503 {
1504 end_rule();
1505 start_rule(plhs[nrules-1], 0);
1506 ++cptr;
1507 }
1508 else if (c == '%')
1509 {
1510 if (mark_symbol()) break;
1511 }
1512 else
1513 syntax_error(lineno, line, cptr);
1514 }
1515 end_rule();
1516}
1517
1518
1519free_tags()
1520{
1521 register int i;
1522
1523 if (tag_table == 0) return;
1524
1525 for (i = 0; i < ntags; ++i)
1526 {
1527 assert(tag_table[i]);
1528 FREE(tag_table[i]);
1529 }
1530 FREE(tag_table);
1531}
1532
1533
1534pack_names()
1535{
1536 register bucket *bp;
1537 register char *p, *s, *t;
1538
1539 name_pool_size = 13; /* 13 == sizeof("$end") + sizeof("$accept") */
1540 for (bp = first_symbol; bp; bp = bp->next)
1541 name_pool_size += strlen(bp->name) + 1;
1542 name_pool = MALLOC(name_pool_size);
1543 if (name_pool == 0) no_space();
1544
1545 strcpy(name_pool, "$accept");
1546 strcpy(name_pool+8, "$end");
1547 t = name_pool + 13;
1548 for (bp = first_symbol; bp; bp = bp->next)
1549 {
1550 p = t;
1551 s = bp->name;
1552 while (*t++ = *s++) continue;
1553 FREE(bp->name);
1554 bp->name = p;
1555 }
1556}
1557
1558
1559check_symbols()
1560{
1561 register bucket *bp;
1562
1563 if (goal->class == UNKNOWN)
1564 undefined_goal(goal->name);
1565
1566 for (bp = first_symbol; bp; bp = bp->next)
1567 {
1568 if (bp->class == UNKNOWN)
1569 {
1570 undefined_symbol_warning(bp->name);
1571 bp->class = TERM;
1572 }
1573 }
1574}
1575
1576
1577pack_symbols()
1578{
1579 register bucket *bp;
1580 register bucket **v;
1581 register int i, j, k, n;
1582
1583 nsyms = 2;
1584 ntokens = 1;
1585 for (bp = first_symbol; bp; bp = bp->next)
1586 {
1587 ++nsyms;
1588 if (bp->class == TERM) ++ntokens;
1589 }
1590 start_symbol = ntokens;
1591 nvars = nsyms - ntokens;
1592
1593 symbol_name = (char **) MALLOC(nsyms*sizeof(char *));
1594 if (symbol_name == 0) no_space();
1595 symbol_value = (short *) MALLOC(nsyms*sizeof(short));
1596 if (symbol_value == 0) no_space();
1597 symbol_prec = (short *) MALLOC(nsyms*sizeof(short));
1598 if (symbol_prec == 0) no_space();
1599 symbol_assoc = MALLOC(nsyms);
1600 if (symbol_assoc == 0) no_space();
1601
1602 v = (bucket **) MALLOC(nsyms*sizeof(bucket *));
1603 if (v == 0) no_space();
1604
1605 v[0] = 0;
1606 v[start_symbol] = 0;
1607
1608 i = 1;
1609 j = start_symbol + 1;
1610 for (bp = first_symbol; bp; bp = bp->next)
1611 {
1612 if (bp->class == TERM)
1613 v[i++] = bp;
1614 else
1615 v[j++] = bp;
1616 }
1617 assert(i == ntokens && j == nsyms);
1618
1619 for (i = 1; i < ntokens; ++i)
1620 v[i]->index = i;
1621
1622 goal->index = start_symbol + 1;
1623 k = start_symbol + 2;
1624 while (++i < nsyms)
1625 if (v[i] != goal)
1626 {
1627 v[i]->index = k;
1628 ++k;
1629 }
1630
1631 goal->value = 0;
1632 k = 1;
1633 for (i = start_symbol + 1; i < nsyms; ++i)
1634 {
1635 if (v[i] != goal)
1636 {
1637 v[i]->value = k;
1638 ++k;
1639 }
1640 }
1641
1642 k = 0;
1643 for (i = 1; i < ntokens; ++i)
1644 {
1645 n = v[i]->value;
1646 if (n > 256)
1647 {
1648 for (j = k++; j > 0 && symbol_value[j-1] > n; --j)
1649 symbol_value[j] = symbol_value[j-1];
1650 symbol_value[j] = n;
1651 }
1652 }
1653
1654 if (v[1]->value == UNDEFINED)
1655 v[1]->value = 256;
1656
1657 j = 0;
1658 n = 257;
1659 for (i = 2; i < ntokens; ++i)
1660 {
1661 if (v[i]->value == UNDEFINED)
1662 {
1663 while (j < k && n == symbol_value[j])
1664 {
1665 while (++j < k && n == symbol_value[j]) continue;
1666 ++n;
1667 }
1668 v[i]->value = n;
1669 ++n;
1670 }
1671 }
1672
1673 symbol_name[0] = name_pool + 8;
1674 symbol_value[0] = 0;
1675 symbol_prec[0] = 0;
1676 symbol_assoc[0] = TOKEN;
1677 for (i = 1; i < ntokens; ++i)
1678 {
1679 symbol_name[i] = v[i]->name;
1680 symbol_value[i] = v[i]->value;
1681 symbol_prec[i] = v[i]->prec;
1682 symbol_assoc[i] = v[i]->assoc;
1683 }
1684 symbol_name[start_symbol] = name_pool;
1685 symbol_value[start_symbol] = -1;
1686 symbol_prec[start_symbol] = 0;
1687 symbol_assoc[start_symbol] = TOKEN;
1688 for (++i; i < nsyms; ++i)
1689 {
1690 k = v[i]->index;
1691 symbol_name[k] = v[i]->name;
1692 symbol_value[k] = v[i]->value;
1693 symbol_prec[k] = v[i]->prec;
1694 symbol_assoc[k] = v[i]->assoc;
1695 }
1696
1697 FREE(v);
1698}
1699
1700
1701pack_grammar()
1702{
1703 register int i, j;
1704 int assoc, prec;
1705
1706 ritem = (short *) MALLOC(nitems*sizeof(short));
1707 if (ritem == 0) no_space();
1708 rlhs = (short *) MALLOC(nrules*sizeof(short));
1709 if (rlhs == 0) no_space();
1710 rrhs = (short *) MALLOC((nrules+1)*sizeof(short));
1711 if (rrhs == 0) no_space();
1712 rprec = (short *) REALLOC(rprec, nrules*sizeof(short));
1713 if (rprec == 0) no_space();
1714 rassoc = REALLOC(rassoc, nrules);
1715 if (rassoc == 0) no_space();
1716
1717 ritem[0] = -1;
1718 ritem[1] = goal->index;
1719 ritem[2] = 0;
1720 ritem[3] = -2;
1721 rlhs[0] = 0;
1722 rlhs[1] = 0;
1723 rlhs[2] = start_symbol;
1724 rrhs[0] = 0;
1725 rrhs[1] = 0;
1726 rrhs[2] = 1;
1727
1728 j = 4;
1729 for (i = 3; i < nrules; ++i)
1730 {
1731 rlhs[i] = plhs[i]->index;
1732 rrhs[i] = j;
1733 assoc = TOKEN;
1734 prec = 0;
1735 while (pitem[j])
1736 {
1737 ritem[j] = pitem[j]->index;
1738 if (pitem[j]->class == TERM)
1739 {
1740 prec = pitem[j]->prec;
1741 assoc = pitem[j]->assoc;
1742 }
1743 ++j;
1744 }
1745 ritem[j] = -i;
1746 ++j;
1747 if (rprec[i] == UNDEFINED)
1748 {
1749 rprec[i] = prec;
1750 rassoc[i] = assoc;
1751 }
1752 }
1753 rrhs[i] = j;
1754
1755 FREE(plhs);
1756 FREE(pitem);
1757}
1758
1759
1760print_grammar()
1761{
1762 register int i, j, k;
1763 int spacing;
1764 register FILE *f = verbose_file;
1765
1766 if (!vflag) return;
1767
1768 k = 1;
1769 for (i = 2; i < nrules; ++i)
1770 {
1771 if (rlhs[i] != rlhs[i-1])
1772 {
1773 if (i != 2) fprintf(f, "\n");
1774 fprintf(f, "%4d %s :", i - 2, symbol_name[rlhs[i]]);
1775 spacing = strlen(symbol_name[rlhs[i]]) + 1;
1776 }
1777 else
1778 {
1779 fprintf(f, "%4d ", i - 2);
1780 j = spacing;
1781 while (--j >= 0) putc(' ', f);
1782 putc('|', f);
1783 }
1784
1785 while (ritem[k] >= 0)
1786 {
1787 fprintf(f, " %s", symbol_name[ritem[k]]);
1788 ++k;
1789 }
1790 ++k;
1791 putc('\n', f);
1792 }
1793}
1794
1795
1796reader()
1797{
1798 write_section(banner);
1799 create_symbol_table();
1800 read_declarations();
1801 read_grammar();
1802 free_symbol_table();
1803 free_tags();
1804 pack_names();
1805 check_symbols();
1806 pack_symbols();
1807 pack_grammar();
1808 free_symbols();
1809 print_grammar();
1810}