386BSD 0.1 development
[unix-history] / usr / othersrc / public / zsh-2.2 / src / lex.c
CommitLineData
dbf02a84
WJ
1/*
2 *
3 * lex.c - lexical analysis
4 *
5 * This file is part of zsh, the Z shell.
6 *
7 * This software is Copyright 1992 by Paul Falstad
8 *
9 * Permission is hereby granted to copy, reproduce, redistribute or otherwise
10 * use this software as long as: there is no monetary profit gained
11 * specifically from the use or reproduction of this software, it is not
12 * sold, rented, traded or otherwise marketed, and this copyright notice is
13 * included prominently in any copy made.
14 *
15 * The author make no claims as to the fitness or correctness of this software
16 * for any use whatsoever, and it is provided as is. Any use of this software
17 * is at the user's own risk.
18 *
19 */
20
21#include "zsh.h"
22
23/* lexical state */
24
25static int xincmdpos,xincond,xincasepat,dbparens,xdbparens,xalstat;
26static char *xhlastw;
27
28static int xisfirstln, xisfirstch, xhistremmed, xhistdone,
29 xspaceflag, xstophist, xlithist, xalstackind,xhlinesz;
30static char *xhline, *xhptr;
31
32/* save the lexical state */
33
34/* is this a hack or what? */
35
36void lexsave() /**/
37{
38 xincmdpos = incmdpos;
39 xincond = incond;
40 xincasepat = incasepat;
41 xdbparens = dbparens;
42 xalstat = alstat;
43 xalstackind = alstackind;
44 xisfirstln = isfirstln;
45 xisfirstch = isfirstch;
46 xhistremmed = histremmed;
47 xhistdone = histdone;
48 xspaceflag = spaceflag;
49 xstophist = stophist;
50 xlithist = lithist;
51 xhline = hline;
52 xhptr = hptr;
53 xhlastw = hlastw;
54 xhlinesz = hlinesz;
55 inredir = 0;
56}
57
58/* restore lexical state */
59
60void lexrestore() /**/
61{
62 incmdpos = xincmdpos;
63 incond = xincond;
64 incasepat = xincasepat;
65 dbparens = xdbparens;
66 alstat = xalstat;
67 isfirstln = xisfirstln;
68 isfirstch = xisfirstch;
69 histremmed = xhistremmed;
70 histdone = xhistdone;
71 spaceflag = xspaceflag;
72 stophist = xstophist;
73 lithist = xlithist;
74 hline = xhline;
75 hptr = xhptr;
76 hlastw = xhlastw;
77 clearalstack();
78 alstackind = xalstackind;
79 hlinesz = xhlinesz;
80 lexstop = errflag = 0;
81}
82
83void yylex() /**/
84{
85 if (tok == LEXERR) return;
86 do
87 tok = gettok();
88 while (tok != ENDINPUT && exalias());
89 if (tok != NEWLIN) isnewlin = 0;
90 else isnewlin = (inbufct) ? -1 : 1;
91 if (tok == SEMI || tok == NEWLIN) tok = SEPER;
92}
93
94void ctxtlex() /**/
95{
96static int oldpos;
97
98 yylex();
99 switch (tok) {
100 case SEPER: case NEWLIN: case SEMI: case DSEMI: case AMPER:
101 case INPAR: case INBRACE: case DBAR: case DAMPER: case BAR:
102 case BARAMP: case INOUTPAR: case DO: case THEN: case ELIF:
103 case ELSE: incmdpos = 1; break;
104 case STRING: /* case ENVSTRING: */ case ENVARRAY: case OUTPAR:
105 case CASE: incmdpos = 0; break;
106 }
107 if (IS_REDIROP(tok) || tok == FOR || tok == FOREACH || tok == SELECT) {
108 inredir = 1;
109 oldpos = incmdpos;
110 incmdpos = 0;
111 } else if (inredir) {
112 incmdpos = oldpos;
113 inredir = 0;
114 }
115}
116
/*
 * Action codes stored in lexact1[]: what gettok() does with the first
 * character of a token.  Each value is also the index of the matching
 * character in the lx1 string used by initlextabs(), so the two must be
 * kept in step.
 */
#define LX1_BKSLASH	0	/* \ */
#define LX1_COMMENT	1	/* slot held by 'q' in lx1 */
#define LX1_NEWLIN	2	/* newline */
#define LX1_SEMI	3	/* ; */
#define LX1_BANG	4	/* ! */
#define LX1_AMPER	5	/* & */
#define LX1_BAR		6	/* | */
#define LX1_INPAR	7	/* ( */
#define LX1_OUTPAR	8	/* ) */
#define LX1_INBRACE	9	/* { */
#define LX1_OUTBRACE	10	/* } */
#define LX1_INBRACK	11	/* [ */
#define LX1_OUTBRACK	12	/* ] */
#define LX1_INANG	13	/* < */
#define LX1_OUTANG	14	/* > */
#define LX1_OTHER	15	/* default for every other character */

/*
 * Action codes stored in lexact2[]: what gettok() does with a character
 * inside a word.  Each value is also the index of the matching
 * character in the lx2 string used by initlextabs().
 */
#define LX2_BREAK	0	/* ends the word; also set for ; and & */
#define LX2_OUTPAR	1	/* ) */
#define LX2_BAR		2	/* | */
#define LX2_STRING	3	/* $ */
#define LX2_INBRACK	4	/* [ */
#define LX2_OUTBRACK	5	/* ] */
#define LX2_TILDE	6	/* ~ */
#define LX2_INPAR	7	/* ( */
#define LX2_INBRACE	8	/* { */
#define LX2_OUTBRACE	9	/* } */
#define LX2_OUTANG	10	/* > */
#define LX2_INANG	11	/* < */
#define LX2_EQUALS	12	/* = */
#define LX2_BKSLASH	13	/* \ */
#define LX2_QUOTE	14	/* ' */
#define LX2_DQUOTE	15	/* " */
#define LX2_BQUOTE	16	/* ` */
#define LX2_OTHER	17	/* default for every other character */
152
153unsigned char lexact1[256],lexact2[256],lextok2[256];
154
155void initlextabs() /**/
156{
157int t0;
158static char *lx1 = "\\q\n;!&|(){}[]<>xx";
159static char *lx2 = "x)|$[]~({}><=\\\'\"`x";
160
161 for (t0 = 0; t0 != 256; t0++) {
162 lexact1[t0] = LX1_OTHER;
163 lexact2[t0] = LX2_OTHER;
164 lextok2[t0] = t0;
165 }
166 for (t0 = 0; lx1[t0]; t0++)
167 if (lx1[t0] != 'x')
168 lexact1[lx1[t0]] = t0;
169 for (t0 = 0; lx2[t0]; t0++)
170 if (lx2[t0] != 'x')
171 lexact2[lx2[t0]] = t0;
172 lexact2[';'] = LX2_BREAK;
173 lexact2['&'] = LX2_BREAK;
174 lextok2[','] = Comma;
175 lextok2['*'] = Star;
176 lextok2['?'] = Quest;
177 lextok2['{'] = Inbrace;
178 lextok2['['] = Inbrack;
179 lextok2['$'] = String;
180}
181
182/* initialize lexical state */
183
184void lexinit() /**/
185{
186 incond = incasepat = nocorrect =
187 dbparens = alstat = lexstop = 0;
188 incmdpos = 1;
189 tok = ENDINPUT;
190 if (isset(EXTENDEDGLOB))
191 {
192 lextok2['#'] = Pound;
193 lextok2['^'] = Hat;
194 }
195 else
196 {
197 lextok2['#'] = '#';
198 lextok2['^'] = '^';
199 }
200}
201
202int len = 0,bsiz = 256;
203char *bptr;
204
205/* add a char to the string buffer */
206
207void add(c) /**/
208int c;
209{
210 *bptr++ = c;
211 if (bsiz == ++len)
212 {
213 int newbsiz;
214
215 newbsiz = bsiz * 8;
216 while (newbsiz < inbufct)
217 newbsiz *= 2;
218 bptr = len+(tokstr = hrealloc(tokstr,bsiz,newbsiz));
219 bsiz = newbsiz;
220 }
221}
222
223static void unadd()
224{
225 bptr--; len--;
226}
227
/*
 * Read one raw token from the input (via hgetc) and return its code.
 *
 * Leading blanks are skipped.  The first character is then dispatched
 * through lexact1[] to recognise the operator tokens (;, ;;, &, &&, |,
 * ||, |&, parentheses, braces, [[ and ]], and the < and > redirection
 * families).  Anything that is not an operator starts a word: its
 * characters are classified by lexact2[], translated through lextok2[]
 * and collected into tokstr with add() until a word-ending character is
 * met, which is pushed back with hungetc().
 *
 * Returns STRING, ENVSTRING or ENVARRAY for words, the operator's token
 * for operators, NEWLIN for a newline, LEXERR after a lexical error,
 * and ENDINPUT (or LEXERR if errflag is set) when lexstop is set before
 * a token starts.  For the < and > operators tokfd is set to the single
 * digit that immediately preceded the operator, or to -1 if none did.
 */
228int gettok() /**/
229{
 /* bct, pct, brct: open { ( [ seen so far inside the current word */
230int bct = 0,pct = 0,brct = 0;
 /* intpos is nonzero for the first character of a word and again for
    the character right after the = that makes the word an ENVSTRING */
231int c,d,intpos = 1;
232int peekfd = -1,peek,ninbracks;
233
 /* the LX1_BKSLASH case jumps back here after a backslash-newline */
234beginning:
235 hlastw = NULL;
236 tokstr = NULL;
237 parbegin = -1;
 /* skip blanks in front of the token */
238 while (iblank(c = hgetc()) && !lexstop);
239 isfirstln = 0;
240 wordbeg = inbufct;
241 hwbegin();
242 hwaddc(c);
 /* dbparens was set by the previous call, which returned STRING "let"
    for a (( in command position (see LX1_INPAR below).  Collect what
    follows, up to the closing )), as one STRING. */
243 if (dbparens) /* handle ((...)) */
244 {
 /* the two opening parens have already been consumed */
245 pct = 2;
246 peek = STRING;
247 len = dbparens = 0;
248 bptr = tokstr = ncalloc(bsiz = 256);
249 for (;;)
250 {
251 if (c == '(')
252 pct++;
253 else if (c == ')')
254 pct--;
255 else if (c == '\n')
256 {
257 zerr("parse error: )) expected",NULL,0);
258 peek = LEXERR;
259 return peek;
260 }
261 else if (c == '$')
262 c = Qstring;
 /* neither ) of the closing pair is stored: the first takes pct to 1,
    the second takes it to 0 and ends the loop */
263 if (pct >= 2)
264 add(c);
265 if (pct)
266 c = hgetc();
267 else
268 break;
269 }
270 *bptr = '\0';
271 return peek;
272 }
273 if (idigit(c)) /* handle 1< foo */
274 {
 /* look at the next character without consuming it */
275 d = hgetc();
276 hungetc(d);
277 lexstop = 0;
278 if (d == '>' || d == '<')
279 {
 /* the digit is a file descriptor; continue with the operator */
280 peekfd = c-'0';
281 c = hgetc();
282 }
283 }
284
285 /* chars in initial position in word */
286
 /* comment: discard the rest of the line */
287 if (c == hashchar &&
288 (isset(INTERACTIVECOMMENTS) ||
289 (!zleparse && (!interact || unset(SHINSTDIN) || strin))))
290 {
291 /* changed hgetch to hgetc so comments appear in history */
292 stophist = 1;
293 while ((c = hgetc()) != '\n' && !lexstop);
294 if (c == '\n') {
295 hwaddc('\n');
296 peek = NEWLIN;
297 } else {
 /* input stopped inside the comment; errflag is set on this path */
298 peek = (errflag) ? LEXERR : ENDINPUT;
299 errflag = 1;
300 }
301 return peek;
302 }
303 if (lexstop)
304 return (errflag) ? LEXERR : ENDINPUT;
 /* operator recognition; a case that ends in break instead of return
    falls out of the switch into the word loop below */
305 switch (lexact1[(unsigned char) c])
306 {
307 case LX1_BKSLASH:
 /* backslash-newline: start over on the next line; any other
    backslash begins a word */
308 d = hgetc();
309 if (d == '\n')
310 goto beginning;
311 hungetc(d);
312 break;
313 case LX1_NEWLIN: return NEWLIN;
314 case LX1_SEMI:
 /* ; or ;; */
315 d = hgetc();
316 if (d != ';')
317 {
318 hungetc(d);
319 return SEMI;
320 }
321 return DSEMI;
322 case LX1_BANG:
 /* ! is BANG only when followed by a blank and in command position
    or inside a condition; otherwise it begins a word */
323 d = hgetc();
324 hungetc(d);
325 if (!inblank(d))
326 break;
327 if (incmdpos || incond)
328 return BANG;
329 break;
330 case LX1_AMPER:
 /* & or && */
331 d = hgetc();
332 if (d != '&')
333 {
334 hungetc(d);
335 return AMPER;
336 }
337 return DAMPER;
338 case LX1_BAR:
 /* ||, |& or | */
339 d = hgetc();
340 if (d == '|')
341 return DBAR;
342 else if (d == '&')
343 return BARAMP;
344 hungetc(d);
345 return BAR;
346 case LX1_INPAR:
347 d = hgetc();
348 if (d == '(' && incmdpos)
349 {
 /* (( in command position: hand back the word "let" now and let
    the next call collect the text up to )) */
350 tokstr = strdup("let");
351 dbparens = 1;
352 return STRING;
353 }
354 else if (d == ')')
355 return INOUTPAR;
356 hungetc(d);
 /* ( is INPAR only in a condition or in command position */
357 if (!(incond || incmdpos))
358 break;
359 return INPAR;
360 case LX1_OUTPAR: return OUTPAR;
361 case LX1_INBRACE: if (!incmdpos) break; return INBRACE;
362 case LX1_OUTBRACE: return OUTBRACE;
363 case LX1_INBRACK:
 /* [[ is recognised only in command position */
364 if (!incmdpos)
365 break;
366 d = hgetc();
367 if (d == '[')
368 return DINBRACK;
369 hungetc(d);
370 break;
371 case LX1_OUTBRACK:
 /* ]] is recognised only inside a condition */
372 if (!incond)
373 break;
374 d = hgetc();
375 if (d == ']')
376 return DOUTBRACK;
377 hungetc(d);
378 break;
379 case LX1_INANG:
380 d = hgetc();
 /* <( outside command position, and any < while in a case pattern,
    is left for the word loop */
381 if ((!incmdpos && d == '(') || incasepat) {
382 hungetc(d);
383 break;
384 } else if (d == '<') {
385 int e = hgetc();
386
387 if (e == '(') {
 /* <<( : return a single < and leave <( in the input */
388 hungetc(e);
389 hungetc(d);
390 peek = INANG;
391 } else if (e == '<')
392 peek = TRINANG;
393 else if (e == '-')
394 peek = DINANGDASH;
395 else {
396 hungetc(e);
397 peek = DINANG;
398 }
399 } else if (d == '&')
400 peek = INANGAMP;
401 else {
402 peek = INANG;
403 hungetc(d);
404 }
405 tokfd = peekfd;
406 return peek;
407 case LX1_OUTANG:
408 d = hgetc();
 /* >( is left for the word loop */
409 if (d == '(')
410 {
411 hungetc(d);
412 break;
413 }
414 else if (d == '&')
415 {
 /* >&! or >& */
416 d = hgetc();
417 if (d == '!')
418 peek = OUTANGAMPBANG;
419 else
420 {
421 hungetc(d);
422 peek = OUTANGAMP;
423 }
424 }
425 else if (d == '!')
426 peek = OUTANGBANG;
427 else if (d == '>')
428 {
 /* >>&!, >>&, >>!, >>( or >> */
429 d = hgetc();
430 if (d == '&')
431 {
432 d = hgetc();
433 if (d == '!')
434 peek = DOUTANGAMPBANG;
435 else
436 {
437 hungetc(d);
438 peek = DOUTANGAMP;
439 }
440 }
441 else if (d == '!')
442 peek = DOUTANGBANG;
443 else if (d == '(')
444 {
 /* >>( : return a single > and leave >( in the input */
445 hungetc(d);
446 hungetc('>');
447 peek = OUTANG;
448 }
449 else
450 {
451 hungetc(d);
452 peek = DOUTANG;
453 if (isset(NOCLOBBER)) hwaddc('!');
454 }
455 }
456 else
457 {
458 hungetc(d);
459 peek = OUTANG;
460 if (isset(NOCLOBBER)) hwaddc('!');
461 }
462 tokfd = peekfd;
463 return peek;
464 }
465
466 /* we've started a string, now get the rest of it, performing
467 tokenization */
468
469 peek = STRING;
470 len = 0;
471 bptr = tokstr = ncalloc(bsiz = 256);
 /* one iteration per input character c; a case that ends in break
    goes on to add(c) at the bottom, "goto brk" ends the word, and
    "continue" skips the bottom of the loop */
472 for(;;)
473 {
474 int act;
 /* NOTE(review): this d shadows the function-level d */
475 int d;
476
477 if (inblank(c))
478 act = LX2_BREAK;
479 else
480 {
481 act = lexact2[(unsigned char) c];
482 c = lextok2[(unsigned char) c];
483 }
484 switch (act)
485 {
486 case LX2_BREAK: goto brk;
487 case LX2_OUTPAR:
 /* a ) with no ( open in this word ends the word */
488 if (!pct)
489 goto brk;
490 c = Outpar;
491 pct--;
492 break;
493 case LX2_BAR:
 /* | ends the word unless inside parens or in a case pattern */
494 if (!pct && !incasepat)
495 goto brk;
496 c = Bar;
497 break;
498 case LX2_STRING:
499 d = hgetc();
500 if (d == '[')
501 {
 /* $[ ... ] : copy up to the matching ], counting nested [ ] */
502 add(String);
503 add(Inbrack);
504 ninbracks = 1;
505 while (ninbracks && (c = hgetc()) && !lexstop) {
506 if (c == '[') ninbracks++;
507 else if (c == ']') ninbracks--;
508 if (ninbracks) add(c);
509 }
510 c = Outbrack;
511 }
512 else if (d == '(')
513 {
 /* $( ... ) : skipcomm() copies the parenthesised text */
514 add(String);
515 if (skipcomm()) { peek = LEXERR; goto brk; }
516 c = Outpar;
517 }
518 else
519 hungetc(d);
520 break;
521 case LX2_INBRACK: brct++; break;
522 case LX2_OUTBRACK:
 /* inside a condition, a ] with no [ open ends the word */
523 if (incond && !brct)
524 goto brk;
525 brct--;
526 c = Outbrack;
527 break;
528 case LX2_TILDE: /* if (intpos) */ c = Tilde; break;
529 case LX2_INPAR:
530 d = hgetc();
531 hungetc(d);
 /* () ends the word, as does ( in command position unless the word
    is already an ENVSTRING */
532 if (d == ')' || (incmdpos && peek != ENVSTRING))
533 goto brk;
534 pct++;
535 c = Inpar;
536 break;
537 case LX2_INBRACE: bct++; break;
538 case LX2_OUTBRACE:
 /* a } with no { open in this word ends the word */
539 if (!bct)
540 goto brk;
541 bct--;
542 c = Outbrace;
543 break;
544 case LX2_OUTANG:
 /* only >( stays inside the word; any other > ends it */
545 d = hgetc();
546 if (d != '(')
547 {
548 hungetc(d);
549 goto brk;
550 }
551 add(Outang);
552 if (skipcomm()) { peek = LEXERR; goto brk; }
553 c = Outpar;
554 break;
555 case LX2_INANG:
 /* < stays inside the word only before a digit, -, >, ( or ) */
556 d = hgetc();
557 if (!(idigit(d) || d == '-' || d == '>' || d == '(' || d == ')'))
558 {
559 hungetc(d);
560 goto brk;
561 }
562 c = Inang;
563 if (d == '(')
564 {
565 add(c);
566 if (skipcomm()) { peek = LEXERR; goto brk; }
567 c = Outpar;
568 }
569 else if (d == ')')
570 hungetc(d);
571 else
572 {
 /* copy everything up to the closing > */
573 add(c);
574 c = d;
575 while (c != '>' && !lexstop)
576 add(c),c = hgetc();
577 c = Outang;
578 }
579 break;
580 case LX2_EQUALS:
581 if (intpos)
582 {
 /* = at the start of the word or right after an assignment's = */
583 d = hgetc();
584 if (d != '(')
585 {
586 hungetc(d);
587 c = Equals;
588 }
589 else
590 {
591 add(Equals);
592 if (skipcomm()) { peek = LEXERR; goto brk; }
593 c = Outpar;
594 }
595 }
596 else if (peek != ENVSTRING && incmdpos)
597 {
 /* first = of a word in command position: name=( returns ENVARRAY
    at once with the text before the = in tokstr; otherwise the word
    becomes an ENVSTRING */
598 d = hgetc();
599 if (d == '(' && incmdpos)
600 {
601 *bptr = '\0';
602 return ENVARRAY;
603 }
604 hungetc(d);
605 peek = ENVSTRING;
 /* 2, so that intpos is still 1 for the next character after the
    decrement at the bottom of the loop */
606 intpos = 2;
607 }
608 break;
609 case LX2_BKSLASH:
 /* backslash-newline is dropped; otherwise the next character is
    stored as is and the backslash itself is not stored */
610 c = hgetc();
611 if (c == '\n')
612 {
613 c = hgetc();
614 continue;
615 }
616 add(c);
617 c = hgetc();
618 continue;
619 case LX2_QUOTE:
620 add(Nularg);
621
622 /* we add the Nularg to prevent this:
623
624 echo $PA'TH'
625
626 from printing the path. */
627
628 for (;;) {
629 while ((c = hgetc()) != '\'' && !lexstop) {
 /* with CSHJUNKIEQUOTES a newline stops the scan (leading to the
    "unmatched" error below) unless the character before it is a
    backslash, which is then removed */
630 if (isset(CSHJUNKIEQUOTES) && c == '\n') {
631 if (bptr[-1] == '\\') unadd(); else break;
632 }
633 add(c);
634 }
635 if (c != '\'') {
636 zerr("unmatched \'",NULL,0);
637 peek = LEXERR;
638 goto brk;
639 }
 /* with RCQUOTES set, '' inside the quotes stores one ' and the
    quoted text continues */
640 d = hgetc();
641 if (d != '\'' || unset(RCQUOTES)) break;
642 add(c);
643 }
644 hungetc(d);
645 c = Nularg;
646 break;
647 case LX2_DQUOTE:
648 add(Nularg);
649 while ((c = hgetc()) != '\"' && !lexstop)
650 if (c == '\\')
651 {
 /* backslash-newline is dropped; the backslash is kept unless it
    precedes one of $ \ " ` */
652 c = hgetc();
653 if (c != '\n')
654 {
655 if (c != '$' && c != '\\' && c != '\"' && c != '`')
656 add('\\');
657 add(c);
658 }
659 }
660 else {
661 if (isset(CSHJUNKIEQUOTES) && c == '\n') {
662 if (bptr[-1] == '\\') unadd(); else break;
663 }
664 if (c == '$') {
665 d = hgetc();
666 if (d == '(') {
667 add(Qstring);
668 if (skipcomm()) { peek = LEXERR; goto brk; }
669 c = Outpar;
670 } else if (d == '[') {
 /* unlike the unquoted case above, nested [ ] are not counted here */
671 add(String);
672 add(Inbrack);
673 while ((c = hgetc()) != ']' && !lexstop)
674 add(c);
675 c = Outbrack;
676 } else {
677 c = Qstring;
678 hungetc(d);
679 }
680 } else if (c == '`')
681 c = Qtick;
682 add(c);
683 }
684 if (c != '\"') {
685 zerr("unmatched \"",NULL,0);
686 peek = LEXERR;
687 goto brk;
688 }
689 c = Nularg;
690 break;
691 case LX2_BQUOTE:
692 add(Tick);
 /* parbegin holds inbufct while the backquoted text is open and is
    reset to -1 once the closing ` is found */
693 parbegin = inbufct;
694 while ((c = hgetc()) != '`' && !lexstop)
695 if (c == '\\')
696 {
 /* backslash-newline is dropped; the backslash is kept unless it
    precedes one of ` \ $ */
697 c = hgetc();
698 if (c != '\n')
699 {
700 if (c != '`' && c != '\\' && c != '$')
701 add('\\');
702 add(c);
703 }
704 }
705 else {
706 if (isset(CSHJUNKIEQUOTES) && c == '\n') {
707 if (bptr[-1] == '\\') unadd(); else break;
708 }
709 add(c);
710 }
711 if (c != '`') {
 /* the message is suppressed while zleparse is set */
712 if (!zleparse) zerr("unmatched `",NULL,0);
713 peek = LEXERR;
714 goto brk;
715 }
716 c = Tick;
717 parbegin = -1;
718 break;
719 }
 /* store the (possibly translated) character and fetch the next one */
720 add(c);
721 c = hgetc();
722 if (intpos)
723 intpos--;
724 if (lexstop)
725 break;
726 }
 /* end of word: push back the character that ended it and terminate
    tokstr */
727brk:
728 hungetc(c);
729 *bptr = '\0';
730 return peek;
731}
732
733/* expand aliases, perhaps */
734
735int exalias() /**/
736{
737struct alias *an;
738char *s,*t;
739
740 s = yytext = hwadd();
741 for (t = s; *t && *t != HISTSPACE; t++);
742 if (!*t)
743 t = NULL;
744 else
745 *t = '\0';
746 if (interact && isset(SHINSTDIN) && !strin && !incasepat && tok == STRING &&
747 (isset(CORRECTALL) || (isset(CORRECT) && incmdpos)) && !nocorrect)
748 spckword(&tokstr,&s,&t,!incmdpos,1);
749 if (zleparse && !alstackind) {
750 int zp = zleparse;
751 gotword(s);
752 if (zp && !zleparse) {
753 if (t) *t = HISTSPACE;
754 return 0;
755 }
756 }
757 an = gethnode(s,aliastab);
758 if (t) *t = HISTSPACE;
759 if (alstackind != MAXAL && an && !an->inuse)
760 if (!(an->cmd && !incmdpos && alstat != ALSTAT_MORE)) {
761 if (an->cmd < 0) {
762 tok = DO-an->cmd-1;
763 return 0;
764 } else {
765 an->inuse = 1;
766 hungets(ALPOPS);
767 hungets((alstack[alstackind++] = an)->text);
768 alstat = 0;
769 /* remove from history if it begins with space */
770 if (isset(HISTIGNORESPACE) && an->text[0] == ' ') remhist();
771 lexstop = 0;
772 return 1;
773 }
774 }
775 return 0;
776}
777
778/* skip (...) */
779
780int skipcomm() /**/
781{
782int pct = 1,c;
783
784 parbegin = inbufct;
785 c = Inpar;
786 do
787 {
788 add(c);
789 c = hgetc();
790 if (itok(c) || lexstop)
791 break;
792 else if (c == '(') pct++;
793 else if (c == ')') pct--;
794 else if (c == '\\')
795 {
796 add(c);
797 c = hgetc();
798 }
799 else if (c == '\'')
800 {
801 add(c);
802 while ((c = hgetc()) != '\'' && !lexstop)
803 add(c);
804 }
805 else if (c == '\"')
806 {
807 add(c);
808 while ((c = hgetc()) != '\"' && !lexstop)
809 if (c == '\\')
810 {
811 add(c);
812 add(hgetc());
813 }
814 else add(c);
815 }
816 else if (c == '`')
817 {
818 add(c);
819 while ((c = hgetc()) != '`' && !lexstop)
820 if (c == '\\') add(c), add(hgetc());
821 else add(c);
822 }
823 }
824 while(pct);
825 if (!lexstop) parbegin = -1;
826 return lexstop;
827}
828