add destdin and vgrindt
[unix-history] / usr / src / usr.bin / ex / ex_re.c
CommitLineData
7c4625ef 1/* Copyright (c) 1980 Regents of the University of California */
b41bbcb5 2static char *sccsid = "@(#)ex_re.c 5.1 %G%";
22316d4f
MH
3#include "ex.h"
4#include "ex_re.h"
5
6/*
7 * Global, substitute and regular expressions.
8 * Very similar to ed, with some re extensions and
9 * confirmed substitute.
10 */
11global(k)
12 bool k;
13{
14 register char *gp;
15 register int c;
16 register line *a1;
17 char globuf[GBSIZE], *Cwas;
18 int lines = lineDOL();
19 int oinglobal = inglobal;
20 char *oglobp = globp;
21
22 Cwas = Command;
23 /*
24 * States of inglobal:
25 * 0: ordinary - not in a global command.
26 * 1: text coming from some buffer, not tty.
27 * 2: like 1, but the source of the buffer is a global command.
28 * Hence you're only in a global command if inglobal==2. This
29 * strange sounding convention is historically derived from
30 * everybody simulating a global command.
31 */
32 if (inglobal==2)
33 error("Global within global@not allowed");
34 markDOT();
35 setall();
36 nonzero();
37 if (skipend())
38 error("Global needs re|Missing regular expression for global");
39 c = getchar();
40 ignore(compile(c, 1));
41 savere(scanre);
42 gp = globuf;
43 while ((c = getchar()) != '\n') {
44 switch (c) {
45
46 case EOF:
47 c = '\n';
48 goto brkwh;
49
50 case '\\':
51 c = getchar();
52 switch (c) {
53
54 case '\\':
55 ungetchar(c);
56 break;
57
58 case '\n':
59 break;
60
61 default:
62 *gp++ = '\\';
63 break;
64 }
65 break;
66 }
67 *gp++ = c;
68 if (gp >= &globuf[GBSIZE - 2])
69 error("Global command too long");
70 }
71brkwh:
72 ungetchar(c);
73out:
74 newline();
75 *gp++ = c;
76 *gp++ = 0;
887e3e0d 77 saveall();
22316d4f
MH
78 inglobal = 2;
79 for (a1 = one; a1 <= dol; a1++) {
80 *a1 &= ~01;
81 if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k)
82 *a1 |= 01;
83 }
84 /* should use gdelete from ed to avoid n**2 here on g/.../d */
22316d4f
MH
85 if (inopen)
86 inopen = -1;
87 for (a1 = one; a1 <= dol; a1++) {
88 if (*a1 & 01) {
89 *a1 &= ~01;
90 dot = a1;
91 globp = globuf;
92 commands(1, 1);
93 a1 = zero;
94 }
95 }
96 globp = oglobp;
97 inglobal = oinglobal;
98 endline = 1;
99 Command = Cwas;
100 netchHAD(lines);
101 setlastchar(EOF);
102 if (inopen) {
103 ungetchar(EOF);
104 inopen = 1;
105 }
106}
107
d266c416 108bool cflag;
22316d4f
MH
109int scount, slines, stotal;
110
111substitute(c)
112 int c;
113{
114 register line *addr;
115 register int n;
116 int gsubf;
117
118 gsubf = compsub(c);
887e3e0d 119 if(FIXUNDO)
22316d4f
MH
120 save12(), undkind = UNDCHANGE;
121 stotal = 0;
122 slines = 0;
123 for (addr = addr1; addr <= addr2; addr++) {
124 scount = 0;
125 if (dosubcon(0, addr) == 0)
126 continue;
127 if (gsubf) {
128#ifdef notdef
129 /*
130 * should check but loc2 is already munged.
131 * This needs a fancier check later.
132 */
133 if (loc1 == loc2)
134 error("substitution loop");
135#endif
136 while (*loc2)
137 if (dosubcon(1, addr) == 0)
138 break;
139 }
140 if (scount) {
141 stotal += scount;
142 slines++;
143 putmark(addr);
144 n = append(getsub, addr);
145 addr += n;
146 addr2 += n;
147 }
148 }
d266c416 149 if (stotal == 0 && !inglobal && !cflag)
22316d4f
MH
150 error("Fail|Substitute pattern match failed");
151 snote(stotal, slines);
152 return (stotal);
153}
154
155compsub(ch)
156{
157 register int seof, c, uselastre;
158 static int gsubf;
159
160 if (!value(EDCOMPATIBLE))
d266c416 161 gsubf = cflag = 0;
22316d4f
MH
162 uselastre = 0;
163 switch (ch) {
164
165 case 's':
166 ignore(skipwh());
167 seof = getchar();
168 if (endcmd(seof) || any(seof, "gcr")) {
169 ungetchar(seof);
170 goto redo;
171 }
172 if (isalpha(seof) || isdigit(seof))
173 error("Substitute needs re|Missing regular expression for substitute");
174 seof = compile(seof, 1);
175 uselastre = 1;
176 comprhs(seof);
177 gsubf = 0;
d266c416 178 cflag = 0;
22316d4f
MH
179 break;
180
181 case '~':
182 uselastre = 1;
183 /* fall into ... */
184 case '&':
185 redo:
186 if (re.Expbuf[0] == 0)
187 error("No previous re|No previous regular expression");
d266c416
MH
188 if (subre.Expbuf[0] == 0)
189 error("No previous substitute re|No previous substitute to repeat");
22316d4f
MH
190 break;
191 }
192 for (;;) {
193 c = getchar();
194 switch (c) {
195
196 case 'g':
197 gsubf = !gsubf;
198 continue;
199
200 case 'c':
d266c416 201 cflag = !cflag;
22316d4f
MH
202 continue;
203
204 case 'r':
205 uselastre = 1;
206 continue;
207
208 default:
209 ungetchar(c);
210 setcount();
211 newline();
212 if (uselastre)
213 savere(subre);
214 else
215 resre(subre);
216 return (gsubf);
217 }
218 }
219}
220
221comprhs(seof)
222 int seof;
223{
224 register char *rp, *orp;
225 register int c;
226 char orhsbuf[LBSIZE / 2];
227
228 rp = rhsbuf;
229 CP(orhsbuf, rp);
230 for (;;) {
231 c = getchar();
232 if (c == seof)
233 break;
234 switch (c) {
235
236 case '\\':
237 c = getchar();
238 if (c == EOF) {
239 ungetchar(c);
240 break;
241 }
242 if (value(MAGIC)) {
243 /*
244 * When "magic", \& turns into a plain &,
245 * and all other chars work fine quoted.
246 */
247 if (c != '&')
248 c |= QUOTE;
249 break;
250 }
251magic:
252 if (c == '~') {
253 for (orp = orhsbuf; *orp; *rp++ = *orp++)
254 if (rp >= &rhsbuf[LBSIZE / 2 + 1])
255 goto toobig;
256 continue;
257 }
258 c |= QUOTE;
259 break;
260
261 case '\n':
262 case EOF:
887e3e0d
MH
263 if (!(globp && globp[0])) {
264 ungetchar(c);
265 goto endrhs;
266 }
22316d4f
MH
267
268 case '~':
269 case '&':
270 if (value(MAGIC))
271 goto magic;
272 break;
273 }
274 if (rp >= &rhsbuf[LBSIZE / 2 - 1])
275toobig:
276 error("Replacement pattern too long@- limit 256 characters");
277 *rp++ = c;
278 }
279endrhs:
280 *rp++ = 0;
281}
282
283getsub()
284{
285 register char *p;
286
287 if ((p = linebp) == 0)
288 return (EOF);
289 strcLIN(p);
290 linebp = 0;
291 return (0);
292}
293
294dosubcon(f, a)
295 bool f;
296 line *a;
297{
298
299 if (execute(f, a) == 0)
300 return (0);
301 if (confirmed(a)) {
302 dosub();
303 scount++;
304 }
305 return (1);
306}
307
308confirmed(a)
309 line *a;
310{
311 register int c, ch;
312
d266c416 313 if (cflag == 0)
22316d4f
MH
314 return (1);
315 pofix();
316 pline(lineno(a));
317 if (inopen)
318 putchar('\n' | QUOTE);
319 c = column(loc1 - 1);
320 ugo(c - 1 + (inopen ? 1 : 0), ' ');
321 ugo(column(loc2 - 1) - c, '^');
322 flush();
323 ch = c = getkey();
324again:
325 if (c == '\r')
326 c = '\n';
327 if (inopen)
328 putchar(c), flush();
329 if (c != '\n' && c != EOF) {
330 c = getkey();
331 goto again;
332 }
333 noteinp();
334 return (ch == 'y');
335}
336
337getch()
338{
339 char c;
340
341 if (read(2, &c, 1) != 1)
342 return (EOF);
343 return (c & TRIM);
344}
345
/*
 * Output the character `with' cnt times.
 * Nothing is written when cnt is zero or negative.
 */
ugo(cnt, with)
	int with;
	int cnt;
{

	if (cnt > 0)
		do
			putchar(with);
		while (--cnt > 0);
}

357int casecnt;
358bool destuc;
359
360dosub()
361{
362 register char *lp, *sp, *rp;
363 int c;
364
365 lp = linebuf;
366 sp = genbuf;
367 rp = rhsbuf;
368 while (lp < loc1)
369 *sp++ = *lp++;
370 casecnt = 0;
371 while (c = *rp++) {
372 if (c & QUOTE)
373 switch (c & TRIM) {
374
375 case '&':
376 sp = place(sp, loc1, loc2);
377 if (sp == 0)
378 goto ovflo;
379 continue;
380
381 case 'l':
382 casecnt = 1;
383 destuc = 0;
384 continue;
385
386 case 'L':
387 casecnt = LBSIZE;
388 destuc = 0;
389 continue;
390
391 case 'u':
392 casecnt = 1;
393 destuc = 1;
394 continue;
395
396 case 'U':
397 casecnt = LBSIZE;
398 destuc = 1;
399 continue;
400
401 case 'E':
402 case 'e':
403 casecnt = 0;
404 continue;
405 }
406 if (c < 0 && (c &= TRIM) >= '1' && c < nbra + '1') {
407 sp = place(sp, braslist[c - '1'], braelist[c - '1']);
408 if (sp == 0)
409 goto ovflo;
410 continue;
411 }
412 if (casecnt)
413 *sp++ = fixcase(c & TRIM);
414 else
415 *sp++ = c & TRIM;
416 if (sp >= &genbuf[LBSIZE])
417ovflo:
44232d5b 418 error("Line overflow@in substitute");
22316d4f
MH
419 }
420 lp = loc2;
421 loc2 = sp + (linebuf - genbuf);
422 while (*sp++ = *lp++)
423 if (sp >= &genbuf[LBSIZE])
424 goto ovflo;
425 strcLIN(genbuf);
426}
427
428fixcase(c)
429 register int c;
430{
431
432 if (casecnt == 0)
433 return (c);
434 casecnt--;
435 if (destuc) {
436 if (islower(c))
437 c = toupper(c);
438 } else
439 if (isupper(c))
440 c = tolower(c);
441 return (c);
442}
443
444char *
445place(sp, l1, l2)
446 register char *sp, *l1, *l2;
447{
448
449 while (l1 < l2) {
450 *sp++ = fixcase(*l1++);
451 if (sp >= &genbuf[LBSIZE])
452 return (0);
453 }
454 return (sp);
455}
456
/*
 * Report how many substitutions were made.
 *
 * Nothing is printed unless notable(total) holds.  The line count is
 * only added when it is neither 1 nor equal to the total.
 */
snote(total, lines)
	register int total, lines;
{

	if (!notable(total))
		return;
	printf(mesg("%d subs|%d substitutions"), total);
	if (lines != 1 && lines != total)
		printf(" on %d lines", lines);
	noonl();
	flush();
}

470compile(eof, oknl)
471 int eof;
472 int oknl;
473{
474 register int c;
475 register char *ep;
476 char *lastep;
477 char bracket[NBRA], *bracketp, *rhsp;
478 int cclcnt;
479
480 if (isalpha(eof) || isdigit(eof))
481 error("Regular expressions cannot be delimited by letters or digits");
482 ep = expbuf;
483 c = getchar();
484 if (eof == '\\')
485 switch (c) {
486
487 case '/':
488 case '?':
489 if (scanre.Expbuf[0] == 0)
490error("No previous scan re|No previous scanning regular expression");
491 resre(scanre);
492 return (c);
493
494 case '&':
495 if (subre.Expbuf[0] == 0)
496error("No previous substitute re|No previous substitute regular expression");
497 resre(subre);
498 return (c);
499
500 default:
501 error("Badly formed re|Regular expression \\ must be followed by / or ?");
502 }
503 if (c == eof || c == '\n' || c == EOF) {
504 if (*ep == 0)
505 error("No previous re|No previous regular expression");
506 if (c == '\n' && oknl == 0)
507 error("Missing closing delimiter@for regular expression");
508 if (c != eof)
509 ungetchar(c);
510 return (eof);
511 }
512 bracketp = bracket;
513 nbra = 0;
514 circfl = 0;
515 if (c == '^') {
516 c = getchar();
517 circfl++;
518 }
519 ungetchar(c);
520 for (;;) {
521 if (ep >= &expbuf[ESIZE - 2])
522complex:
523 cerror("Re too complex|Regular expression too complicated");
524 c = getchar();
525 if (c == eof || c == EOF) {
526 if (bracketp != bracket)
527cerror("Unmatched \\(|More \\('s than \\)'s in regular expression");
d266c416 528 *ep++ = CEOFC;
22316d4f
MH
529 if (c == EOF)
530 ungetchar(c);
531 return (eof);
532 }
533 if (value(MAGIC)) {
534 if (c != '*' || ep == expbuf)
535 lastep = ep;
536 } else
537 if (c != '\\' || peekchar() != '*' || ep == expbuf)
538 lastep = ep;
539 switch (c) {
540
541 case '\\':
542 c = getchar();
543 switch (c) {
544
545 case '(':
546 if (nbra >= NBRA)
547cerror("Awash in \\('s!|Too many \\('d subexressions in a regular expression");
548 *bracketp++ = nbra;
549 *ep++ = CBRA;
550 *ep++ = nbra++;
551 continue;
552
553 case ')':
554 if (bracketp <= bracket)
555cerror("Extra \\)|More \\)'s than \\('s in regular expression");
556 *ep++ = CKET;
557 *ep++ = *--bracketp;
558 continue;
559
560 case '<':
561 *ep++ = CBRC;
562 continue;
563
564 case '>':
565 *ep++ = CLET;
566 continue;
567 }
568 if (value(MAGIC) == 0)
569magic:
570 switch (c) {
571
572 case '.':
573 *ep++ = CDOT;
574 continue;
575
576 case '~':
577 rhsp = rhsbuf;
578 while (*rhsp) {
579 if (*rhsp & QUOTE) {
580 c = *rhsp & TRIM;
581 if (c == '&')
582error("Replacement pattern contains &@- cannot use in re");
583 if (c >= '1' && c <= '9')
584error("Replacement pattern contains \\d@- cannot use in re");
585 }
586 if (ep >= &expbuf[ESIZE-2])
587 goto complex;
588 *ep++ = CCHR;
589 *ep++ = *rhsp++ & TRIM;
590 }
591 continue;
592
593 case '*':
594 if (ep == expbuf)
595 break;
596 if (*lastep == CBRA || *lastep == CKET)
597cerror("Illegal *|Can't * a \\( ... \\) in regular expression");
598 if (*lastep == CCHR && (lastep[1] & QUOTE))
599cerror("Illegal *|Can't * a \\n in regular expression");
600 *lastep |= STAR;
601 continue;
602
603 case '[':
604 *ep++ = CCL;
605 *ep++ = 0;
606 cclcnt = 1;
607 c = getchar();
608 if (c == '^') {
609 c = getchar();
610 ep[-2] = NCCL;
611 }
612 if (c == ']')
613cerror("Bad character class|Empty character class '[]' or '[^]' cannot match");
614 while (c != ']') {
615 if (c == '\\' && any(peekchar(), "]-^\\"))
616 c = getchar() | QUOTE;
617 if (c == '\n' || c == EOF)
618 cerror("Missing ]");
619 *ep++ = c;
620 cclcnt++;
621 if (ep >= &expbuf[ESIZE])
622 goto complex;
623 c = getchar();
624 }
625 lastep[1] = cclcnt;
626 continue;
627 }
628 if (c == EOF) {
629 ungetchar(EOF);
630 c = '\\';
631 goto defchar;
632 }
633 *ep++ = CCHR;
634 if (c == '\n')
635cerror("No newlines in re's|Can't escape newlines into regular expressions");
636/*
637 if (c < '1' || c > NBRA + '1') {
638*/
639 *ep++ = c;
640 continue;
641/*
642 }
643 c -= '1';
644 if (c >= nbra)
645cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s");
646 *ep++ = c | QUOTE;
647 continue;
648*/
649
650 case '\n':
651 if (oknl) {
652 ungetchar(c);
d266c416 653 *ep++ = CEOFC;
22316d4f
MH
654 return (eof);
655 }
656cerror("Badly formed re|Missing closing delimiter for regular expression");
657
658 case '$':
659 if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') {
660 *ep++ = CDOL;
661 continue;
662 }
663 goto defchar;
664
665 case '.':
666 case '~':
667 case '*':
668 case '[':
669 if (value(MAGIC))
670 goto magic;
671defchar:
672 default:
673 *ep++ = CCHR;
674 *ep++ = c;
675 continue;
676 }
677 }
678}
679
680cerror(s)
681 char *s;
682{
683
684 expbuf[0] = 0;
685 error(s);
686}
687
688same(a, b)
689 register int a, b;
690{
691
692 return (a == b || value(IGNORECASE) &&
693 ((islower(a) && toupper(a) == b) || (islower(b) && toupper(b) == a)));
694}
695
/*
 * Position set by execute(): the end of the previous match when a
 * search is being continued on the same line, otherwise 0.  The star
 * back-off in advance() stops when it reaches this position.
 */
char	*locs;

698execute(gf, addr)
699 line *addr;
700{
701 register char *p1, *p2;
702 register int c;
703
704 if (gf) {
705 if (circfl)
706 return (0);
22316d4f
MH
707 locs = p1 = loc2;
708 } else {
709 if (addr == zero)
710 return (0);
711 p1 = linebuf;
712 getline(*addr);
713 locs = 0;
714 }
715 p2 = expbuf;
716 if (circfl) {
717 loc1 = p1;
718 return (advance(p1, p2));
719 }
720 /* fast check for first character */
721 if (*p2 == CCHR) {
722 c = p2[1];
723 do {
724 if (c != *p1 && (!value(IGNORECASE) ||
725 !((islower(c) && toupper(c) == *p1) ||
726 (islower(*p1) && toupper(*p1) == c))))
727 continue;
728 if (advance(p1, p2)) {
729 loc1 = p1;
730 return (1);
731 }
732 } while (*p1++);
733 return (0);
734 }
735 /* regular algorithm */
736 do {
737 if (advance(p1, p2)) {
738 loc1 = p1;
739 return (1);
740 }
741 } while (*p1++);
742 return (0);
743}
744
/*
 * True if c is a letter or an underscore.  The argument is
 * parenthesized so that an expression can be passed safely; it is
 * evaluated twice, so it must not have side effects.
 */
#define	uletter(c)	(isalpha(c) || (c) == '_')

747advance(lp, ep)
748 register char *lp, *ep;
749{
750 register char *curlp;
751 char *sp, *sp1;
752 int c;
753
754 for (;;) switch (*ep++) {
755
756 case CCHR:
757/* useless
758 if (*ep & QUOTE) {
759 c = *ep++ & TRIM;
760 sp = braslist[c];
761 sp1 = braelist[c];
762 while (sp < sp1) {
763 if (!same(*sp, *lp))
764 return (0);
765 sp++, lp++;
766 }
767 continue;
768 }
769*/
770 if (!same(*ep, *lp))
771 return (0);
772 ep++, lp++;
773 continue;
774
775 case CDOT:
776 if (*lp++)
777 continue;
778 return (0);
779
780 case CDOL:
781 if (*lp == 0)
782 continue;
783 return (0);
784
d266c416 785 case CEOFC:
22316d4f
MH
786 loc2 = lp;
787 return (1);
788
789 case CCL:
790 if (cclass(ep, *lp++, 1)) {
791 ep += *ep;
792 continue;
793 }
794 return (0);
795
796 case NCCL:
797 if (cclass(ep, *lp++, 0)) {
798 ep += *ep;
799 continue;
800 }
801 return (0);
802
803 case CBRA:
804 braslist[*ep++] = lp;
805 continue;
806
807 case CKET:
808 braelist[*ep++] = lp;
809 continue;
810
811 case CDOT|STAR:
812 curlp = lp;
813 while (*lp++)
814 continue;
815 goto star;
816
817 case CCHR|STAR:
818 curlp = lp;
819 while (same(*lp, *ep))
820 lp++;
821 lp++;
822 ep++;
823 goto star;
824
825 case CCL|STAR:
826 case NCCL|STAR:
827 curlp = lp;
828 while (cclass(ep, *lp++, ep[-1] == (CCL|STAR)))
829 continue;
830 ep += *ep;
831 goto star;
832star:
833 do {
834 lp--;
835 if (lp == locs)
836 break;
837 if (advance(lp, ep))
838 return (1);
839 } while (lp > curlp);
840 return (0);
841
842 case CBRC:
843 if (lp == expbuf)
844 continue;
845 if ((isdigit(*lp) || uletter(*lp)) && !uletter(lp[-1]) && !isdigit(lp[-1]))
846 continue;
847 return (0);
848
849 case CLET:
850 if (!uletter(*lp) && !isdigit(*lp))
851 continue;
852 return (0);
853
854 default:
855 error("Re internal error");
856 }
857}
858
859cclass(set, c, af)
860 register char *set;
861 register int c;
862 int af;
863{
864 register int n;
865
866 if (c == 0)
867 return (0);
868 if (value(IGNORECASE) && isupper(c))
869 c = tolower(c);
870 n = *set++;
871 while (--n)
872 if (n > 2 && set[1] == '-') {
873 if (c >= (set[0] & TRIM) && c <= (set[2] & TRIM))
874 return (af);
875 set += 3;
876 n -= 2;
877 } else
878 if ((*set++ & TRIM) == c)
879 return (af);
880 return (!af);
881}