release 3.3, Feb 2, 1980
[unix-history] / usr / src / usr.bin / ex / ex_re.c
CommitLineData
22316d4f
MH
1/* Copyright (c) 1979 Regents of the University of California */
2#include "ex.h"
3#include "ex_re.h"
4
5/*
6 * Global, substitute and regular expressions.
7 * Very similar to ed, with some re extensions and
8 * confirmed substitute.
9 */
10global(k)
11 bool k;
12{
13 register char *gp;
14 register int c;
15 register line *a1;
16 char globuf[GBSIZE], *Cwas;
17 int lines = lineDOL();
18 int oinglobal = inglobal;
19 char *oglobp = globp;
20
21 Cwas = Command;
22 /*
23 * States of inglobal:
24 * 0: ordinary - not in a global command.
25 * 1: text coming from some buffer, not tty.
26 * 2: like 1, but the source of the buffer is a global command.
27 * Hence you're only in a global command if inglobal==2. This
28 * strange sounding convention is historically derived from
29 * everybody simulating a global command.
30 */
31 if (inglobal==2)
32 error("Global within global@not allowed");
33 markDOT();
34 setall();
35 nonzero();
36 if (skipend())
37 error("Global needs re|Missing regular expression for global");
38 c = getchar();
39 ignore(compile(c, 1));
40 savere(scanre);
41 gp = globuf;
42 while ((c = getchar()) != '\n') {
43 switch (c) {
44
45 case EOF:
46 c = '\n';
47 goto brkwh;
48
49 case '\\':
50 c = getchar();
51 switch (c) {
52
53 case '\\':
54 ungetchar(c);
55 break;
56
57 case '\n':
58 break;
59
60 default:
61 *gp++ = '\\';
62 break;
63 }
64 break;
65 }
66 *gp++ = c;
67 if (gp >= &globuf[GBSIZE - 2])
68 error("Global command too long");
69 }
70brkwh:
71 ungetchar(c);
72out:
73 newline();
74 *gp++ = c;
75 *gp++ = 0;
887e3e0d 76 saveall();
22316d4f
MH
77 inglobal = 2;
78 for (a1 = one; a1 <= dol; a1++) {
79 *a1 &= ~01;
80 if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k)
81 *a1 |= 01;
82 }
83 /* should use gdelete from ed to avoid n**2 here on g/.../d */
22316d4f
MH
84 if (inopen)
85 inopen = -1;
86 for (a1 = one; a1 <= dol; a1++) {
87 if (*a1 & 01) {
88 *a1 &= ~01;
89 dot = a1;
90 globp = globuf;
91 commands(1, 1);
92 a1 = zero;
93 }
94 }
95 globp = oglobp;
96 inglobal = oinglobal;
97 endline = 1;
98 Command = Cwas;
99 netchHAD(lines);
100 setlastchar(EOF);
101 if (inopen) {
102 ungetchar(EOF);
103 inopen = 1;
104 }
105}
106
107bool xflag;
108int scount, slines, stotal;
109
110substitute(c)
111 int c;
112{
113 register line *addr;
114 register int n;
115 int gsubf;
116
117 gsubf = compsub(c);
887e3e0d 118 if(FIXUNDO)
22316d4f
MH
119 save12(), undkind = UNDCHANGE;
120 stotal = 0;
121 slines = 0;
122 for (addr = addr1; addr <= addr2; addr++) {
123 scount = 0;
124 if (dosubcon(0, addr) == 0)
125 continue;
126 if (gsubf) {
127#ifdef notdef
128 /*
129 * should check but loc2 is already munged.
130 * This needs a fancier check later.
131 */
132 if (loc1 == loc2)
133 error("substitution loop");
134#endif
135 while (*loc2)
136 if (dosubcon(1, addr) == 0)
137 break;
138 }
139 if (scount) {
140 stotal += scount;
141 slines++;
142 putmark(addr);
143 n = append(getsub, addr);
144 addr += n;
145 addr2 += n;
146 }
147 }
148 if (stotal == 0 && !inglobal && !xflag)
149 error("Fail|Substitute pattern match failed");
150 snote(stotal, slines);
151 return (stotal);
152}
153
154compsub(ch)
155{
156 register int seof, c, uselastre;
157 static int gsubf;
158
159 if (!value(EDCOMPATIBLE))
160 gsubf = xflag = 0;
161 uselastre = 0;
162 switch (ch) {
163
164 case 's':
165 ignore(skipwh());
166 seof = getchar();
167 if (endcmd(seof) || any(seof, "gcr")) {
168 ungetchar(seof);
169 goto redo;
170 }
171 if (isalpha(seof) || isdigit(seof))
172 error("Substitute needs re|Missing regular expression for substitute");
173 seof = compile(seof, 1);
174 uselastre = 1;
175 comprhs(seof);
176 gsubf = 0;
177 xflag = 0;
178 break;
179
180 case '~':
181 uselastre = 1;
182 /* fall into ... */
183 case '&':
184 redo:
185 if (re.Expbuf[0] == 0)
186 error("No previous re|No previous regular expression");
187 break;
188 }
189 for (;;) {
190 c = getchar();
191 switch (c) {
192
193 case 'g':
194 gsubf = !gsubf;
195 continue;
196
197 case 'c':
198 xflag = !xflag;
199 continue;
200
201 case 'r':
202 uselastre = 1;
203 continue;
204
205 default:
206 ungetchar(c);
207 setcount();
208 newline();
209 if (uselastre)
210 savere(subre);
211 else
212 resre(subre);
213 return (gsubf);
214 }
215 }
216}
217
218comprhs(seof)
219 int seof;
220{
221 register char *rp, *orp;
222 register int c;
223 char orhsbuf[LBSIZE / 2];
224
225 rp = rhsbuf;
226 CP(orhsbuf, rp);
227 for (;;) {
228 c = getchar();
229 if (c == seof)
230 break;
231 switch (c) {
232
233 case '\\':
234 c = getchar();
235 if (c == EOF) {
236 ungetchar(c);
237 break;
238 }
239 if (value(MAGIC)) {
240 /*
241 * When "magic", \& turns into a plain &,
242 * and all other chars work fine quoted.
243 */
244 if (c != '&')
245 c |= QUOTE;
246 break;
247 }
248magic:
249 if (c == '~') {
250 for (orp = orhsbuf; *orp; *rp++ = *orp++)
251 if (rp >= &rhsbuf[LBSIZE / 2 + 1])
252 goto toobig;
253 continue;
254 }
255 c |= QUOTE;
256 break;
257
258 case '\n':
259 case EOF:
887e3e0d
MH
260 if (!(globp && globp[0])) {
261 ungetchar(c);
262 goto endrhs;
263 }
22316d4f
MH
264
265 case '~':
266 case '&':
267 if (value(MAGIC))
268 goto magic;
269 break;
270 }
271 if (rp >= &rhsbuf[LBSIZE / 2 - 1])
272toobig:
273 error("Replacement pattern too long@- limit 256 characters");
274 *rp++ = c;
275 }
276endrhs:
277 *rp++ = 0;
278}
279
280getsub()
281{
282 register char *p;
283
284 if ((p = linebp) == 0)
285 return (EOF);
286 strcLIN(p);
287 linebp = 0;
288 return (0);
289}
290
291dosubcon(f, a)
292 bool f;
293 line *a;
294{
295
296 if (execute(f, a) == 0)
297 return (0);
298 if (confirmed(a)) {
299 dosub();
300 scount++;
301 }
302 return (1);
303}
304
305confirmed(a)
306 line *a;
307{
308 register int c, ch;
309
310 if (xflag == 0)
311 return (1);
312 pofix();
313 pline(lineno(a));
314 if (inopen)
315 putchar('\n' | QUOTE);
316 c = column(loc1 - 1);
317 ugo(c - 1 + (inopen ? 1 : 0), ' ');
318 ugo(column(loc2 - 1) - c, '^');
319 flush();
320 ch = c = getkey();
321again:
322 if (c == '\r')
323 c = '\n';
324 if (inopen)
325 putchar(c), flush();
326 if (c != '\n' && c != EOF) {
327 c = getkey();
328 goto again;
329 }
330 noteinp();
331 return (ch == 'y');
332}
333
334getch()
335{
336 char c;
337
338 if (read(2, &c, 1) != 1)
339 return (EOF);
340 return (c & TRIM);
341}
342
/*
 * Output the character `with' cnt times; nothing if cnt <= 0.
 */
ugo(cnt, with)
	int with;
	int cnt;
{

	for (; cnt > 0; cnt--)
		putchar(with);
}
353
354int casecnt;
355bool destuc;
356
357dosub()
358{
359 register char *lp, *sp, *rp;
360 int c;
361
362 lp = linebuf;
363 sp = genbuf;
364 rp = rhsbuf;
365 while (lp < loc1)
366 *sp++ = *lp++;
367 casecnt = 0;
368 while (c = *rp++) {
369 if (c & QUOTE)
370 switch (c & TRIM) {
371
372 case '&':
373 sp = place(sp, loc1, loc2);
374 if (sp == 0)
375 goto ovflo;
376 continue;
377
378 case 'l':
379 casecnt = 1;
380 destuc = 0;
381 continue;
382
383 case 'L':
384 casecnt = LBSIZE;
385 destuc = 0;
386 continue;
387
388 case 'u':
389 casecnt = 1;
390 destuc = 1;
391 continue;
392
393 case 'U':
394 casecnt = LBSIZE;
395 destuc = 1;
396 continue;
397
398 case 'E':
399 case 'e':
400 casecnt = 0;
401 continue;
402 }
403 if (c < 0 && (c &= TRIM) >= '1' && c < nbra + '1') {
404 sp = place(sp, braslist[c - '1'], braelist[c - '1']);
405 if (sp == 0)
406 goto ovflo;
407 continue;
408 }
409 if (casecnt)
410 *sp++ = fixcase(c & TRIM);
411 else
412 *sp++ = c & TRIM;
413 if (sp >= &genbuf[LBSIZE])
414ovflo:
44232d5b 415 error("Line overflow@in substitute");
22316d4f
MH
416 }
417 lp = loc2;
418 loc2 = sp + (linebuf - genbuf);
419 while (*sp++ = *lp++)
420 if (sp >= &genbuf[LBSIZE])
421 goto ovflo;
422 strcLIN(genbuf);
423}
424
425fixcase(c)
426 register int c;
427{
428
429 if (casecnt == 0)
430 return (c);
431 casecnt--;
432 if (destuc) {
433 if (islower(c))
434 c = toupper(c);
435 } else
436 if (isupper(c))
437 c = tolower(c);
438 return (c);
439}
440
441char *
442place(sp, l1, l2)
443 register char *sp, *l1, *l2;
444{
445
446 while (l1 < l2) {
447 *sp++ = fixcase(*l1++);
448 if (sp >= &genbuf[LBSIZE])
449 return (0);
450 }
451 return (sp);
452}
453
/*
 * Report the number of substitutions made, when notable(total) says
 * the count deserves a message.  The line count is added only when it
 * is neither 1 nor equal to the total.
 */
snote(total, lines)
	register int total, lines;
{

	if (notable(total)) {
		printf(mesg("%d subs|%d substitutions"), total);
		if (lines != 1 && lines != total)
			printf(" on %d lines", lines);
		noonl();
		flush();
	}
}
466
467compile(eof, oknl)
468 int eof;
469 int oknl;
470{
471 register int c;
472 register char *ep;
473 char *lastep;
474 char bracket[NBRA], *bracketp, *rhsp;
475 int cclcnt;
476
477 if (isalpha(eof) || isdigit(eof))
478 error("Regular expressions cannot be delimited by letters or digits");
479 ep = expbuf;
480 c = getchar();
481 if (eof == '\\')
482 switch (c) {
483
484 case '/':
485 case '?':
486 if (scanre.Expbuf[0] == 0)
487error("No previous scan re|No previous scanning regular expression");
488 resre(scanre);
489 return (c);
490
491 case '&':
492 if (subre.Expbuf[0] == 0)
493error("No previous substitute re|No previous substitute regular expression");
494 resre(subre);
495 return (c);
496
497 default:
498 error("Badly formed re|Regular expression \\ must be followed by / or ?");
499 }
500 if (c == eof || c == '\n' || c == EOF) {
501 if (*ep == 0)
502 error("No previous re|No previous regular expression");
503 if (c == '\n' && oknl == 0)
504 error("Missing closing delimiter@for regular expression");
505 if (c != eof)
506 ungetchar(c);
507 return (eof);
508 }
509 bracketp = bracket;
510 nbra = 0;
511 circfl = 0;
512 if (c == '^') {
513 c = getchar();
514 circfl++;
515 }
516 ungetchar(c);
517 for (;;) {
518 if (ep >= &expbuf[ESIZE - 2])
519complex:
520 cerror("Re too complex|Regular expression too complicated");
521 c = getchar();
522 if (c == eof || c == EOF) {
523 if (bracketp != bracket)
524cerror("Unmatched \\(|More \\('s than \\)'s in regular expression");
525 *ep++ = CEOF;
526 if (c == EOF)
527 ungetchar(c);
528 return (eof);
529 }
530 if (value(MAGIC)) {
531 if (c != '*' || ep == expbuf)
532 lastep = ep;
533 } else
534 if (c != '\\' || peekchar() != '*' || ep == expbuf)
535 lastep = ep;
536 switch (c) {
537
538 case '\\':
539 c = getchar();
540 switch (c) {
541
542 case '(':
543 if (nbra >= NBRA)
544cerror("Awash in \\('s!|Too many \\('d subexressions in a regular expression");
545 *bracketp++ = nbra;
546 *ep++ = CBRA;
547 *ep++ = nbra++;
548 continue;
549
550 case ')':
551 if (bracketp <= bracket)
552cerror("Extra \\)|More \\)'s than \\('s in regular expression");
553 *ep++ = CKET;
554 *ep++ = *--bracketp;
555 continue;
556
557 case '<':
558 *ep++ = CBRC;
559 continue;
560
561 case '>':
562 *ep++ = CLET;
563 continue;
564 }
565 if (value(MAGIC) == 0)
566magic:
567 switch (c) {
568
569 case '.':
570 *ep++ = CDOT;
571 continue;
572
573 case '~':
574 rhsp = rhsbuf;
575 while (*rhsp) {
576 if (*rhsp & QUOTE) {
577 c = *rhsp & TRIM;
578 if (c == '&')
579error("Replacement pattern contains &@- cannot use in re");
580 if (c >= '1' && c <= '9')
581error("Replacement pattern contains \\d@- cannot use in re");
582 }
583 if (ep >= &expbuf[ESIZE-2])
584 goto complex;
585 *ep++ = CCHR;
586 *ep++ = *rhsp++ & TRIM;
587 }
588 continue;
589
590 case '*':
591 if (ep == expbuf)
592 break;
593 if (*lastep == CBRA || *lastep == CKET)
594cerror("Illegal *|Can't * a \\( ... \\) in regular expression");
595 if (*lastep == CCHR && (lastep[1] & QUOTE))
596cerror("Illegal *|Can't * a \\n in regular expression");
597 *lastep |= STAR;
598 continue;
599
600 case '[':
601 *ep++ = CCL;
602 *ep++ = 0;
603 cclcnt = 1;
604 c = getchar();
605 if (c == '^') {
606 c = getchar();
607 ep[-2] = NCCL;
608 }
609 if (c == ']')
610cerror("Bad character class|Empty character class '[]' or '[^]' cannot match");
611 while (c != ']') {
612 if (c == '\\' && any(peekchar(), "]-^\\"))
613 c = getchar() | QUOTE;
614 if (c == '\n' || c == EOF)
615 cerror("Missing ]");
616 *ep++ = c;
617 cclcnt++;
618 if (ep >= &expbuf[ESIZE])
619 goto complex;
620 c = getchar();
621 }
622 lastep[1] = cclcnt;
623 continue;
624 }
625 if (c == EOF) {
626 ungetchar(EOF);
627 c = '\\';
628 goto defchar;
629 }
630 *ep++ = CCHR;
631 if (c == '\n')
632cerror("No newlines in re's|Can't escape newlines into regular expressions");
633/*
634 if (c < '1' || c > NBRA + '1') {
635*/
636 *ep++ = c;
637 continue;
638/*
639 }
640 c -= '1';
641 if (c >= nbra)
642cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s");
643 *ep++ = c | QUOTE;
644 continue;
645*/
646
647 case '\n':
648 if (oknl) {
649 ungetchar(c);
650 *ep++ = CEOF;
651 return (eof);
652 }
653cerror("Badly formed re|Missing closing delimiter for regular expression");
654
655 case '$':
656 if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') {
657 *ep++ = CDOL;
658 continue;
659 }
660 goto defchar;
661
662 case '.':
663 case '~':
664 case '*':
665 case '[':
666 if (value(MAGIC))
667 goto magic;
668defchar:
669 default:
670 *ep++ = CCHR;
671 *ep++ = c;
672 continue;
673 }
674 }
675}
676
677cerror(s)
678 char *s;
679{
680
681 expbuf[0] = 0;
682 error(s);
683}
684
685same(a, b)
686 register int a, b;
687{
688
689 return (a == b || value(IGNORECASE) &&
690 ((islower(a) && toupper(a) == b) || (islower(b) && toupper(b) == a)));
691}
692
693char *locs;
694
695execute(gf, addr)
696 line *addr;
697{
698 register char *p1, *p2;
699 register int c;
700
701 if (gf) {
702 if (circfl)
703 return (0);
704#ifdef notdef
705 if (loc1 == loc2)
706 loc2++;
707#endif
708 locs = p1 = loc2;
709 } else {
710 if (addr == zero)
711 return (0);
712 p1 = linebuf;
713 getline(*addr);
714 locs = 0;
715 }
716 p2 = expbuf;
717 if (circfl) {
718 loc1 = p1;
719 return (advance(p1, p2));
720 }
721 /* fast check for first character */
722 if (*p2 == CCHR) {
723 c = p2[1];
724 do {
725 if (c != *p1 && (!value(IGNORECASE) ||
726 !((islower(c) && toupper(c) == *p1) ||
727 (islower(*p1) && toupper(*p1) == c))))
728 continue;
729 if (advance(p1, p2)) {
730 loc1 = p1;
731 return (1);
732 }
733 } while (*p1++);
734 return (0);
735 }
736 /* regular algorithm */
737 do {
738 if (advance(p1, p2)) {
739 loc1 = p1;
740 return (1);
741 }
742 } while (*p1++);
743 return (0);
744}
745
746#define uletter(c) (isalpha(c) || c == '_')
747
748advance(lp, ep)
749 register char *lp, *ep;
750{
751 register char *curlp;
752 char *sp, *sp1;
753 int c;
754
755 for (;;) switch (*ep++) {
756
757 case CCHR:
758/* useless
759 if (*ep & QUOTE) {
760 c = *ep++ & TRIM;
761 sp = braslist[c];
762 sp1 = braelist[c];
763 while (sp < sp1) {
764 if (!same(*sp, *lp))
765 return (0);
766 sp++, lp++;
767 }
768 continue;
769 }
770*/
771 if (!same(*ep, *lp))
772 return (0);
773 ep++, lp++;
774 continue;
775
776 case CDOT:
777 if (*lp++)
778 continue;
779 return (0);
780
781 case CDOL:
782 if (*lp == 0)
783 continue;
784 return (0);
785
786 case CEOF:
787 loc2 = lp;
788 return (1);
789
790 case CCL:
791 if (cclass(ep, *lp++, 1)) {
792 ep += *ep;
793 continue;
794 }
795 return (0);
796
797 case NCCL:
798 if (cclass(ep, *lp++, 0)) {
799 ep += *ep;
800 continue;
801 }
802 return (0);
803
804 case CBRA:
805 braslist[*ep++] = lp;
806 continue;
807
808 case CKET:
809 braelist[*ep++] = lp;
810 continue;
811
812 case CDOT|STAR:
813 curlp = lp;
814 while (*lp++)
815 continue;
816 goto star;
817
818 case CCHR|STAR:
819 curlp = lp;
820 while (same(*lp, *ep))
821 lp++;
822 lp++;
823 ep++;
824 goto star;
825
826 case CCL|STAR:
827 case NCCL|STAR:
828 curlp = lp;
829 while (cclass(ep, *lp++, ep[-1] == (CCL|STAR)))
830 continue;
831 ep += *ep;
832 goto star;
833star:
834 do {
835 lp--;
836 if (lp == locs)
837 break;
838 if (advance(lp, ep))
839 return (1);
840 } while (lp > curlp);
841 return (0);
842
843 case CBRC:
844 if (lp == expbuf)
845 continue;
846 if ((isdigit(*lp) || uletter(*lp)) && !uletter(lp[-1]) && !isdigit(lp[-1]))
847 continue;
848 return (0);
849
850 case CLET:
851 if (!uletter(*lp) && !isdigit(*lp))
852 continue;
853 return (0);
854
855 default:
856 error("Re internal error");
857 }
858}
859
860cclass(set, c, af)
861 register char *set;
862 register int c;
863 int af;
864{
865 register int n;
866
867 if (c == 0)
868 return (0);
869 if (value(IGNORECASE) && isupper(c))
870 c = tolower(c);
871 n = *set++;
872 while (--n)
873 if (n > 2 && set[1] == '-') {
874 if (c >= (set[0] & TRIM) && c <= (set[2] & TRIM))
875 return (af);
876 set += 3;
877 n -= 2;
878 } else
879 if ((*set++ & TRIM) == c)
880 return (af);
881 return (!af);
882}