monet merge
[unix-history] / usr / src / usr.bin / ex / ex_re.c
CommitLineData
299f2784 1/* Copyright (c) 1981 Regents of the University of California */
427286eb 2static char *sccsid = "@(#)ex_re.c 7.2 %G%";
22316d4f
MH
3#include "ex.h"
4#include "ex_re.h"
5
/*
 * Global, substitute and regular expressions.
 * Very similar to ed, with some re extensions and
 * confirmed substitute.
 */
11global(k)
12 bool k;
13{
14 register char *gp;
15 register int c;
16 register line *a1;
17 char globuf[GBSIZE], *Cwas;
18 int lines = lineDOL();
19 int oinglobal = inglobal;
20 char *oglobp = globp;
21
22 Cwas = Command;
23 /*
24 * States of inglobal:
25 * 0: ordinary - not in a global command.
26 * 1: text coming from some buffer, not tty.
27 * 2: like 1, but the source of the buffer is a global command.
28 * Hence you're only in a global command if inglobal==2. This
29 * strange sounding convention is historically derived from
30 * everybody simulating a global command.
31 */
32 if (inglobal==2)
33 error("Global within global@not allowed");
34 markDOT();
35 setall();
36 nonzero();
37 if (skipend())
38 error("Global needs re|Missing regular expression for global");
39 c = getchar();
40 ignore(compile(c, 1));
41 savere(scanre);
42 gp = globuf;
43 while ((c = getchar()) != '\n') {
44 switch (c) {
45
46 case EOF:
47 c = '\n';
48 goto brkwh;
49
50 case '\\':
51 c = getchar();
52 switch (c) {
53
54 case '\\':
55 ungetchar(c);
56 break;
57
58 case '\n':
59 break;
60
61 default:
62 *gp++ = '\\';
63 break;
64 }
65 break;
66 }
67 *gp++ = c;
68 if (gp >= &globuf[GBSIZE - 2])
69 error("Global command too long");
70 }
71brkwh:
72 ungetchar(c);
73out:
74 newline();
75 *gp++ = c;
76 *gp++ = 0;
887e3e0d 77 saveall();
22316d4f
MH
78 inglobal = 2;
79 for (a1 = one; a1 <= dol; a1++) {
80 *a1 &= ~01;
81 if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k)
82 *a1 |= 01;
83 }
f0f2d980
MH
84#ifdef notdef
85/*
86 * This code is commented out for now. The problem is that we don't
87 * fix up the undo area the way we should. Basically, I think what has
88 * to be done is to copy the undo area down (since we shrunk everything)
89 * and move the various pointers into it down too. I will do this later
90 * when I have time. (Mark, 10-20-80)
91 */
04379bab
MH
92 /*
93 * Special case: g/.../d (avoid n^2 algorithm)
94 */
95 if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') {
96 gdelete();
97 return;
98 }
f0f2d980 99#endif
22316d4f
MH
100 if (inopen)
101 inopen = -1;
04379bab
MH
102 /*
103 * Now for each marked line, set dot there and do the commands.
104 * Note the n^2 behavior here for lots of lines matching.
105 * This is really needed: in some cases you could delete lines,
106 * causing a marked line to be moved before a1 and missed if
107 * we didn't restart at zero each time.
108 */
22316d4f
MH
109 for (a1 = one; a1 <= dol; a1++) {
110 if (*a1 & 01) {
111 *a1 &= ~01;
112 dot = a1;
113 globp = globuf;
114 commands(1, 1);
115 a1 = zero;
116 }
117 }
118 globp = oglobp;
119 inglobal = oinglobal;
120 endline = 1;
121 Command = Cwas;
122 netchHAD(lines);
123 setlastchar(EOF);
124 if (inopen) {
125 ungetchar(EOF);
126 inopen = 1;
127 }
128}
129
04379bab
MH
130/*
131 * gdelete: delete inside a global command. Handles the
132 * special case g/r.e./d. All lines to be deleted have
133 * already been marked. Squeeze the remaining lines together.
134 * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/,
135 * and g/r.e./.,/r.e.2/d are not treated specially. There is no
136 * good reason for this except the question: where to you draw the line?
137 */
138gdelete()
139{
140 register line *a1, *a2, *a3;
141
142 a3 = dol;
143 /* find first marked line. can skip all before it */
144 for (a1=zero; (*a1&01)==0; a1++)
145 if (a1>=a3)
146 return;
147 /* copy down unmarked lines, compacting as we go. */
148 for (a2=a1+1; a2<=a3;) {
149 if (*a2&01) {
150 a2++; /* line is marked, skip it */
151 dot = a1; /* dot left after line deletion */
152 } else
153 *a1++ = *a2++; /* unmarked, copy it */
154 }
155 dol = a1-1;
156 if (dot>dol)
157 dot = dol;
158 change();
159}
160
d266c416 161bool cflag;
22316d4f
MH
162int scount, slines, stotal;
163
164substitute(c)
165 int c;
166{
167 register line *addr;
168 register int n;
427286eb 169 int gsubf, hopcount;
22316d4f
MH
170
171 gsubf = compsub(c);
887e3e0d 172 if(FIXUNDO)
22316d4f
MH
173 save12(), undkind = UNDCHANGE;
174 stotal = 0;
175 slines = 0;
176 for (addr = addr1; addr <= addr2; addr++) {
427286eb 177 scount = hopcount = 0;
22316d4f
MH
178 if (dosubcon(0, addr) == 0)
179 continue;
180 if (gsubf) {
22316d4f 181 /*
04379bab
MH
182 * The loop can happen from s/\</&/g
183 * but we don't want to break other, reasonable cases.
22316d4f 184 */
04379bab
MH
185 while (*loc2) {
186 if (++hopcount > sizeof linebuf)
187 error("substitution loop");
22316d4f
MH
188 if (dosubcon(1, addr) == 0)
189 break;
04379bab 190 }
22316d4f
MH
191 }
192 if (scount) {
193 stotal += scount;
194 slines++;
195 putmark(addr);
196 n = append(getsub, addr);
197 addr += n;
198 addr2 += n;
199 }
200 }
d266c416 201 if (stotal == 0 && !inglobal && !cflag)
22316d4f
MH
202 error("Fail|Substitute pattern match failed");
203 snote(stotal, slines);
204 return (stotal);
205}
206
207compsub(ch)
208{
209 register int seof, c, uselastre;
210 static int gsubf;
211
212 if (!value(EDCOMPATIBLE))
d266c416 213 gsubf = cflag = 0;
22316d4f
MH
214 uselastre = 0;
215 switch (ch) {
216
217 case 's':
218 ignore(skipwh());
219 seof = getchar();
220 if (endcmd(seof) || any(seof, "gcr")) {
221 ungetchar(seof);
222 goto redo;
223 }
224 if (isalpha(seof) || isdigit(seof))
225 error("Substitute needs re|Missing regular expression for substitute");
226 seof = compile(seof, 1);
227 uselastre = 1;
228 comprhs(seof);
229 gsubf = 0;
d266c416 230 cflag = 0;
22316d4f
MH
231 break;
232
233 case '~':
234 uselastre = 1;
235 /* fall into ... */
236 case '&':
237 redo:
238 if (re.Expbuf[0] == 0)
239 error("No previous re|No previous regular expression");
d266c416
MH
240 if (subre.Expbuf[0] == 0)
241 error("No previous substitute re|No previous substitute to repeat");
22316d4f
MH
242 break;
243 }
244 for (;;) {
245 c = getchar();
246 switch (c) {
247
248 case 'g':
249 gsubf = !gsubf;
250 continue;
251
252 case 'c':
d266c416 253 cflag = !cflag;
22316d4f
MH
254 continue;
255
256 case 'r':
257 uselastre = 1;
258 continue;
259
260 default:
261 ungetchar(c);
262 setcount();
263 newline();
264 if (uselastre)
265 savere(subre);
266 else
267 resre(subre);
268 return (gsubf);
269 }
270 }
271}
272
273comprhs(seof)
274 int seof;
275{
276 register char *rp, *orp;
277 register int c;
04379bab 278 char orhsbuf[RHSSIZE];
22316d4f
MH
279
280 rp = rhsbuf;
281 CP(orhsbuf, rp);
282 for (;;) {
283 c = getchar();
284 if (c == seof)
285 break;
286 switch (c) {
287
288 case '\\':
289 c = getchar();
290 if (c == EOF) {
291 ungetchar(c);
292 break;
293 }
294 if (value(MAGIC)) {
295 /*
296 * When "magic", \& turns into a plain &,
297 * and all other chars work fine quoted.
298 */
299 if (c != '&')
300 c |= QUOTE;
301 break;
302 }
303magic:
304 if (c == '~') {
305 for (orp = orhsbuf; *orp; *rp++ = *orp++)
04379bab 306 if (rp >= &rhsbuf[RHSSIZE - 1])
22316d4f
MH
307 goto toobig;
308 continue;
309 }
310 c |= QUOTE;
311 break;
312
313 case '\n':
314 case EOF:
887e3e0d
MH
315 if (!(globp && globp[0])) {
316 ungetchar(c);
317 goto endrhs;
318 }
22316d4f
MH
319
320 case '~':
321 case '&':
322 if (value(MAGIC))
323 goto magic;
324 break;
325 }
04379bab 326 if (rp >= &rhsbuf[RHSSIZE - 1]) {
22316d4f 327toobig:
04379bab 328 *rp = 0;
22316d4f 329 error("Replacement pattern too long@- limit 256 characters");
04379bab 330 }
22316d4f
MH
331 *rp++ = c;
332 }
333endrhs:
334 *rp++ = 0;
335}
336
337getsub()
338{
339 register char *p;
340
341 if ((p = linebp) == 0)
342 return (EOF);
343 strcLIN(p);
344 linebp = 0;
345 return (0);
346}
347
348dosubcon(f, a)
349 bool f;
350 line *a;
351{
352
353 if (execute(f, a) == 0)
354 return (0);
355 if (confirmed(a)) {
356 dosub();
357 scount++;
358 }
359 return (1);
360}
361
362confirmed(a)
363 line *a;
364{
365 register int c, ch;
366
d266c416 367 if (cflag == 0)
22316d4f
MH
368 return (1);
369 pofix();
370 pline(lineno(a));
371 if (inopen)
372 putchar('\n' | QUOTE);
373 c = column(loc1 - 1);
374 ugo(c - 1 + (inopen ? 1 : 0), ' ');
375 ugo(column(loc2 - 1) - c, '^');
376 flush();
377 ch = c = getkey();
378again:
379 if (c == '\r')
380 c = '\n';
381 if (inopen)
382 putchar(c), flush();
383 if (c != '\n' && c != EOF) {
384 c = getkey();
385 goto again;
386 }
387 noteinp();
388 return (ch == 'y');
389}
390
391getch()
392{
393 char c;
394
395 if (read(2, &c, 1) != 1)
396 return (EOF);
397 return (c & TRIM);
398}
399
/*
 * ugo: put out the character `with' cnt times.
 * Nothing is written when cnt is zero or negative.
 */
ugo(cnt, with)
	int with;
	int cnt;
{

	while (cnt > 0) {
		putchar(with);
		cnt--;
	}
}
410
411int casecnt;
412bool destuc;
413
414dosub()
415{
416 register char *lp, *sp, *rp;
417 int c;
418
419 lp = linebuf;
420 sp = genbuf;
421 rp = rhsbuf;
422 while (lp < loc1)
423 *sp++ = *lp++;
424 casecnt = 0;
425 while (c = *rp++) {
299f2784
MH
426 /* ^V <return> from vi to split lines */
427 if (c == '\r')
428 c = '\n';
429
22316d4f
MH
430 if (c & QUOTE)
431 switch (c & TRIM) {
432
433 case '&':
434 sp = place(sp, loc1, loc2);
435 if (sp == 0)
436 goto ovflo;
437 continue;
438
439 case 'l':
440 casecnt = 1;
441 destuc = 0;
442 continue;
443
444 case 'L':
445 casecnt = LBSIZE;
446 destuc = 0;
447 continue;
448
449 case 'u':
450 casecnt = 1;
451 destuc = 1;
452 continue;
453
454 case 'U':
455 casecnt = LBSIZE;
456 destuc = 1;
457 continue;
458
459 case 'E':
460 case 'e':
461 casecnt = 0;
462 continue;
463 }
464 if (c < 0 && (c &= TRIM) >= '1' && c < nbra + '1') {
465 sp = place(sp, braslist[c - '1'], braelist[c - '1']);
466 if (sp == 0)
467 goto ovflo;
468 continue;
469 }
470 if (casecnt)
471 *sp++ = fixcase(c & TRIM);
472 else
473 *sp++ = c & TRIM;
474 if (sp >= &genbuf[LBSIZE])
475ovflo:
44232d5b 476 error("Line overflow@in substitute");
22316d4f
MH
477 }
478 lp = loc2;
479 loc2 = sp + (linebuf - genbuf);
480 while (*sp++ = *lp++)
481 if (sp >= &genbuf[LBSIZE])
482 goto ovflo;
483 strcLIN(genbuf);
484}
485
486fixcase(c)
487 register int c;
488{
489
490 if (casecnt == 0)
491 return (c);
492 casecnt--;
493 if (destuc) {
494 if (islower(c))
495 c = toupper(c);
496 } else
497 if (isupper(c))
498 c = tolower(c);
499 return (c);
500}
501
502char *
503place(sp, l1, l2)
504 register char *sp, *l1, *l2;
505{
506
507 while (l1 < l2) {
508 *sp++ = fixcase(*l1++);
509 if (sp >= &genbuf[LBSIZE])
510 return (0);
511 }
512 return (sp);
513}
514
/*
 * snote: report how many substitutions were made, provided
 * notable(total) says the number is worth mentioning.  The line count
 * is added unless it is 1 or the same as the total.
 */
snote(total, lines)
	register int total, lines;
{

	if (notable(total)) {
		printf(mesg("%d subs|%d substitutions"), total);
		if (!(lines == 1 || lines == total))
			printf(" on %d lines", lines);
		noonl();
		flush();
	}
}
527
528compile(eof, oknl)
529 int eof;
530 int oknl;
531{
532 register int c;
533 register char *ep;
534 char *lastep;
535 char bracket[NBRA], *bracketp, *rhsp;
536 int cclcnt;
537
538 if (isalpha(eof) || isdigit(eof))
539 error("Regular expressions cannot be delimited by letters or digits");
540 ep = expbuf;
541 c = getchar();
542 if (eof == '\\')
543 switch (c) {
544
545 case '/':
546 case '?':
547 if (scanre.Expbuf[0] == 0)
548error("No previous scan re|No previous scanning regular expression");
549 resre(scanre);
550 return (c);
551
552 case '&':
553 if (subre.Expbuf[0] == 0)
554error("No previous substitute re|No previous substitute regular expression");
555 resre(subre);
556 return (c);
557
558 default:
559 error("Badly formed re|Regular expression \\ must be followed by / or ?");
560 }
561 if (c == eof || c == '\n' || c == EOF) {
562 if (*ep == 0)
563 error("No previous re|No previous regular expression");
564 if (c == '\n' && oknl == 0)
565 error("Missing closing delimiter@for regular expression");
566 if (c != eof)
567 ungetchar(c);
568 return (eof);
569 }
570 bracketp = bracket;
571 nbra = 0;
572 circfl = 0;
573 if (c == '^') {
574 c = getchar();
575 circfl++;
576 }
577 ungetchar(c);
578 for (;;) {
579 if (ep >= &expbuf[ESIZE - 2])
580complex:
581 cerror("Re too complex|Regular expression too complicated");
582 c = getchar();
583 if (c == eof || c == EOF) {
584 if (bracketp != bracket)
585cerror("Unmatched \\(|More \\('s than \\)'s in regular expression");
d266c416 586 *ep++ = CEOFC;
22316d4f
MH
587 if (c == EOF)
588 ungetchar(c);
589 return (eof);
590 }
591 if (value(MAGIC)) {
592 if (c != '*' || ep == expbuf)
593 lastep = ep;
594 } else
595 if (c != '\\' || peekchar() != '*' || ep == expbuf)
596 lastep = ep;
597 switch (c) {
598
599 case '\\':
600 c = getchar();
601 switch (c) {
602
603 case '(':
604 if (nbra >= NBRA)
605cerror("Awash in \\('s!|Too many \\('d subexressions in a regular expression");
606 *bracketp++ = nbra;
607 *ep++ = CBRA;
608 *ep++ = nbra++;
609 continue;
610
611 case ')':
612 if (bracketp <= bracket)
613cerror("Extra \\)|More \\)'s than \\('s in regular expression");
614 *ep++ = CKET;
615 *ep++ = *--bracketp;
616 continue;
617
618 case '<':
619 *ep++ = CBRC;
620 continue;
621
622 case '>':
623 *ep++ = CLET;
624 continue;
625 }
626 if (value(MAGIC) == 0)
627magic:
628 switch (c) {
629
630 case '.':
631 *ep++ = CDOT;
632 continue;
633
634 case '~':
635 rhsp = rhsbuf;
636 while (*rhsp) {
637 if (*rhsp & QUOTE) {
638 c = *rhsp & TRIM;
639 if (c == '&')
640error("Replacement pattern contains &@- cannot use in re");
641 if (c >= '1' && c <= '9')
642error("Replacement pattern contains \\d@- cannot use in re");
643 }
644 if (ep >= &expbuf[ESIZE-2])
645 goto complex;
646 *ep++ = CCHR;
647 *ep++ = *rhsp++ & TRIM;
648 }
649 continue;
650
651 case '*':
652 if (ep == expbuf)
653 break;
654 if (*lastep == CBRA || *lastep == CKET)
655cerror("Illegal *|Can't * a \\( ... \\) in regular expression");
656 if (*lastep == CCHR && (lastep[1] & QUOTE))
657cerror("Illegal *|Can't * a \\n in regular expression");
658 *lastep |= STAR;
659 continue;
660
661 case '[':
662 *ep++ = CCL;
663 *ep++ = 0;
664 cclcnt = 1;
665 c = getchar();
666 if (c == '^') {
667 c = getchar();
668 ep[-2] = NCCL;
669 }
670 if (c == ']')
671cerror("Bad character class|Empty character class '[]' or '[^]' cannot match");
672 while (c != ']') {
673 if (c == '\\' && any(peekchar(), "]-^\\"))
674 c = getchar() | QUOTE;
675 if (c == '\n' || c == EOF)
676 cerror("Missing ]");
677 *ep++ = c;
678 cclcnt++;
679 if (ep >= &expbuf[ESIZE])
680 goto complex;
681 c = getchar();
682 }
683 lastep[1] = cclcnt;
684 continue;
685 }
686 if (c == EOF) {
687 ungetchar(EOF);
688 c = '\\';
689 goto defchar;
690 }
691 *ep++ = CCHR;
692 if (c == '\n')
693cerror("No newlines in re's|Can't escape newlines into regular expressions");
694/*
695 if (c < '1' || c > NBRA + '1') {
696*/
697 *ep++ = c;
698 continue;
699/*
700 }
701 c -= '1';
702 if (c >= nbra)
703cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s");
704 *ep++ = c | QUOTE;
705 continue;
706*/
707
708 case '\n':
709 if (oknl) {
710 ungetchar(c);
d266c416 711 *ep++ = CEOFC;
22316d4f
MH
712 return (eof);
713 }
714cerror("Badly formed re|Missing closing delimiter for regular expression");
715
716 case '$':
717 if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') {
718 *ep++ = CDOL;
719 continue;
720 }
721 goto defchar;
722
723 case '.':
724 case '~':
725 case '*':
726 case '[':
727 if (value(MAGIC))
728 goto magic;
729defchar:
730 default:
731 *ep++ = CCHR;
732 *ep++ = c;
733 continue;
734 }
735 }
736}
737
738cerror(s)
739 char *s;
740{
741
742 expbuf[0] = 0;
743 error(s);
744}
745
746same(a, b)
747 register int a, b;
748{
749
750 return (a == b || value(IGNORECASE) &&
751 ((islower(a) && toupper(a) == b) || (islower(b) && toupper(b) == a)));
752}
753
754char *locs;
755
756execute(gf, addr)
757 line *addr;
758{
759 register char *p1, *p2;
760 register int c;
761
762 if (gf) {
763 if (circfl)
764 return (0);
22316d4f
MH
765 locs = p1 = loc2;
766 } else {
767 if (addr == zero)
768 return (0);
769 p1 = linebuf;
770 getline(*addr);
771 locs = 0;
772 }
773 p2 = expbuf;
774 if (circfl) {
775 loc1 = p1;
776 return (advance(p1, p2));
777 }
778 /* fast check for first character */
779 if (*p2 == CCHR) {
780 c = p2[1];
781 do {
782 if (c != *p1 && (!value(IGNORECASE) ||
783 !((islower(c) && toupper(c) == *p1) ||
784 (islower(*p1) && toupper(*p1) == c))))
785 continue;
786 if (advance(p1, p2)) {
787 loc1 = p1;
788 return (1);
789 }
790 } while (*p1++);
791 return (0);
792 }
793 /* regular algorithm */
794 do {
795 if (advance(p1, p2)) {
796 loc1 = p1;
797 return (1);
798 }
799 } while (*p1++);
800 return (0);
801}
802
803#define uletter(c) (isalpha(c) || c == '_')
804
805advance(lp, ep)
806 register char *lp, *ep;
807{
808 register char *curlp;
809 char *sp, *sp1;
810 int c;
811
812 for (;;) switch (*ep++) {
813
814 case CCHR:
815/* useless
816 if (*ep & QUOTE) {
817 c = *ep++ & TRIM;
818 sp = braslist[c];
819 sp1 = braelist[c];
820 while (sp < sp1) {
821 if (!same(*sp, *lp))
822 return (0);
823 sp++, lp++;
824 }
825 continue;
826 }
827*/
828 if (!same(*ep, *lp))
829 return (0);
830 ep++, lp++;
831 continue;
832
833 case CDOT:
834 if (*lp++)
835 continue;
836 return (0);
837
838 case CDOL:
839 if (*lp == 0)
840 continue;
841 return (0);
842
d266c416 843 case CEOFC:
22316d4f
MH
844 loc2 = lp;
845 return (1);
846
847 case CCL:
848 if (cclass(ep, *lp++, 1)) {
849 ep += *ep;
850 continue;
851 }
852 return (0);
853
854 case NCCL:
855 if (cclass(ep, *lp++, 0)) {
856 ep += *ep;
857 continue;
858 }
859 return (0);
860
861 case CBRA:
862 braslist[*ep++] = lp;
863 continue;
864
865 case CKET:
866 braelist[*ep++] = lp;
867 continue;
868
869 case CDOT|STAR:
870 curlp = lp;
871 while (*lp++)
872 continue;
873 goto star;
874
875 case CCHR|STAR:
876 curlp = lp;
877 while (same(*lp, *ep))
878 lp++;
879 lp++;
880 ep++;
881 goto star;
882
883 case CCL|STAR:
884 case NCCL|STAR:
885 curlp = lp;
886 while (cclass(ep, *lp++, ep[-1] == (CCL|STAR)))
887 continue;
888 ep += *ep;
889 goto star;
890star:
891 do {
892 lp--;
893 if (lp == locs)
894 break;
895 if (advance(lp, ep))
896 return (1);
897 } while (lp > curlp);
898 return (0);
899
900 case CBRC:
901 if (lp == expbuf)
902 continue;
903 if ((isdigit(*lp) || uletter(*lp)) && !uletter(lp[-1]) && !isdigit(lp[-1]))
904 continue;
905 return (0);
906
907 case CLET:
908 if (!uletter(*lp) && !isdigit(*lp))
909 continue;
910 return (0);
911
912 default:
913 error("Re internal error");
914 }
915}
916
917cclass(set, c, af)
918 register char *set;
919 register int c;
920 int af;
921{
922 register int n;
923
924 if (c == 0)
925 return (0);
926 if (value(IGNORECASE) && isupper(c))
927 c = tolower(c);
928 n = *set++;
929 while (--n)
930 if (n > 2 && set[1] == '-') {
931 if (c >= (set[0] & TRIM) && c <= (set[2] & TRIM))
932 return (af);
933 set += 3;
934 n -= 2;
935 } else
936 if ((*set++ & TRIM) == c)
937 return (af);
938 return (!af);
939}