BSD 4 release
[unix-history] / usr / src / cmd / ex / ex_re.c
CommitLineData
7c4625ef 1/* Copyright (c) 1980 Regents of the University of California */
31cef89c 2static char *sccsid = "@(#)ex_re.c 6.2 10/23/80";
22316d4f
MH
3#include "ex.h"
4#include "ex_re.h"
5
6/*
7 * Global, substitute and regular expressions.
8 * Very similar to ed, with some re extensions and
9 * confirmed substitute.
10 */
11global(k)
12 bool k;
13{
14 register char *gp;
15 register int c;
16 register line *a1;
17 char globuf[GBSIZE], *Cwas;
18 int lines = lineDOL();
19 int oinglobal = inglobal;
20 char *oglobp = globp;
21
22 Cwas = Command;
23 /*
24 * States of inglobal:
25 * 0: ordinary - not in a global command.
26 * 1: text coming from some buffer, not tty.
27 * 2: like 1, but the source of the buffer is a global command.
28 * Hence you're only in a global command if inglobal==2. This
29 * strange sounding convention is historically derived from
30 * everybody simulating a global command.
31 */
32 if (inglobal==2)
33 error("Global within global@not allowed");
34 markDOT();
35 setall();
36 nonzero();
37 if (skipend())
38 error("Global needs re|Missing regular expression for global");
39 c = getchar();
40 ignore(compile(c, 1));
41 savere(scanre);
42 gp = globuf;
43 while ((c = getchar()) != '\n') {
44 switch (c) {
45
46 case EOF:
47 c = '\n';
48 goto brkwh;
49
50 case '\\':
51 c = getchar();
52 switch (c) {
53
54 case '\\':
55 ungetchar(c);
56 break;
57
58 case '\n':
59 break;
60
61 default:
62 *gp++ = '\\';
63 break;
64 }
65 break;
66 }
67 *gp++ = c;
68 if (gp >= &globuf[GBSIZE - 2])
69 error("Global command too long");
70 }
71brkwh:
72 ungetchar(c);
73out:
74 newline();
75 *gp++ = c;
76 *gp++ = 0;
887e3e0d 77 saveall();
22316d4f
MH
78 inglobal = 2;
79 for (a1 = one; a1 <= dol; a1++) {
80 *a1 &= ~01;
81 if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k)
82 *a1 |= 01;
83 }
f0f2d980
MH
84#ifdef notdef
85/*
86 * This code is commented out for now. The problem is that we don't
87 * fix up the undo area the way we should. Basically, I think what has
88 * to be done is to copy the undo area down (since we shrunk everything)
89 * and move the various pointers into it down too. I will do this later
90 * when I have time. (Mark, 10-20-80)
91 */
04379bab
MH
92 /*
93 * Special case: g/.../d (avoid n^2 algorithm)
94 */
95 if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') {
96 gdelete();
97 return;
98 }
f0f2d980 99#endif
22316d4f
MH
100 if (inopen)
101 inopen = -1;
04379bab
MH
102 /*
103 * Now for each marked line, set dot there and do the commands.
104 * Note the n^2 behavior here for lots of lines matching.
105 * This is really needed: in some cases you could delete lines,
106 * causing a marked line to be moved before a1 and missed if
107 * we didn't restart at zero each time.
108 */
22316d4f
MH
109 for (a1 = one; a1 <= dol; a1++) {
110 if (*a1 & 01) {
111 *a1 &= ~01;
112 dot = a1;
113 globp = globuf;
114 commands(1, 1);
115 a1 = zero;
116 }
117 }
118 globp = oglobp;
119 inglobal = oinglobal;
120 endline = 1;
121 Command = Cwas;
122 netchHAD(lines);
123 setlastchar(EOF);
124 if (inopen) {
125 ungetchar(EOF);
126 inopen = 1;
127 }
128}
129
04379bab
MH
130/*
131 * gdelete: delete inside a global command. Handles the
132 * special case g/r.e./d. All lines to be deleted have
133 * already been marked. Squeeze the remaining lines together.
134 * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/,
135 * and g/r.e./.,/r.e.2/d are not treated specially. There is no
136 * good reason for this except the question: where to you draw the line?
137 */
138gdelete()
139{
140 register line *a1, *a2, *a3;
141
142 a3 = dol;
143 /* find first marked line. can skip all before it */
144 for (a1=zero; (*a1&01)==0; a1++)
145 if (a1>=a3)
146 return;
147 /* copy down unmarked lines, compacting as we go. */
148 for (a2=a1+1; a2<=a3;) {
149 if (*a2&01) {
150 a2++; /* line is marked, skip it */
151 dot = a1; /* dot left after line deletion */
152 } else
153 *a1++ = *a2++; /* unmarked, copy it */
154 }
155 dol = a1-1;
156 if (dot>dol)
157 dot = dol;
158 change();
159}
160
d266c416 161bool cflag;
22316d4f
MH
162int scount, slines, stotal;
163
164substitute(c)
165 int c;
166{
167 register line *addr;
168 register int n;
04379bab 169 int gsubf, hopcount = 0;
22316d4f
MH
170
171 gsubf = compsub(c);
887e3e0d 172 if(FIXUNDO)
22316d4f
MH
173 save12(), undkind = UNDCHANGE;
174 stotal = 0;
175 slines = 0;
176 for (addr = addr1; addr <= addr2; addr++) {
177 scount = 0;
178 if (dosubcon(0, addr) == 0)
179 continue;
180 if (gsubf) {
22316d4f 181 /*
04379bab
MH
182 * The loop can happen from s/\</&/g
183 * but we don't want to break other, reasonable cases.
22316d4f 184 */
04379bab
MH
185 while (*loc2) {
186 if (++hopcount > sizeof linebuf)
187 error("substitution loop");
22316d4f
MH
188 if (dosubcon(1, addr) == 0)
189 break;
04379bab 190 }
22316d4f
MH
191 }
192 if (scount) {
193 stotal += scount;
194 slines++;
195 putmark(addr);
196 n = append(getsub, addr);
197 addr += n;
198 addr2 += n;
199 }
200 }
d266c416 201 if (stotal == 0 && !inglobal && !cflag)
22316d4f
MH
202 error("Fail|Substitute pattern match failed");
203 snote(stotal, slines);
204 return (stotal);
205}
206
207compsub(ch)
208{
209 register int seof, c, uselastre;
210 static int gsubf;
211
212 if (!value(EDCOMPATIBLE))
d266c416 213 gsubf = cflag = 0;
22316d4f
MH
214 uselastre = 0;
215 switch (ch) {
216
217 case 's':
218 ignore(skipwh());
219 seof = getchar();
220 if (endcmd(seof) || any(seof, "gcr")) {
221 ungetchar(seof);
222 goto redo;
223 }
224 if (isalpha(seof) || isdigit(seof))
225 error("Substitute needs re|Missing regular expression for substitute");
226 seof = compile(seof, 1);
227 uselastre = 1;
228 comprhs(seof);
229 gsubf = 0;
d266c416 230 cflag = 0;
22316d4f
MH
231 break;
232
233 case '~':
234 uselastre = 1;
235 /* fall into ... */
236 case '&':
237 redo:
238 if (re.Expbuf[0] == 0)
239 error("No previous re|No previous regular expression");
d266c416
MH
240 if (subre.Expbuf[0] == 0)
241 error("No previous substitute re|No previous substitute to repeat");
22316d4f
MH
242 break;
243 }
244 for (;;) {
245 c = getchar();
246 switch (c) {
247
248 case 'g':
249 gsubf = !gsubf;
250 continue;
251
252 case 'c':
d266c416 253 cflag = !cflag;
22316d4f
MH
254 continue;
255
256 case 'r':
257 uselastre = 1;
258 continue;
259
260 default:
261 ungetchar(c);
262 setcount();
263 newline();
264 if (uselastre)
265 savere(subre);
266 else
267 resre(subre);
268 return (gsubf);
269 }
270 }
271}
272
273comprhs(seof)
274 int seof;
275{
276 register char *rp, *orp;
277 register int c;
04379bab 278 char orhsbuf[RHSSIZE];
22316d4f
MH
279
280 rp = rhsbuf;
281 CP(orhsbuf, rp);
282 for (;;) {
283 c = getchar();
284 if (c == seof)
285 break;
286 switch (c) {
287
288 case '\\':
289 c = getchar();
290 if (c == EOF) {
291 ungetchar(c);
292 break;
293 }
294 if (value(MAGIC)) {
295 /*
296 * When "magic", \& turns into a plain &,
297 * and all other chars work fine quoted.
298 */
299 if (c != '&')
300 c |= QUOTE;
301 break;
302 }
303magic:
304 if (c == '~') {
305 for (orp = orhsbuf; *orp; *rp++ = *orp++)
04379bab 306 if (rp >= &rhsbuf[RHSSIZE - 1])
22316d4f
MH
307 goto toobig;
308 continue;
309 }
310 c |= QUOTE;
311 break;
312
313 case '\n':
314 case EOF:
887e3e0d
MH
315 if (!(globp && globp[0])) {
316 ungetchar(c);
317 goto endrhs;
318 }
22316d4f
MH
319
320 case '~':
321 case '&':
322 if (value(MAGIC))
323 goto magic;
324 break;
325 }
04379bab 326 if (rp >= &rhsbuf[RHSSIZE - 1]) {
22316d4f 327toobig:
04379bab 328 *rp = 0;
22316d4f 329 error("Replacement pattern too long@- limit 256 characters");
04379bab 330 }
22316d4f
MH
331 *rp++ = c;
332 }
333endrhs:
334 *rp++ = 0;
335}
336
337getsub()
338{
339 register char *p;
340
341 if ((p = linebp) == 0)
342 return (EOF);
343 strcLIN(p);
344 linebp = 0;
345 return (0);
346}
347
348dosubcon(f, a)
349 bool f;
350 line *a;
351{
352
353 if (execute(f, a) == 0)
354 return (0);
355 if (confirmed(a)) {
356 dosub();
357 scount++;
358 }
359 return (1);
360}
361
362confirmed(a)
363 line *a;
364{
365 register int c, ch;
366
d266c416 367 if (cflag == 0)
22316d4f
MH
368 return (1);
369 pofix();
370 pline(lineno(a));
371 if (inopen)
372 putchar('\n' | QUOTE);
373 c = column(loc1 - 1);
374 ugo(c - 1 + (inopen ? 1 : 0), ' ');
375 ugo(column(loc2 - 1) - c, '^');
376 flush();
377 ch = c = getkey();
378again:
379 if (c == '\r')
380 c = '\n';
381 if (inopen)
382 putchar(c), flush();
383 if (c != '\n' && c != EOF) {
384 c = getkey();
385 goto again;
386 }
387 noteinp();
388 return (ch == 'y');
389}
390
391getch()
392{
393 char c;
394
395 if (read(2, &c, 1) != 1)
396 return (EOF);
397 return (c & TRIM);
398}
399
/*
 * ugo: output the character "with" cnt times.
 * Nothing is output when cnt is zero or negative.
 */
ugo(cnt, with)
	int with;
	int cnt;
{

	for (; cnt > 0; cnt--)
		putchar(with);
}
410
411int casecnt;
412bool destuc;
413
414dosub()
415{
416 register char *lp, *sp, *rp;
417 int c;
418
419 lp = linebuf;
420 sp = genbuf;
421 rp = rhsbuf;
422 while (lp < loc1)
423 *sp++ = *lp++;
424 casecnt = 0;
425 while (c = *rp++) {
426 if (c & QUOTE)
427 switch (c & TRIM) {
428
429 case '&':
430 sp = place(sp, loc1, loc2);
431 if (sp == 0)
432 goto ovflo;
433 continue;
434
435 case 'l':
436 casecnt = 1;
437 destuc = 0;
438 continue;
439
440 case 'L':
441 casecnt = LBSIZE;
442 destuc = 0;
443 continue;
444
445 case 'u':
446 casecnt = 1;
447 destuc = 1;
448 continue;
449
450 case 'U':
451 casecnt = LBSIZE;
452 destuc = 1;
453 continue;
454
455 case 'E':
456 case 'e':
457 casecnt = 0;
458 continue;
459 }
460 if (c < 0 && (c &= TRIM) >= '1' && c < nbra + '1') {
461 sp = place(sp, braslist[c - '1'], braelist[c - '1']);
462 if (sp == 0)
463 goto ovflo;
464 continue;
465 }
466 if (casecnt)
467 *sp++ = fixcase(c & TRIM);
468 else
469 *sp++ = c & TRIM;
470 if (sp >= &genbuf[LBSIZE])
471ovflo:
44232d5b 472 error("Line overflow@in substitute");
22316d4f
MH
473 }
474 lp = loc2;
475 loc2 = sp + (linebuf - genbuf);
476 while (*sp++ = *lp++)
477 if (sp >= &genbuf[LBSIZE])
478 goto ovflo;
479 strcLIN(genbuf);
480}
481
482fixcase(c)
483 register int c;
484{
485
486 if (casecnt == 0)
487 return (c);
488 casecnt--;
489 if (destuc) {
490 if (islower(c))
491 c = toupper(c);
492 } else
493 if (isupper(c))
494 c = tolower(c);
495 return (c);
496}
497
498char *
499place(sp, l1, l2)
500 register char *sp, *l1, *l2;
501{
502
503 while (l1 < l2) {
504 *sp++ = fixcase(*l1++);
505 if (sp >= &genbuf[LBSIZE])
506 return (0);
507 }
508 return (sp);
509}
510
/*
 * snote: report how many substitutions were made.
 * Nothing is printed unless notable(total) says the count is worth
 * mentioning.  The line count is added only when it is neither 1 nor
 * equal to the substitution count.
 */
snote(total, lines)
	register int total, lines;
{

	if (notable(total)) {
		printf(mesg("%d subs|%d substitutions"), total);
		if (!(lines == 1 || lines == total))
			printf(" on %d lines", lines);
		noonl();
		flush();
	}
}
523
524compile(eof, oknl)
525 int eof;
526 int oknl;
527{
528 register int c;
529 register char *ep;
530 char *lastep;
531 char bracket[NBRA], *bracketp, *rhsp;
532 int cclcnt;
533
534 if (isalpha(eof) || isdigit(eof))
535 error("Regular expressions cannot be delimited by letters or digits");
536 ep = expbuf;
537 c = getchar();
538 if (eof == '\\')
539 switch (c) {
540
541 case '/':
542 case '?':
543 if (scanre.Expbuf[0] == 0)
544error("No previous scan re|No previous scanning regular expression");
545 resre(scanre);
546 return (c);
547
548 case '&':
549 if (subre.Expbuf[0] == 0)
550error("No previous substitute re|No previous substitute regular expression");
551 resre(subre);
552 return (c);
553
554 default:
555 error("Badly formed re|Regular expression \\ must be followed by / or ?");
556 }
557 if (c == eof || c == '\n' || c == EOF) {
558 if (*ep == 0)
559 error("No previous re|No previous regular expression");
560 if (c == '\n' && oknl == 0)
561 error("Missing closing delimiter@for regular expression");
562 if (c != eof)
563 ungetchar(c);
564 return (eof);
565 }
566 bracketp = bracket;
567 nbra = 0;
568 circfl = 0;
569 if (c == '^') {
570 c = getchar();
571 circfl++;
572 }
573 ungetchar(c);
574 for (;;) {
575 if (ep >= &expbuf[ESIZE - 2])
576complex:
577 cerror("Re too complex|Regular expression too complicated");
578 c = getchar();
579 if (c == eof || c == EOF) {
580 if (bracketp != bracket)
581cerror("Unmatched \\(|More \\('s than \\)'s in regular expression");
d266c416 582 *ep++ = CEOFC;
22316d4f
MH
583 if (c == EOF)
584 ungetchar(c);
585 return (eof);
586 }
587 if (value(MAGIC)) {
588 if (c != '*' || ep == expbuf)
589 lastep = ep;
590 } else
591 if (c != '\\' || peekchar() != '*' || ep == expbuf)
592 lastep = ep;
593 switch (c) {
594
595 case '\\':
596 c = getchar();
597 switch (c) {
598
599 case '(':
600 if (nbra >= NBRA)
601cerror("Awash in \\('s!|Too many \\('d subexressions in a regular expression");
602 *bracketp++ = nbra;
603 *ep++ = CBRA;
604 *ep++ = nbra++;
605 continue;
606
607 case ')':
608 if (bracketp <= bracket)
609cerror("Extra \\)|More \\)'s than \\('s in regular expression");
610 *ep++ = CKET;
611 *ep++ = *--bracketp;
612 continue;
613
614 case '<':
615 *ep++ = CBRC;
616 continue;
617
618 case '>':
619 *ep++ = CLET;
620 continue;
621 }
622 if (value(MAGIC) == 0)
623magic:
624 switch (c) {
625
626 case '.':
627 *ep++ = CDOT;
628 continue;
629
630 case '~':
631 rhsp = rhsbuf;
632 while (*rhsp) {
633 if (*rhsp & QUOTE) {
634 c = *rhsp & TRIM;
635 if (c == '&')
636error("Replacement pattern contains &@- cannot use in re");
637 if (c >= '1' && c <= '9')
638error("Replacement pattern contains \\d@- cannot use in re");
639 }
640 if (ep >= &expbuf[ESIZE-2])
641 goto complex;
642 *ep++ = CCHR;
643 *ep++ = *rhsp++ & TRIM;
644 }
645 continue;
646
647 case '*':
648 if (ep == expbuf)
649 break;
650 if (*lastep == CBRA || *lastep == CKET)
651cerror("Illegal *|Can't * a \\( ... \\) in regular expression");
652 if (*lastep == CCHR && (lastep[1] & QUOTE))
653cerror("Illegal *|Can't * a \\n in regular expression");
654 *lastep |= STAR;
655 continue;
656
657 case '[':
658 *ep++ = CCL;
659 *ep++ = 0;
660 cclcnt = 1;
661 c = getchar();
662 if (c == '^') {
663 c = getchar();
664 ep[-2] = NCCL;
665 }
666 if (c == ']')
667cerror("Bad character class|Empty character class '[]' or '[^]' cannot match");
668 while (c != ']') {
669 if (c == '\\' && any(peekchar(), "]-^\\"))
670 c = getchar() | QUOTE;
671 if (c == '\n' || c == EOF)
672 cerror("Missing ]");
673 *ep++ = c;
674 cclcnt++;
675 if (ep >= &expbuf[ESIZE])
676 goto complex;
677 c = getchar();
678 }
679 lastep[1] = cclcnt;
680 continue;
681 }
682 if (c == EOF) {
683 ungetchar(EOF);
684 c = '\\';
685 goto defchar;
686 }
687 *ep++ = CCHR;
688 if (c == '\n')
689cerror("No newlines in re's|Can't escape newlines into regular expressions");
690/*
691 if (c < '1' || c > NBRA + '1') {
692*/
693 *ep++ = c;
694 continue;
695/*
696 }
697 c -= '1';
698 if (c >= nbra)
699cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s");
700 *ep++ = c | QUOTE;
701 continue;
702*/
703
704 case '\n':
705 if (oknl) {
706 ungetchar(c);
d266c416 707 *ep++ = CEOFC;
22316d4f
MH
708 return (eof);
709 }
710cerror("Badly formed re|Missing closing delimiter for regular expression");
711
712 case '$':
713 if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') {
714 *ep++ = CDOL;
715 continue;
716 }
717 goto defchar;
718
719 case '.':
720 case '~':
721 case '*':
722 case '[':
723 if (value(MAGIC))
724 goto magic;
725defchar:
726 default:
727 *ep++ = CCHR;
728 *ep++ = c;
729 continue;
730 }
731 }
732}
733
734cerror(s)
735 char *s;
736{
737
738 expbuf[0] = 0;
739 error(s);
740}
741
742same(a, b)
743 register int a, b;
744{
745
746 return (a == b || value(IGNORECASE) &&
747 ((islower(a) && toupper(a) == b) || (islower(b) && toupper(b) == a)));
748}
749
750char *locs;
751
752execute(gf, addr)
753 line *addr;
754{
755 register char *p1, *p2;
756 register int c;
757
758 if (gf) {
759 if (circfl)
760 return (0);
22316d4f
MH
761 locs = p1 = loc2;
762 } else {
763 if (addr == zero)
764 return (0);
765 p1 = linebuf;
766 getline(*addr);
767 locs = 0;
768 }
769 p2 = expbuf;
770 if (circfl) {
771 loc1 = p1;
772 return (advance(p1, p2));
773 }
774 /* fast check for first character */
775 if (*p2 == CCHR) {
776 c = p2[1];
777 do {
778 if (c != *p1 && (!value(IGNORECASE) ||
779 !((islower(c) && toupper(c) == *p1) ||
780 (islower(*p1) && toupper(*p1) == c))))
781 continue;
782 if (advance(p1, p2)) {
783 loc1 = p1;
784 return (1);
785 }
786 } while (*p1++);
787 return (0);
788 }
789 /* regular algorithm */
790 do {
791 if (advance(p1, p2)) {
792 loc1 = p1;
793 return (1);
794 }
795 } while (*p1++);
796 return (0);
797}
798
799#define uletter(c) (isalpha(c) || c == '_')
800
801advance(lp, ep)
802 register char *lp, *ep;
803{
804 register char *curlp;
805 char *sp, *sp1;
806 int c;
807
808 for (;;) switch (*ep++) {
809
810 case CCHR:
811/* useless
812 if (*ep & QUOTE) {
813 c = *ep++ & TRIM;
814 sp = braslist[c];
815 sp1 = braelist[c];
816 while (sp < sp1) {
817 if (!same(*sp, *lp))
818 return (0);
819 sp++, lp++;
820 }
821 continue;
822 }
823*/
824 if (!same(*ep, *lp))
825 return (0);
826 ep++, lp++;
827 continue;
828
829 case CDOT:
830 if (*lp++)
831 continue;
832 return (0);
833
834 case CDOL:
835 if (*lp == 0)
836 continue;
837 return (0);
838
d266c416 839 case CEOFC:
22316d4f
MH
840 loc2 = lp;
841 return (1);
842
843 case CCL:
844 if (cclass(ep, *lp++, 1)) {
845 ep += *ep;
846 continue;
847 }
848 return (0);
849
850 case NCCL:
851 if (cclass(ep, *lp++, 0)) {
852 ep += *ep;
853 continue;
854 }
855 return (0);
856
857 case CBRA:
858 braslist[*ep++] = lp;
859 continue;
860
861 case CKET:
862 braelist[*ep++] = lp;
863 continue;
864
865 case CDOT|STAR:
866 curlp = lp;
867 while (*lp++)
868 continue;
869 goto star;
870
871 case CCHR|STAR:
872 curlp = lp;
873 while (same(*lp, *ep))
874 lp++;
875 lp++;
876 ep++;
877 goto star;
878
879 case CCL|STAR:
880 case NCCL|STAR:
881 curlp = lp;
882 while (cclass(ep, *lp++, ep[-1] == (CCL|STAR)))
883 continue;
884 ep += *ep;
885 goto star;
886star:
887 do {
888 lp--;
889 if (lp == locs)
890 break;
891 if (advance(lp, ep))
892 return (1);
893 } while (lp > curlp);
894 return (0);
895
896 case CBRC:
897 if (lp == expbuf)
898 continue;
899 if ((isdigit(*lp) || uletter(*lp)) && !uletter(lp[-1]) && !isdigit(lp[-1]))
900 continue;
901 return (0);
902
903 case CLET:
904 if (!uletter(*lp) && !isdigit(*lp))
905 continue;
906 return (0);
907
908 default:
909 error("Re internal error");
910 }
911}
912
913cclass(set, c, af)
914 register char *set;
915 register int c;
916 int af;
917{
918 register int n;
919
920 if (c == 0)
921 return (0);
922 if (value(IGNORECASE) && isupper(c))
923 c = tolower(c);
924 n = *set++;
925 while (--n)
926 if (n > 2 && set[1] == '-') {
927 if (c >= (set[0] & TRIM) && c <= (set[2] & TRIM))
928 return (af);
929 set += 3;
930 n -= 2;
931 } else
932 if ((*set++ & TRIM) == c)
933 return (af);
934 return (!af);
935}