added sccs, Bill put in more buffers
[unix-history] / usr / src / usr.bin / ex / ex_re.c
CommitLineData
22316d4f
MH
1/* Copyright (c) 1979 Regents of the University of California */
2#include "ex.h"
3#include "ex_re.h"
4
5/*
6 * Global, substitute and regular expressions.
7 * Very similar to ed, with some re extensions and
8 * confirmed substitute.
9 */
10global(k)
11 bool k;
12{
13 register char *gp;
14 register int c;
15 register line *a1;
16 char globuf[GBSIZE], *Cwas;
17 int lines = lineDOL();
18 int oinglobal = inglobal;
19 char *oglobp = globp;
20
21 Cwas = Command;
22 /*
23 * States of inglobal:
24 * 0: ordinary - not in a global command.
25 * 1: text coming from some buffer, not tty.
26 * 2: like 1, but the source of the buffer is a global command.
27 * Hence you're only in a global command if inglobal==2. This
28 * strange sounding convention is historically derived from
29 * everybody simulating a global command.
30 */
31 if (inglobal==2)
32 error("Global within global@not allowed");
33 markDOT();
34 setall();
35 nonzero();
36 if (skipend())
37 error("Global needs re|Missing regular expression for global");
38 c = getchar();
39 ignore(compile(c, 1));
40 savere(scanre);
41 gp = globuf;
42 while ((c = getchar()) != '\n') {
43 switch (c) {
44
45 case EOF:
46 c = '\n';
47 goto brkwh;
48
49 case '\\':
50 c = getchar();
51 switch (c) {
52
53 case '\\':
54 ungetchar(c);
55 break;
56
57 case '\n':
58 break;
59
60 default:
61 *gp++ = '\\';
62 break;
63 }
64 break;
65 }
66 *gp++ = c;
67 if (gp >= &globuf[GBSIZE - 2])
68 error("Global command too long");
69 }
70brkwh:
71 ungetchar(c);
72out:
73 newline();
74 *gp++ = c;
75 *gp++ = 0;
887e3e0d 76 saveall();
22316d4f
MH
77 inglobal = 2;
78 for (a1 = one; a1 <= dol; a1++) {
79 *a1 &= ~01;
80 if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k)
81 *a1 |= 01;
82 }
83 /* should use gdelete from ed to avoid n**2 here on g/.../d */
22316d4f
MH
84 if (inopen)
85 inopen = -1;
86 for (a1 = one; a1 <= dol; a1++) {
87 if (*a1 & 01) {
88 *a1 &= ~01;
89 dot = a1;
90 globp = globuf;
91 commands(1, 1);
92 a1 = zero;
93 }
94 }
95 globp = oglobp;
96 inglobal = oinglobal;
97 endline = 1;
98 Command = Cwas;
99 netchHAD(lines);
100 setlastchar(EOF);
101 if (inopen) {
102 ungetchar(EOF);
103 inopen = 1;
104 }
105}
106
d266c416 107bool cflag;
22316d4f
MH
108int scount, slines, stotal;
109
110substitute(c)
111 int c;
112{
113 register line *addr;
114 register int n;
115 int gsubf;
116
117 gsubf = compsub(c);
887e3e0d 118 if(FIXUNDO)
22316d4f
MH
119 save12(), undkind = UNDCHANGE;
120 stotal = 0;
121 slines = 0;
122 for (addr = addr1; addr <= addr2; addr++) {
123 scount = 0;
124 if (dosubcon(0, addr) == 0)
125 continue;
126 if (gsubf) {
127#ifdef notdef
128 /*
129 * should check but loc2 is already munged.
130 * This needs a fancier check later.
131 */
132 if (loc1 == loc2)
133 error("substitution loop");
134#endif
135 while (*loc2)
136 if (dosubcon(1, addr) == 0)
137 break;
138 }
139 if (scount) {
140 stotal += scount;
141 slines++;
142 putmark(addr);
143 n = append(getsub, addr);
144 addr += n;
145 addr2 += n;
146 }
147 }
d266c416 148 if (stotal == 0 && !inglobal && !cflag)
22316d4f
MH
149 error("Fail|Substitute pattern match failed");
150 snote(stotal, slines);
151 return (stotal);
152}
153
154compsub(ch)
155{
156 register int seof, c, uselastre;
157 static int gsubf;
158
159 if (!value(EDCOMPATIBLE))
d266c416 160 gsubf = cflag = 0;
22316d4f
MH
161 uselastre = 0;
162 switch (ch) {
163
164 case 's':
165 ignore(skipwh());
166 seof = getchar();
167 if (endcmd(seof) || any(seof, "gcr")) {
168 ungetchar(seof);
169 goto redo;
170 }
171 if (isalpha(seof) || isdigit(seof))
172 error("Substitute needs re|Missing regular expression for substitute");
173 seof = compile(seof, 1);
174 uselastre = 1;
175 comprhs(seof);
176 gsubf = 0;
d266c416 177 cflag = 0;
22316d4f
MH
178 break;
179
180 case '~':
181 uselastre = 1;
182 /* fall into ... */
183 case '&':
184 redo:
185 if (re.Expbuf[0] == 0)
186 error("No previous re|No previous regular expression");
d266c416
MH
187 if (subre.Expbuf[0] == 0)
188 error("No previous substitute re|No previous substitute to repeat");
22316d4f
MH
189 break;
190 }
191 for (;;) {
192 c = getchar();
193 switch (c) {
194
195 case 'g':
196 gsubf = !gsubf;
197 continue;
198
199 case 'c':
d266c416 200 cflag = !cflag;
22316d4f
MH
201 continue;
202
203 case 'r':
204 uselastre = 1;
205 continue;
206
207 default:
208 ungetchar(c);
209 setcount();
210 newline();
211 if (uselastre)
212 savere(subre);
213 else
214 resre(subre);
215 return (gsubf);
216 }
217 }
218}
219
220comprhs(seof)
221 int seof;
222{
223 register char *rp, *orp;
224 register int c;
225 char orhsbuf[LBSIZE / 2];
226
227 rp = rhsbuf;
228 CP(orhsbuf, rp);
229 for (;;) {
230 c = getchar();
231 if (c == seof)
232 break;
233 switch (c) {
234
235 case '\\':
236 c = getchar();
237 if (c == EOF) {
238 ungetchar(c);
239 break;
240 }
241 if (value(MAGIC)) {
242 /*
243 * When "magic", \& turns into a plain &,
244 * and all other chars work fine quoted.
245 */
246 if (c != '&')
247 c |= QUOTE;
248 break;
249 }
250magic:
251 if (c == '~') {
252 for (orp = orhsbuf; *orp; *rp++ = *orp++)
253 if (rp >= &rhsbuf[LBSIZE / 2 + 1])
254 goto toobig;
255 continue;
256 }
257 c |= QUOTE;
258 break;
259
260 case '\n':
261 case EOF:
887e3e0d
MH
262 if (!(globp && globp[0])) {
263 ungetchar(c);
264 goto endrhs;
265 }
22316d4f
MH
266
267 case '~':
268 case '&':
269 if (value(MAGIC))
270 goto magic;
271 break;
272 }
273 if (rp >= &rhsbuf[LBSIZE / 2 - 1])
274toobig:
275 error("Replacement pattern too long@- limit 256 characters");
276 *rp++ = c;
277 }
278endrhs:
279 *rp++ = 0;
280}
281
282getsub()
283{
284 register char *p;
285
286 if ((p = linebp) == 0)
287 return (EOF);
288 strcLIN(p);
289 linebp = 0;
290 return (0);
291}
292
293dosubcon(f, a)
294 bool f;
295 line *a;
296{
297
298 if (execute(f, a) == 0)
299 return (0);
300 if (confirmed(a)) {
301 dosub();
302 scount++;
303 }
304 return (1);
305}
306
307confirmed(a)
308 line *a;
309{
310 register int c, ch;
311
d266c416 312 if (cflag == 0)
22316d4f
MH
313 return (1);
314 pofix();
315 pline(lineno(a));
316 if (inopen)
317 putchar('\n' | QUOTE);
318 c = column(loc1 - 1);
319 ugo(c - 1 + (inopen ? 1 : 0), ' ');
320 ugo(column(loc2 - 1) - c, '^');
321 flush();
322 ch = c = getkey();
323again:
324 if (c == '\r')
325 c = '\n';
326 if (inopen)
327 putchar(c), flush();
328 if (c != '\n' && c != EOF) {
329 c = getkey();
330 goto again;
331 }
332 noteinp();
333 return (ch == 'y');
334}
335
336getch()
337{
338 char c;
339
340 if (read(2, &c, 1) != 1)
341 return (EOF);
342 return (c & TRIM);
343}
344
/*
 * Output the character `with' cnt times; nothing is output when cnt
 * is zero or negative.
 */
ugo(cnt, with)
	int with;
	int cnt;
{

	while (cnt > 0) {
		putchar(with);
		cnt--;
	}
}
355
356int casecnt;
357bool destuc;
358
359dosub()
360{
361 register char *lp, *sp, *rp;
362 int c;
363
364 lp = linebuf;
365 sp = genbuf;
366 rp = rhsbuf;
367 while (lp < loc1)
368 *sp++ = *lp++;
369 casecnt = 0;
370 while (c = *rp++) {
371 if (c & QUOTE)
372 switch (c & TRIM) {
373
374 case '&':
375 sp = place(sp, loc1, loc2);
376 if (sp == 0)
377 goto ovflo;
378 continue;
379
380 case 'l':
381 casecnt = 1;
382 destuc = 0;
383 continue;
384
385 case 'L':
386 casecnt = LBSIZE;
387 destuc = 0;
388 continue;
389
390 case 'u':
391 casecnt = 1;
392 destuc = 1;
393 continue;
394
395 case 'U':
396 casecnt = LBSIZE;
397 destuc = 1;
398 continue;
399
400 case 'E':
401 case 'e':
402 casecnt = 0;
403 continue;
404 }
405 if (c < 0 && (c &= TRIM) >= '1' && c < nbra + '1') {
406 sp = place(sp, braslist[c - '1'], braelist[c - '1']);
407 if (sp == 0)
408 goto ovflo;
409 continue;
410 }
411 if (casecnt)
412 *sp++ = fixcase(c & TRIM);
413 else
414 *sp++ = c & TRIM;
415 if (sp >= &genbuf[LBSIZE])
416ovflo:
44232d5b 417 error("Line overflow@in substitute");
22316d4f
MH
418 }
419 lp = loc2;
420 loc2 = sp + (linebuf - genbuf);
421 while (*sp++ = *lp++)
422 if (sp >= &genbuf[LBSIZE])
423 goto ovflo;
424 strcLIN(genbuf);
425}
426
427fixcase(c)
428 register int c;
429{
430
431 if (casecnt == 0)
432 return (c);
433 casecnt--;
434 if (destuc) {
435 if (islower(c))
436 c = toupper(c);
437 } else
438 if (isupper(c))
439 c = tolower(c);
440 return (c);
441}
442
443char *
444place(sp, l1, l2)
445 register char *sp, *l1, *l2;
446{
447
448 while (l1 < l2) {
449 *sp++ = fixcase(*l1++);
450 if (sp >= &genbuf[LBSIZE])
451 return (0);
452 }
453 return (sp);
454}
455
/*
 * Report the number of substitutions made, when notable(total) says
 * the count is worth reporting.  The number of lines is added unless
 * it is 1 or equal to the total.
 */
snote(total, lines)
	register int total, lines;
{

	if (notable(total)) {
		printf(mesg("%d subs|%d substitutions"), total);
		if (lines != 1 && lines != total)
			printf(" on %d lines", lines);
		noonl();
		flush();
	}
}
468
469compile(eof, oknl)
470 int eof;
471 int oknl;
472{
473 register int c;
474 register char *ep;
475 char *lastep;
476 char bracket[NBRA], *bracketp, *rhsp;
477 int cclcnt;
478
479 if (isalpha(eof) || isdigit(eof))
480 error("Regular expressions cannot be delimited by letters or digits");
481 ep = expbuf;
482 c = getchar();
483 if (eof == '\\')
484 switch (c) {
485
486 case '/':
487 case '?':
488 if (scanre.Expbuf[0] == 0)
489error("No previous scan re|No previous scanning regular expression");
490 resre(scanre);
491 return (c);
492
493 case '&':
494 if (subre.Expbuf[0] == 0)
495error("No previous substitute re|No previous substitute regular expression");
496 resre(subre);
497 return (c);
498
499 default:
500 error("Badly formed re|Regular expression \\ must be followed by / or ?");
501 }
502 if (c == eof || c == '\n' || c == EOF) {
503 if (*ep == 0)
504 error("No previous re|No previous regular expression");
505 if (c == '\n' && oknl == 0)
506 error("Missing closing delimiter@for regular expression");
507 if (c != eof)
508 ungetchar(c);
509 return (eof);
510 }
511 bracketp = bracket;
512 nbra = 0;
513 circfl = 0;
514 if (c == '^') {
515 c = getchar();
516 circfl++;
517 }
518 ungetchar(c);
519 for (;;) {
520 if (ep >= &expbuf[ESIZE - 2])
521complex:
522 cerror("Re too complex|Regular expression too complicated");
523 c = getchar();
524 if (c == eof || c == EOF) {
525 if (bracketp != bracket)
526cerror("Unmatched \\(|More \\('s than \\)'s in regular expression");
d266c416 527 *ep++ = CEOFC;
22316d4f
MH
528 if (c == EOF)
529 ungetchar(c);
530 return (eof);
531 }
532 if (value(MAGIC)) {
533 if (c != '*' || ep == expbuf)
534 lastep = ep;
535 } else
536 if (c != '\\' || peekchar() != '*' || ep == expbuf)
537 lastep = ep;
538 switch (c) {
539
540 case '\\':
541 c = getchar();
542 switch (c) {
543
544 case '(':
545 if (nbra >= NBRA)
546cerror("Awash in \\('s!|Too many \\('d subexressions in a regular expression");
547 *bracketp++ = nbra;
548 *ep++ = CBRA;
549 *ep++ = nbra++;
550 continue;
551
552 case ')':
553 if (bracketp <= bracket)
554cerror("Extra \\)|More \\)'s than \\('s in regular expression");
555 *ep++ = CKET;
556 *ep++ = *--bracketp;
557 continue;
558
559 case '<':
560 *ep++ = CBRC;
561 continue;
562
563 case '>':
564 *ep++ = CLET;
565 continue;
566 }
567 if (value(MAGIC) == 0)
568magic:
569 switch (c) {
570
571 case '.':
572 *ep++ = CDOT;
573 continue;
574
575 case '~':
576 rhsp = rhsbuf;
577 while (*rhsp) {
578 if (*rhsp & QUOTE) {
579 c = *rhsp & TRIM;
580 if (c == '&')
581error("Replacement pattern contains &@- cannot use in re");
582 if (c >= '1' && c <= '9')
583error("Replacement pattern contains \\d@- cannot use in re");
584 }
585 if (ep >= &expbuf[ESIZE-2])
586 goto complex;
587 *ep++ = CCHR;
588 *ep++ = *rhsp++ & TRIM;
589 }
590 continue;
591
592 case '*':
593 if (ep == expbuf)
594 break;
595 if (*lastep == CBRA || *lastep == CKET)
596cerror("Illegal *|Can't * a \\( ... \\) in regular expression");
597 if (*lastep == CCHR && (lastep[1] & QUOTE))
598cerror("Illegal *|Can't * a \\n in regular expression");
599 *lastep |= STAR;
600 continue;
601
602 case '[':
603 *ep++ = CCL;
604 *ep++ = 0;
605 cclcnt = 1;
606 c = getchar();
607 if (c == '^') {
608 c = getchar();
609 ep[-2] = NCCL;
610 }
611 if (c == ']')
612cerror("Bad character class|Empty character class '[]' or '[^]' cannot match");
613 while (c != ']') {
614 if (c == '\\' && any(peekchar(), "]-^\\"))
615 c = getchar() | QUOTE;
616 if (c == '\n' || c == EOF)
617 cerror("Missing ]");
618 *ep++ = c;
619 cclcnt++;
620 if (ep >= &expbuf[ESIZE])
621 goto complex;
622 c = getchar();
623 }
624 lastep[1] = cclcnt;
625 continue;
626 }
627 if (c == EOF) {
628 ungetchar(EOF);
629 c = '\\';
630 goto defchar;
631 }
632 *ep++ = CCHR;
633 if (c == '\n')
634cerror("No newlines in re's|Can't escape newlines into regular expressions");
635/*
636 if (c < '1' || c > NBRA + '1') {
637*/
638 *ep++ = c;
639 continue;
640/*
641 }
642 c -= '1';
643 if (c >= nbra)
644cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s");
645 *ep++ = c | QUOTE;
646 continue;
647*/
648
649 case '\n':
650 if (oknl) {
651 ungetchar(c);
d266c416 652 *ep++ = CEOFC;
22316d4f
MH
653 return (eof);
654 }
655cerror("Badly formed re|Missing closing delimiter for regular expression");
656
657 case '$':
658 if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') {
659 *ep++ = CDOL;
660 continue;
661 }
662 goto defchar;
663
664 case '.':
665 case '~':
666 case '*':
667 case '[':
668 if (value(MAGIC))
669 goto magic;
670defchar:
671 default:
672 *ep++ = CCHR;
673 *ep++ = c;
674 continue;
675 }
676 }
677}
678
679cerror(s)
680 char *s;
681{
682
683 expbuf[0] = 0;
684 error(s);
685}
686
687same(a, b)
688 register int a, b;
689{
690
691 return (a == b || value(IGNORECASE) &&
692 ((islower(a) && toupper(a) == b) || (islower(b) && toupper(b) == a)));
693}
694
/*
 * Set by execute: the end of the previous match when continuing along
 * a line, otherwise 0.  The star backtracking in advance gives up when
 * it backs up to this position.
 */
char	*locs;
696
697execute(gf, addr)
698 line *addr;
699{
700 register char *p1, *p2;
701 register int c;
702
703 if (gf) {
704 if (circfl)
705 return (0);
22316d4f
MH
706 locs = p1 = loc2;
707 } else {
708 if (addr == zero)
709 return (0);
710 p1 = linebuf;
711 getline(*addr);
712 locs = 0;
713 }
714 p2 = expbuf;
715 if (circfl) {
716 loc1 = p1;
717 return (advance(p1, p2));
718 }
719 /* fast check for first character */
720 if (*p2 == CCHR) {
721 c = p2[1];
722 do {
723 if (c != *p1 && (!value(IGNORECASE) ||
724 !((islower(c) && toupper(c) == *p1) ||
725 (islower(*p1) && toupper(*p1) == c))))
726 continue;
727 if (advance(p1, p2)) {
728 loc1 = p1;
729 return (1);
730 }
731 } while (*p1++);
732 return (0);
733 }
734 /* regular algorithm */
735 do {
736 if (advance(p1, p2)) {
737 loc1 = p1;
738 return (1);
739 }
740 } while (*p1++);
741 return (0);
742}
743
/* True for a letter or underscore; the argument is evaluated twice. */
#define	uletter(c)	(isalpha(c) || (c) == '_')
745
746advance(lp, ep)
747 register char *lp, *ep;
748{
749 register char *curlp;
750 char *sp, *sp1;
751 int c;
752
753 for (;;) switch (*ep++) {
754
755 case CCHR:
756/* useless
757 if (*ep & QUOTE) {
758 c = *ep++ & TRIM;
759 sp = braslist[c];
760 sp1 = braelist[c];
761 while (sp < sp1) {
762 if (!same(*sp, *lp))
763 return (0);
764 sp++, lp++;
765 }
766 continue;
767 }
768*/
769 if (!same(*ep, *lp))
770 return (0);
771 ep++, lp++;
772 continue;
773
774 case CDOT:
775 if (*lp++)
776 continue;
777 return (0);
778
779 case CDOL:
780 if (*lp == 0)
781 continue;
782 return (0);
783
d266c416 784 case CEOFC:
22316d4f
MH
785 loc2 = lp;
786 return (1);
787
788 case CCL:
789 if (cclass(ep, *lp++, 1)) {
790 ep += *ep;
791 continue;
792 }
793 return (0);
794
795 case NCCL:
796 if (cclass(ep, *lp++, 0)) {
797 ep += *ep;
798 continue;
799 }
800 return (0);
801
802 case CBRA:
803 braslist[*ep++] = lp;
804 continue;
805
806 case CKET:
807 braelist[*ep++] = lp;
808 continue;
809
810 case CDOT|STAR:
811 curlp = lp;
812 while (*lp++)
813 continue;
814 goto star;
815
816 case CCHR|STAR:
817 curlp = lp;
818 while (same(*lp, *ep))
819 lp++;
820 lp++;
821 ep++;
822 goto star;
823
824 case CCL|STAR:
825 case NCCL|STAR:
826 curlp = lp;
827 while (cclass(ep, *lp++, ep[-1] == (CCL|STAR)))
828 continue;
829 ep += *ep;
830 goto star;
831star:
832 do {
833 lp--;
834 if (lp == locs)
835 break;
836 if (advance(lp, ep))
837 return (1);
838 } while (lp > curlp);
839 return (0);
840
841 case CBRC:
842 if (lp == expbuf)
843 continue;
844 if ((isdigit(*lp) || uletter(*lp)) && !uletter(lp[-1]) && !isdigit(lp[-1]))
845 continue;
846 return (0);
847
848 case CLET:
849 if (!uletter(*lp) && !isdigit(*lp))
850 continue;
851 return (0);
852
853 default:
854 error("Re internal error");
855 }
856}
857
858cclass(set, c, af)
859 register char *set;
860 register int c;
861 int af;
862{
863 register int n;
864
865 if (c == 0)
866 return (0);
867 if (value(IGNORECASE) && isupper(c))
868 c = tolower(c);
869 n = *set++;
870 while (--n)
871 if (n > 2 && set[1] == '-') {
872 if (c >= (set[0] & TRIM) && c <= (set[2] & TRIM))
873 return (af);
874 set += 3;
875 n -= 2;
876 } else
877 if ((*set++ & TRIM) == c)
878 return (af);
879 return (!af);
880}