/*
 * unix-history: usr/src/usr.bin/ex/ex_re.c
 * Commit 22316d4f (MH): date and time created 80/07/31 23:00:20 by mark.
 */
1/* Copyright (c) 1979 Regents of the University of California */
2#include "ex.h"
3#include "ex_re.h"
4
/*
 * Global, substitute and regular expressions.
 * Very similar to ed, with some regular expression extensions
 * and a confirmed substitute.
 */
10global(k)
11 bool k;
12{
13 register char *gp;
14 register int c;
15 register line *a1;
16 char globuf[GBSIZE], *Cwas;
17 int lines = lineDOL();
18 int oinglobal = inglobal;
19 char *oglobp = globp;
20
21 Cwas = Command;
22 /*
23 * States of inglobal:
24 * 0: ordinary - not in a global command.
25 * 1: text coming from some buffer, not tty.
26 * 2: like 1, but the source of the buffer is a global command.
27 * Hence you're only in a global command if inglobal==2. This
28 * strange sounding convention is historically derived from
29 * everybody simulating a global command.
30 */
31 if (inglobal==2)
32 error("Global within global@not allowed");
33 markDOT();
34 setall();
35 nonzero();
36 if (skipend())
37 error("Global needs re|Missing regular expression for global");
38 c = getchar();
39 ignore(compile(c, 1));
40 savere(scanre);
41 gp = globuf;
42 while ((c = getchar()) != '\n') {
43 switch (c) {
44
45 case EOF:
46 c = '\n';
47 goto brkwh;
48
49 case '\\':
50 c = getchar();
51 switch (c) {
52
53 case '\\':
54 ungetchar(c);
55 break;
56
57 case '\n':
58 break;
59
60 default:
61 *gp++ = '\\';
62 break;
63 }
64 break;
65 }
66 *gp++ = c;
67 if (gp >= &globuf[GBSIZE - 2])
68 error("Global command too long");
69 }
70brkwh:
71 ungetchar(c);
72out:
73 newline();
74 *gp++ = c;
75 *gp++ = 0;
76 inglobal = 2;
77 for (a1 = one; a1 <= dol; a1++) {
78 *a1 &= ~01;
79 if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k)
80 *a1 |= 01;
81 }
82 /* should use gdelete from ed to avoid n**2 here on g/.../d */
83 saveall();
84 if (inopen)
85 inopen = -1;
86 for (a1 = one; a1 <= dol; a1++) {
87 if (*a1 & 01) {
88 *a1 &= ~01;
89 dot = a1;
90 globp = globuf;
91 commands(1, 1);
92 a1 = zero;
93 }
94 }
95 globp = oglobp;
96 inglobal = oinglobal;
97 endline = 1;
98 Command = Cwas;
99 netchHAD(lines);
100 setlastchar(EOF);
101 if (inopen) {
102 ungetchar(EOF);
103 inopen = 1;
104 }
105}
106
107bool xflag;
108int scount, slines, stotal;
109
110substitute(c)
111 int c;
112{
113 register line *addr;
114 register int n;
115 int gsubf;
116
117 gsubf = compsub(c);
118 if (!inglobal)
119 save12(), undkind = UNDCHANGE;
120 stotal = 0;
121 slines = 0;
122 for (addr = addr1; addr <= addr2; addr++) {
123 scount = 0;
124 if (dosubcon(0, addr) == 0)
125 continue;
126 if (gsubf) {
127#ifdef notdef
128 /*
129 * should check but loc2 is already munged.
130 * This needs a fancier check later.
131 */
132 if (loc1 == loc2)
133 error("substitution loop");
134#endif
135 while (*loc2)
136 if (dosubcon(1, addr) == 0)
137 break;
138 }
139 if (scount) {
140 stotal += scount;
141 slines++;
142 putmark(addr);
143 n = append(getsub, addr);
144 addr += n;
145 addr2 += n;
146 }
147 }
148 if (stotal == 0 && !inglobal && !xflag)
149 error("Fail|Substitute pattern match failed");
150 snote(stotal, slines);
151 return (stotal);
152}
153
154compsub(ch)
155{
156 register int seof, c, uselastre;
157 static int gsubf;
158
159 if (!value(EDCOMPATIBLE))
160 gsubf = xflag = 0;
161 uselastre = 0;
162 switch (ch) {
163
164 case 's':
165 ignore(skipwh());
166 seof = getchar();
167 if (endcmd(seof) || any(seof, "gcr")) {
168 ungetchar(seof);
169 goto redo;
170 }
171 if (isalpha(seof) || isdigit(seof))
172 error("Substitute needs re|Missing regular expression for substitute");
173 seof = compile(seof, 1);
174 uselastre = 1;
175 comprhs(seof);
176 gsubf = 0;
177 xflag = 0;
178 break;
179
180 case '~':
181 uselastre = 1;
182 /* fall into ... */
183 case '&':
184 redo:
185 if (re.Expbuf[0] == 0)
186 error("No previous re|No previous regular expression");
187 break;
188 }
189 for (;;) {
190 c = getchar();
191 switch (c) {
192
193 case 'g':
194 gsubf = !gsubf;
195 continue;
196
197 case 'c':
198 xflag = !xflag;
199 continue;
200
201 case 'r':
202 uselastre = 1;
203 continue;
204
205 default:
206 ungetchar(c);
207 setcount();
208 newline();
209 if (uselastre)
210 savere(subre);
211 else
212 resre(subre);
213 return (gsubf);
214 }
215 }
216}
217
218comprhs(seof)
219 int seof;
220{
221 register char *rp, *orp;
222 register int c;
223 char orhsbuf[LBSIZE / 2];
224
225 rp = rhsbuf;
226 CP(orhsbuf, rp);
227 for (;;) {
228 c = getchar();
229 if (c == seof)
230 break;
231 switch (c) {
232
233 case '\\':
234 c = getchar();
235 if (c == EOF) {
236 ungetchar(c);
237 break;
238 }
239 if (value(MAGIC)) {
240 /*
241 * When "magic", \& turns into a plain &,
242 * and all other chars work fine quoted.
243 */
244 if (c != '&')
245 c |= QUOTE;
246 break;
247 }
248magic:
249 if (c == '~') {
250 for (orp = orhsbuf; *orp; *rp++ = *orp++)
251 if (rp >= &rhsbuf[LBSIZE / 2 + 1])
252 goto toobig;
253 continue;
254 }
255 c |= QUOTE;
256 break;
257
258 case '\n':
259 case EOF:
260 ungetchar(c);
261 goto endrhs;
262
263 case '~':
264 case '&':
265 if (value(MAGIC))
266 goto magic;
267 break;
268 }
269 if (rp >= &rhsbuf[LBSIZE / 2 - 1])
270toobig:
271 error("Replacement pattern too long@- limit 256 characters");
272 *rp++ = c;
273 }
274endrhs:
275 *rp++ = 0;
276}
277
278getsub()
279{
280 register char *p;
281
282 if ((p = linebp) == 0)
283 return (EOF);
284 strcLIN(p);
285 linebp = 0;
286 return (0);
287}
288
289dosubcon(f, a)
290 bool f;
291 line *a;
292{
293
294 if (execute(f, a) == 0)
295 return (0);
296 if (confirmed(a)) {
297 dosub();
298 scount++;
299 }
300 return (1);
301}
302
303confirmed(a)
304 line *a;
305{
306 register int c, ch;
307
308 if (xflag == 0)
309 return (1);
310 pofix();
311 pline(lineno(a));
312 if (inopen)
313 putchar('\n' | QUOTE);
314 c = column(loc1 - 1);
315 ugo(c - 1 + (inopen ? 1 : 0), ' ');
316 ugo(column(loc2 - 1) - c, '^');
317 flush();
318 ch = c = getkey();
319again:
320 if (c == '\r')
321 c = '\n';
322 if (inopen)
323 putchar(c), flush();
324 if (c != '\n' && c != EOF) {
325 c = getkey();
326 goto again;
327 }
328 noteinp();
329 return (ch == 'y');
330}
331
332getch()
333{
334 char c;
335
336 if (read(2, &c, 1) != 1)
337 return (EOF);
338 return (c & TRIM);
339}
340
/*
 * Output the character `with' cnt times; nothing is output when cnt
 * is zero or negative.
 */
ugo(cnt, with)
	int cnt;
	int with;
{

	for (; cnt > 0; cnt--)
		putchar(with);
}
351
352int casecnt;
353bool destuc;
354
355dosub()
356{
357 register char *lp, *sp, *rp;
358 int c;
359
360 lp = linebuf;
361 sp = genbuf;
362 rp = rhsbuf;
363 while (lp < loc1)
364 *sp++ = *lp++;
365 casecnt = 0;
366 while (c = *rp++) {
367 if (c & QUOTE)
368 switch (c & TRIM) {
369
370 case '&':
371 sp = place(sp, loc1, loc2);
372 if (sp == 0)
373 goto ovflo;
374 continue;
375
376 case 'l':
377 casecnt = 1;
378 destuc = 0;
379 continue;
380
381 case 'L':
382 casecnt = LBSIZE;
383 destuc = 0;
384 continue;
385
386 case 'u':
387 casecnt = 1;
388 destuc = 1;
389 continue;
390
391 case 'U':
392 casecnt = LBSIZE;
393 destuc = 1;
394 continue;
395
396 case 'E':
397 case 'e':
398 casecnt = 0;
399 continue;
400 }
401 if (c < 0 && (c &= TRIM) >= '1' && c < nbra + '1') {
402 sp = place(sp, braslist[c - '1'], braelist[c - '1']);
403 if (sp == 0)
404 goto ovflo;
405 continue;
406 }
407 if (casecnt)
408 *sp++ = fixcase(c & TRIM);
409 else
410 *sp++ = c & TRIM;
411 if (sp >= &genbuf[LBSIZE])
412ovflo:
413 error("Line overflow@in substitute - limit 512 chars");
414 }
415 lp = loc2;
416 loc2 = sp + (linebuf - genbuf);
417 while (*sp++ = *lp++)
418 if (sp >= &genbuf[LBSIZE])
419 goto ovflo;
420 strcLIN(genbuf);
421}
422
423fixcase(c)
424 register int c;
425{
426
427 if (casecnt == 0)
428 return (c);
429 casecnt--;
430 if (destuc) {
431 if (islower(c))
432 c = toupper(c);
433 } else
434 if (isupper(c))
435 c = tolower(c);
436 return (c);
437}
438
439char *
440place(sp, l1, l2)
441 register char *sp, *l1, *l2;
442{
443
444 while (l1 < l2) {
445 *sp++ = fixcase(*l1++);
446 if (sp >= &genbuf[LBSIZE])
447 return (0);
448 }
449 return (sp);
450}
451
/*
 * Report the number of substitutions made, provided notable(total)
 * says the count is worth mentioning.  The number of lines is added
 * unless it is 1 or equal to the number of substitutions.
 */
snote(total, lines)
	register int total, lines;
{

	if (notable(total)) {
		printf(mesg("%d subs|%d substitutions"), total);
		if (lines != 1 && lines != total)
			printf(" on %d lines", lines);
		noonl();
		flush();
	}
}
464
465compile(eof, oknl)
466 int eof;
467 int oknl;
468{
469 register int c;
470 register char *ep;
471 char *lastep;
472 char bracket[NBRA], *bracketp, *rhsp;
473 int cclcnt;
474
475 if (isalpha(eof) || isdigit(eof))
476 error("Regular expressions cannot be delimited by letters or digits");
477 ep = expbuf;
478 c = getchar();
479 if (eof == '\\')
480 switch (c) {
481
482 case '/':
483 case '?':
484 if (scanre.Expbuf[0] == 0)
485error("No previous scan re|No previous scanning regular expression");
486 resre(scanre);
487 return (c);
488
489 case '&':
490 if (subre.Expbuf[0] == 0)
491error("No previous substitute re|No previous substitute regular expression");
492 resre(subre);
493 return (c);
494
495 default:
496 error("Badly formed re|Regular expression \\ must be followed by / or ?");
497 }
498 if (c == eof || c == '\n' || c == EOF) {
499 if (*ep == 0)
500 error("No previous re|No previous regular expression");
501 if (c == '\n' && oknl == 0)
502 error("Missing closing delimiter@for regular expression");
503 if (c != eof)
504 ungetchar(c);
505 return (eof);
506 }
507 bracketp = bracket;
508 nbra = 0;
509 circfl = 0;
510 if (c == '^') {
511 c = getchar();
512 circfl++;
513 }
514 ungetchar(c);
515 for (;;) {
516 if (ep >= &expbuf[ESIZE - 2])
517complex:
518 cerror("Re too complex|Regular expression too complicated");
519 c = getchar();
520 if (c == eof || c == EOF) {
521 if (bracketp != bracket)
522cerror("Unmatched \\(|More \\('s than \\)'s in regular expression");
523 *ep++ = CEOF;
524 if (c == EOF)
525 ungetchar(c);
526 return (eof);
527 }
528 if (value(MAGIC)) {
529 if (c != '*' || ep == expbuf)
530 lastep = ep;
531 } else
532 if (c != '\\' || peekchar() != '*' || ep == expbuf)
533 lastep = ep;
534 switch (c) {
535
536 case '\\':
537 c = getchar();
538 switch (c) {
539
540 case '(':
541 if (nbra >= NBRA)
542cerror("Awash in \\('s!|Too many \\('d subexressions in a regular expression");
543 *bracketp++ = nbra;
544 *ep++ = CBRA;
545 *ep++ = nbra++;
546 continue;
547
548 case ')':
549 if (bracketp <= bracket)
550cerror("Extra \\)|More \\)'s than \\('s in regular expression");
551 *ep++ = CKET;
552 *ep++ = *--bracketp;
553 continue;
554
555 case '<':
556 *ep++ = CBRC;
557 continue;
558
559 case '>':
560 *ep++ = CLET;
561 continue;
562 }
563 if (value(MAGIC) == 0)
564magic:
565 switch (c) {
566
567 case '.':
568 *ep++ = CDOT;
569 continue;
570
571 case '~':
572 rhsp = rhsbuf;
573 while (*rhsp) {
574 if (*rhsp & QUOTE) {
575 c = *rhsp & TRIM;
576 if (c == '&')
577error("Replacement pattern contains &@- cannot use in re");
578 if (c >= '1' && c <= '9')
579error("Replacement pattern contains \\d@- cannot use in re");
580 }
581 if (ep >= &expbuf[ESIZE-2])
582 goto complex;
583 *ep++ = CCHR;
584 *ep++ = *rhsp++ & TRIM;
585 }
586 continue;
587
588 case '*':
589 if (ep == expbuf)
590 break;
591 if (*lastep == CBRA || *lastep == CKET)
592cerror("Illegal *|Can't * a \\( ... \\) in regular expression");
593 if (*lastep == CCHR && (lastep[1] & QUOTE))
594cerror("Illegal *|Can't * a \\n in regular expression");
595 *lastep |= STAR;
596 continue;
597
598 case '[':
599 *ep++ = CCL;
600 *ep++ = 0;
601 cclcnt = 1;
602 c = getchar();
603 if (c == '^') {
604 c = getchar();
605 ep[-2] = NCCL;
606 }
607 if (c == ']')
608cerror("Bad character class|Empty character class '[]' or '[^]' cannot match");
609 while (c != ']') {
610 if (c == '\\' && any(peekchar(), "]-^\\"))
611 c = getchar() | QUOTE;
612 if (c == '\n' || c == EOF)
613 cerror("Missing ]");
614 *ep++ = c;
615 cclcnt++;
616 if (ep >= &expbuf[ESIZE])
617 goto complex;
618 c = getchar();
619 }
620 lastep[1] = cclcnt;
621 continue;
622 }
623 if (c == EOF) {
624 ungetchar(EOF);
625 c = '\\';
626 goto defchar;
627 }
628 *ep++ = CCHR;
629 if (c == '\n')
630cerror("No newlines in re's|Can't escape newlines into regular expressions");
631/*
632 if (c < '1' || c > NBRA + '1') {
633*/
634 *ep++ = c;
635 continue;
636/*
637 }
638 c -= '1';
639 if (c >= nbra)
640cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s");
641 *ep++ = c | QUOTE;
642 continue;
643*/
644
645 case '\n':
646 if (oknl) {
647 ungetchar(c);
648 *ep++ = CEOF;
649 return (eof);
650 }
651cerror("Badly formed re|Missing closing delimiter for regular expression");
652
653 case '$':
654 if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') {
655 *ep++ = CDOL;
656 continue;
657 }
658 goto defchar;
659
660 case '.':
661 case '~':
662 case '*':
663 case '[':
664 if (value(MAGIC))
665 goto magic;
666defchar:
667 default:
668 *ep++ = CCHR;
669 *ep++ = c;
670 continue;
671 }
672 }
673}
674
675cerror(s)
676 char *s;
677{
678
679 expbuf[0] = 0;
680 error(s);
681}
682
683same(a, b)
684 register int a, b;
685{
686
687 return (a == b || value(IGNORECASE) &&
688 ((islower(a) && toupper(a) == b) || (islower(b) && toupper(b) == a)));
689}
690
691char *locs;
692
693execute(gf, addr)
694 line *addr;
695{
696 register char *p1, *p2;
697 register int c;
698
699 if (gf) {
700 if (circfl)
701 return (0);
702#ifdef notdef
703 if (loc1 == loc2)
704 loc2++;
705#endif
706 locs = p1 = loc2;
707 } else {
708 if (addr == zero)
709 return (0);
710 p1 = linebuf;
711 getline(*addr);
712 locs = 0;
713 }
714 p2 = expbuf;
715 if (circfl) {
716 loc1 = p1;
717 return (advance(p1, p2));
718 }
719 /* fast check for first character */
720 if (*p2 == CCHR) {
721 c = p2[1];
722 do {
723 if (c != *p1 && (!value(IGNORECASE) ||
724 !((islower(c) && toupper(c) == *p1) ||
725 (islower(*p1) && toupper(*p1) == c))))
726 continue;
727 if (advance(p1, p2)) {
728 loc1 = p1;
729 return (1);
730 }
731 } while (*p1++);
732 return (0);
733 }
734 /* regular algorithm */
735 do {
736 if (advance(p1, p2)) {
737 loc1 = p1;
738 return (1);
739 }
740 } while (*p1++);
741 return (0);
742}
743
/*
 * True if c is a letter or an underscore.  The argument is
 * parenthesized so that an expression argument such as x & TRIM is
 * compared as a whole.
 */
#define	uletter(c)	(isalpha(c) || (c) == '_')
745
746advance(lp, ep)
747 register char *lp, *ep;
748{
749 register char *curlp;
750 char *sp, *sp1;
751 int c;
752
753 for (;;) switch (*ep++) {
754
755 case CCHR:
756/* useless
757 if (*ep & QUOTE) {
758 c = *ep++ & TRIM;
759 sp = braslist[c];
760 sp1 = braelist[c];
761 while (sp < sp1) {
762 if (!same(*sp, *lp))
763 return (0);
764 sp++, lp++;
765 }
766 continue;
767 }
768*/
769 if (!same(*ep, *lp))
770 return (0);
771 ep++, lp++;
772 continue;
773
774 case CDOT:
775 if (*lp++)
776 continue;
777 return (0);
778
779 case CDOL:
780 if (*lp == 0)
781 continue;
782 return (0);
783
784 case CEOF:
785 loc2 = lp;
786 return (1);
787
788 case CCL:
789 if (cclass(ep, *lp++, 1)) {
790 ep += *ep;
791 continue;
792 }
793 return (0);
794
795 case NCCL:
796 if (cclass(ep, *lp++, 0)) {
797 ep += *ep;
798 continue;
799 }
800 return (0);
801
802 case CBRA:
803 braslist[*ep++] = lp;
804 continue;
805
806 case CKET:
807 braelist[*ep++] = lp;
808 continue;
809
810 case CDOT|STAR:
811 curlp = lp;
812 while (*lp++)
813 continue;
814 goto star;
815
816 case CCHR|STAR:
817 curlp = lp;
818 while (same(*lp, *ep))
819 lp++;
820 lp++;
821 ep++;
822 goto star;
823
824 case CCL|STAR:
825 case NCCL|STAR:
826 curlp = lp;
827 while (cclass(ep, *lp++, ep[-1] == (CCL|STAR)))
828 continue;
829 ep += *ep;
830 goto star;
831star:
832 do {
833 lp--;
834 if (lp == locs)
835 break;
836 if (advance(lp, ep))
837 return (1);
838 } while (lp > curlp);
839 return (0);
840
841 case CBRC:
842 if (lp == expbuf)
843 continue;
844 if ((isdigit(*lp) || uletter(*lp)) && !uletter(lp[-1]) && !isdigit(lp[-1]))
845 continue;
846 return (0);
847
848 case CLET:
849 if (!uletter(*lp) && !isdigit(*lp))
850 continue;
851 return (0);
852
853 default:
854 error("Re internal error");
855 }
856}
857
858cclass(set, c, af)
859 register char *set;
860 register int c;
861 int af;
862{
863 register int n;
864
865 if (c == 0)
866 return (0);
867 if (value(IGNORECASE) && isupper(c))
868 c = tolower(c);
869 n = *set++;
870 while (--n)
871 if (n > 2 && set[1] == '-') {
872 if (c >= (set[0] & TRIM) && c <= (set[2] & TRIM))
873 return (af);
874 set += 3;
875 n -= 2;
876 } else
877 if ((*set++ & TRIM) == c)
878 return (af);
879 return (!af);
880}