usr/src/usr.bin/ex/ex_re.c

/*
 * Copyright (c) 1980 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 */

#ifndef lint
static char *sccsid = "@(#)ex_re.c      7.6 (Berkeley) %G%";
#endif not lint

#include "ex.h"
#include "ex_re.h"

/*
 * Global, substitute and regular expressions.
 * Very similar to ed, with some re extensions and
 * confirmed substitute.
 */
/*
 * global: compile the regular expression that follows the command,
 * collect the rest of the command line into globuf, mark every line
 * in addr1..addr2 for which execute() returns k, and then run the
 * collected command list once with dot set to each marked line.
 *
 * k is the execute() result a line must give in order to be selected.
 */
global(k)
        bool k;
{
        register char *gp;
        register int c;
        register line *a1;
        char globuf[GBSIZE], *Cwas;
        int lines = lineDOL();
        int oinglobal = inglobal;
        char *oglobp = globp;

        Cwas = Command;
        /*
         * States of inglobal:
         *  0: ordinary - not in a global command.
         *  1: text coming from some buffer, not tty.
         *  2: like 1, but the source of the buffer is a global command.
         * Hence you're only in a global command if inglobal==2. This
         * strange sounding convention is historically derived from
         * everybody simulating a global command.
         */
        if (inglobal==2)
                error("Global within global@not allowed");
        markDOT();
        setall();
        nonzero();
        if (skipend())
                error("Global needs re|Missing regular expression for global");
        /* The next character is the delimiter of the regular expression. */
        c = ex_getchar();
        ignore(compile(c, 1));
        savere(scanre);
        /*
         * Copy the command list into globuf, stopping at the first
         * newline that is not preceded by a backslash (or at EOF).
         */
        gp = globuf;
        while ((c = ex_getchar()) != '\n') {
                switch (c) {

                case EOF:
                        /* Treat end of input as the terminating newline. */
                        c = '\n';
                        goto brkwh;

                case '\\':
                        c = ex_getchar();
                        switch (c) {

                        case '\\':
                                /*
                                 * Store this backslash now and push the
                                 * second one back to be read again.
                                 */
                                ungetchar(c);
                                break;

                        case '\n':
                                /* Escaped newline: stored without the backslash. */
                                break;

                        default:
                                /* Any other character keeps its backslash. */
                                *gp++ = '\\';
                                break;
                        }
                        break;
                }
                *gp++ = c;
                /* Keep room for the newline and NUL appended after the loop. */
                if (gp >= &globuf[GBSIZE - 2])
                        error("Global command too long");
        }
brkwh:
        ungetchar(c);
        newline();
        *gp++ = c;
        *gp++ = 0;
        saveall();
        inglobal = 2;
        /*
         * The low bit of each line entry serves as the mark: clear it
         * everywhere, then set it on the selected lines of the range.
         */
        for (a1 = one; a1 <= dol; a1++) {
                *a1 &= ~01;
                if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k)
                        *a1 |= 01;
        }
#ifdef notdef
/*
 * This code is commented out for now.  The problem is that we don't
 * fix up the undo area the way we should.  Basically, I think what has
 * to be done is to copy the undo area down (since we shrunk everything)
 * and move the various pointers into it down too.  I will do this later
 * when I have time. (Mark, 10-20-80)
 */
        /*
         * Special case: g/.../d (avoid n^2 algorithm)
         */
        if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') {
                gdelete();
                return;
        }
#endif
        if (inopen)
                inopen = -1;
        /*
         * Now for each marked line, set dot there and do the commands.
         * Note the n^2 behavior here for lots of lines matching.
         * This is really needed: in some cases you could delete lines,
         * causing a marked line to be moved before a1 and missed if
         * we didn't restart at zero each time.
         */
        for (a1 = one; a1 <= dol; a1++) {
                if (*a1 & 01) {
                        *a1 &= ~01;
                        dot = a1;
                        globp = globuf;
                        commands(1, 1);
                        /* Restart the scan; the loop increment moves to one. */
                        a1 = zero;
                }
        }
        /* Restore the state saved on entry. */
        globp = oglobp;
        inglobal = oinglobal;
        endline = 1;
        Command = Cwas;
        netchHAD(lines);
        setlastchar(EOF);
        if (inopen) {
                ungetchar(EOF);
                inopen = 1;
        }
}

/*
 * gdelete: delete inside a global command. Handles the
 * special case g/r.e./d. All lines to be deleted have
 * already been marked. Squeeze the remaining lines together.
 * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/,
 * and g/r.e./.,/r.e.2/d are not treated specially.  There is no
 * good reason for this except the question: where do you draw the line?
 */
gdelete()
{
        register line *a1, *a2, *a3;

        a3 = dol;
        /* find first marked line. can skip all before it */
        for (a1=zero; (*a1&01)==0; a1++)
                if (a1>=a3)
                        return;
        /* copy down unmarked lines, compacting as we go. */
        for (a2=a1+1; a2<=a3;) {
                if (*a2&01) {
                        a2++;           /* line is marked, skip it */
                        dot = a1;       /* dot left after line deletion */
                } else
                        *a1++ = *a2++;  /* unmarked, copy it */
        }
        dol = a1-1;
        if (dot>dol)
                dot = dol;
        change();
}

/* Nonzero when each substitution must be confirmed; see confirmed(). */
bool    cflag;
/*
 * scount: substitutions made on the line currently being processed.
 * slines: number of lines on which at least one substitution was made.
 * stotal: substitutions made over the whole command.
 */
int     scount, slines, stotal;

/*
 * Perform a substitute command over addr1..addr2.  c is the command
 * character handed on to compsub().  Returns the total number of
 * substitutions made.
 */
substitute(c)
        int c;
{
        register line *addr;
        register int n;
        int gsubf, hopcount;

        gsubf = compsub(c);
        if (FIXUNDO) {
                save12();
                undkind = UNDCHANGE;
        }
        slines = 0;
        stotal = 0;
        addr = addr1;
        while (addr <= addr2) {
                hopcount = 0;
                scount = 0;
                if (dosubcon(0, addr) != 0) {
                        if (gsubf) {
                                /*
                                 * Keep substituting along the line.  A
                                 * pattern such as s/\</&/g could spin here
                                 * forever, so bound the number of passes.
                                 */
                                while (*loc2) {
                                        hopcount++;
                                        if (hopcount > sizeof linebuf)
                                                error("substitution loop");
                                        if (dosubcon(1, addr) == 0)
                                                break;
                                }
                        }
                        if (scount) {
                                slines++;
                                stotal += scount;
                                putmark(addr);
                                /* append() reports how many lines it added. */
                                n = append(getsub, addr);
                                addr2 += n;
                                addr += n;
                        }
                }
                addr++;
        }
        if (stotal == 0 && !inglobal && !cflag)
                error("Fail|Substitute pattern match failed");
        snote(stotal, slines);
        return (stotal);
}

compsub(ch)
{
        register int seof, c, uselastre;
        static int gsubf;

        if (!value(EDCOMPATIBLE))
                gsubf = cflag = 0;
        uselastre = 0;
        switch (ch) {

        case 's':
                ignore(skipwh());
                seof = ex_getchar();
                if (endcmd(seof) || any(seof, "gcr")) {
                        ungetchar(seof);
                        goto redo;
                }
                if (isalpha(seof) || isdigit(seof))
                        error("Substitute needs re|Missing regular expression for substitute");
                seof = compile(seof, 1);
                uselastre = 1;
                comprhs(seof);
                gsubf = 0;
                cflag = 0;
                break;

        case '~':
                uselastre = 1;
                /* fall into ... */
        case '&':
        redo:
                if (re.Expbuf[0] == 0)
                        error("No previous re|No previous regular expression");
                if (subre.Expbuf[0] == 0)
                        error("No previous substitute re|No previous substitute to repeat");
                break;
        }
        for (;;) {
                c = ex_getchar();
                switch (c) {

                case 'g':
                        gsubf = !gsubf;
                        continue;

                case 'c':
                        cflag = !cflag;
                        continue;

                case 'r':
                        uselastre = 1;
                        continue;

                default:
                        ungetchar(c);
                        setcount();
                        newline();
                        if (uselastre)
                                savere(subre);
                        else
                                resre(subre);
                        return (gsubf);
                }
        }
}

/*
 * comprhs: read the replacement text of a substitute into rhsbuf,
 * stopping at the delimiter seof.  Characters that dosub() must treat
 * specially are stored with the QUOTE bit set.  The previous contents
 * of rhsbuf are saved in orhsbuf first so that ~ can splice them into
 * the new replacement.
 */
comprhs(seof)
        int seof;
{
        register char *rp, *orp;
        register int c;
        char orhsbuf[RHSSIZE];

        rp = rhsbuf;
        CP(orhsbuf, rp);
        for (;;) {
                c = ex_getchar();
                if (c == seof)
                        break;
                switch (c) {

                case '\\':
                        c = ex_getchar();
                        if (c == EOF) {
                                ungetchar(c);
                                break;
                        }
                        if (value(MAGIC)) {
                                /*
                                 * When "magic", \& turns into a plain &,
                                 * and all other chars work fine quoted.
                                 */
                                if (c != '&')
                                        c |= QUOTE;
                                break;
                        }
                        /*
                         * Also entered by goto from the ~ and & cases
                         * below when "magic" is set.
                         */
magic:
                        if (c == '~') {
                                /* Insert the previous replacement text. */
                                for (orp = orhsbuf; *orp; *rp++ = *orp++)
                                        if (rp >= &rhsbuf[RHSSIZE - 1])
                                                goto toobig;
                                continue;
                        }
                        c |= QUOTE;
                        break;

                case '\n':
                case EOF:
                        /*
                         * Outside of a pending global command list the
                         * replacement ends here.
                         */
                        if (!(globp && globp[0])) {
                                ungetchar(c);
                                goto endrhs;
                        }
                        /* fall into ... */

                case '~':
                case '&':
                        if (value(MAGIC))
                                goto magic;
                        break;
                }
                /* Leave room for the terminating NUL. */
                if (rp >= &rhsbuf[RHSSIZE - 1]) {
toobig:
                        *rp = 0;
                        error("Replacement pattern too long@- limit 256 characters");
                }
                *rp++ = c;
        }
endrhs:
        *rp++ = 0;
}

/*
 * Line supplier handed to append() by substitute(): yields the text
 * at linebp once, then reports EOF.
 */
getsub()
{
        register char *pending;

        if (linebp == 0)
                return (EOF);
        pending = linebp;
        strcLIN(pending);
        linebp = 0;
        return (0);
}

/*
 * Try one match on line a (f is passed through to execute()) and,
 * if the user confirms it, carry out the substitution.
 * Returns 0 when there was no match, 1 otherwise.
 */
dosubcon(f, a)
        bool f;
        line *a;
{

        if (!execute(f, a))
                return (0);
        if (!confirmed(a))
                return (1);
        dosub();
        ++scount;
        return (1);
}

/*
 * When cflag is set, show line a with the matched text underlined
 * by ^ characters and read a reply up to newline or EOF.
 * Returns nonzero if the substitution should be made: always when
 * cflag is clear, otherwise only if the first key typed was 'y'.
 */
confirmed(a)
        line *a;
{
        register int c, ch;

        if (cflag == 0)
                return (1);
        pofix();
        pline(lineno(a));
        if (inopen)
                ex_putchar('\n' | QUOTE);
        /* Blank out to the start of the match, then underline it. */
        c = column(loc1 - 1);
        ugo(c - 1 + (inopen ? 1 : 0), ' ');
        ugo(column(loc2 - 1) - c, '^');
        flush();
        /* Only the first key decides; the rest of the reply is consumed. */
        ch = c = getkey();
        for (;;) {
                if (c == '\r')
                        c = '\n';
                if (inopen) {
                        ex_putchar(c);
                        flush();
                }
                if (c == '\n' || c == EOF)
                        break;
                c = getkey();
        }
        noteinp();
        return (ch == 'y');
}

/*
 * Read a single byte from file descriptor 2 and return it masked
 * with TRIM, or EOF if exactly one byte could not be read.
 */
getch()
{
        char ch;
        int got;

        got = read(2, &ch, 1);
        return (got == 1 ? (ch & TRIM) : EOF);
}

/*
 * Output the character `with' cnt times; nothing is printed when
 * cnt is zero or negative.
 */
ugo(cnt, with)
        int with;
        int cnt;
{

        for (; cnt > 0; cnt--)
                ex_putchar(with);
}

/* Number of characters fixcase() still has to case-convert. */
int     casecnt;
/* Direction used by fixcase(): nonzero maps to upper case, zero to lower. */
bool    destuc;

/*
 * dosub: build the substituted line in genbuf and install it as the
 * current line.  The text before the match (linebuf up to loc1) is
 * copied first, then rhsbuf is expanded, then the text following the
 * match (from loc2) is appended.  On return loc2 points just past the
 * replacement text within linebuf.
 *
 * Characters in rhsbuf carrying the QUOTE bit are special:
 *      &       the matched text
 *      1-9     the text matched by the corresponding \( \) group
 *      l, u    convert the next character to lower / upper case
 *      L, U    convert following characters to lower / upper case
 *      e, E    end case conversion
 * Any other character is copied with the QUOTE bit stripped.
 */
dosub()
{
        register char *lp, *sp, *rp;
        int c;

        lp = linebuf;
        sp = genbuf;
        rp = rhsbuf;
        while (lp < loc1)
                *sp++ = *lp++;
        casecnt = 0;
        while ((c = *rp++) != 0) {
                /* ^V <return> from vi to split lines */
                if (c == '\r')
                        c = '\n';

                if (c & QUOTE)
                        switch (c & TRIM) {

                        case '&':
                                sp = place(sp, loc1, loc2);
                                if (sp == 0)
                                        goto ovflo;
                                continue;

                        case 'l':
                                casecnt = 1;
                                destuc = 0;
                                continue;

                        case 'L':
                                casecnt = LBSIZE;
                                destuc = 0;
                                continue;

                        case 'u':
                                casecnt = 1;
                                destuc = 1;
                                continue;

                        case 'U':
                                casecnt = LBSIZE;
                                destuc = 1;
                                continue;

                        case 'E':
                        case 'e':
                                casecnt = 0;
                                continue;
                        }
                /*
                 * Back reference.  Test the QUOTE bit rather than the
                 * sign of c: whether a plain char with the high bit set
                 * reads back negative depends on the implementation.
                 */
                if ((c & QUOTE) && (c &= TRIM) >= '1' && c < nbra + '1') {
                        sp = place(sp, braslist[c - '1'], braelist[c - '1']);
                        if (sp == 0)
                                goto ovflo;
                        continue;
                }
                if (casecnt)
                        *sp++ = fixcase(c & TRIM);
                else
                        *sp++ = c & TRIM;
                if (sp >= &genbuf[LBSIZE])
ovflo:
                        error("Line overflow@in substitute");
        }
        lp = loc2;
        /*
         * Same offset in linebuf as sp has in genbuf; computed without
         * subtracting pointers into two different arrays.
         */
        loc2 = linebuf + (sp - genbuf);
        while ((*sp++ = *lp++) != 0)
                if (sp >= &genbuf[LBSIZE])
                        goto ovflo;
        strcLIN(genbuf);
}

/*
 * Apply any pending case conversion to c.  While casecnt is nonzero
 * each call uses up one count and maps c to upper case (destuc set)
 * or lower case (destuc clear); otherwise c is returned unchanged.
 */
fixcase(c)
        register int c;
{

        if (casecnt == 0)
                return (c);
        --casecnt;
        if (destuc)
                return (islower(c) ? toupper(c) : c);
        return (isupper(c) ? tolower(c) : c);
}

/*
 * Copy the characters from l1 up to (not including) l2 to sp, passing
 * each through fixcase().  Returns the advanced sp, or 0 if the copy
 * reached the end of genbuf.
 */
char *
place(sp, l1, l2)
        register char *sp, *l1, *l2;
{

        for (; l1 < l2; l1++) {
                *sp++ = fixcase(*l1);
                if (sp >= &genbuf[LBSIZE])
                        return (0);
        }
        return (sp);
}

/*
 * Report the number of substitutions made when notable() says the
 * total is worth mentioning.  The line count is added unless it is 1
 * or equal to the total.
 */
snote(total, lines)
        register int total, lines;
{

        if (notable(total)) {
                ex_printf(mesg("%d subs|%d substitutions"), total);
                if (!(lines == 1 || lines == total))
                        ex_printf(" on %d lines", lines);
                noonl();
                flush();
        }
}

/*
 * compile: read a regular expression from the input and compile it
 * into expbuf.
 *
 * eof is the delimiter that ends the expression; letters and digits
 * are rejected.  When eof is a backslash the next character selects a
 * saved expression instead: / or ? restores the last scanning re and
 * & the last substitute re, and that character is returned.
 * oknl nonzero allows a newline to end the expression in place of the
 * delimiter.
 *
 * An empty expression leaves the previously compiled one in expbuf.
 * Returns eof (or the selector character described above).  Errors
 * are reported through error() and cerror(); cerror() also discards
 * the partly compiled expression.
 */
compile(eof, oknl)
        int eof;
        int oknl;
{
        register int c;
        register char *ep;
        char *lastep;
        char bracket[NBRA], *bracketp, *rhsp;
        int cclcnt;

        if (isalpha(eof) || isdigit(eof))
                error("Regular expressions cannot be delimited by letters or digits");
        ep = expbuf;
        c = ex_getchar();
        if (eof == '\\')
                switch (c) {

                case '/':
                case '?':
                        if (scanre.Expbuf[0] == 0)
error("No previous scan re|No previous scanning regular expression");
                        resre(scanre);
                        return (c);

                case '&':
                        if (subre.Expbuf[0] == 0)
error("No previous substitute re|No previous substitute regular expression");
                        resre(subre);
                        return (c);

                default:
                        error("Badly formed re|Regular expression \\ must be followed by / or ?");
                }
        /* Empty expression: reuse whatever is already in expbuf. */
        if (c == eof || c == '\n' || c == EOF) {
                if (*ep == 0)
                        error("No previous re|No previous regular expression");
                if (c == '\n' && oknl == 0)
                        error("Missing closing delimiter@for regular expression");
                if (c != eof)
                        ungetchar(c);
                return (eof);
        }
        bracketp = bracket;
        nbra = 0;
        circfl = 0;
        /* A leading ^ anchors the match; it is recorded, not compiled. */
        if (c == '^') {
                c = ex_getchar();
                circfl++;
        }
        ungetchar(c);
        for (;;) {
                if (ep >= &expbuf[ESIZE - 2])
complex:
                        cerror("Re too complex|Regular expression too complicated");
                c = ex_getchar();
                if (c == eof || c == EOF) {
                        if (bracketp != bracket)
cerror("Unmatched \\(|More \\('s than \\)'s in regular expression");
                        *ep++ = CEOFC;
                        if (c == EOF)
                                ungetchar(c);
                        return (eof);
                }
                /*
                 * lastep remembers where the item being compiled starts,
                 * so that a following star can be applied to it; it is
                 * left alone when the current input is that star.
                 */
                if (value(MAGIC)) {
                        if (c != '*' || ep == expbuf)
                                lastep = ep;
                } else
                        if (c != '\\' || peekchar() != '*' || ep == expbuf)
                                lastep = ep;
                switch (c) {

                case '\\':
                        c = ex_getchar();
                        switch (c) {

                        case '(':
                                if (nbra >= NBRA)
cerror("Awash in \\('s!|Too many \\('d subexpressions in a regular expression");
                                *bracketp++ = nbra;
                                *ep++ = CBRA;
                                *ep++ = nbra++;
                                continue;

                        case ')':
                                if (bracketp <= bracket)
cerror("Extra \\)|More \\)'s than \\('s in regular expression");
                                *ep++ = CKET;
                                *ep++ = *--bracketp;
                                continue;

                        case '<':
                                *ep++ = CBRC;
                                continue;

                        case '>':
                                *ep++ = CLET;
                                continue;
                        }
                        /*
                         * With "magic" off the escaped character takes its
                         * special meaning here; with it on, the unescaped
                         * cases below jump to the label instead.
                         */
                        if (value(MAGIC) == 0)
magic:
                        switch (c) {

                        case '.':
                                *ep++ = CDOT;
                                continue;

                        case '~':
                                /* Match the previous replacement text literally. */
                                rhsp = rhsbuf;
                                while (*rhsp) {
                                        if (*rhsp & QUOTE) {
                                                c = *rhsp & TRIM;
                                                if (c == '&')
error("Replacement pattern contains &@- cannot use in re");
                                                if (c >= '1' && c <= '9')
error("Replacement pattern contains \\d@- cannot use in re");
                                        }
                                        if (ep >= &expbuf[ESIZE-2])
                                                goto complex;
                                        *ep++ = CCHR;
                                        *ep++ = *rhsp++ & TRIM;
                                }
                                continue;

                        case '*':
                                /* A star with nothing before it is literal. */
                                if (ep == expbuf)
                                        break;
                                if (*lastep == CBRA || *lastep == CKET)
cerror("Illegal *|Can't * a \\( ... \\) in regular expression");
                                if (*lastep == CCHR && (lastep[1] & QUOTE))
cerror("Illegal *|Can't * a \\n in regular expression");
                                *lastep |= STAR;
                                continue;

                        case '[':
                                /*
                                 * Character class: opcode, a count byte
                                 * (filled in at the end) and the members.
                                 */
                                *ep++ = CCL;
                                *ep++ = 0;
                                cclcnt = 1;
                                c = ex_getchar();
                                if (c == '^') {
                                        c = ex_getchar();
                                        ep[-2] = NCCL;
                                }
                                if (c == ']')
cerror("Bad character class|Empty character class '[]' or '[^]' cannot match");
                                while (c != ']') {
                                        if (c == '\\' && any(peekchar(), "]-^\\"))
                                                c = ex_getchar() | QUOTE;
                                        if (c == '\n' || c == EOF)
                                                cerror("Missing ]");
                                        *ep++ = c;
                                        cclcnt++;
                                        if (ep >= &expbuf[ESIZE])
                                                goto complex;
                                        c = ex_getchar();
                                }
                                lastep[1] = cclcnt;
                                continue;
                        }
                        if (c == EOF) {
                                ungetchar(EOF);
                                c = '\\';
                                goto defchar;
                        }
                        *ep++ = CCHR;
                        if (c == '\n')
cerror("No newlines in re's|Can't escape newlines into regular expressions");
/*
                        if (c < '1' || c > NBRA + '1') {
*/
                                *ep++ = c;
                                continue;
/*
                        }
                        c -= '1';
                        if (c >= nbra)
cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s");
                        *ep++ = c | QUOTE;
                        continue;
*/

                case '\n':
                        if (oknl) {
                                ungetchar(c);
                                *ep++ = CEOFC;
                                return (eof);
                        }
cerror("Badly formed re|Missing closing delimiter for regular expression");

                case '$':
                        /* $ is special only at the very end of the expression. */
                        if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') {
                                *ep++ = CDOL;
                                continue;
                        }
                        goto defchar;

                case '.':
                case '~':
                case '*':
                case '[':
                        if (value(MAGIC))
                                goto magic;
defchar:
                default:
                        *ep++ = CCHR;
                        *ep++ = c;
                        continue;
                }
        }
}

/*
 * Report a regular expression compilation error: mark expbuf as
 * holding no expression, then hand the message s to error().
 */
cerror(s)
        char *s;
{

        *expbuf = 0;
        error(s);
}

/*
 * Compare two characters; returns nonzero when they are equal, or,
 * with the ignorecase option set, when they differ only in case.
 */
same(a, b)
        register int a, b;
{

        if (a == b)
                return (1);
        if (!value(IGNORECASE))
                return (0);
        if (islower(a) && toupper(a) == b)
                return (1);
        return (islower(b) && toupper(b) == a);
}

/*
 * Set by execute(): the previous loc2 when continuing along a line,
 * 0 otherwise.  The star handling in advance() stops backing up when
 * it reaches this position.
 */
char    *locs;

/*
 * Search for the compiled expression in expbuf.  With gf zero the
 * line at addr is fetched into linebuf and searched from its start;
 * with gf nonzero the search continues from loc2 on the current line
 * (and fails at once for an anchored expression).
 * Returns 1 with loc1 (and, via advance, loc2) delimiting the match,
 * or 0 if there is none.
 */
/* VARARGS1 */
execute(gf, addr)
        line *addr;
{
        register char *p1, *p2;
        register int c;
        int firstlit;           /* expression begins with a literal char */

        if (gf) {
                if (circfl)
                        return (0);
                locs = p1 = loc2;
        } else {
                if (addr == zero)
                        return (0);
                p1 = linebuf;
                getline(*addr);
                locs = 0;
        }
        p2 = expbuf;
        if (circfl) {
                /* Anchored: only the starting position can match. */
                loc1 = p1;
                return (advance(p1, p2));
        }
        /*
         * Try each position in turn.  When the expression starts with
         * a literal, positions whose character cannot match it are
         * skipped without calling advance().
         */
        firstlit = (*p2 == CCHR);
        c = 0;
        if (firstlit)
                c = p2[1];
        do {
                if (firstlit && c != *p1 &&
                    !(value(IGNORECASE) &&
                    ((islower(c) && toupper(c) == *p1) ||
                    (islower(*p1) && toupper(*p1) == c))))
                        continue;
                if (advance(p1, p2)) {
                        loc1 = p1;
                        return (1);
                }
        } while (*p1++);
        return (0);
}

/*
 * True if c is a letter or an underscore.  The argument is evaluated
 * twice, so it must not have side effects.
 */
#define uletter(c)      (isalpha(c) || (c) == '_')

/*
 * advance: try to match the compiled expression starting at ep
 * against the text starting at lp.  Returns 1 on success, with loc2
 * set to the end of the matched text, and 0 on failure.  Starred
 * items call advance recursively for the rest of the expression.
 */
advance(lp, ep)
        register char *lp, *ep;
{
        register char *curlp;

        for (;;) switch (*ep++) {

        case CCHR:
/* useless
                if (*ep & QUOTE) {
                        c = *ep++ & TRIM;
                        sp = braslist[c];
                        sp1 = braelist[c];
                        while (sp < sp1) {
                                if (!same(*sp, *lp))
                                        return (0);
                                sp++, lp++;
                        }
                        continue;
                }
*/
                /* Literal character. */
                if (!same(*ep, *lp))
                        return (0);
                ep++, lp++;
                continue;

        case CDOT:
                /* Any character except the end of the line. */
                if (*lp++)
                        continue;
                return (0);

        case CDOL:
                /* End of line. */
                if (*lp == 0)
                        continue;
                return (0);

        case CEOFC:
                /* End of expression: the match succeeded. */
                loc2 = lp;
                return (1);

        case CCL:
                /* *ep is the class length byte; skip the class on success. */
                if (cclass(ep, *lp++, 1)) {
                        ep += *ep;
                        continue;
                }
                return (0);

        case NCCL:
                if (cclass(ep, *lp++, 0)) {
                        ep += *ep;
                        continue;
                }
                return (0);

        case CBRA:
                /* Record where group number *ep starts. */
                braslist[*ep++] = lp;
                continue;

        case CKET:
                /* Record where group number *ep ends. */
                braelist[*ep++] = lp;
                continue;

        /*
         * Starred items: consume as many characters as possible (lp
         * ends one past the first character that failed), then back
         * off one position at a time at the star label below.
         */
        case CDOT|STAR:
                curlp = lp;
                while (*lp++)
                        continue;
                goto star;

        case CCHR|STAR:
                curlp = lp;
                while (same(*lp, *ep))
                        lp++;
                lp++;
                ep++;
                goto star;

        case CCL|STAR:
        case NCCL|STAR:
                curlp = lp;
                while (cclass(ep, *lp++, ep[-1] == (CCL|STAR)))
                        continue;
                ep += *ep;
                goto star;
star:
                /*
                 * Try the rest of the expression at each position from
                 * the longest match back to curlp, stopping early if
                 * the position locs is reached.
                 */
                do {
                        lp--;
                        if (lp == locs)
                                break;
                        if (advance(lp, ep))
                                return (1);
                } while (lp > curlp);
                return (0);

        case CBRC:
                /*
                 * Beginning of a word: the start of the line, or a
                 * letter, digit or underscore that does not follow one.
                 */
                if (lp == linebuf)
                        continue;
                if ((isdigit(*lp) || uletter(*lp)) && !uletter(lp[-1]) && !isdigit(lp[-1]))
                        continue;
                return (0);

        case CLET:
                /* End of a word: next character is not a letter, digit or underscore. */
                if (!uletter(*lp) && !isdigit(*lp))
                        continue;
                return (0);

        default:
                error("Re internal error");
        }
}

/*
 * Test character c against a compiled character class.  set points
 * at the class length byte, which is followed by the members; three
 * bytes of the form x-y denote a range.  Returns af if c is in the
 * class and !af if it is not; the end-of-line NUL never matches.
 * With ignorecase set, an upper case c is folded to lower case first.
 */
cclass(set, c, af)
        register char *set;
        register int c;
        int af;
{
        register int left;      /* member bytes still to examine */

        if (c == 0)
                return (0);
        if (value(IGNORECASE) && isupper(c))
                c = tolower(c);
        left = *set++ - 1;
        while (left != 0) {
                if (left > 2 && set[1] == '-') {
                        /* Range: low, '-', high. */
                        if (c >= (set[0] & TRIM) && c <= (set[2] & TRIM))
                                return (af);
                        set += 3;
                        left -= 3;
                } else {
                        if ((*set++ & TRIM) == c)
                                return (af);
                        left--;
                }
        }
        return (!af);
}