/* usr/src/usr.bin/ex/ex_re.c */

/* Copyright (c) 1979 Regents of the University of California */
#include "ex.h"
#include "ex_re.h"

/*
 * Global, substitute and regular expressions.
 * Very similar to ed, with some re extensions and
 * confirmed substitute.
 */
/*
 * Global command.
 *
 * Compiles the regular expression that follows, gathers the rest of
 * the command line into a local buffer, sets the low bit of every line
 * in addr1..addr2 for which execute() returns k, and then runs the
 * gathered command text once for each line so marked.
 *
 * k is the value execute() must return for a line to be selected.
 */
global(k)
	bool k;
{
	register char *gp;
	register int c;
	register line *a1;
	char globuf[GBSIZE], *Cwas;
	int lines = lineDOL();
	int oinglobal = inglobal;
	char *oglobp = globp;

	Cwas = Command;
	/*
	 * inglobal takes one of three values:
	 *  0: ordinary - not in a global command.
	 *  1: text coming from some buffer, not tty.
	 *  2: like 1, but the source of the buffer is a global command.
	 * So a global command is really in progress only when
	 * inglobal==2.  The odd convention is historical: it comes from
	 * everybody simulating a global command.
	 */
	if (inglobal == 2)
		error("Global within global@not allowed");
	markDOT();
	setall();
	nonzero();
	if (skipend())
		error("Global needs re|Missing regular expression for global");
	c = getchar();
	ignore(compile(c, 1));
	savere(scanre);

	/*
	 * Collect the command text.  A backslash followed by a newline
	 * stores just the newline; a doubled backslash stores one
	 * backslash and pushes the second back to be read again; a
	 * backslash before anything else is stored along with that
	 * character.  End of input is treated as a newline.
	 */
	gp = globuf;
	for (;;) {
		c = getchar();
		if (c == '\n')
			break;
		if (c == EOF) {
			c = '\n';
			break;
		}
		if (c == '\\') {
			c = getchar();
			if (c == '\\')
				ungetchar(c);
			else if (c != '\n')
				*gp++ = '\\';
		}
		*gp++ = c;
		/* Keep two bytes free for the final newline and the null. */
		if (gp >= &globuf[GBSIZE - 2])
			error("Global command too long");
	}
	ungetchar(c);
	newline();
	*gp++ = c;
	*gp++ = 0;
	saveall();
	inglobal = 2;

	/* First pass: clear every mark, then mark the selected lines. */
	for (a1 = one; a1 <= dol; a1++) {
		*a1 &= ~01;
		if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k)
			*a1 |= 01;
	}
	/* should use gdelete from ed to avoid n**2 here on g/.../d */
	if (inopen)
		inopen = -1;

	/*
	 * Second pass: run the commands at each marked line.  After
	 * every execution the scan starts over from the top, since the
	 * commands may have changed the buffer.
	 */
	for (a1 = one; a1 <= dol; a1++) {
		if ((*a1 & 01) == 0)
			continue;
		*a1 &= ~01;
		dot = a1;
		globp = globuf;
		commands(1, 1);
		a1 = zero;
	}

	/* Put back the state saved on entry. */
	globp = oglobp;
	inglobal = oinglobal;
	endline = 1;
	Command = Cwas;
	netchHAD(lines);
	setlastchar(EOF);
	if (inopen) {
		ungetchar(EOF);
		inopen = 1;
	}
}

/*
 * State shared by the substitute routines below.
 */
bool	xflag;		/* nonzero: confirmed() asks before each substitution */
int	scount;		/* substitutions done on the current line */
int	slines;		/* lines on which something was substituted */
int	stotal;		/* substitutions done by the whole command */

/*
 * Substitute command.
 *
 * c is the command character, handed on to compsub() which parses the
 * rest of the command.  Each line in addr1..addr2 is tried; with the
 * global flag the match is retried on the rest of the line until it
 * fails or the end of the line is reached.  A changed line is written
 * back through append(getsub, ...), and both the loop pointer and
 * addr2 are advanced by the count append() returns.
 *
 * Returns the total number of substitutions made.
 */
substitute(c)
	int c;
{
	register line *addr;
	register int n;
	int gsubf;

	gsubf = compsub(c);
	if (FIXUNDO) {
		save12();
		undkind = UNDCHANGE;
	}
	stotal = slines = 0;
	for (addr = addr1; addr <= addr2; addr++) {
		scount = 0;
		if (dosubcon(0, addr) == 0)
			continue;
		if (gsubf) {
#ifdef notdef
			/*
			 * should check but loc2 is already munged.
			 * This needs a fancier check later.
			 */
			if (loc1 == loc2)
				error("substitution loop");
#endif
			while (*loc2 && dosubcon(1, addr) != 0)
				continue;
		}
		/* Matched, but every match was declined. */
		if (scount == 0)
			continue;
		stotal += scount;
		slines++;
		putmark(addr);
		n = append(getsub, addr);
		addr += n;
		addr2 += n;
	}
	if (stotal == 0 && !inglobal && !xflag)
		error("Fail|Substitute pattern match failed");
	snote(stotal, slines);
	return (stotal);
}

compsub(ch)
{
	register int seof, c, uselastre;
	static int gsubf;

	if (!value(EDCOMPATIBLE))
		gsubf = xflag = 0;
	uselastre = 0;
	switch (ch) {

	case 's':
		ignore(skipwh());
		seof = getchar();
		if (endcmd(seof) || any(seof, "gcr")) {
			ungetchar(seof);
			goto redo;
		}
		if (isalpha(seof) || isdigit(seof))
			error("Substitute needs re|Missing regular expression for substitute");
		seof = compile(seof, 1);
		uselastre = 1;
		comprhs(seof);
		gsubf = 0;
		xflag = 0;
		break;

	case '~':
		uselastre = 1;
		/* fall into ... */
	case '&':
	redo:
		if (re.Expbuf[0] == 0)
			error("No previous re|No previous regular expression");
		break;
	}
	for (;;) {
		c = getchar();
		switch (c) {

		case 'g':
			gsubf = !gsubf;
			continue;

		case 'c':
			xflag = !xflag;
			continue;

		case 'r':
			uselastre = 1;
			continue;

		default:
			ungetchar(c);
			setcount();
			newline();
			if (uselastre)
				savere(subre);
			else
				resre(subre);
			return (gsubf);
		}
	}
}

/*
 * Read the replacement text of a substitute command into rhsbuf.
 *
 * Reading stops at the delimiter seof, or at a newline or end of input
 * when no global command text is pending (globp null or empty); in the
 * latter case the character is pushed back.  Characters that are
 * special in the replacement are stored with the QUOTE bit set:
 *
 *	magic:   "&" is stored quoted, "~" is replaced by the previous
 *		 replacement text, "\&" is a plain "&", and any other
 *		 "\x" is stored as x quoted.
 *	nomagic: "&" and "~" are plain, "\~" is replaced by the
 *		 previous replacement text, and any other "\x" is
 *		 stored as x quoted.
 *
 * A newline or EOF that is part of pending global text is treated
 * like "&" and "~": quoted under magic, plain otherwise.
 *
 * The result is null terminated.  error() is called if the text does
 * not fit.
 */
comprhs(seof)
	int seof;
{
	register char *rp, *orp;
	register int c;
	char orhsbuf[LBSIZE / 2];
	/*
	 * Last position that may hold the terminating null.  Assumes
	 * rhsbuf holds LBSIZE / 2 bytes, as orhsbuf (which receives a
	 * copy of it) does -- confirm against its declaration.
	 */
	char *rhslim = &rhsbuf[LBSIZE / 2 - 1];

	rp = rhsbuf;
	/* Keep the previous replacement so that ~ can insert it. */
	CP(orhsbuf, rp);
	for (;;) {
		c = getchar();
		if (c == seof)
			break;
		switch (c) {

		case '\\':
			c = getchar();
			if (c == EOF) {
				ungetchar(c);
				break;
			}
			if (value(MAGIC)) {
				/*
				 * When "magic", \& turns into a plain &,
				 * and all other chars work fine quoted.
				 */
				if (c != '&')
					c |= QUOTE;
				break;
			}
magic:
			if (c == '~') {
				/*
				 * Insert the previous replacement.  This
				 * used to test against LBSIZE / 2 + 1, which
				 * let the copy and the final null run past
				 * the limit enforced for ordinary characters.
				 */
				for (orp = orhsbuf; *orp; *rp++ = *orp++)
					if (rp >= rhslim)
						goto toobig;
				continue;
			}
			c |= QUOTE;
			break;

		case '\n':
		case EOF:
			if (!(globp && globp[0])) {
				ungetchar(c);
				goto endrhs;
			}
			/* fall into ... */

		case '~':
		case '&':
			if (value(MAGIC))
				goto magic;
			break;
		}
		if (rp >= rhslim)
toobig:
			error("Replacement pattern too long@- limit 256 characters");
		*rp++ = c;
	}
endrhs:
	*rp++ = 0;
}

/*
 * Line source passed to append() by substitute().
 *
 * If linebp is set, the text it points at is made the current line
 * with strcLIN(), linebp is cleared, and 0 is returned.  Otherwise
 * EOF is returned.
 */
getsub()
{
	register char *pending;

	pending = linebp;
	if (pending == 0)
		return (EOF);
	strcLIN(pending);
	linebp = 0;
	return (0);
}

/*
 * Attempt one substitution on line a.
 *
 * f is handed to execute() as its first argument.  Returns 0 when the
 * pattern does not match and 1 when it does; the substitution itself
 * is done, and scount bumped, only if confirmed() agrees.
 */
dosubcon(f, a)
	bool f;
	line *a;
{

	if (!execute(f, a))
		return (0);
	if (!confirmed(a))
		return (1);
	dosub();
	scount++;
	return (1);
}

/*
 * Ask whether the match just found on line a should be substituted.
 *
 * With xflag clear the answer is always yes.  Otherwise the line is
 * printed with a row of '^' under the matched text, and keys are read
 * up to a newline (a carriage return counts as one) or EOF.
 *
 * Returns nonzero only if the first key read was 'y'.
 */
confirmed(a)
	line *a;
{
	register int key, first;
	int startcol;

	if (xflag == 0)
		return (1);
	pofix();
	pline(lineno(a));
	if (inopen)
		putchar('\n' | QUOTE);
	/* Blanks out to the start of the match, then carets under it. */
	startcol = column(loc1 - 1);
	ugo(startcol - 1 + (inopen ? 1 : 0), ' ');
	ugo(column(loc2 - 1) - startcol, '^');
	flush();
	first = key = getkey();
	for (;;) {
		if (key == '\r')
			key = '\n';
		if (inopen) {
			putchar(key);
			flush();
		}
		if (key == '\n' || key == EOF)
			break;
		key = getkey();
	}
	noteinp();
	return (first == 'y');
}

/*
 * Read one byte from file descriptor 2.
 *
 * Returns the byte masked with TRIM, or EOF if read() does not deliver
 * exactly one byte.
 */
getch()
{
	char ch;

	return (read(2, &ch, 1) == 1 ? (ch & TRIM) : EOF);
}

/*
 * Output the character "with" cnt times; nothing is output when cnt
 * is zero or negative.
 */
ugo(cnt, with)
	int with;
	int cnt;
{

	for (; cnt > 0; cnt--)
		putchar(with);
}

/*
 * Case conversion state used by dosub(), place() and fixcase().
 */
int	casecnt;	/* how many more characters fixcase() will convert */
bool	destuc;		/* nonzero: convert to upper case, else to lower */

/*
 * Perform the substitution for the match loc1..loc2 in linebuf.
 *
 * The new line is built in genbuf: the text before the match, then
 * the replacement from rhsbuf, then the text after the match.
 * Characters of rhsbuf with the QUOTE bit set are operators:
 *
 *	&	the matched text
 *	1-9	the text of a \( \) group, if that group exists
 *	l, u	convert the next character to lower / upper case
 *	L, U	convert following characters to lower / upper case
 *	e, E	stop case conversion
 *
 * Any other character is copied with the QUOTE bit removed.  On return
 * loc2 points just past the replacement in linebuf, and genbuf has
 * been installed as the current line with strcLIN().  error() is
 * called if the result does not fit in LBSIZE bytes.
 */
dosub()
{
	register char *lp, *sp, *rp;
	int c;

	lp = linebuf;
	sp = genbuf;
	rp = rhsbuf;
	while (lp < loc1)
		*sp++ = *lp++;
	casecnt = 0;
	while ((c = *rp++) != 0) {
		if (c & QUOTE)
			switch (c & TRIM) {

			case '&':
				sp = place(sp, loc1, loc2);
				if (sp == 0)
					goto ovflo;
				continue;

			case 'l':
				casecnt = 1;
				destuc = 0;
				continue;

			case 'L':
				casecnt = LBSIZE;
				destuc = 0;
				continue;

			case 'u':
				casecnt = 1;
				destuc = 1;
				continue;

			case 'U':
				casecnt = LBSIZE;
				destuc = 1;
				continue;

			case 'E':
			case 'e':
				casecnt = 0;
				continue;
			}
		/*
		 * Group reference.  Test the QUOTE bit itself rather than
		 * the sign of c: whether a char with that bit set reads
		 * as negative depends on the signedness of plain char.
		 */
		if ((c & QUOTE) && (c &= TRIM) >= '1' && c < nbra + '1') {
			sp = place(sp, braslist[c - '1'], braelist[c - '1']);
			if (sp == 0)
				goto ovflo;
			continue;
		}
		if (casecnt)
			*sp++ = fixcase(c & TRIM);
		else
			*sp++ = c & TRIM;
		if (sp >= &genbuf[LBSIZE])
ovflo:
			error("Line overflow@in substitute");
	}
	lp = loc2;
	/*
	 * The same offset in linebuf as sp has in genbuf; computed from
	 * a difference within genbuf only.
	 */
	loc2 = linebuf + (sp - genbuf);
	/* Copy the rest of the line, including its null. */
	while ((*sp++ = *lp++) != 0)
		if (sp >= &genbuf[LBSIZE])
			goto ovflo;
	strcLIN(genbuf);
}

/*
 * Apply the pending case conversion to c.
 *
 * When casecnt is zero c comes back unchanged.  Otherwise casecnt is
 * decremented and c is returned in upper case if destuc is set, in
 * lower case if not; characters of the other kind are left alone.
 */
fixcase(c)
	register int c;
{

	if (casecnt == 0)
		return (c);
	casecnt--;
	if (destuc)
		return (islower(c) ? toupper(c) : c);
	return (isupper(c) ? tolower(c) : c);
}

/*
 * Copy the characters from l1 up to (not including) l2 to sp, passing
 * each through fixcase().
 *
 * Returns the position after the last character stored, or 0 if the
 * end of genbuf was reached.
 */
char *
place(sp, l1, l2)
	register char *sp, *l1, *l2;
{

	for (; l1 < l2; l1++) {
		*sp++ = fixcase(*l1);
		if (sp >= &genbuf[LBSIZE])
			return (0);
	}
	return (sp);
}

/*
 * Report the outcome of a substitute command.
 *
 * Nothing is printed unless notable(total) is true.  The count of
 * lines is added only when it is neither 1 nor equal to total.
 */
snote(total, lines)
	register int total, lines;
{

	if (notable(total)) {
		printf(mesg("%d subs|%d substitutions"), total);
		if (lines != 1 && lines != total)
			printf(" on %d lines", lines);
		noonl();
		flush();
	}
}

/*
 * Compile a regular expression from the input into expbuf.
 *
 * eof is the delimiter that opened the expression; letters and digits
 * are rejected.  If oknl is nonzero a newline also ends the
 * expression (and is pushed back); if it is zero a newline is an
 * error.
 *
 * Special forms:
 *	eof is '\' followed by / or ?	reuse the scanning expression
 *	eof is '\' followed by &	reuse the substitute expression
 *	an empty expression		keep what expbuf already holds
 *
 * Compiled form, as written below: CCHR + char, CDOT, CDOL,
 * CBRA/CKET + group number, CBRC, CLET, CCL/NCCL + count + members,
 * ended by CEOF.  STAR is or'ed into the opcode of a starred item.  A
 * leading ^ sets circfl instead of being compiled.  nbra counts the
 * \( groups.
 *
 * With the magic option set, . ~ * [ are operators and their \x forms
 * are literal; with it clear the roles are swapped.
 *
 * Returns eof, or for the '\' forms the character that followed it.
 * Failures once compilation has started go through cerror(), which
 * also clears expbuf[0].
 */
compile(eof, oknl)
	int eof;
	int oknl;
{
	register int c;
	register char *ep;
	char *lastep;
	char bracket[NBRA], *bracketp, *rhsp;
	int cclcnt;

	if (isalpha(eof) || isdigit(eof))
		error("Regular expressions cannot be delimited by letters or digits");
	ep = expbuf;
	c = getchar();
	if (eof == '\\')
		switch (c) {

		case '/':
		case '?':
			if (scanre.Expbuf[0] == 0)
				error("No previous scan re|No previous scanning regular expression");
			resre(scanre);
			return (c);

		case '&':
			if (subre.Expbuf[0] == 0)
				error("No previous substitute re|No previous substitute regular expression");
			resre(subre);
			return (c);

		default:
			error("Badly formed re|Regular expression \\ must be followed by / or ?");
		}
	if (c == eof || c == '\n' || c == EOF) {
		/* Empty expression: use whatever was compiled before. */
		if (*ep == 0)
			error("No previous re|No previous regular expression");
		if (c == '\n' && oknl == 0)
			error("Missing closing delimiter@for regular expression");
		if (c != eof)
			ungetchar(c);
		return (eof);
	}
	bracketp = bracket;
	nbra = 0;
	circfl = 0;
	if (c == '^') {
		c = getchar();
		circfl++;
	}
	ungetchar(c);
	for (;;) {
		if (ep >= &expbuf[ESIZE - 2])
complex:
			cerror("Re too complex|Regular expression too complicated");
		c = getchar();
		if (c == eof || c == EOF) {
			if (bracketp != bracket)
				cerror("Unmatched \\(|More \\('s than \\)'s in regular expression");
			*ep++ = CEOF;
			if (c == EOF)
				ungetchar(c);
			return (eof);
		}
		/*
		 * lastep tracks the start of the item a following star
		 * applies to, so it is left alone when the star itself
		 * ("*" if magic, "\*" if not) is being read.
		 */
		if (value(MAGIC)) {
			if (c != '*' || ep == expbuf)
				lastep = ep;
		} else
			if (c != '\\' || peekchar() != '*' || ep == expbuf)
				lastep = ep;
		switch (c) {

		case '\\':
			c = getchar();
			switch (c) {

			case '(':
				if (nbra >= NBRA)
					cerror("Awash in \\('s!|Too many \\('d subexpressions in a regular expression");
				*bracketp++ = nbra;
				*ep++ = CBRA;
				*ep++ = nbra++;
				continue;

			case ')':
				if (bracketp <= bracket)
					cerror("Extra \\)|More \\)'s than \\('s in regular expression");
				*ep++ = CKET;
				*ep++ = *--bracketp;
				continue;

			case '<':
				*ep++ = CBRC;
				continue;

			case '>':
				*ep++ = CLET;
				continue;
			}
			/*
			 * Without magic a backslash makes . ~ * [ special;
			 * with magic the plain characters jump here.
			 */
			if (value(MAGIC) == 0)
magic:
			switch (c) {

			case '.':
				*ep++ = CDOT;
				continue;

			case '~':
				/*
				 * Insert the current replacement text as
				 * literal characters.  Expansion starts
				 * only after nbra and circfl were reset and
				 * may already have stored into expbuf, so
				 * failures go through cerror() like every
				 * other mid-compile failure.
				 */
				rhsp = rhsbuf;
				while (*rhsp) {
					if (*rhsp & QUOTE) {
						c = *rhsp & TRIM;
						if (c == '&')
							cerror("Replacement pattern contains &@- cannot use in re");
						if (c >= '1' && c <= '9')
							cerror("Replacement pattern contains \\d@- cannot use in re");
					}
					if (ep >= &expbuf[ESIZE-2])
						goto complex;
					*ep++ = CCHR;
					*ep++ = *rhsp++ & TRIM;
				}
				continue;

			case '*':
				/* A star at the very start is literal. */
				if (ep == expbuf)
					break;
				if (*lastep == CBRA || *lastep == CKET)
					cerror("Illegal *|Can't * a \\( ... \\) in regular expression");
				if (*lastep == CCHR && (lastep[1] & QUOTE))
					cerror("Illegal *|Can't * a \\n in regular expression");
				*lastep |= STAR;
				continue;

			case '[':
				/*
				 * Opcode, then a count byte filled in at
				 * the end, then the members.  The count
				 * includes the count byte itself.
				 */
				*ep++ = CCL;
				*ep++ = 0;
				cclcnt = 1;
				c = getchar();
				if (c == '^') {
					c = getchar();
					ep[-2] = NCCL;
				}
				if (c == ']')
					cerror("Bad character class|Empty character class '[]' or '[^]' cannot match");
				while (c != ']') {
					/* \] \- \^ \\ are stored with QUOTE set. */
					if (c == '\\' && any(peekchar(), "]-^\\"))
						c = getchar() | QUOTE;
					if (c == '\n' || c == EOF)
						cerror("Missing ]");
					*ep++ = c;
					cclcnt++;
					if (ep >= &expbuf[ESIZE])
						goto complex;
					c = getchar();
				}
				lastep[1] = cclcnt;
				continue;
			}
			if (c == EOF) {
				/* Backslash at end of input: a literal backslash. */
				ungetchar(EOF);
				c = '\\';
				goto defchar;
			}
			*ep++ = CCHR;
			if (c == '\n')
				cerror("No newlines in re's|Can't escape newlines into regular expressions");
			*ep++ = c;
			continue;

		case '\n':
			if (oknl) {
				ungetchar(c);
				*ep++ = CEOF;
				return (eof);
			}
			cerror("Badly formed re|Missing closing delimiter for regular expression");

		case '$':
			/* Only a $ that ends the expression is an anchor. */
			if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') {
				*ep++ = CDOL;
				continue;
			}
			goto defchar;

		case '.':
		case '~':
		case '*':
		case '[':
			if (value(MAGIC))
				goto magic;
defchar:
		default:
			*ep++ = CCHR;
			*ep++ = c;
			continue;
		}
	}
}

/*
 * Report the error s for a failed compilation.  The first byte of
 * expbuf is cleared before error() is called; compile() treats a zero
 * first byte as "no previous expression".
 */
cerror(s)
	char *s;
{

	*expbuf = 0;
	error(s);
}

/*
 * Compare two characters.  Returns 1 if they are equal, or if the
 * IGNORECASE option is set and one is the upper case form of the
 * other; otherwise returns 0.
 */
same(a, b)
	register int a, b;
{

	if (a == b)
		return (1);
	if (!value(IGNORECASE))
		return (0);
	if (islower(a) && toupper(a) == b)
		return (1);
	return (islower(b) && toupper(b) == a);
}

/*
 * Position at which a starred item may not end its back-off; set by
 * execute(), tested by advance().
 */
char	*locs;

/*
 * Look for a match of the compiled expression in expbuf.
 *
 * With gf zero, line addr is fetched with getline() and searched from
 * the start of linebuf; address zero never matches.  With gf nonzero
 * the search resumes at loc2 in the line already in linebuf, and an
 * expression anchored with ^ (circfl) never matches.
 *
 * Returns 1 with loc1 at the start of the match (advance() sets loc2
 * at its end), or 0 if there is none.
 */
execute(gf, addr)
	line *addr;
{
	register char *p1, *p2;
	register int c;
	int literal;

	if (gf) {
		if (circfl)
			return (0);
#ifdef notdef
		if (loc1 == loc2)
			loc2++;
#endif
		locs = p1 = loc2;
	} else {
		if (addr == zero)
			return (0);
		p1 = linebuf;
		getline(*addr);
		locs = 0;
	}
	p2 = expbuf;
	if (circfl) {
		/* Anchored: only the starting position is tried. */
		loc1 = p1;
		return (advance(p1, p2));
	}
	/*
	 * Try each position in turn, the one holding the final null
	 * included.  If the expression starts with a literal character,
	 * positions that cannot match it are skipped without calling
	 * advance().
	 */
	literal = (*p2 == CCHR);
	c = literal ? p2[1] : 0;
	do {
		if (literal && !same(c, *p1))
			continue;
		if (advance(p1, p2)) {
			loc1 = p1;
			return (1);
		}
	} while (*p1++);
	return (0);
}

#define	uletter(c)	(isalpha(c) || c == '_')

advance(lp, ep)
	register char *lp, *ep;
{
	register char *curlp;
	char *sp, *sp1;
	int c;

	for (;;) switch (*ep++) {

	case CCHR:
/* useless
		if (*ep & QUOTE) {
			c = *ep++ & TRIM;
			sp = braslist[c];
			sp1 = braelist[c];
			while (sp < sp1) {
				if (!same(*sp, *lp))
					return (0);
				sp++, lp++;
			}
			continue;
		}
*/
		if (!same(*ep, *lp))
			return (0);
		ep++, lp++;
		continue;

	case CDOT:
		if (*lp++)
			continue;
		return (0);

	case CDOL:
		if (*lp == 0)
			continue;
		return (0);

	case CEOF:
		loc2 = lp;
		return (1);

	case CCL:
		if (cclass(ep, *lp++, 1)) {
			ep += *ep;
			continue;
		}
		return (0);

	case NCCL:
		if (cclass(ep, *lp++, 0)) {
			ep += *ep;
			continue;
		}
		return (0);

	case CBRA:
		braslist[*ep++] = lp;
		continue;

	case CKET:
		braelist[*ep++] = lp;
		continue;

	case CDOT|STAR:
		curlp = lp;
		while (*lp++)
			continue;
		goto star;

	case CCHR|STAR:
		curlp = lp;
		while (same(*lp, *ep))
			lp++;
		lp++;
		ep++;
		goto star;

	case CCL|STAR:
	case NCCL|STAR:
		curlp = lp;
		while (cclass(ep, *lp++, ep[-1] == (CCL|STAR)))
			continue;
		ep += *ep;
		goto star;
star:
		do {
			lp--;
			if (lp == locs)
				break;
			if (advance(lp, ep))
				return (1);
		} while (lp > curlp);
		return (0);

	case CBRC:
		if (lp == expbuf)
			continue;
		if ((isdigit(*lp) || uletter(*lp)) && !uletter(lp[-1]) && !isdigit(lp[-1]))
			continue;
		return (0);

	case CLET:
		if (!uletter(*lp) && !isdigit(*lp))
			continue;
		return (0);

	default:
		error("Re internal error");
	}
}

/*
 * Test character c against a compiled character class.
 *
 * set points at the count byte of the class; the count covers the
 * count byte itself plus the members that follow.  A member followed
 * by an unquoted '-' and another member is taken as a range.  With
 * IGNORECASE set an upper case c is converted to lower case before
 * the comparison; the members are compared as stored.
 *
 * Returns af if c is in the set and !af if it is not.  The end of
 * line (c == 0) always yields 0.
 */
cclass(set, c, af)
	register char *set;
	register int c;
	int af;
{
	register int left;

	if (c == 0)
		return (0);
	if (value(IGNORECASE) && isupper(c))
		c = tolower(c);
	for (left = *set++ - 1; left != 0; left--) {
		if (left > 2 && set[1] == '-') {
			if ((set[0] & TRIM) <= c && c <= (set[2] & TRIM))
				return (af);
			/* Three bytes used; the loop step counts the third. */
			set += 3;
			left -= 2;
			continue;
		}
		if ((*set++ & TRIM) == c)
			return (af);
	}
	return (!af);
}
Commit	Line	Data
	1	/* Copyright (c) 1979 Regents of the University of California */
	2	#include "ex.h"
	3	#include "ex_re.h"
	4
	5	/*
	6	* Global, substitute and regular expressions.
	7	* Very similar to ed, with some re extensions and
	8	* confirmed substitute.
	9	*/
	10	global(k)
	11	bool k;
	12	{
	13	register char *gp;
	14	register int c;
	15	register line *a1;
	16	char globuf[GBSIZE], *Cwas;
	17	int lines = lineDOL();
	18	int oinglobal = inglobal;
	19	char *oglobp = globp;
	20
	21	Cwas = Command;
	22	/*
	23	* States of inglobal:
	24	* 0: ordinary - not in a global command.
	25	* 1: text coming from some buffer, not tty.
	26	* 2: like 1, but the source of the buffer is a global command.
	27	* Hence you're only in a global command if inglobal==2. This
	28	* strange sounding convention is historically derived from
	29	* everybody simulating a global command.
	30	*/
	31	if (inglobal==2)
	32	error("Global within global@not allowed");
	33	markDOT();
	34	setall();
	35	nonzero();
	36	if (skipend())
	37	error("Global needs re\|Missing regular expression for global");
	38	c = getchar();
	39	ignore(compile(c, 1));
	40	savere(scanre);
	41	gp = globuf;
	42	while ((c = getchar()) != '\n') {
	43	switch (c) {
	44
	45	case EOF:
	46	c = '\n';
	47	goto brkwh;
	48
	49	case '\\':
	50	c = getchar();
	51	switch (c) {
	52
	53	case '\\':
	54	ungetchar(c);
	55	break;
	56
	57	case '\n':
	58	break;
	59
	60	default:
	61	*gp++ = '\\';
	62	break;
	63	}
	64	break;
	65	}
	66	*gp++ = c;
	67	if (gp >= &globuf[GBSIZE - 2])
	68	error("Global command too long");
	69	}
	70	brkwh:
	71	ungetchar(c);
	72	out:
	73	newline();
	74	*gp++ = c;
	75	*gp++ = 0;
	76	saveall();
	77	inglobal = 2;
	78	for (a1 = one; a1 <= dol; a1++) {
	79	*a1 &= ~01;
	80	if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k)
	81	*a1 \|= 01;
	82	}
	83	/* should use gdelete from ed to avoid n*2 here on g/.../d /
	84	if (inopen)
	85	inopen = -1;
	86	for (a1 = one; a1 <= dol; a1++) {
	87	if (*a1 & 01) {
	88	*a1 &= ~01;
	89	dot = a1;
	90	globp = globuf;
	91	commands(1, 1);
	92	a1 = zero;
	93	}
	94	}
	95	globp = oglobp;
	96	inglobal = oinglobal;
	97	endline = 1;
	98	Command = Cwas;
	99	netchHAD(lines);
	100	setlastchar(EOF);
	101	if (inopen) {
	102	ungetchar(EOF);
	103	inopen = 1;
	104	}
	105	}
	106
	107	bool xflag;
	108	int scount, slines, stotal;
	109
	110	substitute(c)
	111	int c;
	112	{
	113	register line *addr;
	114	register int n;
	115	int gsubf;
	116
	117	gsubf = compsub(c);
	118	if(FIXUNDO)
	119	save12(), undkind = UNDCHANGE;
	120	stotal = 0;
	121	slines = 0;
	122	for (addr = addr1; addr <= addr2; addr++) {
	123	scount = 0;
	124	if (dosubcon(0, addr) == 0)
	125	continue;
	126	if (gsubf) {
	127	#ifdef notdef
	128	/*
	129	* should check but loc2 is already munged.
	130	* This needs a fancier check later.
	131	*/
	132	if (loc1 == loc2)
	133	error("substitution loop");
	134	#endif
	135	while (*loc2)
	136	if (dosubcon(1, addr) == 0)
	137	break;
	138	}
	139	if (scount) {
	140	stotal += scount;
	141	slines++;
	142	putmark(addr);
	143	n = append(getsub, addr);
	144	addr += n;
	145	addr2 += n;
	146	}
	147	}
	148	if (stotal == 0 && !inglobal && !xflag)
	149	error("Fail\|Substitute pattern match failed");
	150	snote(stotal, slines);
	151	return (stotal);
	152	}
	153
	154	compsub(ch)
	155	{
	156	register int seof, c, uselastre;
	157	static int gsubf;
	158
	159	if (!value(EDCOMPATIBLE))
	160	gsubf = xflag = 0;
	161	uselastre = 0;
	162	switch (ch) {
	163
	164	case 's':
	165	ignore(skipwh());
	166	seof = getchar();
	167	if (endcmd(seof) \|\| any(seof, "gcr")) {
	168	ungetchar(seof);
	169	goto redo;
	170	}
	171	if (isalpha(seof) \|\| isdigit(seof))
	172	error("Substitute needs re\|Missing regular expression for substitute");
	173	seof = compile(seof, 1);
	174	uselastre = 1;
	175	comprhs(seof);
	176	gsubf = 0;
	177	xflag = 0;
	178	break;
	179
	180	case '~':
	181	uselastre = 1;
	182	/* fall into ... */
	183	case '&':
	184	redo:
	185	if (re.Expbuf[0] == 0)
	186	error("No previous re\|No previous regular expression");
	187	break;
	188	}
	189	for (;;) {
	190	c = getchar();
	191	switch (c) {
	192
	193	case 'g':
	194	gsubf = !gsubf;
	195	continue;
	196
	197	case 'c':
	198	xflag = !xflag;
	199	continue;
	200
	201	case 'r':
	202	uselastre = 1;
	203	continue;
	204
	205	default:
	206	ungetchar(c);
	207	setcount();
	208	newline();
	209	if (uselastre)
	210	savere(subre);
	211	else
	212	resre(subre);
	213	return (gsubf);
	214	}
	215	}
	216	}
	217
	218	comprhs(seof)
	219	int seof;
	220	{
	221	register char rp, orp;
	222	register int c;
	223	char orhsbuf[LBSIZE / 2];
	224
	225	rp = rhsbuf;
	226	CP(orhsbuf, rp);
	227	for (;;) {
	228	c = getchar();
	229	if (c == seof)
	230	break;
	231	switch (c) {
	232
	233	case '\\':
	234	c = getchar();
	235	if (c == EOF) {
	236	ungetchar(c);
	237	break;
	238	}
	239	if (value(MAGIC)) {
	240	/*
	241	* When "magic", \& turns into a plain &,
	242	* and all other chars work fine quoted.
	243	*/
	244	if (c != '&')
	245	c \|= QUOTE;
	246	break;
	247	}
	248	magic:
	249	if (c == '~') {
	250	for (orp = orhsbuf; orp; rp++ = *orp++)
	251	if (rp >= &rhsbuf[LBSIZE / 2 + 1])
	252	goto toobig;
	253	continue;
	254	}
	255	c \|= QUOTE;
	256	break;
	257
	258	case '\n':
	259	case EOF:
	260	if (!(globp && globp[0])) {
	261	ungetchar(c);
	262	goto endrhs;
	263	}
	264
	265	case '~':
	266	case '&':
	267	if (value(MAGIC))
	268	goto magic;
	269	break;
	270	}
	271	if (rp >= &rhsbuf[LBSIZE / 2 - 1])
	272	toobig:
	273	error("Replacement pattern too long@- limit 256 characters");
	274	*rp++ = c;
	275	}
	276	endrhs:
	277	*rp++ = 0;
	278	}
	279
	280	getsub()
	281	{
	282	register char *p;
	283
	284	if ((p = linebp) == 0)
	285	return (EOF);
	286	strcLIN(p);
	287	linebp = 0;
	288	return (0);
	289	}
	290
	291	dosubcon(f, a)
	292	bool f;
	293	line *a;
	294	{
	295
	296	if (execute(f, a) == 0)
	297	return (0);
	298	if (confirmed(a)) {
	299	dosub();
	300	scount++;
	301	}
	302	return (1);
	303	}
	304
	305	confirmed(a)
	306	line *a;
	307	{
	308	register int c, ch;
	309
	310	if (xflag == 0)
	311	return (1);
	312	pofix();
	313	pline(lineno(a));
	314	if (inopen)
	315	putchar('\n' \| QUOTE);
	316	c = column(loc1 - 1);
	317	ugo(c - 1 + (inopen ? 1 : 0), ' ');
	318	ugo(column(loc2 - 1) - c, '^');
	319	flush();
	320	ch = c = getkey();
	321	again:
	322	if (c == '\r')
	323	c = '\n';
	324	if (inopen)
	325	putchar(c), flush();
	326	if (c != '\n' && c != EOF) {
	327	c = getkey();
	328	goto again;
	329	}
	330	noteinp();
	331	return (ch == 'y');
	332	}
	333
	334	getch()
	335	{
	336	char c;
	337
	338	if (read(2, &c, 1) != 1)
	339	return (EOF);
	340	return (c & TRIM);
	341	}
	342
	343	ugo(cnt, with)
	344	int with;
	345	int cnt;
	346	{
	347
	348	if (cnt > 0)
	349	do
	350	putchar(with);
	351	while (--cnt > 0);
	352	}
	353
	354	int casecnt;
	355	bool destuc;
	356
	357	dosub()
	358	{
	359	register char lp, sp, *rp;
	360	int c;
	361
	362	lp = linebuf;
	363	sp = genbuf;
	364	rp = rhsbuf;
	365	while (lp < loc1)
	366	sp++ = lp++;
	367	casecnt = 0;
	368	while (c = *rp++) {
	369	if (c & QUOTE)
	370	switch (c & TRIM) {
	371
	372	case '&':
	373	sp = place(sp, loc1, loc2);
	374	if (sp == 0)
	375	goto ovflo;
	376	continue;
	377
	378	case 'l':
	379	casecnt = 1;
	380	destuc = 0;
	381	continue;
	382
	383	case 'L':
	384	casecnt = LBSIZE;
	385	destuc = 0;
	386	continue;
	387
	388	case 'u':
	389	casecnt = 1;
	390	destuc = 1;
	391	continue;
	392
	393	case 'U':
	394	casecnt = LBSIZE;
	395	destuc = 1;
	396	continue;
	397
	398	case 'E':
	399	case 'e':
	400	casecnt = 0;
	401	continue;
	402	}
	403	if (c < 0 && (c &= TRIM) >= '1' && c < nbra + '1') {
	404	sp = place(sp, braslist[c - '1'], braelist[c - '1']);
	405	if (sp == 0)
	406	goto ovflo;
	407	continue;
	408	}
	409	if (casecnt)
	410	*sp++ = fixcase(c & TRIM);
	411	else
	412	*sp++ = c & TRIM;
	413	if (sp >= &genbuf[LBSIZE])
	414	ovflo:
	415	error("Line overflow@in substitute");
	416	}
	417	lp = loc2;
	418	loc2 = sp + (linebuf - genbuf);
	419	while (sp++ = lp++)
	420	if (sp >= &genbuf[LBSIZE])
	421	goto ovflo;
	422	strcLIN(genbuf);
	423	}
	424
	425	fixcase(c)
	426	register int c;
	427	{
	428
	429	if (casecnt == 0)
	430	return (c);
	431	casecnt--;
	432	if (destuc) {
	433	if (islower(c))
	434	c = toupper(c);
	435	} else
	436	if (isupper(c))
	437	c = tolower(c);
	438	return (c);
	439	}
	440
	441	char *
	442	place(sp, l1, l2)
	443	register char sp, l1, *l2;
	444	{
	445
	446	while (l1 < l2) {
	447	sp++ = fixcase(l1++);
	448	if (sp >= &genbuf[LBSIZE])
	449	return (0);
	450	}
	451	return (sp);
	452	}
	453
	454	snote(total, lines)
	455	register int total, lines;
	456	{
	457
	458	if (!notable(total))
	459	return;
	460	printf(mesg("%d subs\|%d substitutions"), total);
	461	if (lines != 1 && lines != total)
	462	printf(" on %d lines", lines);
	463	noonl();
	464	flush();
	465	}
	466
	467	compile(eof, oknl)
	468	int eof;
	469	int oknl;
	470	{
	471	register int c;
	472	register char *ep;
	473	char *lastep;
	474	char bracket[NBRA], bracketp, rhsp;
	475	int cclcnt;
	476
	477	if (isalpha(eof) \|\| isdigit(eof))
	478	error("Regular expressions cannot be delimited by letters or digits");
	479	ep = expbuf;
	480	c = getchar();
	481	if (eof == '\\')
	482	switch (c) {
	483
	484	case '/':
	485	case '?':
	486	if (scanre.Expbuf[0] == 0)
	487	error("No previous scan re\|No previous scanning regular expression");
	488	resre(scanre);
	489	return (c);
	490
	491	case '&':
	492	if (subre.Expbuf[0] == 0)
	493	error("No previous substitute re\|No previous substitute regular expression");
	494	resre(subre);
	495	return (c);
	496
	497	default:
	498	error("Badly formed re\|Regular expression \\ must be followed by / or ?");
	499	}
	500	if (c == eof \|\| c == '\n' \|\| c == EOF) {
	501	if (*ep == 0)
	502	error("No previous re\|No previous regular expression");
	503	if (c == '\n' && oknl == 0)
	504	error("Missing closing delimiter@for regular expression");
	505	if (c != eof)
	506	ungetchar(c);
	507	return (eof);
	508	}
	509	bracketp = bracket;
	510	nbra = 0;
	511	circfl = 0;
	512	if (c == '^') {
	513	c = getchar();
	514	circfl++;
	515	}
	516	ungetchar(c);
	517	for (;;) {
	518	if (ep >= &expbuf[ESIZE - 2])
	519	complex:
	520	cerror("Re too complex\|Regular expression too complicated");
	521	c = getchar();
	522	if (c == eof \|\| c == EOF) {
	523	if (bracketp != bracket)
	524	cerror("Unmatched \\(\|More \\('s than \\)'s in regular expression");
	525	*ep++ = CEOF;
	526	if (c == EOF)
	527	ungetchar(c);
	528	return (eof);
	529	}
	530	if (value(MAGIC)) {
	531	if (c != '*' \|\| ep == expbuf)
	532	lastep = ep;
	533	} else
	534	if (c != '\\' \|\| peekchar() != '*' \|\| ep == expbuf)
	535	lastep = ep;
	536	switch (c) {
	537
	538	case '\\':
	539	c = getchar();
	540	switch (c) {
	541
	542	case '(':
	543	if (nbra >= NBRA)
	544	cerror("Awash in \\('s!\|Too many \\('d subexressions in a regular expression");
	545	*bracketp++ = nbra;
	546	*ep++ = CBRA;
	547	*ep++ = nbra++;
	548	continue;
	549
	550	case ')':
	551	if (bracketp <= bracket)
	552	cerror("Extra \\)\|More \\)'s than \\('s in regular expression");
	553	*ep++ = CKET;
	554	ep++ = --bracketp;
	555	continue;
	556
	557	case '<':
	558	*ep++ = CBRC;
	559	continue;
	560
	561	case '>':
	562	*ep++ = CLET;
	563	continue;
	564	}
	565	if (value(MAGIC) == 0)
	566	magic:
	567	switch (c) {
	568
	569	case '.':
	570	*ep++ = CDOT;
	571	continue;
	572
	573	case '~':
	574	rhsp = rhsbuf;
	575	while (*rhsp) {
	576	if (*rhsp & QUOTE) {
	577	c = *rhsp & TRIM;
	578	if (c == '&')
	579	error("Replacement pattern contains &@- cannot use in re");
	580	if (c >= '1' && c <= '9')
	581	error("Replacement pattern contains \\d@- cannot use in re");
	582	}
	583	if (ep >= &expbuf[ESIZE-2])
	584	goto complex;
	585	*ep++ = CCHR;
	586	ep++ = rhsp++ & TRIM;
	587	}
	588	continue;
	589
	590	case '*':
	591	if (ep == expbuf)
	592	break;
	593	if (lastep == CBRA \|\| lastep == CKET)
	594	cerror("Illegal \|Can't a \\( ... \\) in regular expression");
	595	if (*lastep == CCHR && (lastep[1] & QUOTE))
	596	cerror("Illegal \|Can't a \\n in regular expression");
	597	*lastep \|= STAR;
	598	continue;
	599
	600	case '[':
	601	*ep++ = CCL;
	602	*ep++ = 0;
	603	cclcnt = 1;
	604	c = getchar();
	605	if (c == '^') {
	606	c = getchar();
	607	ep[-2] = NCCL;
	608	}
	609	if (c == ']')
	610	cerror("Bad character class\|Empty character class '[]' or '[^]' cannot match");
	611	while (c != ']') {
	612	if (c == '\\' && any(peekchar(), "]-^\\"))
	613	c = getchar() \| QUOTE;
	614	if (c == '\n' \|\| c == EOF)
	615	cerror("Missing ]");
	616	*ep++ = c;
	617	cclcnt++;
	618	if (ep >= &expbuf[ESIZE])
	619	goto complex;
	620	c = getchar();
	621	}
	622	lastep[1] = cclcnt;
	623	continue;
	624	}
	625	if (c == EOF) {
	626	ungetchar(EOF);
	627	c = '\\';
	628	goto defchar;
	629	}
	630	*ep++ = CCHR;
	631	if (c == '\n')
	632	cerror("No newlines in re's\|Can't escape newlines into regular expressions");
	633	/*
	634	if (c < '1' \|\| c > NBRA + '1') {
	635	*/
	636	*ep++ = c;
	637	continue;
	638	/*
	639	}
	640	c -= '1';
	641	if (c >= nbra)
	642	cerror("Bad \\n\|\\n in regular expression with n greater than the number of \\('s");
	643	*ep++ = c \| QUOTE;
	644	continue;
	645	*/
	646
	647	case '\n':
	648	if (oknl) {
	649	ungetchar(c);
	650	*ep++ = CEOF;
	651	return (eof);
	652	}
	653	cerror("Badly formed re\|Missing closing delimiter for regular expression");
	654
	655	case '$':
	656	if (peekchar() == eof \|\| peekchar() == EOF \|\| oknl && peekchar() == '\n') {
	657	*ep++ = CDOL;
	658	continue;
	659	}
	660	goto defchar;
	661
	662	case '.':
	663	case '~':
	664	case '*':
	665	case '[':
	666	if (value(MAGIC))
	667	goto magic;
	668	defchar:
	669	default:
	670	*ep++ = CCHR;
	671	*ep++ = c;
	672	continue;
	673	}
	674	}
	675	}
	676
	677	cerror(s)
	678	char *s;
	679	{
	680
	681	expbuf[0] = 0;
	682	error(s);
	683	}
	684
	685	same(a, b)
	686	register int a, b;
	687	{
	688
	689	return (a == b \|\| value(IGNORECASE) &&
	690	((islower(a) && toupper(a) == b) \|\| (islower(b) && toupper(b) == a)));
	691	}
	692
	693	char *locs;
	694
	695	execute(gf, addr)
	696	line *addr;
	697	{
	698	register char p1, p2;
	699	register int c;
	700
	701	if (gf) {
	702	if (circfl)
	703	return (0);
	704	#ifdef notdef
	705	if (loc1 == loc2)
	706	loc2++;
	707	#endif
	708	locs = p1 = loc2;
	709	} else {
	710	if (addr == zero)
	711	return (0);
	712	p1 = linebuf;
	713	getline(*addr);
	714	locs = 0;
	715	}
	716	p2 = expbuf;
	717	if (circfl) {
	718	loc1 = p1;
	719	return (advance(p1, p2));
	720	}
	721	/* fast check for first character */
	722	if (*p2 == CCHR) {
	723	c = p2[1];
	724	do {
	725	if (c != *p1 && (!value(IGNORECASE) \|\|
	726	!((islower(c) && toupper(c) == *p1) \|\|
	727	(islower(p1) && toupper(p1) == c))))
	728	continue;
	729	if (advance(p1, p2)) {
	730	loc1 = p1;
	731	return (1);
	732	}
	733	} while (*p1++);
	734	return (0);
	735	}
	736	/* regular algorithm */
	737	do {
	738	if (advance(p1, p2)) {
	739	loc1 = p1;
	740	return (1);
	741	}
	742	} while (*p1++);
	743	return (0);
	744	}
	745
	746	#define uletter(c) (isalpha(c) \|\| c == '_')
	747
	748	advance(lp, ep)
	749	register char lp, ep;
	750	{
	751	register char *curlp;
	752	char sp, sp1;
	753	int c;
	754
	755	for (;;) switch (*ep++) {
	756
	757	case CCHR:
	758	/* useless
	759	if (*ep & QUOTE) {
	760	c = *ep++ & TRIM;
	761	sp = braslist[c];
	762	sp1 = braelist[c];
	763	while (sp < sp1) {
	764	if (!same(sp, lp))
	765	return (0);
	766	sp++, lp++;
	767	}
	768	continue;
	769	}
	770	*/
	771	if (!same(ep, lp))
	772	return (0);
	773	ep++, lp++;
	774	continue;
	775
	776	case CDOT:
	777	if (*lp++)
	778	continue;
	779	return (0);
	780
	781	case CDOL:
	782	if (*lp == 0)
	783	continue;
	784	return (0);
	785
	786	case CEOF:
	787	loc2 = lp;
	788	return (1);
	789
	790	case CCL:
	791	if (cclass(ep, *lp++, 1)) {
	792	ep += *ep;
	793	continue;
	794	}
	795	return (0);
	796
	797	case NCCL:
	798	if (cclass(ep, *lp++, 0)) {
	799	ep += *ep;
	800	continue;
	801	}
	802	return (0);
	803
	804	case CBRA:
	805	braslist[*ep++] = lp;
	806	continue;
	807
	808	case CKET:
	809	braelist[*ep++] = lp;
	810	continue;
	811
	812	case CDOT\|STAR:
	813	curlp = lp;
	814	while (*lp++)
	815	continue;
	816	goto star;
	817
	818	case CCHR\|STAR:
	819	curlp = lp;
	820	while (same(lp, ep))
	821	lp++;
	822	lp++;
	823	ep++;
	824	goto star;
	825
	826	case CCL\|STAR:
	827	case NCCL\|STAR:
	828	curlp = lp;
	829	while (cclass(ep, *lp++, ep[-1] == (CCL\|STAR)))
	830	continue;
	831	ep += *ep;
	832	goto star;
	833	star:
	834	do {
	835	lp--;
	836	if (lp == locs)
	837	break;
	838	if (advance(lp, ep))
	839	return (1);
	840	} while (lp > curlp);
	841	return (0);
	842
	843	case CBRC:
	844	if (lp == expbuf)
	845	continue;
	846	if ((isdigit(lp) \|\| uletter(lp)) && !uletter(lp[-1]) && !isdigit(lp[-1]))
	847	continue;
	848	return (0);
	849
	850	case CLET:
	851	if (!uletter(lp) && !isdigit(lp))
	852	continue;
	853	return (0);
	854
	855	default:
	856	error("Re internal error");
	857	}
	858	}
	859
	860	cclass(set, c, af)
	861	register char *set;
	862	register int c;
	863	int af;
	864	{
	865	register int n;
	866
	867	if (c == 0)
	868	return (0);
	869	if (value(IGNORECASE) && isupper(c))
	870	c = tolower(c);
	871	n = *set++;
	872	while (--n)
	873	if (n > 2 && set[1] == '-') {
	874	if (c >= (set[0] & TRIM) && c <= (set[2] & TRIM))
	875	return (af);
	876	set += 3;
	877	n -= 2;
	878	} else
	879	if ((*set++ & TRIM) == c)
	880	return (af);
	881	return (!af);
	882	}